// Compile-time vector counts derived from the element sizes of Tout and Tin.
// The wrappers in this file use these to size their Vecs arguments.
//
//   in  = sizeof(Tin) / sizeof(Tout)  if Tout is narrower than Tin, else 1
//   out = sizeof(Tout) / sizeof(Tin)  if Tout is wider than Tin, else 1
//
// At most one of the two values differs from 1.
template <typename Tout, typename Tin>
struct NumVecs
{
  static constexpr size_t in =
    (sizeof(Tout) < sizeof(Tin)) ? (sizeof(Tin) / sizeof(Tout)) : 1;
  static constexpr size_t out =
    (sizeof(Tout) > sizeof(Tin)) ? (sizeof(Tout) / sizeof(Tin)) : 1;
};
59template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
62 static constexpr size_t vectors = NUM;
63 static constexpr size_t elements = NUM * Vec<T, SIMD_WIDTH>::elements;
64 static constexpr size_t bytes = NUM * Vec<T, SIMD_WIDTH>::bytes;
66 static constexpr size_t vecs = vectors;
67 static constexpr size_t elems = elements;
69 Vec<T, SIMD_WIDTH> vec[NUM];
74template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
76 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &inVecs,
77 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &outVecs)
79 convert(inVecs.vec, outVecs.vec);
82template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
84 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsNum,
85 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsDenom,
double fac,
86 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
88 fdivmul(vecsNum.vec, vecsDenom.vec, fac, vecsOut.vec);
91template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
92static SIMD_INLINE
void fmul(
93 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn,
double fac,
94 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
96 fmul(vecsIn.vec, fac, vecsOut.vec);
99template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
100static SIMD_INLINE
void faddmul(
101 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn,
double off,
102 double fac, Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
104 faddmul(vecsIn.vec, off, fac, vecsOut.vec);
107template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
108static SIMD_INLINE
void fmuladd(
109 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn,
double fac,
110 double off, Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
112 fmuladd(vecsIn.vec, fac, off, vecsOut.vec);
115template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
117 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn1,
118 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn2,
double w,
119 double fac, Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
121 fwaddmul(vecsIn1.vec, vecsIn2.vec, w, fac, vecsOut.vec);
124template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
125static SIMD_INLINE
void load(
const T *
const p, Vecs<NUM, T, SIMD_WIDTH> &inVecs)
127 load(p, inVecs.vec, inVecs.vectors);
130template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
131static SIMD_INLINE
void loadu(
const T *
const p,
132 Vecs<NUM, T, SIMD_WIDTH> &inVecs)
134 loadu(p, inVecs.vec, inVecs.vectors);
137template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
138static SIMD_INLINE
void store(T *
const p,
139 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
141 store(p, outVecs.vec, outVecs.vectors);
144template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
145static SIMD_INLINE
void storeu(T *
const p,
146 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
148 storeu(p, outVecs.vec, outVecs.vectors);
151template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
152static SIMD_INLINE
void store16(T *
const p,
153 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
155 store16(p, outVecs.vec);
158template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
159static SIMD_INLINE
void storeu16(T *
const p,
160 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
162 storeu16(p, outVecs.vec);
165template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
166static SIMD_INLINE Vec<Tout, SIMD_WIDTH>
packs(
167 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &a)
172template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
173static SIMD_INLINE
void extend(
174 const Vec<Tin, SIMD_WIDTH> &vIn,
175 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vOut)
181template <
typename Tout,
typename Tin,
size_t SIMD_WIDTH>
182static SIMD_INLINE Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH>
extend(
183 const Vec<Tin, SIMD_WIDTH> &vIn)
185 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> vOut;
190template <
size_t N,
typename T,
size_t SIMD_WIDTH>
191static SIMD_INLINE
void swizzle(Vecs<N, T, SIMD_WIDTH> &v)
198template <
typename T,
size_t SIMD_WIDTH>
200 const Vecs<(SIMD_WIDTH /
sizeof(T)), T, SIMD_WIDTH> inRows,
201 Vecs<(SIMD_WIDTH /
sizeof(T)), T, SIMD_WIDTH> &outRows)
206template <
typename T,
size_t SIMD_WIDTH>
208 Vecs<(SIMD_WIDTH /
sizeof(T)), T, SIMD_WIDTH> &rows)
213template <
size_t N,
typename T,
size_t SIMD_WIDTH>
214static SIMD_INLINE
void swizzle2(Vecs<2 * N, T, SIMD_WIDTH> &v)
219template <
size_t N,
typename T,
size_t SIMD_WIDTH>
220static SIMD_INLINE
void unswizzle(Vecs<2 * N, T, SIMD_WIDTH> &v)
225template <
typename T,
size_t SIMD_WIDTH>
226static SIMD_INLINE Vec<T, SIMD_WIDTH>
hadd(
227 const Vecs<(SIMD_WIDTH /
sizeof(T)), T, SIMD_WIDTH> &v)
232template <
typename T,
size_t SIMD_WIDTH>
233static SIMD_INLINE Vec<T, SIMD_WIDTH>
hadds(
234 const Vecs<(SIMD_WIDTH /
sizeof(T)), T, SIMD_WIDTH> &v)
239template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
240static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
add(
241 const Vecs<NUM, T, SIMD_WIDTH> &a,
const Vecs<NUM, T, SIMD_WIDTH> &b)
243 Vecs<NUM, T, SIMD_WIDTH> res;
244 for (
size_t v = 0; v < NUM; v++) res.vec[v] =
add(a.vec[v], b.vec[v]);
248template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
249static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
adds(
250 const Vecs<NUM, T, SIMD_WIDTH> &a,
const Vecs<NUM, T, SIMD_WIDTH> &b)
252 Vecs<NUM, T, SIMD_WIDTH> res;
253 for (
size_t v = 0; v < NUM; v++) res.vec[v] =
adds(a.vec[v], b.vec[v]);
257template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
258static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
sub(
259 const Vecs<NUM, T, SIMD_WIDTH> &a,
const Vecs<NUM, T, SIMD_WIDTH> &b)
261 Vecs<NUM, T, SIMD_WIDTH> res;
262 for (
size_t v = 0; v < NUM; v++) res.vec[v] =
sub(a.vec[v], b.vec[v]);
266template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
267static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
subs(
268 const Vecs<NUM, T, SIMD_WIDTH> &a,
const Vecs<NUM, T, SIMD_WIDTH> &b)
270 Vecs<NUM, T, SIMD_WIDTH> res;
271 for (
size_t v = 0; v < NUM; v++) res.vec[v] =
subs(a.vec[v], b.vec[v]);
275template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
276static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
min(
277 const Vecs<NUM, T, SIMD_WIDTH> &a,
const Vecs<NUM, T, SIMD_WIDTH> &b)
279 Vecs<NUM, T, SIMD_WIDTH> res;
280 for (
size_t v = 0; v < NUM; v++) res.vec[v] =
min(a.vec[v], b.vec[v]);
284template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
285static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
max(
286 const Vecs<NUM, T, SIMD_WIDTH> &a,
const Vecs<NUM, T, SIMD_WIDTH> &b)
288 Vecs<NUM, T, SIMD_WIDTH> res;
289 for (
size_t v = 0; v < NUM; v++) res.vec[v] =
max(a.vec[v], b.vec[v]);
293template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
294static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
setzero()
296 Vecs<NUM, T, SIMD_WIDTH> res;
301template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
302static SIMD_INLINE
void setzero(Vecs<NUM, T, SIMD_WIDTH> &res)
307template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
308static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH>
set1(T a)
310 Vecs<NUM, T, SIMD_WIDTH> res;
315template <
size_t NUM,
typename T,
size_t SIMD_WIDTH>
316static SIMD_INLINE
void set1(Vecs<NUM, T, SIMD_WIDTH> &res, T a)
static Vec< T, SIMD_WIDTH > sub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's.
Definition base.H:388
static Vec< T, SIMD_WIDTH > subs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's using saturated arithmetic.
Definition base.H:405
static Vec< T, SIMD_WIDTH > adds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's using saturated arithmetic.
Definition base.H:374
static Vec< T, SIMD_WIDTH > add(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's.
Definition base.H:357
static void fdivmul(const Vec< Tin, SIMD_WIDTH > vecsNum[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsDenom[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Divides Vec's element-wise, then multiplies with a constant factor in floating point arithmetic.
Definition ext.H:748
static void fwaddmul(const Vec< Tin, SIMD_WIDTH > vecsIn1[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsIn2[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > w, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Linearly interpolates Vec's element-wise with a constant weight and then scales by a constant factor ...
Definition ext.H:1050
static void fmul(const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Multiplies Vec's element-wise with a floating point constant in floating point arithmetic.
Definition ext.H:931
static void faddmul(const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > off, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Adds a floating point constant to the elements of Vec's, then multiplies with a floating point consta...
Definition ext.H:965
static void fmuladd(const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, dont_deduce< Tfloat > off, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Multiplies the elements of Vec's with a floating point constant, then adds a floating point constant ...
Definition ext.H:1004
static Vec< T, SIMD_WIDTH > hadds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's with saturation.
Definition base.H:493
static Vec< T, SIMD_WIDTH > hadd(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's.
Definition base.H:477
static Vec< T, SIMD_WIDTH > setzero()
Returns a Vec with all elements set to zero.
Definition base.H:70
static Vec< T, SIMD_WIDTH > set1(const dont_deduce< T > a)
Returns a Vec with all elements set to the same value.
Definition base.H:88
static Vec< T, SIMD_WIDTH > min(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the minimum of the elements of two Vec's.
Definition base.H:606
static Vec< T, SIMD_WIDTH > max(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the maximum of the elements of two Vec's.
Definition base.H:620
static Vec< T, SIMD_WIDTH > load(const T *const p)
Loads a Vec from aligned memory.
Definition base.H:209
static Vec< T, SIMD_WIDTH > loadu(const T *const p)
Loads a Vec from unaligned memory.
Definition base.H:231
static void store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory.
Definition base.H:246
static void storeu(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to unaligned memory.
Definition base.H:265
static void transpose(const Vec< T, SIMD_WIDTH > inRows[Vec< T, SIMD_WIDTH >::elems], Vec< T, SIMD_WIDTH > outRows[Vec< T, SIMD_WIDTH >::elems])
Transposes a matrix held in an array of Vec's.
Definition ext.H:3223
static void swizzle(Vec< T, SIMD_WIDTH > v[N])
Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
Definition base.H:1141
static void unswizzle(Vec< T, SIMD_WIDTH > v[2 *N])
Unswizzle/interleave/convert from SoA to AoS multiple Vec's in-place.
Definition ext.H:3203
static void swizzle2(Vec< T, SIMD_WIDTH > v[2 *N])
Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
Definition ext.H:3160
static Vec< Tout, SIMD_WIDTH > packs(const Vec< Tin, SIMD_WIDTH > &a, const Vec< Tin, SIMD_WIDTH > &b)
Packs two Vec's into one by converting the elements into the next smaller type with saturation.
Definition base.H:1397
static void extend(const Vec< Tin, SIMD_WIDTH > &vIn, Vec< Tout, SIMD_WIDTH > vOut[sizeof(Tout)/sizeof(Tin)])
Extends the elements of a Vec to a larger or equally sized type.
Definition base.H:1423
static void convert(const Vec< Tin, SIMD_WIDTH > inVecs[numInVecs< Tout, Tin >()], Vec< Tout, SIMD_WIDTH > outVecs[numOutVecs< Tout, Tin >()])
Converts (potentially multiple) Vec's between different types.
Definition ext.H:676
Namespace for T-SIMD.
Definition time_measurement.H:161