T-SIMD v31.1.0
A C++ template SIMD library
Loading...
Searching...
No Matches
vecs.H
1// ===========================================================================
2//
3// multiple Vec in a template class plus some functions
4//
5// This source code file is part of the following software:
6//
7// - the low-level C++ template SIMD library
8// - the SIMD implementation of the MinWarping and the 2D-Warping methods
9// for local visual homing.
10//
11// The software is provided based on the accompanying license agreement in the
12// file LICENSE.md.
13// The software is provided "as is" without any warranty by the licensor and
14// without any liability of the licensor, and the software may not be
15// distributed by the licensee; see the license agreement for details.
16//
17// (C) Ralf Möller
18// Computer Engineering
19// Faculty of Technology
20// Bielefeld University
21// www.ti.uni-bielefeld.de
22//
23// ===========================================================================
24
25#pragma once
26#ifndef SIMD_VECS_H_
27#define SIMD_VECS_H_
28
29#include "base.H"
30#include "defs.H"
31#include "ext.H"
32#include "vec.H"
33
34// exclude from doxygen
35// TODO: include in doxygen documentation?
36
37// exclude from doxygen (until endcond)
39
40namespace simd {
41
42// ===========================================================================
43// Vecs: for functions operating on multiple Vec
44// ===========================================================================
45
// Compile-time helper giving the number of input and output vectors used by
// the conversion-type functions (convert, packs, extend, f*).
//   in  = sizeof(Tin) / sizeof(Tout) if Tin is wider than Tout, otherwise 1
//   out = sizeof(Tout) / sizeof(Tin) if Tout is wider than Tin, otherwise 1
template <typename Tout, typename Tin>
struct NumVecs
{
  // number of input vectors
  static constexpr size_t in =
    (sizeof(Tin) > sizeof(Tout)) ? (sizeof(Tin) / sizeof(Tout)) : 1;
  // number of output vectors
  static constexpr size_t out =
    (sizeof(Tin) < sizeof(Tout)) ? (sizeof(Tout) / sizeof(Tin)) : 1;
};
56
57// multiple Vec
58
59template <size_t NUM, typename T, size_t SIMD_WIDTH>
60struct Vecs
61{
62 static constexpr size_t vectors = NUM;
63 static constexpr size_t elements = NUM * Vec<T, SIMD_WIDTH>::elements;
64 static constexpr size_t bytes = NUM * Vec<T, SIMD_WIDTH>::bytes;
65 // shorter version:
66 static constexpr size_t vecs = vectors;
67 static constexpr size_t elems = elements;
68
69 Vec<T, SIMD_WIDTH> vec[NUM];
70};
71
72// wrapper functions working on Vecs
73
74template <typename Tout, typename Tin, size_t SIMD_WIDTH>
75static SIMD_INLINE void convert(
76 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &inVecs,
77 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &outVecs)
78{
79 convert(inVecs.vec, outVecs.vec);
80}
81
82template <typename Tout, typename Tin, size_t SIMD_WIDTH>
83static SIMD_INLINE void fdivmul(
84 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsNum,
85 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsDenom, double fac,
86 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
87{
88 fdivmul(vecsNum.vec, vecsDenom.vec, fac, vecsOut.vec);
89}
90
91template <typename Tout, typename Tin, size_t SIMD_WIDTH>
92static SIMD_INLINE void fmul(
93 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn, double fac,
94 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
95{
96 fmul(vecsIn.vec, fac, vecsOut.vec);
97}
98
99template <typename Tout, typename Tin, size_t SIMD_WIDTH>
100static SIMD_INLINE void faddmul(
101 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn, double off,
102 double fac, Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
103{
104 faddmul(vecsIn.vec, off, fac, vecsOut.vec);
105}
106
107template <typename Tout, typename Tin, size_t SIMD_WIDTH>
108static SIMD_INLINE void fmuladd(
109 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn, double fac,
110 double off, Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
111{
112 fmuladd(vecsIn.vec, fac, off, vecsOut.vec);
113}
114
115template <typename Tout, typename Tin, size_t SIMD_WIDTH>
116static SIMD_INLINE void fwaddmul(
117 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn1,
118 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &vecsIn2, double w,
119 double fac, Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vecsOut)
120{
121 fwaddmul(vecsIn1.vec, vecsIn2.vec, w, fac, vecsOut.vec);
122}
123
124template <size_t NUM, typename T, size_t SIMD_WIDTH>
125static SIMD_INLINE void load(const T *const p, Vecs<NUM, T, SIMD_WIDTH> &inVecs)
126{
127 load(p, inVecs.vec, inVecs.vectors);
128}
129
130template <size_t NUM, typename T, size_t SIMD_WIDTH>
131static SIMD_INLINE void loadu(const T *const p,
132 Vecs<NUM, T, SIMD_WIDTH> &inVecs)
133{
134 loadu(p, inVecs.vec, inVecs.vectors);
135}
136
137template <size_t NUM, typename T, size_t SIMD_WIDTH>
138static SIMD_INLINE void store(T *const p,
139 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
140{
141 store(p, outVecs.vec, outVecs.vectors);
142}
143
144template <size_t NUM, typename T, size_t SIMD_WIDTH>
145static SIMD_INLINE void storeu(T *const p,
146 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
147{
148 storeu(p, outVecs.vec, outVecs.vectors);
149}
150
151template <size_t NUM, typename T, size_t SIMD_WIDTH>
152static SIMD_INLINE void store16(T *const p,
153 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
154{
155 store16(p, outVecs.vec);
156}
157
158template <size_t NUM, typename T, size_t SIMD_WIDTH>
159static SIMD_INLINE void storeu16(T *const p,
160 const Vecs<NUM, T, SIMD_WIDTH> &outVecs)
161{
162 storeu16(p, outVecs.vec);
163}
164
165template <typename Tout, typename Tin, size_t SIMD_WIDTH>
166static SIMD_INLINE Vec<Tout, SIMD_WIDTH> packs(
167 const Vecs<NumVecs<Tout, Tin>::in, Tin, SIMD_WIDTH> &a)
168{
169 return packs<Tout>(a.vec);
170}
171
172template <typename Tout, typename Tin, size_t SIMD_WIDTH>
173static SIMD_INLINE void extend(
174 const Vec<Tin, SIMD_WIDTH> &vIn,
175 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> &vOut)
176{
177 extend(vIn, vOut.vec);
178}
179
180// with Vecs as return value
181template <typename Tout, typename Tin, size_t SIMD_WIDTH>
182static SIMD_INLINE Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> extend(
183 const Vec<Tin, SIMD_WIDTH> &vIn)
184{
185 Vecs<NumVecs<Tout, Tin>::out, Tout, SIMD_WIDTH> vOut;
186 extend(vIn, vOut);
187 return vOut;
188}
189
190template <size_t N, typename T, size_t SIMD_WIDTH>
191static SIMD_INLINE void swizzle(Vecs<N, T, SIMD_WIDTH> &v)
192{
193 swizzle<N>(v.vec);
194}
195
196// inRows passed by-value to allow in-place transpose
197// 30. Sep 22 (rm): was called transpose1, moved back to transpose
198template <typename T, size_t SIMD_WIDTH>
199static SIMD_INLINE void transpose(
200 const Vecs<(SIMD_WIDTH / sizeof(T)), T, SIMD_WIDTH> inRows,
201 Vecs<(SIMD_WIDTH / sizeof(T)), T, SIMD_WIDTH> &outRows)
202{
203 transpose(inRows.vec, outRows.vec);
204}
205
206template <typename T, size_t SIMD_WIDTH>
207static SIMD_INLINE void transpose(
208 Vecs<(SIMD_WIDTH / sizeof(T)), T, SIMD_WIDTH> &rows)
209{
210 transpose(rows.vec);
211}
212
213template <size_t N, typename T, size_t SIMD_WIDTH>
214static SIMD_INLINE void swizzle2(Vecs<2 * N, T, SIMD_WIDTH> &v)
215{
216 swizzle2(v.vec);
217}
218
219template <size_t N, typename T, size_t SIMD_WIDTH>
220static SIMD_INLINE void unswizzle(Vecs<2 * N, T, SIMD_WIDTH> &v)
221{
222 unswizzle(v.vec);
223}
224
225template <typename T, size_t SIMD_WIDTH>
226static SIMD_INLINE Vec<T, SIMD_WIDTH> hadd(
227 const Vecs<(SIMD_WIDTH / sizeof(T)), T, SIMD_WIDTH> &v)
228{
229 return hadd(v.vec);
230}
231
232template <typename T, size_t SIMD_WIDTH>
233static SIMD_INLINE Vec<T, SIMD_WIDTH> hadds(
234 const Vecs<(SIMD_WIDTH / sizeof(T)), T, SIMD_WIDTH> &v)
235{
236 return hadds(v.vec);
237}
238
239template <size_t NUM, typename T, size_t SIMD_WIDTH>
240static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> add(
241 const Vecs<NUM, T, SIMD_WIDTH> &a, const Vecs<NUM, T, SIMD_WIDTH> &b)
242{
243 Vecs<NUM, T, SIMD_WIDTH> res;
244 for (size_t v = 0; v < NUM; v++) res.vec[v] = add(a.vec[v], b.vec[v]);
245 return res;
246}
247
248template <size_t NUM, typename T, size_t SIMD_WIDTH>
249static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> adds(
250 const Vecs<NUM, T, SIMD_WIDTH> &a, const Vecs<NUM, T, SIMD_WIDTH> &b)
251{
252 Vecs<NUM, T, SIMD_WIDTH> res;
253 for (size_t v = 0; v < NUM; v++) res.vec[v] = adds(a.vec[v], b.vec[v]);
254 return res;
255}
256
257template <size_t NUM, typename T, size_t SIMD_WIDTH>
258static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> sub(
259 const Vecs<NUM, T, SIMD_WIDTH> &a, const Vecs<NUM, T, SIMD_WIDTH> &b)
260{
261 Vecs<NUM, T, SIMD_WIDTH> res;
262 for (size_t v = 0; v < NUM; v++) res.vec[v] = sub(a.vec[v], b.vec[v]);
263 return res;
264}
265
266template <size_t NUM, typename T, size_t SIMD_WIDTH>
267static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> subs(
268 const Vecs<NUM, T, SIMD_WIDTH> &a, const Vecs<NUM, T, SIMD_WIDTH> &b)
269{
270 Vecs<NUM, T, SIMD_WIDTH> res;
271 for (size_t v = 0; v < NUM; v++) res.vec[v] = subs(a.vec[v], b.vec[v]);
272 return res;
273}
274
275template <size_t NUM, typename T, size_t SIMD_WIDTH>
276static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> min(
277 const Vecs<NUM, T, SIMD_WIDTH> &a, const Vecs<NUM, T, SIMD_WIDTH> &b)
278{
279 Vecs<NUM, T, SIMD_WIDTH> res;
280 for (size_t v = 0; v < NUM; v++) res.vec[v] = min(a.vec[v], b.vec[v]);
281 return res;
282}
283
284template <size_t NUM, typename T, size_t SIMD_WIDTH>
285static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> max(
286 const Vecs<NUM, T, SIMD_WIDTH> &a, const Vecs<NUM, T, SIMD_WIDTH> &b)
287{
288 Vecs<NUM, T, SIMD_WIDTH> res;
289 for (size_t v = 0; v < NUM; v++) res.vec[v] = max(a.vec[v], b.vec[v]);
290 return res;
291}
292
293template <size_t NUM, typename T, size_t SIMD_WIDTH>
294static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> setzero()
295{
296 Vecs<NUM, T, SIMD_WIDTH> res;
297 for (size_t v = 0; v < NUM; v++) res.vec[v] = setzero<T, SIMD_WIDTH>();
298 return res;
299}
300
301template <size_t NUM, typename T, size_t SIMD_WIDTH>
302static SIMD_INLINE void setzero(Vecs<NUM, T, SIMD_WIDTH> &res)
303{
304 for (size_t v = 0; v < NUM; v++) res.vec[v] = setzero<T, SIMD_WIDTH>();
305}
306
307template <size_t NUM, typename T, size_t SIMD_WIDTH>
308static SIMD_INLINE Vecs<NUM, T, SIMD_WIDTH> set1(T a)
309{
310 Vecs<NUM, T, SIMD_WIDTH> res;
311 for (size_t v = 0; v < NUM; v++) res.vec[v] = set1<T, SIMD_WIDTH>(a);
312 return res;
313}
314
315template <size_t NUM, typename T, size_t SIMD_WIDTH>
316static SIMD_INLINE void set1(Vecs<NUM, T, SIMD_WIDTH> &res, T a)
317{
318 for (size_t v = 0; v < NUM; v++) res.vec[v] = set1<T, SIMD_WIDTH>(a);
319}
320
321// TODO: add more Vecs functions if needed
322
323} // namespace simd
324
326
327#endif
static Vec< T, SIMD_WIDTH > sub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's.
Definition base.H:388
static Vec< T, SIMD_WIDTH > subs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's using saturated arithmetic.
Definition base.H:405
static Vec< T, SIMD_WIDTH > adds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's using saturated arithmetic.
Definition base.H:374
static Vec< T, SIMD_WIDTH > add(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's.
Definition base.H:357
static void fdivmul(const Vec< Tin, SIMD_WIDTH > vecsNum[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsDenom[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Divides Vec's element-wise, then multiplies with a constant factor in floating point arithmetic.
Definition ext.H:748
static void fwaddmul(const Vec< Tin, SIMD_WIDTH > vecsIn1[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsIn2[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > w, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Linearly interpolates Vec's element-wise with a constant weight and then scales by a constant factor ...
Definition ext.H:1050
static void fmul(const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Multiplies Vec's element-wise with a floating point constant in floating point arithmetic.
Definition ext.H:931
static void faddmul(const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > off, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Adds a floating point constant to the elements of Vec's, then multiplies with a floating point consta...
Definition ext.H:965
static void fmuladd(const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, dont_deduce< Tfloat > off, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
Multiplies the elements of Vec's with a floating point constant, then adds a floating point constant ...
Definition ext.H:1004
static Vec< T, SIMD_WIDTH > hadds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's with saturation.
Definition base.H:493
static Vec< T, SIMD_WIDTH > hadd(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's.
Definition base.H:477
static Vec< T, SIMD_WIDTH > setzero()
Returns a Vec with all elements set to zero.
Definition base.H:70
static Vec< T, SIMD_WIDTH > set1(const dont_deduce< T > a)
Returns a Vec with all elements set to the same value.
Definition base.H:88
static Vec< T, SIMD_WIDTH > min(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the minimum of the elements of two Vec's.
Definition base.H:606
static Vec< T, SIMD_WIDTH > max(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the maximum of the elements of two Vec's.
Definition base.H:620
static Vec< T, SIMD_WIDTH > load(const T *const p)
Loads a Vec from aligned memory.
Definition base.H:209
static Vec< T, SIMD_WIDTH > loadu(const T *const p)
Loads a Vec from unaligned memory.
Definition base.H:231
static void store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory.
Definition base.H:246
static void storeu(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to unaligned memory.
Definition base.H:265
static void transpose(const Vec< T, SIMD_WIDTH > inRows[Vec< T, SIMD_WIDTH >::elems], Vec< T, SIMD_WIDTH > outRows[Vec< T, SIMD_WIDTH >::elems])
Transposes a matrix held in an array of Vec's.
Definition ext.H:3223
static void swizzle(Vec< T, SIMD_WIDTH > v[N])
Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
Definition base.H:1141
static void unswizzle(Vec< T, SIMD_WIDTH > v[2 *N])
Unswizzle/interleave/convert from SoA to AoS multiple Vec's in-place.
Definition ext.H:3203
static void swizzle2(Vec< T, SIMD_WIDTH > v[2 *N])
Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
Definition ext.H:3160
static Vec< Tout, SIMD_WIDTH > packs(const Vec< Tin, SIMD_WIDTH > &a, const Vec< Tin, SIMD_WIDTH > &b)
Packs two Vec's into one by converting the elements into the next smaller type with saturation.
Definition base.H:1397
static void extend(const Vec< Tin, SIMD_WIDTH > &vIn, Vec< Tout, SIMD_WIDTH > vOut[sizeof(Tout)/sizeof(Tin)])
Extends the elements of a Vec to a larger or equally sized type.
Definition base.H:1423
static void convert(const Vec< Tin, SIMD_WIDTH > inVecs[numInVecs< Tout, Tin >()], Vec< Tout, SIMD_WIDTH > outVecs[numOutVecs< Tout, Tin >()])
Converts (potentially multiple) Vec's between different types.
Definition ext.H:676
Namespace for T-SIMD.
Definition time_measurement.H:161