T-SIMD v31.1.0
A C++ template SIMD library
Loading...
Searching...
No Matches
simd Namespace Reference

Namespace for T-SIMD. More...

Detailed Description

Namespace for T-SIMD.

Classes

class  aligned_allocator
 Aligned allocator. More...
 
struct  Decimal
 Class for formatting SIMD types as decimal numbers. More...
 
struct  Format
 Class for generating format strings for printf for SIMD types. More...
 
struct  HAcc
 Iterative horizontal accumulator. Calculates the horizontal accumulation of multiple (Vec<T, SIMD_WIDTH>::elems) Vec's into a single Vec in parallel with the Vec's to be accumulated pushed one by one. More...
 
class  HAccStore
 Iterative horizontal accumulator with store of the result. Calculates the horizontal accumulation of multiple Vec's in parallel with the Vec's to be accumulated pushed one by one. Stores the result of the horizontal accumulation every Vec<T, SIMD_WIDTH>::elems Vec's into memory. More...
 
struct  HAdd
 Horizontal addition class for iterative horizontal accumulation. More...
 
struct  HAdds
 Horizontal saturated addition class for iterative horizontal accumulation. More...
 
struct  HMax
 Horizontal maximum class for iterative horizontal accumulation. More...
 
struct  HMin
 Horizontal minimum class for iterative horizontal accumulation. More...
 
class  Mask
 SIMD mask class consisting of as many bits as the corresponding Vec has elements. More...
 
struct  TypeInfo
 Type information for SIMD types. More...
 
class  Vec
 SIMD vector class, holds multiple elements of the same type. More...
 

Typedefs

template<typename Tout , typename Tin >
using BigEnoughFloat
 Smallest floating point type that is at least as big as the input and output types.
 
using Byte
 Unsigned 8-bit integer.
 
template<typename T >
using dont_deduce
 Helper type to prevent template argument deduction.
 
using Double
 Double-precision floating point number (64-bit).
 
using Float
 Single-precision floating point number (32-bit).
 
using Int
 Signed 32-bit integer.
 
using Long
 Signed 64-bit integer.
 
template<typename Tout , typename Tin >
using NumSIMDVecs
 Alias for NumVecs.
 
using Short
 Signed 16-bit integer.
 
using SignedByte
 Signed 8-bit integer.
 
using SIMDByte
 Alias for Byte.
 
using SIMDBytePtr
 Pointer to SIMDByte.
 
template<typename T >
using SIMDDecimal
 Alias for Decimal.
 
using SIMDFloat
 Alias for Float.
 
using SIMDFloatPtr
 Pointer to SIMDFloat.
 
template<typename T >
using SIMDFormat
 Alias for Format.
 
using SIMDInt
 Alias for Int.
 
using SIMDIntPtr
 Pointer to SIMDInt.
 
template<typename T , size_t SIMD_WIDTH>
using SIMDMask
 Alias for Mask.
 
using SIMDShort
 Alias for Short.
 
using SIMDShortPtr
 Pointer to SIMDShort.
 
using SIMDSignedByte
 Alias for SignedByte.
 
using SIMDSignedBytePtr
 Pointer to SIMDSignedByte.
 
template<typename T >
using SIMDTypeInfo
 Alias for TypeInfo.
 
template<typename T , size_t SIMD_WIDTH>
using SIMDVec
 Alias for Vec.
 
template<size_t NUM, typename T , size_t SIMD_WIDTH>
using SIMDVecs
 Alias for Vecs.
 
using SIMDWord
 Alias for Word.
 
using SIMDWordPtr
 Pointer to SIMDWord.
 
using Word
 Unsigned 16-bit integer.
 

Enumerations

enum class  SortSlope { ASCENDING , DESCENDING }
 Used to indicate the direction of a sort function.
 

Functions

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > abs (const Vec< T, SIMD_WIDTH > &a)
 Computes the absolute value of the elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > absDiff (const Vec< T, SIMD_WIDTH > &v1, const Vec< T, SIMD_WIDTH > &v2)
 Computes the absolute difference of the elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > add (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Adds the elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > adds (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Adds the elements of two Vec's using saturated arithmetic.
 
void aligned_free (void *ptr)
 Aligned memory deallocation.
 
void * aligned_malloc (size_t alignment, size_t size)
 Aligned memory allocation.
 
void * aligned_malloc (size_t size)
 Aligned memory allocation aligned to NATIVE_SIMD_WIDTH.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > alignre (const Vec< T, SIMD_WIDTH > &h, const Vec< T, SIMD_WIDTH > &l)
 Concatenates two Vec's, shifts the result right by a constant number of elements, and returns the low half of the result.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > and_ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Alias for bit_and().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > avg (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the average of the elements of two Vec's, rounded up.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > avgrd (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the average of the elements of two Vec's, rounded down.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > avgru (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the average of the elements of two Vec's, rounded up.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > bit_and (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the bitwise AND of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > bit_andnot (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the bitwise ANDNOT of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > bit_not (const Vec< T, SIMD_WIDTH > &a)
 Computes the bitwise NOT of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > bit_or (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the bitwise OR of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > bit_xor (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the bitwise XOR of two Vec's.
 
template<SortSlope SLOPE, typename T , size_t SIMD_WIDTH>
static void bitonicSort (Vec< T, SIMD_WIDTH > vecs[Vec< T, SIMD_WIDTH >::elems])
 Sorts multiple Vec's independently using the bitonic sort algorithm.
 
template<SortSlope SLOPE, typename T , size_t SIMD_WIDTH>
static void bitonicSortSortedPairs (Vec< T, SIMD_WIDTH > vecs[Vec< T, SIMD_WIDTH >::elems])
 Fuses consecutive pairs of sorted Vec's such that the pair is sorted over the two vectors.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > ceil (const Vec< T, SIMD_WIDTH > &a)
 Rounds the elements of a Vec up to the nearest integer.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > cmpeq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for equality ( == ).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > cmpge (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for greater-than-or-equal ( >= ).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > cmpgt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for greater-than ( > ).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > cmple (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for less-than-or-equal ( <= ).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > cmplt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for less-than ( < ).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > cmpneq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for inequality ( != ).
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static void convert (const Vec< Tin, SIMD_WIDTH > inVecs[numInVecs< Tout, Tin >()], Vec< Tout, SIMD_WIDTH > outVecs[numOutVecs< Tout, Tin >()])
 Converts (potentially multiple) Vec's between different types.
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH > cvts (const Vec< Tin, SIMD_WIDTH > &a)
 Converts the elements of a Vec between integer and floating point types of the same size.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > div (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Divides the elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > div2r0 (const Vec< T, SIMD_WIDTH > &a)
 Divides all elements of a Vec by 2 and rounds the result towards zero.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > div2rd (const Vec< T, SIMD_WIDTH > &a)
 Divides all elements of a Vec by 2 and rounds down the result.
 
template<typename T , size_t SIMD_WIDTH>
static T elem0 (const Vec< T, SIMD_WIDTH > &a)
 Alias for extract<0>().
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static void extend (const Vec< Tin, SIMD_WIDTH > &vIn, Vec< Tout, SIMD_WIDTH > vOut[sizeof(Tout)/sizeof(Tin)])
 Extends the elements of a Vec to a larger or equally sized type.
 
template<size_t INDEX, typename T , size_t SIMD_WIDTH>
static T extract (const Vec< T, SIMD_WIDTH > &a)
 Extracts a single value from a Vec.
 
template<size_t LANE_INDEX, typename T , size_t SIMD_WIDTH>
static Vec< T, 16 > extractLane (const Vec< T, SIMD_WIDTH > &a)
 Extracts a 16-byte lane from a Vec as a Vec < T, 16 >.
 
template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void faddmul (const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > off, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
 Adds a floating point constant to the elements of Vec's, then multiplies with a floating point constant in floating point arithmetic.
 
template<size_t DIM, size_t NVEC, typename Tout , typename Tin , size_t SIMD_WIDTH>
static void fdivMsigmoidmul (const Vec< Tin, SIMD_WIDTH > vecsNum[DIM][NVEC], const Vec< Tin, SIMD_WIDTH > vecsDenom[DIM][NVEC], const double w[DIM], const double w0[DIM], double fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
 Special function used in MinWarping.
 
template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void fdivmul (const Vec< Tin, SIMD_WIDTH > vecsNum[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsDenom[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
 Divides Vec's element-wise, then multiplies with a constant factor in floating point arithmetic.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > floor (const Vec< T, SIMD_WIDTH > &a)
 Rounds the elements of a Vec down to the nearest integer.
 
template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void fmul (const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
 Multiplies Vec's element-wise with a floating point constant in floating point arithmetic.
 
template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void fmuladd (const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, dont_deduce< Tfloat > off, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
 Multiplies the elements of Vec's with a floating point constant, then adds a floating point constant in floating point arithmetic.
 
template<typename T , size_t SIMD_WIDTH>
static void fprint (FILE *f, const char *format, const char *separator, const Vec< T, SIMD_WIDTH > &vec)
 Writes the formatted elements of a Vec to a file separated by a separator string.
 
template<typename T , size_t SIMD_WIDTH>
static void fprint (FILE *f, const char *format, const Vec< T, SIMD_WIDTH > &vec)
 Writes the formatted elements of a Vec to a file.
 
template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void fwaddmul (const Vec< Tin, SIMD_WIDTH > vecsIn1[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsIn2[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > w, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
 Linearly interpolates Vec's element-wise with a constant weight and then scales by a constant factor in floating point arithmetic.
 
struct timespec getTimeSpec ()
 Get the current value of this process's CPU time clock.
 
struct timespec getTimeSpecMonotonic ()
 Get the current value of the system-wide real-time clock.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > hadd (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Horizontally adds adjacent elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static T hadd (const Vec< T, SIMD_WIDTH > &v)
 Adds all elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > hadd (const Vec< T, SIMD_WIDTH > v[Vec< T, SIMD_WIDTH >::elems])
 Sums the elements of multiple Vec's independently and returns a Vec with the results.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > hadds (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Horizontally adds adjacent elements of two Vec's with saturation.
 
template<typename T , size_t SIMD_WIDTH>
static T hadds (const Vec< T, SIMD_WIDTH > &v)
 Adds all elements of a Vec using saturated arithmetic.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > hadds (const Vec< T, SIMD_WIDTH > v[Vec< T, SIMD_WIDTH >::elems])
 Sums the elements of multiple Vec's independently using saturated arithmetic and returns a Vec with the results.
 
template<typename T , size_t SIMD_WIDTH>
static T hmax (const Vec< T, SIMD_WIDTH > &v)
 Calculates the maximum of all elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static T hmin (const Vec< T, SIMD_WIDTH > &v)
 Calculates the minimum of all elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > hsub (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Horizontally subtracts adjacent elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > hsubs (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Horizontally subtracts adjacent elements of two Vec's with saturation.
 
template<typename Tcond , typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > ifelse (const Vec< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
 Selects elements from two Vec's based on a condition Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > int2bits (const uint64_t a)
 Sets all bits of each element of a Vec to the corresponding bit of an integer.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > int2msb (const uint64_t a)
 Sets the most significant bit of each element of a Vec to the corresponding bit of an integer.
 
template<typename T , size_t SIMD_WIDTH>
static simd::Vec< T, SIMD_WIDTH > integrate (const simd::Vec< T, SIMD_WIDTH > &v)
 Integrates the values of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > iota ()
 Creates a Vec with sequentially increasing numbers, starting with 0.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kadd (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
 Adds two Mask's together as if they were integers.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kand (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
 Computes the bitwise AND of two Mask's.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kandn (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
 Computes the bitwise ANDNOT of two Mask's.
 
template<typename T , size_t SIMD_WIDTH>
static bool kcmpeq (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
 Tests if all bits of two Mask's are equal.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > knot (const Mask< T, SIMD_WIDTH > &a)
 Computes the bitwise NOT of a Mask.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kor (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
 Computes the bitwise OR of two Mask's.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kshiftli (const Mask< T, SIMD_WIDTH > &a)
 Shifts the bits of a Mask to the left by a constant number of bits.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kshiftli (const Mask< T, SIMD_WIDTH > &a, const uint64_t count)
 Shifts the bits of a Mask to the left by a variable number of bits.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kshiftri (const Mask< T, SIMD_WIDTH > &a)
 Shifts the bits of a Mask to the right by a constant number of bits.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kshiftri (const Mask< T, SIMD_WIDTH > &a, const uint64_t count)
 Shifts the bits of a Mask to the right by a variable number of bits.
 
template<typename T , size_t SIMD_WIDTH>
static bool ktest_all_ones (const Mask< T, SIMD_WIDTH > &a)
 Tests if all bits of a Mask are set to true.
 
template<typename T , size_t SIMD_WIDTH>
static bool ktest_all_zeros (const Mask< T, SIMD_WIDTH > &a)
 Tests if all bits of a Mask are set to false.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kxnor (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
 Computes the bitwise XNOR of two Mask's.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > kxor (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
 Computes the bitwise XOR of two Mask's.
 
static void lfence ()
 Load fence.
 
template<size_t SIMD_WIDTH, typename T >
static Vec< T, SIMD_WIDTH > load (const T *const p)
 Loads a Vec from aligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void load (const T *const p, Vec< T, SIMD_WIDTH > inVecs[], size_t numInVecs)
 Loads multiple successive Vec's from aligned memory.
 
template<size_t SIMD_WIDTH, typename T >
static void load_store (const T *const src, T *const dst)
 Copies a single Vec from one aligned memory location to another aligned memory location.
 
template<size_t SIMD_WIDTH, typename T >
static void load_storeu (const T *const src, T *const dst)
 Copies a single Vec from an aligned memory location to an unaligned memory location.
 
template<size_t SIMD_WIDTH, typename T >
static Vec< T, SIMD_WIDTH > loadu (const T *const p)
 Loads a Vec from unaligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void loadu (const T *const p, Vec< T, SIMD_WIDTH > inVecs[], size_t numInVecs)
 Loads multiple successive Vec's from unaligned memory.
 
template<size_t SIMD_WIDTH, typename T >
static void loadu_store (const T *const src, T *const dst)
 Copies a single Vec from an unaligned memory location to an aligned memory location.
 
template<size_t SIMD_WIDTH, typename T >
static void loadu_storeu (const T *const src, T *const dst)
 Copies a single Vec from one unaligned memory location to another unaligned memory location.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_abs (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of abs(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_add (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of add(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_adds (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of adds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_all_ones ()
 Creates a Mask with all elements set to true.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_and (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of bit_and(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_andnot (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of bit_andnot(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_avg (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of avg(const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_ceil (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of ceil(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpeq (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked comparison between corresponding elements of two Vec's for equality ( == ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpeq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for equality ( == ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpge (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked comparison between corresponding elements of two Vec's for greater-than-or-equal ( >= ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpge (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for greater-than-or-equal ( >= ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpgt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked comparison between corresponding elements of two Vec's for greater-than ( > ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpgt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for greater-than ( > ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmple (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked comparison between corresponding elements of two Vec's for less-than-or-equal ( <= ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmple (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for less-than-or-equal ( <= ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmplt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked comparison between corresponding elements of two Vec's for less-than ( < ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmplt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for less-than ( < ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpneq (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked comparison between corresponding elements of two Vec's for inequality ( != ).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_cmpneq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Compares corresponding elements of two Vec's for inequality ( != ).
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH > mask_cvts (const Vec< Tout, SIMD_WIDTH > &src, const Mask< Tin, SIMD_WIDTH > &k, const Vec< Tin, SIMD_WIDTH > &a)
 Masked version of cvts(const Vec<Tin, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_div (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of div(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_div2r0 (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of div2r0(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_div2rd (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of div2rd(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_floor (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of floor(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_hadd (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of hadd(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_hadds (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of hadds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_hsub (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of hsub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_hsubs (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of hsubs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename Tcond , typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_ifelse (const Mask< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
 Selects elements from two Vec's based on a condition Mask.
 
template<typename Tcond , typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_ifelsezero (const Mask< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal)
 Selects elements from a Vec and zero based on a condition Mask.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_load (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const T *const p)
 Masked version of load(const T *const).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_loadu (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const T *const p)
 Masked version of loadu(const T *const).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_max (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of max(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_min (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of min(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_mul (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of mul(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_neg (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of neg(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_not (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of bit_not(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_or (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of bit_or(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_rcp (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of rcp(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_round (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of round(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_rsqrt (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of rsqrt(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_set1 (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const T a)
 Masked version of set1(const T).
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_set_false_high (const size_t x)
 Sets the upper x bits of a Mask to false.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_set_false_low (const size_t x)
 Sets the lower x bits of a Mask to false.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_set_true_high (const size_t x)
 Sets the upper x bits of a Mask to true.
 
template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH > mask_set_true_low (const size_t x)
 Sets the lower x bits of a Mask to true.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_slli (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of slli(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_sqrt (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of sqrt(const Vec<T, SIMD_WIDTH> &).
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_srai (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of srai(const Vec<T, SIMD_WIDTH> &).
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_srli (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of srli(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static void mask_store (T *const p, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of store(T *const, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static void mask_storeu (T *const p, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of storeu(T *const, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_sub (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of sub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_subs (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of subs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static bool mask_test_all_ones (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Tests if all bits of all elements of a Vec are one, while ignoring elements where the corresponding bit in a Mask is zero.
 
template<typename T , size_t SIMD_WIDTH>
static bool mask_test_all_zeros (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Tests if all elements of a Vec are zero, while ignoring elements where the corresponding bit in a Mask is zero.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_truncate (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Masked version of truncate(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mask_xor (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Masked version of bit_xor(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_abs (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of abs(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_add (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of add(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_adds (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of adds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_and (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of bit_and(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_andnot (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of bit_andnot(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_avg (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of avg(const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_ceil (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of ceil(const Vec<T, SIMD_WIDTH> &).
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH > maskz_cvts (const Mask< Tin, SIMD_WIDTH > &k, const Vec< Tin, SIMD_WIDTH > &a)
 Zero-masked version of cvts(const Vec<Tin, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_div (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of div(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_div2r0 (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of div2r0(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_div2rd (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of div2rd(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_floor (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of floor(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_hadd (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of hadd(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_hadds (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of hadds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_hsub (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of hsub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_hsubs (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of hsubs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_load (const Mask< T, SIMD_WIDTH > &k, const T *const p)
 Zero-masked version of load(const T *const).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_loadu (const Mask< T, SIMD_WIDTH > &k, const T *const p)
 Zero-masked version of loadu(const T *const).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_max (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of max(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_min (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of min(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_mul (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of mul(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_neg (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of neg(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_not (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of bit_not(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_or (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of bit_or(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_rcp (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of rcp(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_round (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of round(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_rsqrt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of rsqrt(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_set1 (const Mask< T, SIMD_WIDTH > &k, const T a)
 Zero-masked version of set1(const T).
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_slli (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of slli(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_sqrt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of sqrt(const Vec<T, SIMD_WIDTH> &).
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_srai (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of srai(const Vec<T, SIMD_WIDTH> &).
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_srli (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of srli(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_sub (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of sub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_subs (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of subs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_truncate (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
 Zero-masked version of truncate(const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > maskz_xor (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Zero-masked version of bit_xor(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > max (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the maximum of the elements of two Vec's.
 
static void mfence ()
 Full memory fence.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > min (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Computes the minimum of the elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static uint64_t msb2int (const Vec< T, SIMD_WIDTH > &a)
 Collects the most significant bit of each element of a Vec into an integer.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > mul (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Multiplies the elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > neg (const Vec< T, SIMD_WIDTH > &a)
 Negates the elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > not_ (const Vec< T, SIMD_WIDTH > &a)
 Alias for bit_not().
 
template<typename Tout , typename Tin >
static constexpr size_t numInputSIMDVecs ()
 Alias for numInVecs().
 
template<typename Tout , typename Tin >
static constexpr size_t numInVecs ()
 Number of input vectors for functions that potentially change the size of the elements but not the number of elements.
 
template<typename Tout , typename Tin >
static constexpr size_t numOutputSIMDVecs ()
 Alias for numOutVecs().
 
template<typename Tout , typename Tin >
static constexpr size_t numOutVecs ()
 Number of output vectors for functions that potentially change the size of the elements but not the number of elements.
 
template<typename T , size_t SIMD_WIDTH>
static constexpr size_t numSIMDVecElements ()
 Alias for Vec<T, SIMD_WIDTH>::elems.
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static constexpr size_t numSIMDVecsElements ()
 Number of elements in all input vectors for functions that potentially change the size of the elements but not the number of elements.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator!= (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Not equal to operator. Maps to cmpneq().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator& (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Bitwise AND operator. Maps to bit_and().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator&= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Bitwise AND assignment operator. Maps to bit_and().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator* (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Multiplication operator. Maps to mul().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator*= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Multiplication assignment operator. Maps to mul().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator+ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Addition operator. Maps to adds().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator+= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Addition assignment operator. Maps to adds().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator- (const Vec< T, SIMD_WIDTH > &a)
 Negation operator. Maps to neg().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator- (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Subtraction operator. Maps to subs().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator-= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Subtraction assignment operator. Maps to subs().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator/ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Division operator. Maps to div().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator/= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Division assignment operator. Maps to div().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator< (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Less than operator. Maps to cmplt().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator<= (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Less than or equal operator. Maps to cmple().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator== (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Equal to operator. Maps to cmpeq().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator> (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Greater than operator. Maps to cmpgt().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator>= (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Greater than or equal operator. Maps to cmpge().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator^ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Bitwise XOR operator. Maps to bit_xor().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator^= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Bitwise XOR assignment operator. Maps to bit_xor().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator| (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Bitwise OR operator. Maps to bit_or().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator|= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Bitwise OR assignment operator. Maps to bit_or().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > operator~ (const Vec< T, SIMD_WIDTH > &a)
 Bitwise NOT operator. Maps to bit_not().
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > or_ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Alias for bit_or().
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH > packs (const Vec< Tin, SIMD_WIDTH > &a, const Vec< Tin, SIMD_WIDTH > &b)
 Packs two Vec's into one by converting the elements into the next smaller type with saturation.
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH > packs (const Vec< Tin, SIMD_WIDTH > a[sizeof(Tin)/sizeof(Tout)])
 Packs multiple Vec's into a single Vec by converting the elements into smaller or equally sized types.
 
template<typename T , size_t SIMD_WIDTH>
static void print (const char *format, const char *separator, const Vec< T, SIMD_WIDTH > &vec)
 Writes the formatted elements of a Vec to stdout separated by a separator string.
 
template<typename T , size_t SIMD_WIDTH>
static void print (const char *format, const Vec< T, SIMD_WIDTH > &vec)
 Writes the formatted elements of a Vec to stdout.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > rcp (const Vec< T, SIMD_WIDTH > &a)
 Computes the approximate reciprocal of the elements of a Vec.
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH > reinterpret (const Vec< Tin, SIMD_WIDTH > &a)
 Reinterprets a given Vec as a Vec with a different element type.
 
template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Mask< Tout, SIMD_WIDTH > reinterpret_mask (const Mask< Tin, SIMD_WIDTH > &a)
 Reinterprets a Mask of one type as a Mask of another type.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > reverse (const Vec< T, SIMD_WIDTH > &a)
 Reverses the order of the elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > round (const Vec< T, SIMD_WIDTH > &a)
 Rounds the elements of a Vec to the nearest integer.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > rsqrt (const Vec< T, SIMD_WIDTH > &a)
 Computes the approximate reciprocal square root of the elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > set1 (const dont_deduce< T > a)
 Returns a Vec with all elements set to the same value.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > setmax ()
 Sets all elements of a Vec to the maximum value of the element type.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > setmin ()
 Sets all elements of a Vec to the minimum value of the element type.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > setnegunity ()
 Sets all elements of a Vec to the value -1.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > setones ()
 Sets all bits of a Vec to 1.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > setunity ()
 Sets all elements of a Vec to the value 1.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > setzero ()
 Returns a Vec with all elements set to zero.
 
static void sfence ()
 Store fence.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > sign (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Negates the elements of a Vec of floating-point numbers where the corresponding element of a second Vec of floating-point numbers is negative.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > sll (const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
 Shifts the elements of a Vec left by a variable number of bits while shifting in zeros.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > slle (const Vec< T, SIMD_WIDTH > &a)
 Shifts a Vec left by a constant number of elements, shifting in zero elements.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > slli (const Vec< T, SIMD_WIDTH > &a)
 Shifts the elements of a Vec left by a constant number of bits while shifting in zeros.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > sqrt (const Vec< T, SIMD_WIDTH > &a)
 Computes the square root of the elements of a Vec.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > sra (const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
 Shifts the elements of a Vec right by a variable number of bits while shifting in the sign bit.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > srai (const Vec< T, SIMD_WIDTH > &a)
 Shifts the elements of a Vec right by a constant number of bits while shifting in the sign bit.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > srl (const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
 Shifts the elements of a Vec right by a variable number of bits while shifting in zeros.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > srle (const Vec< T, SIMD_WIDTH > &a)
 Shifts a Vec right by a constant number of elements, shifting in zero elements.
 
template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > srli (const Vec< T, SIMD_WIDTH > &a)
 Shifts the elements of a Vec right by a constant number of bits while shifting in zeros.
 
template<typename T , size_t SIMD_WIDTH>
static void store (T *const p, const Vec< T, SIMD_WIDTH > &a)
 Stores a Vec to aligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void store (T *const p, const Vec< T, SIMD_WIDTH > &outVec, size_t numOutVecs)
 Stores a single Vec multiple times to aligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void store (T *const p, const Vec< T, SIMD_WIDTH > outVecs[], size_t numOutVecs)
 Stores multiple successive Vec's to aligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void storeu (T *const p, const Vec< T, SIMD_WIDTH > &a)
 Stores a Vec to unaligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void storeu (T *const p, const Vec< T, SIMD_WIDTH > &outVec, size_t numOutVecs)
 Stores a single Vec multiple times to unaligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void storeu (T *const p, const Vec< T, SIMD_WIDTH > outVecs[], size_t numOutVecs)
 Stores multiple successive Vec's to unaligned memory.
 
template<typename T , size_t SIMD_WIDTH>
static void stream_store (T *const p, const Vec< T, SIMD_WIDTH > &a)
 Stores a Vec to aligned memory using a non-temporal memory hint.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > sub (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Subtracts the elements of two Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > subs (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Subtracts the elements of two Vec's using saturated arithmetic.
 
template<size_t N, typename T , size_t SIMD_WIDTH>
static void swizzle (Vec< T, SIMD_WIDTH > v[N])
 Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
 
template<size_t N, typename T , size_t SIMD_WIDTH>
static void swizzle2 (Vec< T, SIMD_WIDTH > v[2 *N])
 Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
 
template<typename T , size_t SIMD_WIDTH>
static bool test_all_ones (const Vec< T, SIMD_WIDTH > &a)
 Tests if all bits of a Vec are one.
 
template<typename T , size_t SIMD_WIDTH>
static bool test_all_zeros (const Vec< T, SIMD_WIDTH > &a)
 Tests if all bits of a Vec are zero.
 
int timespec_subtract (const struct timespec &xx, const struct timespec &yy, struct timespec &result)
 Subtracts two timespecs.
 
double timespec_usec (const struct timespec &x)
 Converts a timespec to microseconds. Intended for results of timespec_subtract.
 
long int timeSpecDiffNsec (const struct timespec &x, const struct timespec &y)
 Computes the difference between two timespec's in nanoseconds.
 
double timeSpecDiffUsec (const struct timespec &x, const struct timespec &y)
 Computes the difference between two timespec's in microseconds.
 
template<typename T , size_t SIMD_WIDTH>
static void transpose (const Vec< T, SIMD_WIDTH > inRows[Vec< T, SIMD_WIDTH >::elems], Vec< T, SIMD_WIDTH > outRows[Vec< T, SIMD_WIDTH >::elems])
 Transposes a matrix held in an array of Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static void transpose (Vec< T, SIMD_WIDTH > rows[Vec< T, SIMD_WIDTH >::elems])
 Transposes a matrix held in an array of Vec's.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > truncate (const Vec< T, SIMD_WIDTH > &a)
 Truncates the elements of a Vec to the nearest integer i.e. rounds towards zero.
 
template<size_t PART, size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > unpack (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Interleaves blocks of elements from the high or low half of two Vec's.
 
template<size_t PART, size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > unpack16 (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Interleaves blocks of elements from the high or low half of each 16-byte lane of two Vec's.
 
template<size_t N, typename T , size_t SIMD_WIDTH>
static void unswizzle (Vec< T, SIMD_WIDTH > v[2 *N])
 Unswizzle/interleave/convert from SoA to AoS multiple Vec's in-place.
 
template<size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static void unzip (const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
 Deinterleaves blocks of elements of two Vec's.
 
template<size_t LENGTH, SortSlope SLOPE, typename T , size_t SIMD_WIDTH>
static void verticalBitonicSort (T data[LENGTH])
 Sorts data vector using vertical version of bitonic sort. Assumes that data size is a power of 2 times the number of elements in a SIMD vector squared; if not, a static assertion is raised. Note: This function has not been subjected to auto-tests.
 
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > xor_ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
 Alias for bit_xor().
 
template<size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static void zip (const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
 Interleaves blocks of elements of two Vec's.
 
template<size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static void zip16 (const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
 Interleaves blocks of elements of each 16-byte lane of two Vec's.
 

Typedef Documentation

◆ dont_deduce

template<typename T >
using simd::dont_deduce

Helper type to prevent template argument deduction.

This type can be used to prevent template argument deduction for a given type.

Template Parameters
T: The type to prevent deduction for.

◆ NumSIMDVecs

template<typename Tout , typename Tin >
using simd::NumSIMDVecs

Alias for NumVecs.

Deprecated
Use NumVecs instead.

◆ SIMDDecimal

template<typename T >
using simd::SIMDDecimal

Alias for Decimal.

Deprecated
Use Decimal instead.

◆ SIMDFormat

template<typename T >
using simd::SIMDFormat

Alias for Format.

Deprecated
Use Format instead.

◆ SIMDMask

template<typename T , size_t SIMD_WIDTH>
using simd::SIMDMask

Alias for Mask.

Deprecated
Use Mask instead.

◆ SIMDTypeInfo

template<typename T >
using simd::SIMDTypeInfo

Alias for TypeInfo.

Deprecated
Use TypeInfo instead.

◆ SIMDVec

template<typename T , size_t SIMD_WIDTH>
using simd::SIMDVec

Alias for Vec.

Deprecated
Use Vec instead.

◆ SIMDVecs

template<size_t NUM, typename T , size_t SIMD_WIDTH>
using simd::SIMDVecs

Alias for Vecs.

Deprecated
Use Vecs instead.

Function Documentation

◆ and_()

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > simd::and_ ( const Vec< T, SIMD_WIDTH > & a,
const Vec< T, SIMD_WIDTH > & b )
inline static

Alias for bit_and().

Deprecated
Use bit_and() instead.

◆ elem0()

template<typename T , size_t SIMD_WIDTH>
static T simd::elem0 ( const Vec< T, SIMD_WIDTH > & a)
inline static

Alias for extract<0>().

Deprecated
Use extract<0>() instead.

◆ getTimeSpec()

struct timespec simd::getTimeSpec ( )

Get the current value of this process's CPU time clock.

Warning
This clock has a resolution of 100ns on some systems (e.g. Windows).
Returns
current value of this process's CPU time clock

◆ getTimeSpecMonotonic()

struct timespec simd::getTimeSpecMonotonic ( )

Get the current value of the system-wide real-time clock.

Returns
current value of the system-wide real-time clock

◆ not_()

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > simd::not_ ( const Vec< T, SIMD_WIDTH > & a)
inline static

Alias for bit_not().

Deprecated
Use bit_not() instead.

◆ numInputSIMDVecs()

template<typename Tout , typename Tin >
static constexpr size_t simd::numInputSIMDVecs ( )
inline static constexpr

Alias for numInVecs().

Deprecated
Use numInVecs() instead.

◆ numOutputSIMDVecs()

template<typename Tout , typename Tin >
static constexpr size_t simd::numOutputSIMDVecs ( )
inline static constexpr

Alias for numOutVecs().

Deprecated
Use numOutVecs() instead.

◆ numSIMDVecElements()

template<typename T , size_t SIMD_WIDTH>
static constexpr size_t simd::numSIMDVecElements ( )
inline static constexpr

Alias for Vec<T, SIMD_WIDTH>::elems.

Deprecated
Use Vec::elems instead.

◆ or_()

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > simd::or_ ( const Vec< T, SIMD_WIDTH > & a,
const Vec< T, SIMD_WIDTH > & b )
inline static

Alias for bit_or().

Deprecated
Use bit_or() instead.

◆ timespec_subtract()

int simd::timespec_subtract ( const struct timespec & xx,
const struct timespec & yy,
struct timespec & result )

Subtracts two timespecs.

Parameters
[in] xx: first timespec
[in] yy: second timespec
[out] result: result of the subtraction
Returns
1 if the difference is negative, otherwise 0

◆ timespec_usec()

double simd::timespec_usec ( const struct timespec & x)

Converts a timespec to microseconds. Intended for results of timespec_subtract.

Parameters
[in] x: timespec to convert
Returns
timespec converted to microseconds

◆ timeSpecDiffNsec()

long int simd::timeSpecDiffNsec ( const struct timespec & x,
const struct timespec & y )

Computes the difference between two timespec's in nanoseconds.

Warning
May overflow on platforms where long is 32 bits if the difference is more than 2 seconds.
Parameters
x: first timespec
y: second timespec
Returns
difference between the two timespec's in nanoseconds

◆ timeSpecDiffUsec()

double simd::timeSpecDiffUsec ( const struct timespec & x,
const struct timespec & y )

Computes the difference between two timespec's in microseconds.

Parameters
x: first timespec
y: second timespec
Returns
difference between the two timespec's in microseconds

◆ xor_()

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH > simd::xor_ ( const Vec< T, SIMD_WIDTH > & a,
const Vec< T, SIMD_WIDTH > & b )
inline static

Alias for bit_xor().

Deprecated
Use bit_xor() instead.