Namespace for T-SIMD. More...

Detailed Description

Namespace for T-SIMD.

Classes
class	aligned_allocator
	Aligned allocator. More...

struct	Decimal
	Class for formatting SIMD types as decimal numbers. More...

struct	Format
	Class for generating format strings for printf for SIMD types. More...

struct	HAcc
	Iterative horizontal accumulator. Calculates the horizontal accumulation of multiple (Vec<T, SIMD_WIDTH>::elems) Vec's into a single Vec in parallel with the Vec's to be accumulated pushed one by one. More...

class	HAccStore
	Iterative horizontal accumulator with store of the result. Calculates the horizontal accumulation of multiple Vec's in parallel with the Vec's to be accumulated pushed one by one. Stores the result of the horizontal accumulation every Vec<T, SIMD_WIDTH>::elems Vec's into memory. More...

struct	HAdd
	Horizontal addition class for iterative horizontal accumulation. More...

struct	HAdds
	Horizontal saturated addition class for iterative horizontal accumulation. More...

struct	HMax
	Horizontal maximum class for iterative horizontal accumulation. More...

struct	HMin
	Horizontal minimum class for iterative horizontal accumulation. More...

class	Mask
	SIMD mask class consisting of as many bits as the corresponding Vec has elements. More...

struct	TypeInfo
	Type information for SIMD types. More...

class	Vec
	SIMD vector class, holds multiple elements of the same type. More...

Typedefs
template<typename Tout , typename Tin >
using	BigEnoughFloat
	Smallest floating point type that is at least as big as the input and output types.

using	Byte
	Unsigned 8-bit integer.

template<typename T >
using	dont_deduce
	Helper type to prevent template argument deduction.

using	Double
	Double-precision floating point number (64-bit).

using	Float
	Single-precision floating point number (32-bit).

using	Int
	Signed 32-bit integer.

using	Long
	Signed 64-bit integer.

template<typename Tout , typename Tin >
using	NumSIMDVecs
	Alias for NumVecs.

using	Short
	Signed 16-bit integer.

using	SignedByte
	Signed 8-bit integer.

using	SIMDByte
	Alias for Byte.

using	SIMDBytePtr
	Pointer to SIMDByte.

template<typename T >
using	SIMDDecimal
	Alias for Decimal.

using	SIMDFloat
	Alias for Float.

using	SIMDFloatPtr
	Pointer to SIMDFloat.

template<typename T >
using	SIMDFormat
	Alias for Format.

using	SIMDInt
	Alias for Int.

using	SIMDIntPtr
	Pointer to SIMDInt.

template<typename T , size_t SIMD_WIDTH>
using	SIMDMask
	Alias for Mask.

using	SIMDShort
	Alias for Short.

using	SIMDShortPtr
	Pointer to SIMDShort.

using	SIMDSignedByte
	Alias for SignedByte.

using	SIMDSignedBytePtr
	Pointer to SIMDSignedByte.

template<typename T >
using	SIMDTypeInfo
	Alias for TypeInfo.

template<typename T , size_t SIMD_WIDTH>
using	SIMDVec
	Alias for Vec.

template<size_t NUM, typename T , size_t SIMD_WIDTH>
using	SIMDVecs
	Alias for Vecs.

using	SIMDWord
	Alias for Word.

using	SIMDWordPtr
	Pointer to SIMDWord.

using	Word
	Unsigned 16-bit integer.

Enumerations
enum class	SortSlope { ASCENDING , DESCENDING }
	Used to indicate the direction of a sort function.

Functions
template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	abs (const Vec< T, SIMD_WIDTH > &a)
	Computes the absolute value of the elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	absDiff (const Vec< T, SIMD_WIDTH > &v1, const Vec< T, SIMD_WIDTH > &v2)
	Computes the absolute difference of the elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	add (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Adds the elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	adds (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Adds the elements of two Vec's using saturated arithmetic.

void	aligned_free (void *ptr)
	Aligned memory deallocation.

void *	aligned_malloc (size_t alignment, size_t size)
	Aligned memory allocation.

void *	aligned_malloc (size_t size)
	Aligned memory allocation aligned to `NATIVE_SIMD_WIDTH`.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	alignre (const Vec< T, SIMD_WIDTH > &h, const Vec< T, SIMD_WIDTH > &l)
	Concatenates two Vec's, shifts the result right by a constant number of elements, and returns the low half of the result.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	and_ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Alias for bit_and().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	avg (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the average of the elements of two Vec's, rounded up.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	avgrd (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the average of the elements of two Vec's, rounded down.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	avgru (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the average of the elements of two Vec's, rounded up.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	bit_and (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the bitwise AND of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	bit_andnot (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the bitwise ANDNOT of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	bit_not (const Vec< T, SIMD_WIDTH > &a)
	Computes the bitwise NOT of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	bit_or (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the bitwise OR of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	bit_xor (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the bitwise XOR of two Vec's.

template<SortSlope SLOPE, typename T , size_t SIMD_WIDTH>
static void	bitonicSort (Vec< T, SIMD_WIDTH > vecs[Vec< T, SIMD_WIDTH >::elems])
	Sorts multiple Vec's independently using the bitonic sort algorithm.

template<SortSlope SLOPE, typename T , size_t SIMD_WIDTH>
static void	bitonicSortSortedPairs (Vec< T, SIMD_WIDTH > vecs[Vec< T, SIMD_WIDTH >::elems])
	Fuses consecutive pairs of sorted Vec's such that the pair is sorted over the two vectors.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	ceil (const Vec< T, SIMD_WIDTH > &a)
	Rounds the elements of a Vec up to the nearest integer.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	cmpeq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for equality ( `==` ).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	cmpge (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for greater-than-or-equal ( `>=` ).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	cmpgt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for greater-than ( `>` ).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	cmple (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for less-than-or-equal ( `<=` ).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	cmplt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for less-than ( `<` ).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	cmpneq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for inequality ( `!=` ).

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static void	convert (const Vec< Tin, SIMD_WIDTH > inVecs[numInVecs< Tout, Tin >()], Vec< Tout, SIMD_WIDTH > outVecs[numOutVecs< Tout, Tin >()])
	Converts (potentially multiple) Vec's between different types.

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH >	cvts (const Vec< Tin, SIMD_WIDTH > &a)
	Converts the elements of a Vec between integer and floating point types of the same size.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	div (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Divides the elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	div2r0 (const Vec< T, SIMD_WIDTH > &a)
	Divides all elements of a Vec by 2 and rounds the result towards zero.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	div2rd (const Vec< T, SIMD_WIDTH > &a)
	Divides all elements of a Vec by 2 and rounds down the result.

template<typename T , size_t SIMD_WIDTH>
static T	elem0 (const Vec< T, SIMD_WIDTH > &a)
	Alias for extract<0>().

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static void	extend (const Vec< Tin, SIMD_WIDTH > &vIn, Vec< Tout, SIMD_WIDTH > vOut[sizeof(Tout)/sizeof(Tin)])
	Extends the elements of a Vec to a larger or equally sized type.

template<size_t INDEX, typename T , size_t SIMD_WIDTH>
static T	extract (const Vec< T, SIMD_WIDTH > &a)
	Extracts a single value from a Vec.

template<size_t LANE_INDEX, typename T , size_t SIMD_WIDTH>
static Vec< T, 16 >	extractLane (const Vec< T, SIMD_WIDTH > &a)
	Extracts a 16-byte lane from a Vec as a Vec < T, 16 >.

template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void	faddmul (const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > off, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
	Adds a floating point constant to the elements of Vec's, then multiplies with a floating point constant in floating point arithmetic.

template<size_t DIM, size_t NVEC, typename Tout , typename Tin , size_t SIMD_WIDTH>
static void	fdivMsigmoidmul (const Vec< Tin, SIMD_WIDTH > vecsNum[DIM][NVEC], const Vec< Tin, SIMD_WIDTH > vecsDenom[DIM][NVEC], const double w[DIM], const double w0[DIM], double fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
	Special function used in MinWarping.

template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void	fdivmul (const Vec< Tin, SIMD_WIDTH > vecsNum[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsDenom[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
	Divides Vec's element-wise, then multiplies with a constant factor in floating point arithmetic.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	floor (const Vec< T, SIMD_WIDTH > &a)
	Rounds the elements of a Vec down to the nearest integer.

template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void	fmul (const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
	Multiplies Vec's element-wise with a floating point constant in floating point arithmetic.

template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void	fmuladd (const Vec< Tin, SIMD_WIDTH > vecsIn[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > fac, dont_deduce< Tfloat > off, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
	Multiplies the elements of Vec's with a floating point constant, then adds a floating point constant in floating point arithmetic.

template<typename T , size_t SIMD_WIDTH>
static void	fprint (FILE *f, const char *format, const char *separator, const Vec< T, SIMD_WIDTH > &vec)
	Writes the formatted elements of a Vec to a file separated by a separator string.

template<typename T , size_t SIMD_WIDTH>
static void	fprint (FILE *f, const char *format, const Vec< T, SIMD_WIDTH > &vec)
	Writes the formatted elements of a Vec to a file.

template<typename Tout , typename Tin , typename Tfloat = BigEnoughFloat<Tout, Tin>, size_t SIMD_WIDTH>
static void	fwaddmul (const Vec< Tin, SIMD_WIDTH > vecsIn1[numInVecs< Tout, Tin >()], const Vec< Tin, SIMD_WIDTH > vecsIn2[numInVecs< Tout, Tin >()], dont_deduce< Tfloat > w, dont_deduce< Tfloat > fac, Vec< Tout, SIMD_WIDTH > vecsOut[numOutVecs< Tout, Tin >()])
	Linearly interpolates Vec's element-wise with a constant weight and then scales by a constant factor in floating point arithmetic.

struct timespec	getTimeSpec ()
	Get the current value of this process's CPU time clock.

struct timespec	getTimeSpecMonotonic ()
	Get the current value of the system-wide real-time clock.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	hadd (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Horizontally adds adjacent elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static T	hadd (const Vec< T, SIMD_WIDTH > &v)
	Adds all elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	hadd (const Vec< T, SIMD_WIDTH > v[Vec< T, SIMD_WIDTH >::elems])
	Sums the elements of multiple Vec's independently and returns a Vec with the results.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	hadds (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Horizontally adds adjacent elements of two Vec's with saturation.

template<typename T , size_t SIMD_WIDTH>
static T	hadds (const Vec< T, SIMD_WIDTH > &v)
	Adds all elements of a Vec using saturated arithmetic.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	hadds (const Vec< T, SIMD_WIDTH > v[Vec< T, SIMD_WIDTH >::elems])
	Sums the elements of multiple Vec's independently using saturated arithmetic and returns a Vec with the results.

template<typename T , size_t SIMD_WIDTH>
static T	hmax (const Vec< T, SIMD_WIDTH > &v)
	Calculates the maximum of all elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static T	hmin (const Vec< T, SIMD_WIDTH > &v)
	Calculates the minimum of all elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	hsub (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Horizontally subtracts adjacent elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	hsubs (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Horizontally subtracts adjacent elements of two Vec's with saturation.

template<typename Tcond , typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	ifelse (const Vec< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
	Selects elements from two Vec's based on a condition Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	int2bits (const uint64_t a)
	Sets all bits of each element of a Vec to the corresponding bit of an integer.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	int2msb (const uint64_t a)
	Sets the most significant bit of each element of a Vec to the corresponding bit of an integer.

template<typename T , size_t SIMD_WIDTH>
static simd::Vec< T, SIMD_WIDTH >	integrate (const simd::Vec< T, SIMD_WIDTH > &v)
	Integrates the values of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	iota ()
	Creates a Vec with sequentially increasing numbers, starting with 0.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kadd (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
	Adds two Mask's together as if they were integers.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kand (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
	Computes the bitwise AND of two Mask's.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kandn (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
	Computes the bitwise ANDNOT of two Mask's.

template<typename T , size_t SIMD_WIDTH>
static bool	kcmpeq (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
	Tests if all bits of two Mask's are equal.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	knot (const Mask< T, SIMD_WIDTH > &a)
	Computes the bitwise NOT of a Mask.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kor (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
	Computes the bitwise OR of two Mask's.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kshiftli (const Mask< T, SIMD_WIDTH > &a)
	Shifts the bits of a Mask to the left by a constant number of bits.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kshiftli (const Mask< T, SIMD_WIDTH > &a, const uint64_t count)
	Shifts the bits of a Mask to the left by a variable number of bits.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kshiftri (const Mask< T, SIMD_WIDTH > &a)
	Shifts the bits of a Mask to the right by a constant number of bits.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kshiftri (const Mask< T, SIMD_WIDTH > &a, const uint64_t count)
	Shifts the bits of a Mask to the right by a variable number of bits.

template<typename T , size_t SIMD_WIDTH>
static bool	ktest_all_ones (const Mask< T, SIMD_WIDTH > &a)
	Tests if all bits of a Mask are set to true.

template<typename T , size_t SIMD_WIDTH>
static bool	ktest_all_zeros (const Mask< T, SIMD_WIDTH > &a)
	Tests if all bits of a Mask are set to false.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kxnor (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
	Computes the bitwise XNOR of two Mask's.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	kxor (const Mask< T, SIMD_WIDTH > &a, const Mask< T, SIMD_WIDTH > &b)
	Computes the bitwise XOR of two Mask's.

static void	lfence ()
	Load fence.

template<size_t SIMD_WIDTH, typename T >
static Vec< T, SIMD_WIDTH >	load (const T *const p)
	Loads a Vec from aligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	load (const T *const p, Vec< T, SIMD_WIDTH > inVecs[], size_t numInVecs)
	Loads multiple successive Vec's from aligned memory.

template<size_t SIMD_WIDTH, typename T >
static void	load_store (const T *const src, T *const dst)
	Copies a single Vec from one aligned memory location to another aligned memory location.

template<size_t SIMD_WIDTH, typename T >
static void	load_storeu (const T *const src, T *const dst)
	Copies a single Vec from an aligned memory location to an unaligned memory location.

template<size_t SIMD_WIDTH, typename T >
static Vec< T, SIMD_WIDTH >	loadu (const T *const p)
	Loads a Vec from unaligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	loadu (const T *const p, Vec< T, SIMD_WIDTH > inVecs[], size_t numInVecs)
	Loads multiple successive Vec's from unaligned memory.

template<size_t SIMD_WIDTH, typename T >
static void	loadu_store (const T *const src, T *const dst)
	Copies a single Vec from an unaligned memory location to an aligned memory location.

template<size_t SIMD_WIDTH, typename T >
static void	loadu_storeu (const T *const src, T *const dst)
	Copies a single Vec from one unaligned memory location to another unaligned memory location.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_abs (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of abs(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_add (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of add(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_adds (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of adds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_all_ones ()
	Creates a Mask with all elements set to true.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_and (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of bit_and(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_andnot (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of bit_andnot(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_avg (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of avg(const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_ceil (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of ceil(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpeq (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked comparison between corresponding elements of two Vec's for equality ( `==` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpeq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for equality ( `==` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpge (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked comparison between corresponding elements of two Vec's for greater-than-or-equal ( `>=` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpge (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for greater-than-or-equal ( `>=` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpgt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked comparison between corresponding elements of two Vec's for greater-than ( `>` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpgt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for greater-than ( `>` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmple (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked comparison between corresponding elements of two Vec's for less-than-or-equal ( `<=` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmple (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for less-than-or-equal ( `<=` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmplt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked comparison between corresponding elements of two Vec's for less-than ( `<` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmplt (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for less-than ( `<` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpneq (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked comparison between corresponding elements of two Vec's for inequality ( `!=` ).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_cmpneq (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Compares corresponding elements of two Vec's for inequality ( `!=` ).

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH >	mask_cvts (const Vec< Tout, SIMD_WIDTH > &src, const Mask< Tin, SIMD_WIDTH > &k, const Vec< Tin, SIMD_WIDTH > &a)
	Masked version of cvts(const Vec<Tin, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_div (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of div(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_div2r0 (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of div2r0(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_div2rd (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of div2rd(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_floor (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of floor(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_hadd (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of hadd(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_hadds (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of hadds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_hsub (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of hsub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_hsubs (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of hsubs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename Tcond , typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_ifelse (const Mask< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
	Selects elements from two Vec's based on a condition Mask.

template<typename Tcond , typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_ifelsezero (const Mask< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal)
	Selects elements from a Vec and zero based on a condition Mask.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_load (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const T *const p)
	Masked version of load(const T *const).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_loadu (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const T *const p)
	Masked version of loadu(const T *const).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_max (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of max(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_min (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of min(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_mul (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of mul(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_neg (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of neg(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_not (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of bit_not(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_or (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of bit_or(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_rcp (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of rcp(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_round (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of round(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_rsqrt (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of rsqrt(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_set1 (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const T a)
	Masked version of set1(const T).

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_set_false_high (const size_t x)
	Sets the upper `x` bits of a Mask to false.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_set_false_low (const size_t x)
	Sets the lower `x` bits of a Mask to false.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_set_true_high (const size_t x)
	Sets the upper `x` bits of a Mask to true.

template<typename T , size_t SIMD_WIDTH>
static Mask< T, SIMD_WIDTH >	mask_set_true_low (const size_t x)
	Sets the lower `x` bits of a Mask to true.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_slli (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of slli(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_sqrt (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of sqrt(const Vec<T, SIMD_WIDTH> &).

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_srai (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of srai(const Vec<T, SIMD_WIDTH> &).

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_srli (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of srli(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static void	mask_store (T *const p, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of store(T *const, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static void	mask_storeu (T *const p, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of storeu(T *const, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_sub (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of sub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_subs (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of subs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static bool	mask_test_all_ones (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Tests if all bits of all elements of a Vec are one, while ignoring elements where the corresponding bit in a Mask is zero.

template<typename T , size_t SIMD_WIDTH>
static bool	mask_test_all_zeros (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Tests if all elements of a Vec are zero, while ignoring elements where the corresponding bit in a Mask is zero.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_truncate (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Masked version of truncate(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mask_xor (const Vec< T, SIMD_WIDTH > &src, const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Masked version of bit_xor(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_abs (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of abs(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_add (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of add(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_adds (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of adds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_and (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of bit_and(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_andnot (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of bit_andnot(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_avg (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of avg(const Vec<T, SIMD_WIDTH> &a, const Vec<T, SIMD_WIDTH> &b).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_ceil (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of ceil(const Vec<T, SIMD_WIDTH> &).

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH >	maskz_cvts (const Mask< Tin, SIMD_WIDTH > &k, const Vec< Tin, SIMD_WIDTH > &a)
	Zero-masked version of cvts(const Vec<Tin, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_div (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of div(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_div2r0 (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of div2r0(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_div2rd (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of div2rd(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_floor (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of floor(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_hadd (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of hadd(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_hadds (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of hadds(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_hsub (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of hsub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_hsubs (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of hsubs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_load (const Mask< T, SIMD_WIDTH > &k, const T *const p)
	Zero-masked version of load(const T *const).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_loadu (const Mask< T, SIMD_WIDTH > &k, const T *const p)
	Zero-masked version of loadu(const T *const).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_max (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of max(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_min (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of min(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_mul (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of mul(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_neg (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of neg(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_not (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of bit_not(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_or (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of bit_or(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_rcp (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of rcp(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_round (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of round(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_rsqrt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of rsqrt(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_set1 (const Mask< T, SIMD_WIDTH > &k, const T a)
	Zero-masked version of set1(const T).

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_slli (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of slli(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_sqrt (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of sqrt(const Vec<T, SIMD_WIDTH> &).

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_srai (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of srai(const Vec<T, SIMD_WIDTH> &).

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_srli (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of srli(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_sub (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of sub(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_subs (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of subs(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_truncate (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a)
	Zero-masked version of truncate(const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	maskz_xor (const Mask< T, SIMD_WIDTH > &k, const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Zero-masked version of bit_xor(const Vec<T, SIMD_WIDTH> &, const Vec<T, SIMD_WIDTH> &).

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	max (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the maximum of the elements of two Vec's.

static void	mfence ()
	Full memory fence.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	min (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Computes the minimum of the elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static uint64_t	msb2int (const Vec< T, SIMD_WIDTH > &a)
	Collects the most significant bit of each element of a Vec into an integer.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	mul (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Multiplies the elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	neg (const Vec< T, SIMD_WIDTH > &a)
	Negates the elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	not_ (const Vec< T, SIMD_WIDTH > &a)
	Alias for bit_not().

template<typename Tout , typename Tin >
static constexpr size_t	numInputSIMDVecs ()
	Alias for numInVecs().

template<typename Tout , typename Tin >
static constexpr size_t	numInVecs ()
	Number of input vectors for functions that potentially change the size of the elements but not the number of elements.

template<typename Tout , typename Tin >
static constexpr size_t	numOutputSIMDVecs ()
	Alias for numOutVecs().

template<typename Tout , typename Tin >
static constexpr size_t	numOutVecs ()
	Number of output vectors for functions that potentially change the size of the elements but not the number of elements.

template<typename T , size_t SIMD_WIDTH>
static constexpr size_t	numSIMDVecElements ()
	Alias for Vec<T, SIMD_WIDTH>::elems.

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static constexpr size_t	numSIMDVecsElements ()
	Number of elements in all input vectors for functions that potentially change the size of the elements but not the number of elements.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator!= (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Not equal to operator. Maps to cmpneq().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator& (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Bitwise AND operator. Maps to bit_and().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator&= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Bitwise AND assignment operator. Maps to bit_and().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator* (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Multiplication operator. Maps to mul().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator*= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Multiplication assignment operator. Maps to mul().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator+ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Addition operator. Maps to adds().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator+= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Addition assignment operator. Maps to adds().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator- (const Vec< T, SIMD_WIDTH > &a)
	Negation operator. Maps to neg().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator- (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Subtraction operator. Maps to subs().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator-= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Subtraction assignment operator. Maps to subs().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator/ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Division operator. Maps to div().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator/= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Division assignment operator. Maps to div().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator< (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Less than operator. Maps to cmplt().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator<= (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Less than or equal operator. Maps to cmple().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator== (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Equal to operator. Maps to cmpeq().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator> (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Greater than operator. Maps to cmpgt().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator>= (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Greater than or equal operator. Maps to cmpge().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator^ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Bitwise XOR operator. Maps to bit_xor().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator^= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Bitwise XOR assignment operator. Maps to bit_xor().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator| (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Bitwise OR operator. Maps to bit_or().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator|= (Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Bitwise OR assignment operator. Maps to bit_or().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	operator~ (const Vec< T, SIMD_WIDTH > &a)
	Bitwise NOT operator. Maps to bit_not().

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	or_ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Alias for bit_or().

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH >	packs (const Vec< Tin, SIMD_WIDTH > &a, const Vec< Tin, SIMD_WIDTH > &b)
	Packs two Vec's into one by converting the elements into the next smaller type with saturation.

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH >	packs (const Vec< Tin, SIMD_WIDTH > a[sizeof(Tin)/sizeof(Tout)])
	Packs multiple Vec's into a single Vec by converting the elements into smaller or equally sized types.

template<typename T , size_t SIMD_WIDTH>
static void	print (const char *format, const char *separator, const Vec< T, SIMD_WIDTH > &vec)
	Writes the formatted elements of a Vec to stdout separated by a separator string.

template<typename T , size_t SIMD_WIDTH>
static void	print (const char *format, const Vec< T, SIMD_WIDTH > &vec)
	Writes the formatted elements of a Vec to stdout.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	rcp (const Vec< T, SIMD_WIDTH > &a)
	Computes the approximate reciprocal of the elements of a Vec.

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Vec< Tout, SIMD_WIDTH >	reinterpret (const Vec< Tin, SIMD_WIDTH > &a)
	Reinterprets a given Vec as a Vec with a different element type.

template<typename Tout , typename Tin , size_t SIMD_WIDTH>
static Mask< Tout, SIMD_WIDTH >	reinterpret_mask (const Mask< Tin, SIMD_WIDTH > &a)
	Reinterprets a Mask of one type as a Mask of another type.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	reverse (const Vec< T, SIMD_WIDTH > &a)
	Reverses the order of the elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	round (const Vec< T, SIMD_WIDTH > &a)
	Rounds the elements of a Vec to the nearest integer.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	rsqrt (const Vec< T, SIMD_WIDTH > &a)
	Computes the approximate reciprocal square root of the elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	set1 (const dont_deduce< T > a)
	Returns a Vec with all elements set to the same value.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	setmax ()
	Sets all elements of a Vec to the maximum value of the element type.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	setmin ()
	Sets all elements of a Vec to the minimum value of the element type.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	setnegunity ()
	Sets all elements of a Vec to the value -1.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	setones ()
	Sets all bits of a Vec to 1.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	setunity ()
	Sets all elements of a Vec to the value 1.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	setzero ()
	Returns a Vec with all elements set to zero.

static void	sfence ()
	Store fence.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	sign (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Negates the elements of a Vec of floating-point numbers where the corresponding element of a second Vec of floating-point numbers is negative.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	sll (const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
	Shifts the elements of a Vec left by a variable number of bits while shifting in zeros.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	slle (const Vec< T, SIMD_WIDTH > &a)
	Shifts a Vec left by a constant number of elements, shifting in zero elements.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	slli (const Vec< T, SIMD_WIDTH > &a)
	Shifts the elements of a Vec left by a constant number of bits while shifting in zeros.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	sqrt (const Vec< T, SIMD_WIDTH > &a)
	Computes the square root of the elements of a Vec.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	sra (const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
	Shifts the elements of a Vec right by a variable number of bits while shifting in the sign bit.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	srai (const Vec< T, SIMD_WIDTH > &a)
	Shifts the elements of a Vec right by a constant number of bits while shifting in the sign bit.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	srl (const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
	Shifts the elements of a Vec right by a variable number of bits while shifting in zeros.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	srle (const Vec< T, SIMD_WIDTH > &a)
	Shifts a Vec right by a constant number of elements, shifting in zero elements.

template<size_t COUNT, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	srli (const Vec< T, SIMD_WIDTH > &a)
	Shifts the elements of a Vec right by a constant number of bits while shifting in zeros.

template<typename T , size_t SIMD_WIDTH>
static void	store (T *const p, const Vec< T, SIMD_WIDTH > &a)
	Stores a Vec to aligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	store (T *const p, const Vec< T, SIMD_WIDTH > &outVec, size_t numOutVecs)
	Stores a single Vec multiple times to aligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	store (T *const p, const Vec< T, SIMD_WIDTH > outVecs[], size_t numOutVecs)
	Stores multiple successive Vec's to aligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	storeu (T *const p, const Vec< T, SIMD_WIDTH > &a)
	Stores a Vec to unaligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	storeu (T *const p, const Vec< T, SIMD_WIDTH > &outVec, size_t numOutVecs)
	Stores a single Vec multiple times to unaligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	storeu (T *const p, const Vec< T, SIMD_WIDTH > outVecs[], size_t numOutVecs)
	Stores multiple successive Vec's to unaligned memory.

template<typename T , size_t SIMD_WIDTH>
static void	stream_store (T *const p, const Vec< T, SIMD_WIDTH > &a)
	Stores a Vec to aligned memory using a non-temporal memory hint.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	sub (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Subtracts the elements of two Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	subs (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Subtracts the elements of two Vec's using saturated arithmetic.

template<size_t N, typename T , size_t SIMD_WIDTH>
static void	swizzle (Vec< T, SIMD_WIDTH > v[N])
	Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.

template<size_t N, typename T , size_t SIMD_WIDTH>
static void	swizzle2 (Vec< T, SIMD_WIDTH > v[2 *N])
	Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.

template<typename T , size_t SIMD_WIDTH>
static bool	test_all_ones (const Vec< T, SIMD_WIDTH > &a)
	Tests if all bits of a Vec are one.

template<typename T , size_t SIMD_WIDTH>
static bool	test_all_zeros (const Vec< T, SIMD_WIDTH > &a)
	Tests if all bits of a Vec are zero.

int	timespec_subtract (const struct timespec &xx, const struct timespec &yy, struct timespec &result)
	Subtracts two timespecs.

double	timespec_usec (const struct timespec &x)
	Converts a timespec to microseconds. Intended for results of timespec_subtract.

long int	timeSpecDiffNsec (const struct timespec &x, const struct timespec &y)
	Computes the difference between two timespec's in nanoseconds.

double	timeSpecDiffUsec (const struct timespec &x, const struct timespec &y)
	Computes the difference between two timespec's in microseconds.

template<typename T , size_t SIMD_WIDTH>
static void	transpose (const Vec< T, SIMD_WIDTH > inRows[Vec< T, SIMD_WIDTH >::elems], Vec< T, SIMD_WIDTH > outRows[Vec< T, SIMD_WIDTH >::elems])
	Transposes a matrix held in an array of Vec's.

template<typename T , size_t SIMD_WIDTH>
static void	transpose (Vec< T, SIMD_WIDTH > rows[Vec< T, SIMD_WIDTH >::elems])
	Transposes a matrix held in an array of Vec's.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	truncate (const Vec< T, SIMD_WIDTH > &a)
	Truncates the elements of a Vec to the nearest integer, i.e. rounds towards zero.

template<size_t PART, size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	unpack (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Interleaves blocks of elements from the high or low half of two Vec's.

template<size_t PART, size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	unpack16 (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Interleaves blocks of elements from the high or low half of each 16-byte lane of two Vec's.

template<size_t N, typename T , size_t SIMD_WIDTH>
static void	unswizzle (Vec< T, SIMD_WIDTH > v[2 *N])
	Unswizzle/interleave/convert from SoA to AoS multiple Vec's in-place.

template<size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static void	unzip (const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
	Deinterleaves blocks of elements of two Vec's.

template<size_t LENGTH, SortSlope SLOPE, typename T , size_t SIMD_WIDTH>
static void	verticalBitonicSort (T data[LENGTH])
	Sorts data vector using vertical version of bitonic sort. Assumes that data size is a power of 2 times the number of elements in a SIMD vector squared; if not, a static assertion is raised. Note: This function has not been subjected to auto-tests.

template<typename T , size_t SIMD_WIDTH>
static Vec< T, SIMD_WIDTH >	xor_ (const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
	Alias for bit_xor().

template<size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static void	zip (const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
	Interleaves blocks of elements of two Vec's.

template<size_t NUM_ELEMS, typename T , size_t SIMD_WIDTH>
static void	zip16 (const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
	Interleaves blocks of elements of each 16-byte lane of two Vec's.

Typedef Documentation

◆ dont_deduce

template<typename T >

using simd::dont_deduce

Helper type to prevent template argument deduction.

This type can be used to prevent template argument deduction for a given type.

Template Parameters

T	The type to prevent deduction for.

◆ NumSIMDVecs

template<typename Tout , typename Tin >

using simd::NumSIMDVecs

Alias for NumVecs.

Deprecated: Use NumVecs instead.

◆ SIMDDecimal

template<typename T >

using simd::SIMDDecimal

Alias for Decimal.

Deprecated: Use Decimal instead.

◆ SIMDFormat

template<typename T >

using simd::SIMDFormat

Alias for Format.

Deprecated: Use Format instead.

◆ SIMDMask

template<typename T , size_t SIMD_WIDTH>

using simd::SIMDMask

Alias for Mask.

Deprecated: Use Mask instead.

◆ SIMDTypeInfo

template<typename T >

using simd::SIMDTypeInfo

Alias for TypeInfo.

Deprecated: Use TypeInfo instead.

◆ SIMDVec

template<typename T , size_t SIMD_WIDTH>

using simd::SIMDVec

Alias for Vec.

Deprecated: Use Vec instead.

◆ SIMDVecs

template<size_t NUM, typename T , size_t SIMD_WIDTH>

using simd::SIMDVecs

Alias for Vecs.

Deprecated: Use Vecs instead.

Function Documentation

◆ and_()

template<typename T , size_t SIMD_WIDTH>

static Vec< T, SIMD_WIDTH > simd::and_	(	const Vec< T, SIMD_WIDTH > &	a,
		const Vec< T, SIMD_WIDTH > &	b )

inline static

Alias for bit_and().

Deprecated: Use bit_and() instead.

◆ elem0()

template<typename T , size_t SIMD_WIDTH>

static T simd::elem0 ( const Vec< T, SIMD_WIDTH > & a )

inline static

Alias for extract<0>().

Deprecated: Use extract<0>() instead.

◆ getTimeSpec()

struct timespec simd::getTimeSpec ( )

Get the current value of this process's CPU time clock.

Warning: This clock has a resolution of 100ns on some systems (e.g. Windows).

Returns: current value of this process's CPU time clock

◆ getTimeSpecMonotonic()

struct timespec simd::getTimeSpecMonotonic ( )

Get the current value of the system-wide real-time clock.

Returns: current value of the system-wide real-time clock

◆ not_()

template<typename T , size_t SIMD_WIDTH>

static Vec< T, SIMD_WIDTH > simd::not_ ( const Vec< T, SIMD_WIDTH > & a )

inline static

Alias for bit_not().

Deprecated: Use bit_not() instead.

◆ numInputSIMDVecs()

template<typename Tout , typename Tin >

static constexpr size_t simd::numInputSIMDVecs ( )

inline static constexpr

Alias for numInVecs().

Deprecated: Use numInVecs() instead.

◆ numOutputSIMDVecs()

template<typename Tout , typename Tin >

static constexpr size_t simd::numOutputSIMDVecs ( )

inline static constexpr

Alias for numOutVecs().

Deprecated: Use numOutVecs() instead.

◆ numSIMDVecElements()

template<typename T , size_t SIMD_WIDTH>

static constexpr size_t simd::numSIMDVecElements ( )

inline static constexpr

Alias for Vec<T, SIMD_WIDTH>::elems.

Deprecated: Use Vec::elems instead.

◆ or_()

template<typename T , size_t SIMD_WIDTH>

static Vec< T, SIMD_WIDTH > simd::or_	(	const Vec< T, SIMD_WIDTH > &	a,
		const Vec< T, SIMD_WIDTH > &	b )

inline static

Alias for bit_or().

Deprecated: Use bit_or() instead.

◆ timespec_subtract()

int simd::timespec_subtract	(	const struct timespec &	xx,
		const struct timespec &	yy,
		struct timespec &	result )

Subtracts two timespecs.

Parameters

[in]	xx	first timespec
[in]	yy	second timespec
[out]	result	result of the subtraction

Returns: 1 if the difference is negative, otherwise 0

◆ timespec_usec()

double simd::timespec_usec ( const struct timespec & x )

Converts a timespec to microseconds. Intended for results of timespec_subtract.

Parameters

[in] x timespec to convert

Returns: timespec converted to microseconds

◆ timeSpecDiffNsec()

long int simd::timeSpecDiffNsec	(	const struct timespec &	x,
		const struct timespec &	y )

Computes the difference between two timespec's in nanoseconds.

Warning: May overflow on platforms where long is 32 bits if the difference is more than 2 seconds.

Parameters

x	first timespec
y	second timespec

Returns: difference between the two timespec's in nanoseconds

◆ timeSpecDiffUsec()

double simd::timeSpecDiffUsec	(	const struct timespec &	x,
		const struct timespec &	y )

Computes the difference between two timespec's in microseconds.

Parameters

x	first timespec
y	second timespec

Returns: difference between the two timespec's in microseconds

◆ xor_()

template<typename T , size_t SIMD_WIDTH>

static Vec< T, SIMD_WIDTH > simd::xor_	(	const Vec< T, SIMD_WIDTH > &	a,
		const Vec< T, SIMD_WIDTH > &	b )

inline static

Alias for bit_xor().

Deprecated: Use bit_xor() instead.

Detailed Description

Classes

Typedefs

Enumerations

Functions

Typedef Documentation

◆ dont_deduce

◆ NumSIMDVecs

◆ SIMDDecimal

◆ SIMDFormat

◆ SIMDMask

◆ SIMDTypeInfo

◆ SIMDVec

◆ SIMDVecs

Function Documentation

◆ and_()

◆ elem0()

◆ getTimeSpec()

◆ getTimeSpecMonotonic()

◆ not_()

◆ numInputSIMDVecs()

◆ numOutputSIMDVecs()

◆ numSIMDVecElements()

◆ or_()

◆ timespec_subtract()

◆ timespec_usec()

◆ timeSpecDiffNsec()

◆ timeSpecDiffUsec()

◆ xor_()