Functions for storing Vec's to memory.
|
template<typename T , size_t SIMD_WIDTH> |
static void | simd::store (T *const p, const Vec< T, SIMD_WIDTH > &a) |
| Stores a Vec to aligned memory.
|
|
template<typename T , size_t SIMD_WIDTH> |
static void | simd::store (T *const p, const Vec< T, SIMD_WIDTH > &outVec, size_t numOutVecs) |
| Stores a single Vec multiple times to aligned memory.
|
|
template<typename T , size_t SIMD_WIDTH> |
static void | simd::store (T *const p, const Vec< T, SIMD_WIDTH > outVecs[], size_t numOutVecs) |
| Stores multiple successive Vec's to aligned memory.
|
|
template<typename T , size_t SIMD_WIDTH> |
static void | simd::storeu (T *const p, const Vec< T, SIMD_WIDTH > &a) |
| Stores a Vec to unaligned memory.
|
|
template<typename T , size_t SIMD_WIDTH> |
static void | simd::storeu (T *const p, const Vec< T, SIMD_WIDTH > &outVec, size_t numOutVecs) |
| Stores a single Vec multiple times to unaligned memory.
|
|
template<typename T , size_t SIMD_WIDTH> |
static void | simd::storeu (T *const p, const Vec< T, SIMD_WIDTH > outVecs[], size_t numOutVecs) |
| Stores multiple successive Vec's to unaligned memory.
|
|
template<typename T , size_t SIMD_WIDTH> |
static void | simd::stream_store (T *const p, const Vec< T, SIMD_WIDTH > &a) |
| Stores a Vec to aligned memory using a non-temporal memory hint.
|
|
template<typename T , size_t SIMD_WIDTH>
static void simd::storeu |
( |
T *const | p, |
|
|
const Vec< T, SIMD_WIDTH > | outVecs[], |
|
|
size_t | numOutVecs ) |
|
inline static |
Stores multiple successive Vec's to unaligned memory.
In contrast to store(T *const, const Vec<T, SIMD_WIDTH>[], size_t), the memory location does not need to be aligned to any boundary.
- Parameters
-
[out] | p | pointer to the memory location to store to |
[in] | outVecs | array of Vec's to store |
[in] | numOutVecs | number of Vec's to store |
template<typename T , size_t SIMD_WIDTH>
static void simd::stream_store |
( |
T *const | p, |
|
|
const Vec< T, SIMD_WIDTH > & | a ) |
|
inline static |
Stores a Vec to aligned memory using a non-temporal memory hint.
This function uses the `_mm*_stream_*` intrinsics on Intel and regular store intrinsics on NEON. A call to sfence() may be required in order for other threads/processors to see the stored values. This function may improve performance on some architectures compared to store().
The memory location must be aligned to the `SIMD_WIDTH`.
- Parameters
-
[out] | p | pointer to the aligned memory location to store to |
[in] | a | Vec to store |