T-SIMD/base_8H_source.html

// ===========================================================================

//

// base-level classes and functions

//

// This source code file is part of the following software:

//

//    - the low-level C++ template SIMD library

//    - the SIMD implementation of the MinWarping and the 2D-Warping methods

//      for local visual homing.

//

// The software is provided based on the accompanying license agreement in the

// file LICENSE.md.

// The software is provided "as is" without any warranty by the licensor and

// without any liability of the licensor, and the software may not be

// distributed by the licensee; see the license agreement for details.

//

// (C) Jonas Keller, Ralf Möller

//     Computer Engineering

//     Faculty of Technology

//     Bielefeld University

//     www.ti.uni-bielefeld.de

//

// ===========================================================================


// 22. Jan 23 (Jonas Keller): introduced wrapper layer that wraps the internal

// architecture-specific implementations


// 09. Mar 23 (Jonas Keller): added doxygen documentation


#pragma once

#ifndef SIMD_VEC_BASE_H_

#define SIMD_VEC_BASE_H_


#include "defs.H"

#include "intel/base_impl_intel16.H"

#include "intel/base_impl_intel32.H"

#include "intel/base_impl_intel64.H"

#include "neon/base_impl_neon16.H"

#include "sandbox/base_impl_sandbox.H"

#include "types.H"

#include "vec.H"


#include <cstdint>

#include <type_traits>


namespace simd {

/**
 * @brief Reinterprets a given Vec as a Vec with a different element type.
 *
 * Thin wrapper around the architecture-specific internal::base::reinterpret().
 *
 * @tparam Tout element type of the returned Vec (must be given explicitly)
 * @tparam Tin element type of the input Vec
 * @param a Vec to reinterpret
 * @return value produced by internal::base::reinterpret() for @p a
 */
template <typename Tout, typename Tin, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<Tout, SIMD_WIDTH> reinterpret(
  const Vec<Tin, SIMD_WIDTH> &a)
{
  // tag object carrying the requested output element type
  using OutTag = internal::OutputType<Tout>;
  return internal::base::reinterpret(a, OutTag());
}


/**
 * @brief Returns a Vec with all elements set to zero.
 *
 * SIMD_WIDTH is introduced by the SIMD_WIDTH_DEFAULT_NATIVE macro (defined
 * elsewhere; presumably it supplies a native default width -- confirm).
 *
 * @tparam T element type of the returned Vec
 * @return value produced by internal::base::setzero()
 */
template <typename T, size_t SIMD_WIDTH_DEFAULT_NATIVE>
static SIMD_INLINE Vec<T, SIMD_WIDTH> setzero()
{
  // tag objects carrying the element type and the vector width
  using ElemTag  = internal::OutputType<T>;
  using WidthTag = internal::Integer<SIMD_WIDTH>;
  return internal::base::setzero(ElemTag(), WidthTag());
}


/**
 * @brief Returns a Vec with all elements set to the same value.
 *
 * The parameter type is wrapped in dont_deduce, so T is not deduced from the
 * argument and has to be specified explicitly.
 *
 * @tparam T element type of the returned Vec
 * @param a value forwarded to internal::base::set1()
 * @return value produced by internal::base::set1()
 */
template <typename T, size_t SIMD_WIDTH_DEFAULT_NATIVE>
static SIMD_INLINE Vec<T, SIMD_WIDTH> set1(const dont_deduce<T> a)
{
  // tag object carrying the vector width
  using WidthTag = internal::Integer<SIMD_WIDTH>;
  return internal::base::set1(a, WidthTag());
}


// 30. Jan 23 (Jonas Keller): added iota


/**
 * @brief Creates a Vec with sequentially increasing numbers, starting with 0.
 *
 * Thin wrapper around the architecture-specific internal::base::iota().
 *
 * @tparam T element type of the returned Vec
 * @return value produced by internal::base::iota()
 */
template <typename T, size_t SIMD_WIDTH_DEFAULT_NATIVE>
static SIMD_INLINE Vec<T, SIMD_WIDTH> iota()
{
  // tag objects carrying the element type and the vector width
  using ElemTag  = internal::OutputType<T>;
  using WidthTag = internal::Integer<SIMD_WIDTH>;
  return internal::base::iota(ElemTag(), WidthTag());
}


/**
 * @brief Selects elements from two Vec's based on a condition Vec.
 *
 * The condition Vec may have a different element type than the value Vec's
 * as long as both element types have the same size; it is reinterpreted as
 * Vec<T, SIMD_WIDTH> before being forwarded to internal::base::ifelse().
 *
 * @param cond condition Vec
 * @param trueVal first value Vec passed to the implementation
 * @param falseVal second value Vec passed to the implementation
 * @return value produced by internal::base::ifelse()
 */
template <typename Tcond, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> ifelse(const Vec<Tcond, SIMD_WIDTH> &cond,
                                             const Vec<T, SIMD_WIDTH> &trueVal,
                                             const Vec<T, SIMD_WIDTH> &falseVal)
{
  static_assert(sizeof(Tcond) == sizeof(T),
                "condition and value types must have the same size");
  // bring the condition to the element type of the value Vec's
  const Vec<T, SIMD_WIDTH> typedCond = reinterpret<T>(cond);
  return internal::base::ifelse(typedCond, trueVal, falseVal);
}


// 27. Aug 22 (Jonas Keller): added msb2int


/**
 * @brief Collects the most significant bit of each element of a Vec into an
 * integer.
 *
 * @param a input Vec
 * @return value produced by internal::base::msb2int() for @p a
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE uint64_t msb2int(const Vec<T, SIMD_WIDTH> &a)
{
  const uint64_t collectedBits = internal::base::msb2int(a);
  return collectedBits;
}


// 09. Oct 22 (Jonas Keller): added int2msb


/**
 * @brief Sets the most significant bit of each element of a Vec to the
 * corresponding bit of an integer.
 *
 * T and SIMD_WIDTH cannot be deduced from the argument and must be given
 * explicitly.
 *
 * @param a integer whose bits are distributed to the elements
 * @return value produced by internal::base::int2msb()
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> int2msb(const uint64_t a)
{
  // tag objects carrying the element type and the vector width
  using ElemTag  = internal::OutputType<T>;
  using WidthTag = internal::Integer<SIMD_WIDTH>;
  return internal::base::int2msb(a, ElemTag(), WidthTag());
}


// 09. Oct 22 (Jonas Keller): added int2bits


/**
 * @brief Sets all bits of each element of a Vec to the corresponding bit of
 * an integer.
 *
 * T and SIMD_WIDTH cannot be deduced from the argument and must be given
 * explicitly.
 *
 * @param a integer whose bits are distributed to the elements
 * @return value produced by internal::base::int2bits()
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> int2bits(const uint64_t a)
{
  // tag objects carrying the element type and the vector width
  using ElemTag  = internal::OutputType<T>;
  using WidthTag = internal::Integer<SIMD_WIDTH>;
  return internal::base::int2bits(a, ElemTag(), WidthTag());
}


/**
 * @brief Loads a Vec from aligned memory.
 *
 * SIMD_WIDTH is introduced by the SIMD_WIDTH_DEFAULT_NATIVE macro and comes
 * first in the template parameter list, so T can be deduced from the pointer.
 *
 * @param p pointer to the memory to load from
 * @return value produced by internal::base::load()
 */
template <size_t SIMD_WIDTH_DEFAULT_NATIVE, typename T>
static SIMD_INLINE Vec<T, SIMD_WIDTH> load(const T *const p)
{
  // 08. Apr 23 (Jonas Keller):
  // alignment check (only active if SIMD_ALIGN_CHK is defined)
  SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
  // tag object carrying the vector width
  using WidthTag = internal::Integer<SIMD_WIDTH>;
  return internal::base::load(p, WidthTag());
}


/**
 * @brief Loads a Vec from unaligned memory.
 *
 * Unlike load(), no alignment check is performed on the pointer.
 *
 * @param p pointer to the memory to load from
 * @return value produced by internal::base::loadu()
 */
template <size_t SIMD_WIDTH_DEFAULT_NATIVE, typename T>
static SIMD_INLINE Vec<T, SIMD_WIDTH> loadu(const T *const p)
{
  // tag object carrying the vector width
  using WidthTag = internal::Integer<SIMD_WIDTH>;
  return internal::base::loadu(p, WidthTag());
}


/**
 * @brief Stores a Vec to aligned memory.
 *
 * @param p pointer to the memory to store to
 * @param a Vec to store
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE void store(T *const p, const Vec<T, SIMD_WIDTH> &a)
{
  namespace impl = internal::base;
  // 08. Apr 23 (Jonas Keller):
  // alignment check (only active if SIMD_ALIGN_CHK is defined)
  SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
  impl::store(p, a);
}


/**
 * @brief Stores a Vec to unaligned memory.
 *
 * Unlike store(), no alignment check is performed on the pointer.
 *
 * @param p pointer to the memory to store to
 * @param a Vec to store
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE void storeu(T *const p, const Vec<T, SIMD_WIDTH> &a)
{
  namespace impl = internal::base;
  impl::storeu(p, a);
}


/**
 * @brief Stores a Vec to aligned memory using a non-temporal memory hint.
 *
 * @param p pointer to the memory to store to
 * @param a Vec to store
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE void stream_store(T *const p, const Vec<T, SIMD_WIDTH> &a)
{
  namespace impl = internal::base;
  // 08. Apr 23 (Jonas Keller):
  // alignment check (only active if SIMD_ALIGN_CHK is defined)
  SIMD_CHECK_ALIGNMENT(p, SIMD_WIDTH);
  impl::stream_store(p, a);
}


/**
 * @brief Load fence.
 *
 * Forwards to the architecture-specific internal::base::lfence().
 */
static SIMD_INLINE void lfence()
{
  namespace impl = internal::base;
  impl::lfence();
}


/**
 * @brief Store fence.
 *
 * Forwards to the architecture-specific internal::base::sfence().
 */
static SIMD_INLINE void sfence()
{
  namespace impl = internal::base;
  impl::sfence();
}


/**
 * @brief Full memory fence.
 *
 * Forwards to the architecture-specific internal::base::mfence().
 */
static SIMD_INLINE void mfence()
{
  namespace impl = internal::base;
  impl::mfence();
}


/**
 * @brief Adds the elements of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::add(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> add(const Vec<T, SIMD_WIDTH> &a,
                                          const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> sum = internal::base::add(a, b);
  return sum;
}


/**
 * @brief Adds the elements of two Vec's using saturated arithmetic.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::adds(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> adds(const Vec<T, SIMD_WIDTH> &a,
                                           const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> saturatedSum = internal::base::adds(a, b);
  return saturatedSum;
}


/**
 * @brief Subtracts the elements of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::sub(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> sub(const Vec<T, SIMD_WIDTH> &a,
                                          const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> difference = internal::base::sub(a, b);
  return difference;
}


/**
 * @brief Subtracts the elements of two Vec's using saturated arithmetic.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::subs(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> subs(const Vec<T, SIMD_WIDTH> &a,
                                           const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> saturatedDifference = internal::base::subs(a, b);
  return saturatedDifference;
}


/**
 * @brief Multiplies the elements of two Vec's.
 *
 * Rejected at compile time unless TypeInfo<T>::isFloatingPoint holds.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::mul(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> mul(const Vec<T, SIMD_WIDTH> &a,
                                          const Vec<T, SIMD_WIDTH> &b)
{
  static_assert(TypeInfo<T>::isFloatingPoint,
                "mul() is only available for floating point types");
  const Vec<T, SIMD_WIDTH> product = internal::base::mul(a, b);
  return product;
}


/**
 * @brief Divides the elements of two Vec's.
 *
 * Rejected at compile time unless TypeInfo<T>::isFloatingPoint holds.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::div(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> div(const Vec<T, SIMD_WIDTH> &a,
                                          const Vec<T, SIMD_WIDTH> &b)
{
  static_assert(TypeInfo<T>::isFloatingPoint,
                "div() is only available for floating point types");
  const Vec<T, SIMD_WIDTH> quotient = internal::base::div(a, b);
  return quotient;
}


/**
 * @brief Computes the average of the elements of two Vec's, rounded up.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::avg(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> avg(const Vec<T, SIMD_WIDTH> &a,
                                          const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> average = internal::base::avg(a, b);
  return average;
}


/**
 * @brief Horizontally adds adjacent elements of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::hadd(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> hadd(const Vec<T, SIMD_WIDTH> &a,
                                           const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> pairSums = internal::base::hadd(a, b);
  return pairSums;
}


/**
 * @brief Horizontally adds adjacent elements of two Vec's with saturation.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::hadds(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> hadds(const Vec<T, SIMD_WIDTH> &a,
                                            const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> saturatedPairSums = internal::base::hadds(a, b);
  return saturatedPairSums;
}


/**
 * @brief Horizontally subtracts adjacent elements of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::hsub(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> hsub(const Vec<T, SIMD_WIDTH> &a,
                                           const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> pairDifferences = internal::base::hsub(a, b);
  return pairDifferences;
}


/**
 * @brief Horizontally subtracts adjacent elements of two Vec's with
 * saturation.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::hsubs(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> hsubs(const Vec<T, SIMD_WIDTH> &a,
                                            const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> saturatedPairDiffs = internal::base::hsubs(a, b);
  return saturatedPairDiffs;
}


/**
 * @brief Computes the approximate reciprocal of the elements of a Vec.
 *
 * Rejected at compile time unless TypeInfo<T>::isFloatingPoint holds.
 *
 * @param a input Vec
 * @return value produced by internal::base::rcp(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> rcp(const Vec<T, SIMD_WIDTH> &a)
{
  static_assert(TypeInfo<T>::isFloatingPoint,
                "rcp() is only available for floating point types");
  const Vec<T, SIMD_WIDTH> reciprocal = internal::base::rcp(a);
  return reciprocal;
}


/**
 * @brief Computes the approximate reciprocal square root of the elements of a
 * Vec.
 *
 * Rejected at compile time unless TypeInfo<T>::isFloatingPoint holds.
 *
 * @param a input Vec
 * @return value produced by internal::base::rsqrt(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> rsqrt(const Vec<T, SIMD_WIDTH> &a)
{
  static_assert(TypeInfo<T>::isFloatingPoint,
                "rsqrt() is only available for floating point types");
  const Vec<T, SIMD_WIDTH> reciprocalRoot = internal::base::rsqrt(a);
  return reciprocalRoot;
}


/**
 * @brief Computes the square root of the elements of a Vec.
 *
 * Rejected at compile time unless TypeInfo<T>::isFloatingPoint holds.
 *
 * @param a input Vec
 * @return value produced by internal::base::sqrt(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> sqrt(const Vec<T, SIMD_WIDTH> &a)
{
  static_assert(TypeInfo<T>::isFloatingPoint,
                "sqrt() is only available for floating point types");
  const Vec<T, SIMD_WIDTH> root = internal::base::sqrt(a);
  return root;
}


/**
 * @brief Computes the minimum of the elements of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::min(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> min(const Vec<T, SIMD_WIDTH> &a,
                                          const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> minimum = internal::base::min(a, b);
  return minimum;
}


/**
 * @brief Computes the maximum of the elements of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::max(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> max(const Vec<T, SIMD_WIDTH> &a,
                                          const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> maximum = internal::base::max(a, b);
  return maximum;
}


/**
 * @brief Negates the elements of a Vec.
 *
 * Rejected at compile time unless TypeInfo<T>::isSigned holds.
 *
 * @param a input Vec
 * @return value produced by internal::base::neg(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> neg(const Vec<T, SIMD_WIDTH> &a)
{
  static_assert(TypeInfo<T>::isSigned,
                "neg() is only available for signed types");
  const Vec<T, SIMD_WIDTH> negated = internal::base::neg(a);
  return negated;
}


// 25. Mar 23 (Jonas Keller): added integer version of ceil, floor, round,

// truncate and unsigned version of abs


/**
 * @brief Computes the absolute value of the elements of a Vec.
 *
 * @param a input Vec
 * @return value produced by internal::base::abs(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> abs(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> magnitude = internal::base::abs(a);
  return magnitude;
}


/**
 * @brief Rounds the elements of a Vec up to the nearest integer.
 *
 * @param a input Vec
 * @return value produced by internal::base::ceil(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> ceil(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> roundedUp = internal::base::ceil(a);
  return roundedUp;
}


/**
 * @brief Rounds the elements of a Vec down to the nearest integer.
 *
 * @param a input Vec
 * @return value produced by internal::base::floor(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> floor(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> roundedDown = internal::base::floor(a);
  return roundedDown;
}


/**
 * @brief Rounds the elements of a Vec to the nearest integer.
 *
 * @param a input Vec
 * @return value produced by internal::base::round(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> round(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> rounded = internal::base::round(a);
  return rounded;
}


/**
 * @brief Truncates the elements of a Vec to the nearest integer, i.e. rounds
 * towards zero.
 *
 * @param a input Vec
 * @return value produced by internal::base::truncate(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> truncate(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> truncated = internal::base::truncate(a);
  return truncated;
}


/**
 * @brief Computes the bitwise AND of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::bit_and(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> bit_and(const Vec<T, SIMD_WIDTH> &a,
                                              const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> andBits = internal::base::bit_and(a, b);
  return andBits;
}


/**
 * @brief Computes the bitwise OR of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::bit_or(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> bit_or(const Vec<T, SIMD_WIDTH> &a,
                                             const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> orBits = internal::base::bit_or(a, b);
  return orBits;
}


/**
 * @brief Computes the bitwise ANDNOT of two Vec's.
 *
 * Which operand is negated is decided by internal::base::bit_andnot(); the
 * arguments are forwarded in the order (a, b).
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::bit_andnot(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> bit_andnot(const Vec<T, SIMD_WIDTH> &a,
                                                 const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> andnotBits = internal::base::bit_andnot(a, b);
  return andnotBits;
}


/**
 * @brief Computes the bitwise XOR of two Vec's.
 *
 * @param a first operand
 * @param b second operand
 * @return value produced by internal::base::bit_xor(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> bit_xor(const Vec<T, SIMD_WIDTH> &a,
                                              const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> xorBits = internal::base::bit_xor(a, b);
  return xorBits;
}


/**
 * @brief Computes the bitwise NOT of a Vec.
 *
 * @param a input Vec
 * @return value produced by internal::base::bit_not(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> bit_not(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> invertedBits = internal::base::bit_not(a);
  return invertedBits;
}


/**
 * @brief Shifts the elements of a Vec right by a constant number of bits
 * while shifting in the sign bit.
 *
 * @tparam COUNT number of bits to shift by (compile-time constant)
 * @param a input Vec
 * @return value produced by internal::base::srai<COUNT>(a)
 */
template <size_t COUNT, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> srai(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::srai<COUNT>(a);
  return shifted;
}


/**
 * @brief Shifts the elements of a Vec right by a constant number of bits
 * while shifting in zeros.
 *
 * @tparam COUNT number of bits to shift by (compile-time constant)
 * @param a input Vec
 * @return value produced by internal::base::srli<COUNT>(a)
 */
template <size_t COUNT, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> srli(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::srli<COUNT>(a);
  return shifted;
}


/**
 * @brief Shifts the elements of a Vec left by a constant number of bits
 * while shifting in zeros.
 *
 * @tparam COUNT number of bits to shift by (compile-time constant)
 * @param a input Vec
 * @return value produced by internal::base::slli<COUNT>(a)
 */
template <size_t COUNT, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> slli(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::slli<COUNT>(a);
  return shifted;
}


// 12. Jan 23 (Jonas Keller): added sra, srl and sll functions


/**
 * @brief Shifts the elements of a Vec right by a variable number of bits
 * while shifting in the sign bit.
 *
 * @param a input Vec
 * @param count number of bits to shift by (run-time value)
 * @return value produced by internal::base::sra(a, count)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> sra(const Vec<T, SIMD_WIDTH> &a,
                                          const uint8_t count)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::sra(a, count);
  return shifted;
}


/**
 * @brief Shifts the elements of a Vec right by a variable number of bits
 * while shifting in zeros.
 *
 * @param a input Vec
 * @param count number of bits to shift by (run-time value)
 * @return value produced by internal::base::srl(a, count)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> srl(const Vec<T, SIMD_WIDTH> &a,
                                          const uint8_t count)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::srl(a, count);
  return shifted;
}


/**
 * @brief Shifts the elements of a Vec left by a variable number of bits
 * while shifting in zeros.
 *
 * @param a input Vec
 * @param count number of bits to shift by (run-time value)
 * @return value produced by internal::base::sll(a, count)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> sll(const Vec<T, SIMD_WIDTH> &a,
                                          const uint8_t count)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::sll(a, count);
  return shifted;
}


/**
 * @brief Compares corresponding elements of two Vec's for less-than ( < ).
 *
 * @param a left-hand operand
 * @param b right-hand operand
 * @return value produced by internal::base::cmplt(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> cmplt(const Vec<T, SIMD_WIDTH> &a,
                                            const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> comparison = internal::base::cmplt(a, b);
  return comparison;
}


/**
 * @brief Compares corresponding elements of two Vec's for less-than-or-equal
 * ( <= ).
 *
 * @param a left-hand operand
 * @param b right-hand operand
 * @return value produced by internal::base::cmple(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> cmple(const Vec<T, SIMD_WIDTH> &a,
                                            const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> comparison = internal::base::cmple(a, b);
  return comparison;
}


/**
 * @brief Compares corresponding elements of two Vec's for equality ( == ).
 *
 * @param a left-hand operand
 * @param b right-hand operand
 * @return value produced by internal::base::cmpeq(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> cmpeq(const Vec<T, SIMD_WIDTH> &a,
                                            const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> comparison = internal::base::cmpeq(a, b);
  return comparison;
}


/**
 * @brief Compares corresponding elements of two Vec's for
 * greater-than-or-equal ( >= ).
 *
 * @param a left-hand operand
 * @param b right-hand operand
 * @return value produced by internal::base::cmpge(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> cmpge(const Vec<T, SIMD_WIDTH> &a,
                                            const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> comparison = internal::base::cmpge(a, b);
  return comparison;
}


/**
 * @brief Compares corresponding elements of two Vec's for greater-than ( > ).
 *
 * @param a left-hand operand
 * @param b right-hand operand
 * @return value produced by internal::base::cmpgt(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> cmpgt(const Vec<T, SIMD_WIDTH> &a,
                                            const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> comparison = internal::base::cmpgt(a, b);
  return comparison;
}


/**
 * @brief Compares corresponding elements of two Vec's for inequality ( != ).
 *
 * @param a left-hand operand
 * @param b right-hand operand
 * @return value produced by internal::base::cmpneq(a, b)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> cmpneq(const Vec<T, SIMD_WIDTH> &a,
                                             const Vec<T, SIMD_WIDTH> &b)
{
  const Vec<T, SIMD_WIDTH> comparison = internal::base::cmpneq(a, b);
  return comparison;
}


/**
 * @brief Tests if all bits of a Vec are zero.
 *
 * @param a Vec to test
 * @return value produced by internal::base::test_all_zeros(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE bool test_all_zeros(const Vec<T, SIMD_WIDTH> &a)
{
  const bool allZero = internal::base::test_all_zeros(a);
  return allZero;
}


/**
 * @brief Tests if all bits of a Vec are one.
 *
 * @param a Vec to test
 * @return value produced by internal::base::test_all_ones(a)
 */
template <typename T, size_t SIMD_WIDTH>
static SIMD_INLINE bool test_all_ones(const Vec<T, SIMD_WIDTH> &a)
{
  const bool allOnes = internal::base::test_all_ones(a);
  return allOnes;
}


/**
 * @brief Extracts a single value from a Vec.
 *
 * INDEX is not range-checked here; it is passed on unchanged to
 * internal::base::extract<INDEX>().
 *
 * @tparam INDEX compile-time index handed to the implementation
 * @param a Vec to extract from
 * @return value produced by internal::base::extract<INDEX>(a)
 */
template <size_t INDEX, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE T extract(const Vec<T, SIMD_WIDTH> &a)
{
  const T element = internal::base::extract<INDEX>(a);
  return element;
}


/**
 * @brief Extracts a 16-byte lane from a Vec as a Vec<T, 16>.
 *
 * LANE_INDEX is checked at compile time against the number of 16-byte lanes
 * (SIMD_WIDTH / 16).
 *
 * @tparam LANE_INDEX index of the lane to extract
 * @param a Vec to extract the lane from
 * @return value produced by internal::base::extractLane<LANE_INDEX>(a)
 */
template <size_t LANE_INDEX, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, 16> extractLane(const Vec<T, SIMD_WIDTH> &a)
{
  static_assert(LANE_INDEX < SIMD_WIDTH / 16,
                "LANE_INDEX must be less than SIMD_WIDTH / 16");
  const Vec<T, 16> lane = internal::base::extractLane<LANE_INDEX>(a);
  return lane;
}


template <typename T, size_t SIMD_WIDTH>


static Vec<T, SIMD_WIDTH> reverse(const Vec<T, SIMD_WIDTH> &a)

{

  return internal::base::reverse(a);

}


/**
 * @brief Swizzle/de-interleave/convert from AoS to SoA multiple Vec's
 * in-place.
 *
 * @tparam N number of Vec's in @p v; must be between 1 and 5 (checked at
 * compile time) and has to be given explicitly
 * @param v array of N Vec's, modified in place by the implementation
 */
template <size_t N, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE void swizzle(Vec<T, SIMD_WIDTH> v[N])
{
  static_assert(N >= 1 && N <= 5, "N must be between 1 and 5");
  // tag object carrying the number of Vec's
  using CountTag = internal::Integer<N>;
  internal::base::swizzle(v, CountTag());
}


/**
 * @brief Interleaves blocks of elements from the high or low half of two
 * Vec's.
 *
 * @tparam PART selects the half; must be 0 or 1
 * @tparam NUM_ELEMS number of elements per block; must be a power of two and
 * at most half the number of elements of one Vec
 * @param a first input Vec
 * @param b second input Vec
 * @return value produced by internal::base::unpack()
 */
template <size_t PART, size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> unpack(const Vec<T, SIMD_WIDTH> &a,
                                             const Vec<T, SIMD_WIDTH> &b)
{
  static_assert(PART == 0 || PART == 1, "PART must be 0 or 1");
  static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
                "NUM_ELEMS must be at most half of one Vec");
  static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
                "NUM_ELEMS must be a power of two");
  // the implementation is selected by the part and the block size in bytes
  using PartTag  = internal::Part<PART>;
  using BlockTag = internal::Bytes<NUM_ELEMS * sizeof(T)>;
  return internal::base::unpack(a, b, PartTag(), BlockTag());
}


/**
 * @brief Interleaves blocks of elements from the high or low half of each
 * 16-byte lane of two Vec's.
 *
 * NOTE(review): the upper bound on NUM_ELEMS is checked against half of the
 * whole Vec, not half of a 16-byte lane -- confirm that this is intended.
 *
 * @tparam PART selects the half; must be 0 or 1
 * @tparam NUM_ELEMS number of elements per block; must be a power of two
 * @param a first input Vec
 * @param b second input Vec
 * @return value produced by internal::base::unpack16()
 */
template <size_t PART, size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> unpack16(const Vec<T, SIMD_WIDTH> &a,
                                               const Vec<T, SIMD_WIDTH> &b)
{
  static_assert(PART == 0 || PART == 1, "PART must be 0 or 1");
  static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
                "NUM_ELEMS must be at most half of one Vec");
  static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
                "NUM_ELEMS must be a power of two");
  // the implementation is selected by the part and the block size in bytes
  using PartTag  = internal::Part<PART>;
  using BlockTag = internal::Bytes<NUM_ELEMS * sizeof(T)>;
  return internal::base::unpack16(a, b, PartTag(), BlockTag());
}


/**
 * @brief Interleaves blocks of elements of two Vec's.
 *
 * The inputs are taken by value, the outputs by reference.
 *
 * @tparam NUM_ELEMS number of elements per block; must be a power of two and
 * at most half the number of elements of one Vec
 * @param a first input Vec
 * @param b second input Vec
 * @param[out] l first output Vec written by internal::base::zip()
 * @param[out] h second output Vec written by internal::base::zip()
 */
template <size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE void zip(const Vec<T, SIMD_WIDTH> a,
                            const Vec<T, SIMD_WIDTH> b, Vec<T, SIMD_WIDTH> &l,
                            Vec<T, SIMD_WIDTH> &h)
{
  static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
                "NUM_ELEMS must be at most half of one Vec");
  static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
                "NUM_ELEMS must be a power of two");
  // results are delivered through l and h
  internal::base::zip<NUM_ELEMS>(a, b, l, h);
}


/**
 * @brief Interleaves blocks of elements of each 16-byte lane of two Vec's.
 *
 * The inputs are taken by value, the outputs by reference.
 *
 * NOTE(review): the upper bound on NUM_ELEMS is checked against half of the
 * whole Vec, not half of a 16-byte lane -- confirm that this is intended.
 *
 * @tparam NUM_ELEMS number of elements per block; must be a power of two
 * @param a first input Vec
 * @param b second input Vec
 * @param[out] l first output Vec written by internal::base::zip16()
 * @param[out] h second output Vec written by internal::base::zip16()
 */
template <size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE void zip16(const Vec<T, SIMD_WIDTH> a,
                              const Vec<T, SIMD_WIDTH> b, Vec<T, SIMD_WIDTH> &l,
                              Vec<T, SIMD_WIDTH> &h)
{
  static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
                "NUM_ELEMS must be at most half of one Vec");
  static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
                "NUM_ELEMS must be a power of two");
  // results are delivered through l and h
  internal::base::zip16<NUM_ELEMS>(a, b, l, h);
}


/**
 * @brief Deinterleaves blocks of elements of two Vec's.
 *
 * The inputs are taken by value, the outputs by reference.
 *
 * @tparam NUM_ELEMS number of elements per block; must be a power of two and
 * at most half the number of elements of one Vec
 * @param a first input Vec
 * @param b second input Vec
 * @param[out] l first output Vec written by internal::base::unzip()
 * @param[out] h second output Vec written by internal::base::unzip()
 */
template <size_t NUM_ELEMS, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE void unzip(const Vec<T, SIMD_WIDTH> a,
                              const Vec<T, SIMD_WIDTH> b, Vec<T, SIMD_WIDTH> &l,
                              Vec<T, SIMD_WIDTH> &h)
{
  static_assert(NUM_ELEMS <= Vec<T, SIMD_WIDTH>::elements / 2,
                "NUM_ELEMS must be at most half of one Vec");
  static_assert(NUM_ELEMS > 0 && (NUM_ELEMS & (NUM_ELEMS - 1)) == 0,
                "NUM_ELEMS must be a power of two");
  // the implementation is selected by the block size in bytes
  using BlockTag = internal::Bytes<NUM_ELEMS * sizeof(T)>;
  internal::base::unzip(a, b, l, h, BlockTag());
}


/**
 * @brief Shifts a Vec right by a constant number of elements, shifting in
 * zero elements.
 *
 * @tparam COUNT number of elements to shift by (compile-time constant)
 * @param a input Vec
 * @return value produced by internal::base::srle<COUNT>(a)
 */
template <size_t COUNT, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> srle(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::srle<COUNT>(a);
  return shifted;
}


/**
 * @brief Shifts a Vec left by a constant number of elements, shifting in
 * zero elements.
 *
 * @tparam COUNT number of elements to shift by (compile-time constant)
 * @param a input Vec
 * @return value produced by internal::base::slle<COUNT>(a)
 */
template <size_t COUNT, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> slle(const Vec<T, SIMD_WIDTH> &a)
{
  const Vec<T, SIMD_WIDTH> shifted = internal::base::slle<COUNT>(a);
  return shifted;
}


/**
 * @brief Concatenates two Vec's and shifts the result right by a constant
 * number of elements.
 *
 * @tparam COUNT number of elements to shift by (compile-time constant)
 * @param h first Vec passed to the implementation (presumably the high part
 * of the concatenation -- confirm)
 * @param l second Vec passed to the implementation (presumably the low part
 * of the concatenation -- confirm)
 * @return value produced by internal::base::alignre<COUNT>(h, l)
 */
template <size_t COUNT, typename T, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<T, SIMD_WIDTH> alignre(const Vec<T, SIMD_WIDTH> &h,
                                              const Vec<T, SIMD_WIDTH> &l)
{
  const Vec<T, SIMD_WIDTH> aligned = internal::base::alignre<COUNT>(h, l);
  return aligned;
}


/**
 * @brief Packs two Vec's into one by converting the elements into the next
 * smaller type with saturation.
 *
 * @tparam Tout element type of the returned Vec (must be given explicitly)
 * @tparam Tin element type of the input Vec's
 * @param a first input Vec
 * @param b second input Vec
 * @return value produced by internal::base::packs()
 */
template <typename Tout, typename Tin, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<Tout, SIMD_WIDTH> packs(const Vec<Tin, SIMD_WIDTH> &a,
                                               const Vec<Tin, SIMD_WIDTH> &b)
{
  // tag object carrying the requested output element type
  using OutTag = internal::OutputType<Tout>;
  return internal::base::packs(a, b, OutTag());
}


/**
 * @brief Extends the elements of a Vec to a larger or equally sized type.
 *
 * The output array is declared with sizeof(Tout) / sizeof(Tin) Vec's.
 *
 * @tparam Tout element type of the output Vec's
 * @tparam Tin element type of the input Vec
 * @param vIn input Vec
 * @param[out] vOut array of output Vec's written by internal::base::extend()
 */
template <typename Tout, typename Tin, size_t SIMD_WIDTH>
static SIMD_INLINE void extend(
  const Vec<Tin, SIMD_WIDTH> &vIn,
  Vec<Tout, SIMD_WIDTH> vOut[sizeof(Tout) / sizeof(Tin)])
{
  // results are delivered through vOut
  internal::base::extend(vIn, vOut);
}


/**
 * @brief Converts the elements of a Vec between integer and floating point
 * types of the same size.
 *
 * Checked at compile time: Tout and Tin have the same size, and exactly one
 * of them is a floating point type.
 *
 * @tparam Tout element type of the returned Vec (must be given explicitly)
 * @tparam Tin element type of the input Vec
 * @param a input Vec
 * @return value produced by internal::base::cvts()
 */
template <typename Tout, typename Tin, size_t SIMD_WIDTH>
static SIMD_INLINE Vec<Tout, SIMD_WIDTH> cvts(const Vec<Tin, SIMD_WIDTH> &a)
{
  static_assert(sizeof(Tout) == sizeof(Tin),
                "Tout and Tin must be the same size");
  static_assert(std::is_floating_point<Tout>::value !=
                  std::is_floating_point<Tin>::value,
                "exactly one of Tout and Tin must be a floating point type");
  // tag object carrying the requested output element type
  using OutTag = internal::OutputType<Tout>;
  return internal::base::cvts(a, OutTag());
}


} // namespace simd


#endif // SIMD_VEC_BASE_H_

simd::Vec
SIMD vector class, holds multiple elements of the same type.
Definition vec.H:75

simd::sub
static Vec< T, SIMD_WIDTH > sub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's.
Definition base.H:388

simd::subs
static Vec< T, SIMD_WIDTH > subs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Subtracts the elements of two Vec's using saturated arithmetic.
Definition base.H:405

simd::avg
static Vec< T, SIMD_WIDTH > avg(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the average of the elements of two Vec's, rounded up.
Definition base.H:456

simd::adds
static Vec< T, SIMD_WIDTH > adds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's using saturated arithmetic.
Definition base.H:374

simd::div
static Vec< T, SIMD_WIDTH > div(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Divides the elements of two Vec's.
Definition base.H:439

simd::add
static Vec< T, SIMD_WIDTH > add(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Adds the elements of two Vec's.
Definition base.H:357

simd::mul
static Vec< T, SIMD_WIDTH > mul(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Multiplies the elements of two Vec's.
Definition base.H:421

simd::cmplt
static Vec< T, SIMD_WIDTH > cmplt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than ( < ).
Definition base.H:924

simd::test_all_ones
static bool test_all_ones(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are one.
Definition base.H:1054

simd::cmple
static Vec< T, SIMD_WIDTH > cmple(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for less-than-or-equal ( <= ).
Definition base.H:945

simd::test_all_zeros
static bool test_all_zeros(const Vec< T, SIMD_WIDTH > &a)
Tests if all bits of a Vec are zero.
Definition base.H:1042

simd::cmpneq
static Vec< T, SIMD_WIDTH > cmpneq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for inequality ( != ).
Definition base.H:1029

simd::cmpge
static Vec< T, SIMD_WIDTH > cmpge(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than-or-equal ( >= ).
Definition base.H:987

simd::cmpgt
static Vec< T, SIMD_WIDTH > cmpgt(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for greater-than ( > ).
Definition base.H:1008

simd::cmpeq
static Vec< T, SIMD_WIDTH > cmpeq(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Compares corresponding elements of two Vec's for equality ( == ).
Definition base.H:966

simd::ifelse
static Vec< T, SIMD_WIDTH > ifelse(const Vec< Tcond, SIMD_WIDTH > &cond, const Vec< T, SIMD_WIDTH > &trueVal, const Vec< T, SIMD_WIDTH > &falseVal)
Selects elements from two Vec's based on a condition Vec.
Definition base.H:126

simd::slle
static Vec< T, SIMD_WIDTH > slle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec left by a constant number of elements, shifting in zero elements.
Definition base.H:1353

simd::srle
static Vec< T, SIMD_WIDTH > srle(const Vec< T, SIMD_WIDTH > &a)
Shifts a Vec right by a constant number of elements, shifting in zero elements.
Definition base.H:1338

simd::alignre
static Vec< T, SIMD_WIDTH > alignre(const Vec< T, SIMD_WIDTH > &h, const Vec< T, SIMD_WIDTH > &l)
Concatenates two Vec's, shifts the result right by a constant number of elements, and returns the low...
Definition base.H:1370

simd::extract
static T extract(const Vec< T, SIMD_WIDTH > &a)
Extracts a single value from a Vec.
Definition base.H:1072

simd::extractLane
static Vec< T, 16 > extractLane(const Vec< T, SIMD_WIDTH > &a)
Extracts a 16-byte lane from a Vec as a Vec < T, 16 >.
Definition base.H:1086

simd::hsubs
static Vec< T, SIMD_WIDTH > hsubs(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's with saturation.
Definition base.H:523

simd::hadds
static Vec< T, SIMD_WIDTH > hadds(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's with saturation.
Definition base.H:493

simd::hadd
static Vec< T, SIMD_WIDTH > hadd(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally adds adjacent elements of two Vec's.
Definition base.H:477

simd::hsub
static Vec< T, SIMD_WIDTH > hsub(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Horizontally subtracts adjacent elements of two Vec's.
Definition base.H:507

simd::iota
static Vec< T, SIMD_WIDTH > iota()
Creates a Vec with sequentially increasing numbers, starting with 0.
Definition base.H:106

simd::int2bits
static Vec< T, SIMD_WIDTH > int2bits(const uint64_t a)
Sets all bits of each element of a Vec to the corresponding bit of an integer.
Definition base.H:190

simd::msb2int
static uint64_t msb2int(const Vec< T, SIMD_WIDTH > &a)
Collects the most significant bit of each element of a Vec into an integer.
Definition base.H:147

simd::int2msb
static Vec< T, SIMD_WIDTH > int2msb(const uint64_t a)
Sets the most significant bit of each element of a Vec to the corresponding bit of an integer.
Definition base.H:169

simd::setzero
static Vec< T, SIMD_WIDTH > setzero()
Returns a Vec with all elements set to zero.
Definition base.H:70

simd::set1
static Vec< T, SIMD_WIDTH > set1(const dont_deduce< T > a)
Returns a Vec with all elements set to the same value.
Definition base.H:88

simd::bit_andnot
static Vec< T, SIMD_WIDTH > bit_andnot(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise ANDNOT of two Vec's.
Definition base.H:762

simd::bit_and
static Vec< T, SIMD_WIDTH > bit_and(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise AND of two Vec's.
Definition base.H:732

simd::bit_xor
static Vec< T, SIMD_WIDTH > bit_xor(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise XOR of two Vec's.
Definition base.H:776

simd::bit_or
static Vec< T, SIMD_WIDTH > bit_or(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the bitwise OR of two Vec's.
Definition base.H:746

simd::bit_not
static Vec< T, SIMD_WIDTH > bit_not(const Vec< T, SIMD_WIDTH > &a)
Computes the bitwise NOT of a Vec.
Definition base.H:789

simd::sqrt
static Vec< T, SIMD_WIDTH > sqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the square root of the elements of a Vec.
Definition base.H:584

simd::rcp
static Vec< T, SIMD_WIDTH > rcp(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal of the elements of a Vec.
Definition base.H:547

simd::rsqrt
static Vec< T, SIMD_WIDTH > rsqrt(const Vec< T, SIMD_WIDTH > &a)
Computes the approximate reciprocal square root of the elements of a Vec.
Definition base.H:565

simd::truncate
static Vec< T, SIMD_WIDTH > truncate(const Vec< T, SIMD_WIDTH > &a)
Truncates the elements of a Vec to the nearest integer, i.e., rounds towards zero.
Definition base.H:712

simd::min
static Vec< T, SIMD_WIDTH > min(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the minimum of the elements of two Vec's.
Definition base.H:606

simd::floor
static Vec< T, SIMD_WIDTH > floor(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec down to the nearest integer.
Definition base.H:683

simd::max
static Vec< T, SIMD_WIDTH > max(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Computes the maximum of the elements of two Vec's.
Definition base.H:620

simd::ceil
static Vec< T, SIMD_WIDTH > ceil(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec up to the nearest integer.
Definition base.H:668

simd::neg
static Vec< T, SIMD_WIDTH > neg(const Vec< T, SIMD_WIDTH > &a)
Negates the elements of a Vec.
Definition base.H:635

simd::round
static Vec< T, SIMD_WIDTH > round(const Vec< T, SIMD_WIDTH > &a)
Rounds the elements of a Vec to the nearest integer.
Definition base.H:697

simd::abs
static Vec< T, SIMD_WIDTH > abs(const Vec< T, SIMD_WIDTH > &a)
Computes the absolute value of the elements of a Vec.
Definition base.H:654

simd::load
static Vec< T, SIMD_WIDTH > load(const T *const p)
Loads a Vec from aligned memory.
Definition base.H:209

simd::loadu
static Vec< T, SIMD_WIDTH > loadu(const T *const p)
Loads a Vec from unaligned memory.
Definition base.H:231

simd::store
static void store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory.
Definition base.H:246

simd::storeu
static void storeu(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to unaligned memory.
Definition base.H:265

simd::stream_store
static void stream_store(T *const p, const Vec< T, SIMD_WIDTH > &a)
Stores a Vec to aligned memory using a non-temporal memory hint.
Definition base.H:286

simd::mfence
static void mfence()
Full memory fence.
Definition base.H:337

simd::sfence
static void sfence()
Store fence.
Definition base.H:324

simd::lfence
static void lfence()
Load fence.
Definition base.H:309

simd::reverse
static Vec< T, SIMD_WIDTH > reverse(const Vec< T, SIMD_WIDTH > &a)
Reverses the order of the elements of a Vec.
Definition base.H:1101

simd::sra
static Vec< T, SIMD_WIDTH > sra(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec right by a variable number of bits while shifting in the sign bit.
Definition base.H:862

simd::srli
static Vec< T, SIMD_WIDTH > srli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in zeros.
Definition base.H:828

simd::sll
static Vec< T, SIMD_WIDTH > sll(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec left by a variable number of bits while shifting in zeros.
Definition base.H:896

simd::slli
static Vec< T, SIMD_WIDTH > slli(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec left by a constant number of bits while shifting in zeros.
Definition base.H:844

simd::srai
static Vec< T, SIMD_WIDTH > srai(const Vec< T, SIMD_WIDTH > &a)
Shifts the elements of a Vec right by a constant number of bits while shifting in the sign bit.
Definition base.H:812

simd::srl
static Vec< T, SIMD_WIDTH > srl(const Vec< T, SIMD_WIDTH > &a, const uint8_t count)
Shifts the elements of a Vec right by a variable number of bits while shifting in zeros.
Definition base.H:879

simd::swizzle
static void swizzle(Vec< T, SIMD_WIDTH > v[N])
Swizzle/de-interleave/convert from AoS to SoA multiple Vec's in-place.
Definition base.H:1141

simd::cvts
static Vec< Tout, SIMD_WIDTH > cvts(const Vec< Tin, SIMD_WIDTH > &a)
Converts the elements of a Vec between integer and floating point types of the same size.
Definition base.H:1445

simd::packs
static Vec< Tout, SIMD_WIDTH > packs(const Vec< Tin, SIMD_WIDTH > &a, const Vec< Tin, SIMD_WIDTH > &b)
Packs two Vec's into one by converting the elements into the next smaller type with saturation.
Definition base.H:1397

simd::extend
static void extend(const Vec< Tin, SIMD_WIDTH > &vIn, Vec< Tout, SIMD_WIDTH > vOut[sizeof(Tout)/sizeof(Tin)])
Extends the elements of a Vec to a larger or equally sized type.
Definition base.H:1423

simd::reinterpret
static Vec< Tout, SIMD_WIDTH > reinterpret(const Vec< Tin, SIMD_WIDTH > &a)
Reinterprets a given Vec as a Vec with a different element type.
Definition base.H:58

simd::zip16
static void zip16(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Interleaves blocks of elements of each 16-byte lane of two Vec's.
Definition base.H:1286

simd::zip
static void zip(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Interleaves blocks of elements of two Vec's.
Definition base.H:1247

simd::unpack
static Vec< T, SIMD_WIDTH > unpack(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Interleaves blocks of elements from the high or low half of two Vec's.
Definition base.H:1169

simd::unzip
static void unzip(const Vec< T, SIMD_WIDTH > a, const Vec< T, SIMD_WIDTH > b, Vec< T, SIMD_WIDTH > &l, Vec< T, SIMD_WIDTH > &h)
Deinterleaves blocks of elements of two Vec's.
Definition base.H:1316

simd::unpack16
static Vec< T, SIMD_WIDTH > unpack16(const Vec< T, SIMD_WIDTH > &a, const Vec< T, SIMD_WIDTH > &b)
Interleaves blocks of elements from the high or low half of each 16-byte lane of two Vec's.
Definition base.H:1209

simd
Namespace for T-SIMD.
Definition time_measurement.H:161

simd::dont_deduce
typename internal::dont_deduce< T >::type dont_deduce
Helper type to prevent template argument deduction.
Definition types.H:416

simd::TypeInfo
Type information for SIMD types.
Definition types.H:257