T-SIMD v31.1.0
A C++ template SIMD library
Loading...
Searching...
No Matches
intrins_intel.H
1// ===========================================================================
2//
3// includes the vector-intrinsics headers for Intel CPUs and adds intrinsics missing in older gcc versions
4//
5// This source code file is part of the following software:
6//
7// - the low-level C++ template SIMD library
8// - the SIMD implementation of the MinWarping and the 2D-Warping methods
9// for local visual homing.
10//
11// The software is provided based on the accompanying license agreement in the
12// file LICENSE.md.
13// The software is provided "as is" without any warranty by the licensor and
14// without any liability of the licensor, and the software may not be
15// distributed by the licensee; see the license agreement for details.
16//
17// (C) Ralf Möller
18// Computer Engineering
19// Faculty of Technology
20// Bielefeld University
21// www.ti.uni-bielefeld.de
22//
23// ===========================================================================
24
25#pragma once
26#ifndef SIMD_INTRINS_INTEL_H_
27#define SIMD_INTRINS_INTEL_H_
28
29#include "../defs.H"
30
31#ifdef SIMDVEC_INTEL_ENABLE
32
33// 30. Aug 22 (Jonas Keller):
34// gcc warns that the value returned by the _mm*_undefined_* intrinsics is
35// used uninitialized, which is exactly what these intrinsics are for,
36// so disabling the warning when compiling with gcc
37#pragma GCC diagnostic push
38#if defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER)
39#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
40#pragma GCC diagnostic ignored "-Wuninitialized"
41#endif
42#include <x86intrin.h>
43#pragma GCC diagnostic pop
44
45// ---------------------------------------------------------------------------
46// some definitions are missing for -O0 in some versions of gcc (e.g. 5.4)
47// ---------------------------------------------------------------------------
48
49// 15. Nov 22 (Jonas Keller): moved this to here from SIMDVecBaseImplIntel64.H
50
51// bug seems to be fixed in avx512bwintrin.h in gcc 5.5.0
52
53#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \
54 (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 50500 && \
55 !defined(__OPTIMIZE__) && defined(__AVX512BW__)
56
57// _mm512_pack[u]s_epi32 doesn't need a define (no int arguments),
58// but is not available without optimization (error in include file)
59
60extern __inline __m512i
61 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
62 _mm512_packs_epi32(__m512i __A, __m512i __B)
63{
64 return (__m512i) __builtin_ia32_packssdw512_mask(
65 (__v16si) __A, (__v16si) __B, (__v32hi) _mm512_setzero_hi(),
66 (__mmask32) -1);
67}
68
69extern __inline __m512i
70 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_packus_epi32(__m512i __A, __m512i __B)
72{
73 return (__m512i) __builtin_ia32_packusdw512_mask(
74 (__v16si) __A, (__v16si) __B, (__v32hi) _mm512_setzero_hi(),
75 (__mmask32) -1);
76}
77
78#endif
79
80// ---------------------------------------------------------------------------
81// abs and masked abs for float and double missing in gcc below version 7
82// ---------------------------------------------------------------------------
83
84// 24. Nov 23 (Jonas Keller):
85// added this fix for missing masked abs for float and double in gcc < 7
86
87// _mm512_abs_ps, _mm512_mask_abs_ps, _mm512_abs_pd and _mm512_mask_abs_pd are
88// missing in gcc below version 7
89// see https://gcc.gnu.org/pipermail/gcc-patches/2017-April/472183.html
90
91// implementation from
92// https://github.com/gcc-mirror/gcc/blob/master/gcc/config/i386/avx512fintrin.h
93
94#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \
95 (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 70000 && \
96 defined(__AVX512F__)
97
98extern __inline __m512
99 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
100 _mm512_abs_ps(__m512 __A)
101{
102 return (__m512) _mm512_and_epi32((__m512i) __A,
103 _mm512_set1_epi32(0x7fffffff));
104}
105
106extern __inline __m512
107 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
108 _mm512_mask_abs_ps(__m512 __W, __mmask16 __U, __m512 __A)
109{
110 return (__m512) _mm512_mask_and_epi32((__m512i) __W, __U, (__m512i) __A,
111 _mm512_set1_epi32(0x7fffffff));
112}
113
114extern __inline __m512d
115 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
116 _mm512_abs_pd(__m512d __A)
117{
118 return (__m512d) _mm512_and_epi64((__m512i) __A,
119 _mm512_set1_epi64(0x7fffffffffffffffLL));
120}
121
122extern __inline __m512d
123 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
124 _mm512_mask_abs_pd(__m512d __W, __mmask8 __U, __m512d __A)
125{
126 return (__m512d) _mm512_mask_and_epi64(
127 (__m512i) __W, __U, (__m512i) __A, _mm512_set1_epi64(0x7fffffffffffffffLL));
128}
129
130#endif
131
132// ---------------------------------------------------------------------------
133// _mm256_set_m128 and friends are missing in gcc below version 8
134// ---------------------------------------------------------------------------
135
136// 14. May 23 (Jonas Keller):
137// added this fix for missing _mm256_set_m128 and friends in gcc < 8
138
139// implementation from
140// https://github.com/gcc-mirror/gcc/blob/master/gcc/config/i386/avxintrin.h
141
142#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \
143 (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 80000 && \
144 defined(__AVX__)
145
146extern __inline __m256
147 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
148 _mm256_set_m128(__m128 __H, __m128 __L)
149{
150 return _mm256_insertf128_ps(_mm256_castps128_ps256(__L), __H, 1);
151}
152
153extern __inline __m256d
154 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
155 _mm256_set_m128d(__m128d __H, __m128d __L)
156{
157 return _mm256_insertf128_pd(_mm256_castpd128_pd256(__L), __H, 1);
158}
159
160extern __inline __m256i
161 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
162 _mm256_set_m128i(__m128i __H, __m128i __L)
163{
164 return _mm256_insertf128_si256(_mm256_castsi128_si256(__L), __H, 1);
165}
166
167extern __inline __m256
168 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
169 _mm256_setr_m128(__m128 __L, __m128 __H)
170{
171 return _mm256_set_m128(__H, __L);
172}
173
174extern __inline __m256d
175 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
176 _mm256_setr_m128d(__m128d __L, __m128d __H)
177{
178 return _mm256_set_m128d(__H, __L);
179}
180
181extern __inline __m256i
182 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
183 _mm256_setr_m128i(__m128i __L, __m128i __H)
184{
185 return _mm256_set_m128i(__H, __L);
186}
187
188#endif
189
190// ---------------------------------------------------------------------------
191// _mm512_set_epi8 and _mm512_set_epi16 missing in gcc below version 9
192// ---------------------------------------------------------------------------
193
194// 29. Sep 23 (Jonas Keller):
195// added this fix for missing _mm512_set_epi8 and _mm512_set_epi16 in gcc < 9
196
197// implementation from
198// https://github.com/gcc-mirror/gcc/blob/master/gcc/config/i386/avx512fintrin.h
199
200#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \
201 (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 90000 && \
202 defined(__AVX512F__)
203
204extern __inline __m512i
205 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
206 _mm512_set_epi8(char __q63, char __q62, char __q61, char __q60, char __q59,
207 char __q58, char __q57, char __q56, char __q55, char __q54,
208 char __q53, char __q52, char __q51, char __q50, char __q49,
209 char __q48, char __q47, char __q46, char __q45, char __q44,
210 char __q43, char __q42, char __q41, char __q40, char __q39,
211 char __q38, char __q37, char __q36, char __q35, char __q34,
212 char __q33, char __q32, char __q31, char __q30, char __q29,
213 char __q28, char __q27, char __q26, char __q25, char __q24,
214 char __q23, char __q22, char __q21, char __q20, char __q19,
215 char __q18, char __q17, char __q16, char __q15, char __q14,
216 char __q13, char __q12, char __q11, char __q10, char __q09,
217 char __q08, char __q07, char __q06, char __q05, char __q04,
218 char __q03, char __q02, char __q01, char __q00)
219{
220 return __extension__(__m512i)(__v64qi) {
221 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, __q08, __q09, __q10,
222 __q11, __q12, __q13, __q14, __q15, __q16, __q17, __q18, __q19, __q20, __q21,
223 __q22, __q23, __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31, __q32,
224 __q33, __q34, __q35, __q36, __q37, __q38, __q39, __q40, __q41, __q42, __q43,
225 __q44, __q45, __q46, __q47, __q48, __q49, __q50, __q51, __q52, __q53, __q54,
226 __q55, __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63};
227}
228
229extern __inline __m512i
230 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
231 _mm512_set_epi16(short __q31, short __q30, short __q29, short __q28,
232 short __q27, short __q26, short __q25, short __q24,
233 short __q23, short __q22, short __q21, short __q20,
234 short __q19, short __q18, short __q17, short __q16,
235 short __q15, short __q14, short __q13, short __q12,
236 short __q11, short __q10, short __q09, short __q08,
237 short __q07, short __q06, short __q05, short __q04,
238 short __q03, short __q02, short __q01, short __q00)
239{
240 return __extension__(__m512i)(__v32hi) {
241 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, __q08, __q09, __q10,
242 __q11, __q12, __q13, __q14, __q15, __q16, __q17, __q18, __q19, __q20, __q21,
243 __q22, __q23, __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31};
244}
245
246#endif
247
248#endif // SIMDVEC_INTEL_ENABLE
249
250#endif // SIMD_INTRINS_INTEL_H_