libstdc++
simd_builtin.h
1 // Simd Abi specific implementations -*- C++ -*-
2 
3 // Copyright (C) 2020-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <array>
31 #include <cmath>
32 #include <cstdlib>
33 
34 _GLIBCXX_SIMD_BEGIN_NAMESPACE
35 // _S_allbits{{{
36 template <typename _V>
37  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_allbits
38  = reinterpret_cast<_V>(~__vector_type_t<char, sizeof(_V) / sizeof(char)>());
39 
40 // }}}
41 // _S_signmask, _S_absmask{{{
42 template <typename _V, typename = _VectorTraits<_V>>
43  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_signmask
44  = __xor(_V() + 1, _V() - 1);
45 
46 template <typename _V, typename = _VectorTraits<_V>>
47  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_absmask
48  = __andnot(_S_signmask<_V>, _S_allbits<_V>);
49 
50 //}}}
51 // __vector_permute<Indices...>{{{
52 // Index == -1 requests zeroing of the output element
53 template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
54  typename = __detail::__odr_helper>
55  _Tp
56  __vector_permute(_Tp __x)
57  {
58  static_assert(sizeof...(_Indices) == _TVT::_S_full_size);
59  return __make_vector<typename _TVT::value_type>(
60  (_Indices == -1 ? 0 : __x[_Indices == -1 ? 0 : _Indices])...);
61  }
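// Example (illustrative): with __vector_type_t<int, 4> __x = {1, 2, 3, 4},
// __vector_permute<3, 2, 1, 0>(__x) yields {4, 3, 2, 1} and
// __vector_permute<0, -1, 2, -1>(__x) yields {1, 0, 3, 0}.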
62 
63 // }}}
64 // __vector_shuffle<Indices...>{{{
65 // Index == -1 requests zeroing of the output element
66 template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
67  typename = __detail::__odr_helper>
68  _Tp
69  __vector_shuffle(_Tp __x, _Tp __y)
70  {
71  return _Tp{(_Indices == -1 ? 0
72  : _Indices < _TVT::_S_full_size
73  ? __x[_Indices]
74  : __y[_Indices - _TVT::_S_full_size])...};
75  }
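// Example (illustrative): with __vector_type_t<int, 4> __x = {1, 2, 3, 4} and
// __y = {5, 6, 7, 8}, __vector_shuffle<0, 4, 1, 5>(__x, __y) yields
// {1, 5, 2, 6}; indices 0..3 select from __x, 4..7 from __y, -1 zeros the lane.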
76 
77 // }}}
78 // __make_wrapper{{{
79 template <typename _Tp, typename... _Args>
80  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, sizeof...(_Args)>
81  __make_wrapper(const _Args&... __args)
82  { return __make_vector<_Tp>(__args...); }
83 
84 // }}}
85 // __wrapper_bitcast{{{
86 template <typename _Tp, size_t _ToN = 0, typename _Up, size_t _M,
87  size_t _Np = _ToN != 0 ? _ToN : sizeof(_Up) * _M / sizeof(_Tp)>
88  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _Np>
89  __wrapper_bitcast(_SimdWrapper<_Up, _M> __x)
90  {
91  static_assert(_Np > 1);
92  return __intrin_bitcast<__vector_type_t<_Tp, _Np>>(__x._M_data);
93  }
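// Example (illustrative): __wrapper_bitcast<int>(_SimdWrapper<short, 8>(...))
// reinterprets the same 16 bytes as a _SimdWrapper<int, 4>; passing _ToN
// explicitly overrides the element count otherwise derived from the byte sizes.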
94 
95 // }}}
96 // __shift_elements_right{{{
97 // if (__shift % 2ⁿ == 0) => the low n Bytes are correct
98 template <unsigned __shift, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
99  _GLIBCXX_SIMD_INTRINSIC _Tp
100  __shift_elements_right(_Tp __v)
101  {
102  [[maybe_unused]] const auto __iv = __to_intrin(__v);
103  static_assert(__shift <= sizeof(_Tp));
104  if constexpr (__shift == 0)
105  return __v;
106  else if constexpr (__shift == sizeof(_Tp))
107  return _Tp();
108 #if _GLIBCXX_SIMD_X86INTRIN // {{{
109  else if constexpr (__have_sse && __shift == 8
110  && _TVT::template _S_is<float, 4>)
111  return _mm_movehl_ps(__iv, __iv);
112  else if constexpr (__have_sse2 && __shift == 8
113  && _TVT::template _S_is<double, 2>)
114  return _mm_unpackhi_pd(__iv, __iv);
115  else if constexpr (__have_sse2 && sizeof(_Tp) == 16)
116  return reinterpret_cast<typename _TVT::type>(
117  _mm_srli_si128(reinterpret_cast<__m128i>(__iv), __shift));
118  else if constexpr (__shift == 16 && sizeof(_Tp) == 32)
119  {
120  /*if constexpr (__have_avx && _TVT::template _S_is<double, 4>)
121  return _mm256_permute2f128_pd(__iv, __iv, 0x81);
122  else if constexpr (__have_avx && _TVT::template _S_is<float, 8>)
123  return _mm256_permute2f128_ps(__iv, __iv, 0x81);
124  else if constexpr (__have_avx)
125  return reinterpret_cast<typename _TVT::type>(
126  _mm256_permute2f128_si256(__iv, __iv, 0x81));
127  else*/
128  return __zero_extend(__hi128(__v));
129  }
130  else if constexpr (__have_avx2 && sizeof(_Tp) == 32 && __shift < 16)
131  {
132  const auto __vll = __vector_bitcast<_LLong>(__v);
133  return reinterpret_cast<typename _TVT::type>(
134  _mm256_alignr_epi8(_mm256_permute2x128_si256(__vll, __vll, 0x81),
135  __vll, __shift));
136  }
137  else if constexpr (__have_avx && sizeof(_Tp) == 32 && __shift < 16)
138  {
139  const auto __vll = __vector_bitcast<_LLong>(__v);
140  return reinterpret_cast<typename _TVT::type>(
141  __concat(_mm_alignr_epi8(__hi128(__vll), __lo128(__vll), __shift),
142  _mm_srli_si128(__hi128(__vll), __shift)));
143  }
144  else if constexpr (sizeof(_Tp) == 32 && __shift > 16)
145  return __zero_extend(__shift_elements_right<__shift - 16>(__hi128(__v)));
146  else if constexpr (sizeof(_Tp) == 64 && __shift == 32)
147  return __zero_extend(__hi256(__v));
148  else if constexpr (__have_avx512f && sizeof(_Tp) == 64)
149  {
150  if constexpr (__shift >= 48)
151  return __zero_extend(
152  __shift_elements_right<__shift - 48>(__extract<3, 4>(__v)));
153  else if constexpr (__shift >= 32)
154  return __zero_extend(
155  __shift_elements_right<__shift - 32>(__hi256(__v)));
156  else if constexpr (__shift % 8 == 0)
157  return reinterpret_cast<typename _TVT::type>(
158  _mm512_alignr_epi64(__m512i(), __intrin_bitcast<__m512i>(__v),
159  __shift / 8));
160  else if constexpr (__shift % 4 == 0)
161  return reinterpret_cast<typename _TVT::type>(
162  _mm512_alignr_epi32(__m512i(), __intrin_bitcast<__m512i>(__v),
163  __shift / 4));
164  else if constexpr (__have_avx512bw && __shift < 16)
165  {
166  const auto __vll = __vector_bitcast<_LLong>(__v);
167  return reinterpret_cast<typename _TVT::type>(
168  _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __vll, 0xf9),
169  __vll, __shift));
170  }
171  else if constexpr (__have_avx512bw && __shift < 32)
172  {
173  const auto __vll = __vector_bitcast<_LLong>(__v);
174  return reinterpret_cast<typename _TVT::type>(
175  _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __m512i(), 0xee),
176  _mm512_shuffle_i32x4(__vll, __vll, 0xf9),
177  __shift - 16));
178  }
179  else
180  __assert_unreachable<_Tp>();
181  }
182  /*
183  } else if constexpr (__shift % 16 == 0 && sizeof(_Tp) == 64)
184  return __auto_bitcast(__extract<__shift / 16, 4>(__v));
185  */
186 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
187  else
188  {
189  constexpr int __chunksize = __shift % 8 == 0 ? 8
190  : __shift % 4 == 0 ? 4
191  : __shift % 2 == 0 ? 2
192  : 1;
193  auto __w = __vector_bitcast<__int_with_sizeof_t<__chunksize>>(__v);
194  using _Up = decltype(__w);
195  return __intrin_bitcast<_Tp>(
196  __call_with_n_evaluations<(sizeof(_Tp) - __shift) / __chunksize>(
197  [](auto... __chunks) { return _Up{__chunks...}; },
198  [&](auto __i) { return __w[__shift / __chunksize + __i]; }));
199  }
200  }
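// Example (illustrative): for a 16-byte vector of four ints {1, 2, 3, 4},
// __shift_elements_right<4>(__v) yields {2, 3, 4, 0}; the whole register is
// shifted right by __shift bytes and zero-filled from the top.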
201 
202 // }}}
203 // __extract_part(_SimdWrapper<_Tp, _Np>) {{{
204 template <int _Index, int _Total, int _Combine, typename _Tp, size_t _Np>
205  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
206  _SimdWrapper<_Tp, _Np / _Total * _Combine>
207  __extract_part(const _SimdWrapper<_Tp, _Np> __x)
208  {
209  if constexpr (_Index % 2 == 0 && _Total % 2 == 0 && _Combine % 2 == 0)
210  return __extract_part<_Index / 2, _Total / 2, _Combine / 2>(__x);
211  else
212  {
213  constexpr size_t __values_per_part = _Np / _Total;
214  constexpr size_t __values_to_skip = _Index * __values_per_part;
215  constexpr size_t __return_size = __values_per_part * _Combine;
216  using _R = __vector_type_t<_Tp, __return_size>;
217  static_assert((_Index + _Combine) * __values_per_part * sizeof(_Tp)
218  <= sizeof(__x),
219  "out of bounds __extract_part");
220  // the following assertion would ensure that no "padding" is read

221  // static_assert(_Total >= _Index + _Combine, "_Total must be greater
222  // than _Index");
223 
224  // static_assert(__return_size * _Total == _Np, "_Np must be divisible
225  // by _Total");
226  if (__x._M_is_constprop())
227  return __generate_from_n_evaluations<__return_size, _R>(
228  [&](auto __i) { return __x[__values_to_skip + __i]; });
229  if constexpr (_Index == 0 && _Total == 1)
230  return __x;
231  else if constexpr (_Index == 0)
232  return __intrin_bitcast<_R>(__as_vector(__x));
233 #if _GLIBCXX_SIMD_X86INTRIN // {{{
234  else if constexpr (sizeof(__x) == 32
235  && __return_size * sizeof(_Tp) <= 16)
236  {
237  constexpr size_t __bytes_to_skip = __values_to_skip * sizeof(_Tp);
238  if constexpr (__bytes_to_skip == 16)
239  return __vector_bitcast<_Tp, __return_size>(
240  __hi128(__as_vector(__x)));
241  else
242  return __vector_bitcast<_Tp, __return_size>(
243  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
244  __lo128(__vector_bitcast<_LLong>(__x)),
245  __bytes_to_skip));
246  }
247 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
248  else if constexpr (_Index > 0
249  && (__values_to_skip % __return_size != 0
250  || sizeof(_R) >= 8)
251  && (__values_to_skip + __return_size) * sizeof(_Tp)
252  <= 64
253  && sizeof(__x) >= 16)
254  return __intrin_bitcast<_R>(
255  __shift_elements_right<__values_to_skip * sizeof(_Tp)>(
256  __as_vector(__x)));
257  else
258  {
259  _R __r = {};
260  __builtin_memcpy(&__r,
261  reinterpret_cast<const char*>(&__x)
262  + sizeof(_Tp) * __values_to_skip,
263  __return_size * sizeof(_Tp));
264  return __r;
265  }
266  }
267  }
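// Example (illustrative): with _SimdWrapper<float, 8> __x holding
// {0, 1, 2, 3, 4, 5, 6, 7}, __extract_part<1, 4, 2>(__x) combines the second
// and third quarters into a _SimdWrapper<float, 4> holding {2, 3, 4, 5}.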
268 
269 // }}}
270 // __extract_part(_SimdWrapper<bool, _Np>) {{{
271 template <int _Index, int _Total, int _Combine = 1, size_t _Np>
272  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _Np / _Total * _Combine>
273  __extract_part(const _SimdWrapper<bool, _Np> __x)
274  {
275  static_assert(_Combine == 1, "_Combine != 1 not implemented");
276  static_assert(__have_avx512f && _Np == _Np);
277  static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
278  return __x._M_data >> (_Index * _Np / _Total);
279  }
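// For a bitmask the extraction is a plain shift, e.g. (illustrative) with a
// _SimdWrapper<bool, 8> whose bits are 0b1010'0011, __extract_part<1, 2>(__x)
// yields the high nibble 0b1010 as a _SimdWrapper<bool, 4>. The `_Np == _Np`
// term above merely makes the condition dependent, deferring the assertion to
// instantiation.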
280 
281 // }}}
282 
283 // __vector_convert {{{
284 // implementation requires an index sequence
285 template <typename _To, typename _From, size_t... _I>
286  _GLIBCXX_SIMD_INTRINSIC constexpr _To
287  __vector_convert(_From __a, index_sequence<_I...>)
288  {
289  using _Tp = typename _VectorTraits<_To>::value_type;
290  return _To{static_cast<_Tp>(__a[_I])...};
291  }
292 
293 template <typename _To, typename _From, size_t... _I>
294  _GLIBCXX_SIMD_INTRINSIC constexpr _To
295  __vector_convert(_From __a, _From __b, index_sequence<_I...>)
296  {
297  using _Tp = typename _VectorTraits<_To>::value_type;
298  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...};
299  }
300 
301 template <typename _To, typename _From, size_t... _I>
302  _GLIBCXX_SIMD_INTRINSIC constexpr _To
303  __vector_convert(_From __a, _From __b, _From __c, index_sequence<_I...>)
304  {
305  using _Tp = typename _VectorTraits<_To>::value_type;
306  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
307  static_cast<_Tp>(__c[_I])...};
308  }
309 
310 template <typename _To, typename _From, size_t... _I>
311  _GLIBCXX_SIMD_INTRINSIC constexpr _To
312  __vector_convert(_From __a, _From __b, _From __c, _From __d,
313  index_sequence<_I...>)
314  {
315  using _Tp = typename _VectorTraits<_To>::value_type;
316  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
317  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...};
318  }
319 
320 template <typename _To, typename _From, size_t... _I>
321  _GLIBCXX_SIMD_INTRINSIC constexpr _To
322  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
323  index_sequence<_I...>)
324  {
325  using _Tp = typename _VectorTraits<_To>::value_type;
326  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
327  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
328  static_cast<_Tp>(__e[_I])...};
329  }
330 
331 template <typename _To, typename _From, size_t... _I>
332  _GLIBCXX_SIMD_INTRINSIC constexpr _To
333  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
334  _From __f, index_sequence<_I...>)
335  {
336  using _Tp = typename _VectorTraits<_To>::value_type;
337  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
338  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
339  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...};
340  }
341 
342 template <typename _To, typename _From, size_t... _I>
343  _GLIBCXX_SIMD_INTRINSIC constexpr _To
344  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
345  _From __f, _From __g, index_sequence<_I...>)
346  {
347  using _Tp = typename _VectorTraits<_To>::value_type;
348  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
349  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
350  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
351  static_cast<_Tp>(__g[_I])...};
352  }
353 
354 template <typename _To, typename _From, size_t... _I>
355  _GLIBCXX_SIMD_INTRINSIC constexpr _To
356  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
357  _From __f, _From __g, _From __h, index_sequence<_I...>)
358  {
359  using _Tp = typename _VectorTraits<_To>::value_type;
360  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
361  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
362  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
363  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...};
364  }
365 
366 template <typename _To, typename _From, size_t... _I>
367  _GLIBCXX_SIMD_INTRINSIC constexpr _To
368  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
369  _From __f, _From __g, _From __h, _From __i,
370  index_sequence<_I...>)
371  {
372  using _Tp = typename _VectorTraits<_To>::value_type;
373  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
374  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
375  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
376  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
377  static_cast<_Tp>(__i[_I])...};
378  }
379 
380 template <typename _To, typename _From, size_t... _I>
381  _GLIBCXX_SIMD_INTRINSIC constexpr _To
382  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
383  _From __f, _From __g, _From __h, _From __i, _From __j,
384  index_sequence<_I...>)
385  {
386  using _Tp = typename _VectorTraits<_To>::value_type;
387  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
388  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
389  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
390  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
391  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...};
392  }
393 
394 template <typename _To, typename _From, size_t... _I>
395  _GLIBCXX_SIMD_INTRINSIC constexpr _To
396  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
397  _From __f, _From __g, _From __h, _From __i, _From __j,
398  _From __k, index_sequence<_I...>)
399  {
400  using _Tp = typename _VectorTraits<_To>::value_type;
401  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
402  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
403  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
404  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
405  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
406  static_cast<_Tp>(__k[_I])...};
407  }
408 
409 template <typename _To, typename _From, size_t... _I>
410  _GLIBCXX_SIMD_INTRINSIC constexpr _To
411  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
412  _From __f, _From __g, _From __h, _From __i, _From __j,
413  _From __k, _From __l, index_sequence<_I...>)
414  {
415  using _Tp = typename _VectorTraits<_To>::value_type;
416  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
417  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
418  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
419  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
420  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
421  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...};
422  }
423 
424 template <typename _To, typename _From, size_t... _I>
425  _GLIBCXX_SIMD_INTRINSIC constexpr _To
426  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
427  _From __f, _From __g, _From __h, _From __i, _From __j,
428  _From __k, _From __l, _From __m, index_sequence<_I...>)
429  {
430  using _Tp = typename _VectorTraits<_To>::value_type;
431  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
432  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
433  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
434  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
435  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
436  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
437  static_cast<_Tp>(__m[_I])...};
438  }
439 
440 template <typename _To, typename _From, size_t... _I>
441  _GLIBCXX_SIMD_INTRINSIC constexpr _To
442  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
443  _From __f, _From __g, _From __h, _From __i, _From __j,
444  _From __k, _From __l, _From __m, _From __n,
445  index_sequence<_I...>)
446  {
447  using _Tp = typename _VectorTraits<_To>::value_type;
448  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
449  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
450  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
451  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
452  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
453  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
454  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...};
455  }
456 
457 template <typename _To, typename _From, size_t... _I>
458  _GLIBCXX_SIMD_INTRINSIC constexpr _To
459  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
460  _From __f, _From __g, _From __h, _From __i, _From __j,
461  _From __k, _From __l, _From __m, _From __n, _From __o,
462  index_sequence<_I...>)
463  {
464  using _Tp = typename _VectorTraits<_To>::value_type;
465  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
466  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
467  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
468  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
469  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
470  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
471  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...,
472  static_cast<_Tp>(__o[_I])...};
473  }
474 
475 template <typename _To, typename _From, size_t... _I>
476  _GLIBCXX_SIMD_INTRINSIC constexpr _To
477  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
478  _From __f, _From __g, _From __h, _From __i, _From __j,
479  _From __k, _From __l, _From __m, _From __n, _From __o,
480  _From __p, index_sequence<_I...>)
481  {
482  using _Tp = typename _VectorTraits<_To>::value_type;
483  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
484  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
485  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
486  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
487  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
488  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
489  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...,
490  static_cast<_Tp>(__o[_I])..., static_cast<_Tp>(__p[_I])...};
491  }
492 
493 // Defer actual conversion to the overload that takes an index sequence. Note
494 // that this function adds zeros or drops values off the end if you don't ensure
495 // matching width.
496 template <typename _To, typename... _From, size_t _FromSize>
497  _GLIBCXX_SIMD_INTRINSIC constexpr _To
498  __vector_convert(_SimdWrapper<_From, _FromSize>... __xs)
499  {
500 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
501  using _From0 = __first_of_pack_t<_From...>;
502  using _FW = _SimdWrapper<_From0, _FromSize>;
503  if (!_FW::_S_is_partial && !(... && __xs._M_is_constprop()))
504  {
505  if constexpr ((sizeof...(_From) & (sizeof...(_From) - 1))
506  == 0) // power-of-two number of arguments
507  return __convert_x86<_To>(__as_vector(__xs)...);
508  else // append zeros and recurse until the above branch is taken
509  return __vector_convert<_To>(__xs..., _FW{});
510  }
511  else
512 #endif
513  return __vector_convert<_To>(
514  __as_vector(__xs)...,
515  make_index_sequence<(sizeof...(__xs) == 1 ? std::min(
516  _VectorTraits<_To>::_S_full_size, int(_FromSize))
517  : _FromSize)>());
518  }
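// Example (illustrative): __vector_convert<__vector_type_t<float, 4>>(__w)
// with a single _SimdWrapper<int, 4> __w converts element-wise; passing two
// _SimdWrapper<int, 4> arguments concatenates their converted elements into a
// wider destination. As noted above, mismatched widths zero-fill or drop
// trailing elements rather than diagnosing the mismatch.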
519 
520 // }}}
521 // __convert function{{{
522 template <typename _To, typename _From, typename... _More>
523  _GLIBCXX_SIMD_INTRINSIC constexpr auto
524  __convert(_From __v0, _More... __vs)
525  {
526  static_assert((true && ... && is_same_v<_From, _More>) );
527  if constexpr (__is_vectorizable_v<_From>)
528  {
529  using _V = typename _VectorTraits<_To>::type;
530  using _Tp = typename _VectorTraits<_To>::value_type;
531  return _V{static_cast<_Tp>(__v0), static_cast<_Tp>(__vs)...};
532  }
533  else if constexpr (__is_vector_type_v<_From>)
534  return __convert<_To>(__as_wrapper(__v0), __as_wrapper(__vs)...);
535  else // _SimdWrapper arguments
536  {
537  constexpr size_t __input_size = _From::_S_size * (1 + sizeof...(_More));
538  if constexpr (__is_vectorizable_v<_To>)
539  return __convert<__vector_type_t<_To, __input_size>>(__v0, __vs...);
540  else if constexpr (!__is_vector_type_v<_To>)
541  return _To(__convert<typename _To::_BuiltinType>(__v0, __vs...));
542  else
543  {
544  static_assert(
545  sizeof...(_More) == 0
546  || _VectorTraits<_To>::_S_full_size >= __input_size,
547  "__convert(...) requires the input to fit into the output");
548  return __vector_convert<_To>(__v0, __vs...);
549  }
550  }
551  }
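// Example (illustrative): __convert<__vector_type_t<float, 4>>(
// _SimdWrapper<int, 4>(...)) yields the four converted values; scalar
// arguments are also accepted, each becoming one element of the result.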
552 
553 // }}}
554 // __convert_all{{{
555 // Converts __v into array<_To, N>, where N is _NParts if non-zero or
556 // otherwise deduced from _To such that N * #elements(_To) <= #elements(__v).
557 // Note: this function may return fewer than all converted elements
558 template <typename _To,
559  size_t _NParts = 0, // allows converting fewer or more parts than cover
560  // all of __v (only the last _To may be partially filled)
561  size_t _Offset = 0, // where to start, # of elements (not Bytes or
562  // Parts)
563  typename _From, typename _FromVT = _VectorTraits<_From>>
564  _GLIBCXX_SIMD_INTRINSIC auto
565  __convert_all(_From __v)
566  {
567  if constexpr (is_arithmetic_v<_To> && _NParts != 1)
568  {
569  static_assert(_Offset < _FromVT::_S_full_size);
570  constexpr auto _Np
571  = _NParts == 0 ? _FromVT::_S_partial_width - _Offset : _NParts;
572  return __generate_from_n_evaluations<_Np, array<_To, _Np>>(
573  [&](auto __i) { return static_cast<_To>(__v[__i + _Offset]); });
574  }
575  else
576  {
577  static_assert(__is_vector_type_v<_To>);
578  using _ToVT = _VectorTraits<_To>;
579  if constexpr (__is_vector_type_v<_From>)
580  return __convert_all<_To, _NParts>(__as_wrapper(__v));
581  else if constexpr (_NParts == 1)
582  {
583  static_assert(_Offset % _ToVT::_S_full_size == 0);
584  return array<_To, 1>{__vector_convert<_To>(
585  __extract_part<_Offset / _ToVT::_S_full_size,
586  __div_roundup(_FromVT::_S_partial_width,
587  _ToVT::_S_full_size)>(__v))};
588  }
589 #if _GLIBCXX_SIMD_X86INTRIN // {{{
590  else if constexpr (!__have_sse4_1 && _Offset == 0
591  && is_integral_v<typename _FromVT::value_type>
592  && sizeof(typename _FromVT::value_type)
593  < sizeof(typename _ToVT::value_type)
594  && !(sizeof(typename _FromVT::value_type) == 4
595  && is_same_v<typename _ToVT::value_type, double>))
596  {
597  using _ToT = typename _ToVT::value_type;
598  using _FromT = typename _FromVT::value_type;
599  constexpr size_t _Np
600  = _NParts != 0
601  ? _NParts
602  : (_FromVT::_S_partial_width / _ToVT::_S_full_size);
603  using _R = array<_To, _Np>;
604  // __adjust bitcasts its input to a wrapper with the requested number
605  // of entries (passed as a _SizeConstant) so that no unnecessary
606  // intermediate conversions are requested and, more importantly, no
607  // intermediate conversions are missing
608  [[maybe_unused]] auto __adjust
609  = [](auto __n,
610  auto __vv) -> _SimdWrapper<_FromT, decltype(__n)::value> {
611  return __vector_bitcast<_FromT, decltype(__n)::value>(__vv);
612  };
613  [[maybe_unused]] const auto __vi = __to_intrin(__v);
614  auto&& __make_array = [](auto __x0, [[maybe_unused]] auto __x1) {
615  if constexpr (_Np == 1)
616  return _R{__intrin_bitcast<_To>(__x0)};
617  else
618  return _R{__intrin_bitcast<_To>(__x0),
619  __intrin_bitcast<_To>(__x1)};
620  };
621 
622  if constexpr (_Np == 0)
623  return _R{};
624  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 2)
625  {
626  static_assert(is_integral_v<_FromT>);
627  static_assert(is_integral_v<_ToT>);
628  if constexpr (is_unsigned_v<_FromT>)
629  return __make_array(_mm_unpacklo_epi8(__vi, __m128i()),
630  _mm_unpackhi_epi8(__vi, __m128i()));
631  else
632  return __make_array(
633  _mm_srai_epi16(_mm_unpacklo_epi8(__vi, __vi), 8),
634  _mm_srai_epi16(_mm_unpackhi_epi8(__vi, __vi), 8));
635  }
636  else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 4)
637  {
638  static_assert(is_integral_v<_FromT>);
639  if constexpr (is_floating_point_v<_ToT>)
640  {
641  const auto __ints
642  = __convert_all<__vector_type16_t<int>, _Np>(
643  __adjust(_SizeConstant<_Np * 4>(), __v));
644  return __generate_from_n_evaluations<_Np, _R>(
645  [&](auto __i) {
646  return __vector_convert<_To>(__as_wrapper(__ints[__i]));
647  });
648  }
649  else if constexpr (is_unsigned_v<_FromT>)
650  return __make_array(_mm_unpacklo_epi16(__vi, __m128i()),
651  _mm_unpackhi_epi16(__vi, __m128i()));
652  else
653  return __make_array(
654  _mm_srai_epi32(_mm_unpacklo_epi16(__vi, __vi), 16),
655  _mm_srai_epi32(_mm_unpackhi_epi16(__vi, __vi), 16));
656  }
657  else if constexpr (sizeof(_FromT) == 4 && sizeof(_ToT) == 8
658  && is_integral_v<_FromT> && is_integral_v<_ToT>)
659  {
660  if constexpr (is_unsigned_v<_FromT>)
661  return __make_array(_mm_unpacklo_epi32(__vi, __m128i()),
662  _mm_unpackhi_epi32(__vi, __m128i()));
663  else
664  return __make_array(
665  _mm_unpacklo_epi32(__vi, _mm_srai_epi32(__vi, 31)),
666  _mm_unpackhi_epi32(__vi, _mm_srai_epi32(__vi, 31)));
667  }
679  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) >= 4
680  && is_signed_v<_FromT>)
681  {
682  const __m128i __vv[2] = {_mm_unpacklo_epi8(__vi, __vi),
683  _mm_unpackhi_epi8(__vi, __vi)};
684  const __vector_type_t<int, 4> __vvvv[4] = {
685  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[0], __vv[0])),
686  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[0], __vv[0])),
687  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[1], __vv[1])),
688  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[1], __vv[1]))};
689  if constexpr (sizeof(_ToT) == 4)
690  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
691  return __vector_convert<_To>(
692  _SimdWrapper<int, 4>(__vvvv[__i] >> 24));
693  });
694  else if constexpr (is_integral_v<_ToT>)
695  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
696  const auto __signbits = __to_intrin(__vvvv[__i / 2] >> 31);
697  const auto __sx32 = __to_intrin(__vvvv[__i / 2] >> 24);
698  return __vector_bitcast<_ToT>(
699  __i % 2 == 0 ? _mm_unpacklo_epi32(__sx32, __signbits)
700  : _mm_unpackhi_epi32(__sx32, __signbits));
701  });
702  else
703  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
704  const _SimdWrapper<int, 4> __int4 = __vvvv[__i / 2] >> 24;
705  return __vector_convert<_To>(
706  __i % 2 == 0 ? __int4
707  : _SimdWrapper<int, 4>(
708  _mm_unpackhi_epi64(__to_intrin(__int4),
709  __to_intrin(__int4))));
710  });
711  }
712  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 4)
713  {
714  const auto __shorts = __convert_all<__vector_type16_t<
715  conditional_t<is_signed_v<_FromT>, short, unsigned short>>>(
716  __adjust(_SizeConstant<(_Np + 1) / 2 * 8>(), __v));
717  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
718  return __convert_all<_To>(__shorts[__i / 2])[__i % 2];
719  });
720  }
721  else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 8
722  && is_signed_v<_FromT> && is_integral_v<_ToT>)
723  {
724  const __m128i __vv[2] = {_mm_unpacklo_epi16(__vi, __vi),
725  _mm_unpackhi_epi16(__vi, __vi)};
726  const __vector_type16_t<int> __vvvv[4]
727  = {__vector_bitcast<int>(
728  _mm_unpacklo_epi32(_mm_srai_epi32(__vv[0], 16),
729  _mm_srai_epi32(__vv[0], 31))),
730  __vector_bitcast<int>(
731  _mm_unpackhi_epi32(_mm_srai_epi32(__vv[0], 16),
732  _mm_srai_epi32(__vv[0], 31))),
733  __vector_bitcast<int>(
734  _mm_unpacklo_epi32(_mm_srai_epi32(__vv[1], 16),
735  _mm_srai_epi32(__vv[1], 31))),
736  __vector_bitcast<int>(
737  _mm_unpackhi_epi32(_mm_srai_epi32(__vv[1], 16),
738  _mm_srai_epi32(__vv[1], 31)))};
739  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
740  return __vector_bitcast<_ToT>(__vvvv[__i]);
741  });
742  }
743  else if constexpr (sizeof(_FromT) <= 2 && sizeof(_ToT) == 8)
744  {
745  const auto __ints
746  = __convert_all<__vector_type16_t<conditional_t<
747  is_signed_v<_FromT> || is_floating_point_v<_ToT>, int,
748  unsigned int>>>(
749  __adjust(_SizeConstant<(_Np + 1) / 2 * 4>(), __v));
750  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
751  return __convert_all<_To>(__ints[__i / 2])[__i % 2];
752  });
753  }
754  else
755  __assert_unreachable<_To>();
756  }
757 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
758  else if constexpr ((_FromVT::_S_partial_width - _Offset)
759  > _ToVT::_S_full_size)
760  {
761  /*
762  static_assert(
763  (_FromVT::_S_partial_width & (_FromVT::_S_partial_width - 1)) ==
764  0,
765  "__convert_all only supports power-of-2 number of elements.
766  Otherwise " "the return type cannot be array<_To, N>.");
767  */
768  constexpr size_t _NTotal
769  = (_FromVT::_S_partial_width - _Offset) / _ToVT::_S_full_size;
770  constexpr size_t _Np = _NParts == 0 ? _NTotal : _NParts;
771  static_assert(
772  _Np <= _NTotal
773  || (_Np == _NTotal + 1
774  && (_FromVT::_S_partial_width - _Offset) % _ToVT::_S_full_size
775  > 0));
776  using _R = array<_To, _Np>;
777  if constexpr (_Np == 1)
778  return _R{__vector_convert<_To>(
779  __extract_part<_Offset, _FromVT::_S_partial_width,
780  _ToVT::_S_full_size>(__v))};
781  else
782  return __generate_from_n_evaluations<_Np, _R>([&](
783  auto __i) constexpr {
784  auto __part
785  = __extract_part<__i * _ToVT::_S_full_size + _Offset,
786  _FromVT::_S_partial_width,
787  _ToVT::_S_full_size>(__v);
788  return __vector_convert<_To>(__part);
789  });
790  }
791  else if constexpr (_Offset == 0)
792  return array<_To, 1>{__vector_convert<_To>(__v)};
793  else
794  return array<_To, 1>{__vector_convert<_To>(
795  __extract_part<_Offset, _FromVT::_S_partial_width,
796  _FromVT::_S_partial_width - _Offset>(__v))};
797  }
798  }
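// Example (illustrative): __convert_all<__vector_type_t<double, 2>>(
// _SimdWrapper<int, 8>(...)) returns array<__vector_type_t<double, 2>, 4>,
// each array element holding two converted values; with _NParts = 2 only the
// first two parts are produced.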
799 
800 // }}}
801 
802 // _GnuTraits {{{
803 template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
804  struct _GnuTraits
805  {
806  using _IsValid = true_type;
807  using _SimdImpl = typename _Abi::_SimdImpl;
808  using _MaskImpl = typename _Abi::_MaskImpl;
809 
810  // simd and simd_mask member types {{{
811  using _SimdMember = _SimdWrapper<_Tp, _Np>;
812  using _MaskMember = _SimdWrapper<_Mp, _Np>;
813  static constexpr size_t _S_simd_align = alignof(_SimdMember);
814  static constexpr size_t _S_mask_align = alignof(_MaskMember);
815 
816  // }}}
817  // size metadata {{{
818  static constexpr size_t _S_full_size = _SimdMember::_S_full_size;
819  static constexpr bool _S_is_partial = _SimdMember::_S_is_partial;
820 
821  // }}}
822  // _SimdBase / base class for simd, providing extra conversions {{{
823  struct _SimdBase2
824  {
825  _GLIBCXX_SIMD_ALWAYS_INLINE
826  explicit operator __intrinsic_type_t<_Tp, _Np>() const
827  {
828  return __to_intrin(static_cast<const simd<_Tp, _Abi>*>(this)->_M_data);
829  }
830  _GLIBCXX_SIMD_ALWAYS_INLINE
831  explicit operator __vector_type_t<_Tp, _Np>() const
832  {
833  return static_cast<const simd<_Tp, _Abi>*>(this)->_M_data.__builtin();
834  }
835  };
836 
837  struct _SimdBase1
838  {
839  _GLIBCXX_SIMD_ALWAYS_INLINE
840  explicit operator __intrinsic_type_t<_Tp, _Np>() const
841  { return __data(*static_cast<const simd<_Tp, _Abi>*>(this)); }
842  };
843 
844  using _SimdBase = conditional_t<
845  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
846  _SimdBase1, _SimdBase2>;
847 
848  // }}}
849  // _MaskBase {{{
850  struct _MaskBase2
851  {
852  _GLIBCXX_SIMD_ALWAYS_INLINE
853  explicit operator __intrinsic_type_t<_Tp, _Np>() const
854  {
855  return static_cast<const simd_mask<_Tp, _Abi>*>(this)
856  ->_M_data.__intrin();
857  }
858  _GLIBCXX_SIMD_ALWAYS_INLINE
859  explicit operator __vector_type_t<_Tp, _Np>() const
860  {
861  return static_cast<const simd_mask<_Tp, _Abi>*>(this)->_M_data._M_data;
862  }
863  };
864 
865  struct _MaskBase1
866  {
867  _GLIBCXX_SIMD_ALWAYS_INLINE
868  explicit operator __intrinsic_type_t<_Tp, _Np>() const
869  { return __data(*static_cast<const simd_mask<_Tp, _Abi>*>(this)); }
870  };
871 
872  using _MaskBase = conditional_t<
873  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
874  _MaskBase1, _MaskBase2>;
875 
876  // }}}
877  // _MaskCastType {{{
878  // parameter type of one explicit simd_mask constructor
879  class _MaskCastType
880  {
881  using _Up = __intrinsic_type_t<_Tp, _Np>;
882  _Up _M_data;
883 
884  public:
885  _GLIBCXX_SIMD_ALWAYS_INLINE
886  _MaskCastType(_Up __x) : _M_data(__x) {}
887  _GLIBCXX_SIMD_ALWAYS_INLINE
888  operator _MaskMember() const { return _M_data; }
889  };
890 
891  // }}}
892  // _SimdCastType {{{
893  // parameter type of one explicit simd constructor
894  class _SimdCastType1
895  {
896  using _Ap = __intrinsic_type_t<_Tp, _Np>;
897  _SimdMember _M_data;
898 
899  public:
900  _GLIBCXX_SIMD_ALWAYS_INLINE
901  _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
902  _GLIBCXX_SIMD_ALWAYS_INLINE
903  operator _SimdMember() const { return _M_data; }
904  };
905 
906  class _SimdCastType2
907  {
908  using _Ap = __intrinsic_type_t<_Tp, _Np>;
909  using _Bp = __vector_type_t<_Tp, _Np>;
910  _SimdMember _M_data;
911 
912  public:
913  _GLIBCXX_SIMD_ALWAYS_INLINE
914  _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
915  _GLIBCXX_SIMD_ALWAYS_INLINE
916  _SimdCastType2(_Bp __b) : _M_data(__b) {}
917  _GLIBCXX_SIMD_ALWAYS_INLINE
918  operator _SimdMember() const { return _M_data; }
919  };
920 
921  using _SimdCastType = conditional_t<
922  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
923  _SimdCastType1, _SimdCastType2>;
924  //}}}
925  };
926 
927 // }}}
928 struct _CommonImplX86;
929 struct _CommonImplNeon;
930 struct _CommonImplBuiltin;
931 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplBuiltin;
932 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplBuiltin;
933 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplX86;
934 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplX86;
935 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplNeon;
936 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplNeon;
937 template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplPpc;
938 template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplPpc;
939 
940 // simd_abi::_VecBuiltin {{{
941 template <int _UsedBytes>
942  struct simd_abi::_VecBuiltin
943  {
944  template <typename _Tp>
945  static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);
946 
947  // validity traits {{{
948  struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};
949 
950  template <typename _Tp>
951  struct _IsValidSizeFor
952  : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
953  && _UsedBytes % sizeof(_Tp) == 0
954  && _UsedBytes <= __vectorized_sizeof<_Tp>()
955  && (!__have_avx512f || _UsedBytes <= 32))> {};
956 
957  template <typename _Tp>
958  struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>,
959  _IsValidSizeFor<_Tp>> {};
960 
961  template <typename _Tp>
962  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
963 
964  // }}}
965  // _SimdImpl/_MaskImpl {{{
966 #if _GLIBCXX_SIMD_X86INTRIN
967  using _CommonImpl = _CommonImplX86;
968  using _SimdImpl = _SimdImplX86<_VecBuiltin<_UsedBytes>>;
969  using _MaskImpl = _MaskImplX86<_VecBuiltin<_UsedBytes>>;
970 #elif _GLIBCXX_SIMD_HAVE_NEON
971  using _CommonImpl = _CommonImplNeon;
972  using _SimdImpl = _SimdImplNeon<_VecBuiltin<_UsedBytes>>;
973  using _MaskImpl = _MaskImplNeon<_VecBuiltin<_UsedBytes>>;
974 #else
975  using _CommonImpl = _CommonImplBuiltin;
976 #ifdef __ALTIVEC__
977  using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>;
978  using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>;
979 #else
980  using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>;
981  using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>;
982 #endif
983 #endif
984 
985  // }}}
986  // __traits {{{
987  template <typename _Tp>
988  using _MaskValueType = __int_for_sizeof_t<_Tp>;
989 
990  template <typename _Tp>
991  using __traits
992  = conditional_t<_S_is_valid_v<_Tp>,
993  _GnuTraits<_Tp, _MaskValueType<_Tp>,
994  _VecBuiltin<_UsedBytes>, _S_size<_Tp>>,
995  _InvalidTraits>;
996 
997  //}}}
998  // size metadata {{{
999  template <typename _Tp>
1000  static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;
1001 
1002  template <typename _Tp>
1003  static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
1004 
1005  // }}}
1006  // implicit masks {{{
1007  template <typename _Tp>
1008  using _MaskMember = _SimdWrapper<_MaskValueType<_Tp>, _S_size<_Tp>>;
1009 
1010  template <typename _Tp>
1011  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1012  _S_implicit_mask()
1013  {
1014  using _UV = typename _MaskMember<_Tp>::_BuiltinType;
1015  if constexpr (!_MaskMember<_Tp>::_S_is_partial)
1016  return ~_UV();
1017  else
1018  {
1019  constexpr auto __size = _S_size<_Tp>;
1020  _GLIBCXX_SIMD_USE_CONSTEXPR auto __r = __generate_vector<_UV>(
1021  [](auto __i) constexpr { return __i < __size ? -1 : 0; });
1022  return __r;
1023  }
1024  }
1025 
1026  template <typename _Tp>
1027  _GLIBCXX_SIMD_INTRINSIC static constexpr __intrinsic_type_t<_Tp,
1028  _S_size<_Tp>>
1029  _S_implicit_mask_intrin()
1030  {
1031  return __to_intrin(
1032  __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()._M_data));
1033  }
1034 
1035  template <typename _TW, typename _TVT = _VectorTraits<_TW>>
1036  _GLIBCXX_SIMD_INTRINSIC static constexpr _TW _S_masked(_TW __x)
1037  {
1038  using _Tp = typename _TVT::value_type;
1039  if constexpr (!_MaskMember<_Tp>::_S_is_partial)
1040  return __x;
1041  else
1042  return __and(__as_vector(__x),
1043  __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()));
1044  }
1045 
1046  template <typename _TW, typename _TVT = _VectorTraits<_TW>>
1047  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1048  __make_padding_nonzero(_TW __x)
1049  {
1050  using _Tp = typename _TVT::value_type;
1051  if constexpr (!_S_is_partial<_Tp>)
1052  return __x;
1053  else
1054  {
1055  _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask
1056  = __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>());
1057  if constexpr (is_integral_v<_Tp>)
1058  return __or(__x, ~__implicit_mask);
1059  else
1060  {
1061  _GLIBCXX_SIMD_USE_CONSTEXPR auto __one
1062  = __andnot(__implicit_mask,
1063  __vector_broadcast<_S_full_size<_Tp>>(_Tp(1)));
1064  // it's not enough to return `x | 1_in_padding` because the
1065  // padding in x might be inf or nan (independent of
1066  // __FINITE_MATH_ONLY__, because it's about padding bits)
1067  return __or(__and(__x, __implicit_mask), __one);
1068  }
1069  }
1070  }
1071  // }}}
1072  };
1073 
1074 // }}}
1075 // simd_abi::_VecBltnBtmsk {{{
1076 template <int _UsedBytes>
1077  struct simd_abi::_VecBltnBtmsk
1078  {
1079  template <typename _Tp>
1080  static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);
1081 
1082  // validity traits {{{
1083  struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};
1084 
1085  template <typename _Tp>
1086  struct _IsValidSizeFor
1087  : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
1088  && _UsedBytes % sizeof(_Tp) == 0 && _UsedBytes <= 64
1089  && (_UsedBytes > 32 || __have_avx512vl))> {};
1090 
1091  // Bitmasks require at least AVX512F. If sizeof(_Tp) < 4, then AVX512BW is
1092  // also required.
1093  template <typename _Tp>
1094  struct _IsValid
1095  : conjunction<
1096  _IsValidAbiTag, __bool_constant<__have_avx512f>,
1097  __bool_constant<__have_avx512bw || (sizeof(_Tp) >= 4)>,
1098  __bool_constant<(__vectorized_sizeof<_Tp>() > sizeof(_Tp))>,
1099  _IsValidSizeFor<_Tp>> {};
1100 
1101  template <typename _Tp>
1102  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
1103 
1104  // }}}
1105  // simd/_MaskImpl {{{
1106  #if _GLIBCXX_SIMD_X86INTRIN
1107  using _CommonImpl = _CommonImplX86;
1108  using _SimdImpl = _SimdImplX86<_VecBltnBtmsk<_UsedBytes>>;
1109  using _MaskImpl = _MaskImplX86<_VecBltnBtmsk<_UsedBytes>>;
1110  #else
1111  template <int>
1112  struct _MissingImpl;
1113 
1114  using _CommonImpl = _MissingImpl<_UsedBytes>;
1115  using _SimdImpl = _MissingImpl<_UsedBytes>;
1116  using _MaskImpl = _MissingImpl<_UsedBytes>;
1117  #endif
1118 
1119  // }}}
1120  // __traits {{{
1121  template <typename _Tp>
1122  using _MaskMember = _SimdWrapper<bool, _S_size<_Tp>>;
1123 
1124  template <typename _Tp>
1125  using __traits = conditional_t<
1126  _S_is_valid_v<_Tp>,
1127  _GnuTraits<_Tp, bool, _VecBltnBtmsk<_UsedBytes>, _S_size<_Tp>>,
1128  _InvalidTraits>;
1129 
1130  //}}}
1131  // size metadata {{{
1132  template <typename _Tp>
1133  static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;
1134  template <typename _Tp>
1135  static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
1136 
1137  // }}}
1138  // implicit mask {{{
1139  private:
1140  template <typename _Tp>
1141  using _ImplicitMask = _SimdWrapper<bool, _S_size<_Tp>>;
1142 
1143  public:
1144  template <size_t _Np>
1145  _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<_Np>
1146  __implicit_mask_n()
1147  {
1148  using _Tp = __bool_storage_member_type_t<_Np>;
1149  return _Np < sizeof(_Tp) * __CHAR_BIT__ ? _Tp((1ULL << _Np) - 1) : ~_Tp();
1150  }
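// Example (illustrative): __implicit_mask_n<3>() yields 0b0111 in the 8-bit
// mask type, while __implicit_mask_n<8>() yields the all-ones mask, because 8
// bits fill the whole __bool_storage_member_type_t<8>.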
1151 
1152  template <typename _Tp>
1153  _GLIBCXX_SIMD_INTRINSIC static constexpr _ImplicitMask<_Tp>
1154  _S_implicit_mask()
1155  { return __implicit_mask_n<_S_size<_Tp>>(); }
1156 
1157  template <typename _Tp>
1158  _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<
1159  _S_size<_Tp>>
1160  _S_implicit_mask_intrin()
1161  { return __implicit_mask_n<_S_size<_Tp>>(); }
1162 
1163  template <typename _Tp, size_t _Np>
1164  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1165  _S_masked(_SimdWrapper<_Tp, _Np> __x)
1166  {
1167  if constexpr (is_same_v<_Tp, bool>)
1168  if constexpr (_Np < 8 || (_Np & (_Np - 1)) != 0)
1169  return _MaskImpl::_S_bit_and(
1170  __x, _SimdWrapper<_Tp, _Np>(
1171  __bool_storage_member_type_t<_Np>((1ULL << _Np) - 1)));
1172  else
1173  return __x;
1174  else
1175  return _S_masked(__x._M_data);
1176  }
1177 
1178  template <typename _TV>
1179  _GLIBCXX_SIMD_INTRINSIC static constexpr _TV
1180  _S_masked(_TV __x)
1181  {
1182  using _Tp = typename _VectorTraits<_TV>::value_type;
1183  static_assert(
1184  !__is_bitmask_v<_TV>,
1185  "_VecBltnBtmsk::_S_masked cannot work on bitmasks, since it doesn't "
1186  "know the number of elements. Use _SimdWrapper<bool, N> instead.");
1187  if constexpr (_S_is_partial<_Tp>)
1188  {
1189  constexpr size_t _Np = _S_size<_Tp>;
1190  return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
1191  _S_implicit_mask<_Tp>(), _SimdWrapper<_Tp, _Np>(),
1192  _SimdWrapper<_Tp, _Np>(__x));
1193  }
1194  else
1195  return __x;
1196  }
1197 
1198  template <typename _TV, typename _TVT = _VectorTraits<_TV>>
1199  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1200  __make_padding_nonzero(_TV __x)
1201  {
1202  using _Tp = typename _TVT::value_type;
1203  if constexpr (!_S_is_partial<_Tp>)
1204  return __x;
1205  else
1206  {
1207  constexpr size_t _Np = _S_size<_Tp>;
1208  if constexpr (is_integral_v<typename _TVT::value_type>)
1209  return __x
1210  | __generate_vector<_Tp, _S_full_size<_Tp>>(
1211  [](auto __i) -> _Tp {
1212  if (__i < _Np)
1213  return 0;
1214  else
1215  return 1;
1216  });
1217  else
1218  return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
1219  _S_implicit_mask<_Tp>(),
1220  _SimdWrapper<_Tp, _Np>(
1221  __vector_broadcast<_S_full_size<_Tp>>(_Tp(1))),
1222  _SimdWrapper<_Tp, _Np>(__x))
1223  ._M_data;
1224  }
1225  }
1226 
1227  // }}}
1228  };
1229 
1230 //}}}
1231 // _CommonImplBuiltin {{{
1232 struct _CommonImplBuiltin
1233 {
1234  // _S_converts_via_decomposition{{{
1235  // This lists all cases where a __vector_convert needs to fall back to
1236  // conversion of individual scalars (i.e. decompose the input vector into
1237  // scalars, convert, compose output vector). In those cases, _S_masked_load &
1238  // _S_masked_store prefer to use the _S_bit_iteration implementation.
1239  template <typename _From, typename _To, size_t _ToSize>
1240  static inline constexpr bool __converts_via_decomposition_v
1241  = sizeof(_From) != sizeof(_To);
1242 
1243  // }}}
1244  // _S_load{{{
1245  template <typename _Tp, size_t _Np, size_t _Bytes = _Np * sizeof(_Tp)>
1246  _GLIBCXX_SIMD_INTRINSIC static __vector_type_t<_Tp, _Np>
1247  _S_load(const void* __p)
1248  {
1249  static_assert(_Np > 1);
1250  static_assert(_Bytes % sizeof(_Tp) == 0);
1251  using _Rp = __vector_type_t<_Tp, _Np>;
1252  if constexpr (sizeof(_Rp) == _Bytes)
1253  {
1254  _Rp __r;
1255  __builtin_memcpy(&__r, __p, _Bytes);
1256  return __r;
1257  }
1258  else
1259  {
1260 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
1261  using _Up = conditional_t<
1262  is_integral_v<_Tp>,
1263  conditional_t<_Bytes % 4 == 0,
1264  conditional_t<_Bytes % 8 == 0, long long, int>,
1265  conditional_t<_Bytes % 2 == 0, short, signed char>>,
1266  conditional_t<(_Bytes < 8 || _Np % 2 == 1 || _Np == 2), _Tp,
1267  double>>;
1268  using _V = __vector_type_t<_Up, _Np * sizeof(_Tp) / sizeof(_Up)>;
1269  if constexpr (sizeof(_V) != sizeof(_Rp))
1270  { // on i386 with 4 < _Bytes <= 8
1271  _Rp __r{};
1272  __builtin_memcpy(&__r, __p, _Bytes);
1273  return __r;
1274  }
1275  else
1276 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424
1277  using _V = _Rp;
1278 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
1279  {
1280  _V __r{};
1281  static_assert(_Bytes <= sizeof(_V));
1282  __builtin_memcpy(&__r, __p, _Bytes);
1283  return reinterpret_cast<_Rp>(__r);
1284  }
1285  }
1286  }
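// Example (illustrative): _S_load<float, 4, 12>(__p) reads only 12 bytes; they
// are memcpy'ed into a zero-initialized register, so the fourth lane of the
// returned __vector_type_t<float, 4> is 0 and no read past a 3-element source
// can occur.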
1287 
1288  // }}}
1289  // _S_store {{{
1290  template <size_t _ReqBytes = 0, typename _TV>
1291  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_TV __x, void* __addr)
1292  {
1293  constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
1294  static_assert(sizeof(__x) >= _Bytes);
1295 
1296  if constexpr (__is_vector_type_v<_TV>)
1297  {
1298  using _Tp = typename _VectorTraits<_TV>::value_type;
1299  constexpr size_t _Np = _Bytes / sizeof(_Tp);
1300  static_assert(_Np * sizeof(_Tp) == _Bytes);
1301 
1302 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
1303  using _Up = conditional_t<
1304  (is_integral_v<_Tp> || _Bytes < 4),
1305  conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
1306  float>;
1307  const auto __v = __vector_bitcast<_Up>(__x);
1308 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424
1309  const __vector_type_t<_Tp, _Np> __v = __x;
1310 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
1311 
1312  if constexpr ((_Bytes & (_Bytes - 1)) != 0)
1313  {
1314  constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
1315  alignas(decltype(__v)) char __tmp[_MoreBytes];
1316  __builtin_memcpy(__tmp, &__v, _MoreBytes);
1317  __builtin_memcpy(__addr, __tmp, _Bytes);
1318  }
1319  else
1320  __builtin_memcpy(__addr, &__v, _Bytes);
1321  }
1322  else
1323  __builtin_memcpy(__addr, &__x, _Bytes);
1324  }
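// Example (illustrative): _S_store<12>(__v, __addr) with a 16-byte vector
// copies the full register into a local buffer first and then writes only the
// low 12 bytes, so nothing past __addr + 12 is written.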
1325 
1326  template <typename _Tp, size_t _Np>
1327  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __x,
1328  void* __addr)
1329  { _S_store<_Np * sizeof(_Tp)>(__x._M_data, __addr); }
1330 
1331  // }}}
1332  // _S_store_bool_array(_BitMask) {{{
1333  template <size_t _Np, bool _Sanitized>
1334  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1335  _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
1336  {
1337  if constexpr (_Np == 1)
1338  __mem[0] = __x[0];
1339  else if constexpr (_Np == 2)
1340  {
1341  short __bool2 = (__x._M_to_bits() * 0x81) & 0x0101;
1342  _S_store<_Np>(__bool2, __mem);
1343  }
1344  else if constexpr (_Np == 3)
1345  {
1346  int __bool3 = (__x._M_to_bits() * 0x4081) & 0x010101;
1347  _S_store<_Np>(__bool3, __mem);
1348  }
1349  else
1350  {
1351  __execute_n_times<__div_roundup(_Np, 4)>([&](auto __i) {
1352  constexpr int __offset = __i * 4;
1353  constexpr int __remaining = _Np - __offset;
1354  if constexpr (__remaining > 4 && __remaining <= 7)
1355  {
1356  const _ULLong __bool7
1357  = (__x.template _M_extract<__offset>()._M_to_bits()
1358  * 0x40810204081ULL)
1359  & 0x0101010101010101ULL;
1360  _S_store<__remaining>(__bool7, __mem + __offset);
1361  }
1362  else if constexpr (__remaining >= 4)
1363  {
1364  int __bits = __x.template _M_extract<__offset>()._M_to_bits();
1365  if constexpr (__remaining > 7)
1366  __bits &= 0xf;
1367  const int __bool4 = (__bits * 0x204081) & 0x01010101;
1368  _S_store<4>(__bool4, __mem + __offset);
1369  }
1370  });
1371  }
1372  }
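// The multiplications above spread adjacent mask bits into separate bytes,
// e.g. (illustrative) for four bits b3..b0 the product __bits * 0x204081
// places b0, b1, b2, b3 at bit positions 0, 8, 16 and 24 (no carries occur),
// and masking with 0x01010101 leaves exactly one 0/1 byte per element, ready
// to be stored as four contiguous bool values. 0x81 (2 bits) and
// 0x40810204081ULL (7 bits) follow the same pattern.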
1373 
1374  // }}}
1375  // _S_blend{{{
1376  template <typename _Tp, size_t _Np>
1377  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1378  _S_blend(_SimdWrapper<__int_for_sizeof_t<_Tp>, _Np> __k,
1379  _SimdWrapper<_Tp, _Np> __at0, _SimdWrapper<_Tp, _Np> __at1)
1380  { return __k._M_data ? __at1._M_data : __at0._M_data; }
1381 
1382  // }}}
1383 };
1384 
1385 // }}}
1386 // _SimdImplBuiltin {{{1
1387 template <typename _Abi, typename>
1388  struct _SimdImplBuiltin
1389  {
1390  // member types {{{2
1391  template <typename _Tp>
1392  static constexpr size_t _S_max_store_size = 16;
1393 
1394  using abi_type = _Abi;
1395 
1396  template <typename _Tp>
1397  using _TypeTag = _Tp*;
1398 
1399  template <typename _Tp>
1400  using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;
1401 
1402  template <typename _Tp>
1403  using _MaskMember = typename _Abi::template _MaskMember<_Tp>;
1404 
1405  template <typename _Tp>
1406  static constexpr size_t _S_size = _Abi::template _S_size<_Tp>;
1407 
1408  template <typename _Tp>
1409  static constexpr size_t _S_full_size = _Abi::template _S_full_size<_Tp>;
1410 
1411  using _CommonImpl = typename _Abi::_CommonImpl;
1412  using _SuperImpl = typename _Abi::_SimdImpl;
1413  using _MaskImpl = typename _Abi::_MaskImpl;
1414 
1415  // _M_make_simd(_SimdWrapper/__intrinsic_type_t) {{{2
1416  template <typename _Tp, size_t _Np>
1417  _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
1418  _M_make_simd(_SimdWrapper<_Tp, _Np> __x)
1419  { return {__private_init, __x}; }
1420 
1421  template <typename _Tp, size_t _Np>
1422  _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
1423  _M_make_simd(__intrinsic_type_t<_Tp, _Np> __x)
1424  { return {__private_init, __vector_bitcast<_Tp>(__x)}; }
1425 
1426  // _S_broadcast {{{2
1427  template <typename _Tp>
1428  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1429  _S_broadcast(_Tp __x) noexcept
1430  { return __vector_broadcast<_S_full_size<_Tp>>(__x); }
1431 
1432  // _S_generator {{{2
1433  template <typename _Fp, typename _Tp>
1434  inline static constexpr _SimdMember<_Tp> _S_generator(_Fp&& __gen,
1435  _TypeTag<_Tp>)
1436  {
1437  return __generate_vector<_Tp, _S_full_size<_Tp>>([&](
1438  auto __i) constexpr {
1439  if constexpr (__i < _S_size<_Tp>)
1440  return __gen(__i);
1441  else
1442  return 0;
1443  });
1444  }
1445 
1446  // _S_load {{{2
1447  template <typename _Tp, typename _Up>
1448  _GLIBCXX_SIMD_INTRINSIC static _SimdMember<_Tp>
1449  _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
1450  {
1451  constexpr size_t _Np = _S_size<_Tp>;
1452  constexpr size_t __max_load_size
1453  = (sizeof(_Up) >= 4 && __have_avx512f) || __have_avx512bw ? 64
1454  : (is_floating_point_v<_Up> && __have_avx) || __have_avx2 ? 32
1455  : 16;
1456  constexpr size_t __bytes_to_load = sizeof(_Up) * _Np;
1457  if constexpr (sizeof(_Up) > 8)
1458  return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>([&](
1459  auto __i) constexpr {
1460  return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
1461  });
1462  else if constexpr (is_same_v<_Up, _Tp>)
1463  return _CommonImpl::template _S_load<_Tp, _S_full_size<_Tp>,
1464  _Np * sizeof(_Tp)>(__mem);
1465  else if constexpr (__bytes_to_load <= __max_load_size)
1466  return __convert<_SimdMember<_Tp>>(
1467  _CommonImpl::template _S_load<_Up, _Np>(__mem));
1468  else if constexpr (__bytes_to_load % __max_load_size == 0)
1469  {
1470  constexpr size_t __n_loads = __bytes_to_load / __max_load_size;
1471  constexpr size_t __elements_per_load = _Np / __n_loads;
1472  return __call_with_n_evaluations<__n_loads>(
1473  [](auto... __uncvted) {
1474  return __convert<_SimdMember<_Tp>>(__uncvted...);
1475  },
1476  [&](auto __i) {
1477  return _CommonImpl::template _S_load<_Up, __elements_per_load>(
1478  __mem + __i * __elements_per_load);
1479  });
1480  }
1481  else if constexpr (__bytes_to_load % (__max_load_size / 2) == 0
1482  && __max_load_size > 16)
1483  { // e.g. int[] -> <char, 12> with AVX2
1484  constexpr size_t __n_loads
1485  = __bytes_to_load / (__max_load_size / 2);
1486  constexpr size_t __elements_per_load = _Np / __n_loads;
1487  return __call_with_n_evaluations<__n_loads>(
1488  [](auto... __uncvted) {
1489  return __convert<_SimdMember<_Tp>>(__uncvted...);
1490  },
1491  [&](auto __i) {
1492  return _CommonImpl::template _S_load<_Up, __elements_per_load>(
1493  __mem + __i * __elements_per_load);
1494  });
1495  }
1496  else // e.g. int[] -> <char, 9>
1497  return __call_with_subscripts(
1498  __mem, make_index_sequence<_Np>(), [](auto... __args) {
1499  return __vector_type_t<_Tp, _S_full_size<_Tp>>{
1500  static_cast<_Tp>(__args)...};
1501  });
1502  }
1503 
1504  // _S_masked_load {{{2
1505  template <typename _Tp, size_t _Np, typename _Up>
1506  static inline _SimdWrapper<_Tp, _Np>
1507  _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
1508  const _Up* __mem) noexcept
1509  {
1510  _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k), [&](auto __i) {
1511  __merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
1512  });
1513  return __merge;
1514  }
1515 
1516  // _S_store {{{2
1517  template <typename _Tp, typename _Up>
1518  _GLIBCXX_SIMD_INTRINSIC static void
1519  _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept
1520  {
1521  // TODO: converting int -> "smaller int" can be optimized with AVX512
1522  constexpr size_t _Np = _S_size<_Tp>;
1523  constexpr size_t __max_store_size
1524  = _SuperImpl::template _S_max_store_size<_Up>;
1525  if constexpr (sizeof(_Up) > 8)
1526  __execute_n_times<_Np>([&](auto __i) constexpr {
1527  __mem[__i] = __v[__i];
1528  });
1529  else if constexpr (is_same_v<_Up, _Tp>)
1530  _CommonImpl::_S_store(__v, __mem);
1531  else if constexpr (sizeof(_Up) * _Np <= __max_store_size)
1532  _CommonImpl::_S_store(_SimdWrapper<_Up, _Np>(__convert<_Up>(__v)),
1533  __mem);
1534  else
1535  {
1536  constexpr size_t __vsize = __max_store_size / sizeof(_Up);
1537  // round up to convert the last partial vector as well:
1538  constexpr size_t __stores = __div_roundup(_Np, __vsize);
1539  constexpr size_t __full_stores = _Np / __vsize;
1540  using _V = __vector_type_t<_Up, __vsize>;
1541  const array<_V, __stores> __converted
1542  = __convert_all<_V, __stores>(__v);
1543  __execute_n_times<__full_stores>([&](auto __i) constexpr {
1544  _CommonImpl::_S_store(__converted[__i], __mem + __i * __vsize);
1545  });
1546  if constexpr (__full_stores < __stores)
1547  _CommonImpl::template _S_store<(_Np - __full_stores * __vsize)
1548  * sizeof(_Up)>(
1549  __converted[__full_stores], __mem + __full_stores * __vsize);
1550  }
1551  }
1552 
1553  // _S_masked_store_nocvt {{{2
1554  template <typename _Tp, size_t _Np>
1555  _GLIBCXX_SIMD_INTRINSIC static void
1556  _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
1557  _MaskMember<_Tp> __k)
1558  {
1559  _BitOps::_S_bit_iteration(
1560  _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
1561  __mem[__i] = __v[__i];
1562  });
1563  }
1564 
1565  // _S_masked_store {{{2
1566  template <typename _TW, typename _TVT = _VectorTraits<_TW>,
1567  typename _Tp = typename _TVT::value_type, typename _Up>
1568  static inline void
1569  _S_masked_store(const _TW __v, _Up* __mem, const _MaskMember<_Tp> __k)
1570  noexcept
1571  {
1572  constexpr size_t _TV_size = _S_size<_Tp>;
1573  [[maybe_unused]] const auto __vi = __to_intrin(__v);
1574  constexpr size_t __max_store_size
1575  = _SuperImpl::template _S_max_store_size<_Up>;
1576  if constexpr (
1577  is_same_v<
1578  _Tp,
1579  _Up> || (is_integral_v<_Tp> && is_integral_v<_Up> && sizeof(_Tp) == sizeof(_Up)))
1580  {
1581  // no value conversion needed (same type or equal-sized integers), just reinterpret:
1582  const _MaskMember<_Up> __kk = [&]() {
1583  if constexpr (__is_bitmask_v<decltype(__k)>)
1584  return _MaskMember<_Up>(__k._M_data);
1585  else
1586  return __wrapper_bitcast<__int_for_sizeof_t<_Up>>(__k);
1587  }();
1588  _SuperImpl::_S_masked_store_nocvt(__wrapper_bitcast<_Up>(__v),
1589  __mem, __kk);
1590  }
1591  else if constexpr (__vectorized_sizeof<_Up>() > sizeof(_Up)
1592  && !_CommonImpl::
1593  template __converts_via_decomposition_v<
1594  _Tp, _Up, __max_store_size>)
1595  { // a conversion that goes through decomposition is better handled by
1596  // the bit_iteration fallback below, so it is excluded from this
1597  // branch
1598  constexpr size_t _UW_size
1599  = std::min(_TV_size, __max_store_size / sizeof(_Up));
1600  static_assert(_UW_size <= _TV_size);
1601  using _UW = _SimdWrapper<_Up, _UW_size>;
1602  using _UV = __vector_type_t<_Up, _UW_size>;
1603  using _UAbi = simd_abi::deduce_t<_Up, _UW_size>;
1604  if constexpr (_UW_size == _TV_size) // one convert+store
1605  {
1606  const _UW __converted = __convert<_UW>(__v);
1607  _SuperImpl::_S_masked_store_nocvt(
1608  __converted, __mem,
1609  _UAbi::_MaskImpl::template _S_convert<
1610  __int_for_sizeof_t<_Up>>(__k));
1611  }
1612  else
1613  {
1614  static_assert(_UW_size * sizeof(_Up) == __max_store_size);
1615  constexpr size_t _NFullStores = _TV_size / _UW_size;
1616  constexpr size_t _NAllStores
1617  = __div_roundup(_TV_size, _UW_size);
1618  constexpr size_t _NParts = _S_full_size<_Tp> / _UW_size;
1619  const array<_UV, _NAllStores> __converted
1620  = __convert_all<_UV, _NAllStores>(__v);
1621  __execute_n_times<_NFullStores>([&](auto __i) {
1622  _SuperImpl::_S_masked_store_nocvt(
1623  _UW(__converted[__i]), __mem + __i * _UW_size,
1624  _UAbi::_MaskImpl::template _S_convert<
1625  __int_for_sizeof_t<_Up>>(
1626  __extract_part<__i, _NParts>(__k.__as_full_vector())));
1627  });
1628  if constexpr (_NAllStores
1629  > _NFullStores) // one partial at the end
1630  _SuperImpl::_S_masked_store_nocvt(
1631  _UW(__converted[_NFullStores]),
1632  __mem + _NFullStores * _UW_size,
1633  _UAbi::_MaskImpl::template _S_convert<
1634  __int_for_sizeof_t<_Up>>(
1635  __extract_part<_NFullStores, _NParts>(
1636  __k.__as_full_vector())));
1637  }
1638  }
1639  else
1640  _BitOps::_S_bit_iteration(
1641  _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
1642  __mem[__i] = static_cast<_Up>(__v[__i]);
1643  });
1644  }
1645 
1646  // _S_complement {{{2
1647  template <typename _Tp, size_t _Np>
1648  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1649  _S_complement(_SimdWrapper<_Tp, _Np> __x) noexcept
1650  {
1651  if constexpr (is_floating_point_v<_Tp>)
1652  return __vector_bitcast<_Tp>(~__vector_bitcast<__int_for_sizeof_t<_Tp>>(__x));
1653  else
1654  return ~__x._M_data;
1655  }
1656 
1657  // _S_unary_minus {{{2
1658  template <typename _Tp, size_t _Np>
1659  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1660  _S_unary_minus(_SimdWrapper<_Tp, _Np> __x) noexcept
1661  {
1662  // GCC doesn't use the psign instructions, but pxor & psub seem to be
1663  // just as good a choice as pcmpeqd & psign, so this is fine.
1664  return -__x._M_data;
1665  }
1666 
1667  // arithmetic operators {{{2
1668  template <typename _Tp, size_t _Np>
1669  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1670  _S_plus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1671  { return __x._M_data + __y._M_data; }
1672 
1673  template <typename _Tp, size_t _Np>
1674  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1675  _S_minus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1676  { return __x._M_data - __y._M_data; }
1677 
1678  template <typename _Tp, size_t _Np>
1679  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1680  _S_multiplies(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1681  { return __x._M_data * __y._M_data; }
1682 
1683  template <typename _Tp, size_t _Np>
1684  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1685  _S_divides(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1686  {
1687  // Note that division by 0 is always UB, so for partial registers we must
1688  // make sure the padding elements of the divisor are non-zero
1689  if constexpr (!_Abi::template _S_is_partial<_Tp>)
1690  return __x._M_data / __y._M_data;
1691  else
1692  return __x._M_data / _Abi::__make_padding_nonzero(__y._M_data);
1693  }
1694 
1695  template <typename _Tp, size_t _Np>
1696  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1697  _S_modulus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1698  {
1699  if constexpr (!_Abi::template _S_is_partial<_Tp>)
1700  return __x._M_data % __y._M_data;
1701  else
1702  return __as_vector(__x)
1703  % _Abi::__make_padding_nonzero(__as_vector(__y));
1704  }
1705 
1706  template <typename _Tp, size_t _Np>
1707  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1708  _S_bit_and(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1709  { return __and(__x, __y); }
1710 
1711  template <typename _Tp, size_t _Np>
1712  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1713  _S_bit_or(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1714  { return __or(__x, __y); }
1715 
1716  template <typename _Tp, size_t _Np>
1717  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1718  _S_bit_xor(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1719  { return __xor(__x, __y); }
1720 
1721  template <typename _Tp, size_t _Np>
1722  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
1723  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1724  { return __x._M_data << __y._M_data; }
1725 
1726  template <typename _Tp, size_t _Np>
1727  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
1728  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1729  { return __x._M_data >> __y._M_data; }
1730 
1731  template <typename _Tp, size_t _Np>
1732  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1733  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
1734  { return __x._M_data << __y; }
1735 
1736  template <typename _Tp, size_t _Np>
1737  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1738  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
1739  { return __x._M_data >> __y; }
1740 
1741  // compares {{{2
1742  // _S_equal_to {{{3
1743  template <typename _Tp, size_t _Np>
1744  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1745  _S_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1746  { return __x._M_data == __y._M_data; }
1747 
1748  // _S_not_equal_to {{{3
1749  template <typename _Tp, size_t _Np>
1750  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1751  _S_not_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1752  { return __x._M_data != __y._M_data; }
1753 
1754  // _S_less {{{3
1755  template <typename _Tp, size_t _Np>
1756  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1757  _S_less(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1758  { return __x._M_data < __y._M_data; }
1759 
1760  // _S_less_equal {{{3
1761  template <typename _Tp, size_t _Np>
1762  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1763  _S_less_equal(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1764  { return __x._M_data <= __y._M_data; }
1765 
1766  // _S_negate {{{2
1767  template <typename _Tp, size_t _Np>
1768  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1769  _S_negate(_SimdWrapper<_Tp, _Np> __x) noexcept
1770  { return !__x._M_data; }
1771 
1772  // _S_min, _S_max, _S_minmax {{{2
1773  template <typename _Tp, size_t _Np>
1774  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1775  _SimdWrapper<_Tp, _Np>
1776  _S_min(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1777  { return __a._M_data < __b._M_data ? __a._M_data : __b._M_data; }
1778 
1779  template <typename _Tp, size_t _Np>
1780  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1781  _SimdWrapper<_Tp, _Np>
1782  _S_max(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1783  { return __a._M_data > __b._M_data ? __a._M_data : __b._M_data; }
1784 
1785  template <typename _Tp, size_t _Np>
1786  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1787  pair<_SimdWrapper<_Tp, _Np>, _SimdWrapper<_Tp, _Np>>
1788  _S_minmax(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1789  {
1790  return {__a._M_data < __b._M_data ? __a._M_data : __b._M_data,
1791  __a._M_data < __b._M_data ? __b._M_data : __a._M_data};
1792  }
1793 
1794  // reductions {{{2
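  // _S_reduce folds __x in halves: the upper half is permuted down, combined
  // with the lower half via __binary_op, and the result is reduced
  // recursively on the half-sized ABI (or within the register for 16-byte
  // vectors) until a single element remains. Partial ABIs first neutralize
  // the inactive padding (0 for plus<>, 1 for multiplies<>), peel off the
  // last element when _Np is odd, or zero-pad the permuted upper half in
  // _S_reduce_partial.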
1795  template <size_t _Np, size_t... _Is, size_t... _Zeros, typename _Tp,
1796  typename _BinaryOperation>
1797  _GLIBCXX_SIMD_INTRINSIC static _Tp
1798  _S_reduce_partial(index_sequence<_Is...>, index_sequence<_Zeros...>,
1799  simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
1800  {
1801  using _V = __vector_type_t<_Tp, _Np / 2>;
1802  static_assert(sizeof(_V) <= sizeof(__x));
1803  // _S_full_size is the size of the smallest native SIMD register that
1804  // can store _Np/2 elements:
1805  using _FullSimd = __deduced_simd<_Tp, _VectorTraits<_V>::_S_full_size>;
1806  using _HalfSimd = __deduced_simd<_Tp, _Np / 2>;
1807  const auto __xx = __as_vector(__x);
1808  return _HalfSimd::abi_type::_SimdImpl::_S_reduce(
1809  static_cast<_HalfSimd>(__as_vector(__binary_op(
1810  static_cast<_FullSimd>(__intrin_bitcast<_V>(__xx)),
1811  static_cast<_FullSimd>(__intrin_bitcast<_V>(
1812  __vector_permute<(_Np / 2 + _Is)..., (int(_Zeros * 0) - 1)...>(
1813  __xx)))))),
1814  __binary_op);
1815  }
1816 
1817  template <typename _Tp, typename _BinaryOperation>
1818  _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
1819  _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
1820  {
1821  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
1822  if constexpr (_Np == 1)
1823  return __x[0];
1824  else if constexpr (_Np == 2)
1825  return __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
1826  simd<_Tp, simd_abi::scalar>(__x[1]))[0];
1827  else if constexpr (_Abi::template _S_is_partial<_Tp>) //{{{
1828  {
1829  [[maybe_unused]] constexpr auto __full_size
1830  = _Abi::template _S_full_size<_Tp>;
1831  if constexpr (_Np == 3)
1832  return __binary_op(
1833  __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
1834  simd<_Tp, simd_abi::scalar>(__x[1])),
1835  simd<_Tp, simd_abi::scalar>(__x[2]))[0];
1836  else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
1837  plus<>>)
1838  {
1839  using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
1840  return _Ap::_SimdImpl::_S_reduce(
1841  simd<_Tp, _Ap>(__private_init,
1842  _Abi::_S_masked(__as_vector(__x))),
1843  __binary_op);
1844  }
1845  else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
1846  multiplies<>>)
1847  {
1848  using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
1849  using _TW = _SimdWrapper<_Tp, __full_size>;
1850  _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask_full
1851  = _Abi::template _S_implicit_mask<_Tp>().__as_full_vector();
1852  _GLIBCXX_SIMD_USE_CONSTEXPR _TW __one
1853  = __vector_broadcast<__full_size>(_Tp(1));
1854  const _TW __x_full = __data(__x).__as_full_vector();
1855  const _TW __x_padded_with_ones
1856  = _Ap::_CommonImpl::_S_blend(__implicit_mask_full, __one,
1857  __x_full);
1858  return _Ap::_SimdImpl::_S_reduce(
1859  simd<_Tp, _Ap>(__private_init, __x_padded_with_ones),
1860  __binary_op);
1861  }
1862  else if constexpr (_Np & 1)
1863  {
1864  using _Ap = simd_abi::deduce_t<_Tp, _Np - 1>;
1865  return __binary_op(
1866  simd<_Tp, simd_abi::scalar>(_Ap::_SimdImpl::_S_reduce(
1867  simd<_Tp, _Ap>(
1868  __intrin_bitcast<__vector_type_t<_Tp, _Np - 1>>(
1869  __as_vector(__x))),
1870  __binary_op)),
1871  simd<_Tp, simd_abi::scalar>(__x[_Np - 1]))[0];
1872  }
1873  else
1874  return _S_reduce_partial<_Np>(
1875  make_index_sequence<_Np / 2>(),
1876  make_index_sequence<__full_size - _Np / 2>(), __x, __binary_op);
1877  } //}}}
1878  else if constexpr (sizeof(__x) == 16) //{{{
1879  {
1880  if constexpr (_Np == 16)
1881  {
1882  const auto __y = __data(__x);
1883  __x = __binary_op(
1884  _M_make_simd<_Tp, _Np>(
1885  __vector_permute<0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
1886  7, 7>(__y)),
1887  _M_make_simd<_Tp, _Np>(
1888  __vector_permute<8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1889  14, 14, 15, 15>(__y)));
1890  }
1891  if constexpr (_Np >= 8)
1892  {
1893  const auto __y = __vector_bitcast<short>(__data(__x));
1894  __x = __binary_op(
1895  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1896  __vector_permute<0, 0, 1, 1, 2, 2, 3, 3>(__y))),
1897  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1898  __vector_permute<4, 4, 5, 5, 6, 6, 7, 7>(__y))));
1899  }
1900  if constexpr (_Np >= 4)
1901  {
1902  using _Up = conditional_t<is_floating_point_v<_Tp>, float, int>;
1903  const auto __y = __vector_bitcast<_Up>(__data(__x));
1904  __x = __binary_op(__x,
1905  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1906  __vector_permute<3, 2, 1, 0>(__y))));
1907  }
1908  using _Up = conditional_t<is_floating_point_v<_Tp>, double, _LLong>;
1909  const auto __y = __vector_bitcast<_Up>(__data(__x));
1910  __x = __binary_op(__x, _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1911  __vector_permute<1, 1>(__y))));
1912  return __x[0];
1913  } //}}}
1914  else
1915  {
1916  static_assert(sizeof(__x) > __min_vector_size<_Tp>);
1917  static_assert((_Np & (_Np - 1)) == 0); // _Np must be a power of 2
1918  using _Ap = simd_abi::deduce_t<_Tp, _Np / 2>;
1919  using _V = simd<_Tp, _Ap>;
1920  return _Ap::_SimdImpl::_S_reduce(
1921  __binary_op(_V(__private_init, __extract<0, 2>(__as_vector(__x))),
1922  _V(__private_init,
1923  __extract<1, 2>(__as_vector(__x)))),
1924  static_cast<_BinaryOperation&&>(__binary_op));
1925  }
1926  }
1927 
1928  // math {{{2
1929  // frexp, modf and copysign implemented in simd_math.h
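 // The fallback macros below implement a math function by invoking the
 // scalar libm function once per element and assembling the results back
 // into a vector. _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET is used for functions
 // whose return type differs from the argument type (e.g. ilogb -> int) and
 // therefore returns a __fixed_size_storage_t. ABI-specific implementations
 // can override individual functions with vectorized versions.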
1930 #define _GLIBCXX_SIMD_MATH_FALLBACK(__name) \
1931  template <typename _Tp, typename... _More> \
1932  static _Tp _S_##__name(const _Tp& __x, const _More&... __more) \
1933  { \
1934  return __generate_vector<_Tp>( \
1935  [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \
1936  }
1937 
1938 #define _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET(__name) \
1939  template <typename _Tp, typename... _More> \
1940  static typename _Tp::mask_type _S_##__name(const _Tp& __x, \
1941  const _More&... __more) \
1942  { \
1943  return __generate_vector<_Tp>( \
1944  [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \
1945  }
1946 
1947 #define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name) \
1948  template <typename _Tp, typename... _More> \
1949  static auto _S_##__name(const _Tp& __x, const _More&... __more) \
1950  { \
1951  return __fixed_size_storage_t<_RetTp, \
1952  _VectorTraits<_Tp>::_S_partial_width>:: \
1953  _S_generate([&](auto __meta) constexpr { \
1954  return __meta._S_generator( \
1955  [&](auto __i) { \
1956  return __name(__x[__meta._S_offset + __i], \
1957  __more[__meta._S_offset + __i]...); \
1958  }, \
1959  static_cast<_RetTp*>(nullptr)); \
1960  }); \
1961  }
1962 
1963  _GLIBCXX_SIMD_MATH_FALLBACK(acos)
1964  _GLIBCXX_SIMD_MATH_FALLBACK(asin)
1965  _GLIBCXX_SIMD_MATH_FALLBACK(atan)
1966  _GLIBCXX_SIMD_MATH_FALLBACK(atan2)
1967  _GLIBCXX_SIMD_MATH_FALLBACK(cos)
1968  _GLIBCXX_SIMD_MATH_FALLBACK(sin)
1969  _GLIBCXX_SIMD_MATH_FALLBACK(tan)
1970  _GLIBCXX_SIMD_MATH_FALLBACK(acosh)
1971  _GLIBCXX_SIMD_MATH_FALLBACK(asinh)
1972  _GLIBCXX_SIMD_MATH_FALLBACK(atanh)
1973  _GLIBCXX_SIMD_MATH_FALLBACK(cosh)
1974  _GLIBCXX_SIMD_MATH_FALLBACK(sinh)
1975  _GLIBCXX_SIMD_MATH_FALLBACK(tanh)
1976  _GLIBCXX_SIMD_MATH_FALLBACK(exp)
1977  _GLIBCXX_SIMD_MATH_FALLBACK(exp2)
1978  _GLIBCXX_SIMD_MATH_FALLBACK(expm1)
1979  _GLIBCXX_SIMD_MATH_FALLBACK(ldexp)
1980  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb)
1981  _GLIBCXX_SIMD_MATH_FALLBACK(log)
1982  _GLIBCXX_SIMD_MATH_FALLBACK(log10)
1983  _GLIBCXX_SIMD_MATH_FALLBACK(log1p)
1984  _GLIBCXX_SIMD_MATH_FALLBACK(log2)
1985  _GLIBCXX_SIMD_MATH_FALLBACK(logb)
1986 
1987  // modf implemented in simd_math.h
1988  _GLIBCXX_SIMD_MATH_FALLBACK(scalbn)
1989  _GLIBCXX_SIMD_MATH_FALLBACK(scalbln)
1990  _GLIBCXX_SIMD_MATH_FALLBACK(cbrt)
1991  _GLIBCXX_SIMD_MATH_FALLBACK(fabs)
1992  _GLIBCXX_SIMD_MATH_FALLBACK(pow)
1993  _GLIBCXX_SIMD_MATH_FALLBACK(sqrt)
1994  _GLIBCXX_SIMD_MATH_FALLBACK(erf)
1995  _GLIBCXX_SIMD_MATH_FALLBACK(erfc)
1996  _GLIBCXX_SIMD_MATH_FALLBACK(lgamma)
1997  _GLIBCXX_SIMD_MATH_FALLBACK(tgamma)
1998 
1999  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint)
2000  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint)
2001 
2002  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround)
2003  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround)
2004 
2005  _GLIBCXX_SIMD_MATH_FALLBACK(fmod)
2006  _GLIBCXX_SIMD_MATH_FALLBACK(remainder)
2007 
2008  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2009  static _Tp
2010  _S_remquo(const _Tp __x, const _Tp __y,
2011  __fixed_size_storage_t<int, _TVT::_S_partial_width>* __z)
2012  {
2013  return __generate_vector<_Tp>([&](auto __i) {
2014  int __tmp;
2015  auto __r = remquo(__x[__i], __y[__i], &__tmp);
2016  __z->_M_set(__i, __tmp);
2017  return __r;
2018  });
2019  }
2020 
2021  // copysign in simd_math.h
2022  _GLIBCXX_SIMD_MATH_FALLBACK(nextafter)
2023  _GLIBCXX_SIMD_MATH_FALLBACK(fdim)
2024  _GLIBCXX_SIMD_MATH_FALLBACK(fmax)
2025  _GLIBCXX_SIMD_MATH_FALLBACK(fmin)
2026  _GLIBCXX_SIMD_MATH_FALLBACK(fma)
2027 
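  // The is{greater,greaterequal,less,lessequal} overloads compare on the
  // integer representation: clearing the sign bit and negating turns the
  // IEEE sign-magnitude encoding into an integer ordering that agrees with
  // the floating-point ordering, so no FP comparison (which could raise
  // FE_INVALID) is needed. NaN operands are masked out via _S_isunordered.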
2028  template <typename _Tp, size_t _Np>
2029  static constexpr _MaskMember<_Tp>
2030  _S_isgreater(_SimdWrapper<_Tp, _Np> __x,
2031  _SimdWrapper<_Tp, _Np> __y) noexcept
2032  {
2033  using _Ip = __int_for_sizeof_t<_Tp>;
2034  const auto __xn = __vector_bitcast<_Ip>(__x);
2035  const auto __yn = __vector_bitcast<_Ip>(__y);
2036  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2037  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2038  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2039  __xp > __yp);
2040  }
2041 
2042  template <typename _Tp, size_t _Np>
2043  static constexpr _MaskMember<_Tp>
2044  _S_isgreaterequal(_SimdWrapper<_Tp, _Np> __x,
2045  _SimdWrapper<_Tp, _Np> __y) noexcept
2046  {
2047  using _Ip = __int_for_sizeof_t<_Tp>;
2048  const auto __xn = __vector_bitcast<_Ip>(__x);
2049  const auto __yn = __vector_bitcast<_Ip>(__y);
2050  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2051  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2052  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2053  __xp >= __yp);
2054  }
2055 
2056  template <typename _Tp, size_t _Np>
2057  static constexpr _MaskMember<_Tp>
2058  _S_isless(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) noexcept
2059  {
2060  using _Ip = __int_for_sizeof_t<_Tp>;
2061  const auto __xn = __vector_bitcast<_Ip>(__x);
2062  const auto __yn = __vector_bitcast<_Ip>(__y);
2063  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2064  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2065  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2066  __xp < __yp);
2067  }
2068 
2069  template <typename _Tp, size_t _Np>
2070  static constexpr _MaskMember<_Tp>
2071  _S_islessequal(_SimdWrapper<_Tp, _Np> __x,
2072  _SimdWrapper<_Tp, _Np> __y) noexcept
2073  {
2074  using _Ip = __int_for_sizeof_t<_Tp>;
2075  const auto __xn = __vector_bitcast<_Ip>(__x);
2076  const auto __yn = __vector_bitcast<_Ip>(__y);
2077  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2078  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2079  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2080  __xp <= __yp);
2081  }
2082 
2083  template <typename _Tp, size_t _Np>
2084  static constexpr _MaskMember<_Tp>
2085  _S_islessgreater(_SimdWrapper<_Tp, _Np> __x,
2086  _SimdWrapper<_Tp, _Np> __y) noexcept
2087  {
2088  return __andnot(_SuperImpl::_S_isunordered(__x, __y),
2089  _SuperImpl::_S_not_equal_to(__x, __y));
2090  }
2091 
2092 #undef _GLIBCXX_SIMD_MATH_FALLBACK
2093 #undef _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET
2094 #undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET
2095  // _S_abs {{{3
2096  template <typename _Tp, size_t _Np>
2097  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2098  _S_abs(_SimdWrapper<_Tp, _Np> __x) noexcept
2099  {
2100  // if (__builtin_is_constant_evaluated())
2101  // {
2102  // return __x._M_data < 0 ? -__x._M_data : __x._M_data;
2103  // }
2104  if constexpr (is_floating_point_v<_Tp>)
2105  // `v < 0 ? -v : v` cannot compile to the efficient implementation of
2106  // masking the signbit off because it must consider v == -0
2107 
2108  // ~(-0.) & v would be easy, but breaks with -fno-signed-zeros
2109  return __and(_S_absmask<__vector_type_t<_Tp, _Np>>, __x._M_data);
2110  else
2111  return __x._M_data < 0 ? -__x._M_data : __x._M_data;
2112  }
2113 
2114  // }}}3
2115  // _S_plus_minus {{{
2116  // Returns __x + __y - __y without -fassociative-math optimizing to __x.
2117  // - _TV must be __vector_type_t<floating-point type, N>.
2118  // - _UV must be _TV or floating-point type.
2119  template <typename _TV, typename _UV>
2120  _GLIBCXX_SIMD_INTRINSIC static constexpr _TV _S_plus_minus(_TV __x,
2121  _UV __y) noexcept
2122  {
2123  #if defined __i386__ && !defined __SSE_MATH__
2124  if constexpr (sizeof(__x) == 8)
2125  { // operations on __x would use the FPU
2126  static_assert(is_same_v<_TV, __vector_type_t<float, 2>>);
2127  const auto __x4 = __vector_bitcast<float, 4>(__x);
2128  if constexpr (is_same_v<_TV, _UV>)
2129  return __vector_bitcast<float, 2>(
2130  _S_plus_minus(__x4, __vector_bitcast<float, 4>(__y)));
2131  else
2132  return __vector_bitcast<float, 2>(_S_plus_minus(__x4, __y));
2133  }
2134  #endif
2135  #if !defined __clang__ && __GCC_IEC_559 == 0
2136  if (__builtin_is_constant_evaluated()
2137  || (__builtin_constant_p(__x) && __builtin_constant_p(__y)))
2138  return (__x + __y) - __y;
2139  else
2140  return [&] {
2141  __x += __y;
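  // The empty asm statements below make __x opaque to the optimizer: since
  // the compiler must assume the inline asm modified the register, it cannot
  // reassociate (__x + __y) - __y back into __x under -fassociative-math.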
2142  if constexpr(__have_sse)
2143  {
2144  if constexpr (sizeof(__x) >= 16)
2145  asm("" : "+x"(__x));
2146  else if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
2147  asm("" : "+x"(__x[0]), "+x"(__x[1]));
2148  else
2149  __assert_unreachable<_TV>();
2150  }
2151  else if constexpr(__have_neon)
2152  asm("" : "+w"(__x));
2153  else if constexpr (__have_power_vmx)
2154  {
2155  if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
2156  asm("" : "+fgr"(__x[0]), "+fgr"(__x[1]));
2157  else
2158  asm("" : "+v"(__x));
2159  }
2160  else
2161  asm("" : "+g"(__x));
2162  return __x - __y;
2163  }();
2164  #else
2165  return (__x + __y) - __y;
2166  #endif
2167  }
2168 
2169  // }}}
2170  // _S_nearbyint {{{3
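  // Adds and subtracts 2^(digits-1) with the sign of __x copied onto the
  // shifter: the sum's magnitude lies in [2^(digits-1), 2^digits), where the
  // ulp is 1, so the fractional bits are rounded away in the current
  // rounding mode. _S_plus_minus keeps -fassociative-math from folding the
  // add/sub pair away. Inputs with |x| >= 2^(digits-1) are already integral
  // and are returned unchanged.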
2171  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2172  _GLIBCXX_SIMD_INTRINSIC static _Tp _S_nearbyint(_Tp __x_) noexcept
2173  {
2174  using value_type = typename _TVT::value_type;
2175  using _V = typename _TVT::type;
2176  const _V __x = __x_;
2177  const _V __absx = __and(__x, _S_absmask<_V>);
2178  static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<value_type>);
2179  _GLIBCXX_SIMD_USE_CONSTEXPR _V __shifter_abs
2180  = _V() + (1ull << (__digits_v<value_type> - 1));
2181  const _V __shifter = __or(__and(_S_signmask<_V>, __x), __shifter_abs);
2182  const _V __shifted = _S_plus_minus(__x, __shifter);
2183  return __absx < __shifter_abs ? __shifted : __x;
2184  }
2185 
2186  // _S_rint {{{3
2187  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2188  _GLIBCXX_SIMD_INTRINSIC static _Tp _S_rint(_Tp __x) noexcept
2189  {
2190  return _SuperImpl::_S_nearbyint(__x);
2191  }
2192 
2193  // _S_trunc {{{3
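  // Truncates toward zero: |x| is rounded with the same shifter trick as
  // _S_nearbyint, the result is decremented by 1 where that rounding went
  // up, and the sign of __x is restored via __or(__xor(__absx, __x), ...).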
2194  template <typename _Tp, size_t _Np>
2195  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2196  _S_trunc(_SimdWrapper<_Tp, _Np> __x)
2197  {
2198  using _V = __vector_type_t<_Tp, _Np>;
2199  const _V __absx = __and(__x._M_data, _S_absmask<_V>);
2200  static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<_Tp>);
2201  constexpr _Tp __shifter = 1ull << (__digits_v<_Tp> - 1);
2202  _V __truncated = _S_plus_minus(__absx, __shifter);
2203  __truncated -= __truncated > __absx ? _V() + 1 : _V();
2204  return __absx < __shifter ? __or(__xor(__absx, __x._M_data), __truncated)
2205  : __x._M_data;
2206  }
2207 
2208  // _S_round {{{3
2209  template <typename _Tp, size_t _Np>
2210  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2211  _S_round(_SimdWrapper<_Tp, _Np> __x)
2212  {
2213  const auto __abs_x = _SuperImpl::_S_abs(__x);
2214  const auto __t_abs = _SuperImpl::_S_trunc(__abs_x)._M_data;
2215  const auto __r_abs // round(abs(x)) =
2216  = __t_abs + (__abs_x._M_data - __t_abs >= _Tp(.5) ? _Tp(1) : 0);
2217  return __or(__xor(__abs_x._M_data, __x._M_data), __r_abs);
2218  }
2219 
2220  // _S_floor {{{3
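  // floor(x) = trunc(x) minus 1 wherever truncation moved a negative,
  // non-integral input toward zero (__x < 0 and trunc(__x) != __x);
  // _S_ceil below is the mirror image for non-negative inputs.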
2221  template <typename _Tp, size_t _Np>
2222  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2223  _S_floor(_SimdWrapper<_Tp, _Np> __x)
2224  {
2225  const auto __y = _SuperImpl::_S_trunc(__x)._M_data;
2226  const auto __negative_input
2227  = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
2228  const auto __mask
2229  = __andnot(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input);
2230  return __or(__andnot(__mask, __y),
2231  __and(__mask, __y - __vector_broadcast<_Np, _Tp>(1)));
2232  }
2233 
2234  // _S_ceil {{{3
2235  template <typename _Tp, size_t _Np>
2236  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2237  _S_ceil(_SimdWrapper<_Tp, _Np> __x)
2238  {
2239  const auto __y = _SuperImpl::_S_trunc(__x)._M_data;
2240  const auto __negative_input
2241  = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
2242  const auto __inv_mask
2243  = __or(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input);
2244  return __or(__and(__inv_mask, __y),
2245  __andnot(__inv_mask, __y + __vector_broadcast<_Np, _Tp>(1)));
2246  }
2247 
2248  // _S_isnan {{{3
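  // isnan must not raise FP exceptions. Without signaling-NaN support the
  // quiet self-comparison __x == __x is false exactly for NaNs; with
  // __SUPPORT_SNAN__ that comparison could raise FE_INVALID, so the check is
  // done on the integer representation instead: a NaN is any value whose
  // magnitude compares greater than infinity's bit pattern.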
2249  template <typename _Tp, size_t _Np>
2250  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2251  _S_isnan([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2252  {
2253  #if __FINITE_MATH_ONLY__
2254  return {}; // false
2255  #elif !defined __SUPPORT_SNAN__
2256  return ~(__x._M_data == __x._M_data);
2257  #elif defined __STDC_IEC_559__
2258  using _Ip = __int_for_sizeof_t<_Tp>;
2259  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2260  const auto __infn
2261  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__infinity_v<_Tp>));
2262  return __infn < __absn;
2263  #else
2264  #error "Not implemented: how to support SNaN but non-IEC559 floating-point?"
2265  #endif
2266  }
2267 
2268  // _S_isfinite {{{3
2269  template <typename _Tp, size_t _Np>
2270  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2271  _S_isfinite([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2272  {
2273  #if __FINITE_MATH_ONLY__
2274  using _UV = typename _MaskMember<_Tp>::_BuiltinType;
2275  _GLIBCXX_SIMD_USE_CONSTEXPR _UV __alltrue = ~_UV();
2276  return __alltrue;
2277  #else
2278  // if all exponent bits are set, __x is either inf or NaN
2279  using _Ip = __int_for_sizeof_t<_Tp>;
2280  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2281  const auto __maxn
2282  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
2283  return __absn <= __maxn;
2284  #endif
2285  }
2286 
2287  // _S_isunordered {{{3
2288  template <typename _Tp, size_t _Np>
2289  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2290  _S_isunordered(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
2291  {
2292  return __or(_S_isnan(__x), _S_isnan(__y));
2293  }
2294 
2295  // _S_signbit {{{3
2296  template <typename _Tp, size_t _Np>
2297  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2298  _S_signbit(_SimdWrapper<_Tp, _Np> __x)
2299  {
2300  using _Ip = __int_for_sizeof_t<_Tp>;
2301  return __vector_bitcast<_Ip>(__x) < 0;
2302  // Arithmetic right shift (SRA) would also work (instead of compare), but
2303  // 64-bit SRA isn't available on x86 before AVX512. And in general,
2304  // compares are more likely to be efficient than SRA.
2305  }
2306 
2307  // _S_isinf {{{3
2308  template <typename _Tp, size_t _Np>
2309  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2310  _S_isinf([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2311  {
2312  #if __FINITE_MATH_ONLY__
2313  return {}; // false
2314  #else
2315  return _SuperImpl::template _S_equal_to<_Tp, _Np>(_SuperImpl::_S_abs(__x),
2316  __vector_broadcast<_Np>(
2317  __infinity_v<_Tp>));
2318  // alternative:
2319  // compare to inf using the corresponding integer type
2320  /*
2321  return
2322  __vector_bitcast<_Tp>(__vector_bitcast<__int_for_sizeof_t<_Tp>>(
2323  _S_abs(__x)._M_data)
2324  ==
2325  __vector_bitcast<__int_for_sizeof_t<_Tp>>(__vector_broadcast<_Np>(
2326  __infinity_v<_Tp>)));
2327  */
2328  #endif
2329  }
2330 
2331  // _S_isnormal {{{3
2332  template <typename _Tp, size_t _Np>
2333  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2334  _S_isnormal(_SimdWrapper<_Tp, _Np> __x)
2335  {
2336  using _Ip = __int_for_sizeof_t<_Tp>;
2337  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2338  const auto __minn
2339  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__norm_min_v<_Tp>));
2340  #if __FINITE_MATH_ONLY__
2341  return __absn >= __minn;
2342  #else
2343  const auto __maxn
2344  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
2345  return __minn <= __absn && __absn <= __maxn;
2346  #endif
2347  }
2348 
2349  // _S_fpclassify {{{3
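  // Classifies each element by comparing |x|, reinterpreted as a same-width
  // integer, against the bit patterns of the smallest normal value and of
  // infinity: below __minn it is FP_ZERO or FP_SUBNORMAL, below __infn it is
  // FP_NORMAL, equal to __infn it is FP_INFINITE, above it is FP_NAN. The
  // remainder of the function repacks the sizeof(_Tp)-wide results into the
  // int-based __fixed_size_storage_t return type.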
2350  template <typename _Tp, size_t _Np>
2351  _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
2352  _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
2353  {
2354  using _I = __int_for_sizeof_t<_Tp>;
2355  const auto __xn
2356  = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
2357  constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
2358  _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
2359  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
2360  _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
2361  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
2362 
2363  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
2364  = __vector_broadcast<_NI, _I>(FP_NORMAL);
2365  #if !__FINITE_MATH_ONLY__
2366  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
2367  = __vector_broadcast<_NI, _I>(FP_NAN);
2368  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
2369  = __vector_broadcast<_NI, _I>(FP_INFINITE);
2370  #endif
2371  #ifndef __FAST_MATH__
2372  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
2373  = __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
2374  #endif
2375  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
2376  = __vector_broadcast<_NI, _I>(FP_ZERO);
2377 
2378  __vector_type_t<_I, _NI>
2379  __tmp = __xn < __minn
2380  #ifdef __FAST_MATH__
2381  ? __fp_zero
2382  #else
2383  ? (__xn == 0 ? __fp_zero : __fp_subnormal)
2384  #endif
2385  #if __FINITE_MATH_ONLY__
2386  : __fp_normal;
2387  #else
2388  : (__xn < __infn ? __fp_normal
2389  : (__xn == __infn ? __fp_infinite : __fp_nan));
2390  #endif
2391 
2392  if constexpr (sizeof(_I) == sizeof(int))
2393  {
2394  using _FixedInt = __fixed_size_storage_t<int, _Np>;
2395  const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
2396  if constexpr (_FixedInt::_S_tuple_size == 1)
2397  return {__as_int};
2398  else if constexpr (_FixedInt::_S_tuple_size == 2
2399  && is_same_v<
2400  typename _FixedInt::_SecondType::_FirstAbi,
2401  simd_abi::scalar>)
2402  return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
2403  else if constexpr (_FixedInt::_S_tuple_size == 2)
2404  return {__extract<0, 2>(__as_int),
2405  __auto_bitcast(__extract<1, 2>(__as_int))};
2406  else
2407  __assert_unreachable<_Tp>();
2408  }
2409  else if constexpr (_Np == 2 && sizeof(_I) == 8
2410  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
2411  {
2412  const auto __aslong = __vector_bitcast<_LLong>(__tmp);
2413  return {int(__aslong[0]), {int(__aslong[1])}};
2414  }
2415  #if _GLIBCXX_SIMD_X86INTRIN
2416  else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
2417  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2418  return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
2419  __to_intrin(__hi128(__tmp)))};
2420  else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
2421  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2422  return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
2423  #endif // _GLIBCXX_SIMD_X86INTRIN
2424  else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2425  return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
2426  [](auto... __l) {
2427  return __make_wrapper<int>(__l...);
2428  })};
2429  else
2430  __assert_unreachable<_Tp>();
2431  }
2432 
2433  // _S_increment & _S_decrement{{{2
2434  template <typename _Tp, size_t _Np>
2435  _GLIBCXX_SIMD_INTRINSIC static void
2436  _S_increment(_SimdWrapper<_Tp, _Np>& __x)
2437  { __x = __x._M_data + 1; }
2438 
2439  template <typename _Tp, size_t _Np>
2440  _GLIBCXX_SIMD_INTRINSIC static void
2441  _S_decrement(_SimdWrapper<_Tp, _Np>& __x)
2442  { __x = __x._M_data - 1; }
2443 
2444  // smart_reference access {{{2
2445  template <typename _Tp, size_t _Np, typename _Up>
2446  _GLIBCXX_SIMD_INTRINSIC constexpr static void
2447  _S_set(_SimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept
2448  { __v._M_set(__i, static_cast<_Up&&>(__x)); }
2449 
2450  // _S_masked_assign{{{2
2451  template <typename _Tp, typename _K, size_t _Np>
2452  _GLIBCXX_SIMD_INTRINSIC static void
2453  _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
2454  __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
2455  {
2456  if (__k._M_is_constprop_none_of())
2457  return;
2458  else if (__k._M_is_constprop_all_of())
2459  __lhs = __rhs;
2460  else
2461  __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs);
2462  }
2463 
2464  template <typename _Tp, typename _K, size_t _Np>
2465  _GLIBCXX_SIMD_INTRINSIC static void
2466  _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
2467  __type_identity_t<_Tp> __rhs)
2468  {
2469  if (__k._M_is_constprop_none_of())
2470  return;
2471  else if (__k._M_is_constprop_all_of())
2472  __lhs = __vector_broadcast<_Np>(__rhs);
2473  else if (__builtin_constant_p(__rhs) && __rhs == 0)
2474  {
2475  if constexpr (!is_same_v<bool, _K>)
2476  // the __andnot optimization only makes sense if __k._M_data is a
2477  // vector register
2478  __lhs._M_data
2479  = __andnot(__vector_bitcast<_Tp>(__k), __lhs._M_data);
2480  else
2481  // for AVX512/__mmask, a _mm512_maskz_mov is best
2482  __lhs
2483  = _CommonImpl::_S_blend(__k, __lhs, _SimdWrapper<_Tp, _Np>());
2484  }
2485  else
2486  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2487  _SimdWrapper<_Tp, _Np>(
2488  __vector_broadcast<_Np>(__rhs)));
2489  }
2490 
2491  // _S_masked_cassign {{{2
2492  template <typename _Op, typename _Tp, typename _K, size_t _Np>
2493  _GLIBCXX_SIMD_INTRINSIC static void
2494  _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
2495  _SimdWrapper<_Tp, _Np>& __lhs,
2496  const __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs,
2497  _Op __op)
2498  {
2499  if (__k._M_is_constprop_none_of())
2500  return;
2501  else if (__k._M_is_constprop_all_of())
2502  __lhs = __op(_SuperImpl{}, __lhs, __rhs);
2503  else
2504  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2505  __op(_SuperImpl{}, __lhs, __rhs));
2506  }
2507 
2508  template <typename _Op, typename _Tp, typename _K, size_t _Np>
2509  _GLIBCXX_SIMD_INTRINSIC static void
2510  _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
2511  _SimdWrapper<_Tp, _Np>& __lhs,
2512  const __type_identity_t<_Tp> __rhs, _Op __op)
2513  { _S_masked_cassign(__k, __lhs, __vector_broadcast<_Np>(__rhs), __op); }
2514 
2515  // _S_masked_unary {{{2
2516  template <template <typename> class _Op, typename _Tp, typename _K,
2517  size_t _Np>
2518  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2519  _S_masked_unary(const _SimdWrapper<_K, _Np> __k,
2520  const _SimdWrapper<_Tp, _Np> __v)
2521  {
2522  if (__k._M_is_constprop_none_of())
2523  return __v;
2524  auto __vv = _M_make_simd(__v);
2525  _Op<decltype(__vv)> __op;
2526  if (__k._M_is_constprop_all_of())
2527  return __data(__op(__vv));
2528  else
2529  return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv)));
2530  }
2531 
2532  //}}}2
2533  };
2534 
2535 // _MaskImplBuiltinMixin {{{1
2536 struct _MaskImplBuiltinMixin
2537 {
2538  template <typename _Tp>
2539  using _TypeTag = _Tp*;
2540 
2541  // _S_to_maskvector {{{
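  // Converts a mask given as a single bool, as a _BitMask, or as a vector
  // mask with a different element type into a vector mask whose elements are
  // all-ones for true and all-zeros for false.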
2542  template <typename _Up, size_t _ToN = 1>
2543  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2544  _S_to_maskvector(bool __x)
2545  {
2546  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2547  return __x ? __vector_type_t<_Up, _ToN>{~_Up()}
2548  : __vector_type_t<_Up, _ToN>{};
2549  }
2550 
2551  template <typename _Up, size_t _UpN = 0, size_t _Np, bool _Sanitized,
2552  size_t _ToN = _UpN == 0 ? _Np : _UpN>
2553  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2554  _S_to_maskvector(_BitMask<_Np, _Sanitized> __x)
2555  {
2556  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2557  return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
2558  auto __i) constexpr {
2559  if constexpr (__i < _Np)
2560  return __x[__i] ? ~_Up() : _Up();
2561  else
2562  return _Up();
2563  });
2564  }
2565 
2566  template <typename _Up, size_t _UpN = 0, typename _Tp, size_t _Np,
2567  size_t _ToN = _UpN == 0 ? _Np : _UpN>
2568  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2569  _S_to_maskvector(_SimdWrapper<_Tp, _Np> __x)
2570  {
2571  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2572  using _TW = _SimdWrapper<_Tp, _Np>;
2573  using _UW = _SimdWrapper<_Up, _ToN>;
2574  if constexpr (sizeof(_Up) == sizeof(_Tp) && sizeof(_TW) == sizeof(_UW))
2575  return __wrapper_bitcast<_Up, _ToN>(__x);
2576  else if constexpr (is_same_v<_Tp, bool>) // bits -> vector
2577  return _S_to_maskvector<_Up, _ToN>(_BitMask<_Np>(__x._M_data));
2578  else
2579  { // vector -> vector
2580  /*
2581  [[maybe_unused]] const auto __y = __vector_bitcast<_Up>(__x._M_data);
2582  if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 4 && sizeof(__y) ==
2583  16) return __vector_permute<1, 3, -1, -1>(__y); else if constexpr
2584  (sizeof(_Tp) == 4 && sizeof(_Up) == 2
2585  && sizeof(__y) == 16)
2586  return __vector_permute<1, 3, 5, 7, -1, -1, -1, -1>(__y);
2587  else if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 2
2588  && sizeof(__y) == 16)
2589  return __vector_permute<3, 7, -1, -1, -1, -1, -1, -1>(__y);
2590  else if constexpr (sizeof(_Tp) == 2 && sizeof(_Up) == 1
2591  && sizeof(__y) == 16)
2592  return __vector_permute<1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1,
2593  -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 4 &&
2594  sizeof(_Up) == 1
2595  && sizeof(__y) == 16)
2596  return __vector_permute<3, 7, 11, 15, -1, -1, -1, -1, -1, -1, -1,
2597  -1, -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 8 &&
2598  sizeof(_Up) == 1
2599  && sizeof(__y) == 16)
2600  return __vector_permute<7, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
2601  -1, -1, -1, -1, -1>(__y); else
2602  */
2603  {
2604  return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
2605  auto __i) constexpr {
2606  if constexpr (__i < _Np)
2607  return _Up(__x[__i.value]);
2608  else
2609  return _Up();
2610  });
2611  }
2612  }
2613  }
2614 
2615  // }}}
2616  // _S_to_bits {{{
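  // Converts a vector mask to a bitmask: an unsigned right shift leaves only
  // the sign bit of each (all-ones or all-zeros) element, and each resulting
  // 0/1 is ORed into the return value at its element index.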
2617  template <typename _Tp, size_t _Np>
2618  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
2619  _S_to_bits(_SimdWrapper<_Tp, _Np> __x)
2620  {
2621  static_assert(!is_same_v<_Tp, bool>);
2622  static_assert(_Np <= __CHAR_BIT__ * sizeof(_ULLong));
2623  using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2624  const auto __bools
2625  = __vector_bitcast<_Up>(__x) >> (sizeof(_Up) * __CHAR_BIT__ - 1);
2626  _ULLong __r = 0;
2627  __execute_n_times<_Np>(
2628  [&](auto __i) { __r |= _ULLong(__bools[__i.value]) << __i; });
2629  return __r;
2630  }
2631 
2632  // }}}
2633 };
2634 
2635 // _MaskImplBuiltin {{{1
2636 template <typename _Abi, typename>
2637  struct _MaskImplBuiltin : _MaskImplBuiltinMixin
2638  {
2639  using _MaskImplBuiltinMixin::_S_to_bits;
2640  using _MaskImplBuiltinMixin::_S_to_maskvector;
2641 
2642  // member types {{{
2643  template <typename _Tp>
2644  using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;
2645 
2646  template <typename _Tp>
2647  using _MaskMember = typename _Abi::template _MaskMember<_Tp>;
2648 
2649  using _SuperImpl = typename _Abi::_MaskImpl;
2650  using _CommonImpl = typename _Abi::_CommonImpl;
2651 
2652  template <typename _Tp>
2653  static constexpr size_t _S_size = simd_size_v<_Tp, _Abi>;
2654 
2655  // }}}
2656  // _S_broadcast {{{
2657  template <typename _Tp>
2658  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
2659  _S_broadcast(bool __x)
2660  {
2661  return __x ? _Abi::template _S_implicit_mask<_Tp>()
2662  : _MaskMember<_Tp>();
2663  }
2664 
2665  // }}}
2666  // _S_load {{{
2667  template <typename _Tp>
2668  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
2669  _S_load(const bool* __mem)
2670  {
2671  using _I = __int_for_sizeof_t<_Tp>;
2672  if constexpr (sizeof(_Tp) == sizeof(bool))
2673  {
2674  const auto __bools
2675  = _CommonImpl::template _S_load<_I, _S_size<_Tp>>(__mem);
2676  // bool is {0, 1}, everything else is UB
2677  return __bools > 0;
2678  }
2679  else
2680  return __generate_vector<_I, _S_size<_Tp>>([&](auto __i) constexpr {
2681  return __mem[__i] ? ~_I() : _I();
2682  });
2683  }
2684 
2685  // }}}
2686  // _S_convert {{{
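  // Converts any mask representation (bitmask, bool wrapper, vector mask, or
  // simd_mask of another ABI) into this ABI's _MaskMember: bitmask ABIs take
  // the bits directly, all other ABIs go through _S_to_maskvector.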
2687  template <typename _Tp, size_t _Np, bool _Sanitized>
2688  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2689  _S_convert(_BitMask<_Np, _Sanitized> __x)
2690  {
2691  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2692  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_to_bits());
2693  else
2694  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2695  _S_size<_Tp>>(
2696  __x._M_sanitized());
2697  }
2698 
2699  template <typename _Tp, size_t _Np>
2700  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2701  _S_convert(_SimdWrapper<bool, _Np> __x)
2702  {
2703  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2704  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_data);
2705  else
2706  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2707  _S_size<_Tp>>(
2708  _BitMask<_Np>(__x._M_data)._M_sanitized());
2709  }
2710 
2711  template <typename _Tp, typename _Up, size_t _Np>
2712  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2713  _S_convert(_SimdWrapper<_Up, _Np> __x)
2714  {
2715  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2716  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(
2717  _SuperImpl::_S_to_bits(__x));
2718  else
2719  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2720  _S_size<_Tp>>(__x);
2721  }
2722 
2723  template <typename _Tp, typename _Up, typename _UAbi>
2724  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2725  _S_convert(simd_mask<_Up, _UAbi> __x)
2726  {
2727  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2728  {
2729  using _R = _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>;
2730  if constexpr (__is_builtin_bitmask_abi<_UAbi>()) // bits -> bits
2731  return _R(__data(__x));
2732  else if constexpr (__is_scalar_abi<_UAbi>()) // bool -> bits
2733  return _R(__data(__x));
2734  else if constexpr (__is_fixed_size_abi_v<_UAbi>) // bitset -> bits
2735  return _R(__data(__x)._M_to_bits());
2736  else // vector -> bits
2737  return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits());
2738  }
2739  else
2740  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2741  _S_size<_Tp>>(
2742  __data(__x));
2743  }
2744 
2745  // }}}
2746  // _S_masked_load {{{2
2747  template <typename _Tp, size_t _Np>
2748  static inline _SimdWrapper<_Tp, _Np>
2749  _S_masked_load(_SimdWrapper<_Tp, _Np> __merge,
2750  _SimdWrapper<_Tp, _Np> __mask, const bool* __mem) noexcept
2751  {
2752  // AVX(2) has 32/64 bit maskload, but nothing at 8 bit granularity
2753  auto __tmp = __wrapper_bitcast<__int_for_sizeof_t<_Tp>>(__merge);
2754  _BitOps::_S_bit_iteration(_SuperImpl::_S_to_bits(__mask),
2755  [&](auto __i) {
2756  __tmp._M_set(__i, -__mem[__i]);
2757  });
2758  __merge = __wrapper_bitcast<_Tp>(__tmp);
2759  return __merge;
2760  }
2761 
2762  // _S_store {{{2
2763  template <typename _Tp, size_t _Np>
2764  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __v,
2765  bool* __mem) noexcept
2766  {
2767  __execute_n_times<_Np>([&](auto __i) constexpr {
2768  __mem[__i] = __v[__i];
2769  });
2770  }
2771 
2772  // _S_masked_store {{{2
2773  template <typename _Tp, size_t _Np>
2774  static inline void
2775  _S_masked_store(const _SimdWrapper<_Tp, _Np> __v, bool* __mem,
2776  const _SimdWrapper<_Tp, _Np> __k) noexcept
2777  {
2778  _BitOps::_S_bit_iteration(
2779  _SuperImpl::_S_to_bits(__k), [&](auto __i) constexpr {
2780  __mem[__i] = __v[__i];
2781  });
2782  }
2783 
2784  // _S_from_bitmask{{{2
2785  template <size_t _Np, typename _Tp>
2786  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2787  _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>)
2788  {
2789  return _SuperImpl::template _S_to_maskvector<_Tp, _S_size<_Tp>>(__bits);
2790  }
2791 
2792  // logical and bitwise operators {{{2
2793  template <typename _Tp, size_t _Np>
2794  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2795  _S_logical_and(const _SimdWrapper<_Tp, _Np>& __x,
2796  const _SimdWrapper<_Tp, _Np>& __y)
2797  { return __and(__x._M_data, __y._M_data); }
2798 
2799  template <typename _Tp, size_t _Np>
2800  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2801  _S_logical_or(const _SimdWrapper<_Tp, _Np>& __x,
2802  const _SimdWrapper<_Tp, _Np>& __y)
2803  { return __or(__x._M_data, __y._M_data); }
2804 
2805  template <typename _Tp, size_t _Np>
2806  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2807  _S_bit_not(const _SimdWrapper<_Tp, _Np>& __x)
2808  {
2809  if constexpr (_Abi::template _S_is_partial<_Tp>)
2810  return __andnot(__x, __wrapper_bitcast<_Tp>(
2811  _Abi::template _S_implicit_mask<_Tp>()));
2812  else
2813  return __not(__x._M_data);
2814  }
2815 
2816  template <typename _Tp, size_t _Np>
2817  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2818  _S_bit_and(const _SimdWrapper<_Tp, _Np>& __x,
2819  const _SimdWrapper<_Tp, _Np>& __y)
2820  { return __and(__x._M_data, __y._M_data); }
2821 
2822  template <typename _Tp, size_t _Np>
2823  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2824  _S_bit_or(const _SimdWrapper<_Tp, _Np>& __x,
2825  const _SimdWrapper<_Tp, _Np>& __y)
2826  { return __or(__x._M_data, __y._M_data); }
2827 
2828  template <typename _Tp, size_t _Np>
2829  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2830  _S_bit_xor(const _SimdWrapper<_Tp, _Np>& __x,
2831  const _SimdWrapper<_Tp, _Np>& __y)
2832  { return __xor(__x._M_data, __y._M_data); }
2833 
2834  // smart_reference access {{{2
2835  template <typename _Tp, size_t _Np>
2836  static constexpr void _S_set(_SimdWrapper<_Tp, _Np>& __k, int __i,
2837  bool __x) noexcept
2838  {
2839  if constexpr (is_same_v<_Tp, bool>)
2840  __k._M_set(__i, __x);
2841  else
2842  {
2843  static_assert(is_same_v<_Tp, __int_for_sizeof_t<_Tp>>);
2844  if (__builtin_is_constant_evaluated())
2845  {
2846  __k = __generate_from_n_evaluations<_Np,
2847  __vector_type_t<_Tp, _Np>>(
2848  [&](auto __j) {
2849  if (__i == __j)
2850  return _Tp(-__x);
2851  else
2852  return __k[+__j];
2853  });
2854  }
2855  else
2856  __k._M_data[__i] = -__x;
2857  }
2858  }
2859 
2860  // _S_masked_assign{{{2
2861  template <typename _Tp, size_t _Np>
2862  _GLIBCXX_SIMD_INTRINSIC static void
2863  _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
2864  _SimdWrapper<_Tp, _Np>& __lhs,
2865  __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
2866  { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); }
2867 
2868  template <typename _Tp, size_t _Np>
2869  _GLIBCXX_SIMD_INTRINSIC static void
2870  _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
2871  _SimdWrapper<_Tp, _Np>& __lhs, bool __rhs)
2872  {
2873  if (__builtin_constant_p(__rhs))
2874  {
2875  if (__rhs == false)
2876  __lhs = __andnot(__k, __lhs);
2877  else
2878  __lhs = __or(__k, __lhs);
2879  return;
2880  }
2881  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2882  __data(simd_mask<_Tp, _Abi>(__rhs)));
2883  }
2884 
2885  //}}}2
2886  // _S_all_of {{{
2887  template <typename _Tp>
2888  _GLIBCXX_SIMD_INTRINSIC static bool
2889  _S_all_of(simd_mask<_Tp, _Abi> __k)
2890  {
2891  return __call_with_subscripts(
2892  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2893  [](const auto... __ent) constexpr { return (... && !(__ent == 0)); });
2894  }
2895 
2896  // }}}
2897  // _S_any_of {{{
2898  template <typename _Tp>
2899  _GLIBCXX_SIMD_INTRINSIC static bool
2900  _S_any_of(simd_mask<_Tp, _Abi> __k)
2901  {
2902  return __call_with_subscripts(
2903  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2904  [](const auto... __ent) constexpr { return (... || !(__ent == 0)); });
2905  }
2906 
2907  // }}}
2908  // _S_none_of {{{
2909  template <typename _Tp>
2910  _GLIBCXX_SIMD_INTRINSIC static bool
2911  _S_none_of(simd_mask<_Tp, _Abi> __k)
2912  {
2913  return __call_with_subscripts(
2914  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2915  [](const auto... __ent) constexpr { return (... && (__ent == 0)); });
2916  }
2917 
2918  // }}}
2919  // _S_some_of {{{
2920  template <typename _Tp>
2921  _GLIBCXX_SIMD_INTRINSIC static bool
2922  _S_some_of(simd_mask<_Tp, _Abi> __k)
2923  {
2924  const int __n_true = _SuperImpl::_S_popcount(__k);
2925  return __n_true > 0 && __n_true < int(_S_size<_Tp>);
2926  }
2927 
2928  // }}}
2929  // _S_popcount {{{
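  // Mask elements are all-ones, i.e. -1 when reinterpreted as signed
  // integers, so summing the reinterpreted mask yields minus the number of
  // true elements; the negation turns that into the popcount.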
2930  template <typename _Tp>
2931  _GLIBCXX_SIMD_INTRINSIC static int
2932  _S_popcount(simd_mask<_Tp, _Abi> __k)
2933  {
2934  using _I = __int_for_sizeof_t<_Tp>;
2935  if constexpr (is_default_constructible_v<simd<_I, _Abi>>)
2936  return -reduce(
2937  simd<_I, _Abi>(__private_init, __wrapper_bitcast<_I>(__data(__k))));
2938  else
2939  return -reduce(__bit_cast<rebind_simd_t<_I, simd<_Tp, _Abi>>>(
2940  simd<_Tp, _Abi>(__private_init, __data(__k))));
2941  }
2942 
2943  // }}}
2944  // _S_find_first_set {{{
2945  template <typename _Tp>
2946  _GLIBCXX_SIMD_INTRINSIC static int
2947  _S_find_first_set(simd_mask<_Tp, _Abi> __k)
2948  {
2949  return std::__countr_zero(
2950  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits());
2951  }
2952 
2953  // }}}
2954  // _S_find_last_set {{{
2955  template <typename _Tp>
2956  _GLIBCXX_SIMD_INTRINSIC static int
2957  _S_find_last_set(simd_mask<_Tp, _Abi> __k)
2958  {
2959  return std::__bit_width(
2960  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits()) - 1;
2961  }
2962 
2963  // }}}
2964  };
2965 
2966 //}}}1
2967 _GLIBCXX_SIMD_END_NAMESPACE
2968 #endif // __cplusplus >= 201703L
2969 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
2970 
2971 // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100