libstdc++
opt_random.h
Go to the documentation of this file.
1 // Optimizations for random number functions, x86 version -*- C++ -*-
2 
3 // Copyright (C) 2012-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/opt_random.h
26  * This is an internal header file, included by other library headers.
27  * Do not attempt to use it directly. @headername{random}
28  */
29 
30 #ifndef _BITS_OPT_RANDOM_H
31 #define _BITS_OPT_RANDOM_H 1
32 
33 #ifdef __SSE3__
34 #include <pmmintrin.h>
35 #endif
36 
37 
38 #pragma GCC system_header
39 
40 
41 namespace std _GLIBCXX_VISIBILITY(default)
42 {
43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
44 
45 #ifdef __SSE3__
46  template<>
47  template<typename _UniformRandomNumberGenerator>
48  void
49  normal_distribution<double>::
50  __generate(typename normal_distribution<double>::result_type* __f,
52  _UniformRandomNumberGenerator& __urng,
53  const param_type& __param)
54  {
55  typedef uint64_t __uctype;
56 
57  if (__f == __t)
58  return;
59 
60  if (_M_saved_available)
61  {
62  _M_saved_available = false;
63  *__f++ = _M_saved * __param.stddev() + __param.mean();
64 
65  if (__f == __t)
66  return;
67  }
68 
69  constexpr uint64_t __maskval = 0xfffffffffffffull;
70  static const __m128i __mask = _mm_set1_epi64x(__maskval);
71  static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
72  static const __m128d __three = _mm_set1_pd(3.0);
73  const __m128d __av = _mm_set1_pd(__param.mean());
74 
75  const __uctype __urngmin = __urng.min();
76  const __uctype __urngmax = __urng.max();
77  const __uctype __urngrange = __urngmax - __urngmin;
78  const __uctype __uerngrange = __urngrange + 1;
79 
80  while (__f + 1 < __t)
81  {
82  double __le;
83  __m128d __x;
84  do
85  {
86  union
87  {
88  __m128i __i;
89  __m128d __d;
90  } __v;
91 
92  if (__urngrange > __maskval)
93  {
94  if (__detail::_Power_of_2(__uerngrange))
95  __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
96  __urng()),
97  __mask);
98  else
99  {
100  const __uctype __uerange = __maskval + 1;
101  const __uctype __scaling = __urngrange / __uerange;
102  const __uctype __past = __uerange * __scaling;
103  uint64_t __v1;
104  do
105  __v1 = __uctype(__urng()) - __urngmin;
106  while (__v1 >= __past);
107  __v1 /= __scaling;
108  uint64_t __v2;
109  do
110  __v2 = __uctype(__urng()) - __urngmin;
111  while (__v2 >= __past);
112  __v2 /= __scaling;
113 
114  __v.__i = _mm_set_epi64x(__v1, __v2);
115  }
116  }
117  else if (__urngrange == __maskval)
118  __v.__i = _mm_set_epi64x(__urng(), __urng());
119  else if ((__urngrange + 2) * __urngrange >= __maskval
120  && __detail::_Power_of_2(__uerngrange))
121  {
122  uint64_t __v1 = __urng() * __uerngrange + __urng();
123  uint64_t __v2 = __urng() * __uerngrange + __urng();
124 
125  __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
126  __mask);
127  }
128  else
129  {
130  size_t __nrng = 2;
131  __uctype __high = __maskval / __uerngrange / __uerngrange;
132  while (__high > __uerngrange)
133  {
134  ++__nrng;
135  __high /= __uerngrange;
136  }
137  const __uctype __highrange = __high + 1;
138  const __uctype __scaling = __urngrange / __highrange;
139  const __uctype __past = __highrange * __scaling;
140  __uctype __tmp;
141 
142  uint64_t __v1;
143  do
144  {
145  do
146  __tmp = __uctype(__urng()) - __urngmin;
147  while (__tmp >= __past);
148  __v1 = __tmp / __scaling;
149  for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
150  {
151  __tmp = __v1;
152  __v1 *= __uerngrange;
153  __v1 += __uctype(__urng()) - __urngmin;
154  }
155  }
156  while (__v1 > __maskval || __v1 < __tmp);
157 
158  uint64_t __v2;
159  do
160  {
161  do
162  __tmp = __uctype(__urng()) - __urngmin;
163  while (__tmp >= __past);
164  __v2 = __tmp / __scaling;
165  for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
166  {
167  __tmp = __v2;
168  __v2 *= __uerngrange;
169  __v2 += __uctype(__urng()) - __urngmin;
170  }
171  }
172  while (__v2 > __maskval || __v2 < __tmp);
173 
174  __v.__i = _mm_set_epi64x(__v1, __v2);
175  }
176 
177  __v.__i = _mm_or_si128(__v.__i, __two);
178  __x = _mm_sub_pd(__v.__d, __three);
179  __m128d __m = _mm_mul_pd(__x, __x);
180  __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
181  }
182  while (__le == 0.0 || __le >= 1.0);
183 
184  double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
185  * __param.stddev());
186 
187  __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);
188 
189  _mm_storeu_pd(__f, __x);
190  __f += 2;
191  }
192 
193  if (__f != __t)
194  {
195  result_type __x, __y, __r2;
196 
197  __detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
198  __aurng(__urng);
199 
200  do
201  {
202  __x = result_type(2.0) * __aurng() - 1.0;
203  __y = result_type(2.0) * __aurng() - 1.0;
204  __r2 = __x * __x + __y * __y;
205  }
206  while (__r2 > 1.0 || __r2 == 0.0);
207 
208  const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
209  _M_saved = __x * __mult;
210  _M_saved_available = true;
211  *__f = __y * __mult * __param.stddev() + __param.mean();
212  }
213  }
214 #endif
215 
216 
217 _GLIBCXX_END_NAMESPACE_VERSION
218 } // namespace
219 
220 
221 #endif // _BITS_OPT_RANDOM_H
complex< _Tp > log(const complex< _Tp > &)
Return complex natural logarithm of z.
Definition: complex:824
complex< _Tp > sqrt(const complex< _Tp > &)
Return complex square root of z.
Definition: complex:933
ISO C++ entities toplevel namespace is std.