Skip to main content

simd_vector/
lib.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2
3mod vec4;
4mod vec8;
5
6pub use vec4::Vec4;
7pub use vec8::Vec8;
8
9// SLEEF constants for single-precision trig range reduction (u10 variants)
10const PI_A2F: f32 = 3.1414794921875;
11const PI_B2F: f32 = 0.00011315941810607910156;
12const PI_C2F: f32 = 1.9841872589410058936e-09;
13const TRIGRANGEMAX2F: f32 = 125.0;
14const M_1_PI_F: f32 = std::f32::consts::FRAC_1_PI;
15
16// SLEEF constants for exp
17const R_LN2F: f32 = 1.442695040888963407359924681001892137426645954152985934135449406931;
18const L2UF: f32 = 0.693145751953125;
19const L2LF: f32 = 1.428606765330187045e-06;
20
21// SLEEF Payne-Hanek table for single-precision pi reduction
22#[allow(clippy::excessive_precision)]
23const REMPI_TABSP: [f32; 416] = [
24    f32::from_bits(0x3E22F980),
25    f32::from_bits(0x335B9390),
26    f32::from_bits(0x2782A540),
27    f32::from_bits(0x9B762A0C),
28    f32::from_bits(0x3D0BE60C),
29    f32::from_bits(0x31DC9C88),
30    f32::from_bits(0x24A94FE0),
31    f32::from_bits(0x191D5F48),
32    f32::from_bits(0x3D0BE60C),
33    f32::from_bits(0x31DC9C88),
34    f32::from_bits(0x24A94FE0),
35    f32::from_bits(0x191D5F48),
36    f32::from_bits(0x3B3E60DC),
37    f32::from_bits(0xAED8DDF4),
38    f32::from_bits(0xA33580F4),
39    f32::from_bits(0x980A82E1),
40    f32::from_bits(0x3B3E60DC),
41    f32::from_bits(0xAED8DDF4),
42    f32::from_bits(0xA33580F4),
43    f32::from_bits(0x980A82E1),
44    f32::from_bits(0x3B3E60DC),
45    f32::from_bits(0xAED8DDF4),
46    f32::from_bits(0xA33580F4),
47    f32::from_bits(0x980A82E1),
48    f32::from_bits(0x3B3E60DC),
49    f32::from_bits(0xAED8DDF4),
50    f32::from_bits(0xA33580F4),
51    f32::from_bits(0x980A82E1),
52    f32::from_bits(0x3A79836C),
53    f32::from_bits(0x2F139104),
54    f32::from_bits(0x23A53F84),
55    f32::from_bits(0x17EAFA3F),
56    f32::from_bits(0x3A79836C),
57    f32::from_bits(0x2F139104),
58    f32::from_bits(0x23A53F84),
59    f32::from_bits(0x17EAFA3F),
60    f32::from_bits(0x39F306DC),
61    f32::from_bits(0x2D9C8828),
62    f32::from_bits(0x2294FE14),
63    f32::from_bits(0x96282E0B),
64    f32::from_bits(0x39660DB8),
65    f32::from_bits(0x2D9C8828),
66    f32::from_bits(0x2294FE14),
67    f32::from_bits(0x96282E0B),
68    f32::from_bits(0x38CC1B70),
69    f32::from_bits(0x2D9C8828),
70    f32::from_bits(0x2294FE14),
71    f32::from_bits(0x96282E0B),
72    f32::from_bits(0x381836E4),
73    f32::from_bits(0x2C644150),
74    f32::from_bits(0x2127F09C),
75    f32::from_bits(0x15AFA3EA),
76    f32::from_bits(0x36C1B724),
77    f32::from_bits(0x2BC882A4),
78    f32::from_bits(0x201FC274),
79    f32::from_bits(0x14BE8FAA),
80    f32::from_bits(0x36C1B724),
81    f32::from_bits(0x2BC882A4),
82    f32::from_bits(0x201FC274),
83    f32::from_bits(0x14BE8FAA),
84    f32::from_bits(0x36C1B724),
85    f32::from_bits(0x2BC882A4),
86    f32::from_bits(0x201FC274),
87    f32::from_bits(0x14BE8FAA),
88    f32::from_bits(0x36036E4C),
89    f32::from_bits(0x2B110548),
90    f32::from_bits(0x201FC274),
91    f32::from_bits(0x14BE8FAA),
92    f32::from_bits(0x335B9390),
93    f32::from_bits(0x2782A540),
94    f32::from_bits(0x9B762A0C),
95    f32::from_bits(0x0EFA9A6F),
96    f32::from_bits(0x335B9390),
97    f32::from_bits(0x2782A540),
98    f32::from_bits(0x9B762A0C),
99    f32::from_bits(0x0EFA9A6F),
100    f32::from_bits(0x335B9390),
101    f32::from_bits(0x2782A540),
102    f32::from_bits(0x9B762A0C),
103    f32::from_bits(0x0EFA9A6F),
104    f32::from_bits(0x335B9390),
105    f32::from_bits(0x2782A540),
106    f32::from_bits(0x9B762A0C),
107    f32::from_bits(0x0EFA9A6F),
108    f32::from_bits(0x335B9390),
109    f32::from_bits(0x2782A540),
110    f32::from_bits(0x9B762A0C),
111    f32::from_bits(0x0EFA9A6F),
112    f32::from_bits(0x335B9390),
113    f32::from_bits(0x2782A540),
114    f32::from_bits(0x9B762A0C),
115    f32::from_bits(0x0EFA9A6F),
116    f32::from_bits(0x32B72720),
117    f32::from_bits(0x2782A540),
118    f32::from_bits(0x9B762A0C),
119    f32::from_bits(0x0EFA9A6F),
120    f32::from_bits(0x31DC9C88),
121    f32::from_bits(0x24A94FE0),
122    f32::from_bits(0x191D5F48),
123    f32::from_bits(0x8C2CB224),
124    f32::from_bits(0x31DC9C88),
125    f32::from_bits(0x24A94FE0),
126    f32::from_bits(0x191D5F48),
127    f32::from_bits(0x8C2CB224),
128    f32::from_bits(0x31393910),
129    f32::from_bits(0x24A94FE0),
130    f32::from_bits(0x191D5F48),
131    f32::from_bits(0x8C2CB224),
132    f32::from_bits(0x3064E440),
133    f32::from_bits(0x24A94FE0),
134    f32::from_bits(0x191D5F48),
135    f32::from_bits(0x8C2CB224),
136    f32::from_bits(0x3064E440),
137    f32::from_bits(0x24A94FE0),
138    f32::from_bits(0x191D5F48),
139    f32::from_bits(0x8C2CB224),
140    f32::from_bits(0x2FC9C880),
141    f32::from_bits(0x24A94FE0),
142    f32::from_bits(0x191D5F48),
143    f32::from_bits(0x8C2CB224),
144    f32::from_bits(0x2F139104),
145    f32::from_bits(0x23A53F84),
146    f32::from_bits(0x17EAFA3C),
147    f32::from_bits(0x0CA9A6EE),
148    f32::from_bits(0x2D9C8828),
149    f32::from_bits(0x2294FE14),
150    f32::from_bits(0x96282E08),
151    f32::from_bits(0x8B32C890),
152    f32::from_bits(0x2D9C8828),
153    f32::from_bits(0x2294FE14),
154    f32::from_bits(0x96282E08),
155    f32::from_bits(0x8B32C890),
156    f32::from_bits(0x2D9C8828),
157    f32::from_bits(0x2294FE14),
158    f32::from_bits(0x96282E08),
159    f32::from_bits(0x8B32C890),
160    f32::from_bits(0x2C644150),
161    f32::from_bits(0x2127F09C),
162    f32::from_bits(0x15AFA3E8),
163    f32::from_bits(0x0A9A6EE0),
164    f32::from_bits(0x2C644150),
165    f32::from_bits(0x2127F09C),
166    f32::from_bits(0x15AFA3E8),
167    f32::from_bits(0x0A9A6EE0),
168    f32::from_bits(0x2C644150),
169    f32::from_bits(0x2127F09C),
170    f32::from_bits(0x15AFA3E8),
171    f32::from_bits(0x0A9A6EE0),
172    f32::from_bits(0x2BC882A4),
173    f32::from_bits(0x201FC274),
174    f32::from_bits(0x14BE8FA8),
175    f32::from_bits(0x09537703),
176    f32::from_bits(0x2B110548),
177    f32::from_bits(0x201FC274),
178    f32::from_bits(0x14BE8FA8),
179    f32::from_bits(0x09537703),
180    f32::from_bits(0x29882A54),
181    f32::from_bits(0x9B762A0C),
182    f32::from_bits(0x0EFA9A6C),
183    f32::from_bits(0x03B81B6C),
184    f32::from_bits(0x29882A54),
185    f32::from_bits(0x9B762A0C),
186    f32::from_bits(0x0EFA9A6C),
187    f32::from_bits(0x03B81B6C),
188    f32::from_bits(0x29882A54),
189    f32::from_bits(0x9B762A0C),
190    f32::from_bits(0x0EFA9A6C),
191    f32::from_bits(0x03B81B6C),
192    f32::from_bits(0x2782A540),
193    f32::from_bits(0x9B762A0C),
194    f32::from_bits(0x0EFA9A6C),
195    f32::from_bits(0x03B81B6C),
196    f32::from_bits(0x2782A540),
197    f32::from_bits(0x9B762A0C),
198    f32::from_bits(0x0EFA9A6C),
199    f32::from_bits(0x03B81B6C),
200    f32::from_bits(0x2782A540),
201    f32::from_bits(0x9B762A0C),
202    f32::from_bits(0x0EFA9A6C),
203    f32::from_bits(0x03B81B6C),
204    f32::from_bits(0x2782A540),
205    f32::from_bits(0x9B762A0C),
206    f32::from_bits(0x0EFA9A6C),
207    f32::from_bits(0x03B81B6C),
208    f32::from_bits(0x24A94FE0),
209    f32::from_bits(0x191D5F48),
210    f32::from_bits(0x8C2CB224),
211    f32::from_bits(0x0006DB15),
212    f32::from_bits(0x24A94FE0),
213    f32::from_bits(0x191D5F48),
214    f32::from_bits(0x8C2CB224),
215    f32::from_bits(0x0006DB15),
216    f32::from_bits(0x24A94FE0),
217    f32::from_bits(0x191D5F48),
218    f32::from_bits(0x8C2CB224),
219    f32::from_bits(0x0006DB15),
220    f32::from_bits(0x24A94FE0),
221    f32::from_bits(0x191D5F48),
222    f32::from_bits(0x8C2CB224),
223    f32::from_bits(0x0006DB15),
224    f32::from_bits(0x24A94FE0),
225    f32::from_bits(0x191D5F48),
226    f32::from_bits(0x8C2CB224),
227    f32::from_bits(0x0006DB15),
228    f32::from_bits(0x24A94FE0),
229    f32::from_bits(0x191D5F48),
230    f32::from_bits(0x8C2CB224),
231    f32::from_bits(0x0006DB15),
232    f32::from_bits(0x23A53F84),
233    f32::from_bits(0x17EAFA3C),
234    f32::from_bits(0x0CA9A6EC),
235    f32::from_bits(0x0181B6C5),
236    f32::from_bits(0x23A53F84),
237    f32::from_bits(0x17EAFA3C),
238    f32::from_bits(0x0CA9A6EC),
239    f32::from_bits(0x0181B6C5),
240    f32::from_bits(0x2294FE14),
241    f32::from_bits(0x96282E08),
242    f32::from_bits(0x8B32C890),
243    f32::from_bits(0x0006DB15),
244    f32::from_bits(0x2294FE14),
245    f32::from_bits(0x96282E08),
246    f32::from_bits(0x8B32C890),
247    f32::from_bits(0x0006DB15),
248    f32::from_bits(0x2127F09C),
249    f32::from_bits(0x15AFA3E8),
250    f32::from_bits(0x0A9A6EE0),
251    f32::from_bits(0x0006DB15),
252    f32::from_bits(0x2127F09C),
253    f32::from_bits(0x15AFA3E8),
254    f32::from_bits(0x0A9A6EE0),
255    f32::from_bits(0x0006DB15),
256    f32::from_bits(0x2127F09C),
257    f32::from_bits(0x15AFA3E8),
258    f32::from_bits(0x0A9A6EE0),
259    f32::from_bits(0x0006DB15),
260    f32::from_bits(0x201FC274),
261    f32::from_bits(0x14BE8FA8),
262    f32::from_bits(0x09537700),
263    f32::from_bits(0x0006DB15),
264    f32::from_bits(0x201FC274),
265    f32::from_bits(0x14BE8FA8),
266    f32::from_bits(0x09537700),
267    f32::from_bits(0x0006DB15),
268    f32::from_bits(0x1EFE13AC),
269    f32::from_bits(0x91382B2C),
270    f32::from_bits(0x8508FC90),
271    f32::from_bits(0x800004EB),
272    f32::from_bits(0x1EFE13AC),
273    f32::from_bits(0x91382B2C),
274    f32::from_bits(0x8508FC90),
275    f32::from_bits(0x800004EB),
276    f32::from_bits(0x1EFE13AC),
277    f32::from_bits(0x91382B2C),
278    f32::from_bits(0x8508FC90),
279    f32::from_bits(0x800004EB),
280    f32::from_bits(0x1E7C2758),
281    f32::from_bits(0x91382B2C),
282    f32::from_bits(0x8508FC90),
283    f32::from_bits(0x800004EB),
284    f32::from_bits(0x1DF84EB0),
285    f32::from_bits(0x91382B2C),
286    f32::from_bits(0x8508FC90),
287    f32::from_bits(0x800004EB),
288    f32::from_bits(0x3D709D5C),
289    f32::from_bits(0x3251F534),
290    f32::from_bits(0x265DC0D8),
291    f32::from_bits(0x1B58A566),
292    f32::from_bits(0x3CE13ABC),
293    f32::from_bits(0x31A3EA68),
294    f32::from_bits(0x265DC0D8),
295    f32::from_bits(0x1B58A566),
296    f32::from_bits(0x3C42757C),
297    f32::from_bits(0x308FA9A4),
298    f32::from_bits(0x25BB81B4),
299    f32::from_bits(0x1AB14ACD),
300    f32::from_bits(0x3B84EAF8),
301    f32::from_bits(0x308FA9A4),
302    f32::from_bits(0x25BB81B4),
303    f32::from_bits(0x1AB14ACD),
304    f32::from_bits(0x391D5F48),
305    f32::from_bits(0xAC2CB224),
306    f32::from_bits(0x1E5B6294),
307    f32::from_bits(0x12CC9E22),
308    f32::from_bits(0x391D5F48),
309    f32::from_bits(0xAC2CB224),
310    f32::from_bits(0x1E5B6294),
311    f32::from_bits(0x12CC9E22),
312    f32::from_bits(0x391D5F48),
313    f32::from_bits(0xAC2CB224),
314    f32::from_bits(0x1E5B6294),
315    f32::from_bits(0x12CC9E22),
316    f32::from_bits(0x391D5F48),
317    f32::from_bits(0xAC2CB224),
318    f32::from_bits(0x1E5B6294),
319    f32::from_bits(0x12CC9E22),
320    f32::from_bits(0x391D5F48),
321    f32::from_bits(0xAC2CB224),
322    f32::from_bits(0x1E5B6294),
323    f32::from_bits(0x12CC9E22),
324    f32::from_bits(0x37EAFA3C),
325    f32::from_bits(0x2CA9A6EC),
326    f32::from_bits(0x2181B6C4),
327    f32::from_bits(0x1615993C),
328    f32::from_bits(0x37EAFA3C),
329    f32::from_bits(0x2CA9A6EC),
330    f32::from_bits(0x2181B6C4),
331    f32::from_bits(0x1615993C),
332    f32::from_bits(0x37EAFA3C),
333    f32::from_bits(0x2CA9A6EC),
334    f32::from_bits(0x2181B6C4),
335    f32::from_bits(0x1615993C),
336    f32::from_bits(0x3755F47C),
337    f32::from_bits(0x2BA69BB8),
338    f32::from_bits(0x1E5B6294),
339    f32::from_bits(0x12CC9E22),
340    f32::from_bits(0x36ABE8F8),
341    f32::from_bits(0x2BA69BB8),
342    f32::from_bits(0x1E5B6294),
343    f32::from_bits(0x12CC9E22),
344    f32::from_bits(0x35AFA3E8),
345    f32::from_bits(0x2A9A6EE0),
346    f32::from_bits(0x1E5B6294),
347    f32::from_bits(0x12CC9E22),
348    f32::from_bits(0x35AFA3E8),
349    f32::from_bits(0x2A9A6EE0),
350    f32::from_bits(0x1E5B6294),
351    f32::from_bits(0x12CC9E22),
352    f32::from_bits(0x34BE8FA8),
353    f32::from_bits(0x29537700),
354    f32::from_bits(0x1E5B6294),
355    f32::from_bits(0x12CC9E22),
356    f32::from_bits(0x34BE8FA8),
357    f32::from_bits(0x29537700),
358    f32::from_bits(0x1E5B6294),
359    f32::from_bits(0x12CC9E22),
360    f32::from_bits(0x33FA3EA4),
361    f32::from_bits(0x28A6EE04),
362    f32::from_bits(0x1DB6C528),
363    f32::from_bits(0x12CC9E22),
364    f32::from_bits(0x33FA3EA4),
365    f32::from_bits(0x28A6EE04),
366    f32::from_bits(0x1DB6C528),
367    f32::from_bits(0x12CC9E22),
368    f32::from_bits(0x33747D4C),
369    f32::from_bits(0x279BB818),
370    f32::from_bits(0x1CDB14AC),
371    f32::from_bits(0x10C9E21D),
372    f32::from_bits(0x32E8FA98),
373    f32::from_bits(0x279BB818),
374    f32::from_bits(0x1CDB14AC),
375    f32::from_bits(0x10C9E21D),
376    f32::from_bits(0x3251F534),
377    f32::from_bits(0x265DC0D8),
378    f32::from_bits(0x1B58A564),
379    f32::from_bits(0x1013C439),
380    f32::from_bits(0x31A3EA68),
381    f32::from_bits(0x265DC0D8),
382    f32::from_bits(0x1B58A564),
383    f32::from_bits(0x1013C439),
384    f32::from_bits(0x308FA9A4),
385    f32::from_bits(0x25BB81B4),
386    f32::from_bits(0x1AB14ACC),
387    f32::from_bits(0x0E9E21C8),
388    f32::from_bits(0x308FA9A4),
389    f32::from_bits(0x25BB81B4),
390    f32::from_bits(0x1AB14ACC),
391    f32::from_bits(0x0E9E21C8),
392    f32::from_bits(0x2EFA9A6C),
393    f32::from_bits(0x23B81B6C),
394    f32::from_bits(0x1725664C),
395    f32::from_bits(0x0C443904),
396    f32::from_bits(0x2EFA9A6C),
397    f32::from_bits(0x23B81B6C),
398    f32::from_bits(0x1725664C),
399    f32::from_bits(0x0C443904),
400    f32::from_bits(0x2EFA9A6C),
401    f32::from_bits(0x23B81B6C),
402    f32::from_bits(0x1725664C),
403    f32::from_bits(0x0C443904),
404    f32::from_bits(0x2EFA9A6C),
405    f32::from_bits(0x23B81B6C),
406    f32::from_bits(0x1725664C),
407    f32::from_bits(0x0C443904),
408    f32::from_bits(0x2E7534DC),
409    f32::from_bits(0x22E06DB0),
410    f32::from_bits(0x1725664C),
411    f32::from_bits(0x0C443904),
412    f32::from_bits(0x2DEA69BC),
413    f32::from_bits(0xA17C9274),
414    f32::from_bits(0x95D4CD84),
415    f32::from_bits(0x8ADE37DF),
416    f32::from_bits(0x2D54D374),
417    f32::from_bits(0x2240DB60),
418    f32::from_bits(0x1725664C),
419    f32::from_bits(0x0C443904),
420    f32::from_bits(0x2CA9A6EC),
421    f32::from_bits(0x2181B6C4),
422    f32::from_bits(0x1615993C),
423    f32::from_bits(0x09872084),
424    f32::from_bits(0x2BA69BB8),
425    f32::from_bits(0x1E5B6294),
426    f32::from_bits(0x12CC9E20),
427    f32::from_bits(0x07641080),
428    f32::from_bits(0x2BA69BB8),
429    f32::from_bits(0x1E5B6294),
430    f32::from_bits(0x12CC9E20),
431    f32::from_bits(0x07641080),
432    f32::from_bits(0x2A9A6EE0),
433    f32::from_bits(0x1E5B6294),
434    f32::from_bits(0x12CC9E20),
435    f32::from_bits(0x07641080),
436    f32::from_bits(0x00000000),
437    f32::from_bits(0x00000000),
438    f32::from_bits(0x00000000),
439    f32::from_bits(0x00000000),
440];
441
442/// Scalar Payne-Hanek pi reduction for a single `f32`.
443///
444/// Returns `(reduced_hi, reduced_lo, quadrant)` where `reduced_hi + reduced_lo`
445/// is `a` reduced modulo pi/2, and `quadrant` encodes which quadrant of the
446/// original angle was occupied.
447fn rempif_scalar(a: f32) -> (f32, f32, i32) {
448    // vilogb2k: extract exponent
449    let mut ex = ((a.to_bits() >> 23) & 0xFF) as i32 - 0x7F;
450    ex -= 25;
451    let mut a = a;
452
453    // Scale down very large inputs (original exponent > 90) to prevent
454    // f32 intermediate precision loss: without this, a * table[idx] products
455    // are so large that rempisubf cannot extract quadrant/fractional bits.
456    if ex > 90 - 25 {
457        // vldexp3: add -64 to exponent field
458        a = f32::from_bits((a.to_bits() as i32 + (-64i32 << 23)) as u32);
459    }
460    // Clamp negative to 0: vandnot with arithmetic right shift
461    if ex < 0 {
462        ex = 0;
463    }
464    let idx = (ex as usize) * 4;
465
466    // First multiply
467    let (mut x_hi, mut x_lo) = df_mul_f_f(a, REMPI_TABSP[idx]);
468    // rempisubf: extract quadrant
469    let y = (x_hi * 4.0).round();
470    let mut q = (y - (x_hi.round()) * 4.0) as i32;
471    x_hi -= y * 0.25;
472    let (x_hi2, x_lo2) = df_normalize(x_hi, x_lo);
473    x_hi = x_hi2;
474    x_lo = x_lo2;
475
476    // Second multiply
477    let (y_hi, y_lo) = df_mul_f_f(a, REMPI_TABSP[idx + 1]);
478    let (x_hi2, x_lo2) = df_add2_f2_f2(x_hi, x_lo, y_hi, y_lo);
479    x_hi = x_hi2;
480    x_lo = x_lo2;
481
482    let y2 = (x_hi * 4.0).round();
483    q += (y2 - (x_hi.round()) * 4.0) as i32;
484    x_hi -= y2 * 0.25;
485    let (x_hi2, x_lo2) = df_normalize(x_hi, x_lo);
486    x_hi = x_hi2;
487    x_lo = x_lo2;
488
489    // Third and fourth multiply
490    let (y_hi, y_lo) = (REMPI_TABSP[idx + 2], REMPI_TABSP[idx + 3]);
491    let (y_hi2, y_lo2) = df_mul_f2_f(y_hi, y_lo, a);
492    let (x_hi2, x_lo2) = df_add2_f2_f2(x_hi, x_lo, y_hi2, y_lo2);
493    x_hi = x_hi2;
494    x_lo = x_lo2;
495    let (x_hi2, x_lo2) = df_normalize(x_hi, x_lo);
496    x_hi = x_hi2;
497    x_lo = x_lo2;
498
499    // Multiply by 2*pi
500    const TWO_PI_HI: f32 = 3.1415927410125732422 * 2.0;
501    const TWO_PI_LO: f32 = -8.7422776573475857731e-08 * 2.0;
502    let (r_hi, r_lo) = df_mul_f2_f2(x_hi, x_lo, TWO_PI_HI, TWO_PI_LO);
503
504    // For very small inputs, return a directly
505    if a.abs() < 0.7 {
506        return (a, 0.0, 0);
507    }
508
509    (r_hi, r_lo, q)
510}
511
512/// Normalizes a double-float pair so that `hi` carries the leading bits.
513///
514/// Returns `(s, err)` where `s = a_hi + a_lo` and `err` captures the rounding error.
515#[inline(always)]
516fn df_normalize(a_hi: f32, a_lo: f32) -> (f32, f32) {
517    let s = a_hi + a_lo;
518    (s, (a_hi - s) + a_lo)
519}
520
521/// Error-free addition of two double-float pairs: `(a_hi, a_lo) + (b_hi, b_lo)`.
522#[inline(always)]
523fn df_add2_f2_f2(a_hi: f32, a_lo: f32, b_hi: f32, b_lo: f32) -> (f32, f32) {
524    let s = a_hi + b_hi;
525    let v = s - a_hi;
526    let t = (a_hi - (s - v)) + (b_hi - v);
527    (s, t + a_lo + b_lo)
528}
529
530/// Error-free multiplication of two `f32` values using FMA. Returns `(product, error)`.
531#[inline(always)]
532fn df_mul_f_f(a: f32, b: f32) -> (f32, f32) {
533    let s = a * b;
534    let t = a.mul_add(b, -s);
535    (s, t)
536}
537
538/// Multiplies a double-float pair `(a_hi, a_lo)` by a single `f32`.
539#[inline(always)]
540fn df_mul_f2_f(a_hi: f32, a_lo: f32, b: f32) -> (f32, f32) {
541    let s = a_hi * b;
542    let t = a_hi.mul_add(b, -s) + a_lo * b;
543    (s, t)
544}
545
546/// Multiplies two double-float pairs: `(a_hi, a_lo) * (b_hi, b_lo)`.
547#[inline(always)]
548fn df_mul_f2_f2(a_hi: f32, a_lo: f32, b_hi: f32, b_lo: f32) -> (f32, f32) {
549    let s = a_hi * b_hi;
550    let t = a_hi.mul_add(b_hi, -s) + a_hi * b_lo + a_lo * b_hi;
551    (s, t)
552}
553
554#[cfg(test)]
555mod tests;