Skip to main content

rill_core/math/vector/simd/
wide.rs

//! Cross-platform SIMD implementations built on the `wide` crate.
//!
//! This module provides vector types backed by the `wide` library,
//! which offers portable SIMD operations with a fallback to scalar implementations.
//!
//! Types:
//! - `F32x4`, `F32x8` for `f32`
//! - `F64x2`, `F64x4` for `f64`
9
10use crate::Transcendental;
11use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
12use wide::{f32x4, f32x8, f64x2, f64x4, CmpEq, CmpGe, CmpGt, CmpLe, CmpLt, CmpNe};
13
14use crate::math::vector::traits::{Vector, VectorMask, VectorTranscendental};
15
16// -----------------------------------------------------------------------------
17// Обёртки над типами wide для реализации трейта Vector
18// -----------------------------------------------------------------------------
19
20/// SIMD вектор из 4 элементов `f32`
21#[derive(Copy, Clone, Debug, PartialEq)]
22pub struct F32x4(f32x4);
23
24/// SIMD вектор из 8 элементов `f32`
25#[derive(Copy, Clone, Debug, PartialEq)]
26pub struct F32x8(f32x8);
27
28/// SIMD вектор из 2 элементов `f64`
29#[derive(Copy, Clone, Debug, PartialEq)]
30pub struct F64x2(f64x2);
31
32/// SIMD вектор из 4 элементов `f64`
33#[derive(Copy, Clone, Debug, PartialEq)]
34pub struct F64x4(f64x4);
35
36// -----------------------------------------------------------------------------
37// Реализации Default
38// -----------------------------------------------------------------------------
39
40impl Default for F32x4 {
41    fn default() -> Self {
42        Self(f32x4::splat(0.0))
43    }
44}
45
46impl Default for F32x8 {
47    fn default() -> Self {
48        Self(f32x8::splat(0.0))
49    }
50}
51
52impl Default for F64x2 {
53    fn default() -> Self {
54        Self(f64x2::splat(0.0))
55    }
56}
57
58impl Default for F64x4 {
59    fn default() -> Self {
60        Self(f64x4::splat(0.0))
61    }
62}
63
64// -----------------------------------------------------------------------------
65// Реализация Vector для F32x4
66// -----------------------------------------------------------------------------
67
68impl Vector<f32, 4> for F32x4 {
69    fn splat(value: f32) -> Self {
70        F32x4(f32x4::splat(value))
71    }
72
73    fn load(slice: &[f32]) -> Self {
74        let mut arr = [0.0f32; 4];
75        arr.copy_from_slice(&slice[0..4]);
76        F32x4(f32x4::from(arr))
77    }
78
79    fn store(&self, slice: &mut [f32]) {
80        let arr: [f32; 4] = self.0.into();
81        slice[0..4].copy_from_slice(&arr);
82    }
83
84    fn extract(&self, index: usize) -> f32 {
85        let arr: [f32; 4] = self.0.into();
86        arr[index]
87    }
88
89    fn insert(&self, index: usize, value: f32) -> Self {
90        let mut arr: [f32; 4] = self.0.into();
91        arr[index] = value;
92        F32x4(f32x4::from(arr))
93    }
94
95    fn add(&self, other: &Self) -> Self {
96        F32x4(self.0 + other.0)
97    }
98
99    fn sub(&self, other: &Self) -> Self {
100        F32x4(self.0 - other.0)
101    }
102
103    fn mul(&self, other: &Self) -> Self {
104        F32x4(self.0 * other.0)
105    }
106
107    fn div(&self, other: &Self) -> Self {
108        F32x4(self.0 / other.0)
109    }
110
111    fn rem(&self, other: &Self) -> Self {
112        // wide не предоставляет операцию остатка, реализуем покомпонентно
113        let a: [f32; 4] = self.0.into();
114        let b: [f32; 4] = other.0.into();
115        let mut arr = [0.0f32; 4];
116        for i in 0..4 {
117            arr[i] = a[i] % b[i];
118        }
119        F32x4(f32x4::from(arr))
120    }
121
122    fn neg(&self) -> Self {
123        F32x4(-self.0)
124    }
125
126    fn abs(&self) -> Self {
127        F32x4(self.0.abs())
128    }
129
130    fn min(&self, other: &Self) -> Self {
131        F32x4(self.0.min(other.0))
132    }
133
134    fn max(&self, other: &Self) -> Self {
135        F32x4(self.0.max(other.0))
136    }
137
138    fn clamp(&self, min: &Self, max: &Self) -> Self {
139        // clamp = self.max(min).min(max)
140        F32x4(self.0.max(min.0).min(max.0))
141    }
142
143}
144
145impl VectorTranscendental<f32, 4> for F32x4 {
146    fn sqrt(&self) -> Self { F32x4(self.0.sqrt()) }
147    fn exp(&self) -> Self { F32x4(self.0.exp()) }
148    fn ln(&self) -> Self { F32x4(self.0.ln()) }
149    fn sin(&self) -> Self { F32x4(self.0.sin()) }
150    fn cos(&self) -> Self { F32x4(self.0.cos()) }
151    fn tan(&self) -> Self { F32x4(self.0.tan()) }
152}
153
154// -----------------------------------------------------------------------------
155// Реализация Vector для F32x8
156// -----------------------------------------------------------------------------
157
158impl Vector<f32, 8> for F32x8 {
159    fn splat(value: f32) -> Self {
160        F32x8(f32x8::splat(value))
161    }
162
163    fn load(slice: &[f32]) -> Self {
164        let mut arr = [0.0f32; 8];
165        arr.copy_from_slice(&slice[0..8]);
166        F32x8(f32x8::from(arr))
167    }
168
169    fn store(&self, slice: &mut [f32]) {
170        let arr: [f32; 8] = self.0.into();
171        slice[0..8].copy_from_slice(&arr);
172    }
173
174    fn extract(&self, index: usize) -> f32 {
175        let arr: [f32; 8] = self.0.into();
176        arr[index]
177    }
178
179    fn insert(&self, index: usize, value: f32) -> Self {
180        let mut arr: [f32; 8] = self.0.into();
181        arr[index] = value;
182        F32x8(f32x8::from(arr))
183    }
184
185    fn add(&self, other: &Self) -> Self {
186        F32x8(self.0 + other.0)
187    }
188
189    fn sub(&self, other: &Self) -> Self {
190        F32x8(self.0 - other.0)
191    }
192
193    fn mul(&self, other: &Self) -> Self {
194        F32x8(self.0 * other.0)
195    }
196
197    fn div(&self, other: &Self) -> Self {
198        F32x8(self.0 / other.0)
199    }
200
201    fn rem(&self, other: &Self) -> Self {
202        let a: [f32; 8] = self.0.into();
203        let b: [f32; 8] = other.0.into();
204        let mut arr = [0.0f32; 8];
205        for i in 0..8 {
206            arr[i] = a[i] % b[i];
207        }
208        F32x8(f32x8::from(arr))
209    }
210
211    fn neg(&self) -> Self {
212        F32x8(-self.0)
213    }
214
215    fn abs(&self) -> Self {
216        F32x8(self.0.abs())
217    }
218
219    fn min(&self, other: &Self) -> Self {
220        F32x8(self.0.min(other.0))
221    }
222
223    fn max(&self, other: &Self) -> Self {
224        F32x8(self.0.max(other.0))
225    }
226
227    fn clamp(&self, min: &Self, max: &Self) -> Self {
228        F32x8(self.0.max(min.0).min(max.0))
229    }
230
231}
232
233impl VectorTranscendental<f32, 8> for F32x8 {
234    fn sqrt(&self) -> Self { F32x8(self.0.sqrt()) }
235    fn exp(&self) -> Self { F32x8(self.0.exp()) }
236    fn ln(&self) -> Self { F32x8(self.0.ln()) }
237    fn sin(&self) -> Self { F32x8(self.0.sin()) }
238    fn cos(&self) -> Self { F32x8(self.0.cos()) }
239    fn tan(&self) -> Self { F32x8(self.0.tan()) }
240}
241
242// -----------------------------------------------------------------------------
243// Реализация Vector для F64x2
244// -----------------------------------------------------------------------------
245
246impl Vector<f64, 2> for F64x2 {
247    fn splat(value: f64) -> Self {
248        F64x2(f64x2::splat(value))
249    }
250
251    fn load(slice: &[f64]) -> Self {
252        let mut arr = [0.0f64; 2];
253        arr.copy_from_slice(&slice[0..2]);
254        F64x2(f64x2::from(arr))
255    }
256
257    fn store(&self, slice: &mut [f64]) {
258        let arr: [f64; 2] = self.0.into();
259        slice[0..2].copy_from_slice(&arr);
260    }
261
262    fn extract(&self, index: usize) -> f64 {
263        let arr: [f64; 2] = self.0.into();
264        arr[index]
265    }
266
267    fn insert(&self, index: usize, value: f64) -> Self {
268        let mut arr: [f64; 2] = self.0.into();
269        arr[index] = value;
270        F64x2(f64x2::from(arr))
271    }
272
273    fn add(&self, other: &Self) -> Self {
274        F64x2(self.0 + other.0)
275    }
276
277    fn sub(&self, other: &Self) -> Self {
278        F64x2(self.0 - other.0)
279    }
280
281    fn mul(&self, other: &Self) -> Self {
282        F64x2(self.0 * other.0)
283    }
284
285    fn div(&self, other: &Self) -> Self {
286        F64x2(self.0 / other.0)
287    }
288
289    fn rem(&self, other: &Self) -> Self {
290        let a: [f64; 2] = self.0.into();
291        let b: [f64; 2] = other.0.into();
292        let mut arr = [0.0f64; 2];
293        for i in 0..2 {
294            arr[i] = a[i] % b[i];
295        }
296        F64x2(f64x2::from(arr))
297    }
298
299    fn neg(&self) -> Self {
300        F64x2(-self.0)
301    }
302
303    fn abs(&self) -> Self {
304        F64x2(self.0.abs())
305    }
306
307    fn min(&self, other: &Self) -> Self {
308        F64x2(self.0.min(other.0))
309    }
310
311    fn max(&self, other: &Self) -> Self {
312        F64x2(self.0.max(other.0))
313    }
314
315    fn clamp(&self, min: &Self, max: &Self) -> Self {
316        F64x2(self.0.max(min.0).min(max.0))
317    }
318
319}
320
321impl VectorTranscendental<f64, 2> for F64x2 {
322    fn sqrt(&self) -> Self { F64x2(self.0.sqrt()) }
323    fn exp(&self) -> Self { F64x2(self.0.exp()) }
324    fn ln(&self) -> Self { F64x2(self.0.ln()) }
325    fn sin(&self) -> Self { F64x2(self.0.sin()) }
326    fn cos(&self) -> Self { F64x2(self.0.cos()) }
327    fn tan(&self) -> Self { F64x2(self.0.tan()) }
328}
329
330// -----------------------------------------------------------------------------
331// Реализация Vector для F64x4
332// -----------------------------------------------------------------------------
333
334impl Vector<f64, 4> for F64x4 {
335    fn splat(value: f64) -> Self {
336        F64x4(f64x4::splat(value))
337    }
338
339    fn load(slice: &[f64]) -> Self {
340        let mut arr = [0.0f64; 4];
341        arr.copy_from_slice(&slice[0..4]);
342        F64x4(f64x4::from(arr))
343    }
344
345    fn store(&self, slice: &mut [f64]) {
346        let arr: [f64; 4] = self.0.into();
347        slice[0..4].copy_from_slice(&arr);
348    }
349
350    fn extract(&self, index: usize) -> f64 {
351        let arr: [f64; 4] = self.0.into();
352        arr[index]
353    }
354
355    fn insert(&self, index: usize, value: f64) -> Self {
356        let mut arr: [f64; 4] = self.0.into();
357        arr[index] = value;
358        F64x4(f64x4::from(arr))
359    }
360
361    fn add(&self, other: &Self) -> Self {
362        F64x4(self.0 + other.0)
363    }
364
365    fn sub(&self, other: &Self) -> Self {
366        F64x4(self.0 - other.0)
367    }
368
369    fn mul(&self, other: &Self) -> Self {
370        F64x4(self.0 * other.0)
371    }
372
373    fn div(&self, other: &Self) -> Self {
374        F64x4(self.0 / other.0)
375    }
376
377    fn rem(&self, other: &Self) -> Self {
378        let a: [f64; 4] = self.0.into();
379        let b: [f64; 4] = other.0.into();
380        let mut arr = [0.0f64; 4];
381        for i in 0..4 {
382            arr[i] = a[i] % b[i];
383        }
384        F64x4(f64x4::from(arr))
385    }
386
387    fn neg(&self) -> Self {
388        F64x4(-self.0)
389    }
390
391    fn abs(&self) -> Self {
392        F64x4(self.0.abs())
393    }
394
395    fn min(&self, other: &Self) -> Self {
396        F64x4(self.0.min(other.0))
397    }
398
399    fn max(&self, other: &Self) -> Self {
400        F64x4(self.0.max(other.0))
401    }
402
403    fn clamp(&self, min: &Self, max: &Self) -> Self {
404        F64x4(self.0.max(min.0).min(max.0))
405    }
406
407}
408
409impl VectorTranscendental<f64, 4> for F64x4 {
410    fn sqrt(&self) -> Self { F64x4(self.0.sqrt()) }
411    fn exp(&self) -> Self { F64x4(self.0.exp()) }
412    fn ln(&self) -> Self { F64x4(self.0.ln()) }
413    fn sin(&self) -> Self { F64x4(self.0.sin()) }
414    fn cos(&self) -> Self { F64x4(self.0.cos()) }
415    fn tan(&self) -> Self { F64x4(self.0.tan()) }
416}
417
418// -----------------------------------------------------------------------------
419// Реализация VectorMask
420// -----------------------------------------------------------------------------
421
422impl VectorMask<f64, 4> for F64x4 {
423    // In wide 0.7, comparison masks are the same type as the vector,
424    // where -1.0 = true and 0.0 = false.
425    type Mask = F64x4;
426
427    fn eq(&self, other: &Self) -> F64x4 {
428        F64x4(self.0.cmp_eq(other.0))
429    }
430
431    fn ne(&self, other: &Self) -> F64x4 {
432        F64x4(self.0.cmp_ne(other.0))
433    }
434
435    fn gt(&self, other: &Self) -> F64x4 {
436        F64x4(self.0.cmp_gt(other.0))
437    }
438
439    fn ge(&self, other: &Self) -> F64x4 {
440        F64x4(self.0.cmp_ge(other.0))
441    }
442
443    fn lt(&self, other: &Self) -> F64x4 {
444        F64x4(self.0.cmp_lt(other.0))
445    }
446
447    fn le(&self, other: &Self) -> F64x4 {
448        F64x4(self.0.cmp_le(other.0))
449    }
450
451    fn select(&self, other: &Self, mask: F64x4) -> Self {
452        // f64x4::blend(self=mask, t=true_vals, f=false_vals)
453        // returns t where self != 0, f where self == 0
454        F64x4(mask.0.blend(self.0, other.0))
455    }
456
457    fn all(mask: &F64x4) -> bool {
458        // move_mask returns bit i = sign bit of lane i
459        // For -1.0 (true), sign bit is 1; for 0.0 (false), sign bit is 0.
460        mask.0.move_mask() == 0b1111
461    }
462}
463
464// -----------------------------------------------------------------------------
465// Реализации операторов (Add, Sub, Mul, Div, Rem, Neg)
466// -----------------------------------------------------------------------------
467
468impl Add for F32x4 {
469    type Output = Self;
470    fn add(self, rhs: Self) -> Self {
471        Self(self.0 + rhs.0)
472    }
473}
474
475impl Sub for F32x4 {
476    type Output = Self;
477    fn sub(self, rhs: Self) -> Self {
478        Self(self.0 - rhs.0)
479    }
480}
481
482impl Mul for F32x4 {
483    type Output = Self;
484    fn mul(self, rhs: Self) -> Self {
485        Self(self.0 * rhs.0)
486    }
487}
488
489impl Div for F32x4 {
490    type Output = Self;
491    fn div(self, rhs: Self) -> Self {
492        Self(self.0 / rhs.0)
493    }
494}
495
496impl Rem for F32x4 {
497    type Output = Self;
498    fn rem(self, rhs: Self) -> Self {
499        let a: [f32; 4] = self.0.into();
500        let b: [f32; 4] = rhs.0.into();
501        let mut arr = [0.0f32; 4];
502        for i in 0..4 {
503            arr[i] = a[i] % b[i];
504        }
505        Self(f32x4::from(arr))
506    }
507}
508
509impl Neg for F32x4 {
510    type Output = Self;
511    fn neg(self) -> Self {
512        Self(-self.0)
513    }
514}
515
516// Аналогично для F32x8, F64x2, F64x4
517
518impl Add for F32x8 {
519    type Output = Self;
520    fn add(self, rhs: Self) -> Self {
521        Self(self.0 + rhs.0)
522    }
523}
524
525impl Sub for F32x8 {
526    type Output = Self;
527    fn sub(self, rhs: Self) -> Self {
528        Self(self.0 - rhs.0)
529    }
530}
531
532impl Mul for F32x8 {
533    type Output = Self;
534    fn mul(self, rhs: Self) -> Self {
535        Self(self.0 * rhs.0)
536    }
537}
538
539impl Div for F32x8 {
540    type Output = Self;
541    fn div(self, rhs: Self) -> Self {
542        Self(self.0 / rhs.0)
543    }
544}
545
546impl Rem for F32x8 {
547    type Output = Self;
548    fn rem(self, rhs: Self) -> Self {
549        let a: [f32; 8] = self.0.into();
550        let b: [f32; 8] = rhs.0.into();
551        let mut arr = [0.0f32; 8];
552        for i in 0..8 {
553            arr[i] = a[i] % b[i];
554        }
555        Self(f32x8::from(arr))
556    }
557}
558
559impl Neg for F32x8 {
560    type Output = Self;
561    fn neg(self) -> Self {
562        Self(-self.0)
563    }
564}
565
566impl Add for F64x2 {
567    type Output = Self;
568    fn add(self, rhs: Self) -> Self {
569        Self(self.0 + rhs.0)
570    }
571}
572
573impl Sub for F64x2 {
574    type Output = Self;
575    fn sub(self, rhs: Self) -> Self {
576        Self(self.0 - rhs.0)
577    }
578}
579
580impl Mul for F64x2 {
581    type Output = Self;
582    fn mul(self, rhs: Self) -> Self {
583        Self(self.0 * rhs.0)
584    }
585}
586
587impl Div for F64x2 {
588    type Output = Self;
589    fn div(self, rhs: Self) -> Self {
590        Self(self.0 / rhs.0)
591    }
592}
593
594impl Rem for F64x2 {
595    type Output = Self;
596    fn rem(self, rhs: Self) -> Self {
597        let a: [f64; 2] = self.0.into();
598        let b: [f64; 2] = rhs.0.into();
599        let mut arr = [0.0f64; 2];
600        for i in 0..2 {
601            arr[i] = a[i] % b[i];
602        }
603        Self(f64x2::from(arr))
604    }
605}
606
607impl Neg for F64x2 {
608    type Output = Self;
609    fn neg(self) -> Self {
610        Self(-self.0)
611    }
612}
613
614impl Add for F64x4 {
615    type Output = Self;
616    fn add(self, rhs: Self) -> Self {
617        Self(self.0 + rhs.0)
618    }
619}
620
621impl Sub for F64x4 {
622    type Output = Self;
623    fn sub(self, rhs: Self) -> Self {
624        Self(self.0 - rhs.0)
625    }
626}
627
628impl Mul for F64x4 {
629    type Output = Self;
630    fn mul(self, rhs: Self) -> Self {
631        Self(self.0 * rhs.0)
632    }
633}
634
635impl Div for F64x4 {
636    type Output = Self;
637    fn div(self, rhs: Self) -> Self {
638        Self(self.0 / rhs.0)
639    }
640}
641
642impl Rem for F64x4 {
643    type Output = Self;
644    fn rem(self, rhs: Self) -> Self {
645        let a: [f64; 4] = self.0.into();
646        let b: [f64; 4] = rhs.0.into();
647        let mut arr = [0.0f64; 4];
648        for i in 0..4 {
649            arr[i] = a[i] % b[i];
650        }
651        Self(f64x4::from(arr))
652    }
653}
654
655impl Neg for F64x4 {
656    type Output = Self;
657    fn neg(self) -> Self {
658        Self(-self.0)
659    }
660}
661
662// -----------------------------------------------------------------------------
663// Unit tests
664// -----------------------------------------------------------------------------
665
#[cfg(test)]
mod tests {
    use super::*;
    use crate::math::vector::traits::VectorMask;

    /// `+` and `*` on `F32x4` operate lane by lane.
    #[test]
    fn test_f32x4_basic() {
        let lhs = F32x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let rhs = F32x4::load(&[5.0, 6.0, 7.0, 8.0]);
        let mut out = [0.0f32; 4];

        (lhs + rhs).store(&mut out);
        assert_eq!(out, [6.0, 8.0, 10.0, 12.0]);

        (lhs * rhs).store(&mut out);
        assert_eq!(out, [5.0, 12.0, 21.0, 32.0]);
    }

    /// Vector `sin` agrees with scalar `f32::sin` to within 1e-5 per lane.
    #[test]
    fn test_f32x4_math() {
        let input = [0.0f32, 0.5, 1.0, 2.0];
        let mut out = [0.0f32; 4];
        F32x4::load(&input).sin().store(&mut out);

        for (&got, &x) in out.iter().zip(input.iter()) {
            assert!((got - x.sin()).abs() < 1e-5);
        }
    }

    /// `+` on `F64x2` operates lane by lane.
    #[test]
    fn test_f64x2_basic() {
        let lhs = F64x2::load(&[1.0, 2.0]);
        let rhs = F64x2::load(&[3.0, 4.0]);
        let mut out = [0.0f64; 2];

        (lhs + rhs).store(&mut out);
        assert_eq!(out, [4.0, 6.0]);
    }

    /// `+` and `*` on `F64x4` operate lane by lane.
    #[test]
    fn test_f64x4_basic() {
        let lhs = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let rhs = F64x4::load(&[5.0, 6.0, 7.0, 8.0]);
        let mut out = [0.0f64; 4];

        (lhs + rhs).store(&mut out);
        assert_eq!(out, [6.0, 8.0, 10.0, 12.0]);

        (lhs * rhs).store(&mut out);
        assert_eq!(out, [5.0, 12.0, 21.0, 32.0]);
    }

    /// Vector `sqrt` and `exp` agree with the scalar `f64` versions to within
    /// 1e-12 per lane.
    #[test]
    fn test_f64x4_math() {
        let input = [0.0f64, 0.5, 1.0, 2.0];
        let vector = F64x4::load(&input);
        let mut out = [0.0f64; 4];

        vector.sqrt().store(&mut out);
        for (&got, &x) in out.iter().zip(input.iter()) {
            assert!((got - x.sqrt()).abs() < 1e-12);
        }

        vector.exp().store(&mut out);
        for (&got, &x) in out.iter().zip(input.iter()) {
            assert!((got - x.exp()).abs() < 1e-12);
        }
    }

    /// `lt` sets exactly the lanes where the left operand is smaller.
    #[test]
    fn test_f64x4_vector_mask_lt() {
        // Mask lanes are flags, not ordinary numbers, so they are read back
        // through `move_mask` (one bit per lane) instead of compared as floats.
        let values = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let limit = F64x4::load(&[3.0, 3.0, 3.0, 3.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::lt(&values, &limit);
        // Lanes 0 and 1 are below the limit.
        assert_eq!(mask.0.move_mask() & 0b1111, 0b0011);
    }

    /// `gt` sets exactly the lanes where the left operand is larger.
    #[test]
    fn test_f64x4_vector_mask_gt() {
        let values = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let limit = F64x4::load(&[2.0, 2.0, 2.0, 2.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::gt(&values, &limit);
        // Lanes 2 and 3 are above the limit.
        assert_eq!(mask.0.move_mask() & 0b1111, 0b1100);
    }

    /// `eq` sets exactly the lanes where both operands match.
    #[test]
    fn test_f64x4_vector_mask_eq() {
        let left = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let right = F64x4::load(&[1.0, 0.0, 3.0, 5.0]);
        let mask = <F64x4 as VectorMask<f64, 4>>::eq(&left, &right);
        // Lanes 0 and 2 match.
        assert_eq!(mask.0.move_mask() & 0b1111, 0b0101);
    }

    /// `all` is true only when every lane of the mask is set.
    #[test]
    fn test_f64x4_vector_mask_all() {
        let every_lane =
            <F64x4 as VectorMask<f64, 4>>::lt(&F64x4::splat(1.0), &F64x4::splat(2.0));
        assert!(<F64x4 as VectorMask<f64, 4>>::all(&every_lane));

        let some_lanes = <F64x4 as VectorMask<f64, 4>>::lt(
            &F64x4::load(&[1.0, 2.0, 3.0, 4.0]),
            &F64x4::splat(3.0),
        );
        assert!(!<F64x4 as VectorMask<f64, 4>>::all(&some_lanes));
    }

    /// `select` takes lanes from the receiver where the mask is set and from
    /// the other operand elsewhere.
    #[test]
    fn test_f64x4_vector_mask_select() {
        let when_true = F64x4::load(&[10.0, 20.0, 30.0, 40.0]);
        let when_false = F64x4::load(&[1.0, 2.0, 3.0, 4.0]);
        let limit = F64x4::load(&[5.0, 25.0, 25.0, 25.0]);

        // [10, 20, 30, 40] < [5, 25, 25, 25] holds only in lane 1.
        let mask = <F64x4 as VectorMask<f64, 4>>::lt(&when_true, &limit);
        let picked = <F64x4 as VectorMask<f64, 4>>::select(&when_true, &when_false, mask);
        assert_eq!(mask.0.move_mask() & 0b1111, 0b0010);

        // Only lane 1 comes from `when_true` (20); the rest come from `when_false`.
        let mut out = [0.0f64; 4];
        picked.store(&mut out);
        assert!((out[0] - 1.0).abs() < 1e-15);
        assert!((out[1] - 20.0).abs() < 1e-15);
        assert!((out[2] - 3.0).abs() < 1e-15);
        assert!((out[3] - 4.0).abs() < 1e-15);
    }
}