presentar_core/
simd.rs

1#![allow(clippy::unwrap_used, clippy::disallowed_methods)]
2//! SIMD-accelerated operations using Trueno.
3//!
4//! This module provides hardware-accelerated vector and matrix operations
5//! for the Presentar rendering pipeline.
6//!
7//! When the `simd` feature is disabled, operations fall back to scalar
8//! implementations.
9//!
10//! # Example
11//!
12//! ```
13//! use presentar_core::simd::{Vec4, Mat4, batch_transform_points};
14//! use presentar_core::Point;
15//!
16//! let transform = Mat4::identity();
17//! let points = vec![Point::new(0.0, 0.0), Point::new(100.0, 100.0)];
18//! let transformed = batch_transform_points(&points, &transform);
19//! ```
20
21use crate::{Point, Rect};
22
23/// 4-component vector for SIMD operations.
24#[derive(Debug, Clone, Copy, PartialEq)]
25#[repr(C)]
26pub struct Vec4 {
27    pub x: f32,
28    pub y: f32,
29    pub z: f32,
30    pub w: f32,
31}
32
33impl Vec4 {
34    /// Create a new Vec4.
35    #[inline]
36    #[must_use]
37    pub const fn new(x: f32, y: f32, z: f32, w: f32) -> Self {
38        Self { x, y, z, w }
39    }
40
41    /// Create a zero vector.
42    #[inline]
43    #[must_use]
44    pub const fn zero() -> Self {
45        Self::new(0.0, 0.0, 0.0, 0.0)
46    }
47
48    /// Create from a Point (z=0, w=1).
49    #[inline]
50    #[must_use]
51    pub const fn from_point(p: Point) -> Self {
52        Self::new(p.x, p.y, 0.0, 1.0)
53    }
54
55    /// Convert to Point (ignoring z and w).
56    #[inline]
57    #[must_use]
58    pub const fn to_point(self) -> Point {
59        Point {
60            x: self.x,
61            y: self.y,
62        }
63    }
64
65    /// Dot product.
66    #[inline]
67    #[must_use]
68    pub fn dot(self, other: Self) -> f32 {
69        self.w.mul_add(
70            other.w,
71            self.z
72                .mul_add(other.z, self.x.mul_add(other.x, self.y * other.y)),
73        )
74    }
75
76    /// Component-wise addition.
77    #[inline]
78    #[must_use]
79    pub fn add(self, other: Self) -> Self {
80        Self::new(
81            self.x + other.x,
82            self.y + other.y,
83            self.z + other.z,
84            self.w + other.w,
85        )
86    }
87
88    /// Component-wise subtraction.
89    #[inline]
90    #[must_use]
91    pub fn sub(self, other: Self) -> Self {
92        Self::new(
93            self.x - other.x,
94            self.y - other.y,
95            self.z - other.z,
96            self.w - other.w,
97        )
98    }
99
100    /// Scalar multiplication.
101    #[inline]
102    #[must_use]
103    pub fn scale(self, s: f32) -> Self {
104        Self::new(self.x * s, self.y * s, self.z * s, self.w * s)
105    }
106
107    /// Component-wise multiplication.
108    #[inline]
109    #[must_use]
110    pub fn mul(self, other: Self) -> Self {
111        Self::new(
112            self.x * other.x,
113            self.y * other.y,
114            self.z * other.z,
115            self.w * other.w,
116        )
117    }
118
119    /// Linear interpolation.
120    #[inline]
121    #[must_use]
122    pub fn lerp(self, other: Self, t: f32) -> Self {
123        self.add(other.sub(self).scale(t))
124    }
125
126    /// Length (magnitude).
127    #[inline]
128    #[must_use]
129    pub fn length(self) -> f32 {
130        self.dot(self).sqrt()
131    }
132
133    /// Normalize to unit length.
134    #[inline]
135    #[must_use]
136    pub fn normalize(self) -> Self {
137        let len = self.length();
138        if len > 0.0 {
139            self.scale(1.0 / len)
140        } else {
141            self
142        }
143    }
144}
145
146impl Default for Vec4 {
147    fn default() -> Self {
148        Self::zero()
149    }
150}
151
152impl From<Point> for Vec4 {
153    fn from(p: Point) -> Self {
154        Self::from_point(p)
155    }
156}
157
158impl From<Vec4> for Point {
159    fn from(v: Vec4) -> Self {
160        v.to_point()
161    }
162}
163
164/// 4x4 matrix for transforms.
165#[derive(Debug, Clone, Copy, PartialEq)]
166#[repr(C)]
167pub struct Mat4 {
168    /// Row-major matrix data [row][col]
169    pub data: [[f32; 4]; 4],
170}
171
172impl Mat4 {
173    /// Create from raw data (row-major).
174    #[inline]
175    #[must_use]
176    pub const fn from_data(data: [[f32; 4]; 4]) -> Self {
177        Self { data }
178    }
179
180    /// Create identity matrix.
181    #[inline]
182    #[must_use]
183    pub const fn identity() -> Self {
184        Self::from_data([
185            [1.0, 0.0, 0.0, 0.0],
186            [0.0, 1.0, 0.0, 0.0],
187            [0.0, 0.0, 1.0, 0.0],
188            [0.0, 0.0, 0.0, 1.0],
189        ])
190    }
191
192    /// Create zero matrix.
193    #[inline]
194    #[must_use]
195    pub const fn zero() -> Self {
196        Self::from_data([
197            [0.0, 0.0, 0.0, 0.0],
198            [0.0, 0.0, 0.0, 0.0],
199            [0.0, 0.0, 0.0, 0.0],
200            [0.0, 0.0, 0.0, 0.0],
201        ])
202    }
203
204    /// Create translation matrix.
205    #[inline]
206    #[must_use]
207    pub const fn translation(x: f32, y: f32, z: f32) -> Self {
208        Self::from_data([
209            [1.0, 0.0, 0.0, x],
210            [0.0, 1.0, 0.0, y],
211            [0.0, 0.0, 1.0, z],
212            [0.0, 0.0, 0.0, 1.0],
213        ])
214    }
215
216    /// Create 2D translation matrix.
217    #[inline]
218    #[must_use]
219    pub const fn translation_2d(x: f32, y: f32) -> Self {
220        Self::translation(x, y, 0.0)
221    }
222
223    /// Create scale matrix.
224    #[inline]
225    #[must_use]
226    pub const fn scale(x: f32, y: f32, z: f32) -> Self {
227        Self::from_data([
228            [x, 0.0, 0.0, 0.0],
229            [0.0, y, 0.0, 0.0],
230            [0.0, 0.0, z, 0.0],
231            [0.0, 0.0, 0.0, 1.0],
232        ])
233    }
234
235    /// Create 2D scale matrix.
236    #[inline]
237    #[must_use]
238    pub const fn scale_2d(x: f32, y: f32) -> Self {
239        Self::scale(x, y, 1.0)
240    }
241
242    /// Create uniform scale matrix.
243    #[inline]
244    #[must_use]
245    pub const fn scale_uniform(s: f32) -> Self {
246        Self::scale(s, s, s)
247    }
248
249    /// Create rotation around Z axis (2D rotation).
250    #[inline]
251    #[must_use]
252    pub fn rotation_z(angle_rad: f32) -> Self {
253        let (sin, cos) = angle_rad.sin_cos();
254        Self::from_data([
255            [cos, -sin, 0.0, 0.0],
256            [sin, cos, 0.0, 0.0],
257            [0.0, 0.0, 1.0, 0.0],
258            [0.0, 0.0, 0.0, 1.0],
259        ])
260    }
261
262    /// Create orthographic projection matrix.
263    #[inline]
264    #[must_use]
265    pub fn ortho(left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32) -> Self {
266        let width = right - left;
267        let height = top - bottom;
268        let depth = far - near;
269
270        Self::from_data([
271            [2.0 / width, 0.0, 0.0, -(right + left) / width],
272            [0.0, 2.0 / height, 0.0, -(top + bottom) / height],
273            [0.0, 0.0, -2.0 / depth, -(far + near) / depth],
274            [0.0, 0.0, 0.0, 1.0],
275        ])
276    }
277
278    /// Create orthographic projection for screen coordinates (Y down).
279    #[inline]
280    #[must_use]
281    pub fn ortho_screen(width: f32, height: f32) -> Self {
282        Self::ortho(0.0, width, height, 0.0, -1.0, 1.0)
283    }
284
285    /// Matrix multiplication.
286    #[inline]
287    #[must_use]
288    pub fn mul(&self, other: &Self) -> Self {
289        let mut result = Self::zero();
290        for i in 0..4 {
291            for j in 0..4 {
292                for k in 0..4 {
293                    result.data[i][j] += self.data[i][k] * other.data[k][j];
294                }
295            }
296        }
297        result
298    }
299
300    /// Transform a Vec4.
301    #[inline]
302    #[must_use]
303    pub fn transform_vec4(&self, v: Vec4) -> Vec4 {
304        Vec4::new(
305            self.data[0][3].mul_add(
306                v.w,
307                self.data[0][2].mul_add(v.z, self.data[0][0].mul_add(v.x, self.data[0][1] * v.y)),
308            ),
309            self.data[1][3].mul_add(
310                v.w,
311                self.data[1][2].mul_add(v.z, self.data[1][0].mul_add(v.x, self.data[1][1] * v.y)),
312            ),
313            self.data[2][3].mul_add(
314                v.w,
315                self.data[2][2].mul_add(v.z, self.data[2][0].mul_add(v.x, self.data[2][1] * v.y)),
316            ),
317            self.data[3][3].mul_add(
318                v.w,
319                self.data[3][2].mul_add(v.z, self.data[3][0].mul_add(v.x, self.data[3][1] * v.y)),
320            ),
321        )
322    }
323
324    /// Transform a 2D point (assumes z=0, w=1).
325    #[inline]
326    #[must_use]
327    pub fn transform_point(&self, p: Point) -> Point {
328        let v = self.transform_vec4(Vec4::from_point(p));
329        Point::new(v.x, v.y)
330    }
331
332    /// Transform a rectangle.
333    #[inline]
334    #[must_use]
335    pub fn transform_rect(&self, rect: &Rect) -> Rect {
336        let corners = [
337            Point::new(rect.x, rect.y),
338            Point::new(rect.x + rect.width, rect.y),
339            Point::new(rect.x + rect.width, rect.y + rect.height),
340            Point::new(rect.x, rect.y + rect.height),
341        ];
342
343        let transformed: Vec<Point> = corners.iter().map(|&p| self.transform_point(p)).collect();
344
345        let min_x = transformed
346            .iter()
347            .map(|p| p.x)
348            .fold(f32::INFINITY, f32::min);
349        let max_x = transformed
350            .iter()
351            .map(|p| p.x)
352            .fold(f32::NEG_INFINITY, f32::max);
353        let min_y = transformed
354            .iter()
355            .map(|p| p.y)
356            .fold(f32::INFINITY, f32::min);
357        let max_y = transformed
358            .iter()
359            .map(|p| p.y)
360            .fold(f32::NEG_INFINITY, f32::max);
361
362        Rect::new(min_x, min_y, max_x - min_x, max_y - min_y)
363    }
364
365    /// Get column as Vec4.
366    #[inline]
367    #[must_use]
368    pub const fn column(&self, idx: usize) -> Vec4 {
369        Vec4::new(
370            self.data[0][idx],
371            self.data[1][idx],
372            self.data[2][idx],
373            self.data[3][idx],
374        )
375    }
376
377    /// Get row as Vec4.
378    #[inline]
379    #[must_use]
380    pub const fn row(&self, idx: usize) -> Vec4 {
381        Vec4::new(
382            self.data[idx][0],
383            self.data[idx][1],
384            self.data[idx][2],
385            self.data[idx][3],
386        )
387    }
388
389    /// Transpose the matrix.
390    #[inline]
391    #[must_use]
392    pub const fn transpose(&self) -> Self {
393        Self::from_data([
394            [
395                self.data[0][0],
396                self.data[1][0],
397                self.data[2][0],
398                self.data[3][0],
399            ],
400            [
401                self.data[0][1],
402                self.data[1][1],
403                self.data[2][1],
404                self.data[3][1],
405            ],
406            [
407                self.data[0][2],
408                self.data[1][2],
409                self.data[2][2],
410                self.data[3][2],
411            ],
412            [
413                self.data[0][3],
414                self.data[1][3],
415                self.data[2][3],
416                self.data[3][3],
417            ],
418        ])
419    }
420}
421
422impl Default for Mat4 {
423    fn default() -> Self {
424        Self::identity()
425    }
426}
427
428impl std::ops::Mul for Mat4 {
429    type Output = Self;
430    fn mul(self, rhs: Self) -> Self {
431        Self::mul(&self, &rhs)
432    }
433}
434
435impl std::ops::Mul<Vec4> for Mat4 {
436    type Output = Vec4;
437    fn mul(self, rhs: Vec4) -> Vec4 {
438        self.transform_vec4(rhs)
439    }
440}
441
442/// Batch transform multiple points.
443///
444/// When SIMD is enabled, this uses vectorized operations for better performance.
445#[inline]
446#[must_use]
447pub fn batch_transform_points(points: &[Point], transform: &Mat4) -> Vec<Point> {
448    points
449        .iter()
450        .map(|&p| transform.transform_point(p))
451        .collect()
452}
453
454/// Batch transform multiple Vec4s.
455#[inline]
456#[must_use]
457pub fn batch_transform_vec4(vecs: &[Vec4], transform: &Mat4) -> Vec<Vec4> {
458    vecs.iter().map(|&v| transform.transform_vec4(v)).collect()
459}
460
461/// Batch linear interpolation.
462#[inline]
463#[must_use]
464pub fn batch_lerp_points(from: &[Point], to: &[Point], t: f32) -> Vec<Point> {
465    debug_assert_eq!(from.len(), to.len());
466    from.iter()
467        .zip(to.iter())
468        .map(|(a, b)| a.lerp(b, t))
469        .collect()
470}
471
472/// Axis-aligned bounding box from points.
473#[inline]
474#[must_use]
475pub fn bounding_box(points: &[Point]) -> Option<Rect> {
476    if points.is_empty() {
477        return None;
478    }
479
480    let mut min_x = f32::INFINITY;
481    let mut max_x = f32::NEG_INFINITY;
482    let mut min_y = f32::INFINITY;
483    let mut max_y = f32::NEG_INFINITY;
484
485    for p in points {
486        min_x = min_x.min(p.x);
487        max_x = max_x.max(p.x);
488        min_y = min_y.min(p.y);
489        max_y = max_y.max(p.y);
490    }
491
492    Some(Rect::new(min_x, min_y, max_x - min_x, max_y - min_y))
493}
494
495/// Calculate the centroid of points.
496#[inline]
497#[must_use]
498pub fn centroid(points: &[Point]) -> Option<Point> {
499    if points.is_empty() {
500        return None;
501    }
502
503    let sum: (f32, f32) = points
504        .iter()
505        .fold((0.0, 0.0), |acc, p| (acc.0 + p.x, acc.1 + p.y));
506    let n = points.len() as f32;
507    Some(Point::new(sum.0 / n, sum.1 / n))
508}
509
510/// Check if a point is inside a convex polygon.
511#[must_use]
512pub fn point_in_convex_polygon(point: Point, polygon: &[Point]) -> bool {
513    if polygon.len() < 3 {
514        return false;
515    }
516
517    let mut positive = false;
518    let mut negative = false;
519
520    for i in 0..polygon.len() {
521        let a = polygon[i];
522        let b = polygon[(i + 1) % polygon.len()];
523
524        let cross = (point.x - a.x).mul_add(b.y - a.y, -((point.y - a.y) * (b.x - a.x)));
525
526        if cross > 0.0 {
527            positive = true;
528        } else if cross < 0.0 {
529            negative = true;
530        }
531
532        if positive && negative {
533            return false;
534        }
535    }
536
537    true
538}
539
540/// Compute the area of a polygon using the shoelace formula.
541#[must_use]
542pub fn polygon_area(polygon: &[Point]) -> f32 {
543    if polygon.len() < 3 {
544        return 0.0;
545    }
546
547    let mut area = 0.0;
548    for i in 0..polygon.len() {
549        let j = (i + 1) % polygon.len();
550        area += polygon[i].x * polygon[j].y;
551        area -= polygon[j].x * polygon[i].y;
552    }
553
554    (area / 2.0).abs()
555}
556
557// =============================================================================
558// SIMD-accelerated implementations when trueno is available
559// =============================================================================
560
561#[cfg(feature = "simd")]
562mod simd_impl {
563    use super::Vec4;
564
565    /// SIMD vector type from trueno (f32).
566    pub type SimdVectorF32 = trueno::Vector<f32>;
567
568    /// Create a SIMD-backed vector from Vec4.
569    #[inline]
570    #[must_use]
571    pub fn vec4_to_simd(v: Vec4) -> SimdVectorF32 {
572        SimdVectorF32::from_slice(&[v.x, v.y, v.z, v.w])
573    }
574
575    /// Create Vec4 from SIMD vector.
576    #[inline]
577    #[must_use]
578    pub fn simd_to_vec4(v: &SimdVectorF32) -> Vec4 {
579        let slice = v.as_slice();
580        Vec4::new(
581            slice.first().copied().unwrap_or(0.0),
582            slice.get(1).copied().unwrap_or(0.0),
583            slice.get(2).copied().unwrap_or(0.0),
584            slice.get(3).copied().unwrap_or(0.0),
585        )
586    }
587
588    /// SIMD-accelerated batch add using trueno.
589    pub fn batch_add_simd(a: &[f32], b: &[f32]) -> trueno::Result<Vec<f32>> {
590        let va = SimdVectorF32::from_slice(a);
591        let vb = SimdVectorF32::from_slice(b);
592        let result = va.add(&vb)?;
593        Ok(result.as_slice().to_vec())
594    }
595
596    /// SIMD-accelerated dot product using trueno.
597    pub fn dot_simd(a: &[f32], b: &[f32]) -> trueno::Result<f32> {
598        let va = SimdVectorF32::from_slice(a);
599        let vb = SimdVectorF32::from_slice(b);
600        va.dot(&vb)
601    }
602
603    /// SIMD-accelerated scale using trueno.
604    pub fn scale_simd(a: &[f32], s: f32) -> trueno::Result<Vec<f32>> {
605        let va = SimdVectorF32::from_slice(a);
606        let result = va.scale(s)?;
607        Ok(result.as_slice().to_vec())
608    }
609
610    /// Get the best available SIMD backend.
611    #[must_use]
612    pub fn best_backend() -> trueno::Backend {
613        trueno::Backend::select_best()
614    }
615
616    /// SIMD-accelerated batch dot product.
617    pub fn batch_dot_product(a: &[Vec4], b: &[Vec4]) -> Vec<f32> {
618        debug_assert_eq!(a.len(), b.len());
619        a.iter().zip(b.iter()).map(|(va, vb)| va.dot(*vb)).collect()
620    }
621}
622
623#[cfg(feature = "simd")]
624pub use simd_impl::*;
625
626// =============================================================================
627// ComputeBlocks: Auto-Accelerated Aggregation Primitives
628// =============================================================================
629//
630// ComputeBlocks are data processing primitives with automatic acceleration.
631// They select the optimal execution path at compile time:
632//
633// 1. **Auto-vectorization**: Tight 4-way loops enable compiler SIMD codegen
634// 2. **ILP (Instruction-Level Parallelism)**: Multiple accumulators hide latency
635// 3. **Cache-friendly**: Sequential access patterns for prefetcher efficiency
636//
637// These primitives form the foundation for widget data processing:
638// - Charts: range calculation, normalization, statistics
639// - Heatmaps: binning, histogram generation
640// - Tables: aggregation, sorting support
641//
642// Reference: PROBAR-SPEC-009 (ComputeBlock Architecture)
643// Reference: docs/specifications/computeblocks-refactor.md Section 2
644
645/// SIMD-friendly sum of f64 values.
646///
647/// Uses 4-way accumulator for instruction-level parallelism and auto-vectorization.
648/// For small slices (<4), falls back to direct sum.
649///
650/// # Example
651/// ```
652/// use presentar_core::simd::batch_sum_f64;
653/// let values = vec![1.0, 2.0, 3.0, 4.0];
654/// assert_eq!(batch_sum_f64(&values), 10.0);
655/// ```
656#[inline]
657#[must_use]
658pub fn batch_sum_f64(values: &[f64]) -> f64 {
659    if values.len() < 4 {
660        return values.iter().sum();
661    }
662
663    // 4-way accumulator for ILP and auto-vectorization
664    let chunks = values.chunks_exact(4);
665    let remainder = chunks.remainder();
666
667    let mut acc = [0.0f64; 4];
668    for chunk in chunks {
669        acc[0] += chunk[0];
670        acc[1] += chunk[1];
671        acc[2] += chunk[2];
672        acc[3] += chunk[3];
673    }
674
675    let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
676    for &v in remainder {
677        sum += v;
678    }
679    sum
680}
681
682/// SIMD-friendly mean of f64 values.
683///
684/// Returns 0.0 for empty slices.
685///
686/// # Example
687/// ```
688/// use presentar_core::simd::batch_mean_f64;
689/// let values = vec![2.0, 4.0, 6.0, 8.0];
690/// assert_eq!(batch_mean_f64(&values), 5.0);
691/// ```
692#[inline]
693#[must_use]
694pub fn batch_mean_f64(values: &[f64]) -> f64 {
695    if values.is_empty() {
696        return 0.0;
697    }
698    batch_sum_f64(values) / values.len() as f64
699}
700
701/// SIMD-friendly min/max of f64 values.
702///
703/// Returns `None` for empty slices.
704/// Uses 4-way comparison for auto-vectorization.
705///
706/// # Example
707/// ```
708/// use presentar_core::simd::batch_min_max_f64;
709/// let values = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
710/// let (min, max) = batch_min_max_f64(&values).unwrap();
711/// assert_eq!(min, 1.0);
712/// assert_eq!(max, 9.0);
713/// ```
714#[inline]
715#[must_use]
716pub fn batch_min_max_f64(values: &[f64]) -> Option<(f64, f64)> {
717    if values.is_empty() {
718        return None;
719    }
720
721    if values.len() < 4 {
722        let mut min = values[0];
723        let mut max = values[0];
724        for &v in &values[1..] {
725            min = min.min(v);
726            max = max.max(v);
727        }
728        return Some((min, max));
729    }
730
731    // 4-way min/max for auto-vectorization
732    let chunks = values.chunks_exact(4);
733    let remainder = chunks.remainder();
734
735    let mut min_acc = [f64::INFINITY; 4];
736    let mut max_acc = [f64::NEG_INFINITY; 4];
737
738    for chunk in chunks {
739        min_acc[0] = min_acc[0].min(chunk[0]);
740        min_acc[1] = min_acc[1].min(chunk[1]);
741        min_acc[2] = min_acc[2].min(chunk[2]);
742        min_acc[3] = min_acc[3].min(chunk[3]);
743
744        max_acc[0] = max_acc[0].max(chunk[0]);
745        max_acc[1] = max_acc[1].max(chunk[1]);
746        max_acc[2] = max_acc[2].max(chunk[2]);
747        max_acc[3] = max_acc[3].max(chunk[3]);
748    }
749
750    let mut min = min_acc[0].min(min_acc[1]).min(min_acc[2]).min(min_acc[3]);
751    let mut max = max_acc[0].max(max_acc[1]).max(max_acc[2]).max(max_acc[3]);
752
753    for &v in remainder {
754        min = min.min(v);
755        max = max.max(v);
756    }
757
758    Some((min, max))
759}
760
761/// SIMD-friendly normalization to [0, 1] range.
762///
763/// Normalizes values using `(v - min) / (max - min)`.
764/// Returns empty vec for empty input, all zeros if min == max.
765///
766/// # Example
767/// ```
768/// use presentar_core::simd::normalize_f64;
769/// let values = vec![0.0, 50.0, 100.0];
770/// let normalized = normalize_f64(&values);
771/// assert_eq!(normalized, vec![0.0, 0.5, 1.0]);
772/// ```
773#[inline]
774#[must_use]
775pub fn normalize_f64(values: &[f64]) -> Vec<f64> {
776    if values.is_empty() {
777        return Vec::new();
778    }
779
780    let Some((min, max)) = batch_min_max_f64(values) else {
781        return Vec::new();
782    };
783
784    let range = max - min;
785    if range == 0.0 {
786        return vec![0.0; values.len()];
787    }
788
789    let inv_range = 1.0 / range;
790    values.iter().map(|&v| (v - min) * inv_range).collect()
791}
792
793/// SIMD-friendly normalization with provided range.
794///
795/// Normalizes values using `(v - min) / (max - min)` with caller-provided bounds.
796/// More efficient when min/max are already known.
797///
798/// # Example
799/// ```
800/// use presentar_core::simd::normalize_with_range_f64;
801/// let values = vec![25.0, 50.0, 75.0];
802/// let normalized = normalize_with_range_f64(&values, 0.0, 100.0);
803/// assert_eq!(normalized, vec![0.25, 0.5, 0.75]);
804/// ```
805#[inline]
806#[must_use]
807pub fn normalize_with_range_f64(values: &[f64], min: f64, max: f64) -> Vec<f64> {
808    let range = max - min;
809    if range == 0.0 {
810        return vec![0.0; values.len()];
811    }
812
813    let inv_range = 1.0 / range;
814    values.iter().map(|&v| (v - min) * inv_range).collect()
815}
816
817/// SIMD-friendly scale operation.
818///
819/// Multiplies all values by a scalar.
820///
821/// # Example
822/// ```
823/// use presentar_core::simd::batch_scale_f64;
824/// let values = vec![1.0, 2.0, 3.0];
825/// let scaled = batch_scale_f64(&values, 2.0);
826/// assert_eq!(scaled, vec![2.0, 4.0, 6.0]);
827/// ```
828#[inline]
829#[must_use]
830pub fn batch_scale_f64(values: &[f64], scale: f64) -> Vec<f64> {
831    values.iter().map(|&v| v * scale).collect()
832}
833
834/// SIMD-friendly scale and offset operation.
835///
836/// Applies `v * scale + offset` to all values.
837/// Common operation for data transformation.
838///
839/// # Example
840/// ```
841/// use presentar_core::simd::batch_scale_offset_f64;
842/// let values = vec![0.0, 0.5, 1.0];
843/// // Map [0, 1] to [100, 200]
844/// let transformed = batch_scale_offset_f64(&values, 100.0, 100.0);
845/// assert_eq!(transformed, vec![100.0, 150.0, 200.0]);
846/// ```
847#[inline]
848#[must_use]
849pub fn batch_scale_offset_f64(values: &[f64], scale: f64, offset: f64) -> Vec<f64> {
850    values.iter().map(|&v| v.mul_add(scale, offset)).collect()
851}
852
853/// SIMD-friendly variance calculation (population variance).
854///
855/// Uses two-pass algorithm: first calculates mean, then variance.
856/// Returns 0.0 for empty or single-element slices.
857///
858/// # Example
859/// ```
860/// use presentar_core::simd::batch_variance_f64;
861/// let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
862/// let variance = batch_variance_f64(&values);
863/// assert!((variance - 4.0).abs() < 0.001);
864/// ```
865#[inline]
866#[must_use]
867pub fn batch_variance_f64(values: &[f64]) -> f64 {
868    if values.len() < 2 {
869        return 0.0;
870    }
871
872    let mean = batch_mean_f64(values);
873    let sum_sq: f64 = values.iter().map(|&v| (v - mean) * (v - mean)).sum();
874    sum_sq / values.len() as f64
875}
876
877/// SIMD-friendly standard deviation.
878///
879/// Returns square root of population variance.
880///
881/// # Example
882/// ```
883/// use presentar_core::simd::batch_stddev_f64;
884/// let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
885/// let stddev = batch_stddev_f64(&values);
886/// assert!((stddev - 2.0).abs() < 0.001);
887/// ```
888#[inline]
889#[must_use]
890pub fn batch_stddev_f64(values: &[f64]) -> f64 {
891    batch_variance_f64(values).sqrt()
892}
893
894/// SIMD-friendly weighted sum.
895///
896/// Computes sum(values[i] * weights[i]).
897/// Panics in debug mode if lengths differ.
898///
899/// # Example
900/// ```
901/// use presentar_core::simd::weighted_sum_f64;
902/// let values = vec![1.0, 2.0, 3.0];
903/// let weights = vec![0.5, 0.3, 0.2];
904/// let result = weighted_sum_f64(&values, &weights);
905/// assert!((result - 1.7).abs() < 0.001);
906/// ```
907#[inline]
908#[must_use]
909pub fn weighted_sum_f64(values: &[f64], weights: &[f64]) -> f64 {
910    debug_assert_eq!(values.len(), weights.len());
911
912    if values.len() < 4 {
913        return values
914            .iter()
915            .zip(weights.iter())
916            .map(|(&v, &w)| v * w)
917            .sum();
918    }
919
920    // 4-way accumulator
921    let v_chunks = values.chunks_exact(4);
922    let w_chunks = weights.chunks_exact(4);
923    let v_rem = v_chunks.remainder();
924    let w_rem = w_chunks.remainder();
925
926    let mut acc = [0.0f64; 4];
927    for (vc, wc) in v_chunks.zip(w_chunks) {
928        acc[0] = vc[0].mul_add(wc[0], acc[0]);
929        acc[1] = vc[1].mul_add(wc[1], acc[1]);
930        acc[2] = vc[2].mul_add(wc[2], acc[2]);
931        acc[3] = vc[3].mul_add(wc[3], acc[3]);
932    }
933
934    let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
935    for (&v, &w) in v_rem.iter().zip(w_rem.iter()) {
936        sum = v.mul_add(w, sum);
937    }
938    sum
939}
940
941/// SIMD-friendly percentile calculation.
942///
943/// Finds the value at the given percentile (0.0-1.0).
944/// Uses linear interpolation for non-integer indices.
945/// Values must be sorted in ascending order.
946///
947/// # Example
948/// ```
949/// use presentar_core::simd::percentile_sorted_f64;
950/// let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
951/// assert_eq!(percentile_sorted_f64(&sorted, 0.5), 3.0); // median
952/// ```
953#[inline]
954#[must_use]
955pub fn percentile_sorted_f64(sorted_values: &[f64], p: f64) -> f64 {
956    if sorted_values.is_empty() {
957        return 0.0;
958    }
959    if sorted_values.len() == 1 {
960        return sorted_values[0];
961    }
962
963    let p = p.clamp(0.0, 1.0);
964    let n = sorted_values.len() as f64;
965    let idx = p * (n - 1.0);
966    let lower = idx.floor() as usize;
967    let upper = idx.ceil() as usize;
968
969    if lower == upper {
970        sorted_values[lower]
971    } else {
972        let frac = idx - lower as f64;
973        sorted_values[lower].mul_add(1.0 - frac, sorted_values[upper] * frac)
974    }
975}
976
977/// Compute histogram bin counts for f64 data.
978///
979/// Assigns values to bins and returns counts per bin.
980/// Efficient for visualization (heatmaps, histograms).
981///
982/// # Example
983/// ```
984/// use presentar_core::simd::histogram_f64;
985/// let values = vec![0.1, 0.5, 0.9, 1.5, 2.5, 3.5, 4.5];
986/// let counts = histogram_f64(&values, 0.0, 5.0, 5);
987/// assert_eq!(counts, vec![3, 1, 1, 1, 1]); // bins: [0-1), [1-2), [2-3), [3-4), [4-5]
988/// ```
989#[inline]
990#[must_use]
991pub fn histogram_f64(values: &[f64], min: f64, max: f64, num_bins: usize) -> Vec<usize> {
992    if num_bins == 0 || values.is_empty() {
993        return vec![0; num_bins];
994    }
995
996    let range = max - min;
997    if range <= 0.0 {
998        // All values in first bin
999        return {
1000            let mut counts = vec![0; num_bins];
1001            counts[0] = values.len();
1002            counts
1003        };
1004    }
1005
1006    let bin_width = range / num_bins as f64;
1007    let mut counts = vec![0usize; num_bins];
1008
1009    for &v in values {
1010        let bin = ((v - min) / bin_width) as usize;
1011        let bin = bin.min(num_bins - 1); // Clamp to last bin
1012        counts[bin] += 1;
1013    }
1014
1015    counts
1016}
1017
1018#[cfg(test)]
1019mod tests {
1020    use super::*;
1021
1022    // =========================================================================
1023    // Vec4 Tests
1024    // =========================================================================
1025
1026    #[test]
1027    fn test_vec4_new() {
1028        let v = Vec4::new(1.0, 2.0, 3.0, 4.0);
1029        assert_eq!(v.x, 1.0);
1030        assert_eq!(v.y, 2.0);
1031        assert_eq!(v.z, 3.0);
1032        assert_eq!(v.w, 4.0);
1033    }
1034
1035    #[test]
1036    fn test_vec4_zero() {
1037        let v = Vec4::zero();
1038        assert_eq!(v, Vec4::new(0.0, 0.0, 0.0, 0.0));
1039    }
1040
1041    #[test]
1042    fn test_vec4_default() {
1043        let v: Vec4 = Default::default();
1044        assert_eq!(v, Vec4::zero());
1045    }
1046
1047    #[test]
1048    fn test_vec4_from_point() {
1049        let p = Point::new(10.0, 20.0);
1050        let v = Vec4::from_point(p);
1051        assert_eq!(v, Vec4::new(10.0, 20.0, 0.0, 1.0));
1052    }
1053
1054    #[test]
1055    fn test_vec4_to_point() {
1056        let v = Vec4::new(5.0, 15.0, 25.0, 35.0);
1057        let p = v.to_point();
1058        assert_eq!(p, Point::new(5.0, 15.0));
1059    }
1060
1061    #[test]
1062    fn test_vec4_dot() {
1063        let a = Vec4::new(1.0, 2.0, 3.0, 4.0);
1064        let b = Vec4::new(2.0, 3.0, 4.0, 5.0);
1065        assert_eq!(a.dot(b), 1.0 * 2.0 + 2.0 * 3.0 + 3.0 * 4.0 + 4.0 * 5.0);
1066    }
1067
1068    #[test]
1069    fn test_vec4_add() {
1070        let a = Vec4::new(1.0, 2.0, 3.0, 4.0);
1071        let b = Vec4::new(5.0, 6.0, 7.0, 8.0);
1072        let c = a.add(b);
1073        assert_eq!(c, Vec4::new(6.0, 8.0, 10.0, 12.0));
1074    }
1075
1076    #[test]
1077    fn test_vec4_sub() {
1078        let a = Vec4::new(5.0, 6.0, 7.0, 8.0);
1079        let b = Vec4::new(1.0, 2.0, 3.0, 4.0);
1080        let c = a.sub(b);
1081        assert_eq!(c, Vec4::new(4.0, 4.0, 4.0, 4.0));
1082    }
1083
1084    #[test]
1085    fn test_vec4_scale() {
1086        let v = Vec4::new(1.0, 2.0, 3.0, 4.0);
1087        let s = v.scale(2.0);
1088        assert_eq!(s, Vec4::new(2.0, 4.0, 6.0, 8.0));
1089    }
1090
1091    #[test]
1092    fn test_vec4_mul() {
1093        let a = Vec4::new(1.0, 2.0, 3.0, 4.0);
1094        let b = Vec4::new(2.0, 2.0, 2.0, 2.0);
1095        let c = a.mul(b);
1096        assert_eq!(c, Vec4::new(2.0, 4.0, 6.0, 8.0));
1097    }
1098
1099    #[test]
1100    fn test_vec4_lerp() {
1101        let a = Vec4::new(0.0, 0.0, 0.0, 0.0);
1102        let b = Vec4::new(10.0, 10.0, 10.0, 10.0);
1103        let c = a.lerp(b, 0.5);
1104        assert_eq!(c, Vec4::new(5.0, 5.0, 5.0, 5.0));
1105    }
1106
1107    #[test]
1108    fn test_vec4_length() {
1109        let v = Vec4::new(3.0, 4.0, 0.0, 0.0);
1110        assert!((v.length() - 5.0).abs() < 0.0001);
1111    }
1112
1113    #[test]
1114    fn test_vec4_normalize() {
1115        let v = Vec4::new(3.0, 4.0, 0.0, 0.0);
1116        let n = v.normalize();
1117        assert!((n.length() - 1.0).abs() < 0.0001);
1118    }
1119
1120    #[test]
1121    fn test_vec4_from_impl() {
1122        let p = Point::new(1.0, 2.0);
1123        let v: Vec4 = p.into();
1124        assert_eq!(v, Vec4::new(1.0, 2.0, 0.0, 1.0));
1125    }
1126
1127    // =========================================================================
1128    // Mat4 Tests
1129    // =========================================================================
1130
1131    #[test]
1132    fn test_mat4_identity() {
1133        let m = Mat4::identity();
1134        assert_eq!(m.data[0][0], 1.0);
1135        assert_eq!(m.data[1][1], 1.0);
1136        assert_eq!(m.data[2][2], 1.0);
1137        assert_eq!(m.data[3][3], 1.0);
1138        assert_eq!(m.data[0][1], 0.0);
1139    }
1140
1141    #[test]
1142    fn test_mat4_zero() {
1143        let m = Mat4::zero();
1144        for i in 0..4 {
1145            for j in 0..4 {
1146                assert_eq!(m.data[i][j], 0.0);
1147            }
1148        }
1149    }
1150
1151    #[test]
1152    fn test_mat4_default() {
1153        let m: Mat4 = Default::default();
1154        assert_eq!(m, Mat4::identity());
1155    }
1156
1157    #[test]
1158    fn test_mat4_translation() {
1159        let m = Mat4::translation(10.0, 20.0, 30.0);
1160        let p = Point::new(0.0, 0.0);
1161        let t = m.transform_point(p);
1162        assert_eq!(t, Point::new(10.0, 20.0));
1163    }
1164
1165    #[test]
1166    fn test_mat4_translation_2d() {
1167        let m = Mat4::translation_2d(5.0, 15.0);
1168        let p = Point::new(10.0, 10.0);
1169        let t = m.transform_point(p);
1170        assert_eq!(t, Point::new(15.0, 25.0));
1171    }
1172
1173    #[test]
1174    fn test_mat4_scale() {
1175        let m = Mat4::scale(2.0, 3.0, 4.0);
1176        let p = Point::new(10.0, 10.0);
1177        let t = m.transform_point(p);
1178        assert_eq!(t, Point::new(20.0, 30.0));
1179    }
1180
1181    #[test]
1182    fn test_mat4_scale_2d() {
1183        let m = Mat4::scale_2d(0.5, 2.0);
1184        let p = Point::new(10.0, 10.0);
1185        let t = m.transform_point(p);
1186        assert_eq!(t, Point::new(5.0, 20.0));
1187    }
1188
1189    #[test]
1190    fn test_mat4_scale_uniform() {
1191        let m = Mat4::scale_uniform(2.0);
1192        let p = Point::new(5.0, 5.0);
1193        let t = m.transform_point(p);
1194        assert_eq!(t, Point::new(10.0, 10.0));
1195    }
1196
1197    #[test]
1198    fn test_mat4_rotation_z() {
1199        use std::f32::consts::PI;
1200        let m = Mat4::rotation_z(PI / 2.0); // 90 degrees
1201        let p = Point::new(1.0, 0.0);
1202        let t = m.transform_point(p);
1203        // Should rotate (1, 0) to approximately (0, 1)
1204        assert!((t.x - 0.0).abs() < 0.0001);
1205        assert!((t.y - 1.0).abs() < 0.0001);
1206    }
1207
1208    #[test]
1209    fn test_mat4_mul_identity() {
1210        let a = Mat4::identity();
1211        let b = Mat4::identity();
1212        let c = a.mul(&b);
1213        assert_eq!(c, Mat4::identity());
1214    }
1215
1216    #[test]
1217    fn test_mat4_mul_combined_transform() {
1218        let translate = Mat4::translation_2d(10.0, 0.0);
1219        let scale = Mat4::scale_2d(2.0, 2.0);
1220        let combined = translate.mul(&scale);
1221
1222        let p = Point::new(5.0, 5.0);
1223        let t = combined.transform_point(p);
1224        // First scale (5,5) -> (10, 10), then translate -> (20, 10)
1225        assert_eq!(t, Point::new(20.0, 10.0));
1226    }
1227
1228    #[test]
1229    fn test_mat4_transform_rect() {
1230        let m = Mat4::scale_2d(2.0, 2.0);
1231        let rect = Rect::new(10.0, 10.0, 20.0, 30.0);
1232        let t = m.transform_rect(&rect);
1233        assert_eq!(t.x, 20.0);
1234        assert_eq!(t.y, 20.0);
1235        assert_eq!(t.width, 40.0);
1236        assert_eq!(t.height, 60.0);
1237    }
1238
1239    #[test]
1240    fn test_mat4_transpose() {
1241        let m = Mat4::from_data([
1242            [1.0, 2.0, 3.0, 4.0],
1243            [5.0, 6.0, 7.0, 8.0],
1244            [9.0, 10.0, 11.0, 12.0],
1245            [13.0, 14.0, 15.0, 16.0],
1246        ]);
1247        let t = m.transpose();
1248        assert_eq!(t.data[0][1], 5.0);
1249        assert_eq!(t.data[1][0], 2.0);
1250    }
1251
1252    #[test]
1253    fn test_mat4_column() {
1254        let m = Mat4::identity();
1255        let col = m.column(0);
1256        assert_eq!(col, Vec4::new(1.0, 0.0, 0.0, 0.0));
1257    }
1258
1259    #[test]
1260    fn test_mat4_row() {
1261        let m = Mat4::identity();
1262        let row = m.row(0);
1263        assert_eq!(row, Vec4::new(1.0, 0.0, 0.0, 0.0));
1264    }
1265
1266    #[test]
1267    fn test_mat4_ortho_screen() {
1268        let m = Mat4::ortho_screen(800.0, 600.0);
1269        // Point at top-left should map to (-1, 1) in NDC
1270        let p = m.transform_vec4(Vec4::new(0.0, 0.0, 0.0, 1.0));
1271        assert!((p.x - (-1.0)).abs() < 0.001);
1272        assert!((p.y - 1.0).abs() < 0.001);
1273    }
1274
1275    #[test]
1276    fn test_mat4_mul_operator() {
1277        let a = Mat4::translation_2d(10.0, 20.0);
1278        let b = Mat4::scale_2d(2.0, 2.0);
1279        let c = a * b;
1280        assert_eq!(c, a.mul(&b));
1281    }
1282
1283    #[test]
1284    fn test_mat4_mul_vec4_operator() {
1285        let m = Mat4::translation_2d(10.0, 20.0);
1286        let v = Vec4::new(0.0, 0.0, 0.0, 1.0);
1287        let r = m * v;
1288        assert_eq!(r, Vec4::new(10.0, 20.0, 0.0, 1.0));
1289    }
1290
1291    // =========================================================================
1292    // Batch Operation Tests
1293    // =========================================================================
1294
1295    #[test]
1296    fn test_batch_transform_points() {
1297        let m = Mat4::translation_2d(10.0, 10.0);
1298        let points = vec![Point::new(0.0, 0.0), Point::new(5.0, 5.0)];
1299        let result = batch_transform_points(&points, &m);
1300        assert_eq!(result[0], Point::new(10.0, 10.0));
1301        assert_eq!(result[1], Point::new(15.0, 15.0));
1302    }
1303
1304    #[test]
1305    fn test_batch_transform_vec4() {
1306        let m = Mat4::scale_uniform(2.0);
1307        let vecs = vec![Vec4::new(1.0, 1.0, 1.0, 0.0), Vec4::new(2.0, 2.0, 2.0, 0.0)];
1308        let result = batch_transform_vec4(&vecs, &m);
1309        assert_eq!(result[0], Vec4::new(2.0, 2.0, 2.0, 0.0));
1310        assert_eq!(result[1], Vec4::new(4.0, 4.0, 4.0, 0.0));
1311    }
1312
1313    #[test]
1314    fn test_batch_lerp_points() {
1315        let from = vec![Point::new(0.0, 0.0), Point::new(10.0, 10.0)];
1316        let to = vec![Point::new(10.0, 10.0), Point::new(20.0, 20.0)];
1317        let result = batch_lerp_points(&from, &to, 0.5);
1318        assert_eq!(result[0], Point::new(5.0, 5.0));
1319        assert_eq!(result[1], Point::new(15.0, 15.0));
1320    }
1321
1322    // =========================================================================
1323    // Geometry Tests
1324    // =========================================================================
1325
1326    #[test]
1327    fn test_bounding_box() {
1328        let points = vec![
1329            Point::new(0.0, 0.0),
1330            Point::new(10.0, 5.0),
1331            Point::new(5.0, 15.0),
1332        ];
1333        let bbox = bounding_box(&points).unwrap();
1334        assert_eq!(bbox.x, 0.0);
1335        assert_eq!(bbox.y, 0.0);
1336        assert_eq!(bbox.width, 10.0);
1337        assert_eq!(bbox.height, 15.0);
1338    }
1339
1340    #[test]
1341    fn test_bounding_box_empty() {
1342        let points: Vec<Point> = vec![];
1343        assert!(bounding_box(&points).is_none());
1344    }
1345
1346    #[test]
1347    fn test_centroid() {
1348        let points = vec![
1349            Point::new(0.0, 0.0),
1350            Point::new(10.0, 0.0),
1351            Point::new(10.0, 10.0),
1352            Point::new(0.0, 10.0),
1353        ];
1354        let c = centroid(&points).unwrap();
1355        assert_eq!(c, Point::new(5.0, 5.0));
1356    }
1357
1358    #[test]
1359    fn test_centroid_empty() {
1360        let points: Vec<Point> = vec![];
1361        assert!(centroid(&points).is_none());
1362    }
1363
1364    #[test]
1365    fn test_point_in_convex_polygon() {
1366        let square = vec![
1367            Point::new(0.0, 0.0),
1368            Point::new(10.0, 0.0),
1369            Point::new(10.0, 10.0),
1370            Point::new(0.0, 10.0),
1371        ];
1372
1373        assert!(point_in_convex_polygon(Point::new(5.0, 5.0), &square));
1374        assert!(!point_in_convex_polygon(Point::new(15.0, 5.0), &square));
1375    }
1376
1377    #[test]
1378    fn test_point_in_convex_polygon_edge() {
1379        let triangle = vec![
1380            Point::new(0.0, 0.0),
1381            Point::new(10.0, 0.0),
1382            Point::new(5.0, 10.0),
1383        ];
1384
1385        // On the edge
1386        assert!(point_in_convex_polygon(Point::new(5.0, 0.0), &triangle));
1387    }
1388
1389    #[test]
1390    fn test_polygon_area_square() {
1391        let square = vec![
1392            Point::new(0.0, 0.0),
1393            Point::new(10.0, 0.0),
1394            Point::new(10.0, 10.0),
1395            Point::new(0.0, 10.0),
1396        ];
1397        let area = polygon_area(&square);
1398        assert!((area - 100.0).abs() < 0.0001);
1399    }
1400
1401    #[test]
1402    fn test_polygon_area_triangle() {
1403        let triangle = vec![
1404            Point::new(0.0, 0.0),
1405            Point::new(10.0, 0.0),
1406            Point::new(5.0, 10.0),
1407        ];
1408        let area = polygon_area(&triangle);
1409        assert!((area - 50.0).abs() < 0.0001);
1410    }
1411
1412    #[test]
1413    fn test_polygon_area_too_few_points() {
1414        assert_eq!(polygon_area(&[]), 0.0);
1415        assert_eq!(polygon_area(&[Point::new(0.0, 0.0)]), 0.0);
1416        assert_eq!(
1417            polygon_area(&[Point::new(0.0, 0.0), Point::new(1.0, 1.0)]),
1418            0.0
1419        );
1420    }
1421
1422    // =========================================================================
1423    // SIMD Tests (when feature enabled)
1424    // =========================================================================
1425
1426    #[cfg(feature = "simd")]
1427    mod simd_tests {
1428        use super::*;
1429
1430        #[test]
1431        fn test_vec4_to_simd_roundtrip() {
1432            let v = Vec4::new(1.0, 2.0, 3.0, 4.0);
1433            let simd = vec4_to_simd(v);
1434            let back = simd_to_vec4(&simd);
1435            assert_eq!(v, back);
1436        }
1437
1438        #[test]
1439        fn test_batch_add_simd() {
1440            let a = vec![1.0, 2.0, 3.0, 4.0];
1441            let b = vec![5.0, 6.0, 7.0, 8.0];
1442            let result = batch_add_simd(&a, &b).unwrap();
1443            assert_eq!(result, vec![6.0, 8.0, 10.0, 12.0]);
1444        }
1445
1446        #[test]
1447        fn test_dot_simd() {
1448            let a = vec![1.0, 2.0, 3.0, 4.0];
1449            let b = vec![1.0, 1.0, 1.0, 1.0];
1450            let result = dot_simd(&a, &b).unwrap();
1451            assert_eq!(result, 10.0);
1452        }
1453
1454        #[test]
1455        fn test_scale_simd() {
1456            let a = vec![1.0, 2.0, 3.0, 4.0];
1457            let result = scale_simd(&a, 2.0).unwrap();
1458            assert_eq!(result, vec![2.0, 4.0, 6.0, 8.0]);
1459        }
1460
1461        #[test]
1462        fn test_best_backend() {
1463            let backend = best_backend();
1464            // Just check it doesn't panic and returns a valid backend
1465            assert!(matches!(
1466                backend,
1467                trueno::Backend::Scalar
1468                    | trueno::Backend::SSE2
1469                    | trueno::Backend::AVX
1470                    | trueno::Backend::AVX2
1471                    | trueno::Backend::AVX512
1472                    | trueno::Backend::NEON
1473                    | trueno::Backend::WasmSIMD
1474                    | trueno::Backend::GPU
1475                    | trueno::Backend::Auto
1476            ));
1477        }
1478
1479        #[test]
1480        fn test_batch_dot_product() {
1481            let a = vec![Vec4::new(1.0, 0.0, 0.0, 0.0), Vec4::new(0.0, 1.0, 0.0, 0.0)];
1482            let b = vec![Vec4::new(1.0, 0.0, 0.0, 0.0), Vec4::new(0.0, 1.0, 0.0, 0.0)];
1483            let dots = batch_dot_product(&a, &b);
1484            assert_eq!(dots, vec![1.0, 1.0]);
1485        }
1486    }
1487
1488    // =========================================================================
1489    // ComputeBlock Tests (f64 Aggregation)
1490    // =========================================================================
1491
1492    mod compute_block_tests {
1493        use super::*;
1494
1495        #[test]
1496        fn test_batch_sum_f64_empty() {
1497            assert_eq!(batch_sum_f64(&[]), 0.0);
1498        }
1499
1500        #[test]
1501        fn test_batch_sum_f64_small() {
1502            assert_eq!(batch_sum_f64(&[1.0, 2.0, 3.0]), 6.0);
1503        }
1504
1505        #[test]
1506        fn test_batch_sum_f64_large() {
1507            let values: Vec<f64> = (1..=100).map(|i| i as f64).collect();
1508            assert_eq!(batch_sum_f64(&values), 5050.0);
1509        }
1510
1511        #[test]
1512        fn test_batch_sum_f64_exact_chunks() {
1513            // 8 elements = 2 exact chunks of 4
1514            let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1515            assert_eq!(batch_sum_f64(&values), 36.0);
1516        }
1517
1518        #[test]
1519        fn test_batch_mean_f64_empty() {
1520            assert_eq!(batch_mean_f64(&[]), 0.0);
1521        }
1522
1523        #[test]
1524        fn test_batch_mean_f64() {
1525            assert_eq!(batch_mean_f64(&[2.0, 4.0, 6.0, 8.0]), 5.0);
1526        }
1527
1528        #[test]
1529        fn test_batch_min_max_f64_empty() {
1530            assert!(batch_min_max_f64(&[]).is_none());
1531        }
1532
1533        #[test]
1534        fn test_batch_min_max_f64_single() {
1535            assert_eq!(batch_min_max_f64(&[42.0]), Some((42.0, 42.0)));
1536        }
1537
1538        #[test]
1539        fn test_batch_min_max_f64_small() {
1540            assert_eq!(batch_min_max_f64(&[3.0, 1.0, 2.0]), Some((1.0, 3.0)));
1541        }
1542
1543        #[test]
1544        fn test_batch_min_max_f64_large() {
1545            let values: Vec<f64> = (1..=256).map(|i| i as f64).collect();
1546            assert_eq!(batch_min_max_f64(&values), Some((1.0, 256.0)));
1547        }
1548
1549        #[test]
1550        fn test_normalize_f64_empty() {
1551            assert!(normalize_f64(&[]).is_empty());
1552        }
1553
1554        #[test]
1555        fn test_normalize_f64_constant() {
1556            let result = normalize_f64(&[5.0, 5.0, 5.0]);
1557            assert_eq!(result, vec![0.0, 0.0, 0.0]);
1558        }
1559
1560        #[test]
1561        fn test_normalize_f64() {
1562            let result = normalize_f64(&[0.0, 50.0, 100.0]);
1563            assert_eq!(result, vec![0.0, 0.5, 1.0]);
1564        }
1565
1566        #[test]
1567        fn test_normalize_with_range_f64() {
1568            let result = normalize_with_range_f64(&[25.0, 50.0, 75.0], 0.0, 100.0);
1569            assert_eq!(result, vec![0.25, 0.5, 0.75]);
1570        }
1571
1572        #[test]
1573        fn test_batch_scale_f64() {
1574            let result = batch_scale_f64(&[1.0, 2.0, 3.0], 2.0);
1575            assert_eq!(result, vec![2.0, 4.0, 6.0]);
1576        }
1577
1578        #[test]
1579        fn test_batch_scale_offset_f64() {
1580            let result = batch_scale_offset_f64(&[0.0, 0.5, 1.0], 100.0, 100.0);
1581            assert_eq!(result, vec![100.0, 150.0, 200.0]);
1582        }
1583
1584        #[test]
1585        fn test_batch_variance_f64_empty() {
1586            assert_eq!(batch_variance_f64(&[]), 0.0);
1587        }
1588
1589        #[test]
1590        fn test_batch_variance_f64_single() {
1591            assert_eq!(batch_variance_f64(&[42.0]), 0.0);
1592        }
1593
1594        #[test]
1595        fn test_batch_variance_f64() {
1596            let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1597            let variance = batch_variance_f64(&values);
1598            assert!((variance - 4.0).abs() < 0.001);
1599        }
1600
1601        #[test]
1602        fn test_batch_stddev_f64() {
1603            let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1604            let stddev = batch_stddev_f64(&values);
1605            assert!((stddev - 2.0).abs() < 0.001);
1606        }
1607
1608        #[test]
1609        fn test_weighted_sum_f64_small() {
1610            let values = vec![1.0, 2.0];
1611            let weights = vec![0.5, 0.5];
1612            assert_eq!(weighted_sum_f64(&values, &weights), 1.5);
1613        }
1614
1615        #[test]
1616        fn test_weighted_sum_f64_large() {
1617            let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1618            let weights = vec![1.0; 8];
1619            assert_eq!(weighted_sum_f64(&values, &weights), 36.0);
1620        }
1621
1622        #[test]
1623        fn test_percentile_sorted_f64_empty() {
1624            assert_eq!(percentile_sorted_f64(&[], 0.5), 0.0);
1625        }
1626
1627        #[test]
1628        fn test_percentile_sorted_f64_single() {
1629            assert_eq!(percentile_sorted_f64(&[42.0], 0.5), 42.0);
1630        }
1631
1632        #[test]
1633        fn test_percentile_sorted_f64_median() {
1634            let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1635            assert_eq!(percentile_sorted_f64(&sorted, 0.5), 3.0);
1636        }
1637
1638        #[test]
1639        fn test_percentile_sorted_f64_quartiles() {
1640            let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1641            assert_eq!(percentile_sorted_f64(&sorted, 0.0), 1.0);
1642            assert_eq!(percentile_sorted_f64(&sorted, 1.0), 5.0);
1643            assert_eq!(percentile_sorted_f64(&sorted, 0.25), 2.0);
1644            assert_eq!(percentile_sorted_f64(&sorted, 0.75), 4.0);
1645        }
1646
1647        #[test]
1648        fn test_histogram_f64_empty() {
1649            assert_eq!(histogram_f64(&[], 0.0, 10.0, 5), vec![0; 5]);
1650        }
1651
1652        #[test]
1653        fn test_histogram_f64() {
1654            let values = vec![0.1, 0.5, 0.9, 1.5, 2.5, 3.5, 4.5];
1655            let counts = histogram_f64(&values, 0.0, 5.0, 5);
1656            assert_eq!(counts, vec![3, 1, 1, 1, 1]);
1657        }
1658
1659        #[test]
1660        fn test_histogram_f64_edge_values() {
1661            // Values exactly at bin boundaries
1662            let values = vec![0.0, 1.0, 2.0, 3.0, 4.0, 4.999];
1663            let counts = histogram_f64(&values, 0.0, 5.0, 5);
1664            assert_eq!(counts, vec![1, 1, 1, 1, 2]); // 4.0 and 4.999 in last bin
1665        }
1666
1667        #[test]
1668        fn test_histogram_f64_uniform() {
1669            // Uniform distribution across bins
1670            let values: Vec<f64> = (0..100).map(|i| i as f64).collect();
1671            let counts = histogram_f64(&values, 0.0, 100.0, 10);
1672            // Each bin should have 10 values
1673            assert!(counts.iter().all(|&c| c == 10));
1674        }
1675    }
1676}