Skip to main content

presentar_core/
simd.rs

1#![allow(clippy::unwrap_used, clippy::disallowed_methods)]
2//! SIMD-accelerated operations using Trueno.
3//!
4//! This module provides hardware-accelerated vector and matrix operations
5//! for the Presentar rendering pipeline.
6//!
7//! When the `simd` feature is disabled, operations fall back to scalar
8//! implementations.
9//!
10//! # Example
11//!
12//! ```
13//! use presentar_core::simd::{Vec4, Mat4, batch_transform_points};
14//! use presentar_core::Point;
15//!
16//! let transform = Mat4::identity();
17//! let points = vec![Point::new(0.0, 0.0), Point::new(100.0, 100.0)];
18//! let transformed = batch_transform_points(&points, &transform);
19//! ```
20
21use crate::{Point, Rect};
22
23/// 4-component vector for SIMD operations.
24#[derive(Debug, Clone, Copy, PartialEq)]
25#[repr(C)]
26pub struct Vec4 {
27    pub x: f32,
28    pub y: f32,
29    pub z: f32,
30    pub w: f32,
31}
32
33impl Vec4 {
34    /// Create a new Vec4.
35    #[inline]
36    #[must_use]
37    pub const fn new(x: f32, y: f32, z: f32, w: f32) -> Self {
38        Self { x, y, z, w }
39    }
40
41    /// Create a zero vector.
42    #[inline]
43    #[must_use]
44    pub const fn zero() -> Self {
45        Self::new(0.0, 0.0, 0.0, 0.0)
46    }
47
48    /// Create from a Point (z=0, w=1).
49    #[inline]
50    #[must_use]
51    pub const fn from_point(p: Point) -> Self {
52        Self::new(p.x, p.y, 0.0, 1.0)
53    }
54
55    /// Convert to Point (ignoring z and w).
56    #[inline]
57    #[must_use]
58    pub const fn to_point(self) -> Point {
59        Point {
60            x: self.x,
61            y: self.y,
62        }
63    }
64
65    /// Dot product.
66    #[inline]
67    #[must_use]
68    pub fn dot(self, other: Self) -> f32 {
69        self.w.mul_add(
70            other.w,
71            self.z
72                .mul_add(other.z, self.x.mul_add(other.x, self.y * other.y)),
73        )
74    }
75
76    /// Component-wise addition.
77    #[inline]
78    #[must_use]
79    pub fn add(self, other: Self) -> Self {
80        Self::new(
81            self.x + other.x,
82            self.y + other.y,
83            self.z + other.z,
84            self.w + other.w,
85        )
86    }
87
88    /// Component-wise subtraction.
89    #[inline]
90    #[must_use]
91    pub fn sub(self, other: Self) -> Self {
92        Self::new(
93            self.x - other.x,
94            self.y - other.y,
95            self.z - other.z,
96            self.w - other.w,
97        )
98    }
99
100    /// Scalar multiplication.
101    #[inline]
102    #[must_use]
103    pub fn scale(self, s: f32) -> Self {
104        Self::new(self.x * s, self.y * s, self.z * s, self.w * s)
105    }
106
107    /// Component-wise multiplication.
108    #[inline]
109    #[must_use]
110    pub fn mul(self, other: Self) -> Self {
111        Self::new(
112            self.x * other.x,
113            self.y * other.y,
114            self.z * other.z,
115            self.w * other.w,
116        )
117    }
118
119    /// Linear interpolation.
120    #[inline]
121    #[must_use]
122    pub fn lerp(self, other: Self, t: f32) -> Self {
123        self.add(other.sub(self).scale(t))
124    }
125
126    /// Length (magnitude).
127    #[inline]
128    #[must_use]
129    pub fn length(self) -> f32 {
130        self.dot(self).sqrt()
131    }
132
133    /// Normalize to unit length.
134    #[inline]
135    #[must_use]
136    pub fn normalize(self) -> Self {
137        let len = self.length();
138        if len > 0.0 {
139            self.scale(1.0 / len)
140        } else {
141            self
142        }
143    }
144}
145
146impl Default for Vec4 {
147    fn default() -> Self {
148        Self::zero()
149    }
150}
151
152impl From<Point> for Vec4 {
153    fn from(p: Point) -> Self {
154        Self::from_point(p)
155    }
156}
157
158impl From<Vec4> for Point {
159    fn from(v: Vec4) -> Self {
160        v.to_point()
161    }
162}
163
164/// 4x4 matrix for transforms.
165#[derive(Debug, Clone, Copy, PartialEq)]
166#[repr(C)]
167pub struct Mat4 {
168    /// Row-major matrix data [row][col]
169    pub data: [[f32; 4]; 4],
170}
171
172impl Mat4 {
173    /// Create from raw data (row-major).
174    #[inline]
175    #[must_use]
176    pub const fn from_data(data: [[f32; 4]; 4]) -> Self {
177        Self { data }
178    }
179
180    /// Create identity matrix.
181    #[inline]
182    #[must_use]
183    pub const fn identity() -> Self {
184        Self::from_data([
185            [1.0, 0.0, 0.0, 0.0],
186            [0.0, 1.0, 0.0, 0.0],
187            [0.0, 0.0, 1.0, 0.0],
188            [0.0, 0.0, 0.0, 1.0],
189        ])
190    }
191
192    /// Create zero matrix.
193    #[inline]
194    #[must_use]
195    pub const fn zero() -> Self {
196        Self::from_data([
197            [0.0, 0.0, 0.0, 0.0],
198            [0.0, 0.0, 0.0, 0.0],
199            [0.0, 0.0, 0.0, 0.0],
200            [0.0, 0.0, 0.0, 0.0],
201        ])
202    }
203
204    /// Create translation matrix.
205    #[inline]
206    #[must_use]
207    pub const fn translation(x: f32, y: f32, z: f32) -> Self {
208        Self::from_data([
209            [1.0, 0.0, 0.0, x],
210            [0.0, 1.0, 0.0, y],
211            [0.0, 0.0, 1.0, z],
212            [0.0, 0.0, 0.0, 1.0],
213        ])
214    }
215
216    /// Create 2D translation matrix.
217    #[inline]
218    #[must_use]
219    pub const fn translation_2d(x: f32, y: f32) -> Self {
220        Self::translation(x, y, 0.0)
221    }
222
223    /// Create scale matrix.
224    #[inline]
225    #[must_use]
226    pub const fn scale(x: f32, y: f32, z: f32) -> Self {
227        Self::from_data([
228            [x, 0.0, 0.0, 0.0],
229            [0.0, y, 0.0, 0.0],
230            [0.0, 0.0, z, 0.0],
231            [0.0, 0.0, 0.0, 1.0],
232        ])
233    }
234
235    /// Create 2D scale matrix.
236    #[inline]
237    #[must_use]
238    pub const fn scale_2d(x: f32, y: f32) -> Self {
239        Self::scale(x, y, 1.0)
240    }
241
242    /// Create uniform scale matrix.
243    #[inline]
244    #[must_use]
245    pub const fn scale_uniform(s: f32) -> Self {
246        Self::scale(s, s, s)
247    }
248
249    /// Create rotation around Z axis (2D rotation).
250    #[inline]
251    #[must_use]
252    pub fn rotation_z(angle_rad: f32) -> Self {
253        let (sin, cos) = angle_rad.sin_cos();
254        Self::from_data([
255            [cos, -sin, 0.0, 0.0],
256            [sin, cos, 0.0, 0.0],
257            [0.0, 0.0, 1.0, 0.0],
258            [0.0, 0.0, 0.0, 1.0],
259        ])
260    }
261
262    /// Create orthographic projection matrix.
263    #[inline]
264    #[must_use]
265    pub fn ortho(left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32) -> Self {
266        let width = right - left;
267        let height = top - bottom;
268        let depth = far - near;
269
270        Self::from_data([
271            [2.0 / width, 0.0, 0.0, -(right + left) / width],
272            [0.0, 2.0 / height, 0.0, -(top + bottom) / height],
273            [0.0, 0.0, -2.0 / depth, -(far + near) / depth],
274            [0.0, 0.0, 0.0, 1.0],
275        ])
276    }
277
278    /// Create orthographic projection for screen coordinates (Y down).
279    #[inline]
280    #[must_use]
281    pub fn ortho_screen(width: f32, height: f32) -> Self {
282        Self::ortho(0.0, width, height, 0.0, -1.0, 1.0)
283    }
284
285    /// Matrix multiplication.
286    #[inline]
287    #[must_use]
288    pub fn mul(&self, other: &Self) -> Self {
289        let mut result = Self::zero();
290        for i in 0..4 {
291            for j in 0..4 {
292                for k in 0..4 {
293                    result.data[i][j] += self.data[i][k] * other.data[k][j];
294                }
295            }
296        }
297        result
298    }
299
300    /// Transform a Vec4.
301    #[inline]
302    #[must_use]
303    pub fn transform_vec4(&self, v: Vec4) -> Vec4 {
304        Vec4::new(
305            self.data[0][3].mul_add(
306                v.w,
307                self.data[0][2].mul_add(v.z, self.data[0][0].mul_add(v.x, self.data[0][1] * v.y)),
308            ),
309            self.data[1][3].mul_add(
310                v.w,
311                self.data[1][2].mul_add(v.z, self.data[1][0].mul_add(v.x, self.data[1][1] * v.y)),
312            ),
313            self.data[2][3].mul_add(
314                v.w,
315                self.data[2][2].mul_add(v.z, self.data[2][0].mul_add(v.x, self.data[2][1] * v.y)),
316            ),
317            self.data[3][3].mul_add(
318                v.w,
319                self.data[3][2].mul_add(v.z, self.data[3][0].mul_add(v.x, self.data[3][1] * v.y)),
320            ),
321        )
322    }
323
324    /// Transform a 2D point (assumes z=0, w=1).
325    #[inline]
326    #[must_use]
327    pub fn transform_point(&self, p: Point) -> Point {
328        let v = self.transform_vec4(Vec4::from_point(p));
329        Point::new(v.x, v.y)
330    }
331
332    /// Transform a rectangle.
333    #[inline]
334    #[must_use]
335    pub fn transform_rect(&self, rect: &Rect) -> Rect {
336        let corners = [
337            Point::new(rect.x, rect.y),
338            Point::new(rect.x + rect.width, rect.y),
339            Point::new(rect.x + rect.width, rect.y + rect.height),
340            Point::new(rect.x, rect.y + rect.height),
341        ];
342
343        let transformed: Vec<Point> = corners.iter().map(|&p| self.transform_point(p)).collect();
344
345        let min_x = transformed
346            .iter()
347            .map(|p| p.x)
348            .fold(f32::INFINITY, f32::min);
349        let max_x = transformed
350            .iter()
351            .map(|p| p.x)
352            .fold(f32::NEG_INFINITY, f32::max);
353        let min_y = transformed
354            .iter()
355            .map(|p| p.y)
356            .fold(f32::INFINITY, f32::min);
357        let max_y = transformed
358            .iter()
359            .map(|p| p.y)
360            .fold(f32::NEG_INFINITY, f32::max);
361
362        Rect::new(min_x, min_y, max_x - min_x, max_y - min_y)
363    }
364
365    /// Get column as Vec4.
366    #[inline]
367    #[must_use]
368    pub const fn column(&self, idx: usize) -> Vec4 {
369        Vec4::new(
370            self.data[0][idx],
371            self.data[1][idx],
372            self.data[2][idx],
373            self.data[3][idx],
374        )
375    }
376
377    /// Get row as Vec4.
378    #[inline]
379    #[must_use]
380    pub const fn row(&self, idx: usize) -> Vec4 {
381        Vec4::new(
382            self.data[idx][0],
383            self.data[idx][1],
384            self.data[idx][2],
385            self.data[idx][3],
386        )
387    }
388
389    /// Transpose the matrix.
390    #[inline]
391    #[must_use]
392    pub const fn transpose(&self) -> Self {
393        Self::from_data([
394            [
395                self.data[0][0],
396                self.data[1][0],
397                self.data[2][0],
398                self.data[3][0],
399            ],
400            [
401                self.data[0][1],
402                self.data[1][1],
403                self.data[2][1],
404                self.data[3][1],
405            ],
406            [
407                self.data[0][2],
408                self.data[1][2],
409                self.data[2][2],
410                self.data[3][2],
411            ],
412            [
413                self.data[0][3],
414                self.data[1][3],
415                self.data[2][3],
416                self.data[3][3],
417            ],
418        ])
419    }
420}
421
422impl Default for Mat4 {
423    fn default() -> Self {
424        Self::identity()
425    }
426}
427
428impl std::ops::Mul for Mat4 {
429    type Output = Self;
430    fn mul(self, rhs: Self) -> Self {
431        Self::mul(&self, &rhs)
432    }
433}
434
435impl std::ops::Mul<Vec4> for Mat4 {
436    type Output = Vec4;
437    fn mul(self, rhs: Vec4) -> Vec4 {
438        self.transform_vec4(rhs)
439    }
440}
441
442/// Batch transform multiple points.
443///
444/// When SIMD is enabled, this uses vectorized operations for better performance.
445#[inline]
446#[must_use]
447pub fn batch_transform_points(points: &[Point], transform: &Mat4) -> Vec<Point> {
448    points
449        .iter()
450        .map(|&p| transform.transform_point(p))
451        .collect()
452}
453
454/// Batch transform multiple Vec4s.
455#[inline]
456#[must_use]
457pub fn batch_transform_vec4(vecs: &[Vec4], transform: &Mat4) -> Vec<Vec4> {
458    vecs.iter().map(|&v| transform.transform_vec4(v)).collect()
459}
460
461/// Batch linear interpolation.
462#[inline]
463#[must_use]
464pub fn batch_lerp_points(from: &[Point], to: &[Point], t: f32) -> Vec<Point> {
465    debug_assert_eq!(from.len(), to.len());
466    from.iter()
467        .zip(to.iter())
468        .map(|(a, b)| a.lerp(b, t))
469        .collect()
470}
471
472/// Axis-aligned bounding box from points.
473#[inline]
474#[must_use]
475pub fn bounding_box(points: &[Point]) -> Option<Rect> {
476    if points.is_empty() {
477        return None;
478    }
479
480    let mut min_x = f32::INFINITY;
481    let mut max_x = f32::NEG_INFINITY;
482    let mut min_y = f32::INFINITY;
483    let mut max_y = f32::NEG_INFINITY;
484
485    for p in points {
486        min_x = min_x.min(p.x);
487        max_x = max_x.max(p.x);
488        min_y = min_y.min(p.y);
489        max_y = max_y.max(p.y);
490    }
491
492    Some(Rect::new(min_x, min_y, max_x - min_x, max_y - min_y))
493}
494
495/// Calculate the centroid of points.
496#[inline]
497#[must_use]
498pub fn centroid(points: &[Point]) -> Option<Point> {
499    if points.is_empty() {
500        return None;
501    }
502
503    let sum: (f32, f32) = points
504        .iter()
505        .fold((0.0, 0.0), |acc, p| (acc.0 + p.x, acc.1 + p.y));
506    let n = points.len() as f32;
507    Some(Point::new(sum.0 / n, sum.1 / n))
508}
509
510/// Check if a point is inside a convex polygon.
511#[must_use]
512pub fn point_in_convex_polygon(point: Point, polygon: &[Point]) -> bool {
513    if polygon.len() < 3 {
514        return false;
515    }
516
517    let mut positive = false;
518    let mut negative = false;
519
520    for i in 0..polygon.len() {
521        let a = polygon[i];
522        let b = polygon[(i + 1) % polygon.len()];
523
524        let cross = (point.x - a.x).mul_add(b.y - a.y, -((point.y - a.y) * (b.x - a.x)));
525
526        if cross > 0.0 {
527            positive = true;
528        } else if cross < 0.0 {
529            negative = true;
530        }
531
532        if positive && negative {
533            return false;
534        }
535    }
536
537    true
538}
539
540/// Compute the area of a polygon using the shoelace formula.
541#[must_use]
542pub fn polygon_area(polygon: &[Point]) -> f32 {
543    if polygon.len() < 3 {
544        return 0.0;
545    }
546
547    let mut area = 0.0;
548    for i in 0..polygon.len() {
549        let j = (i + 1) % polygon.len();
550        area += polygon[i].x * polygon[j].y;
551        area -= polygon[j].x * polygon[i].y;
552    }
553
554    (area / 2.0).abs()
555}
556
557// =============================================================================
558// SIMD-accelerated implementations when trueno is available
559// =============================================================================
560
561#[cfg(feature = "simd")]
562mod simd_impl {
563    use super::Vec4;
564
565    /// SIMD vector type from trueno (f32).
566    pub type SimdVectorF32 = trueno::Vector<f32>;
567
568    /// Create a SIMD-backed vector from Vec4.
569    #[inline]
570    #[must_use]
571    pub fn vec4_to_simd(v: Vec4) -> SimdVectorF32 {
572        SimdVectorF32::from_slice(&[v.x, v.y, v.z, v.w])
573    }
574
575    /// Create Vec4 from SIMD vector.
576    #[inline]
577    #[must_use]
578    pub fn simd_to_vec4(v: &SimdVectorF32) -> Vec4 {
579        let slice = v.as_slice();
580        Vec4::new(
581            slice.first().copied().unwrap_or(0.0),
582            slice.get(1).copied().unwrap_or(0.0),
583            slice.get(2).copied().unwrap_or(0.0),
584            slice.get(3).copied().unwrap_or(0.0),
585        )
586    }
587
588    /// SIMD-accelerated batch add using trueno.
589    pub fn batch_add_simd(a: &[f32], b: &[f32]) -> trueno::Result<Vec<f32>> {
590        let va = SimdVectorF32::from_slice(a);
591        let vb = SimdVectorF32::from_slice(b);
592        let result = va.add(&vb)?;
593        Ok(result.as_slice().to_vec())
594    }
595
596    /// SIMD-accelerated dot product using trueno.
597    pub fn dot_simd(a: &[f32], b: &[f32]) -> trueno::Result<f32> {
598        let va = SimdVectorF32::from_slice(a);
599        let vb = SimdVectorF32::from_slice(b);
600        va.dot(&vb)
601    }
602
603    /// SIMD-accelerated scale using trueno.
604    pub fn scale_simd(a: &[f32], s: f32) -> trueno::Result<Vec<f32>> {
605        let va = SimdVectorF32::from_slice(a);
606        let result = va.scale(s)?;
607        Ok(result.as_slice().to_vec())
608    }
609
610    /// Get the best available SIMD backend.
611    #[must_use]
612    pub fn best_backend() -> trueno::Backend {
613        trueno::Backend::select_best()
614    }
615
616    /// SIMD-accelerated batch dot product.
617    pub fn batch_dot_product(a: &[Vec4], b: &[Vec4]) -> Vec<f32> {
618        debug_assert_eq!(a.len(), b.len());
619        a.iter().zip(b.iter()).map(|(va, vb)| va.dot(*vb)).collect()
620    }
621}
622
623#[cfg(feature = "simd")]
624pub use simd_impl::*;
625
626// =============================================================================
627// ComputeBlocks: Auto-Accelerated Aggregation Primitives
628// =============================================================================
629//
630// ComputeBlocks are data processing primitives with automatic acceleration.
631// They select the optimal execution path at compile time:
632//
633// 1. **Auto-vectorization**: Tight 4-way loops enable compiler SIMD codegen
634// 2. **ILP (Instruction-Level Parallelism)**: Multiple accumulators hide latency
635// 3. **Cache-friendly**: Sequential access patterns for prefetcher efficiency
636//
637// These primitives form the foundation for widget data processing:
638// - Charts: range calculation, normalization, statistics
639// - Heatmaps: binning, histogram generation
640// - Tables: aggregation, sorting support
641//
642// Reference: PROBAR-SPEC-009 (ComputeBlock Architecture)
643// Reference: docs/specifications/computeblocks-refactor.md Section 2
644
645/// SIMD-friendly sum of f64 values.
646///
647/// Uses 4-way accumulator for instruction-level parallelism and auto-vectorization.
648/// For small slices (<4), falls back to direct sum.
649///
650/// # Example
651/// ```
652/// use presentar_core::simd::batch_sum_f64;
653/// let values = vec![1.0, 2.0, 3.0, 4.0];
654/// assert_eq!(batch_sum_f64(&values), 10.0);
655/// ```
656#[inline]
657#[must_use]
658pub fn batch_sum_f64(values: &[f64]) -> f64 {
659    if values.len() < 4 {
660        return values.iter().sum();
661    }
662
663    // 4-way accumulator for ILP and auto-vectorization
664    let chunks = values.chunks_exact(4);
665    let remainder = chunks.remainder();
666
667    let mut acc = [0.0f64; 4];
668    for chunk in chunks {
669        acc[0] += chunk[0];
670        acc[1] += chunk[1];
671        acc[2] += chunk[2];
672        acc[3] += chunk[3];
673    }
674
675    let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
676    for &v in remainder {
677        sum += v;
678    }
679    sum
680}
681
682/// SIMD-friendly mean of f64 values.
683///
684/// Returns 0.0 for empty slices.
685///
686/// # Example
687/// ```
688/// use presentar_core::simd::batch_mean_f64;
689/// let values = vec![2.0, 4.0, 6.0, 8.0];
690/// assert_eq!(batch_mean_f64(&values), 5.0);
691/// ```
692#[inline]
693#[must_use]
694pub fn batch_mean_f64(values: &[f64]) -> f64 {
695    if values.is_empty() {
696        return 0.0;
697    }
698    batch_sum_f64(values) / values.len() as f64
699}
700
701/// SIMD-friendly min/max of f64 values.
702///
703/// Returns `None` for empty slices.
704/// Uses 4-way comparison for auto-vectorization.
705///
706/// # Example
707/// ```
708/// use presentar_core::simd::batch_min_max_f64;
709/// let values = vec![3.0, 1.0, 4.0, 1.0, 5.0, 9.0];
710/// let (min, max) = batch_min_max_f64(&values).unwrap();
711/// assert_eq!(min, 1.0);
712/// assert_eq!(max, 9.0);
713/// ```
714#[inline]
715#[must_use]
716pub fn batch_min_max_f64(values: &[f64]) -> Option<(f64, f64)> {
717    if values.is_empty() {
718        return None;
719    }
720
721    if values.len() < 4 {
722        let mut min = values[0];
723        let mut max = values[0];
724        for &v in &values[1..] {
725            min = min.min(v);
726            max = max.max(v);
727        }
728        return Some((min, max));
729    }
730
731    // 4-way min/max for auto-vectorization
732    let chunks = values.chunks_exact(4);
733    let remainder = chunks.remainder();
734
735    let mut min_acc = [f64::INFINITY; 4];
736    let mut max_acc = [f64::NEG_INFINITY; 4];
737
738    for chunk in chunks {
739        min_acc[0] = min_acc[0].min(chunk[0]);
740        min_acc[1] = min_acc[1].min(chunk[1]);
741        min_acc[2] = min_acc[2].min(chunk[2]);
742        min_acc[3] = min_acc[3].min(chunk[3]);
743
744        max_acc[0] = max_acc[0].max(chunk[0]);
745        max_acc[1] = max_acc[1].max(chunk[1]);
746        max_acc[2] = max_acc[2].max(chunk[2]);
747        max_acc[3] = max_acc[3].max(chunk[3]);
748    }
749
750    let mut min = min_acc[0].min(min_acc[1]).min(min_acc[2]).min(min_acc[3]);
751    let mut max = max_acc[0].max(max_acc[1]).max(max_acc[2]).max(max_acc[3]);
752
753    for &v in remainder {
754        min = min.min(v);
755        max = max.max(v);
756    }
757
758    Some((min, max))
759}
760
761/// SIMD-friendly normalization to [0, 1] range.
762///
763/// Normalizes values using `(v - min) / (max - min)`.
764/// Returns empty vec for empty input, all zeros if min == max.
765///
766/// # Example
767/// ```
768/// use presentar_core::simd::normalize_f64;
769/// let values = vec![0.0, 50.0, 100.0];
770/// let normalized = normalize_f64(&values);
771/// assert_eq!(normalized, vec![0.0, 0.5, 1.0]);
772/// ```
773#[inline]
774#[must_use]
775pub fn normalize_f64(values: &[f64]) -> Vec<f64> {
776    if values.is_empty() {
777        return Vec::new();
778    }
779
780    let Some((min, max)) = batch_min_max_f64(values) else {
781        return Vec::new();
782    };
783
784    let range = max - min;
785    if range == 0.0 {
786        return vec![0.0; values.len()];
787    }
788
789    let inv_range = 1.0 / range;
790    values.iter().map(|&v| (v - min) * inv_range).collect()
791}
792
793/// SIMD-friendly normalization with provided range.
794///
795/// Normalizes values using `(v - min) / (max - min)` with caller-provided bounds.
796/// More efficient when min/max are already known.
797///
798/// # Example
799/// ```
800/// use presentar_core::simd::normalize_with_range_f64;
801/// let values = vec![25.0, 50.0, 75.0];
802/// let normalized = normalize_with_range_f64(&values, 0.0, 100.0);
803/// assert_eq!(normalized, vec![0.25, 0.5, 0.75]);
804/// ```
805#[inline]
806#[must_use]
807pub fn normalize_with_range_f64(values: &[f64], min: f64, max: f64) -> Vec<f64> {
808    let range = max - min;
809    if range == 0.0 {
810        return vec![0.0; values.len()];
811    }
812
813    let inv_range = 1.0 / range;
814    values.iter().map(|&v| (v - min) * inv_range).collect()
815}
816
817/// SIMD-friendly scale operation.
818///
819/// Multiplies all values by a scalar.
820///
821/// # Example
822/// ```
823/// use presentar_core::simd::batch_scale_f64;
824/// let values = vec![1.0, 2.0, 3.0];
825/// let scaled = batch_scale_f64(&values, 2.0);
826/// assert_eq!(scaled, vec![2.0, 4.0, 6.0]);
827/// ```
828#[inline]
829#[must_use]
830pub fn batch_scale_f64(values: &[f64], scale: f64) -> Vec<f64> {
831    values.iter().map(|&v| v * scale).collect()
832}
833
834/// SIMD-friendly scale and offset operation.
835///
836/// Applies `v * scale + offset` to all values.
837/// Common operation for data transformation.
838///
839/// # Example
840/// ```
841/// use presentar_core::simd::batch_scale_offset_f64;
842/// let values = vec![0.0, 0.5, 1.0];
843/// // Map [0, 1] to [100, 200]
844/// let transformed = batch_scale_offset_f64(&values, 100.0, 100.0);
845/// assert_eq!(transformed, vec![100.0, 150.0, 200.0]);
846/// ```
847#[inline]
848#[must_use]
849pub fn batch_scale_offset_f64(values: &[f64], scale: f64, offset: f64) -> Vec<f64> {
850    values.iter().map(|&v| v.mul_add(scale, offset)).collect()
851}
852
853/// SIMD-friendly variance calculation (population variance).
854///
855/// Uses two-pass algorithm: first calculates mean, then variance.
856/// Returns 0.0 for empty or single-element slices.
857///
858/// # Example
859/// ```
860/// use presentar_core::simd::batch_variance_f64;
861/// let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
862/// let variance = batch_variance_f64(&values);
863/// assert!((variance - 4.0).abs() < 0.001);
864/// ```
865#[inline]
866#[must_use]
867pub fn batch_variance_f64(values: &[f64]) -> f64 {
868    if values.len() < 2 {
869        return 0.0;
870    }
871
872    let mean = batch_mean_f64(values);
873    let sum_sq: f64 = values.iter().map(|&v| (v - mean) * (v - mean)).sum();
874    sum_sq / values.len() as f64
875}
876
877/// SIMD-friendly standard deviation.
878///
879/// Returns square root of population variance.
880///
881/// # Example
882/// ```
883/// use presentar_core::simd::batch_stddev_f64;
884/// let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
885/// let stddev = batch_stddev_f64(&values);
886/// assert!((stddev - 2.0).abs() < 0.001);
887/// ```
888#[inline]
889#[must_use]
890pub fn batch_stddev_f64(values: &[f64]) -> f64 {
891    batch_variance_f64(values).sqrt()
892}
893
894/// SIMD-friendly weighted sum.
895///
896/// Computes sum(values[i] * weights[i]).
897/// Panics in debug mode if lengths differ.
898///
899/// # Example
900/// ```
901/// use presentar_core::simd::weighted_sum_f64;
902/// let values = vec![1.0, 2.0, 3.0];
903/// let weights = vec![0.5, 0.3, 0.2];
904/// let result = weighted_sum_f64(&values, &weights);
905/// assert!((result - 1.7).abs() < 0.001);
906/// ```
907#[inline]
908#[must_use]
909pub fn weighted_sum_f64(values: &[f64], weights: &[f64]) -> f64 {
910    debug_assert_eq!(values.len(), weights.len());
911
912    if values.len() < 4 {
913        return values
914            .iter()
915            .zip(weights.iter())
916            .map(|(&v, &w)| v * w)
917            .sum();
918    }
919
920    // 4-way accumulator
921    let v_chunks = values.chunks_exact(4);
922    let w_chunks = weights.chunks_exact(4);
923    let v_rem = v_chunks.remainder();
924    let w_rem = w_chunks.remainder();
925
926    let mut acc = [0.0f64; 4];
927    for (vc, wc) in v_chunks.zip(w_chunks) {
928        acc[0] = vc[0].mul_add(wc[0], acc[0]);
929        acc[1] = vc[1].mul_add(wc[1], acc[1]);
930        acc[2] = vc[2].mul_add(wc[2], acc[2]);
931        acc[3] = vc[3].mul_add(wc[3], acc[3]);
932    }
933
934    let mut sum = acc[0] + acc[1] + acc[2] + acc[3];
935    for (&v, &w) in v_rem.iter().zip(w_rem.iter()) {
936        sum = v.mul_add(w, sum);
937    }
938    sum
939}
940
941/// SIMD-friendly percentile calculation.
942///
943/// Finds the value at the given percentile (0.0-1.0).
944/// Uses linear interpolation for non-integer indices.
945/// Values must be sorted in ascending order.
946///
947/// # Example
948/// ```
949/// use presentar_core::simd::percentile_sorted_f64;
950/// let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
951/// assert_eq!(percentile_sorted_f64(&sorted, 0.5), 3.0); // median
952/// ```
953#[inline]
954#[must_use]
955pub fn percentile_sorted_f64(sorted_values: &[f64], p: f64) -> f64 {
956    if sorted_values.is_empty() {
957        return 0.0;
958    }
959    if sorted_values.len() == 1 {
960        return sorted_values[0];
961    }
962
963    let p = p.clamp(0.0, 1.0);
964    let n = sorted_values.len() as f64;
965    let idx = p * (n - 1.0);
966    let lower = idx.floor() as usize;
967    let upper = idx.ceil() as usize;
968
969    if lower == upper {
970        sorted_values[lower]
971    } else {
972        let frac = idx - lower as f64;
973        sorted_values[lower].mul_add(1.0 - frac, sorted_values[upper] * frac)
974    }
975}
976
977/// Compute histogram bin counts for f64 data.
978///
979/// Assigns values to bins and returns counts per bin.
980/// Efficient for visualization (heatmaps, histograms).
981///
982/// # Example
983/// ```
984/// use presentar_core::simd::histogram_f64;
985/// let values = vec![0.1, 0.5, 0.9, 1.5, 2.5, 3.5, 4.5];
986/// let counts = histogram_f64(&values, 0.0, 5.0, 5);
987/// assert_eq!(counts, vec![3, 1, 1, 1, 1]); // bins: [0-1), [1-2), [2-3), [3-4), [4-5]
988/// ```
989#[inline]
990#[must_use]
991pub fn histogram_f64(values: &[f64], min: f64, max: f64, num_bins: usize) -> Vec<usize> {
992    if num_bins == 0 || values.is_empty() {
993        return vec![0; num_bins];
994    }
995
996    let range = max - min;
997    if range <= 0.0 {
998        // All values in first bin
999        return {
1000            let mut counts = vec![0; num_bins];
1001            counts[0] = values.len();
1002            counts
1003        };
1004    }
1005
1006    let bin_width = range / num_bins as f64;
1007    let mut counts = vec![0usize; num_bins];
1008
1009    for &v in values {
1010        let bin = ((v - min) / bin_width) as usize;
1011        let bin = bin.min(num_bins - 1); // Clamp to last bin
1012        counts[bin] += 1;
1013    }
1014
1015    counts
1016}
1017
1018#[cfg(test)]
1019mod tests {
1020    use super::*;
1021
1022    // =========================================================================
1023    // Vec4 Tests
1024    // =========================================================================
1025
1026    #[test]
1027    fn test_vec4_new() {
1028        let v = Vec4::new(1.0, 2.0, 3.0, 4.0);
1029        assert_eq!(v.x, 1.0);
1030        assert_eq!(v.y, 2.0);
1031        assert_eq!(v.z, 3.0);
1032        assert_eq!(v.w, 4.0);
1033    }
1034
1035    #[test]
1036    fn test_vec4_zero() {
1037        let v = Vec4::zero();
1038        assert_eq!(v, Vec4::new(0.0, 0.0, 0.0, 0.0));
1039    }
1040
1041    #[test]
1042    fn test_vec4_default() {
1043        let v = Vec4::default();
1044        assert_eq!(v, Vec4::zero());
1045    }
1046
1047    #[test]
1048    fn test_vec4_from_point() {
1049        let p = Point::new(10.0, 20.0);
1050        let v = Vec4::from_point(p);
1051        assert_eq!(v, Vec4::new(10.0, 20.0, 0.0, 1.0));
1052    }
1053
1054    #[test]
1055    fn test_vec4_to_point() {
1056        let v = Vec4::new(5.0, 15.0, 25.0, 35.0);
1057        let p = v.to_point();
1058        assert_eq!(p, Point::new(5.0, 15.0));
1059    }
1060
1061    #[test]
1062    fn test_vec4_dot() {
1063        let a = Vec4::new(1.0, 2.0, 3.0, 4.0);
1064        let b = Vec4::new(2.0, 3.0, 4.0, 5.0);
1065        assert_eq!(
1066            a.dot(b),
1067            4.0f32.mul_add(5.0, 3.0f32.mul_add(4.0, 1.0f32.mul_add(2.0, 2.0 * 3.0)))
1068        );
1069    }
1070
1071    #[test]
1072    fn test_vec4_add() {
1073        let a = Vec4::new(1.0, 2.0, 3.0, 4.0);
1074        let b = Vec4::new(5.0, 6.0, 7.0, 8.0);
1075        let c = a.add(b);
1076        assert_eq!(c, Vec4::new(6.0, 8.0, 10.0, 12.0));
1077    }
1078
1079    #[test]
1080    fn test_vec4_sub() {
1081        let a = Vec4::new(5.0, 6.0, 7.0, 8.0);
1082        let b = Vec4::new(1.0, 2.0, 3.0, 4.0);
1083        let c = a.sub(b);
1084        assert_eq!(c, Vec4::new(4.0, 4.0, 4.0, 4.0));
1085    }
1086
1087    #[test]
1088    fn test_vec4_scale() {
1089        let v = Vec4::new(1.0, 2.0, 3.0, 4.0);
1090        let s = v.scale(2.0);
1091        assert_eq!(s, Vec4::new(2.0, 4.0, 6.0, 8.0));
1092    }
1093
1094    #[test]
1095    fn test_vec4_mul() {
1096        let a = Vec4::new(1.0, 2.0, 3.0, 4.0);
1097        let b = Vec4::new(2.0, 2.0, 2.0, 2.0);
1098        let c = a.mul(b);
1099        assert_eq!(c, Vec4::new(2.0, 4.0, 6.0, 8.0));
1100    }
1101
1102    #[test]
1103    fn test_vec4_lerp() {
1104        let a = Vec4::new(0.0, 0.0, 0.0, 0.0);
1105        let b = Vec4::new(10.0, 10.0, 10.0, 10.0);
1106        let c = a.lerp(b, 0.5);
1107        assert_eq!(c, Vec4::new(5.0, 5.0, 5.0, 5.0));
1108    }
1109
1110    #[test]
1111    fn test_vec4_length() {
1112        let v = Vec4::new(3.0, 4.0, 0.0, 0.0);
1113        assert!((v.length() - 5.0).abs() < 0.0001);
1114    }
1115
1116    #[test]
1117    fn test_vec4_normalize() {
1118        let v = Vec4::new(3.0, 4.0, 0.0, 0.0);
1119        let n = v.normalize();
1120        assert!((n.length() - 1.0).abs() < 0.0001);
1121    }
1122
1123    #[test]
1124    fn test_vec4_from_impl() {
1125        let p = Point::new(1.0, 2.0);
1126        let v: Vec4 = p.into();
1127        assert_eq!(v, Vec4::new(1.0, 2.0, 0.0, 1.0));
1128    }
1129
1130    // =========================================================================
1131    // Mat4 Tests
1132    // =========================================================================
1133
1134    #[test]
1135    fn test_mat4_identity() {
1136        let m = Mat4::identity();
1137        assert_eq!(m.data[0][0], 1.0);
1138        assert_eq!(m.data[1][1], 1.0);
1139        assert_eq!(m.data[2][2], 1.0);
1140        assert_eq!(m.data[3][3], 1.0);
1141        assert_eq!(m.data[0][1], 0.0);
1142    }
1143
1144    #[test]
1145    fn test_mat4_zero() {
1146        let m = Mat4::zero();
1147        for i in 0..4 {
1148            for j in 0..4 {
1149                assert_eq!(m.data[i][j], 0.0);
1150            }
1151        }
1152    }
1153
1154    #[test]
1155    fn test_mat4_default() {
1156        let m = Mat4::default();
1157        assert_eq!(m, Mat4::identity());
1158    }
1159
1160    #[test]
1161    fn test_mat4_translation() {
1162        let m = Mat4::translation(10.0, 20.0, 30.0);
1163        let p = Point::new(0.0, 0.0);
1164        let t = m.transform_point(p);
1165        assert_eq!(t, Point::new(10.0, 20.0));
1166    }
1167
1168    #[test]
1169    fn test_mat4_translation_2d() {
1170        let m = Mat4::translation_2d(5.0, 15.0);
1171        let p = Point::new(10.0, 10.0);
1172        let t = m.transform_point(p);
1173        assert_eq!(t, Point::new(15.0, 25.0));
1174    }
1175
1176    #[test]
1177    fn test_mat4_scale() {
1178        let m = Mat4::scale(2.0, 3.0, 4.0);
1179        let p = Point::new(10.0, 10.0);
1180        let t = m.transform_point(p);
1181        assert_eq!(t, Point::new(20.0, 30.0));
1182    }
1183
1184    #[test]
1185    fn test_mat4_scale_2d() {
1186        let m = Mat4::scale_2d(0.5, 2.0);
1187        let p = Point::new(10.0, 10.0);
1188        let t = m.transform_point(p);
1189        assert_eq!(t, Point::new(5.0, 20.0));
1190    }
1191
1192    #[test]
1193    fn test_mat4_scale_uniform() {
1194        let m = Mat4::scale_uniform(2.0);
1195        let p = Point::new(5.0, 5.0);
1196        let t = m.transform_point(p);
1197        assert_eq!(t, Point::new(10.0, 10.0));
1198    }
1199
1200    #[test]
1201    fn test_mat4_rotation_z() {
1202        use std::f32::consts::PI;
1203        let m = Mat4::rotation_z(PI / 2.0); // 90 degrees
1204        let p = Point::new(1.0, 0.0);
1205        let t = m.transform_point(p);
1206        // Should rotate (1, 0) to approximately (0, 1)
1207        assert!((t.x - 0.0).abs() < 0.0001);
1208        assert!((t.y - 1.0).abs() < 0.0001);
1209    }
1210
1211    #[test]
1212    fn test_mat4_mul_identity() {
1213        let a = Mat4::identity();
1214        let b = Mat4::identity();
1215        let c = a.mul(&b);
1216        assert_eq!(c, Mat4::identity());
1217    }
1218
1219    #[test]
1220    fn test_mat4_mul_combined_transform() {
1221        let translate = Mat4::translation_2d(10.0, 0.0);
1222        let scale = Mat4::scale_2d(2.0, 2.0);
1223        let combined = translate.mul(&scale);
1224
1225        let p = Point::new(5.0, 5.0);
1226        let t = combined.transform_point(p);
1227        // First scale (5,5) -> (10, 10), then translate -> (20, 10)
1228        assert_eq!(t, Point::new(20.0, 10.0));
1229    }
1230
1231    #[test]
1232    fn test_mat4_transform_rect() {
1233        let m = Mat4::scale_2d(2.0, 2.0);
1234        let rect = Rect::new(10.0, 10.0, 20.0, 30.0);
1235        let t = m.transform_rect(&rect);
1236        assert_eq!(t.x, 20.0);
1237        assert_eq!(t.y, 20.0);
1238        assert_eq!(t.width, 40.0);
1239        assert_eq!(t.height, 60.0);
1240    }
1241
1242    #[test]
1243    fn test_mat4_transpose() {
1244        let m = Mat4::from_data([
1245            [1.0, 2.0, 3.0, 4.0],
1246            [5.0, 6.0, 7.0, 8.0],
1247            [9.0, 10.0, 11.0, 12.0],
1248            [13.0, 14.0, 15.0, 16.0],
1249        ]);
1250        let t = m.transpose();
1251        assert_eq!(t.data[0][1], 5.0);
1252        assert_eq!(t.data[1][0], 2.0);
1253    }
1254
1255    #[test]
1256    fn test_mat4_column() {
1257        let m = Mat4::identity();
1258        let col = m.column(0);
1259        assert_eq!(col, Vec4::new(1.0, 0.0, 0.0, 0.0));
1260    }
1261
1262    #[test]
1263    fn test_mat4_row() {
1264        let m = Mat4::identity();
1265        let row = m.row(0);
1266        assert_eq!(row, Vec4::new(1.0, 0.0, 0.0, 0.0));
1267    }
1268
1269    #[test]
1270    fn test_mat4_ortho_screen() {
1271        let m = Mat4::ortho_screen(800.0, 600.0);
1272        // Point at top-left should map to (-1, 1) in NDC
1273        let p = m.transform_vec4(Vec4::new(0.0, 0.0, 0.0, 1.0));
1274        assert!((p.x - (-1.0)).abs() < 0.001);
1275        assert!((p.y - 1.0).abs() < 0.001);
1276    }
1277
1278    #[test]
1279    fn test_mat4_mul_operator() {
1280        let a = Mat4::translation_2d(10.0, 20.0);
1281        let b = Mat4::scale_2d(2.0, 2.0);
1282        let c = a * b;
1283        assert_eq!(c, a.mul(&b));
1284    }
1285
1286    #[test]
1287    fn test_mat4_mul_vec4_operator() {
1288        let m = Mat4::translation_2d(10.0, 20.0);
1289        let v = Vec4::new(0.0, 0.0, 0.0, 1.0);
1290        let r = m * v;
1291        assert_eq!(r, Vec4::new(10.0, 20.0, 0.0, 1.0));
1292    }
1293
1294    // =========================================================================
1295    // Batch Operation Tests
1296    // =========================================================================
1297
1298    #[test]
1299    fn test_batch_transform_points() {
1300        let m = Mat4::translation_2d(10.0, 10.0);
1301        let points = vec![Point::new(0.0, 0.0), Point::new(5.0, 5.0)];
1302        let result = batch_transform_points(&points, &m);
1303        assert_eq!(result[0], Point::new(10.0, 10.0));
1304        assert_eq!(result[1], Point::new(15.0, 15.0));
1305    }
1306
1307    #[test]
1308    fn test_batch_transform_vec4() {
1309        let m = Mat4::scale_uniform(2.0);
1310        let vecs = vec![Vec4::new(1.0, 1.0, 1.0, 0.0), Vec4::new(2.0, 2.0, 2.0, 0.0)];
1311        let result = batch_transform_vec4(&vecs, &m);
1312        assert_eq!(result[0], Vec4::new(2.0, 2.0, 2.0, 0.0));
1313        assert_eq!(result[1], Vec4::new(4.0, 4.0, 4.0, 0.0));
1314    }
1315
1316    #[test]
1317    fn test_batch_lerp_points() {
1318        let from = vec![Point::new(0.0, 0.0), Point::new(10.0, 10.0)];
1319        let to = vec![Point::new(10.0, 10.0), Point::new(20.0, 20.0)];
1320        let result = batch_lerp_points(&from, &to, 0.5);
1321        assert_eq!(result[0], Point::new(5.0, 5.0));
1322        assert_eq!(result[1], Point::new(15.0, 15.0));
1323    }
1324
1325    // =========================================================================
1326    // Geometry Tests
1327    // =========================================================================
1328
1329    #[test]
1330    fn test_bounding_box() {
1331        let points = vec![
1332            Point::new(0.0, 0.0),
1333            Point::new(10.0, 5.0),
1334            Point::new(5.0, 15.0),
1335        ];
1336        let bbox = bounding_box(&points).unwrap();
1337        assert_eq!(bbox.x, 0.0);
1338        assert_eq!(bbox.y, 0.0);
1339        assert_eq!(bbox.width, 10.0);
1340        assert_eq!(bbox.height, 15.0);
1341    }
1342
1343    #[test]
1344    fn test_bounding_box_empty() {
1345        let points: Vec<Point> = vec![];
1346        assert!(bounding_box(&points).is_none());
1347    }
1348
1349    #[test]
1350    fn test_centroid() {
1351        let points = vec![
1352            Point::new(0.0, 0.0),
1353            Point::new(10.0, 0.0),
1354            Point::new(10.0, 10.0),
1355            Point::new(0.0, 10.0),
1356        ];
1357        let c = centroid(&points).unwrap();
1358        assert_eq!(c, Point::new(5.0, 5.0));
1359    }
1360
1361    #[test]
1362    fn test_centroid_empty() {
1363        let points: Vec<Point> = vec![];
1364        assert!(centroid(&points).is_none());
1365    }
1366
1367    #[test]
1368    fn test_point_in_convex_polygon() {
1369        let square = vec![
1370            Point::new(0.0, 0.0),
1371            Point::new(10.0, 0.0),
1372            Point::new(10.0, 10.0),
1373            Point::new(0.0, 10.0),
1374        ];
1375
1376        assert!(point_in_convex_polygon(Point::new(5.0, 5.0), &square));
1377        assert!(!point_in_convex_polygon(Point::new(15.0, 5.0), &square));
1378    }
1379
1380    #[test]
1381    fn test_point_in_convex_polygon_edge() {
1382        let triangle = vec![
1383            Point::new(0.0, 0.0),
1384            Point::new(10.0, 0.0),
1385            Point::new(5.0, 10.0),
1386        ];
1387
1388        // On the edge
1389        assert!(point_in_convex_polygon(Point::new(5.0, 0.0), &triangle));
1390    }
1391
1392    #[test]
1393    fn test_polygon_area_square() {
1394        let square = vec![
1395            Point::new(0.0, 0.0),
1396            Point::new(10.0, 0.0),
1397            Point::new(10.0, 10.0),
1398            Point::new(0.0, 10.0),
1399        ];
1400        let area = polygon_area(&square);
1401        assert!((area - 100.0).abs() < 0.0001);
1402    }
1403
1404    #[test]
1405    fn test_polygon_area_triangle() {
1406        let triangle = vec![
1407            Point::new(0.0, 0.0),
1408            Point::new(10.0, 0.0),
1409            Point::new(5.0, 10.0),
1410        ];
1411        let area = polygon_area(&triangle);
1412        assert!((area - 50.0).abs() < 0.0001);
1413    }
1414
1415    #[test]
1416    fn test_polygon_area_too_few_points() {
1417        assert_eq!(polygon_area(&[]), 0.0);
1418        assert_eq!(polygon_area(&[Point::new(0.0, 0.0)]), 0.0);
1419        assert_eq!(
1420            polygon_area(&[Point::new(0.0, 0.0), Point::new(1.0, 1.0)]),
1421            0.0
1422        );
1423    }
1424
1425    // =========================================================================
1426    // SIMD Tests (when feature enabled)
1427    // =========================================================================
1428
1429    #[cfg(feature = "simd")]
1430    mod simd_tests {
1431        use super::*;
1432
1433        #[test]
1434        fn test_vec4_to_simd_roundtrip() {
1435            let v = Vec4::new(1.0, 2.0, 3.0, 4.0);
1436            let simd = vec4_to_simd(v);
1437            let back = simd_to_vec4(&simd);
1438            assert_eq!(v, back);
1439        }
1440
1441        #[test]
1442        fn test_batch_add_simd() {
1443            let a = vec![1.0, 2.0, 3.0, 4.0];
1444            let b = vec![5.0, 6.0, 7.0, 8.0];
1445            let result = batch_add_simd(&a, &b).unwrap();
1446            assert_eq!(result, vec![6.0, 8.0, 10.0, 12.0]);
1447        }
1448
1449        #[test]
1450        fn test_dot_simd() {
1451            let a = vec![1.0, 2.0, 3.0, 4.0];
1452            let b = vec![1.0, 1.0, 1.0, 1.0];
1453            let result = dot_simd(&a, &b).unwrap();
1454            assert_eq!(result, 10.0);
1455        }
1456
1457        #[test]
1458        fn test_scale_simd() {
1459            let a = vec![1.0, 2.0, 3.0, 4.0];
1460            let result = scale_simd(&a, 2.0).unwrap();
1461            assert_eq!(result, vec![2.0, 4.0, 6.0, 8.0]);
1462        }
1463
1464        #[test]
1465        fn test_best_backend() {
1466            let backend = best_backend();
1467            // Just check it doesn't panic and returns a valid backend
1468            assert!(matches!(
1469                backend,
1470                trueno::Backend::Scalar
1471                    | trueno::Backend::SSE2
1472                    | trueno::Backend::AVX
1473                    | trueno::Backend::AVX2
1474                    | trueno::Backend::AVX512
1475                    | trueno::Backend::NEON
1476                    | trueno::Backend::WasmSIMD
1477                    | trueno::Backend::GPU
1478                    | trueno::Backend::Auto
1479            ));
1480        }
1481
1482        #[test]
1483        fn test_batch_dot_product() {
1484            let a = vec![Vec4::new(1.0, 0.0, 0.0, 0.0), Vec4::new(0.0, 1.0, 0.0, 0.0)];
1485            let b = vec![Vec4::new(1.0, 0.0, 0.0, 0.0), Vec4::new(0.0, 1.0, 0.0, 0.0)];
1486            let dots = batch_dot_product(&a, &b);
1487            assert_eq!(dots, vec![1.0, 1.0]);
1488        }
1489    }
1490
1491    // =========================================================================
1492    // ComputeBlock Tests (f64 Aggregation)
1493    // =========================================================================
1494
1495    mod compute_block_tests {
1496        use super::*;
1497
1498        #[test]
1499        fn test_batch_sum_f64_empty() {
1500            assert_eq!(batch_sum_f64(&[]), 0.0);
1501        }
1502
1503        #[test]
1504        fn test_batch_sum_f64_small() {
1505            assert_eq!(batch_sum_f64(&[1.0, 2.0, 3.0]), 6.0);
1506        }
1507
1508        #[test]
1509        fn test_batch_sum_f64_large() {
1510            let values: Vec<f64> = (1..=100).map(f64::from).collect();
1511            assert_eq!(batch_sum_f64(&values), 5050.0);
1512        }
1513
1514        #[test]
1515        fn test_batch_sum_f64_exact_chunks() {
1516            // 8 elements = 2 exact chunks of 4
1517            let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1518            assert_eq!(batch_sum_f64(&values), 36.0);
1519        }
1520
1521        #[test]
1522        fn test_batch_mean_f64_empty() {
1523            assert_eq!(batch_mean_f64(&[]), 0.0);
1524        }
1525
1526        #[test]
1527        fn test_batch_mean_f64() {
1528            assert_eq!(batch_mean_f64(&[2.0, 4.0, 6.0, 8.0]), 5.0);
1529        }
1530
1531        #[test]
1532        fn test_batch_min_max_f64_empty() {
1533            assert!(batch_min_max_f64(&[]).is_none());
1534        }
1535
1536        #[test]
1537        fn test_batch_min_max_f64_single() {
1538            assert_eq!(batch_min_max_f64(&[42.0]), Some((42.0, 42.0)));
1539        }
1540
1541        #[test]
1542        fn test_batch_min_max_f64_small() {
1543            assert_eq!(batch_min_max_f64(&[3.0, 1.0, 2.0]), Some((1.0, 3.0)));
1544        }
1545
1546        #[test]
1547        fn test_batch_min_max_f64_large() {
1548            let values: Vec<f64> = (1..=256).map(f64::from).collect();
1549            assert_eq!(batch_min_max_f64(&values), Some((1.0, 256.0)));
1550        }
1551
1552        #[test]
1553        fn test_normalize_f64_empty() {
1554            assert!(normalize_f64(&[]).is_empty());
1555        }
1556
1557        #[test]
1558        fn test_normalize_f64_constant() {
1559            let result = normalize_f64(&[5.0, 5.0, 5.0]);
1560            assert_eq!(result, vec![0.0, 0.0, 0.0]);
1561        }
1562
1563        #[test]
1564        fn test_normalize_f64() {
1565            let result = normalize_f64(&[0.0, 50.0, 100.0]);
1566            assert_eq!(result, vec![0.0, 0.5, 1.0]);
1567        }
1568
1569        #[test]
1570        fn test_normalize_with_range_f64() {
1571            let result = normalize_with_range_f64(&[25.0, 50.0, 75.0], 0.0, 100.0);
1572            assert_eq!(result, vec![0.25, 0.5, 0.75]);
1573        }
1574
1575        #[test]
1576        fn test_batch_scale_f64() {
1577            let result = batch_scale_f64(&[1.0, 2.0, 3.0], 2.0);
1578            assert_eq!(result, vec![2.0, 4.0, 6.0]);
1579        }
1580
1581        #[test]
1582        fn test_batch_scale_offset_f64() {
1583            let result = batch_scale_offset_f64(&[0.0, 0.5, 1.0], 100.0, 100.0);
1584            assert_eq!(result, vec![100.0, 150.0, 200.0]);
1585        }
1586
1587        #[test]
1588        fn test_batch_variance_f64_empty() {
1589            assert_eq!(batch_variance_f64(&[]), 0.0);
1590        }
1591
1592        #[test]
1593        fn test_batch_variance_f64_single() {
1594            assert_eq!(batch_variance_f64(&[42.0]), 0.0);
1595        }
1596
1597        #[test]
1598        fn test_batch_variance_f64() {
1599            let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1600            let variance = batch_variance_f64(&values);
1601            assert!((variance - 4.0).abs() < 0.001);
1602        }
1603
1604        #[test]
1605        fn test_batch_stddev_f64() {
1606            let values = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
1607            let stddev = batch_stddev_f64(&values);
1608            assert!((stddev - 2.0).abs() < 0.001);
1609        }
1610
1611        #[test]
1612        fn test_weighted_sum_f64_small() {
1613            let values = vec![1.0, 2.0];
1614            let weights = vec![0.5, 0.5];
1615            assert_eq!(weighted_sum_f64(&values, &weights), 1.5);
1616        }
1617
1618        #[test]
1619        fn test_weighted_sum_f64_large() {
1620            let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
1621            let weights = vec![1.0; 8];
1622            assert_eq!(weighted_sum_f64(&values, &weights), 36.0);
1623        }
1624
1625        #[test]
1626        fn test_percentile_sorted_f64_empty() {
1627            assert_eq!(percentile_sorted_f64(&[], 0.5), 0.0);
1628        }
1629
1630        #[test]
1631        fn test_percentile_sorted_f64_single() {
1632            assert_eq!(percentile_sorted_f64(&[42.0], 0.5), 42.0);
1633        }
1634
1635        #[test]
1636        fn test_percentile_sorted_f64_median() {
1637            let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1638            assert_eq!(percentile_sorted_f64(&sorted, 0.5), 3.0);
1639        }
1640
1641        #[test]
1642        fn test_percentile_sorted_f64_quartiles() {
1643            let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
1644            assert_eq!(percentile_sorted_f64(&sorted, 0.0), 1.0);
1645            assert_eq!(percentile_sorted_f64(&sorted, 1.0), 5.0);
1646            assert_eq!(percentile_sorted_f64(&sorted, 0.25), 2.0);
1647            assert_eq!(percentile_sorted_f64(&sorted, 0.75), 4.0);
1648        }
1649
1650        #[test]
1651        fn test_histogram_f64_empty() {
1652            assert_eq!(histogram_f64(&[], 0.0, 10.0, 5), vec![0; 5]);
1653        }
1654
1655        #[test]
1656        fn test_histogram_f64() {
1657            let values = vec![0.1, 0.5, 0.9, 1.5, 2.5, 3.5, 4.5];
1658            let counts = histogram_f64(&values, 0.0, 5.0, 5);
1659            assert_eq!(counts, vec![3, 1, 1, 1, 1]);
1660        }
1661
1662        #[test]
1663        fn test_histogram_f64_edge_values() {
1664            // Values exactly at bin boundaries
1665            let values = vec![0.0, 1.0, 2.0, 3.0, 4.0, 4.999];
1666            let counts = histogram_f64(&values, 0.0, 5.0, 5);
1667            assert_eq!(counts, vec![1, 1, 1, 1, 2]); // 4.0 and 4.999 in last bin
1668        }
1669
1670        #[test]
1671        fn test_histogram_f64_uniform() {
1672            // Uniform distribution across bins
1673            let values: Vec<f64> = (0..100).map(f64::from).collect();
1674            let counts = histogram_f64(&values, 0.0, 100.0, 10);
1675            // Each bin should have 10 values
1676            assert!(counts.iter().all(|&c| c == 10));
1677        }
1678    }
1679}