Skip to main content

shape_runtime/intrinsics/
statistical.rs

1//! Statistical intrinsics — full migration to typed marshal layer.
2//!
3//! Per the intrinsics-typed-CC migration's per-file table (see
4//! `docs/defections.md` 2026-05-07 intrinsics-typed-CC entry's predicted
5//! error-drop calibration subsection), all 4 statistical intrinsics
6//! (`correlation`, `covariance`, `percentile`, `median`) migrate to
7//! `register_typed_fn_N` typed entries via [`create_statistical_intrinsics_module`].
8//!
9//! `percentile` and `median` mutate an owned f64 copy — the marshal layer's
10//! always-clone semantics for owned-data input ensure this is safe (per
11//! the let-vs-var Rust-like-lifetime analysis at `docs/defections.md`
12//! 2026-05-07 zero-copy entry lines 281-291).
13//!
14//! Provides efficient implementations of correlation, covariance,
15//! percentiles, and other statistical measures.
16
17use crate::marshal::{register_typed_fn_1, register_typed_fn_2};
18use crate::module_exports::ModuleExports;
19use crate::simd_statistics;
20use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
21use std::sync::Arc;
22
23// ───────────────────── Module factory (4 typed entries) ─────────────────────
24
25/// Create the statistical intrinsics module with 4 typed-marshal entry points.
26pub fn create_statistical_intrinsics_module() -> ModuleExports {
27    let mut module = ModuleExports::new("std::core::intrinsics::statistical");
28    module.description = "Statistical intrinsics (correlation, covariance, percentile, median)"
29        .to_string();
30
31    register_typed_fn_2::<_, Arc<Vec<f64>>, Arc<Vec<f64>>>(
32        &mut module,
33        "__intrinsic_correlation",
34        "Pearson correlation coefficient between two Vec<number>",
35        [("series_a", "Array<number>"), ("series_b", "Array<number>")],
36        ConcreteType::Number,
37        |a, b, _ctx| {
38            let data_a = a.as_slice();
39            let data_b = b.as_slice();
40            if data_a.len() != data_b.len() {
41                return Err(format!(
42                    "Column lengths must match: {} != {}",
43                    data_a.len(),
44                    data_b.len()
45                ));
46            }
47            if data_a.is_empty() {
48                return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
49            }
50            let result = simd_statistics::correlation(data_a, data_b);
51            Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
52        },
53    );
54
55    register_typed_fn_2::<_, Arc<Vec<f64>>, Arc<Vec<f64>>>(
56        &mut module,
57        "__intrinsic_covariance",
58        "Covariance between two Vec<number>",
59        [("series_a", "Array<number>"), ("series_b", "Array<number>")],
60        ConcreteType::Number,
61        |a, b, _ctx| {
62            let data_a = a.as_slice();
63            let data_b = b.as_slice();
64            if data_a.len() != data_b.len() {
65                return Err("Column lengths must match".to_string());
66            }
67            if data_a.is_empty() {
68                return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
69            }
70            let result = simd_statistics::covariance(data_a, data_b);
71            Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
72        },
73    );
74
75    register_typed_fn_2::<_, Arc<Vec<f64>>, f64>(
76        &mut module,
77        "__intrinsic_percentile",
78        "Percentile (0-100) of a Vec<number> via O(n) average-case quickselect",
79        [("series", "Array<number>"), ("percentile", "number")],
80        ConcreteType::Number,
81        |series, percentile, _ctx| {
82            if !(0.0..=100.0).contains(&percentile) {
83                return Err("Percentile must be between 0 and 100".to_string());
84            }
85            let data = series.as_slice();
86            if data.is_empty() {
87                return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
88            }
89            let mut values = data.to_vec();
90            let n = values.len();
91            let k = ((percentile / 100.0) * (n - 1) as f64).round() as usize;
92            let result = quickselect(&mut values, k);
93            Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
94        },
95    );
96
97    register_typed_fn_1::<_, Arc<Vec<f64>>>(
98        &mut module,
99        "__intrinsic_median",
100        "Median (50th percentile) of a Vec<number>",
101        "series",
102        "Array<number>",
103        ConcreteType::Number,
104        |series, _ctx| {
105            let slice = series.as_slice();
106            if slice.is_empty() {
107                return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
108            }
109            let mut data = slice.to_vec();
110            data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
111            let n = data.len();
112            let result = if n % 2 == 0 {
113                (data[n / 2 - 1] + data[n / 2]) / 2.0
114            } else {
115                data[n / 2]
116            };
117            Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
118        },
119    );
120
121    module
122}
123
124// ───────────────────── Helpers (used by typed bodies) ─────────────────────
125
126/// Quickselect algorithm for O(n) average case percentile calculation.
127fn quickselect(arr: &mut [f64], k: usize) -> f64 {
128    if arr.len() == 1 {
129        return arr[0];
130    }
131
132    let k = k.min(arr.len() - 1);
133    let mut left = 0;
134    let mut right = arr.len() - 1;
135
136    loop {
137        if left == right {
138            return arr[left];
139        }
140
141        let mid = left + (right - left) / 2;
142        let pivot_idx = median_of_three(arr, left, mid, right);
143        let pivot_idx = partition(arr, left, right, pivot_idx);
144
145        if k == pivot_idx {
146            return arr[k];
147        } else if k < pivot_idx {
148            right = pivot_idx - 1;
149        } else {
150            left = pivot_idx + 1;
151        }
152    }
153}
154
155fn median_of_three(arr: &[f64], a: usize, b: usize, c: usize) -> usize {
156    if (arr[a] <= arr[b] && arr[b] <= arr[c]) || (arr[c] <= arr[b] && arr[b] <= arr[a]) {
157        b
158    } else if (arr[b] <= arr[a] && arr[a] <= arr[c]) || (arr[c] <= arr[a] && arr[a] <= arr[b]) {
159        a
160    } else {
161        c
162    }
163}
164
165fn partition(arr: &mut [f64], left: usize, right: usize, pivot_idx: usize) -> usize {
166    let pivot_value = arr[pivot_idx];
167    arr.swap(pivot_idx, right);
168    let mut store_idx = left;
169    for i in left..right {
170        if arr[i] < pivot_value {
171            arr.swap(i, store_idx);
172            store_idx += 1;
173        }
174    }
175    arr.swap(store_idx, right);
176    store_idx
177}