shape_runtime/intrinsics/
statistical.rs1use crate::marshal::{register_typed_fn_1, register_typed_fn_2};
18use crate::module_exports::ModuleExports;
19use crate::simd_statistics;
20use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
21use std::sync::Arc;
22
23pub fn create_statistical_intrinsics_module() -> ModuleExports {
27 let mut module = ModuleExports::new("std::core::intrinsics::statistical");
28 module.description = "Statistical intrinsics (correlation, covariance, percentile, median)"
29 .to_string();
30
31 register_typed_fn_2::<_, Arc<Vec<f64>>, Arc<Vec<f64>>>(
32 &mut module,
33 "__intrinsic_correlation",
34 "Pearson correlation coefficient between two Vec<number>",
35 [("series_a", "Array<number>"), ("series_b", "Array<number>")],
36 ConcreteType::Number,
37 |a, b, _ctx| {
38 let data_a = a.as_slice();
39 let data_b = b.as_slice();
40 if data_a.len() != data_b.len() {
41 return Err(format!(
42 "Column lengths must match: {} != {}",
43 data_a.len(),
44 data_b.len()
45 ));
46 }
47 if data_a.is_empty() {
48 return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
49 }
50 let result = simd_statistics::correlation(data_a, data_b);
51 Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
52 },
53 );
54
55 register_typed_fn_2::<_, Arc<Vec<f64>>, Arc<Vec<f64>>>(
56 &mut module,
57 "__intrinsic_covariance",
58 "Covariance between two Vec<number>",
59 [("series_a", "Array<number>"), ("series_b", "Array<number>")],
60 ConcreteType::Number,
61 |a, b, _ctx| {
62 let data_a = a.as_slice();
63 let data_b = b.as_slice();
64 if data_a.len() != data_b.len() {
65 return Err("Column lengths must match".to_string());
66 }
67 if data_a.is_empty() {
68 return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
69 }
70 let result = simd_statistics::covariance(data_a, data_b);
71 Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
72 },
73 );
74
75 register_typed_fn_2::<_, Arc<Vec<f64>>, f64>(
76 &mut module,
77 "__intrinsic_percentile",
78 "Percentile (0-100) of a Vec<number> via O(n) average-case quickselect",
79 [("series", "Array<number>"), ("percentile", "number")],
80 ConcreteType::Number,
81 |series, percentile, _ctx| {
82 if !(0.0..=100.0).contains(&percentile) {
83 return Err("Percentile must be between 0 and 100".to_string());
84 }
85 let data = series.as_slice();
86 if data.is_empty() {
87 return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
88 }
89 let mut values = data.to_vec();
90 let n = values.len();
91 let k = ((percentile / 100.0) * (n - 1) as f64).round() as usize;
92 let result = quickselect(&mut values, k);
93 Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
94 },
95 );
96
97 register_typed_fn_1::<_, Arc<Vec<f64>>>(
98 &mut module,
99 "__intrinsic_median",
100 "Median (50th percentile) of a Vec<number>",
101 "series",
102 "Array<number>",
103 ConcreteType::Number,
104 |series, _ctx| {
105 let slice = series.as_slice();
106 if slice.is_empty() {
107 return Ok(TypedReturn::Concrete(ConcreteReturn::F64(f64::NAN)));
108 }
109 let mut data = slice.to_vec();
110 data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
111 let n = data.len();
112 let result = if n % 2 == 0 {
113 (data[n / 2 - 1] + data[n / 2]) / 2.0
114 } else {
115 data[n / 2]
116 };
117 Ok(TypedReturn::Concrete(ConcreteReturn::F64(result)))
118 },
119 );
120
121 module
122}
123
124fn quickselect(arr: &mut [f64], k: usize) -> f64 {
128 if arr.len() == 1 {
129 return arr[0];
130 }
131
132 let k = k.min(arr.len() - 1);
133 let mut left = 0;
134 let mut right = arr.len() - 1;
135
136 loop {
137 if left == right {
138 return arr[left];
139 }
140
141 let mid = left + (right - left) / 2;
142 let pivot_idx = median_of_three(arr, left, mid, right);
143 let pivot_idx = partition(arr, left, right, pivot_idx);
144
145 if k == pivot_idx {
146 return arr[k];
147 } else if k < pivot_idx {
148 right = pivot_idx - 1;
149 } else {
150 left = pivot_idx + 1;
151 }
152 }
153}
154
155fn median_of_three(arr: &[f64], a: usize, b: usize, c: usize) -> usize {
156 if (arr[a] <= arr[b] && arr[b] <= arr[c]) || (arr[c] <= arr[b] && arr[b] <= arr[a]) {
157 b
158 } else if (arr[b] <= arr[a] && arr[a] <= arr[c]) || (arr[c] <= arr[a] && arr[a] <= arr[b]) {
159 a
160 } else {
161 c
162 }
163}
164
165fn partition(arr: &mut [f64], left: usize, right: usize, pivot_idx: usize) -> usize {
166 let pivot_value = arr[pivot_idx];
167 arr.swap(pivot_idx, right);
168 let mut store_idx = left;
169 for i in left..right {
170 if arr[i] < pivot_value {
171 arr.swap(i, store_idx);
172 store_idx += 1;
173 }
174 }
175 arr.swap(store_idx, right);
176 store_idx
177}