rhai_sci/statistics.rs
1use rhai::plugin::*;
2
3#[export_module]
4pub mod stats {
5 use crate::{
6 array_to_vec_float, array_to_vec_int, if_list_convert_to_vec_float_and_do, if_list_do,
7 if_list_do_int_or_do_float,
8 };
9 #[cfg(feature = "nalgebra")]
10 use rhai::Map;
11 use rhai::{Array, Dynamic, EvalAltResult, Position, FLOAT, INT};
12
13 #[cfg(feature = "nalgebra")]
14 use std::collections::BTreeMap;
15 use std::collections::HashMap;
16
17 /// Return the highest value from a pair of numbers. Fails if the numbers are anything other
18 /// than INT or FLOAT.
19 /// ```typescript
20 /// let the_higher_number = max(2, 3);
21 /// assert_eq(the_higher_number, 3);
22 /// ```
23 /// ```typescript
24 /// let the_higher_number = max(2.0, 3.0);
25 /// assert_eq(the_higher_number, 3.0);
26 /// ```
27 #[rhai_fn(name = "max", return_raw)]
28 pub fn gen_max(a: Dynamic, b: Dynamic) -> Result<Dynamic, Box<EvalAltResult>> {
29 array_max(&mut vec![a, b])
30 }
31
32 /// Return the highest value from an array. Fails if the input is not an array, or if
33 /// it is an array with elements other than INT or FLOAT.
34 /// ```typescript
35 /// let the_highest_number = max([2, 3, 4, 5]);
36 /// assert_eq(the_highest_number, 5);
37 /// ```
38 /// ```typescript
39 /// let the_highest_number = max([2, 3.0, 4.12, 5]);
40 /// assert_eq(the_highest_number, 5.0);
41 /// ```
42 #[rhai_fn(name = "max", return_raw)]
43 pub fn array_max(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
44 if_list_do_int_or_do_float(
45 arr,
46 |arr: &mut Array| {
47 let mut y = array_to_vec_int(arr);
48 y.sort();
49 Ok(Dynamic::from(y[y.len() - 1]))
50 },
51 |arr: &mut Array| {
52 let mut y = array_to_vec_float(arr);
53 y.sort_by(|a, b| a.partial_cmp(b).unwrap());
54 Ok(Dynamic::from(y[y.len() - 1]))
55 },
56 )
57 }
58
59 /// Return the lowest value from a pair of numbers. Fails if the numbers are anything other
60 /// than INT or FLOAT.
61 ///
62 /// ```typescript
63 /// let the_lower_number = min(2, 3);
64 /// assert_eq(the_lower_number, 2);
65 /// ```
66 /// ```typescript
67 /// let the_lower_number = min(2.0, 3.0);
68 /// assert_eq(the_lower_number, 2.0);
69 /// ```
70 #[rhai_fn(name = "min", return_raw)]
71 pub fn gen_min(a: Dynamic, b: Dynamic) -> Result<Dynamic, Box<EvalAltResult>> {
72 array_min(&mut vec![a, b])
73 }
74
75 /// Return the lowest value from an array. Fails if the input is not an array, or if
76 /// it is an array with elements other than INT or FLOAT.
77 ///
78 /// ```typescript
79 /// let the_lowest_number = min([2, 3, 4, 5]);
80 /// assert_eq(the_lowest_number, 2);
81 /// ```
82 /// ```typescript
83 /// let the_lowest_number = min([2, 3.0, 4.12, 5]);
84 /// assert_eq(the_lowest_number, 2.0);
85 /// ```
86 #[rhai_fn(name = "min", return_raw, pure)]
87 pub fn array_min(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
88 if_list_do_int_or_do_float(
89 arr,
90 |arr: &mut Array| {
91 let mut y = array_to_vec_int(arr);
92 y.sort();
93 Ok(Dynamic::from(y[0]))
94 },
95 |arr: &mut Array| {
96 let mut y = array_to_vec_float(arr);
97 y.sort_by(|a, b| a.partial_cmp(b).unwrap());
98 Ok(Dynamic::from(y[0]))
99 },
100 )
101 }
102
103 /// Return the highest value from an array. Fails if the input is not an array, or if
104 /// it is an array with elements other than INT or FLOAT.
105 /// ```typescript
106 /// let high_and_low = bounds([2, 3, 4, 5]);
107 /// assert_eq(high_and_low, [2, 5]);
108 /// ```
109 #[rhai_fn(name = "bounds", return_raw)]
110 pub fn bounds(arr: &mut Array) -> Result<Array, Box<EvalAltResult>> {
111 match (array_min(arr), array_max(arr)) {
112 (Ok(low), Ok(high)) => Ok(vec![low, high]),
113 (Ok(_), Err(high)) => Err(high),
114 (Err(low), Ok(_)) => Err(low),
115 (Err(low), Err(_)) => Err(low),
116 }
117 }
118
119 /// Returns the `k` highest values from an array. Fails if the input is not an array, or if
120 /// it is an array with elements other than INT or FLOAT.
121 /// ```typescript
122 /// let data = [32, 15, -7, 10, 1000, 41, 42];
123 /// let mk = maxk(data, 3);
124 /// assert_eq(mk, [41, 42, 1000]);
125 /// ```
126 /// ```typescript
127 /// let data = [32, 15, -7.0, 10, 1000, 41.0, 42];
128 /// let mk = maxk(data, 3);
129 /// assert_eq(mk, [41.0, 42.0, 1000.0]);
130 /// ```
131 #[rhai_fn(name = "maxk", return_raw, pure)]
132 pub fn maxk(arr: &mut Array, k: INT) -> Result<Array, Box<EvalAltResult>> {
133 if_list_do_int_or_do_float(
134 arr,
135 |arr: &mut Array| {
136 let mut y = array_to_vec_int(arr);
137 y.sort();
138 let r = (y.len() - (k as usize))..(y.len());
139 let mut v = Array::new();
140 for idx in r {
141 v.push(Dynamic::from(y[idx]));
142 }
143 Ok(v)
144 },
145 |arr: &mut Array| {
146 let mut y = array_to_vec_float(arr);
147 y.sort_by(|a, b| a.partial_cmp(b).unwrap());
148 let r = (y.len() - (k as usize))..(y.len());
149 let mut v = Array::new();
150 for idx in r {
151 v.push(Dynamic::from(y[idx]));
152 }
153 Ok(v)
154 },
155 )
156 }
157
158 /// Return the `k` lowest values in an array. Fails if the input is not an array, or if
159 /// it is an array with elements other than INT or FLOAT.
160 /// ```typescript
161 /// let data = [32, 15, -7, 10, 1000, 41, 42];
162 /// let mk = mink(data, 3);
163 /// assert_eq(mk, [-7, 10, 15]);
164 /// ```
165 /// ```typescript
166 /// let data = [32, 15.1223232, -7, 10, 1000.00000, 41, 42];
167 /// let mk = mink(data, 3);
168 /// assert_eq(mk, [-7.0, 10.0, 15.1223232]);
169 /// ```
170 #[rhai_fn(name = "mink", return_raw, pure)]
171 pub fn mink(arr: &mut Array, k: INT) -> Result<Array, Box<EvalAltResult>> {
172 if_list_do_int_or_do_float(
173 arr,
174 |arr| {
175 let mut y = array_to_vec_int(arr);
176 y.sort();
177 let r = (0 as usize)..(k as usize);
178 let mut v = Array::new();
179 for idx in r {
180 v.push(Dynamic::from(y[idx]));
181 }
182 Ok(v)
183 },
184 |arr| {
185 let mut y = array_to_vec_float(arr);
186 y.sort_by(|a, b| a.partial_cmp(b).unwrap());
187 let r = (0 as usize)..(k as usize);
188 let mut v = Array::new();
189 for idx in r {
190 v.push(Dynamic::from(y[idx]));
191 }
192 Ok(v)
193 },
194 )
195 }
196
197 /// Sum an array. Fails if the input is not an array, or if
198 /// it is an array with elements other than INT or FLOAT.
199 /// ```typescript
200 /// let data = [1, 2, 3];
201 /// let m = sum(data);
202 /// assert_eq(m, 6);
203 /// ```
204 /// ```typescript
205 /// let data = [1, 2.0, 3];
206 /// let m = sum(data);
207 /// assert_eq(m, 6.0);
208 /// ```
209 #[rhai_fn(name = "sum", return_raw, pure)]
210 pub fn sum(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
211 if_list_do_int_or_do_float(
212 arr,
213 |arr| {
214 let y = array_to_vec_int(arr);
215 Ok(Dynamic::from_int(y.iter().sum()))
216 },
217 |arr| {
218 let y = array_to_vec_float(arr);
219 Ok(Dynamic::from_float(y.iter().sum()))
220 },
221 )
222 }
223
224 /// Return the average of an array. Fails if the input is not an array, or if
225 /// it is an array with elements other than INT or FLOAT.
226 /// ```typescript
227 /// let data = [1, 2, 3];
228 /// let m = mean(data);
229 /// assert_eq(m, 2.0);
230 /// ```
231 #[rhai_fn(name = "mean", return_raw, pure)]
232 pub fn mean(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
233 let l = arr.len() as FLOAT;
234 if_list_do_int_or_do_float(
235 arr,
236 |arr: &mut Array| {
237 sum(arr).map(|s| Dynamic::from_float(s.as_int().unwrap() as FLOAT / l))
238 },
239 |arr: &mut Array| sum(arr).map(|s| Dynamic::from_float(s.as_float().unwrap() / l)),
240 )
241 }
242
243 /// Return the index of the largest array element. Fails if the input is not an array, or if
244 /// it is an array with elements other than INT or FLOAT.
245 /// ```typescript
246 /// let data = [1, 2, 3];
247 /// let m = argmax(data);
248 /// assert_eq(m, 2);
249 /// ```
250 #[rhai_fn(name = "argmax", return_raw, pure)]
251 pub fn argmax(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
252 if_list_do(arr, |arr| {
253 array_max(arr).map(|m| {
254 Dynamic::from_int(
255 arr.iter()
256 .position(|r| format!("{r}") == format!("{m}"))
257 .unwrap() as INT,
258 )
259 })
260 })
261 }
262
263 /// Return the index of the smallest array element. Fails if the input is not an array, or if
264 /// it is an array with elements other than INT or FLOAT.
265 /// ```typescript
266 /// let data = [1, 2, 3];
267 /// let m = argmin(data);
268 /// assert_eq(m, 0);
269 /// ```
270 #[rhai_fn(name = "argmin", return_raw, pure)]
271 pub fn argmin(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
272 if_list_do(arr, |arr| {
273 array_min(arr).map(|m| {
274 Dynamic::from_int(
275 arr.iter()
276 .position(|r| format!("{r}") == format!("{m}"))
277 .unwrap() as INT,
278 )
279 })
280 })
281 }
282
283 /// Compute the product of an array. Fails if the input is not an array, or if
284 /// it is an array with elements other than INT or FLOAT.
285 /// ```typescript
286 /// let data = [1, 2, 3];
287 /// let m = prod(data);
288 /// assert_eq(m, 6);
289 /// ```
290 /// ```typescript
291 /// let data = [3, 6, 10];
292 /// let m = prod(data);
293 /// assert_eq(m, 180);
294 /// ```
295 #[rhai_fn(name = "prod", return_raw, pure)]
296 pub fn prod(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
297 if_list_do_int_or_do_float(
298 arr,
299 |arr| {
300 let mut p = 1 as INT;
301 for el in arr {
302 p *= el.as_int().unwrap()
303 }
304 Ok(Dynamic::from_int(p))
305 },
306 |arr| {
307 let mut p = 1.0 as FLOAT;
308 for el in arr {
309 p *= el.as_float().unwrap()
310 }
311 Ok(Dynamic::from_float(p))
312 },
313 )
314 }
315
316 /// Returns the variance of a 1-D array.
317 /// ```typescript
318 /// let data = [1, 2, 3];
319 /// let v = variance(data);
320 /// assert_eq(v, 1.0);
321 /// ```
322 #[rhai_fn(name = "variance", return_raw, pure)]
323 pub fn variance(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
324 let m = mean(arr).map(|med| med.as_float().unwrap())?;
325
326 if_list_convert_to_vec_float_and_do(arr, |x| {
327 let mut sum = 0.0 as FLOAT;
328
329 for v in &x {
330 sum += (v - m).powi(2)
331 }
332 let d = sum / (x.len() as FLOAT - 1.0);
333 Ok(Dynamic::from_float(d))
334 })
335 }
336
337 /// Returns the standard deviation of a 1-D array.
338 /// ```typescript
339 /// let data = [1, 2, 3];
340 /// let v = std(data);
341 /// assert_eq(v, 1.0);
342 /// ```
343 #[rhai_fn(name = "std", return_raw, pure)]
344 pub fn std(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
345 variance(arr).map(|v| Dynamic::from_float(v.as_float().unwrap().sqrt()))
346 }
347
348 /// Returns the variance of a 1-D array.
349 /// ```typescript
350 /// let data = [1, 2, 3, 4, 5];
351 /// let r = rms(data);
352 /// assert_eq(r, 3.3166247903554);
353 /// ```
354 #[rhai_fn(name = "rms", return_raw, pure)]
355 pub fn rms(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
356 if_list_convert_to_vec_float_and_do(arr, |arr| {
357 let mut sum = 0.0 as FLOAT;
358 for v in &arr {
359 sum += v.powi(2)
360 }
361 let d = sum / (arr.len() as FLOAT);
362 Ok(Dynamic::from_float(d.sqrt()))
363 })
364 }
365
366 /// Returns the variance of a 1-D array.
367 /// ```typescript
368 /// let data = [1, 1, 1, 1, 2, 5, 6, 7, 8];
369 /// let m = median(data);
370 /// assert_eq(m, 2.0);
371 /// ```
372 #[rhai_fn(name = "median", return_raw, pure)]
373 pub fn median(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
374 if_list_convert_to_vec_float_and_do(arr, |mut x| {
375 x.sort_by(|a, b| a.partial_cmp(b).unwrap());
376
377 let med = if x.len() % 2 == 1 {
378 x[(x.len() - 1) / 2]
379 } else {
380 (x[x.len() / 2] + x[x.len() / 2 - 1]) / 2.0
381 };
382
383 Ok(Dynamic::from_float(med))
384 })
385 }
386
387 /// Returns the median absolute deviation of a 1-D array.
388 /// ```typescript
389 /// let data = [1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.5, 6.0, 6.0, 6.5, 7.0, 7.0, 7.5, 8.0, 9.0, 12.0, 52.0, 90.0];
390 /// let m = mad(data);
391 /// assert_eq(m, 2.0);
392 /// ```
393 #[rhai_fn(name = "mad", return_raw, pure)]
394 pub fn mad(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
395 let m = median(arr).map(|med| med.as_float().unwrap())?;
396
397 if_list_convert_to_vec_float_and_do(arr, |x| {
398 let mut dev = vec![];
399 for v in x {
400 dev.push(Dynamic::from_float((v - m).abs()));
401 }
402 median(&mut dev)
403 })
404 }
405
406 /// Returns a given percentile value for a 1-D array of data.
407 ///
408 /// The array must not be empty.
409 ///
410 /// If the percentile value is <= 0 or >= 100, returns the minimum and maximum values of the array respectively.
411 /// ```typescript
412 /// let data = [1, 2, 0, 3, 4];
413 /// let p = prctile(data, 0);
414 /// assert_eq(p, 0.0);
415 /// ```
416 /// ```typescript
417 /// let data = [1, 2, 0, 3, 4];
418 /// let p = prctile(data, 50);
419 /// assert_eq(p, 2.0);
420 /// ```
421 /// ```typescript
422 /// let data = [1, 2, 0, 3, 4];
423 /// let p = prctile(data, 100);
424 /// assert_eq(p, 4.0);
425 /// ```
426 #[rhai_fn(name = "prctile", return_raw, pure)]
427 pub fn prctile(arr: &mut Array, p: Dynamic) -> Result<FLOAT, Box<EvalAltResult>> {
428 if arr.is_empty() {
429 return Err(EvalAltResult::ErrorArithmetic(
430 "Array must not be empty".to_string(),
431 Position::NONE,
432 )
433 .into());
434 }
435 if !p.is_float() && !p.is_int() {
436 return Err(EvalAltResult::ErrorArithmetic(
437 "Percentile value must either be INT or FLOAT".to_string(),
438 Position::NONE,
439 )
440 .into());
441 }
442
443 if_list_convert_to_vec_float_and_do(arr, move |mut float_array| {
444 match float_array.len() {
445 0 => unreachable!(),
446 1 => return Ok(float_array[0]),
447 _ => (),
448 }
449
450 // Sort
451 float_array.sort_by(|a, b| a.partial_cmp(b).unwrap());
452
453 let sorted_array = float_array
454 .iter()
455 .map(|el| Dynamic::from_float(*el))
456 .collect::<Array>();
457
458 let mut x = crate::matrix_functions::linspace(
459 Dynamic::from_int(0),
460 Dynamic::from_int(100),
461 float_array.len() as INT,
462 )?;
463 crate::misc_functions::interp1(&mut x, sorted_array, p.clone())
464 })
465 }
466
467 /// Returns the inter-quartile range for a 1-D array.
468 /// ```typescript
469 /// let data = [1, 1, 1, 1, 1, 1, 1, 5, 6, 9, 9, 9, 9, 9, 9, 9, 9];
470 /// let inter_quartile_range = iqr(data);
471 /// assert_eq(inter_quartile_range, 8.0);
472 /// ```
473 #[rhai_fn(name = "iqr", return_raw, pure)]
474 pub fn iqr(arr: &mut Array) -> Result<FLOAT, Box<EvalAltResult>> {
475 match (
476 prctile(arr, Dynamic::from_int(25)),
477 prctile(arr, Dynamic::from_int(75)),
478 ) {
479 (Ok(low), Ok(high)) => Ok(high - low),
480 (Ok(_), Err(high)) => Err(high),
481 (Err(low), Ok(_)) => Err(low),
482 (Err(low), Err(_)) => Err(low),
483 }
484 }
485
486 /// Returns the mode of a 1-D array.
487 /// ```typescript
488 /// let data = [1, 2, 2, 2, 2, 3];
489 /// let m = mode(data);
490 /// assert_eq(m, 2);
491 /// ```
492 /// ```typescript
493 /// let data = [1.0, 2.0, 2.0, 2.0, 2.0, 3.0];
494 /// let m = mode(data);
495 /// assert_eq(m, 2.0);
496 /// ```
497 #[rhai_fn(name = "mode", return_raw, pure)]
498 pub fn mode(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
499 if_list_do_int_or_do_float(
500 arr,
501 |arr| {
502 let v = array_to_vec_int(arr);
503
504 let mut counts: HashMap<INT, usize> = HashMap::new();
505
506 Ok(Dynamic::from_int(
507 v.iter()
508 .copied()
509 .max_by_key(|&n| {
510 let count = counts.entry(n).or_insert(0);
511 *count += 1;
512 *count
513 })
514 .unwrap(),
515 ))
516 },
517 |arr| {
518 let v = array_to_vec_float(arr);
519
520 let mut counts: HashMap<String, usize> = HashMap::new();
521
522 Ok(Dynamic::from_float(
523 v.iter()
524 .copied()
525 .max_by_key(|&n| {
526 let count = counts.entry(format!("{:?}", n)).or_insert(0);
527 *count += 1;
528 *count
529 })
530 .unwrap(),
531 ))
532 },
533 )
534 }
535
/// Performs ordinary least squares regression and provides a statistical assessment.
///
/// Each column of `x` becomes one regressor (named `x_0`, `x_1`, ...) and the flattened
/// `y` is the response. The returned map holds the `parameters`, `pvalues` and
/// `standard_errors` arrays reported by the fitted model. Fails, rather than panicking, if
/// the regression data cannot be built or the model cannot be fitted.
/// ```typescript
/// let x = [[1.0, 0.0],
///          [1.0, 1.0],
///          [1.0, 2.0]];
/// let y = [[0.1],
///          [0.8],
///          [2.1]];
/// let b = regress(x, y);
/// assert_eq(b,  #{"parameters": [-2.220446049250313e-16, 1.0000000000000002],
///                 "pvalues": [1.0, 0.10918255350924745],
///                 "standard_errors": [0.11180339887498947, 0.17320508075688767]});
/// ```
#[cfg(feature = "nalgebra")]
#[rhai_fn(name = "regress", return_raw, pure)]
pub fn regress(x: &mut Array, y: Array) -> Result<Map, Box<EvalAltResult>> {
    use linregress::{FormulaRegressionBuilder, RegressionDataBuilder};

    // Every failure in this function is reported through the same error variant.
    let to_error = |message: String| -> Box<EvalAltResult> {
        EvalAltResult::ErrorArithmetic(message, Position::NONE).into()
    };

    // Transposing turns each column of `x` into one row, i.e. one regressor.
    let x_transposed = crate::matrix_functions::transpose(x)?;
    let mut data: Vec<(String, Vec<f64>)> = Vec::with_capacity(x_transposed.len() + 1);
    let mut vars = Vec::with_capacity(x_transposed.len());
    for (index, column) in x_transposed.iter().enumerate() {
        let var_name = format!("x_{index}");
        let mut column = column.clone().into_array().map_err(|type_name| {
            to_error(format!(
                "Expected every column of x to be an array, found {type_name}"
            ))
        })?;
        vars.push(var_name.clone());
        data.push((var_name, array_to_vec_float(&mut column)));
    }

    // `y` is owned, so it can be flattened directly without cloning it first.
    let mut y = y;
    data.push((
        "y".to_string(),
        array_to_vec_float(&mut crate::matrix_functions::flatten(&mut y)),
    ));

    let regress_data = RegressionDataBuilder::new()
        .build_from(data)
        .map_err(|e| to_error(e.to_string()))?;

    let model = FormulaRegressionBuilder::new()
        .data(&regress_data)
        .data_columns("y", vars)
        .fit()
        .map_err(|e| to_error(e.to_string()))?;

    let parameters = Dynamic::from_array(
        model
            .iter_parameter_pairs()
            .map(|pair| Dynamic::from_float(pair.1))
            .collect::<Array>(),
    );
    let pvalues = Dynamic::from_array(
        model
            .iter_p_value_pairs()
            .map(|pair| Dynamic::from_float(pair.1))
            .collect::<Array>(),
    );
    let standard_errors = Dynamic::from_array(
        model
            .iter_se_pairs()
            .map(|pair| Dynamic::from_float(pair.1))
            .collect::<Array>(),
    );

    let mut result: Map = BTreeMap::new();
    result.insert("parameters".into(), parameters);
    result.insert("pvalues".into(), pvalues);
    result.insert("standard_errors".into(), standard_errors);
    Ok(result)
}
608}