rhai_sci/
statistics.rs

1use rhai::plugin::*;
2
3#[export_module]
4pub mod stats {
5    use crate::{
6        array_to_vec_float, array_to_vec_int, if_list_convert_to_vec_float_and_do, if_list_do,
7        if_list_do_int_or_do_float,
8    };
9    #[cfg(feature = "nalgebra")]
10    use rhai::Map;
11    use rhai::{Array, Dynamic, EvalAltResult, Position, FLOAT, INT};
12
13    #[cfg(feature = "nalgebra")]
14    use std::collections::BTreeMap;
15    use std::collections::HashMap;
16
17    /// Return the highest value from a pair of numbers. Fails if the numbers are anything other
18    /// than INT or FLOAT.
19    /// ```typescript
20    /// let the_higher_number = max(2, 3);
21    /// assert_eq(the_higher_number, 3);
22    /// ```
23    /// ```typescript
24    /// let the_higher_number = max(2.0, 3.0);
25    /// assert_eq(the_higher_number, 3.0);
26    /// ```
27    #[rhai_fn(name = "max", return_raw)]
28    pub fn gen_max(a: Dynamic, b: Dynamic) -> Result<Dynamic, Box<EvalAltResult>> {
29        array_max(&mut vec![a, b])
30    }
31
32    /// Return the highest value from an array. Fails if the input is not an array, or if
33    /// it is an array with elements other than INT or FLOAT.
34    /// ```typescript
35    /// let the_highest_number = max([2, 3, 4, 5]);
36    /// assert_eq(the_highest_number, 5);
37    /// ```
38    /// ```typescript
39    /// let the_highest_number = max([2, 3.0, 4.12, 5]);
40    /// assert_eq(the_highest_number, 5.0);
41    /// ```
42    #[rhai_fn(name = "max", return_raw)]
43    pub fn array_max(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
44        if_list_do_int_or_do_float(
45            arr,
46            |arr: &mut Array| {
47                let mut y = array_to_vec_int(arr);
48                y.sort();
49                Ok(Dynamic::from(y[y.len() - 1]))
50            },
51            |arr: &mut Array| {
52                let mut y = array_to_vec_float(arr);
53                y.sort_by(|a, b| a.partial_cmp(b).unwrap());
54                Ok(Dynamic::from(y[y.len() - 1]))
55            },
56        )
57    }
58
59    /// Return the lowest value from a pair of numbers. Fails if the numbers are anything other
60    /// than INT or FLOAT.
61    ///
62    /// ```typescript
63    /// let the_lower_number = min(2, 3);
64    /// assert_eq(the_lower_number, 2);
65    /// ```
66    /// ```typescript
67    /// let the_lower_number = min(2.0, 3.0);
68    /// assert_eq(the_lower_number, 2.0);
69    /// ```
70    #[rhai_fn(name = "min", return_raw)]
71    pub fn gen_min(a: Dynamic, b: Dynamic) -> Result<Dynamic, Box<EvalAltResult>> {
72        array_min(&mut vec![a, b])
73    }
74
75    /// Return the lowest value from an array. Fails if the input is not an array, or if
76    /// it is an array with elements other than INT or FLOAT.
77    ///
78    /// ```typescript
79    /// let the_lowest_number = min([2, 3, 4, 5]);
80    /// assert_eq(the_lowest_number, 2);
81    /// ```
82    /// ```typescript
83    /// let the_lowest_number = min([2, 3.0, 4.12, 5]);
84    /// assert_eq(the_lowest_number, 2.0);
85    /// ```
86    #[rhai_fn(name = "min", return_raw, pure)]
87    pub fn array_min(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
88        if_list_do_int_or_do_float(
89            arr,
90            |arr: &mut Array| {
91                let mut y = array_to_vec_int(arr);
92                y.sort();
93                Ok(Dynamic::from(y[0]))
94            },
95            |arr: &mut Array| {
96                let mut y = array_to_vec_float(arr);
97                y.sort_by(|a, b| a.partial_cmp(b).unwrap());
98                Ok(Dynamic::from(y[0]))
99            },
100        )
101    }
102
103    /// Return the highest value from an array. Fails if the input is not an array, or if
104    /// it is an array with elements other than INT or FLOAT.
105    /// ```typescript
106    /// let high_and_low = bounds([2, 3, 4, 5]);
107    /// assert_eq(high_and_low, [2, 5]);
108    /// ```
109    #[rhai_fn(name = "bounds", return_raw)]
110    pub fn bounds(arr: &mut Array) -> Result<Array, Box<EvalAltResult>> {
111        match (array_min(arr), array_max(arr)) {
112            (Ok(low), Ok(high)) => Ok(vec![low, high]),
113            (Ok(_), Err(high)) => Err(high),
114            (Err(low), Ok(_)) => Err(low),
115            (Err(low), Err(_)) => Err(low),
116        }
117    }
118
119    /// Returns the `k` highest values from an array. Fails if the input is not an array, or if
120    /// it is an array with elements other than INT or FLOAT.
121    /// ```typescript
122    /// let data = [32, 15, -7, 10, 1000, 41, 42];
123    /// let mk = maxk(data, 3);
124    /// assert_eq(mk, [41, 42, 1000]);
125    /// ```
126    /// ```typescript
127    /// let data = [32, 15, -7.0, 10, 1000, 41.0, 42];
128    /// let mk = maxk(data, 3);
129    /// assert_eq(mk, [41.0, 42.0, 1000.0]);
130    /// ```
131    #[rhai_fn(name = "maxk", return_raw, pure)]
132    pub fn maxk(arr: &mut Array, k: INT) -> Result<Array, Box<EvalAltResult>> {
133        if_list_do_int_or_do_float(
134            arr,
135            |arr: &mut Array| {
136                let mut y = array_to_vec_int(arr);
137                y.sort();
138                let r = (y.len() - (k as usize))..(y.len());
139                let mut v = Array::new();
140                for idx in r {
141                    v.push(Dynamic::from(y[idx]));
142                }
143                Ok(v)
144            },
145            |arr: &mut Array| {
146                let mut y = array_to_vec_float(arr);
147                y.sort_by(|a, b| a.partial_cmp(b).unwrap());
148                let r = (y.len() - (k as usize))..(y.len());
149                let mut v = Array::new();
150                for idx in r {
151                    v.push(Dynamic::from(y[idx]));
152                }
153                Ok(v)
154            },
155        )
156    }
157
158    /// Return the `k` lowest values in an array. Fails if the input is not an array, or if
159    /// it is an array with elements other than INT or FLOAT.
160    /// ```typescript
161    /// let data = [32, 15, -7, 10, 1000, 41, 42];
162    /// let mk = mink(data, 3);
163    /// assert_eq(mk, [-7, 10, 15]);
164    /// ```
165    /// ```typescript
166    /// let data = [32, 15.1223232, -7, 10, 1000.00000, 41, 42];
167    /// let mk = mink(data, 3);
168    /// assert_eq(mk, [-7.0, 10.0, 15.1223232]);
169    /// ```
170    #[rhai_fn(name = "mink", return_raw, pure)]
171    pub fn mink(arr: &mut Array, k: INT) -> Result<Array, Box<EvalAltResult>> {
172        if_list_do_int_or_do_float(
173            arr,
174            |arr| {
175                let mut y = array_to_vec_int(arr);
176                y.sort();
177                let r = (0 as usize)..(k as usize);
178                let mut v = Array::new();
179                for idx in r {
180                    v.push(Dynamic::from(y[idx]));
181                }
182                Ok(v)
183            },
184            |arr| {
185                let mut y = array_to_vec_float(arr);
186                y.sort_by(|a, b| a.partial_cmp(b).unwrap());
187                let r = (0 as usize)..(k as usize);
188                let mut v = Array::new();
189                for idx in r {
190                    v.push(Dynamic::from(y[idx]));
191                }
192                Ok(v)
193            },
194        )
195    }
196
197    /// Sum an array. Fails if the input is not an array, or if
198    /// it is an array with elements other than INT or FLOAT.
199    /// ```typescript
200    /// let data = [1, 2, 3];
201    /// let m = sum(data);
202    /// assert_eq(m, 6);
203    /// ```
204    /// ```typescript
205    /// let data = [1, 2.0, 3];
206    /// let m = sum(data);
207    /// assert_eq(m, 6.0);
208    /// ```
209    #[rhai_fn(name = "sum", return_raw, pure)]
210    pub fn sum(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
211        if_list_do_int_or_do_float(
212            arr,
213            |arr| {
214                let y = array_to_vec_int(arr);
215                Ok(Dynamic::from_int(y.iter().sum()))
216            },
217            |arr| {
218                let y = array_to_vec_float(arr);
219                Ok(Dynamic::from_float(y.iter().sum()))
220            },
221        )
222    }
223
224    /// Return the average of an array. Fails if the input is not an array, or if
225    /// it is an array with elements other than INT or FLOAT.
226    /// ```typescript
227    /// let data = [1, 2, 3];
228    /// let m = mean(data);
229    /// assert_eq(m, 2.0);
230    /// ```
231    #[rhai_fn(name = "mean", return_raw, pure)]
232    pub fn mean(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
233        let l = arr.len() as FLOAT;
234        if_list_do_int_or_do_float(
235            arr,
236            |arr: &mut Array| {
237                sum(arr).map(|s| Dynamic::from_float(s.as_int().unwrap() as FLOAT / l))
238            },
239            |arr: &mut Array| sum(arr).map(|s| Dynamic::from_float(s.as_float().unwrap() / l)),
240        )
241    }
242
243    /// Return the index of the largest array element. Fails if the input is not an array, or if
244    /// it is an array with elements other than INT or FLOAT.
245    /// ```typescript
246    /// let data = [1, 2, 3];
247    /// let m = argmax(data);
248    /// assert_eq(m, 2);
249    /// ```
250    #[rhai_fn(name = "argmax", return_raw, pure)]
251    pub fn argmax(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
252        if_list_do(arr, |arr| {
253            array_max(arr).map(|m| {
254                Dynamic::from_int(
255                    arr.iter()
256                        .position(|r| format!("{r}") == format!("{m}"))
257                        .unwrap() as INT,
258                )
259            })
260        })
261    }
262
263    /// Return the index of the smallest array element. Fails if the input is not an array, or if
264    /// it is an array with elements other than INT or FLOAT.
265    /// ```typescript
266    /// let data = [1, 2, 3];
267    /// let m = argmin(data);
268    /// assert_eq(m, 0);
269    /// ```
270    #[rhai_fn(name = "argmin", return_raw, pure)]
271    pub fn argmin(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
272        if_list_do(arr, |arr| {
273            array_min(arr).map(|m| {
274                Dynamic::from_int(
275                    arr.iter()
276                        .position(|r| format!("{r}") == format!("{m}"))
277                        .unwrap() as INT,
278                )
279            })
280        })
281    }
282
283    /// Compute the product of an array. Fails if the input is not an array, or if
284    /// it is an array with elements other than INT or FLOAT.
285    /// ```typescript
286    /// let data = [1, 2, 3];
287    /// let m = prod(data);
288    /// assert_eq(m, 6);
289    /// ```
290    /// ```typescript
291    /// let data = [3, 6, 10];
292    /// let m = prod(data);
293    /// assert_eq(m, 180);
294    /// ```
295    #[rhai_fn(name = "prod", return_raw, pure)]
296    pub fn prod(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
297        if_list_do_int_or_do_float(
298            arr,
299            |arr| {
300                let mut p = 1 as INT;
301                for el in arr {
302                    p *= el.as_int().unwrap()
303                }
304                Ok(Dynamic::from_int(p))
305            },
306            |arr| {
307                let mut p = 1.0 as FLOAT;
308                for el in arr {
309                    p *= el.as_float().unwrap()
310                }
311                Ok(Dynamic::from_float(p))
312            },
313        )
314    }
315
316    /// Returns the variance of a 1-D array.
317    /// ```typescript
318    /// let data = [1, 2, 3];
319    /// let v = variance(data);
320    /// assert_eq(v, 1.0);
321    /// ```
322    #[rhai_fn(name = "variance", return_raw, pure)]
323    pub fn variance(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
324        let m = mean(arr).map(|med| med.as_float().unwrap())?;
325
326        if_list_convert_to_vec_float_and_do(arr, |x| {
327            let mut sum = 0.0 as FLOAT;
328
329            for v in &x {
330                sum += (v - m).powi(2)
331            }
332            let d = sum / (x.len() as FLOAT - 1.0);
333            Ok(Dynamic::from_float(d))
334        })
335    }
336
337    /// Returns the standard deviation of a 1-D array.
338    /// ```typescript
339    /// let data = [1, 2, 3];
340    /// let v = std(data);
341    /// assert_eq(v, 1.0);
342    /// ```
343    #[rhai_fn(name = "std", return_raw, pure)]
344    pub fn std(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
345        variance(arr).map(|v| Dynamic::from_float(v.as_float().unwrap().sqrt()))
346    }
347
348    /// Returns the variance of a 1-D array.
349    /// ```typescript
350    /// let data = [1, 2, 3, 4, 5];
351    /// let r = rms(data);
352    /// assert_eq(r, 3.3166247903554);
353    /// ```
354    #[rhai_fn(name = "rms", return_raw, pure)]
355    pub fn rms(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
356        if_list_convert_to_vec_float_and_do(arr, |arr| {
357            let mut sum = 0.0 as FLOAT;
358            for v in &arr {
359                sum += v.powi(2)
360            }
361            let d = sum / (arr.len() as FLOAT);
362            Ok(Dynamic::from_float(d.sqrt()))
363        })
364    }
365
366    /// Returns the variance of a 1-D array.
367    /// ```typescript
368    /// let data = [1, 1, 1, 1, 2, 5, 6, 7, 8];
369    /// let m = median(data);
370    /// assert_eq(m, 2.0);
371    /// ```
372    #[rhai_fn(name = "median", return_raw, pure)]
373    pub fn median(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
374        if_list_convert_to_vec_float_and_do(arr, |mut x| {
375            x.sort_by(|a, b| a.partial_cmp(b).unwrap());
376
377            let med = if x.len() % 2 == 1 {
378                x[(x.len() - 1) / 2]
379            } else {
380                (x[x.len() / 2] + x[x.len() / 2 - 1]) / 2.0
381            };
382
383            Ok(Dynamic::from_float(med))
384        })
385    }
386
387    /// Returns the median absolute deviation of a 1-D array.
388    /// ```typescript
389    /// let data = [1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 4.0, 5.0, 5.5, 6.0, 6.0, 6.5, 7.0, 7.0, 7.5, 8.0, 9.0, 12.0, 52.0, 90.0];
390    /// let m = mad(data);
391    /// assert_eq(m, 2.0);
392    /// ```
393    #[rhai_fn(name = "mad", return_raw, pure)]
394    pub fn mad(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
395        let m = median(arr).map(|med| med.as_float().unwrap())?;
396
397        if_list_convert_to_vec_float_and_do(arr, |x| {
398            let mut dev = vec![];
399            for v in x {
400                dev.push(Dynamic::from_float((v - m).abs()));
401            }
402            median(&mut dev)
403        })
404    }
405
406    /// Returns a given percentile value for a 1-D array of data.
407    ///
408    /// The array must not be empty.
409    ///
410    /// If the percentile value is <= 0 or >= 100, returns the minimum and maximum values of the array respectively.
411    /// ```typescript
412    /// let data = [1, 2, 0, 3, 4];
413    /// let p = prctile(data, 0);
414    /// assert_eq(p, 0.0);
415    /// ```
416    /// ```typescript
417    /// let data = [1, 2, 0, 3, 4];
418    /// let p = prctile(data, 50);
419    /// assert_eq(p, 2.0);
420    /// ```
421    /// ```typescript
422    /// let data = [1, 2, 0, 3, 4];
423    /// let p = prctile(data, 100);
424    /// assert_eq(p, 4.0);
425    /// ```
426    #[rhai_fn(name = "prctile", return_raw, pure)]
427    pub fn prctile(arr: &mut Array, p: Dynamic) -> Result<FLOAT, Box<EvalAltResult>> {
428        if arr.is_empty() {
429            return Err(EvalAltResult::ErrorArithmetic(
430                "Array must not be empty".to_string(),
431                Position::NONE,
432            )
433            .into());
434        }
435        if !p.is_float() && !p.is_int() {
436            return Err(EvalAltResult::ErrorArithmetic(
437                "Percentile value must either be INT or FLOAT".to_string(),
438                Position::NONE,
439            )
440            .into());
441        }
442
443        if_list_convert_to_vec_float_and_do(arr, move |mut float_array| {
444            match float_array.len() {
445                0 => unreachable!(),
446                1 => return Ok(float_array[0]),
447                _ => (),
448            }
449
450            // Sort
451            float_array.sort_by(|a, b| a.partial_cmp(b).unwrap());
452
453            let sorted_array = float_array
454                .iter()
455                .map(|el| Dynamic::from_float(*el))
456                .collect::<Array>();
457
458            let mut x = crate::matrix_functions::linspace(
459                Dynamic::from_int(0),
460                Dynamic::from_int(100),
461                float_array.len() as INT,
462            )?;
463            crate::misc_functions::interp1(&mut x, sorted_array, p.clone())
464        })
465    }
466
467    /// Returns the inter-quartile range for a 1-D array.
468    /// ```typescript
469    /// let data = [1, 1, 1, 1, 1, 1, 1, 5, 6, 9, 9, 9, 9, 9, 9, 9, 9];
470    /// let inter_quartile_range = iqr(data);
471    /// assert_eq(inter_quartile_range, 8.0);
472    /// ```
473    #[rhai_fn(name = "iqr", return_raw, pure)]
474    pub fn iqr(arr: &mut Array) -> Result<FLOAT, Box<EvalAltResult>> {
475        match (
476            prctile(arr, Dynamic::from_int(25)),
477            prctile(arr, Dynamic::from_int(75)),
478        ) {
479            (Ok(low), Ok(high)) => Ok(high - low),
480            (Ok(_), Err(high)) => Err(high),
481            (Err(low), Ok(_)) => Err(low),
482            (Err(low), Err(_)) => Err(low),
483        }
484    }
485
486    /// Returns the mode of a 1-D array.
487    /// ```typescript
488    /// let data = [1, 2, 2, 2, 2, 3];
489    /// let m = mode(data);
490    /// assert_eq(m, 2);
491    /// ```
492    /// ```typescript
493    /// let data = [1.0, 2.0, 2.0, 2.0, 2.0, 3.0];
494    /// let m = mode(data);
495    /// assert_eq(m, 2.0);
496    /// ```
497    #[rhai_fn(name = "mode", return_raw, pure)]
498    pub fn mode(arr: &mut Array) -> Result<Dynamic, Box<EvalAltResult>> {
499        if_list_do_int_or_do_float(
500            arr,
501            |arr| {
502                let v = array_to_vec_int(arr);
503
504                let mut counts: HashMap<INT, usize> = HashMap::new();
505
506                Ok(Dynamic::from_int(
507                    v.iter()
508                        .copied()
509                        .max_by_key(|&n| {
510                            let count = counts.entry(n).or_insert(0);
511                            *count += 1;
512                            *count
513                        })
514                        .unwrap(),
515                ))
516            },
517            |arr| {
518                let v = array_to_vec_float(arr);
519
520                let mut counts: HashMap<String, usize> = HashMap::new();
521
522                Ok(Dynamic::from_float(
523                    v.iter()
524                        .copied()
525                        .max_by_key(|&n| {
526                            let count = counts.entry(format!("{:?}", n)).or_insert(0);
527                            *count += 1;
528                            *count
529                        })
530                        .unwrap(),
531                ))
532            },
533        )
534    }
535
536    /// Performs ordinary least squares regression and provides a statistical assessment.
537    /// ```typescript
538    /// let x = [[1.0, 0.0],
539    ///          [1.0, 1.0],
540    ///          [1.0, 2.0]];
541    /// let y = [[0.1],
542    ///          [0.8],
543    ///          [2.1]];
544    /// let b = regress(x, y);
545    /// assert_eq(b,  #{"parameters": [-2.220446049250313e-16, 1.0000000000000002],
546    ///                 "pvalues": [1.0, 0.10918255350924745],
547    ///                 "standard_errors": [0.11180339887498947, 0.17320508075688767]});
548    /// ```
549    #[cfg(feature = "nalgebra")]
550    #[rhai_fn(name = "regress", return_raw, pure)]
551    pub fn regress(x: &mut Array, y: Array) -> Result<Map, Box<EvalAltResult>> {
552        use linregress::{FormulaRegressionBuilder, RegressionDataBuilder};
553        let x_transposed = crate::matrix_functions::transpose(x)?;
554        let mut data: Vec<(String, Vec<f64>)> = vec![];
555        let mut vars = vec![];
556        for (iter, column) in x_transposed.iter().enumerate() {
557            let var_name = format!("x_{iter}");
558            vars.push(var_name.clone());
559            data.push((
560                var_name,
561                array_to_vec_float(&mut column.clone().into_array().unwrap()),
562            ));
563        }
564        data.push((
565            "y".to_string(),
566            array_to_vec_float(&mut crate::matrix_functions::flatten(&mut y.clone())),
567        ));
568
569        let regress_data = RegressionDataBuilder::new().build_from(data).unwrap();
570
571        let model = FormulaRegressionBuilder::new()
572            .data(&regress_data)
573            .data_columns("y", vars)
574            .fit()
575            .map_err(|e| EvalAltResult::ErrorArithmetic(e.to_string(), Position::NONE))?;
576
577        let parameters = Dynamic::from_array(
578            model
579                .iter_parameter_pairs()
580                .map(|x| Dynamic::from_float(x.1))
581                .collect::<Array>(),
582        );
583        let pvalues = Dynamic::from_array(
584            model
585                .iter_p_value_pairs()
586                .map(|x| Dynamic::from_float(x.1))
587                .collect::<Array>(),
588        );
589        let standard_errors = Dynamic::from_array(
590            model
591                .iter_se_pairs()
592                .map(|x| Dynamic::from_float(x.1))
593                .collect::<Array>(),
594        );
595
596        let mut result = BTreeMap::new();
597        let mut params = smartstring::SmartString::new();
598        params.push_str("parameters");
599        result.insert(params, parameters);
600        let mut pv = smartstring::SmartString::new();
601        pv.push_str("pvalues");
602        result.insert(pv, pvalues);
603        let mut se = smartstring::SmartString::new();
604        se.push_str("standard_errors");
605        result.insert(se, standard_errors);
606        Ok(result)
607    }
608}