rs_ml/lib.rs
//! rs-ml is a simple ML framework for the Rust language. It includes train test splitting,
//! scalers, and a Gaussian naive Bayes model. It also includes traits to add more transformers and
//! models to the framework.
4//!
5//! # Usage
6//!
7//! This library requires a compute backend to perform matrix operations. Compute backends are
8//! exposed with provided feature flags. Refer to the
9//! [ndarray_linalg](https://github.com/rust-ndarray/ndarray-linalg?tab=readme-ov-file#backend-features)
10//! docs for more information.
11#![deny(
12 missing_docs,
13 unsafe_code,
14 missing_debug_implementations,
15 missing_copy_implementations,
16 clippy::missing_panics_doc
17)]
18
19use std::ops::Add;
20use std::ops::Div;
21use std::ops::Mul;
22
23use classification::ClassificationDataSet;
24use ndarray::Array1;
25use ndarray::Axis;
26use num_traits::Float;
27
28pub mod classification;
29pub mod dimensionality_reduction;
30pub mod metrics;
31pub mod regression;
32pub mod transformer;
33
34/// Trait for fitting classification and regression models, and transformers.
35///
36/// The struct on which this trait is implemented holds and validates the hyperparameters necessary
37/// to fit the estimator to the desired output. For example, a classification model may take as
38/// input a tuple with features and labels:
39/// ```
40/// use ndarray::{Array1, Array2};
41/// use rs_ml::Estimator;
42///
43/// struct ModelParameters {
44/// // Hyperparameters required to fit the model
45/// learning_rate: f64
46/// }
47///
48/// struct Model {
49/// // Internal state of model required to predict features
50/// means: Array2<f64>
51/// };
52///
53/// impl Estimator<(Array2<f64>, Array1<String>)> for ModelParameters {
54/// type Estimator = Model;
55///
56/// fn fit(&self, input: &(Array2<f64>, Array1<String>)) -> Option<Self::Estimator> {
57/// let (features, labels) = input;
58///
59/// // logic to fit the model
60/// Some(Model {
61/// means: Array2::zeros((1, 1))
62/// })
63/// }
64/// }
65/// ```
pub trait Estimator<Input> {
    /// Concrete model or transformer type produced by a successful fit.
    type Estimator;

    /// Fit a model or transformer to the given input, returning `None` if the
    /// estimator was not able to fit to the input data as expected.
    fn fit(&self, input: &Input) -> Option<Self::Estimator>;
}
74
/// Trait to prepare a struct for training or inference.
pub trait Estimatable {
    /// Convert this data into a 1-D array of floats that an estimator can
    /// train on or make inference from.
    ///
    /// # Panics
    ///
    /// Implementations may panic if a value cannot be converted to `F`
    /// (e.g. the provided `Array1` impl unwraps `F::from`).
    fn prepare_for_estimation<F: Float>(&self) -> Array1<F>;
}
80
81impl<F1: Float> Estimatable for Array1<F1> {
82 fn prepare_for_estimation<F2: Float>(&self) -> Array1<F2> {
83 self.map(|v| F2::from(*v).unwrap())
84 }
85}
86
/// Train test split result. Field order: training features, testing features,
/// training labels, testing labels.
///
/// NOTE(review): this struct is not returned by `train_test_split`, which
/// yields a pair of `ClassificationDataSet`s instead — confirm it is still a
/// supported part of the public API.
#[derive(Debug, Clone)]
pub struct SplitDataset<Feature, Label>(
    /// Training features.
    pub Vec<Feature>,
    /// Testing features.
    pub Vec<Feature>,
    /// Training labels.
    pub Vec<Label>,
    /// Testing labels.
    pub Vec<Label>,
);
96
97/// Split data and features into training and testing set. `test_size` must be between 0 and 1.
98///
99/// # Panics
100///
101/// Panics if `test_size` is outside range 0..=1.
102///
103/// Example:
104/// ```
105/// use rs_ml::{train_test_split};
106/// use rs_ml::classification::ClassificationDataSet;
107/// use ndarray::{arr1, arr2};
108///
109/// let features = arr2(&[
110/// [1., 0.],
111/// [0., 1.],
112/// [0., 0.],
113/// [1., 1.]]);
114///
115/// let labels = vec![1, 1, 0, 0];
116///
117/// let dataset = ClassificationDataSet::from(
118/// features.rows().into_iter().zip(labels));
119///
120/// let (train, test) = train_test_split(dataset, 0.25);
121/// ```
122pub fn train_test_split<Feature, Label>(
123 dataset: ClassificationDataSet<Feature, Label>,
124 test_size: f64,
125) -> (
126 ClassificationDataSet<Feature, Label>,
127 ClassificationDataSet<Feature, Label>,
128) {
129 let (train, test): (Vec<_>, Vec<_>) = dataset
130 .consume_records()
131 .into_iter()
132 .partition(|_| rand::random_bool(test_size));
133
134 (
135 ClassificationDataSet::from(train),
136 ClassificationDataSet::from(test),
137 )
138}
139
/// Mean of elements in an iterator, or `None` if the iterator is empty.
///
/// Computed incrementally as `mean_{i+1} = mean_i * i/(i+1) + x_i/(i+1)`,
/// which avoids building one large running sum.
///
/// (The previous `R: Default` bound was unused — the fold seeds with `None`,
/// never `Default::default()` — so it has been dropped, loosening the bounds
/// backward-compatibly.)
fn iterative_mean<I, F, R>(it: I) -> Option<R>
where
    I: IntoIterator<Item = F>,
    F: Into<R>,
    R: Div<f64, Output = R> + Mul<f64, Output = R> + Add<Output = R>,
{
    it.into_iter().enumerate().fold(None, |acc, (i, item)| {
        // Number of elements consumed so far, including `item`.
        let count = (i + 1) as f64;
        let scaled_current = item.into() / count;

        Some(match acc {
            // Shrink the previous mean by i/(i+1), then add the new term.
            Some(mean) => mean * (i as f64 / count) + scaled_current,
            // First element: the mean is the element itself (scaled by 1/1).
            None => scaled_current,
        })
    })
}
160
#[cfg(test)]
mod tests {
    use ndarray::{arr1, arr2, Array1};

    use crate::iterative_mean;

    /// Row-wise mean of a 2-D array matches the per-column arithmetic mean.
    #[test]
    fn test_iterative_mean_2darray() {
        let data = arr2(&[[0., 1., 2.], [1., 2., 3.], [2., 3., 4.]]);

        let rows = data.rows().into_iter().map(|row| row.to_owned());
        let mean: Option<Array1<f64>> = iterative_mean(rows);

        assert!(mean.is_some_and(|m| m.relative_eq(&arr1(&[1.0, 2.0, 3.0]), 1e-4, 1e-2)));
    }

    /// A plain vector of floats averages to its arithmetic mean.
    #[test]
    fn test_iterative_mean_vec() {
        let values: Vec<f64> = vec![0., 1., 2., 3., 4.];

        assert_eq!(iterative_mean(values), Some(2.0));
    }
}
185}