rs_ml/
lib.rs

//! rs-ml is a simple ML framework for the Rust language. It includes train/test splitting,
//! scalers, and a Gaussian naive Bayes model. It also includes traits to add more transformers and
//! models to the framework.
4//!
5//! # Usage
6//!
7//! This library requires a compute backend to perform matrix operations. Compute backends are
8//! exposed with provided feature flags. Refer to the
9//! [ndarray_linalg](https://github.com/rust-ndarray/ndarray-linalg?tab=readme-ov-file#backend-features)
10//! docs for more information.
11#![deny(
12    missing_docs,
13    unsafe_code,
14    missing_debug_implementations,
15    missing_copy_implementations,
16    clippy::missing_panics_doc
17)]
18
19use ndarray::{Array, Axis, Dimension, RemoveAxis};
20use rand::{rng, Rng};
21
22pub mod classification;
23pub mod metrics;
24pub mod regression;
25pub mod transformer;
26
/// Trait for fitting classification and regression models, and transformers.
///
/// The struct on which this trait is implemented holds and validates the hyperparameters necessary
/// to fit the estimator to the desired output. The `Input` type parameter is the data the
/// estimator is fitted to. For example, a classification model may take as input a tuple with
/// features and labels:
/// ```
/// use ndarray::{Array1, Array2};
/// use rs_ml::Estimator;
///
/// struct ModelParameters {
///     // Hyperparameters required to fit the model
///     learning_rate: f64,
/// }
///
/// struct Model {
///     // Internal state of model required to predict features
///     means: Array2<f64>,
/// }
///
/// impl Estimator<(Array2<f64>, Array1<String>)> for ModelParameters {
///     type Estimator = Model;
///
///     fn fit(&self, input: &(Array2<f64>, Array1<String>)) -> Option<Self::Estimator> {
///         let (features, labels) = input;
///
///         // logic to fit the model
///         Some(Model {
///             means: Array2::zeros((1, 1)),
///         })
///     }
/// }
/// ```
pub trait Estimator<Input> {
    /// Output model or transformer fitted to input data.
    type Estimator;

    /// Fit model or transformer based on given inputs.
    ///
    /// Returns `None` if the estimator was not able to fit to the input data as expected.
    fn fit(&self, input: &Input) -> Option<Self::Estimator>;
}
67
68/// Train test split result. returns in order training features, testing features, training labels,
69/// testing labels.
70#[derive(Debug, Clone)]
71pub struct TrainTestSplitResult<Feature, Label, D: Dimension, D2: Dimension>(
72    pub Array<Feature, D>,
73    pub Array<Feature, D>,
74    pub Array<Label, D2>,
75    pub Array<Label, D2>,
76);
77
78/// Split data and features into training and testing set. `test_size` must be between 0 and 1.
79///
80/// # Panics
81///
82/// Panics if `test_size` is outside range 0..=1.
83///
84/// Example:
85/// ```
86/// use rs_ml::{train_test_split, TrainTestSplitResult};
87/// use ndarray::{arr1, arr2};
88///
89/// let features = arr2(&[
90///   [1., 0.],
91///   [0., 1.],
92///   [0., 0.],
93///   [1., 1.]]);
94///
95/// let labels = arr1(&[1, 1, 0, 0]);
96///
97/// let TrainTestSplitResult(train_features, test_features, train_labels, test_labels) = train_test_split(&features,
98/// &labels, 0.25);
99/// ```
100pub fn train_test_split<
101    D: Dimension + RemoveAxis,
102    D2: Dimension + RemoveAxis,
103    Feature: Clone,
104    Label: Clone,
105>(
106    arr: &Array<Feature, D>,
107    y: &Array<Label, D2>,
108    test_size: f64,
109) -> TrainTestSplitResult<Feature, Label, D, D2> {
110    let rows = arr.shape()[0];
111
112    let (test, train): (Vec<usize>, Vec<usize>) =
113        (0..rows).partition(|_| rng().random_bool(test_size));
114
115    TrainTestSplitResult(
116        arr.select(Axis(0), &train),
117        arr.select(Axis(0), &test),
118        y.select(Axis(0), &train),
119        y.select(Axis(0), &test),
120    )
121}