rs_ml/lib.rs
//! rs-ml is a simple ML framework for the Rust language. It includes train/test splitting,
//! scalers, and a Gaussian naive Bayes model. It also includes traits for adding more transformers
//! and models to the framework.
//!
//! # Usage
//!
//! This library requires a compute backend to perform matrix operations. Compute backends are
//! exposed through the provided feature flags. Refer to the
//! [ndarray-linalg](https://github.com/rust-ndarray/ndarray-linalg?tab=readme-ov-file#backend-features)
//! docs for more information.
11#![deny(
12 missing_docs,
13 unsafe_code,
14 missing_debug_implementations,
15 missing_copy_implementations,
16 clippy::missing_panics_doc
17)]
18
19use ndarray::{Array, Axis, Dimension, RemoveAxis};
20use rand::{rng, Rng};
21
22pub mod classification;
23pub mod metrics;
24pub mod regression;
25pub mod transformer;
26
/// Trait for fitting classification and regression models, and transformers.
///
/// The struct on which this trait is implemented holds and validates the hyperparameters necessary
/// to fit the estimator to the desired output. For example, a classification model may take as
/// input a tuple with features and labels:
///
/// ```
/// use ndarray::{Array1, Array2};
/// use rs_ml::Estimator;
///
/// struct ModelParameters {
///     // Hyperparameters required to fit the model
///     learning_rate: f64,
/// }
///
/// struct Model {
///     // Internal state of model required to predict features
///     means: Array2<f64>,
/// }
///
/// impl Estimator<(Array2<f64>, Array1<String>)> for ModelParameters {
///     type Estimator = Model;
///
///     fn fit(&self, input: &(Array2<f64>, Array1<String>)) -> Option<Self::Estimator> {
///         let (features, labels) = input;
///
///         // logic to fit the model
///         Some(Model {
///             means: Array2::zeros((1, 1)),
///         })
///     }
/// }
/// ```
pub trait Estimator<Input> {
    /// Output model or transformer fitted to input data.
    type Estimator;

    /// Fit a model or transformer to the given input.
    ///
    /// Returns `None` if the estimator was not able to fit to the input data as expected.
    fn fit(&self, input: &Input) -> Option<Self::Estimator>;
}
67
68/// Train test split result. returns in order training features, testing features, training labels,
69/// testing labels.
70#[derive(Debug, Clone)]
71pub struct TrainTestSplitResult<Feature, Label, D: Dimension, D2: Dimension>(
72 pub Array<Feature, D>,
73 pub Array<Feature, D>,
74 pub Array<Label, D2>,
75 pub Array<Label, D2>,
76);
77
78/// Split data and features into training and testing set. `test_size` must be between 0 and 1.
79///
80/// # Panics
81///
82/// Panics if `test_size` is outside range 0..=1.
83///
84/// Example:
85/// ```
86/// use rs_ml::{train_test_split, TrainTestSplitResult};
87/// use ndarray::{arr1, arr2};
88///
89/// let features = arr2(&[
90/// [1., 0.],
91/// [0., 1.],
92/// [0., 0.],
93/// [1., 1.]]);
94///
95/// let labels = arr1(&[1, 1, 0, 0]);
96///
97/// let TrainTestSplitResult(train_features, test_features, train_labels, test_labels) = train_test_split(&features,
98/// &labels, 0.25);
99/// ```
100pub fn train_test_split<
101 D: Dimension + RemoveAxis,
102 D2: Dimension + RemoveAxis,
103 Feature: Clone,
104 Label: Clone,
105>(
106 arr: &Array<Feature, D>,
107 y: &Array<Label, D2>,
108 test_size: f64,
109) -> TrainTestSplitResult<Feature, Label, D, D2> {
110 let rows = arr.shape()[0];
111
112 let (test, train): (Vec<usize>, Vec<usize>) =
113 (0..rows).partition(|_| rng().random_bool(test_size));
114
115 TrainTestSplitResult(
116 arr.select(Axis(0), &train),
117 arr.select(Axis(0), &test),
118 y.select(Axis(0), &train),
119 y.select(Axis(0), &test),
120 )
121}