abd_clam/chaoda/
metaml.rs

1//! A trait to be implemented by machine learning regressors.
2
3use automl::IntoSupervisedData;
4use smartcore::linalg::naive::dense_matrix::DenseMatrix;
5use std::path::Path;
6
7/// Trait to represent types that can be used as a meta-ML model
8pub trait MetaMLModel {
9    /// Train the model on the given features and targets.
10    ///
11    /// This function is used to train the model using the provided dataset, consisting of features and targets.
12    ///
13    /// # Arguments
14    ///
15    /// * `dataset`: A `MetaMLDataset` containing the features and targets used for training.
16    ///
17    /// # Panics
18    ///
19    /// This function may panic in the following situations:
20    ///
21    /// * If the number of columns in the features data isn't 6.
22    /// * If the number of rows in the features data doesn't match the number of elements in the targets data.
23    ///
24    fn train(&mut self, dataset: MetaMLDataset);
25
26    /// Makes a prediction given a trained model and 6 feature values.
27    ///
28    /// This function is used to make a prediction using the provided feature values and a previously
29    /// trained model.
30    ///
31    /// # Arguments
32    ///
33    /// * `features`: A reference to an array of 6 feature values.
34    ///
35    /// # Panics
36    ///
37    /// This function may panic in the following situation:
38    ///
39    /// * If the model hasn't been trained.
40    ///
41    /// # Errors
42    ///
43    /// Will throw error if model is not saved
44    ///
45    fn predict(&self, features: &[f32; 6]) -> Result<f32, String>;
46
47    /// Loads a trained meta-ml model from disk.
48    ///
49    /// This function is used to load a previously trained meta-ml model from the specified file path.
50    ///
51    /// # Arguments
52    ///
53    /// * `path`: A reference to the file path where the model is stored.
54    ///
55    /// # Errors
56    ///
57    /// This function can return errors in the following cases:
58    ///
59    /// * If the serialized model cannot be read from the input file path.
60    /// * If the trained model cannot be deserialized.
61    ///
62    /// # Returns
63    ///
64    /// If successful, this function returns the loaded meta-ml model.
65    ///
66    fn load(path: &Path) -> Result<Self, String>
67    where
68        Self: Sized;
69
70    /// Saves a trained meta-ml model to disk.
71    ///
72    /// # Arguments
73    ///
74    /// * `path`: A reference to the file path where the model will be saved.
75    ///
76    /// # Returns
77    ///
78    /// Returns `Result<(), String>` where `Ok(())` indicates success, and `Err` contains an error message.
79    ///
80    /// # Errors
81    /// * If the model hasn't been trained.
82    /// * If the trained model cannot be serialized.
83    /// * If the serialized model cannot be written to the output file path.
84    ///
85    fn save(&self, path: &Path) -> Result<(), String>;
86}
87
88/// Represents the training data for a `MetaML` model
89///
90/// # Invariants:
91/// * The number of columns in the features data is 6
92/// * The number of rows in the features data is equal to the number of rows in the target data
93/// * The data at row `i` of the features data corresponds to the data at row `i` of the targets data
94pub struct MetaMLDataset {
95    /// Features data for training the `MetaML` model.
96    features: DenseMatrix<f32>,
97    /// Target values for the corresponding features data.
98    targets: Vec<f32>,
99}
100
101impl MetaMLDataset {
102    /// Creates a dataset for training a meta-ml model from a set of feature values and their corresponding target values.
103    ///
104    /// # Arguments
105    ///
106    /// * `_features`: A slice of arrays representing feature values, where each array has 6 elements.
107    /// * `_targets`: A slice of target values.
108    ///
109    /// # Returns
110    ///
111    /// Returns a `Result<Self, String>` where `Ok(Self)` indicates success, and `Err` contains an error message.
112    ///
113    /// # Errors
114    /// * If the number of columns in the features data isn't 6.
115    /// * If the number of rows in the features data doesn't match the number of elements in the targets data.
116    ///
117    pub fn new(_features: &[[f32; 6]], _targets: &[f32]) -> Result<Self, String> {
118        todo!()
119        // TODO: better error checking once the rust branch is merged into master
120        // if features.len() == targets.len() {
121        //     Err("Different number of features and targets in input data".to_string())
122        // } else {
123        //     let features = DenseMatrix::from_2d_vec(&features.iter().map(|f| f.to_vec()).collect::<Vec<_>>());
124        //     let targets = targets.to_vec();
125        //     Ok(Self { features, targets })
126        // }
127    }
128
129    /// Creates a dataset for training a meta-ml model from input data on disk.
130    ///
131    /// # Arguments
132    ///
133    /// * `_features_file_path`: Path to the file containing feature data.
134    /// * `_targets_file_path`: Path to the file containing target data.
135    ///
136    /// # Returns
137    ///
138    /// Returns a `Result<Self, String>` where `Ok(Self)` indicates success, and `Err` contains an error message.
139    ///
140    /// # Errors
141    /// * If either of the given paths can't be converted to a string.
142    /// * If either of the given files can't be found, opened, or parsed as `f32`s.
143    /// * If the data contained within the features file isn't two-dimensional.
144    /// * If the data contained within the targets file isn't one-dimensional.
145    /// * If the number of columns in the features data isn't 6.
146    /// * If the number of rows in the features data doesn't match the number of elements in the targets data.
147    ///
148    pub fn from_npy(_features_file_path: &Path, _targets_file_path: &Path) -> Result<Self, String> {
149        todo!()
150
151        // let features_f64: Array2<f64> = read_npy(
152        //     features_file_path
153        //         .to_str()
154        //         .ok_or_else(|| "failed to convert PathBuf to string".to_string())?,
155        // )
156        // .map_err(|_| "failed to read the features data file".to_string())?;
157        // let targets_f64: Array1<f64> = read_npy(
158        //     targets_file_path
159        //         .to_str()
160        //         .ok_or_else(|| "failed to convert PathBuf to string".to_string())?,
161        // )
162        // .map_err(|_| "failed to read the outputs data file".to_string())?;
163        //
164        // // Ensure the input data has the correct shape
165        // if features_f64.ncols() != 6 {
166        //     return Err(format!(
167        //         "Input features had {} columns (expected 6)",
168        //         features_f64.ncols()
169        //     ));
170        // }
171        // if features_f64.nrows() != targets_f64.len() {
172        //     return Err(format!(
173        //         "Input features had {} data points, but targets had {}",
174        //         features_f64.nrows(),
175        //         targets_f64.len(),
176        //     ));
177        // }
178        //
179        // // Transform the training data from f64 to f32 (we are given f64, but automl uses f32s)
180        // let features: Array2<f32> = features_f64.map(|x| *x as f32);
181        // let targets: Array1<f32> = targets_f64.map(|x| *x as f32);
182        //
183        // // Transform the training data to vectors. This won't fail, we checked
184        // // that the data has the correct shape earlier
185        // let features: Vec<[f32; 6]> = features
186        //     .rows()
187        //     .into_iter()
188        //     .map(|row| row.into_iter().copied().collect::<Vec<_>>().try_into().unwrap())
189        //     .collect();
190        // let targets: Vec<f32> = targets.to_vec();
191        //
192        // Self::new(&features, &targets)
193    }
194}
195
196impl IntoSupervisedData for MetaMLDataset {
197    /// Converts the current dataset into a tuple containing feature data and target data.
198    ///
199    /// This function transforms the dataset into a format suitable for supervised learning, returning
200    /// a tuple where the first element is a two-dimensional feature matrix, and the second element is
201    /// a one-dimensional target vector.
202    ///
203    /// # Returns
204    ///
205    /// A tuple containing feature data represented as a `DenseMatrix<f32>` and target data represented
206    /// as a `Vec<f32>`.
207    fn to_supervised_data(self) -> (DenseMatrix<f32>, Vec<f32>) {
208        (self.features, self.targets)
209    }
210}