abd_clam/chaoda/metaml.rs
1//! A trait to be implemented by machine learning regressors.
2
3use automl::IntoSupervisedData;
4use smartcore::linalg::naive::dense_matrix::DenseMatrix;
5use std::path::Path;
6
7/// Trait to represent types that can be used as a meta-ML model
8pub trait MetaMLModel {
9 /// Train the model on the given features and targets.
10 ///
11 /// This function is used to train the model using the provided dataset, consisting of features and targets.
12 ///
13 /// # Arguments
14 ///
15 /// * `dataset`: A `MetaMLDataset` containing the features and targets used for training.
16 ///
17 /// # Panics
18 ///
19 /// This function may panic in the following situations:
20 ///
21 /// * If the number of columns in the features data isn't 6.
22 /// * If the number of rows in the features data doesn't match the number of elements in the targets data.
23 ///
24 fn train(&mut self, dataset: MetaMLDataset);
25
26 /// Makes a prediction given a trained model and 6 feature values.
27 ///
28 /// This function is used to make a prediction using the provided feature values and a previously
29 /// trained model.
30 ///
31 /// # Arguments
32 ///
33 /// * `features`: A reference to an array of 6 feature values.
34 ///
35 /// # Panics
36 ///
37 /// This function may panic in the following situation:
38 ///
39 /// * If the model hasn't been trained.
40 ///
41 /// # Errors
42 ///
43 /// Will throw error if model is not saved
44 ///
45 fn predict(&self, features: &[f32; 6]) -> Result<f32, String>;
46
47 /// Loads a trained meta-ml model from disk.
48 ///
49 /// This function is used to load a previously trained meta-ml model from the specified file path.
50 ///
51 /// # Arguments
52 ///
53 /// * `path`: A reference to the file path where the model is stored.
54 ///
55 /// # Errors
56 ///
57 /// This function can return errors in the following cases:
58 ///
59 /// * If the serialized model cannot be read from the input file path.
60 /// * If the trained model cannot be deserialized.
61 ///
62 /// # Returns
63 ///
64 /// If successful, this function returns the loaded meta-ml model.
65 ///
66 fn load(path: &Path) -> Result<Self, String>
67 where
68 Self: Sized;
69
70 /// Saves a trained meta-ml model to disk.
71 ///
72 /// # Arguments
73 ///
74 /// * `path`: A reference to the file path where the model will be saved.
75 ///
76 /// # Returns
77 ///
78 /// Returns `Result<(), String>` where `Ok(())` indicates success, and `Err` contains an error message.
79 ///
80 /// # Errors
81 /// * If the model hasn't been trained.
82 /// * If the trained model cannot be serialized.
83 /// * If the serialized model cannot be written to the output file path.
84 ///
85 fn save(&self, path: &Path) -> Result<(), String>;
86}
87
88/// Represents the training data for a `MetaML` model
89///
90/// # Invariants:
91/// * The number of columns in the features data is 6
92/// * The number of rows in the features data is equal to the number of rows in the target data
93/// * The data at row `i` of the features data corresponds to the data at row `i` of the targets data
94pub struct MetaMLDataset {
95 /// Features data for training the `MetaML` model.
96 features: DenseMatrix<f32>,
97 /// Target values for the corresponding features data.
98 targets: Vec<f32>,
99}
100
101impl MetaMLDataset {
102 /// Creates a dataset for training a meta-ml model from a set of feature values and their corresponding target values.
103 ///
104 /// # Arguments
105 ///
106 /// * `_features`: A slice of arrays representing feature values, where each array has 6 elements.
107 /// * `_targets`: A slice of target values.
108 ///
109 /// # Returns
110 ///
111 /// Returns a `Result<Self, String>` where `Ok(Self)` indicates success, and `Err` contains an error message.
112 ///
113 /// # Errors
114 /// * If the number of columns in the features data isn't 6.
115 /// * If the number of rows in the features data doesn't match the number of elements in the targets data.
116 ///
117 pub fn new(_features: &[[f32; 6]], _targets: &[f32]) -> Result<Self, String> {
118 todo!()
119 // TODO: better error checking once the rust branch is merged into master
120 // if features.len() == targets.len() {
121 // Err("Different number of features and targets in input data".to_string())
122 // } else {
123 // let features = DenseMatrix::from_2d_vec(&features.iter().map(|f| f.to_vec()).collect::<Vec<_>>());
124 // let targets = targets.to_vec();
125 // Ok(Self { features, targets })
126 // }
127 }
128
129 /// Creates a dataset for training a meta-ml model from input data on disk.
130 ///
131 /// # Arguments
132 ///
133 /// * `_features_file_path`: Path to the file containing feature data.
134 /// * `_targets_file_path`: Path to the file containing target data.
135 ///
136 /// # Returns
137 ///
138 /// Returns a `Result<Self, String>` where `Ok(Self)` indicates success, and `Err` contains an error message.
139 ///
140 /// # Errors
141 /// * If either of the given paths can't be converted to a string.
142 /// * If either of the given files can't be found, opened, or parsed as `f32`s.
143 /// * If the data contained within the features file isn't two-dimensional.
144 /// * If the data contained within the targets file isn't one-dimensional.
145 /// * If the number of columns in the features data isn't 6.
146 /// * If the number of rows in the features data doesn't match the number of elements in the targets data.
147 ///
148 pub fn from_npy(_features_file_path: &Path, _targets_file_path: &Path) -> Result<Self, String> {
149 todo!()
150
151 // let features_f64: Array2<f64> = read_npy(
152 // features_file_path
153 // .to_str()
154 // .ok_or_else(|| "failed to convert PathBuf to string".to_string())?,
155 // )
156 // .map_err(|_| "failed to read the features data file".to_string())?;
157 // let targets_f64: Array1<f64> = read_npy(
158 // targets_file_path
159 // .to_str()
160 // .ok_or_else(|| "failed to convert PathBuf to string".to_string())?,
161 // )
162 // .map_err(|_| "failed to read the outputs data file".to_string())?;
163 //
164 // // Ensure the input data has the correct shape
165 // if features_f64.ncols() != 6 {
166 // return Err(format!(
167 // "Input features had {} columns (expected 6)",
168 // features_f64.ncols()
169 // ));
170 // }
171 // if features_f64.nrows() != targets_f64.len() {
172 // return Err(format!(
173 // "Input features had {} data points, but targets had {}",
174 // features_f64.nrows(),
175 // targets_f64.len(),
176 // ));
177 // }
178 //
179 // // Transform the training data from f64 to f32 (we are given f64, but automl uses f32s)
180 // let features: Array2<f32> = features_f64.map(|x| *x as f32);
181 // let targets: Array1<f32> = targets_f64.map(|x| *x as f32);
182 //
183 // // Transform the training data to vectors. This won't fail, we checked
184 // // that the data has the correct shape earlier
185 // let features: Vec<[f32; 6]> = features
186 // .rows()
187 // .into_iter()
188 // .map(|row| row.into_iter().copied().collect::<Vec<_>>().try_into().unwrap())
189 // .collect();
190 // let targets: Vec<f32> = targets.to_vec();
191 //
192 // Self::new(&features, &targets)
193 }
194}
195
196impl IntoSupervisedData for MetaMLDataset {
197 /// Converts the current dataset into a tuple containing feature data and target data.
198 ///
199 /// This function transforms the dataset into a format suitable for supervised learning, returning
200 /// a tuple where the first element is a two-dimensional feature matrix, and the second element is
201 /// a one-dimensional target vector.
202 ///
203 /// # Returns
204 ///
205 /// A tuple containing feature data represented as a `DenseMatrix<f32>` and target data represented
206 /// as a `Vec<f32>`.
207 fn to_supervised_data(self) -> (DenseMatrix<f32>, Vec<f32>) {
208 (self.features, self.targets)
209 }
210}