1use std::borrow::Borrow;
2use ndarray::{Array1, Array2};
3use crate::error::{DigiFiError, ErrorTitle};
4
5
6#[derive(Debug, Default)]
7pub struct FeatureCollection {
73 pub features: Vec<Array1<f64>>,
74 pub feature_names: Vec<String>,
75 feature_size: Option<usize>,
76 pub add_constant: bool,
77}
78
79impl FeatureCollection {
80 pub fn new() -> Self {
82 Self::default()
83 }
84
85 fn validate_feature<T, I>(&self, feature: &T, feature_name: &str) -> Result<(), DigiFiError>
86 where
87 T: Iterator<Item = I> + ExactSizeIterator,
88 I: Borrow<f64>,
89 {
90 if self.feature_names.contains(&feature_name.to_owned()) {
91 return Err(DigiFiError::Other { title: Self::error_title(), details: format!("Feature `{}` already exists in the collection.", feature_name), });
92 }
93 if let Some(feature_size) = self.feature_size {
94 if feature.len() != feature_size {
95 return Err(DigiFiError::WrongLength { title: Self::error_title(), arg: feature_name.to_owned(), len: feature_size, });
96 }
97 }
98 Ok(())
99 }
100
101 fn feature_exists(&self, feature_name: &str) -> Result<(), DigiFiError> {
103 if !self.feature_names.contains(&feature_name.to_owned()) {
104 return Err(DigiFiError::Other { title: Self::error_title(), details: format!("Feature `{}` doesn't exists in the collection.", feature_name), });
105 }
106 Ok(())
107 }
108
109 pub fn len(&self) -> usize {
111 self.feature_names.len()
112 }
113
114 pub fn feature_size(&self) -> Option<usize> {
118 self.feature_size
119 }
120
121 pub fn get_feature_index(&self, feature_name: &str) -> Result<usize, DigiFiError> {
123 self.feature_names.iter().position(|v| v == feature_name )
124 .ok_or(DigiFiError::NotFound { title: Self::error_title(), data: feature_name.to_owned(), })
125 }
126
127 pub fn add_feature<T, I>(&mut self, feature: T, feature_name: &str) -> Result<(), DigiFiError>
129 where
130 T: Iterator<Item = I> + ExactSizeIterator,
131 I: Borrow<f64>,
132 {
133 let feature_name: String = String::from(feature_name);
134 self.validate_feature(&feature, &feature_name)?;
135 let feature_len: usize = feature.len();
136 self.features.push(feature.map(|v| *v.borrow() ).collect());
138 self.feature_names.push(feature_name);
139 if let None = self.feature_size {
141 self.feature_size = Some(feature_len);
142 }
143 Ok(())
144 }
145
146 pub fn remove_feature(&mut self, feature_name: &str) -> Result<(), DigiFiError> {
148 let feature_name: String = String::from(feature_name);
149 self.feature_exists(&feature_name)?;
150 let index: usize = self.get_feature_index(&feature_name)?;
151 self.features.remove(index);
153 self.feature_names.remove(index);
154 if self.feature_names.is_empty() {
156 self.feature_size = None;
157 }
158 Ok(())
159 }
160
161 pub fn get_feature_array(&self, feature_name: &str) -> Result<&Array1<f64>, DigiFiError> {
163 let feature_name: String = String::from(feature_name);
164 self.feature_exists(&feature_name)?;
165 let index: usize = self.get_feature_index(&feature_name)?;
166 Ok(&self.features[index])
167 }
168
169 pub fn get_matrix(&self) -> Result<Array2<f64>, DigiFiError> {
171 let feature_size: usize = self.feature_size
172 .ok_or(DigiFiError::Other { title: Self::error_title(), details: "No features are present in the collection.".to_owned(), })?;
173 let (mut shape, mut x_matrix) = (
174 (self.len(), feature_size),
175 self.features.iter().fold(vec![], |mut prev, curr| { prev.append(&mut curr.to_vec()); prev } )
176 );
177 if self.add_constant {
178 shape.0 += 1;
179 x_matrix.append(&mut vec![1.0; feature_size]);
180 }
181 Ok(Array2::from_shape_vec(shape, x_matrix)?.reversed_axes())
182 }
183
184 pub fn into_matrix(self) -> Result<Array2<f64>, DigiFiError> {
185 self.get_matrix()
186 }
187
188 pub fn ddof(&self) -> usize {
190 match self.add_constant {
191 true => self.len() + 1,
192 false => self.len(),
193 }
194 }
195}
196
197impl ErrorTitle for FeatureCollection {
198 fn error_title() -> String {
199 String::from("Feature Collection")
200 }
201}
202
203
#[cfg(test)]
mod tests {
    use ndarray::{Array1, Array2, array};
    use crate::utilities::{TEST_ACCURACY, feature_collection::FeatureCollection};

    /// Adds features from both borrowing and owning iterators, then checks lookup, matrix layout
    /// and removal.
    #[test]
    fn unit_test_feature_collection() {
        let x_1: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let x_2: Vec<f64> = vec![6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
        let x_3: Array1<f64> = Array1::from_vec(vec![9.0, 8.0, 7.0, 6.0, 5.0, 4.0]);
        let x_4: Array1<f64> = Array1::from_vec(vec![4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
        let mut fc: FeatureCollection = FeatureCollection::new();
        // Adding features
        fc.add_feature(x_1.iter(), "x_1").unwrap();
        assert_eq!(fc.len(), 1);
        assert_eq!(fc.feature_size(), Some(6));
        fc.add_feature(x_2.into_iter(), "x_2").unwrap();
        assert_eq!(fc.len(), 2);
        assert_eq!(fc.feature_size(), Some(6));
        fc.add_feature(x_3.iter(), "x_3").unwrap();
        assert_eq!(fc.len(), 3);
        assert_eq!(fc.feature_size(), Some(6));
        // `x_4` is moved into the collection here; `x_3` stays available for the lookup check.
        fc.add_feature(x_4.into_iter(), "x_4").unwrap();
        assert_eq!(fc.len(), 4);
        assert_eq!(fc.feature_size(), Some(6));
        // Looking up a feature by name
        assert_eq!(fc.get_feature_array("x_3").unwrap(), &x_3);
        // Every feature becomes one column of the matrix
        let matrix: Array2<f64> = array![
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            [6.0, 5.0, 4.0, 3.0, 2.0, 1.0],
            [9.0, 8.0, 7.0, 6.0, 5.0, 4.0],
            [4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
        ].reversed_axes();
        assert_eq!(fc.get_matrix().unwrap(), matrix);
        // Removing a feature keeps the feature size while other features remain
        fc.remove_feature("x_1").unwrap();
        assert_eq!(fc.len(), 3);
        assert_eq!(fc.feature_size(), Some(6));
    }

    /// Feeds the matrix (with the constant column) into the crate's linear regression.
    #[test]
    fn unit_test_feature_collection_for_linear_regression() {
        use crate::statistics::linear_regression;
        let mut fc: FeatureCollection = FeatureCollection::new();
        fc.add_constant = true;
        fc.add_feature(vec![1.0, 4.0, 6.0].into_iter(), "x_1").unwrap();
        fc.add_feature(vec![3.0, 4.0, 5.0].into_iter(), "x_2").unwrap();
        let y: Array1<f64> = array![1.0, 2.0, 3.0];
        let params: Array1<f64> = linear_regression(&fc.into_matrix().unwrap(), &y).unwrap();
        // Expected order: coefficient of x_1, coefficient of x_2, then the constant term.
        let results: Array1<f64> = Array1::from(vec![-2.49556592e-16, 1.0, -2.0]);
        assert!((&params - &results).map(|v| v.abs()).sum() < TEST_ACCURACY);
    }
}