Skip to main content

digifi/utilities/
feature_collection.rs

1use std::borrow::Borrow;
2use ndarray::{Array1, Array2};
3use crate::error::{DigiFiError, ErrorTitle};
4
5
6#[derive(Debug, Default)]
7/// Collection of features that are organized for modelling.
8/// 
9/// # Examples
10/// 
11/// 1. Working with feature collection
12/// 
13/// ```rust
14/// use ndarray::{Array1, Array2, array};
15/// use digifi::utilities::FeatureCollection;
16/// 
17/// // Features
18/// let x_1: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
19/// let x_2: Vec<f64> = vec![6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
20/// let x_3: Array1<f64> = Array1::from_vec(vec![9.0, 8.0, 7.0, 6.0, 5.0, 4.0]);
21/// let x_4: Array1<f64> = Array1::from_vec(vec![4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
22/// 
23/// // Create feature collection with iterators from different types of data structures
24/// let mut fc: FeatureCollection = FeatureCollection::new();
25/// fc.add_feature(x_1.iter(), "x_1").unwrap();
26/// assert_eq!(fc.len(), 1);
27/// assert_eq!(fc.feature_size(), Some(6));
28/// fc.add_feature(x_2.into_iter(), "x_2").unwrap();
29/// assert_eq!(fc.len(), 2);
30/// assert_eq!(fc.feature_size(), Some(6));
31/// fc.add_feature(x_3.iter(), "x_3").unwrap();
32/// assert_eq!(fc.len(), 3);
33/// assert_eq!(fc.feature_size(), Some(6));
34/// fc.add_feature(x_4.into_iter(), "x_4").unwrap();
35/// assert_eq!(fc.len(), 4);
36/// assert_eq!(fc.feature_size(), Some(6));
37/// 
38/// // Get features from the collection
39/// assert_eq!(fc.get_feature_array("x_3").unwrap(), &x_3);
40/// let matrix: Array2<f64> = array![
41///     [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
42///     [6.0, 5.0, 4.0, 3.0, 2.0, 1.0],
43///     [9.0, 8.0, 7.0, 6.0, 5.0, 4.0],
44///     [4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
45/// ].reversed_axes();
46/// assert_eq!(fc.get_matrix().unwrap(), matrix);
47/// 
48/// // Remove feature
49/// fc.remove_feature("x_1").unwrap();
50/// assert_eq!(fc.len(), 3);
51/// assert_eq!(fc.feature_size(), Some(6));
52/// ```
53/// 
54/// 2. Using feature collection for modelling
55/// 
56/// ```rust
57/// use ndarray::{Array1, Array2, array};
58/// use digifi::utilities::{TEST_ACCURACY, FeatureCollection};
59/// use digifi::statistics::linear_regression;
60/// 
61/// let mut fc: FeatureCollection = FeatureCollection::new();
62/// fc.add_constant = true;
63/// fc.add_feature(vec![1.0, 4.0, 6.0].into_iter(), "x_1").unwrap();
64/// fc.add_feature(vec![3.0, 4.0, 5.0].into_iter(), "x_2").unwrap();
65/// let y: Array1<f64> = array![1.0, 2.0, 3.0];
66/// let params: Array1<f64> = linear_regression(&fc.into_matrix().unwrap(), &y).unwrap();
67/// 
68/// // The results were found using LinearRegression from sklearn
69/// let results: Array1<f64> = Array1::from(vec![-2.49556592e-16, 1.0, -2.0]);
70/// assert!((&params - &results).map(|v| v.abs() ).sum() < TEST_ACCURACY);
71/// ```
72pub struct FeatureCollection {
73    pub features: Vec<Array1<f64>>,
74    pub feature_names: Vec<String>,
75    feature_size: Option<usize>,
76    pub add_constant: bool,
77}
78
79impl FeatureCollection {
80    /// Creates a new instance  of `FeatureCollection`.
81    pub fn new() -> Self {
82        Self::default()
83    }
84
85    fn validate_feature<T, I>(&self, feature: &T, feature_name: &str) -> Result<(), DigiFiError>
86    where
87        T: Iterator<Item = I> + ExactSizeIterator,
88        I: Borrow<f64>,
89    {
90        if self.feature_names.contains(&feature_name.to_owned()) {
91            return Err(DigiFiError::Other { title: Self::error_title(), details: format!("Feature `{}` already exists in the collection.", feature_name), });
92        }
93        if let Some(feature_size) =  self.feature_size {
94            if feature.len() != feature_size {
95                return Err(DigiFiError::WrongLength { title: Self::error_title(), arg: feature_name.to_owned(), len: feature_size, });
96            }
97        }
98        Ok(())
99    }
100
101    /// Checks that the feature exists in the collection.
102    fn feature_exists(&self, feature_name: &str) -> Result<(), DigiFiError> {
103        if !self.feature_names.contains(&feature_name.to_owned()) {
104            return Err(DigiFiError::Other { title: Self::error_title(), details: format!("Feature `{}` doesn't exists in the collection.", feature_name), });
105        }
106        Ok(())
107    }
108
109    /// Returns the number of features in the collection.
110    pub fn len(&self) -> usize {
111        self.feature_names.len()
112    }
113
114    /// Returns the length of a feature in the collection.
115    /// 
116    /// Note: This length will be the same for every feature.
117    pub fn feature_size(&self) -> Option<usize> {
118        self.feature_size
119    }
120
121    /// Returns the index of the feature in the collection.
122    pub fn get_feature_index(&self, feature_name: &str) -> Result<usize, DigiFiError> {
123        self.feature_names.iter().position(|v| v == feature_name )
124            .ok_or(DigiFiError::NotFound { title: Self::error_title(), data: feature_name.to_owned(), })
125    }
126
127    /// Adds the feature to the collection.
128    pub fn add_feature<T, I>(&mut self, feature: T, feature_name: &str) -> Result<(), DigiFiError>
129    where
130        T: Iterator<Item = I> + ExactSizeIterator,
131        I: Borrow<f64>,
132    {
133        let feature_name: String = String::from(feature_name);
134        self.validate_feature(&feature, &feature_name)?;
135        let feature_len: usize = feature.len();
136        // Update feature information
137        self.features.push(feature.map(|v| *v.borrow() ).collect());
138        self.feature_names.push(feature_name);
139        // Update other metadata
140        if let None = self.feature_size {
141            self.feature_size = Some(feature_len);
142        }
143        Ok(())
144    }
145
146    /// Removes the feature from the collection.
147    pub fn remove_feature(&mut self, feature_name: &str) -> Result<(), DigiFiError> {
148        let feature_name: String = String::from(feature_name);
149        self.feature_exists(&feature_name)?;
150        let index: usize = self.get_feature_index(&feature_name)?;
151        // Update feature information
152        self.features.remove(index);
153        self.feature_names.remove(index);
154        // Update other metadata
155        if self.feature_names.is_empty() {
156            self.feature_size = None;
157        }
158        Ok(())
159    }
160
161    /// Returns a feature as an `Array1`
162    pub fn get_feature_array(&self, feature_name: &str) -> Result<&Array1<f64>, DigiFiError> {
163        let feature_name: String = String::from(feature_name);
164        self.feature_exists(&feature_name)?;
165        let index: usize = self.get_feature_index(&feature_name)?;
166        Ok(&self.features[index])
167    }
168
169    /// Returns a matrix that is composed of the features from the collection.
170    pub fn get_matrix(&self) -> Result<Array2<f64>, DigiFiError> {
171        let feature_size: usize = self.feature_size
172            .ok_or(DigiFiError::Other { title: Self::error_title(), details: "No features are present in the collection.".to_owned(), })?;
173        let (mut shape, mut x_matrix) = (
174            (self.len(), feature_size),
175            self.features.iter().fold(vec![], |mut prev, curr| { prev.append(&mut curr.to_vec()); prev } )
176        );
177        if self.add_constant {
178            shape.0 += 1;
179            x_matrix.append(&mut vec![1.0; feature_size]);
180        }
181        Ok(Array2::from_shape_vec(shape, x_matrix)?.reversed_axes())
182    }
183
184    pub fn into_matrix(self) -> Result<Array2<f64>, DigiFiError> {
185        self.get_matrix()
186    }
187
188    /// Returns delta degrees of freedom.
189    pub fn ddof(&self) -> usize {
190        match self.add_constant {
191            true => self.len() + 1,
192            false => self.len(),
193        }
194    }
195}
196
197impl ErrorTitle for FeatureCollection {
198    fn error_title() -> String {
199        String::from("Feature Collection")
200    }
201}
202
203
#[cfg(test)]
mod tests {
    use ndarray::{Array1, Array2, array};
    use crate::utilities::{TEST_ACCURACY, feature_collection::FeatureCollection};

    /// Adds, reads back and removes features built from different iterator types.
    #[test]
    fn unit_test_feature_collection() {
        // Features
        let x_1: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        let x_2: Vec<f64> = vec![6.0, 5.0, 4.0, 3.0, 2.0, 1.0];
        let x_3: Array1<f64> = Array1::from_vec(vec![9.0, 8.0, 7.0, 6.0, 5.0, 4.0]);
        let x_4: Array1<f64> = Array1::from_vec(vec![4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
        // Create feature collection with iterators from different types of data structures
        let mut fc: FeatureCollection = FeatureCollection::new();
        fc.add_feature(x_1.iter(), "x_1").unwrap();
        assert_eq!(fc.len(), 1);
        assert_eq!(fc.feature_size(), Some(6));
        fc.add_feature(x_2.into_iter(), "x_2").unwrap();
        assert_eq!(fc.len(), 2);
        assert_eq!(fc.feature_size(), Some(6));
        fc.add_feature(x_3.iter(), "x_3").unwrap();
        assert_eq!(fc.len(), 3);
        assert_eq!(fc.feature_size(), Some(6));
        fc.add_feature(x_4.into_iter(), "x_4").unwrap();
        assert_eq!(fc.len(), 4);
        assert_eq!(fc.feature_size(), Some(6));
        // Get features from the collection
        assert_eq!(fc.get_feature_array("x_3").unwrap(), &x_3);
        let matrix: Array2<f64> = array![
            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
            [6.0, 5.0, 4.0, 3.0, 2.0, 1.0],
            [9.0, 8.0, 7.0, 6.0, 5.0, 4.0],
            [4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
        ].reversed_axes();
        assert_eq!(fc.get_matrix().unwrap(), matrix);
        // Remove feature
        fc.remove_feature("x_1").unwrap();
        assert_eq!(fc.len(), 3);
        assert_eq!(fc.feature_size(), Some(6));
    }

    /// Fits a linear regression on the matrix produced by the collection.
    #[test]
    fn unit_test_feature_collection_for_linear_regression() {
        use crate::statistics::linear_regression;
        let mut fc: FeatureCollection = FeatureCollection::new();
        fc.add_constant = true;
        fc.add_feature(vec![1.0, 4.0, 6.0].into_iter(), "x_1").unwrap();
        fc.add_feature(vec![3.0, 4.0, 5.0].into_iter(), "x_2").unwrap();
        let y: Array1<f64> = array![1.0, 2.0, 3.0];
        let params: Array1<f64> = linear_regression(&fc.into_matrix().unwrap(), &y).unwrap();
        // The results were found using LinearRegression from sklearn
        let results: Array1<f64> = Array1::from(vec![-2.49556592e-16, 1.0, -2.0]);
        assert!((&params - &results).map(|v| v.abs() ).sum() < TEST_ACCURACY);
    }

    /// Invalid operations are reported as errors and leave the collection unchanged.
    #[test]
    fn unit_test_feature_collection_errors() {
        let mut fc: FeatureCollection = FeatureCollection::new();
        // An empty collection cannot produce a matrix
        assert!(fc.get_matrix().is_err());
        fc.add_feature(vec![1.0, 2.0, 3.0].into_iter(), "x_1").unwrap();
        // Duplicate name
        assert!(fc.add_feature(vec![4.0, 5.0, 6.0].into_iter(), "x_1").is_err());
        // Length differs from the feature size of the collection
        assert!(fc.add_feature(vec![4.0, 5.0].into_iter(), "x_2").is_err());
        assert_eq!(fc.len(), 1);
        assert_eq!(fc.feature_size(), Some(3));
        // Unknown feature
        assert!(fc.get_feature_index("x_2").is_err());
        assert!(fc.get_feature_array("x_2").is_err());
        assert!(fc.remove_feature("x_2").is_err());
        assert_eq!(fc.len(), 1);
        // Removing the last feature resets the feature size
        fc.remove_feature("x_1").unwrap();
        assert_eq!(fc.len(), 0);
        assert_eq!(fc.feature_size(), None);
        fc.add_feature(vec![4.0, 5.0].into_iter(), "x_2").unwrap();
        assert_eq!(fc.feature_size(), Some(2));
    }

    /// The constant term is appended as the last column of the matrix.
    #[test]
    fn unit_test_feature_collection_add_constant() {
        let mut fc: FeatureCollection = FeatureCollection::new();
        fc.add_constant = true;
        fc.add_feature(vec![1.0, 2.0, 3.0].into_iter(), "x_1").unwrap();
        fc.add_feature(vec![4.0, 5.0, 6.0].into_iter(), "x_2").unwrap();
        assert_eq!(fc.len(), 2);
        assert_eq!(fc.ddof(), 3);
        let matrix: Array2<f64> = array![
            [1.0, 4.0, 1.0],
            [2.0, 5.0, 1.0],
            [3.0, 6.0, 1.0],
        ];
        assert_eq!(fc.get_matrix().unwrap(), matrix);
    }
}