scouter_drift/
utils.rs

1use crate::error::DriftError;
2use ndarray::{Array, Array2};
3use rayon::iter::IndexedParallelIterator;
4use rayon::iter::IntoParallelRefIterator;
5use rayon::iter::ParallelIterator;
6use scouter_types::FeatureMap;
7use std::collections::BTreeSet;
8use std::collections::HashMap;
9
10pub trait CategoricalFeatureHelpers {
11    // creates a feature map from a 2D array
12    //
13    // # Arguments
14    //
15    // * `features` - A vector of feature names
16    // * `array` - A 2D array of string values
17    //
18    // # Returns
19    //
20    // A feature map
21    fn create_feature_map(
22        &self,
23        features: &[String],
24        array: &[Vec<String>],
25    ) -> Result<FeatureMap, DriftError> {
26        // check if features and array are the same length
27        if features.len() != array.len() {
28            return Err(DriftError::FeatureLengthError);
29        };
30
31        let feature_map = array
32            .par_iter()
33            .enumerate()
34            .map(|(i, col)| {
35                let unique = col
36                    .iter()
37                    .collect::<BTreeSet<_>>()
38                    .into_iter()
39                    .collect::<Vec<_>>();
40                let mut map = HashMap::new();
41                for (j, item) in unique.iter().enumerate() {
42                    map.insert(item.to_string(), j);
43
44                    // check if j is last index
45                    if j == unique.len() - 1 {
46                        // insert missing value
47                        map.insert("missing".to_string(), j + 1);
48                    }
49                }
50
51                (features[i].to_string(), map)
52            })
53            .collect::<HashMap<_, _>>();
54
55        Ok(FeatureMap {
56            features: feature_map,
57        })
58    }
59
60    fn convert_strings_to_ndarray_f32(
61        &self,
62        features: &Vec<String>,
63        array: &[Vec<String>],
64        feature_map: &FeatureMap,
65    ) -> Result<Array2<f32>, DriftError>
66where {
67        // check if features in feature_map.features.keys(). If any feature is not found, return error
68        let features_not_exist = features
69            .iter()
70            .map(|x| feature_map.features.contains_key(x))
71            .position(|x| !x);
72
73        if features_not_exist.is_some() {
74            return Err(DriftError::FeatureNotExistError);
75        }
76
77        let data = features
78            .par_iter()
79            .enumerate()
80            .map(|(i, feature)| {
81                let map = feature_map.features.get(feature).unwrap();
82
83                // attempt to set feature. If not found, set to missing
84                let col = array[i]
85                    .iter()
86                    .map(|x| *map.get(x).unwrap_or(map.get("missing").unwrap()) as f32)
87                    .collect::<Vec<_>>();
88
89                col
90            })
91            .collect::<Vec<_>>();
92
93        let data = Array::from_shape_vec((features.len(), array[0].len()), data.concat())
94            .map_err(DriftError::ShapeError)?;
95
96        Ok(data.t().to_owned())
97    }
98
99    fn convert_strings_to_ndarray_f64(
100        &self,
101        features: &Vec<String>,
102        array: &[Vec<String>],
103        feature_map: &FeatureMap,
104    ) -> Result<Array2<f64>, DriftError>
105where {
106        // check if features in feature_map.features.keys(). If any feature is not found, return error
107        let features_not_exist = features
108            .iter()
109            .map(|x| feature_map.features.contains_key(x))
110            .position(|x| !x);
111
112        if features_not_exist.is_some() {
113            return Err(DriftError::FeatureNotExistError);
114        }
115        let data = features
116            .par_iter()
117            .enumerate()
118            .map(|(i, feature)| {
119                let map = feature_map.features.get(feature).unwrap();
120
121                // attempt to set feature. If not found, set to missing
122                let col = array[i]
123                    .iter()
124                    .map(|x| *map.get(x).unwrap_or(map.get("missing").unwrap()) as f64)
125                    .collect::<Vec<_>>();
126                col
127            })
128            .collect::<Vec<_>>();
129
130        let data = Array::from_shape_vec((features.len(), array[0].len()), data.concat())
131            .map_err(DriftError::ShapeError)?;
132
133        Ok(data.t().to_owned())
134    }
135}
136
#[cfg(test)]
mod tests {

    use crate::error::DriftError;
    use crate::utils::CategoricalFeatureHelpers;

    pub struct TestStruct;
    impl CategoricalFeatureHelpers for TestStruct {}

    // Builds an owned string column from string literals.
    fn strings(values: &[&str]) -> Vec<String> {
        values.iter().map(|value| value.to_string()).collect()
    }

    #[test]
    fn test_create_feature_map_base() {
        let string_vec = vec![
            strings(&["a", "b", "c", "d", "e"]),
            strings(&[
                "hello", "blah", "c", "d", "e", "hello", "blah", "c", "d", "e",
            ]),
        ];

        let string_features = strings(&["feature_1", "feature_2"]);

        let feature_map = TestStruct
            .create_feature_map(&string_features, &string_vec)
            .unwrap();

        assert_eq!(feature_map.features.len(), 2);

        // five distinct values plus the "missing" entry
        let feature_2 = feature_map.features.get("feature_2").unwrap();
        assert_eq!(feature_2.len(), 6);

        // codes follow the sorted order of the distinct values
        assert_eq!(feature_2.get("blah"), Some(&0));
        assert_eq!(feature_2.get("c"), Some(&1));
        assert_eq!(feature_2.get("hello"), Some(&4));
        assert_eq!(feature_2.get("missing"), Some(&5));
    }

    #[test]
    fn test_create_feature_map_length_mismatch() {
        let string_vec = vec![strings(&["a", "b"]), strings(&["c", "d"])];
        let string_features = strings(&["feature_1"]);

        let result = TestStruct.create_feature_map(&string_features, &string_vec);

        assert!(matches!(result, Err(DriftError::FeatureLengthError)));
    }

    #[test]
    fn test_create_array_from_string() {
        let string_vec = vec![
            strings(&["a", "b", "c", "d", "e"]),
            strings(&["a", "a", "a", "b", "b"]),
        ];

        let string_features = strings(&["feature_1", "feature_2"]);

        let feature_map = TestStruct
            .create_feature_map(&string_features, &string_vec)
            .unwrap();

        assert_eq!(feature_map.features.len(), 2);

        let f32_array = TestStruct
            .convert_strings_to_ndarray_f32(&string_features, &string_vec, &feature_map)
            .unwrap();

        assert_eq!(f32_array.shape(), &[5, 2]);
        assert_eq!(
            f32_array.column(0).to_vec(),
            vec![0.0_f32, 1.0, 2.0, 3.0, 4.0]
        );
        assert_eq!(
            f32_array.column(1).to_vec(),
            vec![0.0_f32, 0.0, 0.0, 1.0, 1.0]
        );

        let f64_array = TestStruct
            .convert_strings_to_ndarray_f64(&string_features, &string_vec, &feature_map)
            .unwrap();

        assert_eq!(f64_array.shape(), &[5, 2]);
        assert_eq!(
            f64_array.column(0).to_vec(),
            vec![0.0_f64, 1.0, 2.0, 3.0, 4.0]
        );
        assert_eq!(
            f64_array.column(1).to_vec(),
            vec![0.0_f64, 0.0, 0.0, 1.0, 1.0]
        );
    }

    #[test]
    fn test_unknown_category_maps_to_missing() {
        let training = vec![strings(&["a", "b", "c"])];
        let string_features = strings(&["feature_1"]);

        let feature_map = TestStruct
            .create_feature_map(&string_features, &training)
            .unwrap();

        // "z" was never seen, so it takes the "missing" code (3)
        let unseen = vec![strings(&["a", "z", "c"])];

        let f64_array = TestStruct
            .convert_strings_to_ndarray_f64(&string_features, &unseen, &feature_map)
            .unwrap();

        assert_eq!(f64_array.shape(), &[3, 1]);
        assert_eq!(f64_array.column(0).to_vec(), vec![0.0_f64, 3.0, 2.0]);
    }

    #[test]
    fn test_unknown_feature_is_rejected() {
        let string_vec = vec![strings(&["a", "b", "c"])];
        let string_features = strings(&["feature_1"]);

        let feature_map = TestStruct
            .create_feature_map(&string_features, &string_vec)
            .unwrap();

        let other_features = strings(&["not_in_map"]);

        let result =
            TestStruct.convert_strings_to_ndarray_f32(&other_features, &string_vec, &feature_map);

        assert!(matches!(result, Err(DriftError::FeatureNotExistError)));
    }
}