1use crate::error::DriftError;
2use ndarray::{Array, Array2};
3use rayon::iter::IndexedParallelIterator;
4use rayon::iter::IntoParallelRefIterator;
5use rayon::iter::ParallelIterator;
6use scouter_types::FeatureMap;
7use std::collections::BTreeSet;
8use std::collections::HashMap;
9
10pub trait CategoricalFeatureHelpers {
11 fn create_feature_map(
22 &self,
23 features: &[String],
24 array: &[Vec<String>],
25 ) -> Result<FeatureMap, DriftError> {
26 if features.len() != array.len() {
28 return Err(DriftError::FeatureLengthError);
29 };
30
31 let feature_map = array
32 .par_iter()
33 .enumerate()
34 .map(|(i, col)| {
35 let unique = col
36 .iter()
37 .collect::<BTreeSet<_>>()
38 .into_iter()
39 .collect::<Vec<_>>();
40 let mut map = HashMap::new();
41 for (j, item) in unique.iter().enumerate() {
42 map.insert(item.to_string(), j);
43
44 if j == unique.len() - 1 {
46 map.insert("missing".to_string(), j + 1);
48 }
49 }
50
51 (features[i].to_string(), map)
52 })
53 .collect::<HashMap<_, _>>();
54
55 Ok(FeatureMap {
56 features: feature_map,
57 })
58 }
59
60 fn convert_strings_to_ndarray_f32(
61 &self,
62 features: &Vec<String>,
63 array: &[Vec<String>],
64 feature_map: &FeatureMap,
65 ) -> Result<Array2<f32>, DriftError>
66where {
67 let features_not_exist = features
69 .iter()
70 .map(|x| feature_map.features.contains_key(x))
71 .position(|x| !x);
72
73 if features_not_exist.is_some() {
74 return Err(DriftError::FeatureNotExistError);
75 }
76
77 let data = features
78 .par_iter()
79 .enumerate()
80 .map(|(i, feature)| {
81 let map = feature_map.features.get(feature).unwrap();
82
83 let col = array[i]
85 .iter()
86 .map(|x| *map.get(x).unwrap_or(map.get("missing").unwrap()) as f32)
87 .collect::<Vec<_>>();
88
89 col
90 })
91 .collect::<Vec<_>>();
92
93 let data = Array::from_shape_vec((features.len(), array[0].len()), data.concat())
94 .map_err(DriftError::ShapeError)?;
95
96 Ok(data.t().to_owned())
97 }
98
99 fn convert_strings_to_ndarray_f64(
100 &self,
101 features: &Vec<String>,
102 array: &[Vec<String>],
103 feature_map: &FeatureMap,
104 ) -> Result<Array2<f64>, DriftError>
105where {
106 let features_not_exist = features
108 .iter()
109 .map(|x| feature_map.features.contains_key(x))
110 .position(|x| !x);
111
112 if features_not_exist.is_some() {
113 return Err(DriftError::FeatureNotExistError);
114 }
115 let data = features
116 .par_iter()
117 .enumerate()
118 .map(|(i, feature)| {
119 let map = feature_map.features.get(feature).unwrap();
120
121 let col = array[i]
123 .iter()
124 .map(|x| *map.get(x).unwrap_or(map.get("missing").unwrap()) as f64)
125 .collect::<Vec<_>>();
126 col
127 })
128 .collect::<Vec<_>>();
129
130 let data = Array::from_shape_vec((features.len(), array[0].len()), data.concat())
131 .map_err(DriftError::ShapeError)?;
132
133 Ok(data.t().to_owned())
134 }
135}
136
#[cfg(test)]
mod tests {

    use crate::utils::CategoricalFeatureHelpers;

    pub struct TestStruct;
    impl CategoricalFeatureHelpers for TestStruct {}

    /// Builds an owned `Vec<String>` from string literals.
    fn strings(values: &[&str]) -> Vec<String> {
        values.iter().map(|value| value.to_string()).collect()
    }

    /// The feature map has one entry per feature, and each entry holds the
    /// distinct values of its column plus the "missing" fallback.
    #[test]
    fn test_create_feature_map_base() {
        let columns = vec![
            strings(&["a", "b", "c", "d", "e"]),
            strings(&[
                "hello", "blah", "c", "d", "e", "hello", "blah", "c", "d", "e",
            ]),
        ];
        let names = strings(&["feature_1", "feature_2"]);

        let feature_map = TestStruct.create_feature_map(&names, &columns).unwrap();

        assert_eq!(feature_map.features.len(), 2);
        // Five distinct values in the second column plus "missing".
        assert_eq!(feature_map.features.get("feature_2").unwrap().len(), 6);
    }

    /// Both float conversions yield a (rows, features) matrix.
    #[test]
    fn test_create_array_from_string() {
        let columns = vec![
            strings(&["a", "b", "c", "d", "e"]),
            strings(&["a", "a", "a", "b", "b"]),
        ];
        let names = strings(&["feature_1", "feature_2"]);

        let feature_map = TestStruct.create_feature_map(&names, &columns).unwrap();
        assert_eq!(feature_map.features.len(), 2);

        let as_f32 = TestStruct
            .convert_strings_to_ndarray_f32(&names, &columns, &feature_map)
            .unwrap();
        assert_eq!(as_f32.shape(), &[5, 2]);

        let as_f64 = TestStruct
            .convert_strings_to_ndarray_f64(&names, &columns, &feature_map)
            .unwrap();
        assert_eq!(as_f64.shape(), &[5, 2]);
    }
}