1extern crate fnv;
27extern crate csv;
28
29use fnv::FnvHashMap;
30
/// Maps user names and item names to `u32` indices and remembers how many
/// interactions were counted while the mapping was built.
///
/// `Debug`, `Clone` and `Default` are derived so the type can be logged,
/// duplicated and created empty like any other plain data container.
#[derive(Debug, Clone, Default)]
pub struct DataDictionary {
    /// Index stored for each distinct user name.
    user_dict: FnvHashMap<String, u32>,
    /// Index stored for each distinct item name.
    item_dict: FnvHashMap<String, u32>,
    /// Number of interactions counted by the constructor.
    num_interactions: u64,
}
37
38impl DataDictionary {
39
40 pub fn num_users(&self) -> usize {
42 self.user_dict.len()
43 }
44
45 pub fn num_items(&self) -> usize {
47 self.item_dict.len()
48 }
49
50 pub fn num_interactions(&self) -> u64 {
52 self.num_interactions
53 }
54
55 pub fn user_index(&self, name: &str) -> &u32 {
57 &self.user_dict[name]
58 }
59
60 pub fn item_index(&self, name: &str) -> &u32 {
62 &self.item_dict[name]
63 }
64
65 pub fn from_owned<T>(interactions: T) -> Self
69 where
70 T: Iterator<Item = (String, String)>
71 {
72 let mut user_index: u32 = 0;
73 let mut user_dict: FnvHashMap<String, u32> = FnvHashMap::default();
74
75 let mut item_index: u32 = 0;
76 let mut item_dict: FnvHashMap<String, u32> = FnvHashMap::default();
77
78 let mut num_interactions: u64 = 0;
79
80 for (user, item) in interactions {
81
82 user_dict.entry(user).or_insert_with(|| {
83 let current_user_index = user_index;
84 user_index += 1;
85 current_user_index
86 });
87
88 item_dict.entry(item).or_insert_with(|| {
89 let current_item_index = item_index;
90 item_index += 1;
91 current_item_index
92 });
93
94 num_interactions += 1;
95 }
96
97 DataDictionary { user_dict, item_dict, num_interactions }
98 }
99
100 pub fn from<'a,T>(interactions: T) -> DataDictionary
104 where
105 T: Iterator<Item = &'a(String, String)>
106 {
107
108 let owned = interactions
109 .map(|(user, item)| (user.to_owned(), item.to_owned()));
110
111 DataDictionary::from_owned(owned)
112 }
113}
114
115impl <T> From<T> for DataDictionary
119where
120 T: Iterator<Item = (String, String)>
121{
122 fn from(iter: T) -> Self {
123 let mut user_index: u32 = 0;
124 let mut user_dict: FnvHashMap<String, u32> = FnvHashMap::default();
125
126 let mut item_index: u32 = 0;
127 let mut item_dict: FnvHashMap<String, u32> = FnvHashMap::default();
128
129 let mut num_interactions: u64 = 0;
130
131 for (user, item) in iter {
132
133 user_dict.entry(user).or_insert_with(|| {
134 let current_user_index = user_index;
135 user_index += 1;
136 current_user_index
137 });
138
139 item_dict.entry(item).or_insert_with(|| {
140 let current_item_index = item_index;
141 item_index += 1;
142 current_item_index
143 });
144
145 num_interactions += 1;
146 }
147
148 DataDictionary { user_dict, item_dict, num_interactions }
149 }
150}
151
/// Lookup table from item indices to item names.
///
/// `Debug`, `Clone` and `Default` are derived so the table can be logged,
/// duplicated and created empty.
#[derive(Debug, Clone, Default)]
pub struct Renaming {
    /// Item name stored under each item index.
    item_names: FnvHashMap<u32, String>,
}
156
157impl Renaming {
158 pub fn item_name(&self, item_index: u32) -> &str {
160 &self.item_names[&item_index]
161 }
162}
163
164impl From<DataDictionary> for Renaming {
166
167 fn from(data_dict: DataDictionary) -> Self {
168 let item_names: FnvHashMap<u32, String> = data_dict
169 .item_dict
170 .into_iter()
171 .map(|(name, item_id)| (item_id, name))
172 .collect(); Renaming { item_names }
175 }
176}
177
178
#[cfg(test)]
mod tests {

    extern crate fnv;

    use fnv::FnvHashMap;
    use stats::{DataDictionary, Renaming};

    /// Four interactions covering three distinct users and two distinct items.
    fn sample_interactions() -> Vec<(String, String)> {
        [
            ("user_a", "item_a"),
            ("user_a", "item_b"),
            ("user_b", "item_b"),
            ("user_c", "item_a"),
        ]
        .iter()
        .map(|&(user, item)| (String::from(user), String::from(item)))
        .collect()
    }

    /// Checks the counts and indices expected for `sample_interactions`.
    fn assert_sample_dictionary(data_dict: &DataDictionary) {
        assert_eq!(data_dict.num_users(), 3);
        assert_eq!(data_dict.num_items(), 2);
        assert_eq!(data_dict.num_interactions(), 4);

        assert_eq!(*data_dict.user_index("user_a"), 0);
        assert_eq!(*data_dict.user_index("user_c"), 2);

        assert_eq!(*data_dict.item_index("item_a"), 0);
        assert_eq!(*data_dict.item_index("item_b"), 1);
    }

    #[test]
    fn dict_from_tuple_iterator() {
        let interactions = sample_interactions();

        let data_dict = DataDictionary::from(interactions.iter());

        assert_sample_dictionary(&data_dict);

        // Building from borrowed tuples must leave the input usable.
        assert_eq!(interactions.len(), 4);
    }

    #[test]
    fn dict_from_owned_tuple_iterator() {
        let data_dict = DataDictionary::from_owned(sample_interactions().into_iter());

        assert_sample_dictionary(&data_dict);
    }

    #[test]
    fn renaming_from_dict() {
        let mut user_dict: FnvHashMap<String, u32> = FnvHashMap::default();
        user_dict.insert(String::from("user_a"), 0);
        user_dict.insert(String::from("user_b"), 1);

        let mut item_dict: FnvHashMap<String, u32> = FnvHashMap::default();
        item_dict.insert(String::from("item_a"), 0);
        item_dict.insert(String::from("item_b"), 1);
        item_dict.insert(String::from("item_c"), 2);

        let data_dict = DataDictionary { user_dict, item_dict, num_interactions: 10 };

        let renaming = Renaming::from(data_dict);

        for &(index, expected) in [(0, "item_a"), (1, "item_b"), (2, "item_c")].iter() {
            assert_eq!(renaming.item_name(index), expected);
        }
    }
}