use std::collections::HashMap;
use prelude::*;
/// Incrementally collects named feature observations and assigns each
/// distinct feature name a column index.
#[derive(RustcEncodable, RustcDecodable, Default)]
pub struct DictVectorizer {
/// Maps a feature name to `(column index, number of times the name was recorded)`.
dictionary: HashMap<String, (usize, usize)>,
/// Recorded observations as `(row, column, value)` triples, in insertion order.
data: Vec<(usize, usize, f32)>,
}
impl DictVectorizer {
    /// Creates an empty vectorizer with no known features and no recorded
    /// observations.
    pub fn new() -> DictVectorizer {
        DictVectorizer {
            dictionary: HashMap::new(),
            data: Vec::new(),
        }
    }

    /// Records one observation: feature `name` has `value` in `row`.
    ///
    /// The first time a feature name is seen it is assigned the next free
    /// column index (the dictionary size at that moment) with an occurrence
    /// count of 1. Later observations of the same name reuse that column and
    /// increment the count.
    pub fn partial_fit(&mut self, row: usize, name: &str, value: f32) {
        // Captured before the lookup: this is the column a new name will get.
        let next_col = self.dictionary.len();

        // Look up by `&str` so a `String` is only allocated for new names.
        // The mutable borrow ends with this statement because only the
        // `usize` column index escapes the closure.
        let known_col = self.dictionary.get_mut(name).map(|entry| {
            entry.1 += 1;
            entry.0
        });

        let col = match known_col {
            Some(col) => col,
            None => {
                self.dictionary.insert(name.to_string(), (next_col, 1));
                next_col
            }
        };

        self.data.push((row, col, value));
    }

    /// Builds a sparse array from every observation recorded so far.
    ///
    /// The array has one row per index up to the largest recorded row
    /// (`max_row + 1` rows) and one column per distinct feature name.
    /// Observations are written with `SparseRowArray::set` in the order they
    /// were recorded.
    ///
    /// When nothing has been recorded the result has zero rows instead of
    /// panicking.
    pub fn transform(&self) -> SparseRowArray {
        let rows = self
            .data
            .iter()
            .map(|&(row, _, _)| row)
            .max()
            .map_or(0, |max_row| max_row + 1);
        let cols = self.dictionary.len();

        let mut array = SparseRowArray::zeros(rows, cols);
        for &(row, col, value) in &self.data {
            array.set(row, col, value);
        }
        array
    }

    /// Returns the feature dictionary: name -> `(column index, occurrence count)`.
    pub fn dictionary(&self) -> &HashMap<String, (usize, usize)> {
        &self.dictionary
    }
}