use std::collections::HashMap;
use prelude::*;
/// Accumulates named feature observations so they can later be turned into
/// a sparse row-major matrix.
#[derive(RustcEncodable, RustcDecodable)]
pub struct DictVectorizer {
    /// Maps each feature name to `(column index, times the name was seen)`.
    dictionary: HashMap<String, (usize, usize)>,
    /// Recorded observations as `(row, column, value)` triplets, kept in the
    /// order they were added.
    data: Vec<(usize, usize, f32)>,
}
impl DictVectorizer {
    /// Creates an empty vectorizer with no known features and no recorded data.
    pub fn new() -> DictVectorizer {
        DictVectorizer {
            dictionary: HashMap::new(),
            data: Vec::new(),
        }
    }

    /// Records a single observation: feature `name` has `value` in row `row`.
    ///
    /// The first time a feature name is seen it is assigned the next free
    /// column index (the number of distinct names seen so far) with an
    /// occurrence count of 1; on later sightings only the count is incremented.
    /// Every call appends one `(row, column, value)` triplet, so repeated
    /// calls for the same row and name are all kept.
    pub fn partial_fit(&mut self, row: usize, name: &str, value: f32) {
        // Resolve to a plain `usize` first so no borrow of the map is held
        // when a new entry has to be inserted below. The key is only
        // allocated when the name is actually new.
        let known_col = self.dictionary.get_mut(name).map(|entry| {
            entry.1 += 1;
            entry.0
        });

        let col = match known_col {
            Some(col) => col,
            None => {
                let next_col = self.dictionary.len();
                self.dictionary.insert(name.to_string(), (next_col, 1));
                next_col
            }
        };

        self.data.push((row, col, value));
    }

    /// Builds a sparse array from all observations recorded so far.
    ///
    /// The result has `max(row) + 1` rows and one column per distinct feature
    /// name. Triplets are written in insertion order via `SparseRowArray::set`.
    /// When nothing has been recorded, the array is created with zero rows
    /// instead of panicking.
    // NOTE(review): assumes `SparseRowArray::zeros` accepts a zero row count
    // - confirm against its implementation.
    pub fn transform(&self) -> SparseRowArray {
        let rows = self.data
            .iter()
            .map(|x| x.0)
            .max()
            .map_or(0, |max_row| max_row + 1);
        let cols = self.dictionary.len();

        let mut array = SparseRowArray::zeros(rows, cols);
        for &(row, col, value) in &self.data {
            array.set(row, col, value);
        }
        array
    }

    /// Returns the feature dictionary, mapping each feature name to
    /// `(column index, times the name was seen)`.
    pub fn dictionary(&self) -> &HashMap<String, (usize, usize)> {
        &self.dictionary
    }
}