1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
use std::collections::HashMap;
use std::fs::File;
use std::io;
use std::io::prelude::*;
use std::io::BufReader;

use errors::Word2VecError;

/// Word clusters loaded from a whitespace-separated text listing.
///
/// Each input line holds a word followed by the integer id of the cluster it
/// belongs to. Words are grouped by cluster id, in the order they were read.
#[derive(Debug)]
pub struct WordClusters {
    clusters: HashMap<i32, Vec<String>>,
}

impl WordClusters {
    /// Opens `file_name` and loads the clusters it contains.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or read, or if its
    /// contents are rejected by [`WordClusters::load_from_reader`].
    pub fn load_from_file(file_name: &str) -> Result<WordClusters, Word2VecError> {
        let file = File::open(file_name)?;
        WordClusters::load_from_reader(BufReader::new(file))
    }

    /// Loads clusters from any buffered reader.
    ///
    /// Every non-blank line must contain at least two whitespace-separated
    /// tokens: the word, then its cluster id as an `i32`. Any further tokens
    /// on the line are ignored. Blank lines are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if reading fails, if a line has a word but no cluster
    /// id, or if the cluster id is not a valid `i32`. Malformed lines are
    /// reported as an `io::Error` of kind `InvalidData` (converted into
    /// `Word2VecError`) instead of panicking.
    pub fn load_from_reader<R: BufRead>(mut reader: R) -> Result<WordClusters, Word2VecError> {
        // One buffer is reused for every line to avoid a per-line allocation.
        let mut buffer = String::new();
        let mut clusters: HashMap<i32, Vec<String>> = HashMap::new();
        let mut line_number = 0usize;

        while reader.read_line(&mut buffer)? > 0 {
            line_number += 1;
            {
                let mut tokens = buffer.split_whitespace();
                // A line with no tokens at all is blank: skip it.
                if let Some(word) = tokens.next() {
                    let raw_id = tokens.next().ok_or_else(|| {
                        invalid_data(format!(
                            "line {}: missing cluster id after word {:?}",
                            line_number, word
                        ))
                    })?;
                    let cluster_number = raw_id.parse::<i32>().map_err(|err| {
                        invalid_data(format!(
                            "line {}: invalid cluster id {:?}: {}",
                            line_number, raw_id, err
                        ))
                    })?;
                    clusters
                        .entry(cluster_number)
                        .or_insert_with(Vec::new)
                        .push(word.to_string());
                }
            }
            buffer.clear();
        }

        Ok(WordClusters { clusters })
    }

    /// Returns the words assigned to cluster `index`, or `None` if no word
    /// was loaded for that cluster.
    pub fn get_words_on_cluster(&self, index: i32) -> Option<&Vec<String>> {
        self.clusters.get(&index)
    }

    /// Returns the id of a cluster containing `word`, or `None` if the word
    /// is not present in any cluster.
    ///
    /// This is a linear scan over all clusters. If the same word was loaded
    /// into several clusters, which id is returned is unspecified because
    /// `HashMap` iteration order is arbitrary.
    pub fn get_cluster(&self, word: &str) -> Option<&i32> {
        self.clusters
            .iter()
            .find(|&(_, words)| words.iter().any(|candidate| candidate.as_str() == word))
            .map(|(cluster_number, _)| cluster_number)
    }
}

/// Builds the `io::Error` used to report a malformed cluster line.
fn invalid_data(message: String) -> io::Error {
    io::Error::new(io::ErrorKind::InvalidData, message)
}