//! Word clusters: a mapping from integer cluster ids to the words assigned to
//! them, loaded from whitespace-separated `word cluster_id` text lines.
use std::io::prelude::*;
use std::io::BufReader;
use std::fs::File;
use std::collections::HashMap;
use errors::Word2VecError;


/// A set of word clusters, keyed by cluster number.
///
/// Each cluster number maps to the words assigned to that cluster, in the
/// order they were encountered while loading.
#[derive(Debug, Clone)]
pub struct WordClusters {
    /// Cluster number -> words belonging to that cluster.
    clusters: HashMap<i32, Vec<String>>,
}


impl WordClusters {
    /// Opens `file_name` and loads word clusters from its contents.
    ///
    /// See [`WordClusters::load_from_reader`] for the expected line format.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or read, or if any
    /// non-blank line is malformed.
    pub fn load_from_file(file_name: &str) -> Result<WordClusters, Word2VecError> {
        let file = File::open(file_name)?;
        WordClusters::load_from_reader(BufReader::new(file))
    }

    /// Loads word clusters from any buffered reader.
    ///
    /// Every non-blank line must start with two whitespace-separated fields:
    /// a word followed by an integer cluster number. Any further fields on
    /// the line are ignored. Lines that are empty or contain only whitespace
    /// are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if reading fails, or (as an I/O error of kind
    /// `InvalidData`) if a non-blank line has no cluster number or the
    /// cluster number is not a valid `i32`.
    pub fn load_from_reader<R: BufRead>(mut reader: R) -> Result<WordClusters, Word2VecError> {
        // A single line buffer is reused for the whole input to avoid one
        // allocation per line.
        let mut buffer = String::new();
        let mut clusters: HashMap<i32, Vec<String>> = HashMap::new();
        let mut line_number: usize = 0;
        while reader.read_line(&mut buffer)? > 0 {
            line_number += 1;
            {
                let mut fields = buffer.split_whitespace();
                // A line without any field is blank: skip it instead of failing.
                if let Some(word) = fields.next() {
                    // Malformed input is reported to the caller rather than
                    // panicking; the io::Error is converted by `?` exactly
                    // like the read errors above.
                    let cluster_number = fields
                        .next()
                        .and_then(|field| field.parse::<i32>().ok())
                        .ok_or_else(|| {
                            ::std::io::Error::new(
                                ::std::io::ErrorKind::InvalidData,
                                format!(
                                    "line {}: expected `<word> <cluster number>`, got {:?}",
                                    line_number,
                                    buffer.trim()
                                ),
                            )
                        })?;
                    clusters
                        .entry(cluster_number)
                        .or_insert_with(Vec::new)
                        .push(word.to_string());
                }
            }
            buffer.clear();
        }
        Ok(WordClusters { clusters })
    }

    /// Returns the words assigned to cluster `index`, or `None` if no word
    /// was loaded for that cluster.
    pub fn get_words_on_cluster(&self, index: i32) -> Option<&Vec<String>> {
        self.clusters.get(&index)
    }

    /// Returns the number of a cluster containing `word`, or `None` if the
    /// word is not present in any cluster.
    ///
    /// This is a linear scan over every cluster. If the same word is listed
    /// under several clusters, which of them is returned is unspecified
    /// (it follows the hash map's iteration order).
    pub fn get_cluster(&self, word: &str) -> Option<&i32> {
        self.clusters
            .iter()
            .find(|&(_, words)| words.iter().any(|candidate| candidate.as_str() == word))
            .map(|(cluster_number, _)| cluster_number)
    }
}