cum/
lib.rs

1use std::collections::HashMap;
2use std::collections::HashSet;
3
/// Counts how often each word occurs across a collection of sentences.
///
/// Arguments:
///
/// * `vec`: A vector of Strings, with each String representing a
///   sentence. Sentences are split into words on ASCII whitespace;
///   words are compared exactly (case and punctuation are significant).
///
/// Returns:
///
/// A set of `(word, frequency)` tuples, one per distinct word, where
/// `frequency` is the total number of times the word appears across
/// all sentences. An empty input (or input containing only whitespace)
/// yields an empty set.
///
pub fn overlap(vec: Vec<String>) -> HashSet<(String, u32)> {
    // Keys borrow from `vec`, so a word is only copied once, when the
    // final owned result is assembled.
    let mut word_freq: HashMap<&str, u32> = HashMap::new();

    for sentence in &vec {
        for word in sentence.split_ascii_whitespace() {
            // Start unseen words at zero so the first occurrence counts as 1.
            *word_freq.entry(word).or_insert(0) += 1;
        }
    }

    word_freq
        .into_iter()
        .map(|(word, count)| (word.to_owned(), count))
        .collect()
}