1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
use std::collections::HashSet;
use std::convert::AsRef;
use std::convert::{From, Into};
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::ops::{Deref, DerefMut};
use std::path::Path;
/// A collection of stop words backed by a `HashSet<String>`.
///
/// Words added through the inherent `insert` method are lower-cased before
/// being stored. The wrapper dereferences to the inner set, so read-only
/// `HashSet` methods such as `contains` and `len` are available directly.
///
/// `Default` yields an empty collection, equivalent to `StopWords::new()`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct StopWords(HashSet<String>);
impl From<HashSet<String>> for StopWords {
fn from(set: HashSet<String>) -> StopWords {
let mut sw = StopWords::new();
set.into_iter().for_each(|w| {
sw.insert(w);
});
sw
}
}
impl Into<HashSet<String>> for StopWords {
fn into(self) -> HashSet<String> {
self.0
}
}
impl Deref for StopWords {
type Target = HashSet<String>;
fn deref(&self) -> &HashSet<String> {
&self.0
}
}
impl DerefMut for StopWords {
fn deref_mut(&mut self) -> &mut HashSet<String> {
&mut self.0
}
}
impl StopWords {
pub fn new() -> Self {
StopWords(HashSet::new())
}
pub fn insert(&mut self, value: String) -> bool {
self.0.insert(value.to_lowercase())
}
pub fn from_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
let mut sw = StopWords::new();
let file = File::open(path)?;
let reader = BufReader::new(file);
for line in reader.lines() {
let word = line?;
if !word.is_empty() && !word.starts_with('#') {
sw.insert(word);
}
}
Ok(sw)
}
}