use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::convert::AsRef;
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::ops::{Deref, DerefMut};
use std::path::Path;
/// A set of stop words, implemented as a newtype around `HashSet<String>`.
///
/// The wrapper dereferences to the inner set, so `HashSet` methods can be
/// called on it directly. Only [`StopWords::insert`] lowercases its argument;
/// words added through the inner set (or via `From<HashSet<String>>`) are
/// stored exactly as given.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct StopWords(HashSet<String>);
impl From<HashSet<String>> for StopWords {
fn from(set: HashSet<String>) -> StopWords {
StopWords(set)
}
}
impl From<StopWords> for HashSet<String> {
fn from(sw: StopWords) -> Self {
sw.0
}
}
impl Deref for StopWords {
type Target = HashSet<String>;
fn deref(&self) -> &HashSet<String> {
&self.0
}
}
impl DerefMut for StopWords {
fn deref_mut(&mut self) -> &mut HashSet<String> {
&mut self.0
}
}
impl StopWords {
    /// Creates an empty stop-word set.
    pub fn new() -> Self {
        StopWords(HashSet::new())
    }

    /// Adds `value` to the set after converting it to lowercase.
    ///
    /// Returns `true` if the lowercased word was not already present and
    /// `false` otherwise, mirroring `HashSet::insert`.
    pub fn insert(&mut self, value: String) -> bool {
        self.0.insert(value.to_lowercase())
    }

    /// Loads stop words from a text file containing one word per line.
    ///
    /// Each line is stripped of leading and trailing whitespace. Lines that
    /// are then empty, or that start with `#`, are skipped; every other line
    /// is added through [`StopWords::insert`] and therefore lowercased.
    ///
    /// # Errors
    ///
    /// Returns the underlying `io::Error` if the file cannot be opened or if
    /// reading a line fails (for example, on invalid UTF-8).
    pub fn from_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let reader = BufReader::new(File::open(path)?);
        let mut sw = StopWords::new();
        for line in reader.lines() {
            let line = line?;
            // Trim first so that stray spaces/tabs around a word do not end up
            // in the stored entry, whitespace-only lines count as blank, and
            // indented `#` lines are still recognised as comments.
            let word = line.trim();
            if word.is_empty() || word.starts_with('#') {
                continue;
            }
            sw.insert(word.to_owned());
        }
        Ok(sw)
    }
}
impl Default for StopWords {
fn default() -> Self {
Self::new()
}
}