stopwords/sklearn/
mod.rs

1use super::Language;
2use super::Stopwords;
3
4lazy_static! {
5    static ref ENGLISH: Vec<&'static str> = include_str!("data/english.txt").lines().collect();
6}
7
/// Stopword source using data from [scikit-learn](https://scikit-learn.org),
/// the Python machine learning library.
///
/// This is a zero-sized marker type; it carries no state of its own.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct SkLearn;
10
11impl Stopwords for SkLearn {
12    /// Stopwords provided by vectorizers (`TfidfVectorizer`, `HashingVectorizer`, etc).
13    ///
14    /// The only language available is English.
15    fn stopwords(language: Language) -> Option<&'static [&'static str]> {
16        match language {
17            Language::English => Some(&ENGLISH),
18            _ => None,
19        }
20    }
21}
22
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// The first five English entries must be exactly this set of words
    /// (compared as sets, so their relative order is not checked).
    #[test]
    fn english() {
        let list = SkLearn::stopwords(Language::English).unwrap();
        let actual: HashSet<&str> = list.iter().take(5).cloned().collect();

        let expected: HashSet<&str> = ["a", "about", "above", "across", "after"]
            .iter()
            .cloned()
            .collect();

        assert_eq!(actual, expected);
    }
}
33}