1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#[macro_use] extern crate lazy_static;
#[macro_use] extern crate thiserror;
use std::str::FromStr;
mod nltk;
mod spark;
mod sklearn;
pub use nltk::NLTK;
pub use spark::Spark;
pub use sklearn::SkLearn;
/// A natural language that can be requested from a [`Stopwords`] source.
///
/// Values are plain, copyable tags. They can be parsed from their lowercase
/// English names (for example `"english"`) through the [`FromStr`]
/// implementation on this type.
///
/// `Eq` and `Hash` are derived so that a `Language` can be used directly as a
/// key in `HashMap` / `HashSet`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum Language {
    Arabic,
    Azerbaijani,
    Danish,
    Dutch,
    English,
    Finnish,
    French,
    German,
    Greek,
    Hungarian,
    Italian,
    Kazakh,
    Nepali,
    Norwegian,
    Portuguese,
    Romanian,
    Russian,
    Spanish,
    Swedish,
    Turkish,
}
#[derive(Error, PartialEq, Debug)]
#[error("Language {0:?} is not supported")]
pub struct LanguageError(String);
/// Parses a [`Language`] from its lowercase English name.
impl FromStr for Language {
    type Err = LanguageError;

    /// Maps a language name such as `"english"` to the matching variant.
    ///
    /// Matching is exact: only the all-lowercase names listed below are
    /// accepted, with no trimming or case folding.
    ///
    /// # Errors
    ///
    /// Returns a [`LanguageError`] holding a copy of `s` when `s` is not one
    /// of the recognised names.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let language = match s {
            "arabic" => Self::Arabic,
            "azerbaijani" => Self::Azerbaijani,
            "danish" => Self::Danish,
            "dutch" => Self::Dutch,
            "english" => Self::English,
            "finnish" => Self::Finnish,
            "french" => Self::French,
            "german" => Self::German,
            "greek" => Self::Greek,
            "hungarian" => Self::Hungarian,
            "italian" => Self::Italian,
            "kazakh" => Self::Kazakh,
            "nepali" => Self::Nepali,
            "norwegian" => Self::Norwegian,
            "portuguese" => Self::Portuguese,
            "romanian" => Self::Romanian,
            "russian" => Self::Russian,
            "spanish" => Self::Spanish,
            "swedish" => Self::Swedish,
            "turkish" => Self::Turkish,
            // Anything else is rejected, keeping the offending text for the message.
            unknown => return Err(LanguageError(unknown.to_owned())),
        };
        Ok(language)
    }
}
/// A source of stop-word lists, queried per [`Language`].
///
/// NOTE(review): the re-exported `NLTK`, `Spark` and `SkLearn` types are
/// presumably the implementors; their modules are not visible from this
/// file — confirm.
pub trait Stopwords {
/// Looks up the stop-word list for `language`.
///
/// This is an associated function (no `self`), so it is called on the
/// implementing type itself. On success the result is a `'static` slice of
/// `'static` string slices.
///
/// NOTE(review): `None` presumably means this source has no list for
/// `language` — confirm against the implementations.
fn stopwords(language: Language) -> Option<&'static [&'static str]>;
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A known lowercase name parses; an unknown one yields an error that
    /// carries the rejected input.
    #[test]
    fn from_str() {
        assert_eq!(Language::from_str("english").ok(), Some(Language::English));
        assert_eq!(Language::from_str("en"), Err(LanguageError("en".to_owned())));
        assert_eq!(Language::from_str("en").ok(), None);
    }

    /// Every variant is reachable from its lowercase English name, so a typo
    /// in any single match arm is caught.
    #[test]
    fn from_str_covers_every_language() {
        let cases = [
            ("arabic", Language::Arabic),
            ("azerbaijani", Language::Azerbaijani),
            ("danish", Language::Danish),
            ("dutch", Language::Dutch),
            ("english", Language::English),
            ("finnish", Language::Finnish),
            ("french", Language::French),
            ("german", Language::German),
            ("greek", Language::Greek),
            ("hungarian", Language::Hungarian),
            ("italian", Language::Italian),
            ("kazakh", Language::Kazakh),
            ("nepali", Language::Nepali),
            ("norwegian", Language::Norwegian),
            ("portuguese", Language::Portuguese),
            ("romanian", Language::Romanian),
            ("russian", Language::Russian),
            ("spanish", Language::Spanish),
            ("swedish", Language::Swedish),
            ("turkish", Language::Turkish),
        ];
        for &(name, expected) in cases.iter() {
            assert_eq!(Language::from_str(name), Ok(expected));
        }
    }

    /// The `FromStr` impl is also what powers `str::parse`.
    #[test]
    fn parse_uses_from_str() {
        assert_eq!("french".parse::<Language>(), Ok(Language::French));
        assert_eq!("fr".parse::<Language>(), Err(LanguageError("fr".to_owned())));
    }

    /// The error message shows the rejected input in `{:?}` form (quoted).
    #[test]
    fn error_message_quotes_the_input() {
        assert_eq!(
            LanguageError("en".to_owned()).to_string(),
            "Language \"en\" is not supported"
        );
    }
}