1extern crate serde;
24#[macro_use]
25extern crate serde_derive;
26
27use std::borrow::Cow;
28
29mod snowball;
30
31use snowball::SnowballEnv;
32use snowball::algorithms;
33
34#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
37pub enum Algorithm {
38 Arabic,
39 Danish,
40 Dutch,
41 English,
42 Finnish,
43 French,
44 German,
45 Greek,
46 Hungarian,
47 Italian,
48 Norwegian,
49 Portuguese,
50 Romanian,
51 Russian,
52 Spanish,
53 Swedish,
54 Tamil,
55 Turkish
56}
57
58pub struct Stemmer {
60 stemmer: fn(&mut SnowballEnv) -> bool,
61}
62
63impl Stemmer {
64 pub fn create(lang: Algorithm) -> Self {
66 match lang {
67 Algorithm::Arabic => Stemmer { stemmer: algorithms::arabic::stem },
68 Algorithm::Danish => Stemmer { stemmer: algorithms::danish::stem },
69 Algorithm::Dutch => Stemmer { stemmer: algorithms::dutch::stem },
70 Algorithm::English => Stemmer { stemmer: algorithms::english::stem },
71 Algorithm::Finnish => Stemmer { stemmer: algorithms::finnish::stem },
72 Algorithm::French => Stemmer { stemmer: algorithms::french::stem },
73 Algorithm::German => Stemmer { stemmer: algorithms::german::stem },
74 Algorithm::Greek => Stemmer { stemmer: algorithms::greek::stem },
75 Algorithm::Hungarian => Stemmer { stemmer: algorithms::hungarian::stem },
76 Algorithm::Italian => Stemmer { stemmer: algorithms::italian::stem },
77 Algorithm::Norwegian => Stemmer { stemmer: algorithms::norwegian::stem },
78 Algorithm::Portuguese => Stemmer { stemmer: algorithms::portuguese::stem },
79 Algorithm::Romanian => Stemmer { stemmer: algorithms::romanian::stem },
80 Algorithm::Russian => Stemmer { stemmer: algorithms::russian::stem },
81 Algorithm::Spanish => Stemmer { stemmer: algorithms::spanish::stem },
82 Algorithm::Swedish => Stemmer { stemmer: algorithms::swedish::stem },
83 Algorithm::Tamil => Stemmer { stemmer: algorithms::tamil::stem },
84 Algorithm::Turkish => Stemmer { stemmer: algorithms::turkish::stem },
85 }
86 }
87
88 pub fn stem<'a>(&self, input: &'a str) -> Cow<'a, str> {
91 let mut env = SnowballEnv::create(input);
92 (self.stemmer)(&mut env);
93 env.get_current()
94 }
95}
96
97
98
#[cfg(test)]
mod tests {
    use super::{Algorithm, Stemmer};
    use std::fs::File;
    use std::io::{BufRead, BufReader};

    /// Opens a fixture file for buffered, line-by-line reading.
    ///
    /// Panics with the offending path when the file cannot be opened, so a
    /// missing fixture is easy to tell apart from a stemming mismatch.
    fn open_fixture(path: &str) -> BufReader<File> {
        match File::open(path) {
            Ok(file) => BufReader::new(file),
            Err(err) => panic!("cannot open fixture {}: {}", path, err),
        }
    }

    /// Checks one language against its vocabulary/result fixture pair.
    ///
    /// Reads `test_data/voc_<code>.txt` and `test_data/res_<code>.txt` in
    /// lockstep and asserts that stemming each vocabulary line with
    /// `algorithm` yields the line at the same position in the result file.
    /// Lines are paired with `zip`, so the comparison ends at the shorter of
    /// the two files.
    fn assert_fixture_stems(code: &str, algorithm: Algorithm) {
        let vocab_path = format!("test_data/voc_{}.txt", code);
        let result_path = format!("test_data/res_{}.txt", code);

        let vocab = open_fixture(&vocab_path);
        let expected = open_fixture(&result_path);

        // The stemmer only wraps a function pointer, so a single instance can
        // serve every word in the fixture.
        let stemmer = Stemmer::create(algorithm);

        for (index, (word, stem)) in vocab.lines().zip(expected.lines()).enumerate() {
            let word = word.expect("failed to read a line from the vocabulary fixture");
            let stem = stem.expect("failed to read a line from the result fixture");

            assert_eq!(
                stemmer.stem(&word),
                stem.as_str(),
                "{:?}: wrong stem for {:?} ({} line {})",
                algorithm,
                word,
                vocab_path,
                index + 1
            );
        }
    }

    #[test]
    fn german_test() {
        assert_fixture_stems("ger", Algorithm::German);
    }

    #[test]
    fn english_test() {
        assert_fixture_stems("en", Algorithm::English);
    }

    #[test]
    fn french_test() {
        assert_fixture_stems("fr", Algorithm::French);
    }

    #[test]
    fn spanish_test() {
        assert_fixture_stems("es", Algorithm::Spanish);
    }

    #[test]
    fn portuguese_test() {
        assert_fixture_stems("pt", Algorithm::Portuguese);
    }

    #[test]
    fn italian_test() {
        assert_fixture_stems("it", Algorithm::Italian);
    }

    #[test]
    fn romanian_test() {
        assert_fixture_stems("ro", Algorithm::Romanian);
    }

    #[test]
    fn russian_test() {
        assert_fixture_stems("ru", Algorithm::Russian);
    }

    #[test]
    fn arabic_test() {
        assert_fixture_stems("ar", Algorithm::Arabic);
    }

    #[test]
    fn finnish_test() {
        assert_fixture_stems("fi", Algorithm::Finnish);
    }

    #[test]
    fn greek_test() {
        assert_fixture_stems("el", Algorithm::Greek);
    }

    #[test]
    fn norwegian_test() {
        assert_fixture_stems("no", Algorithm::Norwegian);
    }
}