1extern crate serde;
24#[macro_use]
25extern crate serde_derive;
26
27use std::borrow::Cow;
28
29mod snowball;
30
31use snowball::algorithms;
32use snowball::SnowballEnv;
33
34#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Copy, Clone)]
37pub enum Algorithm {
38 Arabic,
39 Armenian,
40 Danish,
41 Dutch,
42 English,
43 Finnish,
44 French,
45 German,
46 Greek,
47 Hungarian,
48 Italian,
49 Norwegian,
50 Portuguese,
51 Romanian,
52 Russian,
53 Spanish,
54 Swedish,
55 Tamil,
56 Turkish,
57}
58
59pub struct Stemmer {
61 stemmer: fn(&mut SnowballEnv) -> bool,
62}
63
64impl Stemmer {
65 pub fn create(lang: Algorithm) -> Self {
67 match lang {
68 Algorithm::Arabic => Stemmer {
69 stemmer: algorithms::arabic::stem,
70 },
71 Algorithm::Armenian => Stemmer {
72 stemmer: algorithms::armenian::stem,
73 },
74 Algorithm::Danish => Stemmer {
75 stemmer: algorithms::danish::stem,
76 },
77 Algorithm::Dutch => Stemmer {
78 stemmer: algorithms::dutch::stem,
79 },
80 Algorithm::English => Stemmer {
81 stemmer: algorithms::english::stem,
82 },
83 Algorithm::Finnish => Stemmer {
84 stemmer: algorithms::finnish::stem,
85 },
86 Algorithm::French => Stemmer {
87 stemmer: algorithms::french::stem,
88 },
89 Algorithm::German => Stemmer {
90 stemmer: algorithms::german::stem,
91 },
92 Algorithm::Greek => Stemmer {
93 stemmer: algorithms::greek::stem,
94 },
95 Algorithm::Hungarian => Stemmer {
96 stemmer: algorithms::hungarian::stem,
97 },
98 Algorithm::Italian => Stemmer {
99 stemmer: algorithms::italian::stem,
100 },
101 Algorithm::Norwegian => Stemmer {
102 stemmer: algorithms::norwegian::stem,
103 },
104 Algorithm::Portuguese => Stemmer {
105 stemmer: algorithms::portuguese::stem,
106 },
107 Algorithm::Romanian => Stemmer {
108 stemmer: algorithms::romanian::stem,
109 },
110 Algorithm::Russian => Stemmer {
111 stemmer: algorithms::russian::stem,
112 },
113 Algorithm::Spanish => Stemmer {
114 stemmer: algorithms::spanish::stem,
115 },
116 Algorithm::Swedish => Stemmer {
117 stemmer: algorithms::swedish::stem,
118 },
119 Algorithm::Tamil => Stemmer {
120 stemmer: algorithms::tamil::stem,
121 },
122 Algorithm::Turkish => Stemmer {
123 stemmer: algorithms::turkish::stem,
124 },
125 }
126 }
127
128 pub fn stem<'a>(&self, input: &'a str) -> Cow<'a, str> {
131 let mut env = SnowballEnv::create(input);
132 (self.stemmer)(&mut env);
133 env.get_current()
134 }
135
136 pub fn stem_cow<'a>(&self, input: Cow<'a, str>) -> Cow<'a, str> {
139 let mut env = SnowballEnv::create_cow(input);
140 (self.stemmer)(&mut env);
141 env.get_current()
142 }
143}
144
#[cfg(test)]
mod tests {
    use super::{Algorithm, Stemmer};
    use std::fs::File;
    use std::io::{BufRead, BufReader, Lines};

    /// Opens `path` for buffered line-by-line reading.
    ///
    /// Panics with the offending path when the file cannot be opened, so a
    /// missing fixture is reported by name instead of as a bare `unwrap`.
    fn open_lines(path: &str) -> Lines<BufReader<File>> {
        let file = File::open(path).unwrap_or_else(|err| panic!("cannot open {}: {}", path, err));
        BufReader::new(file).lines()
    }

    /// Checks one language against its fixture files.
    ///
    /// Reads `test_data/voc_<code>.txt` (input words) and
    /// `test_data/res_<code>.txt` (expected stems) in lock-step and asserts
    /// that stemming line *n* of the vocabulary yields line *n* of the
    /// results.
    ///
    /// Unlike a plain `zip`, a length mismatch between the two files is a
    /// failure: otherwise the trailing words of the longer file would be
    /// skipped silently and the test would pass without checking them.
    fn assert_stems_match(code: &str, algorithm: Algorithm) {
        let voc_path = format!("test_data/voc_{}.txt", code);
        let res_path = format!("test_data/res_{}.txt", code);

        let mut words = open_lines(&voc_path);
        let mut stems = open_lines(&res_path);

        // The stemmer does not depend on the word, so build it only once.
        let stemmer = Stemmer::create(algorithm);

        let mut line_no = 0usize;
        loop {
            line_no += 1;
            match (words.next(), stems.next()) {
                (Some(word), Some(expected)) => {
                    let word = word.unwrap_or_else(|err| {
                        panic!("cannot read line {} of {}: {}", line_no, voc_path, err)
                    });
                    let expected = expected.unwrap_or_else(|err| {
                        panic!("cannot read line {} of {}: {}", line_no, res_path, err)
                    });
                    assert_eq!(
                        stemmer.stem(word.as_str()),
                        expected.as_str(),
                        "{:?}: wrong stem for `{}` (line {} of {})",
                        algorithm,
                        word,
                        line_no,
                        voc_path
                    );
                }
                (None, None) => break,
                (Some(_), None) => panic!(
                    "{} ends at line {} but {} has more lines",
                    res_path,
                    line_no - 1,
                    voc_path
                ),
                (None, Some(_)) => panic!(
                    "{} ends at line {} but {} has more lines",
                    voc_path,
                    line_no - 1,
                    res_path
                ),
            }
        }
    }

    #[test]
    fn german_test() {
        assert_stems_match("ger", Algorithm::German);
    }

    #[test]
    fn english_test() {
        assert_stems_match("en", Algorithm::English);
    }

    #[test]
    fn french_test() {
        assert_stems_match("fr", Algorithm::French);
    }

    #[test]
    fn spanish_test() {
        assert_stems_match("es", Algorithm::Spanish);
    }

    #[test]
    fn portuguese_test() {
        assert_stems_match("pt", Algorithm::Portuguese);
    }

    #[test]
    fn italian_test() {
        assert_stems_match("it", Algorithm::Italian);
    }

    #[test]
    fn romanian_test() {
        assert_stems_match("ro", Algorithm::Romanian);
    }

    #[test]
    fn russian_test() {
        assert_stems_match("ru", Algorithm::Russian);
    }

    #[test]
    fn arabic_test() {
        assert_stems_match("ar", Algorithm::Arabic);
    }

    #[test]
    fn finnish_test() {
        assert_stems_match("fi", Algorithm::Finnish);
    }

    #[test]
    fn greek_test() {
        assert_stems_match("el", Algorithm::Greek);
    }

    #[test]
    fn norwegian_test() {
        assert_stems_match("no", Algorithm::Norwegian);
    }
}