custom_rust_stemmers/
lib.rs1use std::borrow::Cow;
26
27mod snowball;
28
29use snowball::SnowballEnv;
30use snowball::algorithms;
31
/// Languages for which a stemming routine can be selected.
///
/// Pass one of these variants to `Stemmer::create` to pick the routine that
/// the resulting stemmer will run.
///
/// The enum is a plain, field-less selector, so it is cheap to copy and can be
/// compared, hashed and printed for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Algorithm {
    Arabic,
    English,
    French,
    German,
    Italian,
    Portuguese,
    Romanian,
    Russian,
    Spanish,
}
45
46pub struct Stemmer {
48 stemmer: Box<Fn(&mut SnowballEnv) -> bool>,
49}
50
51impl Stemmer {
52 pub fn create(lang: Algorithm) -> Self {
54 match lang {
55 Algorithm::Arabic => Stemmer { stemmer: Box::new(algorithms::arabic::_stem) },
56 Algorithm::English => Stemmer { stemmer: Box::new(algorithms::english::_stem) },
57 Algorithm::French => Stemmer { stemmer: Box::new(algorithms::french::_stem) },
58 Algorithm::German => Stemmer { stemmer: Box::new(algorithms::german::_stem) },
59 Algorithm::Italian => Stemmer { stemmer: Box::new(algorithms::italian::_stem) },
60 Algorithm::Portuguese => Stemmer { stemmer: Box::new(algorithms::portuguese::_stem) },
61 Algorithm::Romanian => Stemmer { stemmer: Box::new(algorithms::romanian::_stem) },
62 Algorithm::Russian => Stemmer { stemmer: Box::new(algorithms::russian::_stem) },
63 Algorithm::Spanish => Stemmer { stemmer: Box::new(algorithms::spanish::_stem) },
64 }
65 }
66
67 pub fn stem<'a>(&self, input: &'a str) -> Cow<'a, str> {
70 let mut env = SnowballEnv::create(input);
71 (self.stemmer)(&mut env);
72 env.get_current()
73 }
74}
75
76
77
#[cfg(test)]
mod tests {
    use super::{Algorithm, Stemmer};
    use std::fs::File;
    use std::io::{BufRead, BufReader, Lines};

    /// Opens `path` and returns an iterator over its lines, panicking with the
    /// offending path if the file cannot be opened.
    fn open_lines(path: &str) -> Lines<BufReader<File>> {
        let file = File::open(path).unwrap_or_else(|err| panic!("cannot open {}: {}", path, err));
        BufReader::new(file).lines()
    }

    /// Stems every line of `vocab_path` with `algorithm` and checks it against
    /// the line at the same position in `result_path`.
    ///
    /// The two files are walked in lock-step. Unlike a plain `zip`, a length
    /// mismatch is reported as a failure instead of silently ignoring the
    /// unpaired tail, so truncated fixture files cannot hide regressions.
    fn assert_corpus(vocab_path: &str, result_path: &str, algorithm: Algorithm) {
        // One stemmer per corpus; there is no need to rebuild it for each word.
        let stemmer = Stemmer::create(algorithm);
        let mut words = open_lines(vocab_path);
        let mut stems = open_lines(result_path);
        let mut line_no = 0usize;

        loop {
            line_no += 1;
            match (words.next(), stems.next()) {
                (Some(word), Some(stem)) => {
                    let word = word.unwrap_or_else(|err| {
                        panic!("{}:{}: unreadable line: {}", vocab_path, line_no, err)
                    });
                    let stem = stem.unwrap_or_else(|err| {
                        panic!("{}:{}: unreadable line: {}", result_path, line_no, err)
                    });
                    assert_eq!(
                        stemmer.stem(&word),
                        stem.as_str(),
                        "{}:{}: unexpected stem for {:?}",
                        vocab_path,
                        line_no,
                        word
                    );
                }
                (None, None) => break,
                (Some(_), None) => panic!(
                    "{} ends at line {} but {} has more words",
                    result_path,
                    line_no - 1,
                    vocab_path
                ),
                (None, Some(_)) => panic!(
                    "{} ends at line {} but {} has more expected stems",
                    vocab_path,
                    line_no - 1,
                    result_path
                ),
            }
        }
    }

    #[test]
    fn german_test() {
        assert_corpus(
            "test_data/voc_ger.txt",
            "test_data/res_ger.txt",
            Algorithm::German,
        );
    }

    #[test]
    fn english_test() {
        assert_corpus(
            "test_data/voc_en.txt",
            "test_data/res_en.txt",
            Algorithm::English,
        );
    }

    #[test]
    fn french_test() {
        assert_corpus(
            "test_data/voc_fr.txt",
            "test_data/res_fr.txt",
            Algorithm::French,
        );
    }

    #[test]
    fn spanish_test() {
        assert_corpus(
            "test_data/voc_es.txt",
            "test_data/res_es.txt",
            Algorithm::Spanish,
        );
    }

    #[test]
    fn portuguese_test() {
        assert_corpus(
            "test_data/voc_pt.txt",
            "test_data/res_pt.txt",
            Algorithm::Portuguese,
        );
    }

    #[test]
    fn italian_test() {
        assert_corpus(
            "test_data/voc_it.txt",
            "test_data/res_it.txt",
            Algorithm::Italian,
        );
    }

    #[test]
    fn romanian_test() {
        assert_corpus(
            "test_data/voc_ro.txt",
            "test_data/res_ro.txt",
            Algorithm::Romanian,
        );
    }

    #[test]
    fn russian_test() {
        assert_corpus(
            "test_data/voc_ru.txt",
            "test_data/res_ru.txt",
            Algorithm::Russian,
        );
    }

    #[test]
    fn arabic_test() {
        assert_corpus(
            "test_data/voc_ar.txt",
            "test_data/res_ar.txt",
            Algorithm::Arabic,
        );
    }
}