porter_stemmer/
lib.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5extern crate unicode_segmentation;
6
7use unicode_segmentation::UnicodeSegmentation;
8
9/// Given a word, return its stemmed form
10///
11/// # Examples
12///
13/// ```
14/// use porter_stemmer::stem;
15///
16/// let stemmed = stem("totally");
17/// assert_eq!("total", &stemmed);
18/// ```
19pub fn stem(word: &str) -> String {
20    stem_tokenized(word.graphemes(true).collect::<Vec<&str>>()).iter()
21        .fold(String::new(), |prev, next| { format!("{}{}", prev, next) })
22}
23
24/// Take a word as a Vector of grapheme clusters, and return the stemmed equivalent using Porter's
25/// stemming algorithm.
26///
27/// # Examples
28///
29/// ```
30/// use porter_stemmer::stem_tokenized;
31///
32/// let tokenized = vec!["s", "t", "e", "m", "m", "i", "n", "g"];
33/// let stemmed = stem_tokenized(tokenized);
34/// assert_eq!(&["s", "t", "e", "m"], &stemmed[..]);
35/// ```
36pub fn stem_tokenized(word: Vec<&str>) -> Vec<&str> {
37    if word.len() > 2 {
38        let word = phase_one_a(word);
39        let word = phase_one_b(word);
40        let word = phase_one_c(word);
41        let word = phase_two(word);
42        let word = phase_three(word);
43        let word = phase_four(word);
44        let word = phase_5a(word);
45        let word = phase_5b(word);
46        word
47    } else {
48        word
49    }
50}
51
/// Report whether `grapheme` is one of the five unconditional vowels
/// `a`, `e`, `i`, `o` or `u`.
///
/// The comparison is exact: upper-case letters, `y`, and every other grapheme
/// yield `false`.
fn real_vowel(grapheme: &str) -> bool {
    ["a", "e", "i", "o", "u"].contains(&grapheme)
}
64
65fn real_consonant(grapheme: &str) -> bool {
66    !real_vowel(grapheme)
67}
68
69fn porter_vowel(word: &[&str], index: usize) -> bool {
70    let grapheme = word[index];
71
72    if real_vowel(grapheme) {
73        true
74    } else {
75        if index == 0 || grapheme != "y" {
76            false
77        } else {
78            let preceeding_grapheme = word[index - 1];
79            real_consonant(preceeding_grapheme)
80        }
81    }
82}
83
84fn porter_consonant(word: &[&str], index: usize) -> bool {
85    !porter_vowel(word, index)
86}
87
88fn contains_porter_vowel(word: &[&str]) -> bool {
89    for index in 0..word.len() {
90        if porter_vowel(word, index) {
91            return true;
92        }
93    }
94
95    return false;
96}
97
98fn ends_double_porters_consonant(word: &[&str]) -> bool {
99    let word_length = word.len();
100    if word_length > 2 {
101        let last_grapheme = word[word_length - 1];
102        let penultimate_grapheme = word[word_length - 2];
103
104        last_grapheme == penultimate_grapheme &&
105            porter_consonant(word, word_length - 1)
106    } else {
107        false
108    }
109
110}
111
112// Condition: *o  the stem ends consonant-vowel-consonant,
113// where the second consonant is not w, x or y.
114fn ends_star_o(word: &[&str]) -> bool {
115    let word_length = word.len();
116
117    if word_length > 2 {
118        let last_grapheme = word[word_length - 1];
119        match last_grapheme {
120            "w" | "x" | "y" => false,
121            _ => {
122                porter_consonant(word, word_length - 1) &&
123                porter_vowel(word, word_length - 2) &&
124                porter_consonant(word, word_length - 3)
125            }
126        }
127    } else {
128        false
129    }
130}
131
132/// The Porter stemmer makes use of a _measure_.
133///
134/// Defined formally as the number of
135/// Vowel sequence-Consonant sequence pairs in a word or fragment.
136///
137/// If C is a sequence of consonants, and V a sequence
138/// of vowels, the measure of a word or word part can be
139/// defined by:
140///
141/// C?(VC)*V?
142///
143/// Where the measure, _m_, is equal to the number of matches
144/// by the Kleene star `(VC)*`
145///
146/// Note how the parameter is a &[&'a str] (slice).  This is so we can use an
147/// indexable list of grapheme clusters.
148///
149/// TODO: Maybe parameterise over Index trait so we can optimise
150/// for known single char byte sequences in English? What if
151/// the English input has a name with a diacritic?
152fn measure(word: &[&str]) -> usize {
153    let mut measure = 0;
154    let word_length = word.len();
155
156    if word_length == 0{
157        return measure;
158    }
159
160    let mut is_vowel_current = real_vowel(word[0]);
161
162    for index in 1..word_length {
163        let is_vowel = porter_vowel(word, index);
164        if !is_vowel_current && is_vowel {
165            is_vowel_current = true;
166        } else if is_vowel_current && !is_vowel {
167            is_vowel_current = false;
168            measure += 1;
169        }
170    }
171
172    return measure;
173}
174
/// Step 1a: strip plural endings.
///
/// The rules are tried in this order and only the first match applies:
///
/// SSES -> SS
/// IES  -> I
/// SS   -> SS
/// S    ->
fn phase_one_a(word: Vec<&str>) -> Vec<&str> {
    // Work out how many trailing graphemes the matching rule removes
    let graphemes_to_drop = if word.ends_with(&["s", "s", "e", "s"])
        || word.ends_with(&["i", "e", "s"])
    {
        2
    } else if word.ends_with(&["s", "s"]) {
        // "ss" is kept as-is, which also shields it from the single "s" rule
        0
    } else if word.ends_with(&["s"]) {
        1
    } else {
        0
    };

    let mut stemmed = word;
    let kept = stemmed.len() - graphemes_to_drop;
    stemmed.truncate(kept);
    stemmed
}
199
200/// Order in which to apply rules:
201///
202/// measure > 0 ? EED -> EE
203/// *v*         ? ED ->
204/// *v*         ? ING ->
205fn phase_one_b(word: Vec<&str>) -> Vec<&str> {
206    let word_length = word.len();
207
208    if word.ends_with(&["e", "e", "d"]) {
209        if measure(&word[..word_length - 3]) > 0 {
210            let mut word = word;
211            word.truncate(word_length - 1);
212            word
213        } else {
214            word
215        }
216    } else if word.ends_with(&["e", "d"]) {
217        if contains_porter_vowel(&word[..word_length - 2]) {
218            let mut word = word;
219            word.truncate(word_length - 2);
220            phase_one_b_substep(word)
221        } else {
222            word
223        }
224    } else if word.ends_with(&["i", "n", "g"]) {
225        if contains_porter_vowel(&word[..word_length - 3]) {
226            let mut word = word;
227            word.truncate(word_length - 3);
228            phase_one_b_substep(word)
229        } else {
230            word
231        }
232    } else {
233        word
234    }
235}
236
237///
238/// AT -> ATE
239/// BL -> BLE
240/// IZ -> IZE
241///
242/// Contraints apply to whole word in here
243/// *d (double consonant) and not (*L or *S or *Z) -> change to single letter
244///
245/// m=1 and *o (see `ends_star_o`) -> E
246fn phase_one_b_substep(word: Vec<&str>) -> Vec<&str> {
247    let word_length = word.len();
248    if word.ends_with(&["a", "t"]) ||
249       word.ends_with(&["b", "l"]) ||
250       word.ends_with(&["i", "z"]) {
251        let mut word = word;
252        word.push("e");
253        word
254    } else if ends_double_porters_consonant(&word) &&
255              !(word.ends_with(&["l"]) ||
256                word.ends_with(&["s"]) ||
257                word.ends_with(&["z"])) {
258
259        let mut word = word;
260        word.truncate(word_length - 1);
261        word
262
263    } else if measure(&word) == 1 && ends_star_o(&word) {
264        let mut word = word;
265        word.push("e");
266        word
267    } else {
268        word
269    }
270}
271
272///
273/// TODO: Question about "contains* vowel and the 'Y' case (see ignored test on sky)
274/// *v* Y -> I
275fn phase_one_c(word: Vec<&str>) -> Vec<&str> {
276    let word_length = word.len();
277    if contains_porter_vowel(&word) && word.ends_with(&["y"]) {
278        let mut word = word;
279        word[word_length - 1] = "i";
280        word
281    } else {
282        word
283    }
284}
285
286/// For all where the STEM is measure > 0
287/// ATIONAL -> ATE
288/// TIONAL  -> TION
289/// ENCI    -> ENCE
290/// ANCI    -> ANCE
291/// IZER    -> IZE
292/// ABLI    -> ABLE
293/// ALLI    -> AL
294/// ENTLI   -> ENT
295/// ELI     -> E
296/// OUSLI   -> OUS
297/// IZATION -> IZE
298/// ATION   -> ATE
299/// ATOR    -> ATE
300/// ALISM   -> AL
301/// IVENESS -> IVE
302/// FULNESS -> FUL
303/// OUSNESS -> OUS
304/// ALITI   -> AL
305/// IVITI   -> IVE
306/// BILITI  -> BLE
307// TODO: This is a naive implementation - we can definitely be more efficient here by traversing
308// backwards and splitting on the last grapheme rather than searching everything (use a trie to
309// hold the search space)
310fn phase_two(word: Vec<&str>) -> Vec<&str> {
311    let word_length = word.len();
312    if word.ends_with(&["a", "t", "i", "o", "n", "a", "l"]) &&
313        measure(&word[..word_length - 7]) > 0 {
314
315        let mut word = word;
316        word.truncate(word_length - 5);
317        word.push("e");
318        word
319    } else if word.ends_with(&["t", "i", "o", "n", "a", "l"]) &&
320        measure(&word[..word_length - 6]) > 0 {
321
322        let mut word = word;
323        word.truncate(word_length - 2);
324        word
325    } else if word.ends_with(&["e", "n", "c", "i"]) &&
326        measure(&word[..word_length - 4]) > 0 {
327
328        let mut word = word;
329        word[word_length - 1] = "e";
330        word
331    } else if word.ends_with(&["a", "n", "c", "i"]) &&
332        measure(&word[..word_length - 4]) > 0 {
333
334        let mut word = word;
335        word[word_length - 1] = "e";
336        word
337    } else if word.ends_with(&["i", "z", "e", "r"]) &&
338        measure(&word[..word_length - 4]) > 0 {
339
340        let mut word = word;
341        word.truncate(word_length - 1);
342        word
343    } else if word.ends_with(&["a", "b", "l", "i"]) &&
344        measure(&word[..word_length - 4]) > 0 {
345
346        let mut word = word;
347        word[word_length - 1] = "e";
348        word
349    } else if word.ends_with(&["a", "l", "l", "i"]) &&
350        measure(&word[..word_length - 4]) > 0 {
351
352        let mut word = word;
353        word.truncate(word_length - 2);
354        word
355    } else if word.ends_with(&["e", "n", "t", "l", "i"]) &&
356        measure(&word[..word_length - 5]) > 0 {
357
358        let mut word = word;
359        word.truncate(word_length - 2);
360        word
361    } else if word.ends_with(&["e", "l", "i"]) &&
362        measure(&word[..word_length - 3]) > 0 {
363
364        let mut word = word;
365        word.truncate(word_length - 2);
366        word
367    } else if word.ends_with(&["o", "u", "s", "l", "i"]) &&
368        measure(&word[..word_length - 5]) > 0 {
369
370        let mut word = word;
371        word.truncate(word_length - 2);
372        word
373    } else if word.ends_with(&["i", "z", "a", "t", "i", "o", "n"]) &&
374        measure(&word[..word_length - 7]) > 0 {
375
376        let mut word = word;
377        word.truncate(word_length - 5);
378        word.push("e");
379        word
380    } else if word.ends_with(&["a", "t", "i", "o", "n"]) &&
381        measure(&word[..word_length - 5]) > 0 {
382
383        let mut word = word;
384        word.truncate(word_length - 3);
385        word.push("e");
386        word
387    } else if word.ends_with(&["a", "t", "o", "r"]) &&
388        measure(&word[..word_length - 4]) > 0 {
389
390        let mut word = word;
391        word.truncate(word_length - 2);
392        word.push("e");
393        word
394    } else if word.ends_with(&["a", "l", "i", "s", "m"]) &&
395        measure(&word[..word_length - 5]) > 0 {
396
397        let mut word = word;
398        word.truncate(word_length - 3);
399        word
400    } else if word.ends_with(&["i", "v", "e", "n", "e", "s", "s"]) &&
401        measure(&word[..word_length - 7]) > 0 {
402
403        let mut word = word;
404        word.truncate(word_length - 4);
405        word
406    } else if word.ends_with(&["f", "u", "l", "n", "e", "s", "s"]) &&
407        measure(&word[..word_length - 7]) > 0 {
408
409        let mut word = word;
410        word.truncate(word_length - 4);
411        word
412    } else if word.ends_with(&["o", "u", "s", "n", "e", "s", "s"]) &&
413        measure(&word[..word_length - 7]) > 0 {
414
415        let mut word = word;
416        word.truncate(word_length - 4);
417        word
418    } else if word.ends_with(&["a", "l", "i", "t", "i"]) &&
419        measure(&word[..word_length - 5]) > 0 {
420
421        let mut word = word;
422        word.truncate(word_length - 3);
423        word
424    } else if word.ends_with(&["i", "v", "i", "t", "i"]) &&
425        measure(&word[..word_length - 5]) > 0 {
426
427        let mut word = word;
428        word.truncate(word_length - 3);
429        word.push("e");
430        word
431    } else if word.ends_with(&["b", "i", "l", "i", "t", "i"]) &&
432        measure(&word[..word_length - 6]) > 0 {
433
434        let mut word = word;
435        word.truncate(word_length - 5);
436        word.push("l");
437        word.push("e");
438        word
439    } else {
440        word
441    }
442}
443
444/// For all whre the STEM measure is greater than one
445/// ICATE -> IC
446/// ATIVE ->
447/// ALIZE -> AL
448/// ICITI -> IC
449/// ICAL  -> IC
450/// FUL   ->
451/// NESS  ->
452// TODO: see phase_two
453fn phase_three(word: Vec<&str>) -> Vec<&str> {
454    let word_length = word.len();
455    if word.ends_with(&["i", "c", "a", "t", "e"]) &&
456        measure(&word[..word_length - 5]) > 0 {
457
458        let mut word = word;
459        word.truncate(word_length - 3);
460        word
461    } else if word.ends_with(&["a", "t", "i", "v", "e"]) &&
462        measure(&word[..word_length - 5]) > 0 {
463
464        let mut word = word;
465        word.truncate(word_length - 5);
466        word
467    } else if word.ends_with(&["a", "l", "i", "z", "e"]) &&
468        measure(&word[..word_length - 5]) > 0 {
469
470        let mut word = word;
471        word.truncate(word_length - 3);
472        word
473    } else if word.ends_with(&["i", "c", "i", "t", "i"]) &&
474        measure(&word[..word_length - 5]) > 0 {
475
476        let mut word = word;
477        word.truncate(word_length - 3);
478        word
479    } else if word.ends_with(&["i", "c", "a", "l"]) &&
480        measure(&word[..word_length - 4]) > 0 {
481
482        let mut word = word;
483        word.truncate(word_length - 2);
484        word
485    } else if word.ends_with(&["f", "u", "l"]) &&
486        measure(&word[..word_length - 3]) > 0 {
487
488        let mut word = word;
489        word.truncate(word_length - 3);
490        word
491    } else if word.ends_with(&["n", "e", "s", "s"]) &&
492        measure(&word[..word_length - 4]) > 0 {
493
494        let mut word = word;
495        word.truncate(word_length - 4);
496        word
497    } else {
498        word
499    }
500}
501
502fn phase_four(word: Vec<&str>) -> Vec<&str> {
503    let word_length = word.len();
504    if word.ends_with(&["a", "l"]) &&
505        measure(&word[..word_length - 2]) > 1 {
506        let mut word = word;
507        word.truncate(word_length - 2);
508        word
509    } else if word.ends_with(&["a", "n", "c", "e"]) &&
510        measure(&word[..word_length - 4]) > 1 {
511        let mut word = word;
512        word.truncate(word_length - 4);
513        word
514    } else if word.ends_with(&["e", "n", "c", "e"]) &&
515        measure(&word[..word_length - 4]) > 1 {
516        let mut word = word;
517        word.truncate(word_length - 4);
518        word
519    } else if word.ends_with(&["e", "r"]) &&
520        measure(&word[..word_length - 2]) > 1 {
521        let mut word = word;
522        word.truncate(word_length - 2);
523        word
524    } else if word.ends_with(&["i", "c"]) &&
525        measure(&word[..word_length - 2]) > 1 {
526        let mut word = word;
527        word.truncate(word_length - 2);
528        word
529    } else if word.ends_with(&["a", "b", "l", "e"]) &&
530        measure(&word[..word_length - 4]) > 1 {
531        let mut word = word;
532        word.truncate(word_length - 4);
533        word
534    } else if word.ends_with(&["i", "b", "l", "e"]) &&
535        measure(&word[..word_length - 4]) > 1 {
536        let mut word = word;
537        word.truncate(word_length - 4);
538        word
539    } else if word.ends_with(&["a", "n", "t"]) &&
540        measure(&word[..word_length - 3]) > 1 {
541        let mut word = word;
542        word.truncate(word_length - 3);
543        word
544    } else if word.ends_with(&["e", "m", "e", "n", "t"]) &&
545        measure(&word[..word_length - 5]) > 1 {
546        let mut word = word;
547        word.truncate(word_length - 5);
548        word
549    } else if word.ends_with(&["m", "e", "n", "t"]) &&
550        measure(&word[..word_length - 4]) > 1 {
551        let mut word = word;
552        word.truncate(word_length - 4);
553        word
554    } else if word.ends_with(&["e", "n", "t"]) &&
555        measure(&word[..word_length - 3]) > 1 {
556        let mut word = word;
557        word.truncate(word_length - 3);
558        word
559    } else if word.ends_with(&["i", "o", "n"]) &&
560        measure(&word[..word_length - 3]) > 1 {
561
562        let last_grapheme_in_stem = word[word_length - 4];
563        if last_grapheme_in_stem == "s" || last_grapheme_in_stem == "t" {
564            let mut word = word;
565            word.truncate(word_length - 3);
566            word
567        } else {
568            word
569        }
570    } else if word.ends_with(&["o", "u"]) &&
571        measure(&word[..word_length - 2]) > 1 {
572        let mut word = word;
573        word.truncate(word_length - 2);
574        word
575    } else if word.ends_with(&["i", "s", "m"]) &&
576        measure(&word[..word_length - 3]) > 1 {
577        let mut word = word;
578        word.truncate(word_length - 3);
579        word
580    } else if word.ends_with(&["a", "t", "e"]) &&
581        measure(&word[..word_length - 3]) > 1 {
582        let mut word = word;
583        word.truncate(word_length - 3);
584        word
585    } else if word.ends_with(&["i", "t", "i"]) &&
586        measure(&word[..word_length - 3]) > 1 {
587        let mut word = word;
588        word.truncate(word_length - 3);
589        word
590    } else if word.ends_with(&["o", "u", "s"]) &&
591        measure(&word[..word_length - 3]) > 1 {
592        let mut word = word;
593        word.truncate(word_length - 3);
594        word
595    } else if word.ends_with(&["i", "v", "e"]) &&
596        measure(&word[..word_length - 3]) > 1 {
597        let mut word = word;
598        word.truncate(word_length - 3);
599        word
600    } else if word.ends_with(&["i", "z", "e"]) &&
601        measure(&word[..word_length - 3]) > 1 {
602        let mut word = word;
603        word.truncate(word_length - 3);
604        word
605    } else {
606        word
607    }
608
609}
610
611fn phase_5a(word: Vec<&str>) -> Vec<&str> {
612    let word_length = word.len();
613
614    if word.ends_with(&["e"]) &&
615        measure(&word[..word_length - 1]) > 1 {
616        let mut word = word;
617        word.truncate(word_length - 1);
618        word
619    } else if word.ends_with(&["e"]) &&
620        measure(&word[..word_length - 1]) == 1 &&
621        !ends_star_o(&word[..word_length - 1]) {
622
623        let mut word = word;
624        word.truncate(word_length - 1);
625        word
626    } else {
627        word
628    }
629}
630
631fn phase_5b(word: Vec<&str>) -> Vec<&str> {
632    let word_length = word.len();
633    if word.ends_with(&["l"]) &&
634        measure(&word) > 1 &&
635        ends_double_porters_consonant(&word) {
636
637        let mut word = word;
638        word.truncate(word_length - 1);
639        word
640    } else {
641        word
642    }
643}
644
// Unit tests for the character classification helpers, the measure, and every
// individual phase of the stemmer.
#[cfg(test)]
mod tests {
    use super::*;

    // The five plain vowels are recognised; a consonant is not.
    #[test]
    fn test_real_vowel() {
        assert!(real_vowel("a"));
        assert!(real_vowel("e"));
        assert!(real_vowel("i"));
        assert!(real_vowel("o"));
        assert!(real_vowel("u"));
        assert!(!real_vowel("b"));
    }

    // `real_consonant` is the mirror image of `real_vowel`.
    #[test]
    fn test_real_consonant() {
        assert!(!real_consonant("a"));
        assert!(!real_consonant("e"));
        assert!(!real_consonant("i"));
        assert!(!real_consonant("o"));
        assert!(!real_consonant("u"));
        assert!(real_consonant("b"));
    }

    /// Split `input` into grapheme clusters, the representation every phase works on.
    fn tokenise<'a>(input: &'a str) -> Vec<&'a str> {
        use unicode_segmentation::UnicodeSegmentation;
        input.graphemes(true).collect::<Vec<&'a str>>()
    }

    /// Run `f` on the tokenised `input` and assert that the result equals the
    /// tokenised `expected`.
    fn assert_fn<'a>(f: fn(Vec<&'a str>) -> Vec<&'a str>, input: &'a str, expected: &'a str) {
        let input = tokenise(input);
        let expected = tokenise(expected);

        assert_eq!(&f(input), &expected);
    }


    // "y" is a consonant after a vowel and a vowel after a consonant.
    #[test]
    fn test_porter_character_types() {
        let graphemes = tokenise("toy");

        assert!(porter_consonant(&graphemes, 0));
        assert!(porter_vowel(&graphemes, 1));
        assert!(porter_consonant(&graphemes, 2));

        let graphemes = tokenise("syzygy");
        assert!(porter_consonant(&graphemes, 0));
        assert!(porter_vowel(&graphemes, 1));
        assert!(porter_consonant(&graphemes, 2));
        assert!(porter_vowel(&graphemes, 3));
        assert!(porter_consonant(&graphemes, 4));
        assert!(porter_vowel(&graphemes, 5));
    }

    // A doubled letter only counts when the final grapheme acts as a consonant.
    #[test]
    fn test_ends_double_porters_consonant() {
    let graphemes = tokenise("sell");
    assert!(ends_double_porters_consonant(&graphemes));

    let graphemes = tokenise("greyy");
    assert!(!ends_double_porters_consonant(&graphemes));

    let graphemes = tokenise("see");
    assert!(!ends_double_porters_consonant(&graphemes));
    }

    // A "y" acting as a vowel is enough to satisfy the *v* condition.
    #[test]
    fn test_contains_vowel() {
        let graphemes = tokenise("toy");
        assert!(contains_porter_vowel(&graphemes));

        let graphemes = tokenise("syzygy");
        assert!(contains_porter_vowel(&graphemes));

        let graphemes = tokenise("trjk");
        assert!(!contains_porter_vowel(&graphemes));
    }

    // consonant-vowel-consonant endings match, unless the last letter is w, x or y.
    #[test]
    fn test_ends_star_o() {
        let graphemes = tokenise("awhil");
        assert!(ends_star_o(&graphemes));

        let graphemes = tokenise("mix");
        assert!(!ends_star_o(&graphemes));

        let graphemes = tokenise("dew");
        assert!(!ends_star_o(&graphemes));

        let graphemes = tokenise("day");
        assert!(!ends_star_o(&graphemes));
    }

    // The measure counts vowel-run/consonant-run pairs, however long each run is.
    #[test]
    fn test_measure() {
        let graphemes = tokenise("crepuscular");
        assert_eq!(4, measure(&graphemes[..]));

        let graphemes = tokenise("bacon");
        assert_eq!(2, measure(&graphemes[..]));

        let graphemes = tokenise("abacus");
        assert_eq!(3, measure(&graphemes[..]));


        let graphemes = tokenise("paackkeeer");
        assert_eq!(2, measure(&graphemes[..]));

        let graphemes = tokenise("syzygy");
        assert_eq!(2, measure(&graphemes[..]));

    }

    // Step 1a: plural endings.
    #[test]
    fn test_phase_one() {
        assert_fn(phase_one_a, "caresses", "caress");
        assert_fn(phase_one_a, "caress", "caress");
        assert_fn(phase_one_a, "ponies", "poni");
        assert_fn(phase_one_a, "cats", "cat");
    }

    // Step 1b: -eed, -ed and -ing, including stems too short to be stripped.
    #[test]
    fn test_phase_one_b() {
        assert_fn(phase_one_b, "feed", "feed");
        assert_fn(phase_one_b, "agreed", "agree");
        assert_fn(phase_one_b, "plastered", "plaster");
        assert_fn(phase_one_b, "bled", "bled");
        assert_fn(phase_one_b, "motoring", "motor");
        assert_fn(phase_one_b, "sing", "sing");
    }

    // The clean-up that follows a successful -ed / -ing removal.
    #[test]
    fn test_phase_one_b_substep() {
        assert_fn(phase_one_b_substep, "conflat", "conflate");
        assert_fn(phase_one_b_substep, "troubl", "trouble");
        assert_fn(phase_one_b_substep, "siz", "size");
        assert_fn(phase_one_b_substep, "hopp", "hop");
        assert_fn(phase_one_b_substep, "hiss", "hiss");
        assert_fn(phase_one_b_substep, "fizz", "fizz");
        assert_fn(phase_one_b_substep, "fall", "fall");
        assert_fn(phase_one_b_substep, "fail", "fail");
        assert_fn(phase_one_b_substep, "fil", "file");
    }

    // Step 1c: final y -> i.
    #[test]
    fn test_phase_one_c() {
        assert_fn(phase_one_c, "happy", "happi");
    }

    // Expects "sky" to keep its final "y"; excluded from the default run via `#[ignore]`.
    #[test]
    #[ignore]
    fn test_phase_one_c_sky() {
        assert_fn(phase_one_c, "sky", "sky");
    }

    // Step 2: one example per rule, plus "rational" whose stem is too short.
    #[test]
    fn test_phase_two() {
        assert_fn(phase_two, "relational", "relate");
        assert_fn(phase_two, "conditional", "condition");
        assert_fn(phase_two, "rational", "rational");
        assert_fn(phase_two, "valenci", "valence");
        assert_fn(phase_two, "hesitanci", "hesitance");
        assert_fn(phase_two, "digitizer", "digitize");
        assert_fn(phase_two, "conformabli", "conformable");
        assert_fn(phase_two, "radicalli", "radical");
        assert_fn(phase_two, "differentli", "different");
        assert_fn(phase_two, "vileli", "vile");
        assert_fn(phase_two, "analogousli", "analogous");
        assert_fn(phase_two, "vietnamization", "vietnamize");
        assert_fn(phase_two, "predication", "predicate");
        assert_fn(phase_two, "operator", "operate");
        assert_fn(phase_two, "feudalism", "feudal");
        assert_fn(phase_two, "decisiveness", "decisive");
        assert_fn(phase_two, "hopefulness", "hopeful");
        assert_fn(phase_two, "callousness", "callous");
        assert_fn(phase_two, "formaliti", "formal");
        assert_fn(phase_two, "sensitiviti", "sensitive");
        assert_fn(phase_two, "sensibiliti", "sensible");
    }

    // Step 3: one example per rule.
    #[test]
    fn test_phase_three() {
        assert_fn(phase_three, "triplicate", "triplic");
        assert_fn(phase_three, "formative", "form");
        assert_fn(phase_three, "formalize", "formal");
        assert_fn(phase_three, "electriciti", "electric");
        assert_fn(phase_three, "electrical", "electric");
        assert_fn(phase_three, "hopeful", "hope");
        assert_fn(phase_three, "goodness", "good");
    }

    // Step 4: one example per rule.
    #[test]
    fn test_phase_four() {
        assert_fn(phase_four, "revival", "reviv");
        assert_fn(phase_four, "allowance", "allow");
        assert_fn(phase_four, "inference", "infer");
        assert_fn(phase_four, "airliner", "airlin");
        assert_fn(phase_four, "gyroscopic", "gyroscop");
        assert_fn(phase_four, "adjustable", "adjust");
        assert_fn(phase_four, "defensible", "defens");
        assert_fn(phase_four, "irritant", "irrit");
        assert_fn(phase_four, "replacement", "replac");
        assert_fn(phase_four, "adjustment", "adjust");
        assert_fn(phase_four, "dependent", "depend");
        assert_fn(phase_four, "adoption", "adopt");
        assert_fn(phase_four, "homologou", "homolog");
        assert_fn(phase_four, "communism", "commun");
        assert_fn(phase_four, "activate", "activ");
        assert_fn(phase_four, "angulariti", "angular");
        assert_fn(phase_four, "homologous", "homolog");
        assert_fn(phase_four, "effective", "effect");
        assert_fn(phase_four, "bowdlerize", "bowdler");
    }

    // Step 5a: a final "e" is dropped, except after a short consonant-vowel-consonant stem.
    #[test]
    fn test_phase_five_a() {
        // 5a
        assert_fn(phase_5a, "probate", "probat");
        assert_fn(phase_5a, "rate", "rate");
        assert_fn(phase_5a, "cease", "ceas");
    }

    // Step 5b: a final double "l" is reduced only when the measure is above one.
    #[test]
    fn test_phase_five_b() {
        // 5b
        assert_fn(phase_5b, "controll", "control");
        assert_fn(phase_5b, "roll", "roll");
    }

    // End-to-end run through every phase.
    #[test]
    fn test_stem_tokenized() {
        assert_fn(stem_tokenized, "surveillance", "surveil");
    }
}