1use std::borrow::Cow;
2use std::mem;
3use tantivy_tokenizer_api::{Token, TokenFilter, TokenStream, Tokenizer};
4
5mod snowball;
6
7pub mod algorithms;
8
9#[derive(Clone)]
14pub struct StemmerTokenizer {
15 algorithm: algorithms::Algorithm,
16}
17
18impl StemmerTokenizer {
19 pub fn new(algorithm: algorithms::Algorithm) -> StemmerTokenizer {
21 StemmerTokenizer { algorithm }
22 }
23}
24
/// Only available when the `english_porter_2` feature is enabled.
#[cfg(feature = "english_porter_2")]
impl Default for StemmerTokenizer {
    /// Creates a stemming token filter backed by `algorithms::english_porter_2`.
    fn default() -> Self {
        Self::new(algorithms::english_porter_2)
    }
}
32
33impl TokenFilter for StemmerTokenizer {
34 type Tokenizer<T: Tokenizer> = StemmerFilter<T>;
35
36 fn transform<T: Tokenizer>(self, tokenizer: T) -> StemmerFilter<T> {
37 StemmerFilter {
38 algorithm: self.algorithm,
39 inner: tokenizer,
40 }
41 }
42}
43
44#[derive(Clone)]
45pub struct StemmerFilter<T> {
46 algorithm: algorithms::Algorithm,
47 inner: T,
48}
49
50impl<T: Tokenizer> Tokenizer for StemmerFilter<T> {
51 type TokenStream<'a> = StemmerTokenStream<T::TokenStream<'a>>;
52
53 fn token_stream<'a>(&'a mut self, text: &'a str) -> Self::TokenStream<'a> {
54 StemmerTokenStream {
55 tail: self.inner.token_stream(text),
56 buffer: String::new(),
57 algorithm: self.algorithm,
58 }
59 }
60}
61
62pub struct StemmerTokenStream<T> {
63 tail: T,
64 buffer: String,
65 algorithm: algorithms::Algorithm,
66}
67
68impl<T: TokenStream> TokenStream for StemmerTokenStream<T> {
69 fn advance(&mut self) -> bool {
70 if !self.tail.advance() {
71 return false;
72 }
73
74 let token = self.tail.token_mut();
75
76 match (self.algorithm)(&token.text) {
77 Cow::Owned(stemmed_str) => token.text = stemmed_str,
78 Cow::Borrowed(stemmed_str) => {
79 self.buffer.clear();
80 self.buffer.push_str(stemmed_str);
81 mem::swap(&mut token.text, &mut self.buffer);
82 }
83 }
84
85 true
86 }
87
88 fn token(&self) -> &Token {
89 self.tail.token()
90 }
91
92 fn token_mut(&mut self) -> &mut Token {
93 self.tail.token_mut()
94 }
95}