reefdb/indexes/fts/tokenizers/default.rs

1use serde::{Deserialize, Serialize};
2
3use super::tokenizer::Tokenizer;
4
5#[derive(Debug, Serialize, Deserialize, Clone)]
6pub struct DefaultTokenizer;
7
8impl Tokenizer for DefaultTokenizer {
9    fn tokenize<'a>(&self, text: &'a str) -> Box<dyn Iterator<Item = &'a str> + 'a> {
10        Box::new(text.split_whitespace())
11    }
12
13    fn new() -> Self {
14        DefaultTokenizer
15    }
16}
17
#[cfg(test)]
mod tests {
    use super::{DefaultTokenizer, Tokenizer};

    /// Happy path: a single space separates two tokens.
    #[test]
    fn tokenizer_test() {
        let tokenizer = DefaultTokenizer::new();
        let tokens: Vec<&str> = tokenizer.tokenize("Hello World").collect();
        assert_eq!(tokens, vec!["Hello", "World"]);
    }

    /// Runs of mixed whitespace (spaces, tabs, newlines) collapse into
    /// single separators and leading/trailing whitespace is ignored,
    /// so no empty tokens are ever yielded.
    #[test]
    fn collapses_mixed_whitespace() {
        let tokenizer = DefaultTokenizer::new();
        let tokens: Vec<&str> = tokenizer.tokenize("  Hello\t\tbig\nWorld  ").collect();
        assert_eq!(tokens, vec!["Hello", "big", "World"]);
    }

    /// Empty and all-whitespace input yield no tokens at all.
    #[test]
    fn empty_input_yields_no_tokens() {
        let tokenizer = DefaultTokenizer::new();
        assert_eq!(tokenizer.tokenize("").count(), 0);
        assert_eq!(tokenizer.tokenize(" \t\n ").count(), 0);
    }
}