summa 0.8.8

Fast full-text search server
1
2
3
4
5
6
7
8
9
10
11
12
13
use super::summa_tokenizer::SummaTokenizer;
use crate::configs::IndexConfig;
use tantivy::tokenizer::{LowerCaser, RemoveLongFilter, SimpleTokenizer, StopWordFilter, TextAnalyzer};

/// Builds the two text analyzers registered by default: `"summa"` and `"default"`.
///
/// The `"summa"` analyzer is based on [`SummaTokenizer`], the `"default"` one on
/// [`SimpleTokenizer`]. Both receive the same filter chain, applied in this order:
/// `RemoveLongFilter::limit(100)`, then `LowerCaser`, and finally — only when
/// `index_config.stop_words` is set — a `StopWordFilter` built from a copy of that list.
///
/// Returns the analyzers paired with their names, `"summa"` first and `"default"` second.
pub fn default_tokenizers(index_config: &IndexConfig) -> [(String, TextAnalyzer); 2] {
    // Both analyzers differ only in their base tokenizer, so the filter chain is
    // written once and applied to each of them.
    let with_common_filters = |analyzer: TextAnalyzer| -> TextAnalyzer {
        let analyzer = analyzer.filter(RemoveLongFilter::limit(100)).filter(LowerCaser);
        match index_config.stop_words.as_ref() {
            // Each filter takes the word list by value, hence one copy per analyzer.
            Some(stop_words) => analyzer.filter(StopWordFilter::remove(stop_words.clone())),
            None => analyzer,
        }
    };

    [
        ("summa".to_owned(), with_common_filters(TextAnalyzer::from(SummaTokenizer))),
        ("default".to_owned(), with_common_filters(TextAnalyzer::from(SimpleTokenizer))),
    ]
}