gchdb 0.2.8

Provides a record abstraction for storing chat records extracted from different chat software, along with a full-text search feature.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
use cang_jie::{CangJieTokenizer, TokenizerOption};
// use lindera_tantivy::tokenizer::LinderaTokenizer;
use tantivy::tokenizer::TokenizerManager;

pub use cang_jie::CANG_JIE as LANG_CN;
// pub const LANG_JP: &str = "lindera";

/// Registers the tokenizers used by this crate on the given manager.
///
/// At present only one tokenizer is registered: a [`CangJieTokenizer`] under
/// the name [`LANG_CN`]. Its `option` is set to
/// `TokenizerOption::ForSearch { hmm: true }`, and every other field keeps
/// its `Default` value.
pub fn tokenizers_register(tokenizers: &TokenizerManager) {
    let chinese_tokenizer = CangJieTokenizer {
        option: TokenizerOption::ForSearch { hmm: true },
        ..Default::default()
    };
    tokenizers.register(LANG_CN, chinese_tokenizer);

    // Japanese (Lindera) registration is currently disabled:
    // tokenizers.register(LANG_JP, LinderaTokenizer::new("decompose", ""));
}