cang-jie(仓颉)
A Chinese tokenizer for tantivy, based on jieba-rs.
Currently, only UTF-8 encoded text is supported.
Example
let mut schema_builder = default;
let text_indexing = default
.set_tokenizer // Set custom tokenizer
.set_index_option;
let text_options = default
.set_indexing_options
.set_stored;
// ... Some code
let index = create?;
let tokenizer = CangJieTokenizer ;
index.tokenizers.register;
// ... Some code