ik_rs/core/
cjk_segmenter.rs1use crate::core::char_util::{utf8_len, CharType};
2use crate::core::lexeme::{Lexeme, LexemeType};
3use crate::core::ordered_linked_list::OrderedLinkedList;
4use crate::core::segmentor::Segmenter;
5use crate::dict::dictionary::GLOBAL_DICT;
6
/// Identifier reported by this segmenter through `Segmenter::name`.
const SEGMENTER_NAME: &str = "CJK_SEGMENTER";
8
/// Field-less segmenter that looks words up in the main dictionary.
///
/// The type holds no data of its own; build one with `CJKSegmenter::default()`.
#[derive(Debug, Default)]
pub struct CJKSegmenter {}
11
12impl Segmenter for CJKSegmenter {
13 fn analyze(
14 &mut self,
15 input: &str,
16 cursor: usize,
17 curr_char_type: &CharType,
18 origin_lexemes: &mut OrderedLinkedList<Lexeme>,
19 ) {
20 match curr_char_type {
21 CharType::USELESS => {}
22 _ => {
23 let char_count = utf8_len(input);
24 let lock_guard = {
25 cfg_if::cfg_if! {
26 if #[cfg(feature="use-parking-lot")] {GLOBAL_DICT.read()}
27 else {
28 match GLOBAL_DICT.read() {
29 Err(_err) => return,
30 Ok(lck) => lck
31 }
32 }
33 }
34 };
35 let hits =
36 lock_guard.match_in_main_dict_with_offset(input, cursor, char_count - cursor);
37 for hit in hits.iter() {
38 if hit.is_match() {
39 let new_lexeme = Lexeme::new(hit.pos(), LexemeType::CNWORD);
40 origin_lexemes.insert(new_lexeme);
41 }
42 }
43 }
44 }
45 }
46
47 fn name(&self) -> &str {
48 return SEGMENTER_NAME;
49 }
50}