ik_rs/core/
cjk_segmenter.rs

1use crate::core::char_util::{utf8_len, CharType};
2use crate::core::lexeme::{Lexeme, LexemeType};
3use crate::core::ordered_linked_list::OrderedLinkedList;
4use crate::core::segmentor::Segmenter;
5use crate::dict::dictionary::GLOBAL_DICT;
6
/// Identifier returned by [`CJKSegmenter`]'s `Segmenter::name` implementation.
const SEGMENTER_NAME: &str = "CJK_SEGMENTER";
8
/// Field-less segmenter whose `Segmenter` implementation looks up words in the
/// global main dictionary and emits them as `CNWORD` lexemes.
#[derive(Debug, Default)]
pub struct CJKSegmenter {}
11
12impl Segmenter for CJKSegmenter {
13    fn analyze(
14        &mut self,
15        input: &str,
16        cursor: usize,
17        curr_char_type: &CharType,
18        origin_lexemes: &mut OrderedLinkedList<Lexeme>,
19    ) {
20        match curr_char_type {
21            CharType::USELESS => {}
22            _ => {
23                let char_count = utf8_len(input);
24                let lock_guard = {
25                    cfg_if::cfg_if! {
26                        if #[cfg(feature="use-parking-lot")] {GLOBAL_DICT.read()}
27                        else /*if #[cfg(feature="use-std-sync")]*/ {
28                            match GLOBAL_DICT.read() {
29                                Err(_err) => return,
30                                Ok(lck) => lck
31                            }
32                        }
33                    }
34                };
35                let hits =
36                    lock_guard.match_in_main_dict_with_offset(input, cursor, char_count - cursor);
37                for hit in hits.iter() {
38                    if hit.is_match() {
39                        let new_lexeme = Lexeme::new(hit.pos(), LexemeType::CNWORD);
40                        origin_lexemes.insert(new_lexeme);
41                    }
42                }
43            }
44        }
45    }
46
47    fn name(&self) -> &str {
48        return SEGMENTER_NAME;
49    }
50}