// Line-number gutter (1-183) left behind by text extraction; not part of the program.
// The FFI declarations below use the C-side spellings (`NewJieba`, `Cut`,
// `DefaultMode`, ...), so the usual Rust naming lints are silenced.
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
// Several declarations (`Tokenize`, `Extract`, ...) are not called by anything
// in this file yet.
#![allow(dead_code)]

use std::ffi::CStr;
use std::ffi::CString;
use std::os::raw::c_char;
use std::os::raw::c_int;
use std::os::raw::c_void;

/// Integer selector passed as the `mode` argument of `Tokenize`.
type TokenizeMode = c_int;
const DefaultMode: TokenizeMode = 0;
const SearchMode: TokenizeMode = 1;

/// Element type behind the pointer returned by `Tokenize`.
///
/// NOTE(review): `offset`/`len` presumably locate a token inside the input
/// sentence; confirm the unit (bytes vs. characters) against the C header.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct Word {
    pub offset: usize,
    pub len: usize,
}

/// Element type behind the pointer returned by `ExtractWithWeight`: a C string
/// paired with a floating-point weight.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct CWordWeight {
    pub word: *mut c_char,
    pub weight: f64,
}

extern "C" {
    /// Creates a segmenter handle from five NUL-terminated file paths.
    pub fn NewJieba(
        dict_path: *const c_char,
        hmm_path: *const c_char,
        user_dict: *const c_char,
        idf_path: *const c_char,
        stop_words_path: *const c_char,
    ) -> *mut c_void;
    /// Destroys a handle obtained from `NewJieba`.
    fn FreeJieba(handle: *mut c_void);
    /// Releases a word array returned by `Cut`, `CutAll`, `CutForSearch`,
    /// `Tag` or `Extract`.
    fn FreeWords(words: *mut *mut c_char);

    fn Cut(handle: *mut c_void, sentence: *const c_char, is_hmm_used: c_int) -> *mut *mut c_char;
    fn CutAll(handle: *mut c_void, sentence: *const c_char) -> *mut *mut c_char;
    fn CutForSearch(
        handle: *mut c_void,
        sentence: *const c_char,
        is_hmm_used: c_int,
    ) -> *mut *mut c_char;
    fn Tag(handle: *mut c_void, sentence: *const c_char) -> *mut *mut c_char;
    fn AddWord(handle: *mut c_void, word: *const c_char);

    fn Tokenize(
        x: *mut c_void,
        sentence: *const c_char,
        mode: TokenizeMode,
        is_hmm_used: c_int,
    ) -> *mut Word;

    fn Extract(handle: *mut c_void, sentence: *const c_char, top_k: c_int) -> *mut *mut c_char;
    fn ExtractWithWeight(
        handle: *mut c_void,
        sentence: *const c_char,
        top_k: c_int,
    ) -> *mut CWordWeight;
    /// Releases an array returned by `ExtractWithWeight`.
    fn FreeWordWeights(wws: *mut CWordWeight);
}

/// Owning wrapper around a handle returned by `NewJieba`.
///
/// The handle is passed to `FreeJieba` when the value is dropped.
#[derive(Debug)]
pub struct Jieba {
    inner: *mut c_void,
}

/// The five file paths `NewJieba` needs, already converted to C strings.
#[derive(Debug)]
pub struct JiebaDict {
    dict: CString,
    hmm: CString,
    user: CString,
    idf: CString,
    stop: CString,
}

impl JiebaDict {
    /// Converts the five paths into NUL-terminated C strings.
    ///
    /// # Panics
    ///
    /// Panics if any of the paths contains an interior NUL byte.
    pub fn new(dict: &str, hmm: &str, user_dict: &str, idf_path: &str, stop_path: &str) -> Self {
        JiebaDict {
            dict: CString::new(dict).expect("Can not parser dict path!"),
            hmm: CString::new(hmm).expect("Can not parser hmm model path!"),
            user: CString::new(user_dict).expect("Can not parser user dict path!"),
            idf: CString::new(idf_path).expect("Can not parser idf path!"),
            stop: CString::new(stop_path).expect("Can not parser stop words path!"),
        }
    }
}

/// Converts `text` into the NUL-terminated form the C functions expect.
///
/// Panics if `text` contains an interior NUL byte, because such a string
/// cannot be represented as a C string.
fn to_c_string(text: &str) -> CString {
    CString::new(text).expect("text passed to Jieba must not contain interior NUL bytes")
}

impl Jieba {
    /// Creates a segmenter by passing the paths stored in `dict` to `NewJieba`.
    ///
    /// `dict` is consumed; its strings stay alive until `NewJieba` has returned.
    pub fn with_dict(dict: JiebaDict) -> Self {
        // SAFETY: every argument is a valid NUL-terminated string owned by
        // `dict`, which outlives the call.
        let inner = unsafe {
            NewJieba(
                dict.dict.as_ptr(),
                dict.hmm.as_ptr(),
                dict.user.as_ptr(),
                dict.idf.as_ptr(),
                dict.stop.as_ptr(),
            )
        };
        Jieba { inner }
    }

    /// Runs the C `Cut` function on `sentence` and returns the resulting words.
    ///
    /// `is_hmm_used` is forwarded to the C side as `1` or `0`.
    ///
    /// # Panics
    ///
    /// Panics if `sentence` contains an interior NUL byte.
    pub fn cut(&self, sentence: &str, is_hmm_used: bool) -> Vec<String> {
        let sentence = to_c_string(sentence);
        // SAFETY: `self.inner` came from `NewJieba`; `sentence` is a valid C
        // string that lives until the call returns.
        let words = unsafe { Cut(self.inner, sentence.as_ptr(), c_int::from(is_hmm_used)) };
        convert_ptr_vec(words)
    }

    /// Runs the C `CutAll` function on `sentence` and returns the resulting words.
    ///
    /// # Panics
    ///
    /// Panics if `sentence` contains an interior NUL byte.
    pub fn cut_all(&self, sentence: &str) -> Vec<String> {
        let sentence = to_c_string(sentence);
        // SAFETY: see `cut`.
        let words = unsafe { CutAll(self.inner, sentence.as_ptr()) };
        convert_ptr_vec(words)
    }

    /// Runs the C `CutForSearch` function on `sentence` and returns the
    /// resulting words.
    ///
    /// `is_hmm_used` is forwarded to the C side as `1` or `0`.
    ///
    /// # Panics
    ///
    /// Panics if `sentence` contains an interior NUL byte.
    pub fn cut_for_search(&self, sentence: &str, is_hmm_used: bool) -> Vec<String> {
        let sentence = to_c_string(sentence);
        // SAFETY: see `cut`.
        let words =
            unsafe { CutForSearch(self.inner, sentence.as_ptr(), c_int::from(is_hmm_used)) };
        convert_ptr_vec(words)
    }

    /// Runs the C `Tag` function on `sentence` and returns the strings it
    /// produced.
    ///
    /// # Panics
    ///
    /// Panics if `sentence` contains an interior NUL byte.
    pub fn tag(&self, sentence: &str) -> Vec<String> {
        let sentence = to_c_string(sentence);
        // SAFETY: see `cut`.
        let words = unsafe { Tag(self.inner, sentence.as_ptr()) };
        convert_ptr_vec(words)
    }

    /// Passes `word` to the C `AddWord` function.
    ///
    /// # Panics
    ///
    /// Panics if `word` contains an interior NUL byte.
    pub fn add_word(&self, word: &str) {
        let word = to_c_string(word);
        // SAFETY: see `cut`.
        unsafe {
            AddWord(self.inner, word.as_ptr());
        }
    }
}

impl Drop for Jieba {
    /// Hands the handle back to the C library.
    fn drop(&mut self) {
        // SAFETY: `self.inner` came from `NewJieba` and is released only here.
        unsafe {
            FreeJieba(self.inner);
        }
    }
}

/// Copies a NULL-terminated array of C strings into owned Rust strings and
/// then returns the array to the C library through `FreeWords`.
///
/// The strings were allocated by the C side, so they are only *read* here.
/// Taking ownership with `CString::from_raw` would free them with Rust's
/// allocator and leave `FreeWords` holding dangling pointers. Bytes that are
/// not valid UTF-8 are replaced with U+FFFD rather than panicking, so the
/// array is always released.
///
/// A null `words_ptr` yields an empty vector and `FreeWords` is not called.
fn convert_ptr_vec(words_ptr: *mut *mut c_char) -> Vec<String> {
    if words_ptr.is_null() {
        return Vec::new();
    }

    let mut words = Vec::new();
    // SAFETY: `words_ptr` is non-null and, per the producing C functions, points
    // to an array of valid NUL-terminated strings ending with a null entry. The
    // array is passed to `FreeWords` exactly once, after the last read.
    unsafe {
        let mut cursor = words_ptr;
        while !(*cursor).is_null() {
            words.push(CStr::from_ptr(*cursor).to_string_lossy().into_owned());
            cursor = cursor.add(1);
        }
        FreeWords(words_ptr);
    }
    words
}