pub struct Trie<T> { /* private fields */ }
Expand description
Trie 树,用于存储词典
§Examples
use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
assert_eq!(trie.get("中国人"), Some("cns").as_ref());
assert_eq!(trie.get("中国"), None);
Implementations§
Source§impl<T> Trie<T>
impl<T> Trie<T>
Sourcepub fn new_big() -> Self
pub fn new_big() -> Self
创建一个新的 Trie 树。针对超大词典做了优化,可以有效加快词典加载速度,但会耗费较多空间。
§Examples
use char_trie::Trie;
let mut trie = Trie::new_big();
trie.insert("中国人", "cns");
assert_eq!(trie.get("中国人"), Some("cns").as_ref());
assert_eq!(trie.get("中国"), None);Examples found in repository?
examples/all_seg.rs (line 7)
5pub fn main() {
6 // let mut trie = Trie::default();
7 let mut trie = Trie::new_big();
8
9 let start = std::time::Instant::now();
10
11 for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
12 let line = line.unwrap();
13 let parts: Vec<&str> = line.split('\t').collect();
14 if parts.len() == 3 {
15 trie.insert(
16 parts[0],
17 (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
18 );
19 }
20 }
21
22 trie.insert("中国人", (String::from("ud"), 10000));
23
24 println!("load dict use {:?}", start.elapsed());
25
26 let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
27
28 for token in trie.iter_all(text) {
29 println!("{:?}", token);
30 }
31
32 let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
33 println!("{:?}", c);
34}
Source
pub fn insert(&mut self, key: &str, value: T)
pub fn insert(&mut self, key: &str, value: T)
将一个词插入 Trie 树。key:要插入的词;value:该词对应的值。
§Examples
use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
assert_eq!(trie.get("中国人"), Some("cns").as_ref());
assert_eq!(trie.get("中国"), None);Examples found in repository?
examples/all_seg.rs (lines 15-18)
5pub fn main() {
6 // let mut trie = Trie::default();
7 let mut trie = Trie::new_big();
8
9 let start = std::time::Instant::now();
10
11 for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
12 let line = line.unwrap();
13 let parts: Vec<&str> = line.split('\t').collect();
14 if parts.len() == 3 {
15 trie.insert(
16 parts[0],
17 (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
18 );
19 }
20 }
21
22 trie.insert("中国人", (String::from("ud"), 10000));
23
24 println!("load dict use {:?}", start.elapsed());
25
26 let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
27
28 for token in trie.iter_all(text) {
29 println!("{:?}", token);
30 }
31
32 let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
33 println!("{:?}", c);
34}More examples
examples/front_max.rs (lines 13-16)
5pub fn main() {
6 let mut trie = Trie::default();
7 let start = std::time::Instant::now();
8
9 for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
10 let line = line.unwrap();
11 let parts: Vec<&str> = line.split('\t').collect();
12 if parts.len() == 3 {
13 trie.insert(
14 parts[0],
15 (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
16 );
17 }
18 }
19
20 println!("load dict use {:?}", start.elapsed());
21
22 let file = std::fs::read_to_string("dict/big_text.txt").unwrap();
23
24 let start = std::time::Instant::now();
25 let mut len = 0;
26
27 file.lines().for_each(|line| {
28 let result: Vec<_> = trie.iter_max(line).map(|t| t.0).collect();
29 len += result.len();
30 println!("{:?}", result);
31 });
32
33 println!("text parse token:{} use {:?}", len, start.elapsed());
34}examples/example.rs (line 6)
3pub fn main() {
4 let mut trie = Trie::default();
5
6 trie.insert("中国人", String::from("ud"));
7 trie.insert("中国", String::from("ud"));
8 trie.insert("我", String::from("ud"));
9 trie.insert("是", String::from("ud"));
10 trie.insert("爱", String::from("ud"));
11 trie.insert("北京", String::from("ud"));
12 trie.insert("天安门", String::from("ud"));
13 trie.insert("天安", String::from("ud"));
14 trie.insert("安门", String::from("ud"));
15 trie.insert("上", String::from("ud"));
16 trie.insert("太阳", String::from("ud"));
17 trie.insert("升", String::from("ud"));
18
19 let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
20
21 let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
22 //["我", "爱", "北京", "天安", "天安门", "安门", "天安", "天安门", "安门", "上", "太阳", "升", "我", "是", "中国", "中国人", "我", "爱", "中国"]
23 println!("{:?}", c);
24
25 let c: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
26
27 //["我", "爱", "北京", "天安门", "天安门", "上", "太阳", "升", "我", "是", "中国人", "我", "爱", "中国"]
28 println!("{:?}", c);
29}
pub fn get(&self, key: &str) -> Option<&T>
pub fn char_get(&self, c: char) -> Option<&Self>
Sourcepub fn iter_all<'a>(&'a self, text: &'a str) -> AllTokenizer<'a, T> ⓘ
pub fn iter_all<'a>(&'a self, text: &'a str) -> AllTokenizer<'a, T> ⓘ
实现了全词匹配。例如词典中包含【中国,国人,中国人】三个词,那么对于文本“我是中国人”将返回 [中国,中国人,国人]。
§Examples
use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
trie.insert("中国", "cn");
trie.insert("国人", "gr");
let text = "我是中国人";
let tokens: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
assert_eq!(tokens, vec!["中国", "中国人", "国人"]);Examples found in repository?
examples/all_seg.rs (line 28)
5pub fn main() {
6 // let mut trie = Trie::default();
7 let mut trie = Trie::new_big();
8
9 let start = std::time::Instant::now();
10
11 for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
12 let line = line.unwrap();
13 let parts: Vec<&str> = line.split('\t').collect();
14 if parts.len() == 3 {
15 trie.insert(
16 parts[0],
17 (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
18 );
19 }
20 }
21
22 trie.insert("中国人", (String::from("ud"), 10000));
23
24 println!("load dict use {:?}", start.elapsed());
25
26 let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
27
28 for token in trie.iter_all(text) {
29 println!("{:?}", token);
30 }
31
32 let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
33 println!("{:?}", c);
34}More examples
examples/example.rs (line 21)
3pub fn main() {
4 let mut trie = Trie::default();
5
6 trie.insert("中国人", String::from("ud"));
7 trie.insert("中国", String::from("ud"));
8 trie.insert("我", String::from("ud"));
9 trie.insert("是", String::from("ud"));
10 trie.insert("爱", String::from("ud"));
11 trie.insert("北京", String::from("ud"));
12 trie.insert("天安门", String::from("ud"));
13 trie.insert("天安", String::from("ud"));
14 trie.insert("安门", String::from("ud"));
15 trie.insert("上", String::from("ud"));
16 trie.insert("太阳", String::from("ud"));
17 trie.insert("升", String::from("ud"));
18
19 let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
20
21 let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
22 //["我", "爱", "北京", "天安", "天安门", "安门", "天安", "天安门", "安门", "上", "太阳", "升", "我", "是", "中国", "中国人", "我", "爱", "中国"]
23 println!("{:?}", c);
24
25 let c: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
26
27 //["我", "爱", "北京", "天安门", "天安门", "上", "太阳", "升", "我", "是", "中国人", "我", "爱", "中国"]
28 println!("{:?}", c);
29}
Source
pub fn iter_max<'a>(&'a self, text: &'a str) -> MaxFrontTokenizer<'a, T> ⓘ
pub fn iter_max<'a>(&'a self, text: &'a str) -> MaxFrontTokenizer<'a, T> ⓘ
实现了正向最大匹配。例如词典中包含【中国,国人,中国人】三个词,那么对于文本“我是中国人”将返回 [中国人]。
§Examples
use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
trie.insert("中国", "cn");
trie.insert("国人", "gr");
let text = "我是中国人";
let tokens: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
assert_eq!(tokens, vec!["中国人"]);Examples found in repository?
examples/front_max.rs (line 28)
5pub fn main() {
6 let mut trie = Trie::default();
7 let start = std::time::Instant::now();
8
9 for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
10 let line = line.unwrap();
11 let parts: Vec<&str> = line.split('\t').collect();
12 if parts.len() == 3 {
13 trie.insert(
14 parts[0],
15 (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
16 );
17 }
18 }
19
20 println!("load dict use {:?}", start.elapsed());
21
22 let file = std::fs::read_to_string("dict/big_text.txt").unwrap();
23
24 let start = std::time::Instant::now();
25 let mut len = 0;
26
27 file.lines().for_each(|line| {
28 let result: Vec<_> = trie.iter_max(line).map(|t| t.0).collect();
29 len += result.len();
30 println!("{:?}", result);
31 });
32
33 println!("text parse token:{} use {:?}", len, start.elapsed());
34}More examples
examples/example.rs (line 25)
3pub fn main() {
4 let mut trie = Trie::default();
5
6 trie.insert("中国人", String::from("ud"));
7 trie.insert("中国", String::from("ud"));
8 trie.insert("我", String::from("ud"));
9 trie.insert("是", String::from("ud"));
10 trie.insert("爱", String::from("ud"));
11 trie.insert("北京", String::from("ud"));
12 trie.insert("天安门", String::from("ud"));
13 trie.insert("天安", String::from("ud"));
14 trie.insert("安门", String::from("ud"));
15 trie.insert("上", String::from("ud"));
16 trie.insert("太阳", String::from("ud"));
17 trie.insert("升", String::from("ud"));
18
19 let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
20
21 let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
22 //["我", "爱", "北京", "天安", "天安门", "安门", "天安", "天安门", "安门", "上", "太阳", "升", "我", "是", "中国", "中国人", "我", "爱", "中国"]
23 println!("{:?}", c);
24
25 let c: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
26
27 //["我", "爱", "北京", "天安门", "天安门", "上", "太阳", "升", "我", "是", "中国人", "我", "爱", "中国"]
28 println!("{:?}", c);
29}
Trait Implementations§
Auto Trait Implementations§
impl<T> Freeze for Trie<T>
where
    T: Freeze,
impl<T> RefUnwindSafe for Trie<T>
where
    T: RefUnwindSafe,
impl<T> Send for Trie<T>
where
    T: Send,
impl<T> Sync for Trie<T>
where
    T: Sync,
impl<T> Unpin for Trie<T>
where
    T: Unpin,
impl<T> UnwindSafe for Trie<T>
where
    T: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more