Trie

Struct Trie 

Source
pub struct Trie<T> { /* private fields */ }
Expand description

Trie 树,用于存储词典

§Examples

use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
assert_eq!(trie.get("中国人"), Some("cns").as_ref());
assert_eq!(trie.get("中国"), None);

Implementations§

Source§

impl<T> Trie<T>

Source

pub fn new_big() -> Self

创建一个新的 Trie 树。该构造方式针对超大词典做了优化,可以有效加快词典的加载速度,但会占用更多内存空间。

§Examples
use char_trie::Trie;
let mut trie = Trie::new_big();
trie.insert("中国人", "cns");
assert_eq!(trie.get("中国人"), Some("cns").as_ref());
assert_eq!(trie.get("中国"), None);
Examples found in repository?
examples/all_seg.rs (line 7)
5pub fn main() {
6    // let mut trie = Trie::default();
7    let mut trie = Trie::new_big();
8
9    let start = std::time::Instant::now();
10
11    for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
12        let line = line.unwrap();
13        let parts: Vec<&str> = line.split('\t').collect();
14        if parts.len() == 3 {
15            trie.insert(
16                parts[0],
17                (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
18            );
19        }
20    }
21
22    trie.insert("中国人", (String::from("ud"), 10000));
23
24    println!("load dict use {:?}", start.elapsed());
25
26    let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
27
28    for token in trie.iter_all(text) {
29        println!("{:?}", token);
30    }
31
32    let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
33    println!("{:?}", c);
34}
Source

pub fn insert(&mut self, key: &str, value: T)

向 Trie 树中插入一个词。参数 key 为要插入的词,value 为该词对应的值。

§Examples
use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
assert_eq!(trie.get("中国人"), Some("cns").as_ref());
assert_eq!(trie.get("中国"), None);
Examples found in repository?
examples/all_seg.rs (lines 15-18)
5pub fn main() {
6    // let mut trie = Trie::default();
7    let mut trie = Trie::new_big();
8
9    let start = std::time::Instant::now();
10
11    for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
12        let line = line.unwrap();
13        let parts: Vec<&str> = line.split('\t').collect();
14        if parts.len() == 3 {
15            trie.insert(
16                parts[0],
17                (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
18            );
19        }
20    }
21
22    trie.insert("中国人", (String::from("ud"), 10000));
23
24    println!("load dict use {:?}", start.elapsed());
25
26    let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
27
28    for token in trie.iter_all(text) {
29        println!("{:?}", token);
30    }
31
32    let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
33    println!("{:?}", c);
34}
More examples
Hide additional examples
examples/front_max.rs (lines 13-16)
5pub fn main() {
6    let mut trie = Trie::default();
7    let start = std::time::Instant::now();
8
9    for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
10        let line = line.unwrap();
11        let parts: Vec<&str> = line.split('\t').collect();
12        if parts.len() == 3 {
13            trie.insert(
14                parts[0],
15                (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
16            );
17        }
18    }
19
20    println!("load dict use {:?}", start.elapsed());
21
22    let file = std::fs::read_to_string("dict/big_text.txt").unwrap();
23
24    let start = std::time::Instant::now();
25    let mut len = 0;
26
27    file.lines().for_each(|line| {
28        let result: Vec<_> = trie.iter_max(line).map(|t| t.0).collect();
29        len += result.len();
30        println!("{:?}", result);
31    });
32
33    println!("text parse token:{} use {:?}", len, start.elapsed());
34}
examples/example.rs (line 6)
3pub fn main() {
4    let mut trie = Trie::default();
5
6    trie.insert("中国人", String::from("ud"));
7    trie.insert("中国", String::from("ud"));
8    trie.insert("我", String::from("ud"));
9    trie.insert("是", String::from("ud"));
10    trie.insert("爱", String::from("ud"));
11    trie.insert("北京", String::from("ud"));
12    trie.insert("天安门", String::from("ud"));
13    trie.insert("天安", String::from("ud"));
14    trie.insert("安门", String::from("ud"));
15    trie.insert("上", String::from("ud"));
16    trie.insert("太阳", String::from("ud"));
17    trie.insert("升", String::from("ud"));
18
19    let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
20
21    let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
22    //["我", "爱", "北京", "天安", "天安门", "安门", "天安", "天安门", "安门", "上", "太阳", "升", "我", "是", "中国", "中国人", "我", "爱", "中国"]
23    println!("{:?}", c);
24
25    let c: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
26
27    //["我", "爱", "北京", "天安门", "天安门", "上", "太阳", "升", "我", "是", "中国人", "我", "爱", "中国"]
28    println!("{:?}", c);
29}
Source

pub fn get(&self, key: &str) -> Option<&T>

Source

pub fn char_get(&self, c: char) -> Option<&Self>

Source

pub fn iter_all<'a>(&'a self, text: &'a str) -> AllTokenizer<'a, T>

实现了全词匹配。例如词典中包含【中国,国人,中国人】三个词,那么对于文本 “我是中国人” 将按匹配顺序返回 [中国,中国人,国人]。

§Examples
use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
trie.insert("中国", "cn");
trie.insert("国人", "gr");
let text = "我是中国人";
let tokens: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
assert_eq!(tokens, vec!["中国", "中国人", "国人"]);
Examples found in repository?
examples/all_seg.rs (line 28)
5pub fn main() {
6    // let mut trie = Trie::default();
7    let mut trie = Trie::new_big();
8
9    let start = std::time::Instant::now();
10
11    for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
12        let line = line.unwrap();
13        let parts: Vec<&str> = line.split('\t').collect();
14        if parts.len() == 3 {
15            trie.insert(
16                parts[0],
17                (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
18            );
19        }
20    }
21
22    trie.insert("中国人", (String::from("ud"), 10000));
23
24    println!("load dict use {:?}", start.elapsed());
25
26    let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
27
28    for token in trie.iter_all(text) {
29        println!("{:?}", token);
30    }
31
32    let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
33    println!("{:?}", c);
34}
More examples
Hide additional examples
examples/example.rs (line 21)
3pub fn main() {
4    let mut trie = Trie::default();
5
6    trie.insert("中国人", String::from("ud"));
7    trie.insert("中国", String::from("ud"));
8    trie.insert("我", String::from("ud"));
9    trie.insert("是", String::from("ud"));
10    trie.insert("爱", String::from("ud"));
11    trie.insert("北京", String::from("ud"));
12    trie.insert("天安门", String::from("ud"));
13    trie.insert("天安", String::from("ud"));
14    trie.insert("安门", String::from("ud"));
15    trie.insert("上", String::from("ud"));
16    trie.insert("太阳", String::from("ud"));
17    trie.insert("升", String::from("ud"));
18
19    let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
20
21    let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
22    //["我", "爱", "北京", "天安", "天安门", "安门", "天安", "天安门", "安门", "上", "太阳", "升", "我", "是", "中国", "中国人", "我", "爱", "中国"]
23    println!("{:?}", c);
24
25    let c: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
26
27    //["我", "爱", "北京", "天安门", "天安门", "上", "太阳", "升", "我", "是", "中国人", "我", "爱", "中国"]
28    println!("{:?}", c);
29}
Source

pub fn iter_max<'a>(&'a self, text: &'a str) -> MaxFrontTokenizer<'a, T>

实现了正向最大匹配。例如词典中包含【中国,国人,中国人】三个词,那么对于文本 “我是中国人” 将只返回 [中国人]。

§Examples
use char_trie::Trie;
let mut trie = Trie::default();
trie.insert("中国人", "cns");
trie.insert("中国", "cn");
trie.insert("国人", "gr");
let text = "我是中国人";
let tokens: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
assert_eq!(tokens, vec!["中国人"]);
Examples found in repository?
examples/front_max.rs (line 28)
5pub fn main() {
6    let mut trie = Trie::default();
7    let start = std::time::Instant::now();
8
9    for line in BufReader::new(File::open("dict/default.dic").unwrap()).lines() {
10        let line = line.unwrap();
11        let parts: Vec<&str> = line.split('\t').collect();
12        if parts.len() == 3 {
13            trie.insert(
14                parts[0],
15                (parts[1].to_string(), parts[2].parse::<i32>().unwrap()),
16            );
17        }
18    }
19
20    println!("load dict use {:?}", start.elapsed());
21
22    let file = std::fs::read_to_string("dict/big_text.txt").unwrap();
23
24    let start = std::time::Instant::now();
25    let mut len = 0;
26
27    file.lines().for_each(|line| {
28        let result: Vec<_> = trie.iter_max(line).map(|t| t.0).collect();
29        len += result.len();
30        println!("{:?}", result);
31    });
32
33    println!("text parse token:{} use {:?}", len, start.elapsed());
34}
More examples
Hide additional examples
examples/example.rs (line 25)
3pub fn main() {
4    let mut trie = Trie::default();
5
6    trie.insert("中国人", String::from("ud"));
7    trie.insert("中国", String::from("ud"));
8    trie.insert("我", String::from("ud"));
9    trie.insert("是", String::from("ud"));
10    trie.insert("爱", String::from("ud"));
11    trie.insert("北京", String::from("ud"));
12    trie.insert("天安门", String::from("ud"));
13    trie.insert("天安", String::from("ud"));
14    trie.insert("安门", String::from("ud"));
15    trie.insert("上", String::from("ud"));
16    trie.insert("太阳", String::from("ud"));
17    trie.insert("升", String::from("ud"));
18
19    let text = "我爱北京天安门,天安门上太阳升。我是中国人,我爱中国。";
20
21    let c: Vec<_> = trie.iter_all(text).map(|t| t.0).collect();
22    //["我", "爱", "北京", "天安", "天安门", "安门", "天安", "天安门", "安门", "上", "太阳", "升", "我", "是", "中国", "中国人", "我", "爱", "中国"]
23    println!("{:?}", c);
24
25    let c: Vec<_> = trie.iter_max(text).map(|t| t.0).collect();
26
27    //["我", "爱", "北京", "天安门", "天安门", "上", "太阳", "升", "我", "是", "中国人", "我", "爱", "中国"]
28    println!("{:?}", c);
29}

Trait Implementations§

Source§

impl<T: Debug> Debug for Trie<T>

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl<T: Default> Default for Trie<T>

Source§

fn default() -> Trie<T>

Returns the “default value” for a type. Read more

Auto Trait Implementations§

§

impl<T> Freeze for Trie<T>
where T: Freeze,

§

impl<T> RefUnwindSafe for Trie<T>
where T: RefUnwindSafe,

§

impl<T> Send for Trie<T>
where T: Send,

§

impl<T> Sync for Trie<T>
where T: Sync,

§

impl<T> Unpin for Trie<T>
where T: Unpin,

§

impl<T> UnwindSafe for Trie<T>
where T: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.