// lindera_tokenizer/token.rs
use once_cell::sync::Lazy;
use serde::Serialize;

use lindera_core::dictionary::{Dictionary, UserDictionary};
use lindera_core::word_entry::WordId;

7static UNK: Lazy<Vec<&str>> = Lazy::new(|| vec!["UNK"]);
8
9#[derive(Serialize, Clone)]
10pub struct Token<'a> {
11 pub text: &'a str,
13
14 pub byte_start: usize,
16
17 pub byte_end: usize,
19
20 pub position: usize,
22
23 pub position_length: usize,
25
26 pub word_id: WordId,
28
29 pub dictionary: &'a Dictionary,
31
32 pub user_dictionary: Option<&'a UserDictionary>,
34
35 details: Option<Vec<String>>,
38}
39
40impl<'a> Token<'a> {
41 pub fn new(
42 text: &'a str,
43 start: usize,
44 end: usize,
45 position: usize,
46 word_id: WordId,
47 dictionary: &'a Dictionary,
48 user_dictionary: Option<&'a UserDictionary>,
49 ) -> Self {
50 Self {
51 text,
52 details: None,
53 byte_start: start,
54 byte_end: end,
55 position,
56 position_length: 1,
57 word_id,
58 dictionary,
59 user_dictionary,
60 }
61 }
62
63 fn details(&self) -> Option<Vec<&str>> {
64 match &self.details {
65 Some(details) => {
66 let mut v = Vec::new();
67 for detail in details.iter() {
68 let a = detail.as_str();
69 v.push(a);
70 }
71 Some(v)
72 }
73 None => None,
74 }
75 }
76
77 pub fn get_details(&mut self) -> Option<Vec<&str>> {
79 if self.details.is_some() {
80 return self.details();
81 }
82
83 if self.word_id.is_unknown() {
84 self.set_details(Some(UNK.iter().map(|v| v.to_string()).collect()));
85 return self.details();
86 }
87
88 self.details = if self.word_id.is_system() {
89 self.dictionary.word_details(self.word_id.0 as usize)
90 } else {
91 match self.user_dictionary {
92 Some(user_dictionary) => user_dictionary.word_details(self.word_id.0 as usize),
93 None => None,
94 }
95 };
96 self.details()
97 }
98
99 pub fn set_details(&mut self, details: Option<Vec<String>>) -> &Token<'a> {
100 self.details = details;
101 self
102 }
103}