ultra_nlp/_daachorse/
dictionary.rs1use daachorse::charwise::{
2 CharwiseDoubleArrayAhoCorasick as DoubleArrayAhoCorasick,
3 CharwiseDoubleArrayAhoCorasickBuilder as DoubleArrayAhoCorasickBuilder,
4};
5use daachorse::MatchKind;
6use crate::{
7 UltraNLPResult,
8 UltraNLPError
9};
10
/// Dictionary backed by a character-wise double-array Aho-Corasick automaton
/// built with `MatchKind::Standard`.
#[derive(Clone)]
pub struct StandardDictionary {
    /// Compiled automaton. The value attached to each pattern is the
    /// zero-based position of that pattern in the list given to
    /// `StandardDictionary::new`.
    pub(crate) acdat: DoubleArrayAhoCorasick<usize>,
}
15
/// Dictionary backed by a character-wise double-array Aho-Corasick automaton
/// built with `MatchKind::LeftmostLongest`.
#[derive(Clone)]
pub struct ForwardDictionary {
    /// Compiled automaton. The value attached to each pattern is the
    /// zero-based position of that pattern in the list given to
    /// `ForwardDictionary::new`.
    pub(crate) acdat: DoubleArrayAhoCorasick<usize>,
}
20
/// Dictionary backed by a character-wise double-array Aho-Corasick automaton
/// built with `MatchKind::LeftmostLongest` over the *reversed* patterns.
///
/// NOTE(review): presumably intended for scanning reversed input text
/// (backward maximum matching) — confirm against the callers.
#[derive(Clone)]
pub struct BackwardDictionary {
    /// Compiled automaton over the character-reversed patterns. The value
    /// attached to each pattern is the zero-based position of the original
    /// pattern in the list given to `BackwardDictionary::new`.
    pub(crate) acdat: DoubleArrayAhoCorasick<usize>,
}
25
26impl StandardDictionary {
27 pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
28 patterns: I
29 ) -> UltraNLPResult<Self> {
30 let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
31
32 let acdat = create_acdat_with_values(
33 patterns_with_values,
34 MatchKind::Standard
35 )?;
36
37 Ok(Self { acdat, })
38 }
39}
40
41impl ForwardDictionary {
42 pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
43 patterns: I
44 ) -> UltraNLPResult<Self> {
45 let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
46
47 let acdat = create_acdat_with_values(
48 patterns_with_values,
49 MatchKind::LeftmostLongest
50 )?;
51
52 Ok(Self { acdat })
53 }
54}
55
56impl BackwardDictionary {
57 pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
58 patterns: I
59 ) -> UltraNLPResult<Self> {
60 let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
61
62 let reversed_patterns_with_values = patterns_with_values
63 .into_iter()
64 .map(|(pattern, value)| {
65 let pattern = pattern
66 .chars()
67 .rev()
68 .collect::<String>();
69
70 (pattern, value)
71 })
72 .collect::<Vec<_>>();
73
74 let acdat = create_acdat_with_values(
75 reversed_patterns_with_values,
76 MatchKind::LeftmostLongest
77 )?;
78
79 Ok(Self { acdat })
80 }
81}
82
83fn create_acdat_with_values<
84 T: AsRef<str>,
85 I: IntoIterator<Item = (T, usize)>
86>(
87 patterns_with_values: I,
88 match_kind: MatchKind,
89) -> UltraNLPResult<DoubleArrayAhoCorasick<usize>> {
90 let acdat = DoubleArrayAhoCorasickBuilder::new()
91 .match_kind(match_kind)
92 .build_with_values(patterns_with_values);
93
94 acdat.map_err(|err| UltraNLPError::new(err.to_string()))
95}
96
97fn prepare_patterns_for_dictionary<
98 T: AsRef<str>,
99 I: IntoIterator<Item = T>
100>(
101 patterns: I,
102) -> UltraNLPResult<Vec<(String, usize)>> {
103 let patterns_with_values = patterns
104 .into_iter()
105 .enumerate()
106 .map(|(i, pattern)| -> Result<(String, usize), _>{
107 let pattern = pattern
108 .as_ref()
109 .to_lowercase();
110
111 let value = usize::try_from(i)
112 .map_err(|err| UltraNLPError::new(err.to_string()))?;
113
114 Ok((pattern, value))
115 })
116 .collect::<Result<Vec<_>, _>>()?;
117
118 Ok(patterns_with_values)
119}
120
#[cfg(test)]
mod tests {
    // Every dictionary flavour is exercised with the same three construction
    // cases: an empty pattern list, two distinct patterns, and two patterns
    // that differ only by letter case.

    mod standard_dictionary {
        use crate::daachorse::StandardDictionary;

        #[test]
        fn test_empty_patterns() {
            let result = StandardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            let result = StandardDictionary::new(vec!["foo", "bar"]);

            result.unwrap();
        }

        #[test]
        fn test_same_patterns() {
            // Patterns are lowercased before building, so these two collide.
            let result = StandardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }

    mod forward_dictionary {
        use crate::daachorse::ForwardDictionary;

        #[test]
        fn test_empty_patterns() {
            let result = ForwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            let result = ForwardDictionary::new(vec!["foo", "bar"]);

            result.unwrap();
        }

        #[test]
        fn test_same_patterns() {
            // Patterns are lowercased before building, so these two collide.
            let result = ForwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }

    mod backward_dictionary {
        use crate::daachorse::BackwardDictionary;

        #[test]
        fn test_empty_patterns() {
            let result = BackwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            let result = BackwardDictionary::new(vec!["foo", "bar"]);

            result.unwrap();
        }

        #[test]
        fn test_same_patterns() {
            // Patterns are lowercased before building, so these two collide.
            let result = BackwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }
}