ultra_nlp/_daachorse/
dictionary.rs

1use daachorse::charwise::{
2    CharwiseDoubleArrayAhoCorasick as DoubleArrayAhoCorasick,
3    CharwiseDoubleArrayAhoCorasickBuilder as DoubleArrayAhoCorasickBuilder,
4};
5use daachorse::MatchKind;
6use crate::{
7    UltraNLPResult,
8    UltraNLPError
9};
10
11#[derive(Clone)]
12pub struct StandardDictionary {
13    pub(crate) acdat: DoubleArrayAhoCorasick<usize>,
14}
15
16#[derive(Clone)]
17pub struct ForwardDictionary {
18    pub(crate) acdat: DoubleArrayAhoCorasick<usize>,
19}
20
21#[derive(Clone)]
22pub struct BackwardDictionary {
23    pub(crate) acdat: DoubleArrayAhoCorasick<usize>,
24}
25
26impl StandardDictionary {
27    pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
28        patterns: I
29    ) -> UltraNLPResult<Self> {
30        let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
31
32        let acdat = create_acdat_with_values(
33            patterns_with_values,
34            MatchKind::Standard
35        )?;
36
37        Ok(Self { acdat, })
38    }
39}
40
41impl ForwardDictionary {
42    pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
43        patterns: I
44    ) -> UltraNLPResult<Self> {
45        let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
46
47        let acdat = create_acdat_with_values(
48            patterns_with_values,
49            MatchKind::LeftmostLongest
50        )?;
51
52        Ok(Self { acdat })
53    }
54}
55
56impl BackwardDictionary {
57    pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
58        patterns: I
59    ) -> UltraNLPResult<Self> {
60        let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
61
62        let reversed_patterns_with_values = patterns_with_values
63            .into_iter()
64            .map(|(pattern, value)| {
65                let pattern = pattern
66                    .chars()
67                    .rev()
68                    .collect::<String>();
69
70                (pattern, value)
71            })
72            .collect::<Vec<_>>();
73
74        let acdat = create_acdat_with_values(
75            reversed_patterns_with_values,
76            MatchKind::LeftmostLongest
77        )?;
78
79        Ok(Self { acdat })
80    }
81}
82
83fn create_acdat_with_values<
84    T: AsRef<str>,
85    I: IntoIterator<Item = (T, usize)>
86>(
87    patterns_with_values: I,
88    match_kind: MatchKind,
89) -> UltraNLPResult<DoubleArrayAhoCorasick<usize>> {
90    let acdat = DoubleArrayAhoCorasickBuilder::new()
91        .match_kind(match_kind)
92        .build_with_values(patterns_with_values);
93
94    acdat.map_err(|err| UltraNLPError::new(err.to_string()))
95}
96
97fn prepare_patterns_for_dictionary<
98    T: AsRef<str>,
99    I: IntoIterator<Item = T>
100>(
101    patterns: I,
102) -> UltraNLPResult<Vec<(String, usize)>> {
103    let patterns_with_values = patterns
104        .into_iter()
105        .enumerate()
106        .map(|(i, pattern)| -> Result<(String, usize), _>{
107            let pattern = pattern
108                .as_ref()
109                .to_lowercase();
110
111            let value = usize::try_from(i)
112                .map_err(|err| UltraNLPError::new(err.to_string()))?;
113
114            Ok((pattern, value))
115        })
116        .collect::<Result<Vec<_>, _>>()?;
117
118    Ok(patterns_with_values)
119}
120
#[cfg(test)]
mod tests {
    // Each dictionary flavour gets the same three checks: an empty pattern
    // list is rejected, distinct patterns are accepted, and patterns that
    // only differ by case are rejected.

    mod standard_dictionary {
        use crate::daachorse::StandardDictionary;

        #[test]
        fn test_empty_patterns() {
            let result = StandardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            let result = StandardDictionary::new(vec!["foo", "bar"]);

            assert!(result.is_ok());
        }

        #[test]
        fn test_same_patterns() {
            let result = StandardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }

    mod forward_dictionary {
        use crate::daachorse::ForwardDictionary;

        #[test]
        fn test_empty_patterns() {
            let result = ForwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            let result = ForwardDictionary::new(vec!["foo", "bar"]);

            assert!(result.is_ok());
        }

        #[test]
        fn test_same_patterns() {
            let result = ForwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }

    mod backward_dictionary {
        use crate::daachorse::BackwardDictionary;

        #[test]
        fn test_empty_patterns() {
            let result = BackwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            let result = BackwardDictionary::new(vec!["foo", "bar"]);

            assert!(result.is_ok());
        }

        #[test]
        fn test_same_patterns() {
            let result = BackwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }
}