ultra_nlp/_cedarwood/
dictionary.rs1use std::collections::HashSet;
2use cedarwood::Cedar;
3use crate::{
4 UltraNLPResult,
5 UltraNLPError,
6};
7
8#[derive(Clone)]
9pub struct ForwardDictionary {
10 pub(crate) dat: Cedar,
11}
12
13#[derive(Clone)]
14pub struct BackwardDictionary {
15 pub(crate) dat: Cedar,
16}
17
18impl ForwardDictionary {
19 pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
20 patterns: I
21 ) -> UltraNLPResult<Self> {
22 let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
23 if patterns_with_values.len() == 0 {
24 return Err(UltraNLPError::new("The patterns cannot be empty"));
25 }
26
27 let patterns = patterns_with_values
28 .iter()
29 .map(|(x, _)| x);
30 if !is_unique(patterns) {
31 return Err(UltraNLPError::new("The patterns are not unique"));
32 }
33
34 let dat = create_dat_with_values(patterns_with_values);
35
36 Ok(Self { dat })
37 }
38}
39
40impl BackwardDictionary {
41 pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
42 patterns: I
43 ) -> UltraNLPResult<Self> {
44 let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
45 if patterns_with_values.len() == 0 {
46 return Err(UltraNLPError::new("The patterns cannot be empty"));
47 }
48
49 let patterns = patterns_with_values
50 .iter()
51 .map(|(x, _)| x);
52 if !is_unique(patterns) {
53 return Err(UltraNLPError::new("The patterns are not unique"));
54 }
55
56 let patterns_with_values = patterns_with_values
57 .into_iter()
58 .map(|(pattern, value)| {
59 let pattern = pattern
60 .chars()
61 .rev()
62 .collect::<String>();
63
64 (pattern, value)
65 })
66 .collect::<Vec<_>>();
67
68 let dat = create_dat_with_values(patterns_with_values);
69
70 Ok(Self { dat })
71 }
72}
73
74fn create_dat_with_values<
75 T: AsRef<str>,
76 I: IntoIterator<Item = (T, i32)>,
77>(patterns_with_values: I) -> Cedar {
78 let key_values: Vec<(String, i32)> = patterns_with_values.into_iter()
79 .map(|(key, value)| {
80 let key = key.as_ref().to_owned();
81 let value = value;
82
83 (key, value)
84 })
85 .collect::<Vec<_>>();
86 let key_values: Vec<(&str, i32)> = key_values
87 .iter()
88 .map(|(key, value)| (key.as_str(), *value))
89 .collect::<Vec<_>>();
90
91 let mut dat = Cedar::new();
92 dat.build(&key_values);
93
94 dat
95}
96
97fn prepare_patterns_for_dictionary<
98 T: AsRef<str>,
99 I: IntoIterator<Item = T>
100>(
101 patterns: I,
102) -> UltraNLPResult<Vec<(String, i32)>> {
103 let patterns_with_values = patterns
104 .into_iter()
105 .enumerate()
106 .map(|(index, pattern)| -> Result<(String, i32), _>{
107 let pattern = pattern.as_ref().to_lowercase();
108
109 let value = i32::try_from(index)
110 .map_err(|err| UltraNLPError::new(err.to_string()))?;
111
112 Ok((pattern, value))
113 })
114 .collect::<Result<Vec<_>, _>>()?;
115
116 Ok(patterns_with_values)
117}
118
/// Reports whether every string yielded by `collection` is distinct.
///
/// Items are compared by their exact `str` contents. Iteration stops at the
/// first repeated item; a collection that yields nothing counts as unique.
fn is_unique<T: AsRef<str>, I: IntoIterator<Item = T>>(
    collection: I
) -> bool {
    let mut seen = HashSet::new();

    for item in collection {
        // `insert` returns false when the value was already present.
        let newly_added = seen.insert(item.as_ref().to_owned());
        if !newly_added {
            return false;
        }
    }

    true
}
127
#[cfg(test)]
mod tests {
    // Constructor checks for the forward dictionary.
    mod forward_dictionary {
        use crate::cedarwood::ForwardDictionary;

        #[test]
        fn test_empty_patterns() {
            // An empty pattern list is expected to be rejected.
            let result = ForwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            // Distinct patterns must be accepted; `unwrap` fails the test otherwise.
            ForwardDictionary::new(vec!["foo", "bar"]).unwrap();
        }

        #[test]
        fn test_same_patterns() {
            // Patterns differing only by case are expected to count as duplicates.
            let result = ForwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }

    // Constructor checks for the backward dictionary.
    mod backward_dictionary {
        use crate::cedarwood::BackwardDictionary;

        #[test]
        fn test_empty_patterns() {
            // An empty pattern list is expected to be rejected.
            let result = BackwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        #[test]
        fn test_patterns() {
            // Distinct patterns must be accepted; `unwrap` fails the test otherwise.
            BackwardDictionary::new(vec!["foo", "bar"]).unwrap();
        }

        #[test]
        fn test_same_patterns() {
            // Patterns differing only by case are expected to count as duplicates.
            let result = BackwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }
}