ultra_nlp/_cedarwood/
dictionary.rs

1use std::collections::HashSet;
2use cedarwood::Cedar;
3use crate::{
4    UltraNLPResult,
5    UltraNLPError,
6};
7
8#[derive(Clone)]
9pub struct ForwardDictionary {
10    pub(crate) dat: Cedar,
11}
12
13#[derive(Clone)]
14pub struct BackwardDictionary {
15    pub(crate) dat: Cedar,
16}
17
18impl ForwardDictionary {
19    pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
20        patterns: I
21    ) -> UltraNLPResult<Self> {
22        let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
23        if patterns_with_values.len() == 0 {
24            return Err(UltraNLPError::new("The patterns cannot be empty"));
25        }
26
27        let patterns = patterns_with_values
28            .iter()
29            .map(|(x, _)| x);
30        if !is_unique(patterns) {
31            return Err(UltraNLPError::new("The patterns are not unique"));
32        }
33
34        let dat = create_dat_with_values(patterns_with_values);
35
36        Ok(Self { dat })
37    }
38}
39
40impl BackwardDictionary {
41    pub fn new<T: AsRef<str>, I: IntoIterator<Item = T>>(
42        patterns: I
43    ) -> UltraNLPResult<Self> {
44        let patterns_with_values = prepare_patterns_for_dictionary(patterns)?;
45        if patterns_with_values.len() == 0 {
46            return Err(UltraNLPError::new("The patterns cannot be empty"));
47        }
48
49        let patterns = patterns_with_values
50            .iter()
51            .map(|(x, _)| x);
52        if !is_unique(patterns) {
53            return Err(UltraNLPError::new("The patterns are not unique"));
54        }
55
56        let patterns_with_values = patterns_with_values
57            .into_iter()
58            .map(|(pattern, value)| {
59                let pattern = pattern
60                    .chars()
61                    .rev()
62                    .collect::<String>();
63
64                (pattern, value)
65            })
66            .collect::<Vec<_>>();
67
68        let dat = create_dat_with_values(patterns_with_values);
69
70        Ok(Self { dat })
71    }
72}
73
74fn create_dat_with_values<
75    T: AsRef<str>,
76    I: IntoIterator<Item = (T, i32)>,
77>(patterns_with_values: I) -> Cedar {
78    let key_values: Vec<(String, i32)> = patterns_with_values.into_iter()
79        .map(|(key, value)| {
80            let key = key.as_ref().to_owned();
81            let value = value;
82
83            (key, value)
84        })
85        .collect::<Vec<_>>();
86    let key_values: Vec<(&str, i32)> = key_values
87        .iter()
88        .map(|(key, value)| (key.as_str(), *value))
89        .collect::<Vec<_>>();
90
91    let mut dat = Cedar::new();
92    dat.build(&key_values);
93
94    dat
95}
96
97fn prepare_patterns_for_dictionary<
98    T: AsRef<str>,
99    I: IntoIterator<Item = T>
100>(
101    patterns: I,
102) -> UltraNLPResult<Vec<(String, i32)>> {
103    let patterns_with_values = patterns
104        .into_iter()
105        .enumerate()
106        .map(|(index, pattern)| -> Result<(String, i32), _>{
107            let pattern = pattern.as_ref().to_lowercase();
108
109            let value = i32::try_from(index)
110                .map_err(|err| UltraNLPError::new(err.to_string()))?;
111
112            Ok((pattern, value))
113        })
114        .collect::<Result<Vec<_>, _>>()?;
115
116    Ok(patterns_with_values)
117}
118
/// Reports whether every string yielded by `collection` is distinct.
///
/// Comparison is exact string equality. Iteration stops at the first
/// duplicate. An empty collection is considered unique.
fn is_unique<T: AsRef<str>, I: IntoIterator<Item = T>>(
    collection: I
) -> bool {
    let mut seen: HashSet<String> = HashSet::new();

    for item in collection {
        // `insert` returns `false` when the value was already present.
        let newly_added = seen.insert(item.as_ref().to_owned());
        if !newly_added {
            return false;
        }
    }

    true
}
127
#[cfg(test)]
mod tests {
    /// Constructor checks for `ForwardDictionary`.
    mod forward_dictionary {
        use crate::cedarwood::ForwardDictionary;

        /// An empty pattern list is rejected.
        #[test]
        fn test_empty_patterns() {
            let result = ForwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        /// Distinct patterns build successfully.
        #[test]
        fn test_patterns() {
            let result = ForwardDictionary::new(vec!["foo", "bar"]);

            result.unwrap();
        }

        /// Patterns that differ only in case count as duplicates.
        #[test]
        fn test_same_patterns() {
            let result = ForwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }

    /// Constructor checks for `BackwardDictionary`.
    mod backward_dictionary {
        use crate::cedarwood::BackwardDictionary;

        /// An empty pattern list is rejected.
        #[test]
        fn test_empty_patterns() {
            let result = BackwardDictionary::new(Vec::<&str>::new());

            assert!(result.is_err());
        }

        /// Distinct patterns build successfully.
        #[test]
        fn test_patterns() {
            let result = BackwardDictionary::new(vec!["foo", "bar"]);

            result.unwrap();
        }

        /// Patterns that differ only in case count as duplicates.
        #[test]
        fn test_same_patterns() {
            let result = BackwardDictionary::new(vec!["foo", "FOO"]);

            assert!(result.is_err());
        }
    }
}