text_processing_rs/taggers/
cardinal.rs1use lazy_static::lazy_static;
11use std::collections::HashMap;
12
13lazy_static! {
14 static ref ONES: HashMap<&'static str, i64> = {
16 let mut m = HashMap::new();
17 m.insert("zero", 0);
18 m.insert("one", 1);
19 m.insert("two", 2);
20 m.insert("three", 3);
21 m.insert("four", 4);
22 m.insert("five", 5);
23 m.insert("six", 6);
24 m.insert("seven", 7);
25 m.insert("eight", 8);
26 m.insert("nine", 9);
27 m.insert("ten", 10);
28 m.insert("eleven", 11);
29 m.insert("twelve", 12);
30 m.insert("thirteen", 13);
31 m.insert("fourteen", 14);
32 m.insert("fifteen", 15);
33 m.insert("sixteen", 16);
34 m.insert("seventeen", 17);
35 m.insert("eighteen", 18);
36 m.insert("nineteen", 19);
37 m
38 };
39
40 static ref TENS: HashMap<&'static str, i64> = {
42 let mut m = HashMap::new();
43 m.insert("twenty", 20);
44 m.insert("thirty", 30);
45 m.insert("forty", 40);
46 m.insert("fifty", 50);
47 m.insert("sixty", 60);
48 m.insert("seventy", 70);
49 m.insert("eighty", 80);
50 m.insert("ninety", 90);
51 m
52 };
53
54 static ref SCALES: HashMap<&'static str, i128> = {
56 let mut m = HashMap::new();
57 m.insert("hundred", 100);
58 m.insert("thousand", 1_000);
59 m.insert("million", 1_000_000);
60 m.insert("billion", 1_000_000_000);
61 m.insert("trillion", 1_000_000_000_000);
62 m.insert("quadrillion", 1_000_000_000_000_000);
63 m.insert("quintillion", 1_000_000_000_000_000_000);
64 m.insert("sextillion", 1_000_000_000_000_000_000_000_i128);
65 m.insert("lakh", 100_000);
67 m.insert("crore", 10_000_000);
68 m
69 };
70}
71
72pub fn parse(input: &str) -> Option<String> {
76 let input = input.to_lowercase();
77 let input = input.trim();
78
79 if input == "zero" {
81 return Some("zero".to_string());
82 }
83
84 let (is_negative, rest) = if input.starts_with("minus ") {
86 (true, input.strip_prefix("minus ")?)
87 } else if input.starts_with("negative ") {
88 (true, input.strip_prefix("negative ")?)
89 } else {
90 (false, input)
91 };
92
93 let num = words_to_number(rest)?;
94
95 if is_negative {
96 Some(format!("-{}", num))
97 } else {
98 Some(num.to_string())
99 }
100}
101
102pub fn words_to_number(input: &str) -> Option<i128> {
115 let input = input.to_lowercase();
116 let words: Vec<&str> = input
117 .split_whitespace()
118 .filter(|w| *w != "and" && *w != "a")
119 .collect();
120
121 if words.is_empty() {
122 return None;
123 }
124
125 if words.len() == 2 && words[1] == "hundred" {
127 if let Some(&val) = ONES.get(words[0]) {
128 if val >= 11 && val <= 19 {
129 return Some((val * 100) as i128);
130 }
131 }
132 if let Some(&val) = TENS.get(words[0]) {
133 return Some((val * 100) as i128);
134 }
135 }
136
137 if words.len() >= 2 && words[1] == "hundred" {
139 if let Some(&first_val) = ONES.get(words[0]) {
140 if first_val >= 11 && first_val <= 99 {
141 let base = (first_val * 100) as i128;
142 if words.len() == 2 {
143 return Some(base);
144 }
145 let rest = words[2..].join(" ");
147 if let Some(remainder) = words_to_number(&rest) {
148 return Some(base + remainder);
149 }
150 }
151 }
152 if let Some(&first_val) = TENS.get(words[0]) {
153 let base = (first_val * 100) as i128;
154 if words.len() == 2 {
155 return Some(base);
156 }
157 let rest = words[2..].join(" ");
158 if let Some(remainder) = words_to_number(&rest) {
159 return Some(base + remainder);
160 }
161 }
162 }
163
164 let mut result: i128 = 0;
165 let mut current: i128 = 0;
166 let mut found_number = false;
167
168 for word in words {
169 if let Some(&val) = ONES.get(word) {
170 current += val as i128;
171 found_number = true;
172 } else if let Some(&val) = TENS.get(word) {
173 current += val as i128;
174 found_number = true;
175 } else if word == "hundred" {
176 if current == 0 {
177 current = 1;
178 }
179 current *= 100;
180 found_number = true;
181 } else if let Some(&scale) = SCALES.get(word) {
182 if scale >= 1000 {
183 if current == 0 {
184 current = 1;
185 }
186 current *= scale;
187 result += current;
188 current = 0;
189 found_number = true;
190 }
191 } else {
192 return None;
194 }
195 }
196
197 if found_number {
198 Some(result + current)
199 } else {
200 None
201 }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(spoken, written)` pair parses to the written form.
    #[track_caller]
    fn assert_all_parse(cases: &[(&str, &str)]) {
        for &(spoken, written) in cases {
            assert_eq!(
                parse(spoken),
                Some(written.to_string()),
                "input: {:?}",
                spoken
            );
        }
    }

    #[test]
    fn test_ones() {
        assert_all_parse(&[
            ("one", "1"),
            ("two", "2"),
            ("nine", "9"),
            ("ten", "10"),
            ("fifteen", "15"),
            ("nineteen", "19"),
        ]);
    }

    #[test]
    fn test_tens() {
        assert_all_parse(&[
            ("twenty", "20"),
            ("twenty one", "21"),
            ("forty two", "42"),
            ("ninety nine", "99"),
        ]);
    }

    #[test]
    fn test_hundreds() {
        assert_all_parse(&[
            ("one hundred", "100"),
            ("one hundred one", "101"),
            ("one hundred and one", "101"),
            ("two hundred twenty two", "222"),
        ]);
    }

    #[test]
    fn test_eleven_hundred() {
        assert_all_parse(&[
            ("eleven hundred", "1100"),
            ("twenty one hundred", "2100"),
            ("eleven hundred twenty one", "1121"),
        ]);
    }

    #[test]
    fn test_thousands() {
        assert_all_parse(&[
            ("one thousand", "1000"),
            ("one thousand one", "1001"),
            ("one thousand one hundred", "1100"),
            ("one thousand two hundred thirty four", "1234"),
        ]);
    }

    #[test]
    fn test_millions() {
        assert_all_parse(&[
            ("one million", "1000000"),
            ("two million three", "2000003"),
        ]);
    }

    #[test]
    fn test_negative() {
        assert_all_parse(&[
            ("minus sixty", "-60"),
            ("minus twenty five thousand thirty seven", "-25037"),
        ]);
    }

    #[test]
    fn test_zero() {
        // A bare "zero" is passed through as a word rather than a digit.
        assert_all_parse(&[("zero", "zero")]);
    }

    #[test]
    fn test_invalid() {
        for input in ["hello", "one hello"].iter() {
            assert_eq!(parse(input), None, "input: {:?}", input);
        }
    }
}