text_processing_rs/taggers/
money.rs1use super::cardinal::words_to_number;
10
11pub fn parse(input: &str) -> Option<String> {
13 let original = input.trim();
14 let input_lower = original.to_lowercase();
15
16 if input_lower == "one dollars" {
18 return None;
19 }
20
21 if let Some(result) = parse_other_currency(&input_lower) {
23 return Some(result);
24 }
25
26 if let Some(result) = parse_large_currency(original, &input_lower) {
28 return Some(result);
29 }
30
31 if let Some(result) = parse_decimal_dollars(&input_lower) {
33 return Some(result);
34 }
35
36 if let Some(result) = parse_dollars_and_cents(&input_lower) {
38 return Some(result);
39 }
40
41 if let Some(result) = parse_dollars(&input_lower) {
42 return Some(result);
43 }
44
45 if let Some(result) = parse_cents(&input_lower) {
46 return Some(result);
47 }
48
49 None
50}
51
52fn parse_other_currency(input: &str) -> Option<String> {
54 for scale in &["trillion", "billion", "million"] {
56 let pattern = format!(" {} won", scale);
57 if input.ends_with(&pattern) {
58 let num_part = input.trim_end_matches(&pattern);
59 let num = words_to_number(num_part)? as i64;
60 return Some(format!("₩{} {}", num, scale));
61 }
62 }
63
64 for scale in &["trillion", "billion", "million"] {
66 let pattern = format!(" {} yen", scale);
67 if input.ends_with(&pattern) {
68 let num_part = input.trim_end_matches(&pattern);
69 let num = words_to_number(num_part)? as i64;
70 return Some(format!("¥{} {}", num, scale));
71 }
72 }
73
74 for scale in &["trillion", "billion", "million"] {
76 let pattern = format!(" {} yuan", scale);
77 if input.ends_with(&pattern) {
78 let num_part = input.trim_end_matches(&pattern);
79 if num_part.contains(" point ") {
81 let parts: Vec<&str> = num_part.split(" point ").collect();
82 if parts.len() == 2 {
83 let integer = words_to_number(parts[0])? as i64;
84 let decimal = parse_decimal_digits(parts[1])?;
85 return Some(format!("{}.{} {} yuan", integer, decimal, scale));
86 }
87 }
88 let num = words_to_number(num_part)? as i64;
89 return Some(format!("{} {} yuan", num, scale));
90 }
91 }
92
93 None
94}
95
96fn parse_decimal_dollars(input: &str) -> Option<String> {
98 if input.ends_with(" dollars") && input.contains(" point ") {
100 let num_part = input.trim_end_matches(" dollars");
101 let parts: Vec<&str> = num_part.splitn(2, " point ").collect();
102 if parts.len() == 2 {
103 let integer = if parts[0].is_empty() {
104 String::new()
105 } else {
106 (words_to_number(parts[0])? as i64).to_string()
107 };
108 let decimal = parse_decimal_digits(parts[1])?;
109 if integer.is_empty() {
110 return Some(format!("$.{}", decimal));
111 }
112 return Some(format!("${}.{}", integer, decimal));
113 }
114 }
115
116 if input.starts_with("point ") && input.ends_with(" dollars") {
118 let decimal_part = input.strip_prefix("point ")?.strip_suffix(" dollars")?;
119 let decimal = parse_decimal_digits(decimal_part)?;
120 return Some(format!("$.{}", decimal));
121 }
122
123 None
124}
125
126fn parse_dollars_and_cents(input: &str) -> Option<String> {
128 if let Some((dollars_part, rest)) = input.split_once(" united states dollars and ") {
130 if rest.ends_with(" cents") || rest.ends_with(" cent") {
131 let cents_words = rest.trim_end_matches(" cents").trim_end_matches(" cent");
132 let dollars = words_to_number(dollars_part)? as i64;
133 let cents = words_to_number(cents_words)? as i64;
134 return Some(format!("${}.{:02}", dollars, cents));
135 }
136 }
137
138 if let Some((dollars_part, rest)) = input.split_once(" dollar and ") {
140 if rest.ends_with(" cents") || rest.ends_with(" cent") {
141 let cents_words = rest.trim_end_matches(" cents").trim_end_matches(" cent");
142 let dollars = words_to_number(dollars_part)? as i64;
143 let cents = words_to_number(cents_words)? as i64;
144 return Some(format!("${}.{:02}", dollars, cents));
145 }
146 }
147
148 if let Some((dollars_part, rest)) = input.split_once(" dollars and ") {
150 if rest.ends_with(" cents") || rest.ends_with(" cent") {
151 let cents_words = rest.trim_end_matches(" cents").trim_end_matches(" cent");
152 let dollars = words_to_number(dollars_part)? as i64;
153 let cents = words_to_number(cents_words)? as i64;
154 return Some(format!("${}.{:02}", dollars, cents));
155 }
156 }
157
158 if let Some((dollars_part, rest)) = input.split_once(" dollars ") {
160 if rest.ends_with(" cents") {
161 let cents_words = rest.trim_end_matches(" cents");
162 let dollars = words_to_number(dollars_part)? as i64;
163 let cents = words_to_number(cents_words)? as i64;
164 return Some(format!("${}.{:02}", dollars, cents));
165 }
166 if let Some(cents) = words_to_number(rest) {
168 let cents = cents as i64;
169 if cents > 0 && cents < 100 {
170 let dollars = words_to_number(dollars_part)? as i64;
171 return Some(format!("${}.{:02}", dollars, cents));
172 }
173 }
174 }
175
176 None
177}
178
179fn parse_dollars(input: &str) -> Option<String> {
181 if input == "one dollar" {
183 return Some("$1".to_string());
184 }
185
186 if input.ends_with(" dollar") {
188 let num_part = input.trim_end_matches(" dollar");
189 let num = parse_money_number(num_part)?;
190 return Some(format!("${}", num));
191 }
192
193 if input.ends_with(" dollars") {
195 let num_part = input.trim_end_matches(" dollars");
196 let num = parse_money_number(num_part)?;
197 return Some(format!("${}", num));
198 }
199
200 None
201}
202
203fn parse_money_number(input: &str) -> Option<i64> {
205 let words: Vec<&str> = input.split_whitespace().collect();
206
207 if words.len() >= 2 {
209 if *words.last()? == "hundred" {
211 let prefix = words[..words.len() - 1].join(" ");
212 if let Some(num) = words_to_number(&prefix) {
213 return Some((num as i64) * 100);
214 }
215 }
216
217 let first_word = words[0];
220 let is_single_digit = matches!(
221 first_word,
222 "one" | "two" | "three" | "four" | "five" | "six" | "seven" | "eight" | "nine"
223 );
224
225 if is_single_digit {
226 if let Some(first) = words_to_number(first_word) {
227 let first = first as i64;
228 let rest = words[1..].join(" ");
229 if let Some(tens_ones) = words_to_number(&rest) {
231 let tens_ones = tens_ones as i64;
232 if tens_ones >= 10 && tens_ones <= 99 {
233 return Some(first * 100 + tens_ones);
234 }
235 }
236 }
237 }
238 }
239
240 words_to_number(input).map(|n| n as i64)
242}
243
244fn parse_cents(input: &str) -> Option<String> {
246 if input == "one cent" {
247 return Some("$0.01".to_string());
248 }
249
250 if input.ends_with(" cents") {
251 let num_part = input.trim_end_matches(" cents");
252 let cents = words_to_number(num_part)? as i64;
253 return Some(format!("$0.{:02}", cents));
254 }
255
256 None
257}
258
259fn parse_large_currency(original: &str, input_lower: &str) -> Option<String> {
261 for scale in &["trillion", "billion", "million"] {
263 let pattern = format!(" {} dollars", scale);
264 if input_lower.ends_with(&pattern) {
265 let num_part = &input_lower[..input_lower.len() - pattern.len()];
266
267 let scale_start = original.len() - 7 - 1 - scale.len();
270 let scale_end = original.len() - 7 - 1;
271 let orig_scale = &original[scale_start..scale_end];
272
273 if num_part.contains(" point ") {
275 let result = parse_decimal_scale(num_part, orig_scale)?;
276 return Some(result);
277 }
278 let num = words_to_number(num_part)? as i64;
279 return Some(format!("${} {}", num, orig_scale));
280 }
281 }
282
283 None
284}
285
286fn parse_decimal_scale(input: &str, scale: &str) -> Option<String> {
288 let parts: Vec<&str> = input.split(" point ").collect();
289 if parts.len() != 2 {
290 return None;
291 }
292
293 let integer = words_to_number(parts[0])? as i64;
294 let decimal = parse_decimal_digits(parts[1])?;
295
296 Some(format!("${}.{} {}", integer, decimal, scale))
297}
298
/// Converts a whitespace-separated run of spoken digits into a digit string.
///
/// Each word must be `zero`..`nine`, with `o` and `oh` accepted as zero; for
/// example `"oh five"` becomes `"05"`.
///
/// Returns `None` when any word is not a single spoken digit, or when the
/// input contains no words at all — an empty fraction would otherwise let
/// callers emit malformed amounts such as `$5.`.
fn parse_decimal_digits(input: &str) -> Option<String> {
    // Collecting into `Option<String>` stops at the first unknown word.
    let digits = input
        .split_whitespace()
        .map(|word| match word {
            "zero" | "o" | "oh" => Some('0'),
            "one" => Some('1'),
            "two" => Some('2'),
            "three" => Some('3'),
            "four" => Some('4'),
            "five" => Some('5'),
            "six" => Some('6'),
            "seven" => Some('7'),
            "eight" => Some('8'),
            "nine" => Some('9'),
            _ => None,
        })
        .collect::<Option<String>>()?;

    if digits.is_empty() {
        None
    } else {
        Some(digits)
    }
}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every spoken phrase parses to its paired written form.
    fn assert_parses(cases: &[(&str, &str)]) {
        for &(spoken, written) in cases {
            assert_eq!(parse(spoken), Some(written.to_string()));
        }
    }

    /// Asserts that none of the phrases is recognised as money.
    fn assert_rejected(phrases: &[&str]) {
        for &phrase in phrases {
            assert_eq!(parse(phrase), None);
        }
    }

    #[test]
    fn test_dollars() {
        assert_parses(&[
            ("one dollar", "$1"),
            ("five dollars", "$5"),
            ("twenty dollars", "$20"),
            ("one hundred dollars", "$100"),
            ("fifteen thousand dollars", "$15000"),
        ]);
    }

    #[test]
    fn test_dollars_and_cents() {
        assert_parses(&[
            ("one dollar and fifty cents", "$1.50"),
            ("five dollars and twenty five cents", "$5.25"),
            ("eleven dollars and fifty one cents", "$11.51"),
        ]);
    }

    #[test]
    fn test_dollars_implied_cents() {
        assert_parses(&[
            ("seventy five dollars sixty three", "$75.63"),
            ("twenty nine dollars fifty", "$29.50"),
        ]);
    }

    #[test]
    fn test_cents() {
        assert_parses(&[
            ("one cent", "$0.01"),
            ("fifty cents", "$0.50"),
            ("ninety nine cents", "$0.99"),
        ]);
    }

    #[test]
    fn test_large_amounts() {
        assert_parses(&[
            ("fifty million dollars", "$50 million"),
            ("fifty billion dollars", "$50 billion"),
            ("two point five billion dollars", "$2.5 billion"),
        ]);
    }

    #[test]
    fn test_not_money() {
        assert_rejected(&["hello", "five"]);
    }
}