Skip to main content

text_processing_rs/taggers/
money.rs

1//! Money tagger.
2//!
3//! Converts spoken currency expressions to written form:
4//! - "five dollars" → "$5"
5//! - "five dollars and fifty cents" → "$5.50"
6//! - "one cent" → "$0.01"
7//! - "fifteen hundred dollars" → "$1500"
8
9use super::cardinal::words_to_number;
10
11/// Parse spoken money expression to written form.
12pub fn parse(input: &str) -> Option<String> {
13    let original = input.trim();
14    let input_lower = original.to_lowercase();
15
16    // "one dollars" is grammatically incorrect - pass through
17    if input_lower == "one dollars" {
18        return None;
19    }
20
21    // Try other currencies (won, yen, yuan)
22    if let Some(result) = parse_other_currency(&input_lower) {
23        return Some(result);
24    }
25
26    // Try large currency first (most specific - contains scale words)
27    if let Some(result) = parse_large_currency(original, &input_lower) {
28        return Some(result);
29    }
30
31    // Try decimal dollar patterns (twenty point five o six dollars)
32    if let Some(result) = parse_decimal_dollars(&input_lower) {
33        return Some(result);
34    }
35
36    // Try dollars and cents
37    if let Some(result) = parse_dollars_and_cents(&input_lower) {
38        return Some(result);
39    }
40
41    if let Some(result) = parse_dollars(&input_lower) {
42        return Some(result);
43    }
44
45    if let Some(result) = parse_cents(&input_lower) {
46        return Some(result);
47    }
48
49    None
50}
51
52/// Parse other currencies (won, yen, yuan)
53fn parse_other_currency(input: &str) -> Option<String> {
54    // Korean won: "X billion won" → "₩X billion"
55    for scale in &["trillion", "billion", "million"] {
56        let pattern = format!(" {} won", scale);
57        if input.ends_with(&pattern) {
58            let num_part = input.trim_end_matches(&pattern);
59            let num = words_to_number(num_part)? as i64;
60            return Some(format!("₩{} {}", num, scale));
61        }
62    }
63
64    // Japanese yen: "X billion yen" → "¥X billion"
65    for scale in &["trillion", "billion", "million"] {
66        let pattern = format!(" {} yen", scale);
67        if input.ends_with(&pattern) {
68            let num_part = input.trim_end_matches(&pattern);
69            let num = words_to_number(num_part)? as i64;
70            return Some(format!("¥{} {}", num, scale));
71        }
72    }
73
74    // Chinese yuan: "X billion yuan" → "X billion yuan" (no symbol)
75    for scale in &["trillion", "billion", "million"] {
76        let pattern = format!(" {} yuan", scale);
77        if input.ends_with(&pattern) {
78            let num_part = input.trim_end_matches(&pattern);
79            // Handle decimal like "one point six nine billion yuan"
80            if num_part.contains(" point ") {
81                let parts: Vec<&str> = num_part.split(" point ").collect();
82                if parts.len() == 2 {
83                    let integer = words_to_number(parts[0])? as i64;
84                    let decimal = parse_decimal_digits(parts[1])?;
85                    return Some(format!("{}.{} {} yuan", integer, decimal, scale));
86                }
87            }
88            let num = words_to_number(num_part)? as i64;
89            return Some(format!("{} {} yuan", num, scale));
90        }
91    }
92
93    None
94}
95
96/// Parse decimal dollar patterns like "twenty point five o six dollars"
97fn parse_decimal_dollars(input: &str) -> Option<String> {
98    // Pattern: "X point Y dollars" where Y can contain "o"
99    if input.ends_with(" dollars") && input.contains(" point ") {
100        let num_part = input.trim_end_matches(" dollars");
101        let parts: Vec<&str> = num_part.splitn(2, " point ").collect();
102        if parts.len() == 2 {
103            let integer = if parts[0].is_empty() {
104                String::new()
105            } else {
106                (words_to_number(parts[0])? as i64).to_string()
107            };
108            let decimal = parse_decimal_digits(parts[1])?;
109            if integer.is_empty() {
110                return Some(format!("$.{}", decimal));
111            }
112            return Some(format!("${}.{}", integer, decimal));
113        }
114    }
115
116    // Pattern: "point X dollars" (no integer part)
117    if input.starts_with("point ") && input.ends_with(" dollars") {
118        let decimal_part = input.strip_prefix("point ")?.strip_suffix(" dollars")?;
119        let decimal = parse_decimal_digits(decimal_part)?;
120        return Some(format!("$.{}", decimal));
121    }
122
123    None
124}
125
126/// Parse "X dollars and Y cents" pattern
127fn parse_dollars_and_cents(input: &str) -> Option<String> {
128    // Pattern: "X united states dollars and Y cents"
129    if let Some((dollars_part, rest)) = input.split_once(" united states dollars and ") {
130        if rest.ends_with(" cents") || rest.ends_with(" cent") {
131            let cents_words = rest.trim_end_matches(" cents").trim_end_matches(" cent");
132            let dollars = words_to_number(dollars_part)? as i64;
133            let cents = words_to_number(cents_words)? as i64;
134            return Some(format!("${}.{:02}", dollars, cents));
135        }
136    }
137
138    // Pattern: "X dollar and Y cents" (singular)
139    if let Some((dollars_part, rest)) = input.split_once(" dollar and ") {
140        if rest.ends_with(" cents") || rest.ends_with(" cent") {
141            let cents_words = rest.trim_end_matches(" cents").trim_end_matches(" cent");
142            let dollars = words_to_number(dollars_part)? as i64;
143            let cents = words_to_number(cents_words)? as i64;
144            return Some(format!("${}.{:02}", dollars, cents));
145        }
146    }
147
148    // Pattern: "X dollars and Y cents"
149    if let Some((dollars_part, rest)) = input.split_once(" dollars and ") {
150        if rest.ends_with(" cents") || rest.ends_with(" cent") {
151            let cents_words = rest.trim_end_matches(" cents").trim_end_matches(" cent");
152            let dollars = words_to_number(dollars_part)? as i64;
153            let cents = words_to_number(cents_words)? as i64;
154            return Some(format!("${}.{:02}", dollars, cents));
155        }
156    }
157
158    // Pattern: "X dollars Y cents" (without "and")
159    if let Some((dollars_part, rest)) = input.split_once(" dollars ") {
160        if rest.ends_with(" cents") {
161            let cents_words = rest.trim_end_matches(" cents");
162            let dollars = words_to_number(dollars_part)? as i64;
163            let cents = words_to_number(cents_words)? as i64;
164            return Some(format!("${}.{:02}", dollars, cents));
165        }
166        // Pattern: "X dollars Y" (implied cents, e.g., "seventy five dollars sixty three")
167        if let Some(cents) = words_to_number(rest) {
168            let cents = cents as i64;
169            if cents > 0 && cents < 100 {
170                let dollars = words_to_number(dollars_part)? as i64;
171                return Some(format!("${}.{:02}", dollars, cents));
172            }
173        }
174    }
175
176    None
177}
178
179/// Parse "X dollars" pattern
180fn parse_dollars(input: &str) -> Option<String> {
181    // "one dollar" (singular)
182    if input == "one dollar" {
183        return Some("$1".to_string());
184    }
185
186    // "X dollar" (singular with number, e.g., "twenty dollar")
187    if input.ends_with(" dollar") {
188        let num_part = input.trim_end_matches(" dollar");
189        let num = parse_money_number(num_part)?;
190        return Some(format!("${}", num));
191    }
192
193    // "X dollars"
194    if input.ends_with(" dollars") {
195        let num_part = input.trim_end_matches(" dollars");
196        let num = parse_money_number(num_part)?;
197        return Some(format!("${}", num));
198    }
199
200    None
201}
202
203/// Parse money number, handling shorthand like "one fifty five" = 155
204fn parse_money_number(input: &str) -> Option<i64> {
205    let words: Vec<&str> = input.split_whitespace().collect();
206
207    // Try shorthand patterns first
208    if words.len() >= 2 {
209        // Check for "X hundred" pattern at the end (ninety nine hundred = 9900)
210        if *words.last()? == "hundred" {
211            let prefix = words[..words.len() - 1].join(" ");
212            if let Some(num) = words_to_number(&prefix) {
213                return Some((num as i64) * 100);
214            }
215        }
216
217        // Check for "X YY" shorthand (one fifty five = 155)
218        // Only applies when first word is a single digit (1-9)
219        let first_word = words[0];
220        let is_single_digit = matches!(
221            first_word,
222            "one" | "two" | "three" | "four" | "five" | "six" | "seven" | "eight" | "nine"
223        );
224
225        if is_single_digit {
226            if let Some(first) = words_to_number(first_word) {
227                let first = first as i64;
228                let rest = words[1..].join(" ");
229                // Rest must be a two-digit number (10-99)
230                if let Some(tens_ones) = words_to_number(&rest) {
231                    let tens_ones = tens_ones as i64;
232                    if tens_ones >= 10 && tens_ones <= 99 {
233                        return Some(first * 100 + tens_ones);
234                    }
235                }
236            }
237        }
238    }
239
240    // Fall back to standard cardinal parsing
241    words_to_number(input).map(|n| n as i64)
242}
243
244/// Parse "X cents" pattern
245fn parse_cents(input: &str) -> Option<String> {
246    if input == "one cent" {
247        return Some("$0.01".to_string());
248    }
249
250    if input.ends_with(" cents") {
251        let num_part = input.trim_end_matches(" cents");
252        let cents = words_to_number(num_part)? as i64;
253        return Some(format!("$0.{:02}", cents));
254    }
255
256    None
257}
258
259/// Parse large currency amounts (billions, millions)
260fn parse_large_currency(original: &str, input_lower: &str) -> Option<String> {
261    // "X billion dollars" → "$X billion"
262    for scale in &["trillion", "billion", "million"] {
263        let pattern = format!(" {} dollars", scale);
264        if input_lower.ends_with(&pattern) {
265            let num_part = &input_lower[..input_lower.len() - pattern.len()];
266
267            // Extract original scale word to preserve casing
268            // "dollars" is 7 chars, scale is scale.len() chars, space is 1 char
269            let scale_start = original.len() - 7 - 1 - scale.len();
270            let scale_end = original.len() - 7 - 1;
271            let orig_scale = &original[scale_start..scale_end];
272
273            // Handle decimal like "two point five billion dollars"
274            if num_part.contains(" point ") {
275                let result = parse_decimal_scale(num_part, orig_scale)?;
276                return Some(result);
277            }
278            let num = words_to_number(num_part)? as i64;
279            return Some(format!("${} {}", num, orig_scale));
280        }
281    }
282
283    None
284}
285
286/// Parse decimal scale numbers like "two point five"
287fn parse_decimal_scale(input: &str, scale: &str) -> Option<String> {
288    let parts: Vec<&str> = input.split(" point ").collect();
289    if parts.len() != 2 {
290        return None;
291    }
292
293    let integer = words_to_number(parts[0])? as i64;
294    let decimal = parse_decimal_digits(parts[1])?;
295
296    Some(format!("${}.{} {}", integer, decimal, scale))
297}
298
/// Convert spoken digits into a digit string ("five" → "5", "five o" → "50").
///
/// Each whitespace-separated word must be a single digit word ("zero"
/// through "nine"); "o" and "oh" are accepted as zero.
///
/// Returns `None` when any word is not a digit word, or when the input
/// contains no words at all. An empty digit string would make callers emit
/// malformed output such as "$5." or "$.".
fn parse_decimal_digits(input: &str) -> Option<String> {
    // Collecting into `Option<String>` stops at the first unknown word.
    let digits = input
        .split_whitespace()
        .map(|word| match word {
            "zero" | "o" | "oh" => Some('0'),
            "one" => Some('1'),
            "two" => Some('2'),
            "three" => Some('3'),
            "four" => Some('4'),
            "five" => Some('5'),
            "six" => Some('6'),
            "seven" => Some('7'),
            "eight" => Some('8'),
            "nine" => Some('9'),
            _ => None,
        })
        .collect::<Option<String>>()?;

    if digits.is_empty() {
        None
    } else {
        Some(digits)
    }
}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every spoken phrase is tagged as its paired written form.
    fn assert_tagged(cases: &[(&str, &str)]) {
        for &(spoken, written) in cases {
            assert_eq!(
                parse(spoken),
                Some(written.to_string()),
                "input: {:?}",
                spoken
            );
        }
    }

    #[test]
    fn test_dollars() {
        assert_tagged(&[
            ("one dollar", "$1"),
            ("five dollars", "$5"),
            ("twenty dollars", "$20"),
            ("one hundred dollars", "$100"),
            ("fifteen thousand dollars", "$15000"),
        ]);
    }

    #[test]
    fn test_dollars_and_cents() {
        assert_tagged(&[
            ("one dollar and fifty cents", "$1.50"),
            ("five dollars and twenty five cents", "$5.25"),
            ("eleven dollars and fifty one cents", "$11.51"),
        ]);
    }

    #[test]
    fn test_dollars_implied_cents() {
        assert_tagged(&[
            ("seventy five dollars sixty three", "$75.63"),
            ("twenty nine dollars fifty", "$29.50"),
        ]);
    }

    #[test]
    fn test_cents() {
        assert_tagged(&[
            ("one cent", "$0.01"),
            ("fifty cents", "$0.50"),
            ("ninety nine cents", "$0.99"),
        ]);
    }

    #[test]
    fn test_large_amounts() {
        assert_tagged(&[
            ("fifty million dollars", "$50 million"),
            ("fifty billion dollars", "$50 billion"),
            ("two point five billion dollars", "$2.5 billion"),
        ]);
    }

    #[test]
    fn test_not_money() {
        // Inputs without any currency word must be left untagged.
        for &phrase in &["hello", "five"] {
            assert_eq!(parse(phrase), None, "input: {:?}", phrase);
        }
    }
}