Skip to main content

text_processing_rs/taggers/
decimal.rs

//! Decimal number tagger.
//!
//! Converts spoken decimal numbers to written form:
//! - "three point one four" → "3.14"
//! - "zero point five" → "0.5"
//! - "five point two million" → "5.2 million"
//! - "point five" → ".5"
8
9use super::cardinal::words_to_number;
10
11/// Parse spoken decimal expression to written form.
12pub fn parse(input: &str) -> Option<String> {
13    let original = input.trim();
14    let input_lower = original.to_lowercase();
15
16    // Check for scale suffix (million, billion, etc.)
17    if let Some(result) = parse_with_scale(original, &input_lower) {
18        return Some(result);
19    }
20
21    // Check for "point" decimal
22    if let Some(result) = parse_point_decimal(&input_lower) {
23        return Some(result);
24    }
25
26    None
27}
28
29/// Parse numbers with scale words (million, billion, trillion)
30fn parse_with_scale(original: &str, input_lower: &str) -> Option<String> {
31    let scales = ["trillion", "billion", "million", "thousand"];
32
33    for scale in &scales {
34        if input_lower.ends_with(scale) {
35            let num_part = input_lower[..input_lower.len() - scale.len()].trim();
36
37            // Extract original scale word to preserve casing
38            let orig_scale = &original[original.len() - scale.len()..];
39
40            // Check if it has a decimal point
41            if num_part.contains(" point ") {
42                let decimal = parse_point_decimal(num_part)?;
43                return Some(format!("{} {}", decimal, orig_scale));
44            }
45
46            // Plain number with scale
47            let num = words_to_number(num_part)? as i64;
48            return Some(format!("{} {}", num, orig_scale));
49        }
50    }
51
52    None
53}
54
55/// Parse "X point Y" decimal pattern
56fn parse_point_decimal(input: &str) -> Option<String> {
57    // Handle negative
58    let (is_negative, rest) = if input.starts_with("minus ") {
59        (true, input.strip_prefix("minus ")?)
60    } else if input.starts_with("negative ") {
61        (true, input.strip_prefix("negative ")?)
62    } else {
63        (false, input)
64    };
65
66    // Handle "point X" (no integer part, e.g., "point five" → ".5")
67    let (integer_str, decimal_str) = if rest.starts_with("point ") {
68        ("", rest.strip_prefix("point ")?)
69    } else if rest.contains(" point ") {
70        let parts: Vec<&str> = rest.splitn(2, " point ").collect();
71        if parts.len() != 2 {
72            return None;
73        }
74        (parts[0], parts[1])
75    } else {
76        return None;
77    };
78
79    // Integer part (can be empty for ".5")
80    let integer_part = if integer_str.is_empty() {
81        String::new()
82    } else {
83        (words_to_number(integer_str)? as i64).to_string()
84    };
85
86    // Decimal part - parse as individual digits
87    let decimal_part = parse_decimal_digits(decimal_str)?;
88
89    let sign = if is_negative { "-" } else { "" };
90
91    if integer_part.is_empty() {
92        Some(format!("{}.{}", sign, decimal_part))
93    } else {
94        Some(format!("{}{}.{}", sign, integer_part, decimal_part))
95    }
96}
97
98/// Parse decimal digits: "one four" → "14", "o five" → "05"
99fn parse_decimal_digits(input: &str) -> Option<String> {
100    let words: Vec<&str> = input.split_whitespace().collect();
101    let mut result = String::new();
102
103    for word in words {
104        let digit = match word {
105            "zero" | "o" | "oh" => '0',
106            "one" => '1',
107            "two" => '2',
108            "three" => '3',
109            "four" => '4',
110            "five" => '5',
111            "six" => '6',
112            "seven" => '7',
113            "eight" => '8',
114            "nine" => '9',
115            // Handle compound numbers like "twenty six" → "26"
116            _ => {
117                // Try to parse as a number
118                if let Some(num) = words_to_number(word) {
119                    for c in (num as i64).to_string().chars() {
120                        result.push(c);
121                    }
122                    continue;
123                }
124                return None;
125            }
126        };
127        result.push(digit);
128    }
129
130    if result.is_empty() {
131        None
132    } else {
133        Some(result)
134    }
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse` on each spoken phrase and checks it against the expected
    /// written form.
    fn assert_tagged(cases: &[(&str, &str)]) {
        for &(spoken, written) in cases {
            assert_eq!(
                parse(spoken).as_deref(),
                Some(written),
                "input: {:?}",
                spoken
            );
        }
    }

    // Integer part, "point", then one digit per word.
    #[test]
    fn test_simple_decimal() {
        assert_tagged(&[
            ("three point one four", "3.14"),
            ("zero point five", "0.5"),
            ("zero point two six", "0.26"),
        ]);
    }

    // No integer part: the result starts with the decimal point.
    #[test]
    fn test_point_only() {
        assert_tagged(&[("point five", ".5"), ("point zero two", ".02")]);
    }

    // "o" is accepted as a spoken zero.
    #[test]
    fn test_with_oh() {
        assert_tagged(&[
            ("eighteen point o five", "18.05"),
            ("eighteen point o o o", "18.000"),
        ]);
    }

    // A leading "minus" becomes "-"; trailing zeros are kept.
    #[test]
    fn test_negative() {
        assert_tagged(&[("minus sixty point two four zero zero", "-60.2400")]);
    }

    // The scale word is carried through after the number.
    #[test]
    fn test_with_scale() {
        assert_tagged(&[
            ("five point two million", "5.2 million"),
            ("fifty billion", "50 billion"),
            ("four point eight five billion", "4.85 billion"),
        ]);
    }
}