/// Rewrites every standalone run of ASCII digits longer than four digits in
/// `text` using the grouping produced by `indian_comma_format`.
///
/// A digit run is left untouched when:
/// * it has four digits or fewer,
/// * it is immediately preceded by `.` (fractional part, e.g. the `14159` in
///   `3.14159`, or a version component such as `v1.10000`), or
/// * it is immediately followed by `.` **and then another digit** (integer
///   part of a decimal, e.g. `12345.67`).
///
/// A `.` that is not followed by a digit is treated as ordinary punctuation,
/// so a number that ends a sentence (`"... is 100000."`) is still formatted.
///
/// All non-digit text, including multi-byte UTF-8, is copied through verbatim.
fn format_numbers(text: &str) -> String {
    // Scanning bytes is safe here: ASCII digits and '.' are single-byte code
    // points and never appear inside a multi-byte UTF-8 sequence, so every
    // offset we slice at is a char boundary.
    let bytes = text.as_bytes();
    let mut result = String::with_capacity(text.len() + 8);
    // Everything before this byte offset has already been written to `result`.
    let mut copied_to = 0;
    let mut i = 0;

    while i < bytes.len() {
        if !bytes[i].is_ascii_digit() {
            i += 1;
            continue;
        }

        let start = i;
        while i < bytes.len() && bytes[i].is_ascii_digit() {
            i += 1;
        }
        let digits = &text[start..i];

        let preceded_by_dot = start > 0 && bytes[start - 1] == b'.';
        // Only a dot that is itself followed by a digit is a decimal point;
        // a bare trailing dot is sentence punctuation.
        let followed_by_decimal_point = bytes.get(i) == Some(&b'.')
            && matches!(bytes.get(i + 1), Some(b) if b.is_ascii_digit());

        if digits.len() > 4 && !preceded_by_dot && !followed_by_decimal_point {
            // Flush the untouched text before the number, then the grouped form.
            result.push_str(&text[copied_to..start]);
            result.push_str(&indian_comma_format(digits));
            copied_to = i;
        }
    }

    // Flush whatever remains after the last rewritten number.
    result.push_str(&text[copied_to..]);
    result
}
/// Inserts Indian-style thousands separators into a string of digits.
///
/// The last three characters form one group; everything before them is split
/// into groups of two, counted from the right (so an odd-length prefix starts
/// with a single-character group). Inputs of three bytes or fewer are returned
/// unchanged.
///
/// The input is processed byte-by-byte, so it is expected to be ASCII.
fn indian_comma_format(digits: &str) -> String {
    let total = digits.len();
    if total < 4 {
        return digits.to_owned();
    }

    // `tail` is the final three-digit group; `head` is everything before it.
    let (head, tail) = digits.split_at(total - 3);
    let head_bytes = head.as_bytes();

    // If `head` has odd length, its first byte stands alone; the rest pairs up.
    let (single, pairs) = head_bytes.split_at(head_bytes.len() % 2);

    let mut out = String::with_capacity(total + total / 2);
    // `head` is never empty here, so every head group is followed by a comma:
    // either the one separating it from the next pair or the one before `tail`.
    for group in single.chunks(1).chain(pairs.chunks(2)) {
        out.extend(group.iter().map(|&b| b as char));
        out.push(',');
    }
    out.extend(tail.bytes().map(|b| b as char));
    out
}
/// Returns a preprocessed copy of `text`.
///
/// The only transformation applied is the one performed by `format_numbers`;
/// its result is returned as-is.
pub fn preprocess_for_tts(text: &str) -> String {
    format_numbers(text)
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- indian_comma_format: grouping of bare digit strings ----

    #[test]
    fn test_5_digits() {
        let grouped = indian_comma_format("10000");
        assert_eq!(grouped, "10,000");
    }

    #[test]
    fn test_6_digits() {
        let grouped = indian_comma_format("100000");
        assert_eq!(grouped, "1,00,000");
    }

    #[test]
    fn test_7_digits() {
        let grouped = indian_comma_format("1234567");
        assert_eq!(grouped, "12,34,567");
    }

    #[test]
    fn test_8_digits() {
        let grouped = indian_comma_format("12345678");
        assert_eq!(grouped, "1,23,45,678");
    }

    #[test]
    fn test_3_digits_unchanged() {
        let grouped = indian_comma_format("999");
        assert_eq!(grouped, "999");
    }

    // ---- format_numbers: digit runs embedded in text ----

    #[test]
    fn test_5_digit_in_sentence() {
        let output = format_numbers("population is 10000");
        assert_eq!(output, "population is 10,000");
    }

    #[test]
    fn test_4_digits_unchanged() {
        // Four-digit numbers are below the formatting threshold.
        for input in ["1000 items", "9999 rupees"] {
            assert_eq!(format_numbers(input), input);
        }
    }

    #[test]
    fn test_decimal_unchanged() {
        let input = "pi is 3.14159";
        assert_eq!(format_numbers(input), "pi is 3.14159");
    }

    #[test]
    fn test_decimal_5_digit_unchanged() {
        let input = "value 3.14159 units";
        assert_eq!(format_numbers(input), "value 3.14159 units");
    }

    #[test]
    fn test_large_preceded_by_dot_unchanged() {
        let input = "v1.10000";
        assert_eq!(format_numbers(input), "v1.10000");
    }

    #[test]
    fn test_multiple_numbers() {
        // Only the five-digit number is rewritten; the three-digit one is kept.
        assert_eq!(
            format_numbers("cost 50000 and count 200"),
            "cost 50,000 and count 200"
        );
    }

    #[test]
    fn test_rupees_lakh() {
        let output = format_numbers("₹100000 salary");
        assert_eq!(output, "₹1,00,000 salary");
    }

    #[test]
    fn test_crore() {
        let output = format_numbers("10000000 population");
        assert_eq!(output, "1,00,00,000 population");
    }

    // ---- preprocess_for_tts: public entry point ----

    #[test]
    fn test_preprocess_passthrough_small() {
        let output = preprocess_for_tts("hello world");
        assert_eq!(output, "hello world");
    }

    #[test]
    fn test_preprocess_formats_large_number() {
        let output = preprocess_for_tts("There are 1000000 cases pending.");
        assert_eq!(output, "There are 10,00,000 cases pending.");
    }

    #[test]
    fn test_preprocess_leaves_decimal() {
        let output = preprocess_for_tts("rate is 3.14159");
        assert_eq!(output, "rate is 3.14159");
    }

    #[test]
    fn test_preprocess_hindi_with_number() {
        let output = preprocess_for_tts("₹50000 का भुगतान करें");
        assert_eq!(output, "₹50,000 का भुगतान करें");
    }
}