/// Rewrites non-ASCII numerals found in `text` as ASCII digits.
///
/// The conversions run as a fixed pipeline, each stage consuming the output
/// of the previous one:
///
/// 1. the Japanese converter from `crate::laws::benford::japanese`,
/// 2. Chinese financial numerals,
/// 3. Hindi (Devanagari) digits,
/// 4. Arabic-Indic digits.
///
/// Text that none of the stages recognise is passed through unchanged.
pub fn convert_international_numerals(text: &str) -> String {
    let original = text.to_string();
    let after_japanese = crate::laws::benford::japanese::convert_japanese_numerals(&original);
    let after_chinese = convert_chinese_numerals(&after_japanese);
    let after_hindi = convert_hindi_numerals(&after_chinese);
    convert_arabic_numerals(&after_hindi)
}
fn convert_chinese_numerals(text: &str) -> String {
use regex::Regex;
let financial_pattern = Regex::new(r"[壹貳參肆伍陸柒捌玖拾佰仟萬億]+").unwrap();
let result = financial_pattern
.replace_all(text, |caps: ®ex::Captures| {
let chinese_num = caps.get(0).unwrap().as_str();
match parse_chinese_financial_number(chinese_num) {
Ok(arabic_num) => arabic_num.to_string(),
Err(_) => chinese_num.to_string(),
}
})
.to_string();
result
}
/// Replaces each Devanagari digit (`०`–`९`) in `text` with the matching ASCII
/// digit; every other character is copied through untouched.
fn convert_hindi_numerals(text: &str) -> String {
    // The ten Devanagari digits occupy consecutive code points, so the digit
    // value is simply the distance from the zero digit.
    const DEVANAGARI_ZERO: u32 = '०' as u32;

    let mut converted = String::with_capacity(text.len());
    for ch in text.chars() {
        let mapped = match ch {
            '०'..='९' => char::from_digit(ch as u32 - DEVANAGARI_ZERO, 10).unwrap_or(ch),
            _ => ch,
        };
        converted.push(mapped);
    }
    converted
}
/// Replaces each Arabic-Indic digit (U+0660–U+0669) in `text` with the
/// matching ASCII digit; every other character is copied through untouched.
fn convert_arabic_numerals(text: &str) -> String {
    // Written as escapes to keep the source readable in editors that reorder
    // right-to-left text; these are the characters `٠` and `٩`.
    const ARABIC_INDIC_ZERO: char = '\u{0660}';
    const ARABIC_INDIC_NINE: char = '\u{0669}';

    text.chars()
        .map(|ch| {
            if (ARABIC_INDIC_ZERO..=ARABIC_INDIC_NINE).contains(&ch) {
                // The digits are consecutive code points, so the offset from
                // the zero digit is the digit's value.
                let value = ch as u32 - ARABIC_INDIC_ZERO as u32;
                char::from_digit(value, 10).unwrap_or(ch)
            } else {
                ch
            }
        })
        .collect()
}
/// Parses a numeral written with traditional Chinese financial characters
/// (`壹貳參肆伍陸柒捌玖` with the multipliers `拾佰仟萬億`) into a `u64`.
///
/// The value is built from three accumulators:
///
/// * `digit`   – the most recent digit that has not yet met a multiplier,
/// * `section` – the running sum below 萬 (digits combined with 拾/佰/仟),
/// * `total`   – everything already scaled by 萬 or 億.
///
/// A multiplier with nothing in front of it counts as one of that unit, so
/// `拾` is 10 and `萬` is 10000. Characters that are not part of the numeral
/// alphabet are skipped. When two digits appear back to back, only the later
/// one is kept. An empty string parses as `0`.
///
/// # Errors
///
/// Returns `Err` with a descriptive message when the value does not fit in a
/// `u64`.
fn parse_chinese_financial_number(chinese: &str) -> Result<u64, String> {
    const WAN: u64 = 10_000;
    const YI: u64 = 100_000_000;

    let overflow = || format!("Chinese numeral `{}` does not fit in u64", chinese);

    let mut total = 0u64;
    let mut section = 0u64;
    let mut digit = 0u64;

    for c in chinese.chars() {
        match c {
            '壹' => digit = 1,
            '貳' => digit = 2,
            '參' => digit = 3,
            '肆' => digit = 4,
            '伍' => digit = 5,
            '陸' => digit = 6,
            '柒' => digit = 7,
            '捌' => digit = 8,
            '玖' => digit = 9,
            '拾' | '佰' | '仟' => {
                let unit: u64 = match c {
                    '拾' => 10,
                    '佰' => 100,
                    _ => 1_000,
                };
                // `digit` is at most 9 and `unit` at most 1000, so the product
                // cannot overflow; a bare multiplier counts as one unit.
                section = section
                    .checked_add(digit.max(1) * unit)
                    .ok_or_else(overflow)?;
                digit = 0;
            }
            '萬' => {
                // Only the group built since the last large multiplier is
                // scaled; the implicit 1 applies only if that group is empty.
                let group = section.checked_add(digit).ok_or_else(overflow)?;
                let scaled = group.max(1).checked_mul(WAN).ok_or_else(overflow)?;
                total = total.checked_add(scaled).ok_or_else(overflow)?;
                section = 0;
                digit = 0;
            }
            '億' => {
                // 億 scales everything written before it, including an earlier
                // 萬 group (e.g. 壹萬億).
                let prefix = total
                    .checked_add(section)
                    .and_then(|sum| sum.checked_add(digit))
                    .ok_or_else(overflow)?;
                total = prefix.max(1).checked_mul(YI).ok_or_else(overflow)?;
                section = 0;
                digit = 0;
            }
            _ => {}
        }
    }

    total
        .checked_add(section)
        .and_then(|sum| sum.checked_add(digit))
        .ok_or_else(overflow)
}
/// Extracts the numbers contained in `text` after normalising its numerals.
///
/// The text is first passed through `convert_international_numerals`; the
/// normalised string is then handed to the `extract_numbers` routine of
/// `crate::laws::benford::japanese`, whose result is returned as is.
pub fn extract_numbers_international(text: &str) -> Vec<f64> {
    let normalised = convert_international_numerals(text);
    let numbers = crate::laws::benford::japanese::extract_numbers(&normalised);
    numbers
}
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): this function carries no `#[test]` attribute, so the test
    // harness never runs it; `#[allow(dead_code)]` only silences the resulting
    // unused-function warning. Confirm whether it should be re-enabled or removed.
    #[allow(dead_code)]
    fn test_hindi_numerals_basic_disabled() {
        let cases = [
            ("१२३४५", "12345"),
            ("०९८७६", "09876"),
            ("abc१२3def", "abc123def"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(convert_hindi_numerals(input), expected);
        }
    }

    // Arabic-Indic digits become ASCII digits; surrounding text is preserved.
    #[test]
    fn test_arabic_numerals() {
        let cases = [
            ("١٢٣٤٥", "12345"),
            ("٠٩٨٧٦", "09876"),
            ("abc١٢٣def", "abc123def"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(convert_arabic_numerals(input), expected);
        }
    }

    // Small financial numerals, with and without an explicit leading digit.
    #[test]
    fn test_chinese_financial_numerals() {
        let cases: [(&str, u64); 5] = [
            ("壹", 1),
            ("拾", 10),
            ("壹拾", 10),
            ("貳拾參", 23),
            ("壹佰貳拾參", 123),
        ];
        for &(input, expected) in &cases {
            assert_eq!(parse_chinese_financial_number(input), Ok(expected));
        }
    }

    // A financial numeral embedded in other text is replaced by its value.
    #[test]
    fn test_convert_chinese_numerals() {
        let converted = convert_chinese_numerals("金額壹拾貳萬參仟肆佰伍拾陸");
        assert!(converted.contains("123456"));
    }

    // Numbers written with Hindi or Arabic-Indic digits are extracted.
    #[test]
    fn test_international_number_extraction() {
        let from_hindi = extract_numbers_international("राजस्व १२३४५६ रुपये");
        assert!(from_hindi.contains(&123456.0));

        let from_arabic = extract_numbers_international("المبلغ ١٢٣٤٥٦ ريال");
        assert!(from_arabic.contains(&123456.0));
    }

    // One sentence mixing ASCII, Hindi, Arabic-Indic and Chinese numerals.
    #[test]
    fn test_mixed_numerals() {
        let extracted =
            extract_numbers_international("English 123, Hindi १२३, Arabic ١٢٣, Chinese 壹貳參");
        assert!(extracted.len() >= 4);
        assert!(extracted.contains(&123.0));
    }

    // Hindi digits alone and embedded between ASCII letters.
    #[test]
    fn test_pure_hindi_numerals() {
        let embedded = convert_hindi_numerals("abc१२३def");
        assert_eq!(embedded, "abc123def");

        let bare = convert_hindi_numerals("१२३");
        assert_eq!(bare, "123");
    }
}