use alloc::string::String;
use alloc::vec::Vec;
/// Converts a single Thai digit (U+0E50 `๐` through U+0E59 `๙`) to the
/// matching ASCII digit `'0'`..=`'9'`.
///
/// Returns `None` for every other character, including ASCII digits.
#[inline]
pub fn thai_digit_to_ascii(c: char) -> Option<char> {
    match c {
        // The ten Thai digits are contiguous and in numeric order, so the
        // offset from U+0E50 is the digit's value.
        '\u{0E50}'..='\u{0E59}' => char::from_digit(c as u32 - 0x0E50, 10),
        _ => None,
    }
}
/// Returns a copy of `text` in which every Thai digit is replaced by its
/// ASCII equivalent; all other characters are copied unchanged.
pub fn thai_digits_to_ascii(text: &str) -> String {
    let has_thai_digit = text.chars().any(|ch| thai_digit_to_ascii(ch).is_some());
    if !has_thai_digit {
        // Nothing to translate: copy the input verbatim.
        return String::from(text);
    }

    // Thai digits are 3 bytes in UTF-8 and ASCII digits are 1, so the output
    // never needs more room than the input.
    let mut converted = String::with_capacity(text.len());
    for ch in text.chars() {
        converted.push(match thai_digit_to_ascii(ch) {
            Some(ascii) => ascii,
            None => ch,
        });
    }
    converted
}
/// Returns `true` when `text` is non-empty and consists solely of Thai digits.
///
/// The empty string and any string containing an ASCII digit or other
/// character yield `false`.
#[inline]
pub fn is_thai_digit_str(text: &str) -> bool {
    if text.is_empty() {
        return false;
    }
    text.chars().all(|ch| thai_digit_to_ascii(ch).is_some())
}
/// One lexical unit of a spelled-out Thai number.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum NumToken {
    /// A plain digit word carrying its numeric value (ศูนย์ = 0 … เก้า = 9).
    Digit(u64),
    /// ยี่ — the word used for "two" in ยี่สิบ (20).
    Yi,
    /// เอ็ด — the word used for a trailing "one" after สิบ.
    Et,
    /// สิบ — multiplier 10.
    Sip,
    /// ร้อย — multiplier 100.
    Roi,
    /// พัน — multiplier 1,000.
    Pan,
    /// หมื่น — multiplier 10,000.
    Muen,
    /// แสน — multiplier 100,000.
    Saen,
    /// ล้าน — multiplier 1,000,000.
    Lan,
}
/// Strips one number word from the front of `s`.
///
/// Returns the recognised token together with the unconsumed remainder, or
/// `None` when `s` does not start with any known number word (this includes
/// the empty string).
fn next_num_token(s: &str) -> Option<(NumToken, &str)> {
    const VOCAB: &[(&str, NumToken)] = &[
        ("ศูนย์", NumToken::Digit(0)),
        ("หนึ่ง", NumToken::Digit(1)),
        ("เอ็ด", NumToken::Et),
        ("ยี่", NumToken::Yi),
        ("สอง", NumToken::Digit(2)),
        ("สาม", NumToken::Digit(3)),
        ("สี่", NumToken::Digit(4)),
        ("ห้า", NumToken::Digit(5)),
        ("หก", NumToken::Digit(6)),
        ("เจ็ด", NumToken::Digit(7)),
        ("แปด", NumToken::Digit(8)),
        ("เก้า", NumToken::Digit(9)),
        ("สิบ", NumToken::Sip),
        ("ร้อย", NumToken::Roi),
        ("พัน", NumToken::Pan),
        ("หมื่น", NumToken::Muen),
        ("แสน", NumToken::Saen),
        ("ล้าน", NumToken::Lan),
    ];

    // First vocabulary entry that prefixes `s` wins.
    VOCAB
        .iter()
        .find_map(|&(word, token)| s.strip_prefix(word).map(|remainder| (token, remainder)))
}
/// Parses a spelled-out Thai number that contains no ล้าน (million) word.
///
/// The input must be an unbroken sequence of number words (no whitespace).
/// An empty input yields `Some(0)`.
///
/// Accepted shape:
/// * each multiplier (แสน, หมื่น, พัน, ร้อย, สิบ) may appear at most once and
///   only in strictly descending order;
/// * a multiplier may be preceded by one digit word (or ยี่, read as 2) as its
///   coefficient; without one the coefficient is 1;
/// * เอ็ด is accepted only directly after สิบ and fills the units slot;
/// * a trailing digit word fills the units slot, unless เอ็ด already did.
///
/// Returns `None` for unknown words, for ล้าน, and for any sequence that
/// breaks the rules above. That now covers malformed inputs such as
/// `"สิบสิบ"` or `"สิบเอ็ดสอง"`, which used to be summed into a number.
fn parse_below_lan(s: &str) -> Option<u64> {
    let mut rest = s;
    let mut total: u64 = 0;
    // Digit seen but not yet attached to a multiplier or to the units slot.
    let mut pending: Option<u64> = None;
    // Place value of the most recent multiplier; starts just above แสน so
    // that any multiplier is acceptable first.
    let mut last_place: u64 = 1_000_000;
    // Set once เอ็ด has supplied the units digit; nothing may follow it.
    let mut units_filled = false;

    while !rest.is_empty() {
        let (tok, tail) = next_num_token(rest)?;
        rest = tail;

        let place: u64 = match tok {
            NumToken::Digit(d) => {
                if pending.is_some() || units_filled {
                    return None;
                }
                pending = Some(d);
                continue;
            }
            NumToken::Yi => {
                if pending.is_some() || units_filled {
                    return None;
                }
                pending = Some(2);
                continue;
            }
            NumToken::Et => {
                // Because multipliers strictly descend, `last_place == 10`
                // means สิบ is the word immediately before this เอ็ด.
                if last_place != 10 || pending.is_some() || units_filled {
                    return None;
                }
                total = total.checked_add(1)?;
                units_filled = true;
                continue;
            }
            NumToken::Sip => 10,
            NumToken::Roi => 100,
            NumToken::Pan => 1_000,
            NumToken::Muen => 10_000,
            NumToken::Saen => 100_000,
            // Millions are split off by the caller before this function runs.
            NumToken::Lan => return None,
        };

        if units_filled || place >= last_place {
            return None;
        }
        let coeff = pending.take().unwrap_or(1);
        total = total.checked_add(coeff.checked_mul(place)?)?;
        last_place = place;
    }

    // A digit left over at the end is the units digit.
    if let Some(units) = pending {
        total = total.checked_add(units)?;
    }
    Some(total)
}
/// Formats `n` as a base-10 ASCII string without relying on `ToString`.
fn u64_to_string(mut n: u64) -> String {
    // u64::MAX has 20 decimal digits.
    let mut reversed: Vec<u8> = Vec::with_capacity(20);

    // Emit least-significant digit first; looping at least once makes zero
    // come out as "0" without a special case.
    loop {
        reversed.push(b'0' + (n % 10) as u8);
        n /= 10;
        if n == 0 {
            break;
        }
    }

    reversed.iter().rev().map(|&digit| char::from(digit)).collect()
}
/// Parses a spelled-out Thai cardinal number into a `u64`.
///
/// Leading and trailing whitespace is ignored. Returns `None` for empty
/// input, for text that is not a number, and when the value does not fit in
/// a `u64`.
///
/// Every ล้าน (million) multiplies everything to its left by 1,000,000, so
/// the text is processed as groups separated by ล้าน. This matches the
/// output of `u64_to_thai_word`, which repeats ล้าน for values of 10^12 and
/// above (e.g. `"หนึ่งล้านล้าน"`). Previously only the first ล้าน was
/// honoured and any further one made the parse fail.
///
/// A leading bare ล้าน is read as "one million".
pub fn parse_thai_word(text: &str) -> Option<u64> {
    const LAN: &str = "ล้าน";

    let s = text.trim();
    if s.is_empty() {
        return None;
    }

    let mut groups = s.split(LAN);

    // `split` always yields at least one piece. Because `s` is non-empty, an
    // empty first piece can only mean the text starts with ล้าน, which
    // implies a coefficient of one.
    let head = groups.next()?;
    let mut value: u64 = if head.is_empty() {
        1
    } else {
        parse_below_lan(head)?
    };

    // Each further piece was preceded by a ล้าน: scale what we have and add
    // the piece (an empty piece contributes nothing).
    for group in groups {
        let part: u64 = if group.is_empty() {
            0
        } else {
            parse_below_lan(group)?
        };
        value = value.checked_mul(1_000_000)?.checked_add(part)?;
    }

    Some(value)
}
pub fn thai_word_to_decimal(text: &str) -> Option<String> {
parse_thai_word(text).map(u64_to_string)
}
/// Returns the Thai word for a digit in `1..=9`.
///
/// Any other value (including 0) yields the empty string, so callers can
/// append the result unconditionally.
#[inline]
fn digit_word(d: u64) -> &'static str {
    const WORDS: [&str; 9] = [
        "หนึ่ง",
        "สอง",
        "สาม",
        "สี่",
        "ห้า",
        "หก",
        "เจ็ด",
        "แปด",
        "เก้า",
    ];
    match d {
        // The table starts at "one", hence the -1.
        1..=9 => WORDS[(d - 1) as usize],
        _ => "",
    }
}
/// Appends the Thai words for `n` to `out`, handling the places below one
/// million (แสน, หมื่น, พัน, ร้อย, สิบ and units).
///
/// Nothing is appended for `n == 0`. Callers in this file pass values below
/// 1,000,000.
fn write_below_lan(mut n: u64, out: &mut String) {
    // Places from ร้อย upward all render as "<digit><place word>".
    const UPPER_PLACES: [(u64, &str); 4] = [
        (100_000, "แสน"),
        (10_000, "หมื่น"),
        (1_000, "พัน"),
        (100, "ร้อย"),
    ];
    for &(place, name) in UPPER_PLACES.iter() {
        if n >= place {
            out.push_str(digit_word(n / place));
            out.push_str(name);
            n %= place;
        }
    }

    // What remains is below 100.
    let (tens, units) = (n / 10, n % 10);
    if tens == 0 {
        // A lone units digit uses the ordinary digit word (หนึ่ง, not เอ็ด).
        if units > 0 {
            out.push_str(digit_word(units));
        }
        return;
    }

    // Tens: 10 has no coefficient word, 20 uses ยี่, the rest use the digit.
    if tens == 2 {
        out.push_str("ยี่");
    } else if tens > 1 {
        out.push_str(digit_word(tens));
    }
    out.push_str("สิบ");

    // Units after a tens word: a trailing one is spelled เอ็ด.
    match units {
        0 => {}
        1 => out.push_str("เอ็ด"),
        _ => out.push_str(digit_word(units)),
    }
}
/// Appends the Thai words for `n` to `out`.
///
/// Values of one million and above are written as
/// `<words for n / 1,000,000>ล้าน<words for n % 1,000,000>`, recursing on the
/// millions count so that very large values repeat ล้าน.
fn write_thai_word(n: u64, out: &mut String) {
    let (millions, below) = (n / 1_000_000, n % 1_000_000);

    if millions == 0 {
        write_below_lan(below, out);
        return;
    }

    write_thai_word(millions, out);
    out.push_str("ล้าน");
    if below != 0 {
        write_below_lan(below, out);
    }
}
/// Spells `n` out in Thai number words.
///
/// Zero is rendered as ศูนย์; every other value is built by
/// `write_thai_word`.
pub fn u64_to_thai_word(n: u64) -> String {
    match n {
        0 => String::from("ศูนย์"),
        _ => {
            let mut words = String::new();
            write_thai_word(n, &mut words);
            words
        }
    }
}
/// A currency amount split into whole baht and satang.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BahtAmount {
    /// Whole-baht part of the amount.
    pub baht: u64,
    /// Satang part of the amount; `parse_thai_baht` only produces `0..=99`.
    pub satang: u8,
}
/// Parses a spelled-out Thai currency amount such as
/// `"ห้าบาทยี่สิบห้าสตางค์"`.
///
/// The text must contain the word บาท; everything before it is parsed as the
/// baht count. What follows บาท may be:
/// * nothing, or ถ้วน — zero satang;
/// * a number followed by สตางค์ — that many satang (at most 99).
///
/// Returns `None` for anything else, including an unparsable number or a
/// satang value above 99.
pub fn parse_thai_baht(text: &str) -> Option<BahtAmount> {
    let trimmed = text.trim();
    if trimmed.is_empty() {
        return None;
    }

    let (baht_words, tail) = trimmed.split_once("บาท")?;
    let baht = parse_thai_word(baht_words.trim())?;

    let satang: u8 = match tail.trim() {
        "" | "ถ้วน" => 0,
        fraction => {
            let satang_words = fraction.strip_suffix("สตางค์")?;
            let value = parse_thai_word(satang_words.trim())?;
            if value >= 100 {
                return None;
            }
            // In range 0..=99 here, so the narrowing cast cannot truncate.
            value as u8
        }
    };

    Some(BahtAmount { baht, satang })
}
/// Spells out a currency amount in Thai words.
///
/// The result is `<baht words>บาท` followed by ถ้วน when `satang` is zero,
/// or by `<satang words>สตางค์` otherwise. `satang` is not range-checked;
/// whatever value is passed is spelled out as given.
pub fn to_thai_baht_text(baht: u64, satang: u8) -> String {
    let mut text = u64_to_thai_word(baht);
    text.push_str("บาท");

    match satang {
        0 => text.push_str("ถ้วน"),
        fraction => {
            let satang_words = u64_to_thai_word(u64::from(fraction));
            text.push_str(&satang_words);
            text.push_str("สตางค์");
        }
    }

    text
}
// Unit tests for the Thai digit, number-word and baht-text helpers above.
// Tests are grouped by the function they exercise, in the order the
// functions are defined.
#[cfg(test)]
mod tests {
use super::*;
// ---- thai_digit_to_ascii ----
#[test]
fn thai_digits_map_correctly() {
let pairs = [
('๐', '0'),
('๑', '1'),
('๒', '2'),
('๓', '3'),
('๔', '4'),
('๕', '5'),
('๖', '6'),
('๗', '7'),
('๘', '8'),
('๙', '9'),
];
for (thai, ascii) in pairs {
assert_eq!(thai_digit_to_ascii(thai), Some(ascii), "failed for {thai}");
}
}
#[test]
fn non_digit_returns_none() {
// A Thai consonant, an ASCII digit and a space are all rejected.
assert_eq!(thai_digit_to_ascii('ก'), None);
assert_eq!(thai_digit_to_ascii('5'), None);
assert_eq!(thai_digit_to_ascii(' '), None);
}
// ---- thai_digits_to_ascii ----
#[test]
fn converts_all_thai_digits() {
assert_eq!(thai_digits_to_ascii("๐๑๒๓๔๕๖๗๘๙"), "0123456789");
}
#[test]
fn passthrough_ascii_only() {
assert_eq!(thai_digits_to_ascii("hello 123"), "hello 123");
}
#[test]
fn empty_string_passthrough() {
assert_eq!(thai_digits_to_ascii(""), "");
}
#[test]
fn mixed_thai_digit_in_sentence() {
// Only the digits change; surrounding Thai letters are preserved.
assert_eq!(thai_digits_to_ascii("ธนาคาร๑๐๐แห่ง"), "ธนาคาร100แห่ง");
}
#[test]
fn mixed_thai_and_ascii_digits() {
assert_eq!(thai_digits_to_ascii("๑2๓"), "123");
}
#[test]
fn no_allocation_when_no_thai_digits() {
// Only the returned text is checked here; allocation is not measured.
let result = thai_digits_to_ascii("no thai digits here");
assert_eq!(result, "no thai digits here");
}
// ---- is_thai_digit_str ----
#[test]
fn all_thai_digits() {
assert!(is_thai_digit_str("๑๒๓"));
assert!(is_thai_digit_str("๐"));
}
#[test]
fn mixed_is_false() {
assert!(!is_thai_digit_str("๑2๓"));
assert!(!is_thai_digit_str("๑ก"));
}
#[test]
fn ascii_only_is_false() {
assert!(!is_thai_digit_str("123"));
}
#[test]
fn empty_is_false() {
assert!(!is_thai_digit_str(""));
}
// ---- u64_to_string ----
#[test]
fn zero_formats_correctly() {
assert_eq!(u64_to_string(0), "0");
}
#[test]
fn small_number_formats_correctly() {
assert_eq!(u64_to_string(42), "42");
}
#[test]
fn large_number_formats_correctly() {
assert_eq!(u64_to_string(1_000_000), "1000000");
}
// ---- parse_thai_word: units and tens ----
#[test]
fn zero() {
assert_eq!(parse_thai_word("ศูนย์"), Some(0));
}
#[test]
fn one_to_nine() {
let cases = [
("หนึ่ง", 1u64),
("สอง", 2),
("สาม", 3),
("สี่", 4),
("ห้า", 5),
("หก", 6),
("เจ็ด", 7),
("แปด", 8),
("เก้า", 9),
];
for (word, expected) in cases {
assert_eq!(parse_thai_word(word), Some(expected), "failed for {word}");
}
}
#[test]
fn ten_implied_one() {
// A bare multiplier has an implied coefficient of one.
assert_eq!(parse_thai_word("สิบ"), Some(10));
}
#[test]
fn eleven_uses_et() {
assert_eq!(parse_thai_word("สิบเอ็ด"), Some(11));
}
#[test]
fn twelve_to_nineteen() {
let cases = [
("สิบสอง", 12u64),
("สิบสาม", 13),
("สิบสี่", 14),
("สิบห้า", 15),
("สิบหก", 16),
("สิบเจ็ด", 17),
("สิบแปด", 18),
("สิบเก้า", 19),
];
for (word, expected) in cases {
assert_eq!(parse_thai_word(word), Some(expected), "failed for {word}");
}
}
#[test]
fn twenty_uses_yi() {
assert_eq!(parse_thai_word("ยี่สิบ"), Some(20));
}
#[test]
fn twenty_one_yi_et() {
assert_eq!(parse_thai_word("ยี่สิบเอ็ด"), Some(21));
}
#[test]
fn thirty_four() {
assert_eq!(parse_thai_word("สามสิบสี่"), Some(34));
}
#[test]
fn ninety_nine() {
assert_eq!(parse_thai_word("เก้าสิบเก้า"), Some(99));
}
// ---- parse_thai_word: hundreds up to hundred-thousands ----
#[test]
fn hundred_implied_one() {
assert_eq!(parse_thai_word("ร้อย"), Some(100));
}
#[test]
fn one_hundred_explicit() {
assert_eq!(parse_thai_word("หนึ่งร้อย"), Some(100));
}
#[test]
fn one_hundred_twenty_three() {
assert_eq!(parse_thai_word("หนึ่งร้อยยี่สิบสาม"), Some(123));
}
#[test]
fn two_hundred() {
assert_eq!(parse_thai_word("สองร้อย"), Some(200));
}
#[test]
fn nine_hundred_ninety_nine() {
assert_eq!(parse_thai_word("เก้าร้อยเก้าสิบเก้า"), Some(999));
}
#[test]
fn one_thousand() {
assert_eq!(parse_thai_word("หนึ่งพัน"), Some(1_000));
assert_eq!(parse_thai_word("พัน"), Some(1_000));
}
#[test]
fn two_thousand_five_hundred() {
assert_eq!(parse_thai_word("สองพันห้าร้อย"), Some(2_500));
}
#[test]
fn ten_thousand() {
assert_eq!(parse_thai_word("หนึ่งหมื่น"), Some(10_000));
assert_eq!(parse_thai_word("หมื่น"), Some(10_000));
}
#[test]
fn hundred_thousand() {
assert_eq!(parse_thai_word("หนึ่งแสน"), Some(100_000));
assert_eq!(parse_thai_word("แสน"), Some(100_000));
}
// ---- parse_thai_word: millions ----
#[test]
fn one_million_explicit() {
assert_eq!(parse_thai_word("หนึ่งล้าน"), Some(1_000_000));
}
#[test]
fn one_million_implied() {
assert_eq!(parse_thai_word("ล้าน"), Some(1_000_000));
}
#[test]
fn ten_million() {
assert_eq!(parse_thai_word("สิบล้าน"), Some(10_000_000));
}
#[test]
fn hundred_million() {
assert_eq!(parse_thai_word("หนึ่งร้อยล้าน"), Some(100_000_000));
}
#[test]
fn two_million_five_hundred_thousand() {
assert_eq!(parse_thai_word("สองล้านห้าแสน"), Some(2_500_000));
}
#[test]
fn complex_seven_digit() {
assert_eq!(
parse_thai_word("สามล้านสี่แสนห้าหมื่นหกพันเจ็ดร้อยแปดสิบเก้า"),
Some(3_456_789)
);
}
// ---- parse_thai_word: rejected input ----
#[test]
fn empty_returns_none() {
assert_eq!(parse_thai_word(""), None);
}
#[test]
fn whitespace_only_returns_none() {
assert_eq!(parse_thai_word(" "), None);
}
#[test]
fn non_number_word_returns_none() {
assert_eq!(parse_thai_word("กินข้าว"), None);
assert_eq!(parse_thai_word("ประเทศไทย"), None);
}
#[test]
fn et_without_sip_is_invalid() {
// เอ็ด is only accepted after สิบ, so neither a bare เอ็ด nor ร้อยเอ็ด parses.
assert_eq!(parse_thai_word("เอ็ด"), None);
assert_eq!(parse_thai_word("ร้อยเอ็ด"), None);
}
#[test]
fn consecutive_digits_invalid() {
assert_eq!(parse_thai_word("หนึ่งสอง"), None);
}
// ---- thai_word_to_decimal ----
#[test]
fn word_to_decimal_converts() {
assert_eq!(thai_word_to_decimal("ยี่สิบ"), Some(String::from("20")));
assert_eq!(
thai_word_to_decimal("หนึ่งร้อยยี่สิบสาม"),
Some(String::from("123"))
);
}
#[test]
fn word_to_decimal_none_for_non_number() {
assert_eq!(thai_word_to_decimal("กิน"), None);
}
#[test]
fn leading_trailing_whitespace_trimmed() {
assert_eq!(parse_thai_word(" สิบ "), Some(10));
}
// ---- u64_to_thai_word ----
#[test]
fn zero_word() {
assert_eq!(u64_to_thai_word(0), "ศูนย์");
}
#[test]
fn single_digits_word() {
let cases = [
(1u64, "หนึ่ง"),
(2, "สอง"),
(3, "สาม"),
(4, "สี่"),
(5, "ห้า"),
(6, "หก"),
(7, "เจ็ด"),
(8, "แปด"),
(9, "เก้า"),
];
for (n, word) in cases {
assert_eq!(u64_to_thai_word(n), word, "failed for {n}");
}
}
#[test]
fn ten_implied_one_word() {
assert_eq!(u64_to_thai_word(10), "สิบ");
}
#[test]
fn eleven_et_form() {
assert_eq!(u64_to_thai_word(11), "สิบเอ็ด");
}
#[test]
fn twelve_to_nineteen_word() {
let cases = [(12u64, "สิบสอง"), (15, "สิบห้า"), (19, "สิบเก้า")];
for (n, word) in cases {
assert_eq!(u64_to_thai_word(n), word);
}
}
#[test]
fn twenty_yi_form() {
assert_eq!(u64_to_thai_word(20), "ยี่สิบ");
}
#[test]
fn twenty_one_yi_et_word() {
assert_eq!(u64_to_thai_word(21), "ยี่สิบเอ็ด");
}
#[test]
fn thirty_four_word() {
assert_eq!(u64_to_thai_word(34), "สามสิบสี่");
}
#[test]
fn one_hundred_word() {
// The generator always writes the explicit หนึ่ง coefficient.
assert_eq!(u64_to_thai_word(100), "หนึ่งร้อย");
}
#[test]
fn one_hundred_twenty_three_word() {
assert_eq!(u64_to_thai_word(123), "หนึ่งร้อยยี่สิบสาม");
}
#[test]
fn one_hundred_one_no_et() {
// With no tens word present, a trailing one is written หนึ่ง, not เอ็ด.
assert_eq!(u64_to_thai_word(101), "หนึ่งร้อยหนึ่ง");
}
#[test]
fn one_hundred_eleven_et() {
assert_eq!(u64_to_thai_word(111), "หนึ่งร้อยสิบเอ็ด");
}
#[test]
fn one_thousand_word() {
assert_eq!(u64_to_thai_word(1_000), "หนึ่งพัน");
}
#[test]
fn ten_thousand_word() {
assert_eq!(u64_to_thai_word(10_000), "หนึ่งหมื่น");
}
#[test]
fn hundred_thousand_word() {
assert_eq!(u64_to_thai_word(100_000), "หนึ่งแสน");
}
#[test]
fn one_million_word() {
assert_eq!(u64_to_thai_word(1_000_000), "หนึ่งล้าน");
}
#[test]
fn ten_million_word() {
assert_eq!(u64_to_thai_word(10_000_000), "สิบล้าน");
}
#[test]
fn complex_seven_digit_word() {
assert_eq!(
u64_to_thai_word(3_456_789),
"สามล้านสี่แสนห้าหมื่นหกพันเจ็ดร้อยแปดสิบเก้า"
);
}
// ---- number round trip ----
#[test]
fn roundtrip_parse_then_generate() {
// Each value is generated as words and must parse back to itself.
let cases = [
0u64, 1, 9, 10, 11, 20, 21, 99, 100, 101, 111, 999, 1_000, 10_000, 100_000, 1_000_000,
10_000_000, 3_456_789,
];
for n in cases {
let word = u64_to_thai_word(n);
let parsed = parse_thai_word(&word);
assert_eq!(parsed, Some(n), "roundtrip failed for {n}: word={word:?}");
}
}
// ---- parse_thai_baht ----
#[test]
fn baht_exact_no_satang() {
assert_eq!(
parse_thai_baht("หนึ่งร้อยยี่สิบสามบาทถ้วน"),
Some(BahtAmount {
baht: 123,
satang: 0
})
);
}
#[test]
fn baht_with_satang() {
assert_eq!(
parse_thai_baht("ห้าบาทยี่สิบห้าสตางค์"),
Some(BahtAmount {
baht: 5,
satang: 25
})
);
}
#[test]
fn baht_no_suffix_implies_zero_satang() {
assert_eq!(
parse_thai_baht("หนึ่งร้อยบาท"),
Some(BahtAmount {
baht: 100,
satang: 0
})
);
}
#[test]
fn baht_zero_baht_with_satang() {
assert_eq!(
parse_thai_baht("ศูนย์บาทห้าสิบสตางค์"),
Some(BahtAmount {
baht: 0,
satang: 50
})
);
}
#[test]
fn baht_million() {
assert_eq!(
parse_thai_baht("หนึ่งล้านบาทถ้วน"),
Some(BahtAmount {
baht: 1_000_000,
satang: 0
})
);
}
#[test]
fn baht_satang_eleven() {
assert_eq!(
parse_thai_baht("สองบาทสิบเอ็ดสตางค์"),
Some(BahtAmount {
baht: 2,
satang: 11
})
);
}
#[test]
fn baht_satang_fifty() {
assert_eq!(
parse_thai_baht("หนึ่งร้อยบาทห้าสิบสตางค์"),
Some(BahtAmount {
baht: 100,
satang: 50
})
);
}
#[test]
fn baht_satang_above_99_is_none() {
// One hundred satang is out of range for the satang part.
assert_eq!(parse_thai_baht("หนึ่งบาทหนึ่งร้อยสตางค์"), None);
}
#[test]
fn baht_no_baht_marker_is_none() {
assert_eq!(parse_thai_baht("หนึ่งร้อยยี่สิบสาม"), None);
}
#[test]
fn baht_non_number_is_none() {
assert_eq!(parse_thai_baht("กินข้าวบาทถ้วน"), None);
}
#[test]
fn baht_empty_is_none() {
assert_eq!(parse_thai_baht(""), None);
}
#[test]
fn baht_unrecognised_satang_suffix_is_none() {
assert_eq!(parse_thai_baht("หนึ่งบาทมาก"), None);
}
// ---- to_thai_baht_text ----
#[test]
fn baht_text_zero_exact() {
assert_eq!(to_thai_baht_text(0, 0), "ศูนย์บาทถ้วน");
}
#[test]
fn baht_text_one_exact() {
assert_eq!(to_thai_baht_text(1, 0), "หนึ่งบาทถ้วน");
}
#[test]
fn baht_text_hundred_exact() {
assert_eq!(to_thai_baht_text(100, 0), "หนึ่งร้อยบาทถ้วน");
}
#[test]
fn baht_text_with_satang() {
assert_eq!(to_thai_baht_text(21, 50), "ยี่สิบเอ็ดบาทห้าสิบสตางค์");
}
#[test]
fn baht_text_million_exact() {
assert_eq!(to_thai_baht_text(1_000_000, 0), "หนึ่งล้านบาทถ้วน");
}
#[test]
fn baht_text_zero_baht_with_satang() {
assert_eq!(to_thai_baht_text(0, 25), "ศูนย์บาทยี่สิบห้าสตางค์");
}
#[test]
fn baht_text_satang_eleven() {
assert_eq!(to_thai_baht_text(2, 11), "สองบาทสิบเอ็ดสตางค์");
}
// ---- baht round trip ----
#[test]
fn baht_roundtrip() {
// Each (baht, satang) pair is rendered as text and must parse back unchanged.
let cases = [
(0u64, 0u8),
(1, 0),
(100, 0),
(123, 50),
(5, 25),
(1_000_000, 0),
(21, 11),
(0, 99),
];
for (baht, satang) in cases {
let text = to_thai_baht_text(baht, satang);
let parsed = parse_thai_baht(&text);
assert_eq!(
parsed,
Some(BahtAmount { baht, satang }),
"roundtrip failed for ({baht}, {satang}): text={text:?}"
);
}
}
}