kham_core/number.rs
1//! Thai number normalization.
2//!
3//! Two independent normalization paths:
4//!
5//! 1. **Thai digit → ASCII**: converts Thai digit characters (๐–๙, U+0E50–U+0E59)
6//! to their ASCII equivalents. Non-digit characters are passed through unchanged.
7//!
8//! 2. **Spelled-out Thai number words → integer**: parses a full Thai cardinal number
9//! word (e.g. `หนึ่งร้อยยี่สิบสาม`) into its numeric value (`123`).
10//!
11//! ## Thai digit mapping
12//!
13//! | Thai | ASCII |
14//! |------|-------|
15//! | ๐ | 0 |
16//! | ๑ | 1 |
17//! | ๒ | 2 |
18//! | ๓ | 3 |
19//! | ๔ | 4 |
20//! | ๕ | 5 |
21//! | ๖ | 6 |
22//! | ๗ | 7 |
23//! | ๘ | 8 |
24//! | ๙ | 9 |
25//!
26//! ## Thai number word grammar (cardinal)
27//!
28//! | Word | Value | Notes |
29//! |--------|-------------|------------------------------------------|
30//! | ศูนย์ | 0 | |
31//! | หนึ่ง | 1 | |
32//! | เอ็ด | 1 | units position after `สิบ` only |
33//! | ยี่ | 2 | tens position only (`ยี่สิบ` = 20) |
34//! | สอง | 2 | |
35//! | สาม | 3 | |
36//! | สี่ | 4 | |
37//! | ห้า | 5 | |
38//! | หก | 6 | |
39//! | เจ็ด | 7 | |
40//! | แปด | 8 | |
41//! | เก้า | 9 | |
42//! | สิบ | ×10 | preceding digit optional (default 1) |
43//! | ร้อย | ×100 | preceding digit optional |
44//! | พัน | ×1 000 | preceding digit optional |
45//! | หมื่น | ×10 000 | preceding digit optional |
46//! | แสน | ×100 000 | preceding digit optional |
47//! | ล้าน | ×1 000 000 | splits number into two sub-million groups|
48//!
49//! # Examples
50//!
51//! ```rust
52//! use kham_core::number::{thai_digits_to_ascii, parse_thai_word};
53//!
54//! // Thai digit conversion
55//! assert_eq!(thai_digits_to_ascii("๑๒๓"), "123");
56//! assert_eq!(thai_digits_to_ascii("ธนาคาร๑๐๐แห่ง"), "ธนาคาร100แห่ง");
57//!
58//! // Spelled-out number word parsing
59//! assert_eq!(parse_thai_word("ยี่สิบ"), Some(20));
60//! assert_eq!(parse_thai_word("หนึ่งร้อยยี่สิบสาม"), Some(123));
61//! assert_eq!(parse_thai_word("สองล้านห้าแสน"), Some(2_500_000));
62//! ```
63
64use alloc::string::String;
65use alloc::vec::Vec;
66
67// ---------------------------------------------------------------------------
68// Thai digit ↔ ASCII
69// ---------------------------------------------------------------------------
70
/// Map one Thai digit (๐–๙, U+0E50–U+0E59) to the matching ASCII digit.
///
/// Every other character — ASCII digits included — yields `None`.
///
/// # Examples
///
/// ```rust
/// use kham_core::number::thai_digit_to_ascii;
///
/// assert_eq!(thai_digit_to_ascii('๑'), Some('1'));
/// assert_eq!(thai_digit_to_ascii('ก'), None);
/// assert_eq!(thai_digit_to_ascii('5'), None); // already ASCII, so not a Thai digit
/// ```
#[inline]
pub fn thai_digit_to_ascii(c: char) -> Option<char> {
    // Code point of ๐; the ten Thai digits are contiguous from here.
    const THAI_ZERO: u32 = 0x0E50;

    // `checked_sub` rejects everything below ๐, and `from_digit` rejects any
    // offset of 10 or more, i.e. everything above ๙.
    let offset = (c as u32).checked_sub(THAI_ZERO)?;
    char::from_digit(offset, 10)
}
100
101/// Convert all Thai digit characters in `text` to ASCII digits.
102///
103/// Characters that are not Thai digits are passed through unchanged.
104/// Allocates a new [`String`] only when Thai digits are present;
105/// otherwise returns a copy of the input.
106///
107/// # Examples
108///
109/// ```rust
110/// use kham_core::number::thai_digits_to_ascii;
111///
112/// assert_eq!(thai_digits_to_ascii("๑๒๓"), "123");
113/// assert_eq!(thai_digits_to_ascii("ธนาคาร๑๐๐แห่ง"), "ธนาคาร100แห่ง");
114/// assert_eq!(thai_digits_to_ascii("hello"), "hello");
115/// assert_eq!(thai_digits_to_ascii(""), "");
116/// // Mixed Thai and ASCII digits — only Thai digits are converted
117/// assert_eq!(thai_digits_to_ascii("๑2๓"), "123");
118/// ```
119pub fn thai_digits_to_ascii(text: &str) -> String {
120 if !text.chars().any(|c| thai_digit_to_ascii(c).is_some()) {
121 return String::from(text);
122 }
123 text.chars()
124 .map(|c| thai_digit_to_ascii(c).unwrap_or(c))
125 .collect()
126}
127
128/// Return `true` if every character in `text` is a Thai digit (๐–๙).
129///
130/// Returns `false` for empty strings.
131///
132/// # Examples
133///
134/// ```rust
135/// use kham_core::number::is_thai_digit_str;
136///
137/// assert!(is_thai_digit_str("๑๒๓"));
138/// assert!(is_thai_digit_str("๐"));
139/// assert!(!is_thai_digit_str("123"));
140/// assert!(!is_thai_digit_str("๑2๓")); // mixed
141/// assert!(!is_thai_digit_str(""));
142/// ```
143#[inline]
144pub fn is_thai_digit_str(text: &str) -> bool {
145 !text.is_empty() && text.chars().all(|c| thai_digit_to_ascii(c).is_some())
146}
147
148// ---------------------------------------------------------------------------
149// Spelled-out Thai number word → u64
150// ---------------------------------------------------------------------------
151
/// Lexer token produced by [`next_num_token`] and consumed by the Thai
/// number-word parser.
///
/// Each variant corresponds to one vocabulary word (or, for `Digit`, one of
/// the ten plain digit words).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NumToken {
    /// A plain digit word carrying its value 0–9:
    /// ศูนย์ หนึ่ง สอง สาม สี่ ห้า หก เจ็ด แปด เก้า.
    Digit(u64),
    /// ยี่ — alternate form of 2 used in front of สิบ (ยี่สิบ = 20).
    Yi,
    /// เอ็ด — alternate form of 1 used as the units word after สิบ
    /// (สิบเอ็ด = 11).
    Et,
    /// สิบ — ×10 multiplier.
    Sip,
    /// ร้อย — ×100 multiplier.
    Roi,
    /// พัน — ×1 000 multiplier.
    Pan,
    /// หมื่น — ×10 000 multiplier.
    Muen,
    /// แสน — ×100 000 multiplier.
    Saen,
    /// ล้าน — ×1 000 000 group separator.
    Lan,
}
174
175/// Greedy longest-prefix match against the Thai number vocabulary.
176///
177/// Returns `(token, remaining_slice)` or `None` if no token starts at `s`.
178fn next_num_token(s: &str) -> Option<(NumToken, &str)> {
179 // Ordered so that longer / more specific prefixes come before shorter ones.
180 // e.g. "เก้า" before any single-syllable word to avoid prefix collisions.
181 const VOCAB: &[(&str, NumToken)] = &[
182 ("ศูนย์", NumToken::Digit(0)),
183 ("หนึ่ง", NumToken::Digit(1)),
184 ("เอ็ด", NumToken::Et),
185 ("ยี่", NumToken::Yi),
186 ("สอง", NumToken::Digit(2)),
187 ("สาม", NumToken::Digit(3)),
188 ("สี่", NumToken::Digit(4)),
189 ("ห้า", NumToken::Digit(5)),
190 ("หก", NumToken::Digit(6)),
191 ("เจ็ด", NumToken::Digit(7)),
192 ("แปด", NumToken::Digit(8)),
193 ("เก้า", NumToken::Digit(9)),
194 ("สิบ", NumToken::Sip),
195 ("ร้อย", NumToken::Roi),
196 ("พัน", NumToken::Pan),
197 ("หมื่น", NumToken::Muen),
198 ("แสน", NumToken::Saen),
199 ("ล้าน", NumToken::Lan),
200 ];
201 for &(word, tok) in VOCAB {
202 if let Some(rest) = s.strip_prefix(word) {
203 return Some((tok, rest));
204 }
205 }
206 None
207}
208
209/// Parse a Thai cardinal number below one million (0–999 999).
210///
211/// Returns `None` when `s` contains unrecognised tokens or is structurally
212/// invalid (e.g. two consecutive digit words without a multiplier between them).
213/// An empty string returns `Some(0)` to support implied-zero sub-parts.
214fn parse_below_lan(s: &str) -> Option<u64> {
215 let mut s = s;
216 let mut total: u64 = 0;
217 let mut pending: Option<u64> = None; // digit waiting for its multiplier
218 let mut had_sip = false; // ตรวจสอบ for เอ็ด-validity
219
220 while !s.is_empty() {
221 let (tok, rest) = next_num_token(s)?;
222 match tok {
223 NumToken::Digit(d) => {
224 // Two consecutive digit words with no multiplier between them
225 // are not valid Thai number spelling.
226 if pending.is_some() {
227 return None;
228 }
229 pending = Some(d);
230 }
231 NumToken::Yi => {
232 if pending.is_some() {
233 return None;
234 }
235 // ยี่ acts as digit-2 in front of สิบ (ยี่สิบ = 20).
236 pending = Some(2);
237 }
238 NumToken::Et => {
239 // เอ็ด is the units-1 form allowed only after สิบ.
240 if !had_sip || pending.is_some() {
241 return None;
242 }
243 total = total.checked_add(1)?;
244 }
245 NumToken::Sip => {
246 let coeff = pending.take().unwrap_or(1);
247 total = total.checked_add(coeff.checked_mul(10)?)?;
248 had_sip = true;
249 }
250 NumToken::Roi => {
251 let coeff = pending.take().unwrap_or(1);
252 total = total.checked_add(coeff.checked_mul(100)?)?;
253 }
254 NumToken::Pan => {
255 let coeff = pending.take().unwrap_or(1);
256 total = total.checked_add(coeff.checked_mul(1_000)?)?;
257 }
258 NumToken::Muen => {
259 let coeff = pending.take().unwrap_or(1);
260 total = total.checked_add(coeff.checked_mul(10_000)?)?;
261 }
262 NumToken::Saen => {
263 let coeff = pending.take().unwrap_or(1);
264 total = total.checked_add(coeff.checked_mul(100_000)?)?;
265 }
266 NumToken::Lan => {
267 // ล้าน is resolved at the outer level; hitting it here is invalid.
268 return None;
269 }
270 }
271 s = rest;
272 }
273
274 // Any remaining pending digit is a standalone units value (e.g. the สาม in
275 // หนึ่งร้อยสองสิบสาม).
276 if let Some(d) = pending {
277 total = total.checked_add(d)?;
278 }
279
280 Some(total)
281}
282
/// Render `n` as a decimal string using only `core`/`alloc` facilities.
fn u64_to_string(mut n: u64) -> String {
    // `u64::MAX` has 20 decimal digits, so this buffer always suffices.
    let mut buf = [b'0'; 20];
    let mut start = buf.len();

    // Fill the buffer from the right, least-significant digit first. Running
    // the body at least once makes zero come out as "0".
    loop {
        start -= 1;
        buf[start] = b'0' + (n % 10) as u8;
        n /= 10;
        if n == 0 {
            break;
        }
    }

    buf[start..].iter().map(|&byte| char::from(byte)).collect()
}
297
298/// Parse a spelled-out Thai cardinal number word into its numeric value.
299///
300/// Handles the full range from 0 (`ศูนย์`) up to values bounded by `u64::MAX`.
301/// Returns `None` when the input contains unrecognised characters, is empty,
302/// or is structurally invalid for Thai number grammar.
303///
304/// ## Grammar summary
305///
306/// ```text
307/// number ::= sub_lan "ล้าน" sub_lan
308/// | sub_lan "ล้าน"
309/// | sub_lan
310///
311/// sub_lan ::= [digit] "แสน" sub_แสน | sub_แสน
312/// | [digit] "หมื่น" sub_หมื่น | sub_หมื่น
313/// | [digit] "พัน" sub_พัน | sub_พัน
314/// | [digit] "ร้อย" sub_ร้อย | sub_ร้อย
315/// | ["ยี่" | digit] "สิบ" unit | "สิบ" unit
316/// | unit
317///
318/// unit ::= digit | "เอ็ด" | ε
319/// digit ::= "ศูนย์" | "หนึ่ง" | "สอง" | … | "เก้า"
320/// ```
321///
322/// The `ยี่` form of 2 is valid only in `ยี่สิบ` (20, 21, …, 29).
323/// The `เอ็ด` form of 1 is valid only as the units digit after `สิบ`
324/// (11, 21, 31, …).
325///
326/// # Examples
327///
328/// ```rust
329/// use kham_core::number::parse_thai_word;
330///
331/// // single digits
332/// assert_eq!(parse_thai_word("ศูนย์"), Some(0));
333/// assert_eq!(parse_thai_word("หนึ่ง"), Some(1));
334/// assert_eq!(parse_thai_word("เก้า"), Some(9));
335///
336/// // tens — implied-1 prefix and special forms
337/// assert_eq!(parse_thai_word("สิบ"), Some(10));
338/// assert_eq!(parse_thai_word("สิบเอ็ด"), Some(11));
339/// assert_eq!(parse_thai_word("ยี่สิบ"), Some(20));
340/// assert_eq!(parse_thai_word("ยี่สิบเอ็ด"), Some(21));
341/// assert_eq!(parse_thai_word("สามสิบสี่"), Some(34));
342///
343/// // hundreds
344/// assert_eq!(parse_thai_word("ร้อย"), Some(100));
345/// assert_eq!(parse_thai_word("หนึ่งร้อย"), Some(100));
346/// assert_eq!(parse_thai_word("หนึ่งร้อยยี่สิบสาม"), Some(123));
347///
348/// // thousands
349/// assert_eq!(parse_thai_word("หนึ่งพัน"), Some(1_000));
350/// assert_eq!(parse_thai_word("สองพันห้าร้อย"), Some(2_500));
351///
352/// // ten-thousands / hundred-thousands
353/// assert_eq!(parse_thai_word("หนึ่งหมื่น"), Some(10_000));
354/// assert_eq!(parse_thai_word("หนึ่งแสน"), Some(100_000));
355///
356/// // millions — coefficient itself can be a sub-million number
357/// assert_eq!(parse_thai_word("หนึ่งล้าน"), Some(1_000_000));
358/// assert_eq!(parse_thai_word("ล้าน"), Some(1_000_000));
359/// assert_eq!(parse_thai_word("สองล้านห้าแสน"), Some(2_500_000));
360/// assert_eq!(parse_thai_word("สิบล้าน"), Some(10_000_000));
361/// assert_eq!(parse_thai_word("หนึ่งร้อยล้าน"), Some(100_000_000));
362///
363/// // not a number → None
364/// assert_eq!(parse_thai_word("กินข้าว"), None);
365/// assert_eq!(parse_thai_word(""), None);
366/// ```
367pub fn parse_thai_word(text: &str) -> Option<u64> {
368 let s = text.trim();
369 if s.is_empty() {
370 return None;
371 }
372
373 if let Some(lan_pos) = s.find("ล้าน") {
374 let prefix = &s[..lan_pos];
375 let suffix = &s[lan_pos + "ล้าน".len()..];
376
377 // The millions coefficient is a sub-million number; bare "ล้าน" implies 1.
378 let millions: u64 = if prefix.is_empty() {
379 1
380 } else {
381 parse_below_lan(prefix)?
382 };
383 let remainder: u64 = if suffix.is_empty() {
384 0
385 } else {
386 parse_below_lan(suffix)?
387 };
388
389 millions.checked_mul(1_000_000)?.checked_add(remainder)
390 } else {
391 let result = parse_below_lan(s)?;
392 // parse_below_lan returns Some(0) for empty string, but we guard that
393 // case at the top of this function, so a Some(0) here means ศูนย์.
394 Some(result)
395 }
396}
397
398/// Return the decimal string representation of a Thai number word, or `None`
399/// if the input is not a recognised Thai number word.
400///
401/// This is a convenience wrapper over [`parse_thai_word`] that formats the
402/// result as a string suitable for use as an FTS synonym.
403///
404/// # Examples
405///
406/// ```rust
407/// use kham_core::number::thai_word_to_decimal;
408///
409/// assert_eq!(thai_word_to_decimal("ยี่สิบ"), Some(String::from("20")));
410/// assert_eq!(thai_word_to_decimal("หนึ่งร้อยยี่สิบสาม"), Some(String::from("123")));
411/// assert_eq!(thai_word_to_decimal("กิน"), None);
412/// ```
413pub fn thai_word_to_decimal(text: &str) -> Option<String> {
414 parse_thai_word(text).map(u64_to_string)
415}
416
417// ---------------------------------------------------------------------------
418// Number → Thai word (generator, inverse of parse_thai_word)
419// ---------------------------------------------------------------------------
420
/// Look up the Thai word for a single digit 1–9.
///
/// Any other value — 0 as well as anything above 9 — yields the empty string;
/// callers decide whether zero is spelled ศูนย์ or simply omitted.
#[inline]
fn digit_word(d: u64) -> &'static str {
    // Index 0 holds the word for 1, index 8 the word for 9.
    const WORDS: [&str; 9] = [
        "หนึ่ง", "สอง", "สาม", "สี่", "ห้า", "หก", "เจ็ด", "แปด", "เก้า",
    ];

    match d {
        1..=9 => WORDS[(d - 1) as usize],
        _ => "",
    }
}
439
440/// Append the Thai word representation of `n` (1–999 999) to `out`.
441fn write_below_lan(mut n: u64, out: &mut String) {
442 if n >= 100_000 {
443 out.push_str(digit_word(n / 100_000));
444 out.push_str("แสน");
445 n %= 100_000;
446 }
447 if n >= 10_000 {
448 out.push_str(digit_word(n / 10_000));
449 out.push_str("หมื่น");
450 n %= 10_000;
451 }
452 if n >= 1_000 {
453 out.push_str(digit_word(n / 1_000));
454 out.push_str("พัน");
455 n %= 1_000;
456 }
457 if n >= 100 {
458 out.push_str(digit_word(n / 100));
459 out.push_str("ร้อย");
460 n %= 100;
461 }
462 if n >= 10 {
463 let tens = n / 10;
464 let units = n % 10;
465 match tens {
466 1 => out.push_str("สิบ"), // implied-1: สิบ not หนึ่งสิบ
467 2 => out.push_str("ยี่สิบ"), // special form for 20s
468 _ => {
469 out.push_str(digit_word(tens));
470 out.push_str("สิบ");
471 }
472 }
473 match units {
474 0 => {}
475 1 => out.push_str("เอ็ด"), // เอ็ด only after สิบ
476 _ => out.push_str(digit_word(units)),
477 }
478 } else if n > 0 {
479 out.push_str(digit_word(n)); // plain units 1–9, no สิบ context → หนึ่ง not เอ็ด
480 }
481}
482
483/// Append the full Thai word representation of any `n > 0` to `out`.
484fn write_thai_word(n: u64, out: &mut String) {
485 if n >= 1_000_000 {
486 write_thai_word(n / 1_000_000, out);
487 out.push_str("ล้าน");
488 let rem = n % 1_000_000;
489 if rem > 0 {
490 write_below_lan(rem, out);
491 }
492 } else {
493 write_below_lan(n, out);
494 }
495}
496
497/// Convert a `u64` to a spelled-out Thai cardinal number word.
498///
499/// This is the inverse of [`parse_thai_word`]: for any value `n`,
500/// `parse_thai_word(u64_to_thai_word(n)) == Some(n)`.
501///
502/// - Zero is rendered as `ศูนย์`.
503/// - The `ยี่` form is used for 20, 21, …, 29 and their multiples.
504/// - The `เอ็ด` form is used for units-1 when a tens word (`สิบ`) precedes it.
505/// - Higher multipliers use explicit digit prefixes (`หนึ่งร้อย` = 100, etc.).
506///
507/// # Examples
508///
509/// ```rust
510/// use kham_core::number::u64_to_thai_word;
511///
512/// assert_eq!(u64_to_thai_word(0), "ศูนย์");
513/// assert_eq!(u64_to_thai_word(10), "สิบ");
514/// assert_eq!(u64_to_thai_word(11), "สิบเอ็ด");
515/// assert_eq!(u64_to_thai_word(20), "ยี่สิบ");
516/// assert_eq!(u64_to_thai_word(21), "ยี่สิบเอ็ด");
517/// assert_eq!(u64_to_thai_word(100), "หนึ่งร้อย");
518/// assert_eq!(u64_to_thai_word(123), "หนึ่งร้อยยี่สิบสาม");
519/// assert_eq!(u64_to_thai_word(1_000_000), "หนึ่งล้าน");
520/// assert_eq!(u64_to_thai_word(10_000_000), "สิบล้าน");
521/// ```
522pub fn u64_to_thai_word(n: u64) -> String {
523 if n == 0 {
524 return String::from("ศูนย์");
525 }
526 let mut out = String::new();
527 write_thai_word(n, &mut out);
528 out
529}
530
531// ---------------------------------------------------------------------------
532// Thai Baht — parse and generate
533// ---------------------------------------------------------------------------
534
/// A monetary amount in Thai Baht, split into whole baht and satang.
///
/// `satang` is the sub-unit (1 baht = 100 satang), so meaningful values are
/// 0–99. The fields are public and the type performs no range check of its
/// own; [`parse_thai_baht`] only ever produces satang values in that range.
///
/// # Examples
///
/// ```rust
/// use kham_core::number::{BahtAmount, parse_thai_baht, to_thai_baht_text};
///
/// let amt: BahtAmount = parse_thai_baht("หนึ่งร้อยยี่สิบสามบาทห้าสิบสตางค์").unwrap();
/// assert_eq!(amt.baht, 123);
/// assert_eq!(amt.satang, 50);
///
/// assert_eq!(to_thai_baht_text(123, 50), "หนึ่งร้อยยี่สิบสามบาทห้าสิบสตางค์");
/// assert_eq!(to_thai_baht_text(100, 0), "หนึ่งร้อยบาทถ้วน");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BahtAmount {
    /// Whole baht.
    pub baht: u64,
    /// Satang, expected to be 0–99 (100 satang = 1 baht).
    pub satang: u8,
}
558
559/// Parse a Thai Baht currency string into a [`BahtAmount`].
560///
561/// Accepted forms:
562///
563/// | Input | baht | satang |
564/// |--------------------------------------------|------|--------|
565/// | `หนึ่งร้อยบาทถ้วน` | 100 | 0 |
566/// | `หนึ่งร้อยบาท` | 100 | 0 |
567/// | `ห้าบาทยี่สิบห้าสตางค์` | 5 | 25 |
568/// | `หนึ่งล้านบาทถ้วน` | 1 000 000 | 0 |
569/// | `ศูนย์บาทห้าสิบสตางค์` | 0 | 50 |
570///
571/// Returns `None` when:
572/// - The string contains no `บาท`.
573/// - The baht part is not a recognised Thai number word.
574/// - The satang part is present but not a recognised Thai number word.
575/// - The satang value exceeds 99.
576///
577/// # Examples
578///
579/// ```rust
580/// use kham_core::number::{parse_thai_baht, BahtAmount};
581///
582/// assert_eq!(
583/// parse_thai_baht("หนึ่งร้อยยี่สิบสามบาทถ้วน"),
584/// Some(BahtAmount { baht: 123, satang: 0 })
585/// );
586/// assert_eq!(
587/// parse_thai_baht("ห้าบาทยี่สิบห้าสตางค์"),
588/// Some(BahtAmount { baht: 5, satang: 25 })
589/// );
590/// assert_eq!(parse_thai_baht("กินข้าว"), None);
591/// assert_eq!(parse_thai_baht(""), None);
592/// ```
593pub fn parse_thai_baht(text: &str) -> Option<BahtAmount> {
594 let s = text.trim();
595 if s.is_empty() {
596 return None;
597 }
598
599 let (baht_part, after_baht) = s.split_once("บาท")?;
600
601 let baht = parse_thai_word(baht_part.trim())?;
602
603 let satang_str = after_baht.trim();
604 let satang: u8 = if satang_str.is_empty() || satang_str == "ถ้วน" {
605 0
606 } else if let Some(san_word) = satang_str.strip_suffix("สตางค์") {
607 let val = parse_thai_word(san_word.trim())?;
608 if val > 99 {
609 return None;
610 }
611 val as u8
612 } else {
613 return None;
614 };
615
616 Some(BahtAmount { baht, satang })
617}
618
619/// Render a baht + satang amount as Thai currency text.
620///
621/// - When `satang == 0` the suffix `ถ้วน` (exact, no satang) is appended.
622/// - When `satang > 0` the satang amount is spelled out followed by `สตางค์`.
623///
624/// The satang parameter should be in the range 0–99 (1 baht = 100 satang).
625/// Values above 99 are accepted and rendered as-is but are semantically odd.
626///
627/// # Examples
628///
629/// ```rust
630/// use kham_core::number::to_thai_baht_text;
631///
632/// assert_eq!(to_thai_baht_text(0, 0), "ศูนย์บาทถ้วน");
633/// assert_eq!(to_thai_baht_text(1, 0), "หนึ่งบาทถ้วน");
634/// assert_eq!(to_thai_baht_text(100, 0), "หนึ่งร้อยบาทถ้วน");
635/// assert_eq!(to_thai_baht_text(21, 50), "ยี่สิบเอ็ดบาทห้าสิบสตางค์");
636/// assert_eq!(to_thai_baht_text(1_000_000, 0), "หนึ่งล้านบาทถ้วน");
637/// assert_eq!(to_thai_baht_text(0, 25), "ศูนย์บาทยี่สิบห้าสตางค์");
638/// ```
639pub fn to_thai_baht_text(baht: u64, satang: u8) -> String {
640 let mut out = u64_to_thai_word(baht);
641 out.push_str("บาท");
642 if satang == 0 {
643 out.push_str("ถ้วน");
644 } else {
645 out.push_str(&u64_to_thai_word(satang as u64));
646 out.push_str("สตางค์");
647 }
648 out
649}
650
651// ---------------------------------------------------------------------------
652// Tests
653// ---------------------------------------------------------------------------
654
/// Unit tests for digit conversion, number-word parsing and generation, and
/// Baht currency text. Sections follow the order of the items in the module.
#[cfg(test)]
mod tests {
    use super::*;

    // ── thai_digit_to_ascii ───────────────────────────────────────────────────

    /// Every Thai digit maps to the ASCII digit with the same value.
    #[test]
    fn thai_digits_map_correctly() {
        let pairs = [
            ('๐', '0'),
            ('๑', '1'),
            ('๒', '2'),
            ('๓', '3'),
            ('๔', '4'),
            ('๕', '5'),
            ('๖', '6'),
            ('๗', '7'),
            ('๘', '8'),
            ('๙', '9'),
        ];
        for (thai, ascii) in pairs {
            assert_eq!(thai_digit_to_ascii(thai), Some(ascii), "failed for {thai}");
        }
    }

    /// Thai letters, ASCII digits and whitespace are not Thai digits.
    #[test]
    fn non_digit_returns_none() {
        assert_eq!(thai_digit_to_ascii('ก'), None);
        assert_eq!(thai_digit_to_ascii('5'), None);
        assert_eq!(thai_digit_to_ascii(' '), None);
    }

    // ── thai_digits_to_ascii ──────────────────────────────────────────────────

    #[test]
    fn converts_all_thai_digits() {
        assert_eq!(thai_digits_to_ascii("๐๑๒๓๔๕๖๗๘๙"), "0123456789");
    }

    #[test]
    fn passthrough_ascii_only() {
        assert_eq!(thai_digits_to_ascii("hello 123"), "hello 123");
    }

    #[test]
    fn empty_string_passthrough() {
        assert_eq!(thai_digits_to_ascii(""), "");
    }

    #[test]
    fn mixed_thai_digit_in_sentence() {
        assert_eq!(thai_digits_to_ascii("ธนาคาร๑๐๐แห่ง"), "ธนาคาร100แห่ง");
    }

    #[test]
    fn mixed_thai_and_ascii_digits() {
        assert_eq!(thai_digits_to_ascii("๑2๓"), "123");
    }

    #[test]
    fn no_allocation_when_no_thai_digits() {
        // Despite the name, this only checks the returned text; allocation
        // behaviour is an implementation detail and is not observed here.
        let result = thai_digits_to_ascii("no thai digits here");
        assert_eq!(result, "no thai digits here");
    }

    // ── is_thai_digit_str ─────────────────────────────────────────────────────

    #[test]
    fn all_thai_digits() {
        assert!(is_thai_digit_str("๑๒๓"));
        assert!(is_thai_digit_str("๐"));
    }

    #[test]
    fn mixed_is_false() {
        assert!(!is_thai_digit_str("๑2๓"));
        assert!(!is_thai_digit_str("๑ก"));
    }

    #[test]
    fn ascii_only_is_false() {
        assert!(!is_thai_digit_str("123"));
    }

    #[test]
    fn empty_is_false() {
        assert!(!is_thai_digit_str(""));
    }

    // ── u64_to_string ─────────────────────────────────────────────────────────

    #[test]
    fn zero_formats_correctly() {
        assert_eq!(u64_to_string(0), "0");
    }

    #[test]
    fn small_number_formats_correctly() {
        assert_eq!(u64_to_string(42), "42");
    }

    #[test]
    fn large_number_formats_correctly() {
        assert_eq!(u64_to_string(1_000_000), "1000000");
    }

    // ── parse_thai_word — single digits ──────────────────────────────────────

    #[test]
    fn zero() {
        assert_eq!(parse_thai_word("ศูนย์"), Some(0));
    }

    #[test]
    fn one_to_nine() {
        let cases = [
            ("หนึ่ง", 1u64),
            ("สอง", 2),
            ("สาม", 3),
            ("สี่", 4),
            ("ห้า", 5),
            ("หก", 6),
            ("เจ็ด", 7),
            ("แปด", 8),
            ("เก้า", 9),
        ];
        for (word, expected) in cases {
            assert_eq!(parse_thai_word(word), Some(expected), "failed for {word}");
        }
    }

    // ── parse_thai_word — tens ────────────────────────────────────────────────

    /// Bare สิบ carries an implied coefficient of 1.
    #[test]
    fn ten_implied_one() {
        assert_eq!(parse_thai_word("สิบ"), Some(10));
    }

    #[test]
    fn eleven_uses_et() {
        assert_eq!(parse_thai_word("สิบเอ็ด"), Some(11));
    }

    #[test]
    fn twelve_to_nineteen() {
        let cases = [
            ("สิบสอง", 12u64),
            ("สิบสาม", 13),
            ("สิบสี่", 14),
            ("สิบห้า", 15),
            ("สิบหก", 16),
            ("สิบเจ็ด", 17),
            ("สิบแปด", 18),
            ("สิบเก้า", 19),
        ];
        for (word, expected) in cases {
            assert_eq!(parse_thai_word(word), Some(expected), "failed for {word}");
        }
    }

    #[test]
    fn twenty_uses_yi() {
        assert_eq!(parse_thai_word("ยี่สิบ"), Some(20));
    }

    #[test]
    fn twenty_one_yi_et() {
        assert_eq!(parse_thai_word("ยี่สิบเอ็ด"), Some(21));
    }

    #[test]
    fn thirty_four() {
        assert_eq!(parse_thai_word("สามสิบสี่"), Some(34));
    }

    #[test]
    fn ninety_nine() {
        assert_eq!(parse_thai_word("เก้าสิบเก้า"), Some(99));
    }

    // ── parse_thai_word — hundreds ────────────────────────────────────────────

    #[test]
    fn hundred_implied_one() {
        assert_eq!(parse_thai_word("ร้อย"), Some(100));
    }

    #[test]
    fn one_hundred_explicit() {
        assert_eq!(parse_thai_word("หนึ่งร้อย"), Some(100));
    }

    #[test]
    fn one_hundred_twenty_three() {
        assert_eq!(parse_thai_word("หนึ่งร้อยยี่สิบสาม"), Some(123));
    }

    #[test]
    fn two_hundred() {
        assert_eq!(parse_thai_word("สองร้อย"), Some(200));
    }

    #[test]
    fn nine_hundred_ninety_nine() {
        assert_eq!(parse_thai_word("เก้าร้อยเก้าสิบเก้า"), Some(999));
    }

    // ── parse_thai_word — thousands ───────────────────────────────────────────

    #[test]
    fn one_thousand() {
        assert_eq!(parse_thai_word("หนึ่งพัน"), Some(1_000));
        assert_eq!(parse_thai_word("พัน"), Some(1_000));
    }

    #[test]
    fn two_thousand_five_hundred() {
        assert_eq!(parse_thai_word("สองพันห้าร้อย"), Some(2_500));
    }

    #[test]
    fn ten_thousand() {
        assert_eq!(parse_thai_word("หนึ่งหมื่น"), Some(10_000));
        assert_eq!(parse_thai_word("หมื่น"), Some(10_000));
    }

    #[test]
    fn hundred_thousand() {
        assert_eq!(parse_thai_word("หนึ่งแสน"), Some(100_000));
        assert_eq!(parse_thai_word("แสน"), Some(100_000));
    }

    // ── parse_thai_word — millions ────────────────────────────────────────────

    #[test]
    fn one_million_explicit() {
        assert_eq!(parse_thai_word("หนึ่งล้าน"), Some(1_000_000));
    }

    /// Bare ล้าน carries an implied coefficient of 1.
    #[test]
    fn one_million_implied() {
        assert_eq!(parse_thai_word("ล้าน"), Some(1_000_000));
    }

    #[test]
    fn ten_million() {
        assert_eq!(parse_thai_word("สิบล้าน"), Some(10_000_000));
    }

    #[test]
    fn hundred_million() {
        assert_eq!(parse_thai_word("หนึ่งร้อยล้าน"), Some(100_000_000));
    }

    #[test]
    fn two_million_five_hundred_thousand() {
        assert_eq!(parse_thai_word("สองล้านห้าแสน"), Some(2_500_000));
    }

    #[test]
    fn complex_seven_digit() {
        // 3,456,789 — one word for every place from ล้าน down to the units.
        assert_eq!(
            parse_thai_word("สามล้านสี่แสนห้าหมื่นหกพันเจ็ดร้อยแปดสิบเก้า"),
            Some(3_456_789)
        );
    }

    // ── parse_thai_word — invalid / None ─────────────────────────────────────

    #[test]
    fn empty_returns_none() {
        assert_eq!(parse_thai_word(""), None);
    }

    #[test]
    fn whitespace_only_returns_none() {
        assert_eq!(parse_thai_word("   "), None);
    }

    #[test]
    fn non_number_word_returns_none() {
        assert_eq!(parse_thai_word("กินข้าว"), None);
        assert_eq!(parse_thai_word("ประเทศไทย"), None);
    }

    /// เอ็ด is rejected on its own and after a multiplier other than สิบ.
    #[test]
    fn et_without_sip_is_invalid() {
        assert_eq!(parse_thai_word("เอ็ด"), None);
        assert_eq!(parse_thai_word("ร้อยเอ็ด"), None);
    }

    #[test]
    fn consecutive_digits_invalid() {
        // หนึ่งสอง: two digit words with no multiplier between them is invalid.
        assert_eq!(parse_thai_word("หนึ่งสอง"), None);
    }

    // ── thai_word_to_decimal ──────────────────────────────────────────────────

    #[test]
    fn word_to_decimal_converts() {
        assert_eq!(thai_word_to_decimal("ยี่สิบ"), Some(String::from("20")));
        assert_eq!(
            thai_word_to_decimal("หนึ่งร้อยยี่สิบสาม"),
            Some(String::from("123"))
        );
    }

    #[test]
    fn word_to_decimal_none_for_non_number() {
        assert_eq!(thai_word_to_decimal("กิน"), None);
    }

    // ── trim handling ─────────────────────────────────────────────────────────

    #[test]
    fn leading_trailing_whitespace_trimmed() {
        assert_eq!(parse_thai_word("  สิบ  "), Some(10));
    }

    // ── u64_to_thai_word ──────────────────────────────────────────────────────

    #[test]
    fn zero_word() {
        assert_eq!(u64_to_thai_word(0), "ศูนย์");
    }

    #[test]
    fn single_digits_word() {
        let cases = [
            (1u64, "หนึ่ง"),
            (2, "สอง"),
            (3, "สาม"),
            (4, "สี่"),
            (5, "ห้า"),
            (6, "หก"),
            (7, "เจ็ด"),
            (8, "แปด"),
            (9, "เก้า"),
        ];
        for (n, word) in cases {
            assert_eq!(u64_to_thai_word(n), word, "failed for {n}");
        }
    }

    #[test]
    fn ten_implied_one_word() {
        assert_eq!(u64_to_thai_word(10), "สิบ");
    }

    #[test]
    fn eleven_et_form() {
        assert_eq!(u64_to_thai_word(11), "สิบเอ็ด");
    }

    #[test]
    fn twelve_to_nineteen_word() {
        let cases = [(12u64, "สิบสอง"), (15, "สิบห้า"), (19, "สิบเก้า")];
        for (n, word) in cases {
            assert_eq!(u64_to_thai_word(n), word);
        }
    }

    #[test]
    fn twenty_yi_form() {
        assert_eq!(u64_to_thai_word(20), "ยี่สิบ");
    }

    #[test]
    fn twenty_one_yi_et_word() {
        assert_eq!(u64_to_thai_word(21), "ยี่สิบเอ็ด");
    }

    #[test]
    fn thirty_four_word() {
        assert_eq!(u64_to_thai_word(34), "สามสิบสี่");
    }

    #[test]
    fn one_hundred_word() {
        assert_eq!(u64_to_thai_word(100), "หนึ่งร้อย");
    }

    #[test]
    fn one_hundred_twenty_three_word() {
        assert_eq!(u64_to_thai_word(123), "หนึ่งร้อยยี่สิบสาม");
    }

    #[test]
    fn one_hundred_one_no_et() {
        // เอ็ด only follows สิบ; 101 has no สิบ, so the units digit 1 is หนึ่ง.
        assert_eq!(u64_to_thai_word(101), "หนึ่งร้อยหนึ่ง");
    }

    #[test]
    fn one_hundred_eleven_et() {
        assert_eq!(u64_to_thai_word(111), "หนึ่งร้อยสิบเอ็ด");
    }

    #[test]
    fn one_thousand_word() {
        assert_eq!(u64_to_thai_word(1_000), "หนึ่งพัน");
    }

    #[test]
    fn ten_thousand_word() {
        assert_eq!(u64_to_thai_word(10_000), "หนึ่งหมื่น");
    }

    #[test]
    fn hundred_thousand_word() {
        assert_eq!(u64_to_thai_word(100_000), "หนึ่งแสน");
    }

    #[test]
    fn one_million_word() {
        assert_eq!(u64_to_thai_word(1_000_000), "หนึ่งล้าน");
    }

    #[test]
    fn ten_million_word() {
        assert_eq!(u64_to_thai_word(10_000_000), "สิบล้าน");
    }

    #[test]
    fn complex_seven_digit_word() {
        assert_eq!(
            u64_to_thai_word(3_456_789),
            "สามล้านสี่แสนห้าหมื่นหกพันเจ็ดร้อยแปดสิบเก้า"
        );
    }

    // ── roundtrip parse_thai_word ↔ u64_to_thai_word ─────────────────────────

    /// Generate a word for each value, parse it back, and expect the same
    /// value. The largest value covered here is 10 000 000.
    #[test]
    fn roundtrip_parse_then_generate() {
        let cases = [
            0u64, 1, 9, 10, 11, 20, 21, 99, 100, 101, 111, 999, 1_000, 10_000, 100_000, 1_000_000,
            10_000_000, 3_456_789,
        ];
        for n in cases {
            let word = u64_to_thai_word(n);
            let parsed = parse_thai_word(&word);
            assert_eq!(parsed, Some(n), "roundtrip failed for {n}: word={word:?}");
        }
    }

    // ── parse_thai_baht ───────────────────────────────────────────────────────

    #[test]
    fn baht_exact_no_satang() {
        assert_eq!(
            parse_thai_baht("หนึ่งร้อยยี่สิบสามบาทถ้วน"),
            Some(BahtAmount {
                baht: 123,
                satang: 0
            })
        );
    }

    #[test]
    fn baht_with_satang() {
        assert_eq!(
            parse_thai_baht("ห้าบาทยี่สิบห้าสตางค์"),
            Some(BahtAmount {
                baht: 5,
                satang: 25
            })
        );
    }

    #[test]
    fn baht_no_suffix_implies_zero_satang() {
        assert_eq!(
            parse_thai_baht("หนึ่งร้อยบาท"),
            Some(BahtAmount {
                baht: 100,
                satang: 0
            })
        );
    }

    #[test]
    fn baht_zero_baht_with_satang() {
        assert_eq!(
            parse_thai_baht("ศูนย์บาทห้าสิบสตางค์"),
            Some(BahtAmount {
                baht: 0,
                satang: 50
            })
        );
    }

    #[test]
    fn baht_million() {
        assert_eq!(
            parse_thai_baht("หนึ่งล้านบาทถ้วน"),
            Some(BahtAmount {
                baht: 1_000_000,
                satang: 0
            })
        );
    }

    #[test]
    fn baht_satang_eleven() {
        // สิบเอ็ด satang = 11 (เอ็ด form inside the satang part).
        assert_eq!(
            parse_thai_baht("สองบาทสิบเอ็ดสตางค์"),
            Some(BahtAmount {
                baht: 2,
                satang: 11
            })
        );
    }

    #[test]
    fn baht_satang_fifty() {
        assert_eq!(
            parse_thai_baht("หนึ่งร้อยบาทห้าสิบสตางค์"),
            Some(BahtAmount {
                baht: 100,
                satang: 50
            })
        );
    }

    #[test]
    fn baht_satang_above_99_is_none() {
        // หนึ่งร้อย = 100, which is above the 99-satang limit.
        assert_eq!(parse_thai_baht("หนึ่งบาทหนึ่งร้อยสตางค์"), None);
    }

    #[test]
    fn baht_no_baht_marker_is_none() {
        assert_eq!(parse_thai_baht("หนึ่งร้อยยี่สิบสาม"), None);
    }

    #[test]
    fn baht_non_number_is_none() {
        assert_eq!(parse_thai_baht("กินข้าวบาทถ้วน"), None);
    }

    #[test]
    fn baht_empty_is_none() {
        assert_eq!(parse_thai_baht(""), None);
    }

    /// Text after บาท that is neither ถ้วน nor a สตางค์ amount is rejected.
    #[test]
    fn baht_unrecognised_satang_suffix_is_none() {
        assert_eq!(parse_thai_baht("หนึ่งบาทมาก"), None);
    }

    // ── to_thai_baht_text ─────────────────────────────────────────────────────

    #[test]
    fn baht_text_zero_exact() {
        assert_eq!(to_thai_baht_text(0, 0), "ศูนย์บาทถ้วน");
    }

    #[test]
    fn baht_text_one_exact() {
        assert_eq!(to_thai_baht_text(1, 0), "หนึ่งบาทถ้วน");
    }

    #[test]
    fn baht_text_hundred_exact() {
        assert_eq!(to_thai_baht_text(100, 0), "หนึ่งร้อยบาทถ้วน");
    }

    #[test]
    fn baht_text_with_satang() {
        assert_eq!(to_thai_baht_text(21, 50), "ยี่สิบเอ็ดบาทห้าสิบสตางค์");
    }

    #[test]
    fn baht_text_million_exact() {
        assert_eq!(to_thai_baht_text(1_000_000, 0), "หนึ่งล้านบาทถ้วน");
    }

    #[test]
    fn baht_text_zero_baht_with_satang() {
        assert_eq!(to_thai_baht_text(0, 25), "ศูนย์บาทยี่สิบห้าสตางค์");
    }

    #[test]
    fn baht_text_satang_eleven() {
        assert_eq!(to_thai_baht_text(2, 11), "สองบาทสิบเอ็ดสตางค์");
    }

    // ── roundtrip parse_thai_baht ↔ to_thai_baht_text ────────────────────────

    /// Render each (baht, satang) pair as text and parse it back unchanged.
    #[test]
    fn baht_roundtrip() {
        let cases = [
            (0u64, 0u8),
            (1, 0),
            (100, 0),
            (123, 50),
            (5, 25),
            (1_000_000, 0),
            (21, 11),
            (0, 99),
        ];
        for (baht, satang) in cases {
            let text = to_thai_baht_text(baht, satang);
            let parsed = parse_thai_baht(&text);
            assert_eq!(
                parsed,
                Some(BahtAmount { baht, satang }),
                "roundtrip failed for ({baht}, {satang}): text={text:?}"
            );
        }
    }
}