Skip to main content

kham_core/
number.rs

1//! Thai number normalization.
2//!
3//! Two independent normalization paths:
4//!
5//! 1. **Thai digit → ASCII**: converts Thai digit characters (๐–๙, U+0E50–U+0E59)
6//!    to their ASCII equivalents. Non-digit characters are passed through unchanged.
7//!
8//! 2. **Spelled-out Thai number words → integer**: parses a full Thai cardinal number
9//!    word (e.g. `หนึ่งร้อยยี่สิบสาม`) into its numeric value (`123`).
10//!
11//! ## Thai digit mapping
12//!
13//! | Thai | ASCII |
14//! |------|-------|
15//! | ๐    | 0     |
16//! | ๑    | 1     |
17//! | ๒    | 2     |
18//! | ๓    | 3     |
19//! | ๔    | 4     |
20//! | ๕    | 5     |
21//! | ๖    | 6     |
22//! | ๗    | 7     |
23//! | ๘    | 8     |
24//! | ๙    | 9     |
25//!
26//! ## Thai number word grammar (cardinal)
27//!
28//! | Word   | Value       | Notes                                    |
29//! |--------|-------------|------------------------------------------|
30//! | ศูนย์  | 0           |                                          |
31//! | หนึ่ง  | 1           |                                          |
32//! | เอ็ด   | 1           | units position after `สิบ` only          |
33//! | ยี่     | 2           | tens position only (`ยี่สิบ` = 20)       |
34//! | สอง   | 2           |                                          |
35//! | สาม   | 3           |                                          |
36//! | สี่     | 4           |                                          |
37//! | ห้า    | 5           |                                          |
38//! | หก    | 6           |                                          |
39//! | เจ็ด   | 7           |                                          |
40//! | แปด   | 8           |                                          |
41//! | เก้า   | 9           |                                          |
42//! | สิบ    | ×10         | preceding digit optional (default 1)     |
43//! | ร้อย   | ×100        | preceding digit optional                 |
44//! | พัน    | ×1 000      | preceding digit optional                 |
45//! | หมื่น  | ×10 000     | preceding digit optional                 |
46//! | แสน   | ×100 000    | preceding digit optional                 |
47//! | ล้าน   | ×1 000 000  | splits number into two sub-million groups|
48//!
49//! # Examples
50//!
51//! ```rust
52//! use kham_core::number::{thai_digits_to_ascii, parse_thai_word};
53//!
54//! // Thai digit conversion
55//! assert_eq!(thai_digits_to_ascii("๑๒๓"), "123");
56//! assert_eq!(thai_digits_to_ascii("ธนาคาร๑๐๐แห่ง"), "ธนาคาร100แห่ง");
57//!
58//! // Spelled-out number word parsing
59//! assert_eq!(parse_thai_word("ยี่สิบ"), Some(20));
60//! assert_eq!(parse_thai_word("หนึ่งร้อยยี่สิบสาม"), Some(123));
61//! assert_eq!(parse_thai_word("สองล้านห้าแสน"), Some(2_500_000));
62//! ```
63
64use alloc::string::String;
65use alloc::vec::Vec;
66
67// ---------------------------------------------------------------------------
68// Thai digit ↔ ASCII
69// ---------------------------------------------------------------------------
70
/// Map one Thai digit character (๐–๙, U+0E50–U+0E59) to the matching ASCII digit.
///
/// Any other character — including ASCII digits themselves — yields `None`.
///
/// # Examples
///
/// ```rust
/// use kham_core::number::thai_digit_to_ascii;
///
/// assert_eq!(thai_digit_to_ascii('๑'), Some('1'));
/// assert_eq!(thai_digit_to_ascii('ก'), None);
/// assert_eq!(thai_digit_to_ascii('5'), None); // ASCII digits are not Thai digits
/// ```
#[inline]
pub fn thai_digit_to_ascii(c: char) -> Option<char> {
    // Code point of ๐ (THAI DIGIT ZERO); the ten Thai digits are contiguous.
    const THAI_ZERO: u32 = 0x0E50;

    // `checked_sub` rejects everything below ๐; `from_digit(_, 10)` rejects
    // every offset above 9, i.e. everything past ๙.
    (c as u32)
        .checked_sub(THAI_ZERO)
        .and_then(|offset| char::from_digit(offset, 10))
}
100
101/// Convert all Thai digit characters in `text` to ASCII digits.
102///
103/// Characters that are not Thai digits are passed through unchanged.
104/// Allocates a new [`String`] only when Thai digits are present;
105/// otherwise returns a copy of the input.
106///
107/// # Examples
108///
109/// ```rust
110/// use kham_core::number::thai_digits_to_ascii;
111///
112/// assert_eq!(thai_digits_to_ascii("๑๒๓"), "123");
113/// assert_eq!(thai_digits_to_ascii("ธนาคาร๑๐๐แห่ง"), "ธนาคาร100แห่ง");
114/// assert_eq!(thai_digits_to_ascii("hello"), "hello");
115/// assert_eq!(thai_digits_to_ascii(""), "");
116/// // Mixed Thai and ASCII digits — only Thai digits are converted
117/// assert_eq!(thai_digits_to_ascii("๑2๓"), "123");
118/// ```
119pub fn thai_digits_to_ascii(text: &str) -> String {
120    if !text.chars().any(|c| thai_digit_to_ascii(c).is_some()) {
121        return String::from(text);
122    }
123    text.chars()
124        .map(|c| thai_digit_to_ascii(c).unwrap_or(c))
125        .collect()
126}
127
128/// Return `true` if every character in `text` is a Thai digit (๐–๙).
129///
130/// Returns `false` for empty strings.
131///
132/// # Examples
133///
134/// ```rust
135/// use kham_core::number::is_thai_digit_str;
136///
137/// assert!(is_thai_digit_str("๑๒๓"));
138/// assert!(is_thai_digit_str("๐"));
139/// assert!(!is_thai_digit_str("123"));
140/// assert!(!is_thai_digit_str("๑2๓")); // mixed
141/// assert!(!is_thai_digit_str(""));
142/// ```
143#[inline]
144pub fn is_thai_digit_str(text: &str) -> bool {
145    !text.is_empty() && text.chars().all(|c| thai_digit_to_ascii(c).is_some())
146}
147
148// ---------------------------------------------------------------------------
149// Spelled-out Thai number word → u64
150// ---------------------------------------------------------------------------
151
/// One lexical unit of a spelled-out Thai number, as produced by the
/// number-word lexer and consumed by the parser.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum NumToken {
    /// An ordinary digit word carrying its value 0–9
    /// (ศูนย์ หนึ่ง สอง สาม สี่ ห้า หก เจ็ด แปด เก้า).
    Digit(u64),
    /// ยี่ — the alternate spelling of 2 used in the tens position (ยี่สิบ = 20).
    Yi,
    /// เอ็ด — the alternate spelling of 1 used in the units position after สิบ.
    Et,
    /// สิบ — multiplier ×10.
    Sip,
    /// ร้อย — multiplier ×100.
    Roi,
    /// พัน — multiplier ×1 000.
    Pan,
    /// หมื่น — multiplier ×10 000.
    Muen,
    /// แสน — multiplier ×100 000.
    Saen,
    /// ล้าน — ×1 000 000; separates million groups.
    Lan,
}
174
175/// Greedy longest-prefix match against the Thai number vocabulary.
176///
177/// Returns `(token, remaining_slice)` or `None` if no token starts at `s`.
178fn next_num_token(s: &str) -> Option<(NumToken, &str)> {
179    // Ordered so that longer / more specific prefixes come before shorter ones.
180    // e.g. "เก้า" before any single-syllable word to avoid prefix collisions.
181    const VOCAB: &[(&str, NumToken)] = &[
182        ("ศูนย์", NumToken::Digit(0)),
183        ("หนึ่ง", NumToken::Digit(1)),
184        ("เอ็ด", NumToken::Et),
185        ("ยี่", NumToken::Yi),
186        ("สอง", NumToken::Digit(2)),
187        ("สาม", NumToken::Digit(3)),
188        ("สี่", NumToken::Digit(4)),
189        ("ห้า", NumToken::Digit(5)),
190        ("หก", NumToken::Digit(6)),
191        ("เจ็ด", NumToken::Digit(7)),
192        ("แปด", NumToken::Digit(8)),
193        ("เก้า", NumToken::Digit(9)),
194        ("สิบ", NumToken::Sip),
195        ("ร้อย", NumToken::Roi),
196        ("พัน", NumToken::Pan),
197        ("หมื่น", NumToken::Muen),
198        ("แสน", NumToken::Saen),
199        ("ล้าน", NumToken::Lan),
200    ];
201    for &(word, tok) in VOCAB {
202        if let Some(rest) = s.strip_prefix(word) {
203            return Some((tok, rest));
204        }
205    }
206    None
207}
208
209/// Parse a Thai cardinal number below one million (0–999 999).
210///
211/// Returns `None` when `s` contains unrecognised tokens or is structurally
212/// invalid (e.g. two consecutive digit words without a multiplier between them).
213/// An empty string returns `Some(0)` to support implied-zero sub-parts.
214fn parse_below_lan(s: &str) -> Option<u64> {
215    let mut s = s;
216    let mut total: u64 = 0;
217    let mut pending: Option<u64> = None; // digit waiting for its multiplier
218    let mut had_sip = false; // ตรวจสอบ for เอ็ด-validity
219
220    while !s.is_empty() {
221        let (tok, rest) = next_num_token(s)?;
222        match tok {
223            NumToken::Digit(d) => {
224                // Two consecutive digit words with no multiplier between them
225                // are not valid Thai number spelling.
226                if pending.is_some() {
227                    return None;
228                }
229                pending = Some(d);
230            }
231            NumToken::Yi => {
232                if pending.is_some() {
233                    return None;
234                }
235                // ยี่ acts as digit-2 in front of สิบ (ยี่สิบ = 20).
236                pending = Some(2);
237            }
238            NumToken::Et => {
239                // เอ็ด is the units-1 form allowed only after สิบ.
240                if !had_sip || pending.is_some() {
241                    return None;
242                }
243                total = total.checked_add(1)?;
244            }
245            NumToken::Sip => {
246                let coeff = pending.take().unwrap_or(1);
247                total = total.checked_add(coeff.checked_mul(10)?)?;
248                had_sip = true;
249            }
250            NumToken::Roi => {
251                let coeff = pending.take().unwrap_or(1);
252                total = total.checked_add(coeff.checked_mul(100)?)?;
253            }
254            NumToken::Pan => {
255                let coeff = pending.take().unwrap_or(1);
256                total = total.checked_add(coeff.checked_mul(1_000)?)?;
257            }
258            NumToken::Muen => {
259                let coeff = pending.take().unwrap_or(1);
260                total = total.checked_add(coeff.checked_mul(10_000)?)?;
261            }
262            NumToken::Saen => {
263                let coeff = pending.take().unwrap_or(1);
264                total = total.checked_add(coeff.checked_mul(100_000)?)?;
265            }
266            NumToken::Lan => {
267                // ล้าน is resolved at the outer level; hitting it here is invalid.
268                return None;
269            }
270        }
271        s = rest;
272    }
273
274    // Any remaining pending digit is a standalone units value (e.g. the สาม in
275    // หนึ่งร้อยสองสิบสาม).
276    if let Some(d) = pending {
277        total = total.checked_add(d)?;
278    }
279
280    Some(total)
281}
282
/// Format `n` as a base-10 string using only `core`/`alloc` facilities.
///
/// Produces no sign, separators, or leading zeros; `0` is rendered as `"0"`.
fn u64_to_string(mut n: u64) -> String {
    // u64::MAX (18446744073709551615) has 20 decimal digits, so 20 bytes
    // always suffice.
    let mut buf = [b'0'; 20];
    let mut start = buf.len();

    // Fill the buffer from the least-significant digit backwards. The loop
    // body runs at least once so that zero yields a single '0'.
    loop {
        start -= 1;
        buf[start] = b'0' + (n % 10) as u8;
        n /= 10;
        if n == 0 {
            break;
        }
    }

    buf[start..].iter().map(|&byte| char::from(byte)).collect()
}
297
298/// Parse a spelled-out Thai cardinal number word into its numeric value.
299///
300/// Handles the full range from 0 (`ศูนย์`) up to values bounded by `u64::MAX`.
301/// Returns `None` when the input contains unrecognised characters, is empty,
302/// or is structurally invalid for Thai number grammar.
303///
304/// ## Grammar summary
305///
306/// ```text
307/// number  ::= sub_lan "ล้าน" sub_lan
308///           | sub_lan "ล้าน"
309///           | sub_lan
310///
311/// sub_lan ::= [digit] "แสน" sub_แสน | sub_แสน
312///           | [digit] "หมื่น" sub_หมื่น | sub_หมื่น
313///           | [digit] "พัน"   sub_พัน   | sub_พัน
314///           | [digit] "ร้อย"  sub_ร้อย  | sub_ร้อย
315///           | ["ยี่" | digit] "สิบ" unit | "สิบ" unit
316///           | unit
317///
318/// unit    ::= digit | "เอ็ด" | ε
319/// digit   ::= "ศูนย์" | "หนึ่ง" | "สอง" | … | "เก้า"
320/// ```
321///
322/// The `ยี่` form of 2 is valid only in `ยี่สิบ` (20, 21, …, 29).
323/// The `เอ็ด` form of 1 is valid only as the units digit after `สิบ`
324/// (11, 21, 31, …).
325///
326/// # Examples
327///
328/// ```rust
329/// use kham_core::number::parse_thai_word;
330///
331/// // single digits
332/// assert_eq!(parse_thai_word("ศูนย์"), Some(0));
333/// assert_eq!(parse_thai_word("หนึ่ง"), Some(1));
334/// assert_eq!(parse_thai_word("เก้า"), Some(9));
335///
336/// // tens — implied-1 prefix and special forms
337/// assert_eq!(parse_thai_word("สิบ"), Some(10));
338/// assert_eq!(parse_thai_word("สิบเอ็ด"), Some(11));
339/// assert_eq!(parse_thai_word("ยี่สิบ"), Some(20));
340/// assert_eq!(parse_thai_word("ยี่สิบเอ็ด"), Some(21));
341/// assert_eq!(parse_thai_word("สามสิบสี่"), Some(34));
342///
343/// // hundreds
344/// assert_eq!(parse_thai_word("ร้อย"), Some(100));
345/// assert_eq!(parse_thai_word("หนึ่งร้อย"), Some(100));
346/// assert_eq!(parse_thai_word("หนึ่งร้อยยี่สิบสาม"), Some(123));
347///
348/// // thousands
349/// assert_eq!(parse_thai_word("หนึ่งพัน"), Some(1_000));
350/// assert_eq!(parse_thai_word("สองพันห้าร้อย"), Some(2_500));
351///
352/// // ten-thousands / hundred-thousands
353/// assert_eq!(parse_thai_word("หนึ่งหมื่น"), Some(10_000));
354/// assert_eq!(parse_thai_word("หนึ่งแสน"), Some(100_000));
355///
356/// // millions — coefficient itself can be a sub-million number
357/// assert_eq!(parse_thai_word("หนึ่งล้าน"), Some(1_000_000));
358/// assert_eq!(parse_thai_word("ล้าน"), Some(1_000_000));
359/// assert_eq!(parse_thai_word("สองล้านห้าแสน"), Some(2_500_000));
360/// assert_eq!(parse_thai_word("สิบล้าน"), Some(10_000_000));
361/// assert_eq!(parse_thai_word("หนึ่งร้อยล้าน"), Some(100_000_000));
362///
363/// // not a number → None
364/// assert_eq!(parse_thai_word("กินข้าว"), None);
365/// assert_eq!(parse_thai_word(""), None);
366/// ```
367pub fn parse_thai_word(text: &str) -> Option<u64> {
368    let s = text.trim();
369    if s.is_empty() {
370        return None;
371    }
372
373    if let Some(lan_pos) = s.find("ล้าน") {
374        let prefix = &s[..lan_pos];
375        let suffix = &s[lan_pos + "ล้าน".len()..];
376
377        // The millions coefficient is a sub-million number; bare "ล้าน" implies 1.
378        let millions: u64 = if prefix.is_empty() {
379            1
380        } else {
381            parse_below_lan(prefix)?
382        };
383        let remainder: u64 = if suffix.is_empty() {
384            0
385        } else {
386            parse_below_lan(suffix)?
387        };
388
389        millions.checked_mul(1_000_000)?.checked_add(remainder)
390    } else {
391        let result = parse_below_lan(s)?;
392        // parse_below_lan returns Some(0) for empty string, but we guard that
393        // case at the top of this function, so a Some(0) here means ศูนย์.
394        Some(result)
395    }
396}
397
398/// Return the decimal string representation of a Thai number word, or `None`
399/// if the input is not a recognised Thai number word.
400///
401/// This is a convenience wrapper over [`parse_thai_word`] that formats the
402/// result as a string suitable for use as an FTS synonym.
403///
404/// # Examples
405///
406/// ```rust
407/// use kham_core::number::thai_word_to_decimal;
408///
409/// assert_eq!(thai_word_to_decimal("ยี่สิบ"), Some(String::from("20")));
410/// assert_eq!(thai_word_to_decimal("หนึ่งร้อยยี่สิบสาม"), Some(String::from("123")));
411/// assert_eq!(thai_word_to_decimal("กิน"), None);
412/// ```
413pub fn thai_word_to_decimal(text: &str) -> Option<String> {
414    parse_thai_word(text).map(u64_to_string)
415}
416
417// ---------------------------------------------------------------------------
418// Number → Thai word (generator, inverse of parse_thai_word)
419// ---------------------------------------------------------------------------
420
/// Look up the Thai word for a single digit 1–9.
///
/// Every other value, including 0, maps to the empty string; callers decide
/// whether zero is written as ศูนย์ or simply omitted.
#[inline]
fn digit_word(d: u64) -> &'static str {
    // Index i holds the word for digit i + 1.
    const WORDS: [&str; 9] = [
        "หนึ่ง",
        "สอง",
        "สาม",
        "สี่",
        "ห้า",
        "หก",
        "เจ็ด",
        "แปด",
        "เก้า",
    ];

    if d >= 1 && d <= 9 {
        WORDS[(d - 1) as usize]
    } else {
        ""
    }
}
439
440/// Append the Thai word representation of `n` (1–999 999) to `out`.
441fn write_below_lan(mut n: u64, out: &mut String) {
442    if n >= 100_000 {
443        out.push_str(digit_word(n / 100_000));
444        out.push_str("แสน");
445        n %= 100_000;
446    }
447    if n >= 10_000 {
448        out.push_str(digit_word(n / 10_000));
449        out.push_str("หมื่น");
450        n %= 10_000;
451    }
452    if n >= 1_000 {
453        out.push_str(digit_word(n / 1_000));
454        out.push_str("พัน");
455        n %= 1_000;
456    }
457    if n >= 100 {
458        out.push_str(digit_word(n / 100));
459        out.push_str("ร้อย");
460        n %= 100;
461    }
462    if n >= 10 {
463        let tens = n / 10;
464        let units = n % 10;
465        match tens {
466            1 => out.push_str("สิบ"),  // implied-1: สิบ not หนึ่งสิบ
467            2 => out.push_str("ยี่สิบ"), // special form for 20s
468            _ => {
469                out.push_str(digit_word(tens));
470                out.push_str("สิบ");
471            }
472        }
473        match units {
474            0 => {}
475            1 => out.push_str("เอ็ด"), // เอ็ด only after สิบ
476            _ => out.push_str(digit_word(units)),
477        }
478    } else if n > 0 {
479        out.push_str(digit_word(n)); // plain units 1–9, no สิบ context → หนึ่ง not เอ็ด
480    }
481}
482
483/// Append the full Thai word representation of any `n > 0` to `out`.
484fn write_thai_word(n: u64, out: &mut String) {
485    if n >= 1_000_000 {
486        write_thai_word(n / 1_000_000, out);
487        out.push_str("ล้าน");
488        let rem = n % 1_000_000;
489        if rem > 0 {
490            write_below_lan(rem, out);
491        }
492    } else {
493        write_below_lan(n, out);
494    }
495}
496
497/// Convert a `u64` to a spelled-out Thai cardinal number word.
498///
499/// This is the inverse of [`parse_thai_word`]: for any value `n`,
500/// `parse_thai_word(u64_to_thai_word(n)) == Some(n)`.
501///
502/// - Zero is rendered as `ศูนย์`.
503/// - The `ยี่` form is used for 20, 21, …, 29 and their multiples.
504/// - The `เอ็ด` form is used for units-1 when a tens word (`สิบ`) precedes it.
505/// - Higher multipliers use explicit digit prefixes (`หนึ่งร้อย` = 100, etc.).
506///
507/// # Examples
508///
509/// ```rust
510/// use kham_core::number::u64_to_thai_word;
511///
512/// assert_eq!(u64_to_thai_word(0),   "ศูนย์");
513/// assert_eq!(u64_to_thai_word(10),  "สิบ");
514/// assert_eq!(u64_to_thai_word(11),  "สิบเอ็ด");
515/// assert_eq!(u64_to_thai_word(20),  "ยี่สิบ");
516/// assert_eq!(u64_to_thai_word(21),  "ยี่สิบเอ็ด");
517/// assert_eq!(u64_to_thai_word(100), "หนึ่งร้อย");
518/// assert_eq!(u64_to_thai_word(123), "หนึ่งร้อยยี่สิบสาม");
519/// assert_eq!(u64_to_thai_word(1_000_000), "หนึ่งล้าน");
520/// assert_eq!(u64_to_thai_word(10_000_000), "สิบล้าน");
521/// ```
522pub fn u64_to_thai_word(n: u64) -> String {
523    if n == 0 {
524        return String::from("ศูนย์");
525    }
526    let mut out = String::new();
527    write_thai_word(n, &mut out);
528    out
529}
530
531// ---------------------------------------------------------------------------
532// Thai Baht — parse and generate
533// ---------------------------------------------------------------------------
534
/// An amount of Thai currency, split into whole baht and satang.
///
/// One baht is 100 satang, so `satang` is meant to stay within 0–99.
///
/// # Examples
///
/// ```rust
/// use kham_core::number::{BahtAmount, parse_thai_baht, to_thai_baht_text};
///
/// let amt = parse_thai_baht("หนึ่งร้อยยี่สิบสามบาทห้าสิบสตางค์").unwrap();
/// assert_eq!(amt.baht, 123);
/// assert_eq!(amt.satang, 50);
///
/// assert_eq!(to_thai_baht_text(123, 50), "หนึ่งร้อยยี่สิบสามบาทห้าสิบสตางค์");
/// assert_eq!(to_thai_baht_text(100, 0),  "หนึ่งร้อยบาทถ้วน");
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BahtAmount {
    /// Number of whole baht.
    pub baht: u64,
    /// Fractional part in satang (0–99); 100 satang make 1 baht.
    pub satang: u8,
}
558
559/// Parse a Thai Baht currency string into a [`BahtAmount`].
560///
561/// Accepted forms:
562///
563/// | Input                                      | baht | satang |
564/// |--------------------------------------------|------|--------|
565/// | `หนึ่งร้อยบาทถ้วน`                          | 100  | 0      |
566/// | `หนึ่งร้อยบาท`                               | 100  | 0      |
567/// | `ห้าบาทยี่สิบห้าสตางค์`                      | 5    | 25     |
568/// | `หนึ่งล้านบาทถ้วน`                           | 1 000 000 | 0 |
569/// | `ศูนย์บาทห้าสิบสตางค์`                       | 0    | 50     |
570///
571/// Returns `None` when:
572/// - The string contains no `บาท`.
573/// - The baht part is not a recognised Thai number word.
574/// - The satang part is present but not a recognised Thai number word.
575/// - The satang value exceeds 99.
576///
577/// # Examples
578///
579/// ```rust
580/// use kham_core::number::{parse_thai_baht, BahtAmount};
581///
582/// assert_eq!(
583///     parse_thai_baht("หนึ่งร้อยยี่สิบสามบาทถ้วน"),
584///     Some(BahtAmount { baht: 123, satang: 0 })
585/// );
586/// assert_eq!(
587///     parse_thai_baht("ห้าบาทยี่สิบห้าสตางค์"),
588///     Some(BahtAmount { baht: 5, satang: 25 })
589/// );
590/// assert_eq!(parse_thai_baht("กินข้าว"), None);
591/// assert_eq!(parse_thai_baht(""), None);
592/// ```
593pub fn parse_thai_baht(text: &str) -> Option<BahtAmount> {
594    let s = text.trim();
595    if s.is_empty() {
596        return None;
597    }
598
599    let (baht_part, after_baht) = s.split_once("บาท")?;
600
601    let baht = parse_thai_word(baht_part.trim())?;
602
603    let satang_str = after_baht.trim();
604    let satang: u8 = if satang_str.is_empty() || satang_str == "ถ้วน" {
605        0
606    } else if let Some(san_word) = satang_str.strip_suffix("สตางค์") {
607        let val = parse_thai_word(san_word.trim())?;
608        if val > 99 {
609            return None;
610        }
611        val as u8
612    } else {
613        return None;
614    };
615
616    Some(BahtAmount { baht, satang })
617}
618
619/// Render a baht + satang amount as Thai currency text.
620///
621/// - When `satang == 0` the suffix `ถ้วน` (exact, no satang) is appended.
622/// - When `satang > 0` the satang amount is spelled out followed by `สตางค์`.
623///
624/// The satang parameter should be in the range 0–99 (1 baht = 100 satang).
625/// Values above 99 are accepted and rendered as-is but are semantically odd.
626///
627/// # Examples
628///
629/// ```rust
630/// use kham_core::number::to_thai_baht_text;
631///
632/// assert_eq!(to_thai_baht_text(0,   0),  "ศูนย์บาทถ้วน");
633/// assert_eq!(to_thai_baht_text(1,   0),  "หนึ่งบาทถ้วน");
634/// assert_eq!(to_thai_baht_text(100, 0),  "หนึ่งร้อยบาทถ้วน");
635/// assert_eq!(to_thai_baht_text(21,  50), "ยี่สิบเอ็ดบาทห้าสิบสตางค์");
636/// assert_eq!(to_thai_baht_text(1_000_000, 0), "หนึ่งล้านบาทถ้วน");
637/// assert_eq!(to_thai_baht_text(0,   25), "ศูนย์บาทยี่สิบห้าสตางค์");
638/// ```
639pub fn to_thai_baht_text(baht: u64, satang: u8) -> String {
640    let mut out = u64_to_thai_word(baht);
641    out.push_str("บาท");
642    if satang == 0 {
643        out.push_str("ถ้วน");
644    } else {
645        out.push_str(&u64_to_thai_word(satang as u64));
646        out.push_str("สตางค์");
647    }
648    out
649}
650
651// ---------------------------------------------------------------------------
652// Tests
653// ---------------------------------------------------------------------------
654
655#[cfg(test)]
656mod tests {
657    use super::*;
658
659    // ── thai_digit_to_ascii ───────────────────────────────────────────────────
660
661    #[test]
662    fn thai_digits_map_correctly() {
663        let pairs = [
664            ('๐', '0'),
665            ('๑', '1'),
666            ('๒', '2'),
667            ('๓', '3'),
668            ('๔', '4'),
669            ('๕', '5'),
670            ('๖', '6'),
671            ('๗', '7'),
672            ('๘', '8'),
673            ('๙', '9'),
674        ];
675        for (thai, ascii) in pairs {
676            assert_eq!(thai_digit_to_ascii(thai), Some(ascii), "failed for {thai}");
677        }
678    }
679
680    #[test]
681    fn non_digit_returns_none() {
682        assert_eq!(thai_digit_to_ascii('ก'), None);
683        assert_eq!(thai_digit_to_ascii('5'), None);
684        assert_eq!(thai_digit_to_ascii(' '), None);
685    }
686
687    // ── thai_digits_to_ascii ──────────────────────────────────────────────────
688
689    #[test]
690    fn converts_all_thai_digits() {
691        assert_eq!(thai_digits_to_ascii("๐๑๒๓๔๕๖๗๘๙"), "0123456789");
692    }
693
694    #[test]
695    fn passthrough_ascii_only() {
696        assert_eq!(thai_digits_to_ascii("hello 123"), "hello 123");
697    }
698
699    #[test]
700    fn empty_string_passthrough() {
701        assert_eq!(thai_digits_to_ascii(""), "");
702    }
703
704    #[test]
705    fn mixed_thai_digit_in_sentence() {
706        assert_eq!(thai_digits_to_ascii("ธนาคาร๑๐๐แห่ง"), "ธนาคาร100แห่ง");
707    }
708
709    #[test]
710    fn mixed_thai_and_ascii_digits() {
711        assert_eq!(thai_digits_to_ascii("๑2๓"), "123");
712    }
713
714    #[test]
715    fn no_allocation_when_no_thai_digits() {
716        // We just verify correctness; allocation behaviour is an impl detail.
717        let result = thai_digits_to_ascii("no thai digits here");
718        assert_eq!(result, "no thai digits here");
719    }
720
721    // ── is_thai_digit_str ─────────────────────────────────────────────────────
722
723    #[test]
724    fn all_thai_digits() {
725        assert!(is_thai_digit_str("๑๒๓"));
726        assert!(is_thai_digit_str("๐"));
727    }
728
729    #[test]
730    fn mixed_is_false() {
731        assert!(!is_thai_digit_str("๑2๓"));
732        assert!(!is_thai_digit_str("๑ก"));
733    }
734
735    #[test]
736    fn ascii_only_is_false() {
737        assert!(!is_thai_digit_str("123"));
738    }
739
740    #[test]
741    fn empty_is_false() {
742        assert!(!is_thai_digit_str(""));
743    }
744
745    // ── u64_to_string ─────────────────────────────────────────────────────────
746
747    #[test]
748    fn zero_formats_correctly() {
749        assert_eq!(u64_to_string(0), "0");
750    }
751
752    #[test]
753    fn small_number_formats_correctly() {
754        assert_eq!(u64_to_string(42), "42");
755    }
756
757    #[test]
758    fn large_number_formats_correctly() {
759        assert_eq!(u64_to_string(1_000_000), "1000000");
760    }
761
762    // ── parse_thai_word — single digits ──────────────────────────────────────
763
764    #[test]
765    fn zero() {
766        assert_eq!(parse_thai_word("ศูนย์"), Some(0));
767    }
768
769    #[test]
770    fn one_to_nine() {
771        let cases = [
772            ("หนึ่ง", 1u64),
773            ("สอง", 2),
774            ("สาม", 3),
775            ("สี่", 4),
776            ("ห้า", 5),
777            ("หก", 6),
778            ("เจ็ด", 7),
779            ("แปด", 8),
780            ("เก้า", 9),
781        ];
782        for (word, expected) in cases {
783            assert_eq!(parse_thai_word(word), Some(expected), "failed for {word}");
784        }
785    }
786
787    // ── parse_thai_word — tens ────────────────────────────────────────────────
788
789    #[test]
790    fn ten_implied_one() {
791        assert_eq!(parse_thai_word("สิบ"), Some(10));
792    }
793
794    #[test]
795    fn eleven_uses_et() {
796        assert_eq!(parse_thai_word("สิบเอ็ด"), Some(11));
797    }
798
799    #[test]
800    fn twelve_to_nineteen() {
801        let cases = [
802            ("สิบสอง", 12u64),
803            ("สิบสาม", 13),
804            ("สิบสี่", 14),
805            ("สิบห้า", 15),
806            ("สิบหก", 16),
807            ("สิบเจ็ด", 17),
808            ("สิบแปด", 18),
809            ("สิบเก้า", 19),
810        ];
811        for (word, expected) in cases {
812            assert_eq!(parse_thai_word(word), Some(expected), "failed for {word}");
813        }
814    }
815
816    #[test]
817    fn twenty_uses_yi() {
818        assert_eq!(parse_thai_word("ยี่สิบ"), Some(20));
819    }
820
821    #[test]
822    fn twenty_one_yi_et() {
823        assert_eq!(parse_thai_word("ยี่สิบเอ็ด"), Some(21));
824    }
825
826    #[test]
827    fn thirty_four() {
828        assert_eq!(parse_thai_word("สามสิบสี่"), Some(34));
829    }
830
831    #[test]
832    fn ninety_nine() {
833        assert_eq!(parse_thai_word("เก้าสิบเก้า"), Some(99));
834    }
835
836    // ── parse_thai_word — hundreds ────────────────────────────────────────────
837
838    #[test]
839    fn hundred_implied_one() {
840        assert_eq!(parse_thai_word("ร้อย"), Some(100));
841    }
842
843    #[test]
844    fn one_hundred_explicit() {
845        assert_eq!(parse_thai_word("หนึ่งร้อย"), Some(100));
846    }
847
848    #[test]
849    fn one_hundred_twenty_three() {
850        assert_eq!(parse_thai_word("หนึ่งร้อยยี่สิบสาม"), Some(123));
851    }
852
853    #[test]
854    fn two_hundred() {
855        assert_eq!(parse_thai_word("สองร้อย"), Some(200));
856    }
857
858    #[test]
859    fn nine_hundred_ninety_nine() {
860        assert_eq!(parse_thai_word("เก้าร้อยเก้าสิบเก้า"), Some(999));
861    }
862
863    // ── parse_thai_word — thousands ───────────────────────────────────────────
864
865    #[test]
866    fn one_thousand() {
867        assert_eq!(parse_thai_word("หนึ่งพัน"), Some(1_000));
868        assert_eq!(parse_thai_word("พัน"), Some(1_000));
869    }
870
871    #[test]
872    fn two_thousand_five_hundred() {
873        assert_eq!(parse_thai_word("สองพันห้าร้อย"), Some(2_500));
874    }
875
876    #[test]
877    fn ten_thousand() {
878        assert_eq!(parse_thai_word("หนึ่งหมื่น"), Some(10_000));
879        assert_eq!(parse_thai_word("หมื่น"), Some(10_000));
880    }
881
882    #[test]
883    fn hundred_thousand() {
884        assert_eq!(parse_thai_word("หนึ่งแสน"), Some(100_000));
885        assert_eq!(parse_thai_word("แสน"), Some(100_000));
886    }
887
888    // ── parse_thai_word — millions ────────────────────────────────────────────
889
890    #[test]
891    fn one_million_explicit() {
892        assert_eq!(parse_thai_word("หนึ่งล้าน"), Some(1_000_000));
893    }
894
895    #[test]
896    fn one_million_implied() {
897        assert_eq!(parse_thai_word("ล้าน"), Some(1_000_000));
898    }
899
900    #[test]
901    fn ten_million() {
902        assert_eq!(parse_thai_word("สิบล้าน"), Some(10_000_000));
903    }
904
905    #[test]
906    fn hundred_million() {
907        assert_eq!(parse_thai_word("หนึ่งร้อยล้าน"), Some(100_000_000));
908    }
909
910    #[test]
911    fn two_million_five_hundred_thousand() {
912        assert_eq!(parse_thai_word("สองล้านห้าแสน"), Some(2_500_000));
913    }
914
915    #[test]
916    fn complex_seven_digit() {
917        // 3,456,789
918        assert_eq!(
919            parse_thai_word("สามล้านสี่แสนห้าหมื่นหกพันเจ็ดร้อยแปดสิบเก้า"),
920            Some(3_456_789)
921        );
922    }
923
924    // ── parse_thai_word — invalid / None ─────────────────────────────────────
925
926    #[test]
927    fn empty_returns_none() {
928        assert_eq!(parse_thai_word(""), None);
929    }
930
931    #[test]
932    fn whitespace_only_returns_none() {
933        assert_eq!(parse_thai_word("   "), None);
934    }
935
936    #[test]
937    fn non_number_word_returns_none() {
938        assert_eq!(parse_thai_word("กินข้าว"), None);
939        assert_eq!(parse_thai_word("ประเทศไทย"), None);
940    }
941
942    #[test]
943    fn et_without_sip_is_invalid() {
944        assert_eq!(parse_thai_word("เอ็ด"), None);
945        assert_eq!(parse_thai_word("ร้อยเอ็ด"), None);
946    }
947
948    #[test]
949    fn consecutive_digits_invalid() {
950        // หนึ่งสอง = two digit words with no multiplier is invalid
951        assert_eq!(parse_thai_word("หนึ่งสอง"), None);
952    }
953
954    // ── thai_word_to_decimal ──────────────────────────────────────────────────
955
956    #[test]
957    fn word_to_decimal_converts() {
958        assert_eq!(thai_word_to_decimal("ยี่สิบ"), Some(String::from("20")));
959        assert_eq!(
960            thai_word_to_decimal("หนึ่งร้อยยี่สิบสาม"),
961            Some(String::from("123"))
962        );
963    }
964
965    #[test]
966    fn word_to_decimal_none_for_non_number() {
967        assert_eq!(thai_word_to_decimal("กิน"), None);
968    }
969
970    // ── trim handling ─────────────────────────────────────────────────────────
971
972    #[test]
973    fn leading_trailing_whitespace_trimmed() {
974        assert_eq!(parse_thai_word("  สิบ  "), Some(10));
975    }
976
977    // ── u64_to_thai_word ──────────────────────────────────────────────────────
978
979    #[test]
980    fn zero_word() {
981        assert_eq!(u64_to_thai_word(0), "ศูนย์");
982    }
983
984    #[test]
985    fn single_digits_word() {
986        let cases = [
987            (1u64, "หนึ่ง"),
988            (2, "สอง"),
989            (3, "สาม"),
990            (4, "สี่"),
991            (5, "ห้า"),
992            (6, "หก"),
993            (7, "เจ็ด"),
994            (8, "แปด"),
995            (9, "เก้า"),
996        ];
997        for (n, word) in cases {
998            assert_eq!(u64_to_thai_word(n), word, "failed for {n}");
999        }
1000    }
1001
1002    #[test]
1003    fn ten_implied_one_word() {
1004        assert_eq!(u64_to_thai_word(10), "สิบ");
1005    }
1006
1007    #[test]
1008    fn eleven_et_form() {
1009        assert_eq!(u64_to_thai_word(11), "สิบเอ็ด");
1010    }
1011
1012    #[test]
1013    fn twelve_to_nineteen_word() {
1014        let cases = [(12u64, "สิบสอง"), (15, "สิบห้า"), (19, "สิบเก้า")];
1015        for (n, word) in cases {
1016            assert_eq!(u64_to_thai_word(n), word);
1017        }
1018    }
1019
1020    #[test]
1021    fn twenty_yi_form() {
1022        assert_eq!(u64_to_thai_word(20), "ยี่สิบ");
1023    }
1024
1025    #[test]
1026    fn twenty_one_yi_et_word() {
1027        assert_eq!(u64_to_thai_word(21), "ยี่สิบเอ็ด");
1028    }
1029
1030    #[test]
1031    fn thirty_four_word() {
1032        assert_eq!(u64_to_thai_word(34), "สามสิบสี่");
1033    }
1034
1035    #[test]
1036    fn one_hundred_word() {
1037        assert_eq!(u64_to_thai_word(100), "หนึ่งร้อย");
1038    }
1039
1040    #[test]
1041    fn one_hundred_twenty_three_word() {
1042        assert_eq!(u64_to_thai_word(123), "หนึ่งร้อยยี่สิบสาม");
1043    }
1044
1045    #[test]
1046    fn one_hundred_one_no_et() {
1047        // เอ็ด only after สิบ — 101 has no สิบ so units=1 → หนึ่ง
1048        assert_eq!(u64_to_thai_word(101), "หนึ่งร้อยหนึ่ง");
1049    }
1050
1051    #[test]
1052    fn one_hundred_eleven_et() {
1053        assert_eq!(u64_to_thai_word(111), "หนึ่งร้อยสิบเอ็ด");
1054    }
1055
1056    #[test]
1057    fn one_thousand_word() {
1058        assert_eq!(u64_to_thai_word(1_000), "หนึ่งพัน");
1059    }
1060
1061    #[test]
1062    fn ten_thousand_word() {
1063        assert_eq!(u64_to_thai_word(10_000), "หนึ่งหมื่น");
1064    }
1065
1066    #[test]
1067    fn hundred_thousand_word() {
1068        assert_eq!(u64_to_thai_word(100_000), "หนึ่งแสน");
1069    }
1070
1071    #[test]
1072    fn one_million_word() {
1073        assert_eq!(u64_to_thai_word(1_000_000), "หนึ่งล้าน");
1074    }
1075
1076    #[test]
1077    fn ten_million_word() {
1078        assert_eq!(u64_to_thai_word(10_000_000), "สิบล้าน");
1079    }
1080
1081    #[test]
1082    fn complex_seven_digit_word() {
1083        assert_eq!(
1084            u64_to_thai_word(3_456_789),
1085            "สามล้านสี่แสนห้าหมื่นหกพันเจ็ดร้อยแปดสิบเก้า"
1086        );
1087    }
1088
1089    // ── roundtrip parse_thai_word ↔ u64_to_thai_word ─────────────────────────
1090
1091    #[test]
1092    fn roundtrip_parse_then_generate() {
1093        let cases = [
1094            0u64, 1, 9, 10, 11, 20, 21, 99, 100, 101, 111, 999, 1_000, 10_000, 100_000, 1_000_000,
1095            10_000_000, 3_456_789,
1096        ];
1097        for n in cases {
1098            let word = u64_to_thai_word(n);
1099            let parsed = parse_thai_word(&word);
1100            assert_eq!(parsed, Some(n), "roundtrip failed for {n}: word={word:?}");
1101        }
1102    }
1103
1104    // ── parse_thai_baht ───────────────────────────────────────────────────────
1105
1106    #[test]
1107    fn baht_exact_no_satang() {
1108        assert_eq!(
1109            parse_thai_baht("หนึ่งร้อยยี่สิบสามบาทถ้วน"),
1110            Some(BahtAmount {
1111                baht: 123,
1112                satang: 0
1113            })
1114        );
1115    }
1116
1117    #[test]
1118    fn baht_with_satang() {
1119        assert_eq!(
1120            parse_thai_baht("ห้าบาทยี่สิบห้าสตางค์"),
1121            Some(BahtAmount {
1122                baht: 5,
1123                satang: 25
1124            })
1125        );
1126    }
1127
1128    #[test]
1129    fn baht_no_suffix_implies_zero_satang() {
1130        assert_eq!(
1131            parse_thai_baht("หนึ่งร้อยบาท"),
1132            Some(BahtAmount {
1133                baht: 100,
1134                satang: 0
1135            })
1136        );
1137    }
1138
1139    #[test]
1140    fn baht_zero_baht_with_satang() {
1141        assert_eq!(
1142            parse_thai_baht("ศูนย์บาทห้าสิบสตางค์"),
1143            Some(BahtAmount {
1144                baht: 0,
1145                satang: 50
1146            })
1147        );
1148    }
1149
1150    #[test]
1151    fn baht_million() {
1152        assert_eq!(
1153            parse_thai_baht("หนึ่งล้านบาทถ้วน"),
1154            Some(BahtAmount {
1155                baht: 1_000_000,
1156                satang: 0
1157            })
1158        );
1159    }
1160
1161    #[test]
1162    fn baht_satang_eleven() {
1163        // สิบเอ็ด satang = 11
1164        assert_eq!(
1165            parse_thai_baht("สองบาทสิบเอ็ดสตางค์"),
1166            Some(BahtAmount {
1167                baht: 2,
1168                satang: 11
1169            })
1170        );
1171    }
1172
1173    #[test]
1174    fn baht_satang_fifty() {
1175        assert_eq!(
1176            parse_thai_baht("หนึ่งร้อยบาทห้าสิบสตางค์"),
1177            Some(BahtAmount {
1178                baht: 100,
1179                satang: 50
1180            })
1181        );
1182    }
1183
1184    #[test]
1185    fn baht_satang_above_99_is_none() {
1186        // หนึ่งร้อย = 100 which is > 99 satang
1187        assert_eq!(parse_thai_baht("หนึ่งบาทหนึ่งร้อยสตางค์"), None);
1188    }
1189
1190    #[test]
1191    fn baht_no_baht_marker_is_none() {
1192        assert_eq!(parse_thai_baht("หนึ่งร้อยยี่สิบสาม"), None);
1193    }
1194
1195    #[test]
1196    fn baht_non_number_is_none() {
1197        assert_eq!(parse_thai_baht("กินข้าวบาทถ้วน"), None);
1198    }
1199
1200    #[test]
1201    fn baht_empty_is_none() {
1202        assert_eq!(parse_thai_baht(""), None);
1203    }
1204
1205    #[test]
1206    fn baht_unrecognised_satang_suffix_is_none() {
1207        assert_eq!(parse_thai_baht("หนึ่งบาทมาก"), None);
1208    }
1209
1210    // ── to_thai_baht_text ─────────────────────────────────────────────────────
1211
1212    #[test]
1213    fn baht_text_zero_exact() {
1214        assert_eq!(to_thai_baht_text(0, 0), "ศูนย์บาทถ้วน");
1215    }
1216
1217    #[test]
1218    fn baht_text_one_exact() {
1219        assert_eq!(to_thai_baht_text(1, 0), "หนึ่งบาทถ้วน");
1220    }
1221
1222    #[test]
1223    fn baht_text_hundred_exact() {
1224        assert_eq!(to_thai_baht_text(100, 0), "หนึ่งร้อยบาทถ้วน");
1225    }
1226
1227    #[test]
1228    fn baht_text_with_satang() {
1229        assert_eq!(to_thai_baht_text(21, 50), "ยี่สิบเอ็ดบาทห้าสิบสตางค์");
1230    }
1231
1232    #[test]
1233    fn baht_text_million_exact() {
1234        assert_eq!(to_thai_baht_text(1_000_000, 0), "หนึ่งล้านบาทถ้วน");
1235    }
1236
1237    #[test]
1238    fn baht_text_zero_baht_with_satang() {
1239        assert_eq!(to_thai_baht_text(0, 25), "ศูนย์บาทยี่สิบห้าสตางค์");
1240    }
1241
1242    #[test]
1243    fn baht_text_satang_eleven() {
1244        assert_eq!(to_thai_baht_text(2, 11), "สองบาทสิบเอ็ดสตางค์");
1245    }
1246
1247    // ── roundtrip parse_thai_baht ↔ to_thai_baht_text ────────────────────────
1248
1249    #[test]
1250    fn baht_roundtrip() {
1251        let cases = [
1252            (0u64, 0u8),
1253            (1, 0),
1254            (100, 0),
1255            (123, 50),
1256            (5, 25),
1257            (1_000_000, 0),
1258            (21, 11),
1259            (0, 99),
1260        ];
1261        for (baht, satang) in cases {
1262            let text = to_thai_baht_text(baht, satang);
1263            let parsed = parse_thai_baht(&text);
1264            assert_eq!(
1265                parsed,
1266                Some(BahtAmount { baht, satang }),
1267                "roundtrip failed for ({baht}, {satang}): text={text:?}"
1268            );
1269        }
1270    }
1271}