Skip to main content

kham_core/
date.rs

1//! Thai date normalization.
2//!
3//! Parses Thai date strings (Buddhist Era or Gregorian) into a structured [`ThaiDate`]
4//! and formats them back to ISO 8601 or Thai text.
5//!
6//! ## Supported input formats
7//!
8//! | Format | Example |
9//! |--------|---------|
10//! | Day full-month year | `5 กรกฎาคม 2567` |
11//! | Day abbreviated-month year | `5 ก.ค. 2567` |
12//! | With era marker | `5 ก.ค. พ.ศ. 2567` |
13//! | With วันที่ prefix | `วันที่ 5 กรกฎาคม 2567` |
14//! | Slash-separated | `5/7/2567` |
15//! | Dash-separated | `5-7-2567` |
16//! | Thai digits | `๕ ก.ค. ๒๕๖๗` |
17//!
18//! ## Era inference
19//!
20//! When no era marker is present, the year is heuristically classified:
21//! - year ≥ 2300 → Buddhist Era (พ.ศ.)
22//! - year < 2300 → Gregorian (ค.ศ.)
23//!
24//! ## Examples
25//!
26//! ```rust
27//! use kham_core::date::{parse_thai_date, Era};
28//!
29//! let d = parse_thai_date("5 กรกฎาคม 2567").unwrap();
30//! assert_eq!(d.day, 5);
31//! assert_eq!(d.month, 7);
32//! assert_eq!(d.year, 2567);
33//! assert!(matches!(d.era, Era::Buddhist));
34//! assert_eq!(d.to_iso8601(), "2024-07-05");
35//!
36//! let d2 = parse_thai_date("5/7/2567").unwrap();
37//! assert_eq!(d2.to_iso8601(), "2024-07-05");
38//! ```
39
40extern crate alloc;
41use alloc::string::String;
42use alloc::vec::Vec;
43
44use crate::number::thai_digit_to_ascii;
45
/// Month-name lookup table scanned by [`match_month`]: `(pattern, month_number)`.
///
/// The table is hand-maintained in descending UTF-8 byte length, so a first-match
/// prefix scan always prefers the longest pattern. No entry is currently a prefix
/// of another, so the ordering guards future additions rather than fixing a
/// present ambiguity.
static MONTHS_BY_LEN: &[(&str, u8)] = &[
    // Full names — longest first
    ("กุมภาพันธ์", 2),
    ("พฤศจิกายน", 11),
    ("มิถุนายน", 6),
    ("กรกฎาคม", 7),
    ("สิงหาคม", 8),
    ("ธันวาคม", 12),
    ("กันยายน", 9),
    ("พฤษภาคม", 5),
    ("เมษายน", 4),
    ("มีนาคม", 3),
    ("มกราคม", 1),
    ("ตุลาคม", 10),
    // Abbreviated forms (dot-terminated) — three-letter forms before two-letter forms
    ("มี.ค.", 3),
    ("เม.ย.", 4),
    ("มิ.ย.", 6),
    ("ม.ค.", 1),
    ("ก.พ.", 2),
    ("พ.ค.", 5),
    ("ก.ค.", 7),
    ("ส.ค.", 8),
    ("ก.ย.", 9),
    ("ต.ค.", 10),
    ("พ.ย.", 11),
    ("ธ.ค.", 12),
];
75
/// Calendar era in which the `year` of a [`ThaiDate`] is counted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Era {
    /// พุทธศักราช (พ.ศ.), the Buddhist Era: the Gregorian year plus 543.
    Buddhist,
    /// คริสต์ศักราช (ค.ศ.), the Christian / Gregorian era.
    Gregorian,
}
84
85/// A parsed Thai date with day, month, year, and era.
86#[derive(Debug, Clone, PartialEq, Eq)]
87pub struct ThaiDate {
88    /// Day of month (1–31).
89    pub day: u8,
90    /// Month number (1–12).
91    pub month: u8,
92    /// Year in the stated [`Era`].
93    pub year: u32,
94    /// Era system for this date.
95    pub era: Era,
96}
97
98impl ThaiDate {
99    /// Returns the year in Gregorian (CE).
100    ///
101    /// Buddhist years subtract 543; Gregorian years are returned as-is.
102    pub fn gregorian_year(&self) -> i32 {
103        match self.era {
104            Era::Buddhist => self.year as i32 - 543,
105            Era::Gregorian => self.year as i32,
106        }
107    }
108
109    /// Returns the year in Buddhist Era (BE).
110    ///
111    /// Gregorian years add 543; Buddhist years are returned as-is.
112    pub fn buddhist_year(&self) -> u32 {
113        match self.era {
114            Era::Buddhist => self.year,
115            Era::Gregorian => self.year + 543,
116        }
117    }
118
119    /// Formats the date as an ISO 8601 string (`YYYY-MM-DD`) using Gregorian year.
120    ///
121    /// ```rust
122    /// use kham_core::date::parse_thai_date;
123    /// let d = parse_thai_date("5 กรกฎาคม 2567").unwrap();
124    /// assert_eq!(d.to_iso8601(), "2024-07-05");
125    /// ```
126    pub fn to_iso8601(&self) -> String {
127        let y = self.gregorian_year();
128        let mut out = String::with_capacity(10);
129        // year — 4 digits, possibly negative (rare)
130        if y < 0 {
131            out.push('-');
132            push_padded(&mut out, (-y) as u32, 4);
133        } else {
134            push_padded(&mut out, y as u32, 4);
135        }
136        out.push('-');
137        push_padded(&mut out, self.month as u32, 2);
138        out.push('-');
139        push_padded(&mut out, self.day as u32, 2);
140        out
141    }
142
143    /// Formats the date as Thai text: `"5 กรกฎาคม พ.ศ. 2567"`.
144    ///
145    /// The year is always expressed in Buddhist Era.
146    ///
147    /// ```rust
148    /// use kham_core::date::parse_thai_date;
149    /// let d = parse_thai_date("5/7/2567").unwrap();
150    /// assert_eq!(d.to_thai_text(), "5 กรกฎาคม พ.ศ. 2567");
151    /// ```
152    pub fn to_thai_text(&self) -> String {
153        let month_name = MONTH_NAMES_FULL[(self.month as usize) - 1];
154        let be = self.buddhist_year();
155        let mut out = String::new();
156        push_decimal(&mut out, self.day as u32);
157        out.push(' ');
158        out.push_str(month_name);
159        out.push_str(" พ.ศ. ");
160        push_decimal(&mut out, be);
161        out
162    }
163}
164
/// Full Thai month names in calendar order; index `0` is month 1.
static MONTH_NAMES_FULL: &[&str] = &[
    "มกราคม",     // 1 — January
    "กุมภาพันธ์",    // 2 — February
    "มีนาคม",     // 3 — March
    "เมษายน",     // 4 — April
    "พฤษภาคม",    // 5 — May
    "มิถุนายน",    // 6 — June
    "กรกฎาคม",    // 7 — July
    "สิงหาคม",    // 8 — August
    "กันยายน",    // 9 — September
    "ตุลาคม",     // 10 — October
    "พฤศจิกายน",   // 11 — November
    "ธันวาคม",    // 12 — December
];
179
180// ── Parsing ──────────────────────────────────────────────────────────────────
181
182/// Parses a Thai date string into a [`ThaiDate`].
183///
184/// Returns `None` if the input cannot be recognized as a valid date.
185///
186/// ```rust
187/// use kham_core::date::{parse_thai_date, Era};
188///
189/// let d = parse_thai_date("๕ ก.ค. ๒๕๖๗").unwrap();
190/// assert_eq!(d.day, 5);
191/// assert_eq!(d.month, 7);
192/// assert_eq!(d.year, 2567);
193/// assert!(matches!(d.era, Era::Buddhist));
194/// ```
195pub fn parse_thai_date(text: &str) -> Option<ThaiDate> {
196    let text = text.trim();
197    if text.is_empty() {
198        return None;
199    }
200    // Convert Thai digits to ASCII uniformly before parsing
201    let ascii = to_ascii_digits(text);
202    let s = ascii.trim();
203
204    // Try numeric formats first (D/M/Y and D-M-Y)
205    if let Some(d) = parse_numeric(s) {
206        return Some(d);
207    }
208    // Try word-based format
209    parse_word_based(s)
210}
211
212/// Formats a Buddhist Era date as Thai text: `"5 กรกฎาคม พ.ศ. 2567"`.
213///
214/// Returns `None` if `month` is not in 1–12 or `day` is not in 1–31.
215pub fn format_thai_date(year_be: u32, month: u8, day: u8) -> Option<String> {
216    if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
217        return None;
218    }
219    let d = ThaiDate {
220        day,
221        month,
222        year: year_be,
223        era: Era::Buddhist,
224    };
225    Some(d.to_thai_text())
226}
227
228// ── Internal helpers ──────────────────────────────────────────────────────────
229
230/// Converts every Thai digit character in `s` to its ASCII equivalent.
231fn to_ascii_digits(s: &str) -> String {
232    s.chars()
233        .map(|c| thai_digit_to_ascii(c).unwrap_or(c))
234        .collect()
235}
236
/// Appends the decimal representation of `n` to `out`, with no leading zeros
/// (`0` is written as `"0"`).
fn push_decimal(out: &mut String, n: u32) {
    // Emit the higher-order digits first; a `u32` recurses at most nine times.
    if n >= 10 {
        push_decimal(out, n / 10);
    }
    out.push(char::from(b'0' + (n % 10) as u8));
}
254
/// Appends `n` to `out` in decimal, left-padded with `'0'` to at least `width`
/// characters. Numbers wider than `width` are written in full.
fn push_padded(out: &mut String, n: u32, width: usize) {
    // `u32::MAX` has ten decimal digits, so a fixed buffer filled from the
    // right-hand end always suffices.
    let mut digits = [b'0'; 10];
    let mut start = digits.len();
    let mut rest = n;
    loop {
        start -= 1;
        digits[start] = b'0' + (rest % 10) as u8;
        rest /= 10;
        if rest == 0 {
            break;
        }
    }

    let used = digits.len() - start;
    // The range is empty when the number already fills `width`.
    for _ in used..width {
        out.push('0');
    }
    for &digit in &digits[start..] {
        out.push(char::from(digit));
    }
}
276
277/// Infers era from a bare year number.
278fn infer_era(year: u32) -> Era {
279    if year >= 2300 {
280        Era::Buddhist
281    } else {
282        Era::Gregorian
283    }
284}
285
286/// Parses `D/M/Y` or `D-M-Y` (separator can be `/` or `-`).
287fn parse_numeric(s: &str) -> Option<ThaiDate> {
288    let sep = if s.contains('/') {
289        '/'
290    } else if s.contains('-') {
291        '-'
292    } else {
293        return None;
294    };
295
296    let parts: Vec<&str> = s.splitn(3, sep).collect();
297    if parts.len() != 3 {
298        return None;
299    }
300    let day: u8 = parts[0].trim().parse().ok()?;
301    let month: u8 = parts[1].trim().parse().ok()?;
302    let year: u32 = parts[2].trim().parse().ok()?;
303
304    if !(1..=31).contains(&day) || !(1..=12).contains(&month) {
305        return None;
306    }
307    let era = infer_era(year);
308    Some(ThaiDate {
309        day,
310        month,
311        year,
312        era,
313    })
314}
315
316/// Parses word-based formats like `5 กรกฎาคม 2567` or `วันที่ 5 ก.ค. พ.ศ. 2567`.
317fn parse_word_based(s: &str) -> Option<ThaiDate> {
318    // Strip optional วันที่ prefix (14 UTF-8 bytes — 5 chars × 3 bytes + space × 1 byte? No, วันที่ = 4 Thai chars = 12 bytes, plus space)
319    let s = s.strip_prefix("วันที่").map(|t| t.trim_start()).unwrap_or(s);
320
321    // Consume leading digits (day)
322    let (day_str, rest) = take_digits(s);
323    if day_str.is_empty() {
324        return None;
325    }
326    let day: u8 = day_str.parse().ok()?;
327    if !(1..=31).contains(&day) {
328        return None;
329    }
330
331    let rest = rest.trim_start();
332
333    // Match month name (greedy longest-first)
334    let (month, rest) = match_month(rest)?;
335
336    let rest = rest.trim_start();
337
338    // Optional era marker
339    let (era_opt, rest) = parse_era_marker(rest);
340    let rest = rest.trim_start();
341
342    // Year
343    let (year_str, _) = take_digits(rest);
344    if year_str.is_empty() {
345        return None;
346    }
347    let year: u32 = year_str.parse().ok()?;
348
349    let era = era_opt.unwrap_or_else(|| infer_era(year));
350
351    Some(ThaiDate {
352        day,
353        month,
354        year,
355        era,
356    })
357}
358
/// Splits `s` into its leading run of ASCII digits and everything after it,
/// returned as `(digits, remainder)`. `digits` is empty when `s` does not start
/// with an ASCII digit.
fn take_digits(s: &str) -> (&str, &str) {
    // The first non-digit character marks the split; no such character means
    // the whole string is digits.
    let split = s
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(s.len());
    s.split_at(split)
}
369
370/// Tries to match a month name at the start of `s` (longest-first).
371/// Returns `(month_number, remainder)` or `None`.
372fn match_month(s: &str) -> Option<(u8, &str)> {
373    for &(pattern, month) in MONTHS_BY_LEN {
374        if let Some(rest) = s.strip_prefix(pattern) {
375            return Some((month, rest));
376        }
377    }
378    None
379}
380
381/// Strips a leading era marker (`พ.ศ.` or `ค.ศ.`) and returns `(Some(era), rest)`.
382/// If no marker is found returns `(None, original)`.
383fn parse_era_marker(s: &str) -> (Option<Era>, &str) {
384    if let Some(rest) = s.strip_prefix("พ.ศ.") {
385        return (Some(Era::Buddhist), rest);
386    }
387    if let Some(rest) = s.strip_prefix("ค.ศ.") {
388        return (Some(Era::Gregorian), rest);
389    }
390    (None, s)
391}
392
393// ── Tests ─────────────────────────────────────────────────────────────────────
394
#[cfg(test)]
mod tests {
    use super::*;

    // ── Fixtures ─────────────────────────────────────────────────────────────

    /// Shorthand constructor so each case fits on one line.
    fn date(day: u8, month: u8, year: u32, era: Era) -> ThaiDate {
        ThaiDate {
            day,
            month,
            year,
            era,
        }
    }

    /// Parses `input` and returns its `(day, month, year)`, panicking if it is rejected.
    fn dmy(input: &str) -> (u8, u8, u32) {
        let d = parse_thai_date(input).unwrap();
        (d.day, d.month, d.year)
    }

    /// Checks that every `(input, month)` pair parses to the stated month.
    fn assert_months(cases: &[(&str, u8)]) {
        for &(input, expected_month) in cases {
            let d = parse_thai_date(input).unwrap_or_else(|| panic!("failed to parse: {input}"));
            assert_eq!(d.month, expected_month, "month mismatch for: {input}");
        }
    }

    // ── Era conversion ───────────────────────────────────────────────────────

    #[test]
    fn buddhist_to_gregorian() {
        assert_eq!(date(1, 1, 2567, Era::Buddhist).gregorian_year(), 2024);
    }

    #[test]
    fn gregorian_to_buddhist() {
        assert_eq!(date(1, 1, 2024, Era::Gregorian).buddhist_year(), 2567);
    }

    // ── ISO 8601 formatting ──────────────────────────────────────────────────

    #[test]
    fn iso8601_buddhist() {
        assert_eq!(date(5, 7, 2567, Era::Buddhist).to_iso8601(), "2024-07-05");
    }

    #[test]
    fn iso8601_single_digit_day_month() {
        assert_eq!(date(3, 3, 2567, Era::Buddhist).to_iso8601(), "2024-03-03");
    }

    #[test]
    fn iso8601_gregorian() {
        assert_eq!(date(1, 1, 2024, Era::Gregorian).to_iso8601(), "2024-01-01");
    }

    // ── Thai text formatting ─────────────────────────────────────────────────

    #[test]
    fn to_thai_text_basic() {
        assert_eq!(
            date(5, 7, 2567, Era::Buddhist).to_thai_text(),
            "5 กรกฎาคม พ.ศ. 2567"
        );
    }

    #[test]
    fn to_thai_text_gregorian_converts_to_be() {
        assert_eq!(
            date(1, 1, 2024, Era::Gregorian).to_thai_text(),
            "1 มกราคม พ.ศ. 2567"
        );
    }

    // ── format_thai_date ─────────────────────────────────────────────────────

    #[test]
    fn format_thai_date_valid() {
        let formatted = format_thai_date(2567, 7, 5);
        assert_eq!(formatted, Some(String::from("5 กรกฎาคม พ.ศ. 2567")));
    }

    #[test]
    fn format_thai_date_invalid_month() {
        for month in [0u8, 13].iter().copied() {
            assert_eq!(format_thai_date(2567, month, 1), None);
        }
    }

    #[test]
    fn format_thai_date_invalid_day() {
        for day in [0u8, 32].iter().copied() {
            assert_eq!(format_thai_date(2567, 1, day), None);
        }
    }

    // ── parse_thai_date: full month name ─────────────────────────────────────

    #[test]
    fn parse_full_month_name_be() {
        let input = "5 กรกฎาคม 2567";
        assert_eq!(dmy(input), (5, 7, 2567));
        assert_eq!(parse_thai_date(input).unwrap().era, Era::Buddhist);
    }

    #[test]
    fn parse_full_month_explicit_be_marker() {
        let d = parse_thai_date("5 กรกฎาคม พ.ศ. 2567").unwrap();
        assert_eq!((d.era, d.year), (Era::Buddhist, 2567));
    }

    #[test]
    fn parse_full_month_explicit_ce_marker() {
        let d = parse_thai_date("5 กรกฎาคม ค.ศ. 2024").unwrap();
        assert_eq!((d.era, d.year), (Era::Gregorian, 2024));
    }

    #[test]
    fn parse_wanthi_prefix() {
        assert_eq!(dmy("วันที่ 5 กรกฎาคม 2567"), (5, 7, 2567));
    }

    #[test]
    fn parse_wanthi_prefix_with_era() {
        let d = parse_thai_date("วันที่ 5 กรกฎาคม พ.ศ. 2567").unwrap();
        assert_eq!(d.era, Era::Buddhist);
    }

    // ── parse_thai_date: abbreviated month ──────────────────────────────────

    #[test]
    fn parse_abbreviated_month() {
        assert_eq!(dmy("5 ก.ค. 2567"), (5, 7, 2567));
    }

    #[test]
    fn parse_abbreviated_month_with_era() {
        let d = parse_thai_date("5 ก.ค. พ.ศ. 2567").unwrap();
        assert_eq!((d.era, d.month), (Era::Buddhist, 7));
    }

    #[test]
    fn parse_all_abbreviated_months() {
        assert_months(&[
            ("5 ม.ค. 2567", 1),
            ("5 ก.พ. 2567", 2),
            ("5 มี.ค. 2567", 3),
            ("5 เม.ย. 2567", 4),
            ("5 พ.ค. 2567", 5),
            ("5 มิ.ย. 2567", 6),
            ("5 ก.ค. 2567", 7),
            ("5 ส.ค. 2567", 8),
            ("5 ก.ย. 2567", 9),
            ("5 ต.ค. 2567", 10),
            ("5 พ.ย. 2567", 11),
            ("5 ธ.ค. 2567", 12),
        ]);
    }

    #[test]
    fn parse_all_full_months() {
        assert_months(&[
            ("1 มกราคม 2567", 1),
            ("1 กุมภาพันธ์ 2567", 2),
            ("1 มีนาคม 2567", 3),
            ("1 เมษายน 2567", 4),
            ("1 พฤษภาคม 2567", 5),
            ("1 มิถุนายน 2567", 6),
            ("1 กรกฎาคม 2567", 7),
            ("1 สิงหาคม 2567", 8),
            ("1 กันยายน 2567", 9),
            ("1 ตุลาคม 2567", 10),
            ("1 พฤศจิกายน 2567", 11),
            ("1 ธันวาคม 2567", 12),
        ]);
    }

    // ── parse_thai_date: numeric formats ────────────────────────────────────

    #[test]
    fn parse_slash_separated() {
        let input = "5/7/2567";
        assert_eq!(dmy(input), (5, 7, 2567));
        let d = parse_thai_date(input).unwrap();
        assert_eq!(d.era, Era::Buddhist);
        assert_eq!(d.to_iso8601(), "2024-07-05");
    }

    #[test]
    fn parse_dash_separated() {
        assert_eq!(dmy("5-7-2567"), (5, 7, 2567));
    }

    #[test]
    fn parse_numeric_gregorian_year() {
        let d = parse_thai_date("1/1/2024").unwrap();
        assert_eq!((d.era, d.year), (Era::Gregorian, 2024));
    }

    #[test]
    fn parse_numeric_invalid_month() {
        for input in ["5/13/2567", "5/0/2567"].iter().copied() {
            assert!(parse_thai_date(input).is_none());
        }
    }

    #[test]
    fn parse_numeric_invalid_day() {
        for input in ["32/1/2567", "0/1/2567"].iter().copied() {
            assert!(parse_thai_date(input).is_none());
        }
    }

    // ── parse_thai_date: Thai digits ────────────────────────────────────────

    #[test]
    fn parse_thai_digits_abbreviated_month() {
        assert_eq!(dmy("๕ ก.ค. ๒๕๖๗"), (5, 7, 2567));
    }

    #[test]
    fn parse_thai_digits_full_month() {
        assert_eq!(dmy("๕ กรกฎาคม ๒๕๖๗"), (5, 7, 2567));
    }

    #[test]
    fn parse_thai_digits_numeric() {
        assert_eq!(dmy("๕/๗/๒๕๖๗"), (5, 7, 2567));
    }

    // ── Era inference ─────────────────────────────────────────────────────────

    #[test]
    fn infer_era_buddhist() {
        for year in [2567u32, 2300].iter().copied() {
            assert_eq!(infer_era(year), Era::Buddhist);
        }
    }

    #[test]
    fn infer_era_gregorian() {
        for year in [2024u32, 1999].iter().copied() {
            assert_eq!(infer_era(year), Era::Gregorian);
        }
    }

    // ── Edge cases ────────────────────────────────────────────────────────────

    #[test]
    fn parse_empty_returns_none() {
        for input in ["", "   "].iter().copied() {
            assert!(parse_thai_date(input).is_none());
        }
    }

    #[test]
    fn parse_garbage_returns_none() {
        for input in ["hello world", "กินข้าว"].iter().copied() {
            assert!(parse_thai_date(input).is_none());
        }
    }

    #[test]
    fn roundtrip_iso8601() {
        let d = parse_thai_date("5 กรกฎาคม 2567").unwrap();
        assert_eq!(d.to_iso8601(), "2024-07-05");
    }

    #[test]
    fn roundtrip_thai_text() {
        let d = parse_thai_date("5/7/2567").unwrap();
        assert_eq!(d.to_thai_text(), "5 กรกฎาคม พ.ศ. 2567");
    }

    #[test]
    fn leading_trailing_whitespace() {
        let d = parse_thai_date("  5 กรกฎาคม 2567  ").unwrap();
        assert_eq!(d.day, 5);
    }
}