Skip to main content

uni_btic/
parse.rs

1use crate::btic::Btic;
2use crate::certainty::Certainty;
3use crate::error::BticError;
4use crate::granularity::Granularity;
5use chrono::{Datelike, NaiveDate, NaiveDateTime};
6
7/// Parse a BTIC literal string into a `Btic` value.
8///
9/// Supported forms (per spec section 13.5):
10/// - Single granular: `"1985"`, `"1985-03"`, `"1985-03-15"`, `"1985-03-15T14:30Z"`
11/// - Two-bound solidus: `"1985-03/2024-06"`, `"1985/2024-06-15"`
12/// - Unbounded: `"2020-03/"`, `"/2024-06"`, `"/"`
13/// - Certainty prefixes: `"~1985"` (approximate), `"?1985"` (uncertain), `"??1985"` (unknown)
14/// - BCE dates: `"500 BCE"`
15pub fn parse_btic_literal(s: &str) -> Result<Btic, BticError> {
16    let s = s.trim();
17
18    if s.is_empty() {
19        return Err(BticError::ParseError("empty literal".into()));
20    }
21
22    // Check for solidus (interval notation)
23    if let Some(slash_pos) = s.find('/') {
24        let left = &s[..slash_pos];
25        let right = &s[slash_pos + 1..];
26        return parse_two_bound(left, right);
27    }
28
29    // Single granular expression
30    parse_single(s)
31}
32
33/// Parse a two-bound interval (e.g., "1985-03/2024-06", "2020-03/", "/2024-06", "/").
34fn parse_two_bound(left: &str, right: &str) -> Result<Btic, BticError> {
35    let left = left.trim();
36    let right = right.trim();
37
38    let (lo, lo_gran, lo_cert) = if left.is_empty() {
39        // Left-unbounded
40        (i64::MIN, Granularity::Millisecond, Certainty::Definite)
41    } else {
42        parse_component(left)?
43    };
44
45    let (hi_raw, hi_gran, hi_cert) = if right.is_empty() {
46        // Right-unbounded
47        (i64::MAX, Granularity::Millisecond, Certainty::Definite)
48    } else {
49        let (lo_ms, gran, cert) = parse_component(right)?;
50        let hi_ms = expand_granularity(lo_ms, gran)?;
51        (hi_ms, gran, cert)
52    };
53
54    // Sentinel bounds already carry zeroed granularity/certainty from the
55    // unbounded branches above, so build_meta handles all cases uniformly.
56    let meta = Btic::build_meta(lo_gran, hi_gran, lo_cert, hi_cert);
57    Btic::new(lo, hi_raw, meta)
58}
59
60/// Parse a single granular expression (e.g., "1985", "1985-03-15", "~500 BCE").
61/// Both bounds are derived from the same expression.
62fn parse_single(s: &str) -> Result<Btic, BticError> {
63    let (lo, gran, cert) = parse_component(s)?;
64    let hi = expand_granularity(lo, gran)?;
65
66    let meta = Btic::build_meta(gran, gran, cert, cert);
67    Btic::new(lo, hi, meta)
68}
69
70/// Parse a single temporal component, returning (lo_ms, granularity, certainty).
71///
72/// Handles certainty prefixes (`~`, `?`, `??`) and BCE suffix.
73fn parse_component(s: &str) -> Result<(i64, Granularity, Certainty), BticError> {
74    let s = s.trim();
75    let (s, certainty) = strip_certainty_prefix(s);
76    let s = s.trim();
77
78    // Check for BCE suffix
79    if let Some(bce_s) = strip_bce_suffix(s) {
80        return parse_bce_year(bce_s.trim(), certainty);
81    }
82
83    parse_iso_component(s, certainty)
84}
85
86/// Strip certainty prefix from a string, returning (remaining, certainty).
87fn strip_certainty_prefix(s: &str) -> (&str, Certainty) {
88    if let Some(rest) = s.strip_prefix("??") {
89        (rest, Certainty::Unknown)
90    } else if let Some(rest) = s.strip_prefix('~') {
91        (rest, Certainty::Approximate)
92    } else if let Some(rest) = s.strip_prefix('?') {
93        (rest, Certainty::Uncertain)
94    } else {
95        (s, Certainty::Definite)
96    }
97}
98
/// Remove a trailing "BCE" marker (matched ASCII case-insensitively) and any
/// whitespace that preceded it, so both "500 BCE" and "500BCE" yield "500".
///
/// Returns `None` when `s` does not end in the marker.
fn strip_bce_suffix(s: &str) -> Option<&str> {
    // The cut point is a byte offset. `str::get` yields `None` instead of
    // panicking when that offset lands inside a multi-byte UTF-8 character
    // (fuzz-found via `parse_btic_literal("Ҫ[?")`).
    let cut = s.len().checked_sub(3)?;
    let tail = s.get(cut..)?;

    tail.eq_ignore_ascii_case("BCE")
        .then(|| s[..cut].trim_end())
}
114
115/// Parse a BCE year like "500" into astronomical year -499.
116fn parse_bce_year(
117    s: &str,
118    certainty: Certainty,
119) -> Result<(i64, Granularity, Certainty), BticError> {
120    let year: i32 = s
121        .trim()
122        .parse()
123        .map_err(|e| BticError::ParseError(format!("invalid BCE year '{s}': {e}")))?;
124    if year <= 0 {
125        return Err(BticError::ParseError(format!(
126            "BCE year must be positive, got {year}"
127        )));
128    }
129    // Astronomical year: 1 BCE = year 0, 2 BCE = year -1, etc.
130    let astro_year = -(year - 1);
131    let lo_ms = year_to_ms(astro_year)?;
132    Ok((lo_ms, Granularity::Year, certainty))
133}
134
135/// Parse an ISO 8601 component and determine its granularity.
136fn parse_iso_component(
137    s: &str,
138    certainty: Certainty,
139) -> Result<(i64, Granularity, Certainty), BticError> {
140    // Try from most specific to least specific
141
142    // Full datetime with time component (contains 'T')
143    if s.contains('T') {
144        return parse_datetime_component(s, certainty);
145    }
146
147    // Date-only forms: YYYY-MM-DD, YYYY-MM, YYYY
148    parse_date_only_component(s, certainty)
149}
150
151/// Parse a datetime string (contains 'T').
152fn parse_datetime_component(
153    s: &str,
154    certainty: Certainty,
155) -> Result<(i64, Granularity, Certainty), BticError> {
156    // Strip the trailing 'Z' or timezone offset, then apply the offset so the
157    // resulting timestamp is anchored to UTC.
158    let (s_clean, tz_offset_secs) = strip_timezone(s);
159
160    // Try parsing from most specific precision to least. Each format carries the
161    // granularity it implies; because chrono requires the whole input to match,
162    // a string with a fractional part will not match the fraction-free seconds
163    // format and so falls through to a millisecond format.
164    //
165    // - Millisecond: 2024-06-15T14:30:00.000
166    // - Second:      2024-06-15T14:30:00
167    // - Minute:      2024-06-15T14:30
168    // - Hour:        2024-06-15T14
169    let formats_and_gran = [
170        ("%Y-%m-%dT%H:%M:%S", Granularity::Second),
171        ("%Y-%m-%dT%H:%M:%S%.3f", Granularity::Millisecond),
172        ("%Y-%m-%dT%H:%M:%S%.f", Granularity::Millisecond),
173        ("%Y-%m-%dT%H:%M", Granularity::Minute),
174        ("%Y-%m-%dT%H", Granularity::Hour),
175    ];
176
177    for (fmt, gran) in &formats_and_gran {
178        if let Ok(ndt) = NaiveDateTime::parse_from_str(s_clean, fmt) {
179            let ms = datetime_to_ms(ndt) - (tz_offset_secs as i64) * 1_000;
180            return Ok((ms, *gran, certainty));
181        }
182    }
183
184    Err(BticError::ParseError(format!(
185        "cannot parse datetime '{s}'"
186    )))
187}
188
189/// Parse a date-only component: YYYY-MM-DD, YYYY-MM, YYYY.
190fn parse_date_only_component(
191    s: &str,
192    certainty: Certainty,
193) -> Result<(i64, Granularity, Certainty), BticError> {
194    let parts: Vec<&str> = s.split('-').collect();
195
196    match parts.len() {
197        3 => {
198            // YYYY-MM-DD
199            let date = NaiveDate::parse_from_str(s, "%Y-%m-%d")
200                .map_err(|e| BticError::ParseError(format!("invalid date '{s}': {e}")))?;
201            let ms = date_to_ms(date);
202            Ok((ms, Granularity::Day, certainty))
203        }
204        2 => {
205            // YYYY-MM
206            let year: i32 = parts[0]
207                .parse()
208                .map_err(|e| BticError::ParseError(format!("invalid year in '{s}': {e}")))?;
209            let month: u32 = parts[1]
210                .parse()
211                .map_err(|e| BticError::ParseError(format!("invalid month in '{s}': {e}")))?;
212            if !(1..=12).contains(&month) {
213                return Err(BticError::ParseError(format!(
214                    "month {month} out of range 1-12"
215                )));
216            }
217            let date = NaiveDate::from_ymd_opt(year, month, 1).ok_or_else(|| {
218                BticError::ParseError(format!("invalid date {year}-{month:02}-01"))
219            })?;
220            let ms = date_to_ms(date);
221            Ok((ms, Granularity::Month, certainty))
222        }
223        1 => {
224            // YYYY (just a year)
225            let year: i32 = parts[0]
226                .parse()
227                .map_err(|e| BticError::ParseError(format!("invalid year '{s}': {e}")))?;
228            let ms = year_to_ms(year)?;
229            Ok((ms, Granularity::Year, certainty))
230        }
231        _ => Err(BticError::ParseError(format!(
232            "cannot parse date component '{s}'"
233        ))),
234    }
235}
236
/// Strip a timezone designator from the end of a datetime string, returning
/// `(cleaned, offset_secs)`.
///
/// Recognised designators:
/// - `Z` or `z`: UTC, offset 0.
/// - `+HH:MM` or `-HH:MM`: offset in seconds east of UTC, negative for `-`.
///
/// The offset fields must be exactly two ASCII digits each, with `HH <= 23`
/// and `MM <= 59`. A suffix that only resembles an offset (for example
/// `+-1:30` or `+99:99`) is not stripped; the input is returned unchanged
/// with offset 0, so the caller's datetime parse rejects it instead of
/// silently applying a nonsensical shift.
fn strip_timezone(s: &str) -> (&str, i32) {
    if let Some(stripped) = s.strip_suffix('Z').or_else(|| s.strip_suffix('z')) {
        return (stripped, 0);
    }

    // Match the last six bytes against `<sign> d d ':' d d`.
    if let &[.., sign, h1, h0, b':', m1, m0] = s.as_bytes() {
        let is_sign = sign == b'+' || sign == b'-';
        let all_digits = [h1, h0, m1, m0].iter().all(u8::is_ascii_digit);

        if is_sign && all_digits {
            let hours = i32::from(h1 - b'0') * 10 + i32::from(h0 - b'0');
            let minutes = i32::from(m1 - b'0') * 10 + i32::from(m0 - b'0');

            if hours <= 23 && minutes <= 59 {
                let magnitude = hours * 3600 + minutes * 60;
                let offset = if sign == b'-' { -magnitude } else { magnitude };
                // The sign byte is ASCII, so this cut is on a char boundary.
                return (&s[..s.len() - 6], offset);
            }
        }
    }

    (s, 0)
}
264
265/// Convert a NaiveDate to milliseconds since epoch.
266fn date_to_ms(date: NaiveDate) -> i64 {
267    let dt = date.and_hms_opt(0, 0, 0).unwrap();
268    datetime_to_ms(dt)
269}
270
271/// Convert a NaiveDateTime to milliseconds since epoch.
272fn datetime_to_ms(dt: NaiveDateTime) -> i64 {
273    dt.and_utc().timestamp_millis()
274}
275
276/// Convert an astronomical year to milliseconds since epoch (start of year).
277fn year_to_ms(year: i32) -> Result<i64, BticError> {
278    let date = NaiveDate::from_ymd_opt(year, 1, 1)
279        .ok_or_else(|| BticError::ParseError(format!("year {year} out of range")))?;
280    Ok(date_to_ms(date))
281}
282
283/// Expand a lower-bound ms timestamp by one unit of the given granularity
284/// to produce the upper bound. Uses calendar-aware arithmetic for variable-width units.
285fn expand_granularity(lo_ms: i64, gran: Granularity) -> Result<i64, BticError> {
286    match gran {
287        Granularity::Millisecond => Ok(lo_ms + 1),
288        Granularity::Second => Ok(lo_ms + 1_000),
289        Granularity::Minute => Ok(lo_ms + 60_000),
290        Granularity::Hour => Ok(lo_ms + 3_600_000),
291        Granularity::Day => Ok(lo_ms + 86_400_000),
292        // Variable-width calendar units require chrono
293        Granularity::Month => expand_months(lo_ms, 1),
294        Granularity::Quarter => expand_months(lo_ms, 3),
295        Granularity::Year => expand_years(lo_ms, 1),
296        Granularity::Decade => expand_years(lo_ms, 10),
297        Granularity::Century => expand_years(lo_ms, 100),
298        Granularity::Millennium => expand_years(lo_ms, 1000),
299    }
300}
301
302/// Add N months to a timestamp (calendar-aware).
303fn expand_months(lo_ms: i64, months: i32) -> Result<i64, BticError> {
304    let dt = ms_to_datetime(lo_ms)?;
305    let date = dt.date();
306
307    let mut year = date.year();
308    let mut month = date.month() as i32 + months;
309    while month > 12 {
310        month -= 12;
311        year += 1;
312    }
313    while month < 1 {
314        month += 12;
315        year -= 1;
316    }
317
318    let next_date = NaiveDate::from_ymd_opt(year, month as u32, 1)
319        .ok_or_else(|| BticError::ParseError(format!("date overflow: {year}-{month:02}-01")))?;
320    Ok(date_to_ms(next_date))
321}
322
323/// Add N years to a timestamp (calendar-aware).
324fn expand_years(lo_ms: i64, years: i32) -> Result<i64, BticError> {
325    let dt = ms_to_datetime(lo_ms)?;
326    let date = dt.date();
327    let next_date = NaiveDate::from_ymd_opt(date.year() + years, 1, 1).ok_or_else(|| {
328        BticError::ParseError(format!("date overflow: year {}", date.year() + years))
329    })?;
330    Ok(date_to_ms(next_date))
331}
332
333/// Convert milliseconds since epoch to a NaiveDateTime.
334fn ms_to_datetime(ms: i64) -> Result<NaiveDateTime, BticError> {
335    let secs = ms.div_euclid(1000);
336    let nsecs = (ms.rem_euclid(1000) * 1_000_000) as u32;
337    chrono::DateTime::from_timestamp(secs, nsecs)
338        .map(|dt| dt.naive_utc())
339        .ok_or_else(|| BticError::ParseError(format!("timestamp {ms}ms out of range")))
340}
341
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `s` and check both bounds and both granularities in one call.
    /// Panics with the offending literal in the message on any mismatch.
    fn assert_btic(
        s: &str,
        expected_lo: i64,
        expected_hi: i64,
        lo_gran: Granularity,
        hi_gran: Granularity,
    ) {
        let b = parse_btic_literal(s).unwrap_or_else(|e| panic!("parse '{s}' failed: {e}"));
        assert_eq!(b.lo(), expected_lo, "lo mismatch for '{s}'");
        assert_eq!(b.hi(), expected_hi, "hi mismatch for '{s}'");
        assert_eq!(b.lo_granularity(), lo_gran, "lo_gran mismatch for '{s}'");
        assert_eq!(b.hi_granularity(), hi_gran, "hi_gran mismatch for '{s}'");
    }

    #[test]
    fn year_1985() {
        assert_btic(
            "1985",
            473_385_600_000,
            504_921_600_000,
            Granularity::Year,
            Granularity::Year,
        );
    }

    #[test]
    fn month_march_1985() {
        assert_btic(
            "1985-03",
            478_483_200_000,
            481_161_600_000,
            Granularity::Month,
            Granularity::Month,
        );
    }

    #[test]
    fn day_1985_03_15() {
        assert_btic(
            "1985-03-15",
            479_692_800_000,
            479_779_200_000,
            Granularity::Day,
            Granularity::Day,
        );
    }

    #[test]
    fn epoch_instant() {
        let b = parse_btic_literal("1970-01-01T00:00:00.000Z").unwrap();
        assert_eq!(b.lo(), 0);
        assert_eq!(b.hi(), 1);
        assert!(b.is_instant());
        assert_eq!(b.lo_granularity(), Granularity::Millisecond);
    }

    #[test]
    fn two_bound_solidus() {
        let b = parse_btic_literal("1985-03/2024-06").unwrap();
        assert_eq!(b.lo(), 478_483_200_000); // 1985-03-01
        assert_eq!(b.hi(), 1_719_792_000_000); // 2024-07-01
        assert_eq!(b.lo_granularity(), Granularity::Month);
        assert_eq!(b.hi_granularity(), Granularity::Month);
    }

    #[test]
    fn mixed_granularity_solidus() {
        let b = parse_btic_literal("1985-03/2024-06-15").unwrap();
        assert_eq!(b.lo(), 478_483_200_000); // 1985-03-01
        assert_eq!(b.hi(), 1_718_496_000_000); // 2024-06-16
        assert_eq!(b.lo_granularity(), Granularity::Month);
        assert_eq!(b.hi_granularity(), Granularity::Day);
    }

    #[test]
    fn right_unbounded() {
        let b = parse_btic_literal("2020-03/").unwrap();
        assert_eq!(b.lo(), 1_583_020_800_000); // 2020-03-01
        assert_eq!(b.hi(), i64::MAX);
        assert!(b.is_unbounded());
        assert_eq!(b.lo_granularity(), Granularity::Month);
    }

    #[test]
    fn left_unbounded() {
        let b = parse_btic_literal("/2024-06").unwrap();
        assert_eq!(b.lo(), i64::MIN);
        assert_eq!(b.hi(), 1_719_792_000_000); // 2024-07-01
    }

    #[test]
    fn fully_unbounded() {
        let b = parse_btic_literal("/").unwrap();
        assert_eq!(b.lo(), i64::MIN);
        assert_eq!(b.hi(), i64::MAX);
        assert_eq!(b.meta(), 0);
    }

    #[test]
    fn certainty_approximate() {
        let b = parse_btic_literal("~1985").unwrap();
        assert_eq!(b.lo_certainty(), Certainty::Approximate);
        assert_eq!(b.hi_certainty(), Certainty::Approximate);
    }

    #[test]
    fn certainty_uncertain() {
        let b = parse_btic_literal("?1985").unwrap();
        assert_eq!(b.lo_certainty(), Certainty::Uncertain);
        assert_eq!(b.hi_certainty(), Certainty::Uncertain);
    }

    #[test]
    fn certainty_unknown() {
        let b = parse_btic_literal("??1985").unwrap();
        assert_eq!(b.lo_certainty(), Certainty::Unknown);
        assert_eq!(b.hi_certainty(), Certainty::Unknown);
    }

    #[test]
    fn mixed_certainty_solidus() {
        let b = parse_btic_literal("~1985/2024-06").unwrap();
        assert_eq!(b.lo_certainty(), Certainty::Approximate);
        assert_eq!(b.hi_certainty(), Certainty::Definite);
    }

    #[test]
    fn bce_date() {
        let b = parse_btic_literal("500 BCE").unwrap();
        // Astronomical year -499
        assert_eq!(b.lo_granularity(), Granularity::Year);
        assert_eq!(b.hi_granularity(), Granularity::Year);
        // Astronomical year -499 is not a leap year, so the interval spans
        // exactly 365 days.
        assert_eq!(b.duration_ms(), Some(365 * 86_400_000));
    }

    #[test]
    fn approximate_bce() {
        let b = parse_btic_literal("~500 BCE").unwrap();
        assert_eq!(b.lo_certainty(), Certainty::Approximate);
        assert_eq!(b.hi_certainty(), Certainty::Approximate);
        assert_eq!(b.lo_granularity(), Granularity::Year);
    }

    #[test]
    fn second_granularity() {
        let b = parse_btic_literal("1985-03-15T14:30:00Z").unwrap();
        assert_eq!(b.lo_granularity(), Granularity::Second);
        assert_eq!(b.duration_ms(), Some(1000));
    }

    #[test]
    fn minute_granularity() {
        let b = parse_btic_literal("1985-03-15T14:30Z").unwrap();
        assert_eq!(b.lo_granularity(), Granularity::Minute);
        assert_eq!(b.duration_ms(), Some(60_000));
    }

    /// A numeric UTC offset is subtracted so the stored bound is in UTC:
    /// 14:30 at +02:00 is 12:30Z.
    #[test]
    fn timezone_offset_is_normalized_to_utc() {
        let b = parse_btic_literal("1985-03-15T14:30+02:00").unwrap();
        assert_eq!(b.lo(), 479_737_800_000); // 1985-03-15T12:30:00Z
        assert_eq!(b.lo_granularity(), Granularity::Minute);
        assert_eq!(b.duration_ms(), Some(60_000));
    }

    #[test]
    fn empty_literal_rejected() {
        assert!(parse_btic_literal("").is_err());
    }

    #[test]
    fn invalid_literal_rejected() {
        assert!(parse_btic_literal("not-a-date").is_err());
    }

    /// Fuzz-found (2026-06-10): a multi-byte UTF-8 character straddling the
    /// `len - 3` byte index panicked the BCE-suffix check instead of
    /// returning a parse error. User-reachable via any BTIC literal.
    #[test]
    fn multibyte_utf8_near_bce_suffix_is_rejected_not_panicking() {
        assert!(parse_btic_literal("Ҫ[?").is_err());
        assert!(parse_btic_literal("12Ҫ").is_err());
        assert!(parse_btic_literal("ҪҪ").is_err());
        assert!(parse_btic_literal("é").is_err());
    }
}