Skip to main content

uni_btic/
parse.rs

1use crate::btic::Btic;
2use crate::certainty::Certainty;
3use crate::error::BticError;
4use crate::granularity::Granularity;
5use chrono::{Datelike, NaiveDate, NaiveDateTime};
6
7/// Parse a BTIC literal string into a `Btic` value.
8///
9/// Supported forms (per spec section 13.5):
10/// - Single granular: `"1985"`, `"1985-03"`, `"1985-03-15"`, `"1985-03-15T14:30Z"`
11/// - Two-bound solidus: `"1985-03/2024-06"`, `"1985/2024-06-15"`
12/// - Unbounded: `"2020-03/"`, `"/2024-06"`, `"/"`
13/// - Certainty prefixes: `"~1985"` (approximate), `"?1985"` (uncertain), `"??1985"` (unknown)
14/// - BCE dates: `"500 BCE"`
15pub fn parse_btic_literal(s: &str) -> Result<Btic, BticError> {
16    let s = s.trim();
17
18    if s.is_empty() {
19        return Err(BticError::ParseError("empty literal".into()));
20    }
21
22    // Check for solidus (interval notation)
23    if let Some(slash_pos) = s.find('/') {
24        let left = &s[..slash_pos];
25        let right = &s[slash_pos + 1..];
26        return parse_two_bound(left, right);
27    }
28
29    // Single granular expression
30    parse_single(s)
31}
32
33/// Parse a two-bound interval (e.g., "1985-03/2024-06", "2020-03/", "/2024-06", "/").
34fn parse_two_bound(left: &str, right: &str) -> Result<Btic, BticError> {
35    let left = left.trim();
36    let right = right.trim();
37
38    let (lo, lo_gran, lo_cert) = if left.is_empty() {
39        // Left-unbounded
40        (i64::MIN, Granularity::Millisecond, Certainty::Definite)
41    } else {
42        parse_component(left)?
43    };
44
45    let (hi_raw, hi_gran, hi_cert) = if right.is_empty() {
46        // Right-unbounded
47        (i64::MAX, Granularity::Millisecond, Certainty::Definite)
48    } else {
49        let (lo_ms, gran, cert) = parse_component(right)?;
50        let hi_ms = expand_granularity(lo_ms, gran)?;
51        (hi_ms, gran, cert)
52    };
53
54    // Sentinel bounds already carry zeroed granularity/certainty from the
55    // unbounded branches above, so build_meta handles all cases uniformly.
56    let meta = Btic::build_meta(lo_gran, hi_gran, lo_cert, hi_cert);
57    Btic::new(lo, hi_raw, meta)
58}
59
60/// Parse a single granular expression (e.g., "1985", "1985-03-15", "~500 BCE").
61/// Both bounds are derived from the same expression.
62fn parse_single(s: &str) -> Result<Btic, BticError> {
63    let (lo, gran, cert) = parse_component(s)?;
64    let hi = expand_granularity(lo, gran)?;
65
66    let meta = Btic::build_meta(gran, gran, cert, cert);
67    Btic::new(lo, hi, meta)
68}
69
70/// Parse a single temporal component, returning (lo_ms, granularity, certainty).
71///
72/// Handles certainty prefixes (`~`, `?`, `??`) and BCE suffix.
73fn parse_component(s: &str) -> Result<(i64, Granularity, Certainty), BticError> {
74    let s = s.trim();
75    let (s, certainty) = strip_certainty_prefix(s);
76    let s = s.trim();
77
78    // Check for BCE suffix
79    if let Some(bce_s) = strip_bce_suffix(s) {
80        return parse_bce_year(bce_s.trim(), certainty);
81    }
82
83    parse_iso_component(s, certainty)
84}
85
86/// Strip certainty prefix from a string, returning (remaining, certainty).
87fn strip_certainty_prefix(s: &str) -> (&str, Certainty) {
88    if let Some(rest) = s.strip_prefix("??") {
89        (rest, Certainty::Unknown)
90    } else if let Some(rest) = s.strip_prefix('~') {
91        (rest, Certainty::Approximate)
92    } else if let Some(rest) = s.strip_prefix('?') {
93        (rest, Certainty::Uncertain)
94    } else {
95        (s, Certainty::Definite)
96    }
97}
98
/// Check for and strip a "BCE" suffix (case-insensitive), tolerating an
/// optional space before it (e.g. "500 BCE" or "500BCE").
///
/// Returns the text in front of the suffix with trailing whitespace removed,
/// or `None` when `s` does not end in "BCE".
///
/// The input is arbitrary user text, so the split point three bytes from the
/// end is checked to be a UTF-8 character boundary before slicing; a string
/// whose tail contains a multi-byte character simply has no BCE suffix
/// instead of triggering a slicing panic.
fn strip_bce_suffix(s: &str) -> Option<&str> {
    // Strings shorter than the suffix cannot carry it.
    let split = s.len().checked_sub(3)?;
    if !s.is_char_boundary(split) {
        // The last three bytes straddle a multi-byte character, so they
        // cannot be the ASCII text "BCE".
        return None;
    }

    let (head, tail) = s.split_at(split);
    if tail.eq_ignore_ascii_case("BCE") {
        Some(head.trim_end())
    } else {
        None
    }
}
108
109/// Parse a BCE year like "500" into astronomical year -499.
110fn parse_bce_year(
111    s: &str,
112    certainty: Certainty,
113) -> Result<(i64, Granularity, Certainty), BticError> {
114    let year: i32 = s
115        .trim()
116        .parse()
117        .map_err(|e| BticError::ParseError(format!("invalid BCE year '{s}': {e}")))?;
118    if year <= 0 {
119        return Err(BticError::ParseError(format!(
120            "BCE year must be positive, got {year}"
121        )));
122    }
123    // Astronomical year: 1 BCE = year 0, 2 BCE = year -1, etc.
124    let astro_year = -(year - 1);
125    let lo_ms = year_to_ms(astro_year)?;
126    Ok((lo_ms, Granularity::Year, certainty))
127}
128
129/// Parse an ISO 8601 component and determine its granularity.
130fn parse_iso_component(
131    s: &str,
132    certainty: Certainty,
133) -> Result<(i64, Granularity, Certainty), BticError> {
134    // Try from most specific to least specific
135
136    // Full datetime with time component (contains 'T')
137    if s.contains('T') {
138        return parse_datetime_component(s, certainty);
139    }
140
141    // Date-only forms: YYYY-MM-DD, YYYY-MM, YYYY
142    parse_date_only_component(s, certainty)
143}
144
145/// Parse a datetime string (contains 'T').
146fn parse_datetime_component(
147    s: &str,
148    certainty: Certainty,
149) -> Result<(i64, Granularity, Certainty), BticError> {
150    // Strip the trailing 'Z' or timezone offset, then apply the offset so the
151    // resulting timestamp is anchored to UTC.
152    let (s_clean, tz_offset_secs) = strip_timezone(s);
153
154    // Try parsing from most specific precision to least. Each format carries the
155    // granularity it implies; because chrono requires the whole input to match,
156    // a string with a fractional part will not match the fraction-free seconds
157    // format and so falls through to a millisecond format.
158    //
159    // - Millisecond: 2024-06-15T14:30:00.000
160    // - Second:      2024-06-15T14:30:00
161    // - Minute:      2024-06-15T14:30
162    // - Hour:        2024-06-15T14
163    let formats_and_gran = [
164        ("%Y-%m-%dT%H:%M:%S", Granularity::Second),
165        ("%Y-%m-%dT%H:%M:%S%.3f", Granularity::Millisecond),
166        ("%Y-%m-%dT%H:%M:%S%.f", Granularity::Millisecond),
167        ("%Y-%m-%dT%H:%M", Granularity::Minute),
168        ("%Y-%m-%dT%H", Granularity::Hour),
169    ];
170
171    for (fmt, gran) in &formats_and_gran {
172        if let Ok(ndt) = NaiveDateTime::parse_from_str(s_clean, fmt) {
173            let ms = datetime_to_ms(ndt) - (tz_offset_secs as i64) * 1_000;
174            return Ok((ms, *gran, certainty));
175        }
176    }
177
178    Err(BticError::ParseError(format!(
179        "cannot parse datetime '{s}'"
180    )))
181}
182
183/// Parse a date-only component: YYYY-MM-DD, YYYY-MM, YYYY.
184fn parse_date_only_component(
185    s: &str,
186    certainty: Certainty,
187) -> Result<(i64, Granularity, Certainty), BticError> {
188    let parts: Vec<&str> = s.split('-').collect();
189
190    match parts.len() {
191        3 => {
192            // YYYY-MM-DD
193            let date = NaiveDate::parse_from_str(s, "%Y-%m-%d")
194                .map_err(|e| BticError::ParseError(format!("invalid date '{s}': {e}")))?;
195            let ms = date_to_ms(date);
196            Ok((ms, Granularity::Day, certainty))
197        }
198        2 => {
199            // YYYY-MM
200            let year: i32 = parts[0]
201                .parse()
202                .map_err(|e| BticError::ParseError(format!("invalid year in '{s}': {e}")))?;
203            let month: u32 = parts[1]
204                .parse()
205                .map_err(|e| BticError::ParseError(format!("invalid month in '{s}': {e}")))?;
206            if !(1..=12).contains(&month) {
207                return Err(BticError::ParseError(format!(
208                    "month {month} out of range 1-12"
209                )));
210            }
211            let date = NaiveDate::from_ymd_opt(year, month, 1).ok_or_else(|| {
212                BticError::ParseError(format!("invalid date {year}-{month:02}-01"))
213            })?;
214            let ms = date_to_ms(date);
215            Ok((ms, Granularity::Month, certainty))
216        }
217        1 => {
218            // YYYY (just a year)
219            let year: i32 = parts[0]
220                .parse()
221                .map_err(|e| BticError::ParseError(format!("invalid year '{s}': {e}")))?;
222            let ms = year_to_ms(year)?;
223            Ok((ms, Granularity::Year, certainty))
224        }
225        _ => Err(BticError::ParseError(format!(
226            "cannot parse date component '{s}'"
227        ))),
228    }
229}
230
/// Strip timezone suffix from a datetime string, returning (cleaned, offset_secs).
///
/// Recognised suffixes are `Z` / `z` (offset 0) and a numeric `+HH:MM` or
/// `-HH:MM` offset, reported in seconds east of UTC (negative for `-`).
///
/// A numeric offset is only accepted when both fields are two ASCII digits
/// with `HH <= 23` and `MM <= 59`. Anything else (`+-5:00`, `++5:00`,
/// `+99:99`, ...) is left in place and returned with offset 0, so the caller's
/// datetime parsing rejects the malformed text instead of silently applying a
/// bogus offset.
fn strip_timezone(s: &str) -> (&str, i32) {
    if let Some(stripped) = s.strip_suffix('Z').or_else(|| s.strip_suffix('z')) {
        return (stripped, 0);
    }

    // Look for +HH:MM or -HH:MM occupying the last six bytes.
    let bytes = s.as_bytes();
    if let Some(sign_pos) = bytes.len().checked_sub(6) {
        let tail = &bytes[sign_pos..];
        let sign = match tail[0] {
            b'+' => 1,
            b'-' => -1,
            _ => 0,
        };
        let digits_ok = [1usize, 2, 4, 5]
            .iter()
            .all(|&i| tail[i].is_ascii_digit());

        if sign != 0 && tail[3] == b':' && digits_ok {
            let hours = i32::from(tail[1] - b'0') * 10 + i32::from(tail[2] - b'0');
            let minutes = i32::from(tail[4] - b'0') * 10 + i32::from(tail[5] - b'0');
            if hours <= 23 && minutes <= 59 {
                // The sign byte is ASCII, so `sign_pos` is a char boundary.
                return (&s[..sign_pos], sign * (hours * 3600 + minutes * 60));
            }
        }
    }

    (s, 0)
}
258
259/// Convert a NaiveDate to milliseconds since epoch.
260fn date_to_ms(date: NaiveDate) -> i64 {
261    let dt = date.and_hms_opt(0, 0, 0).unwrap();
262    datetime_to_ms(dt)
263}
264
265/// Convert a NaiveDateTime to milliseconds since epoch.
266fn datetime_to_ms(dt: NaiveDateTime) -> i64 {
267    dt.and_utc().timestamp_millis()
268}
269
270/// Convert an astronomical year to milliseconds since epoch (start of year).
271fn year_to_ms(year: i32) -> Result<i64, BticError> {
272    let date = NaiveDate::from_ymd_opt(year, 1, 1)
273        .ok_or_else(|| BticError::ParseError(format!("year {year} out of range")))?;
274    Ok(date_to_ms(date))
275}
276
277/// Expand a lower-bound ms timestamp by one unit of the given granularity
278/// to produce the upper bound. Uses calendar-aware arithmetic for variable-width units.
279fn expand_granularity(lo_ms: i64, gran: Granularity) -> Result<i64, BticError> {
280    match gran {
281        Granularity::Millisecond => Ok(lo_ms + 1),
282        Granularity::Second => Ok(lo_ms + 1_000),
283        Granularity::Minute => Ok(lo_ms + 60_000),
284        Granularity::Hour => Ok(lo_ms + 3_600_000),
285        Granularity::Day => Ok(lo_ms + 86_400_000),
286        // Variable-width calendar units require chrono
287        Granularity::Month => expand_months(lo_ms, 1),
288        Granularity::Quarter => expand_months(lo_ms, 3),
289        Granularity::Year => expand_years(lo_ms, 1),
290        Granularity::Decade => expand_years(lo_ms, 10),
291        Granularity::Century => expand_years(lo_ms, 100),
292        Granularity::Millennium => expand_years(lo_ms, 1000),
293    }
294}
295
296/// Add N months to a timestamp (calendar-aware).
297fn expand_months(lo_ms: i64, months: i32) -> Result<i64, BticError> {
298    let dt = ms_to_datetime(lo_ms)?;
299    let date = dt.date();
300
301    let mut year = date.year();
302    let mut month = date.month() as i32 + months;
303    while month > 12 {
304        month -= 12;
305        year += 1;
306    }
307    while month < 1 {
308        month += 12;
309        year -= 1;
310    }
311
312    let next_date = NaiveDate::from_ymd_opt(year, month as u32, 1)
313        .ok_or_else(|| BticError::ParseError(format!("date overflow: {year}-{month:02}-01")))?;
314    Ok(date_to_ms(next_date))
315}
316
317/// Add N years to a timestamp (calendar-aware).
318fn expand_years(lo_ms: i64, years: i32) -> Result<i64, BticError> {
319    let dt = ms_to_datetime(lo_ms)?;
320    let date = dt.date();
321    let next_date = NaiveDate::from_ymd_opt(date.year() + years, 1, 1).ok_or_else(|| {
322        BticError::ParseError(format!("date overflow: year {}", date.year() + years))
323    })?;
324    Ok(date_to_ms(next_date))
325}
326
327/// Convert milliseconds since epoch to a NaiveDateTime.
328fn ms_to_datetime(ms: i64) -> Result<NaiveDateTime, BticError> {
329    let secs = ms.div_euclid(1000);
330    let nsecs = (ms.rem_euclid(1000) * 1_000_000) as u32;
331    chrono::DateTime::from_timestamp(secs, nsecs)
332        .map(|dt| dt.naive_utc())
333        .ok_or_else(|| BticError::ParseError(format!("timestamp {ms}ms out of range")))
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    /// 1985-03-01 as epoch milliseconds.
    const MAR_1985_START_MS: i64 = 478_483_200_000;
    /// 2024-07-01 as epoch milliseconds (the end of June 2024).
    const JUN_2024_END_MS: i64 = 1_719_792_000_000;
    /// 2024-06-16 as epoch milliseconds (the end of 2024-06-15).
    const JUN_15_2024_END_MS: i64 = 1_718_496_000_000;
    /// 2020-03-01 as epoch milliseconds.
    const MAR_2020_START_MS: i64 = 1_583_020_800_000;

    /// Parse a literal that the test expects to be valid.
    fn parsed(literal: &str) -> Btic {
        parse_btic_literal(literal).unwrap()
    }

    /// Parse `s` and check both bounds and both granularities.
    fn assert_btic(
        s: &str,
        expected_lo: i64,
        expected_hi: i64,
        lo_gran: Granularity,
        hi_gran: Granularity,
    ) {
        let b = match parse_btic_literal(s) {
            Ok(interval) => interval,
            Err(e) => panic!("parse '{s}' failed: {e}"),
        };
        assert_eq!(b.lo(), expected_lo, "lo mismatch for '{s}'");
        assert_eq!(b.hi(), expected_hi, "hi mismatch for '{s}'");
        assert_eq!(b.lo_granularity(), lo_gran, "lo_gran mismatch for '{s}'");
        assert_eq!(b.hi_granularity(), hi_gran, "hi_gran mismatch for '{s}'");
    }

    // ---- single granular expressions ----

    #[test]
    fn year_1985() {
        let (lo, hi) = (473_385_600_000, 504_921_600_000);
        assert_btic("1985", lo, hi, Granularity::Year, Granularity::Year);
    }

    #[test]
    fn month_march_1985() {
        let (lo, hi) = (MAR_1985_START_MS, 481_161_600_000);
        assert_btic("1985-03", lo, hi, Granularity::Month, Granularity::Month);
    }

    #[test]
    fn day_1985_03_15() {
        let (lo, hi) = (479_692_800_000, 479_779_200_000);
        assert_btic("1985-03-15", lo, hi, Granularity::Day, Granularity::Day);
    }

    #[test]
    fn epoch_instant() {
        let interval = parsed("1970-01-01T00:00:00.000Z");
        assert_eq!(interval.lo(), 0);
        assert_eq!(interval.hi(), 1);
        assert!(interval.is_instant());
        assert_eq!(interval.lo_granularity(), Granularity::Millisecond);
    }

    // ---- solidus intervals ----

    #[test]
    fn two_bound_solidus() {
        assert_btic(
            "1985-03/2024-06",
            MAR_1985_START_MS,
            JUN_2024_END_MS,
            Granularity::Month,
            Granularity::Month,
        );
    }

    #[test]
    fn mixed_granularity_solidus() {
        assert_btic(
            "1985-03/2024-06-15",
            MAR_1985_START_MS,
            JUN_15_2024_END_MS,
            Granularity::Month,
            Granularity::Day,
        );
    }

    #[test]
    fn right_unbounded() {
        let interval = parsed("2020-03/");
        assert_eq!(interval.lo(), MAR_2020_START_MS);
        assert_eq!(interval.hi(), i64::MAX);
        assert!(interval.is_unbounded());
        assert_eq!(interval.lo_granularity(), Granularity::Month);
    }

    #[test]
    fn left_unbounded() {
        let interval = parsed("/2024-06");
        assert_eq!(interval.lo(), i64::MIN);
        assert_eq!(interval.hi(), JUN_2024_END_MS);
    }

    #[test]
    fn fully_unbounded() {
        let interval = parsed("/");
        assert_eq!(interval.lo(), i64::MIN);
        assert_eq!(interval.hi(), i64::MAX);
        assert_eq!(interval.meta(), 0);
    }

    // ---- certainty prefixes ----

    #[test]
    fn certainty_approximate() {
        let interval = parsed("~1985");
        assert_eq!(interval.lo_certainty(), Certainty::Approximate);
        assert_eq!(interval.hi_certainty(), Certainty::Approximate);
    }

    #[test]
    fn certainty_uncertain() {
        let interval = parsed("?1985");
        assert_eq!(interval.lo_certainty(), Certainty::Uncertain);
        assert_eq!(interval.hi_certainty(), Certainty::Uncertain);
    }

    #[test]
    fn certainty_unknown() {
        let interval = parsed("??1985");
        assert_eq!(interval.lo_certainty(), Certainty::Unknown);
        assert_eq!(interval.hi_certainty(), Certainty::Unknown);
    }

    #[test]
    fn mixed_certainty_solidus() {
        let interval = parsed("~1985/2024-06");
        assert_eq!(interval.lo_certainty(), Certainty::Approximate);
        assert_eq!(interval.hi_certainty(), Certainty::Definite);
    }

    // ---- BCE years ----

    #[test]
    fn bce_date() {
        // 500 BCE is astronomical year -499.
        let interval = parsed("500 BCE");
        assert_eq!(interval.lo_granularity(), Granularity::Year);
        assert_eq!(interval.hi_granularity(), Granularity::Year);
        // The interval must have a positive length.
        assert!(interval.duration_ms().unwrap() > 0);
    }

    #[test]
    fn approximate_bce() {
        let interval = parsed("~500 BCE");
        assert_eq!(interval.lo_certainty(), Certainty::Approximate);
        assert_eq!(interval.hi_certainty(), Certainty::Approximate);
        assert_eq!(interval.lo_granularity(), Granularity::Year);
    }

    // ---- time-of-day granularities ----

    #[test]
    fn second_granularity() {
        let interval = parsed("1985-03-15T14:30:00Z");
        assert_eq!(interval.lo_granularity(), Granularity::Second);
        assert_eq!(interval.duration_ms(), Some(1000));
    }

    #[test]
    fn minute_granularity() {
        let interval = parsed("1985-03-15T14:30Z");
        assert_eq!(interval.lo_granularity(), Granularity::Minute);
        assert_eq!(interval.duration_ms(), Some(60_000));
    }

    // ---- rejected input ----

    #[test]
    fn empty_literal_rejected() {
        assert!(parse_btic_literal("").is_err());
    }

    #[test]
    fn invalid_literal_rejected() {
        assert!(parse_btic_literal("not-a-date").is_err());
    }
}