Skip to main content

deep_time/alloc_parse/
parse_date.rs

1use crate::{
2    ClassifiedDate, DateClassification, Dt, DtErr, DtErrKind, Lang, MAX_DATE_STRING_LEN, Mode,
3    Order, OrderFirst, ParseCfg, an_err, classify_date, default_date_parse_options,
4    generate_ambiguous_day_first_candidates, generate_ambiguous_month_first_candidates,
5    generate_ambiguous_year_first_candidates, generate_unambiguous_candidates,
6    is_week_date_missing_weekday, parse_pure_numeric_unix_timestamp, parse_syslog_no_year,
7    parse_week_date_no_weekday, parse_yyyy_mm, smart_detect_date_order, try_pure_numeric,
8};
9use alloc::borrow::Cow;
10use alloc::string::String;
11
12impl Dt {
13    /// Automatically parses datetime [`str`] into a [`Dt`] by guessing and generating the format. Supports the vast
14    /// majority of date formats.
15    ///
16    /// - Requires the `"alloc"` feature.
17    /// - The returned [`Dt`] is internally on the TAI time scale. The `attos` field is an [`i128`] attosecond
18    ///   count since TAI 2000-01-01 noon. See [`Scale`] for more information.
19    /// - The returned [`Dt`] is **not** in local time: if a timezone is parsed, it is used to find the
20    ///   offset needed to return a non-local instant.
21    ///
22    /// ## Parameters
23    ///
24    /// - `s`: The string to parse. Must be non-empty and no longer than 255 bytes. Empty strings or overly
25    ///   long inputs return an error.
26    /// - `opts`: Optional [`ParseCfg`]. Pass `None` to use the defaults.
27    ///
28    /// ## Configuration Options
29    ///
30    /// These are the fields of the configuration options struct [`ParseCfg`], their types and defaults.
31    ///
32    /// See [`ParseCfg`] for more information.
33    ///
34    /// | Field          | Type and Default     | Effect |
35    /// |----------------|----------------------------------|--------|
36    /// | `lang`         | [`Lang::En`]                     | Language, scroll down to see currently supported languages                                        |
37    /// | `order`        | [`Order::Smart`]                 | How to resolve ambiguous numeric dates like `01/02/03`                                            |
38    /// | `mode`         | [`Mode::Auto`]                   | Special handling for purely numeric inputs                                                        |
39    /// | `parse`        | [`Option<Vec<String>>`] - `None` | An explicit list of formats to try, if the [`Mode`] is Explicit then only these formats are tried |
40    /// | `relative`     | [`bool`] - `true`                | Enable phrases like "tomorrow", "in 3 days", limited support for relative dates                   |
41    /// | `ref_time`     | [`Option<Dt>`] - `None`          | Reference time for relative dates and syslog-style "no-year" dates                                |
42    /// | `to_lower`     | [`bool`] - `true`                | Automatically lowercase the input, **only** set to false if it's already lowercase                |
43    ///
44    /// ## Purely Numeric Inputs
45    ///
46    /// When the input consists **only** of digits (and optionally a decimal point),
47    /// the parser uses a fast, mode-aware path before trying any other strategies.
48    /// The exact interpretation depends on the number of digits and the selected `mode`.
49    ///
50    /// | Digits | Example(s)               | `Mode`          | Interpreted as                          | Notes |
51    /// |--------|--------------------------|-----------------|-----------------------------------------|-------|
52    /// | 1–4    | `2024`, `24`, `5`        | `Auto`/`Legacy` | Year (2-digit uses 2000/1900 pivot)    | 1- and 3-digit years only work in `Scientific` |
53    /// | 5      | `24123`, `60400`         | `Legacy`        | Ordinal date (YYDDD)                    | — |
54    /// | 5      | `60400`, `60400.75`      | `Scientific`    | Modified Julian Date (MJD)              | Fractional days supported |
55    /// | 5      | `24123`, `60400.75`      | `Auto`          | Ordinal (non-decimal) or MJD (decimal) | Smart default |
56    /// | 6      | `240315`, `202403`       | `Auto`          | YYYYMM if plausible year, else YYMMDD   | Most common compact form |
57    /// | 6      | `240315`                 | `Legacy`        | YYMMDD preferred                        | — |
58    /// | 6      | `202403`                 | `Scientific`    | YYYYMM preferred                        | — |
59    /// | 7      | `2024123`                | `Legacy`        | Ordinal date (YYYYDDD)                  | — |
60    /// | 7      | `2460123`, `2460123.5`   | `Scientific`    | Julian Day (JD)                         | Fractional days supported |
61    /// | 7      | `2024123`                | `Auto`          | Ordinal (integer) or JD (decimal)       | Smart default |
62    /// | 10–11  | `1735689600`             | any             | Unix seconds                            | — |
63    /// | 12–15  | `1735689600123`          | any             | Unix milliseconds                       | Most common high-precision case |
64    /// | 16–18  | `1735689600123456`       | any             | Unix microseconds                       | — |
65    /// | 19+    | `1735689600123456789`    | any             | Unix nanoseconds                        | Full precision |
66    ///
67    /// Use `Mode::UnixTimestamp` when you know the input is always a Unix timestamp.
68    ///
69    /// ## Ambiguous Numeric Dates
70    ///
71    /// Dates where the components could map to different orders (e.g. `01/02/03`,
72    /// `3-4-5`, `15.03.24`, `2024.03.15`) are resolved via the `order` field:
73    ///
74    /// - **`Order::Smart`** (default) — Applies the fast heuristic described in [`Order::Smart`].
75    ///   It strongly prefers modern/tech conventions (Year-first for compact/ISO-like data)
76    ///   while handling the majority of international and US-style dates.
77    ///
78    /// - **`Order::Year`**, **`Order::Day`**, or **`Order::Month`** force a
79    ///   specific interpretation and bypass the heuristic entirely.
80    ///
81    /// ## Supported Formats
82    ///
83    /// The main part of the parser basically works by using aho-corasick with day names, month names, and other things to
84    /// tokenize an input and then automatically generate candidate formats to try on it. Due to this it's difficult to
85    /// say the number of supported formats, but it's probably in the thousands.
86    ///
87    /// Separators generally don't matter, they could be spaces, slashes, whatever.
88    ///
89    /// Generally speaking the date part must come first, and stuff like time components, offsets and iana timezone names
90    /// must come afterwards.
91    ///
92    /// - **ISO 8601** and variants: `2024-03-15`, `2024-03-15T14:30:00Z`, `2024-03-15T14:30:00+01:00[Europe/Paris]`
93    /// - **Named dates** (in supported languages): `15 March 2024`, `15 mars 2024`, `15. März 2024`, `15 de marzo de 2024`
94    /// - **Week dates**: `2024-W15`, `2024-W15-3`, `2024W153` (missing weekday defaults to Monday)
95    /// - **Syslog-style** (no year): `Mar  5 10:23:45` (year inferred from `ref_time`)
96    /// - **Relative expressions**: `tomorrow`, `in 3 days`, `2 weeks ago`
97    /// - **12-hour and 24-hour time**: `2:30 PM`, `14:30:45.123`
98    /// - **Offsets and timezones**: `+0100`, `-05:30`, `Z`, IANA timezone names (with the `jiff-tz` feature enabled)
99    /// - **Library time scales**: `TAI`, `TT`, etc. are detected and parsed, must come after the date part of the input.
100    ///
101    /// Note that relative date support is quite limited and phrases such as `"next friday at 9am"` will not parse.
102    ///
103    /// ## Examples
104    ///
105    /// ```rust
106    /// use deep_time::{Dt, ParseCfg, Order, Mode, Lang};
107    ///
108    /// // Default smart parsing
109    /// let dt = Dt::from_str_parse("2024-03-15 14:30:00", &None).unwrap();
110    ///
111    /// // German named date
112    /// let cfg = ParseCfg { lang: Lang::De, ..Default::default() };
113    /// let dt = Dt::from_str_parse("15. März 2024 um 14:30", &Some(cfg)).unwrap();
114    ///
115    /// // Force month-first
116    /// let cfg = ParseCfg { order: Order::Month, ..Default::default() };
117    /// let dt = Dt::from_str_parse("03/15/2024", &Some(cfg)).unwrap();
118    ///
119    /// // Pure numeric compact form
120    /// let dt = Dt::from_str_parse("20240315", &None).unwrap(); // March 15, 2024
121    ///
122    /// // Unix timestamp (milliseconds)
123    /// let cfg = ParseCfg { mode: Mode::UnixTimestamp, ..Default::default() };
124    /// let dt = Dt::from_str_parse("1735689600123", &Some(cfg)).unwrap();
125    ///
126    /// // Explicit formats only (no fallback)
127    /// let cfg = ParseCfg {
128    ///     parse: Some(vec!["%d/%m/%Y".into(), "%Y-%m-%d".into()]),
129    ///     mode: Mode::Explicit,
130    ///     ..Default::default()
131    /// };
132    /// let dt = Dt::from_str_parse("15/03/2024", &Some(cfg)).unwrap();
133    ///
134    /// // Relative date
135    /// let dt = Dt::from_str_parse("2 days from now", &None).unwrap();
136    /// ```
137    ///
138    /// ## Notes
139    ///
140    /// - The `Smart` + `Auto` combination gives the best real-world success rate for mixed data.
141    /// - All successfully parsed [`Dt`] values are stored with attosecond precision on the internal
142    ///   TAI timescale.
143    /// - Timezone handling (IANA names and fixed offsets) is fully supported when the `jiff-tz` feature
144    ///   is enabled.
145    ///
146    /// ## Supported Languages:
147    ///
148    /// Language support here basically means supporting abbreviated and full day and month names.
149    /// Non-ASCII numeric characters, such as full-width digits, are also supported.
150    ///
151    /// Some day/month names in non-English languages are not supported due to clashes, any such missing
152    /// support is noted below.
153    ///
154    /// - En
155    /// - De
156    ///     - Won't parse "t" as short form for day.
157    /// - Es
158    ///     - Won't parse "mar" as tuesday, will instead parse as march.
159    /// - Fr
160    ///     - Won't parse "mar" as tuesday, will instead parse as march.
161    ///
162    /// ## See also
163    ///
164    /// - [`ParseCfg`]
165    /// - [`Order`]
166    /// - [`Mode`]
167    /// - [`Lang`]
168    /// - [`Dt`]
169    /// - [`Dt::from_str_iso`](../struct.Dt.html#method.from_str_iso)
170    pub fn from_str_parse(s: &str, opts: &Option<ParseCfg>) -> Result<Dt, DtErr> {
171        let opts: &ParseCfg = opts
172            .as_ref()
173            .unwrap_or_else(|| default_date_parse_options());
174
175        if s.is_empty() {
176            return Err(an_err!(DtErrKind::Incomplete, "empty"));
177        } else if s.len() > MAX_DATE_STRING_LEN {
178            return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", s));
179        }
180
181        let lang: Lang = opts.lang;
182        let ref_time = &opts.ref_time;
183
184        let lowered: Cow<str> = if opts.to_lower {
185            Cow::Owned(s.to_lowercase())
186        } else {
187            Cow::Borrowed(s)
188        };
189
190        let classification = match classify_date(&lowered, lang, ref_time) {
191            Ok(ClassifiedDate::Parsed(time_point)) => return Ok(time_point),
192            Ok(ClassifiedDate::Cls(c)) => c,
193            Err(e) => {
194                // std::eprintln!("{}", e);
195                return Err(an_err!(
196                    DtErrKind::InvalidInput,
197                    "{}",
198                    s => e
199                ));
200            }
201        };
202
203        // let xx = &classification.date;
204        // if xx != trimmed {
205        //     eprintln!("NOT EQUAL: {:?}, {:?}", trimmed, xx);
206        // }
207        // eprintln!("BEFORE & AFTER: {:?}, {:?}", lowered, &classification.date);
208
209        let normalized = &classification.date;
210
211        let (mode, date_order) = if let Some(formats) = &opts.parse {
212            if !formats.is_empty() {
213                for fmt in formats {
214                    if let Ok(value) = Self::from_str(normalized, fmt, true, true, false) {
215                        return Ok(value);
216                    }
217                }
218                // None of the provided formats worked and mode is Explicit
219                if opts.mode == Mode::Explicit {
220                    return Err(an_err!(DtErrKind::InvalidInput, "{}", s));
221                }
222            }
223            (opts.mode, opts.order)
224        } else {
225            (opts.mode, opts.order)
226        };
227
228        // if s == "2006-04-02 02:30-05 America/Indiana/Vevay" {
229        //     std::eprintln!("{:?}", classification);
230        // }
231        // std::eprintln!("{:?}", classification);
232
233        if classification.is_pure_numeric {
234            match mode {
235                Mode::UnixTimestamp => {
236                    if let Some(dt) = parse_pure_numeric_unix_timestamp(
237                        normalized,
238                        classification.num_non_decimal_digits as usize,
239                    ) {
240                        return Ok(dt);
241                    }
242                }
243                _ => {
244                    if let Some(dt) = try_pure_numeric(
245                        normalized,
246                        classification.num_digits,
247                        classification.num_non_decimal_digits,
248                        classification.is_decimal,
249                        mode,
250                    ) {
251                        // std::eprintln!("NUMERIC INPUT SUCCESS: {:?}", s);
252                        return Ok(dt);
253                    }
254                }
255            }
256        }
257        if !classification.has_year
258            && let Some(dt) = parse_syslog_no_year(normalized, lang, ref_time)
259        {
260            return Ok(dt);
261        }
262
263        if is_week_date_missing_weekday(&classification) {
264            // std::eprintln!("IS WEEK DATE MISSING WEEKDAY: {:?}", s);
265            if let Some(dt) = parse_week_date_no_weekday(&classification, lang, ref_time) {
266                return Ok(dt);
267            }
268        }
269        if let Some(dt) = try_unambiguous(normalized, &classification) {
270            return Ok(dt);
271        }
272        // std::eprintln!("done trying unambiguous");
273        if let Some(dt) = match date_order {
274            Order::Smart => {
275                let order = smart_detect_date_order(normalized, &classification);
276                let mut result: Option<Dt>;
277
278                match order {
279                    OrderFirst::Day => {
280                        result = try_compatible_formats(
281                            normalized,
282                            generate_ambiguous_day_first_candidates(&classification),
283                        );
284                        // std::eprintln!("done trying day first: {:?}", result);
285
286                        if result.is_none() {
287                            result = try_compatible_formats(
288                                normalized,
289                                generate_ambiguous_month_first_candidates(&classification),
290                            );
291                            // std::eprintln!("done trying month first: {:?}", result);
292                        }
293
294                        if result.is_none() {
295                            result = try_compatible_formats(
296                                normalized,
297                                generate_ambiguous_year_first_candidates(&classification),
298                            );
299                            // std::eprintln!("done trying year first: {:?}", result);
300                        }
301                    }
302                    OrderFirst::Month => {
303                        result = try_compatible_formats(
304                            normalized,
305                            generate_ambiguous_month_first_candidates(&classification),
306                        );
307                        // std::eprintln!("done trying month first: {:?}", result);
308
309                        if result.is_none() {
310                            result = try_compatible_formats(
311                                normalized,
312                                generate_ambiguous_day_first_candidates(&classification),
313                            );
314                            // std::eprintln!("done trying day first: {:?}", result);
315                        }
316
317                        if result.is_none() {
318                            result = try_compatible_formats(
319                                normalized,
320                                generate_ambiguous_year_first_candidates(&classification),
321                            );
322                            // std::eprintln!("done trying year first: {:?}", result);
323                        }
324                    }
325                    OrderFirst::Year => {
326                        result = try_compatible_formats(
327                            normalized,
328                            generate_ambiguous_year_first_candidates(&classification),
329                        );
330                        // std::eprintln!("done trying year first: {:?}", result);
331
332                        if result.is_none() {
333                            result = try_compatible_formats(
334                                normalized,
335                                generate_ambiguous_day_first_candidates(&classification),
336                            );
337                            // std::eprintln!("done trying day first: {:?}", result);
338                        }
339
340                        if result.is_none() {
341                            result = try_compatible_formats(
342                                normalized,
343                                generate_ambiguous_month_first_candidates(&classification),
344                            );
345                            // std::eprintln!("done trying month first: {:?}", result);
346                        }
347                    }
348                }
349
350                result
351            }
352            Order::Year => try_compatible_formats(
353                normalized,
354                generate_ambiguous_year_first_candidates(&classification),
355            ),
356            Order::Day => try_compatible_formats(
357                normalized,
358                generate_ambiguous_day_first_candidates(&classification),
359            ),
360            Order::Month => try_compatible_formats(
361                normalized,
362                generate_ambiguous_month_first_candidates(&classification),
363            ),
364        } {
365            return Ok(dt);
366        }
367        // std::eprintln!("NOW trying numeric timestamp");
368        if classification.is_pure_numeric
369            && mode != Mode::UnixTimestamp
370            && let Some(dt) = parse_pure_numeric_unix_timestamp(
371                normalized,
372                classification.num_non_decimal_digits as usize,
373            )
374        {
375            return Ok(dt);
376        }
377        Err(an_err!(DtErrKind::InvalidInput, "{}", s))
378    }
379
380    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
381    /// but returns attoseconds since the library epoch: 2000-01-01 12:00:00 UTC
382    /// (on the UTC scale).
383    ///
384    /// Returns `Some(attos)` on success (negative for pre-2000 dates) or `None`
385    /// on any parse error.
386    #[inline]
387    pub fn str_to_attos(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
388        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_attos())
389    }
390
391    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
392    /// but returns milliseconds since the library epoch: 2000-01-01 12:00:00 UTC
393    /// (on the UTC scale).
394    ///
395    /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
396    /// on any parse error.
397    #[inline]
398    pub fn str_to_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
399        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ms())
400    }
401
402    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
403    /// but returns nanoseconds since the library epoch: 2000-01-01 12:00:00 UTC
404    /// (on the UTC scale).
405    ///
406    /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
407    /// on any parse error.
408    #[inline]
409    pub fn str_to_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
410        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ns())
411    }
412
413    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
414    /// but returns milliseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
415    ///
416    /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
417    /// on any parse error.
418    #[inline]
419    pub fn str_to_unix_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
420        Dt::from_str_parse(s, opts)
421            .ok()
422            .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ms())
423    }
424
425    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
426    /// but returns nanoseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
427    ///
428    /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
429    /// on any parse error.
430    #[inline]
431    pub fn str_to_unix_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
432        Dt::from_str_parse(s, opts)
433            .ok()
434            .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ns())
435    }
436}
437
438/// Core zero-allocation helper (updated to match the new `&str` signature).
439///
440/// The `fmt` we get from the iterator is still `'static`, but it coerces automatically
441/// to `&str`, so everything continues to work.
442#[inline]
443pub(crate) fn try_compatible_formats<I>(s: &str, formats: I) -> Option<Dt>
444where
445    I: IntoIterator<Item = String>,
446{
447    // let mut dt = None;
448
449    // for fmt in formats.into_iter() {
450    //     eprintln!("TRYING FMT: {}", fmt);
451    //     if let Ok(parsed) = Dt::from_str(s, &fmt, true, true, false) {
452    //         dt = Some(parsed);
453    //         break;
454    //     }
455    //     // === DEBUG ===
456    //     // eprintln!("Tried format: {:?}", fmt);
457    // }
458
459    // dt
460    formats
461        .into_iter()
462        .find_map(|fmt| Dt::from_str(s, &fmt, true, true, false).ok())
463}
464
465#[inline]
466pub(crate) fn try_unambiguous(s: &str, classification: &DateClassification) -> Option<Dt> {
467    if matches!(classification.bytes_len, 6..=8)
468        && let Some(dt) = parse_yyyy_mm(s.as_bytes())
469    {
470        return Some(dt);
471    }
472    try_compatible_formats(s, generate_unambiguous_candidates(classification))
473}