Skip to main content

deep_time/alloc_parse/
parse_date.rs

1use crate::{
2    ClassifiedDate, DateClassification, Dt, DtErr, DtErrKind, Lang, Mode, Order, OrderFirst,
3    ParseCfg, STRTIME_SIZE, an_err, classify_date,
4    generate_ambiguous_day_first_candidates, generate_ambiguous_month_first_candidates,
5    generate_ambiguous_year_first_candidates, generate_unambiguous_candidates,
6    is_week_date_missing_weekday, parse_pure_numeric_unix_timestamp, parse_syslog_no_year,
7    parse_week_date_no_weekday, parse_yyyy_mm, smart_detect_date_order, try_pure_numeric,
8};
9use alloc::borrow::Cow;
10use alloc::string::String;
11
12impl Dt {
13    /// Automatically parses datetime [`str`] into a [`Dt`] by guessing and generating the format. Supports the vast
14    /// majority of date formats.
15    ///
16    /// - Requires the `"parse"` feature (which enables `alloc`).
17    /// - The returned [`Dt`] is internally on the TAI time scale. The `attos` field is an [`i128`] attosecond
18    ///   count since TAI 2000-01-01 noon. See [`Scale`] for more information.
    /// - The returned [`Dt`] is **not** in local time: if a timezone is parsed, it is only used to find
    ///   the offset needed to return the non-local instant.
21    ///
22    /// ## Parameters
23    ///
24    /// - `s`: The string to parse. Must be non-empty and no longer than 255 bytes. Empty strings or overly
25    ///   long inputs return an error.
26    /// - `opts`: The [`ParseCfg`] to use. Pass `&ParseCfg::DEFAULT` (or `&ParseCfg::default()`)
27    ///   to use the standard smart defaults. You can create a `ParseCfg` once and pass `&cfg`
28    ///   on every call for consistent behavior and to avoid repeated construction.
29    ///
30    /// ## Configuration Options
31    ///
32    /// These are the fields of the configuration options struct [`ParseCfg`], their types and defaults.
33    ///
34    /// See [`ParseCfg`] for more information.
35    ///
36    /// | Field          | Type and Default     | Effect |
37    /// |----------------|----------------------------------|--------|
38    /// | `lang`         | [`Lang::En`]                     | Language, scroll down to see currently supported languages                                        |
39    /// | `order`        | [`Order::Smart`]                 | How to resolve ambiguous numeric dates like `01/02/03`                                            |
40    /// | `mode`         | [`Mode::Auto`]                   | Special handling for purely numeric inputs                                                        |
41    /// | `parse`        | [`Option<Vec<String>>`] - `None` | An explicit list of formats to try, if the [`Mode`] is Explicit then only these formats are tried |
42    /// | `relative`     | [`bool`] - `true`                | Enable phrases like "tomorrow", "in 3 days"                   |
43    /// | `ref_time`     | [`Option<Dt>`] - `None`          | Reference time for relative dates and syslog-style "no-year" dates                                |
44    /// | `to_lower`     | [`bool`] - `true`                | Automatically lowercase the input, **only** set to false if it's already lowercase                |
45    ///
46    /// ## Purely Numeric Inputs
47    ///
48    /// When the input consists **only** of digits (and optionally a decimal point),
49    /// the parser uses a fast, mode-aware path before trying any other strategies.
50    /// The exact interpretation depends on the number of digits and the selected `mode`.
51    ///
52    /// | Digits | Example(s)               | `Mode`          | Interpreted as                          | Notes |
53    /// |--------|--------------------------|-----------------|-----------------------------------------|-------|
54    /// | 1–4    | `2024`, `24`, `5`        | `Auto`/`Legacy` | Year (2-digit uses 2000/1900 pivot)    | 1- and 3-digit years only work in `Scientific` |
55    /// | 5      | `24123`, `60400`         | `Legacy`        | Ordinal date (YYDDD)                    | — |
56    /// | 5      | `60400`, `60400.75`      | `Scientific`    | Modified Julian Date (MJD)              | Fractional days supported |
57    /// | 5      | `24123`, `60400.75`      | `Auto`          | Ordinal (non-decimal) or MJD (decimal) | Smart default |
58    /// | 6      | `240315`, `202403`       | `Auto`          | YYYYMM if plausible year, else YYMMDD   | Most common compact form |
59    /// | 6      | `240315`                 | `Legacy`        | YYMMDD preferred                        | — |
60    /// | 6      | `202403`                 | `Scientific`    | YYYYMM preferred                        | — |
61    /// | 7      | `2024123`                | `Legacy`        | Ordinal date (YYYYDDD)                  | — |
62    /// | 7      | `2460123`, `2460123.5`   | `Scientific`    | Julian Day (JD)                         | Fractional days supported |
63    /// | 7      | `2024123`                | `Auto`          | Ordinal (integer) or JD (decimal)       | Smart default |
64    /// | 10–11  | `1735689600`             | any             | Unix seconds                            | — |
65    /// | 12–15  | `1735689600123`          | any             | Unix milliseconds                       | Most common high-precision case |
66    /// | 16–18  | `1735689600123456`       | any             | Unix microseconds                       | — |
67    /// | 19+    | `1735689600123456789`    | any             | Unix nanoseconds                        | Full precision |
68    ///
69    /// Use `Mode::UnixTimestamp` when you know the input is always a Unix timestamp.
70    ///
71    /// ## Ambiguous Numeric Dates
72    ///
73    /// Dates where the components could map to different orders (e.g. `01/02/03`,
74    /// `3-4-5`, `15.03.24`, `2024.03.15`) are resolved via the `order` field:
75    ///
76    /// - **`Order::Smart`** (default) — Applies the fast heuristic described in [`Order::Smart`].
77    ///   It strongly prefers modern/tech conventions (Year-first for compact/ISO-like data)
78    ///   while handling the majority of international and US-style dates.
79    ///
80    /// - **`Order::Year`**, **`Order::Day`**, or **`Order::Month`** force a
81    ///   specific interpretation and bypass the heuristic entirely.
82    ///
83    /// ## Supported Formats
84    ///
85    /// The parser tokenizes known words (month/day names, relative phrases, timezones, etc.), generates candidate
86    /// formats from the token pattern, and tries them until one matches. Thousands of layouts are supported.
87    ///
88    /// Separators generally don't matter, they could be spaces, slashes, or hyphens, but **not colons** - colons are
89    /// reserved for the time connector, times, and offsets.
90    ///
91    /// Generally speaking the date part must come first, and stuff like time components, offsets and iana timezone names
92    /// must come afterwards.
93    ///
94    /// - **ISO 8601** and variants: `2024-03-15`, `2024-03-15T14:30:00Z`, `2024-03-15T14:30:00+01:00[Europe/Paris]`
95    /// - **Named dates** (in supported languages): `15 March 2024`, `15 mars 2024`, `15. März 2024`, `15 de marzo de 2024`
96    /// - **Week dates**: `2024-W15`, `2024-W15-3`, `2024W153` (missing weekday defaults to Monday)
97    /// - **Syslog-style** (no year): `Mar  5 10:23:45` (year inferred from `ref_time`)
98    /// - **Relative expressions**: `tomorrow`, `in 3 days`, `2 weeks ago`
    /// - **12- and 24-hour times**: `2:30 PM`, `14:30:45.123`
100    /// - **Offsets and timezones**: `+0100`, `-05:30`, `Z`, IANA timezone names (with the `jiff-tz` feature enabled)
101    /// - **Library time scales**: `TAI`, `TT`, etc. are detected and parsed, must come after the date part of the input
102    ///
    /// Relative dates are also supported automatically, except for bare numbers without colons such as
    /// `0900`, because these are interpreted differently.
105    ///
106    /// ## Examples
107    ///
108    /// ```rust
109    /// use deep_time::{Dt, ParseCfg, Order, Mode, Scale};
110    ///
111    /// // Default smart parsing
112    /// let dt = Dt::from_str_parse("2024-03-15 14:30:00", &ParseCfg::DEFAULT).unwrap();
113    ///
114    /// // German named date (requires the `de` feature)
115    /// # #[cfg(feature = "de")]
116    /// # {
117    /// # use deep_time::Lang;
118    /// let cfg = ParseCfg { lang: Lang::De, ..Default::default() };
119    /// let dt = Dt::from_str_parse("15. März 2024 um 14:30", &cfg).unwrap();
120    /// # }
121    ///
122    /// // Pure numeric compact form
123    /// let dt = Dt::from_str_parse("20240315", &ParseCfg::DEFAULT).unwrap(); // March 15, 2024
124    ///
125    /// // Unix timestamp (milliseconds)
126    /// let cfg = ParseCfg { mode: Mode::UnixTimestamp, ..Default::default() };
127    /// let dt = Dt::from_str_parse("1735689600123", &cfg).unwrap();
128    ///
129    /// // Explicit formats only (no fallback)
130    /// let cfg = ParseCfg {
131    ///     parse: Some(vec!["%d/%m/%Y".into(), "%Y-%m-%d".into()]),
132    ///     mode: Mode::Explicit,
133    ///     ..Default::default()
134    /// };
135    /// let dt = Dt::from_str_parse("15/03/2024", &cfg).unwrap();
136    ///
137    /// // Relative dates — build config once, borrow repeatedly
138    /// let ref_time = Dt::from_ymd(2026, 6, 16, Scale::UTC, 12, 0, 0, 0);
139    /// let cfg = ParseCfg {
140    ///     ref_time: Some(ref_time),
141    ///     ..Default::default()
142    /// };
143    /// let dt = Dt::from_str_parse("next Monday at 14:00", &cfg).unwrap();
144    ///
145    /// assert_eq!(dt, Dt::from_ymd(2026, 6, 22, Scale::UTC, 14, 0, 0, 0));
146    /// ```
147    ///
148    /// ## Notes
149    ///
150    /// - The `Smart` + `Auto` combination gives the best real-world success rate for mixed data.
151    /// - Relative expressions and syslog-style no-year dates need a reference time. If `ref_time` is `None`
152    ///   and the `std` feature is enabled, system time is used; without `std`, set `ref_time` explicitly or
153    ///   parsing will fail.
154    /// - All successfully parsed [`Dt`] values are stored with attosecond precision on the internal
155    ///   TAI timescale.
156    /// - Timezone handling (IANA names and fixed offsets) is fully supported when the `jiff-tz` feature
157    ///   is enabled.
158    ///
159    /// ## Supported Languages:
160    ///
161    /// Language support here basically means supporting abbreviated and full day and month names.
162    /// Non-Ascii types of numeric characters are also supported such as full width digits.
163    ///
164    /// Some day/month names in non-English languages are not supported due to clashes, any such missing
165    /// support is noted below.
166    ///
167    /// - En
168    /// - De
169    ///     - Won't parse "t" as short form for day.
170    /// - Es
171    ///     - English word "ago" won't be detected as relative date word.
172    ///     - Won't parse "mar" as tuesday, will instead parse as march.
173    /// - Fr
174    ///     - Won't parse "mar" as tuesday, will instead parse as march.
175    ///
176    /// ## See also
177    ///
178    /// - [`ParseCfg`]
179    /// - [`Order`]
180    /// - [`Mode`]
181    /// - [`Lang`]
182    /// - [`Dt`]
183    /// - [`Dt::from_str_iso`](../struct.Dt.html#method.from_str_iso)
184    pub fn from_str_parse(s: &str, opts: &ParseCfg) -> Result<Dt, DtErr> {
185
186        if s.is_empty() {
187            return Err(an_err!(DtErrKind::Incomplete, "empty"));
188        } else if s.len() > STRTIME_SIZE {
189            return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", s));
190        }
191
192        let lang: Lang = opts.lang;
193        let ref_time = &opts.ref_time;
194
195        let lowered: Cow<str> = if opts.to_lower {
196            Cow::Owned(s.to_lowercase())
197        } else {
198            Cow::Borrowed(s)
199        };
200
201        let classification = match classify_date(&lowered, lang, ref_time) {
202            Ok(ClassifiedDate::Parsed(time_point)) => return Ok(time_point),
203            Ok(ClassifiedDate::Cls(c)) => c,
204            Err(e) => {
205                // std::eprintln!("{}", e);
206                return Err(an_err!(
207                    DtErrKind::InvalidInput,
208                    "{}",
209                    s => e
210                ));
211            }
212        };
213
214        // let xx = &classification.date;
215        // if xx != trimmed {
216        //     eprintln!("NOT EQUAL: {:?}, {:?}", trimmed, xx);
217        // }
218        // eprintln!("BEFORE & AFTER: {:?}, {:?}", lowered, &classification.date);
219
220        let normalized = &classification.date;
221
222        let (mode, date_order) = if let Some(formats) = &opts.parse {
223            if !formats.is_empty() {
224                for fmt in formats {
225                    if let Ok(value) = Self::from_str(normalized, fmt, true, true, false) {
226                        return Ok(value);
227                    }
228                }
229                // None of the provided formats worked and mode is Explicit
230                if opts.mode == Mode::Explicit {
231                    return Err(an_err!(DtErrKind::InvalidInput, "{}", s));
232                }
233            }
234            (opts.mode, opts.order)
235        } else {
236            (opts.mode, opts.order)
237        };
238
239        // if s == "on the 5th of april 2024 at 00:00am" {
240        //     std::eprintln!("{:?}", classification);
241        // }
242        // std::eprintln!("{:?}", classification);
243
244        if classification.is_pure_numeric {
245            match mode {
246                Mode::UnixTimestamp => {
247                    if let Some(dt) = parse_pure_numeric_unix_timestamp(
248                        normalized,
249                        classification.num_non_decimal_digits as usize,
250                    ) {
251                        return Ok(dt);
252                    }
253                }
254                _ => {
255                    if let Some(dt) = try_pure_numeric(
256                        normalized,
257                        classification.num_digits,
258                        classification.num_non_decimal_digits,
259                        classification.is_decimal,
260                        mode,
261                    ) {
262                        // std::eprintln!("NUMERIC INPUT SUCCESS: {:?}", s);
263                        return Ok(dt);
264                    }
265                }
266            }
267        }
268        if !classification.has_year
269            && let Some(dt) = parse_syslog_no_year(normalized, lang, ref_time)
270        {
271            return Ok(dt);
272        }
273
274        if is_week_date_missing_weekday(&classification) {
275            // std::eprintln!("IS WEEK DATE MISSING WEEKDAY: {:?}", s);
276            if let Some(dt) = parse_week_date_no_weekday(&classification, lang, ref_time) {
277                return Ok(dt);
278            }
279        }
280        if let Some(dt) = try_unambiguous(normalized, &classification) {
281            return Ok(dt);
282        }
283        // std::eprintln!("done trying unambiguous");
284        if let Some(dt) = match date_order {
285            Order::Smart => {
286                let order = smart_detect_date_order(normalized, &classification);
287                let mut result: Option<Dt>;
288
289                match order {
290                    OrderFirst::Day => {
291                        result = try_compatible_formats(
292                            normalized,
293                            generate_ambiguous_day_first_candidates(&classification),
294                        );
295                        // std::eprintln!("done trying day first: {:?}", result);
296
297                        if result.is_none() {
298                            result = try_compatible_formats(
299                                normalized,
300                                generate_ambiguous_month_first_candidates(&classification),
301                            );
302                            // std::eprintln!("done trying month first: {:?}", result);
303                        }
304
305                        if result.is_none() {
306                            result = try_compatible_formats(
307                                normalized,
308                                generate_ambiguous_year_first_candidates(&classification),
309                            );
310                            // std::eprintln!("done trying year first: {:?}", result);
311                        }
312                    }
313                    OrderFirst::Month => {
314                        result = try_compatible_formats(
315                            normalized,
316                            generate_ambiguous_month_first_candidates(&classification),
317                        );
318                        // std::eprintln!("done trying month first: {:?}", result);
319
320                        if result.is_none() {
321                            result = try_compatible_formats(
322                                normalized,
323                                generate_ambiguous_day_first_candidates(&classification),
324                            );
325                            // std::eprintln!("done trying day first: {:?}", result);
326                        }
327
328                        if result.is_none() {
329                            result = try_compatible_formats(
330                                normalized,
331                                generate_ambiguous_year_first_candidates(&classification),
332                            );
333                            // std::eprintln!("done trying year first: {:?}", result);
334                        }
335                    }
336                    OrderFirst::Year => {
337                        result = try_compatible_formats(
338                            normalized,
339                            generate_ambiguous_year_first_candidates(&classification),
340                        );
341                        // std::eprintln!("done trying year first: {:?}", result);
342
343                        if result.is_none() {
344                            result = try_compatible_formats(
345                                normalized,
346                                generate_ambiguous_day_first_candidates(&classification),
347                            );
348                            // std::eprintln!("done trying day first: {:?}", result);
349                        }
350
351                        if result.is_none() {
352                            result = try_compatible_formats(
353                                normalized,
354                                generate_ambiguous_month_first_candidates(&classification),
355                            );
356                            // std::eprintln!("done trying month first: {:?}", result);
357                        }
358                    }
359                }
360
361                result
362            }
363            Order::Year => try_compatible_formats(
364                normalized,
365                generate_ambiguous_year_first_candidates(&classification),
366            ),
367            Order::Day => try_compatible_formats(
368                normalized,
369                generate_ambiguous_day_first_candidates(&classification),
370            ),
371            Order::Month => try_compatible_formats(
372                normalized,
373                generate_ambiguous_month_first_candidates(&classification),
374            ),
375        } {
376            return Ok(dt);
377        }
378        // std::eprintln!("NOW trying numeric timestamp");
379        if classification.is_pure_numeric
380            && mode != Mode::UnixTimestamp
381            && let Some(dt) = parse_pure_numeric_unix_timestamp(
382                normalized,
383                classification.num_non_decimal_digits as usize,
384            )
385        {
386            return Ok(dt);
387        }
388        Err(an_err!(DtErrKind::InvalidInput, "{}", s))
389    }
390
391    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
392    /// but returns attoseconds since the library epoch: 2000-01-01 12:00:00 UTC
393    /// (on the UTC scale).
394    ///
395    /// Returns `Some(attos)` on success (negative for pre-2000 dates) or `None`
396    /// on any parse error.
397    #[inline]
398    pub fn str_to_attos(s: &str, opts: &ParseCfg) -> Option<i128> {
399        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_attos())
400    }
401
402    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
403    /// but returns milliseconds since the library epoch: 2000-01-01 12:00:00 UTC
404    /// (on the UTC scale).
405    ///
406    /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
407    /// on any parse error.
408    #[inline]
409    pub fn str_to_ms(s: &str, opts: &ParseCfg) -> Option<i128> {
410        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ms())
411    }
412
413    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
414    /// but returns nanoseconds since the library epoch: 2000-01-01 12:00:00 UTC
415    /// (on the UTC scale).
416    ///
417    /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
418    /// on any parse error.
419    #[inline]
420    pub fn str_to_ns(s: &str, opts: &ParseCfg) -> Option<i128> {
421        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ns())
422    }
423
424    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
425    /// but returns milliseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
426    ///
427    /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
428    /// on any parse error.
429    #[inline]
430    pub fn str_to_unix_ms(s: &str, opts: &ParseCfg) -> Option<i128> {
431        Dt::from_str_parse(s, opts)
432            .ok()
433            .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ms())
434    }
435
436    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
437    /// but returns nanoseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
438    ///
439    /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
440    /// on any parse error.
441    #[inline]
442    pub fn str_to_unix_ns(s: &str, opts: &ParseCfg) -> Option<i128> {
443        Dt::from_str_parse(s, opts)
444            .ok()
445            .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ns())
446    }
447}
448
449/// Core zero-allocation helper (updated to match the new `&str` signature).
450///
451/// The `fmt` we get from the iterator is still `'static`, but it coerces automatically
452/// to `&str`, so everything continues to work.
453#[inline]
454pub(crate) fn try_compatible_formats<I>(s: &str, formats: I) -> Option<Dt>
455where
456    I: IntoIterator<Item = String>,
457{
458    // let mut dt = None;
459
460    // for fmt in formats.into_iter() {
461    //     eprintln!("TRYING FMT: {}", fmt);
462    //     dt = match Dt::from_str(s, &fmt, true, true, false) {
463    //         Ok(parsed) => Some(parsed),
464    //         Err(e) => {
465    //             eprintln!("  FAILED with: {:?}", e);
466    //             continue;
467    //         }
468    //     };
469    //     if dt.is_some() {
470    //         break;
471    //     }
472    //     // === DEBUG ===
473    //     // eprintln!("Tried format: {:?}", fmt);
474    // }
475
476    // dt
477    formats
478        .into_iter()
479        .find_map(|fmt| Dt::from_str(s, &fmt, true, true, false).ok())
480}
481
482#[inline]
483pub(crate) fn try_unambiguous(s: &str, classification: &DateClassification) -> Option<Dt> {
484    if matches!(classification.bytes_len, 6..=8)
485        && let Some(dt) = parse_yyyy_mm(s.as_bytes())
486    {
487        return Some(dt);
488    }
489    try_compatible_formats(s, generate_unambiguous_candidates(classification))
490}