Skip to main content

deep_time/alloc_parse/
parse_date.rs

1use crate::{
2    ClassifiedDate, DateClassification, Dt, DtErr, DtErrKind, MAX_DATE_STRING_LEN, Mode, Order,
3    OrderFirst, ParseCfg, an_err, classify_date, default_date_parse_options,
4    generate_ambiguous_day_first_candidates, generate_ambiguous_month_first_candidates,
5    generate_ambiguous_year_first_candidates, generate_unambiguous_candidates,
6    is_week_date_missing_weekday, parse_pure_numeric_unix_timestamp, parse_syslog_no_year,
7    parse_week_date_no_weekday, parse_yyyy_mm, smart_detect_date_order, try_pure_numeric,
8};
9use alloc::borrow::Cow;
10use alloc::string::String;
11
12impl Dt {
13    /// Automatically parses datetime [`str`] into a [`Dt`] by guessing and generating the format. Supports the vast
14    /// majority of date formats.
15    ///
16    /// - Requires the `"alloc"` feature.
17    /// - The returned [`Dt`] is internally on the TAI time scale. The `attos` field is an [`i128`] attosecond
18    ///   count since TAI 2000-01-01 noon. See [`Scale`] for more information.
19    ///
20    /// ## Parameters
21    ///
22    /// - `s`: The string to parse. Must be non-empty and no longer than 255 bytes. Empty strings or overly
23    ///   long inputs return an error.
24    /// - `opts`: Optional [`ParseCfg`]. Pass `None` to use the defaults.
25    ///
26    /// ## Configuration Options (via [`ParseCfg`])
27    ///
28    /// | Field          | Default     | Effect |
29    /// |----------------|-------------|--------|
30    /// | `lang`         | `En`        | Language, scroll down to see currently supported languages |
31    /// | `order`        | `Smart`     | How to resolve ambiguous numeric dates like `01/02/03` |
32    /// | `mode`         | `Auto`      | Special handling for purely numeric inputs |
33    /// | `parse`        | `None`      | If provided, these exact `strftime`-style formats are tried **first** (and exclusively if `mode` is `Explicit`) |
34    /// | `relative`     | `true`      | Enable phrases like "tomorrow", "next Friday", "in 3 days" |
35    /// | `ref_time`     | `None`      | Reference time for relative dates and syslog-style "no-year" dates (uses system time if `std` feature is enabled) |
36    /// | `to_lower`     | `true`      | Automatically lowercase the input, set to `false` only if it's already lowercase |
37    ///
38    /// ## Purely Numeric Inputs
39    ///
40    /// When the input consists **only** of digits (and optionally a decimal point),
41    /// the parser uses a fast, mode-aware path before trying any other strategies.
42    /// The exact interpretation depends on the number of digits and the selected `mode`.
43    ///
44    /// | Digits | Example(s)               | `Mode`          | Interpreted as                          | Notes |
45    /// |--------|--------------------------|-----------------|-----------------------------------------|-------|
46    /// | 1–4    | `2024`, `24`, `5`        | `Auto`/`Legacy` | Year (2-digit uses 2000/1900 pivot)    | 1- and 3-digit years only work in `Scientific` |
47    /// | 5      | `24123`, `60400`         | `Legacy`        | Ordinal date (YYDDD)                    | — |
48    /// | 5      | `60400`, `60400.75`      | `Scientific`    | Modified Julian Date (MJD)              | Fractional days supported |
49    /// | 5      | `24123`, `60400.75`      | `Auto`          | Ordinal (non-decimal) or MJD (decimal) | Smart default |
50    /// | 6      | `240315`, `202403`       | `Auto`          | YYYYMM if plausible year, else YYMMDD   | Most common compact form |
51    /// | 6      | `240315`                 | `Legacy`        | YYMMDD preferred                        | — |
52    /// | 6      | `202403`                 | `Scientific`    | YYYYMM preferred                        | — |
53    /// | 7      | `2024123`                | `Legacy`        | Ordinal date (YYYYDDD)                  | — |
54    /// | 7      | `2460123`, `2460123.5`   | `Scientific`    | Julian Day (JD)                         | Fractional days supported |
55    /// | 7      | `2024123`                | `Auto`          | Ordinal (integer) or JD (decimal)       | Smart default |
56    /// | 10–11  | `1735689600`             | any             | Unix seconds                            | — |
57    /// | 12–15  | `1735689600123`          | any             | Unix milliseconds                       | Most common high-precision case |
58    /// | 16–18  | `1735689600123456`       | any             | Unix microseconds                       | — |
59    /// | 19+    | `1735689600123456789`    | any             | Unix nanoseconds                        | Full precision |
60    ///
61    /// **Tip**: Use `Mode::UnixTimestamp` when you know the input is always a Unix timestamp.
62    ///
63    /// ## Ambiguous Numeric Dates
64    ///
65    /// Dates where the components could map to different orders (e.g. `01/02/03`,
66    /// `3-4-5`, `15.03.24`, `2024.03.15`) are resolved via the `order` field:
67    ///
68    /// - **`Order::Smart`** (default) — Applies the fast heuristic described in [`Order::Smart`].
69    ///   It strongly prefers modern/tech conventions (Year-first for compact/ISO-like data)
70    ///   while handling the majority of international and US-style dates.
71    ///
72    /// - **`Order::Year`**, **`Order::Day`**, or **`Order::Month`** force a
73    ///   specific interpretation and bypass the heuristic entirely.
74    ///
75    /// This combination of `Smart` + `Auto` mode gives the best real-world parsing
76    /// success rate for mixed data sources.
77    ///
78    /// ## Other Supported Formats
79    ///
80    /// - **ISO 8601** and variants: `2024-03-15`, `2024-03-15T14:30:00Z`, `2024-03-15T14:30:00+01:00[Europe/Paris]`
81    /// - **Named dates** (in supported languages): `15 March 2024`, `15 mars 2024`, `15. März 2024`, `15 de marzo de 2024`
82    /// - **Week dates**: `2024-W15`, `2024-W15-3`, `2024W153` (missing weekday defaults to Monday)
83    /// - **Syslog-style** (no year): `Mar  5 10:23:45` (year inferred from `ref_time`)
84    /// - **Relative expressions**: `tomorrow`, `next Friday at 09:00`, `in 3 days`, `2 weeks ago`
    /// - **12- and 24-hour time**: `2:30 PM`, `14:30:45.123`
86    /// - **Offsets and timezones**: `+0100`, `-05:30`, `Z`, IANA names in brackets
87    ///
88    /// ## Examples
89    ///
90    /// ```rust
91    /// use deep_time::{Dt, ParseCfg, Order, Mode, Lang};
92    ///
93    /// // Default smart parsing
94    /// let dt = Dt::from_str_parse("2024-03-15 14:30:00", &None).unwrap();
95    ///
96    /// // German named date
97    /// let cfg = ParseCfg { lang: Lang::De, ..Default::default() };
98    /// let dt = Dt::from_str_parse("15. März 2024 um 14:30", &Some(cfg)).unwrap();
99    ///
100    /// // Force month-first
101    /// let cfg = ParseCfg { order: Order::Month, ..Default::default() };
102    /// let dt = Dt::from_str_parse("03/15/2024", &Some(cfg)).unwrap();
103    ///
104    /// // Pure numeric compact form
105    /// let dt = Dt::from_str_parse("20240315", &None).unwrap(); // March 15, 2024
106    ///
107    /// // Unix timestamp (milliseconds)
108    /// let cfg = ParseCfg { mode: Mode::UnixTimestamp, ..Default::default() };
109    /// let dt = Dt::from_str_parse("1735689600123", &Some(cfg)).unwrap();
110    ///
111    /// // Explicit formats only (no fallback)
112    /// let cfg = ParseCfg {
113    ///     parse: Some(vec!["%d/%m/%Y".into(), "%Y-%m-%d".into()]),
114    ///     mode: Mode::Explicit,
115    ///     ..Default::default()
116    /// };
117    /// let dt = Dt::from_str_parse("15/03/2024", &Some(cfg)).unwrap();
118    ///
119    /// // Relative date
120    /// let dt = Dt::from_str_parse("2 days from now", &None).unwrap();
121    /// ```
122    ///
123    /// ## Notes
124    ///
125    /// - The `Smart` + `Auto` combination gives the best real-world success rate for mixed data.
126    /// - All successfully parsed [`Dt`] values are stored with attosecond precision on the internal TAI timescale.
127    /// - For maximum reproducibility in production code, prefer `ParseCfg` with `parse: Some(...)` and `mode: Explicit`.
128    /// - Timezone handling (IANA names and fixed offsets) is fully supported.
129    ///
    /// See also: [`ParseCfg`], [`Order`], [`Mode`], [`Lang`], [`Dt`],
    /// [`Dt::str_to_attos`], [`Dt::str_to_ms`], [`Dt::str_to_ns`],
    /// [`Dt::str_to_unix_ms`], [`Dt::str_to_unix_ns`].
132    ///
133    /// ## Supported Languages:
134    ///
135    /// - En
136    /// - De
137    /// - Es
138    /// - Fr
139    ///
140    pub fn from_str_parse(s: &str, opts: &Option<ParseCfg>) -> Result<Dt, DtErr> {
141        let opts: &ParseCfg = opts
142            .as_ref()
143            .unwrap_or_else(|| default_date_parse_options());
144
145        if s.is_empty() {
146            return Err(an_err!(DtErrKind::Incomplete, "empty"));
147        } else if s.len() > MAX_DATE_STRING_LEN {
148            return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", s));
149        }
150
151        let lang = opts.lang;
152        let ref_time = &opts.ref_time;
153
154        let lowered: Cow<str> = if opts.to_lower {
155            Cow::Owned(s.to_lowercase())
156        } else {
157            Cow::Borrowed(s)
158        };
159
160        let classification = match classify_date(&lowered, lang, ref_time) {
161            Ok(ClassifiedDate::Parsed(time_point)) => return Ok(time_point),
162            Ok(ClassifiedDate::Cls(c)) => c,
163            Err(e) => {
164                // std::eprintln!("{}", e);
165                return Err(an_err!(
166                    DtErrKind::InvalidInput,
167                    "{}",
168                    s => e
169                ));
170            }
171        };
172
173        // let xx = &classification.date;
174        // if xx != trimmed {
175        //     eprintln!("NOT EQUAL: {:?}, {:?}", trimmed, xx);
176        // }
177        // eprintln!("BEFORE & AFTER: {:?}, {:?}", lowered, &classification.date);
178
179        let normalized = &classification.date;
180
181        let (mode, date_order) = if let Some(formats) = &opts.parse {
182            if !formats.is_empty() {
183                for fmt in formats {
184                    if let Ok(value) = Self::from_str(normalized, fmt, true, true, false) {
185                        return Ok(value);
186                    }
187                }
188                // None of the provided formats worked and mode is Explicit
189                if opts.mode == Mode::Explicit {
190                    return Err(an_err!(DtErrKind::InvalidInput, "{}", s));
191                }
192            }
193            (opts.mode, opts.order)
194        } else {
195            (opts.mode, opts.order)
196        };
197
198        // if s == "2006-04-02 02:30-05 America/Indiana/Vevay" {
199        //     std::eprintln!("{:?}", classification);
200        // }
201        // std::eprintln!("{:?}", classification);
202
203        if classification.is_pure_numeric {
204            match mode {
205                Mode::UnixTimestamp => {
206                    if let Some(dt) = parse_pure_numeric_unix_timestamp(
207                        normalized,
208                        classification.num_non_decimal_digits as usize,
209                    ) {
210                        return Ok(dt);
211                    }
212                }
213                _ => {
214                    if let Some(dt) = try_pure_numeric(
215                        normalized,
216                        classification.num_digits,
217                        classification.num_non_decimal_digits,
218                        classification.is_decimal,
219                        mode,
220                    ) {
221                        // std::eprintln!("NUMERIC INPUT SUCCESS: {:?}", s);
222                        return Ok(dt);
223                    }
224                }
225            }
226        }
227        if !classification.has_year
228            && let Some(dt) = parse_syslog_no_year(normalized, lang, ref_time)
229        {
230            return Ok(dt);
231        }
232
233        if is_week_date_missing_weekday(&classification) {
234            // std::eprintln!("IS WEEK DATE MISSING WEEKDAY: {:?}", s);
235            if let Some(dt) = parse_week_date_no_weekday(&classification, lang, ref_time) {
236                return Ok(dt);
237            }
238        }
239        if let Some(dt) = try_unambiguous(normalized, &classification) {
240            return Ok(dt);
241        }
242        // std::eprintln!("done trying unambiguous");
243        if let Some(dt) = match date_order {
244            Order::Smart => {
245                let order = smart_detect_date_order(normalized, &classification);
246                let mut result: Option<Dt>;
247
248                match order {
249                    OrderFirst::Day => {
250                        result = try_compatible_formats(
251                            normalized,
252                            generate_ambiguous_day_first_candidates(&classification),
253                        );
254                        // std::eprintln!("done trying day first: {:?}", result);
255
256                        if result.is_none() {
257                            result = try_compatible_formats(
258                                normalized,
259                                generate_ambiguous_month_first_candidates(&classification),
260                            );
261                            // std::eprintln!("done trying month first: {:?}", result);
262                        }
263
264                        if result.is_none() {
265                            result = try_compatible_formats(
266                                normalized,
267                                generate_ambiguous_year_first_candidates(&classification),
268                            );
269                            // std::eprintln!("done trying year first: {:?}", result);
270                        }
271                    }
272                    OrderFirst::Month => {
273                        result = try_compatible_formats(
274                            normalized,
275                            generate_ambiguous_month_first_candidates(&classification),
276                        );
277                        // std::eprintln!("done trying month first: {:?}", result);
278
279                        if result.is_none() {
280                            result = try_compatible_formats(
281                                normalized,
282                                generate_ambiguous_day_first_candidates(&classification),
283                            );
284                            // std::eprintln!("done trying day first: {:?}", result);
285                        }
286
287                        if result.is_none() {
288                            result = try_compatible_formats(
289                                normalized,
290                                generate_ambiguous_year_first_candidates(&classification),
291                            );
292                            // std::eprintln!("done trying year first: {:?}", result);
293                        }
294                    }
295                    OrderFirst::Year => {
296                        result = try_compatible_formats(
297                            normalized,
298                            generate_ambiguous_year_first_candidates(&classification),
299                        );
300                        // std::eprintln!("done trying year first: {:?}", result);
301
302                        if result.is_none() {
303                            result = try_compatible_formats(
304                                normalized,
305                                generate_ambiguous_day_first_candidates(&classification),
306                            );
307                            // std::eprintln!("done trying day first: {:?}", result);
308                        }
309
310                        if result.is_none() {
311                            result = try_compatible_formats(
312                                normalized,
313                                generate_ambiguous_month_first_candidates(&classification),
314                            );
315                            // std::eprintln!("done trying month first: {:?}", result);
316                        }
317                    }
318                }
319
320                result
321            }
322            Order::Year => try_compatible_formats(
323                normalized,
324                generate_ambiguous_year_first_candidates(&classification),
325            ),
326            Order::Day => try_compatible_formats(
327                normalized,
328                generate_ambiguous_day_first_candidates(&classification),
329            ),
330            Order::Month => try_compatible_formats(
331                normalized,
332                generate_ambiguous_month_first_candidates(&classification),
333            ),
334        } {
335            return Ok(dt);
336        }
337        // std::eprintln!("NOW trying numeric timestamp");
338        if classification.is_pure_numeric
339            && mode != Mode::UnixTimestamp
340            && let Some(dt) = parse_pure_numeric_unix_timestamp(
341                normalized,
342                classification.num_non_decimal_digits as usize,
343            )
344        {
345            return Ok(dt);
346        }
347        Err(an_err!(DtErrKind::InvalidInput, "{}", s))
348    }
349
350    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
351    /// but returns attoseconds since the library epoch: 2000-01-01 12:00:00 UTC
352    /// (on the UTC scale).
353    ///
354    /// Returns `Some(attos)` on success (negative for pre-2000 dates) or `None`
355    /// on any parse error.
356    #[inline]
357    pub fn str_to_attos(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
358        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_attos())
359    }
360
361    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
362    /// but returns milliseconds since the library epoch: 2000-01-01 12:00:00 UTC
363    /// (on the UTC scale).
364    ///
365    /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
366    /// on any parse error.
367    #[inline]
368    pub fn str_to_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
369        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ms())
370    }
371
372    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
373    /// but returns nanoseconds since the library epoch: 2000-01-01 12:00:00 UTC
374    /// (on the UTC scale).
375    ///
376    /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
377    /// on any parse error.
378    #[inline]
379    pub fn str_to_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
380        Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ns())
381    }
382
383    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
384    /// but returns milliseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
385    ///
386    /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
387    /// on any parse error.
388    #[inline]
389    pub fn str_to_unix_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
390        Dt::from_str_parse(s, opts)
391            .ok()
392            .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ms())
393    }
394
395    /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
396    /// but returns nanoseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
397    ///
398    /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
399    /// on any parse error.
400    #[inline]
401    pub fn str_to_unix_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
402        Dt::from_str_parse(s, opts)
403            .ok()
404            .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ns())
405    }
406}
407
408/// Core zero-allocation helper (updated to match the new `&str` signature).
409///
410/// The `fmt` we get from the iterator is still `'static`, but it coerces automatically
411/// to `&str`, so everything continues to work.
412#[inline]
413pub(crate) fn try_compatible_formats<I>(s: &str, formats: I) -> Option<Dt>
414where
415    I: IntoIterator<Item = String>,
416{
417    // let mut dt = None;
418
419    // for fmt in formats.into_iter() {
420    //     eprintln!("TRYING FMT: {}", fmt);
421    //     if let Ok(parsed) = Dt::from_str(s, &fmt, true, true, false) {
422    //         dt = Some(parsed);
423    //         break;
424    //     }
425    //     // === DEBUG ===
426    //     // eprintln!("Tried format: {:?}", fmt);
427    // }
428
429    // dt
430    formats
431        .into_iter()
432        .find_map(|fmt| Dt::from_str(s, &fmt, true, true, false).ok())
433}
434
435#[inline]
436pub(crate) fn try_unambiguous(s: &str, classification: &DateClassification) -> Option<Dt> {
437    if matches!(classification.bytes_len, 6..=8)
438        && let Some(dt) = parse_yyyy_mm(s.as_bytes())
439    {
440        return Some(dt);
441    }
442    try_compatible_formats(s, generate_unambiguous_candidates(classification))
443}