deep_time/alloc_parse/parse_date.rs
1use crate::{
2 ClassifiedDate, DateClassification, Dt, DtErr, DtErrKind, Lang, Mode, Order, OrderFirst,
3 ParseCfg, STRTIME_SIZE, an_err, classify_date,
4 generate_ambiguous_day_first_candidates, generate_ambiguous_month_first_candidates,
5 generate_ambiguous_year_first_candidates, generate_unambiguous_candidates,
6 is_week_date_missing_weekday, parse_pure_numeric_unix_timestamp, parse_syslog_no_year,
7 parse_week_date_no_weekday, parse_yyyy_mm, smart_detect_date_order, try_pure_numeric,
8};
9use alloc::borrow::Cow;
10use alloc::string::String;
11
12impl Dt {
13 /// Automatically parses datetime [`str`] into a [`Dt`] by guessing and generating the format. Supports the vast
14 /// majority of date formats.
15 ///
16 /// - Requires the `"parse"` feature (which enables `alloc`).
17 /// - The returned [`Dt`] is internally on the TAI time scale. The `attos` field is an [`i128`] attosecond
18 /// count since TAI 2000-01-01 noon. See [`Scale`] for more information.
19 /// - The returned [`Dt`] is **not** in local time, if a timezone is parsed then it's used to find the offset
20 /// to return non-local instant.
21 ///
22 /// ## Parameters
23 ///
24 /// - `s`: The string to parse. Must be non-empty and no longer than 255 bytes. Empty strings or overly
25 /// long inputs return an error.
26 /// - `opts`: The [`ParseCfg`] to use. Pass `&ParseCfg::DEFAULT` (or `&ParseCfg::default()`)
27 /// to use the standard smart defaults. You can create a `ParseCfg` once and pass `&cfg`
28 /// on every call for consistent behavior and to avoid repeated construction.
29 ///
30 /// ## Configuration Options
31 ///
32 /// These are the fields of the configuration options struct [`ParseCfg`], their types and defaults.
33 ///
34 /// See [`ParseCfg`] for more information.
35 ///
36 /// | Field | Type and Default | Effect |
37 /// |----------------|----------------------------------|--------|
38 /// | `lang` | [`Lang::En`] | Language, scroll down to see currently supported languages |
39 /// | `order` | [`Order::Smart`] | How to resolve ambiguous numeric dates like `01/02/03` |
40 /// | `mode` | [`Mode::Auto`] | Special handling for purely numeric inputs |
41 /// | `parse` | [`Option<Vec<String>>`] - `None` | An explicit list of formats to try, if the [`Mode`] is Explicit then only these formats are tried |
42 /// | `relative` | [`bool`] - `true` | Enable phrases like "tomorrow", "in 3 days" |
43 /// | `ref_time` | [`Option<Dt>`] - `None` | Reference time for relative dates and syslog-style "no-year" dates |
44 /// | `to_lower` | [`bool`] - `true` | Automatically lowercase the input, **only** set to false if it's already lowercase |
45 ///
46 /// ## Purely Numeric Inputs
47 ///
48 /// When the input consists **only** of digits (and optionally a decimal point),
49 /// the parser uses a fast, mode-aware path before trying any other strategies.
50 /// The exact interpretation depends on the number of digits and the selected `mode`.
51 ///
52 /// | Digits | Example(s) | `Mode` | Interpreted as | Notes |
53 /// |--------|--------------------------|-----------------|-----------------------------------------|-------|
54 /// | 1–4 | `2024`, `24`, `5` | `Auto`/`Legacy` | Year (2-digit uses 2000/1900 pivot) | 1- and 3-digit years only work in `Scientific` |
55 /// | 5 | `24123`, `60400` | `Legacy` | Ordinal date (YYDDD) | — |
56 /// | 5 | `60400`, `60400.75` | `Scientific` | Modified Julian Date (MJD) | Fractional days supported |
57 /// | 5 | `24123`, `60400.75` | `Auto` | Ordinal (non-decimal) or MJD (decimal) | Smart default |
58 /// | 6 | `240315`, `202403` | `Auto` | YYYYMM if plausible year, else YYMMDD | Most common compact form |
59 /// | 6 | `240315` | `Legacy` | YYMMDD preferred | — |
60 /// | 6 | `202403` | `Scientific` | YYYYMM preferred | — |
61 /// | 7 | `2024123` | `Legacy` | Ordinal date (YYYYDDD) | — |
62 /// | 7 | `2460123`, `2460123.5` | `Scientific` | Julian Day (JD) | Fractional days supported |
63 /// | 7 | `2024123` | `Auto` | Ordinal (integer) or JD (decimal) | Smart default |
64 /// | 10–11 | `1735689600` | any | Unix seconds | — |
65 /// | 12–15 | `1735689600123` | any | Unix milliseconds | Most common high-precision case |
66 /// | 16–18 | `1735689600123456` | any | Unix microseconds | — |
67 /// | 19+ | `1735689600123456789` | any | Unix nanoseconds | Full precision |
68 ///
69 /// Use `Mode::UnixTimestamp` when you know the input is always a Unix timestamp.
70 ///
71 /// ## Ambiguous Numeric Dates
72 ///
73 /// Dates where the components could map to different orders (e.g. `01/02/03`,
74 /// `3-4-5`, `15.03.24`, `2024.03.15`) are resolved via the `order` field:
75 ///
76 /// - **`Order::Smart`** (default) — Applies the fast heuristic described in [`Order::Smart`].
77 /// It strongly prefers modern/tech conventions (Year-first for compact/ISO-like data)
78 /// while handling the majority of international and US-style dates.
79 ///
80 /// - **`Order::Year`**, **`Order::Day`**, or **`Order::Month`** force a
81 /// specific interpretation and bypass the heuristic entirely.
82 ///
83 /// ## Supported Formats
84 ///
85 /// The parser tokenizes known words (month/day names, relative phrases, timezones, etc.), generates candidate
86 /// formats from the token pattern, and tries them until one matches. Thousands of layouts are supported.
87 ///
88 /// Separators generally don't matter, they could be spaces, slashes, or hyphens, but **not colons** - colons are
89 /// reserved for the time connector, times, and offsets.
90 ///
91 /// Generally speaking the date part must come first, and stuff like time components, offsets and iana timezone names
92 /// must come afterwards.
93 ///
94 /// - **ISO 8601** and variants: `2024-03-15`, `2024-03-15T14:30:00Z`, `2024-03-15T14:30:00+01:00[Europe/Paris]`
95 /// - **Named dates** (in supported languages): `15 March 2024`, `15 mars 2024`, `15. März 2024`, `15 de marzo de 2024`
96 /// - **Week dates**: `2024-W15`, `2024-W15-3`, `2024W153` (missing weekday defaults to Monday)
97 /// - **Syslog-style** (no year): `Mar 5 10:23:45` (year inferred from `ref_time`)
98 /// - **Relative expressions**: `tomorrow`, `in 3 days`, `2 weeks ago`
99 /// - **12-hour time**: `2:30 PM`, `14:30:45.123`
100 /// - **Offsets and timezones**: `+0100`, `-05:30`, `Z`, IANA timezone names (with the `jiff-tz` feature enabled)
101 /// - **Library time scales**: `TAI`, `TT`, etc. are detected and parsed, must come after the date part of the input
102 ///
103 /// Relative dates are also automatically supported, except for bare numbers with no colons like `0900`, as these
104 /// are differently interpreted.
105 ///
106 /// ## Examples
107 ///
108 /// ```rust
109 /// use deep_time::{Dt, ParseCfg, Order, Mode, Scale};
110 ///
111 /// // Default smart parsing
112 /// let dt = Dt::from_str_parse("2024-03-15 14:30:00", &ParseCfg::DEFAULT).unwrap();
113 ///
114 /// // German named date (requires the `de` feature)
115 /// # #[cfg(feature = "de")]
116 /// # {
117 /// # use deep_time::Lang;
118 /// let cfg = ParseCfg { lang: Lang::De, ..Default::default() };
119 /// let dt = Dt::from_str_parse("15. März 2024 um 14:30", &cfg).unwrap();
120 /// # }
121 ///
122 /// // Pure numeric compact form
123 /// let dt = Dt::from_str_parse("20240315", &ParseCfg::DEFAULT).unwrap(); // March 15, 2024
124 ///
125 /// // Unix timestamp (milliseconds)
126 /// let cfg = ParseCfg { mode: Mode::UnixTimestamp, ..Default::default() };
127 /// let dt = Dt::from_str_parse("1735689600123", &cfg).unwrap();
128 ///
129 /// // Explicit formats only (no fallback)
130 /// let cfg = ParseCfg {
131 /// parse: Some(vec!["%d/%m/%Y".into(), "%Y-%m-%d".into()]),
132 /// mode: Mode::Explicit,
133 /// ..Default::default()
134 /// };
135 /// let dt = Dt::from_str_parse("15/03/2024", &cfg).unwrap();
136 ///
137 /// // Relative dates — build config once, borrow repeatedly
138 /// let ref_time = Dt::from_ymd(2026, 6, 16, Scale::UTC, 12, 0, 0, 0);
139 /// let cfg = ParseCfg {
140 /// ref_time: Some(ref_time),
141 /// ..Default::default()
142 /// };
143 /// let dt = Dt::from_str_parse("next Monday at 14:00", &cfg).unwrap();
144 ///
145 /// assert_eq!(dt, Dt::from_ymd(2026, 6, 22, Scale::UTC, 14, 0, 0, 0));
146 /// ```
147 ///
148 /// ## Notes
149 ///
150 /// - The `Smart` + `Auto` combination gives the best real-world success rate for mixed data.
151 /// - Relative expressions and syslog-style no-year dates need a reference time. If `ref_time` is `None`
152 /// and the `std` feature is enabled, system time is used; without `std`, set `ref_time` explicitly or
153 /// parsing will fail.
154 /// - All successfully parsed [`Dt`] values are stored with attosecond precision on the internal
155 /// TAI timescale.
156 /// - Timezone handling (IANA names and fixed offsets) is fully supported when the `jiff-tz` feature
157 /// is enabled.
158 ///
159 /// ## Supported Languages:
160 ///
161 /// Language support here basically means supporting abbreviated and full day and month names.
162 /// Non-Ascii types of numeric characters are also supported such as full width digits.
163 ///
164 /// Some day/month names in non-English languages are not supported due to clashes, any such missing
165 /// support is noted below.
166 ///
167 /// - En
168 /// - De
169 /// - Won't parse "t" as short form for day.
170 /// - Es
171 /// - English word "ago" won't be detected as relative date word.
172 /// - Won't parse "mar" as tuesday, will instead parse as march.
173 /// - Fr
174 /// - Won't parse "mar" as tuesday, will instead parse as march.
175 ///
176 /// ## See also
177 ///
178 /// - [`ParseCfg`]
179 /// - [`Order`]
180 /// - [`Mode`]
181 /// - [`Lang`]
182 /// - [`Dt`]
183 /// - [`Dt::from_str_iso`](../struct.Dt.html#method.from_str_iso)
184 pub fn from_str_parse(s: &str, opts: &ParseCfg) -> Result<Dt, DtErr> {
185
186 if s.is_empty() {
187 return Err(an_err!(DtErrKind::Incomplete, "empty"));
188 } else if s.len() > STRTIME_SIZE {
189 return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", s));
190 }
191
192 let lang: Lang = opts.lang;
193 let ref_time = &opts.ref_time;
194
195 let lowered: Cow<str> = if opts.to_lower {
196 Cow::Owned(s.to_lowercase())
197 } else {
198 Cow::Borrowed(s)
199 };
200
201 let classification = match classify_date(&lowered, lang, ref_time) {
202 Ok(ClassifiedDate::Parsed(time_point)) => return Ok(time_point),
203 Ok(ClassifiedDate::Cls(c)) => c,
204 Err(e) => {
205 // std::eprintln!("{}", e);
206 return Err(an_err!(
207 DtErrKind::InvalidInput,
208 "{}",
209 s => e
210 ));
211 }
212 };
213
214 // let xx = &classification.date;
215 // if xx != trimmed {
216 // eprintln!("NOT EQUAL: {:?}, {:?}", trimmed, xx);
217 // }
218 // eprintln!("BEFORE & AFTER: {:?}, {:?}", lowered, &classification.date);
219
220 let normalized = &classification.date;
221
222 let (mode, date_order) = if let Some(formats) = &opts.parse {
223 if !formats.is_empty() {
224 for fmt in formats {
225 if let Ok(value) = Self::from_str(normalized, fmt, true, true, false) {
226 return Ok(value);
227 }
228 }
229 // None of the provided formats worked and mode is Explicit
230 if opts.mode == Mode::Explicit {
231 return Err(an_err!(DtErrKind::InvalidInput, "{}", s));
232 }
233 }
234 (opts.mode, opts.order)
235 } else {
236 (opts.mode, opts.order)
237 };
238
239 // if s == "on the 5th of april 2024 at 00:00am" {
240 // std::eprintln!("{:?}", classification);
241 // }
242 // std::eprintln!("{:?}", classification);
243
244 if classification.is_pure_numeric {
245 match mode {
246 Mode::UnixTimestamp => {
247 if let Some(dt) = parse_pure_numeric_unix_timestamp(
248 normalized,
249 classification.num_non_decimal_digits as usize,
250 ) {
251 return Ok(dt);
252 }
253 }
254 _ => {
255 if let Some(dt) = try_pure_numeric(
256 normalized,
257 classification.num_digits,
258 classification.num_non_decimal_digits,
259 classification.is_decimal,
260 mode,
261 ) {
262 // std::eprintln!("NUMERIC INPUT SUCCESS: {:?}", s);
263 return Ok(dt);
264 }
265 }
266 }
267 }
268 if !classification.has_year
269 && let Some(dt) = parse_syslog_no_year(normalized, lang, ref_time)
270 {
271 return Ok(dt);
272 }
273
274 if is_week_date_missing_weekday(&classification) {
275 // std::eprintln!("IS WEEK DATE MISSING WEEKDAY: {:?}", s);
276 if let Some(dt) = parse_week_date_no_weekday(&classification, lang, ref_time) {
277 return Ok(dt);
278 }
279 }
280 if let Some(dt) = try_unambiguous(normalized, &classification) {
281 return Ok(dt);
282 }
283 // std::eprintln!("done trying unambiguous");
284 if let Some(dt) = match date_order {
285 Order::Smart => {
286 let order = smart_detect_date_order(normalized, &classification);
287 let mut result: Option<Dt>;
288
289 match order {
290 OrderFirst::Day => {
291 result = try_compatible_formats(
292 normalized,
293 generate_ambiguous_day_first_candidates(&classification),
294 );
295 // std::eprintln!("done trying day first: {:?}", result);
296
297 if result.is_none() {
298 result = try_compatible_formats(
299 normalized,
300 generate_ambiguous_month_first_candidates(&classification),
301 );
302 // std::eprintln!("done trying month first: {:?}", result);
303 }
304
305 if result.is_none() {
306 result = try_compatible_formats(
307 normalized,
308 generate_ambiguous_year_first_candidates(&classification),
309 );
310 // std::eprintln!("done trying year first: {:?}", result);
311 }
312 }
313 OrderFirst::Month => {
314 result = try_compatible_formats(
315 normalized,
316 generate_ambiguous_month_first_candidates(&classification),
317 );
318 // std::eprintln!("done trying month first: {:?}", result);
319
320 if result.is_none() {
321 result = try_compatible_formats(
322 normalized,
323 generate_ambiguous_day_first_candidates(&classification),
324 );
325 // std::eprintln!("done trying day first: {:?}", result);
326 }
327
328 if result.is_none() {
329 result = try_compatible_formats(
330 normalized,
331 generate_ambiguous_year_first_candidates(&classification),
332 );
333 // std::eprintln!("done trying year first: {:?}", result);
334 }
335 }
336 OrderFirst::Year => {
337 result = try_compatible_formats(
338 normalized,
339 generate_ambiguous_year_first_candidates(&classification),
340 );
341 // std::eprintln!("done trying year first: {:?}", result);
342
343 if result.is_none() {
344 result = try_compatible_formats(
345 normalized,
346 generate_ambiguous_day_first_candidates(&classification),
347 );
348 // std::eprintln!("done trying day first: {:?}", result);
349 }
350
351 if result.is_none() {
352 result = try_compatible_formats(
353 normalized,
354 generate_ambiguous_month_first_candidates(&classification),
355 );
356 // std::eprintln!("done trying month first: {:?}", result);
357 }
358 }
359 }
360
361 result
362 }
363 Order::Year => try_compatible_formats(
364 normalized,
365 generate_ambiguous_year_first_candidates(&classification),
366 ),
367 Order::Day => try_compatible_formats(
368 normalized,
369 generate_ambiguous_day_first_candidates(&classification),
370 ),
371 Order::Month => try_compatible_formats(
372 normalized,
373 generate_ambiguous_month_first_candidates(&classification),
374 ),
375 } {
376 return Ok(dt);
377 }
378 // std::eprintln!("NOW trying numeric timestamp");
379 if classification.is_pure_numeric
380 && mode != Mode::UnixTimestamp
381 && let Some(dt) = parse_pure_numeric_unix_timestamp(
382 normalized,
383 classification.num_non_decimal_digits as usize,
384 )
385 {
386 return Ok(dt);
387 }
388 Err(an_err!(DtErrKind::InvalidInput, "{}", s))
389 }
390
391 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
392 /// but returns attoseconds since the library epoch: 2000-01-01 12:00:00 UTC
393 /// (on the UTC scale).
394 ///
395 /// Returns `Some(attos)` on success (negative for pre-2000 dates) or `None`
396 /// on any parse error.
397 #[inline]
398 pub fn str_to_attos(s: &str, opts: &ParseCfg) -> Option<i128> {
399 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_attos())
400 }
401
402 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
403 /// but returns milliseconds since the library epoch: 2000-01-01 12:00:00 UTC
404 /// (on the UTC scale).
405 ///
406 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
407 /// on any parse error.
408 #[inline]
409 pub fn str_to_ms(s: &str, opts: &ParseCfg) -> Option<i128> {
410 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ms())
411 }
412
413 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
414 /// but returns nanoseconds since the library epoch: 2000-01-01 12:00:00 UTC
415 /// (on the UTC scale).
416 ///
417 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
418 /// on any parse error.
419 #[inline]
420 pub fn str_to_ns(s: &str, opts: &ParseCfg) -> Option<i128> {
421 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ns())
422 }
423
424 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
425 /// but returns milliseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
426 ///
427 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
428 /// on any parse error.
429 #[inline]
430 pub fn str_to_unix_ms(s: &str, opts: &ParseCfg) -> Option<i128> {
431 Dt::from_str_parse(s, opts)
432 .ok()
433 .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ms())
434 }
435
436 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
437 /// but returns nanoseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
438 ///
439 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
440 /// on any parse error.
441 #[inline]
442 pub fn str_to_unix_ns(s: &str, opts: &ParseCfg) -> Option<i128> {
443 Dt::from_str_parse(s, opts)
444 .ok()
445 .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ns())
446 }
447}
448
449/// Core zero-allocation helper (updated to match the new `&str` signature).
450///
451/// The `fmt` we get from the iterator is still `'static`, but it coerces automatically
452/// to `&str`, so everything continues to work.
453#[inline]
454pub(crate) fn try_compatible_formats<I>(s: &str, formats: I) -> Option<Dt>
455where
456 I: IntoIterator<Item = String>,
457{
458 // let mut dt = None;
459
460 // for fmt in formats.into_iter() {
461 // eprintln!("TRYING FMT: {}", fmt);
462 // dt = match Dt::from_str(s, &fmt, true, true, false) {
463 // Ok(parsed) => Some(parsed),
464 // Err(e) => {
465 // eprintln!(" FAILED with: {:?}", e);
466 // continue;
467 // }
468 // };
469 // if dt.is_some() {
470 // break;
471 // }
472 // // === DEBUG ===
473 // // eprintln!("Tried format: {:?}", fmt);
474 // }
475
476 // dt
477 formats
478 .into_iter()
479 .find_map(|fmt| Dt::from_str(s, &fmt, true, true, false).ok())
480}
481
482#[inline]
483pub(crate) fn try_unambiguous(s: &str, classification: &DateClassification) -> Option<Dt> {
484 if matches!(classification.bytes_len, 6..=8)
485 && let Some(dt) = parse_yyyy_mm(s.as_bytes())
486 {
487 return Some(dt);
488 }
489 try_compatible_formats(s, generate_unambiguous_candidates(classification))
490}