deep_time/alloc_parse/parse_date.rs
1use crate::{
2 ClassifiedDate, DateClassification, Dt, DtErr, DtErrKind, Lang, MAX_DATE_STRING_LEN, Mode,
3 Order, OrderFirst, ParseCfg, an_err, classify_date, default_date_parse_options,
4 generate_ambiguous_day_first_candidates, generate_ambiguous_month_first_candidates,
5 generate_ambiguous_year_first_candidates, generate_unambiguous_candidates,
6 is_week_date_missing_weekday, parse_pure_numeric_unix_timestamp, parse_syslog_no_year,
7 parse_week_date_no_weekday, parse_yyyy_mm, smart_detect_date_order, try_pure_numeric,
8};
9use alloc::borrow::Cow;
10use alloc::string::String;
11
12impl Dt {
13 /// Automatically parses datetime [`str`] into a [`Dt`] by guessing and generating the format. Supports the vast
14 /// majority of date formats.
15 ///
16 /// - Requires the `"alloc"` feature.
17 /// - The returned [`Dt`] is internally on the TAI time scale. The `attos` field is an [`i128`] attosecond
18 /// count since TAI 2000-01-01 noon. See [`Scale`] for more information.
19 /// - The returned [`Dt`] is **not** in local time, if a timezone is parsed then it's used to find the offset
20 /// to return non-local instant.
21 ///
22 /// ## Parameters
23 ///
24 /// - `s`: The string to parse. Must be non-empty and no longer than 255 bytes. Empty strings or overly
25 /// long inputs return an error.
26 /// - `opts`: Optional [`ParseCfg`]. Pass `None` to use the defaults.
27 ///
28 /// ## Configuration Options
29 ///
30 /// These are the fields of the configuration options struct [`ParseCfg`], their types and defaults.
31 ///
32 /// See [`ParseCfg`] for more information.
33 ///
34 /// | Field | Type and Default | Effect |
35 /// |----------------|----------------------------------|--------|
36 /// | `lang` | [`Lang::En`] | Language, scroll down to see currently supported languages |
37 /// | `order` | [`Order::Smart`] | How to resolve ambiguous numeric dates like `01/02/03` |
38 /// | `mode` | [`Mode::Auto`] | Special handling for purely numeric inputs |
39 /// | `parse` | [`Option<Vec<String>>`] - `None` | An explicit list of formats to try, if the [`Mode`] is Explicit then only these formats are tried |
40 /// | `relative` | [`bool`] - `true` | Enable phrases like "tomorrow", "in 3 days", limited support for relative dates |
41 /// | `ref_time` | [`Option<Dt>`] - `None` | Reference time for relative dates and syslog-style "no-year" dates |
42 /// | `to_lower` | [`bool`] - `true` | Automatically lowercase the input, **only** set to false if it's already lowercase |
43 ///
44 /// ## Purely Numeric Inputs
45 ///
46 /// When the input consists **only** of digits (and optionally a decimal point),
47 /// the parser uses a fast, mode-aware path before trying any other strategies.
48 /// The exact interpretation depends on the number of digits and the selected `mode`.
49 ///
50 /// | Digits | Example(s) | `Mode` | Interpreted as | Notes |
51 /// |--------|--------------------------|-----------------|-----------------------------------------|-------|
52 /// | 1–4 | `2024`, `24`, `5` | `Auto`/`Legacy` | Year (2-digit uses 2000/1900 pivot) | 1- and 3-digit years only work in `Scientific` |
53 /// | 5 | `24123`, `60400` | `Legacy` | Ordinal date (YYDDD) | — |
54 /// | 5 | `60400`, `60400.75` | `Scientific` | Modified Julian Date (MJD) | Fractional days supported |
55 /// | 5 | `24123`, `60400.75` | `Auto` | Ordinal (non-decimal) or MJD (decimal) | Smart default |
56 /// | 6 | `240315`, `202403` | `Auto` | YYYYMM if plausible year, else YYMMDD | Most common compact form |
57 /// | 6 | `240315` | `Legacy` | YYMMDD preferred | — |
58 /// | 6 | `202403` | `Scientific` | YYYYMM preferred | — |
59 /// | 7 | `2024123` | `Legacy` | Ordinal date (YYYYDDD) | — |
60 /// | 7 | `2460123`, `2460123.5` | `Scientific` | Julian Day (JD) | Fractional days supported |
61 /// | 7 | `2024123` | `Auto` | Ordinal (integer) or JD (decimal) | Smart default |
62 /// | 10–11 | `1735689600` | any | Unix seconds | — |
63 /// | 12–15 | `1735689600123` | any | Unix milliseconds | Most common high-precision case |
64 /// | 16–18 | `1735689600123456` | any | Unix microseconds | — |
65 /// | 19+ | `1735689600123456789` | any | Unix nanoseconds | Full precision |
66 ///
67 /// Use `Mode::UnixTimestamp` when you know the input is always a Unix timestamp.
68 ///
69 /// ## Ambiguous Numeric Dates
70 ///
71 /// Dates where the components could map to different orders (e.g. `01/02/03`,
72 /// `3-4-5`, `15.03.24`, `2024.03.15`) are resolved via the `order` field:
73 ///
74 /// - **`Order::Smart`** (default) — Applies the fast heuristic described in [`Order::Smart`].
75 /// It strongly prefers modern/tech conventions (Year-first for compact/ISO-like data)
76 /// while handling the majority of international and US-style dates.
77 ///
78 /// - **`Order::Year`**, **`Order::Day`**, or **`Order::Month`** force a
79 /// specific interpretation and bypass the heuristic entirely.
80 ///
81 /// ## Supported Formats
82 ///
83 /// The main part of the parser basically works by using aho-corasick with day names, month names, and other things to
84 /// tokenize an input and then automatically generate candidate formats to try on it. Due to this it's difficult to
85 /// say the number of supported formats, but it's probably in the thousands.
86 ///
87 /// Separators generally don't matter, they could be spaces, slashes, whatever.
88 ///
89 /// Generally speaking the date part must come first, and stuff like time components, offsets and iana timezone names
90 /// must come afterwards.
91 ///
92 /// - **ISO 8601** and variants: `2024-03-15`, `2024-03-15T14:30:00Z`, `2024-03-15T14:30:00+01:00[Europe/Paris]`
93 /// - **Named dates** (in supported languages): `15 March 2024`, `15 mars 2024`, `15. März 2024`, `15 de marzo de 2024`
94 /// - **Week dates**: `2024-W15`, `2024-W15-3`, `2024W153` (missing weekday defaults to Monday)
95 /// - **Syslog-style** (no year): `Mar 5 10:23:45` (year inferred from `ref_time`)
96 /// - **Relative expressions**: `tomorrow`, `in 3 days`, `2 weeks ago`
97 /// - **12-hour time**: `2:30 PM`, `14:30:45.123`
98 /// - **Offsets and timezones**: `+0100`, `-05:30`, `Z`, IANA timezone names (with the `jiff-tz feature enabled`)
99 /// - **Library time scales**: `TAI`, `TT`, etc. are detected and parsed, must come after the date part of the input.
100 ///
101 /// Note that relative date support is quite limited and phrases such as `"next friday at 9am"` will not parse.
102 ///
103 /// ## Examples
104 ///
105 /// ```rust
106 /// use deep_time::{Dt, ParseCfg, Order, Mode, Lang};
107 ///
108 /// // Default smart parsing
109 /// let dt = Dt::from_str_parse("2024-03-15 14:30:00", &None).unwrap();
110 ///
111 /// // German named date
112 /// let cfg = ParseCfg { lang: Lang::De, ..Default::default() };
113 /// let dt = Dt::from_str_parse("15. März 2024 um 14:30", &Some(cfg)).unwrap();
114 ///
115 /// // Force month-first
116 /// let cfg = ParseCfg { order: Order::Month, ..Default::default() };
117 /// let dt = Dt::from_str_parse("03/15/2024", &Some(cfg)).unwrap();
118 ///
119 /// // Pure numeric compact form
120 /// let dt = Dt::from_str_parse("20240315", &None).unwrap(); // March 15, 2024
121 ///
122 /// // Unix timestamp (milliseconds)
123 /// let cfg = ParseCfg { mode: Mode::UnixTimestamp, ..Default::default() };
124 /// let dt = Dt::from_str_parse("1735689600123", &Some(cfg)).unwrap();
125 ///
126 /// // Explicit formats only (no fallback)
127 /// let cfg = ParseCfg {
128 /// parse: Some(vec!["%d/%m/%Y".into(), "%Y-%m-%d".into()]),
129 /// mode: Mode::Explicit,
130 /// ..Default::default()
131 /// };
132 /// let dt = Dt::from_str_parse("15/03/2024", &Some(cfg)).unwrap();
133 ///
134 /// // Relative date
135 /// let dt = Dt::from_str_parse("2 days from now", &None).unwrap();
136 /// ```
137 ///
138 /// ## Notes
139 ///
140 /// - The `Smart` + `Auto` combination gives the best real-world success rate for mixed data.
141 /// - All successfully parsed [`Dt`] values are stored with attosecond precision on the internal
142 /// TAI timescale.
143 /// - Timezone handling (IANA names and fixed offsets) is fully supported when the `jiff-tz` feature
144 /// is enabled.
145 ///
146 /// ## Supported Languages:
147 ///
148 /// Language support here basically means supporting abbreviated and full day and month names.
149 /// Non-Ascii types of numeric characters are also supported such as full width digits.
150 ///
151 /// Some day/month names in non-English languages are not supported due to clashes, any such missing
152 /// support is noted below.
153 ///
154 /// - En
155 /// - De
156 /// - Won't parse "t" as short form for day.
157 /// - Es
158 /// - Won't parse "mar" as tuesday, will instead parse as march.
159 /// - Fr
160 /// - Won't parse "mar" as tuesday, will instead parse as march.
161 ///
162 /// ## See also
163 ///
164 /// - [`ParseCfg`]
165 /// - [`Order`]
166 /// - [`Mode`]
167 /// - [`Lang`]
168 /// - [`Dt`]
169 /// - [`Dt::from_str_iso`](../struct.Dt.html#method.from_str_iso)
170 pub fn from_str_parse(s: &str, opts: &Option<ParseCfg>) -> Result<Dt, DtErr> {
171 let opts: &ParseCfg = opts
172 .as_ref()
173 .unwrap_or_else(|| default_date_parse_options());
174
175 if s.is_empty() {
176 return Err(an_err!(DtErrKind::Incomplete, "empty"));
177 } else if s.len() > MAX_DATE_STRING_LEN {
178 return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", s));
179 }
180
181 let lang: Lang = opts.lang;
182 let ref_time = &opts.ref_time;
183
184 let lowered: Cow<str> = if opts.to_lower {
185 Cow::Owned(s.to_lowercase())
186 } else {
187 Cow::Borrowed(s)
188 };
189
190 let classification = match classify_date(&lowered, lang, ref_time) {
191 Ok(ClassifiedDate::Parsed(time_point)) => return Ok(time_point),
192 Ok(ClassifiedDate::Cls(c)) => c,
193 Err(e) => {
194 // std::eprintln!("{}", e);
195 return Err(an_err!(
196 DtErrKind::InvalidInput,
197 "{}",
198 s => e
199 ));
200 }
201 };
202
203 // let xx = &classification.date;
204 // if xx != trimmed {
205 // eprintln!("NOT EQUAL: {:?}, {:?}", trimmed, xx);
206 // }
207 // eprintln!("BEFORE & AFTER: {:?}, {:?}", lowered, &classification.date);
208
209 let normalized = &classification.date;
210
211 let (mode, date_order) = if let Some(formats) = &opts.parse {
212 if !formats.is_empty() {
213 for fmt in formats {
214 if let Ok(value) = Self::from_str(normalized, fmt, true, true, false) {
215 return Ok(value);
216 }
217 }
218 // None of the provided formats worked and mode is Explicit
219 if opts.mode == Mode::Explicit {
220 return Err(an_err!(DtErrKind::InvalidInput, "{}", s));
221 }
222 }
223 (opts.mode, opts.order)
224 } else {
225 (opts.mode, opts.order)
226 };
227
228 // if s == "2006-04-02 02:30-05 America/Indiana/Vevay" {
229 // std::eprintln!("{:?}", classification);
230 // }
231 // std::eprintln!("{:?}", classification);
232
233 if classification.is_pure_numeric {
234 match mode {
235 Mode::UnixTimestamp => {
236 if let Some(dt) = parse_pure_numeric_unix_timestamp(
237 normalized,
238 classification.num_non_decimal_digits as usize,
239 ) {
240 return Ok(dt);
241 }
242 }
243 _ => {
244 if let Some(dt) = try_pure_numeric(
245 normalized,
246 classification.num_digits,
247 classification.num_non_decimal_digits,
248 classification.is_decimal,
249 mode,
250 ) {
251 // std::eprintln!("NUMERIC INPUT SUCCESS: {:?}", s);
252 return Ok(dt);
253 }
254 }
255 }
256 }
257 if !classification.has_year
258 && let Some(dt) = parse_syslog_no_year(normalized, lang, ref_time)
259 {
260 return Ok(dt);
261 }
262
263 if is_week_date_missing_weekday(&classification) {
264 // std::eprintln!("IS WEEK DATE MISSING WEEKDAY: {:?}", s);
265 if let Some(dt) = parse_week_date_no_weekday(&classification, lang, ref_time) {
266 return Ok(dt);
267 }
268 }
269 if let Some(dt) = try_unambiguous(normalized, &classification) {
270 return Ok(dt);
271 }
272 // std::eprintln!("done trying unambiguous");
273 if let Some(dt) = match date_order {
274 Order::Smart => {
275 let order = smart_detect_date_order(normalized, &classification);
276 let mut result: Option<Dt>;
277
278 match order {
279 OrderFirst::Day => {
280 result = try_compatible_formats(
281 normalized,
282 generate_ambiguous_day_first_candidates(&classification),
283 );
284 // std::eprintln!("done trying day first: {:?}", result);
285
286 if result.is_none() {
287 result = try_compatible_formats(
288 normalized,
289 generate_ambiguous_month_first_candidates(&classification),
290 );
291 // std::eprintln!("done trying month first: {:?}", result);
292 }
293
294 if result.is_none() {
295 result = try_compatible_formats(
296 normalized,
297 generate_ambiguous_year_first_candidates(&classification),
298 );
299 // std::eprintln!("done trying year first: {:?}", result);
300 }
301 }
302 OrderFirst::Month => {
303 result = try_compatible_formats(
304 normalized,
305 generate_ambiguous_month_first_candidates(&classification),
306 );
307 // std::eprintln!("done trying month first: {:?}", result);
308
309 if result.is_none() {
310 result = try_compatible_formats(
311 normalized,
312 generate_ambiguous_day_first_candidates(&classification),
313 );
314 // std::eprintln!("done trying day first: {:?}", result);
315 }
316
317 if result.is_none() {
318 result = try_compatible_formats(
319 normalized,
320 generate_ambiguous_year_first_candidates(&classification),
321 );
322 // std::eprintln!("done trying year first: {:?}", result);
323 }
324 }
325 OrderFirst::Year => {
326 result = try_compatible_formats(
327 normalized,
328 generate_ambiguous_year_first_candidates(&classification),
329 );
330 // std::eprintln!("done trying year first: {:?}", result);
331
332 if result.is_none() {
333 result = try_compatible_formats(
334 normalized,
335 generate_ambiguous_day_first_candidates(&classification),
336 );
337 // std::eprintln!("done trying day first: {:?}", result);
338 }
339
340 if result.is_none() {
341 result = try_compatible_formats(
342 normalized,
343 generate_ambiguous_month_first_candidates(&classification),
344 );
345 // std::eprintln!("done trying month first: {:?}", result);
346 }
347 }
348 }
349
350 result
351 }
352 Order::Year => try_compatible_formats(
353 normalized,
354 generate_ambiguous_year_first_candidates(&classification),
355 ),
356 Order::Day => try_compatible_formats(
357 normalized,
358 generate_ambiguous_day_first_candidates(&classification),
359 ),
360 Order::Month => try_compatible_formats(
361 normalized,
362 generate_ambiguous_month_first_candidates(&classification),
363 ),
364 } {
365 return Ok(dt);
366 }
367 // std::eprintln!("NOW trying numeric timestamp");
368 if classification.is_pure_numeric
369 && mode != Mode::UnixTimestamp
370 && let Some(dt) = parse_pure_numeric_unix_timestamp(
371 normalized,
372 classification.num_non_decimal_digits as usize,
373 )
374 {
375 return Ok(dt);
376 }
377 Err(an_err!(DtErrKind::InvalidInput, "{}", s))
378 }
379
380 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
381 /// but returns attoseconds since the library epoch: 2000-01-01 12:00:00 UTC
382 /// (on the UTC scale).
383 ///
384 /// Returns `Some(attos)` on success (negative for pre-2000 dates) or `None`
385 /// on any parse error.
386 #[inline]
387 pub fn str_to_attos(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
388 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_attos())
389 }
390
391 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
392 /// but returns milliseconds since the library epoch: 2000-01-01 12:00:00 UTC
393 /// (on the UTC scale).
394 ///
395 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
396 /// on any parse error.
397 #[inline]
398 pub fn str_to_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
399 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ms())
400 }
401
402 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
403 /// but returns nanoseconds since the library epoch: 2000-01-01 12:00:00 UTC
404 /// (on the UTC scale).
405 ///
406 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
407 /// on any parse error.
408 #[inline]
409 pub fn str_to_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
410 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ns())
411 }
412
413 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
414 /// but returns milliseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
415 ///
416 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
417 /// on any parse error.
418 #[inline]
419 pub fn str_to_unix_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
420 Dt::from_str_parse(s, opts)
421 .ok()
422 .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ms())
423 }
424
425 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
426 /// but returns nanoseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
427 ///
428 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
429 /// on any parse error.
430 #[inline]
431 pub fn str_to_unix_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
432 Dt::from_str_parse(s, opts)
433 .ok()
434 .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ns())
435 }
436}
437
438/// Core zero-allocation helper (updated to match the new `&str` signature).
439///
440/// The `fmt` we get from the iterator is still `'static`, but it coerces automatically
441/// to `&str`, so everything continues to work.
442#[inline]
443pub(crate) fn try_compatible_formats<I>(s: &str, formats: I) -> Option<Dt>
444where
445 I: IntoIterator<Item = String>,
446{
447 // let mut dt = None;
448
449 // for fmt in formats.into_iter() {
450 // eprintln!("TRYING FMT: {}", fmt);
451 // if let Ok(parsed) = Dt::from_str(s, &fmt, true, true, false) {
452 // dt = Some(parsed);
453 // break;
454 // }
455 // // === DEBUG ===
456 // // eprintln!("Tried format: {:?}", fmt);
457 // }
458
459 // dt
460 formats
461 .into_iter()
462 .find_map(|fmt| Dt::from_str(s, &fmt, true, true, false).ok())
463}
464
465#[inline]
466pub(crate) fn try_unambiguous(s: &str, classification: &DateClassification) -> Option<Dt> {
467 if matches!(classification.bytes_len, 6..=8)
468 && let Some(dt) = parse_yyyy_mm(s.as_bytes())
469 {
470 return Some(dt);
471 }
472 try_compatible_formats(s, generate_unambiguous_candidates(classification))
473}