deep_time/alloc_parse/parse_date.rs
1use crate::{
2 ClassifiedDate, DateClassification, Dt, DtErr, DtErrKind, MAX_DATE_STRING_LEN, Mode, Order,
3 OrderFirst, ParseCfg, an_err, classify_date, default_date_parse_options,
4 generate_ambiguous_day_first_candidates, generate_ambiguous_month_first_candidates,
5 generate_ambiguous_year_first_candidates, generate_unambiguous_candidates,
6 is_week_date_missing_weekday, parse_pure_numeric_unix_timestamp, parse_syslog_no_year,
7 parse_week_date_no_weekday, parse_yyyy_mm, smart_detect_date_order, try_pure_numeric,
8};
9use alloc::borrow::Cow;
10use alloc::string::String;
11
12impl Dt {
/// Automatically parses a date/time [`str`] into a [`Dt`] by classifying the input and trying
/// generated candidate formats. Supports the vast majority of date formats.
15 ///
16 /// - Requires the `"alloc"` feature.
17 /// - The returned [`Dt`] is internally on the TAI time scale. The `attos` field is an [`i128`] attosecond
18 /// count since TAI 2000-01-01 noon. See [`Scale`] for more information.
19 ///
20 /// ## Parameters
21 ///
22 /// - `s`: The string to parse. Must be non-empty and no longer than 255 bytes. Empty strings or overly
23 /// long inputs return an error.
24 /// - `opts`: Optional [`ParseCfg`]. Pass `None` to use the defaults.
25 ///
26 /// ## Configuration Options (via [`ParseCfg`])
27 ///
28 /// | Field | Default | Effect |
29 /// |----------------|-------------|--------|
30 /// | `lang` | `En` | Language, scroll down to see currently supported languages |
31 /// | `order` | `Smart` | How to resolve ambiguous numeric dates like `01/02/03` |
32 /// | `mode` | `Auto` | Special handling for purely numeric inputs |
33 /// | `parse` | `None` | If provided, these exact `strftime`-style formats are tried **first** (and exclusively if `mode` is `Explicit`) |
34 /// | `relative` | `true` | Enable phrases like "tomorrow", "next Friday", "in 3 days" |
35 /// | `ref_time` | `None` | Reference time for relative dates and syslog-style "no-year" dates (uses system time if `std` feature is enabled) |
36 /// | `to_lower` | `true` | Automatically lowercase the input, set to `false` only if it's already lowercase |
37 ///
38 /// ## Purely Numeric Inputs
39 ///
40 /// When the input consists **only** of digits (and optionally a decimal point),
41 /// the parser uses a fast, mode-aware path before trying any other strategies.
42 /// The exact interpretation depends on the number of digits and the selected `mode`.
43 ///
44 /// | Digits | Example(s) | `Mode` | Interpreted as | Notes |
45 /// |--------|--------------------------|-----------------|-----------------------------------------|-------|
46 /// | 1–4 | `2024`, `24`, `5` | `Auto`/`Legacy` | Year (2-digit uses 2000/1900 pivot) | 1- and 3-digit years only work in `Scientific` |
47 /// | 5 | `24123`, `60400` | `Legacy` | Ordinal date (YYDDD) | — |
48 /// | 5 | `60400`, `60400.75` | `Scientific` | Modified Julian Date (MJD) | Fractional days supported |
49 /// | 5 | `24123`, `60400.75` | `Auto` | Ordinal (non-decimal) or MJD (decimal) | Smart default |
50 /// | 6 | `240315`, `202403` | `Auto` | YYYYMM if plausible year, else YYMMDD | Most common compact form |
51 /// | 6 | `240315` | `Legacy` | YYMMDD preferred | — |
52 /// | 6 | `202403` | `Scientific` | YYYYMM preferred | — |
53 /// | 7 | `2024123` | `Legacy` | Ordinal date (YYYYDDD) | — |
54 /// | 7 | `2460123`, `2460123.5` | `Scientific` | Julian Day (JD) | Fractional days supported |
55 /// | 7 | `2024123` | `Auto` | Ordinal (integer) or JD (decimal) | Smart default |
56 /// | 10–11 | `1735689600` | any | Unix seconds | — |
57 /// | 12–15 | `1735689600123` | any | Unix milliseconds | Most common high-precision case |
58 /// | 16–18 | `1735689600123456` | any | Unix microseconds | — |
59 /// | 19+ | `1735689600123456789` | any | Unix nanoseconds | Full precision |
60 ///
61 /// **Tip**: Use `Mode::UnixTimestamp` when you know the input is always a Unix timestamp.
62 ///
63 /// ## Ambiguous Numeric Dates
64 ///
65 /// Dates where the components could map to different orders (e.g. `01/02/03`,
66 /// `3-4-5`, `15.03.24`, `2024.03.15`) are resolved via the `order` field:
67 ///
68 /// - **`Order::Smart`** (default) — Applies the fast heuristic described in [`Order::Smart`].
69 /// It strongly prefers modern/tech conventions (Year-first for compact/ISO-like data)
70 /// while handling the majority of international and US-style dates.
71 ///
72 /// - **`Order::Year`**, **`Order::Day`**, or **`Order::Month`** force a
73 /// specific interpretation and bypass the heuristic entirely.
74 ///
75 /// This combination of `Smart` + `Auto` mode gives the best real-world parsing
76 /// success rate for mixed data sources.
77 ///
78 /// ## Other Supported Formats
79 ///
80 /// - **ISO 8601** and variants: `2024-03-15`, `2024-03-15T14:30:00Z`, `2024-03-15T14:30:00+01:00[Europe/Paris]`
81 /// - **Named dates** (in supported languages): `15 March 2024`, `15 mars 2024`, `15. März 2024`, `15 de marzo de 2024`
82 /// - **Week dates**: `2024-W15`, `2024-W15-3`, `2024W153` (missing weekday defaults to Monday)
83 /// - **Syslog-style** (no year): `Mar 5 10:23:45` (year inferred from `ref_time`)
84 /// - **Relative expressions**: `tomorrow`, `next Friday at 09:00`, `in 3 days`, `2 weeks ago`
/// - **12- and 24-hour time**: `2:30 PM`, `14:30:45.123`
86 /// - **Offsets and timezones**: `+0100`, `-05:30`, `Z`, IANA names in brackets
87 ///
88 /// ## Examples
89 ///
90 /// ```rust
91 /// use deep_time::{Dt, ParseCfg, Order, Mode, Lang};
92 ///
93 /// // Default smart parsing
94 /// let dt = Dt::from_str_parse("2024-03-15 14:30:00", &None).unwrap();
95 ///
96 /// // German named date
97 /// let cfg = ParseCfg { lang: Lang::De, ..Default::default() };
98 /// let dt = Dt::from_str_parse("15. März 2024 um 14:30", &Some(cfg)).unwrap();
99 ///
100 /// // Force month-first
101 /// let cfg = ParseCfg { order: Order::Month, ..Default::default() };
102 /// let dt = Dt::from_str_parse("03/15/2024", &Some(cfg)).unwrap();
103 ///
104 /// // Pure numeric compact form
105 /// let dt = Dt::from_str_parse("20240315", &None).unwrap(); // March 15, 2024
106 ///
107 /// // Unix timestamp (milliseconds)
108 /// let cfg = ParseCfg { mode: Mode::UnixTimestamp, ..Default::default() };
109 /// let dt = Dt::from_str_parse("1735689600123", &Some(cfg)).unwrap();
110 ///
111 /// // Explicit formats only (no fallback)
112 /// let cfg = ParseCfg {
113 /// parse: Some(vec!["%d/%m/%Y".into(), "%Y-%m-%d".into()]),
114 /// mode: Mode::Explicit,
115 /// ..Default::default()
116 /// };
117 /// let dt = Dt::from_str_parse("15/03/2024", &Some(cfg)).unwrap();
118 ///
119 /// // Relative date
120 /// let dt = Dt::from_str_parse("2 days from now", &None).unwrap();
121 /// ```
122 ///
123 /// ## Notes
124 ///
125 /// - The `Smart` + `Auto` combination gives the best real-world success rate for mixed data.
126 /// - All successfully parsed [`Dt`] values are stored with attosecond precision on the internal TAI timescale.
127 /// - For maximum reproducibility in production code, prefer `ParseCfg` with `parse: Some(...)` and `mode: Explicit`.
128 /// - Timezone handling (IANA names and fixed offsets) is fully supported.
129 ///
130 /// See also: [`ParseCfg`], [`Order`], [`Mode`], [`Lang`], [`Dt`],
131 /// [`Dt::str_to_attos`], [`Dt::str_to_ms`], [`Dt::str_to_unix_ms`].
132 ///
133 /// ## Supported Languages:
134 ///
135 /// - En
136 /// - De
137 /// - Es
138 /// - Fr
139 ///
140 pub fn from_str_parse(s: &str, opts: &Option<ParseCfg>) -> Result<Dt, DtErr> {
141 let opts: &ParseCfg = opts
142 .as_ref()
143 .unwrap_or_else(|| default_date_parse_options());
144
145 if s.is_empty() {
146 return Err(an_err!(DtErrKind::Incomplete, "empty"));
147 } else if s.len() > MAX_DATE_STRING_LEN {
148 return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", s));
149 }
150
151 let lang = opts.lang;
152 let ref_time = &opts.ref_time;
153
154 let lowered: Cow<str> = if opts.to_lower {
155 Cow::Owned(s.to_lowercase())
156 } else {
157 Cow::Borrowed(s)
158 };
159
160 let classification = match classify_date(&lowered, lang, ref_time) {
161 Ok(ClassifiedDate::Parsed(time_point)) => return Ok(time_point),
162 Ok(ClassifiedDate::Cls(c)) => c,
163 Err(e) => {
164 // std::eprintln!("{}", e);
165 return Err(an_err!(
166 DtErrKind::InvalidInput,
167 "{}",
168 s => e
169 ));
170 }
171 };
172
173 // let xx = &classification.date;
174 // if xx != trimmed {
175 // eprintln!("NOT EQUAL: {:?}, {:?}", trimmed, xx);
176 // }
177 // eprintln!("BEFORE & AFTER: {:?}, {:?}", lowered, &classification.date);
178
179 let normalized = &classification.date;
180
181 let (mode, date_order) = if let Some(formats) = &opts.parse {
182 if !formats.is_empty() {
183 for fmt in formats {
184 if let Ok(value) = Self::from_str(normalized, fmt, true, true, false) {
185 return Ok(value);
186 }
187 }
188 // None of the provided formats worked and mode is Explicit
189 if opts.mode == Mode::Explicit {
190 return Err(an_err!(DtErrKind::InvalidInput, "{}", s));
191 }
192 }
193 (opts.mode, opts.order)
194 } else {
195 (opts.mode, opts.order)
196 };
197
198 // if s == "2006-04-02 02:30-05 America/Indiana/Vevay" {
199 // std::eprintln!("{:?}", classification);
200 // }
201 // std::eprintln!("{:?}", classification);
202
203 if classification.is_pure_numeric {
204 match mode {
205 Mode::UnixTimestamp => {
206 if let Some(dt) = parse_pure_numeric_unix_timestamp(
207 normalized,
208 classification.num_non_decimal_digits as usize,
209 ) {
210 return Ok(dt);
211 }
212 }
213 _ => {
214 if let Some(dt) = try_pure_numeric(
215 normalized,
216 classification.num_digits,
217 classification.num_non_decimal_digits,
218 classification.is_decimal,
219 mode,
220 ) {
221 // std::eprintln!("NUMERIC INPUT SUCCESS: {:?}", s);
222 return Ok(dt);
223 }
224 }
225 }
226 }
227 if !classification.has_year
228 && let Some(dt) = parse_syslog_no_year(normalized, lang, ref_time)
229 {
230 return Ok(dt);
231 }
232
233 if is_week_date_missing_weekday(&classification) {
234 // std::eprintln!("IS WEEK DATE MISSING WEEKDAY: {:?}", s);
235 if let Some(dt) = parse_week_date_no_weekday(&classification, lang, ref_time) {
236 return Ok(dt);
237 }
238 }
239 if let Some(dt) = try_unambiguous(normalized, &classification) {
240 return Ok(dt);
241 }
242 // std::eprintln!("done trying unambiguous");
243 if let Some(dt) = match date_order {
244 Order::Smart => {
245 let order = smart_detect_date_order(normalized, &classification);
246 let mut result: Option<Dt>;
247
248 match order {
249 OrderFirst::Day => {
250 result = try_compatible_formats(
251 normalized,
252 generate_ambiguous_day_first_candidates(&classification),
253 );
254 // std::eprintln!("done trying day first: {:?}", result);
255
256 if result.is_none() {
257 result = try_compatible_formats(
258 normalized,
259 generate_ambiguous_month_first_candidates(&classification),
260 );
261 // std::eprintln!("done trying month first: {:?}", result);
262 }
263
264 if result.is_none() {
265 result = try_compatible_formats(
266 normalized,
267 generate_ambiguous_year_first_candidates(&classification),
268 );
269 // std::eprintln!("done trying year first: {:?}", result);
270 }
271 }
272 OrderFirst::Month => {
273 result = try_compatible_formats(
274 normalized,
275 generate_ambiguous_month_first_candidates(&classification),
276 );
277 // std::eprintln!("done trying month first: {:?}", result);
278
279 if result.is_none() {
280 result = try_compatible_formats(
281 normalized,
282 generate_ambiguous_day_first_candidates(&classification),
283 );
284 // std::eprintln!("done trying day first: {:?}", result);
285 }
286
287 if result.is_none() {
288 result = try_compatible_formats(
289 normalized,
290 generate_ambiguous_year_first_candidates(&classification),
291 );
292 // std::eprintln!("done trying year first: {:?}", result);
293 }
294 }
295 OrderFirst::Year => {
296 result = try_compatible_formats(
297 normalized,
298 generate_ambiguous_year_first_candidates(&classification),
299 );
300 // std::eprintln!("done trying year first: {:?}", result);
301
302 if result.is_none() {
303 result = try_compatible_formats(
304 normalized,
305 generate_ambiguous_day_first_candidates(&classification),
306 );
307 // std::eprintln!("done trying day first: {:?}", result);
308 }
309
310 if result.is_none() {
311 result = try_compatible_formats(
312 normalized,
313 generate_ambiguous_month_first_candidates(&classification),
314 );
315 // std::eprintln!("done trying month first: {:?}", result);
316 }
317 }
318 }
319
320 result
321 }
322 Order::Year => try_compatible_formats(
323 normalized,
324 generate_ambiguous_year_first_candidates(&classification),
325 ),
326 Order::Day => try_compatible_formats(
327 normalized,
328 generate_ambiguous_day_first_candidates(&classification),
329 ),
330 Order::Month => try_compatible_formats(
331 normalized,
332 generate_ambiguous_month_first_candidates(&classification),
333 ),
334 } {
335 return Ok(dt);
336 }
337 // std::eprintln!("NOW trying numeric timestamp");
338 if classification.is_pure_numeric
339 && mode != Mode::UnixTimestamp
340 && let Some(dt) = parse_pure_numeric_unix_timestamp(
341 normalized,
342 classification.num_non_decimal_digits as usize,
343 )
344 {
345 return Ok(dt);
346 }
347 Err(an_err!(DtErrKind::InvalidInput, "{}", s))
348 }
349
350 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
351 /// but returns attoseconds since the library epoch: 2000-01-01 12:00:00 UTC
352 /// (on the UTC scale).
353 ///
354 /// Returns `Some(attos)` on success (negative for pre-2000 dates) or `None`
355 /// on any parse error.
356 #[inline]
357 pub fn str_to_attos(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
358 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_attos())
359 }
360
361 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
362 /// but returns milliseconds since the library epoch: 2000-01-01 12:00:00 UTC
363 /// (on the UTC scale).
364 ///
365 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
366 /// on any parse error.
367 #[inline]
368 pub fn str_to_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
369 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ms())
370 }
371
372 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
373 /// but returns nanoseconds since the library epoch: 2000-01-01 12:00:00 UTC
374 /// (on the UTC scale).
375 ///
376 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
377 /// on any parse error.
378 #[inline]
379 pub fn str_to_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
380 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ns())
381 }
382
383 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
384 /// but returns milliseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
385 ///
386 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
387 /// on any parse error.
388 #[inline]
389 pub fn str_to_unix_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
390 Dt::from_str_parse(s, opts)
391 .ok()
392 .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ms())
393 }
394
395 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
396 /// but returns nanoseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
397 ///
398 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
399 /// on any parse error.
400 #[inline]
401 pub fn str_to_unix_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
402 Dt::from_str_parse(s, opts)
403 .ok()
404 .map(|tp| tp.to_scale_and_diff(Dt::UNIX_EPOCH, false).to_ns())
405 }
406}
407
408/// Core zero-allocation helper (updated to match the new `&str` signature).
409///
410/// The `fmt` we get from the iterator is still `'static`, but it coerces automatically
411/// to `&str`, so everything continues to work.
412#[inline]
413pub(crate) fn try_compatible_formats<I>(s: &str, formats: I) -> Option<Dt>
414where
415 I: IntoIterator<Item = String>,
416{
417 // let mut dt = None;
418
419 // for fmt in formats.into_iter() {
420 // eprintln!("TRYING FMT: {}", fmt);
421 // if let Ok(parsed) = Dt::from_str(s, &fmt, true, true, false) {
422 // dt = Some(parsed);
423 // break;
424 // }
425 // // === DEBUG ===
426 // // eprintln!("Tried format: {:?}", fmt);
427 // }
428
429 // dt
430 formats
431 .into_iter()
432 .find_map(|fmt| Dt::from_str(s, &fmt, true, true, false).ok())
433}
434
435#[inline]
436pub(crate) fn try_unambiguous(s: &str, classification: &DateClassification) -> Option<Dt> {
437 if matches!(classification.bytes_len, 6..=8)
438 && let Some(dt) = parse_yyyy_mm(s.as_bytes())
439 {
440 return Some(dt);
441 }
442 try_compatible_formats(s, generate_unambiguous_candidates(classification))
443}