deep_time/alloc_parse/parse_date.rs
1use crate::{
2 ClassifiedDate, DateClassification, Dt, DtErr, DtErrKind, MAX_DATE_STRING_LEN, Mode, Order,
3 OrderFirst, ParseCfg, Scale, an_err, classify_date, default_date_parse_options,
4 generate_ambiguous_day_first_candidates, generate_ambiguous_month_first_candidates,
5 generate_ambiguous_year_first_candidates, generate_unambiguous_candidates,
6 is_week_date_missing_weekday, parse_pure_numeric_unix_timestamp, parse_syslog_no_year,
7 parse_week_date_no_weekday, parse_yyyy_mm, smart_detect_date_order, try_pure_numeric,
8};
9use alloc::borrow::Cow;
10use alloc::string::String;
11
12impl Dt {
13 /// Automatically parses datetime [`str`] into a [`Dt`] by guessing and generating the format. Supports the vast
14 /// majority of date formats.
15 ///
16 /// Requires the `"alloc"` feature.
17 ///
18 /// ## Parameters
19 ///
20 /// - `s`: The string to parse. Must be non-empty and no longer than 255 bytes. Empty strings or overly
21 /// long inputs return an error.
22 /// - `opts`: Optional [`ParseCfg`]. Pass `None` to use the defaults.
23 ///
24 /// ## Configuration Options (via [`ParseCfg`])
25 ///
26 /// | Field | Default | Effect |
27 /// |----------------|-------------|--------|
28 /// | `lang` | `En` | Language, scroll down to see currently supported languages |
29 /// | `order` | `Smart` | How to resolve ambiguous numeric dates like `01/02/03` |
30 /// | `mode` | `Auto` | Special handling for purely numeric inputs |
31 /// | `parse` | `None` | If provided, these exact `strftime`-style formats are tried **first** (and exclusively if `mode` is `Explicit`) |
32 /// | `relative` | `true` | Enable phrases like "tomorrow", "next Friday", "in 3 days" |
33 /// | `ref_time` | `None` | Reference time for relative dates and syslog-style "no-year" dates (uses system time if `std` feature is enabled) |
34 /// | `to_lower` | `true` | Automatically lowercase the input, set to `false` only if it's already lowercase |
35 ///
36 /// ## Purely Numeric Inputs
37 ///
38 /// When the input consists **only** of digits (and optionally a decimal point),
39 /// the parser uses a fast, mode-aware path before trying any other strategies.
40 /// The exact interpretation depends on the number of digits and the selected `mode`.
41 ///
42 /// | Digits | Example(s) | `Mode` | Interpreted as | Notes |
43 /// |--------|--------------------------|-----------------|-----------------------------------------|-------|
44 /// | 1–4 | `2024`, `24`, `5` | `Auto`/`Legacy` | Year (2-digit uses 2000/1900 pivot) | 1- and 3-digit years only work in `Scientific` |
45 /// | 5 | `24123`, `60400` | `Legacy` | Ordinal date (YYDDD) | — |
46 /// | 5 | `60400`, `60400.75` | `Scientific` | Modified Julian Date (MJD) | Fractional days supported |
47 /// | 5 | `24123`, `60400.75` | `Auto` | Ordinal (non-decimal) or MJD (decimal) | Smart default |
48 /// | 6 | `240315`, `202403` | `Auto` | YYYYMM if plausible year, else YYMMDD | Most common compact form |
49 /// | 6 | `240315` | `Legacy` | YYMMDD preferred | — |
50 /// | 6 | `202403` | `Scientific` | YYYYMM preferred | — |
51 /// | 7 | `2024123` | `Legacy` | Ordinal date (YYYYDDD) | — |
52 /// | 7 | `2460123`, `2460123.5` | `Scientific` | Julian Day (JD) | Fractional days supported |
53 /// | 7 | `2024123` | `Auto` | Ordinal (integer) or JD (decimal) | Smart default |
54 /// | 10–11 | `1735689600` | any | Unix seconds | — |
55 /// | 12–15 | `1735689600123` | any | Unix milliseconds | Most common high-precision case |
56 /// | 16–18 | `1735689600123456` | any | Unix microseconds | — |
57 /// | 19+ | `1735689600123456789` | any | Unix nanoseconds | Full precision |
58 ///
59 /// **Tip**: Use `Mode::UnixTimestamp` when you know the input is always a Unix timestamp.
60 ///
61 /// ## Ambiguous Numeric Dates
62 ///
63 /// Dates where the components could map to different orders (e.g. `01/02/03`,
64 /// `3-4-5`, `15.03.24`, `2024.03.15`) are resolved via the `order` field:
65 ///
66 /// - **`Order::Smart`** (default) — Applies the fast heuristic described in [`Order::Smart`].
67 /// It strongly prefers modern/tech conventions (Year-first for compact/ISO-like data)
68 /// while handling the majority of international and US-style dates.
69 ///
70 /// - **`Order::Year`**, **`Order::Day`**, or **`Order::Month`** force a
71 /// specific interpretation and bypass the heuristic entirely.
72 ///
73 /// This combination of `Smart` + `Auto` mode gives the best real-world parsing
74 /// success rate for mixed data sources.
75 ///
76 /// ## Other Supported Formats
77 ///
78 /// - **ISO 8601** and variants: `2024-03-15`, `2024-03-15T14:30:00Z`, `2024-03-15T14:30:00+01:00[Europe/Paris]`
79 /// - **Named dates** (in supported languages): `15 March 2024`, `15 mars 2024`, `15. März 2024`, `15 de marzo de 2024`
80 /// - **Week dates**: `2024-W15`, `2024-W15-3`, `2024W153` (missing weekday defaults to Monday)
81 /// - **Syslog-style** (no year): `Mar 5 10:23:45` (year inferred from `ref_time`)
82 /// - **Relative expressions**: `tomorrow`, `next Friday at 09:00`, `in 3 days`, `2 weeks ago`
83 /// - **12-hour time**: `2:30 PM`, `14:30:45.123`
84 /// - **Offsets and timezones**: `+0100`, `-05:30`, `Z`, IANA names in brackets
85 ///
86 /// ## Examples
87 ///
88 /// ```rust
89 /// use deep_time::{Dt, ParseCfg, Order, Mode, Lang};
90 ///
91 /// // Default smart parsing
92 /// let dt = Dt::from_str_parse("2024-03-15 14:30:00", &None).unwrap();
93 ///
94 /// // German named date
95 /// let cfg = ParseCfg { lang: Lang::De, ..Default::default() };
96 /// let dt = Dt::from_str_parse("15. März 2024 um 14:30", &Some(cfg)).unwrap();
97 ///
98 /// // Force month-first
99 /// let cfg = ParseCfg { order: Order::Month, ..Default::default() };
100 /// let dt = Dt::from_str_parse("03/15/2024", &Some(cfg)).unwrap();
101 ///
102 /// // Pure numeric compact form
103 /// let dt = Dt::from_str_parse("20240315", &None).unwrap(); // March 15, 2024
104 ///
105 /// // Unix timestamp (milliseconds)
106 /// let cfg = ParseCfg { mode: Mode::UnixTimestamp, ..Default::default() };
107 /// let dt = Dt::from_str_parse("1735689600123", &Some(cfg)).unwrap();
108 ///
109 /// // Explicit formats only (no fallback)
110 /// let cfg = ParseCfg {
111 /// parse: Some(vec!["%d/%m/%Y".into(), "%Y-%m-%d".into()]),
112 /// mode: Mode::Explicit,
113 /// ..Default::default()
114 /// };
115 /// let dt = Dt::from_str_parse("15/03/2024", &Some(cfg)).unwrap();
116 ///
117 /// // Relative date
118 /// let dt = Dt::from_str_parse("2 days from now", &None).unwrap();
119 /// ```
120 ///
121 /// ## Notes
122 ///
123 /// - The `Smart` + `Auto` combination gives the best real-world success rate for mixed data.
124 /// - All successfully parsed [`Dt`] values are stored with attosecond precision on the internal TAI timescale.
125 /// - For maximum reproducibility in production code, prefer `ParseCfg` with `parse: Some(...)` and `mode: Explicit`.
126 /// - Timezone handling (IANA names and fixed offsets) is fully supported.
127 ///
128 /// See also: [`ParseCfg`], [`Order`], [`Mode`], [`Lang`], [`Dt`],
129 /// [`Dt::str_to_attos`], [`Dt::str_to_ms`], [`Dt::str_to_unix_ms`].
130 ///
131 /// ## Supported Languages:
132 ///
133 /// - En
134 /// - De
135 /// - Es
136 /// - Fr
137 ///
138 pub fn from_str_parse(s: &str, opts: &Option<ParseCfg>) -> Result<Dt, DtErr> {
139 let opts: &ParseCfg = opts
140 .as_ref()
141 .unwrap_or_else(|| default_date_parse_options());
142
143 if s.is_empty() {
144 return Err(an_err!(DtErrKind::Incomplete, "empty"));
145 } else if s.len() > MAX_DATE_STRING_LEN {
146 return Err(an_err!(DtErrKind::InvalidInput, "too long: {}", s));
147 }
148
149 let lang = opts.lang;
150 let ref_time = &opts.ref_time;
151
152 let lowered: Cow<str> = if opts.to_lower {
153 Cow::Owned(s.to_lowercase())
154 } else {
155 Cow::Borrowed(s)
156 };
157
158 let classification = match classify_date(&lowered, lang, ref_time) {
159 Ok(ClassifiedDate::Parsed(time_point)) => return Ok(time_point),
160 Ok(ClassifiedDate::Cls(c)) => c,
161 Err(e) => {
162 // std::eprintln!("{}", e);
163 return Err(an_err!(
164 DtErrKind::InvalidInput,
165 "{}",
166 s => e
167 ));
168 }
169 };
170
171 // let xx = &classification.date;
172 // if xx != trimmed {
173 // eprintln!("NOT EQUAL: {:?}, {:?}", trimmed, xx);
174 // }
175 // eprintln!("BEFORE & AFTER: {:?}, {:?}", lowered, &classification.date);
176
177 let normalized = &classification.date;
178
179 let (mode, date_order) = if let Some(formats) = &opts.parse {
180 if !formats.is_empty() {
181 for fmt in formats {
182 if let Ok(value) = Self::from_str(normalized, fmt, true, true, false) {
183 return Ok(value);
184 }
185 }
186 // None of the provided formats worked and mode is Explicit
187 if opts.mode == Mode::Explicit {
188 return Err(an_err!(DtErrKind::InvalidInput, "{}", s));
189 }
190 }
191 (opts.mode, opts.order)
192 } else {
193 (opts.mode, opts.order)
194 };
195
196 // if s == "15. Januar 2024 um 14:30 Uhr" {
197 // std::eprintln!("{:?}", classification);
198 // }
199
200 if classification.is_pure_numeric {
201 match mode {
202 Mode::UnixTimestamp => {
203 if let Some(dt) = parse_pure_numeric_unix_timestamp(
204 normalized,
205 classification.num_non_decimal_digits as usize,
206 ) {
207 return Ok(dt);
208 }
209 }
210 _ => {
211 if let Some(dt) = try_pure_numeric(
212 normalized,
213 classification.num_digits,
214 classification.num_non_decimal_digits,
215 classification.is_decimal,
216 mode,
217 ) {
218 // std::eprintln!("NUMERIC INPUT SUCCESS: {:?}", s);
219 return Ok(dt);
220 }
221 }
222 }
223 }
224 if !classification.has_year
225 && let Some(dt) = parse_syslog_no_year(normalized, lang, ref_time)
226 {
227 return Ok(dt);
228 }
229
230 if is_week_date_missing_weekday(&classification) {
231 // std::eprintln!("IS WEEK DATE MISSING WEEKDAY: {:?}", s);
232 if let Some(dt) = parse_week_date_no_weekday(&classification.date, lang, ref_time) {
233 return Ok(dt);
234 }
235 }
236 if let Some(dt) = try_unambiguous(normalized, &classification) {
237 return Ok(dt);
238 }
239 // std::eprintln!("done trying unambiguous");
240 if let Some(dt) = match date_order {
241 Order::Smart => {
242 let order = smart_detect_date_order(normalized, &classification);
243 let mut result: Option<Dt>;
244
245 match order {
246 OrderFirst::Day => {
247 result = try_compatible_formats(
248 normalized,
249 generate_ambiguous_day_first_candidates(&classification),
250 );
251 // std::eprintln!("done trying day first: {:?}", result);
252
253 if result.is_none() {
254 result = try_compatible_formats(
255 normalized,
256 generate_ambiguous_month_first_candidates(&classification),
257 );
258 // std::eprintln!("done trying month first: {:?}", result);
259 }
260
261 if result.is_none() {
262 result = try_compatible_formats(
263 normalized,
264 generate_ambiguous_year_first_candidates(&classification),
265 );
266 // std::eprintln!("done trying year first: {:?}", result);
267 }
268 }
269 OrderFirst::Month => {
270 result = try_compatible_formats(
271 normalized,
272 generate_ambiguous_month_first_candidates(&classification),
273 );
274 // std::eprintln!("done trying month first: {:?}", result);
275
276 if result.is_none() {
277 result = try_compatible_formats(
278 normalized,
279 generate_ambiguous_day_first_candidates(&classification),
280 );
281 // std::eprintln!("done trying day first: {:?}", result);
282 }
283
284 if result.is_none() {
285 result = try_compatible_formats(
286 normalized,
287 generate_ambiguous_year_first_candidates(&classification),
288 );
289 // std::eprintln!("done trying year first: {:?}", result);
290 }
291 }
292 OrderFirst::Year => {
293 result = try_compatible_formats(
294 normalized,
295 generate_ambiguous_year_first_candidates(&classification),
296 );
297 // std::eprintln!("done trying year first: {:?}", result);
298
299 if result.is_none() {
300 result = try_compatible_formats(
301 normalized,
302 generate_ambiguous_day_first_candidates(&classification),
303 );
304 // std::eprintln!("done trying day first: {:?}", result);
305 }
306
307 if result.is_none() {
308 result = try_compatible_formats(
309 normalized,
310 generate_ambiguous_month_first_candidates(&classification),
311 );
312 // std::eprintln!("done trying month first: {:?}", result);
313 }
314 }
315 }
316
317 result
318 }
319 Order::Year => try_compatible_formats(
320 normalized,
321 generate_ambiguous_year_first_candidates(&classification),
322 ),
323 Order::Day => try_compatible_formats(
324 normalized,
325 generate_ambiguous_day_first_candidates(&classification),
326 ),
327 Order::Month => try_compatible_formats(
328 normalized,
329 generate_ambiguous_month_first_candidates(&classification),
330 ),
331 } {
332 return Ok(dt);
333 }
334 // std::eprintln!("NOW trying numeric timestamp");
335 if classification.is_pure_numeric
336 && mode != Mode::UnixTimestamp
337 && let Some(dt) = parse_pure_numeric_unix_timestamp(
338 normalized,
339 classification.num_non_decimal_digits as usize,
340 )
341 {
342 return Ok(dt);
343 }
344 Err(an_err!(DtErrKind::InvalidInput, "{}", s))
345 }
346
347 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
348 /// but returns attoseconds since the library epoch: 2000-01-01 12:00:00 UTC
349 /// (on the UTC scale).
350 ///
351 /// Returns `Some(attos)` on success (negative for pre-2000 dates) or `None`
352 /// on any parse error.
353 #[inline]
354 pub fn str_to_attos(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
355 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_attos())
356 }
357
358 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
359 /// but returns milliseconds since the library epoch: 2000-01-01 12:00:00 UTC
360 /// (on the UTC scale).
361 ///
362 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
363 /// on any parse error.
364 #[inline]
365 pub fn str_to_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
366 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ms())
367 }
368
369 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
370 /// but returns nanoseconds since the library epoch: 2000-01-01 12:00:00 UTC
371 /// (on the UTC scale).
372 ///
373 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
374 /// on any parse error.
375 #[inline]
376 pub fn str_to_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
377 Dt::from_str_parse(s, opts).ok().map(|tp| tp.to_ns())
378 }
379
380 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
381 /// but returns milliseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
382 ///
383 /// Returns `Some(millis)` on success (negative for pre-2000 dates) or `None`
384 /// on any parse error.
385 #[inline]
386 pub fn str_to_unix_ms(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
387 Dt::from_str_parse(s, opts).ok().map(|tp| {
388 tp.to_scale_and_then_diff(Scale::UTC, Dt::UNIX_EPOCH)
389 .to_ms()
390 })
391 }
392
393 /// Same parsing logic as [`Dt::from_str_parse`](../struct.Dt.html#method.from_str_parse),
394 /// but returns nanoseconds since the UNIX epoch: (1970-01-01 00:00:00 UTC).
395 ///
396 /// Returns `Some(nanos)` on success (negative for pre-2000 dates) or `None`
397 /// on any parse error.
398 #[inline]
399 pub fn str_to_unix_ns(s: &str, opts: &Option<ParseCfg>) -> Option<i128> {
400 Dt::from_str_parse(s, opts).ok().map(|tp| {
401 tp.to_scale_and_then_diff(Scale::UTC, Dt::UNIX_EPOCH)
402 .to_ns()
403 })
404 }
405}
406
407/// Core zero-allocation helper (updated to match the new `&str` signature).
408///
409/// The `fmt` we get from the iterator is still `'static`, but it coerces automatically
410/// to `&str`, so everything continues to work.
411#[inline]
412pub(crate) fn try_compatible_formats<I>(s: &str, formats: I) -> Option<Dt>
413where
414 I: IntoIterator<Item = String>,
415{
416 formats
417 .into_iter()
418 .find_map(|fmt| Dt::from_str(s, &fmt, true, true, false).ok())
419}
420
421#[inline]
422pub(crate) fn try_unambiguous(s: &str, classification: &DateClassification) -> Option<Dt> {
423 if matches!(classification.bytes_len, 6..=8)
424 && let Some(dt) = parse_yyyy_mm(s.as_bytes())
425 {
426 return Some(dt);
427 }
428 try_compatible_formats(s, generate_unambiguous_candidates(classification))
429}