// deep_time/alloc_parse/types.rs

use crate::Dt;
use alloc::string::String;
use alloc::vec::Vec;
4
/// Strategy for resolving the field order (year-first, day-first or
/// month-first) of ambiguous numeric dates.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "tsify", derive(tsify::Tsify))]
pub enum Order {
    /// **Default & recommended** — Research-backed modern heuristic that
    /// delivers the highest real-world success rate while remaining predictable.
    ///
    /// It uses the following prioritized rules (applied in this exact order):
    ///
    /// 1. **Pure-numeric compact formats** (≥ 6 digits with no separators,
    ///    e.g. `240314153045`, `20240315`, `YYMMDDHHMMSS`):
    ///    treated as **Year-first** (`%Y%m%d` / `%y%m%d`).
    ///    These are overwhelmingly used in logs, filenames, databases, APIs,
    ///    configs, and JSON for sortability.
    ///
    /// 2. **Delimited formats that start with a plausible 4-digit year**
    ///    (1900–2100): treated as **Year-first**.
    ///
    /// 3. **Numeric plausibility check** (strongest universal signal):
    ///    - First number is 13–31 → **Day-first** (international/European style).
    ///    - First number is 1–12 **and** second number is 13–31 → **Month-first**
    ///      (US style).
    ///
    /// 4. **Strong ISO 8601 / timestamp markers** (`T` connector, `Z`, numeric
    ///    offsets, or IANA timezone names) → **Year-first**.
    ///
    /// 5. **Fallback**:
    ///    - With the `locale` feature enabled: respects the system locale
    ///      preference (Day-first in most of the world).
    ///    - Without the `locale` feature: **Day-first** (global majority).
    ///
    /// The `/` separator is deliberately ignored in the plausibility step
    /// because it is culturally ambiguous.
    ///
    /// Once the preferred ordering is determined, the parser tries the
    /// corresponding ambiguous candidate formats (Year-first → Day-first →
    /// Month-first, or the reverse, depending on the detected order) and falls
    /// back gracefully.
    #[default]
    Smart,
    /// Force **Year-first** only (`YYYY/MM/DD` or `YY/MM/DD`).
    Year,
    /// Force **Day-first** only (`DD/MM/YYYY`).
    Day,
    /// Force **Month-first** only (`MM/DD/YYYY`).
    Month,
}
52
/// Selects how purely numeric input is interpreted.
///
/// Only relevant for purely numeric dates.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "tsify", derive(tsify::Tsify))]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum Mode {
    /// **Default mode** — Smart heuristic:
    /// - 5/7-digit pure-numeric inside `LEGACY_ORDINAL_YEAR_RANGE` → treated as
    ///   business ordinal (YYYYDDD / YYDDD)
    /// - Outside that range or invalid ordinal → treated as MJD or JD
    #[default]
    Auto,
    /// When combined with a provided `Vec` of formats in `parse`, no other
    /// formats are tried.
    Explicit,
    /// The input is some sort of Unix timestamp.
    UnixTimestamp,
    /// Business/legacy-only mode:
    /// Only accepts ordinal dates (YYYYDDD / YYDDD). No astronomy (JD/MJD) support.
    /// Strict and predictable for ERP/mainframe data.
    Legacy,
    /// Scientific / astronomy-first mode:
    /// Prioritizes MJD (5-digit) and JD (7-digit). Ordinals are only fallback.
    /// Use this when parsing data from astronomy tools or large numeric epochs.
    Scientific,
}
76
77#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
78#[cfg_attr(feature = "tsify", derive(tsify::Tsify))]
79#[derive(Clone, Debug, PartialEq)]
80pub struct ParseCfg {
81    /// Explicit list of formats to try **in the exact order given**.
82    ///
83    /// If this is provided and the vec is non-empty and the mode is Explicit
84    /// then only these formats are tried and `mode` and `order` are ignored.
85    ///
86    /// If the mode is not Explicit then after trying the formats in parse the
87    /// rest of the parser will continue as normal, using `mode` and `order`.
88    ///
89    /// Example:
90    /// ```js
91    /// parse: ["%Y-%m-%d", "%d/%m/%Y", "%m/%d/%Y", "%d.%m.%Y"]
92    /// ```
93    #[cfg_attr(feature = "serde", serde(default))]
94    pub parse: Option<Vec<String>>,
95
96    /// Controls which preset format sets are used (astronomy/scientific formats,
97    /// legacy business rules, etc.).
98    #[cfg_attr(feature = "serde", serde(default))]
99    pub mode: Mode,
100
101    /// Controls ambiguous numeric dates.
102    #[cfg_attr(feature = "serde", serde(default))]
103    pub order: Order,
104
105    /// Sets language to use, not persistent.
106    #[cfg_attr(feature = "serde", serde(default))]
107    pub lang: Lang,
108
109    /// Whether to lowercase the input:
110    /// ONLY set to `false` if the &str is already lowercase.
111    #[cfg_attr(feature = "serde", serde(default = "default_true"))]
112    pub to_lower: bool,
113
114    /// Whether to parse relative dates as well as normal dates.
115    #[cfg_attr(feature = "serde", serde(default = "default_true"))]
116    pub relative: bool,
117
118    /// **Reference ("current") time** used for relative expressions:
119    /// - "tomorrow", "next Friday", "in 3 days", "next week"
120    /// - If `Some`, this `Dt` is used as "now" (overrides everything).
121    /// - If `None` + `std` feature enabled: automatically uses real system time.
122    /// - If `None` + no `std`: parsing relative dates will fail with a clear error.
123    #[cfg_attr(feature = "serde", serde(default))]
124    pub ref_time: Option<Dt>,
125}
126
/// Serde default provider for boolean fields that default to `true`
/// (referenced through `serde(default = "default_true")`).
#[cfg(feature = "serde")]
fn default_true() -> bool {
    true
}
131
132impl Default for ParseCfg {
133    fn default() -> Self {
134        Self {
135            parse: None,
136            mode: Mode::default(),
137            order: Order::default(),
138            lang: Lang::default(),
139            to_lower: true,
140            relative: true,
141            ref_time: None,
142        }
143    }
144}
145
/// Which date component comes first in a concrete numeric ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum OrderFirst {
    /// Year-Month-Day ordering (ISO 8601 style, `YYYY-MM-DD`, `20240315`, etc.)
    Year,
    /// Month-Day-Year ordering (US / some English locales, `MM/DD/YYYY`)
    Month,
    /// Day-Month-Year ordering (most of the world, `DD/MM/YYYY`, `DD.MM.YYYY`)
    Day,
}
155
/// Accumulates static string pieces together with flags recording which of
/// the year / month / day components have been seen so far.
///
/// NOTE(review): presumably used to assemble candidate format strings for
/// ambiguous dates — confirm against the call sites.
#[derive(Clone)]
pub(crate) struct AmBuilder {
    /// Pieces collected so far, in insertion order.
    pub pieces: Vec<&'static str>,
    /// Whether a year component has been seen.
    pub seen_year: bool,
    /// Whether a month component has been seen.
    pub seen_month: bool,
    /// Whether a day component has been seen.
    pub seen_day: bool,
}

/// Pushes `s` onto the `pieces` of every builder in `builders`.
///
/// Takes a mutable slice rather than `&mut Vec<_>`: the function never
/// changes the number of builders, and existing callers holding a
/// `&mut Vec<AmBuilder>` still work through deref coercion.
#[inline]
pub(crate) fn append_to_all(builders: &mut [AmBuilder], s: &'static str) {
    for builder in builders.iter_mut() {
        builder.pieces.push(s);
    }
}
170
/// Language codes following the ISO 639-1 standard (two-letter codes).
///
/// Each variant is the capitalised form of its code; the default is
/// [`Lang::En`] (English). Variant order is significant for the derived
/// `PartialOrd` / `Ord` implementations.
// NOTE(review): `dead_code` is allowed presumably because not every variant
// is constructed inside the crate — confirm.
#[allow(dead_code)]
#[non_exhaustive]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "tsify", derive(tsify::Tsify))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum Lang {
    /// English (`en`) — the default.
    #[default]
    En,
    /// Afar (`aa`)
    Aa,
    /// Abkhazian (`ab`)
    Ab,
    /// Avestan (`ae`)
    Ae,
    /// Afrikaans (`af`)
    Af,
    /// Akan (`ak`)
    Ak,
    /// Amharic (`am`)
    Am,
    /// Aragonese (`an`)
    An,
    /// Arabic (`ar`)
    Ar,
    /// Assamese (`as`)
    As,
    /// Avaric (`av`)
    Av,
    /// Aymara (`ay`)
    Ay,
    /// Azerbaijani (`az`)
    Az,
    /// Bashkir (`ba`)
    Ba,
    /// Belarusian (`be`)
    Be,
    /// Bulgarian (`bg`)
    Bg,
    /// Bihari languages (`bh`)
    Bh,
    /// Bislama (`bi`)
    Bi,
    /// Bambara (`bm`)
    Bm,
    /// Bengali (`bn`)
    Bn,
    /// Tibetan (`bo`)
    Bo,
    /// Breton (`br`)
    Br,
    /// Bosnian (`bs`)
    Bs,
    /// Catalan (`ca`)
    Ca,
    /// Chechen (`ce`)
    Ce,
    /// Chamorro (`ch`)
    Ch,
    /// Corsican (`co`)
    Co,
    /// Cree (`cr`)
    Cr,
    /// Czech (`cs`)
    Cs,
    /// Church Slavic (`cu`)
    Cu,
    /// Chuvash (`cv`)
    Cv,
    /// Welsh (`cy`)
    Cy,
    /// Danish (`da`)
    Da,
    /// German (`de`)
    De,
    /// Divehi (`dv`)
    Dv,
    /// Dzongkha (`dz`)
    Dz,
    /// Ewe (`ee`)
    Ee,
    /// Greek (`el`)
    El,
    /// Esperanto (`eo`)
    Eo,
    /// Spanish (`es`)
    Es,
    /// Estonian (`et`)
    Et,
    /// Basque (`eu`)
    Eu,
    /// Persian (`fa`)
    Fa,
    /// Fulah (`ff`)
    Ff,
    /// Finnish (`fi`)
    Fi,
    /// Fijian (`fj`)
    Fj,
    /// Faroese (`fo`)
    Fo,
    /// French (`fr`)
    Fr,
    /// Western Frisian (`fy`)
    Fy,
    /// Irish (`ga`)
    Ga,
    /// Scottish Gaelic (`gd`)
    Gd,
    /// Galician (`gl`)
    Gl,
    /// Guarani (`gn`)
    Gn,
    /// Gujarati (`gu`)
    Gu,
    /// Manx (`gv`)
    Gv,
    /// Hausa (`ha`)
    Ha,
    /// Hebrew (`he`)
    He,
    /// Hindi (`hi`)
    Hi,
    /// Hiri Motu (`ho`)
    Ho,
    /// Croatian (`hr`)
    Hr,
    /// Haitian Creole (`ht`)
    Ht,
    /// Hungarian (`hu`)
    Hu,
    /// Armenian (`hy`)
    Hy,
    /// Herero (`hz`)
    Hz,
    /// Interlingua (`ia`)
    Ia,
    /// Indonesian (`id`)
    Id,
    /// Interlingue (`ie`)
    Ie,
    /// Igbo (`ig`)
    Ig,
    /// Sichuan Yi (`ii`)
    Ii,
    /// Inupiaq (`ik`)
    Ik,
    /// Ido (`io`)
    Io,
    /// Icelandic (`is`)
    Is,
    /// Italian (`it`)
    It,
    /// Inuktitut (`iu`)
    Iu,
    /// Japanese (`ja`)
    Ja,
    /// Javanese (`jv`)
    Jv,
    /// Georgian (`ka`)
    Ka,
    /// Kongo (`kg`)
    Kg,
    /// Kikuyu (`ki`)
    Ki,
    /// Kuanyama (`kj`)
    Kj,
    /// Kazakh (`kk`)
    Kk,
    /// Greenlandic (`kl`)
    Kl,
    /// Khmer (`km`)
    Km,
    /// Kannada (`kn`)
    Kn,
    /// Korean (`ko`)
    Ko,
    /// Kanuri (`kr`)
    Kr,
    /// Kashmiri (`ks`)
    Ks,
    /// Kurdish (`ku`)
    Ku,
    /// Komi (`kv`)
    Kv,
    /// Cornish (`kw`)
    Kw,
    /// Kyrgyz (`ky`)
    Ky,
    /// Latin (`la`)
    La,
    /// Luxembourgish (`lb`)
    Lb,
    /// Ganda (`lg`)
    Lg,
    /// Limburgish (`li`)
    Li,
    /// Lingala (`ln`)
    Ln,
    /// Lao (`lo`)
    Lo,
    /// Lithuanian (`lt`)
    Lt,
    /// Luba-Katanga (`lu`)
    Lu,
    /// Latvian (`lv`)
    Lv,
    /// Malagasy (`mg`)
    Mg,
    /// Marshallese (`mh`)
    Mh,
    /// Maori (`mi`)
    Mi,
    /// Macedonian (`mk`)
    Mk,
    /// Malayalam (`ml`)
    Ml,
    /// Mongolian (`mn`)
    Mn,
    /// Marathi (`mr`)
    Mr,
    /// Malay (`ms`)
    Ms,
    /// Maltese (`mt`)
    Mt,
    /// Burmese (`my`)
    My,
    /// Nauru (`na`)
    Na,
    /// Norwegian Bokmål (`nb`)
    Nb,
    /// North Ndebele (`nd`)
    Nd,
    /// Nepali (`ne`)
    Ne,
    /// Ndonga (`ng`)
    Ng,
    /// Dutch (`nl`)
    Nl,
    /// Norwegian Nynorsk (`nn`)
    Nn,
    /// Norwegian (`no`)
    No,
    /// South Ndebele (`nr`)
    Nr,
    /// Navajo (`nv`)
    Nv,
    /// Chichewa (`ny`)
    Ny,
    /// Occitan (`oc`)
    Oc,
    /// Ojibwa (`oj`)
    Oj,
    /// Oromo (`om`)
    Om,
    /// Oriya (`or`)
    Or,
    /// Ossetian (`os`)
    Os,
    /// Punjabi (`pa`)
    Pa,
    /// Pali (`pi`)
    Pi,
    /// Polish (`pl`)
    Pl,
    /// Pashto (`ps`)
    Ps,
    /// Portuguese (`pt`)
    Pt,
    /// Quechua (`qu`)
    Qu,
    /// Romansh (`rm`)
    Rm,
    /// Rundi (`rn`)
    Rn,
    /// Romanian (`ro`)
    Ro,
    /// Russian (`ru`)
    Ru,
    /// Kinyarwanda (`rw`)
    Rw,
    /// Sanskrit (`sa`)
    Sa,
    /// Sardinian (`sc`)
    Sc,
    /// Sindhi (`sd`)
    Sd,
    /// Northern Sami (`se`)
    Se,
    /// Sango (`sg`)
    Sg,
    /// Sinhala (`si`)
    Si,
    /// Slovak (`sk`)
    Sk,
    /// Slovenian (`sl`)
    Sl,
    /// Samoan (`sm`)
    Sm,
    /// Shona (`sn`)
    Sn,
    /// Somali (`so`)
    So,
    /// Albanian (`sq`)
    Sq,
    /// Serbian (`sr`)
    Sr,
    /// Swati (`ss`)
    Ss,
    /// Southern Sotho (`st`)
    St,
    /// Sundanese (`su`)
    Su,
    /// Swedish (`sv`)
    Sv,
    /// Swahili (`sw`)
    Sw,
    /// Tamil (`ta`)
    Ta,
    /// Telugu (`te`)
    Te,
    /// Tajik (`tg`)
    Tg,
    /// Thai (`th`)
    Th,
    /// Tigrinya (`ti`)
    Ti,
    /// Turkmen (`tk`)
    Tk,
    /// Tagalog (`tl`)
    Tl,
    /// Tswana (`tn`)
    Tn,
    /// Tonga (`to`)
    To,
    /// Turkish (`tr`)
    Tr,
    /// Tsonga (`ts`)
    Ts,
    /// Tatar (`tt`)
    Tt,
    /// Twi (`tw`)
    Tw,
    /// Tahitian (`ty`)
    Ty,
    /// Uyghur (`ug`)
    Ug,
    /// Ukrainian (`uk`)
    Uk,
    /// Urdu (`ur`)
    Ur,
    /// Uzbek (`uz`)
    Uz,
    /// Venda (`ve`)
    Ve,
    /// Vietnamese (`vi`)
    Vi,
    /// Volapük (`vo`)
    Vo,
    /// Walloon (`wa`)
    Wa,
    /// Wolof (`wo`)
    Wo,
    /// Xhosa (`xh`)
    Xh,
    /// Yiddish (`yi`)
    Yi,
    /// Yoruba (`yo`)
    Yo,
    /// Zhuang (`za`)
    Za,
    /// Chinese (`zh`)
    Zh,
    /// Zulu (`zu`)
    Zu,
}
549
550impl alloc::fmt::Display for Lang {
551    fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
552        write!(f, "{:?}", self)
553    }
554}