nu_command/strings/
detect_type.rs

1use chrono::{Local, TimeZone, Utc};
2use fancy_regex::{Regex, RegexBuilder};
3use nu_engine::command_prelude::*;
4use std::sync::LazyLock;
5
/// The `detect type` command, which infers a Nushell datatype from a string.
///
/// The type carries no state; all behavior lives in its `Command` implementation.
#[derive(Clone)]
pub struct DetectType;
8
9impl Command for DetectType {
10    fn name(&self) -> &str {
11        "detect type"
12    }
13
14    fn signature(&self) -> Signature {
15        Signature::build(self.name())
16            .input_output_types(vec![(Type::String, Type::Any), (Type::Any, Type::Any)])
17            .switch(
18                "prefer-filesize",
19                "For ints display them as human-readable file sizes",
20                Some('f'),
21            )
22            .switch(
23                "prefer-dmy",
24                "Prefer day-month-year format for ambiguous dates",
25                None,
26            )
27            .category(Category::Strings)
28            .allow_variants_without_examples(true)
29    }
30
31    fn description(&self) -> &str {
32        "Infer Nushell datatype from a string."
33    }
34
35    fn search_terms(&self) -> Vec<&str> {
36        vec!["convert", "conversion"]
37    }
38
39    fn examples(&self) -> Vec<Example<'_>> {
40        vec![
41            Example {
42                description: "Bool from string",
43                example: "'true' | detect type",
44                result: Some(Value::test_bool(true)),
45            },
46            Example {
47                description: "Bool is case insensitive",
48                example: "'FALSE' | detect type",
49                result: Some(Value::test_bool(false)),
50            },
51            Example {
52                description: "Int from plain digits",
53                example: "'42' | detect type",
54                result: Some(Value::test_int(42)),
55            },
56            Example {
57                description: "Int with underscores",
58                example: "'1_000_000' | detect type",
59                result: Some(Value::test_int(1_000_000)),
60            },
61            Example {
62                description: "Int with commas",
63                example: "'1,234,567' | detect type",
64                result: Some(Value::test_int(1_234_567)),
65            },
66            #[allow(clippy::approx_constant, reason = "approx PI in examples is fine")]
67            Example {
68                description: "Float from decimal",
69                example: "'3.14' | detect type",
70                result: Some(Value::test_float(3.14)),
71            },
72            Example {
73                description: "Float in scientific notation",
74                example: "'6.02e23' | detect type",
75                result: Some(Value::test_float(6.02e23)),
76            },
77            Example {
78                description: "Prefer filesize for ints",
79                example: "'1024' | detect type -f",
80                result: Some(Value::test_filesize(1024)),
81            },
82            Example {
83                description: "Date Y-M-D",
84                example: "'2022-01-01' | detect type",
85                result: Some(Value::test_date(
86                    Local.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
87                )),
88            },
89            Example {
90                description: "Date with time and offset",
91                example: "'2022-01-01T00:00:00Z' | detect type",
92                result: Some(Value::test_date(
93                    Utc.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
94                )),
95            },
96            Example {
97                description: "Date D-M-Y",
98                example: "'31-12-2021' | detect type",
99                result: Some(Value::test_date(
100                    Local
101                        .with_ymd_and_hms(2021, 12, 31, 0, 0, 0)
102                        .unwrap()
103                        .into(),
104                )),
105            },
106            Example {
107                description: "Date M-D-Y (default for ambiguous)",
108                example: "'01/02/2025' | detect type",
109                result: Some(Value::test_date(
110                    Local.with_ymd_and_hms(2025, 1, 2, 0, 0, 0).unwrap().into(),
111                )),
112            },
113            Example {
114                description: "Prefer DMY for ambiguous dates",
115                example: "'01/02/2025' | detect type --prefer-dmy",
116                result: Some(Value::test_date(
117                    Local.with_ymd_and_hms(2025, 2, 1, 0, 0, 0).unwrap().into(),
118                )),
119            },
120            Example {
121                description: "Unknown stays a string",
122                example: "'not-a-number' | detect type",
123                result: Some(Value::test_string("not-a-number")),
124            },
125        ]
126    }
127
128    fn run(
129        &self,
130        engine_state: &EngineState,
131        stack: &mut Stack,
132        call: &Call,
133        input: PipelineData,
134    ) -> Result<PipelineData, ShellError> {
135        let metadata = input
136            .metadata()
137            .map(|metadata| metadata.with_content_type(None));
138        let span = call.head;
139        let display_as_filesize = call.has_flag(engine_state, stack, "prefer-filesize")?;
140        let prefer_dmy = call.has_flag(engine_state, stack, "prefer-dmy")?;
141        let val = input.into_value(call.head)?;
142        let val = process(val, display_as_filesize, prefer_dmy, span)?;
143        Ok(val.into_pipeline_data_with_metadata(metadata))
144    }
145}
146
147// This function serves to modify the input string by swapping the day and month components
148// (e.g., turning `"01/02/2025"` into `"02/01/2025"`) so that `dtparse::parse`, which defaults to interpreting
149// ambiguous dates as month-day-year (MDY), correctly parses it as day-month-year (DMY) when `dayfirst` is true.
150// This is necessary because `dtparse::parse` doesn't expose a direct `dayfirst` option in its API, and the
151// swap ensures consistent parsing based on the regex match. It could be written more idiomatically and with
152// the regexes but this is a simpler approach and doesn't have to mess with parsing the time component.
153fn swap_day_month(input: &str) -> String {
154    let re_slash =
155        fancy_regex::Regex::new(r"(\d{1,2})/(\d{1,2})/(\d{4,})").expect("regex should be valid");
156    let swapped_slash = re_slash.replace_all(input, "$2/$1/$3");
157    let re_dash =
158        fancy_regex::Regex::new(r"(\d{1,2})-(\d{1,2})-(\d{4,})").expect("regex should be valid");
159    re_dash.replace_all(&swapped_slash, "$2-$1-$3").to_string()
160}
161
162fn parse_date_from_string_with_dayfirst(
163    input: &str,
164    span: Span,
165    dayfirst: bool,
166) -> Result<chrono::DateTime<chrono::FixedOffset>, nu_protocol::Value> {
167    let input = if dayfirst {
168        swap_day_month(input)
169    } else {
170        input.to_string()
171    };
172    match dtparse::parse(&input) {
173        Ok((native_dt, fixed_offset)) => {
174            let offset = match fixed_offset {
175                Some(offset) => offset,
176                None => *chrono::Local
177                    .from_local_datetime(&native_dt)
178                    .single()
179                    .unwrap_or_default()
180                    .offset(),
181            };
182            match offset.from_local_datetime(&native_dt) {
183                chrono::LocalResult::Single(d) => Ok(d),
184                chrono::LocalResult::Ambiguous(d, _) => Ok(d),
185                chrono::LocalResult::None => Err(nu_protocol::Value::error(
186                    nu_protocol::ShellError::DatetimeParseError {
187                        msg: input.to_string(),
188                        span,
189                    },
190                    span,
191                )),
192            }
193        }
194        Err(_) => Err(nu_protocol::Value::error(
195            nu_protocol::ShellError::DatetimeParseError {
196                msg: input.to_string(),
197                span,
198            },
199            span,
200        )),
201    }
202}
203
204// This function will check if a value matches a regular expression for a particular datatype.
205// If it does, it will convert the value to that datatype.
206fn process(
207    val: Value,
208    display_as_filesize: bool,
209    prefer_dmy: bool,
210    span: Span,
211) -> Result<Value, ShellError> {
212    // step 1: convert value to string
213    let val_str = val.coerce_str().unwrap_or_default();
214
215    // Determine the order of checking ambiguous date formats (DMY vs MDY) based on the prefer_dmy flag.
216    // This ensures that when dates like "01/02/2025" are encountered, we check the preferred format first
217    // (e.g., DMY for day-month-year or MDY for month-day-year) to handle ambiguity correctly.
218    let (first_regex, first_dayfirst, first_name) = if prefer_dmy {
219        (&DATETIME_DMY_RE, true, "DATETIME_DMY_RE")
220    } else {
221        (&DATETIME_MDY_RE, false, "DATETIME_MDY_RE")
222    };
223    let (second_regex, second_dayfirst, second_name) = if prefer_dmy {
224        (&DATETIME_MDY_RE, false, "DATETIME_MDY_RE")
225    } else {
226        (&DATETIME_DMY_RE, true, "DATETIME_DMY_RE")
227    };
228
229    // step 2: bounce string up against regexes
230    if BOOLEAN_RE.is_match(&val_str).unwrap_or(false) {
231        let bval = val_str
232            .to_lowercase()
233            .parse::<bool>()
234            .map_err(|_| ShellError::CantConvert {
235                to_type: "string".to_string(),
236                from_type: "bool".to_string(),
237                span,
238                help: Some(format!(
239                    r#""{val_str}" does not represent a valid boolean value"#
240                )),
241            })?;
242
243        Ok(Value::bool(bval, span))
244    } else if FLOAT_RE.is_match(&val_str).unwrap_or(false) {
245        let fval = val_str
246            .parse::<f64>()
247            .map_err(|_| ShellError::CantConvert {
248                to_type: "float".to_string(),
249                from_type: "string".to_string(),
250                span,
251                help: Some(format!(
252                    r#""{val_str}" does not represent a valid floating point value"#
253                )),
254            })?;
255
256        Ok(Value::float(fval, span))
257    } else if INTEGER_RE.is_match(&val_str).unwrap_or(false) {
258        let ival = val_str
259            .parse::<i64>()
260            .map_err(|_| ShellError::CantConvert {
261                to_type: "int".to_string(),
262                from_type: "string".to_string(),
263                span,
264                help: Some(format!(
265                    r#""{val_str}" does not represent a valid integer value"#
266                )),
267            })?;
268
269        if display_as_filesize {
270            Ok(Value::filesize(ival, span))
271        } else {
272            Ok(Value::int(ival, span))
273        }
274    } else if INTEGER_WITH_DELIMS_RE.is_match(&val_str).unwrap_or(false) {
275        let mut val_str = val_str.into_owned();
276        val_str.retain(|x| !['_', ','].contains(&x));
277
278        let ival = val_str
279            .parse::<i64>()
280            .map_err(|_| ShellError::CantConvert {
281                to_type: "int".to_string(),
282                from_type: "string".to_string(),
283                span,
284                help: Some(format!(
285                    r#""{val_str}" does not represent a valid integer value"#
286                )),
287            })?;
288
289        if display_as_filesize {
290            Ok(Value::filesize(ival, span))
291        } else {
292            Ok(Value::int(ival, span))
293        }
294    } else if first_regex.is_match(&val_str).unwrap_or(false) {
295        let dt =
296            parse_date_from_string_with_dayfirst(&val_str, span, first_dayfirst).map_err(|_| {
297                ShellError::CantConvert {
298                    to_type: "datetime".to_string(),
299                    from_type: "string".to_string(),
300                    span,
301                    help: Some(format!(
302                        r#""{val_str}" does not represent a valid {first_name} value"#
303                    )),
304                }
305            })?;
306        Ok(Value::date(dt, span))
307    } else if second_regex.is_match(&val_str).unwrap_or(false) {
308        let dt = parse_date_from_string_with_dayfirst(&val_str, span, second_dayfirst).map_err(
309            |_| ShellError::CantConvert {
310                to_type: "datetime".to_string(),
311                from_type: "string".to_string(),
312                span,
313                help: Some(format!(
314                    r#""{val_str}" does not represent a valid {second_name} value"#
315                )),
316            },
317        )?;
318        Ok(Value::date(dt, span))
319    } else if DATETIME_YMD_RE.is_match(&val_str).unwrap_or(false) {
320        let dt = parse_date_from_string_with_dayfirst(&val_str, span, false).map_err(|_| {
321            ShellError::CantConvert {
322                to_type: "datetime".to_string(),
323                from_type: "string".to_string(),
324                span,
325                help: Some(format!(
326                    r#""{val_str}" does not represent a valid DATETIME_YMD_RE value"#
327                )),
328            }
329        })?;
330
331        Ok(Value::date(dt, span))
332    } else if DATETIME_YMDZ_RE.is_match(&val_str).unwrap_or(false) {
333        let dt = parse_date_from_string_with_dayfirst(&val_str, span, false).map_err(|_| {
334            ShellError::CantConvert {
335                to_type: "datetime".to_string(),
336                from_type: "string".to_string(),
337                span,
338                help: Some(format!(
339                    r#""{val_str}" does not represent a valid DATETIME_YMDZ_RE value"#
340                )),
341            }
342        })?;
343
344        Ok(Value::date(dt, span))
345    } else {
346        // If we don't know what it is, just return whatever it was passed in as
347        Ok(val)
348    }
349}
350
351// region: datatype regexes
/// Verbose-mode (`(?x)`) pattern for a day-month-year date with an optional time.
///
/// The whole string must match: optional surrounding quote, one or two day digits, a `-`
/// or `/` separator, a month in the range 1-12 (leading zero optional), another separator
/// and a year of at least four digits. The optional time part is `T` or a space followed by
/// two-digit hour and minute, optionally seconds and up to nine subsecond digits.
/// Day and time fields are not range-checked.
///
/// Examples: "31-12-2021", "01/01/2022", "15-06-2023 12:30"
const DATETIME_DMY_PATTERN: &str = r#"(?x)
        ^
        ['"]?                        # optional quotes
        (?:\d{1,2})                  # day
        [-/]                         # separator
        (?P<month>0?[1-9]|1[0-2])        # month
        [-/]                         # separator
        (?:\d{4,})                   # year
        (?:
            [T\ ]                    # separator
            (?:\d{2})                # hour
            :?                       # separator
            (?:\d{2})                # minute
            (?:
                :?                   # separator
                (?:\d{2})            # second
                (?:
                    \.(?:\d{1,9})    # subsecond
                )?
            )?
        )?
        ['"]?                        # optional quotes
        $
        "#;

/// `DATETIME_DMY_PATTERN`, compiled on first use.
static DATETIME_DMY_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid")
});
/// Verbose-mode (`(?x)`) pattern for a year-month-day date with an optional time and no
/// offset.
///
/// The whole string must match: optional surrounding quote, a year of at least four digits,
/// a `-` or `/` separator, a month in the range 1-12 (leading zero optional), another
/// separator and one or two day digits. The optional time part is `T` or a space followed
/// by two-digit hour and minute, optionally seconds and up to nine subsecond digits.
/// Day and time fields are not range-checked.
///
/// Examples: "2022-01-01", "2022/01/01", "2022-01-01T00:00:00"
const DATETIME_YMD_PATTERN: &str = r#"(?x)
        ^
        ['"]?                      # optional quotes
        (?:\d{4,})                 # year
        [-/]                       # separator
        (?P<month>0?[1-9]|1[0-2])      # month
        [-/]                       # separator
        (?:\d{1,2})                # day
        (?:
            [T\ ]                  # separator
            (?:\d{2})              # hour
            :?                     # separator
            (?:\d{2})              # minute
            (?:
                :?                 # separator
                (?:\d{2})          # seconds
                (?:
                    \.(?:\d{1,9})  # subsecond
                )?
            )?
        )?
        ['"]?                      # optional quotes
        $
        "#;
/// `DATETIME_YMD_PATTERN`, compiled on first use.
static DATETIME_YMD_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid")
});
/// Verbose-mode (`(?x)`) pattern for a year-month-day datetime that carries an offset.
///
/// Unlike `DATETIME_YMD_PATTERN`, the time part (hour and minute, optionally seconds and up
/// to nine subsecond digits) is mandatory, and it must be followed by an optional single
/// whitespace character and then either a `+hh:mm`/`-hh:mm` offset (the colon is optional)
/// or a literal `Z`.
///
/// Examples: "2022-01-01T00:00:00Z", "2022-01-01T00:00:00+01:00"
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
        ^
        ['"]?                  # optional quotes
        (?:\d{4,})             # year
        [-/]                   # separator
        (?P<month>0?[1-9]|1[0-2])  # month
        [-/]                   # separator
        (?:\d{1,2})            # day
        [T\ ]                  # separator
        (?:\d{2})              # hour
        :?                     # separator
        (?:\d{2})              # minute
        (?:
            :?                 # separator
            (?:\d{2})          # second
            (?:
                \.(?:\d{1,9})  # subsecond
            )?
        )?
        \s?                    # optional space
        (?:
            # offset (e.g. +01:00)
            [+-](?:\d{2})
            :?
            (?:\d{2})
            # or Zulu suffix
            |Z
        )
        ['"]?                  # optional quotes
        $
        "#;
/// `DATETIME_YMDZ_PATTERN`, compiled on first use.
static DATETIME_YMDZ_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid")
});
444
/// Verbose-mode (`(?x)`) pattern for a month-day-year date with an optional time.
///
/// The whole string must match: optional surrounding quote, a month in the range 1-12
/// (leading zero optional), a `-` or `/` separator, one or two day digits, another
/// separator and a year of at least four digits. The optional time part is `T` or a space
/// followed by two-digit hour and minute, optionally seconds and up to nine subsecond
/// digits. Day and time fields are not range-checked.
///
/// Examples: "09/24/2012", "09/24/2012 02:43:48", "01/01/2022"
const DATETIME_MDY_PATTERN: &str = r#"(?x)
        ^
        ['"]?                        # optional quotes
        (?P<month>0?[1-9]|1[0-2])        # month
        [-/]                         # separator
        (?:\d{1,2})                  # day
        [-/]                         # separator
        (?:\d{4,})                   # year
        (?:
            [T\ ]                    # separator
            (?:\d{2})                # hour
            :?                       # separator
            (?:\d{2})                # minute
            (?:
                :?                   # separator
                (?:\d{2})            # second
                (?:
                    \.(?:\d{1,9})    # subsecond
                )?
            )?
        )?
        ['"]?                        # optional quotes
        $
        "#;

/// `DATETIME_MDY_PATTERN`, compiled on first use.
static DATETIME_MDY_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_MDY_PATTERN).expect("datetime_mdy_pattern should be valid")
});
474
/// Matches a float literal: optional leading whitespace and sign, followed by one of
/// - digits with a fractional part (the integer part may be empty) and an optional exponent,
/// - the literal `inf` or `NaN` (case-sensitive),
/// - integer digits with a mandatory exponent,
/// - integer digits followed by a trailing `.`.
///
/// Plain integers without `.` or exponent do not match.
///
/// Examples: "0.1", "3.0", "3.00001", "-9.9990e-003", "inf", "NaN"
static FLOAT_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
        .expect("float pattern should be valid")
});
480
/// Matches a plain integer: optional leading whitespace, an optional `-` sign and one or
/// more digits. A leading `+` is not accepted.
///
/// Examples: "0", "1", "10", "100", "1000"
static INTEGER_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*-?(\d+)$").expect("integer pattern should be valid"));
484
/// Matches an integer written with thousands separators: optional leading whitespace, an
/// optional `-` sign, one to three digits, then one or more groups of `,` or `_` followed
/// by exactly three digits. The two separator characters may be mixed within one number.
///
/// Examples: "1_000", "10_000", "100_000", "1,000", "10,000"
static INTEGER_WITH_DELIMS_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
        .expect("integer with delimiters pattern should be valid")
});
490
/// Matches the words `true` and `false`, case-insensitively.
///
/// NOTE(review): the `\s*` only belongs to the first alternative, so leading whitespace is
/// tolerated before `true` but not before `false` - confirm whether that asymmetry is
/// intended.
///
/// Examples: "true", "false", "True", "FALSE"
static BOOLEAN_RE: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"^\s*(true)$|^(false)$")
        .case_insensitive(true)
        .build()
        .expect("boolean pattern should be valid")
});
498// endregion:
499
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `re` matches every string in `inputs`.
    fn assert_all_match(re: &fancy_regex::Regex, inputs: &[&str]) {
        for input in inputs {
            assert!(
                re.is_match(input).unwrap(),
                "expected a match for {input:?}"
            );
        }
    }

    /// Asserts that `re` matches none of the strings in `inputs`.
    fn assert_none_match(re: &fancy_regex::Regex, inputs: &[&str]) {
        for input in inputs {
            assert!(
                !re.is_match(input).unwrap(),
                "expected no match for {input:?}"
            );
        }
    }

    /// Runs `process` on `input` and asserts that the result is the date
    /// `year`-`month`-`day` at midnight in the local timezone.
    fn assert_local_midnight(input: &str, prefer_dmy: bool, year: i32, month: u32, day: u32) {
        use chrono::{DateTime, FixedOffset, Local, TimeZone};

        let span = Span::test_data();
        let Value::Date { val, .. } =
            process(Value::string(input, span), false, prefer_dmy, span).unwrap()
        else {
            panic!("Expected date");
        };

        let expected = DateTime::<FixedOffset>::from(
            Local.with_ymd_and_hms(year, month, day, 0, 0, 0).unwrap(),
        );
        assert_eq!(val, expected);
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(DetectType)
    }

    #[test]
    fn test_float_parse() {
        // The regex should work on all these but nushell's float parser is more strict
        assert_all_match(
            &FLOAT_RE,
            &[
                "0.1",
                "3.0",
                "3.00001",
                "-9.9990e-003",
                "9.9990e+003",
                "9.9990E+003",
                "9.9990E+003",
                ".5",
                "2.5E-10",
                "2.5e10",
                "NaN",
                "-NaN",
                "-inf",
                "inf",
                "-7e-05",
                "7e-05",
                "+7e+05",
            ],
        );
    }

    #[test]
    fn test_int_parse() {
        assert_all_match(
            &INTEGER_RE,
            &[
                "0",
                "1",
                "10",
                "100",
                "1000",
                "10000",
                "100000",
                "1000000",
                "10000000",
                "100000000",
                "1000000000",
                "10000000000",
                "100000000000",
            ],
        );

        // Underscore-delimited
        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1_000",
                "10_000",
                "100_000",
                "1_000_000",
                "10_000_000",
                "100_000_000",
                "1_000_000_000",
                "10_000_000_000",
                "100_000_000_000",
            ],
        );

        // Comma-delimited
        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1,000",
                "10,000",
                "100,000",
                "1,000,000",
                "10,000,000",
                "100,000,000",
                "1,000,000,000",
                "10,000,000,000",
            ],
        );
    }

    #[test]
    fn test_bool_parse() {
        assert_all_match(&BOOLEAN_RE, &["true", "false"]);
        assert_none_match(&BOOLEAN_RE, &["1", "0"]);
    }

    #[test]
    fn test_datetime_ymdz_pattern() {
        assert_all_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00Z",
                "2022-01-01T00:00:00.123456789Z",
                "2022-01-01T00:00:00+01:00",
                "2022-01-01T00:00:00.123456789+01:00",
                "2022-01-01T00:00:00-01:00",
                "2022-01-01T00:00:00.123456789-01:00",
                "'2022-01-01T00:00:00Z'",
            ],
        );

        // Missing or malformed offsets must be rejected.
        assert_none_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.",
                "2022-01-01T00:00:00.123456789",
                "2022-01-01T00:00:00+01",
                "2022-01-01T00:00:00+01:0",
                "2022-01-01T00:00:00+1:00",
                "2022-01-01T00:00:00.123456789+01",
                "2022-01-01T00:00:00.123456789+01:0",
                "2022-01-01T00:00:00.123456789+1:00",
                "2022-01-01T00:00:00-01",
                "2022-01-01T00:00:00-01:0",
                "2022-01-01T00:00:00-1:00",
                "2022-01-01T00:00:00.123456789-01",
                "2022-01-01T00:00:00.123456789-01:0",
                "2022-01-01T00:00:00.123456789-1:00",
            ],
        );
    }

    #[test]
    fn test_datetime_ymd_pattern() {
        assert_all_match(
            &DATETIME_YMD_RE,
            &[
                "2022-01-01",
                "2022/01/01",
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.000000000",
                "'2022-01-01'",
            ],
        );

        // The pattern does not range-check the day or time fields (e.g. day 32, hour 24,
        // minute 60), so such inputs are deliberately not asserted here.
        assert_none_match(&DATETIME_YMD_RE, &["2022-01-01T00:00:00.0000000000"]);
    }

    #[test]
    fn test_datetime_dmy_pattern() {
        assert_all_match(
            &DATETIME_DMY_RE,
            &["31-12-2021", "01/01/2022", "15-06-2023 12:30"],
        );
        assert_none_match(
            &DATETIME_DMY_RE,
            &["2022-13-01", "2022-01-32", "2022-01-01 24:00"],
        );
    }

    #[test]
    fn test_datetime_mdy_pattern() {
        assert_all_match(
            &DATETIME_MDY_RE,
            &["09/24/2012", "09/24/2012 02:43:48", "01/01/2022"],
        );
        assert_none_match(
            &DATETIME_MDY_RE,
            &["09/24/123", "09/24/2012 2:43:48", "009/24/2012"],
        );
    }

    #[test]
    fn test_ambiguous_date_default() {
        // Ambiguous date defaults to MDY (Jan 2)
        assert_local_midnight("01/02/2025", false, 2025, 1, 2);
        // Non-ambiguous DMY (Feb 13)
        assert_local_midnight("13/02/2025", false, 2025, 2, 13);
        // Non-ambiguous MDY (Feb 13)
        assert_local_midnight("02/13/2025", false, 2025, 2, 13);
    }

    #[test]
    fn test_ambiguous_date_prefer_dmy() {
        // Ambiguous date with prefer_dmy=true -> parsed as Feb 1 (DMY)
        assert_local_midnight("01/02/2025", true, 2025, 2, 1);
        // Non-ambiguous dates are unaffected by the preference (Feb 13 either way)
        assert_local_midnight("13/02/2025", true, 2025, 2, 13);
        assert_local_midnight("02/13/2025", true, 2025, 2, 13);
    }
}