Skip to main content

nu_command/strings/
detect_type.rs

1use chrono::{Local, TimeZone, Utc};
2use fancy_regex::{Regex, RegexBuilder};
3use nu_engine::command_prelude::*;
4use nu_protocol::PipelineMetadata;
5use std::sync::LazyLock;
6
/// The `detect type` command: inspects the text form of its input and, when that text looks
/// like a bool, number or datetime, returns a value of that type instead.
#[derive(Clone)]
pub struct DetectType;
9
10impl Command for DetectType {
11    fn name(&self) -> &str {
12        "detect type"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build(self.name())
17            .input_output_types(vec![(Type::String, Type::Any), (Type::Any, Type::Any)])
18            .switch(
19                "prefer-filesize",
20                "For ints display them as human-readable file sizes.",
21                Some('f'),
22            )
23            .switch(
24                "prefer-dmy",
25                "Prefer day-month-year format for ambiguous dates.",
26                None,
27            )
28            .category(Category::Strings)
29            .allow_variants_without_examples(true)
30    }
31
32    fn description(&self) -> &str {
33        "Infer Nushell datatype from a string."
34    }
35
36    fn search_terms(&self) -> Vec<&str> {
37        vec!["convert", "conversion"]
38    }
39
40    fn examples(&self) -> Vec<Example<'_>> {
41        vec![
42            Example {
43                description: "Bool from string",
44                example: "'true' | detect type",
45                result: Some(Value::test_bool(true)),
46            },
47            Example {
48                description: "Bool is case insensitive",
49                example: "'FALSE' | detect type",
50                result: Some(Value::test_bool(false)),
51            },
52            Example {
53                description: "Int from plain digits",
54                example: "'42' | detect type",
55                result: Some(Value::test_int(42)),
56            },
57            Example {
58                description: "Int with underscores",
59                example: "'1_000_000' | detect type",
60                result: Some(Value::test_int(1_000_000)),
61            },
62            Example {
63                description: "Int with commas",
64                example: "'1,234,567' | detect type",
65                result: Some(Value::test_int(1_234_567)),
66            },
67            #[allow(clippy::approx_constant, reason = "approx PI in examples is fine")]
68            Example {
69                description: "Float from decimal",
70                example: "'3.14' | detect type",
71                result: Some(Value::test_float(3.14)),
72            },
73            Example {
74                description: "Float in scientific notation",
75                example: "'6.02e23' | detect type",
76                result: Some(Value::test_float(6.02e23)),
77            },
78            Example {
79                description: "Prefer filesize for ints",
80                example: "'1024' | detect type -f",
81                result: Some(Value::test_filesize(1024)),
82            },
83            Example {
84                description: "Date Y-M-D",
85                example: "'2022-01-01' | detect type",
86                result: Some(Value::test_date(
87                    Local.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
88                )),
89            },
90            Example {
91                description: "Date with time and offset",
92                example: "'2022-01-01T00:00:00Z' | detect type",
93                result: Some(Value::test_date(
94                    Utc.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
95                )),
96            },
97            Example {
98                description: "Date D-M-Y",
99                example: "'31-12-2021' | detect type",
100                result: Some(Value::test_date(
101                    Local
102                        .with_ymd_and_hms(2021, 12, 31, 0, 0, 0)
103                        .unwrap()
104                        .into(),
105                )),
106            },
107            Example {
108                description: "Date M-D-Y (default for ambiguous)",
109                example: "'01/02/2025' | detect type",
110                result: Some(Value::test_date(
111                    Local.with_ymd_and_hms(2025, 1, 2, 0, 0, 0).unwrap().into(),
112                )),
113            },
114            Example {
115                description: "Prefer DMY for ambiguous dates",
116                example: "'01/02/2025' | detect type --prefer-dmy",
117                result: Some(Value::test_date(
118                    Local.with_ymd_and_hms(2025, 2, 1, 0, 0, 0).unwrap().into(),
119                )),
120            },
121            Example {
122                description: "Unknown stays a string",
123                example: "'not-a-number' | detect type",
124                result: Some(Value::test_string("not-a-number")),
125            },
126        ]
127    }
128
129    fn run(
130        &self,
131        engine_state: &EngineState,
132        stack: &mut Stack,
133        call: &Call,
134        mut input: PipelineData,
135    ) -> Result<PipelineData, ShellError> {
136        let span = call.head;
137        let display_as_filesize = call.has_flag(engine_state, stack, "prefer-filesize")?;
138        let prefer_dmy = call.has_flag(engine_state, stack, "prefer-dmy")?;
139        let metadata = input.take_metadata();
140        let val = input.into_value(call.head)?;
141        process(val, metadata, display_as_filesize, prefer_dmy, span)
142    }
143}
144
145// This function serves to modify the input string by swapping the day and month components
146// (e.g., turning `"01/02/2025"` into `"02/01/2025"`) so that `dtparse::parse`, which defaults to interpreting
147// ambiguous dates as month-day-year (MDY), correctly parses it as day-month-year (DMY) when `dayfirst` is true.
148// This is necessary because `dtparse::parse` doesn't expose a direct `dayfirst` option in its API, and the
149// swap ensures consistent parsing based on the regex match. It could be written more idiomatically and with
150// the regexes but this is a simpler approach and doesn't have to mess with parsing the time component.
151fn swap_day_month(input: &str) -> String {
152    let re_slash =
153        fancy_regex::Regex::new(r"(\d{1,2})/(\d{1,2})/(\d{4,})").expect("regex should be valid");
154    let swapped_slash = re_slash.replace_all(input, "$2/$1/$3");
155    let re_dash =
156        fancy_regex::Regex::new(r"(\d{1,2})-(\d{1,2})-(\d{4,})").expect("regex should be valid");
157    re_dash.replace_all(&swapped_slash, "$2-$1-$3").to_string()
158}
159
160fn parse_date_from_string_with_dayfirst(
161    input: &str,
162    span: Span,
163    dayfirst: bool,
164) -> Result<chrono::DateTime<chrono::FixedOffset>, nu_protocol::Value> {
165    let input = if dayfirst {
166        swap_day_month(input)
167    } else {
168        input.to_string()
169    };
170    match dtparse::parse(&input) {
171        Ok((native_dt, fixed_offset)) => {
172            let offset = match fixed_offset {
173                Some(offset) => offset,
174                None => *chrono::Local
175                    .from_local_datetime(&native_dt)
176                    .single()
177                    .unwrap_or_default()
178                    .offset(),
179            };
180            match offset.from_local_datetime(&native_dt) {
181                chrono::LocalResult::Single(d) => Ok(d),
182                chrono::LocalResult::Ambiguous(d, _) => Ok(d),
183                chrono::LocalResult::None => Err(nu_protocol::Value::error(
184                    nu_protocol::ShellError::DatetimeParseError {
185                        msg: input.to_string(),
186                        span,
187                    },
188                    span,
189                )),
190            }
191        }
192        Err(_) => Err(nu_protocol::Value::error(
193            nu_protocol::ShellError::DatetimeParseError {
194                msg: input.to_string(),
195                span,
196            },
197            span,
198        )),
199    }
200}
201
202// This function will check if a value matches a regular expression for a particular datatype.
203// If it does, it will convert the value to that datatype.
204fn process(
205    val: Value,
206    metadata: Option<PipelineMetadata>,
207    display_as_filesize: bool,
208    prefer_dmy: bool,
209    span: Span,
210) -> Result<PipelineData, ShellError> {
211    // step 1: convert value to string
212    let val_str = val.coerce_str().unwrap_or_default();
213
214    // Determine the order of checking ambiguous date formats (DMY vs MDY) based on the prefer_dmy flag.
215    // This ensures that when dates like "01/02/2025" are encountered, we check the preferred format first
216    // (e.g., DMY for day-month-year or MDY for month-day-year) to handle ambiguity correctly.
217    let (first_regex, first_dayfirst, first_name) = if prefer_dmy {
218        (&DATETIME_DMY_RE, true, "DATETIME_DMY_RE")
219    } else {
220        (&DATETIME_MDY_RE, false, "DATETIME_MDY_RE")
221    };
222    let (second_regex, second_dayfirst, second_name) = if prefer_dmy {
223        (&DATETIME_MDY_RE, false, "DATETIME_MDY_RE")
224    } else {
225        (&DATETIME_DMY_RE, true, "DATETIME_DMY_RE")
226    };
227
228    // step 2: bounce string up against regexes
229    let value = if BOOLEAN_RE.is_match(&val_str).unwrap_or(false) {
230        let bval = val_str
231            .to_lowercase()
232            .parse::<bool>()
233            .map_err(|_| ShellError::CantConvert {
234                to_type: "string".to_string(),
235                from_type: "bool".to_string(),
236                span,
237                help: Some(format!(
238                    r#""{val_str}" does not represent a valid boolean value"#
239                )),
240            })?;
241
242        Ok(Value::bool(bval, span))
243    } else if FLOAT_RE.is_match(&val_str).unwrap_or(false) {
244        let fval = val_str
245            .parse::<f64>()
246            .map_err(|_| ShellError::CantConvert {
247                to_type: "float".to_string(),
248                from_type: "string".to_string(),
249                span,
250                help: Some(format!(
251                    r#""{val_str}" does not represent a valid floating point value"#
252                )),
253            })?;
254
255        Ok(Value::float(fval, span))
256    } else if INTEGER_RE.is_match(&val_str).unwrap_or(false) {
257        let ival = val_str
258            .parse::<i64>()
259            .map_err(|_| ShellError::CantConvert {
260                to_type: "int".to_string(),
261                from_type: "string".to_string(),
262                span,
263                help: Some(format!(
264                    r#""{val_str}" does not represent a valid integer value"#
265                )),
266            })?;
267
268        if display_as_filesize {
269            Ok(Value::filesize(ival, span))
270        } else {
271            Ok(Value::int(ival, span))
272        }
273    } else if INTEGER_WITH_DELIMS_RE.is_match(&val_str).unwrap_or(false) {
274        let mut val_str = val_str.into_owned();
275        val_str.retain(|x| !['_', ','].contains(&x));
276
277        let ival = val_str
278            .parse::<i64>()
279            .map_err(|_| ShellError::CantConvert {
280                to_type: "int".to_string(),
281                from_type: "string".to_string(),
282                span,
283                help: Some(format!(
284                    r#""{val_str}" does not represent a valid integer value"#
285                )),
286            })?;
287
288        if display_as_filesize {
289            Ok(Value::filesize(ival, span))
290        } else {
291            Ok(Value::int(ival, span))
292        }
293    } else if first_regex.is_match(&val_str).unwrap_or(false) {
294        let dt =
295            parse_date_from_string_with_dayfirst(&val_str, span, first_dayfirst).map_err(|_| {
296                ShellError::CantConvert {
297                    to_type: "datetime".to_string(),
298                    from_type: "string".to_string(),
299                    span,
300                    help: Some(format!(
301                        r#""{val_str}" does not represent a valid {first_name} value"#
302                    )),
303                }
304            })?;
305        Ok(Value::date(dt, span))
306    } else if second_regex.is_match(&val_str).unwrap_or(false) {
307        let dt = parse_date_from_string_with_dayfirst(&val_str, span, second_dayfirst).map_err(
308            |_| ShellError::CantConvert {
309                to_type: "datetime".to_string(),
310                from_type: "string".to_string(),
311                span,
312                help: Some(format!(
313                    r#""{val_str}" does not represent a valid {second_name} value"#
314                )),
315            },
316        )?;
317        Ok(Value::date(dt, span))
318    } else if DATETIME_YMD_RE.is_match(&val_str).unwrap_or(false) {
319        let dt = parse_date_from_string_with_dayfirst(&val_str, span, false).map_err(|_| {
320            ShellError::CantConvert {
321                to_type: "datetime".to_string(),
322                from_type: "string".to_string(),
323                span,
324                help: Some(format!(
325                    r#""{val_str}" does not represent a valid DATETIME_YMD_RE value"#
326                )),
327            }
328        })?;
329
330        Ok(Value::date(dt, span))
331    } else if DATETIME_YMDZ_RE.is_match(&val_str).unwrap_or(false) {
332        let dt = parse_date_from_string_with_dayfirst(&val_str, span, false).map_err(|_| {
333            ShellError::CantConvert {
334                to_type: "datetime".to_string(),
335                from_type: "string".to_string(),
336                span,
337                help: Some(format!(
338                    r#""{val_str}" does not represent a valid DATETIME_YMDZ_RE value"#
339                )),
340            }
341        })?;
342
343        Ok(Value::date(dt, span))
344    } else {
345        // If we don't know what it is, just return whatever it was passed in as
346        return Ok(val.into_pipeline_data_with_metadata(metadata));
347    };
348
349    value.map(|value| {
350        value.into_pipeline_data_with_metadata(
351            metadata.map(|metadata| metadata.with_content_type(None)),
352        )
353    })
354}
355
356// region: datatype regexes
/// Day-month-year datetime pattern, written in verbose (`(?x)`) mode so the `#` notes inside
/// the literal are regex comments.
///
/// Examples: "31-12-2021", "01/01/2022", "15-06-2023 12:30"
///
/// The day is any one or two digits (not range-checked), the month is limited to 1-12 with an
/// optional leading zero, and the year is four or more digits. The two separators are each
/// `-` or `/` and need not be the same. An optional time part may follow a `T` or a space:
/// hour and minute are required, seconds and a 1-9 digit subsecond are optional. A single or
/// double quote is optionally accepted at each end.
const DATETIME_DMY_PATTERN: &str = r#"(?x)
        ^
        ['"]?                        # optional quotes
        (?:\d{1,2})                  # day
        [-/]                         # separator
        (?P<month>0?[1-9]|1[0-2])        # month
        [-/]                         # separator
        (?:\d{4,})                   # year
        (?:
            [T\ ]                    # separator
            (?:\d{2})                # hour
            :?                       # separator
            (?:\d{2})                # minute
            (?:
                :?                   # separator
                (?:\d{2})            # second
                (?:
                    \.(?:\d{1,9})    # subsecond
                )?
            )?
        )?
        ['"]?                        # optional quotes
        $
        "#;

/// Compiled form of [`DATETIME_DMY_PATTERN`], built on first use.
static DATETIME_DMY_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid")
});
/// Year-month-day datetime pattern without a UTC offset, written in verbose (`(?x)`) mode.
///
/// Examples: "2022-01-01", "2022/01/01", "2022-01-01T00:00:00"
///
/// The year is four or more digits, the month is limited to 1-12 with an optional leading
/// zero, and the day is any one or two digits (not range-checked). An optional time part may
/// follow a `T` or a space: hour and minute are required, seconds and a 1-9 digit subsecond
/// are optional. A single or double quote is optionally accepted at each end.
const DATETIME_YMD_PATTERN: &str = r#"(?x)
        ^
        ['"]?                      # optional quotes
        (?:\d{4,})                 # year
        [-/]                       # separator
        (?P<month>0?[1-9]|1[0-2])      # month
        [-/]                       # separator
        (?:\d{1,2})                # day
        (?:
            [T\ ]                  # separator
            (?:\d{2})              # hour
            :?                     # separator
            (?:\d{2})              # minute
            (?:
                :?                 # separator
                (?:\d{2})          # seconds
                (?:
                    \.(?:\d{1,9})  # subsecond
                )?
            )?
        )?
        ['"]?                      # optional quotes
        $
        "#;
/// Compiled form of [`DATETIME_YMD_PATTERN`], built on first use.
static DATETIME_YMD_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid")
});
/// Year-month-day datetime pattern that requires both a time part and a trailing UTC offset,
/// written in verbose (`(?x)`) mode.
///
/// Examples: "2022-01-01T00:00:00Z", "2022-01-01T00:00:00+01:00"
///
/// Unlike the offset-less year-first pattern, hour and minute are mandatory here (seconds and
/// a 1-9 digit subsecond stay optional). The string must end with either `Z` or a signed
/// two-digit-hour, two-digit-minute offset whose colon is optional; one whitespace character
/// may precede it. A single or double quote is optionally accepted at each end.
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
        ^
        ['"]?                  # optional quotes
        (?:\d{4,})             # year
        [-/]                   # separator
        (?P<month>0?[1-9]|1[0-2])  # month
        [-/]                   # separator
        (?:\d{1,2})            # day
        [T\ ]                  # separator
        (?:\d{2})              # hour
        :?                     # separator
        (?:\d{2})              # minute
        (?:
            :?                 # separator
            (?:\d{2})          # second
            (?:
                \.(?:\d{1,9})  # subsecond
            )?
        )?
        \s?                    # optional space
        (?:
            # offset (e.g. +01:00)
            [+-](?:\d{2})
            :?
            (?:\d{2})
            # or Zulu suffix
            |Z
        )
        ['"]?                  # optional quotes
        $
        "#;
/// Compiled form of [`DATETIME_YMDZ_PATTERN`], built on first use.
static DATETIME_YMDZ_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid")
});
449
/// Month-day-year datetime pattern, written in verbose (`(?x)`) mode.
///
/// Examples: "09/24/2012", "09/24/2012 02:43:48", "01/01/2022"
///
/// The month comes first and is limited to 1-12 with an optional leading zero; the day is any
/// one or two digits (not range-checked) and the year is four or more digits. The two
/// separators are each `-` or `/`. An optional time part may follow a `T` or a space: hour and
/// minute are required, seconds and a 1-9 digit subsecond are optional. A single or double
/// quote is optionally accepted at each end.
const DATETIME_MDY_PATTERN: &str = r#"(?x)
        ^
        ['"]?                        # optional quotes
        (?P<month>0?[1-9]|1[0-2])        # month
        [-/]                         # separator
        (?:\d{1,2})                  # day
        [-/]                         # separator
        (?:\d{4,})                   # year
        (?:
            [T\ ]                    # separator
            (?:\d{2})                # hour
            :?                       # separator
            (?:\d{2})                # minute
            (?:
                :?                   # separator
                (?:\d{2})            # second
                (?:
                    \.(?:\d{1,9})    # subsecond
                )?
            )?
        )?
        ['"]?                        # optional quotes
        $
        "#;

/// Compiled form of [`DATETIME_MDY_PATTERN`], built on first use.
static DATETIME_MDY_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_MDY_PATTERN).expect("datetime_mdy_pattern should be valid")
});
479
/// Matches text that looks like a floating point number.
///
/// Examples: "0.1", "3.0", "3.00001", "-9.9990e-003", "inf", "NaN"
///
/// After optional leading whitespace and an optional sign, one of these must follow: digits
/// with a fractional part (integer digits optional, exponent optional), the literal `inf`, the
/// literal `NaN`, whole digits with a mandatory exponent, or digits with a trailing dot. The
/// literals are matched case-sensitively. Plain digit runs such as "42" do not match.
static FLOAT_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
        .expect("float pattern should be valid")
});
485
/// Matches a plain run of decimal digits with an optional leading `-` (a leading `+` is not
/// accepted) and optional leading whitespace.
///
/// Examples: "0", "1", "10", "100", "1000"
static INTEGER_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*-?(\d+)$").expect("integer pattern should be valid"));
489
/// Matches an integer written with thousands separators: one to three digits followed by one
/// or more groups of a `,` or `_` and exactly three digits. Optional leading whitespace and an
/// optional leading `-` are allowed, and the two separator characters may be mixed.
///
/// Examples: "1_000", "10_000", "100_000", "1,000", "10,000"
static INTEGER_WITH_DELIMS_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
        .expect("integer with delimiters pattern should be valid")
});
495
/// Matches the words `true` and `false`, ignoring case.
///
/// Examples: "true", "false", "True", "FALSE"
///
/// NOTE(review): the two alternatives are not symmetric. The `\s*` belongs only to the first
/// one, so leading whitespace is accepted before `true` but not before `false`. This looks
/// unintentional; confirm before relying on either behavior.
static BOOLEAN_RE: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"^\s*(true)$|^(false)$")
        .case_insensitive(true)
        .build()
        .expect("boolean pattern should be valid")
});
503// endregion:
504
#[cfg(test)]
mod test {
    use super::*;
    use rstest::rstest;

    /// Asserts that `re` matches every string in `inputs`.
    fn assert_all_match(re: &fancy_regex::Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                re.is_match(input).unwrap(),
                "expected a match for {input:?}"
            );
        }
    }

    /// Asserts that `re` matches none of the strings in `inputs`.
    fn assert_none_match(re: &fancy_regex::Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                !re.is_match(input).unwrap(),
                "expected no match for {input:?}"
            );
        }
    }

    /// Runs `process` on `input` and asserts the result is local midnight of the given day.
    fn assert_detects_local_date(input: &str, prefer_dmy: bool, year: i32, month: u32, day: u32) {
        use chrono::{DateTime, FixedOffset, Local, TimeZone};

        let span = Span::test_data();
        let detected = process(Value::string(input, span), None, false, prefer_dmy, span)
            .unwrap()
            .into_value(span)
            .unwrap();

        let Value::Date { val, .. } = detected else {
            panic!("Expected date");
        };

        let expected =
            DateTime::<FixedOffset>::from(Local.with_ymd_and_hms(year, month, day, 0, 0, 0).unwrap());
        assert_eq!(val, expected);
    }

    #[test]
    fn test_examples() -> nu_test_support::Result {
        nu_test_support::test().examples(DetectType)
    }

    #[test]
    fn test_float_parse() {
        // The regex should work on all these but nushell's float parser is more strict
        assert_all_match(
            &FLOAT_RE,
            &[
                "0.1",
                "3.0",
                "3.00001",
                "-9.9990e-003",
                "9.9990e+003",
                "9.9990E+003",
                ".5",
                "2.5E-10",
                "2.5e10",
                "NaN",
                "-NaN",
                "-inf",
                "inf",
                "-7e-05",
                "7e-05",
                "+7e+05",
            ],
        );
    }

    #[test]
    fn test_int_parse() {
        assert_all_match(
            &INTEGER_RE,
            &[
                "0",
                "1",
                "10",
                "100",
                "1000",
                "10000",
                "100000",
                "1000000",
                "10000000",
                "100000000",
                "1000000000",
                "10000000000",
                "100000000000",
            ],
        );
        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1_000",
                "10_000",
                "100_000",
                "1_000_000",
                "10_000_000",
                "100_000_000",
                "1_000_000_000",
                "10_000_000_000",
                "100_000_000_000",
                "1,000",
                "10,000",
                "100,000",
                "1,000,000",
                "10,000,000",
                "100,000,000",
                "1,000,000,000",
                "10,000,000,000",
            ],
        );
    }

    #[test]
    fn test_bool_parse() {
        assert_all_match(&BOOLEAN_RE, &["true", "false"]);
        assert_none_match(&BOOLEAN_RE, &["1", "0"]);
    }

    #[test]
    fn test_datetime_ymdz_pattern() {
        assert_all_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00Z",
                "2022-01-01T00:00:00.123456789Z",
                "2022-01-01T00:00:00+01:00",
                "2022-01-01T00:00:00.123456789+01:00",
                "2022-01-01T00:00:00-01:00",
                "2022-01-01T00:00:00.123456789-01:00",
                "'2022-01-01T00:00:00Z'",
            ],
        );

        // Missing or malformed offsets must be rejected.
        assert_none_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.",
                "2022-01-01T00:00:00.123456789",
                "2022-01-01T00:00:00+01",
                "2022-01-01T00:00:00+01:0",
                "2022-01-01T00:00:00+1:00",
                "2022-01-01T00:00:00.123456789+01",
                "2022-01-01T00:00:00.123456789+01:0",
                "2022-01-01T00:00:00.123456789+1:00",
                "2022-01-01T00:00:00-01",
                "2022-01-01T00:00:00-01:0",
                "2022-01-01T00:00:00-1:00",
                "2022-01-01T00:00:00.123456789-01",
                "2022-01-01T00:00:00.123456789-01:0",
                "2022-01-01T00:00:00.123456789-1:00",
            ],
        );
    }

    #[test]
    fn test_datetime_ymd_pattern() {
        assert_all_match(
            &DATETIME_YMD_RE,
            &[
                "2022-01-01",
                "2022/01/01",
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.000000000",
                "'2022-01-01'",
            ],
        );

        // The regex isn't this specific, but it would be nice if it were
        // assert!(!DATETIME_YMD_RE.is_match("2022-13-01").unwrap());
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-32").unwrap());
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-01T24:00:00").unwrap());
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:60:00").unwrap());
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:00:60").unwrap());
        assert_none_match(&DATETIME_YMD_RE, &["2022-01-01T00:00:00.0000000000"]);
    }

    #[test]
    fn test_datetime_dmy_pattern() {
        assert_all_match(
            &DATETIME_DMY_RE,
            &["31-12-2021", "01/01/2022", "15-06-2023 12:30"],
        );
        assert_none_match(
            &DATETIME_DMY_RE,
            &["2022-13-01", "2022-01-32", "2022-01-01 24:00"],
        );
    }

    #[test]
    fn test_datetime_mdy_pattern() {
        assert_all_match(
            &DATETIME_MDY_RE,
            &["09/24/2012", "09/24/2012 02:43:48", "01/01/2022"],
        );
        assert_none_match(
            &DATETIME_MDY_RE,
            &["09/24/123", "09/24/2012 2:43:48", "009/24/2012"],
        );
    }

    #[rstest]
    // Ambiguous date defaults to MDY (Jan 2)
    #[case("01/02/2025", 2025, 1, 2)]
    // Non-ambiguous DMY (Feb 13)
    #[case("13/02/2025", 2025, 2, 13)]
    // Non-ambiguous MDY (Feb 13)
    #[case("02/13/2025", 2025, 2, 13)]
    fn test_ambiguous_date_default(
        #[case] input: &str,
        #[case] year: i32,
        #[case] month: u32,
        #[case] day: u32,
    ) {
        assert_detects_local_date(input, false, year, month, day);
    }

    #[rstest]
    // Ambiguous date with prefer_dmy=true -> parsed as Feb 1 (DMY)
    #[case("01/02/2025", 2025, 2, 1)]
    // Non-ambiguous still works (Feb 13)
    #[case("13/02/2025", 2025, 2, 13)]
    // Non-ambiguous still works (Feb 13)
    #[case("02/13/2025", 2025, 2, 13)]
    fn test_ambiguous_date_prefer_dmy(
        #[case] input: &str,
        #[case] year: i32,
        #[case] month: u32,
        #[case] day: u32,
    ) {
        assert_detects_local_date(input, true, year, month, day);
    }
}