nu_command/strings/
detect_type.rs

1use crate::parse_date_from_string;
2use chrono::{Local, TimeZone, Utc};
3use fancy_regex::{Regex, RegexBuilder};
4use nu_engine::command_prelude::*;
5use std::sync::LazyLock;
6
/// The `detect type` command: infers a Nushell value from the string form of its input.
#[derive(Clone)]
pub struct DetectType;
9
10impl Command for DetectType {
11    fn name(&self) -> &str {
12        "detect type"
13    }
14
15    fn signature(&self) -> Signature {
16        Signature::build(self.name())
17            .input_output_types(vec![(Type::String, Type::Any), (Type::Any, Type::Any)])
18            .switch(
19                "prefer-filesize",
20                "For ints display them as human-readable file sizes",
21                Some('f'),
22            )
23            .category(Category::Strings)
24            .allow_variants_without_examples(true)
25    }
26
27    fn description(&self) -> &str {
28        "Infer Nushell datatype from a string."
29    }
30
31    fn search_terms(&self) -> Vec<&str> {
32        vec!["convert", "conversion"]
33    }
34
35    fn examples(&self) -> Vec<Example<'_>> {
36        vec![
37            Example {
38                description: "Bool from string",
39                example: "'true' | detect type",
40                result: Some(Value::test_bool(true)),
41            },
42            Example {
43                description: "Bool is case insensitive",
44                example: "'FALSE' | detect type",
45                result: Some(Value::test_bool(false)),
46            },
47            Example {
48                description: "Int from plain digits",
49                example: "'42' | detect type",
50                result: Some(Value::test_int(42)),
51            },
52            Example {
53                description: "Int with underscores",
54                example: "'1_000_000' | detect type",
55                result: Some(Value::test_int(1_000_000)),
56            },
57            Example {
58                description: "Int with commas",
59                example: "'1,234,567' | detect type",
60                result: Some(Value::test_int(1_234_567)),
61            },
62            #[allow(clippy::approx_constant, reason = "approx PI in examples is fine")]
63            Example {
64                description: "Float from decimal",
65                example: "'3.14' | detect type",
66                result: Some(Value::test_float(3.14)),
67            },
68            Example {
69                description: "Float in scientific notation",
70                example: "'6.02e23' | detect type",
71                result: Some(Value::test_float(6.02e23)),
72            },
73            Example {
74                description: "Prefer filesize for ints",
75                example: "'1024' | detect type -f",
76                result: Some(Value::test_filesize(1024)),
77            },
78            Example {
79                description: "Date Y-M-D",
80                example: "'2022-01-01' | detect type",
81                result: Some(Value::test_date(
82                    Local.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
83                )),
84            },
85            Example {
86                description: "Date with time and offset",
87                example: "'2022-01-01T00:00:00Z' | detect type",
88                result: Some(Value::test_date(
89                    Utc.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
90                )),
91            },
92            Example {
93                description: "Date D-M-Y",
94                example: "'31-12-2021' | detect type",
95                result: Some(Value::test_date(
96                    Local
97                        .with_ymd_and_hms(2021, 12, 31, 0, 0, 0)
98                        .unwrap()
99                        .into(),
100                )),
101            },
102            Example {
103                description: "Unknown stays a string",
104                example: "'not-a-number' | detect type",
105                result: Some(Value::test_string("not-a-number")),
106            },
107        ]
108    }
109
110    fn run(
111        &self,
112        engine_state: &EngineState,
113        stack: &mut Stack,
114        call: &Call,
115        input: PipelineData,
116    ) -> Result<PipelineData, ShellError> {
117        let metadata = input
118            .metadata()
119            .map(|metadata| metadata.with_content_type(None));
120        let span = call.head;
121        let display_as_filesize = call.has_flag(engine_state, stack, "prefer-filesize")?;
122        let val = input.into_value(call.head)?;
123        let val = process(val, display_as_filesize, span)?;
124        Ok(val.into_pipeline_data_with_metadata(metadata))
125    }
126}
127
128// This function will check if a value matches a regular expression for a particular datatype.
129// If it does, it will convert the value to that datatype.
130fn process(val: Value, display_as_filesize: bool, span: Span) -> Result<Value, ShellError> {
131    // step 1: convert value to string
132    let val_str = val.coerce_str().unwrap_or_default();
133
134    // step 2: bounce string up against regexes
135    if BOOLEAN_RE.is_match(&val_str).unwrap_or(false) {
136        let bval = val_str
137            .to_lowercase()
138            .parse::<bool>()
139            .map_err(|_| ShellError::CantConvert {
140                to_type: "string".to_string(),
141                from_type: "bool".to_string(),
142                span,
143                help: Some(format!(
144                    r#""{val_str}" does not represent a valid boolean value"#
145                )),
146            })?;
147
148        Ok(Value::bool(bval, span))
149    } else if FLOAT_RE.is_match(&val_str).unwrap_or(false) {
150        let fval = val_str
151            .parse::<f64>()
152            .map_err(|_| ShellError::CantConvert {
153                to_type: "float".to_string(),
154                from_type: "string".to_string(),
155                span,
156                help: Some(format!(
157                    r#""{val_str}" does not represent a valid floating point value"#
158                )),
159            })?;
160
161        Ok(Value::float(fval, span))
162    } else if INTEGER_RE.is_match(&val_str).unwrap_or(false) {
163        let ival = val_str
164            .parse::<i64>()
165            .map_err(|_| ShellError::CantConvert {
166                to_type: "int".to_string(),
167                from_type: "string".to_string(),
168                span,
169                help: Some(format!(
170                    r#""{val_str}" does not represent a valid integer value"#
171                )),
172            })?;
173
174        if display_as_filesize {
175            Ok(Value::filesize(ival, span))
176        } else {
177            Ok(Value::int(ival, span))
178        }
179    } else if INTEGER_WITH_DELIMS_RE.is_match(&val_str).unwrap_or(false) {
180        let mut val_str = val_str.into_owned();
181        val_str.retain(|x| !['_', ','].contains(&x));
182
183        let ival = val_str
184            .parse::<i64>()
185            .map_err(|_| ShellError::CantConvert {
186                to_type: "int".to_string(),
187                from_type: "string".to_string(),
188                span,
189                help: Some(format!(
190                    r#""{val_str}" does not represent a valid integer value"#
191                )),
192            })?;
193
194        if display_as_filesize {
195            Ok(Value::filesize(ival, span))
196        } else {
197            Ok(Value::int(ival, span))
198        }
199    } else if DATETIME_DMY_RE.is_match(&val_str).unwrap_or(false) {
200        let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
201            to_type: "datetime".to_string(),
202            from_type: "string".to_string(),
203            span,
204            help: Some(format!(
205                r#""{val_str}" does not represent a valid DATETIME_MDY_RE value"#
206            )),
207        })?;
208
209        Ok(Value::date(dt, span))
210    } else if DATETIME_YMD_RE.is_match(&val_str).unwrap_or(false) {
211        let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
212            to_type: "datetime".to_string(),
213            from_type: "string".to_string(),
214            span,
215            help: Some(format!(
216                r#""{val_str}" does not represent a valid DATETIME_YMD_RE value"#
217            )),
218        })?;
219
220        Ok(Value::date(dt, span))
221    } else if DATETIME_YMDZ_RE.is_match(&val_str).unwrap_or(false) {
222        let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
223            to_type: "datetime".to_string(),
224            from_type: "string".to_string(),
225            span,
226            help: Some(format!(
227                r#""{val_str}" does not represent a valid DATETIME_YMDZ_RE value"#
228            )),
229        })?;
230
231        Ok(Value::date(dt, span))
232    } else {
233        // If we don't know what it is, just return whatever it was passed in as
234        Ok(val)
235    }
236}
237
238// region: datatype regexes
/// Verbose-mode (`(?x)`) pattern for day-first dates such as `31-12-2021`, optionally
/// followed by a time of day.
///
/// Shape: 1-2 digit day, `-` or `/`, month (optional `0`/`1` then one digit), `-` or `/`,
/// year of four or more digits. The optional time starts with `T` or a space and carries a
/// two-digit hour and minute, optional two-digit seconds and optional 1-9 fractional digits;
/// the `:` separators are optional. A single or double quote is accepted at either end.
/// Only the shape is checked, not the value ranges.
const DATETIME_DMY_PATTERN: &str = r#"(?x)
        ^
        ['"]?                        # optional quotes
        (?:\d{1,2})                  # day
        [-/]                         # separator
        (?P<month>[01]?\d{1})        # month
        [-/]                         # separator
        (?:\d{4,})                   # year
        (?:
            [T\ ]                    # separator
            (?:\d{2})                # hour
            :?                       # separator
            (?:\d{2})                # minute
            (?:
                :?                   # separator
                (?:\d{2})            # second
                (?:
                    \.(?:\d{1,9})    # subsecond
                )?
            )?
        )?
        ['"]?                        # optional quotes
        $
        "#;

/// Compiled form of [`DATETIME_DMY_PATTERN`], built on first use.
static DATETIME_DMY_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid")
});
/// Verbose-mode (`(?x)`) pattern for year-first dates such as `2022-01-01` or `2022/01/01`,
/// optionally followed by a time of day and without any UTC offset.
///
/// Shape: year of four or more digits, `-` or `/`, month (optional `0`/`1` then one digit),
/// `-` or `/`, 1-2 digit day. The optional time starts with `T` or a space and carries a
/// two-digit hour and minute, optional two-digit seconds and optional 1-9 fractional digits;
/// the `:` separators are optional. A single or double quote is accepted at either end.
/// Only the shape is checked, not the value ranges.
const DATETIME_YMD_PATTERN: &str = r#"(?x)
        ^
        ['"]?                      # optional quotes
        (?:\d{4,})                 # year
        [-/]                       # separator
        (?P<month>[01]?\d{1})      # month
        [-/]                       # separator
        (?:\d{1,2})                # day
        (?:
            [T\ ]                  # separator
            (?:\d{2})              # hour
            :?                     # separator
            (?:\d{2})              # minute
            (?:
                :?                 # separator
                (?:\d{2})          # seconds
                (?:
                    \.(?:\d{1,9})  # subsecond
                )?
            )?
        )?
        ['"]?                      # optional quotes
        $
        "#;
/// Compiled form of [`DATETIME_YMD_PATTERN`], built on first use.
static DATETIME_YMD_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid")
});
/// Verbose-mode (`(?x)`) pattern for year-first date-times that end in a UTC offset,
/// e.g. `2023-03-24 16:44:17.865147299 -05:00` or `2022-01-01T00:00:00Z`.
///
/// Unlike [`DATETIME_YMD_PATTERN`], the time of day (at least hour and minute) and the
/// offset are mandatory. The offset is either `Z` or a sign followed by two-digit hours and
/// two-digit minutes with an optional `:` between them; one optional whitespace character
/// may precede it. A single or double quote is accepted at either end.
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
        ^
        ['"]?                  # optional quotes
        (?:\d{4,})             # year
        [-/]                   # separator
        (?P<month>[01]?\d{1})  # month
        [-/]                   # separator
        (?:\d{1,2})            # day
        [T\ ]                  # separator
        (?:\d{2})              # hour
        :?                     # separator
        (?:\d{2})              # minute
        (?:
            :?                 # separator
            (?:\d{2})          # second
            (?:
                \.(?:\d{1,9})  # subsecond
            )?
        )?
        \s?                    # optional space
        (?:
            # offset (e.g. +01:00)
            [+-](?:\d{2})
            :?
            (?:\d{2})
            # or Zulu suffix
            |Z
        )
        ['"]?                  # optional quotes
        $
        "#;
/// Compiled form of [`DATETIME_YMDZ_PATTERN`], built on first use.
static DATETIME_YMDZ_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid")
});
329
330static FLOAT_RE: LazyLock<Regex> = LazyLock::new(|| {
331    Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
332        .expect("float pattern should be valid")
333});
334
335static INTEGER_RE: LazyLock<Regex> =
336    LazyLock::new(|| Regex::new(r"^\s*-?(\d+)$").expect("integer pattern should be valid"));
337
338static INTEGER_WITH_DELIMS_RE: LazyLock<Regex> = LazyLock::new(|| {
339    Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
340        .expect("integer with delimiters pattern should be valid")
341});
342
343static BOOLEAN_RE: LazyLock<Regex> = LazyLock::new(|| {
344    RegexBuilder::new(r"^\s*(true)$|^(false)$")
345        .case_insensitive(true)
346        .build()
347        .expect("boolean pattern should be valid")
348});
349// endregion:
350
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `re` accepts every sample, naming the offending input on failure.
    fn assert_all_match(re: &Regex, samples: &[&str]) {
        for sample in samples {
            assert!(
                re.is_match(sample).unwrap(),
                "expected {sample:?} to match"
            );
        }
    }

    /// Asserts that `re` rejects every sample, naming the offending input on failure.
    fn assert_none_match(re: &Regex, samples: &[&str]) {
        for sample in samples {
            assert!(
                !re.is_match(sample).unwrap(),
                "expected {sample:?} not to match"
            );
        }
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(DetectType)
    }

    #[test]
    fn test_float_parse() {
        // The regex should work on all these but nushell's float parser is more strict
        assert_all_match(
            &FLOAT_RE,
            &[
                "0.1",
                "3.0",
                "3.00001",
                "-9.9990e-003",
                "9.9990e+003",
                "9.9990E+003",
                "9.9990E-003",
                ".5",
                "2.5E-10",
                "2.5e10",
                "NaN",
                "-NaN",
                "-inf",
                "inf",
                "-7e-05",
                "7e-05",
                "+7e+05",
            ],
        );

        // Plain integers and arbitrary text must be left to the other patterns.
        assert_none_match(&FLOAT_RE, &["1", "abc", "1.2.3"]);
    }

    #[test]
    fn test_int_parse() {
        assert_all_match(
            &INTEGER_RE,
            &[
                "0",
                "1",
                "10",
                "100",
                "1000",
                "10000",
                "100000",
                "1000000",
                "10000000",
                "100000000",
                "1000000000",
                "10000000000",
                "100000000000",
                "-42",
            ],
        );
        assert_none_match(&INTEGER_RE, &["1.0", "1_000", "abc"]);

        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1_000",
                "10_000",
                "100_000",
                "1_000_000",
                "10_000_000",
                "100_000_000",
                "1_000_000_000",
                "10_000_000_000",
                "100_000_000_000",
                "1,000",
                "10,000",
                "100,000",
                "1,000,000",
                "10,000,000",
                "100,000,000",
                "1,000,000,000",
                "10,000,000,000",
            ],
        );

        // At least one separator is required and every later group has exactly three digits.
        assert_none_match(&INTEGER_WITH_DELIMS_RE, &["1000", "1_00", "1,0000"]);
    }

    #[test]
    fn test_bool_parse() {
        assert_all_match(&BOOLEAN_RE, &["true", "false", "TRUE", "False"]);
        assert_none_match(&BOOLEAN_RE, &["1", "0", "yes", "truefalse"]);
    }

    #[test]
    fn test_datetime_ymdz_pattern() {
        assert_all_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00Z",
                "2022-01-01T00:00:00.123456789Z",
                "2022-01-01T00:00:00+01:00",
                "2022-01-01T00:00:00.123456789+01:00",
                "2022-01-01T00:00:00-01:00",
                "2022-01-01T00:00:00.123456789-01:00",
                "'2022-01-01T00:00:00Z'",
            ],
        );

        // A missing or malformed offset must be rejected.
        assert_none_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.",
                "2022-01-01T00:00:00.123456789",
                "2022-01-01T00:00:00+01",
                "2022-01-01T00:00:00+01:0",
                "2022-01-01T00:00:00+1:00",
                "2022-01-01T00:00:00.123456789+01",
                "2022-01-01T00:00:00.123456789+01:0",
                "2022-01-01T00:00:00.123456789+1:00",
                "2022-01-01T00:00:00-01",
                "2022-01-01T00:00:00-01:0",
                "2022-01-01T00:00:00-1:00",
                "2022-01-01T00:00:00.123456789-01",
                "2022-01-01T00:00:00.123456789-01:0",
                "2022-01-01T00:00:00.123456789-1:00",
            ],
        );
    }

    #[test]
    fn test_datetime_ymd_pattern() {
        assert_all_match(
            &DATETIME_YMD_RE,
            &[
                "2022-01-01",
                "2022/01/01",
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.000000000",
                "'2022-01-01'",
            ],
        );

        // Known limitation: the pattern checks shape only, so out-of-range fields such as
        // "2022-13-01", "2022-01-32", "2022-01-01T24:00:00", "2022-01-01T00:60:00" and
        // "2022-01-01T00:00:60" are still accepted. It would be nice if they were rejected.

        // Ten fractional digits is one more than the pattern allows.
        assert_none_match(&DATETIME_YMD_RE, &["2022-01-01T00:00:00.0000000000"]);
    }

    #[test]
    fn test_datetime_dmy_pattern() {
        assert_all_match(
            &DATETIME_DMY_RE,
            &["31-12-2021", "01/01/2022", "15-06-2023 12:30"],
        );

        // Year-first strings belong to the Y-M-D patterns, not to this one.
        assert_none_match(
            &DATETIME_DMY_RE,
            &["2022-13-01", "2022-01-32", "2022-01-01 24:00"],
        );
    }
}