nu_command/conversions/into/
value.rs

1use crate::parse_date_from_string;
2use fancy_regex::{Regex, RegexBuilder};
3use nu_engine::command_prelude::*;
4use nu_protocol::PipelineIterator;
5use std::collections::HashSet;
6use std::sync::LazyLock;
7
/// The `into value` command: infers a Nushell datatype for each cell of the
/// incoming table (see the `Command` implementation for flags and examples).
#[derive(Clone)]
pub struct IntoValue;
10
11impl Command for IntoValue {
12    fn name(&self) -> &str {
13        "into value"
14    }
15
16    fn signature(&self) -> Signature {
17        Signature::build("into value")
18            .input_output_types(vec![(Type::table(), Type::table())])
19            .named(
20                "columns",
21                SyntaxShape::List(Box::new(SyntaxShape::Any)),
22                "list of columns to update",
23                Some('c'),
24            )
25            .switch(
26                "prefer-filesizes",
27                "For ints display them as human-readable file sizes",
28                Some('f'),
29            )
30            .allow_variants_without_examples(true)
31            .category(Category::Filters)
32    }
33
34    fn description(&self) -> &str {
35        "Infer Nushell datatype for each cell."
36    }
37
38    fn search_terms(&self) -> Vec<&str> {
39        vec!["convert", "conversion"]
40    }
41
42    fn examples(&self) -> Vec<Example> {
43        vec![
44            Example {
45                description: "Infer Nushell values for each cell.",
46                example: "$table | into value",
47                result: None,
48            },
49            Example {
50                description: "Infer Nushell values for each cell in the given columns.",
51                example: "$table | into value -c [column1, column5]",
52                result: None,
53            },
54        ]
55    }
56
57    fn run(
58        &self,
59        engine_state: &EngineState,
60        stack: &mut Stack,
61        call: &Call,
62        input: PipelineData,
63    ) -> Result<PipelineData, ShellError> {
64        let metadata = input.metadata();
65        let span = call.head;
66        let display_as_filesizes = call.has_flag(engine_state, stack, "prefer-filesizes")?;
67
68        // the columns to update
69        let columns: Option<Value> = call.get_flag(engine_state, stack, "columns")?;
70        let columns: Option<HashSet<String>> = match columns {
71            Some(val) => Some(
72                val.into_list()?
73                    .into_iter()
74                    .map(Value::coerce_into_string)
75                    .collect::<Result<HashSet<String>, ShellError>>()?,
76            ),
77            None => None,
78        };
79
80        Ok(UpdateCellIterator {
81            input: input.into_iter(),
82            columns,
83            display_as_filesizes,
84            span,
85        }
86        .into_pipeline_data(span, engine_state.signals().clone())
87        .set_metadata(metadata))
88    }
89}
90
/// Iterator adapter that runs `process_cell` over the values pulled from
/// `input`, one value per call to `next`.
struct UpdateCellIterator {
    /// Upstream values to convert.
    input: PipelineIterator,
    /// When `Some`, only record columns named in this set are converted;
    /// when `None`, every cell is converted.
    columns: Option<HashSet<String>>,
    /// Passed through to `process_cell`: integers become filesize values
    /// instead of ints.
    display_as_filesizes: bool,
    /// Span used for values and errors produced from non-record input
    /// (set from `call.head` in `run`).
    span: Span,
}
97
98impl Iterator for UpdateCellIterator {
99    type Item = Value;
100
101    fn next(&mut self) -> Option<Self::Item> {
102        match self.input.next() {
103            Some(val) => {
104                if let Some(ref cols) = self.columns {
105                    if !val.columns().any(|c| cols.contains(c)) {
106                        return Some(val);
107                    }
108                }
109
110                let span = val.span();
111                match val {
112                    Value::Record { val, .. } => Some(Value::record(
113                        val.into_owned()
114                            .into_iter()
115                            .map(|(col, val)| match &self.columns {
116                                Some(cols) if !cols.contains(&col) => (col, val),
117                                _ => (
118                                    col,
119                                    match process_cell(val, self.display_as_filesizes, span) {
120                                        Ok(val) => val,
121                                        Err(err) => Value::error(err, span),
122                                    },
123                                ),
124                            })
125                            .collect(),
126                        span,
127                    )),
128                    val => match process_cell(val, self.display_as_filesizes, self.span) {
129                        Ok(val) => Some(val),
130                        Err(err) => Some(Value::error(err, self.span)),
131                    },
132                }
133            }
134            None => None,
135        }
136    }
137}
138
139// This function will check each cell to see if it matches a regular expression
140// for a particular datatype. If it does, it will convert the cell to that datatype.
141fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Value, ShellError> {
142    // step 1: convert value to string
143    let val_str = val.coerce_str().unwrap_or_default();
144
145    // step 2: bounce string up against regexes
146    if BOOLEAN_RE.is_match(&val_str).unwrap_or(false) {
147        let bval = val_str
148            .parse::<bool>()
149            .map_err(|_| ShellError::CantConvert {
150                to_type: "string".to_string(),
151                from_type: "bool".to_string(),
152                span,
153                help: Some(format!(
154                    r#""{val_str}" does not represent a valid boolean value"#
155                )),
156            })?;
157
158        Ok(Value::bool(bval, span))
159    } else if FLOAT_RE.is_match(&val_str).unwrap_or(false) {
160        let fval = val_str
161            .parse::<f64>()
162            .map_err(|_| ShellError::CantConvert {
163                to_type: "float".to_string(),
164                from_type: "string".to_string(),
165                span,
166                help: Some(format!(
167                    r#""{val_str}" does not represent a valid floating point value"#
168                )),
169            })?;
170
171        Ok(Value::float(fval, span))
172    } else if INTEGER_RE.is_match(&val_str).unwrap_or(false) {
173        let ival = val_str
174            .parse::<i64>()
175            .map_err(|_| ShellError::CantConvert {
176                to_type: "int".to_string(),
177                from_type: "string".to_string(),
178                span,
179                help: Some(format!(
180                    r#""{val_str}" does not represent a valid integer value"#
181                )),
182            })?;
183
184        if display_as_filesizes {
185            Ok(Value::filesize(ival, span))
186        } else {
187            Ok(Value::int(ival, span))
188        }
189    } else if INTEGER_WITH_DELIMS_RE.is_match(&val_str).unwrap_or(false) {
190        let mut val_str = val_str.into_owned();
191        val_str.retain(|x| !['_', ','].contains(&x));
192
193        let ival = val_str
194            .parse::<i64>()
195            .map_err(|_| ShellError::CantConvert {
196                to_type: "int".to_string(),
197                from_type: "string".to_string(),
198                span,
199                help: Some(format!(
200                    r#""{val_str}" does not represent a valid integer value"#
201                )),
202            })?;
203
204        if display_as_filesizes {
205            Ok(Value::filesize(ival, span))
206        } else {
207            Ok(Value::int(ival, span))
208        }
209    } else if DATETIME_DMY_RE.is_match(&val_str).unwrap_or(false) {
210        let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
211            to_type: "datetime".to_string(),
212            from_type: "string".to_string(),
213            span,
214            help: Some(format!(
215                r#""{val_str}" does not represent a valid DATETIME_MDY_RE value"#
216            )),
217        })?;
218
219        Ok(Value::date(dt, span))
220    } else if DATETIME_YMD_RE.is_match(&val_str).unwrap_or(false) {
221        let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
222            to_type: "datetime".to_string(),
223            from_type: "string".to_string(),
224            span,
225            help: Some(format!(
226                r#""{val_str}" does not represent a valid DATETIME_YMD_RE value"#
227            )),
228        })?;
229
230        Ok(Value::date(dt, span))
231    } else if DATETIME_YMDZ_RE.is_match(&val_str).unwrap_or(false) {
232        let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
233            to_type: "datetime".to_string(),
234            from_type: "string".to_string(),
235            span,
236            help: Some(format!(
237                r#""{val_str}" does not represent a valid DATETIME_YMDZ_RE value"#
238            )),
239        })?;
240
241        Ok(Value::date(dt, span))
242    } else {
243        // If we don't know what it is, just return whatever it was passed in as
244        Ok(val)
245    }
246}
247
// region: datatype regexes
/// Verbose-mode (`(?x)`) pattern for day-month-year dates such as
/// `31-12-2021` or `15-06-2023 12:30`, with `-` or `/` as the date separator,
/// an optional time part and optional surrounding quotes.
///
/// Field ranges are only loosely constrained (e.g. the month part accepts
/// any one digit optionally preceded by `0` or `1`).
const DATETIME_DMY_PATTERN: &str = r#"(?x)
        ^
        ['"]?                        # optional quotes
        (?:\d{1,2})                  # day
        [-/]                         # separator
        (?P<month>[01]?\d{1})        # month
        [-/]                         # separator
        (?:\d{4,})                   # year
        (?:
            [T\ ]                    # separator
            (?:\d{2})                # hour
            :?                       # separator
            (?:\d{2})                # minute
            (?:
                :?                   # separator
                (?:\d{2})            # second
                (?:
                    \.(?:\d{1,9})    # subsecond
                )?
            )?
        )?
        ['"]?                        # optional quotes
        $
        "#;

/// Lazily compiled form of `DATETIME_DMY_PATTERN`.
static DATETIME_DMY_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid")
});
/// Verbose-mode (`(?x)`) pattern for year-month-day dates such as
/// `2022-01-01` or `2022-01-01T00:00:00.000000000`, with `-` or `/` as the
/// date separator, an optional time part (up to nine subsecond digits) and
/// optional surrounding quotes. No timezone suffix is accepted.
const DATETIME_YMD_PATTERN: &str = r#"(?x)
        ^
        ['"]?                      # optional quotes
        (?:\d{4,})                 # year
        [-/]                       # separator
        (?P<month>[01]?\d{1})      # month
        [-/]                       # separator
        (?:\d{1,2})                # day
        (?:
            [T\ ]                  # separator
            (?:\d{2})              # hour
            :?                     # separator
            (?:\d{2})              # minute
            (?:
                :?                 # separator
                (?:\d{2})          # seconds
                (?:
                    \.(?:\d{1,9})  # subsecond
                )?
            )?
        )?
        ['"]?                      # optional quotes
        $
        "#;
/// Lazily compiled form of `DATETIME_YMD_PATTERN`.
static DATETIME_YMD_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid")
});
/// Verbose-mode (`(?x)`) pattern for year-month-day datetimes that carry a
/// mandatory time part and a mandatory timezone: either a `+hh:mm`/`-hh:mm`
/// offset (colon optional) or a `Z` suffix.
///
/// Example input: `2023-03-24 16:44:17.865147299 -05:00`
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
        ^
        ['"]?                  # optional quotes
        (?:\d{4,})             # year
        [-/]                   # separator
        (?P<month>[01]?\d{1})  # month
        [-/]                   # separator
        (?:\d{1,2})            # day
        [T\ ]                  # separator
        (?:\d{2})              # hour
        :?                     # separator
        (?:\d{2})              # minute
        (?:
            :?                 # separator
            (?:\d{2})          # second
            (?:
                \.(?:\d{1,9})  # subsecond
            )?
        )?
        \s?                    # optional space
        (?:
            # offset (e.g. +01:00)
            [+-](?:\d{2})
            :?
            (?:\d{2})
            # or Zulu suffix
            |Z
        )
        ['"]?                  # optional quotes
        $
        "#;
/// Lazily compiled form of `DATETIME_YMDZ_PATTERN`.
static DATETIME_YMDZ_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid")
});

/// Matches float-looking text: optional leading whitespace and sign, then a
/// decimal with a fractional part (optional exponent), `inf`, `NaN`, digits
/// with an exponent, or digits followed by a trailing `.`.
static FLOAT_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
        .expect("float pattern should be valid")
});

/// Matches plain integers: optional leading whitespace, optional `-`, digits.
static INTEGER_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*-?(\d+)$").expect("integer pattern should be valid"));

/// Matches integers written with `,` or `_` between groups of three digits
/// (e.g. `1,000,000` or `10_000`); at least one delimiter is required.
static INTEGER_WITH_DELIMS_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
        .expect("integer with delimiters pattern should be valid")
});

/// Matches `true` or `false`, case-insensitively.
// NOTE(review): leading whitespace is accepted before `true` but not before
// `false` — looks unintentional; confirm before relying on either behavior.
static BOOLEAN_RE: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"^\s*(true)$|^(false)$")
        .case_insensitive(true)
        .build()
        .expect("boolean pattern should be valid")
});
// endregion:
360
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `re` accepts every input, naming the offending input on failure.
    fn assert_all_match(re: &Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                re.is_match(input).expect("regex should execute"),
                "expected {input:?} to match"
            );
        }
    }

    /// Asserts that `re` rejects every input, naming the offending input on failure.
    fn assert_none_match(re: &Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                !re.is_match(input).expect("regex should execute"),
                "expected {input:?} not to match"
            );
        }
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(IntoValue {})
    }

    #[test]
    fn test_float_parse() {
        // The regex should work on all these but nushell's float parser is more strict
        assert_all_match(
            &FLOAT_RE,
            &[
                "0.1",
                "3.0",
                "3.00001",
                "-9.9990e-003",
                "9.9990e+003",
                "9.9990E+003",
                ".5",
                "2.5E-10",
                "2.5e10",
                "NaN",
                "-NaN",
                "-inf",
                "inf",
                "-7e-05",
                "7e-05",
                "+7e+05",
            ],
        );
    }

    #[test]
    fn test_int_parse() {
        assert_all_match(
            &INTEGER_RE,
            &[
                "0",
                "1",
                "10",
                "100",
                "1000",
                "10000",
                "100000",
                "1000000",
                "10000000",
                "100000000",
                "1000000000",
                "10000000000",
                "100000000000",
            ],
        );
        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1_000",
                "10_000",
                "100_000",
                "1_000_000",
                "10_000_000",
                "100_000_000",
                "1_000_000_000",
                "10_000_000_000",
                "100_000_000_000",
                "1,000",
                "10,000",
                "100,000",
                "1,000,000",
                "10,000,000",
                "100,000,000",
                "1,000,000,000",
                "10,000,000,000",
            ],
        );
    }

    #[test]
    fn test_bool_parse() {
        assert_all_match(&BOOLEAN_RE, &["true", "false"]);
        assert_none_match(&BOOLEAN_RE, &["1", "0"]);
    }

    #[test]
    fn test_datetime_ymdz_pattern() {
        assert_all_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00Z",
                "2022-01-01T00:00:00.123456789Z",
                "2022-01-01T00:00:00+01:00",
                "2022-01-01T00:00:00.123456789+01:00",
                "2022-01-01T00:00:00-01:00",
                "2022-01-01T00:00:00.123456789-01:00",
                "'2022-01-01T00:00:00Z'",
            ],
        );

        // A missing or truncated timezone must be rejected.
        assert_none_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.",
                "2022-01-01T00:00:00.123456789",
                "2022-01-01T00:00:00+01",
                "2022-01-01T00:00:00+01:0",
                "2022-01-01T00:00:00+1:00",
                "2022-01-01T00:00:00.123456789+01",
                "2022-01-01T00:00:00.123456789+01:0",
                "2022-01-01T00:00:00.123456789+1:00",
                "2022-01-01T00:00:00-01",
                "2022-01-01T00:00:00-01:0",
                "2022-01-01T00:00:00-1:00",
                "2022-01-01T00:00:00.123456789-01",
                "2022-01-01T00:00:00.123456789-01:0",
                "2022-01-01T00:00:00.123456789-1:00",
            ],
        );
    }

    #[test]
    fn test_datetime_ymd_pattern() {
        assert_all_match(
            &DATETIME_YMD_RE,
            &[
                "2022-01-01",
                "2022/01/01",
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.000000000",
                "'2022-01-01'",
            ],
        );

        // The regex isn't specific enough to reject out-of-range fields yet,
        // but it would be nice if it were. Inputs that should ideally fail:
        //   "2022-13-01", "2022-01-32", "2022-01-01T24:00:00",
        //   "2022-01-01T00:60:00", "2022-01-01T00:00:60"
        assert_none_match(&DATETIME_YMD_RE, &["2022-01-01T00:00:00.0000000000"]);
    }

    #[test]
    fn test_datetime_dmy_pattern() {
        assert_all_match(
            &DATETIME_DMY_RE,
            &["31-12-2021", "01/01/2022", "15-06-2023 12:30"],
        );
        assert_none_match(
            &DATETIME_DMY_RE,
            &["2022-13-01", "2022-01-32", "2022-01-01 24:00"],
        );
    }
}