nu_command/conversions/into/
value.rs1use crate::parse_date_from_string;
2use fancy_regex::{Regex, RegexBuilder};
3use nu_engine::command_prelude::*;
4use nu_protocol::PipelineIterator;
5use std::collections::HashSet;
6use std::sync::LazyLock;
7
/// The `into value` command: re-infers a Nushell datatype for each cell of
/// its table input.
#[derive(Clone)]
pub struct IntoValue;
10
11impl Command for IntoValue {
12 fn name(&self) -> &str {
13 "into value"
14 }
15
16 fn signature(&self) -> Signature {
17 Signature::build("into value")
18 .input_output_types(vec![(Type::table(), Type::table())])
19 .named(
20 "columns",
21 SyntaxShape::List(Box::new(SyntaxShape::Any)),
22 "list of columns to update",
23 Some('c'),
24 )
25 .switch(
26 "prefer-filesizes",
27 "For ints display them as human-readable file sizes",
28 Some('f'),
29 )
30 .allow_variants_without_examples(true)
31 .category(Category::Filters)
32 }
33
34 fn description(&self) -> &str {
35 "Infer Nushell datatype for each cell."
36 }
37
38 fn search_terms(&self) -> Vec<&str> {
39 vec!["convert", "conversion"]
40 }
41
42 fn examples(&self) -> Vec<Example> {
43 vec![
44 Example {
45 description: "Infer Nushell values for each cell.",
46 example: "$table | into value",
47 result: None,
48 },
49 Example {
50 description: "Infer Nushell values for each cell in the given columns.",
51 example: "$table | into value -c [column1, column5]",
52 result: None,
53 },
54 ]
55 }
56
57 fn run(
58 &self,
59 engine_state: &EngineState,
60 stack: &mut Stack,
61 call: &Call,
62 input: PipelineData,
63 ) -> Result<PipelineData, ShellError> {
64 let metadata = input.metadata();
65 let span = call.head;
66 let display_as_filesizes = call.has_flag(engine_state, stack, "prefer-filesizes")?;
67
68 let columns: Option<Value> = call.get_flag(engine_state, stack, "columns")?;
70 let columns: Option<HashSet<String>> = match columns {
71 Some(val) => Some(
72 val.into_list()?
73 .into_iter()
74 .map(Value::coerce_into_string)
75 .collect::<Result<HashSet<String>, ShellError>>()?,
76 ),
77 None => None,
78 };
79
80 Ok(UpdateCellIterator {
81 input: input.into_iter(),
82 columns,
83 display_as_filesizes,
84 span,
85 }
86 .into_pipeline_data(span, engine_state.signals().clone())
87 .set_metadata(metadata))
88 }
89}
90
91struct UpdateCellIterator {
92 input: PipelineIterator,
93 columns: Option<HashSet<String>>,
94 display_as_filesizes: bool,
95 span: Span,
96}
97
98impl Iterator for UpdateCellIterator {
99 type Item = Value;
100
101 fn next(&mut self) -> Option<Self::Item> {
102 match self.input.next() {
103 Some(val) => {
104 if let Some(ref cols) = self.columns {
105 if !val.columns().any(|c| cols.contains(c)) {
106 return Some(val);
107 }
108 }
109
110 let span = val.span();
111 match val {
112 Value::Record { val, .. } => Some(Value::record(
113 val.into_owned()
114 .into_iter()
115 .map(|(col, val)| match &self.columns {
116 Some(cols) if !cols.contains(&col) => (col, val),
117 _ => (
118 col,
119 match process_cell(val, self.display_as_filesizes, span) {
120 Ok(val) => val,
121 Err(err) => Value::error(err, span),
122 },
123 ),
124 })
125 .collect(),
126 span,
127 )),
128 val => match process_cell(val, self.display_as_filesizes, self.span) {
129 Ok(val) => Some(val),
130 Err(err) => Some(Value::error(err, self.span)),
131 },
132 }
133 }
134 None => None,
135 }
136 }
137}
138
139fn process_cell(val: Value, display_as_filesizes: bool, span: Span) -> Result<Value, ShellError> {
142 let val_str = val.coerce_str().unwrap_or_default();
144
145 if BOOLEAN_RE.is_match(&val_str).unwrap_or(false) {
147 let bval = val_str
148 .parse::<bool>()
149 .map_err(|_| ShellError::CantConvert {
150 to_type: "string".to_string(),
151 from_type: "bool".to_string(),
152 span,
153 help: Some(format!(
154 r#""{val_str}" does not represent a valid boolean value"#
155 )),
156 })?;
157
158 Ok(Value::bool(bval, span))
159 } else if FLOAT_RE.is_match(&val_str).unwrap_or(false) {
160 let fval = val_str
161 .parse::<f64>()
162 .map_err(|_| ShellError::CantConvert {
163 to_type: "float".to_string(),
164 from_type: "string".to_string(),
165 span,
166 help: Some(format!(
167 r#""{val_str}" does not represent a valid floating point value"#
168 )),
169 })?;
170
171 Ok(Value::float(fval, span))
172 } else if INTEGER_RE.is_match(&val_str).unwrap_or(false) {
173 let ival = val_str
174 .parse::<i64>()
175 .map_err(|_| ShellError::CantConvert {
176 to_type: "int".to_string(),
177 from_type: "string".to_string(),
178 span,
179 help: Some(format!(
180 r#""{val_str}" does not represent a valid integer value"#
181 )),
182 })?;
183
184 if display_as_filesizes {
185 Ok(Value::filesize(ival, span))
186 } else {
187 Ok(Value::int(ival, span))
188 }
189 } else if INTEGER_WITH_DELIMS_RE.is_match(&val_str).unwrap_or(false) {
190 let mut val_str = val_str.into_owned();
191 val_str.retain(|x| !['_', ','].contains(&x));
192
193 let ival = val_str
194 .parse::<i64>()
195 .map_err(|_| ShellError::CantConvert {
196 to_type: "int".to_string(),
197 from_type: "string".to_string(),
198 span,
199 help: Some(format!(
200 r#""{val_str}" does not represent a valid integer value"#
201 )),
202 })?;
203
204 if display_as_filesizes {
205 Ok(Value::filesize(ival, span))
206 } else {
207 Ok(Value::int(ival, span))
208 }
209 } else if DATETIME_DMY_RE.is_match(&val_str).unwrap_or(false) {
210 let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
211 to_type: "datetime".to_string(),
212 from_type: "string".to_string(),
213 span,
214 help: Some(format!(
215 r#""{val_str}" does not represent a valid DATETIME_MDY_RE value"#
216 )),
217 })?;
218
219 Ok(Value::date(dt, span))
220 } else if DATETIME_YMD_RE.is_match(&val_str).unwrap_or(false) {
221 let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
222 to_type: "datetime".to_string(),
223 from_type: "string".to_string(),
224 span,
225 help: Some(format!(
226 r#""{val_str}" does not represent a valid DATETIME_YMD_RE value"#
227 )),
228 })?;
229
230 Ok(Value::date(dt, span))
231 } else if DATETIME_YMDZ_RE.is_match(&val_str).unwrap_or(false) {
232 let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
233 to_type: "datetime".to_string(),
234 from_type: "string".to_string(),
235 span,
236 help: Some(format!(
237 r#""{val_str}" does not represent a valid DATETIME_YMDZ_RE value"#
238 )),
239 })?;
240
241 Ok(Value::date(dt, span))
242 } else {
243 Ok(val)
245 }
246}
247
/// Verbose-mode (`(?x)`) regex source for day-first dates: day, month and a
/// year of four or more digits separated by `-` or `/`, optionally followed
/// by a time of day, with the whole value optionally wrapped in quotes.
const DATETIME_DMY_PATTERN: &str = r#"(?x)
        ^
        ['"]?                        # optional quotes
        (?:\d{1,2})                  # day
        [-/]                         # separator
        (?P<month>[01]?\d{1})        # month
        [-/]                         # separator
        (?:\d{4,})                   # year
        (?:
            [T\ ]                    # separator
            (?:\d{2})                # hour
            :?                       # separator
            (?:\d{2})                # minute
            (?:
                :?                   # separator
                (?:\d{2})            # second
                (?:
                    \.(?:\d{1,9})    # subsecond
                )?
            )?
        )?
        ['"]?                        # optional quotes
        $
        "#;
273
274static DATETIME_DMY_RE: LazyLock<Regex> = LazyLock::new(|| {
275 Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid")
276});
/// Verbose-mode (`(?x)`) regex source for year-first dates: a year of four
/// or more digits, month and day separated by `-` or `/`, optionally followed
/// by a time of day, with the whole value optionally wrapped in quotes.
const DATETIME_YMD_PATTERN: &str = r#"(?x)
        ^
        ['"]?                      # optional quotes
        (?:\d{4,})                 # year
        [-/]                       # separator
        (?P<month>[01]?\d{1})      # month
        [-/]                       # separator
        (?:\d{1,2})                # day
        (?:
            [T\ ]                  # separator
            (?:\d{2})              # hour
            :?                     # separator
            (?:\d{2})              # minute
            (?:
                :?                 # separator
                (?:\d{2})          # seconds
                (?:
                    \.(?:\d{1,9})  # subsecond
                )?
            )?
        )?
        ['"]?                      # optional quotes
        $
        "#;
301static DATETIME_YMD_RE: LazyLock<Regex> = LazyLock::new(|| {
302 Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid")
303});
/// Verbose-mode (`(?x)`) regex source for year-first datetimes that carry a
/// zone: the date, a mandatory hour and minute (seconds and subseconds
/// optional), then either a `+hh:mm`/`-hh:mm` offset (colon optional) or a
/// `Z` suffix. The whole value may be wrapped in quotes.
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
        ^
        ['"]?                  # optional quotes
        (?:\d{4,})             # year
        [-/]                   # separator
        (?P<month>[01]?\d{1})  # month
        [-/]                   # separator
        (?:\d{1,2})            # day
        [T\ ]                  # separator
        (?:\d{2})              # hour
        :?                     # separator
        (?:\d{2})              # minute
        (?:
            :?                 # separator
            (?:\d{2})          # second
            (?:
                \.(?:\d{1,9})  # subsecond
            )?
        )?
        \s?                    # optional space
        (?:
            # offset (e.g. +01:00)
            [+-](?:\d{2})
            :?
            (?:\d{2})
            # or Zulu suffix
            |Z
        )
        ['"]?                  # optional quotes
        $
        "#;
336static DATETIME_YMDZ_RE: LazyLock<Regex> = LazyLock::new(|| {
337 Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid")
338});
339
340static FLOAT_RE: LazyLock<Regex> = LazyLock::new(|| {
341 Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
342 .expect("float pattern should be valid")
343});
344
345static INTEGER_RE: LazyLock<Regex> =
346 LazyLock::new(|| Regex::new(r"^\s*-?(\d+)$").expect("integer pattern should be valid"));
347
348static INTEGER_WITH_DELIMS_RE: LazyLock<Regex> = LazyLock::new(|| {
349 Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
350 .expect("integer with delimiters pattern should be valid")
351});
352
353static BOOLEAN_RE: LazyLock<Regex> = LazyLock::new(|| {
354 RegexBuilder::new(r"^\s*(true)$|^(false)$")
355 .case_insensitive(true)
356 .build()
357 .expect("boolean pattern should be valid")
358});
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `re` matches every string in `inputs`.
    fn assert_all_match(re: &Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                re.is_match(input).unwrap(),
                "expected a match for {input:?}"
            );
        }
    }

    /// Asserts that `re` matches none of the strings in `inputs`.
    fn assert_none_match(re: &Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                !re.is_match(input).unwrap(),
                "expected no match for {input:?}"
            );
        }
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(IntoValue {})
    }

    #[test]
    fn test_float_parse() {
        assert_all_match(
            &FLOAT_RE,
            &[
                "0.1",
                "3.0",
                "3.00001",
                "-9.9990e-003",
                "9.9990e+003",
                "9.9990E+003",
                "9.9990E+003",
                ".5",
                "2.5E-10",
                "2.5e10",
                "NaN",
                "-NaN",
                "-inf",
                "inf",
                "-7e-05",
                "7e-05",
                "+7e+05",
            ],
        );
    }

    #[test]
    fn test_int_parse() {
        assert_all_match(
            &INTEGER_RE,
            &[
                "0",
                "1",
                "10",
                "100",
                "1000",
                "10000",
                "100000",
                "1000000",
                "10000000",
                "100000000",
                "1000000000",
                "10000000000",
                "100000000000",
            ],
        );

        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1_000",
                "10_000",
                "100_000",
                "1_000_000",
                "10_000_000",
                "100_000_000",
                "1_000_000_000",
                "10_000_000_000",
                "100_000_000_000",
                "1,000",
                "10,000",
                "100,000",
                "1,000,000",
                "10,000,000",
                "100,000,000",
                "1,000,000,000",
                "10,000,000,000",
            ],
        );
    }

    #[test]
    fn test_bool_parse() {
        assert_all_match(&BOOLEAN_RE, &["true", "false"]);
        assert_none_match(&BOOLEAN_RE, &["1", "0"]);
    }

    #[test]
    fn test_datetime_ymdz_pattern() {
        assert_all_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00Z",
                "2022-01-01T00:00:00.123456789Z",
                "2022-01-01T00:00:00+01:00",
                "2022-01-01T00:00:00.123456789+01:00",
                "2022-01-01T00:00:00-01:00",
                "2022-01-01T00:00:00.123456789-01:00",
                "'2022-01-01T00:00:00Z'",
            ],
        );

        assert_none_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.",
                "2022-01-01T00:00:00.123456789",
                "2022-01-01T00:00:00+01",
                "2022-01-01T00:00:00+01:0",
                "2022-01-01T00:00:00+1:00",
                "2022-01-01T00:00:00.123456789+01",
                "2022-01-01T00:00:00.123456789+01:0",
                "2022-01-01T00:00:00.123456789+1:00",
                "2022-01-01T00:00:00-01",
                "2022-01-01T00:00:00-01:0",
                "2022-01-01T00:00:00-1:00",
                "2022-01-01T00:00:00.123456789-01",
                "2022-01-01T00:00:00.123456789-01:0",
                "2022-01-01T00:00:00.123456789-1:00",
            ],
        );
    }

    #[test]
    fn test_datetime_ymd_pattern() {
        assert_all_match(
            &DATETIME_YMD_RE,
            &[
                "2022-01-01",
                "2022/01/01",
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.000000000",
                "'2022-01-01'",
            ],
        );

        assert_none_match(&DATETIME_YMD_RE, &["2022-01-01T00:00:00.0000000000"]);
    }

    #[test]
    fn test_datetime_dmy_pattern() {
        assert_all_match(
            &DATETIME_DMY_RE,
            &["31-12-2021", "01/01/2022", "15-06-2023 12:30"],
        );

        assert_none_match(
            &DATETIME_DMY_RE,
            &["2022-13-01", "2022-01-32", "2022-01-01 24:00"],
        );
    }
}