1use crate::parse_date_from_string;
2use chrono::{Local, TimeZone, Utc};
3use fancy_regex::{Regex, RegexBuilder};
4use nu_engine::command_prelude::*;
5use std::sync::LazyLock;
6
/// Unit struct that carries the `Command` implementation for `detect type`.
#[derive(Clone)]
pub struct DetectType;
9
10impl Command for DetectType {
11 fn name(&self) -> &str {
12 "detect type"
13 }
14
15 fn signature(&self) -> Signature {
16 Signature::build(self.name())
17 .input_output_types(vec![(Type::String, Type::Any), (Type::Any, Type::Any)])
18 .switch(
19 "prefer-filesize",
20 "For ints display them as human-readable file sizes",
21 Some('f'),
22 )
23 .category(Category::Strings)
24 .allow_variants_without_examples(true)
25 }
26
27 fn description(&self) -> &str {
28 "Infer Nushell datatype from a string."
29 }
30
31 fn search_terms(&self) -> Vec<&str> {
32 vec!["convert", "conversion"]
33 }
34
35 fn examples(&self) -> Vec<Example<'_>> {
36 vec![
37 Example {
38 description: "Bool from string",
39 example: "'true' | detect type",
40 result: Some(Value::test_bool(true)),
41 },
42 Example {
43 description: "Bool is case insensitive",
44 example: "'FALSE' | detect type",
45 result: Some(Value::test_bool(false)),
46 },
47 Example {
48 description: "Int from plain digits",
49 example: "'42' | detect type",
50 result: Some(Value::test_int(42)),
51 },
52 Example {
53 description: "Int with underscores",
54 example: "'1_000_000' | detect type",
55 result: Some(Value::test_int(1_000_000)),
56 },
57 Example {
58 description: "Int with commas",
59 example: "'1,234,567' | detect type",
60 result: Some(Value::test_int(1_234_567)),
61 },
62 #[allow(clippy::approx_constant, reason = "approx PI in examples is fine")]
63 Example {
64 description: "Float from decimal",
65 example: "'3.14' | detect type",
66 result: Some(Value::test_float(3.14)),
67 },
68 Example {
69 description: "Float in scientific notation",
70 example: "'6.02e23' | detect type",
71 result: Some(Value::test_float(6.02e23)),
72 },
73 Example {
74 description: "Prefer filesize for ints",
75 example: "'1024' | detect type -f",
76 result: Some(Value::test_filesize(1024)),
77 },
78 Example {
79 description: "Date Y-M-D",
80 example: "'2022-01-01' | detect type",
81 result: Some(Value::test_date(
82 Local.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
83 )),
84 },
85 Example {
86 description: "Date with time and offset",
87 example: "'2022-01-01T00:00:00Z' | detect type",
88 result: Some(Value::test_date(
89 Utc.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
90 )),
91 },
92 Example {
93 description: "Date D-M-Y",
94 example: "'31-12-2021' | detect type",
95 result: Some(Value::test_date(
96 Local
97 .with_ymd_and_hms(2021, 12, 31, 0, 0, 0)
98 .unwrap()
99 .into(),
100 )),
101 },
102 Example {
103 description: "Unknown stays a string",
104 example: "'not-a-number' | detect type",
105 result: Some(Value::test_string("not-a-number")),
106 },
107 ]
108 }
109
110 fn run(
111 &self,
112 engine_state: &EngineState,
113 stack: &mut Stack,
114 call: &Call,
115 input: PipelineData,
116 ) -> Result<PipelineData, ShellError> {
117 let metadata = input
118 .metadata()
119 .map(|metadata| metadata.with_content_type(None));
120 let span = call.head;
121 let display_as_filesize = call.has_flag(engine_state, stack, "prefer-filesize")?;
122 let val = input.into_value(call.head)?;
123 let val = process(val, display_as_filesize, span)?;
124 Ok(val.into_pipeline_data_with_metadata(metadata))
125 }
126}
127
128fn process(val: Value, display_as_filesize: bool, span: Span) -> Result<Value, ShellError> {
131 let val_str = val.coerce_str().unwrap_or_default();
133
134 if BOOLEAN_RE.is_match(&val_str).unwrap_or(false) {
136 let bval = val_str
137 .to_lowercase()
138 .parse::<bool>()
139 .map_err(|_| ShellError::CantConvert {
140 to_type: "string".to_string(),
141 from_type: "bool".to_string(),
142 span,
143 help: Some(format!(
144 r#""{val_str}" does not represent a valid boolean value"#
145 )),
146 })?;
147
148 Ok(Value::bool(bval, span))
149 } else if FLOAT_RE.is_match(&val_str).unwrap_or(false) {
150 let fval = val_str
151 .parse::<f64>()
152 .map_err(|_| ShellError::CantConvert {
153 to_type: "float".to_string(),
154 from_type: "string".to_string(),
155 span,
156 help: Some(format!(
157 r#""{val_str}" does not represent a valid floating point value"#
158 )),
159 })?;
160
161 Ok(Value::float(fval, span))
162 } else if INTEGER_RE.is_match(&val_str).unwrap_or(false) {
163 let ival = val_str
164 .parse::<i64>()
165 .map_err(|_| ShellError::CantConvert {
166 to_type: "int".to_string(),
167 from_type: "string".to_string(),
168 span,
169 help: Some(format!(
170 r#""{val_str}" does not represent a valid integer value"#
171 )),
172 })?;
173
174 if display_as_filesize {
175 Ok(Value::filesize(ival, span))
176 } else {
177 Ok(Value::int(ival, span))
178 }
179 } else if INTEGER_WITH_DELIMS_RE.is_match(&val_str).unwrap_or(false) {
180 let mut val_str = val_str.into_owned();
181 val_str.retain(|x| !['_', ','].contains(&x));
182
183 let ival = val_str
184 .parse::<i64>()
185 .map_err(|_| ShellError::CantConvert {
186 to_type: "int".to_string(),
187 from_type: "string".to_string(),
188 span,
189 help: Some(format!(
190 r#""{val_str}" does not represent a valid integer value"#
191 )),
192 })?;
193
194 if display_as_filesize {
195 Ok(Value::filesize(ival, span))
196 } else {
197 Ok(Value::int(ival, span))
198 }
199 } else if DATETIME_DMY_RE.is_match(&val_str).unwrap_or(false) {
200 let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
201 to_type: "datetime".to_string(),
202 from_type: "string".to_string(),
203 span,
204 help: Some(format!(
205 r#""{val_str}" does not represent a valid DATETIME_MDY_RE value"#
206 )),
207 })?;
208
209 Ok(Value::date(dt, span))
210 } else if DATETIME_YMD_RE.is_match(&val_str).unwrap_or(false) {
211 let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
212 to_type: "datetime".to_string(),
213 from_type: "string".to_string(),
214 span,
215 help: Some(format!(
216 r#""{val_str}" does not represent a valid DATETIME_YMD_RE value"#
217 )),
218 })?;
219
220 Ok(Value::date(dt, span))
221 } else if DATETIME_YMDZ_RE.is_match(&val_str).unwrap_or(false) {
222 let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
223 to_type: "datetime".to_string(),
224 from_type: "string".to_string(),
225 span,
226 help: Some(format!(
227 r#""{val_str}" does not represent a valid DATETIME_YMDZ_RE value"#
228 )),
229 })?;
230
231 Ok(Value::date(dt, span))
232 } else {
233 Ok(val)
235 }
236}
237
/// Extended-mode (`(?x)`) pattern for day-first dates such as `31-12-2021` or
/// `15-06-2023 12:30`.
///
/// Shape: optional quote, 1-2 digit day, `-` or `/`, 1-2 digit month (a leading digit, if
/// present, must be 0 or 1), `-` or `/`, a year of four or more digits, then an optional
/// time part (`T` or space, two-digit hour, optional `:`, two-digit minute, optionally
/// seconds with up to nine fractional digits) and an optional closing quote.
/// Only the shape is checked: the day and time fields accept any digits.
const DATETIME_DMY_PATTERN: &str = r#"(?x)
 ^
 ['"]? # optional quotes
 (?:\d{1,2}) # day
 [-/] # separator
 (?P<month>[01]?\d{1}) # month
 [-/] # separator
 (?:\d{4,}) # year
 (?:
 [T\ ] # separator
 (?:\d{2}) # hour
 :? # separator
 (?:\d{2}) # minute
 (?:
 :? # separator
 (?:\d{2}) # second
 (?:
 \.(?:\d{1,9}) # subsecond
 )?
 )?
 )?
 ['"]? # optional quotes
 $
 "#;

/// Lazily compiled [`DATETIME_DMY_PATTERN`]; compilation happens on first use and panics
/// if the pattern is invalid.
static DATETIME_DMY_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid")
});
/// Extended-mode (`(?x)`) pattern for year-first dates without a UTC offset, such as
/// `2022-01-01`, `2022/01/01` or `2022-01-01T00:00:00`.
///
/// Shape: optional quote, a year of four or more digits, `-` or `/`, 1-2 digit month (a
/// leading digit, if present, must be 0 or 1), `-` or `/`, 1-2 digit day, then an optional
/// time part (`T` or space, two-digit hour, optional `:`, two-digit minute, optionally
/// seconds with up to nine fractional digits) and an optional closing quote.
/// Only the shape is checked: the day and time fields accept any digits.
const DATETIME_YMD_PATTERN: &str = r#"(?x)
 ^
 ['"]? # optional quotes
 (?:\d{4,}) # year
 [-/] # separator
 (?P<month>[01]?\d{1}) # month
 [-/] # separator
 (?:\d{1,2}) # day
 (?:
 [T\ ] # separator
 (?:\d{2}) # hour
 :? # separator
 (?:\d{2}) # minute
 (?:
 :? # separator
 (?:\d{2}) # seconds
 (?:
 \.(?:\d{1,9}) # subsecond
 )?
 )?
 )?
 ['"]? # optional quotes
 $
 "#;
/// Lazily compiled [`DATETIME_YMD_PATTERN`]; compilation happens on first use and panics
/// if the pattern is invalid.
static DATETIME_YMD_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid")
});
/// Extended-mode (`(?x)`) pattern for year-first datetimes that carry a UTC offset, such
/// as `2022-01-01T00:00:00Z` or `2022-01-01T00:00:00.123+01:00`.
///
/// Unlike the offset-free year-first pattern, the time part (hour and minute) is mandatory
/// here; seconds and up to nine fractional digits remain optional. The value must end with
/// either `Z` or a signed offset of two-digit hours and two-digit minutes (the `:` between
/// them is optional), optionally preceded by one whitespace character. Surrounding quotes
/// are optional. Only the shape is checked, not numeric ranges.
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
 ^
 ['"]? # optional quotes
 (?:\d{4,}) # year
 [-/] # separator
 (?P<month>[01]?\d{1}) # month
 [-/] # separator
 (?:\d{1,2}) # day
 [T\ ] # separator
 (?:\d{2}) # hour
 :? # separator
 (?:\d{2}) # minute
 (?:
 :? # separator
 (?:\d{2}) # second
 (?:
 \.(?:\d{1,9}) # subsecond
 )?
 )?
 \s? # optional space
 (?:
 # offset (e.g. +01:00)
 [+-](?:\d{2})
 :?
 (?:\d{2})
 # or Zulu suffix
 |Z
 )
 ['"]? # optional quotes
 $
 "#;
/// Lazily compiled [`DATETIME_YMDZ_PATTERN`]; compilation happens on first use and panics
/// if the pattern is invalid.
static DATETIME_YMDZ_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid")
});
329
/// Matches text that looks like a float: optional leading whitespace, an optional `+`/`-`
/// sign, and then one of
/// - digits with a fractional part (`0.1`, `.5`), optionally followed by an exponent,
/// - the literals `inf` or `NaN` (case-sensitive),
/// - integer digits followed by an exponent (`7e-05`),
/// - integer digits followed by a bare trailing dot (`3.`).
///
/// Plain integers without a dot or exponent do not match.
static FLOAT_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
        .expect("float pattern should be valid")
});
334
/// Matches a plain decimal integer: optional leading whitespace, an optional `-` sign
/// (a leading `+` is not accepted) and one or more digits.
static INTEGER_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*-?(\d+)$").expect("integer pattern should be valid"));
337
/// Matches an integer written with thousands separators: optional leading whitespace, an
/// optional `-` sign, one to three digits, then one or more groups of `,` or `_` followed
/// by exactly three digits (`1,000`, `10_000_000`). At least one group is required, and the
/// pattern does not require the same delimiter to be used throughout.
static INTEGER_WITH_DELIMS_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
        .expect("integer with delimiters pattern should be valid")
});
342
/// Matches the words `true` and `false`, case-insensitively.
///
/// NOTE(review): the optional leading whitespace (`\s*`) belongs only to the `true`
/// alternative, so ` true` matches while ` false` does not. This looks unintentional;
/// confirm before relying on either behavior.
static BOOLEAN_RE: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"^\s*(true)$|^(false)$")
        .case_insensitive(true)
        .build()
        .expect("boolean pattern should be valid")
});
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `re` matches every string in `inputs`, naming the input on failure.
    fn assert_all_match(re: &Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                re.is_match(input).unwrap(),
                "expected {input:?} to match"
            );
        }
    }

    /// Asserts that `re` matches none of the strings in `inputs`, naming the input on failure.
    fn assert_none_match(re: &Regex, inputs: &[&str]) {
        for &input in inputs {
            assert!(
                !re.is_match(input).unwrap(),
                "expected {input:?} not to match"
            );
        }
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(DetectType)
    }

    #[test]
    fn test_float_parse() {
        assert_all_match(
            &FLOAT_RE,
            &[
                "0.1",
                "3.0",
                "3.00001",
                "-9.9990e-003",
                "9.9990e+003",
                "9.9990E+003",
                ".5",
                "2.5E-10",
                "2.5e10",
                "NaN",
                "-NaN",
                "-inf",
                "inf",
                "-7e-05",
                "7e-05",
                "+7e+05",
            ],
        );

        // Plain integers and incomplete exponents must not be classified as floats.
        assert_none_match(&FLOAT_RE, &["", "1", "abc", "1e"]);
    }

    #[test]
    fn test_int_parse() {
        assert_all_match(
            &INTEGER_RE,
            &[
                "0",
                "1",
                "10",
                "100",
                "1000",
                "10000",
                "100000",
                "1000000",
                "10000000",
                "100000000",
                "1000000000",
                "10000000000",
                "100000000000",
                "-42",
            ],
        );
        assert_none_match(&INTEGER_RE, &["", "+42", "4.2", "1_000"]);

        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1_000",
                "10_000",
                "100_000",
                "1_000_000",
                "10_000_000",
                "100_000_000",
                "1_000_000_000",
                "10_000_000_000",
                "100_000_000_000",
                "1,000",
                "10,000",
                "100,000",
                "1,000,000",
                "10,000,000",
                "100,000,000",
                "1,000,000,000",
                "10,000,000,000",
            ],
        );
        // At least one delimiter group is required and every group has exactly three digits.
        assert_none_match(&INTEGER_WITH_DELIMS_RE, &["1000", "1_00", "1000_000"]);
    }

    #[test]
    fn test_bool_parse() {
        assert_all_match(&BOOLEAN_RE, &["true", "false", "TRUE", "False", "tRuE"]);
        assert_none_match(&BOOLEAN_RE, &["1", "0", "yes", "truefalse"]);
    }

    #[test]
    fn test_datetime_ymdz_pattern() {
        assert_all_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00Z",
                "2022-01-01T00:00:00.123456789Z",
                "2022-01-01T00:00:00+01:00",
                "2022-01-01T00:00:00.123456789+01:00",
                "2022-01-01T00:00:00-01:00",
                "2022-01-01T00:00:00.123456789-01:00",
                "'2022-01-01T00:00:00Z'",
            ],
        );

        // A missing or malformed offset must be rejected.
        assert_none_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.",
                "2022-01-01T00:00:00.123456789",
                "2022-01-01T00:00:00+01",
                "2022-01-01T00:00:00+01:0",
                "2022-01-01T00:00:00+1:00",
                "2022-01-01T00:00:00.123456789+01",
                "2022-01-01T00:00:00.123456789+01:0",
                "2022-01-01T00:00:00.123456789+1:00",
                "2022-01-01T00:00:00-01",
                "2022-01-01T00:00:00-01:0",
                "2022-01-01T00:00:00-1:00",
                "2022-01-01T00:00:00.123456789-01",
                "2022-01-01T00:00:00.123456789-01:0",
                "2022-01-01T00:00:00.123456789-1:00",
            ],
        );
    }

    #[test]
    fn test_datetime_ymd_pattern() {
        assert_all_match(
            &DATETIME_YMD_RE,
            &[
                "2022-01-01",
                "2022/01/01",
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.000000000",
                "'2022-01-01'",
            ],
        );

        // At most nine fractional-second digits are accepted.
        assert_none_match(&DATETIME_YMD_RE, &["2022-01-01T00:00:00.0000000000"]);
    }

    #[test]
    fn test_datetime_dmy_pattern() {
        assert_all_match(
            &DATETIME_DMY_RE,
            &["31-12-2021", "01/01/2022", "15-06-2023 12:30"],
        );

        // Year-first strings are rejected because the day field holds at most two digits.
        assert_none_match(
            &DATETIME_DMY_RE,
            &["2022-13-01", "2022-01-32", "2022-01-01 24:00"],
        );
        // Day-first strings with a short year or an unsupported separator are rejected too.
        assert_none_match(&DATETIME_DMY_RE, &["31-12-21", "31.12.2021"]);
    }
}