use chrono::{Local, TimeZone, Utc};
use fancy_regex::{Regex, RegexBuilder};
use nu_engine::command_prelude::*;
use nu_protocol::PipelineMetadata;
use std::sync::LazyLock;
/// Command type backing `detect type`; it carries no state, all behavior lives in its
/// `Command` implementation.
#[derive(Clone)]
pub struct DetectType;
impl Command for DetectType {
    /// The name the command is invoked by.
    fn name(&self) -> &str {
        "detect type"
    }

    /// Declares the accepted input/output types and the two opt-in switches
    /// (`--prefer-filesize`/`-f` and `--prefer-dmy`).
    fn signature(&self) -> Signature {
        let io_types = vec![(Type::String, Type::Any), (Type::Any, Type::Any)];

        Signature::build(self.name())
            .input_output_types(io_types)
            .switch(
                "prefer-filesize",
                "For ints display them as human-readable file sizes.",
                Some('f'),
            )
            .switch(
                "prefer-dmy",
                "Prefer day-month-year format for ambiguous dates.",
                None,
            )
            .category(Category::Strings)
            .allow_variants_without_examples(true)
    }

    /// One-line summary of the command.
    fn description(&self) -> &str {
        "Infer Nushell datatype from a string."
    }

    /// Extra keywords under which the command can be found.
    fn search_terms(&self) -> Vec<&str> {
        vec!["convert", "conversion"]
    }

    /// Usage examples, one per detectable type plus the pass-through case.
    fn examples(&self) -> Vec<Example<'_>> {
        // Dates written without an offset are expected at midnight in the local time zone.
        let local_midnight = |year: i32, month: u32, day: u32| {
            Value::test_date(
                Local
                    .with_ymd_and_hms(year, month, day, 0, 0, 0)
                    .unwrap()
                    .into(),
            )
        };

        vec![
            Example {
                description: "Bool from string",
                example: "'true' | detect type",
                result: Some(Value::test_bool(true)),
            },
            Example {
                description: "Bool is case insensitive",
                example: "'FALSE' | detect type",
                result: Some(Value::test_bool(false)),
            },
            Example {
                description: "Int from plain digits",
                example: "'42' | detect type",
                result: Some(Value::test_int(42)),
            },
            Example {
                description: "Int with underscores",
                example: "'1_000_000' | detect type",
                result: Some(Value::test_int(1_000_000)),
            },
            Example {
                description: "Int with commas",
                example: "'1,234,567' | detect type",
                result: Some(Value::test_int(1_234_567)),
            },
            #[allow(clippy::approx_constant, reason = "approx PI in examples is fine")]
            Example {
                description: "Float from decimal",
                example: "'3.14' | detect type",
                result: Some(Value::test_float(3.14)),
            },
            Example {
                description: "Float in scientific notation",
                example: "'6.02e23' | detect type",
                result: Some(Value::test_float(6.02e23)),
            },
            Example {
                description: "Prefer filesize for ints",
                example: "'1024' | detect type -f",
                result: Some(Value::test_filesize(1024)),
            },
            Example {
                description: "Date Y-M-D",
                example: "'2022-01-01' | detect type",
                result: Some(local_midnight(2022, 1, 1)),
            },
            Example {
                description: "Date with time and offset",
                example: "'2022-01-01T00:00:00Z' | detect type",
                result: Some(Value::test_date(
                    Utc.with_ymd_and_hms(2022, 1, 1, 0, 0, 0).unwrap().into(),
                )),
            },
            Example {
                description: "Date D-M-Y",
                example: "'31-12-2021' | detect type",
                result: Some(local_midnight(2021, 12, 31)),
            },
            Example {
                description: "Date M-D-Y (default for ambiguous)",
                example: "'01/02/2025' | detect type",
                result: Some(local_midnight(2025, 1, 2)),
            },
            Example {
                description: "Prefer DMY for ambiguous dates",
                example: "'01/02/2025' | detect type --prefer-dmy",
                result: Some(local_midnight(2025, 2, 1)),
            },
            Example {
                description: "Unknown stays a string",
                example: "'not-a-number' | detect type",
                result: Some(Value::test_string("not-a-number")),
            },
        ]
    }

    /// Reads the two switches, collects the pipeline input into a single value and hands
    /// it to `process` together with the input's metadata.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        mut input: PipelineData,
    ) -> Result<PipelineData, ShellError> {
        let head = call.head;
        let prefer_filesize = call.has_flag(engine_state, stack, "prefer-filesize")?;
        let prefer_dmy = call.has_flag(engine_state, stack, "prefer-dmy")?;

        // Metadata is taken out before the pipeline is consumed by `into_value`.
        let metadata = input.take_metadata();
        let collected = input.into_value(head)?;

        process(collected, metadata, prefer_filesize, prefer_dmy, head)
    }
}
fn swap_day_month(input: &str) -> String {
let re_slash =
fancy_regex::Regex::new(r"(\d{1,2})/(\d{1,2})/(\d{4,})").expect("regex should be valid");
let swapped_slash = re_slash.replace_all(input, "$2/$1/$3");
let re_dash =
fancy_regex::Regex::new(r"(\d{1,2})-(\d{1,2})-(\d{4,})").expect("regex should be valid");
re_dash.replace_all(&swapped_slash, "$2-$1-$3").to_string()
}
fn parse_date_from_string_with_dayfirst(
input: &str,
span: Span,
dayfirst: bool,
) -> Result<chrono::DateTime<chrono::FixedOffset>, nu_protocol::Value> {
let input = if dayfirst {
swap_day_month(input)
} else {
input.to_string()
};
match dtparse::parse(&input) {
Ok((native_dt, fixed_offset)) => {
let offset = match fixed_offset {
Some(offset) => offset,
None => *chrono::Local
.from_local_datetime(&native_dt)
.single()
.unwrap_or_default()
.offset(),
};
match offset.from_local_datetime(&native_dt) {
chrono::LocalResult::Single(d) => Ok(d),
chrono::LocalResult::Ambiguous(d, _) => Ok(d),
chrono::LocalResult::None => Err(nu_protocol::Value::error(
nu_protocol::ShellError::DatetimeParseError {
msg: input.to_string(),
span,
},
span,
)),
}
}
Err(_) => Err(nu_protocol::Value::error(
nu_protocol::ShellError::DatetimeParseError {
msg: input.to_string(),
span,
},
span,
)),
}
}
/// Infers a typed value from the string form of `val`.
///
/// The text is checked against the detection patterns in this order: boolean, float,
/// plain integer, integer with `,`/`_` group separators, the two day/month-ambiguous
/// date layouts (DMY first when `prefer_dmy` is set, MDY first otherwise), year-first
/// dates, and year-first dates with an offset.
///
/// * `display_as_filesize` — detected integers are returned as filesizes.
/// * `metadata` — forwarded to the output; when a conversion happened its content type
///   is cleared via `with_content_type(None)`.
///
/// If nothing matches (including when `val` cannot be coerced to a string), `val` is
/// returned unchanged with its metadata intact.
///
/// # Errors
///
/// Returns `ShellError::CantConvert` when a pattern matched but the text still could not
/// be parsed into the corresponding type (for example an integer outside the `i64`
/// range, or a date the date parser rejects).
fn process(
    val: Value,
    metadata: Option<PipelineMetadata>,
    display_as_filesize: bool,
    prefer_dmy: bool,
    span: Span,
) -> Result<PipelineData, ShellError> {
    // The borrowed string form of `val` only lives for this statement, so `val` itself
    // can still be moved into the pass-through branch below.
    let detected = detect_from_text(
        &val.coerce_str().unwrap_or_default(),
        display_as_filesize,
        prefer_dmy,
        span,
    )?;

    Ok(match detected {
        Some(value) => value.into_pipeline_data_with_metadata(
            metadata.map(|metadata| metadata.with_content_type(None)),
        ),
        None => val.into_pipeline_data_with_metadata(metadata),
    })
}

/// Builds the `CantConvert` error reported when `text` matched a detection pattern but
/// could not be parsed as `to_type`; `kind` is the wording used in the help message.
fn cant_convert_from_string(to_type: &str, text: &str, kind: &str, span: Span) -> ShellError {
    ShellError::CantConvert {
        to_type: to_type.to_string(),
        from_type: "string".to_string(),
        span,
        help: Some(format!(
            r#""{text}" does not represent a valid {kind} value"#
        )),
    }
}

/// Matches `text` against the detection patterns and parses it as the first type that
/// fits; returns `Ok(None)` when no pattern matches.
fn detect_from_text(
    text: &str,
    display_as_filesize: bool,
    prefer_dmy: bool,
    span: Span,
) -> Result<Option<Value>, ShellError> {
    // The scalar patterns tolerate leading whitespace (`^\s*`), but the standard
    // `FromStr` implementations do not, so parse the trimmed text.
    let trimmed = text.trim_start();

    let int_value = |ival: i64| {
        if display_as_filesize {
            Value::filesize(ival, span)
        } else {
            Value::int(ival, span)
        }
    };

    if BOOLEAN_RE.is_match(text).unwrap_or(false) {
        let bval = trimmed
            .to_lowercase()
            .parse::<bool>()
            .map_err(|_| cant_convert_from_string("bool", text, "boolean", span))?;
        return Ok(Some(Value::bool(bval, span)));
    }

    if FLOAT_RE.is_match(text).unwrap_or(false) {
        let fval = trimmed
            .parse::<f64>()
            .map_err(|_| cant_convert_from_string("float", text, "floating point", span))?;
        return Ok(Some(Value::float(fval, span)));
    }

    if INTEGER_RE.is_match(text).unwrap_or(false) {
        let ival = trimmed
            .parse::<i64>()
            .map_err(|_| cant_convert_from_string("int", text, "integer", span))?;
        return Ok(Some(int_value(ival)));
    }

    if INTEGER_WITH_DELIMS_RE.is_match(text).unwrap_or(false) {
        // Drop the group separators before parsing.
        let digits: String = trimmed
            .chars()
            .filter(|c| !matches!(c, '_' | ','))
            .collect();
        let ival = digits
            .parse::<i64>()
            .map_err(|_| cant_convert_from_string("int", &digits, "integer", span))?;
        return Ok(Some(int_value(ival)));
    }

    // Date layouts in priority order: (pattern, swap day/month before parsing, name used
    // in the error message). References to the lazies are stored so each regex is only
    // compiled if it is actually reached.
    let dmy = (&DATETIME_DMY_RE, true, "DATETIME_DMY_RE");
    let mdy = (&DATETIME_MDY_RE, false, "DATETIME_MDY_RE");
    let (first, second) = if prefer_dmy { (dmy, mdy) } else { (mdy, dmy) };
    let date_layouts = [
        first,
        second,
        (&DATETIME_YMD_RE, false, "DATETIME_YMD_RE"),
        (&DATETIME_YMDZ_RE, false, "DATETIME_YMDZ_RE"),
    ];

    for (regex, dayfirst, name) in date_layouts {
        if regex.is_match(text).unwrap_or(false) {
            let dt = parse_date_from_string_with_dayfirst(text, span, dayfirst)
                .map_err(|_| cant_convert_from_string("datetime", text, name, span))?;
            return Ok(Some(Value::date(dt, span)));
        }
    }

    Ok(None)
}
/// Verbose-mode (`(?x)`) regex source for day-first dates: `D[-/]M[-/]YYYY`, optionally
/// followed by a time and optionally wrapped in single or double quotes.
///
/// Only the month is range-limited (1–12, leading zero optional); the day is any one or
/// two digits and the year any four or more digits. The optional time part is
/// introduced by `T` or a space and consists of two-digit hour and minute, optionally
/// followed by two-digit seconds and a 1–9 digit fraction; the `:` separators are
/// optional.
const DATETIME_DMY_PATTERN: &str = r#"(?x)
^
['"]? # optional quotes
(?:\d{1,2}) # day
[-/] # separator
(?P<month>0?[1-9]|1[0-2]) # month
[-/] # separator
(?:\d{4,}) # year
(?:
[T\ ] # separator
(?:\d{2}) # hour
:? # separator
(?:\d{2}) # minute
(?:
:? # separator
(?:\d{2}) # second
(?:
\.(?:\d{1,9}) # subsecond
)?
)?
)?
['"]? # optional quotes
$
"#;
/// Lazily compiled form of `DATETIME_DMY_PATTERN`; compilation panics with the `expect`
/// message if the pattern is invalid.
static DATETIME_DMY_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid")
});
/// Verbose-mode (`(?x)`) regex source for year-first dates without an offset:
/// `YYYY[-/]M[-/]D`, optionally followed by a time and optionally wrapped in single or
/// double quotes.
///
/// The year is four or more digits, the month is limited to 1–12 (leading zero
/// optional) and the day is any one or two digits. The optional time part is introduced
/// by `T` or a space: two-digit hour and minute, optionally two-digit seconds and a 1–9
/// digit fraction, with optional `:` separators.
const DATETIME_YMD_PATTERN: &str = r#"(?x)
^
['"]? # optional quotes
(?:\d{4,}) # year
[-/] # separator
(?P<month>0?[1-9]|1[0-2]) # month
[-/] # separator
(?:\d{1,2}) # day
(?:
[T\ ] # separator
(?:\d{2}) # hour
:? # separator
(?:\d{2}) # minute
(?:
:? # separator
(?:\d{2}) # seconds
(?:
\.(?:\d{1,9}) # subsecond
)?
)?
)?
['"]? # optional quotes
$
"#;
/// Lazily compiled form of `DATETIME_YMD_PATTERN`; compilation panics with the `expect`
/// message if the pattern is invalid.
static DATETIME_YMD_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid")
});
/// Verbose-mode (`(?x)`) regex source for year-first datetimes that carry an explicit
/// offset, optionally wrapped in single or double quotes.
///
/// Unlike `DATETIME_YMD_PATTERN`, the time part (hour and minute, optional seconds and
/// 1–9 digit fraction) is mandatory here, and it must be followed — after at most one
/// whitespace character — by either a `+`/`-` offset of two-digit hours and two-digit
/// minutes (the `:` between them is optional) or the literal `Z`.
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
^
['"]? # optional quotes
(?:\d{4,}) # year
[-/] # separator
(?P<month>0?[1-9]|1[0-2]) # month
[-/] # separator
(?:\d{1,2}) # day
[T\ ] # separator
(?:\d{2}) # hour
:? # separator
(?:\d{2}) # minute
(?:
:? # separator
(?:\d{2}) # second
(?:
\.(?:\d{1,9}) # subsecond
)?
)?
\s? # optional space
(?:
# offset (e.g. +01:00)
[+-](?:\d{2})
:?
(?:\d{2})
# or Zulu suffix
|Z
)
['"]? # optional quotes
$
"#;
/// Lazily compiled form of `DATETIME_YMDZ_PATTERN`; compilation panics with the `expect`
/// message if the pattern is invalid.
static DATETIME_YMDZ_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid")
});
/// Verbose-mode (`(?x)`) regex source for month-first dates: `M[-/]D[-/]YYYY`,
/// optionally followed by a time and optionally wrapped in single or double quotes.
///
/// The leading month is limited to 1–12 (leading zero optional), the day is any one or
/// two digits and the year any four or more digits. The optional time part is
/// introduced by `T` or a space: two-digit hour and minute, optionally two-digit seconds
/// and a 1–9 digit fraction, with optional `:` separators.
const DATETIME_MDY_PATTERN: &str = r#"(?x)
^
['"]? # optional quotes
(?P<month>0?[1-9]|1[0-2]) # month
[-/] # separator
(?:\d{1,2}) # day
[-/] # separator
(?:\d{4,}) # year
(?:
[T\ ] # separator
(?:\d{2}) # hour
:? # separator
(?:\d{2}) # minute
(?:
:? # separator
(?:\d{2}) # second
(?:
\.(?:\d{1,9}) # subsecond
)?
)?
)?
['"]? # optional quotes
$
"#;
/// Lazily compiled form of `DATETIME_MDY_PATTERN`; compilation panics with the `expect`
/// message if the pattern is invalid.
static DATETIME_MDY_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(DATETIME_MDY_PATTERN).expect("datetime_mdy_pattern should be valid")
});
/// Matches text that should be treated as a float: optional leading whitespace, an
/// optional `+`/`-` sign, then one of
/// - digits with a fractional part (`1.5`, `.5`), optionally followed by an exponent,
/// - the literals `inf` or `NaN` (matched case-sensitively),
/// - an integer mantissa with a mandatory exponent (`7e-05`),
/// - digits followed by a bare trailing dot (`3.`).
static FLOAT_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
.expect("float pattern should be valid")
});
/// Matches a plain integer: optional leading whitespace, an optional `-` sign (a leading
/// `+` is not accepted) and one or more digits.
static INTEGER_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^\s*-?(\d+)$").expect("integer pattern should be valid"));
/// Matches an integer written with thousands separators: optional leading whitespace,
/// an optional `-` sign, one to three digits, then one or more groups of a `,` or `_`
/// followed by exactly three digits. The two separator characters may be mixed within
/// one number.
static INTEGER_WITH_DELIMS_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
.expect("integer with delimiters pattern should be valid")
});
/// Matches the words `true` and `false`, case-insensitively.
///
/// NOTE(review): the two alternatives are not symmetric — `true` may be preceded by
/// whitespace (`^\s*`), while `false` must start at the very beginning of the text.
/// This looks unintentional; confirm before relying on either behavior.
static BOOLEAN_RE: LazyLock<Regex> = LazyLock::new(|| {
RegexBuilder::new(r"^\s*(true)$|^(false)$")
.case_insensitive(true)
.build()
.expect("boolean pattern should be valid")
});
// Unit tests for the detection regexes and for how `process` orders the day and month
// of ambiguous dates.
#[cfg(test)]
mod test {
use super::*;
use rstest::rstest;
// Hands the command to the `nu_test_support` example runner.
#[test]
fn test_examples() -> nu_test_support::Result {
nu_test_support::test().examples(DetectType)
}
// FLOAT_RE accepts decimals, exponent notation in either case, signed values and the
// `inf` / `NaN` literals.
#[test]
fn test_float_parse() {
assert!(FLOAT_RE.is_match("0.1").unwrap());
assert!(FLOAT_RE.is_match("3.0").unwrap());
assert!(FLOAT_RE.is_match("3.00001").unwrap());
assert!(FLOAT_RE.is_match("-9.9990e-003").unwrap());
assert!(FLOAT_RE.is_match("9.9990e+003").unwrap());
assert!(FLOAT_RE.is_match("9.9990E+003").unwrap());
assert!(FLOAT_RE.is_match("9.9990E+003").unwrap());
assert!(FLOAT_RE.is_match(".5").unwrap());
assert!(FLOAT_RE.is_match("2.5E-10").unwrap());
assert!(FLOAT_RE.is_match("2.5e10").unwrap());
assert!(FLOAT_RE.is_match("NaN").unwrap());
assert!(FLOAT_RE.is_match("-NaN").unwrap());
assert!(FLOAT_RE.is_match("-inf").unwrap());
assert!(FLOAT_RE.is_match("inf").unwrap());
assert!(FLOAT_RE.is_match("-7e-05").unwrap());
assert!(FLOAT_RE.is_match("7e-05").unwrap());
assert!(FLOAT_RE.is_match("+7e+05").unwrap());
}
// Plain digit strings are checked against INTEGER_RE; `_`- and `,`-grouped numbers
// against INTEGER_WITH_DELIMS_RE.
#[test]
fn test_int_parse() {
assert!(INTEGER_RE.is_match("0").unwrap());
assert!(INTEGER_RE.is_match("1").unwrap());
assert!(INTEGER_RE.is_match("10").unwrap());
assert!(INTEGER_RE.is_match("100").unwrap());
assert!(INTEGER_RE.is_match("1000").unwrap());
assert!(INTEGER_RE.is_match("10000").unwrap());
assert!(INTEGER_RE.is_match("100000").unwrap());
assert!(INTEGER_RE.is_match("1000000").unwrap());
assert!(INTEGER_RE.is_match("10000000").unwrap());
assert!(INTEGER_RE.is_match("100000000").unwrap());
assert!(INTEGER_RE.is_match("1000000000").unwrap());
assert!(INTEGER_RE.is_match("10000000000").unwrap());
assert!(INTEGER_RE.is_match("100000000000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1_000_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10_000_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100_000_000_000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("100,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("1,000,000,000").unwrap());
assert!(INTEGER_WITH_DELIMS_RE.is_match("10,000,000,000").unwrap());
}
// BOOLEAN_RE accepts the words `true` / `false` and rejects the digits `1` / `0`.
#[test]
fn test_bool_parse() {
assert!(BOOLEAN_RE.is_match("true").unwrap());
assert!(BOOLEAN_RE.is_match("false").unwrap());
assert!(!BOOLEAN_RE.is_match("1").unwrap());
assert!(!BOOLEAN_RE.is_match("0").unwrap());
}
// DATETIME_YMDZ_RE requires a complete offset (`Z` or sign + HH + MM): datetimes with
// no offset, or with a truncated one, must not match.
#[test]
fn test_datetime_ymdz_pattern() {
assert!(DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00Z").unwrap());
assert!(
DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789Z")
.unwrap()
);
assert!(
DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00+01:00")
.unwrap()
);
assert!(
DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+01:00")
.unwrap()
);
assert!(
DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00-01:00")
.unwrap()
);
assert!(
DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-01:00")
.unwrap()
);
assert!(DATETIME_YMDZ_RE.is_match("'2022-01-01T00:00:00Z'").unwrap());
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00").unwrap());
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00.").unwrap());
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789")
.unwrap()
);
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00+01").unwrap());
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00+01:0")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00+1:00")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+01")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+01:0")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789+1:00")
.unwrap()
);
assert!(!DATETIME_YMDZ_RE.is_match("2022-01-01T00:00:00-01").unwrap());
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00-01:0")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00-1:00")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-01")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-01:0")
.unwrap()
);
assert!(
!DATETIME_YMDZ_RE
.is_match("2022-01-01T00:00:00.123456789-1:00")
.unwrap()
);
}
// DATETIME_YMD_RE accepts `-` and `/` separators, an optional time and quotes; a
// ten-digit fraction exceeds the nine-digit limit and is rejected.
#[test]
fn test_datetime_ymd_pattern() {
assert!(DATETIME_YMD_RE.is_match("2022-01-01").unwrap());
assert!(DATETIME_YMD_RE.is_match("2022/01/01").unwrap());
assert!(DATETIME_YMD_RE.is_match("2022-01-01T00:00:00").unwrap());
assert!(
DATETIME_YMD_RE
.is_match("2022-01-01T00:00:00.000000000")
.unwrap()
);
assert!(DATETIME_YMD_RE.is_match("'2022-01-01'").unwrap());
assert!(
!DATETIME_YMD_RE
.is_match("2022-01-01T00:00:00.0000000000")
.unwrap()
);
}
// DATETIME_DMY_RE accepts day-first dates with or without a time. Note that every
// negative case below is a year-first string.
#[test]
fn test_datetime_dmy_pattern() {
assert!(DATETIME_DMY_RE.is_match("31-12-2021").unwrap());
assert!(DATETIME_DMY_RE.is_match("01/01/2022").unwrap());
assert!(DATETIME_DMY_RE.is_match("15-06-2023 12:30").unwrap());
assert!(!DATETIME_DMY_RE.is_match("2022-13-01").unwrap());
assert!(!DATETIME_DMY_RE.is_match("2022-01-32").unwrap());
assert!(!DATETIME_DMY_RE.is_match("2022-01-01 24:00").unwrap());
}
// DATETIME_MDY_RE accepts month-first dates and rejects a three-digit year, a
// single-digit hour and a three-digit month field.
#[test]
fn test_datetime_mdy_pattern() {
assert!(DATETIME_MDY_RE.is_match("09/24/2012").unwrap());
assert!(DATETIME_MDY_RE.is_match("09/24/2012 02:43:48").unwrap());
assert!(DATETIME_MDY_RE.is_match("01/01/2022").unwrap());
assert!(!DATETIME_MDY_RE.is_match("09/24/123").unwrap());
assert!(!DATETIME_MDY_RE.is_match("09/24/2012 2:43:48").unwrap());
assert!(!DATETIME_MDY_RE.is_match("009/24/2012").unwrap());
}
// Without `prefer_dmy`, an ambiguous date is read month-first; dates that only make
// sense in one order (a field greater than 12) resolve to that order.
#[rstest]
#[case("01/02/2025", 2025, 1, 2)]
#[case("13/02/2025", 2025, 2, 13)]
#[case("02/13/2025", 2025, 2, 13)]
fn test_ambiguous_date_default(
#[case] input: &str,
#[case] year: i32,
#[case] month: u32,
#[case] day: u32,
) {
use chrono::{DateTime, FixedOffset, Local, TimeZone};
let span = Span::test_data();
let result = process(Value::string(input, span), None, false, false, span)
.unwrap()
.into_value(span)
.unwrap();
if let Value::Date { val, .. } = result {
assert_eq!(
val,
DateTime::<FixedOffset>::from(
Local.with_ymd_and_hms(year, month, day, 0, 0, 0).unwrap()
)
);
} else {
panic!("Expected date");
}
}
// With `prefer_dmy`, an ambiguous date is read day-first; unambiguous dates resolve
// the same way as in the default case.
#[rstest]
#[case("01/02/2025", 2025, 2, 1)]
#[case("13/02/2025", 2025, 2, 13)]
#[case("02/13/2025", 2025, 2, 13)]
fn test_ambiguous_date_prefer_dmy(
#[case] input: &str,
#[case] year: i32,
#[case] month: u32,
#[case] day: u32,
) {
use chrono::{DateTime, FixedOffset, Local, TimeZone};
let span = Span::test_data();
let result = process(Value::string(input, span), None, false, true, span)
.unwrap()
.into_value(span)
.unwrap();
if let Value::Date { val, .. } = result {
assert_eq!(
val,
DateTime::<FixedOffset>::from(
Local.with_ymd_and_hms(year, month, day, 0, 0, 0).unwrap()
)
);
} else {
panic!("Expected date");
}
}
}