Skip to main content

nu_utils/
quoting.rs

1use fancy_regex::Regex;
2use std::sync::LazyLock;
3
4// This hits, in order:
5// • Any character of []:`{}#'";()|$,.!?=
6// • Any digit (\d)
7// • Any whitespace (\s)
8// • Case-insensitive sign-insensitive float "keywords" inf, infinity and nan.
9static NEEDS_QUOTING_REGEX: LazyLock<Regex> = LazyLock::new(|| {
10    Regex::new(r#"[\[\]:`\{\}#'";\(\)\|\$,\.\d\s!?=]|(?i)^[+\-]?(inf(inity)?|nan)$"#)
11        .expect("internal error: NEEDS_QUOTING_REGEX didn't compile")
12});
13
14pub fn needs_quoting(string: &str) -> bool {
15    if string.is_empty() {
16        return true;
17    }
18    // These are case-sensitive keywords
19    match string {
20        // `true`/`false`/`null` are active keywords in JSON and NUON
21        // `&&` is denied by the nu parser for diagnostics reasons
22        // (https://github.com/nushell/nushell/pull/7241)
23        "true" | "false" | "null" | "&&" => return true,
24        _ => (),
25    };
26    // All other cases are handled here
27    NEEDS_QUOTING_REGEX.is_match(string).unwrap_or(false)
28}
29
/// Wraps `string` in double quotes, backslash-escaping every `"` and `\` inside it.
///
/// All other characters are copied through unchanged.
pub fn escape_quote_string(string: &str) -> String {
    // Reserve room for the content plus the two surrounding quotes; escapes
    // grow the buffer on demand.
    let mut quoted = String::with_capacity(string.len() + 2);

    quoted.push('"');
    for ch in string.chars() {
        match ch {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            other => quoted.push(other),
        }
    }
    quoted.push('"');

    quoted
}
44
/// Renders `s` as a Nushell raw string literal when that avoids escaping.
///
/// Yields `None` when `s` contains neither a double quote nor a backslash; the
/// caller should then fall back to regular quoting or a bare string.
///
/// Otherwise the result has the form `r#'...'#`, using the smallest number of
/// `#` characters that keeps the closing delimiter unambiguous.
///
/// Note: Nushell requires at least one `#` in raw strings (i.e., `r#'...'#` not `r'...'`).
pub fn as_raw_string(s: &str) -> Option<String> {
    // Without `"` or `\` there is nothing to escape, so a raw string gains nothing.
    let needs_escaping = s.contains('"') || s.contains('\\');
    if !needs_escaping {
        return None;
    }

    // Try delimiter widths 1, 2, 3, ... (at least one `#` is mandatory) and take
    // the first one that cannot be confused with the end of the literal:
    // - the content must not contain `'` followed by that many `#`, which
    //   would close the literal early
    // - the content must not begin with that many `#`, because the opening
    //   quote plus those leading hashes would also parse as a closing delimiter
    (1usize..).find_map(|width| {
        let fence = "#".repeat(width);
        let terminator = format!("'{fence}");
        let ambiguous = s.starts_with(&fence) || s.contains(&terminator);

        (!ambiguous).then(|| format!("r{fence}'{s}'{fence}"))
    })
}
76
#[cfg(test)]
mod tests {
    use super::as_raw_string;

    /// Asserts that `as_raw_string(input)` yields exactly the literal `expected`.
    #[track_caller]
    fn assert_raw(input: &str, expected: &str) {
        assert_eq!(as_raw_string(input), Some(expected.to_string()));
    }

    // No `'#` in the content and no leading `#`: one hash is enough.
    #[test]
    fn raw_string_uses_single_hash_when_safe() {
        assert_raw(r#"hello \"world\""#, r#"r#'hello \"world\"'#"#);
    }

    // Content containing `'#` would close a one-hash literal early.
    #[test]
    fn raw_string_uses_more_hashes_for_quote_hash_sequence() {
        assert_raw(
            r#"contains '# and "quote""#,
            r##"r##'contains '# and "quote"'##"##,
        );
    }

    // A leading `#` right after the opening quote would read as a closing delimiter.
    #[test]
    fn raw_string_uses_more_hashes_when_content_starts_with_hash() {
        let input = "# example.toml\nname = \"my-app\"\nversion = \"1.0.0\"\n";
        let expected = "r##'# example.toml\nname = \"my-app\"\nversion = \"1.0.0\"\n'##";

        assert_raw(input, expected);
    }

    // Content containing `'##` needs three hashes.
    #[test]
    fn raw_string_scales_hash_count_for_longer_sequences() {
        assert_raw(
            r#"contains '## and "quote""#,
            r###"r###'contains '## and "quote"'###"###,
        );
    }
}