Skip to main content

brush_core/
escape.rs

1//! String escaping utilities
2
3use std::borrow::Cow;
4
5use itertools::Itertools;
6
7use crate::{error, int_utils};
8
/// Escape expansion mode.
///
/// Selects which dialect of backslash escapes [`expand_backslash_escapes`] recognizes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EscapeExpansionMode {
    /// echo builtin mode. In this mode a `\c` escape stops all further output.
    EchoBuiltin,
    /// ANSI-C quotes. In this mode `\cX` produces a control character, the escapes
    /// `\'`, `\"`, `\?`, `\NNN` (octal) and `\x{...}` are recognized, and the expanded
    /// result is cropped at the first NUL byte.
    AnsiCQuotes,
}
17
18/// Expands backslash escapes in the provided string.
19///
20/// # Arguments
21///
22/// * `s` - The string to expand.
23/// * `mode` - The mode to use for expansion.
24#[expect(clippy::too_many_lines)]
25pub fn expand_backslash_escapes(
26    s: &str,
27    mode: EscapeExpansionMode,
28) -> Result<(Vec<u8>, bool), error::Error> {
29    let mut result: Vec<u8> = Vec::with_capacity(s.len());
30    let mut it = s.chars();
31    while let Some(c) = it.next() {
32        if c != '\\' {
33            // Not a backslash, add and move on.
34            result.append(c.to_string().into_bytes().as_mut());
35            continue;
36        }
37
38        let Some(escape_cmd) = it.next() else {
39            // Trailing backslash.
40            result.push(b'\\');
41            continue;
42        };
43
44        match escape_cmd {
45            'a' => result.push(b'\x07'),
46            'b' => result.push(b'\x08'),
47            'c' => {
48                match mode {
49                    EscapeExpansionMode::EchoBuiltin => {
50                        // Stop all additional output!
51                        return Ok((result, false));
52                    }
53                    EscapeExpansionMode::AnsiCQuotes => {
54                        if let Some(char_value) = it.next() {
55                            // Special case backslash. If it's immediately followed by another
56                            // backslash, then we consume both; if not, we still will use the
57                            // backslash character as the one to apply the control transformation
58                            // to.
59                            if char_value == '\\' {
60                                let orig_it = it.clone();
61                                if !matches!(it.next(), Some('\\')) {
62                                    // Didn't find another backslash; restore iterator.
63                                    it = orig_it;
64                                }
65                            }
66
67                            let mut bytes: Vec<u8> = if char_value.is_ascii_lowercase() {
68                                char_value
69                                    .to_ascii_uppercase()
70                                    .to_string()
71                                    .bytes()
72                                    .collect()
73                            } else {
74                                char_value.to_string().bytes().collect()
75                            };
76
77                            if !bytes.is_empty() {
78                                if bytes[0] == b'?' {
79                                    // We can't explain why this is the case, but it is.
80                                    bytes[0] = 0x7f;
81                                } else {
82                                    bytes[0] &= 0x1f;
83                                }
84                            }
85
86                            result.append(bytes.as_mut());
87                        } else {
88                            result.push(b'\\');
89                            result.push(b'c');
90                        }
91                    }
92                }
93            }
94            'e' | 'E' => result.push(b'\x1b'),
95            'f' => result.push(b'\x0c'),
96            'n' => result.push(b'\n'),
97            'r' => result.push(b'\r'),
98            't' => result.push(b'\t'),
99            'v' => result.push(b'\x0b'),
100            '\\' => result.push(b'\\'),
101            '\'' if matches!(mode, EscapeExpansionMode::AnsiCQuotes) => result.push(b'\''),
102            '\"' if matches!(mode, EscapeExpansionMode::AnsiCQuotes) => result.push(b'\"'),
103            '?' if matches!(mode, EscapeExpansionMode::AnsiCQuotes) => result.push(b'?'),
104            '0' => {
105                // Consume 0-3 valid octal chars
106                let mut taken_so_far = 0;
107                let mut octal_chars: String = it
108                    .take_while_ref(|c| {
109                        if taken_so_far < 3 && matches!(*c, '0'..='7') {
110                            taken_so_far += 1;
111                            true
112                        } else {
113                            false
114                        }
115                    })
116                    .collect();
117
118                if octal_chars.is_empty() {
119                    octal_chars.push('0');
120                }
121
122                let value = int_utils::parse::<u8>(octal_chars.as_str(), 8)?;
123                result.push(value);
124            }
125            'x' => {
126                // Consume 1-2 valid hex chars (or unlimited with braces in ANSI-C mode)
127                let mut hex_chars = String::new();
128                let mut invalid_prefix = false;
129                let mut hexits_consumed = 0;
130                let mut start_brace_consumed = false;
131
132                loop {
133                    // Save the original in case we go too far and need to restore.
134                    let orig_it = it.clone();
135
136                    let Some(next_c) = it.next() else {
137                        break;
138                    };
139
140                    if matches!(mode, EscapeExpansionMode::AnsiCQuotes)
141                        && !start_brace_consumed
142                        && next_c == '{'
143                    {
144                        start_brace_consumed = true;
145                    } else if start_brace_consumed && next_c == '}' {
146                        break;
147                    } else if ((start_brace_consumed && !invalid_prefix)
148                        || (!start_brace_consumed && hexits_consumed < 2))
149                        && next_c.is_ascii_hexdigit()
150                    {
151                        hex_chars.push(next_c);
152                        hexits_consumed += 1;
153                    } else if start_brace_consumed && hexits_consumed == 0 {
154                        invalid_prefix = true;
155                    } else {
156                        // Went too far; restore iterator and break.
157                        it = orig_it;
158                        break;
159                    }
160                }
161
162                if hex_chars.is_empty() {
163                    if start_brace_consumed {
164                        result.push(0);
165                    } else {
166                        result.push(b'\\');
167                        result.append(escape_cmd.to_string().into_bytes().as_mut());
168                    }
169                } else {
170                    let value32 = int_utils::parse::<u32>(hex_chars.as_str(), 16)?;
171                    let value8: u8 = (value32 & 0xFF) as u8;
172                    result.push(value8);
173                }
174            }
175            'u' => {
176                // Consume 1-4 hex digits
177                let mut taken_so_far = 0;
178                let hex_chars: String = it
179                    .take_while_ref(|next_c| {
180                        if taken_so_far < 4 && next_c.is_ascii_hexdigit() {
181                            taken_so_far += 1;
182                            true
183                        } else {
184                            false
185                        }
186                    })
187                    .collect();
188
189                if hex_chars.is_empty() {
190                    result.push(b'\\');
191                    result.append(escape_cmd.to_string().into_bytes().as_mut());
192                } else {
193                    let value = int_utils::parse::<u16>(hex_chars.as_str(), 16)?;
194                    if let Some(decoded) = char::from_u32(u32::from(value)) {
195                        result.append(decoded.to_string().into_bytes().as_mut());
196                    } else {
197                        result.push(b'\\');
198                        result.append(escape_cmd.to_string().into_bytes().as_mut());
199                    }
200                }
201            }
202            'U' => {
203                // Consume 1-8 hex digits
204                let mut taken_so_far = 0;
205                let hex_chars: String = it
206                    .take_while_ref(|next_c| {
207                        if taken_so_far < 8 && next_c.is_ascii_hexdigit() {
208                            taken_so_far += 1;
209                            true
210                        } else {
211                            false
212                        }
213                    })
214                    .collect();
215
216                if hex_chars.is_empty() {
217                    result.push(b'\\');
218                    result.append(escape_cmd.to_string().into_bytes().as_mut());
219                } else {
220                    let value = int_utils::parse::<u32>(hex_chars.as_str(), 16)?;
221                    if let Some(decoded) = char::from_u32(value) {
222                        result.append(decoded.to_string().into_bytes().as_mut());
223                    } else {
224                        result.push(b'\\');
225                        result.append(escape_cmd.to_string().into_bytes().as_mut());
226                    }
227                }
228            }
229            first_octal @ '1'..='7' if matches!(mode, EscapeExpansionMode::AnsiCQuotes) => {
230                // We've already consumed the first octal digit.
231                let mut octal_chars = String::new();
232                octal_chars.push(first_octal);
233
234                // Consume up to 2 more valid octal chars
235                let mut taken_so_far = 1;
236                for next_c in it.take_while_ref(|next_c| {
237                    if taken_so_far < 3 && matches!(next_c, '0'..='7') {
238                        taken_so_far += 1;
239                        true
240                    } else {
241                        false
242                    }
243                }) {
244                    octal_chars.push(next_c);
245                }
246
247                let value = int_utils::parse::<u8>(octal_chars.as_str(), 8)?;
248                result.push(value);
249            }
250            unknown => {
251                // Not a valid escape sequence.
252                result.push(b'\\');
253                result.append(unknown.to_string().into_bytes().as_mut());
254            }
255        }
256    }
257
258    // In ANSI-C quotes, we crop the result at the first NUL.
259    if matches!(mode, EscapeExpansionMode::AnsiCQuotes) {
260        if let Some(nul_index) = result.iter().position(|&b| b == 0) {
261            result.truncate(nul_index);
262        }
263    }
264
265    Ok((result, true))
266}
267
/// Quoting mode to use for escaping.
///
/// The default mode is [`QuoteMode::SingleQuote`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum QuoteMode {
    /// Single-quote.
    #[default]
    SingleQuote,
    /// Double-quote.
    DoubleQuote,
    /// Backslash-escape.
    BackslashEscape,
}
279
/// Options influencing how to escape/quote an input string.
///
/// The derived default quotes only when required, prefers single quotes, and lets
/// newline characters trigger ANSI C quoting.
#[derive(Default)]
pub(crate) struct QuoteOptions {
    /// Whether or not to *always* escape or quote the input; if false, then escaping/quoting
    /// will only be applied if the input contains characters that *require* it.
    pub always_quote: bool,
    /// Preferred mode for quoting/escaping. Quoting may be "upgraded" to a more expressive
    /// format if the input is not expressible otherwise.
    pub preferred_mode: QuoteMode,
    /// Whether or not to *avoid* using ANSI C quoting just for the benefit of newline characters.
    /// Default is for newline characters to require upgrading the string's quoting to
    /// ANSI C quoting.
    pub avoid_ansi_c_quoting_newline: bool,
}
294
295pub(crate) fn quote<'a>(s: &'a str, options: &QuoteOptions) -> Cow<'a, str> {
296    let use_ansi_c_quotes = s.contains(|c| {
297        needs_ansi_c_quoting(c) && (!options.avoid_ansi_c_quoting_newline || c != '\n')
298    });
299
300    if use_ansi_c_quotes {
301        return ansi_c_quote(s).into();
302    }
303
304    let use_default_quotes =
305        !use_ansi_c_quotes && (options.always_quote || s.is_empty() || s.contains(needs_escaping));
306
307    if !use_default_quotes {
308        return s.into();
309    }
310
311    match options.preferred_mode {
312        QuoteMode::BackslashEscape => backslash_escape(s),
313        QuoteMode::SingleQuote => single_quote(s),
314        QuoteMode::DoubleQuote => double_quote(s).into(),
315    }
316}
317
318/// Escape the given string, forcing quoting.
319///
320/// # Arguments
321///
322/// * `s` - The string to escape.
323/// * `mode` - The quoting mode to use.
324pub fn force_quote(s: &str, mode: QuoteMode) -> String {
325    let options = QuoteOptions {
326        always_quote: true,
327        preferred_mode: mode,
328        ..Default::default()
329    };
330
331    quote(s, &options).to_string()
332}
333
334/// Applies the given quoting mode to the provided string, only changing it if required.
335///
336/// # Arguments
337///
338/// * `s` - The string to escape.
339/// * `mode` - The quoting mode to use.
340pub fn quote_if_needed(s: &str, mode: QuoteMode) -> Cow<'_, str> {
341    let options = QuoteOptions {
342        always_quote: false,
343        preferred_mode: mode,
344        ..Default::default()
345    };
346
347    quote(s, &options)
348}
349
350fn backslash_escape(s: &str) -> Cow<'_, str> {
351    if s.is_empty() {
352        // An empty string must be represented as '' to be a valid shell word.
353        Cow::Owned("''".to_string())
354    } else if !s.chars().any(needs_escaping) {
355        Cow::Borrowed(s)
356    } else {
357        let mut output = String::with_capacity(s.len());
358        for c in s.chars() {
359            if needs_escaping(c) {
360                output.push('\\');
361            }
362            output.push(c);
363        }
364        Cow::Owned(output)
365    }
366}
367
/// Wraps `s` in single quotes.
///
/// A single quote cannot appear inside a single-quoted string, so the input is split on
/// `'`: each non-empty piece is quoted on its own and the pieces are joined by a
/// backslash-escaped quote placed outside any quoting. The empty string becomes `''`.
fn single_quote(s: &str) -> Cow<'_, str> {
    // Special-case the empty string.
    if s.is_empty() {
        return Cow::Borrowed("''");
    }

    let mut quoted = String::with_capacity(s.len());

    for (index, segment) in s.split('\'').enumerate() {
        // Every segment after the first was preceded by a quote in the input.
        if index > 0 {
            quoted.push_str("\\'");
        }

        if segment.is_empty() {
            continue;
        }

        quoted.push('\'');
        quoted.push_str(segment);
        quoted.push('\'');
    }

    Cow::Owned(quoted)
}
397
/// Wraps `s` in double quotes, backslash-escaping `$`, `` ` ``, `"` and `\`.
fn double_quote(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len());
    quoted.push('"');

    for ch in s.chars() {
        match ch {
            '$' | '`' | '"' | '\\' => {
                quoted.push('\\');
                quoted.push(ch);
            }
            _ => quoted.push(ch),
        }
    }

    quoted.push('"');
    quoted
}
415
416fn ansi_c_quote(s: &str) -> String {
417    let mut result = String::with_capacity(s.len());
418    result.push_str("$'");
419
420    for c in s.chars() {
421        match c {
422            '\x07' => result.push_str("\\a"),
423            '\x08' => result.push_str("\\b"),
424            '\x1b' => result.push_str("\\E"),
425            '\x0c' => result.push_str("\\f"),
426            '\n' => result.push_str("\\n"),
427            '\r' => result.push_str("\\r"),
428            '\t' => result.push_str("\\t"),
429            '\x0b' => result.push_str("\\v"),
430            '\\' => result.push_str("\\\\"),
431            '\'' => result.push_str("\\'"),
432            c if needs_ansi_c_quoting(c) => {
433                result.push_str(std::format!("\\{:03o}", c as u8).as_str());
434            }
435            _ => result.push(c),
436        }
437    }
438
439    result.push('\'');
440
441    result
442}
443
/// Reports whether the given character must be escaped (or quoted) when it appears
/// outside quotes.
const fn needs_escaping(c: char) -> bool {
    matches!(
        c,
        // The space and the quoting characters.
        ' ' | '\'' | '"' | '\\' | '`'
        // Parentheses, brackets and braces.
        | '(' | ')' | '[' | ']' | '{' | '}'
        // Operator-like characters.
        | '|' | '&' | ';' | '<' | '>'
        // Remaining special characters.
        | '$' | '*' | '?' | '!' | '^' | ','
    )
}
472
/// Reports whether the given character forces ANSI C quoting, i.e. whether it is an
/// ASCII control character (U+0000 through U+001F, or U+007F).
const fn needs_ansi_c_quoting(c: char) -> bool {
    matches!(c, '\0'..='\x1f' | '\x7f')
}
476
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_backslash_escape() {
        let cases = [("a", "a"), ("a b", r"a\ b"), ("", "''")];
        for (input, expected) in cases {
            assert_eq!(quote_if_needed(input, QuoteMode::BackslashEscape), expected);
        }
    }

    #[test]
    fn test_single_quote_escape() {
        let cases = [("a", "a"), ("a b", "'a b'"), ("", "''"), ("'", "\\'")];
        for (input, expected) in cases {
            assert_eq!(quote_if_needed(input, QuoteMode::SingleQuote), expected);
        }
    }

    /// Expands `unexpanded` in echo mode and checks that the resulting bytes are valid
    /// UTF-8 equal to `expected`.
    fn assert_echo_expands_to(unexpanded: &str, expected: &str) {
        let (bytes, _) =
            expand_backslash_escapes(unexpanded, EscapeExpansionMode::EchoBuiltin).unwrap();
        assert_eq!(String::from_utf8(bytes).unwrap(), expected);
    }

    #[test]
    fn test_echo_expansion() {
        // (input, expected expansion)
        let cases: &[(&str, &str)] = &[
            ("a", "a"),
            (r"\M", "\\M"),
            (r"a\nb", "a\nb"),
            (r"\a", "\x07"),
            (r"\b", "\x08"),
            (r"\e", "\x1b"),
            (r"\f", "\x0c"),
            (r"\n", "\n"),
            (r"\r", "\r"),
            (r"\t", "\t"),
            (r"\v", "\x0b"),
            (r"\\", "\\"),
            (r"\'", "\\'"),
            (r#"\""#, r#"\""#),
            (r"\?", "\\?"),
            (r"\0", "\0"),
            (r"\00", "\0"),
            (r"\000", "\0"),
            (r"\081", "\081"),
            (r"\0101", "A"),
            (r"abc\", "abc\\"),
            (r"\x41", "A"),
            (r"\xf0\x9f\x90\x8d", "🐍"),
            (r"\u2620", "☠"),
            (r"\U0001f602", "😂"),
        ];

        for &(unexpanded, expected) in cases {
            assert_echo_expands_to(unexpanded, expected);
        }
    }
}