brush_core/
escape.rs

1//! String escaping utilities
2
3use std::borrow::Cow;
4
5use itertools::Itertools;
6
7use crate::error;
8
/// Escape expansion mode.
///
/// Selects which dialect of backslash escapes [`expand_backslash_escapes`] recognizes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EscapeExpansionMode {
    /// echo builtin mode. In this mode `\c` ends all further output, and the
    /// `\'`, `\"` and `\?` escapes are not recognized.
    EchoBuiltin,
    /// ANSI-C quotes. In this mode `\'`, `\"` and `\?` expand to the quoted
    /// character itself.
    AnsiCQuotes,
}
17
18/// Expands backslash escapes in the provided string.
19///
20/// # Arguments
21///
22/// * `s` - The string to expand.
23/// * `mode` - The mode to use for expansion.
24#[expect(clippy::too_many_lines)]
25pub fn expand_backslash_escapes(
26    s: &str,
27    mode: EscapeExpansionMode,
28) -> Result<(Vec<u8>, bool), error::Error> {
29    let mut result: Vec<u8> = vec![];
30    let mut it = s.chars();
31    while let Some(c) = it.next() {
32        if c != '\\' {
33            // Not a backslash, add and move on.
34            result.append(c.to_string().into_bytes().as_mut());
35            continue;
36        }
37
38        match it.next() {
39            Some('a') => result.push(b'\x07'),
40            Some('b') => result.push(b'\x08'),
41            Some('c') => {
42                match mode {
43                    EscapeExpansionMode::EchoBuiltin => {
44                        // Stop all additional output!
45                        return Ok((result, false));
46                    }
47                    EscapeExpansionMode::AnsiCQuotes => {
48                        if let Some(_next_next) = it.next() {
49                            return error::unimp("control character in ANSI C quotes");
50                        } else {
51                            result.push(b'\\');
52                            result.push(b'c');
53                        }
54                    }
55                }
56            }
57            Some('e' | 'E') => result.push(b'\x1b'),
58            Some('f') => result.push(b'\x0c'),
59            Some('n') => result.push(b'\n'),
60            Some('r') => result.push(b'\r'),
61            Some('t') => result.push(b'\t'),
62            Some('v') => result.push(b'\x0b'),
63            Some('\\') => result.push(b'\\'),
64            Some('\'') if matches!(mode, EscapeExpansionMode::AnsiCQuotes) => result.push(b'\''),
65            Some('\"') if matches!(mode, EscapeExpansionMode::AnsiCQuotes) => result.push(b'\"'),
66            Some('?') if matches!(mode, EscapeExpansionMode::AnsiCQuotes) => result.push(b'?'),
67            Some('0') => {
68                // Consume 0-3 valid octal chars
69                let mut taken_so_far = 0;
70                let mut octal_chars: String = it
71                    .take_while_ref(|c| {
72                        if taken_so_far < 3 && matches!(*c, '0'..='7') {
73                            taken_so_far += 1;
74                            true
75                        } else {
76                            false
77                        }
78                    })
79                    .collect();
80
81                if octal_chars.is_empty() {
82                    octal_chars.push('0');
83                }
84
85                let value = u8::from_str_radix(octal_chars.as_str(), 8)?;
86                result.push(value);
87            }
88            Some('x') => {
89                // Consume 1-2 valid hex chars
90                let mut taken_so_far = 0;
91                let hex_chars: String = it
92                    .take_while_ref(|c| {
93                        if taken_so_far < 2 && c.is_ascii_hexdigit() {
94                            taken_so_far += 1;
95                            true
96                        } else {
97                            false
98                        }
99                    })
100                    .collect();
101
102                if hex_chars.is_empty() {
103                    result.push(b'\\');
104                    result.append(c.to_string().into_bytes().as_mut());
105                } else {
106                    let value = u8::from_str_radix(hex_chars.as_str(), 16)?;
107                    result.push(value);
108                }
109            }
110            Some('u') => {
111                // Consume 1-4 hex digits
112                let mut taken_so_far = 0;
113                let hex_chars: String = it
114                    .take_while_ref(|c| {
115                        if taken_so_far < 4 && c.is_ascii_hexdigit() {
116                            taken_so_far += 1;
117                            true
118                        } else {
119                            false
120                        }
121                    })
122                    .collect();
123
124                if hex_chars.is_empty() {
125                    result.push(b'\\');
126                    result.append(c.to_string().into_bytes().as_mut());
127                } else {
128                    let value = u16::from_str_radix(hex_chars.as_str(), 16)?;
129
130                    if let Some(decoded) = char::from_u32(u32::from(value)) {
131                        result.append(decoded.to_string().into_bytes().as_mut());
132                    } else {
133                        result.push(b'\\');
134                        result.append(c.to_string().into_bytes().as_mut());
135                    }
136                }
137            }
138            Some('U') => {
139                // Consume 1-8 hex digits
140                let mut taken_so_far = 0;
141                let hex_chars: String = it
142                    .take_while_ref(|c| {
143                        if taken_so_far < 8 && c.is_ascii_hexdigit() {
144                            taken_so_far += 1;
145                            true
146                        } else {
147                            false
148                        }
149                    })
150                    .collect();
151
152                if hex_chars.is_empty() {
153                    result.push(b'\\');
154                    result.append(c.to_string().into_bytes().as_mut());
155                } else {
156                    let value = u32::from_str_radix(hex_chars.as_str(), 16)?;
157
158                    if let Some(decoded) = char::from_u32(value) {
159                        result.append(decoded.to_string().into_bytes().as_mut());
160                    } else {
161                        result.push(b'\\');
162                        result.append(c.to_string().into_bytes().as_mut());
163                    }
164                }
165            }
166            Some(c) => {
167                // Not a valid escape sequence.
168                result.push(b'\\');
169                result.append(c.to_string().into_bytes().as_mut());
170            }
171            None => {
172                // Trailing backslash.
173                result.push(b'\\');
174            }
175        }
176    }
177
178    Ok((result, true))
179}
180
/// Quoting mode to use for escaping.
///
/// The default is [`QuoteMode::SingleQuote`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum QuoteMode {
    /// Single-quote.
    #[default]
    SingleQuote,
    /// Double-quote.
    DoubleQuote,
    /// Backslash-escape.
    BackslashEscape,
}
192
/// Options influencing how to escape/quote an input string.
///
/// The `Default` value never forces quoting, prefers the default `QuoteMode`,
/// and lets newline characters trigger ANSI C quoting.
#[derive(Default)]
pub(crate) struct QuoteOptions {
    /// Whether or not to *always* escape or quote the input; if false, then escaping/quoting
    /// will only be applied if the input is empty or contains characters that *require* it.
    pub always_quote: bool,
    /// Preferred mode for quoting/escaping. Quoting may be "upgraded" to a more expressive
    /// format (ANSI C quoting) if the input is not expressible otherwise.
    pub preferred_mode: QuoteMode,
    /// Whether or not to *avoid* using ANSI C quoting just for the benefit of newline characters.
    /// Default is for newline characters to require upgrading the string's quoting to
    /// ANSI C quoting. Other control characters still trigger the upgrade when this is set.
    pub avoid_ansi_c_quoting_newline: bool,
}
207
208pub(crate) fn quote<'a>(s: &'a str, options: &QuoteOptions) -> Cow<'a, str> {
209    let use_ansi_c_quotes = s.contains(|c| {
210        needs_ansi_c_quoting(c) && (!options.avoid_ansi_c_quoting_newline || c != '\n')
211    });
212
213    if use_ansi_c_quotes {
214        return ansi_c_quote(s).into();
215    }
216
217    let use_default_quotes =
218        !use_ansi_c_quotes && (options.always_quote || s.is_empty() || s.contains(needs_escaping));
219
220    if !use_default_quotes {
221        return s.into();
222    }
223
224    match options.preferred_mode {
225        QuoteMode::BackslashEscape => backslash_escape(s).into(),
226        QuoteMode::SingleQuote => single_quote(s).into(),
227        QuoteMode::DoubleQuote => double_quote(s).into(),
228    }
229}
230
231/// Escape the given string, forcing quoting.
232///
233/// # Arguments
234///
235/// * `s` - The string to escape.
236/// * `mode` - The quoting mode to use.
237pub fn force_quote(s: &str, mode: QuoteMode) -> String {
238    let options = QuoteOptions {
239        always_quote: true,
240        preferred_mode: mode,
241        ..Default::default()
242    };
243
244    quote(s, &options).to_string()
245}
246
247/// Applies the given quoting mode to the provided string, only changing it if required.
248///
249/// # Arguments
250///
251/// * `s` - The string to escape.
252/// * `mode` - The quoting mode to use.
253pub fn quote_if_needed(s: &str, mode: QuoteMode) -> Cow<'_, str> {
254    let options = QuoteOptions {
255        always_quote: false,
256        preferred_mode: mode,
257        ..Default::default()
258    };
259
260    quote(s, &options)
261}
262
263fn backslash_escape(s: &str) -> String {
264    let mut output = String::new();
265
266    // TODO: Handle other interesting sequences.
267    for c in s.chars() {
268        match c {
269            c if needs_escaping(c) => {
270                output.push('\\');
271                output.push(c);
272            }
273            c => output.push(c),
274        }
275    }
276
277    output
278}
279
/// Single-quotes `s`.
///
/// Single quote characters cannot appear inside single quotes, so the input is
/// split around them: each non-empty segment is wrapped in its own pair of
/// single quotes and every original single quote is emitted between segments
/// as a backslash-escaped `\'`. The empty string becomes `''`.
fn single_quote(s: &str) -> String {
    // Special-case the empty string.
    if s.is_empty() {
        return String::from("''");
    }

    let mut quoted = String::new();

    for (index, segment) in s.split('\'').enumerate() {
        // Every boundary between segments stands for one single quote in the input.
        if index > 0 {
            quoted.push_str("\\'");
        }

        if segment.is_empty() {
            continue;
        }

        quoted.push('\'');
        quoted.push_str(segment);
        quoted.push('\'');
    }

    quoted
}
309
/// Wraps `s` in double quotes, placing a backslash before each `$`, backtick,
/// `"` and `\` character it contains.
fn double_quote(s: &str) -> String {
    let mut quoted = String::from("\"");

    for ch in s.chars() {
        match ch {
            '$' | '`' | '"' | '\\' => quoted.extend(['\\', ch]),
            _ => quoted.push(ch),
        }
    }

    quoted.push('"');
    quoted
}
327
328fn ansi_c_quote(s: &str) -> String {
329    let mut result = String::new();
330
331    result.push_str("$'");
332
333    for c in s.chars() {
334        match c {
335            '\x07' => result.push_str("\\a"),
336            '\x08' => result.push_str("\\b"),
337            '\x1b' => result.push_str("\\E"),
338            '\x0c' => result.push_str("\\f"),
339            '\n' => result.push_str("\\n"),
340            '\r' => result.push_str("\\r"),
341            '\t' => result.push_str("\\t"),
342            '\x0b' => result.push_str("\\v"),
343            '\\' => result.push_str("\\\\"),
344            '\'' => result.push_str("\\'"),
345            c if needs_ansi_c_quoting(c) => {
346                result.push_str(std::format!("\\{:03o}", c as u8).as_str());
347            }
348            _ => result.push(c),
349        }
350    }
351
352    result.push('\'');
353
354    result
355}
356
/// Reports whether the given character must be escaped (or quoted) when it
/// appears outside of quotes.
const fn needs_escaping(c: char) -> bool {
    matches!(
        c,
        ' ' | '\'' | '"' | '\\' | '`'
            | '(' | ')' | '[' | ']' | '{' | '}'
            | '|' | '&' | ';' | '<' | '>'
            | '$' | '*' | '?' | '!' | '^' | ','
    )
}
385
/// Reports whether the given character is an ASCII control character
/// (U+0000 through U+001F, or U+007F).
const fn needs_ansi_c_quoting(c: char) -> bool {
    matches!(c, '\0'..='\x1f' | '\x7f')
}
389
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `quote_if_needed` in the given mode maps each input to its
    /// paired expected output.
    fn assert_quotes_to(mode: QuoteMode, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(quote_if_needed(input, mode), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_backslash_escape() {
        assert_quotes_to(
            QuoteMode::BackslashEscape,
            &[("a", "a"), ("a b", r"a\ b"), ("", "")],
        );
    }

    #[test]
    fn test_single_quote_escape() {
        assert_quotes_to(
            QuoteMode::SingleQuote,
            &[("a", "a"), ("a b", "'a b'"), ("", "''"), ("'", "\\'")],
        );
    }

    /// Expands `unexpanded` in echo builtin mode and asserts that the resulting
    /// bytes are valid UTF-8 equal to `expected`.
    fn assert_echo_expands_to(unexpanded: &str, expected: &str) {
        let (bytes, _continue_output) =
            expand_backslash_escapes(unexpanded, EscapeExpansionMode::EchoBuiltin).unwrap();
        let expanded = String::from_utf8(bytes).unwrap();
        assert_eq!(expanded, expected);
    }

    #[test]
    fn test_echo_expansion() {
        let cases = [
            // Plain text and unrecognized escapes pass through.
            ("a", "a"),
            (r"\M", "\\M"),
            // Named escapes.
            (r"a\nb", "a\nb"),
            (r"\a", "\x07"),
            (r"\b", "\x08"),
            (r"\e", "\x1b"),
            (r"\f", "\x0c"),
            (r"\n", "\n"),
            (r"\r", "\r"),
            (r"\t", "\t"),
            (r"\v", "\x0b"),
            (r"\\", "\\"),
            // Escapes that echo mode leaves alone.
            (r"\'", "\\'"),
            (r#"\""#, r#"\""#),
            (r"\?", "\\?"),
            // Octal escapes.
            (r"\0", "\0"),
            (r"\00", "\0"),
            (r"\000", "\0"),
            (r"\081", "\081"),
            (r"\0101", "A"),
            // Trailing backslash.
            (r"abc\", "abc\\"),
            // Hex and Unicode escapes.
            (r"\x41", "A"),
            (r"\xf0\x9f\x90\x8d", "🐍"),
            (r"\u2620", "☠"),
            (r"\U0001f602", "😂"),
        ];

        for (unexpanded, expected) in cases {
            assert_echo_expands_to(unexpanded, expected);
        }
    }
}