grep_cli/
escape.rs

1use std::ffi::OsStr;
2
3use bstr::{ByteSlice, ByteVec};
4
5/// Escapes arbitrary bytes into a human readable string.
6///
7/// This converts `\t`, `\r` and `\n` into their escaped forms. It also
8/// converts the non-printable subset of ASCII in addition to invalid UTF-8
9/// bytes to hexadecimal escape sequences. Everything else is left as is.
10///
11/// The dual of this routine is [`unescape`].
12///
13/// # Example
14///
15/// This example shows how to convert a byte string that contains a `\n` and
16/// invalid UTF-8 bytes into a `String`.
17///
18/// Pay special attention to the use of raw strings. That is, `r"\n"` is
19/// equivalent to `"\\n"`.
20///
21/// ```
22/// use grep_cli::escape;
23///
24/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
25/// ```
26pub fn escape(bytes: &[u8]) -> String {
27    bytes.escape_bytes().to_string()
28}
29
30/// Escapes an OS string into a human readable string.
31///
32/// This is like [`escape`], but accepts an OS string.
33pub fn escape_os(string: &OsStr) -> String {
34    escape(Vec::from_os_str_lossy(string).as_bytes())
35}
36
37/// Unescapes a string.
38///
39/// It supports a limited set of escape sequences:
40///
41/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
42/// * `\xZZ` hexadecimal escapes are mapped to their byte.
43///
44/// Everything else is left as is, including non-hexadecimal escapes like
45/// `\xGG`.
46///
47/// This is useful when it is desirable for a command line argument to be
48/// capable of specifying arbitrary bytes or otherwise make it easier to
49/// specify non-printable characters.
50///
51/// The dual of this routine is [`escape`].
52///
53/// # Example
54///
55/// This example shows how to convert an escaped string (which is valid UTF-8)
56/// into a corresponding sequence of bytes. Each escape sequence is mapped to
57/// its bytes, which may include invalid UTF-8.
58///
59/// Pay special attention to the use of raw strings. That is, `r"\n"` is
60/// equivalent to `"\\n"`.
61///
62/// ```
63/// use grep_cli::unescape;
64///
65/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
66/// ```
67pub fn unescape(s: &str) -> Vec<u8> {
68    Vec::unescape_bytes(s)
69}
70
71/// Unescapes an OS string.
72///
73/// This is like [`unescape`], but accepts an OS string.
74///
75/// Note that this first lossily decodes the given OS string as UTF-8. That
76/// is, an escaped string (the thing given) should be valid UTF-8.
77pub fn unescape_os(string: &OsStr) -> Vec<u8> {
78    unescape(&string.to_string_lossy())
79}
80
#[cfg(test)]
mod tests {
    use super::{escape, unescape};

    // Each test checks `unescape` against an owned copy of the expected
    // bytes and `escape` against the expected escaped text.

    /// The empty string maps to itself in both directions.
    #[test]
    fn empty() {
        assert_eq!(b"".to_vec(), unescape(r""));
        assert_eq!(r"", escape(b""));
    }

    /// A backslash is doubled when escaping and collapsed when unescaping.
    #[test]
    fn backslash() {
        assert_eq!(b"\\".to_vec(), unescape(r"\\"));
        assert_eq!(r"\\", escape(b"\\"));
    }

    /// NUL is accepted as either `\x00` or `\0`, and is escaped as `\0`.
    #[test]
    fn nul() {
        assert_eq!(b"\x00".to_vec(), unescape(r"\x00"));
        assert_eq!(b"\x00".to_vec(), unescape(r"\0"));
        assert_eq!(r"\0", escape(b"\x00"));
    }

    /// Line feed round trips through `\n`.
    #[test]
    fn nl() {
        assert_eq!(b"\n".to_vec(), unescape(r"\n"));
        assert_eq!(r"\n", escape(b"\n"));
    }

    /// Tab round trips through `\t`.
    #[test]
    fn tab() {
        assert_eq!(b"\t".to_vec(), unescape(r"\t"));
        assert_eq!(r"\t", escape(b"\t"));
    }

    /// Carriage return round trips through `\r`.
    #[test]
    fn carriage() {
        assert_eq!(b"\r".to_vec(), unescape(r"\r"));
        assert_eq!(r"\r", escape(b"\r"));
    }

    /// An unrecognized escape such as `\a` is left as a backslash and `a`.
    #[test]
    fn nothing_simple() {
        assert_eq!(b"\\a".to_vec(), unescape(r"\a"));
        assert_eq!(b"\\a".to_vec(), unescape(r"\\a"));
        assert_eq!(r"\\a", escape(b"\\a"));
    }

    /// `\x` with no digits following is not treated as a hex escape.
    #[test]
    fn nothing_hex0() {
        assert_eq!(b"\\x".to_vec(), unescape(r"\x"));
        assert_eq!(b"\\x".to_vec(), unescape(r"\\x"));
        assert_eq!(r"\\x", escape(b"\\x"));
    }

    /// `\x` followed by one non-hex character is not treated as a hex escape.
    #[test]
    fn nothing_hex1() {
        assert_eq!(b"\\xz".to_vec(), unescape(r"\xz"));
        assert_eq!(b"\\xz".to_vec(), unescape(r"\\xz"));
        assert_eq!(r"\\xz", escape(b"\\xz"));
    }

    /// `\x` followed by two non-hex characters is not treated as a hex escape.
    #[test]
    fn nothing_hex2() {
        assert_eq!(b"\\xzz".to_vec(), unescape(r"\xzz"));
        assert_eq!(b"\\xzz".to_vec(), unescape(r"\\xzz"));
        assert_eq!(r"\\xzz", escape(b"\\xzz"));
    }

    /// Bytes that are not valid UTF-8 are escaped as `\xFF`-style sequences.
    #[test]
    fn invalid_utf8() {
        assert_eq!(r"\xFF", escape(b"\xFF"));
        assert_eq!(r"a\xFFb", escape(b"a\xFFb"));
    }
}