grep_cli/escape.rs
1use std::ffi::OsStr;
2
3use bstr::{ByteSlice, ByteVec};
4
5/// Escapes arbitrary bytes into a human readable string.
6///
7/// This converts `\t`, `\r` and `\n` into their escaped forms. It also
8/// converts the non-printable subset of ASCII in addition to invalid UTF-8
9/// bytes to hexadecimal escape sequences. Everything else is left as is.
10///
11/// The dual of this routine is [`unescape`].
12///
13/// # Example
14///
15/// This example shows how to convert a byte string that contains a `\n` and
16/// invalid UTF-8 bytes into a `String`.
17///
18/// Pay special attention to the use of raw strings. That is, `r"\n"` is
19/// equivalent to `"\\n"`.
20///
21/// ```
22/// use grep_cli::escape;
23///
24/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
25/// ```
26pub fn escape(bytes: &[u8]) -> String {
27 bytes.escape_bytes().to_string()
28}
29
30/// Escapes an OS string into a human readable string.
31///
32/// This is like [`escape`], but accepts an OS string.
33pub fn escape_os(string: &OsStr) -> String {
34 escape(Vec::from_os_str_lossy(string).as_bytes())
35}
36
37/// Unescapes a string.
38///
39/// It supports a limited set of escape sequences:
40///
41/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
42/// * `\xZZ` hexadecimal escapes are mapped to their byte.
43///
44/// Everything else is left as is, including non-hexadecimal escapes like
45/// `\xGG`.
46///
47/// This is useful when it is desirable for a command line argument to be
48/// capable of specifying arbitrary bytes or otherwise make it easier to
49/// specify non-printable characters.
50///
51/// The dual of this routine is [`escape`].
52///
53/// # Example
54///
55/// This example shows how to convert an escaped string (which is valid UTF-8)
56/// into a corresponding sequence of bytes. Each escape sequence is mapped to
57/// its bytes, which may include invalid UTF-8.
58///
59/// Pay special attention to the use of raw strings. That is, `r"\n"` is
60/// equivalent to `"\\n"`.
61///
62/// ```
63/// use grep_cli::unescape;
64///
65/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
66/// ```
67pub fn unescape(s: &str) -> Vec<u8> {
68 Vec::unescape_bytes(s)
69}
70
71/// Unescapes an OS string.
72///
73/// This is like [`unescape`], but accepts an OS string.
74///
75/// Note that this first lossily decodes the given OS string as UTF-8. That
76/// is, an escaped string (the thing given) should be valid UTF-8.
77pub fn unescape_os(string: &OsStr) -> Vec<u8> {
78 unescape(&string.to_string_lossy())
79}
80
#[cfg(test)]
mod tests {
    use super::{escape, unescape};

    /// Asserts that unescaping `escaped` yields exactly the bytes in `raw`.
    #[track_caller]
    fn check_unescape(escaped: &str, raw: &[u8]) {
        assert_eq!(raw, unescape(escaped).as_slice());
    }

    /// Asserts that escaping the bytes in `raw` yields exactly `escaped`.
    #[track_caller]
    fn check_escape(raw: &[u8], escaped: &str) {
        assert_eq!(escaped, escape(raw));
    }

    #[test]
    fn empty() {
        check_unescape(r"", b"");
        check_escape(b"", r"");
    }

    #[test]
    fn backslash() {
        check_unescape(r"\\", b"\\");
        check_escape(b"\\", r"\\");
    }

    #[test]
    fn nul() {
        // Both spellings of NUL are accepted, but `\0` is the one produced.
        check_unescape(r"\x00", b"\x00");
        check_unescape(r"\0", b"\x00");
        check_escape(b"\x00", r"\0");
    }

    #[test]
    fn nl() {
        check_unescape(r"\n", b"\n");
        check_escape(b"\n", r"\n");
    }

    #[test]
    fn tab() {
        check_unescape(r"\t", b"\t");
        check_escape(b"\t", r"\t");
    }

    #[test]
    fn carriage() {
        check_unescape(r"\r", b"\r");
        check_escape(b"\r", r"\r");
    }

    #[test]
    fn nothing_simple() {
        // An unrecognized escape keeps its backslash.
        check_unescape(r"\a", b"\\a");
        check_unescape(r"\\a", b"\\a");
        check_escape(b"\\a", r"\\a");
    }

    #[test]
    fn nothing_hex0() {
        // `\x` with no digits at all is not a hexadecimal escape.
        check_unescape(r"\x", b"\\x");
        check_unescape(r"\\x", b"\\x");
        check_escape(b"\\x", r"\\x");
    }

    #[test]
    fn nothing_hex1() {
        // `\x` followed by one non-hexadecimal character.
        check_unescape(r"\xz", b"\\xz");
        check_unescape(r"\\xz", b"\\xz");
        check_escape(b"\\xz", r"\\xz");
    }

    #[test]
    fn nothing_hex2() {
        // `\x` followed by two non-hexadecimal characters.
        check_unescape(r"\xzz", b"\\xzz");
        check_unescape(r"\\xzz", b"\\xzz");
        check_escape(b"\\xzz", r"\\xzz");
    }

    #[test]
    fn invalid_utf8() {
        check_escape(b"\xFF", r"\xFF");
        check_escape(b"a\xFFb", r"a\xFFb");
    }
}
159}