escape_bytes/
unescape.rs

1use core::borrow::Borrow;
2
/// Decode previously escaped bytes back into the raw bytes they represent.
///
/// See [crate] for the exact rules.
///
/// ## Errors
///
/// Returns the first [`UnescapeError`] produced when an unexpected byte
/// sequence is encountered; no partial output is returned in that case.
///
/// ## Example
///
/// ```rust
/// # fn main() -> Result<(), escape_bytes::UnescapeError> {
/// let escaped = br"hello\xc3world";
/// let unescaped = escape_bytes::unescape(escaped)?;
/// assert_eq!(unescaped, b"hello\xc3world");
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "doc", doc(cfg(feature = "alloc")))]
pub fn unescape<I>(i: I) -> Result<alloc::vec::Vec<u8>, UnescapeError>
where
    I: IntoIterator,
    I::Item: Borrow<u8>,
{
    // Collecting into `Result<Vec<_>, _>` stops at the first `Err` item and
    // returns it, otherwise yields every unescaped byte in order.
    Unescape::new(i).collect()
}
35
/// Unescape into error occurs when unescaping into a slice cannot continue.
///
/// Returned by [`unescape_into`].
#[derive(Debug, PartialEq, Eq)]
pub enum UnescapeIntoError {
    /// Writing into the slice would write to a position that is out-of-bounds,
    /// i.e. the output slice is too small for the unescaped data.
    OutOfBounds,
    /// Occurs when encountering unexpected byte sequences. Wraps the underlying
    /// [`UnescapeError`] reported by the [`Unescape`] iterator.
    Unescape(UnescapeError),
}
44
45/// Unescape the bytes into the slice.
46///
47/// See [crate] for the exact rules.
48///
49/// Returns the number of bytes written to the slice.
50///
51/// ## Errors
52///
53/// If the slice is not large enough to receive the escaped value. No
54/// information is provided to support continuing escaping into a new buffer
55/// from where it stops. Use the [`Unescape`] iterator directly if that is needed.
56///
57/// When encountering unexpected byte sequences.
58pub fn unescape_into<I>(out: &mut [u8], i: I) -> Result<usize, UnescapeIntoError>
59where
60    I: IntoIterator,
61    I::Item: Borrow<u8>,
62{
63    let mut count = 0usize;
64    for (idx, b) in Unescape::new(i).enumerate() {
65        let b = b.map_err(UnescapeIntoError::Unescape)?;
66        let Some(v) = out.get_mut(idx) else {
67            return Err(UnescapeIntoError::OutOfBounds);
68        };
69        *v = b;
70        count += 1;
71    }
72    Ok(count)
73}
74
75/// Returns the unescaped length of the input.
76///
77/// ## Errors
78///
79/// When encountering unexpected byte sequences.
80pub fn unescaped_len<I>(i: I) -> Result<usize, UnescapeError>
81where
82    I: IntoIterator,
83    I::Item: Borrow<u8>,
84{
85    Unescape::new(i).try_fold(0usize, |sum, result| {
86        result?;
87        Ok(sum + 1)
88    })
89}
90
/// Iterator that unescapes the input iterator.
///
/// Each item is a `Result<u8, UnescapeError>`: either one unescaped byte or
/// the error describing why the escape sequence at that position was invalid.
///
/// See [crate] for the exact rules.
///
/// Use [`unescape`] or [`unescape_into`].
#[derive(Debug)]
pub struct Unescape<I>
where
    I: IntoIterator,
{
    // The iterator over the escaped input, obtained via `I::into_iter`.
    input: I::IntoIter,
}
103
104impl<I> Clone for Unescape<I>
105where
106    I: IntoIterator,
107    I::IntoIter: Clone,
108{
109    fn clone(&self) -> Self {
110        Self {
111            input: self.input.clone(),
112        }
113    }
114}
115
116impl<I> Unescape<I>
117where
118    I: IntoIterator,
119{
120    pub fn new(i: I) -> Self {
121        Self {
122            input: i.into_iter(),
123        }
124    }
125}
126
/// Unescape error occurs when encountering unexpected byte sequences.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum UnescapeError {
    /// An escape backslash character (`\`) was followed by a character that was
    /// not a `\`, `0`, `t`, `r`, `n`, or `x`, or the input ended directly after
    /// the backslash.
    InvalidEscape,
    /// An escape backslash character and x indicating a hex escape (`\x`) were
    /// followed by a character that was not a valid hex character
    /// (`0`-`9`, `a`-`f`, `A`-`F`), or the input ended directly after `\x`.
    InvalidHexHi,
    /// An escape backslash character, x indicating a hex escape, and the hi
    /// nibble (`\xN`) were followed by a character that was not a valid hex
    /// character (`0`-`9`, `a`-`f`, `A`-`F`), or the input ended directly after
    /// the hi nibble.
    InvalidHexLo,
}
142
143impl<I> Iterator for Unescape<I>
144where
145    I: IntoIterator,
146    I::Item: Borrow<u8>,
147{
148    type Item = Result<u8, UnescapeError>;
149
150    /// Returns the next unescaped byte.
151    fn next(&mut self) -> Option<Self::Item> {
152        enum Next {
153            New,
154            Escape,
155            EscapeHexHi,
156            EscapeHexLo(u8),
157        }
158        let mut state = Next::New;
159        loop {
160            let Some(b) = self.input.next() else {
161                return match state {
162                    Next::New => None,
163                    Next::Escape => Some(Err(UnescapeError::InvalidEscape)),
164                    Next::EscapeHexHi => Some(Err(UnescapeError::InvalidHexHi)),
165                    Next::EscapeHexLo(_) => Some(Err(UnescapeError::InvalidHexLo)),
166                };
167            };
168            let b = *b.borrow();
169            match state {
170                Next::New => match b {
171                    b'\\' => state = Next::Escape,
172                    _ => return Some(Ok(b)),
173                },
174                Next::Escape => match b {
175                    b'0' => return Some(Ok(b'\0')),
176                    b't' => return Some(Ok(b'\t')),
177                    b'n' => return Some(Ok(b'\n')),
178                    b'r' => return Some(Ok(b'\r')),
179                    b'\\' => return Some(Ok(b'\\')),
180                    b'x' => state = Next::EscapeHexHi,
181                    _ => return Some(Err(UnescapeError::InvalidEscape)),
182                },
183                Next::EscapeHexHi => {
184                    let Some(hi) = HEX_ALPHABET_REVERSE_MAP[b as usize] else {
185                        return Some(Err(UnescapeError::InvalidHexHi));
186                    };
187                    state = Next::EscapeHexLo(hi);
188                }
189                Next::EscapeHexLo(hi) => {
190                    let Some(lo) = HEX_ALPHABET_REVERSE_MAP[b as usize] else {
191                        return Some(Err(UnescapeError::InvalidHexLo));
192                    };
193                    return Some(Ok(hi << 4 | lo));
194                }
195            }
196        }
197    }
198
199    fn size_hint(&self) -> (usize, Option<usize>) {
200        let input_hint = self.input.size_hint();
201        (input_hint.0 / 4, input_hint.1)
202    }
203}
204
/// Lookup table from a byte value to the value of the ASCII hex digit it
/// represents, or `None` when the byte is not a hex digit.
///
/// Digits `0`-`9` map to 0-9; both `a`-`f` and `A`-`F` map to 10-15. The table
/// is computed at compile time and has one entry for every possible `u8`, so
/// indexing it with `byte as usize` cannot go out of bounds.
const HEX_ALPHABET_REVERSE_MAP: [Option<u8>; 256] = {
    let mut table = [None; 256];
    let mut byte = 0u8;
    loop {
        table[byte as usize] = match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'A'..=b'F' => Some(byte - b'A' + 0xA),
            b'a'..=b'f' => Some(byte - b'a' + 0xa),
            _ => None,
        };
        // Stop after the last byte value instead of overflowing the counter.
        if byte == u8::MAX {
            break;
        }
        byte += 1;
    }
    table
};
251
// This module is empty: no unit tests are declared in this file.
#[cfg(test)]
mod test {}