php_literal_parser/
string.rs

/// Error returned when a PHP string literal cannot be unescaped.
///
/// Within this file it is produced for a truncated escape sequence, a numeric
/// escape that overflows or does not name a valid Unicode scalar value, and
/// output bytes that are not valid UTF-8. The type carries no payload: callers
/// only learn *that* unescaping failed, not where.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct UnescapeError;

impl std::fmt::Display for UnescapeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("invalid escape sequence in string literal")
    }
}

// Implementing `Error` lets callers use `?` into `Box<dyn Error>` and similar.
impl std::error::Error for UnescapeError {}

/// Shorthand for results whose error type is [`UnescapeError`].
type UnescapeResult<T> = Result<T, UnescapeError>;
7
8// Used to collect output characters and queue u16 values for translation.
9struct UnescapeState {
10    // The accumulated characters
11    out: Vec<u8>,
12}
13
14impl UnescapeState {
15    fn with_capacity(capacity: usize) -> UnescapeState {
16        UnescapeState {
17            out: Vec::with_capacity(capacity),
18        }
19    }
20
21    // Collect a new character
22    fn push_char(&mut self, c: char) {
23        let mut buff = [0; 8];
24        self.out
25            .extend_from_slice(c.encode_utf8(&mut buff).as_bytes());
26    }
27
28    fn push_u8(&mut self, c: u8) {
29        self.out.push(c);
30    }
31
32    fn push_raw(&mut self, c: u32) -> UnescapeResult<()> {
33        match std::char::from_u32(c) {
34            Some(c) => {
35                self.push_char(c);
36                Ok(())
37            }
38            None => Err(UnescapeError),
39        }
40    }
41
42    fn push_slice(&mut self, slice: &[u8]) {
43        self.out.extend_from_slice(slice);
44    }
45
46    fn finalize(self) -> UnescapeResult<String> {
47        String::from_utf8(self.out).map_err(|_| UnescapeError)
48    }
49}
50
51fn parse_u32(
52    s: &mut PeekableBytes,
53    radix: u32,
54    mut result: u32,
55    max: Option<u8>,
56) -> UnescapeResult<u32> {
57    let mut max = max.unwrap_or(u8::MAX);
58    while let Some(digit) = s.peek().and_then(|digit| (digit as char).to_digit(radix)) {
59        let _ = s.next(); // consume the digit we peeked
60        result = result.checked_mul(radix).ok_or(UnescapeError)?;
61        result = result.checked_add(digit).ok_or(UnescapeError)?;
62        max -= 1;
63        if max == 0 {
64            break;
65        }
66    }
67    Ok(result)
68}
69
// Strategy for decoding one backslash escape of a particular string flavour.
trait EscapedString {
    // Decode the escape sequence at the start of `bytes`, appending the
    // decoded output to `state`.
    //
    // The implementations in this file expect `bytes` to start with the
    // backslash (they `debug_assert` it), return the part of `bytes` that
    // follows the consumed sequence, and fail with `UnescapeError` when the
    // input ends right after the backslash.
    fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]>;
}
73
74struct SingleQuoteString;
75
76impl EscapedString for SingleQuoteString {
77    fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
78        let mut ins = PeekableBytes::new(bytes);
79        let _next = ins.next();
80        debug_assert_eq!(_next, Some(b'\\'));
81        match ins.next() {
82            None => {
83                return Err(UnescapeError);
84            }
85            Some(d) => match d {
86                b'\\' | b'\'' => state.push_u8(d),
87                _ => {
88                    state.push_u8(b'\\');
89                    state.push_u8(d)
90                }
91            },
92        }
93        Ok(ins.as_slice())
94    }
95}
96
97struct DoubleQuoteString;
98
99impl EscapedString for DoubleQuoteString {
100    fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
101        let mut ins = PeekableBytes::new(bytes);
102        let _next = ins.next();
103        debug_assert_eq!(_next, Some(b'\\'));
104        match ins.next() {
105            None => {
106                return Err(UnescapeError);
107            }
108            Some(d) => {
109                match d {
110                    b'$' | b'"' | b'\\' => state.push_u8(d),
111                    b'n' => state.push_u8(b'\n'),   // linefeed
112                    b'r' => state.push_u8(b'\r'),   // carriage return
113                    b't' => state.push_u8(b'\t'),   // tab
114                    b'v' => state.push_u8(b'\x0B'), // vertical tab
115                    b'f' => state.push_u8(b'\x0C'), // form feed
116                    b'x' => {
117                        let val = parse_u32(&mut ins, 16, 0, Some(2))?;
118                        state.push_raw(val)?;
119                    }
120                    b'u' => match ins.next() {
121                        Some(b'{') => {
122                            let val = parse_u32(&mut ins, 16, 0, None)?;
123                            state.push_raw(val)?;
124                            if !matches!(ins.next(), Some(b'}')) {
125                                return Err(UnescapeError);
126                            }
127                        }
128                        Some(d) => {
129                            state.push_u8(b'\\');
130                            state.push_u8(b'u');
131                            state.push_u8(d);
132                        }
133                        None => {
134                            state.push_u8(b'\\');
135                            state.push_u8(d);
136                        }
137                    },
138                    b'0'..=b'7' => {
139                        let val =
140                            parse_u32(&mut ins, 8, (d as char).to_digit(8).unwrap(), Some(3))?;
141                        state.push_raw(val)?;
142                    }
143                    _ => {
144                        state.push_u8(b'\\');
145                        state.push_u8(d)
146                    }
147                }
148            }
149        }
150        Ok(ins.as_slice())
151    }
152}
153
154pub fn parse_string(literal: &str) -> Result<String, UnescapeError> {
155    let inner = &literal[1..(literal.len()) - 1];
156    if literal.bytes().next().unwrap() == b'\'' {
157        unescape::<SingleQuoteString>(inner)
158    } else {
159        unescape::<DoubleQuoteString>(inner)
160    }
161}
162
163fn unescape<S: EscapedString>(s: &str) -> UnescapeResult<String> {
164    let mut state = UnescapeState::with_capacity(s.len());
165    let mut bytes = s.as_bytes();
166    while let Some(escape_index) = memchr::memchr(b'\\', bytes) {
167        state.push_slice(&bytes[0..escape_index]);
168        bytes = &bytes[escape_index..];
169        bytes = S::handle_escape(bytes, &mut state)?;
170    }
171
172    state.push_slice(&bytes[0..]);
173
174    state.finalize()
175}
176
// Cursor over a byte slice that can look at the next byte without consuming
// it and hand back the unread tail of the input.
struct PeekableBytes<'a> {
    // The complete underlying input.
    slice: &'a [u8],
    // Index of the next unread byte; never exceeds `slice.len()`.
    pos: usize,
}

impl Iterator for PeekableBytes<'_> {
    type Item = u8;

    // Return the next byte and advance, or `None` once the input is used up.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.peek();
        if current.is_some() {
            self.pos += 1;
        }
        current
    }
}

impl<'a> PeekableBytes<'a> {
    // Start reading at the beginning of `slice`.
    pub fn new(slice: &'a [u8]) -> Self {
        Self { slice, pos: 0 }
    }

    // Return the next byte without advancing.
    pub fn peek(&self) -> Option<u8> {
        self.as_slice().first().copied()
    }

    // Return the bytes that have not been consumed yet.
    pub fn as_slice(&self) -> &'a [u8] {
        let (_consumed, unread) = self.slice.split_at(self.pos);
        unread
    }
}
205
/// Report whether `string`, used as a PHP array key, is cast to an integer.
///
/// PHP only casts keys that are the canonical decimal form of an integer:
/// an optional `-`, then either a lone `0` or digits without a leading zero,
/// and the value must fit in a 64-bit signed integer. So `"8"` and `"-12"`
/// qualify, while `""`, `"-"`, `"+8"`, `"08"`, `"-0"`, `"8a"` and values
/// beyond the 64-bit range stay string keys.
pub fn is_array_key_numeric(string: &str) -> bool {
    let bytes = string.as_bytes();
    let (negative, digits) = match bytes {
        [b'-', rest @ ..] => (true, rest),
        _ => (false, bytes),
    };

    let canonical = match digits {
        // "0" is an integer key, "-0" is not.
        [b'0'] => !negative,
        [b'1'..=b'9', rest @ ..] => rest.iter().all(|byte| byte.is_ascii_digit()),
        // Empty digit run (including a bare "-"), leading zero or non-digit.
        _ => false,
    };

    // The shape is already validated, so parsing can only fail on overflow.
    canonical && string.parse::<i64>().is_ok()
}
217
// Unit tests for array-key classification and for both unescaping flavours.
#[cfg(test)]
mod tests {
    use super::*;

    // Canonical decimal integers are numeric keys; a leading zero, trailing
    // garbage or a `+` sign is not.
    #[test]
    fn test_is_numeric() {
        assert!(is_array_key_numeric("123"));
        assert!(is_array_key_numeric("-123"));
        assert!(is_array_key_numeric("0"));
        assert!(!is_array_key_numeric("0123"));
        assert!(!is_array_key_numeric("123asd"));
        assert!(!is_array_key_numeric("+123"));
    }

    // Single-quoted rules: the sequences exercised here (`\n`, `\z`, `\"`,
    // `\x...`) all come out unchanged, backslash included.
    #[test]
    fn test_unescape_single() {
        assert_eq!(unescape::<SingleQuoteString>(r#"abc"#), Ok("abc".into()));
        assert_eq!(
            unescape::<SingleQuoteString>(r#"ab\nc"#),
            Ok("ab\\nc".into())
        );
        assert_eq!(
            unescape::<SingleQuoteString>(r#"ab\zc"#),
            Ok("ab\\zc".into())
        );
        assert_eq!(
            unescape::<SingleQuoteString>(r#" \"abc\" "#),
            Ok(" \\\"abc\\\" ".into())
        );
        // Multi-byte characters pass through, also right after a backslash.
        assert_eq!(unescape::<SingleQuoteString>(r#"𝄞"#), Ok("𝄞".into()));
        assert_eq!(unescape::<SingleQuoteString>(r#"\𝄞"#), Ok("\\𝄞".into()));
        assert_eq!(
            unescape::<SingleQuoteString>(r#"\xD834\xDD1E"#),
            Ok("\\xD834\\xDD1E".into())
        );
        assert_eq!(
            unescape::<SingleQuoteString>(r#"\xD834"#),
            Ok("\\xD834".into())
        );
        assert_eq!(
            unescape::<SingleQuoteString>(r#"\xDD1E"#),
            Ok("\\xDD1E".into())
        );
        assert_eq!(unescape::<SingleQuoteString>("\t"), Ok("\t".into()));
    }

    // Double-quoted rules: control, hex, unicode and octal escapes are
    // decoded; unknown escapes are kept verbatim.
    #[test]
    fn test_unescape_double() {
        assert_eq!(unescape::<DoubleQuoteString>(r#"abc"#), Ok("abc".into()));
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"ab\nc"#),
            Ok("ab\nc".into())
        );
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"ab\zc"#),
            Ok("ab\\zc".into())
        );
        assert_eq!(
            unescape::<DoubleQuoteString>(r#" \"abc\" "#),
            Ok(" \"abc\" ".into())
        );
        assert_eq!(unescape::<DoubleQuoteString>(r#"𝄞"#), Ok("𝄞".into()));
        assert_eq!(unescape::<DoubleQuoteString>(r#"\𝄞"#), Ok("\\𝄞".into()));
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\u{1D11E}"#),
            Ok("𝄞".into())
        );
        // `\x` takes at most two hex digits; the rest is plain text.
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\xD834"#),
            Ok("\u{D8}34".into())
        );
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\xDD1E"#),
            Ok("\u{DD}1E".into())
        );
        assert_eq!(unescape::<DoubleQuoteString>(r#"\xD"#), Ok("\u{D}".into()));
        assert_eq!(unescape::<DoubleQuoteString>("\t"), Ok("\t".into()));
        // An unterminated `\u{` is an error; `\u` without a brace is literal.
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\u{D834"#),
            Err(UnescapeError)
        );
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\uD834"#),
            Ok("\\uD834".into())
        );
        assert_eq!(unescape::<DoubleQuoteString>(r#"\u"#), Ok("\\u".into()));
        // Octal escapes stop at the first non-octal digit.
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\47foo"#),
            Ok("'foo".into())
        );
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\48foo"#),
            Ok("\u{4}8foo".into())
        );
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\87foo"#),
            Ok("\\87foo".into())
        );

        // Code points beyond the Unicode range, or too large for a u32,
        // are rejected.
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\u{999999}"#),
            Err(UnescapeError)
        );
        assert_eq!(
            unescape::<DoubleQuoteString>(r#"\u{999999999999999999}"#),
            Err(UnescapeError)
        );
    }
}