// php_literal_parser/string.rs

/// Error returned when a string literal cannot be unescaped.
///
/// The type is a unit struct: it carries no detail about which escape
/// sequence was rejected.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct UnescapeError;

/// Shorthand for results whose error type is [`UnescapeError`].
type UnescapeResult<T> = Result<T, UnescapeError>;
7
8struct UnescapeState {
10 out: Vec<u8>,
12}
13
14impl UnescapeState {
15 fn with_capacity(capacity: usize) -> UnescapeState {
16 UnescapeState {
17 out: Vec::with_capacity(capacity),
18 }
19 }
20
21 fn push_char(&mut self, c: char) {
23 let mut buff = [0; 8];
24 self.out
25 .extend_from_slice(c.encode_utf8(&mut buff).as_bytes());
26 }
27
28 fn push_u8(&mut self, c: u8) {
29 self.out.push(c);
30 }
31
32 fn push_raw(&mut self, c: u32) -> UnescapeResult<()> {
33 match std::char::from_u32(c) {
34 Some(c) => {
35 self.push_char(c);
36 Ok(())
37 }
38 None => Err(UnescapeError),
39 }
40 }
41
42 fn push_slice(&mut self, slice: &[u8]) {
43 self.out.extend_from_slice(slice);
44 }
45
46 fn finalize(self) -> UnescapeResult<String> {
47 String::from_utf8(self.out).map_err(|_| UnescapeError)
48 }
49}
50
51fn parse_u32(
52 s: &mut PeekableBytes,
53 radix: u32,
54 mut result: u32,
55 max: Option<u8>,
56) -> UnescapeResult<u32> {
57 let mut max = max.unwrap_or(u8::MAX);
58 while let Some(digit) = s.peek().and_then(|digit| (digit as char).to_digit(radix)) {
59 let _ = s.next(); result = result.checked_mul(radix).ok_or(UnescapeError)?;
61 result = result.checked_add(digit).ok_or(UnescapeError)?;
62 max -= 1;
63 if max == 0 {
64 break;
65 }
66 }
67 Ok(result)
68}
69
70trait EscapedString {
71 fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]>;
72}
73
74struct SingleQuoteString;
75
76impl EscapedString for SingleQuoteString {
77 fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
78 let mut ins = PeekableBytes::new(bytes);
79 let _next = ins.next();
80 debug_assert_eq!(_next, Some(b'\\'));
81 match ins.next() {
82 None => {
83 return Err(UnescapeError);
84 }
85 Some(d) => match d {
86 b'\\' | b'\'' => state.push_u8(d),
87 _ => {
88 state.push_u8(b'\\');
89 state.push_u8(d)
90 }
91 },
92 }
93 Ok(ins.as_slice())
94 }
95}
96
97struct DoubleQuoteString;
98
99impl EscapedString for DoubleQuoteString {
100 fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
101 let mut ins = PeekableBytes::new(bytes);
102 let _next = ins.next();
103 debug_assert_eq!(_next, Some(b'\\'));
104 match ins.next() {
105 None => {
106 return Err(UnescapeError);
107 }
108 Some(d) => {
109 match d {
110 b'$' | b'"' | b'\\' => state.push_u8(d),
111 b'n' => state.push_u8(b'\n'), b'r' => state.push_u8(b'\r'), b't' => state.push_u8(b'\t'), b'v' => state.push_u8(b'\x0B'), b'f' => state.push_u8(b'\x0C'), b'x' => {
117 let val = parse_u32(&mut ins, 16, 0, Some(2))?;
118 state.push_raw(val)?;
119 }
120 b'u' => match ins.next() {
121 Some(b'{') => {
122 let val = parse_u32(&mut ins, 16, 0, None)?;
123 state.push_raw(val)?;
124 if !matches!(ins.next(), Some(b'}')) {
125 return Err(UnescapeError);
126 }
127 }
128 Some(d) => {
129 state.push_u8(b'\\');
130 state.push_u8(b'u');
131 state.push_u8(d);
132 }
133 None => {
134 state.push_u8(b'\\');
135 state.push_u8(d);
136 }
137 },
138 b'0'..=b'7' => {
139 let val =
140 parse_u32(&mut ins, 8, (d as char).to_digit(8).unwrap(), Some(3))?;
141 state.push_raw(val)?;
142 }
143 _ => {
144 state.push_u8(b'\\');
145 state.push_u8(d)
146 }
147 }
148 }
149 }
150 Ok(ins.as_slice())
151 }
152}
153
154pub fn parse_string(literal: &str) -> Result<String, UnescapeError> {
155 let inner = &literal[1..(literal.len()) - 1];
156 if literal.bytes().next().unwrap() == b'\'' {
157 unescape::<SingleQuoteString>(inner)
158 } else {
159 unescape::<DoubleQuoteString>(inner)
160 }
161}
162
163fn unescape<S: EscapedString>(s: &str) -> UnescapeResult<String> {
164 let mut state = UnescapeState::with_capacity(s.len());
165 let mut bytes = s.as_bytes();
166 while let Some(escape_index) = memchr::memchr(b'\\', bytes) {
167 state.push_slice(&bytes[0..escape_index]);
168 bytes = &bytes[escape_index..];
169 bytes = S::handle_escape(bytes, &mut state)?;
170 }
171
172 state.push_slice(&bytes[0..]);
173
174 state.finalize()
175}
176
/// Cursor over a byte slice that supports one-byte lookahead and can hand back
/// the not-yet-consumed remainder with the slice's original lifetime.
struct PeekableBytes<'a> {
    slice: &'a [u8],
    pos: usize,
}

impl<'a> Iterator for PeekableBytes<'a> {
    type Item = u8;

    /// Returns the byte at the cursor and advances past it; `None` at the end.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.peek();
        if current.is_some() {
            self.pos += 1;
        }
        current
    }
}

impl<'a> PeekableBytes<'a> {
    /// Creates a cursor positioned at the start of `slice`.
    pub fn new(slice: &'a [u8]) -> Self {
        Self { slice, pos: 0 }
    }

    /// Returns the byte at the cursor without advancing; `None` at the end.
    pub fn peek(&self) -> Option<u8> {
        match self.slice.get(self.pos) {
            Some(&byte) => Some(byte),
            None => None,
        }
    }

    /// Returns the bytes that have not been consumed yet.
    pub fn as_slice(&self) -> &'a [u8] {
        let (_consumed, remaining) = self.slice.split_at(self.pos);
        remaining
    }
}
205
/// Reports whether `string` is written as a canonical decimal integer, i.e.
/// the form of string key that PHP converts to an integer array key.
///
/// Accepted: `0`, or an optional `-` followed by a non-zero digit and any
/// number of further ASCII digits. Rejected: the empty string, a lone `-`,
/// a leading `+`, leading zeros (`0123`, `-0123`), negative zero (`-0`) and
/// anything containing a non-digit.
///
/// NOTE(review): the magnitude is not checked, so a digit string too long to
/// fit in an integer is still reported as numeric; confirm that callers handle
/// the conversion failure.
pub fn is_array_key_numeric(string: &str) -> bool {
    let (negative, digits) = match string.as_bytes() {
        [b'-', rest @ ..] => (true, rest),
        all => (false, all),
    };

    match digits {
        // No digits at all: "" or "-".
        [] => false,
        // Zero is only canonical without a sign.
        [b'0'] => !negative,
        // Any other leading zero is not canonical.
        [b'0', ..] => false,
        _ => digits.iter().all(u8::is_ascii_digit),
    }
}
217
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair unescapes successfully
    /// under the rules of `S`.
    fn assert_unescapes<S: EscapedString>(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(
                unescape::<S>(input),
                Ok(expected.to_owned()),
                "input: {:?}",
                input
            );
        }
    }

    /// Asserts that every input is rejected under the rules of `S`.
    fn assert_rejects<S: EscapedString>(inputs: &[&str]) {
        for &input in inputs {
            assert_eq!(
                unescape::<S>(input),
                Err(UnescapeError),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_is_numeric() {
        for key in &["123", "-123", "0"] {
            assert!(is_array_key_numeric(key), "{:?} should be numeric", key);
        }
        for key in &["0123", "123asd", "+123"] {
            assert!(!is_array_key_numeric(key), "{:?} should not be numeric", key);
        }
    }

    #[test]
    fn test_unescape_single() {
        assert_unescapes::<SingleQuoteString>(&[
            (r#"abc"#, "abc"),
            (r#"ab\nc"#, "ab\\nc"),
            (r#"ab\zc"#, "ab\\zc"),
            (r#" \"abc\" "#, " \\\"abc\\\" "),
            (r#"𝄞"#, "𝄞"),
            (r#"\𝄞"#, "\\𝄞"),
            (r#"\xD834\xDD1E"#, "\\xD834\\xDD1E"),
            (r#"\xD834"#, "\\xD834"),
            (r#"\xDD1E"#, "\\xDD1E"),
            ("\t", "\t"),
        ]);
    }

    #[test]
    fn test_unescape_double() {
        assert_unescapes::<DoubleQuoteString>(&[
            (r#"abc"#, "abc"),
            (r#"ab\nc"#, "ab\nc"),
            (r#"ab\zc"#, "ab\\zc"),
            (r#" \"abc\" "#, " \"abc\" "),
            (r#"𝄞"#, "𝄞"),
            (r#"\𝄞"#, "\\𝄞"),
            (r#"\u{1D11E}"#, "𝄞"),
            (r#"\xD834"#, "\u{D8}34"),
            (r#"\xDD1E"#, "\u{DD}1E"),
            (r#"\xD"#, "\u{D}"),
            ("\t", "\t"),
            (r#"\uD834"#, "\\uD834"),
            (r#"\u"#, "\\u"),
            (r#"\47foo"#, "'foo"),
            (r#"\48foo"#, "\u{4}8foo"),
            (r#"\87foo"#, "\\87foo"),
        ]);

        assert_rejects::<DoubleQuoteString>(&[
            // Unterminated code point escape.
            r#"\u{D834"#,
            // Value above the Unicode range.
            r#"\u{999999}"#,
            // Value that does not fit in a `u32`.
            r#"\u{999999999999999999}"#,
        ]);
    }
}