atm_parser_helper_common_syntax/
lib.rs1use atm_parser_helper::{Eoi, Error, ParserHelper};
2
3#[cfg(feature = "arbitrary")]
4pub mod testing;
5
6pub trait WhiteSpaceE : Eoi {
8 fn utf8_comment() -> Self;
10}
11
12pub fn spaces<E: WhiteSpaceE>(p: &mut ParserHelper) -> Result<(), Error<E>> {
14 loop {
15 match p.peek_or_end() {
16 Some(0x09) | Some(0x0a) | Some(0x0d) | Some(0x20) => p.advance(1),
17 Some(0x23) => comment(p)?,
18 Some(_) | None => return Ok(()),
19 }
20 }
21}
22
23fn comment<E: WhiteSpaceE>(p: &mut ParserHelper) -> Result<(), Error<E>> {
24 let start = p.position();
25 p.advance(1); loop {
27 match p.next_or_end() {
28 Some(0x0a) | None => {
29 match std::str::from_utf8(p.slice(start..p.position())) {
30 Ok(_) => return Ok(()),
31 Err(_) => return p.fail_at_position(E::utf8_comment(), start),
32 }
33 }
34 Some(_) => {}
35 }
36 }
37}
38
39pub trait IntLiteralE : Eoi {
41 fn int_no_digits() -> Self;
43 fn not_int_literal() -> Self;
45}
46
47pub fn parse_int<I, E: IntLiteralE>(
49 p: &mut ParserHelper,
50 from_decimal: fn(&str) -> Result<I, E>,
51 from_hex: fn(&str) -> Result<I, E>,
52 from_binary: fn(&str) -> Result<I, E>,
53) -> Result<I, Error<E>> {
54 let start = p.position();
55
56 let negative = p.advance_over(b"-");
57 let has_sign = negative || p.advance_over(b"+");
58
59 let is_hex = !has_sign && p.advance_over(b"0x");
60 let is_binary = !is_hex && (!has_sign && p.advance_over(b"0b"));
61
62 if is_hex {
63 if !is_hex_digit(p.peek()?) {
64 return p.fail(E::int_no_digits());
65 }
66
67 let start = p.position();
68 p.skip(is_hex_digit_or_underscore);
69
70 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
71 let without_underscores = digits_with_underscores.replace("_", "");
72 match from_hex(&without_underscores) {
73 Ok(n) => return Ok(n),
74 Err(e) => return p.fail(e),
75 }
76 } else if is_binary {
77 if !is_binary_digit(p.peek()?) {
78 return p.fail(E::int_no_digits());
79 }
80
81 let start = p.position();
82 p.skip(is_binary_digit_or_underscore);
83
84 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
85 let without_underscores = digits_with_underscores.replace("_", "");
86 match from_binary(&without_underscores) {
87 Ok(n) => return Ok(n),
88 Err(e) => return p.fail(e),
89 }
90 } else {
91 if !is_digit(p.peek()?) {
92 if has_sign {
93 return p.fail(E::int_no_digits());
94 } else {
95 return p.fail(E::not_int_literal());
96 }
97 }
98
99 p.skip(is_digit_or_underscore);
100
101 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
102 let without_underscores = digits_with_underscores.replace("_", "");
103 match from_decimal(&without_underscores) {
104 Ok(n) => return Ok(n),
105 Err(e) => return p.fail(e),
106 }
107 }
108}
109
110pub trait FloatLiteralE : Eoi {
112 fn float_no_leading_digits() -> Self;
114 fn float_no_point() -> Self;
116 fn float_no_trailing_digits() -> Self;
118 fn float_no_exponent_digits() -> Self;
120 fn not_float_literal() -> Self;
122}
123
124pub fn parse_float<F, E: FloatLiteralE>(
126 p: &mut ParserHelper,
127 from_s: fn(&str) -> Result<F, E>,
128 neg_inf: F,
129 pos_inf: F,
130 nan: F,
131) -> Result<F, Error<E>> {
132 let start = p.position();
133
134 let negative = p.advance_over(b"-");
135 let has_sign = negative || p.advance_over(b"+");
136
137 match p.peek()? {
138 0x49 => {
139 p.expect_bytes(b"Inf", E::not_float_literal())?;
140 return Ok(if negative { neg_inf } else { pos_inf });
141 }
142 0x4e => {
143 p.expect_bytes(b"NaN", E::not_float_literal())?;
144 return Ok(nan);
145 }
146 _ => {}
147 }
148
149 if !is_digit(p.peek()?) {
150 if has_sign {
151 return p.fail(E::float_no_leading_digits());
152 } else {
153 return p.fail(E::not_float_literal());
154 }
155 }
156 p.skip(is_digit_or_underscore);
157
158 p.expect('.' as u8, E::float_no_point())?;
159
160 if !is_digit(p.peek()?) {
161 return p.fail(E::float_no_trailing_digits());
162 }
163 p.skip(is_digit_or_underscore);
164
165 if let Ok(0x45 | 0x65) = p.peek::<E>() {
166 p.advance(1);
167 let negative = p.advance_over(b"-");
168 if !negative {
169 p.advance_over(b"+");
170 }
171
172 if !is_digit(p.peek()?) {
173 return p.fail(E::float_no_exponent_digits());
174 }
175 p.skip(is_digit_or_underscore);
176 }
177
178 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
179 let without_underscores = digits_with_underscores.replace("_", "");
180 match from_s(&without_underscores) {
181 Ok(n) => return Ok(n),
182 Err(_) => panic!("Prior parsing should have ensured a valid input to f64::from_str"),
183 }
184}
185
/// Result of parsing a literal that may be either a float or an integer.
///
/// The variant order is significant for the derived `PartialOrd`: every
/// `Float` compares less than every `Integer`.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub enum Number<I, F> {
    /// A floating-point value of the caller's float type.
    Float(F),
    /// An integer value of the caller's integer type.
    Integer(I),
}
191
192pub fn parse_number<I, F, E: FloatLiteralE + IntLiteralE>(
194 p: &mut ParserHelper,
195 from_decimal: fn(&str) -> Result<I, E>,
196 from_hex: fn(&str) -> Result<I, E>,
197 from_binary: fn(&str) -> Result<I, E>,
198 from_s: fn(&str) -> Result<F, E>,
199 neg_inf: F,
200 pos_inf: F,
201 nan: F,
202) -> Result<Number<I, F>, Error<E>> {
203 let start = p.position();
204
205 let negative = p.advance_over(b"-");
206 let has_sign = negative || p.advance_over(b"+");
207
208 match p.peek()? {
209 0x49 => {
210 p.expect_bytes(b"Inf", E::not_float_literal())?;
211 return Ok(if negative { Number::Float(neg_inf) } else { Number::Float(pos_inf) });
212 }
213 0x4e => {
214 p.expect_bytes(b"NaN", E::not_float_literal())?;
215 return Ok(Number::Float(nan));
216 }
217 _ => {}
218 }
219
220 let is_hex = !has_sign && p.advance_over(b"0x");
221 let is_binary = !is_hex && (!has_sign && p.advance_over(b"0b"));
222
223 if is_hex {
224 if !is_hex_digit(p.peek()?) {
225 return p.fail(E::int_no_digits());
226 }
227
228 let start = p.position();
229 p.skip(is_hex_digit_or_underscore);
230
231 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
232 let without_underscores = digits_with_underscores.replace("_", "");
233 match from_hex(&without_underscores) {
234 Ok(n) => return Ok(Number::Integer(n)),
235 Err(e) => return p.fail(e),
236 }
237 } else if is_binary {
238 if !is_binary_digit(p.peek()?) {
239 return p.fail(E::int_no_digits());
240 }
241
242 let start = p.position();
243 p.skip(is_binary_digit_or_underscore);
244
245 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
246 let without_underscores = digits_with_underscores.replace("_", "");
247 match from_binary(&without_underscores) {
248 Ok(n) => return Ok(Number::Integer(n)),
249 Err(e) => return p.fail(e),
250 }
251 } else {
252 if !is_digit(p.peek()?) {
253 if has_sign {
254 return p.fail(E::int_no_digits());
255 } else {
256 return p.fail(E::not_int_literal());
257 }
258 }
259
260 p.skip(is_digit_or_underscore);
261
262 match p.peek::<E>() {
263 Ok(0x2e) => {
264 p.advance(1);
265 if !is_digit(p.peek()?) {
266 return p.fail(E::float_no_trailing_digits());
267 }
268 p.skip(is_digit_or_underscore);
269
270 if let Ok(0x45 | 0x65) = p.peek::<E>() {
271 p.advance(1);
272 let negative = p.advance_over(b"-");
273 if !negative {
274 p.advance_over(b"+");
275 }
276
277 if !is_digit(p.peek()?) {
278 return p.fail(E::float_no_exponent_digits());
279 }
280 p.skip(is_digit_or_underscore);
281 }
282
283 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
284 let without_underscores = digits_with_underscores.replace("_", "");
285 match from_s(&without_underscores) {
286 Ok(n) => return Ok(Number::Float(n)),
287 Err(_) => panic!("Prior parsing should have ensured a valid input to f64::from_str"),
288 }
289 }
290
291 _ => {
292 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
293 let without_underscores = digits_with_underscores.replace("_", "");
294 match from_decimal(&without_underscores) {
295 Ok(n) => return Ok(Number::Integer(n)),
296 Err(e) => return p.fail(e),
297 }
298 }
299 }
300 }
301}
302
303pub trait ByteStringLiteralE : Eoi + WhiteSpaceE + IntLiteralE {
305 fn odd_hex_digits() -> Self;
307 fn number_binary_digits() -> Self;
309 fn expected_comma() -> Self;
311 fn byte_out_of_bounds() -> Self;
313 fn not_byte_string_literal() -> Self;
315}
316
317pub fn parse_byte_string<E: ByteStringLiteralE>(p: &mut ParserHelper) -> Result<Vec<u8>, Error<E>> {
319 p.expect('@' as u8, E::not_byte_string_literal())?;
320 match p.next()? {
321 0x5b => {
322 let mut r = Vec::new();
323 loop {
324 spaces(p)?;
325 if p.peek()? == (']' as u8) {
326 p.advance(1);
327 return Ok(r);
328 }
329
330 let b = parse_int(p, u8_from_decimal, u8_from_hex, u8_from_binary)?;
331 r.push(b);
332
333 spaces(p)?;
334
335 if p.peek()? == (']' as u8) {
336 p.advance(1);
337 return Ok(r);
338 } else if p.peek()? == (',' as u8) {
339 p.advance(1);
340 } else {
341 return p.fail(E::expected_comma());
342 }
343 }
344 }
345 0x78 => {
346 let start = p.position();
347 p.skip(is_hex_digit_or_underscore);
348
349 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
350 let without_underscores = digits_with_underscores.replace("_", "");
351
352 if without_underscores.len() % 2 == 0 {
353 let mut buf = Vec::new();
354 let mut i = 0;
355 while i < without_underscores.len() {
356 buf.push(u8::from_str_radix(unsafe {std::str::from_utf8_unchecked(&without_underscores.as_bytes()[i..i + 2])}, 16).unwrap());
357 i += 2;
358 }
359 return Ok(buf);
360 } else {
361 p.fail(E::odd_hex_digits())
362 }
363 }
364 0x62 => {
365 let start = p.position();
366 p.skip(is_binary_digit_or_underscore);
367
368 let digits_with_underscores = unsafe { std::str::from_utf8_unchecked(p.slice(start..p.position())) };
369 let without_underscores = digits_with_underscores.replace("_", "");
370
371 if without_underscores.len() % 8 == 0 {
372 let mut buf = Vec::new();
373 let mut i = 0;
374 while i < without_underscores.len() {
375 buf.push(u8::from_str_radix(unsafe {std::str::from_utf8_unchecked(&without_underscores.as_bytes()[i..i + 8])}, 2).unwrap());
376 i += 8;
377 }
378 return Ok(buf);
379 } else {
380 p.fail(E::number_binary_digits())
381 }
382 }
383 _ => p.fail(E::not_byte_string_literal()),
384 }
385}
386
387pub trait Utf8StringLiteralE : Eoi {
389 fn raw_not_utf8() -> Self;
391 fn raw_too_many_ats() -> Self;
393 fn escaping_not_utf8() -> Self;
395 fn invalid_escape_sequence() -> Self;
397 fn unicode_escape_number_digits() -> Self;
399 fn unicode_escape_invalid_scalar() -> Self;
401 fn unicode_escape_no_closing() -> Self;
403 fn not_utf8_string_literal() -> Self;
405}
406
407pub fn parse_utf8_string<E: Utf8StringLiteralE>(p: &mut ParserHelper) -> Result<String, Error<E>> {
409 let start_ats = p.position();
410 p.skip(is_at);
411 let ats = p.position() - start_ats;
412
413 p.expect('"' as u8, E::not_utf8_string_literal())?;
414 let start = p.position();
415
416 if ats == 0 {
417 let mut s = String::new();
418
419 loop {
420 if p.advance_over(b"\"") {
421 return Ok(s);
422 } else {
423 s.push(parse_char(p)?);
424 }
425 }
426 } else {
427 let mut consecutive_ats = None;
428 let mut end = 0;
429 loop {
430 let b = p.next()?;
431 match b {
432 0x22 => {
433 consecutive_ats = Some(0);
434 end = p.position() - 1;
435 }
436 0x40 => {
437 match consecutive_ats.as_mut() {
438 None => {}
439 Some(n) => {
440 *n += 1;
441 if *n > 255 {
442 return p.fail(E::raw_too_many_ats());
443 }
444 if *n == ats {
445 return std::str::from_utf8(p.slice(start..end))
446 .map(|s| s.to_string())
447 .map_err(|_| p.fail::<(), E>(E::raw_not_utf8()).unwrap_err());
448 }
449 }
450 }
451 }
452 _ => consecutive_ats = None,
453 }
454 }
455 }
456}
457
458fn parse_char<E: Utf8StringLiteralE>(p: &mut ParserHelper) -> Result<char, Error<E>> {
459 let start = p.position();
460 let fst = p.next()?;
461 let mut scalar;
462 if (fst & 0b1000_0000) == 0b0000_0000 {
463 scalar = fst as u32;
464 } else if (fst & 0b1110_0000) == 0b1100_0000 {
465 scalar = (fst & 0b0001_1111) as u32;
466 scalar <<= 6;
467 scalar = ((p.next()? & 0b0011_1111) as u32) | scalar;
468 } else if (fst & 0b1111_0000) == 0b1110_0000 {
469 scalar = (fst & 0b0000_1111) as u32;
470 scalar <<= 6;
471 scalar = ((p.next()? & 0b0011_1111) as u32) | scalar;
472 scalar <<= 6;
473 scalar = ((p.next()? & 0b0011_1111) as u32) | scalar;
474 } else if (fst & 0b1111_1000) == 0b1111_0000 {
475 scalar = (fst & 0b0000_0111) as u32;
476 scalar <<= 6;
477 scalar = ((p.next()? & 0b0011_1111) as u32) | scalar;
478 scalar <<= 6;
479 scalar = ((p.next()? & 0b0011_1111) as u32) | scalar;
480 scalar <<= 6;
481 scalar = ((p.next()? & 0b0011_1111) as u32) | scalar;
482 } else {
483 return p.fail(E::escaping_not_utf8())?;
484 }
485
486 if let Err(_) = std::str::from_utf8(p.slice(start..p.position())) {
487 return p.fail(E::escaping_not_utf8()); }
489
490 match core::char::from_u32(scalar) {
491 None => return p.fail(E::escaping_not_utf8()),
492 Some(c) => {
493 if c == '\\' {
494 match p.next()? {
495 0x22 => return Ok('\"'),
496 0x30 => return Ok('\0'),
497 0x5c => return Ok('\\'),
498 0x6e => return Ok('\n'),
499 0x74 => return Ok('\t'),
500 0x7b => {
501 let start = p.position();
502 p.skip(is_hex_digit);
503 let end = p.position();
504 let len = end - start;
505
506 if len < 1 || len > 6 {
507 return p.fail(E::unicode_escape_number_digits());
508 }
509
510 let raw = p.slice(start..end);
511 let numeric = u32::from_str_radix(unsafe { std::str::from_utf8_unchecked(raw) }, 16).unwrap();
512
513 match std::char::from_u32(numeric) {
514 None => return p.fail(E::unicode_escape_invalid_scalar()),
515 Some(c) => {
516 p.expect('}' as u8, E::unicode_escape_no_closing())?;
517 return Ok(c);
518 }
519 }
520 }
521 _ => return p.fail(E::invalid_escape_sequence()),
522 }
523 } else {
524 return Ok(c);
525 }
526 }
527 }
528}
529
/// Returns `true` if `b` is the ASCII `@` byte.
fn is_at(b: u8) -> bool {
    matches!(b, b'@')
}
533
/// Returns `true` if `byte` is an ASCII decimal digit (`0`-`9`).
fn is_digit(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9')
}
537
/// Returns `true` if `byte` is an ASCII hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`).
fn is_hex_digit(byte: u8) -> bool {
    matches!(byte, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')
}
541
/// Returns `true` if `byte` is the ASCII digit `0` or `1`.
fn is_binary_digit(byte: u8) -> bool {
    matches!(byte, b'0' | b'1')
}
545
/// Returns `true` if `byte` is an ASCII decimal digit or an underscore.
fn is_digit_or_underscore(byte: u8) -> bool {
    matches!(byte, b'_' | b'0'..=b'9')
}
549
550fn is_hex_digit_or_underscore(byte: u8) -> bool {
551 byte == ('_' as u8) || is_hex_digit(byte)
552}
553
554fn is_binary_digit_or_underscore(byte: u8) -> bool {
555 byte == ('_' as u8) || is_binary_digit(byte)
556}
557
558pub fn u8_from_decimal<E: ByteStringLiteralE>(s: &str) -> Result<u8, E> {
559 u8::from_str_radix(s, 10).map_err(|_| E::byte_out_of_bounds())
560}
561
562pub fn u8_from_hex<E: ByteStringLiteralE>(s: &str) -> Result<u8, E> {
563 u8::from_str_radix(s, 16).map_err(|_| E::byte_out_of_bounds())
564}
565
566pub fn u8_from_binary<E: ByteStringLiteralE>(s: &str) -> Result<u8, E> {
567 u8::from_str_radix(s, 2).map_err(|_| E::byte_out_of_bounds())
568}