1use regex::Regex;
2use std::sync::Arc;
3
4use crate::leaf::{trim_leading_whitespace, trim_leading_whitespace_mut};
5use crate::parse::ParserFn;
6use crate::state::{ParserState, Span};
7
8use aho_corasick::{AhoCorasick, Anchored, Input};
9
/// Flag bit: `SpanParser::call` skips leading whitespace before running the
/// parser and again after it succeeds.
const FLAG_TRIM_WS: u8 = 0x01;
/// Flag bit: `SpanParser::call` remembers the offset before running the parser
/// and puts it back if the parser fails.
const FLAG_SAVE_STATE: u8 = 0x02;
14
// Builds a `SpanParser` with no flags set (diagnostics build).
//
// `sp_new!(kind)` leaves the label empty; `sp_new!(kind, label)` stores the
// label so that failures can report what was expected.
#[cfg(feature = "diagnostics")]
macro_rules! sp_new {
    ($variant:expr) => {
        SpanParser {
            kind: $variant,
            flags: 0,
            label: None,
        }
    };
    ($variant:expr, $name:expr) => {
        SpanParser {
            kind: $variant,
            flags: 0,
            label: Some($name),
        }
    };
}
35
// Builds a `SpanParser` with no flags set (build without diagnostics).
//
// The two-argument form is accepted so call sites look the same in both
// builds, but the label tokens are dropped: the struct has no `label` field
// when the `diagnostics` feature is off.
#[cfg(not(feature = "diagnostics"))]
macro_rules! sp_new {
    ($variant:expr) => {
        SpanParser {
            kind: $variant,
            flags: 0,
        }
    };
    ($variant:expr, $name:expr) => {
        SpanParser {
            kind: $variant,
            flags: 0,
        }
    };
}
51
/// A parser that, when it matches, yields the [`Span`] of input it covered.
///
/// The matching strategy lives in `kind`; `flags` adds optional whitespace
/// skipping and restore-on-failure behaviour around it (see `call`).
pub struct SpanParser<'a> {
    /// What this parser matches and how.
    pub(super) kind: SpanKind<'a>,
    /// Bit set built from `FLAG_TRIM_WS` and `FLAG_SAVE_STATE`; `0` means the
    /// matcher runs with no extra handling.
    pub(super) flags: u8,
    /// Name reported through `ParserState::add_expected` when a labelled
    /// matcher fails. Only present in `diagnostics` builds.
    #[cfg(feature = "diagnostics")]
    pub(super) label: Option<&'static str>,
}
58
/// The matching strategies a [`SpanParser`] can use.
///
/// Variants fall into three groups: primitive matchers that look at the input
/// directly, combinators that run other `SpanParser`s, and `Boxed`, which
/// defers to an arbitrary parser object.
pub(super) enum SpanKind<'a> {
    /// Matches exactly these bytes at the current offset. An empty literal
    /// matches without consuming anything.
    StringLiteral(&'static [u8]),
    /// Matches when the regex's first match in the remaining input begins at
    /// the current offset.
    RegexMatch(Arc<Regex>),
    /// Matches one of the automaton's patterns with an anchored search at the
    /// current offset.
    AhoCorasickMatch(AhoCorasick),
    /// Consumes the run of bytes for which the predicate holds; fails when the
    /// run is empty.
    TakeWhileByte(fn(u8) -> bool),
    /// Consumes the run of characters for which the predicate holds; fails
    /// when the run is empty.
    TakeWhileChar(Box<dyn Fn(char) -> bool + 'a>),
    /// Consumes exactly this many bytes; fails when fewer remain.
    NextN(usize),
    /// Always succeeds with an empty span.
    Epsilon,
    /// Consumes bytes up to (not including) the first occurrence of the given
    /// byte, or to end of input; fails when nothing would be consumed.
    TakeUntilAny1(u8),
    /// Like `TakeUntilAny1`, stopping at either of two bytes.
    TakeUntilAny2(u8, u8),
    /// Like `TakeUntilAny1`, stopping at any of three bytes.
    TakeUntilAny3(u8, u8, u8),
    /// Consumes bytes until one whose table entry is `true`; fails when
    /// nothing would be consumed.
    TakeUntilAnyLut(Box<[bool; 256]>),
    /// Consumes bytes until a stop byte, scanning 16 bytes at a time. A byte
    /// `b` is a stop byte when `lo_lut[b & 0x0F] & hi_lut[b >> 4] != 0`.
    TakeUntilAnySIMD {
        /// Table indexed by the low nibble of the byte.
        lo_lut: [u8; 16],
        /// Table indexed by the high nibble of the byte.
        hi_lut: [u8; 16],
    },

    /// Delegates to the scanner's own `call`.
    Scanner(SpanScanner),

    /// Runs every parser in order; fails as soon as one fails. The span runs
    /// from the starting offset to the offset after the last parser.
    Seq(Vec<SpanParser<'a>>),
    /// Tries each parser in turn from the same offset and returns the first
    /// success.
    OneOf(Vec<SpanParser<'a>>),
    /// Repeats `inner` up to `hi` times and succeeds when it matched at least
    /// `lo` times. Stops early after a match that did not advance the offset.
    Many {
        inner: Box<SpanParser<'a>>,
        lo: usize,
        hi: usize,
    },
    /// Matches `inner` items separated by `sep`, up to `hi` items, succeeding
    /// with at least `lo`. A separator not followed by an item is not
    /// consumed.
    SepBy {
        inner: Box<SpanParser<'a>>,
        sep: Box<SpanParser<'a>>,
        lo: usize,
        hi: usize,
    },
    /// Like `SepBy`, but calls `trim_leading_whitespace_mut` before the first
    /// item, around each separator, and after the last item.
    SepByWs {
        inner: Box<SpanParser<'a>>,
        sep: Box<SpanParser<'a>>,
        lo: usize,
        hi: usize,
    },
    /// Runs the inner parser; succeeds with an empty span when it fails.
    Opt(Box<SpanParser<'a>>),
    /// Runs `left`, `inner`, `right` in order and returns the span of `inner`
    /// only.
    Wrap {
        left: Box<SpanParser<'a>>,
        inner: Box<SpanParser<'a>>,
        right: Box<SpanParser<'a>>,
    },
    /// Runs both parsers in order and returns the first one's span.
    Skip(Box<SpanParser<'a>>, Box<SpanParser<'a>>),
    /// Runs both parsers in order and returns the second one's span.
    Next(Box<SpanParser<'a>>, Box<SpanParser<'a>>),
    /// Matches the first parser, then requires the second to *fail* right
    /// after it. The second parser never consumes input.
    Not(Box<SpanParser<'a>>, Box<SpanParser<'a>>),
    /// Fails if the second parser matches at the current offset; otherwise
    /// runs the first parser from that same offset.
    Minus(Box<SpanParser<'a>>, Box<SpanParser<'a>>),
    /// Matches the first parser, then requires the second to match right
    /// after it; the offset is reset to the end of the first match.
    LookAhead(Box<SpanParser<'a>>, Box<SpanParser<'a>>),
    /// Succeeds with an empty span when the inner parser fails; never
    /// consumes input.
    Negate(Box<SpanParser<'a>>),
    /// Returns the inner parser's span; on success the offset is put back to
    /// where it started.
    Peek(Box<SpanParser<'a>>),
    /// Succeeds with an empty span when `ParserState::is_at_end` is true.
    Eof,

    /// Delegates to a boxed parser object.
    Boxed(Box<dyn ParserFn<'a, Span<'a>> + 'a>),
}
134
135impl<'a> SpanParser<'a> {
136 #[inline(always)]
139 pub fn call(&self, state: &mut ParserState<'a>) -> Option<Span<'a>> {
140 if self.flags == 0 {
141 return self.call_inner(state);
142 }
143 if self.flags == FLAG_TRIM_WS {
145 state.offset += trim_leading_whitespace(state);
146 let result = self.call_inner(state);
147 if result.is_some() {
148 state.offset += trim_leading_whitespace(state);
149 }
150 return result;
151 }
152 self.call_with_flags_cold(state)
153 }
154
155 #[inline(never)]
156 fn call_with_flags_cold(&self, state: &mut ParserState<'a>) -> Option<Span<'a>> {
157 if self.flags & FLAG_TRIM_WS != 0 {
158 state.offset += trim_leading_whitespace(state);
159 }
160 let checkpoint = if self.flags & FLAG_SAVE_STATE != 0 {
161 Some(state.offset)
162 } else {
163 None
164 };
165
166 let result = self.call_inner(state);
167
168 if let Some(cp) = checkpoint {
169 if result.is_none() {
170 state.furthest_offset = state.furthest_offset.max(state.offset);
171 state.offset = cp;
172 return None;
173 }
174 }
175 if result.is_some() && self.flags & FLAG_TRIM_WS != 0 {
177 state.offset += trim_leading_whitespace(state);
178 }
179 result
180 }
181
182 #[inline(always)]
183 fn call_inner(&self, state: &mut ParserState<'a>) -> Option<Span<'a>> {
184 match &self.kind {
185 SpanKind::StringLiteral(s_bytes) => {
186 let end = s_bytes.len();
187 if end == 0 {
188 return Some(Span::new(state.offset, state.offset, state.src));
189 }
190 let slc = state.src_bytes.get(state.offset..)?;
191 if slc.len() >= end
192 && slc[0] == s_bytes[0]
193 && (end == 1 || slc[1..end].starts_with(&s_bytes[1..]))
194 {
195 let start = state.offset;
196 state.offset += end;
197 Some(Span::new(start, state.offset, state.src))
198 } else {
199 #[cfg(feature = "diagnostics")]
200 if let Some(lbl) = self.label {
201 state.add_expected(lbl);
202 }
203 None
204 }
205 }
206
207 SpanKind::RegexMatch(re) => {
208 let slc = state.src.get(state.offset..)?;
209 match re.find_at(slc, 0) {
210 Some(m) if m.start() == 0 => {
211 let start = state.offset;
212 state.offset += m.end();
213 Some(Span::new(start, state.offset, state.src))
214 }
215 _ => {
216 #[cfg(feature = "diagnostics")]
217 if let Some(lbl) = self.label {
218 state.add_expected(lbl);
219 }
220 None
221 }
222 }
223 }
224
225 SpanKind::AhoCorasickMatch(ac) => {
226 let slc = state.src.get(state.offset..)?;
227 let input = Input::new(slc).anchored(Anchored::Yes);
228 match ac.find(input) {
229 Some(m) => {
230 let start = state.offset;
231 state.offset += m.end();
232 Some(Span::new(start, state.offset, state.src))
233 }
234 None => {
235 #[cfg(feature = "diagnostics")]
236 if let Some(lbl) = self.label {
237 state.add_expected(lbl);
238 }
239 None
240 }
241 }
242 }
243
244 SpanKind::TakeWhileByte(f) => {
245 let bytes = state.src_bytes;
246 let start = state.offset;
247 let end = bytes.len();
248 let mut i = start;
249 while i < end && f(unsafe { *bytes.get_unchecked(i) }) {
250 i += 1;
251 }
252 if i == start {
253 #[cfg(feature = "diagnostics")]
254 if let Some(lbl) = self.label {
255 state.add_expected(lbl);
256 }
257 return None;
258 }
259 state.offset = i;
260 Some(Span::new(start, i, state.src))
261 }
262
263 SpanKind::TakeWhileChar(f) => {
264 let slc = state.src.get(state.offset..)?;
265 match slc
266 .char_indices()
267 .take_while(|(_, c)| f(*c))
268 .map(|(i, _)| i)
269 .last()
270 {
271 Some(mut len) => {
272 len += 1;
273 while len < slc.len() && !slc.is_char_boundary(len) {
274 len += 1;
275 }
276 let start = state.offset;
277 state.offset += len;
278 Some(Span::new(start, state.offset, state.src))
279 }
280 None => {
281 #[cfg(feature = "diagnostics")]
282 if let Some(lbl) = self.label {
283 state.add_expected(lbl);
284 }
285 None
286 }
287 }
288 }
289
290 SpanKind::NextN(amount) => {
291 let start = state.offset;
292 let new_offset = start + amount;
293 if new_offset > state.src.len() {
294 #[cfg(feature = "diagnostics")]
295 if let Some(lbl) = self.label {
296 state.add_expected(lbl);
297 }
298 return None;
299 }
300 state.offset = new_offset;
301 Some(Span::new(start, new_offset, state.src))
302 }
303
304 SpanKind::Epsilon => Some(Span::new(state.offset, state.offset, state.src)),
305
306 SpanKind::Scanner(scanner) => {
308 let result = scanner.call(state);
309 #[cfg(feature = "diagnostics")]
310 if result.is_none() {
311 if let Some(lbl) = self.label {
312 state.add_expected(lbl);
313 }
314 }
315 result
316 }
317
318 SpanKind::TakeUntilAny1(b1) => {
319 let bytes = state.src_bytes;
320 let start = state.offset;
321 if start >= bytes.len() {
322 #[cfg(feature = "diagnostics")]
323 if let Some(lbl) = self.label {
324 state.add_expected(lbl);
325 }
326 return None;
327 }
328 let scan_len = memchr::memchr(*b1, &bytes[start..]).unwrap_or(bytes.len() - start);
329 if scan_len == 0 {
330 #[cfg(feature = "diagnostics")]
331 if let Some(lbl) = self.label {
332 state.add_expected(lbl);
333 }
334 return None;
335 }
336 let end = start + scan_len;
337 state.offset = end;
338 Some(Span::new(start, end, state.src))
339 }
340 SpanKind::TakeUntilAny2(b1, b2) => {
341 let bytes = state.src_bytes;
342 let start = state.offset;
343 if start >= bytes.len() {
344 #[cfg(feature = "diagnostics")]
345 if let Some(lbl) = self.label {
346 state.add_expected(lbl);
347 }
348 return None;
349 }
350 let scan_len =
351 memchr::memchr2(*b1, *b2, &bytes[start..]).unwrap_or(bytes.len() - start);
352 if scan_len == 0 {
353 #[cfg(feature = "diagnostics")]
354 if let Some(lbl) = self.label {
355 state.add_expected(lbl);
356 }
357 return None;
358 }
359 let end = start + scan_len;
360 state.offset = end;
361 Some(Span::new(start, end, state.src))
362 }
363 SpanKind::TakeUntilAny3(b1, b2, b3) => {
364 let bytes = state.src_bytes;
365 let start = state.offset;
366 if start >= bytes.len() {
367 #[cfg(feature = "diagnostics")]
368 if let Some(lbl) = self.label {
369 state.add_expected(lbl);
370 }
371 return None;
372 }
373 let scan_len =
374 memchr::memchr3(*b1, *b2, *b3, &bytes[start..]).unwrap_or(bytes.len() - start);
375 if scan_len == 0 {
376 #[cfg(feature = "diagnostics")]
377 if let Some(lbl) = self.label {
378 state.add_expected(lbl);
379 }
380 return None;
381 }
382 let end = start + scan_len;
383 state.offset = end;
384 Some(Span::new(start, end, state.src))
385 }
386 SpanKind::TakeUntilAnyLut(lut) => {
387 let bytes = state.src_bytes;
388 let start = state.offset;
389 let end = bytes.len();
390 let mut i = start;
391 while i < end && !lut[unsafe { *bytes.get_unchecked(i) } as usize] {
392 i += 1;
393 }
394 if i == start {
395 #[cfg(feature = "diagnostics")]
396 if let Some(lbl) = self.label {
397 state.add_expected(lbl);
398 }
399 return None;
400 }
401 state.offset = i;
402 Some(Span::new(start, i, state.src))
403 }
404 SpanKind::TakeUntilAnySIMD { lo_lut, hi_lut } => {
405 use std::simd::prelude::*;
406
407 let lo = u8x16::from_array(*lo_lut);
408 let hi = u8x16::from_array(*hi_lut);
409 let lo_mask_const = u8x16::splat(0x0F);
410
411 let bytes = state.src_bytes;
412 let start = state.offset;
413 let end = bytes.len();
414 let mut i = start;
415
416 while i + 16 <= end {
418 let chunk = u8x16::from_slice(&bytes[i..i + 16]);
419 let lo_nibbles = chunk & lo_mask_const;
420 let hi_nibbles = chunk >> 4;
421
422 let lo_result = lo.swizzle_dyn(lo_nibbles);
423 let hi_result = hi.swizzle_dyn(hi_nibbles);
424 let matched = lo_result & hi_result;
425
426 let is_excluded = matched.simd_ne(u8x16::splat(0));
427 if !is_excluded.any() {
428 i += 16;
429 continue;
430 }
431 i += is_excluded.to_bitmask().trailing_zeros() as usize;
432 if i == start {
434 #[cfg(feature = "diagnostics")]
435 if let Some(lbl) = self.label {
436 state.add_expected(lbl);
437 }
438 return None;
439 }
440 state.offset = i;
441 return Some(Span::new(start, i, state.src));
442 }
443
444 while i < end {
446 let b = unsafe { *bytes.get_unchecked(i) };
447 if lo_lut[(b & 0x0F) as usize] & hi_lut[(b >> 4) as usize] != 0 {
448 break;
449 }
450 i += 1;
451 }
452
453 if i == start {
454 #[cfg(feature = "diagnostics")]
455 if let Some(lbl) = self.label {
456 state.add_expected(lbl);
457 }
458 return None;
459 }
460 state.offset = i;
461 Some(Span::new(start, i, state.src))
462 }
463
464 SpanKind::Seq(parsers) => {
465 let start = state.offset;
466 for p in parsers {
467 p.call(state)?;
468 }
469 Some(Span::new(start, state.offset, state.src))
470 }
471
472 SpanKind::OneOf(parsers) => {
473 for p in parsers {
474 let cp = state.offset;
475 if let Some(span) = p.call(state) {
476 return Some(span);
477 }
478 state.furthest_offset = state.furthest_offset.max(state.offset);
479 state.offset = cp;
480 }
481 None
482 }
483
484 SpanKind::Many { inner, lo, hi } => {
485 let start = state.offset;
486 let mut end = state.offset;
487 let mut count = 0;
488 while count < *hi {
489 let prev_offset = state.offset;
490 match inner.call(state) {
491 Some(span) => {
492 end = span.end;
493 count += 1;
494 if state.offset == prev_offset {
496 break;
497 }
498 }
499 None => {
500 state.offset = prev_offset;
501 break;
502 }
503 }
504 }
505 if count >= *lo {
506 Some(Span::new(start, end, state.src))
507 } else {
508 None
509 }
510 }
511
512 SpanKind::SepBy { inner, sep, lo, hi } => {
513 let start = state.offset;
514 let mut count = 0;
515 let Some(first_span) = inner.call(state) else {
517 if *lo == 0 {
518 return Some(Span::new(start, start, state.src));
519 }
520 return None;
521 };
522 let mut end = first_span.end;
523 count += 1;
524 while count < *hi {
527 let cp = state.offset;
528 if sep.call(state).is_none() {
529 state.offset = cp;
530 break;
531 }
532 if let Some(span) = inner.call(state) {
533 end = span.end;
534 count += 1;
535 } else {
536 state.offset = cp;
539 break;
540 }
541 }
542 if count >= *lo {
543 Some(Span::new(start, end, state.src))
544 } else {
545 None
546 }
547 }
548
549 SpanKind::SepByWs { inner, sep, lo, hi } => {
550 let start = state.offset;
551 let mut count = 0;
552 trim_leading_whitespace_mut(state);
554 if inner.call(state).is_none() {
556 if *lo == 0 {
557 return Some(Span::new(start, state.offset, state.src));
558 }
559 return None;
560 }
561 count += 1;
562 while count < *hi {
563 let cp = state.offset;
564 trim_leading_whitespace_mut(state);
566 if sep.call(state).is_none() {
567 state.offset = cp;
568 break;
569 }
570 trim_leading_whitespace_mut(state);
572 if inner.call(state).is_some() {
573 count += 1;
574 } else {
575 state.offset = cp;
576 break;
577 }
578 }
579 if count >= *lo {
580 trim_leading_whitespace_mut(state);
582 Some(Span::new(start, state.offset, state.src))
583 } else {
584 None
585 }
586 }
587
588 SpanKind::Opt(inner) => {
589 let start = state.offset;
590 if inner.call(state).is_none() {
591 return Some(Span::new(start, start, state.src));
592 }
593 Some(Span::new(start, state.offset, state.src))
594 }
595
596 SpanKind::Wrap { left, inner, right } => {
597 #[cfg(feature = "diagnostics")]
598 let open_offset = state.offset;
599 left.call(state)?;
600 #[cfg(feature = "diagnostics")]
601 let open_end = state.offset;
602 let middle = inner.call(state)?;
603 if right.call(state).is_some() {
604 Some(Span::new(middle.start, middle.end, state.src))
605 } else {
606 #[cfg(feature = "diagnostics")]
607 {
608 let delimiter = state.src[open_offset..open_end].to_string();
609 state.add_suggestion(|| crate::state::Suggestion {
610 kind: crate::state::SuggestionKind::UnclosedDelimiter {
611 delimiter: delimiter.clone(),
612 open_offset,
613 },
614 message: format!(
615 "close the delimiter with matching `{}`",
616 match delimiter.as_str() {
617 "{" => "}",
618 "[" => "]",
619 "(" => ")",
620 d => d,
621 }
622 ),
623 });
624 state.add_secondary_span(
625 open_offset,
626 format!("unclosed `{}` opened here", delimiter),
627 );
628 }
629 None
630 }
631 }
632
633 SpanKind::Skip(first, second) => {
634 let span = first.call(state)?;
635 second.call(state)?;
636 Some(span)
637 }
638
639 SpanKind::Next(first, second) => {
640 first.call(state)?;
641 second.call(state)
642 }
643
644 SpanKind::Not(main, negated) => {
645 let span = main.call(state)?;
646 let checkpoint = state.offset;
647 let saved_furthest = state.furthest_offset;
648 if negated.call(state).is_none() {
649 state.offset = checkpoint;
650 state.furthest_offset = saved_furthest;
651 return Some(span);
652 }
653 state.offset = checkpoint;
654 state.furthest_offset = saved_furthest;
655 None
656 }
657
658 SpanKind::Minus(main, excluded) => {
659 let checkpoint = state.offset;
660 let saved_furthest = state.furthest_offset;
661 if excluded.call(state).is_some() {
662 state.offset = checkpoint;
663 state.furthest_offset = saved_furthest;
664 return None;
665 }
666 state.offset = checkpoint;
667 state.furthest_offset = saved_furthest;
668 main.call(state)
669 }
670
671 SpanKind::LookAhead(main, lookahead) => {
672 let span = main.call(state)?;
673 let offset_after = state.offset;
674 let result = lookahead.call(state);
675 state.offset = offset_after;
676 result?;
677 Some(span)
678 }
679
680 SpanKind::Negate(inner) => {
681 let checkpoint = state.offset;
682 let saved_furthest = state.furthest_offset;
683 if inner.call(state).is_none() {
684 state.offset = checkpoint;
685 state.furthest_offset = saved_furthest;
686 return Some(Span::new(checkpoint, checkpoint, state.src));
687 }
688 state.offset = checkpoint;
689 state.furthest_offset = saved_furthest;
690 None
691 }
692
693 SpanKind::Peek(inner) => {
694 let checkpoint = state.offset;
695 let saved_furthest = state.furthest_offset;
696 let span = inner.call(state)?;
697 state.offset = checkpoint;
698 state.furthest_offset = saved_furthest;
699 Some(span)
700 }
701
702 SpanKind::Eof => {
703 if state.is_at_end() {
704 Some(Span::new(state.offset, state.offset, state.src))
705 } else {
706 #[cfg(feature = "diagnostics")]
707 if let Some(lbl) = self.label {
708 state.add_expected(lbl);
709 }
710 None
711 }
712 }
713
714 SpanKind::Boxed(inner) => inner.call(state),
715 }
716 }
717
718}
719
720mod span_scanner;
721pub(super) use span_scanner::SpanScanner;
722
723mod methods;
724
725mod constructors;
726pub use constructors::*;