litcheck_filecheck/ast/
pattern.rs

1use std::cmp::Ordering;
2
3use crate::{
4    common::*,
5    expr::{TypedVariable, ValueType},
6};
7
/// Describes what should happen to the text matched by a pattern, or by one
/// of its capture groups.
///
/// Equality and ordering for this type are implemented by hand further down
/// in this file; the payloads of `Ignore` and `All` take no part in either
/// comparison.
#[derive(Debug, Copy, Clone)]
pub enum Capture {
    /// Ignore the capture
    Ignore(SourceSpan),
    /// Capture the entire match, but without a name
    All(Span<ValueType>),
    /// Capture the entire match, and bind it with the given name and type
    Implicit(TypedVariable),
    /// Capture a specific named group, and bind it with a different name and type
    Mapped { group: Symbol, with: TypedVariable },
    /// Capture a specific group with the given name and type
    Explicit(TypedVariable),
}
21impl Default for Capture {
22    #[inline(always)]
23    fn default() -> Self {
24        Self::Ignore(SourceSpan::from(0..0))
25    }
26}
27impl Eq for Capture {}
28impl PartialEq for Capture {
29    fn eq(&self, other: &Self) -> bool {
30        match (self, other) {
31            (Self::Ignore(_), Self::Ignore(_)) => true,
32            (Self::All(_), Self::All(_)) => true,
33            (Self::Implicit(l), Self::Implicit(r)) => l == r,
34            (Self::Mapped { group: gl, with: l }, Self::Mapped { group: gr, with: r }) => {
35                gl == gr && l == r
36            }
37            (Self::Explicit(l), Self::Explicit(r)) => l == r,
38            _ => false,
39        }
40    }
41}
42impl PartialOrd for Capture {
43    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
44        Some(self.cmp(other))
45    }
46}
47impl Ord for Capture {
48    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
49        match (self, other) {
50            (Self::Ignore(_), Self::Ignore(_)) => Ordering::Equal,
51            (Self::Ignore(_), _) => Ordering::Less,
52            (_, Self::Ignore(_)) => Ordering::Greater,
53            (Self::All(_), Self::All(_)) => Ordering::Equal,
54            (Self::All(_), _) => Ordering::Less,
55            (_, Self::All(_)) => Ordering::Greater,
56            (Self::Implicit(l), Self::Implicit(r)) => l.cmp(r),
57            (Self::Implicit(_), _) => Ordering::Less,
58            (_, Self::Implicit(_)) => Ordering::Greater,
59            (Self::Mapped { with: l, group: gl }, Self::Mapped { with: r, group: gr }) => {
60                l.cmp(r).then(gl.cmp(gr))
61            }
62            (Self::Mapped { with: l, .. }, Self::Explicit(r)) => l.cmp(r).then(Ordering::Less),
63            (Self::Explicit(l), Self::Mapped { with: r, .. }) => l.cmp(r).then(Ordering::Greater),
64            (Self::Explicit(l), Self::Explicit(r)) => l.cmp(r),
65        }
66    }
67}
68impl Capture {
69    pub fn name(&self) -> Option<Symbol> {
70        match self {
71            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
72                Some(tv.name.into_inner())
73            }
74            Self::Ignore(_) | Self::All(_) => None,
75        }
76    }
77
78    pub fn group_name(&self) -> Option<Symbol> {
79        match self {
80            Self::Mapped { group, .. } => Some(*group),
81            Self::Explicit(tv) => Some(tv.name.into_inner()),
82            Self::Ignore(_) | Self::All(_) | Self::Implicit(_) => None,
83        }
84    }
85
86    pub fn value_type(&self) -> ValueType {
87        match self {
88            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => tv.ty,
89            Self::All(t) => t.into_inner(),
90            Self::Ignore(_) => ValueType::String,
91        }
92    }
93}
94impl Spanned for Capture {
95    fn span(&self) -> SourceSpan {
96        match self {
97            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
98                tv.name.span()
99            }
100            Self::All(span) => span.span(),
101            Self::Ignore(span) => *span,
102        }
103    }
104}
105
/// A regular expression pattern together with the captures associated with it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RegexPattern<'a> {
    /// The pattern text, along with its source span
    pub pattern: Span<Cow<'a, str>>,
    /// The captures associated with this pattern; empty when created via `new`
    pub captures: SmallVec<[Capture; 1]>,
}
111impl<'a> RegexPattern<'a> {
112    pub fn new(pattern: Span<Cow<'a, str>>) -> Self {
113        Self {
114            pattern,
115            captures: smallvec![],
116        }
117    }
118
119    pub fn is_empty(&self) -> bool {
120        self.pattern.is_empty()
121    }
122
123    pub fn len(&self) -> usize {
124        self.pattern.len()
125    }
126}
127impl<'a> AsRef<str> for RegexPattern<'a> {
128    fn as_ref(&self) -> &str {
129        self.pattern.as_ref()
130    }
131}
132impl<'a> Spanned for RegexPattern<'a> {
133    fn span(&self) -> SourceSpan {
134        self.pattern.span()
135    }
136}
137
/// A pattern prefix represents the first distinct
/// subpattern of the overall pattern which can be
/// matched independently of the rest of the pattern.
///
/// Prefixes are used when constructing sets of patterns
/// to find overlapping prefixes that can be collapsed
/// into more efficient searchers for matching.
///
/// Equality is defined in terms of the hand-written `Ord` implementation in
/// this file, rather than being derived.
#[derive(Clone)]
pub enum Prefix<'a> {
    /// The entire pattern is empty
    Empty(SourceSpan),
    /// The entire pattern is a literal string
    Literal(Span<Cow<'a, str>>),
    /// The pattern is a literal string, but only a subset is the prefix
    Substring(Span<Cow<'a, str>>),
    /// The prefix is a simple regular expression
    Regex(RegexPattern<'a>),
    /// The prefix contains a match block/substitution that cannot be
    /// reduced to a regular expression or literal prefix.
    Match(Cow<'a, Match<'a>>),
}
159impl<'a> Spanned for Prefix<'a> {
160    fn span(&self) -> SourceSpan {
161        match self {
162            Self::Empty(span) => *span,
163            Self::Literal(spanned) | Self::Substring(spanned) => spanned.span(),
164            Self::Regex(spanned) => spanned.span(),
165            Self::Match(spanned) => spanned.span(),
166        }
167    }
168}
169impl<'a> Prefix<'a> {
170    pub fn as_str(&self) -> Option<&str> {
171        match self {
172            Self::Empty(_) => Some(""),
173            Self::Literal(s) | Self::Substring(s) => Some(s.as_ref()),
174            Self::Regex(regex) => Some(regex.pattern.as_ref()),
175            Self::Match(_) => None,
176        }
177    }
178}
179impl<'a> Eq for Prefix<'a> {}
180impl<'a> PartialEq for Prefix<'a> {
181    fn eq(&self, other: &Self) -> bool {
182        self.cmp(other).is_eq()
183    }
184}
185impl<'a> PartialOrd for Prefix<'a> {
186    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
187        Some(self.cmp(other))
188    }
189}
190impl<'a> Ord for Prefix<'a> {
191    fn cmp(&self, other: &Self) -> Ordering {
192        match (self, other) {
193            (Self::Match(a), Self::Match(b)) => a.cmp(b),
194            (Self::Match(_), _) => Ordering::Greater,
195            (_, Self::Match(_)) => Ordering::Less,
196            (
197                Self::Regex(RegexPattern {
198                    pattern: ap,
199                    captures: ac,
200                }),
201                Self::Regex(RegexPattern {
202                    pattern: bp,
203                    captures: bc,
204                }),
205            ) if !ac.is_empty() && !bc.is_empty() => ap.cmp(bp).then_with(|| ac.cmp(bc)),
206            (
207                Self::Regex(RegexPattern {
208                    pattern: ap,
209                    captures: ac,
210                }),
211                b,
212            ) if !ac.is_empty() => ap.as_ref().cmp(b.as_str().unwrap()).then(Ordering::Greater),
213            (
214                a,
215                Self::Regex(RegexPattern {
216                    pattern: bp,
217                    captures: bc,
218                }),
219            ) if !bc.is_empty() => a.as_str().unwrap().cmp(bp.as_ref()).then(Ordering::Less),
220            (a, b) => a.as_str().unwrap().cmp(b.as_str().unwrap()),
221        }
222    }
223}
224
/// A check pattern is the part of a check line which must match in the check file somewhere
///
/// Equality is implemented by hand for this type: two `Empty` patterns are
/// always equal, regardless of their spans.
#[derive(Debug)]
pub enum CheckPattern<'a> {
    /// There is no content, we're at the end of line
    Empty(SourceSpan),
    /// The entire pattern is a single raw string
    Literal(Span<Cow<'a, str>>),
    /// The entire pattern is a single regex string
    Regex(RegexPattern<'a>),
    /// The pattern is some mix of literal parts and match rules
    Match(Span<Vec<CheckPatternPart<'a>>>),
}
237impl<'a> PartialEq for CheckPattern<'a> {
238    fn eq(&self, other: &Self) -> bool {
239        match (self, other) {
240            (Self::Empty(_), Self::Empty(_)) => true,
241            (Self::Literal(l), Self::Literal(r)) => l == r,
242            (Self::Regex(l), Self::Regex(r)) => l == r,
243            (Self::Match(l), Self::Match(r)) => l == r,
244            _ => false,
245        }
246    }
247}
248impl<'a> CheckPattern<'a> {
249    pub fn is_empty(&self) -> bool {
250        match self {
251            Self::Empty(_) => true,
252            Self::Literal(ref spanned) => spanned.is_empty(),
253            Self::Regex(ref spanned) => spanned.is_empty(),
254            Self::Match(parts) => parts.is_empty(),
255        }
256    }
257
258    pub fn locate_variables(&self) -> impl Iterator<Item = SourceSpan> + '_ {
259        CheckPatternVarIter::Pattern(self)
260    }
261
262    pub fn prefix(&self) -> Prefix<'a> {
263        match self {
264            Self::Literal(literal) => Prefix::Literal(literal.clone()),
265            Self::Regex(pattern) => Prefix::Regex(pattern.clone()),
266            Self::Match(parts) => match &parts[0] {
267                CheckPatternPart::Literal(literal) => Prefix::Substring(literal.clone()),
268                CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern.clone()),
269                CheckPatternPart::Match(Match::Numeric {
270                    span,
271                    format,
272                    capture: None,
273                    expr: None,
274                    ..
275                }) => Prefix::Regex(RegexPattern::new(Span::new(
276                    *span,
277                    format.pattern_nocapture(),
278                ))),
279                CheckPatternPart::Match(Match::Numeric {
280                    span,
281                    format,
282                    capture: Some(name),
283                    expr: None,
284                    ..
285                }) => Prefix::Regex(RegexPattern {
286                    pattern: Span::new(*span, format.pattern(None)),
287                    captures: smallvec![Capture::Implicit(TypedVariable {
288                        name: *name,
289                        ty: ValueType::Number(*format),
290                    })],
291                }),
292                CheckPatternPart::Match(Match::Substitution {
293                    pattern: Some(pattern),
294                    name,
295                    ..
296                }) => Prefix::Regex(RegexPattern {
297                    pattern: pattern.clone(),
298                    captures: smallvec![Capture::Implicit(TypedVariable {
299                        name: *name,
300                        ty: ValueType::String,
301                    })],
302                }),
303                CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part.clone())),
304            },
305            Self::Empty(span) => Prefix::Empty(*span),
306        }
307    }
308
309    pub fn pop_prefix(&mut self) -> Prefix<'a> {
310        use std::collections::VecDeque;
311
312        match self {
313            Self::Literal(ref mut literal) => {
314                let span = literal.span();
315                let result = Prefix::Literal(core::mem::replace(
316                    literal,
317                    Span::new(span, Cow::Borrowed("")),
318                ));
319                *self = Self::Empty(span);
320                result
321            }
322            Self::Regex(ref mut pattern) => {
323                let span = pattern.span();
324                let result = Prefix::Regex(core::mem::replace(
325                    pattern,
326                    RegexPattern::new(Span::new(span, Cow::Borrowed(""))),
327                ));
328                *self = Self::Empty(span);
329                result
330            }
331            Self::Match(ref mut parts) => {
332                let span = parts.span();
333                let mut ps = VecDeque::<CheckPatternPart<'a>>::from(core::mem::take(&mut **parts));
334                let prefix = match ps.pop_front().unwrap() {
335                    CheckPatternPart::Literal(literal) => Prefix::Substring(literal),
336                    CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern),
337                    CheckPatternPart::Match(Match::Numeric {
338                        span,
339                        format,
340                        capture: None,
341                        expr: None,
342                        ..
343                    }) => Prefix::Regex(RegexPattern::new(Span::new(
344                        span,
345                        format.pattern_nocapture(),
346                    ))),
347                    CheckPatternPart::Match(Match::Numeric {
348                        span,
349                        format,
350                        capture: Some(name),
351                        expr: None,
352                        ..
353                    }) => Prefix::Regex(RegexPattern {
354                        pattern: Span::new(span, format.pattern_nocapture()),
355                        captures: smallvec![Capture::Implicit(TypedVariable {
356                            name,
357                            ty: ValueType::Number(format),
358                        })],
359                    }),
360                    CheckPatternPart::Match(Match::Substitution {
361                        pattern: Some(pattern),
362                        name,
363                        ..
364                    }) => Prefix::Regex(RegexPattern {
365                        pattern,
366                        captures: smallvec![Capture::Implicit(TypedVariable {
367                            name,
368                            ty: ValueType::String,
369                        })],
370                    }),
371                    CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part)),
372                };
373                if ps.is_empty() {
374                    *self = Self::Empty(span);
375                } else {
376                    **parts = ps.into();
377                }
378                prefix
379            }
380            Self::Empty(span) => Prefix::Empty(*span),
381        }
382    }
383
384    pub fn is_literal(&self) -> bool {
385        match self {
386            Self::Empty(_) | Self::Literal(_) => true,
387            Self::Regex(_) => false,
388            Self::Match(ref parts) => parts
389                .iter()
390                .all(|p| matches!(p, CheckPatternPart::Literal(_))),
391        }
392    }
393
394    pub fn is_regex_compatible(&self) -> bool {
395        match self {
396            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => true,
397            Self::Match(ref parts) => parts.iter().all(|p| p.is_regex_compatible()),
398        }
399    }
400
    /// Compacts this pattern into fewer parts where possible
    ///
    /// Only `Match` patterns are affected:
    ///
    /// * A `Match` with no parts becomes `Empty`.
    /// * Consecutive literal parts, regex parts, substitutions that carry a
    ///   pattern, and numeric matches without an expression are merged into a
    ///   single literal or regex part.
    /// * Any other match part is kept as-is, and acts as a boundary between
    ///   merged runs.
    /// * If everything was merged into a single run, the pattern itself is
    ///   replaced by a `Literal` or `Regex` pattern.
    ///
    /// `interner` is used to resolve variable names, which are used as the
    /// names of the capture groups emitted into merged regex patterns.
    pub fn compact(&mut self, interner: &StringInterner) {
        use std::collections::VecDeque;

        // Replaces the literal text in `buffer` with its regex-escaped form.
        //
        // The new buffer is allocated with room for the escaped text plus
        // `padding` extra bytes; the estimate counts one unit per character,
        // or two for regex meta characters.
        fn convert_to_regex(buffer: &mut String, padding: usize) {
            let min_capacity = padding
                + buffer
                    .chars()
                    .map(|c| {
                        if regex_syntax::is_meta_character(c) {
                            2
                        } else {
                            1
                        }
                    })
                    .sum::<usize>();
            let prev = core::mem::replace(buffer, String::with_capacity(min_capacity));
            regex_syntax::escape_into(&prev, buffer);
        }

        match self {
            Self::Match(ref empty) if empty.is_empty() => {
                let span = empty.span();
                *self = Self::Empty(span);
            }
            Self::Match(ref mut compacted) => {
                let span = compacted.span();
                // Span of the run currently being accumulated in `pattern`
                let mut pattern_span = span.range();
                // Take the parts out; `compacted` is refilled with the result
                let mut parts = VecDeque::from(core::mem::take(&mut **compacted));
                // Text of the current run: raw while in literal mode, regex
                // syntax otherwise
                let mut pattern = String::new();
                // Captures collected for the current run
                let mut captures = SmallVec::<[Capture; 1]>::new();
                // True until the current run contains something non-literal
                let mut is_literal_mode = true;
                while let Some(mut part) = parts.pop_front() {
                    match part {
                        // Literal joining a literal run: append verbatim
                        CheckPatternPart::Literal(part) if is_literal_mode => {
                            pattern_span.end = part.end();
                            pattern.push_str(part.as_ref());
                        }
                        // Literal joining a regex run: append in escaped form
                        CheckPatternPart::Literal(part) => {
                            pattern_span.end = part.end();
                            regex_syntax::escape_into(part.as_ref(), &mut pattern);
                        }
                        CheckPatternPart::Regex(RegexPattern {
                            pattern: part,
                            captures: ref mut part_captures,
                        }) => {
                            let (span, part) = part.into_parts();
                            pattern_span.end = span.end();
                            captures.append(part_captures);
                            // First non-literal part of the run: escape what
                            // has been accumulated so far
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, part.len())
                            }
                            pattern.push_str(part.as_ref());
                        }
                        // A substitution with a pattern becomes a named
                        // capture group, named after the variable
                        CheckPatternPart::Match(Match::Substitution {
                            pattern: Some(part),
                            name,
                            span,
                        }) => {
                            pattern_span.end = span.end();
                            let part = part.into_inner();
                            let group_name = interner.resolve(name.into_inner());
                            if is_literal_mode {
                                is_literal_mode = false;
                                // 6 accounts for the `(?P<`, `>` and `)` added below
                                convert_to_regex(&mut pattern, 6 + group_name.len() + part.len());
                            }
                            pattern.push_str("(?P<");
                            pattern.push_str(group_name);
                            pattern.push('>');
                            pattern.push_str(part.as_ref());
                            pattern.push(')');
                            captures.push(Capture::Explicit(TypedVariable {
                                name,
                                ty: ValueType::String,
                            }));
                        }
                        // Numeric match without expression or capture: append
                        // the non-capturing pattern for its format
                        CheckPatternPart::Match(Match::Numeric {
                            expr: None,
                            capture: None,
                            span,
                            format,
                            ..
                        }) => {
                            pattern_span.end = span.end();
                            let format_pattern = format.pattern_nocapture();
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, format_pattern.len());
                            }
                            pattern.push_str(&format_pattern);
                        }
                        // Numeric match without expression, but with a capture:
                        // append the format's pattern, passing the variable
                        // name as the group name, and record the capture
                        CheckPatternPart::Match(Match::Numeric {
                            expr: None,
                            capture: Some(name),
                            span,
                            format,
                            ..
                        }) => {
                            pattern_span.end = span.end();
                            let group_name = interner.resolve(name.into_inner());
                            let format_pattern = format.pattern(Some(group_name));
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, format_pattern.len());
                            }
                            pattern.push_str(&format_pattern);
                            captures.push(Capture::Explicit(TypedVariable {
                                name,
                                ty: ValueType::Number(format),
                            }));
                        }
                        // Any other match rule cannot be merged: it ends the
                        // current run and is emitted unchanged
                        part @ CheckPatternPart::Match(_) => {
                            let span = part.span();
                            // Nothing accumulated: emit the part and start a
                            // fresh run right after it
                            if pattern.is_empty() {
                                compacted.push(part);
                                is_literal_mode = true;
                                pattern.clear();
                                captures.clear();
                                pattern_span.end = span.end();
                                pattern_span.start = pattern_span.end;
                                continue;
                            }

                            // Flush the accumulated run ahead of this part
                            if is_literal_mode {
                                compacted.push(CheckPatternPart::Literal(Span::new(
                                    pattern_span,
                                    Cow::Owned(core::mem::take(&mut pattern)),
                                )));
                            } else {
                                let captures = core::mem::take(&mut captures);
                                compacted.push(CheckPatternPart::Regex(RegexPattern {
                                    pattern: Span::new(
                                        pattern_span,
                                        Cow::Owned(core::mem::take(&mut pattern)),
                                    ),
                                    captures,
                                }));
                                is_literal_mode = true;
                            }

                            compacted.push(part);
                            // The next run starts where this part ends
                            pattern_span.end = span.end();
                            pattern_span.start = pattern_span.end;
                        }
                    }
                }

                // No part had to be kept as-is, so everything was merged into
                // a single run: replace the whole pattern with it
                if compacted.is_empty() {
                    let compacted = if is_literal_mode {
                        CheckPattern::Literal(Span::new(
                            pattern_span,
                            Cow::Owned(core::mem::take(&mut pattern)),
                        ))
                    } else {
                        CheckPattern::Regex(RegexPattern {
                            pattern: Span::new(
                                pattern_span,
                                Cow::Owned(core::mem::take(&mut pattern)),
                            ),
                            captures,
                        })
                    };
                    *self = compacted;
                    return;
                }

                // Flush the trailing run, if it has any text
                if !pattern.is_empty() {
                    if is_literal_mode {
                        compacted.push(CheckPatternPart::Literal(Span::new(
                            pattern_span,
                            Cow::Owned(core::mem::take(&mut pattern)),
                        )));
                    } else {
                        compacted.push(CheckPatternPart::Regex(RegexPattern {
                            pattern: Span::new(
                                pattern_span,
                                Cow::Owned(core::mem::take(&mut pattern)),
                            ),
                            captures,
                        }));
                    }
                }
            }
            // Already as compact as these can be
            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => (),
        }
    }
588
589    /// Converts this pattern into a string which can be used as a
590    /// regular expression, even if the pattern was not originally
591    /// expressed as a regular expression.
592    ///
593    /// Returns Err with the original pattern (potentially compacted),
594    /// if the conversion is not possible. Otherwise, returns Ok
595    /// with the built regular expression pattern.
596    pub fn into_regex_pattern(
597        mut self,
598        interner: &StringInterner,
599    ) -> Result<RegexPattern<'a>, Self> {
600        self.compact(interner);
601
602        match self {
603            Self::Literal(s) => Ok(RegexPattern::new(s)),
604            Self::Regex(regex) => Ok(regex),
605            other => Err(other),
606        }
607    }
608}
609impl<'a> Spanned for CheckPattern<'a> {
610    fn span(&self) -> SourceSpan {
611        match self {
612            Self::Empty(span) => *span,
613            Self::Literal(ref spanned) => spanned.span(),
614            Self::Regex(ref spanned) => spanned.span(),
615            Self::Match(ref spanned) => spanned.span(),
616        }
617    }
618}
619impl<'a> From<Vec<CheckPatternPart<'a>>> for CheckPattern<'a> {
620    fn from(mut parts: Vec<CheckPatternPart<'a>>) -> Self {
621        match parts.len() {
622            0 => CheckPattern::Empty(SourceSpan::from(0..0)),
623            1 => match parts.pop().unwrap() {
624                CheckPatternPart::Literal(lit) => Self::Literal(lit),
625                CheckPatternPart::Regex(re) => Self::Regex(re),
626                part @ CheckPatternPart::Match(_) => {
627                    Self::Match(Span::new(part.span(), vec![part]))
628                }
629            },
630            _ => {
631                let start = parts.first().unwrap().span().offset();
632                let last_span = parts.last().unwrap().span();
633                let end = last_span.offset() + last_span.len();
634                Self::Match(Span::new(SourceSpan::from(start..end), parts))
635            }
636        }
637    }
638}
639
/// A check line is broken up into segments whenever `[[` `]]` or `{{` `}}`
/// is encountered, for substitutions/captures and regex matches respectively;
/// the text before and after such a segment is literal (and optional).
/// As such, there are three types of segments/parts that can be observed
/// on a line.
#[derive(Debug, PartialEq, Eq)]
pub enum CheckPatternPart<'a> {
    /// This part consists of a match rule to be evaluated while matching
    Match(Match<'a>),
    /// This part is a raw literal string
    Literal(Span<Cow<'a, str>>),
    /// This part is a regex pattern
    Regex(RegexPattern<'a>),
}
654impl<'a> CheckPatternPart<'a> {
655    pub fn unwrap_str(self) -> Span<Cow<'a, str>> {
656        match self {
657            Self::Literal(s) => s,
658            part => panic!("expected a literal pattern, got {part:#?}"),
659        }
660    }
661
662    pub fn uses_variable(&self) -> bool {
663        match self {
664            Self::Literal(_) | Self::Regex(_) => false,
665            Self::Match(Match::Numeric {
666                expr: None,
667                capture: None,
668                ..
669            }) => false,
670            Self::Match(_) => true,
671        }
672    }
673
674    pub fn is_regex_compatible(&self) -> bool {
675        match self {
676            Self::Literal(_)
677            | Self::Regex(_)
678            | Self::Match(Match::Substitution {
679                pattern: Some(_), ..
680            })
681            | Self::Match(Match::Numeric { expr: None, .. }) => true,
682            Self::Match(_) => false,
683        }
684    }
685}
686impl<'a> Spanned for CheckPatternPart<'a> {
687    fn span(&self) -> SourceSpan {
688        match self {
689            Self::Match(m) => m.span(),
690            Self::Literal(spanned) => spanned.span(),
691            Self::Regex(spanned) => spanned.span(),
692        }
693    }
694}
695
696/// This type represents a match rule wrapped in `[[` `]]`
697#[derive(Debug, Clone)]
698pub enum Match<'a> {
699    /// Match the given regular expression pattern, optionally binding `name`
700    /// to the matched value.
701    ///
702    /// Corresponds to expressions such as `[[REG]]` and `[[REG:r[0-9]+]]`.
703    ///
704    /// The precise format of this match type is `[[<name>:<pattern>]]`, where:
705    ///
706    /// * `<name>` is a local variable name of the form `[A-Za-z_][A-Za-z0-9_]*`,
707    /// or a global variable name (prefixed with `$`). However, you are not permitted
708    /// to (re)bind global variables.
709    ///
710    /// * `:<pattern>`, is any valid, non-empty, regular expression pattern. When present,
711    /// it changes the semantics of this match type from string substitution to string
712    /// capture - i.e. `name` will be bound to the matched input string.
713    ///
714    /// If `:<pattern>` is not present, then the entire `[[<name>]]` block will be
715    /// substituted with the value of `<name>` as a literal pattern. The value will
716    /// be formatted according to its type.
717    ///
718    /// Variables bound using this syntax are available immediately on the same line, you
719    /// can do things like `CHECK: op [[REG:r[0-9]+]], [[REG]]` to bind `REG` to the register
720    /// name of the first operand of `op`, e.g., `r1`; and verify that the same register is
721    /// used as the second operand.
722    ///
723    /// NOTE: You should prefer the standard regular expression pattern matching syntax,
724    /// i.e. `{{<pattern>}}` if you don't need to bind a variable.
725    Substitution {
726        span: SourceSpan,
727        name: VariableName,
728        pattern: Option<Span<Cow<'a, str>>>,
729    },
730    /// Match the given numeric pattern, and optionally defines a variable if the
731    /// match succeeds.
732    ///
733    /// Corresponds to expressions such as `[[#]]` or `[[#%.8X]]` or `[[#REG + 1]]`,
734    /// as well as `[[#REG:]]` and `[[#%X,OFFSET:12]]`. The former are matches,
735    /// while the latter both match and define the given variable.
736    ///
737    /// The unified format is `[[#%<fmtspec>,<NUMVAR>: <constraint> <expr]]` where:
738    ///
739    /// * `%<fmtspec>` is the same format specifier as used for defining a variable, but
740    /// in this context it indicates how the numeric value should be matched. It is optional,
741    /// and if not present, both components of the format spec are inferred from the matching
742    /// format of the numeric variables used by the expression constraint (if any), and
743    /// defaults to `%u` (unsigned, no leading zeros) if no numeric variable is used. In
744    /// case of conflict between format specifiers of several numeric variables, the
745    /// conversion specifier becomes mandatory, but the precision specifier remains optional.
746    ///
747    /// * `<NUMVAR>:`, when present, indicates that `NUMVAR` will be (re)bound to the matched
748    /// value, if the match succeeds. If not present, no variable is defined.
749    ///
750    /// * `<constraint>` describes how the value to match must relate to the value of the
751    /// given expression. Currently, the only constraint type is `==` for equality. If present,
752    /// `<expr>` is mandatory; however the inverse is not true, `<expr>` can be provided
753    /// without `<constraint>`, implying a default equality constraint.
754    ///
755    /// * `<expr>` is an expression. An expression is in turn recursively defined as:
756    ///
757    ///   - A numeric operand
758    ///   - An expression followed by an operator and a numeric operand
759    ///
760    ///   A numeric operand is a previously defined numeric variable, an integer literal,
761    ///   or one of a set of built-in functions. Whitespace are allowed around these elements.
762    ///   Numeric operands are 64-bit values. Overflow and underflow are rejected. The original
763    ///   `lit` does not support operator precedence, but `litcheck` supports the standard precedence
764    ///   of the supported operators, and parentheses can be used to manually manage precedence.
765    ///
766    ///   The operators supported are:
767    ///
768    ///   - `+`, addition
769    ///   - `-`, subtraction
770    ///
771    ///   The built-in functions supported are:
772    ///
773    ///   - `add`, addition
774    ///   - `sub`, subtraction
775    ///   - `mul`, multiplication
776    ///   - `div`, integer division
777    ///   - `min`, minimum
778    ///   - `max`, maximum
779    ///
780    /// All components can be omitted except the `#`, i.e. `[[#]]` is a valid numeric match,
781    /// which defaults to matching an unsigned integer, with no leading zeros, of up to 64
782    /// bit precision.
783    Numeric {
784        span: SourceSpan,
785        /// The format of the value to match.
786        ///
787        /// If not specified, it is implied by the format
788        /// of any numeric operands in `expr`, otherwise it
789        /// defaults to an unsigned integer with no leading zeros.
790        format: NumberFormat,
791        /// If set, contains the name of the variable to bind to
792        /// the matched value if the match succeeds.
793        capture: Option<VariableName>,
794        /// If specified, this changes the meaning of `expr`
795        /// in relation to the matched value.
796        constraint: Constraint,
797        /// The numeric expression to evaluate
798        ///
799        /// If `constraint` is not set, this expression
800        /// produces a value which must match the input.
801        expr: Option<Expr>,
802    },
803}
804impl<'a> PartialOrd for Match<'a> {
805    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
806        Some(self.cmp(other))
807    }
808}
809impl<'a> Ord for Match<'a> {
810    fn cmp(&self, other: &Self) -> Ordering {
811        match (self, other) {
812            (
813                Self::Substitution {
814                    name: an,
815                    pattern: Some(ap),
816                    ..
817                },
818                Self::Substitution {
819                    name: bn,
820                    pattern: Some(bp),
821                    ..
822                },
823            ) => ap.cmp(bp).then_with(|| an.cmp(bn)),
824            (
825                Self::Substitution {
826                    pattern: Some(_), ..
827                },
828                Self::Substitution { pattern: None, .. },
829            ) => Ordering::Less,
830            (
831                Self::Substitution { pattern: None, .. },
832                Self::Substitution {
833                    pattern: Some(_), ..
834                },
835            ) => Ordering::Greater,
836            (Self::Substitution { name: an, .. }, Self::Substitution { name: bn, .. }) => {
837                an.cmp(bn)
838            }
839            (
840                Self::Numeric {
841                    format: af,
842                    capture: None,
843                    expr: aexpr,
844                    ..
845                },
846                Self::Numeric {
847                    format: bf,
848                    capture: None,
849                    expr: bexpr,
850                    ..
851                },
852            ) => af
853                .pattern(None)
854                .cmp(&bf.pattern(None))
855                .then_with(|| aexpr.cmp(bexpr)),
856            (
857                Self::Numeric { capture: None, .. },
858                Self::Numeric {
859                    capture: Some(_), ..
860                },
861            ) => Ordering::Less,
862            (
863                Self::Numeric {
864                    capture: Some(_), ..
865                },
866                Self::Numeric { capture: None, .. },
867            ) => Ordering::Greater,
868            (
869                Self::Numeric {
870                    format: af,
871                    capture: Some(acap),
872                    expr: aexpr,
873                    ..
874                },
875                Self::Numeric {
876                    format: bf,
877                    capture: Some(bcap),
878                    expr: bexpr,
879                    ..
880                },
881            ) => af
882                .pattern(None)
883                .cmp(&bf.pattern(None))
884                .then_with(|| acap.cmp(bcap))
885                .then_with(|| aexpr.cmp(bexpr)),
886            (
887                Self::Substitution {
888                    name,
889                    pattern: Some(pattern),
890                    ..
891                },
892                Self::Numeric {
893                    format,
894                    capture,
895                    expr: None,
896                    ..
897                },
898            ) => AsRef::<str>::as_ref(pattern)
899                .cmp(format.pattern(None).as_ref())
900                .then_with(|| Some(*name).cmp(capture))
901                .then(Ordering::Less),
902            (
903                Self::Numeric {
904                    format,
905                    capture,
906                    expr: None,
907                    ..
908                },
909                Self::Substitution {
910                    name,
911                    pattern: Some(pattern),
912                    ..
913                },
914            ) => format
915                .pattern(None)
916                .as_ref()
917                .cmp(pattern.as_ref())
918                .then_with(|| capture.cmp(&Some(*name)))
919                .then(Ordering::Greater),
920            (Self::Substitution { .. }, _) => Ordering::Less,
921            (_, Self::Substitution { .. }) => Ordering::Greater,
922        }
923    }
924}
925impl<'a> Spanned for Match<'a> {
926    fn span(&self) -> SourceSpan {
927        match self {
928            Self::Numeric { span, .. } | Self::Substitution { span, .. } => *span,
929        }
930    }
931}
932impl<'a> Eq for Match<'a> {}
933impl<'a> PartialEq for Match<'a> {
934    fn eq(&self, other: &Self) -> bool {
935        match (self, other) {
936            (
937                Self::Substitution {
938                    name: an,
939                    pattern: ap,
940                    ..
941                },
942                Self::Substitution {
943                    name: bn,
944                    pattern: bp,
945                    ..
946                },
947            ) => an == bn && ap == bp,
948            (
949                Self::Numeric {
950                    format: af,
951                    capture: acap,
952                    constraint: ac,
953                    expr: aexpr,
954                    ..
955                },
956                Self::Numeric {
957                    format: bf,
958                    capture: bcap,
959                    constraint: bc,
960                    expr: bexpr,
961                    ..
962                },
963            ) => af == bf && acap == bcap && ac == bc && aexpr == bexpr,
964            _ => false,
965        }
966    }
967}
968
/// The set of constraints that can be placed on a numeric value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Constraint {
    /// Equality constraint (currently the only one available).
    Eq,
}
974
/// State machine behind an iterator that yields `SourceSpan`s found in a
/// `CheckPattern`: the spans of regex captures, of substitution names, of
/// numeric capture names, and of variables used in numeric expressions.
///
/// Each call to `next` takes the current state (leaving `Empty` behind) and
/// stores the follow-up state before yielding.
#[derive(Default)]
enum CheckPatternVarIter<'a, 'iter> {
    /// Nothing left to yield; `next` returns `None` from this state.
    #[default]
    Empty,
    /// Initial state: a whole pattern that has not been inspected yet.
    Pattern(&'iter CheckPattern<'a>),
    /// The captures of a regex pattern that have not been yielded yet.
    Regex(&'iter [Capture]),
    /// The parts of a match pattern that have not been visited yet.
    Parts(&'iter [CheckPatternPart<'a>]),
    /// A numeric expression whose variables still have to be collected.
    Expr {
        /// The expression to walk.
        expr: &'iter Expr,
        /// The parts to resume with once the expression is done.
        parts: &'iter [CheckPatternPart<'a>],
    },
    /// Spans that were collected in one go and are yielded one per call.
    Buffered {
        /// Spans waiting to be yielded, front first.
        buffer: std::collections::VecDeque<SourceSpan>,
        /// The parts to resume with once `buffer` is drained.
        next: &'iter [CheckPatternPart<'a>],
    },
}
991impl<'a, 'iter> Iterator for CheckPatternVarIter<'a, 'iter> {
992    type Item = SourceSpan;
993
994    fn next(&mut self) -> Option<Self::Item> {
995        'outer: loop {
996            match core::mem::take(self) {
997                Self::Empty => break None,
998                Self::Pattern(pattern) => match pattern {
999                    CheckPattern::Empty(_) | CheckPattern::Literal(_) => break None,
1000                    CheckPattern::Regex(ref re) => {
1001                        let (item, rest) = re.captures.split_first()?;
1002                        *self = Self::Regex(rest);
1003                        break Some(item.span());
1004                    }
1005                    CheckPattern::Match(ref parts) => {
1006                        *self = Self::Parts(parts);
1007                        continue;
1008                    }
1009                },
1010                Self::Regex(captures) => {
1011                    let (item, rest) = captures.split_first()?;
1012                    *self = Self::Regex(rest);
1013                    break Some(item.span());
1014                }
1015                Self::Parts(parts) => {
1016                    while let Some((part, parts)) = parts.split_first() {
1017                        match part {
1018                            CheckPatternPart::Literal(_) => break,
1019                            CheckPatternPart::Regex(ref re) => match re.captures.split_first() {
1020                                Some((item, vars)) => {
1021                                    *self = Self::Buffered {
1022                                        buffer: vars.iter().map(|v| v.span()).collect(),
1023                                        next: parts,
1024                                    };
1025                                    break 'outer Some(item.span());
1026                                }
1027                                None => break,
1028                            },
1029                            CheckPatternPart::Match(Match::Substitution { name, .. }) => {
1030                                *self = Self::Parts(parts);
1031                                break 'outer Some(name.span());
1032                            }
1033                            CheckPatternPart::Match(Match::Numeric {
1034                                capture: None,
1035                                expr: None,
1036                                ..
1037                            }) => {
1038                                continue;
1039                            }
1040                            CheckPatternPart::Match(Match::Numeric {
1041                                capture,
1042                                expr: Some(ref expr),
1043                                ..
1044                            }) => {
1045                                *self = Self::Expr { expr, parts };
1046                                if let Some(name) = capture.as_ref() {
1047                                    break 'outer Some(name.span());
1048                                }
1049                                continue 'outer;
1050                            }
1051                            CheckPatternPart::Match(Match::Numeric {
1052                                capture: Some(name),
1053                                expr: None,
1054                                ..
1055                            }) => {
1056                                *self = Self::Parts(parts);
1057                                break 'outer Some(name.span());
1058                            }
1059                        }
1060                    }
1061
1062                    break None;
1063                }
1064                Self::Expr { expr, parts } => {
1065                    let mut worklist = std::collections::VecDeque::with_capacity(2);
1066                    let mut buffer = std::collections::VecDeque::new();
1067                    worklist.push_back(expr);
1068                    loop {
1069                        let expr = worklist.pop_front();
1070                        match expr {
1071                            None => match buffer.pop_front() {
1072                                None => {
1073                                    *self = Self::Parts(parts);
1074                                    continue 'outer;
1075                                }
1076                                Some(span) => {
1077                                    *self = Self::Buffered {
1078                                        buffer,
1079                                        next: parts,
1080                                    };
1081                                    break 'outer Some(span);
1082                                }
1083                            },
1084                            Some(Expr::Num(_)) => {
1085                                continue;
1086                            }
1087                            Some(Expr::Var(name)) => {
1088                                buffer.push_back(name.span());
1089                            }
1090                            Some(Expr::Binary {
1091                                ref lhs, ref rhs, ..
1092                            }) => {
1093                                worklist.push_back(lhs);
1094                                worklist.push_back(rhs);
1095                            }
1096                        }
1097                    }
1098                }
1099                Self::Buffered { mut buffer, next } => match buffer.pop_front() {
1100                    None => {
1101                        *self = Self::Parts(next);
1102                    }
1103                    Some(span) => {
1104                        *self = Self::Buffered { buffer, next };
1105                        break Some(span);
1106                    }
1107                },
1108            }
1109        }
1110    }
1111}