litcheck_filecheck/ast/
pattern.rs

1use std::cmp::Ordering;
2
3use crate::{
4    common::*,
5    expr::{TypedVariable, ValueType},
6};
7
/// Describes whether, and how, the text matched by a pattern is captured
/// and bound to a variable.
///
/// NOTE: equality and ordering for this type are implemented by hand (see the
/// `PartialEq`/`Ord` impls); they ignore the span of `Ignore` and the payload of `All`.
#[derive(Debug, Copy, Clone)]
pub enum Capture {
    /// Ignore the capture; only a source span is carried
    Ignore(SourceSpan),
    /// Capture the entire match, but without a name; only the value type is carried
    All(Span<ValueType>),
    /// Capture the entire match, and bind it with the given name and type
    Implicit(TypedVariable),
    /// Capture a specific named group, and bind it with a different name and type
    ///
    /// `group` is the name of the group, `with` is the variable the group is bound to.
    Mapped { group: Symbol, with: TypedVariable },
    /// Capture a specific group with the given name and type
    ///
    /// The group name is the same as the variable name (see `Capture::group_name`).
    Explicit(TypedVariable),
}
21impl Default for Capture {
22    #[inline(always)]
23    fn default() -> Self {
24        Self::Ignore(SourceSpan::from(0..0))
25    }
26}
27impl Eq for Capture {}
28impl PartialEq for Capture {
29    fn eq(&self, other: &Self) -> bool {
30        match (self, other) {
31            (Self::Ignore(_), Self::Ignore(_)) => true,
32            (Self::All(_), Self::All(_)) => true,
33            (Self::Implicit(l), Self::Implicit(r)) => l == r,
34            (Self::Mapped { group: gl, with: l }, Self::Mapped { group: gr, with: r }) => {
35                gl == gr && l == r
36            }
37            (Self::Explicit(l), Self::Explicit(r)) => l == r,
38            _ => false,
39        }
40    }
41}
42impl PartialOrd for Capture {
43    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
44        Some(self.cmp(other))
45    }
46}
47impl Ord for Capture {
48    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
49        match (self, other) {
50            (Self::Ignore(_), Self::Ignore(_)) => Ordering::Equal,
51            (Self::Ignore(_), _) => Ordering::Less,
52            (_, Self::Ignore(_)) => Ordering::Greater,
53            (Self::All(_), Self::All(_)) => Ordering::Equal,
54            (Self::All(_), _) => Ordering::Less,
55            (_, Self::All(_)) => Ordering::Greater,
56            (Self::Implicit(l), Self::Implicit(r)) => l.cmp(r),
57            (Self::Implicit(_), _) => Ordering::Less,
58            (_, Self::Implicit(_)) => Ordering::Greater,
59            (Self::Mapped { with: l, group: gl }, Self::Mapped { with: r, group: gr }) => {
60                l.cmp(r).then(gl.cmp(gr))
61            }
62            (Self::Mapped { with: l, .. }, Self::Explicit(r)) => l.cmp(r).then(Ordering::Less),
63            (Self::Explicit(l), Self::Mapped { with: r, .. }) => l.cmp(r).then(Ordering::Greater),
64            (Self::Explicit(l), Self::Explicit(r)) => l.cmp(r),
65        }
66    }
67}
68impl Capture {
69    pub fn name(&self) -> Option<Symbol> {
70        self.variable_name().map(|v| v.into_inner())
71    }
72
73    pub fn variable_name(&self) -> Option<VariableName> {
74        match self {
75            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
76                Some(tv.name)
77            }
78            Self::Ignore(_) | Self::All(_) => None,
79        }
80    }
81
82    pub fn group_name(&self) -> Option<Symbol> {
83        match self {
84            Self::Mapped { group, .. } => Some(*group),
85            Self::Explicit(tv) => Some(tv.name.into_inner()),
86            Self::Ignore(_) | Self::All(_) | Self::Implicit(_) => None,
87        }
88    }
89
90    pub fn value_type(&self) -> ValueType {
91        match self {
92            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => tv.ty,
93            Self::All(t) => t.into_inner(),
94            Self::Ignore(_) => ValueType::String,
95        }
96    }
97}
98impl Spanned for Capture {
99    fn span(&self) -> SourceSpan {
100        match self {
101            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
102                tv.name.span()
103            }
104            Self::All(span) => span.span(),
105            Self::Ignore(span) => *span,
106        }
107    }
108}
109
/// A regular expression pattern, together with the captures associated with it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RegexPattern<'a> {
    /// The text of the pattern, along with its source span
    pub pattern: Span<Cow<'a, str>>,
    /// The captures associated with this pattern.
    ///
    /// Empty when the pattern is built with `RegexPattern::new`.
    pub captures: SmallVec<[Capture; 1]>,
}
115impl<'a> RegexPattern<'a> {
116    pub fn new(pattern: Span<Cow<'a, str>>) -> Self {
117        Self {
118            pattern,
119            captures: smallvec![],
120        }
121    }
122
123    pub fn is_empty(&self) -> bool {
124        self.pattern.is_empty()
125    }
126
127    pub fn len(&self) -> usize {
128        self.pattern.len()
129    }
130}
131impl<'a> AsRef<str> for RegexPattern<'a> {
132    fn as_ref(&self) -> &str {
133        self.pattern.as_ref()
134    }
135}
136impl<'a> Spanned for RegexPattern<'a> {
137    fn span(&self) -> SourceSpan {
138        self.pattern.span()
139    }
140}
141
/// A pattern prefix represents the first distinct
/// subpattern of the overall pattern which can be
/// matched independently of the rest of the pattern.
///
/// Prefixes are used when constructing sets of patterns
/// to find overlapping prefixes that can be collapsed
/// into more efficient searchers for matching.
///
/// NOTE: equality and ordering are implemented by hand for this type, and are
/// largely based on the textual content of the prefix (see the `Ord` impl),
/// rather than on which variant holds it.
#[derive(Clone)]
pub enum Prefix<'a> {
    /// The entire pattern is empty
    Empty(SourceSpan),
    /// The entire pattern is a literal string
    Literal(Span<Cow<'a, str>>),
    /// The pattern is a literal string, but only a subset is the prefix
    Substring(Span<Cow<'a, str>>),
    /// The prefix is a simple regular expression
    Regex(RegexPattern<'a>),
    /// The prefix contains a match block/substitution that cannot be
    /// reduced to a regular expression or literal prefix.
    Match(Cow<'a, Match<'a>>),
}
163impl<'a> Spanned for Prefix<'a> {
164    fn span(&self) -> SourceSpan {
165        match self {
166            Self::Empty(span) => *span,
167            Self::Literal(spanned) | Self::Substring(spanned) => spanned.span(),
168            Self::Regex(spanned) => spanned.span(),
169            Self::Match(spanned) => spanned.span(),
170        }
171    }
172}
173impl<'a> Prefix<'a> {
174    pub fn as_str(&self) -> Option<&str> {
175        match self {
176            Self::Empty(_) => Some(""),
177            Self::Literal(s) | Self::Substring(s) => Some(s.as_ref()),
178            Self::Regex(regex) => Some(regex.pattern.as_ref()),
179            Self::Match(_) => None,
180        }
181    }
182}
183impl<'a> Eq for Prefix<'a> {}
184impl<'a> PartialEq for Prefix<'a> {
185    fn eq(&self, other: &Self) -> bool {
186        self.cmp(other).is_eq()
187    }
188}
189impl<'a> PartialOrd for Prefix<'a> {
190    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
191        Some(self.cmp(other))
192    }
193}
194impl<'a> Ord for Prefix<'a> {
195    fn cmp(&self, other: &Self) -> Ordering {
196        match (self, other) {
197            (Self::Match(a), Self::Match(b)) => a.cmp(b),
198            (Self::Match(_), _) => Ordering::Greater,
199            (_, Self::Match(_)) => Ordering::Less,
200            (
201                Self::Regex(RegexPattern {
202                    pattern: ap,
203                    captures: ac,
204                }),
205                Self::Regex(RegexPattern {
206                    pattern: bp,
207                    captures: bc,
208                }),
209            ) if !ac.is_empty() && !bc.is_empty() => ap.cmp(bp).then_with(|| ac.cmp(bc)),
210            (
211                Self::Regex(RegexPattern {
212                    pattern: ap,
213                    captures: ac,
214                }),
215                b,
216            ) if !ac.is_empty() => ap.as_ref().cmp(b.as_str().unwrap()).then(Ordering::Greater),
217            (
218                a,
219                Self::Regex(RegexPattern {
220                    pattern: bp,
221                    captures: bc,
222                }),
223            ) if !bc.is_empty() => a.as_str().unwrap().cmp(bp.as_ref()).then(Ordering::Less),
224            (a, b) => a.as_str().unwrap().cmp(b.as_str().unwrap()),
225        }
226    }
227}
228
/// A check pattern is the part of a check line which must match in the check file somewhere
///
/// NOTE: equality is implemented by hand for this type; the span of `Empty` is ignored.
#[derive(Debug)]
pub enum CheckPattern<'a> {
    /// There is no content, we're at the end of line
    Empty(SourceSpan),
    /// The entire pattern is a single raw string
    Literal(Span<Cow<'a, str>>),
    /// The entire pattern is a single regex string
    Regex(RegexPattern<'a>),
    /// The pattern is some mix of literal parts and match rules
    ///
    /// The list of parts may be empty, see `CheckPattern::is_empty` and
    /// `CheckPattern::compact`, both of which account for that case.
    Match(Span<Vec<CheckPatternPart<'a>>>),
}
241impl<'a> PartialEq for CheckPattern<'a> {
242    fn eq(&self, other: &Self) -> bool {
243        match (self, other) {
244            (Self::Empty(_), Self::Empty(_)) => true,
245            (Self::Literal(l), Self::Literal(r)) => l == r,
246            (Self::Regex(l), Self::Regex(r)) => l == r,
247            (Self::Match(l), Self::Match(r)) => l == r,
248            _ => false,
249        }
250    }
251}
252impl<'a> CheckPattern<'a> {
253    pub fn is_empty(&self) -> bool {
254        match self {
255            Self::Empty(_) => true,
256            Self::Literal(ref spanned) => spanned.is_empty(),
257            Self::Regex(ref spanned) => spanned.is_empty(),
258            Self::Match(parts) => parts.is_empty(),
259        }
260    }
261
    /// Returns an iterator of source spans over this pattern, backed by
    /// `CheckPatternVarIter`.
    ///
    /// NOTE(review): presumably these are the locations of the variables that
    /// appear in the pattern - confirm against `CheckPatternVarIter`.
    pub fn locate_variables(&self) -> impl Iterator<Item = SourceSpan> + '_ {
        CheckPatternVarIter::Pattern(self)
    }
265
266    pub fn prefix(&self) -> Prefix<'a> {
267        match self {
268            Self::Literal(literal) => Prefix::Literal(literal.clone()),
269            Self::Regex(pattern) => Prefix::Regex(pattern.clone()),
270            Self::Match(parts) => match &parts[0] {
271                CheckPatternPart::Literal(literal) => Prefix::Substring(literal.clone()),
272                CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern.clone()),
273                CheckPatternPart::Match(Match::Numeric {
274                    span,
275                    format,
276                    capture: None,
277                    expr: None,
278                    ..
279                }) => Prefix::Regex(RegexPattern::new(Span::new(
280                    *span,
281                    format.pattern_nocapture(),
282                ))),
283                CheckPatternPart::Match(Match::Numeric {
284                    span,
285                    format,
286                    capture: Some(name),
287                    expr: None,
288                    ..
289                }) => Prefix::Regex(RegexPattern {
290                    pattern: Span::new(*span, format.pattern(None)),
291                    captures: smallvec![Capture::Implicit(TypedVariable {
292                        name: *name,
293                        ty: ValueType::Number(*format),
294                    })],
295                }),
296                CheckPatternPart::Match(Match::Substitution {
297                    pattern: Some(pattern),
298                    name,
299                    ..
300                }) => Prefix::Regex(RegexPattern {
301                    pattern: pattern.clone(),
302                    captures: smallvec![Capture::Implicit(TypedVariable {
303                        name: *name,
304                        ty: ValueType::String,
305                    })],
306                }),
307                CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part.clone())),
308            },
309            Self::Empty(span) => Prefix::Empty(*span),
310        }
311    }
312
313    pub fn pop_prefix(&mut self) -> Prefix<'a> {
314        use std::collections::VecDeque;
315
316        match self {
317            Self::Literal(ref mut literal) => {
318                let span = literal.span();
319                let result = Prefix::Literal(core::mem::replace(
320                    literal,
321                    Span::new(span, Cow::Borrowed("")),
322                ));
323                *self = Self::Empty(span);
324                result
325            }
326            Self::Regex(ref mut pattern) => {
327                let span = pattern.span();
328                let result = Prefix::Regex(core::mem::replace(
329                    pattern,
330                    RegexPattern::new(Span::new(span, Cow::Borrowed(""))),
331                ));
332                *self = Self::Empty(span);
333                result
334            }
335            Self::Match(ref mut parts) => {
336                let span = parts.span();
337                let mut ps = VecDeque::<CheckPatternPart<'a>>::from(core::mem::take(&mut **parts));
338                let prefix = match ps.pop_front().unwrap() {
339                    CheckPatternPart::Literal(literal) => Prefix::Substring(literal),
340                    CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern),
341                    CheckPatternPart::Match(Match::Numeric {
342                        span,
343                        format,
344                        capture: None,
345                        expr: None,
346                        ..
347                    }) => Prefix::Regex(RegexPattern::new(Span::new(
348                        span,
349                        format.pattern_nocapture(),
350                    ))),
351                    CheckPatternPart::Match(Match::Numeric {
352                        span,
353                        format,
354                        capture: Some(name),
355                        expr: None,
356                        ..
357                    }) => Prefix::Regex(RegexPattern {
358                        pattern: Span::new(span, format.pattern_nocapture()),
359                        captures: smallvec![Capture::Implicit(TypedVariable {
360                            name,
361                            ty: ValueType::Number(format),
362                        })],
363                    }),
364                    CheckPatternPart::Match(Match::Substitution {
365                        pattern: Some(pattern),
366                        name,
367                        ..
368                    }) => Prefix::Regex(RegexPattern {
369                        pattern,
370                        captures: smallvec![Capture::Implicit(TypedVariable {
371                            name,
372                            ty: ValueType::String,
373                        })],
374                    }),
375                    CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part)),
376                };
377                if ps.is_empty() {
378                    *self = Self::Empty(span);
379                } else {
380                    **parts = ps.into();
381                }
382                prefix
383            }
384            Self::Empty(span) => Prefix::Empty(*span),
385        }
386    }
387
388    pub fn is_literal(&self) -> bool {
389        match self {
390            Self::Empty(_) | Self::Literal(_) => true,
391            Self::Regex(_) => false,
392            Self::Match(ref parts) => parts
393                .iter()
394                .all(|p| matches!(p, CheckPatternPart::Literal(_))),
395        }
396    }
397
398    pub fn is_regex_compatible(&self) -> bool {
399        match self {
400            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => true,
401            Self::Match(ref parts) => parts.iter().all(|p| p.is_regex_compatible()),
402        }
403    }
404
    /// Compacts this pattern into fewer parts where possible
    ///
    /// Only `Match` patterns are affected; `Empty`, `Literal` and `Regex` patterns
    /// are left as-is.
    ///
    /// * A `Match` pattern with no parts becomes `Empty`.
    /// * Otherwise, runs of adjacent parts are merged into a single buffer. The buffer
    ///   holds raw literal text for as long as only literals have been seen; as soon as
    ///   a regex, a substitution with a pattern, or a numeric match without an
    ///   expression is encountered, the buffered text is escaped and the buffer is
    ///   treated as a regular expression from then on.
    /// * Any other match rule cannot be merged: the buffer (if non-empty) is flushed as
    ///   a single literal or regex part, followed by the match rule itself.
    /// * If everything was merged into the buffer, the pattern itself is replaced by a
    ///   single `Literal` or `Regex` pattern.
    ///
    /// The `interner` is used to resolve variable names to the strings used as
    /// capture group names.
    pub fn compact(&mut self, interner: &StringInterner) {
        use std::collections::VecDeque;

        /// Replaces the raw text in `buffer` with its regex-escaped form.
        ///
        /// The new buffer is allocated with room for the escaped text (two bytes are
        /// reserved per meta character, one per any other character) plus `padding`.
        fn convert_to_regex(buffer: &mut String, padding: usize) {
            let min_capacity = padding
                + buffer
                    .chars()
                    .map(|c| {
                        if regex_syntax::is_meta_character(c) {
                            2
                        } else {
                            1
                        }
                    })
                    .sum::<usize>();
            let prev = core::mem::replace(buffer, String::with_capacity(min_capacity));
            regex_syntax::escape_into(&prev, buffer);
        }

        match self {
            Self::Match(ref empty) if empty.is_empty() => {
                let span = empty.span();
                *self = Self::Empty(span);
            }
            Self::Match(ref mut compacted) => {
                let span = compacted.span();
                // The range covered by the text accumulated in `pattern`
                let mut pattern_span = span.range();
                // Take the parts out, `compacted` is refilled as parts are flushed
                let mut parts = VecDeque::from(core::mem::take(&mut **compacted));
                // The buffer of merged text, and the captures that go with it
                let mut pattern = String::new();
                let mut captures = SmallVec::<[Capture; 1]>::new();
                // True while `pattern` holds raw literal text, false once it holds a regex
                let mut is_literal_mode = true;
                while let Some(mut part) = parts.pop_front() {
                    match part {
                        // Literal onto literal: append the raw text
                        CheckPatternPart::Literal(part) if is_literal_mode => {
                            pattern_span.end = part.end();
                            pattern.push_str(part.as_ref());
                        }
                        // Literal onto regex: the text must be escaped
                        CheckPatternPart::Literal(part) => {
                            pattern_span.end = part.end();
                            regex_syntax::escape_into(part.as_ref(), &mut pattern);
                        }
                        CheckPatternPart::Regex(RegexPattern {
                            pattern: part,
                            captures: ref mut part_captures,
                        }) => {
                            let (span, part) = part.into_parts();
                            pattern_span.end = span.end();
                            captures.append(part_captures);
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, part.len())
                            }
                            pattern.push_str(part.as_ref());
                        }
                        // A substitution with a pattern becomes a named capture group
                        CheckPatternPart::Match(Match::Substitution {
                            pattern: Some(part),
                            name,
                            span,
                        }) => {
                            pattern_span.end = span.end();
                            let part = part.into_inner();
                            let group_name = interner.resolve(name.into_inner());
                            if is_literal_mode {
                                is_literal_mode = false;
                                // 6 is the length of the `(?P<`, `>` and `)` delimiters
                                convert_to_regex(&mut pattern, 6 + group_name.len() + part.len());
                            }
                            pattern.push_str("(?P<");
                            pattern.push_str(group_name);
                            pattern.push('>');
                            pattern.push_str(part.as_ref());
                            pattern.push(')');
                            captures.push(Capture::Explicit(TypedVariable {
                                name,
                                ty: ValueType::String,
                            }));
                        }
                        // A numeric match with no expression and no capture: just its pattern
                        CheckPatternPart::Match(Match::Numeric {
                            expr: None,
                            capture: None,
                            span,
                            format,
                            ..
                        }) => {
                            pattern_span.end = span.end();
                            let format_pattern = format.pattern_nocapture();
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, format_pattern.len());
                            }
                            pattern.push_str(&format_pattern);
                        }
                        // A numeric match with no expression, which binds a variable
                        CheckPatternPart::Match(Match::Numeric {
                            expr: None,
                            capture: Some(name),
                            span,
                            format,
                            ..
                        }) => {
                            pattern_span.end = span.end();
                            let group_name = interner.resolve(name.into_inner());
                            // NOTE(review): presumably this produces a capture group named
                            // `group_name` - confirm against `NumberFormat::pattern`
                            let format_pattern = format.pattern(Some(group_name));
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, format_pattern.len());
                            }
                            pattern.push_str(&format_pattern);
                            captures.push(Capture::Explicit(TypedVariable {
                                name,
                                ty: ValueType::Number(format),
                            }));
                        }
                        // Any other match rule cannot be merged into the buffer
                        part @ CheckPatternPart::Match(_) => {
                            let span = part.span();
                            // Nothing buffered: keep the part as-is and start over after it
                            if pattern.is_empty() {
                                compacted.push(part);
                                is_literal_mode = true;
                                pattern.clear();
                                captures.clear();
                                pattern_span.end = span.end();
                                pattern_span.start = pattern_span.end;
                                continue;
                            }

                            // Flush the buffer as a single part ahead of the match rule
                            if is_literal_mode {
                                compacted.push(CheckPatternPart::Literal(Span::new(
                                    pattern_span,
                                    Cow::Owned(core::mem::take(&mut pattern)),
                                )));
                            } else {
                                let captures = core::mem::take(&mut captures);
                                compacted.push(CheckPatternPart::Regex(RegexPattern {
                                    pattern: Span::new(
                                        pattern_span,
                                        Cow::Owned(core::mem::take(&mut pattern)),
                                    ),
                                    captures,
                                }));
                                is_literal_mode = true;
                            }

                            // The next buffered run starts where this part ends
                            compacted.push(part);
                            pattern_span.end = span.end();
                            pattern_span.start = pattern_span.end;
                        }
                    }
                }

                // Nothing was flushed, so every part was merged into the buffer, and
                // the whole pattern reduces to a single literal or regex
                if compacted.is_empty() {
                    let compacted = if is_literal_mode {
                        CheckPattern::Literal(Span::new(
                            pattern_span,
                            Cow::Owned(core::mem::take(&mut pattern)),
                        ))
                    } else {
                        CheckPattern::Regex(RegexPattern {
                            pattern: Span::new(
                                pattern_span,
                                Cow::Owned(core::mem::take(&mut pattern)),
                            ),
                            captures,
                        })
                    };
                    *self = compacted;
                    return;
                }

                // Flush whatever is left in the buffer as the final part
                if !pattern.is_empty() {
                    if is_literal_mode {
                        compacted.push(CheckPatternPart::Literal(Span::new(
                            pattern_span,
                            Cow::Owned(core::mem::take(&mut pattern)),
                        )));
                    } else {
                        compacted.push(CheckPatternPart::Regex(RegexPattern {
                            pattern: Span::new(
                                pattern_span,
                                Cow::Owned(core::mem::take(&mut pattern)),
                            ),
                            captures,
                        }));
                    }
                }
            }
            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => (),
        }
    }
592
593    /// Converts this pattern into a string which can be used as a
594    /// regular expression, even if the pattern was not originally
595    /// expressed as a regular expression.
596    ///
597    /// Returns Err with the original pattern (potentially compacted),
598    /// if the conversion is not possible. Otherwise, returns Ok
599    /// with the built regular expression pattern.
600    pub fn into_regex_pattern(
601        mut self,
602        interner: &StringInterner,
603    ) -> Result<RegexPattern<'a>, Self> {
604        self.compact(interner);
605
606        match self {
607            Self::Literal(s) => Ok(RegexPattern::new(s)),
608            Self::Regex(regex) => Ok(regex),
609            other => Err(other),
610        }
611    }
612}
613impl<'a> Spanned for CheckPattern<'a> {
614    fn span(&self) -> SourceSpan {
615        match self {
616            Self::Empty(span) => *span,
617            Self::Literal(ref spanned) => spanned.span(),
618            Self::Regex(ref spanned) => spanned.span(),
619            Self::Match(ref spanned) => spanned.span(),
620        }
621    }
622}
623impl<'a> From<Vec<CheckPatternPart<'a>>> for CheckPattern<'a> {
624    fn from(mut parts: Vec<CheckPatternPart<'a>>) -> Self {
625        match parts.len() {
626            0 => CheckPattern::Empty(SourceSpan::from(0..0)),
627            1 => match parts.pop().unwrap() {
628                CheckPatternPart::Literal(lit) => Self::Literal(lit),
629                CheckPatternPart::Regex(re) => Self::Regex(re),
630                part @ CheckPatternPart::Match(_) => {
631                    Self::Match(Span::new(part.span(), vec![part]))
632                }
633            },
634            _ => {
635                let start = parts.first().unwrap().span().offset();
636                let last_span = parts.last().unwrap().span();
637                let end = last_span.offset() + last_span.len();
638                Self::Match(Span::new(SourceSpan::from(start..end), parts))
639            }
640        }
641    }
642}
643
/// A check line is broken up into segments when either `[[` `]]`,
/// or `{{` `}}` is encountered, for substitutions/captures and regex
/// matches respectively; with the before and after parts being literal
/// (and optional). As such, there are three types of segments/parts that
/// can be observed on a line, each represented by a variant of this type.
#[derive(Debug, PartialEq, Eq)]
pub enum CheckPatternPart<'a> {
    /// This part consists of a match rule to be evaluated while matching
    Match(Match<'a>),
    /// This part is a raw literal string
    Literal(Span<Cow<'a, str>>),
    /// This part is a regex pattern
    Regex(RegexPattern<'a>),
}
658impl<'a> CheckPatternPart<'a> {
659    pub fn unwrap_str(self) -> Span<Cow<'a, str>> {
660        match self {
661            Self::Literal(s) => s,
662            part => panic!("expected a literal pattern, got {part:#?}"),
663        }
664    }
665
666    pub fn uses_variable(&self) -> bool {
667        match self {
668            Self::Literal(_) | Self::Regex(_) => false,
669            Self::Match(Match::Numeric {
670                expr: None,
671                capture: None,
672                ..
673            }) => false,
674            Self::Match(_) => true,
675        }
676    }
677
678    pub fn is_regex_compatible(&self) -> bool {
679        match self {
680            Self::Literal(_)
681            | Self::Regex(_)
682            | Self::Match(Match::Substitution {
683                pattern: Some(_), ..
684            })
685            | Self::Match(Match::Numeric { expr: None, .. }) => true,
686            Self::Match(_) => false,
687        }
688    }
689}
690impl<'a> Spanned for CheckPatternPart<'a> {
691    fn span(&self) -> SourceSpan {
692        match self {
693            Self::Match(m) => m.span(),
694            Self::Literal(spanned) => spanned.span(),
695            Self::Regex(spanned) => spanned.span(),
696        }
697    }
698}
699
700/// This type represents a match rule wrapped in `[[` `]]`
701#[derive(Debug, Clone)]
702pub enum Match<'a> {
703    /// Match the given regular expression pattern, optionally binding `name`
704    /// to the matched value.
705    ///
706    /// Corresponds to expressions such as `[[REG]]` and `[[REG:r[0-9]+]]`.
707    ///
708    /// The precise format of this match type is `[[<name>:<pattern>]]`, where:
709    ///
710    /// * `<name>` is a local variable name of the form `[A-Za-z_][A-Za-z0-9_]*`, or a global
711    ///   variable name (prefixed with `$`). However, you are not permitted to (re)bind global
712    ///   variables.
713    /// * `:<pattern>`, is any valid, non-empty, regular expression pattern. When present, it
714    ///   changes the semantics of this match type from string substitution to string capture - i.e.
715    ///   `name` will be bound to the matched input string.
716    ///
717    /// If `:<pattern>` is not present, then the entire `[[<name>]]` block will be
718    /// substituted with the value of `<name>` as a literal pattern. The value will
719    /// be formatted according to its type.
720    ///
721    /// Variables bound using this syntax are available immediately on the same line, you
722    /// can do things like `CHECK: op [[REG:r[0-9]+]], [[REG]]` to bind `REG` to the register
723    /// name of the first operand of `op`, e.g., `r1`; and verify that the same register is
724    /// used as the second operand.
725    ///
726    /// NOTE: You should prefer the standard regular expression pattern matching syntax,
727    /// i.e. `{{<pattern>}}` if you don't need to bind a variable.
728    Substitution {
729        span: SourceSpan,
730        name: VariableName,
731        pattern: Option<Span<Cow<'a, str>>>,
732    },
733    /// Match the given numeric pattern, and optionally defines a variable if the
734    /// match succeeds.
735    ///
736    /// Corresponds to expressions such as `[[#]]` or `[[#%.8X]]` or `[[#REG + 1]]`,
737    /// as well as `[[#REG:]]` and `[[#%X,OFFSET:12]]`. The former are matches,
738    /// while the latter both match and define the given variable.
739    ///
    /// The unified format is `[[#%<fmtspec>,<NUMVAR>: <constraint> <expr>]]` where:
741    ///
742    /// * `%<fmtspec>` is the same format specifier as used for defining a variable, but in this
743    ///   context it indicates how the numeric value should be matched. It is optional, and if not
744    ///   present, both components of the format spec are inferred from the matching format of the
745    ///   numeric variables used by the expression constraint (if any), and defaults to `%u`
746    ///   (unsigned, no leading zeros) if no numeric variable is used. In case of conflict between
747    ///   format specifiers of several numeric variables, the conversion specifier becomes
748    ///   mandatory, but the precision specifier remains optional.
749    /// * `<NUMVAR>:`, when present, indicates that `NUMVAR` will be (re)bound to the matched value,
750    ///   if the match succeeds. If not present, no variable is defined.
751    /// * `<constraint>` describes how the value to match must relate to the value of the given
752    ///   expression. Currently, the only constraint type is `==` for equality. If present, `<expr>`
753    ///   is mandatory; however the inverse is not true, `<expr>` can be provided without
754    ///   `<constraint>`, implying a default equality constraint.
755    /// * `<expr>` is an expression. An expression is in turn recursively defined as:
756    ///   - A numeric operand
757    ///   - An expression followed by an operator and a numeric operand
758    ///
759    ///   A numeric operand is a previously defined numeric variable, an integer literal,
760    ///   or one of a set of built-in functions. Whitespace is allowed around these elements.
761    ///   Numeric operands are 64-bit values. Overflow and underflow are rejected. The original
762    ///   `lit` does not support operator precedence, but `litcheck` supports the standard precedence
763    ///   of the supported operators, and parentheses can be used to manually manage precedence.
764    ///
765    ///   The operators supported are:
766    ///
767    ///   - `+`, addition
768    ///   - `-`, subtraction
769    ///
770    ///   The built-in functions supported are:
771    ///
772    ///   - `add`, addition
773    ///   - `sub`, subtraction
774    ///   - `mul`, multiplication
775    ///   - `div`, integer division
776    ///   - `min`, minimum
777    ///   - `max`, maximum
778    ///
779    /// All components can be omitted except the `#`, i.e. `[[#]]` is a valid numeric match,
780    /// which defaults to matching an unsigned integer, with no leading zeros, of up to 64
781    /// bit precision.
782    Numeric {
783        span: SourceSpan,
784        /// The format of the value to match.
785        ///
786        /// If not specified, it is implied by the format
787        /// of any numeric operands in `expr`, otherwise it
788        /// defaults to an unsigned integer with no leading zeros.
789        format: NumberFormat,
790        /// If set, contains the name of the variable to bind to
791        /// the matched value if the match succeeds.
792        capture: Option<VariableName>,
793        /// If specified, this changes the meaning of `expr`
794        /// in relation to the matched value.
795        constraint: Constraint,
796        /// The numeric expression to evaluate
797        ///
798        /// If `constraint` is not set, this expression
799        /// produces a value which must match the input.
800        expr: Option<Expr>,
801    },
802}
803impl<'a> PartialOrd for Match<'a> {
804    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
805        Some(self.cmp(other))
806    }
807}
808impl<'a> Ord for Match<'a> {
809    fn cmp(&self, other: &Self) -> Ordering {
810        match (self, other) {
811            (
812                Self::Substitution {
813                    name: an,
814                    pattern: Some(ap),
815                    ..
816                },
817                Self::Substitution {
818                    name: bn,
819                    pattern: Some(bp),
820                    ..
821                },
822            ) => ap.cmp(bp).then_with(|| an.cmp(bn)),
823            (
824                Self::Substitution {
825                    pattern: Some(_), ..
826                },
827                Self::Substitution { pattern: None, .. },
828            ) => Ordering::Less,
829            (
830                Self::Substitution { pattern: None, .. },
831                Self::Substitution {
832                    pattern: Some(_), ..
833                },
834            ) => Ordering::Greater,
835            (Self::Substitution { name: an, .. }, Self::Substitution { name: bn, .. }) => {
836                an.cmp(bn)
837            }
838            (
839                Self::Numeric {
840                    format: af,
841                    capture: None,
842                    expr: aexpr,
843                    ..
844                },
845                Self::Numeric {
846                    format: bf,
847                    capture: None,
848                    expr: bexpr,
849                    ..
850                },
851            ) => af
852                .pattern(None)
853                .cmp(&bf.pattern(None))
854                .then_with(|| aexpr.cmp(bexpr)),
855            (
856                Self::Numeric { capture: None, .. },
857                Self::Numeric {
858                    capture: Some(_), ..
859                },
860            ) => Ordering::Less,
861            (
862                Self::Numeric {
863                    capture: Some(_), ..
864                },
865                Self::Numeric { capture: None, .. },
866            ) => Ordering::Greater,
867            (
868                Self::Numeric {
869                    format: af,
870                    capture: Some(acap),
871                    expr: aexpr,
872                    ..
873                },
874                Self::Numeric {
875                    format: bf,
876                    capture: Some(bcap),
877                    expr: bexpr,
878                    ..
879                },
880            ) => af
881                .pattern(None)
882                .cmp(&bf.pattern(None))
883                .then_with(|| acap.cmp(bcap))
884                .then_with(|| aexpr.cmp(bexpr)),
885            (
886                Self::Substitution {
887                    name,
888                    pattern: Some(pattern),
889                    ..
890                },
891                Self::Numeric {
892                    format,
893                    capture,
894                    expr: None,
895                    ..
896                },
897            ) => AsRef::<str>::as_ref(pattern)
898                .cmp(format.pattern(None).as_ref())
899                .then_with(|| Some(*name).cmp(capture))
900                .then(Ordering::Less),
901            (
902                Self::Numeric {
903                    format,
904                    capture,
905                    expr: None,
906                    ..
907                },
908                Self::Substitution {
909                    name,
910                    pattern: Some(pattern),
911                    ..
912                },
913            ) => format
914                .pattern(None)
915                .as_ref()
916                .cmp(pattern.as_ref())
917                .then_with(|| capture.cmp(&Some(*name)))
918                .then(Ordering::Greater),
919            (Self::Substitution { .. }, _) => Ordering::Less,
920            (_, Self::Substitution { .. }) => Ordering::Greater,
921        }
922    }
923}
924impl<'a> Spanned for Match<'a> {
925    fn span(&self) -> SourceSpan {
926        match self {
927            Self::Numeric { span, .. } | Self::Substitution { span, .. } => *span,
928        }
929    }
930}
931impl<'a> Eq for Match<'a> {}
932impl<'a> PartialEq for Match<'a> {
933    fn eq(&self, other: &Self) -> bool {
934        match (self, other) {
935            (
936                Self::Substitution {
937                    name: an,
938                    pattern: ap,
939                    ..
940                },
941                Self::Substitution {
942                    name: bn,
943                    pattern: bp,
944                    ..
945                },
946            ) => an == bn && ap == bp,
947            (
948                Self::Numeric {
949                    format: af,
950                    capture: acap,
951                    constraint: ac,
952                    expr: aexpr,
953                    ..
954                },
955                Self::Numeric {
956                    format: bf,
957                    capture: bcap,
958                    constraint: bc,
959                    expr: bexpr,
960                    ..
961                },
962            ) => af == bf && acap == bcap && ac == bc && aexpr == bexpr,
963            _ => false,
964        }
965    }
966}
967
/// The relation that a matched numeric value must have to the value of the
/// accompanying expression.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Constraint {
    /// The matched value must be equal to the value of the expression.
    Eq,
}
973
974#[derive(Default)]
975enum CheckPatternVarIter<'a, 'iter> {
976    #[default]
977    Empty,
978    Pattern(&'iter CheckPattern<'a>),
979    Regex(&'iter [Capture]),
980    Parts(&'iter [CheckPatternPart<'a>]),
981    Expr {
982        expr: &'iter Expr,
983        parts: &'iter [CheckPatternPart<'a>],
984    },
985    Buffered {
986        buffer: std::collections::VecDeque<SourceSpan>,
987        next: &'iter [CheckPatternPart<'a>],
988    },
989}
990impl<'a, 'iter> Iterator for CheckPatternVarIter<'a, 'iter> {
991    type Item = SourceSpan;
992
993    fn next(&mut self) -> Option<Self::Item> {
994        'outer: loop {
995            match core::mem::take(self) {
996                Self::Empty => break None,
997                Self::Pattern(pattern) => match pattern {
998                    CheckPattern::Empty(_) | CheckPattern::Literal(_) => break None,
999                    CheckPattern::Regex(ref re) => {
1000                        let (item, rest) = re.captures.split_first()?;
1001                        *self = Self::Regex(rest);
1002                        break Some(item.span());
1003                    }
1004                    CheckPattern::Match(ref parts) => {
1005                        *self = Self::Parts(parts);
1006                        continue;
1007                    }
1008                },
1009                Self::Regex(captures) => {
1010                    let (item, rest) = captures.split_first()?;
1011                    *self = Self::Regex(rest);
1012                    break Some(item.span());
1013                }
1014                Self::Parts(parts) => {
1015                    while let Some((part, parts)) = parts.split_first() {
1016                        match part {
1017                            CheckPatternPart::Literal(_) => break,
1018                            CheckPatternPart::Regex(ref re) => match re.captures.split_first() {
1019                                Some((item, vars)) => {
1020                                    *self = Self::Buffered {
1021                                        buffer: vars.iter().map(|v| v.span()).collect(),
1022                                        next: parts,
1023                                    };
1024                                    break 'outer Some(item.span());
1025                                }
1026                                None => break,
1027                            },
1028                            CheckPatternPart::Match(Match::Substitution { name, .. }) => {
1029                                *self = Self::Parts(parts);
1030                                break 'outer Some(name.span());
1031                            }
1032                            CheckPatternPart::Match(Match::Numeric {
1033                                capture: None,
1034                                expr: None,
1035                                ..
1036                            }) => {
1037                                continue;
1038                            }
1039                            CheckPatternPart::Match(Match::Numeric {
1040                                capture,
1041                                expr: Some(ref expr),
1042                                ..
1043                            }) => {
1044                                *self = Self::Expr { expr, parts };
1045                                if let Some(name) = capture.as_ref() {
1046                                    break 'outer Some(name.span());
1047                                }
1048                                continue 'outer;
1049                            }
1050                            CheckPatternPart::Match(Match::Numeric {
1051                                capture: Some(name),
1052                                expr: None,
1053                                ..
1054                            }) => {
1055                                *self = Self::Parts(parts);
1056                                break 'outer Some(name.span());
1057                            }
1058                        }
1059                    }
1060
1061                    break None;
1062                }
1063                Self::Expr { expr, parts } => {
1064                    let mut worklist = std::collections::VecDeque::with_capacity(2);
1065                    let mut buffer = std::collections::VecDeque::new();
1066                    worklist.push_back(expr);
1067                    loop {
1068                        let expr = worklist.pop_front();
1069                        match expr {
1070                            None => match buffer.pop_front() {
1071                                None => {
1072                                    *self = Self::Parts(parts);
1073                                    continue 'outer;
1074                                }
1075                                Some(span) => {
1076                                    *self = Self::Buffered {
1077                                        buffer,
1078                                        next: parts,
1079                                    };
1080                                    break 'outer Some(span);
1081                                }
1082                            },
1083                            Some(Expr::Num(_)) => {
1084                                continue;
1085                            }
1086                            Some(Expr::Var(name)) => {
1087                                buffer.push_back(name.span());
1088                            }
1089                            Some(Expr::Binary {
1090                                ref lhs, ref rhs, ..
1091                            }) => {
1092                                worklist.push_back(lhs);
1093                                worklist.push_back(rhs);
1094                            }
1095                        }
1096                    }
1097                }
1098                Self::Buffered { mut buffer, next } => match buffer.pop_front() {
1099                    None => {
1100                        *self = Self::Parts(next);
1101                    }
1102                    Some(span) => {
1103                        *self = Self::Buffered { buffer, next };
1104                        break Some(span);
1105                    }
1106                },
1107            }
1108        }
1109    }
1110}