litcheck_filecheck/ast/
pattern.rs

1use std::cmp::Ordering;
2
3use crate::{
4    common::*,
5    expr::{TypedVariable, ValueType},
6};
7
8#[derive(Debug, Copy, Clone)]
9pub enum Capture {
10    /// Ignore the capture
11    Ignore(SourceSpan),
12    /// Capture the entire match, but without a name
13    All(Span<ValueType>),
14    /// Capture the entire match, and bind it with the given name and type
15    Implicit(TypedVariable),
16    /// Capture a specific named group, and bind it with a different name and type
17    Mapped { group: Symbol, with: TypedVariable },
18    /// Capture a specific group with the given name and type
19    Explicit(TypedVariable),
20}
21impl Default for Capture {
22    #[inline(always)]
23    fn default() -> Self {
24        Self::Ignore(SourceSpan::from(0..0))
25    }
26}
27impl Eq for Capture {}
28impl PartialEq for Capture {
29    fn eq(&self, other: &Self) -> bool {
30        match (self, other) {
31            (Self::Ignore(_), Self::Ignore(_)) => true,
32            (Self::All(_), Self::All(_)) => true,
33            (Self::Implicit(l), Self::Implicit(r)) => l == r,
34            (Self::Mapped { group: gl, with: l }, Self::Mapped { group: gr, with: r }) => {
35                gl == gr && l == r
36            }
37            (Self::Explicit(l), Self::Explicit(r)) => l == r,
38            _ => false,
39        }
40    }
41}
42impl PartialOrd for Capture {
43    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
44        Some(self.cmp(other))
45    }
46}
47impl Ord for Capture {
48    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
49        match (self, other) {
50            (Self::Ignore(_), Self::Ignore(_)) => Ordering::Equal,
51            (Self::Ignore(_), _) => Ordering::Less,
52            (_, Self::Ignore(_)) => Ordering::Greater,
53            (Self::All(_), Self::All(_)) => Ordering::Equal,
54            (Self::All(_), _) => Ordering::Less,
55            (_, Self::All(_)) => Ordering::Greater,
56            (Self::Implicit(l), Self::Implicit(r)) => l.cmp(r),
57            (Self::Implicit(_), _) => Ordering::Less,
58            (_, Self::Implicit(_)) => Ordering::Greater,
59            (Self::Mapped { with: l, group: gl }, Self::Mapped { with: r, group: gr }) => {
60                l.cmp(r).then(gl.cmp(gr))
61            }
62            (Self::Mapped { with: l, .. }, Self::Explicit(r)) => l.cmp(r).then(Ordering::Less),
63            (Self::Explicit(l), Self::Mapped { with: r, .. }) => l.cmp(r).then(Ordering::Greater),
64            (Self::Explicit(l), Self::Explicit(r)) => l.cmp(r),
65        }
66    }
67}
68impl Capture {
69    pub fn name(&self) -> Option<Symbol> {
70        match self {
71            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
72                Some(tv.name.into_inner())
73            }
74            Self::Ignore(_) | Self::All(_) => None,
75        }
76    }
77
78    pub fn group_name(&self) -> Option<Symbol> {
79        match self {
80            Self::Mapped { group, .. } => Some(*group),
81            Self::Explicit(tv) => Some(tv.name.into_inner()),
82            Self::Ignore(_) | Self::All(_) | Self::Implicit(_) => None,
83        }
84    }
85
86    pub fn value_type(&self) -> ValueType {
87        match self {
88            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => tv.ty,
89            Self::All(t) => t.into_inner(),
90            Self::Ignore(_) => ValueType::String,
91        }
92    }
93}
94impl Spanned for Capture {
95    fn span(&self) -> SourceSpan {
96        match self {
97            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
98                tv.name.span()
99            }
100            Self::All(span) => span.span(),
101            Self::Ignore(span) => *span,
102        }
103    }
104}
105
106#[derive(Debug, Clone, PartialEq, Eq)]
107pub struct RegexPattern<'a> {
108    pub pattern: Span<Cow<'a, str>>,
109    pub captures: SmallVec<[Capture; 1]>,
110}
111impl<'a> RegexPattern<'a> {
112    pub fn new(pattern: Span<Cow<'a, str>>) -> Self {
113        Self {
114            pattern,
115            captures: smallvec![],
116        }
117    }
118
119    pub fn is_empty(&self) -> bool {
120        self.pattern.is_empty()
121    }
122
123    pub fn len(&self) -> usize {
124        self.pattern.len()
125    }
126}
127impl<'a> AsRef<str> for RegexPattern<'a> {
128    fn as_ref(&self) -> &str {
129        self.pattern.as_ref()
130    }
131}
132impl<'a> Spanned for RegexPattern<'a> {
133    fn span(&self) -> SourceSpan {
134        self.pattern.span()
135    }
136}
137
138/// A pattern prefix represents the first distinct
139/// subpattern of the overall pattern which can be
140/// matched independently of the rest of the pattern.
141///
142/// Prefixes are used when constructing sets of patterns
143/// to find overlapping prefixes that can be collapsed
144/// into more efficient searchers for matching.
145#[derive(Clone)]
146pub enum Prefix<'a> {
147    /// The entire pattern is empty
148    Empty(SourceSpan),
149    /// The entire pattern is a literal string
150    Literal(Span<Cow<'a, str>>),
151    /// The pattern is a literal string, but only a subset is the prefix
152    Substring(Span<Cow<'a, str>>),
153    /// The prefix is a simple regular expression
154    Regex(RegexPattern<'a>),
155    /// The prefix contains a match block/substitution that cannot be
156    /// reduced to a regular expression or literal prefix.
157    Match(Cow<'a, Match<'a>>),
158}
159impl<'a> Spanned for Prefix<'a> {
160    fn span(&self) -> SourceSpan {
161        match self {
162            Self::Empty(span) => *span,
163            Self::Literal(spanned) | Self::Substring(spanned) => spanned.span(),
164            Self::Regex(spanned) => spanned.span(),
165            Self::Match(spanned) => spanned.span(),
166        }
167    }
168}
169impl<'a> Prefix<'a> {
170    pub fn as_str(&self) -> Option<&str> {
171        match self {
172            Self::Empty(_) => Some(""),
173            Self::Literal(s) | Self::Substring(s) => Some(s.as_ref()),
174            Self::Regex(regex) => Some(regex.pattern.as_ref()),
175            Self::Match(_) => None,
176        }
177    }
178}
179impl<'a> Eq for Prefix<'a> {}
180impl<'a> PartialEq for Prefix<'a> {
181    fn eq(&self, other: &Self) -> bool {
182        self.cmp(other).is_eq()
183    }
184}
185impl<'a> PartialOrd for Prefix<'a> {
186    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
187        Some(self.cmp(other))
188    }
189}
190impl<'a> Ord for Prefix<'a> {
191    fn cmp(&self, other: &Self) -> Ordering {
192        match (self, other) {
193            (Self::Match(a), Self::Match(b)) => a.cmp(b),
194            (Self::Match(_), _) => Ordering::Greater,
195            (_, Self::Match(_)) => Ordering::Less,
196            (
197                Self::Regex(RegexPattern {
198                    pattern: ap,
199                    captures: ac,
200                }),
201                Self::Regex(RegexPattern {
202                    pattern: bp,
203                    captures: bc,
204                }),
205            ) if !ac.is_empty() && !bc.is_empty() => ap.cmp(bp).then_with(|| ac.cmp(bc)),
206            (
207                Self::Regex(RegexPattern {
208                    pattern: ap,
209                    captures: ac,
210                }),
211                b,
212            ) if !ac.is_empty() => ap.as_ref().cmp(b.as_str().unwrap()).then(Ordering::Greater),
213            (
214                a,
215                Self::Regex(RegexPattern {
216                    pattern: bp,
217                    captures: bc,
218                }),
219            ) if !bc.is_empty() => a.as_str().unwrap().cmp(bp.as_ref()).then(Ordering::Less),
220            (a, b) => a.as_str().unwrap().cmp(b.as_str().unwrap()),
221        }
222    }
223}
224
225/// A check pattern is the part of a check line which must match in the check file somewhere
226#[derive(Debug)]
227pub enum CheckPattern<'a> {
228    /// There is no content, we're at the end of line
229    Empty(SourceSpan),
230    /// The entire pattern is a single raw string
231    Literal(Span<Cow<'a, str>>),
232    /// The entire pattern is a single regex string
233    Regex(RegexPattern<'a>),
234    /// The pattern is some mix of literal parts and match rules
235    Match(Span<Vec<CheckPatternPart<'a>>>),
236}
237impl<'a> PartialEq for CheckPattern<'a> {
238    fn eq(&self, other: &Self) -> bool {
239        match (self, other) {
240            (Self::Empty(_), Self::Empty(_)) => true,
241            (Self::Literal(l), Self::Literal(r)) => l == r,
242            (Self::Regex(l), Self::Regex(r)) => l == r,
243            (Self::Match(l), Self::Match(r)) => l == r,
244            _ => false,
245        }
246    }
247}
248impl<'a> CheckPattern<'a> {
249    pub fn is_empty(&self) -> bool {
250        match self {
251            Self::Empty(_) => true,
252            Self::Literal(ref spanned) => spanned.is_empty(),
253            Self::Regex(ref spanned) => spanned.is_empty(),
254            Self::Match(parts) => parts.is_empty(),
255        }
256    }
257
258    pub fn locate_variables(&self) -> impl Iterator<Item = SourceSpan> + '_ {
259        CheckPatternVarIter::Pattern(self)
260    }
261
262    pub fn prefix(&self) -> Prefix<'a> {
263        match self {
264            Self::Literal(literal) => Prefix::Literal(literal.clone()),
265            Self::Regex(pattern) => Prefix::Regex(pattern.clone()),
266            Self::Match(parts) => match &parts[0] {
267                CheckPatternPart::Literal(literal) => Prefix::Substring(literal.clone()),
268                CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern.clone()),
269                CheckPatternPart::Match(Match::Numeric {
270                    span,
271                    format,
272                    capture: None,
273                    expr: None,
274                    ..
275                }) => Prefix::Regex(RegexPattern::new(Span::new(
276                    *span,
277                    format.pattern_nocapture(),
278                ))),
279                CheckPatternPart::Match(Match::Numeric {
280                    span,
281                    format,
282                    capture: Some(name),
283                    expr: None,
284                    ..
285                }) => Prefix::Regex(RegexPattern {
286                    pattern: Span::new(*span, format.pattern(None)),
287                    captures: smallvec![Capture::Implicit(TypedVariable {
288                        name: *name,
289                        ty: ValueType::Number(*format),
290                    })],
291                }),
292                CheckPatternPart::Match(Match::Substitution {
293                    pattern: Some(pattern),
294                    name,
295                    ..
296                }) => Prefix::Regex(RegexPattern {
297                    pattern: pattern.clone(),
298                    captures: smallvec![Capture::Implicit(TypedVariable {
299                        name: *name,
300                        ty: ValueType::String,
301                    })],
302                }),
303                CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part.clone())),
304            },
305            Self::Empty(span) => Prefix::Empty(*span),
306        }
307    }
308
309    pub fn pop_prefix(&mut self) -> Prefix<'a> {
310        use std::collections::VecDeque;
311
312        match self {
313            Self::Literal(ref mut literal) => {
314                let span = literal.span();
315                let result = Prefix::Literal(core::mem::replace(
316                    literal,
317                    Span::new(span, Cow::Borrowed("")),
318                ));
319                *self = Self::Empty(span);
320                result
321            }
322            Self::Regex(ref mut pattern) => {
323                let span = pattern.span();
324                let result = Prefix::Regex(core::mem::replace(
325                    pattern,
326                    RegexPattern::new(Span::new(span, Cow::Borrowed(""))),
327                ));
328                *self = Self::Empty(span);
329                result
330            }
331            Self::Match(ref mut parts) => {
332                let span = parts.span();
333                let mut ps = VecDeque::<CheckPatternPart<'a>>::from(core::mem::take(&mut **parts));
334                let prefix = match ps.pop_front().unwrap() {
335                    CheckPatternPart::Literal(literal) => Prefix::Substring(literal),
336                    CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern),
337                    CheckPatternPart::Match(Match::Numeric {
338                        span,
339                        format,
340                        capture: None,
341                        expr: None,
342                        ..
343                    }) => Prefix::Regex(RegexPattern::new(Span::new(
344                        span,
345                        format.pattern_nocapture(),
346                    ))),
347                    CheckPatternPart::Match(Match::Numeric {
348                        span,
349                        format,
350                        capture: Some(name),
351                        expr: None,
352                        ..
353                    }) => Prefix::Regex(RegexPattern {
354                        pattern: Span::new(span, format.pattern_nocapture()),
355                        captures: smallvec![Capture::Implicit(TypedVariable {
356                            name,
357                            ty: ValueType::Number(format),
358                        })],
359                    }),
360                    CheckPatternPart::Match(Match::Substitution {
361                        pattern: Some(pattern),
362                        name,
363                        ..
364                    }) => Prefix::Regex(RegexPattern {
365                        pattern,
366                        captures: smallvec![Capture::Implicit(TypedVariable {
367                            name,
368                            ty: ValueType::String,
369                        })],
370                    }),
371                    CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part)),
372                };
373                if ps.is_empty() {
374                    *self = Self::Empty(span);
375                } else {
376                    **parts = ps.into();
377                }
378                prefix
379            }
380            Self::Empty(span) => Prefix::Empty(*span),
381        }
382    }
383
384    pub fn is_literal(&self) -> bool {
385        match self {
386            Self::Empty(_) | Self::Literal(_) => true,
387            Self::Regex(_) => false,
388            Self::Match(ref parts) => parts
389                .iter()
390                .all(|p| matches!(p, CheckPatternPart::Literal(_))),
391        }
392    }
393
394    pub fn is_regex_compatible(&self) -> bool {
395        match self {
396            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => true,
397            Self::Match(ref parts) => parts.iter().all(|p| p.is_regex_compatible()),
398        }
399    }
400
401    /// Compacts this pattern into fewer parts where possible
402    pub fn compact(&mut self, interner: &StringInterner) {
403        use std::collections::VecDeque;
404
405        fn convert_to_regex(buffer: &mut String, padding: usize) {
406            let min_capacity = padding
407                + buffer
408                    .chars()
409                    .map(|c| {
410                        if regex_syntax::is_meta_character(c) {
411                            2
412                        } else {
413                            1
414                        }
415                    })
416                    .sum::<usize>();
417            let prev = core::mem::replace(buffer, String::with_capacity(min_capacity));
418            regex_syntax::escape_into(&prev, buffer);
419        }
420
421        match self {
422            Self::Match(ref empty) if empty.is_empty() => {
423                let span = empty.span();
424                *self = Self::Empty(span);
425            }
426            Self::Match(ref mut compacted) => {
427                let span = compacted.span();
428                let mut pattern_span = span.range();
429                let mut parts = VecDeque::from(core::mem::take(&mut **compacted));
430                let mut pattern = String::new();
431                let mut captures = SmallVec::<[Capture; 1]>::new();
432                let mut is_literal_mode = true;
433                while let Some(mut part) = parts.pop_front() {
434                    match part {
435                        CheckPatternPart::Literal(part) if is_literal_mode => {
436                            pattern_span.end = part.end();
437                            pattern.push_str(part.as_ref());
438                        }
439                        CheckPatternPart::Literal(part) => {
440                            pattern_span.end = part.end();
441                            regex_syntax::escape_into(part.as_ref(), &mut pattern);
442                        }
443                        CheckPatternPart::Regex(RegexPattern {
444                            pattern: part,
445                            captures: ref mut part_captures,
446                        }) => {
447                            let (span, part) = part.into_parts();
448                            pattern_span.end = span.end();
449                            captures.append(part_captures);
450                            if is_literal_mode {
451                                is_literal_mode = false;
452                                convert_to_regex(&mut pattern, part.len())
453                            }
454                            pattern.push_str(part.as_ref());
455                        }
456                        CheckPatternPart::Match(Match::Substitution {
457                            pattern: Some(part),
458                            name,
459                            span,
460                        }) => {
461                            pattern_span.end = span.end();
462                            let part = part.into_inner();
463                            let group_name = interner.resolve(name.into_inner());
464                            if is_literal_mode {
465                                is_literal_mode = false;
466                                convert_to_regex(&mut pattern, 6 + group_name.len() + part.len());
467                            }
468                            pattern.push_str("(?P<");
469                            pattern.push_str(group_name);
470                            pattern.push('>');
471                            pattern.push_str(part.as_ref());
472                            pattern.push(')');
473                            captures.push(Capture::Explicit(TypedVariable {
474                                name,
475                                ty: ValueType::String,
476                            }));
477                        }
478                        CheckPatternPart::Match(Match::Numeric {
479                            expr: None,
480                            capture: None,
481                            span,
482                            format,
483                            ..
484                        }) => {
485                            pattern_span.end = span.end();
486                            let format_pattern = format.pattern_nocapture();
487                            if is_literal_mode {
488                                is_literal_mode = false;
489                                convert_to_regex(&mut pattern, format_pattern.len());
490                            }
491                            pattern.push_str(&format_pattern);
492                        }
493                        CheckPatternPart::Match(Match::Numeric {
494                            expr: None,
495                            capture: Some(name),
496                            span,
497                            format,
498                            ..
499                        }) => {
500                            pattern_span.end = span.end();
501                            let group_name = interner.resolve(name.into_inner());
502                            let format_pattern = format.pattern(Some(group_name));
503                            if is_literal_mode {
504                                is_literal_mode = false;
505                                convert_to_regex(&mut pattern, format_pattern.len());
506                            }
507                            pattern.push_str(&format_pattern);
508                            captures.push(Capture::Explicit(TypedVariable {
509                                name,
510                                ty: ValueType::Number(format),
511                            }));
512                        }
513                        part @ CheckPatternPart::Match(_) => {
514                            let span = part.span();
515                            if pattern.is_empty() {
516                                compacted.push(part);
517                                is_literal_mode = true;
518                                pattern.clear();
519                                captures.clear();
520                                pattern_span.end = span.end();
521                                pattern_span.start = pattern_span.end;
522                                continue;
523                            }
524
525                            if is_literal_mode {
526                                compacted.push(CheckPatternPart::Literal(Span::new(
527                                    pattern_span,
528                                    Cow::Owned(core::mem::take(&mut pattern)),
529                                )));
530                            } else {
531                                let captures = core::mem::take(&mut captures);
532                                compacted.push(CheckPatternPart::Regex(RegexPattern {
533                                    pattern: Span::new(
534                                        pattern_span,
535                                        Cow::Owned(core::mem::take(&mut pattern)),
536                                    ),
537                                    captures,
538                                }));
539                                is_literal_mode = true;
540                            }
541
542                            compacted.push(part);
543                            pattern_span.end = span.end();
544                            pattern_span.start = pattern_span.end;
545                        }
546                    }
547                }
548
549                if compacted.is_empty() {
550                    let compacted = if is_literal_mode {
551                        CheckPattern::Literal(Span::new(
552                            pattern_span,
553                            Cow::Owned(core::mem::take(&mut pattern)),
554                        ))
555                    } else {
556                        CheckPattern::Regex(RegexPattern {
557                            pattern: Span::new(
558                                pattern_span,
559                                Cow::Owned(core::mem::take(&mut pattern)),
560                            ),
561                            captures,
562                        })
563                    };
564                    *self = compacted;
565                    return;
566                }
567
568                if !pattern.is_empty() {
569                    if is_literal_mode {
570                        compacted.push(CheckPatternPart::Literal(Span::new(
571                            pattern_span,
572                            Cow::Owned(core::mem::take(&mut pattern)),
573                        )));
574                    } else {
575                        compacted.push(CheckPatternPart::Regex(RegexPattern {
576                            pattern: Span::new(
577                                pattern_span,
578                                Cow::Owned(core::mem::take(&mut pattern)),
579                            ),
580                            captures,
581                        }));
582                    }
583                }
584            }
585            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => (),
586        }
587    }
588
589    /// Converts this pattern into a string which can be used as a
590    /// regular expression, even if the pattern was not originally
591    /// expressed as a regular expression.
592    ///
593    /// Returns Err with the original pattern (potentially compacted),
594    /// if the conversion is not possible. Otherwise, returns Ok
595    /// with the built regular expression pattern.
596    pub fn into_regex_pattern(
597        mut self,
598        interner: &StringInterner,
599    ) -> Result<RegexPattern<'a>, Self> {
600        self.compact(interner);
601
602        match self {
603            Self::Literal(s) => Ok(RegexPattern::new(s)),
604            Self::Regex(regex) => Ok(regex),
605            other => Err(other),
606        }
607    }
608}
609impl<'a> Spanned for CheckPattern<'a> {
610    fn span(&self) -> SourceSpan {
611        match self {
612            Self::Empty(span) => *span,
613            Self::Literal(ref spanned) => spanned.span(),
614            Self::Regex(ref spanned) => spanned.span(),
615            Self::Match(ref spanned) => spanned.span(),
616        }
617    }
618}
619impl<'a> From<Vec<CheckPatternPart<'a>>> for CheckPattern<'a> {
620    fn from(mut parts: Vec<CheckPatternPart<'a>>) -> Self {
621        match parts.len() {
622            0 => CheckPattern::Empty(SourceSpan::from(0..0)),
623            1 => match parts.pop().unwrap() {
624                CheckPatternPart::Literal(lit) => Self::Literal(lit),
625                CheckPatternPart::Regex(re) => Self::Regex(re),
626                part @ CheckPatternPart::Match(_) => {
627                    Self::Match(Span::new(part.span(), vec![part]))
628                }
629            },
630            _ => {
631                let start = parts.first().unwrap().span().offset();
632                let last_span = parts.last().unwrap().span();
633                let end = last_span.offset() + last_span.len();
634                Self::Match(Span::new(SourceSpan::from(start..end), parts))
635            }
636        }
637    }
638}
639
640/// A check line is broken up into segments when either `[[` `]]`,
641/// or `{{` `}}` is encountered, for substitutions/captures and regex
642/// matches respectively; with the before and after parts being literal
643/// (and optional). As such we have three types of segments/parts that
644/// we can observe on a line
645#[derive(Debug, PartialEq, Eq)]
646pub enum CheckPatternPart<'a> {
647    /// This part consists of a match rule to be evaluated while matching
648    Match(Match<'a>),
649    /// This part is a raw literal string
650    Literal(Span<Cow<'a, str>>),
651    /// This part is a regex pattern
652    Regex(RegexPattern<'a>),
653}
654impl<'a> CheckPatternPart<'a> {
655    pub fn unwrap_str(self) -> Span<Cow<'a, str>> {
656        match self {
657            Self::Literal(s) => s,
658            part => panic!("expected a literal pattern, got {part:#?}"),
659        }
660    }
661
662    pub fn uses_variable(&self) -> bool {
663        match self {
664            Self::Literal(_) | Self::Regex(_) => false,
665            Self::Match(Match::Numeric {
666                expr: None,
667                capture: None,
668                ..
669            }) => false,
670            Self::Match(_) => true,
671        }
672    }
673
674    pub fn is_regex_compatible(&self) -> bool {
675        match self {
676            Self::Literal(_)
677            | Self::Regex(_)
678            | Self::Match(Match::Substitution {
679                pattern: Some(_), ..
680            })
681            | Self::Match(Match::Numeric { expr: None, .. }) => true,
682            Self::Match(_) => false,
683        }
684    }
685}
686impl<'a> Spanned for CheckPatternPart<'a> {
687    fn span(&self) -> SourceSpan {
688        match self {
689            Self::Match(m) => m.span(),
690            Self::Literal(spanned) => spanned.span(),
691            Self::Regex(spanned) => spanned.span(),
692        }
693    }
694}
695
696/// This type represents a match rule wrapped in `[[` `]]`
697#[derive(Debug, Clone)]
698pub enum Match<'a> {
699    /// Match the given regular expression pattern, optionally binding `name`
700    /// to the matched value.
701    ///
702    /// Corresponds to expressions such as `[[REG]]` and `[[REG:r[0-9]+]]`.
703    ///
704    /// The precise format of this match type is `[[<name>:<pattern>]]`, where:
705    ///
706    /// * `<name>` is a local variable name of the form `[A-Za-z_][A-Za-z0-9_]*`, or a global
707    ///   variable name (prefixed with `$`). However, you are not permitted to (re)bind global
708    ///   variables.
709    /// * `:<pattern>`, is any valid, non-empty, regular expression pattern. When present, it
710    ///   changes the semantics of this match type from string substitution to string capture - i.e.
711    ///   `name` will be bound to the matched input string.
712    ///
713    /// If `:<pattern>` is not present, then the entire `[[<name>]]` block will be
714    /// substituted with the value of `<name>` as a literal pattern. The value will
715    /// be formatted according to its type.
716    ///
717    /// Variables bound using this syntax are available immediately on the same line, you
718    /// can do things like `CHECK: op [[REG:r[0-9]+]], [[REG]]` to bind `REG` to the register
719    /// name of the first operand of `op`, e.g., `r1`; and verify that the same register is
720    /// used as the second operand.
721    ///
722    /// NOTE: You should prefer the standard regular expression pattern matching syntax,
723    /// i.e. `{{<pattern>}}` if you don't need to bind a variable.
724    Substitution {
725        span: SourceSpan,
726        name: VariableName,
727        pattern: Option<Span<Cow<'a, str>>>,
728    },
729    /// Match the given numeric pattern, and optionally defines a variable if the
730    /// match succeeds.
731    ///
732    /// Corresponds to expressions such as `[[#]]` or `[[#%.8X]]` or `[[#REG + 1]]`,
733    /// as well as `[[#REG:]]` and `[[#%X,OFFSET:12]]`. The former are matches,
734    /// while the latter both match and define the given variable.
735    ///
736    /// The unified format is `[[#%<fmtspec>,<NUMVAR>: <constraint> <expr]]` where:
737    ///
738    /// * `%<fmtspec>` is the same format specifier as used for defining a variable, but in this
739    ///   context it indicates how the numeric value should be matched. It is optional, and if not
740    ///   present, both components of the format spec are inferred from the matching format of the
741    ///   numeric variables used by the expression constraint (if any), and defaults to `%u`
742    ///   (unsigned, no leading zeros) if no numeric variable is used. In case of conflict between
743    ///   format specifiers of several numeric variables, the conversion specifier becomes
744    ///   mandatory, but the precision specifier remains optional.
745    /// * `<NUMVAR>:`, when present, indicates that `NUMVAR` will be (re)bound to the matched value,
746    ///   if the match succeeds. If not present, no variable is defined.
747    /// * `<constraint>` describes how the value to match must relate to the value of the given
748    ///   expression. Currently, the only constraint type is `==` for equality. If present, `<expr>`
749    ///   is mandatory; however the inverse is not true, `<expr>` can be provided without
750    ///   `<constraint>`, implying a default equality constraint.
751    /// * `<expr>` is an expression. An expression is in turn recursively defined as:
752    ///   - A numeric operand
753    ///   - An expression followed by an operator and a numeric operand
754    ///
755    ///   A numeric operand is a previously defined numeric variable, an integer literal,
756    ///   or one of a set of built-in functions. Whitespace is allowed around these elements.
757    ///   Numeric operands are 64-bit values. Overflow and underflow are rejected. The original
758    ///   `lit` does not support operator precedence, but `litcheck` supports the standard precedence
759    ///   of the supported operators, and parentheses can be used to manually manage precedence.
760    ///
761    ///   The operators supported are:
762    ///
763    ///   - `+`, addition
764    ///   - `-`, subtraction
765    ///
766    ///   The built-in functions supported are:
767    ///
768    ///   - `add`, addition
769    ///   - `sub`, subtraction
770    ///   - `mul`, multiplication
771    ///   - `div`, integer division
772    ///   - `min`, minimum
773    ///   - `max`, maximum
774    ///
775    /// All components can be omitted except the `#`, i.e. `[[#]]` is a valid numeric match,
776    /// which defaults to matching an unsigned integer, with no leading zeros, of up to 64
777    /// bit precision.
778    Numeric {
779        span: SourceSpan,
780        /// The format of the value to match.
781        ///
782        /// If not specified, it is implied by the format
783        /// of any numeric operands in `expr`, otherwise it
784        /// defaults to an unsigned integer with no leading zeros.
785        format: NumberFormat,
786        /// If set, contains the name of the variable to bind to
787        /// the matched value if the match succeeds.
788        capture: Option<VariableName>,
789        /// If specified, this changes the meaning of `expr`
790        /// in relation to the matched value.
791        constraint: Constraint,
792        /// The numeric expression to evaluate
793        ///
794        /// If `constraint` is not set, this expression
795        /// produces a value which must match the input.
796        expr: Option<Expr>,
797    },
798}
799impl<'a> PartialOrd for Match<'a> {
800    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
801        Some(self.cmp(other))
802    }
803}
804impl<'a> Ord for Match<'a> {
805    fn cmp(&self, other: &Self) -> Ordering {
806        match (self, other) {
807            (
808                Self::Substitution {
809                    name: an,
810                    pattern: Some(ap),
811                    ..
812                },
813                Self::Substitution {
814                    name: bn,
815                    pattern: Some(bp),
816                    ..
817                },
818            ) => ap.cmp(bp).then_with(|| an.cmp(bn)),
819            (
820                Self::Substitution {
821                    pattern: Some(_), ..
822                },
823                Self::Substitution { pattern: None, .. },
824            ) => Ordering::Less,
825            (
826                Self::Substitution { pattern: None, .. },
827                Self::Substitution {
828                    pattern: Some(_), ..
829                },
830            ) => Ordering::Greater,
831            (Self::Substitution { name: an, .. }, Self::Substitution { name: bn, .. }) => {
832                an.cmp(bn)
833            }
834            (
835                Self::Numeric {
836                    format: af,
837                    capture: None,
838                    expr: aexpr,
839                    ..
840                },
841                Self::Numeric {
842                    format: bf,
843                    capture: None,
844                    expr: bexpr,
845                    ..
846                },
847            ) => af
848                .pattern(None)
849                .cmp(&bf.pattern(None))
850                .then_with(|| aexpr.cmp(bexpr)),
851            (
852                Self::Numeric { capture: None, .. },
853                Self::Numeric {
854                    capture: Some(_), ..
855                },
856            ) => Ordering::Less,
857            (
858                Self::Numeric {
859                    capture: Some(_), ..
860                },
861                Self::Numeric { capture: None, .. },
862            ) => Ordering::Greater,
863            (
864                Self::Numeric {
865                    format: af,
866                    capture: Some(acap),
867                    expr: aexpr,
868                    ..
869                },
870                Self::Numeric {
871                    format: bf,
872                    capture: Some(bcap),
873                    expr: bexpr,
874                    ..
875                },
876            ) => af
877                .pattern(None)
878                .cmp(&bf.pattern(None))
879                .then_with(|| acap.cmp(bcap))
880                .then_with(|| aexpr.cmp(bexpr)),
881            (
882                Self::Substitution {
883                    name,
884                    pattern: Some(pattern),
885                    ..
886                },
887                Self::Numeric {
888                    format,
889                    capture,
890                    expr: None,
891                    ..
892                },
893            ) => AsRef::<str>::as_ref(pattern)
894                .cmp(format.pattern(None).as_ref())
895                .then_with(|| Some(*name).cmp(capture))
896                .then(Ordering::Less),
897            (
898                Self::Numeric {
899                    format,
900                    capture,
901                    expr: None,
902                    ..
903                },
904                Self::Substitution {
905                    name,
906                    pattern: Some(pattern),
907                    ..
908                },
909            ) => format
910                .pattern(None)
911                .as_ref()
912                .cmp(pattern.as_ref())
913                .then_with(|| capture.cmp(&Some(*name)))
914                .then(Ordering::Greater),
915            (Self::Substitution { .. }, _) => Ordering::Less,
916            (_, Self::Substitution { .. }) => Ordering::Greater,
917        }
918    }
919}
920impl<'a> Spanned for Match<'a> {
921    fn span(&self) -> SourceSpan {
922        match self {
923            Self::Numeric { span, .. } | Self::Substitution { span, .. } => *span,
924        }
925    }
926}
927impl<'a> Eq for Match<'a> {}
928impl<'a> PartialEq for Match<'a> {
929    fn eq(&self, other: &Self) -> bool {
930        match (self, other) {
931            (
932                Self::Substitution {
933                    name: an,
934                    pattern: ap,
935                    ..
936                },
937                Self::Substitution {
938                    name: bn,
939                    pattern: bp,
940                    ..
941                },
942            ) => an == bn && ap == bp,
943            (
944                Self::Numeric {
945                    format: af,
946                    capture: acap,
947                    constraint: ac,
948                    expr: aexpr,
949                    ..
950                },
951                Self::Numeric {
952                    format: bf,
953                    capture: bcap,
954                    constraint: bc,
955                    expr: bexpr,
956                    ..
957                },
958            ) => af == bf && acap == bcap && ac == bc && aexpr == bexpr,
959            _ => false,
960        }
961    }
962}
963
/// The relation a matched numeric value must have to the value of the
/// accompanying numeric expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Constraint {
    /// The matched value must be equal to the value of the expression.
    Eq,
}
969
970#[derive(Default)]
971enum CheckPatternVarIter<'a, 'iter> {
972    #[default]
973    Empty,
974    Pattern(&'iter CheckPattern<'a>),
975    Regex(&'iter [Capture]),
976    Parts(&'iter [CheckPatternPart<'a>]),
977    Expr {
978        expr: &'iter Expr,
979        parts: &'iter [CheckPatternPart<'a>],
980    },
981    Buffered {
982        buffer: std::collections::VecDeque<SourceSpan>,
983        next: &'iter [CheckPatternPart<'a>],
984    },
985}
986impl<'a, 'iter> Iterator for CheckPatternVarIter<'a, 'iter> {
987    type Item = SourceSpan;
988
989    fn next(&mut self) -> Option<Self::Item> {
990        'outer: loop {
991            match core::mem::take(self) {
992                Self::Empty => break None,
993                Self::Pattern(pattern) => match pattern {
994                    CheckPattern::Empty(_) | CheckPattern::Literal(_) => break None,
995                    CheckPattern::Regex(ref re) => {
996                        let (item, rest) = re.captures.split_first()?;
997                        *self = Self::Regex(rest);
998                        break Some(item.span());
999                    }
1000                    CheckPattern::Match(ref parts) => {
1001                        *self = Self::Parts(parts);
1002                        continue;
1003                    }
1004                },
1005                Self::Regex(captures) => {
1006                    let (item, rest) = captures.split_first()?;
1007                    *self = Self::Regex(rest);
1008                    break Some(item.span());
1009                }
1010                Self::Parts(parts) => {
1011                    while let Some((part, parts)) = parts.split_first() {
1012                        match part {
1013                            CheckPatternPart::Literal(_) => break,
1014                            CheckPatternPart::Regex(ref re) => match re.captures.split_first() {
1015                                Some((item, vars)) => {
1016                                    *self = Self::Buffered {
1017                                        buffer: vars.iter().map(|v| v.span()).collect(),
1018                                        next: parts,
1019                                    };
1020                                    break 'outer Some(item.span());
1021                                }
1022                                None => break,
1023                            },
1024                            CheckPatternPart::Match(Match::Substitution { name, .. }) => {
1025                                *self = Self::Parts(parts);
1026                                break 'outer Some(name.span());
1027                            }
1028                            CheckPatternPart::Match(Match::Numeric {
1029                                capture: None,
1030                                expr: None,
1031                                ..
1032                            }) => {
1033                                continue;
1034                            }
1035                            CheckPatternPart::Match(Match::Numeric {
1036                                capture,
1037                                expr: Some(ref expr),
1038                                ..
1039                            }) => {
1040                                *self = Self::Expr { expr, parts };
1041                                if let Some(name) = capture.as_ref() {
1042                                    break 'outer Some(name.span());
1043                                }
1044                                continue 'outer;
1045                            }
1046                            CheckPatternPart::Match(Match::Numeric {
1047                                capture: Some(name),
1048                                expr: None,
1049                                ..
1050                            }) => {
1051                                *self = Self::Parts(parts);
1052                                break 'outer Some(name.span());
1053                            }
1054                        }
1055                    }
1056
1057                    break None;
1058                }
1059                Self::Expr { expr, parts } => {
1060                    let mut worklist = std::collections::VecDeque::with_capacity(2);
1061                    let mut buffer = std::collections::VecDeque::new();
1062                    worklist.push_back(expr);
1063                    loop {
1064                        let expr = worklist.pop_front();
1065                        match expr {
1066                            None => match buffer.pop_front() {
1067                                None => {
1068                                    *self = Self::Parts(parts);
1069                                    continue 'outer;
1070                                }
1071                                Some(span) => {
1072                                    *self = Self::Buffered {
1073                                        buffer,
1074                                        next: parts,
1075                                    };
1076                                    break 'outer Some(span);
1077                                }
1078                            },
1079                            Some(Expr::Num(_)) => {
1080                                continue;
1081                            }
1082                            Some(Expr::Var(name)) => {
1083                                buffer.push_back(name.span());
1084                            }
1085                            Some(Expr::Binary {
1086                                ref lhs, ref rhs, ..
1087                            }) => {
1088                                worklist.push_back(lhs);
1089                                worklist.push_back(rhs);
1090                            }
1091                        }
1092                    }
1093                }
1094                Self::Buffered { mut buffer, next } => match buffer.pop_front() {
1095                    None => {
1096                        *self = Self::Parts(next);
1097                    }
1098                    Some(span) => {
1099                        *self = Self::Buffered { buffer, next };
1100                        break Some(span);
1101                    }
1102                },
1103            }
1104        }
1105    }
1106}