Skip to main content

litcheck_filecheck/ast/
pattern.rs

1use std::cmp::Ordering;
2
3use crate::{
4    common::*,
5    expr::{TypedVariable, ValueType},
6};
7
/// Describes what should happen with the text matched by a pattern, or by one of
/// its capture groups.
///
/// Equality and ordering for this type are implemented by hand further down in
/// this file: the payloads of `Ignore` and `All` do not take part in either
/// comparison.
#[derive(Debug, Copy, Clone)]
pub enum Capture {
    /// Ignore the capture
    Ignore(SourceSpan),
    /// Capture the entire match, but without a name
    All(Span<ValueType>),
    /// Capture the entire match, and bind it with the given name and type
    Implicit(TypedVariable),
    /// Capture a specific named group, and bind it with a different name and type
    Mapped { group: Symbol, with: TypedVariable },
    /// Capture a specific group with the given name and type
    Explicit(TypedVariable),
}
21impl Default for Capture {
22    #[inline(always)]
23    fn default() -> Self {
24        Self::Ignore(SourceSpan::UNKNOWN)
25    }
26}
27impl Eq for Capture {}
28impl PartialEq for Capture {
29    fn eq(&self, other: &Self) -> bool {
30        match (self, other) {
31            (Self::Ignore(_), Self::Ignore(_)) => true,
32            (Self::All(_), Self::All(_)) => true,
33            (Self::Implicit(l), Self::Implicit(r)) => l == r,
34            (Self::Mapped { group: gl, with: l }, Self::Mapped { group: gr, with: r }) => {
35                gl == gr && l == r
36            }
37            (Self::Explicit(l), Self::Explicit(r)) => l == r,
38            _ => false,
39        }
40    }
41}
42impl PartialOrd for Capture {
43    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
44        Some(self.cmp(other))
45    }
46}
47impl Ord for Capture {
48    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
49        match (self, other) {
50            (Self::Ignore(_), Self::Ignore(_)) => Ordering::Equal,
51            (Self::Ignore(_), _) => Ordering::Less,
52            (_, Self::Ignore(_)) => Ordering::Greater,
53            (Self::All(_), Self::All(_)) => Ordering::Equal,
54            (Self::All(_), _) => Ordering::Less,
55            (_, Self::All(_)) => Ordering::Greater,
56            (Self::Implicit(l), Self::Implicit(r)) => l.cmp(r),
57            (Self::Implicit(_), _) => Ordering::Less,
58            (_, Self::Implicit(_)) => Ordering::Greater,
59            (Self::Mapped { with: l, group: gl }, Self::Mapped { with: r, group: gr }) => {
60                l.cmp(r).then(gl.cmp(gr))
61            }
62            (Self::Mapped { with: l, .. }, Self::Explicit(r)) => l.cmp(r).then(Ordering::Less),
63            (Self::Explicit(l), Self::Mapped { with: r, .. }) => l.cmp(r).then(Ordering::Greater),
64            (Self::Explicit(l), Self::Explicit(r)) => l.cmp(r),
65        }
66    }
67}
68impl Capture {
69    pub fn name(&self) -> Option<Symbol> {
70        self.variable_name().map(|v| v.into_inner())
71    }
72
73    pub fn variable_name(&self) -> Option<VariableName> {
74        match self {
75            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
76                Some(tv.name)
77            }
78            Self::Ignore(_) | Self::All(_) => None,
79        }
80    }
81
82    pub fn group_name(&self) -> Option<Symbol> {
83        match self {
84            Self::Mapped { group, .. } => Some(*group),
85            Self::Explicit(tv) => Some(tv.name.into_inner()),
86            Self::Ignore(_) | Self::All(_) | Self::Implicit(_) => None,
87        }
88    }
89
90    pub fn value_type(&self) -> ValueType {
91        match self {
92            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => tv.ty,
93            Self::All(t) => t.into_inner(),
94            Self::Ignore(_) => ValueType::String,
95        }
96    }
97}
98impl Spanned for Capture {
99    fn span(&self) -> SourceSpan {
100        match self {
101            Self::Implicit(tv) | Self::Mapped { with: tv, .. } | Self::Explicit(tv) => {
102                tv.name.span()
103            }
104            Self::All(span) => span.span(),
105            Self::Ignore(span) => *span,
106        }
107    }
108}
109
/// A regular expression pattern string, together with the captures associated
/// with it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RegexPattern<'a> {
    /// The pattern text and the span it is attributed to
    pub pattern: Span<Cow<'a, str>>,
    /// The captures associated with `pattern`; empty when built via `new`
    pub captures: SmallVec<[Capture; 1]>,
}
115impl<'a> RegexPattern<'a> {
116    pub fn new(pattern: Span<Cow<'a, str>>) -> Self {
117        Self {
118            pattern,
119            captures: smallvec![],
120        }
121    }
122
123    pub fn is_empty(&self) -> bool {
124        self.pattern.is_empty()
125    }
126
127    pub fn len(&self) -> usize {
128        self.pattern.len()
129    }
130}
131impl<'a> AsRef<str> for RegexPattern<'a> {
132    fn as_ref(&self) -> &str {
133        self.pattern.inner().as_ref()
134    }
135}
136impl<'a> Spanned for RegexPattern<'a> {
137    fn span(&self) -> SourceSpan {
138        self.pattern.span()
139    }
140}
141
/// A pattern prefix represents the first distinct
/// subpattern of the overall pattern which can be
/// matched independently of the rest of the pattern.
///
/// Prefixes are used when constructing sets of patterns
/// to find overlapping prefixes that can be collapsed
/// into more efficient searchers for matching.
///
/// Equality and ordering are implemented by hand below; equality is defined
/// in terms of the ordering.
#[derive(Clone)]
pub enum Prefix<'a> {
    /// The entire pattern is empty
    Empty(SourceSpan),
    /// The entire pattern is a literal string
    Literal(Span<Cow<'a, str>>),
    /// The pattern is a literal string, but only a subset is the prefix
    Substring(Span<Cow<'a, str>>),
    /// The prefix is a simple regular expression
    Regex(RegexPattern<'a>),
    /// The prefix contains a match block/substitution that cannot be
    /// reduced to a regular expression or literal prefix.
    Match(Cow<'a, Match<'a>>),
}
163impl<'a> Spanned for Prefix<'a> {
164    fn span(&self) -> SourceSpan {
165        match self {
166            Self::Empty(span) => *span,
167            Self::Literal(spanned) | Self::Substring(spanned) => spanned.span(),
168            Self::Regex(spanned) => spanned.span(),
169            Self::Match(spanned) => spanned.span(),
170        }
171    }
172}
173impl<'a> Prefix<'a> {
174    pub fn as_str(&self) -> Option<&str> {
175        match self {
176            Self::Empty(_) => Some(""),
177            Self::Literal(s) | Self::Substring(s) => Some(s.inner().as_ref()),
178            Self::Regex(regex) => Some(regex.pattern.inner().as_ref()),
179            Self::Match(_) => None,
180        }
181    }
182}
183impl<'a> Eq for Prefix<'a> {}
184impl<'a> PartialEq for Prefix<'a> {
185    fn eq(&self, other: &Self) -> bool {
186        self.cmp(other).is_eq()
187    }
188}
189impl<'a> PartialOrd for Prefix<'a> {
190    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
191        Some(self.cmp(other))
192    }
193}
194impl<'a> Ord for Prefix<'a> {
195    fn cmp(&self, other: &Self) -> Ordering {
196        match (self, other) {
197            (Self::Match(a), Self::Match(b)) => a.cmp(b),
198            (Self::Match(_), _) => Ordering::Greater,
199            (_, Self::Match(_)) => Ordering::Less,
200            (
201                Self::Regex(RegexPattern {
202                    pattern: ap,
203                    captures: ac,
204                }),
205                Self::Regex(RegexPattern {
206                    pattern: bp,
207                    captures: bc,
208                }),
209            ) if !ac.is_empty() && !bc.is_empty() => ap.cmp(bp).then_with(|| ac.cmp(bc)),
210            (
211                Self::Regex(RegexPattern {
212                    pattern: ap,
213                    captures: ac,
214                }),
215                b,
216            ) if !ac.is_empty() => ap
217                .inner()
218                .as_ref()
219                .cmp(b.as_str().unwrap())
220                .then(Ordering::Greater),
221            (
222                a,
223                Self::Regex(RegexPattern {
224                    pattern: bp,
225                    captures: bc,
226                }),
227            ) if !bc.is_empty() => a
228                .as_str()
229                .unwrap()
230                .cmp(bp.inner().as_ref())
231                .then(Ordering::Less),
232            (a, b) => a.as_str().unwrap().cmp(b.as_str().unwrap()),
233        }
234    }
235}
236
/// A check pattern is the part of a check line which must match in the check file somewhere
///
/// Equality is implemented by hand below: two `Empty` patterns are always equal,
/// regardless of their spans.
#[derive(Debug, Clone)]
pub enum CheckPattern<'a> {
    /// There is no content, we're at the end of line
    Empty(SourceSpan),
    /// The entire pattern is a single raw string
    Literal(Span<Cow<'a, str>>),
    /// The entire pattern is a single regex string
    Regex(RegexPattern<'a>),
    /// The pattern is some mix of literal parts and match rules
    Match(Span<Vec<CheckPatternPart<'a>>>),
}
249impl<'a> PartialEq for CheckPattern<'a> {
250    fn eq(&self, other: &Self) -> bool {
251        match (self, other) {
252            (Self::Empty(_), Self::Empty(_)) => true,
253            (Self::Literal(l), Self::Literal(r)) => l == r,
254            (Self::Regex(l), Self::Regex(r)) => l == r,
255            (Self::Match(l), Self::Match(r)) => l == r,
256            _ => false,
257        }
258    }
259}
260impl<'a> CheckPattern<'a> {
261    pub fn is_empty(&self) -> bool {
262        match self {
263            Self::Empty(_) => true,
264            Self::Literal(spanned) => spanned.is_empty(),
265            Self::Regex(spanned) => spanned.is_empty(),
266            Self::Match(parts) => parts.is_empty(),
267        }
268    }
269
    /// Returns an iterator of source spans for this pattern, produced by
    /// `CheckPatternVarIter::Pattern`.
    ///
    /// NOTE(review): presumably these are the locations of the variables used in
    /// the pattern; the iterator is defined elsewhere, so confirm there.
    pub fn locate_variables(&self) -> impl Iterator<Item = SourceSpan> + '_ {
        CheckPatternVarIter::Pattern(self)
    }
273
274    pub fn prefix(&self) -> Prefix<'a> {
275        match self {
276            Self::Literal(literal) => Prefix::Literal(literal.clone()),
277            Self::Regex(pattern) => Prefix::Regex(pattern.clone()),
278            Self::Match(parts) => match &parts[0] {
279                CheckPatternPart::Literal(literal) => Prefix::Substring(literal.clone()),
280                CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern.clone()),
281                CheckPatternPart::Match(Match::Numeric {
282                    span,
283                    format,
284                    capture: None,
285                    expr: None,
286                    ..
287                }) => Prefix::Regex(RegexPattern::new(Span::new(
288                    *span,
289                    format.unwrap_or_default().pattern_nocapture(),
290                ))),
291                CheckPatternPart::Match(Match::Numeric {
292                    span,
293                    format,
294                    capture: Some(name),
295                    expr: None,
296                    ..
297                }) => Prefix::Regex(RegexPattern {
298                    pattern: Span::new(*span, format.unwrap_or_default().pattern(None)),
299                    captures: smallvec![Capture::Implicit(TypedVariable {
300                        name: *name,
301                        ty: ValueType::Number(*format),
302                    })],
303                }),
304                CheckPatternPart::Match(Match::Substitution {
305                    pattern: Some(pattern),
306                    name,
307                    ..
308                }) => Prefix::Regex(RegexPattern {
309                    pattern: pattern.clone(),
310                    captures: smallvec![Capture::Implicit(TypedVariable {
311                        name: *name,
312                        ty: ValueType::String,
313                    })],
314                }),
315                CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part.clone())),
316            },
317            Self::Empty(span) => Prefix::Empty(*span),
318        }
319    }
320
321    pub fn pop_prefix(&mut self) -> Prefix<'a> {
322        use std::collections::VecDeque;
323
324        match self {
325            Self::Literal(literal) => {
326                let span = literal.span();
327                let result = Prefix::Literal(core::mem::replace(
328                    literal,
329                    Span::new(span, Cow::Borrowed("")),
330                ));
331                *self = Self::Empty(span);
332                result
333            }
334            Self::Regex(pattern) => {
335                let span = pattern.span();
336                let result = Prefix::Regex(core::mem::replace(
337                    pattern,
338                    RegexPattern::new(Span::new(span, Cow::Borrowed(""))),
339                ));
340                *self = Self::Empty(span);
341                result
342            }
343            Self::Match(parts) => {
344                let span = parts.span();
345                let mut ps = VecDeque::<CheckPatternPart<'a>>::from(core::mem::take(&mut **parts));
346                let prefix = match ps.pop_front().unwrap() {
347                    CheckPatternPart::Literal(literal) => Prefix::Substring(literal),
348                    CheckPatternPart::Regex(pattern) => Prefix::Regex(pattern),
349                    CheckPatternPart::Match(Match::Numeric {
350                        span,
351                        format,
352                        capture: None,
353                        expr: None,
354                        ..
355                    }) => Prefix::Regex(RegexPattern::new(Span::new(
356                        span,
357                        format.unwrap_or_default().pattern_nocapture(),
358                    ))),
359                    CheckPatternPart::Match(Match::Numeric {
360                        span,
361                        format,
362                        capture: Some(name),
363                        expr: None,
364                        ..
365                    }) => Prefix::Regex(RegexPattern {
366                        pattern: Span::new(span, format.unwrap_or_default().pattern_nocapture()),
367                        captures: smallvec![Capture::Implicit(TypedVariable {
368                            name,
369                            ty: ValueType::Number(format),
370                        })],
371                    }),
372                    CheckPatternPart::Match(Match::Substitution {
373                        pattern: Some(pattern),
374                        name,
375                        ..
376                    }) => Prefix::Regex(RegexPattern {
377                        pattern,
378                        captures: smallvec![Capture::Implicit(TypedVariable {
379                            name,
380                            ty: ValueType::String,
381                        })],
382                    }),
383                    CheckPatternPart::Match(part) => Prefix::Match(Cow::Owned(part)),
384                };
385                if ps.is_empty() {
386                    *self = Self::Empty(span);
387                } else {
388                    **parts = ps.into();
389                }
390                prefix
391            }
392            Self::Empty(span) => Prefix::Empty(*span),
393        }
394    }
395
396    pub fn is_literal(&self) -> bool {
397        match self {
398            Self::Empty(_) | Self::Literal(_) => true,
399            Self::Regex(_) => false,
400            Self::Match(parts) => parts
401                .iter()
402                .all(|p| matches!(p, CheckPatternPart::Literal(_))),
403        }
404    }
405
406    pub fn is_regex_compatible(&self) -> bool {
407        match self {
408            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => true,
409            Self::Match(parts) => parts.iter().all(|p| p.is_regex_compatible()),
410        }
411    }
412
    /// Compacts this pattern into fewer parts where possible
    ///
    /// Only the `Match` variant is rewritten:
    ///
    /// * A `Match` with no parts becomes `Empty`.
    /// * Runs of adjacent literal parts, regex parts, substitutions that carry a
    ///   pattern, and numeric matches without an expression are merged into a
    ///   single part. A run stays literal until its first non-literal member, at
    ///   which point the text accumulated so far is regex-escaped and the run
    ///   continues as a regex.
    /// * Any other match rule is kept as-is and terminates the current run.
    ///
    /// If every part was merged into one run, `self` is replaced by the
    /// corresponding `Literal` or `Regex` variant. `Empty`, `Literal` and `Regex`
    /// patterns are left untouched.
    pub fn compact(&mut self) {
        use std::collections::VecDeque;

        // Replaces `buffer` with its regex-escaped form. The new buffer is allocated
        // with an estimated capacity: one unit per character (two for regex meta
        // characters, which gain an escape), plus `padding` for what the caller is
        // about to append.
        fn convert_to_regex(buffer: &mut String, padding: usize) {
            let min_capacity = padding
                + buffer
                    .chars()
                    .map(|c| {
                        if regex_syntax::is_meta_character(c) {
                            2
                        } else {
                            1
                        }
                    })
                    .sum::<usize>();
            let prev = core::mem::replace(buffer, String::with_capacity(min_capacity));
            regex_syntax::escape_into(&prev, buffer);
        }

        match self {
            Self::Match(empty) if empty.is_empty() => {
                let span = empty.span();
                *self = Self::Empty(span);
            }
            Self::Match(compacted) => {
                let span = compacted.span();
                let source_id = span.source_id();
                // Bounds of the run currently being accumulated in `pattern`
                let mut pattern_start = span.start();
                let mut pattern_end = span.end();
                // Take the parts out; `compacted` is refilled with the merged parts
                let mut parts = VecDeque::from(core::mem::take(&mut **compacted));
                // Text and captures of the current run
                let mut pattern = String::new();
                let mut captures = SmallVec::<[Capture; 1]>::new();
                // True while the current run consists of literal text only
                let mut is_literal_mode = true;
                while let Some(mut part) = parts.pop_front() {
                    match part {
                        // Literal text in a literal run is appended verbatim
                        CheckPatternPart::Literal(part) if is_literal_mode => {
                            pattern_end = part.span().end();
                            pattern.push_str(part.inner().as_ref());
                        }
                        // Literal text in a regex run must be escaped
                        CheckPatternPart::Literal(part) => {
                            pattern_end = part.span().end();
                            regex_syntax::escape_into(part.inner().as_ref(), &mut pattern);
                        }
                        CheckPatternPart::Regex(RegexPattern {
                            pattern: part,
                            captures: ref mut part_captures,
                        }) => {
                            let (span, part) = part.into_parts();
                            pattern_end = span.end();
                            captures.append(part_captures);
                            if is_literal_mode {
                                // First regex in the run: escape what came before it
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, part.len())
                            }
                            pattern.push_str(part.as_ref());
                        }
                        // A substitution with a pattern becomes a named capture group
                        CheckPatternPart::Match(Match::Substitution {
                            pattern: Some(part),
                            name,
                            span,
                        }) => {
                            pattern_end = span.end();
                            let part = part.into_inner();
                            let group_name = name.as_str();
                            if is_literal_mode {
                                is_literal_mode = false;
                                // 6 == "(?P<".len() + ">".len() + ")".len()
                                convert_to_regex(&mut pattern, 6 + group_name.len() + part.len());
                            }
                            pattern.push_str("(?P<");
                            pattern.push_str(group_name);
                            pattern.push('>');
                            pattern.push_str(part.as_ref());
                            pattern.push(')');
                            captures.push(Capture::Explicit(TypedVariable {
                                name,
                                ty: ValueType::String,
                            }));
                        }
                        // A numeric match with neither expression nor capture contributes
                        // only the non-capturing pattern of its format
                        CheckPatternPart::Match(Match::Numeric {
                            expr: None,
                            capture: None,
                            span,
                            format,
                            ..
                        }) => {
                            pattern_end = span.end();
                            let format_pattern = format.unwrap_or_default().pattern_nocapture();
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, format_pattern.len());
                            }
                            pattern.push_str(&format_pattern);
                        }
                        // A capturing numeric match without an expression contributes its
                        // format's pattern, built with the variable name as the group name
                        CheckPatternPart::Match(Match::Numeric {
                            expr: None,
                            capture: Some(name),
                            span,
                            format,
                            ..
                        }) => {
                            pattern_end = span.end();
                            let group_name = name.as_str();
                            let format_pattern =
                                format.unwrap_or_default().pattern(Some(group_name));
                            if is_literal_mode {
                                is_literal_mode = false;
                                convert_to_regex(&mut pattern, format_pattern.len());
                            }
                            pattern.push_str(&format_pattern);
                            captures.push(Capture::Explicit(TypedVariable {
                                name,
                                ty: ValueType::Number(format),
                            }));
                        }
                        // Any other match rule cannot be merged: flush the current run (if
                        // it has any text), then emit the rule unchanged
                        part @ CheckPatternPart::Match(_) => {
                            let span = part.span();
                            if pattern.is_empty() {
                                // Nothing accumulated; start a fresh run after this part
                                compacted.push(part);
                                is_literal_mode = true;
                                pattern.clear();
                                captures.clear();
                                pattern_end = span.end();
                                pattern_start = pattern_end;
                                continue;
                            }

                            if is_literal_mode {
                                compacted.push(CheckPatternPart::Literal(Span::new(
                                    SourceSpan::new(
                                        source_id,
                                        Range::new(pattern_start, pattern_end),
                                    ),
                                    Cow::Owned(core::mem::take(&mut pattern)),
                                )));
                            } else {
                                let captures = core::mem::take(&mut captures);
                                compacted.push(CheckPatternPart::Regex(RegexPattern {
                                    pattern: Span::new(
                                        SourceSpan::new(
                                            source_id,
                                            Range::new(pattern_start, pattern_end),
                                        ),
                                        Cow::Owned(core::mem::take(&mut pattern)),
                                    ),
                                    captures,
                                }));
                                is_literal_mode = true;
                            }

                            compacted.push(part);
                            // The next run starts where this part ends
                            pattern_end = span.end();
                            pattern_start = pattern_end;
                        }
                    }
                }

                // Nothing was emitted, so every part was merged into a single run:
                // replace the whole pattern with that run
                if compacted.is_empty() {
                    let compacted = if is_literal_mode {
                        CheckPattern::Literal(Span::new(
                            SourceSpan::new(source_id, Range::new(pattern_start, pattern_end)),
                            Cow::Owned(core::mem::take(&mut pattern)),
                        ))
                    } else {
                        CheckPattern::Regex(RegexPattern {
                            pattern: Span::new(
                                SourceSpan::new(source_id, Range::new(pattern_start, pattern_end)),
                                Cow::Owned(core::mem::take(&mut pattern)),
                            ),
                            captures,
                        })
                    };
                    *self = compacted;
                    return;
                }

                // Flush the trailing run, if it has any text
                if !pattern.is_empty() {
                    if is_literal_mode {
                        compacted.push(CheckPatternPart::Literal(Span::new(
                            SourceSpan::new(source_id, Range::new(pattern_start, pattern_end)),
                            Cow::Owned(core::mem::take(&mut pattern)),
                        )));
                    } else {
                        compacted.push(CheckPatternPart::Regex(RegexPattern {
                            pattern: Span::new(
                                SourceSpan::new(source_id, Range::new(pattern_start, pattern_end)),
                                Cow::Owned(core::mem::take(&mut pattern)),
                            ),
                            captures,
                        }));
                    }
                }
            }
            Self::Empty(_) | Self::Literal(_) | Self::Regex(_) => (),
        }
    }
609
610    /// Converts this pattern into a string which can be used as a
611    /// regular expression, even if the pattern was not originally
612    /// expressed as a regular expression.
613    ///
614    /// Returns Err with the original pattern (potentially compacted),
615    /// if the conversion is not possible. Otherwise, returns Ok
616    /// with the built regular expression pattern.
617    pub fn into_regex_pattern(mut self) -> Result<RegexPattern<'a>, Self> {
618        self.compact();
619
620        match self {
621            Self::Literal(s) => Ok(RegexPattern::new(s)),
622            Self::Regex(regex) => Ok(regex),
623            other => Err(other),
624        }
625    }
626}
627impl<'a> Spanned for CheckPattern<'a> {
628    fn span(&self) -> SourceSpan {
629        match self {
630            Self::Empty(span) => *span,
631            Self::Literal(spanned) => spanned.span(),
632            Self::Regex(spanned) => spanned.span(),
633            Self::Match(spanned) => spanned.span(),
634        }
635    }
636}
637impl<'a> From<Vec<CheckPatternPart<'a>>> for CheckPattern<'a> {
638    fn from(mut parts: Vec<CheckPatternPart<'a>>) -> Self {
639        match parts.len() {
640            0 => CheckPattern::Empty(SourceSpan::UNKNOWN),
641            1 => match parts.pop().unwrap() {
642                CheckPatternPart::Literal(lit) => Self::Literal(lit),
643                CheckPatternPart::Regex(re) => Self::Regex(re),
644                part @ CheckPatternPart::Match(_) => {
645                    Self::Match(Span::new(part.span(), vec![part]))
646                }
647            },
648            _ => {
649                let start = parts.first().unwrap().span().start();
650                let last_span = parts.last().unwrap().span();
651                let end = last_span.end();
652                Self::Match(Span::new(
653                    SourceSpan::new(last_span.source_id(), Range::new(start, end)),
654                    parts,
655                ))
656            }
657        }
658    }
659}
660
/// A check line is broken up into segments whenever `[[` `]]` or `{{` `}}` is
/// encountered, for substitutions/captures and regex matches respectively; the
/// text before and after such a segment is literal (and optional). As such,
/// there are three kinds of segments/parts that can be observed on a line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CheckPatternPart<'a> {
    /// This part consists of a match rule to be evaluated while matching
    Match(Match<'a>),
    /// This part is a raw literal string
    Literal(Span<Cow<'a, str>>),
    /// This part is a regex pattern
    Regex(RegexPattern<'a>),
}
675impl<'a> CheckPatternPart<'a> {
676    pub fn unwrap_str(self) -> Span<Cow<'a, str>> {
677        match self {
678            Self::Literal(s) => s,
679            part => panic!("expected a literal pattern, got {part:#?}"),
680        }
681    }
682
683    pub fn uses_variable(&self) -> bool {
684        match self {
685            Self::Literal(_) | Self::Regex(_) => false,
686            Self::Match(Match::Numeric {
687                expr: None,
688                capture: None,
689                ..
690            }) => false,
691            Self::Match(_) => true,
692        }
693    }
694
695    pub fn is_regex_compatible(&self) -> bool {
696        match self {
697            Self::Literal(_)
698            | Self::Regex(_)
699            | Self::Match(Match::Substitution {
700                pattern: Some(_), ..
701            })
702            | Self::Match(Match::Numeric { expr: None, .. }) => true,
703            Self::Match(_) => false,
704        }
705    }
706}
707impl<'a> Spanned for CheckPatternPart<'a> {
708    fn span(&self) -> SourceSpan {
709        match self {
710            Self::Match(m) => m.span(),
711            Self::Literal(spanned) => spanned.span(),
712            Self::Regex(spanned) => spanned.span(),
713        }
714    }
715}
716
717/// This type represents a match rule wrapped in `[[` `]]`
718#[derive(Debug, Clone)]
719pub enum Match<'a> {
720    /// Match the given regular expression pattern, optionally binding `name`
721    /// to the matched value.
722    ///
723    /// Corresponds to expressions such as `[[REG]]` and `[[REG:r[0-9]+]]`.
724    ///
725    /// The precise format of this match type is `[[<name>:<pattern>]]`, where:
726    ///
727    /// * `<name>` is a local variable name of the form `[A-Za-z_][A-Za-z0-9_]*`, or a global
728    ///   variable name (prefixed with `$`). However, you are not permitted to (re)bind global
729    ///   variables.
730    /// * `:<pattern>`, is any valid, non-empty, regular expression pattern. When present, it
731    ///   changes the semantics of this match type from string substitution to string capture - i.e.
732    ///   `name` will be bound to the matched input string.
733    ///
734    /// If `:<pattern>` is not present, then the entire `[[<name>]]` block will be
735    /// substituted with the value of `<name>` as a literal pattern. The value will
736    /// be formatted according to its type.
737    ///
738    /// Variables bound using this syntax are available immediately on the same line, you
739    /// can do things like `CHECK: op [[REG:r[0-9]+]], [[REG]]` to bind `REG` to the register
740    /// name of the first operand of `op`, e.g., `r1`; and verify that the same register is
741    /// used as the second operand.
742    ///
743    /// NOTE: You should prefer the standard regular expression pattern matching syntax,
744    /// i.e. `{{<pattern>}}` if you don't need to bind a variable.
745    Substitution {
746        span: SourceSpan,
747        name: VariableName,
748        pattern: Option<Span<Cow<'a, str>>>,
749    },
750    /// Match the given numeric pattern, and optionally defines a variable if the
751    /// match succeeds.
752    ///
753    /// Corresponds to expressions such as `[[#]]` or `[[#%.8X]]` or `[[#REG + 1]]`,
754    /// as well as `[[#REG:]]` and `[[#%X,OFFSET:12]]`. The former are matches,
755    /// while the latter both match and define the given variable.
756    ///
    /// The unified format is `[[#%<fmtspec>,<NUMVAR>: <constraint> <expr>]]` where:
758    ///
759    /// * `%<fmtspec>` is the same format specifier as used for defining a variable, but in this
760    ///   context it indicates how the numeric value should be matched. It is optional, and if not
761    ///   present, both components of the format spec are inferred from the matching format of the
762    ///   numeric variables used by the expression constraint (if any), and defaults to `%u`
763    ///   (unsigned, no leading zeros) if no numeric variable is used. In case of conflict between
764    ///   format specifiers of several numeric variables, the conversion specifier becomes
765    ///   mandatory, but the precision specifier remains optional.
766    /// * `<NUMVAR>:`, when present, indicates that `NUMVAR` will be (re)bound to the matched value,
767    ///   if the match succeeds. If not present, no variable is defined.
768    /// * `<constraint>` describes how the value to match must relate to the value of the given
769    ///   expression. Currently, the only constraint type is `==` for equality. If present, `<expr>`
770    ///   is mandatory; however the inverse is not true, `<expr>` can be provided without
771    ///   `<constraint>`, implying a default equality constraint.
772    /// * `<expr>` is an expression. An expression is in turn recursively defined as:
773    ///   - A numeric operand
774    ///   - An expression followed by an operator and a numeric operand
775    ///
776    ///   A numeric operand is a previously defined numeric variable, an integer literal,
777    ///   or one of a set of built-in functions. Whitespace is allowed around these elements.
778    ///   Numeric operands are 64-bit values. Overflow and underflow are rejected. The original
779    ///   `lit` does not support operator precedence, but `litcheck` supports the standard precedence
780    ///   of the supported operators, and parentheses can be used to manually manage precedence.
781    ///
782    ///   The operators supported are:
783    ///
784    ///   - `+`, addition
785    ///   - `-`, subtraction
786    ///
787    ///   The built-in functions supported are:
788    ///
789    ///   - `add`, addition
790    ///   - `sub`, subtraction
791    ///   - `mul`, multiplication
792    ///   - `div`, integer division
793    ///   - `min`, minimum
794    ///   - `max`, maximum
795    ///
796    /// All components can be omitted except the `#`, i.e. `[[#]]` is a valid numeric match,
797    /// which defaults to matching an unsigned integer, with no leading zeros, of up to 64
798    /// bit precision.
799    Numeric {
800        span: SourceSpan,
801        /// The format of the value to match.
802        ///
803        /// If not specified, it is implied by the format
804        /// of any numeric operands in `expr`, otherwise it
805        /// defaults to an unsigned integer with no leading zeros.
806        format: Option<NumberFormat>,
807        /// If set, contains the name of the variable to bind to
808        /// the matched value if the match succeeds.
809        capture: Option<VariableName>,
810        /// If specified, this changes the meaning of `expr`
811        /// in relation to the matched value.
812        constraint: Constraint,
813        /// The numeric expression to evaluate
814        ///
815        /// If `constraint` is not set, this expression
816        /// produces a value which must match the input.
817        expr: Option<Expr>,
818    },
819}
820impl<'a> PartialOrd for Match<'a> {
821    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
822        Some(self.cmp(other))
823    }
824}
825impl<'a> Ord for Match<'a> {
826    fn cmp(&self, other: &Self) -> Ordering {
827        match (self, other) {
828            (
829                Self::Substitution {
830                    name: an,
831                    pattern: Some(ap),
832                    ..
833                },
834                Self::Substitution {
835                    name: bn,
836                    pattern: Some(bp),
837                    ..
838                },
839            ) => ap.cmp(bp).then_with(|| an.cmp(bn)),
840            (
841                Self::Substitution {
842                    pattern: Some(_), ..
843                },
844                Self::Substitution { pattern: None, .. },
845            ) => Ordering::Less,
846            (
847                Self::Substitution { pattern: None, .. },
848                Self::Substitution {
849                    pattern: Some(_), ..
850                },
851            ) => Ordering::Greater,
852            (Self::Substitution { name: an, .. }, Self::Substitution { name: bn, .. }) => {
853                an.cmp(bn)
854            }
855            (
856                Self::Numeric {
857                    format: af,
858                    capture: None,
859                    expr: aexpr,
860                    ..
861                },
862                Self::Numeric {
863                    format: bf,
864                    capture: None,
865                    expr: bexpr,
866                    ..
867                },
868            ) => af
869                .unwrap_or_default()
870                .pattern(None)
871                .cmp(&bf.unwrap_or_default().pattern(None))
872                .then_with(|| aexpr.cmp(bexpr)),
873            (
874                Self::Numeric { capture: None, .. },
875                Self::Numeric {
876                    capture: Some(_), ..
877                },
878            ) => Ordering::Less,
879            (
880                Self::Numeric {
881                    capture: Some(_), ..
882                },
883                Self::Numeric { capture: None, .. },
884            ) => Ordering::Greater,
885            (
886                Self::Numeric {
887                    format: af,
888                    capture: Some(acap),
889                    expr: aexpr,
890                    ..
891                },
892                Self::Numeric {
893                    format: bf,
894                    capture: Some(bcap),
895                    expr: bexpr,
896                    ..
897                },
898            ) => af
899                .unwrap_or_default()
900                .pattern(None)
901                .cmp(&bf.unwrap_or_default().pattern(None))
902                .then_with(|| acap.cmp(bcap))
903                .then_with(|| aexpr.cmp(bexpr)),
904            (
905                Self::Substitution {
906                    name,
907                    pattern: Some(pattern),
908                    ..
909                },
910                Self::Numeric {
911                    format,
912                    capture,
913                    expr: None,
914                    ..
915                },
916            ) => AsRef::<str>::as_ref(pattern)
917                .cmp(format.unwrap_or_default().pattern(None).as_ref())
918                .then_with(|| Some(*name).cmp(capture))
919                .then(Ordering::Less),
920            (
921                Self::Numeric {
922                    format,
923                    capture,
924                    expr: None,
925                    ..
926                },
927                Self::Substitution {
928                    name,
929                    pattern: Some(pattern),
930                    ..
931                },
932            ) => format
933                .unwrap_or_default()
934                .pattern(None)
935                .as_ref()
936                .cmp(pattern.inner().as_ref())
937                .then_with(|| capture.cmp(&Some(*name)))
938                .then(Ordering::Greater),
939            (Self::Substitution { .. }, _) => Ordering::Less,
940            (_, Self::Substitution { .. }) => Ordering::Greater,
941        }
942    }
943}
944impl<'a> Spanned for Match<'a> {
945    fn span(&self) -> SourceSpan {
946        match self {
947            Self::Numeric { span, .. } | Self::Substitution { span, .. } => *span,
948        }
949    }
950}
951impl<'a> Eq for Match<'a> {}
952impl<'a> PartialEq for Match<'a> {
953    fn eq(&self, other: &Self) -> bool {
954        match (self, other) {
955            (
956                Self::Substitution {
957                    name: an,
958                    pattern: ap,
959                    ..
960                },
961                Self::Substitution {
962                    name: bn,
963                    pattern: bp,
964                    ..
965                },
966            ) => an == bn && ap == bp,
967            (
968                Self::Numeric {
969                    format: af,
970                    capture: acap,
971                    constraint: ac,
972                    expr: aexpr,
973                    ..
974                },
975                Self::Numeric {
976                    format: bf,
977                    capture: bcap,
978                    constraint: bc,
979                    expr: bexpr,
980                    ..
981                },
982            ) => af == bf && acap == bcap && ac == bc && aexpr == bexpr,
983            _ => false,
984        }
985    }
986}
987
/// The relations a numeric match may require between the matched value and the value of
/// its expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Constraint {
    /// The matched value must equal the value of the expression (written `==`).
    Eq,
}
993
994impl fmt::Display for Constraint {
995    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
996        match self {
997            Self::Eq => f.write_str("=="),
998        }
999    }
1000}
1001
1002#[derive(Default)]
1003enum CheckPatternVarIter<'a, 'iter> {
1004    #[default]
1005    Empty,
1006    Pattern(&'iter CheckPattern<'a>),
1007    Regex(&'iter [Capture]),
1008    Parts(&'iter [CheckPatternPart<'a>]),
1009    Expr {
1010        expr: &'iter Expr,
1011        parts: &'iter [CheckPatternPart<'a>],
1012    },
1013    Buffered {
1014        buffer: std::collections::VecDeque<SourceSpan>,
1015        next: &'iter [CheckPatternPart<'a>],
1016    },
1017}
1018impl<'a, 'iter> Iterator for CheckPatternVarIter<'a, 'iter> {
1019    type Item = SourceSpan;
1020
1021    fn next(&mut self) -> Option<Self::Item> {
1022        'outer: loop {
1023            match core::mem::take(self) {
1024                Self::Empty => break None,
1025                Self::Pattern(pattern) => match pattern {
1026                    CheckPattern::Empty(_) | CheckPattern::Literal(_) => break None,
1027                    CheckPattern::Regex(re) => {
1028                        let (item, rest) = re.captures.split_first()?;
1029                        *self = Self::Regex(rest);
1030                        break Some(item.span());
1031                    }
1032                    CheckPattern::Match(parts) => {
1033                        *self = Self::Parts(parts);
1034                        continue;
1035                    }
1036                },
1037                Self::Regex(captures) => {
1038                    let (item, rest) = captures.split_first()?;
1039                    *self = Self::Regex(rest);
1040                    break Some(item.span());
1041                }
1042                Self::Parts(parts) => {
1043                    while let Some((part, parts)) = parts.split_first() {
1044                        match part {
1045                            CheckPatternPart::Literal(_) => break,
1046                            CheckPatternPart::Regex(re) => match re.captures.split_first() {
1047                                Some((item, vars)) => {
1048                                    *self = Self::Buffered {
1049                                        buffer: vars.iter().map(|v| v.span()).collect(),
1050                                        next: parts,
1051                                    };
1052                                    break 'outer Some(item.span());
1053                                }
1054                                None => break,
1055                            },
1056                            CheckPatternPart::Match(Match::Substitution { name, .. }) => {
1057                                *self = Self::Parts(parts);
1058                                break 'outer Some(name.span());
1059                            }
1060                            CheckPatternPart::Match(Match::Numeric {
1061                                capture: None,
1062                                expr: None,
1063                                ..
1064                            }) => {
1065                                continue;
1066                            }
1067                            CheckPatternPart::Match(Match::Numeric {
1068                                capture,
1069                                expr: Some(expr),
1070                                ..
1071                            }) => {
1072                                *self = Self::Expr { expr, parts };
1073                                if let Some(name) = capture.as_ref() {
1074                                    break 'outer Some(name.span());
1075                                }
1076                                continue 'outer;
1077                            }
1078                            CheckPatternPart::Match(Match::Numeric {
1079                                capture: Some(name),
1080                                expr: None,
1081                                ..
1082                            }) => {
1083                                *self = Self::Parts(parts);
1084                                break 'outer Some(name.span());
1085                            }
1086                        }
1087                    }
1088
1089                    break None;
1090                }
1091                Self::Expr { expr, parts } => {
1092                    let mut worklist = std::collections::VecDeque::with_capacity(2);
1093                    let mut buffer = std::collections::VecDeque::new();
1094                    worklist.push_back(expr);
1095                    loop {
1096                        let expr = worklist.pop_front();
1097                        match expr {
1098                            None => match buffer.pop_front() {
1099                                None => {
1100                                    *self = Self::Parts(parts);
1101                                    continue 'outer;
1102                                }
1103                                Some(span) => {
1104                                    *self = Self::Buffered {
1105                                        buffer,
1106                                        next: parts,
1107                                    };
1108                                    break 'outer Some(span);
1109                                }
1110                            },
1111                            Some(Expr::Num(_)) => {
1112                                continue;
1113                            }
1114                            Some(Expr::Var(name)) => {
1115                                buffer.push_back(name.span());
1116                            }
1117                            Some(Expr::Binary { lhs, rhs, .. }) => {
1118                                worklist.push_back(lhs);
1119                                worklist.push_back(rhs);
1120                            }
1121                        }
1122                    }
1123                }
1124                Self::Buffered { mut buffer, next } => match buffer.pop_front() {
1125                    None => {
1126                        *self = Self::Parts(next);
1127                    }
1128                    Some(span) => {
1129                        *self = Self::Buffered { buffer, next };
1130                        break Some(span);
1131                    }
1132                },
1133            }
1134        }
1135    }
1136}