1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
mod id;
mod iter;
pub mod matcher;
mod matches;
mod prefix;
pub mod search;
pub(crate) mod visitors;

pub use self::id::PatternIdentifier;
pub use self::matcher::*;
pub use self::matches::Matches;
pub use self::prefix::PatternPrefix;
pub use self::search::{DefaultSearcher, PatternSearcher, Searcher};

use crate::{
    ast::{CheckPattern, CheckPatternPart},
    common::*,
    errors::InvalidCheckFileError,
};

/// The compiled form of [CheckPattern]
///
/// Each variant wraps the concrete matcher that implements the pattern.
#[derive(Debug)]
pub enum Pattern<'a> {
    /// This pattern always succeeds
    Empty(AlwaysMatch),
    /// A literal string that must occur somewhere in the input
    Substring(SubstringMatcher<'a>),
    /// A regular expression that must occur somewhere in the input
    Regex(RegexMatcher<'a>),
    /// A hybrid match expression that must occur somewhere in the input
    Smart(SmartMatcher<'a>),
    /// A matcher for pure ASCII whitespace patterns, i.e. literal patterns in which
    /// every character is ASCII whitespace
    Whitespace(AsciiWhitespaceMatcher),
}
impl<'a> Pattern<'a> {
    pub fn into_matcher_mut(self) -> AnyMatcherMut<'a> {
        match self {
            Self::Empty(matcher) => Box::new(matcher),
            Self::Substring(matcher) => Box::new(matcher),
            Self::Regex(matcher) => Box::new(matcher),
            Self::Smart(matcher) => Box::new(matcher),
            Self::Whitespace(matcher) => Box::new(matcher),
        }
    }

    pub fn from_prefix(
        prefix: PatternPrefix<'a>,
        config: &Config,
        interner: &mut StringInterner,
    ) -> DiagResult<Self> {
        match prefix {
            PatternPrefix::Literal { prefix, .. } | PatternPrefix::Substring { prefix, .. } => {
                if prefix.is_empty() {
                    return Err(Report::from(InvalidCheckFileError::EmptyPattern(
                        prefix.span(),
                    )));
                }
                if prefix.trim().is_empty() {
                    if prefix.chars().all(|c| c.is_ascii_whitespace()) {
                        Ok(Pattern::Whitespace(AsciiWhitespaceMatcher::new(
                            prefix.span(),
                        )))
                    } else {
                        Ok(Pattern::Regex(RegexMatcher::new_nocapture(
                            Span::new(prefix.span(), Cow::Borrowed(r"\s+")),
                            config,
                        )?))
                    }
                } else {
                    Ok(Pattern::Substring(SubstringMatcher::new(prefix, config)?))
                }
            }
            PatternPrefix::Regex { prefix, .. } if prefix.captures.is_empty() => Ok(
                Pattern::Regex(RegexMatcher::new_nocapture(prefix.pattern, config)?),
            ),
            PatternPrefix::Regex { prefix, .. } => {
                Ok(Pattern::Regex(RegexMatcher::new(prefix, config, interner)?))
            }
            PatternPrefix::Dynamic { prefix, .. } => {
                let mut builder = SmartMatcher::build(prefix.span(), config, interner);
                builder.lower_match(prefix.into_owned())?;
                Ok(Self::Smart(builder.build()))
            }
        }
    }

    pub fn compile(
        mut pattern: CheckPattern<'a>,
        config: &Config,
        interner: &mut StringInterner,
    ) -> DiagResult<Self> {
        pattern.compact(interner);
        match pattern {
            CheckPattern::Literal(s) => Self::from_prefix(
                PatternPrefix::Literal { prefix: s, id: 0 },
                config,
                interner,
            ),
            CheckPattern::Regex(s) => Ok(Pattern::Regex(RegexMatcher::new(s, config, interner)?)),
            CheckPattern::Match(s) => {
                // At least one part requires expression evaluation
                let (span, parts) = s.into_parts();
                let mut builder = SmartMatcher::build(span, config, interner);
                for part in parts.into_iter() {
                    match part {
                        CheckPatternPart::Literal(s) => {
                            builder.literal(s)?;
                        }
                        CheckPatternPart::Regex(s) => {
                            builder.regex_pattern(s)?;
                        }
                        CheckPatternPart::Match(m) => {
                            builder.lower_match(m)?;
                        }
                    }
                }

                let pattern = builder.build();

                Ok(Pattern::Smart(pattern))
            }
            CheckPattern::Empty(span) => Ok(Pattern::Empty(AlwaysMatch::new(span))),
        }
    }

    pub fn compile_static(
        span: SourceSpan,
        mut pattern: CheckPattern<'a>,
        config: &Config,
        interner: &mut StringInterner,
    ) -> DiagResult<SimpleMatcher<'a>> {
        pattern.compact(interner);

        match pattern {
            CheckPattern::Literal(lit) => Ok(SimpleMatcher::Substring(SubstringMatcher::new(
                lit, config,
            )?)),
            CheckPattern::Regex(regex) if regex.captures.is_empty() => Ok(SimpleMatcher::Regex(
                RegexMatcher::new(regex, config, interner)?,
            )),
            pattern @ (CheckPattern::Regex(_) | CheckPattern::Match(_)) => {
                let diag = Diag::new("invalid variable usage in pattern")
                    .with_label(Label::new(span, "occurs in this pattern"))
                    .and_labels(
                        pattern
                            .locate_variables()
                            .map(|span| Label::new(span, "occurs here").into()),
                    )
                    .with_help("CHECK-LABEL patterns must be literals or regular expressions");
                Err(Report::new(diag))
            }
            CheckPattern::Empty(_) => unreachable!(
                "{pattern:?} is only valid for CHECK-EMPTY, and is not an actual pattern"
            ),
        }
    }

    pub fn compile_literal(pattern: CheckPattern<'a>, config: &Config) -> DiagResult<Self> {
        match pattern {
            CheckPattern::Literal(lit) => {
                Ok(Pattern::Substring(SubstringMatcher::new(lit, config)?))
            }
            CheckPattern::Regex(_) | CheckPattern::Match(_) => {
                unreachable!("the lexer will never emit tokens for these non-terminals")
            }
            CheckPattern::Empty(_) => unreachable!(
                "{pattern:?} is only valid for CHECK-EMPTY, and is not an actual pattern"
            ),
        }
    }
}
impl<'a> MatcherMut for Pattern<'a> {
    /// Attempts a match by delegating to the matcher wrapped by this pattern.
    ///
    /// The smart matcher is driven through `try_match_mut`; every other matcher is
    /// driven through `try_match`.
    fn try_match_mut<'input, 'context, C>(
        &self,
        input: Input<'input>,
        context: &mut C,
    ) -> DiagResult<MatchResult<'input>>
    where
        C: Context<'input, 'context> + ?Sized,
    {
        match self {
            Self::Empty(inner) => inner.try_match(input, context),
            Self::Substring(inner) => inner.try_match(input, context),
            Self::Regex(inner) => inner.try_match(input, context),
            Self::Smart(inner) => inner.try_match_mut(input, context),
            Self::Whitespace(inner) => inner.try_match(input, context),
        }
    }
}
impl<'a> Spanned for Pattern<'a> {
    /// Returns the source span reported by the matcher wrapped by this pattern.
    fn span(&self) -> SourceSpan {
        match self {
            Self::Empty(inner) => inner.span(),
            Self::Substring(inner) => inner.span(),
            Self::Regex(inner) => inner.span(),
            Self::Smart(inner) => inner.span(),
            Self::Whitespace(inner) => inner.span(),
        }
    }
}