sqruff_lib_core/parser/
parsers.rs

1use ahash::AHashSet;
2use fancy_regex::Regex;
3use smol_str::SmolStr;
4
5use super::context::ParseContext;
6use super::match_result::{MatchResult, Matched, Span};
7use super::matchable::{Matchable, MatchableCacheKey, MatchableTrait, next_matchable_cache_key};
8use super::segments::ErasedSegment;
9use crate::dialects::syntax::{SyntaxKind, SyntaxSet};
10use crate::errors::SQLParseError;
11
12#[derive(Debug, Clone, PartialEq)]
13pub struct TypedParser {
14    template: SyntaxKind,
15    target_types: SyntaxSet,
16    kind: SyntaxKind,
17    optional: bool,
18    cache_key: MatchableCacheKey,
19}
20
21impl TypedParser {
22    pub fn new(template: SyntaxKind, kind: SyntaxKind) -> Self {
23        let target_types = SyntaxSet::new(&[template]);
24
25        Self {
26            template,
27            kind,
28            target_types,
29            optional: false,
30            cache_key: next_matchable_cache_key(),
31        }
32    }
33
34    pub fn is_first_match(&self, segment: &ErasedSegment) -> bool {
35        self.target_types.contains(segment.get_type())
36    }
37}
38
39impl MatchableTrait for TypedParser {
40    fn elements(&self) -> &[Matchable] {
41        &[]
42    }
43
44    fn simple(
45        &self,
46        parse_context: &ParseContext,
47        crumbs: Option<Vec<&str>>,
48    ) -> Option<(AHashSet<String>, SyntaxSet)> {
49        let _ = (parse_context, crumbs);
50        (AHashSet::new(), self.target_types.clone()).into()
51    }
52
53    fn match_segments(
54        &self,
55        segments: &[ErasedSegment],
56        idx: u32,
57        _parse_context: &mut ParseContext,
58    ) -> Result<MatchResult, SQLParseError> {
59        let segment = &segments[idx as usize];
60        if segment.is_type(self.template) {
61            return Ok(MatchResult {
62                span: Span {
63                    start: idx,
64                    end: idx + 1,
65                },
66                matched: Matched::Newtype(self.kind).into(),
67                insert_segments: Vec::new(),
68                child_matches: Vec::new(),
69            });
70        }
71
72        Ok(MatchResult::empty_at(idx))
73    }
74
75    fn cache_key(&self) -> MatchableCacheKey {
76        self.cache_key
77    }
78}
79
80#[derive(Clone, Debug, PartialEq)]
81pub struct StringParser {
82    template: String,
83    simple: AHashSet<String>,
84    kind: SyntaxKind,
85    optional: bool,
86    cache_key: MatchableCacheKey,
87}
88
89impl StringParser {
90    pub fn new(template: &str, kind: SyntaxKind) -> StringParser {
91        let template_upper = template.to_uppercase();
92        let simple_set = [template_upper.clone()].into();
93
94        StringParser {
95            template: template_upper,
96            simple: simple_set,
97            kind,
98            optional: false,
99            cache_key: next_matchable_cache_key(),
100        }
101    }
102}
103
104impl MatchableTrait for StringParser {
105    fn elements(&self) -> &[Matchable] {
106        &[]
107    }
108
109    fn is_optional(&self) -> bool {
110        self.optional
111    }
112
113    fn simple(
114        &self,
115        _parse_context: &ParseContext,
116        _crumbs: Option<Vec<&str>>,
117    ) -> Option<(AHashSet<String>, SyntaxSet)> {
118        (self.simple.clone(), SyntaxSet::EMPTY).into()
119    }
120
121    fn match_segments(
122        &self,
123        segments: &[ErasedSegment],
124        idx: u32,
125        _parse_context: &mut ParseContext,
126    ) -> Result<MatchResult, SQLParseError> {
127        let segment = &segments[idx as usize];
128
129        if segment.is_code() && self.template.eq_ignore_ascii_case(segment.raw()) {
130            return Ok(MatchResult {
131                span: Span {
132                    start: idx,
133                    end: idx + 1,
134                },
135                matched: Matched::Newtype(self.kind).into(),
136                insert_segments: Vec::new(),
137                child_matches: Vec::new(),
138            });
139        }
140
141        Ok(MatchResult::empty_at(idx))
142    }
143
144    fn cache_key(&self) -> MatchableCacheKey {
145        self.cache_key
146    }
147}
148
149#[derive(Debug, Clone)]
150pub struct RegexParser {
151    pub template: Regex,
152    pub anti_template: Option<Regex>,
153    kind: SyntaxKind,
154    cache_key: MatchableCacheKey,
155}
156
157impl PartialEq for RegexParser {
158    fn eq(&self, other: &Self) -> bool {
159        self.template.as_str() == other.template.as_str()
160            && self
161                .anti_template
162                .as_ref()
163                .zip(other.anti_template.as_ref())
164                .is_some_and(|(lhs, rhs)| lhs.as_str() == rhs.as_str())
165            && self.kind == other.kind
166    }
167}
168
169impl RegexParser {
170    pub fn new(template: &str, kind: SyntaxKind) -> Self {
171        let template_pattern = Regex::new(&format!("(?i){template}")).unwrap();
172
173        Self {
174            template: template_pattern,
175            anti_template: None,
176            kind,
177            cache_key: next_matchable_cache_key(),
178        }
179    }
180
181    pub fn anti_template(mut self, anti_template: &str) -> Self {
182        self.anti_template = Regex::new(&format!("(?i){anti_template}")).unwrap().into();
183        self
184    }
185}
186
187impl MatchableTrait for RegexParser {
188    fn elements(&self) -> &[Matchable] {
189        &[]
190    }
191
192    fn is_optional(&self) -> bool {
193        unimplemented!()
194    }
195
196    fn simple(
197        &self,
198        _parse_context: &ParseContext,
199        _crumbs: Option<Vec<&str>>,
200    ) -> Option<(AHashSet<String>, SyntaxSet)> {
201        // Does this matcher support a uppercase hash matching route?
202        // Regex segment does NOT for now. We might need to later for efficiency.
203        None
204    }
205
206    fn match_segments(
207        &self,
208        segments: &[ErasedSegment],
209        idx: u32,
210        _parse_context: &mut ParseContext,
211    ) -> Result<MatchResult, SQLParseError> {
212        let segment = &segments[idx as usize];
213        let segment_raw_upper =
214            SmolStr::from_iter(segment.raw().chars().map(|ch| ch.to_ascii_uppercase()));
215        if let Some(result) = self.template.find(&segment_raw_upper).ok().flatten() {
216            if result.as_str() == segment_raw_upper
217                && !self.anti_template.as_ref().is_some_and(|anti_template| {
218                    anti_template
219                        .is_match(&segment_raw_upper)
220                        .unwrap_or_default()
221                })
222            {
223                return Ok(MatchResult {
224                    span: Span {
225                        start: idx,
226                        end: idx + 1,
227                    },
228                    matched: Matched::Newtype(self.kind).into(),
229                    insert_segments: Vec::new(),
230                    child_matches: Vec::new(),
231                });
232            }
233        }
234
235        Ok(MatchResult::empty_at(idx))
236    }
237
238    fn cache_key(&self) -> MatchableCacheKey {
239        self.cache_key
240    }
241}
242
243#[derive(Clone, Debug, PartialEq)]
244pub struct MultiStringParser {
245    templates: AHashSet<String>,
246    simple: AHashSet<String>,
247    kind: SyntaxKind,
248    cache: MatchableCacheKey,
249}
250
251impl MultiStringParser {
252    pub fn new(templates: Vec<String>, kind: SyntaxKind) -> Self {
253        let templates = templates
254            .iter()
255            .map(|template| template.to_ascii_uppercase())
256            .collect::<AHashSet<String>>();
257
258        let _simple = templates.clone();
259
260        Self {
261            templates: templates.into_iter().collect(),
262            simple: _simple.into_iter().collect(),
263            kind,
264            cache: next_matchable_cache_key(),
265        }
266    }
267}
268
269impl MatchableTrait for MultiStringParser {
270    fn elements(&self) -> &[Matchable] {
271        &[]
272    }
273
274    fn is_optional(&self) -> bool {
275        todo!()
276    }
277
278    fn simple(
279        &self,
280        _parse_context: &ParseContext,
281        _crumbs: Option<Vec<&str>>,
282    ) -> Option<(AHashSet<String>, SyntaxSet)> {
283        (self.simple.clone(), SyntaxSet::EMPTY).into()
284    }
285
286    fn match_segments(
287        &self,
288        segments: &[ErasedSegment],
289        idx: u32,
290        _parse_context: &mut ParseContext,
291    ) -> Result<MatchResult, SQLParseError> {
292        let segment = &segments[idx as usize];
293
294        if segment.is_code() && self.templates.contains(&segment.raw().to_ascii_uppercase()) {
295            return Ok(MatchResult {
296                span: Span {
297                    start: idx,
298                    end: idx + 1,
299                },
300                matched: Matched::Newtype(self.kind).into(),
301                ..<_>::default()
302            });
303        }
304
305        Ok(MatchResult::empty_at(idx))
306    }
307
308    fn cache_key(&self) -> MatchableCacheKey {
309        self.cache
310    }
311}