sqruff_lib_core/parser/
parsers.rs

1use ahash::AHashSet;
2use fancy_regex::Regex;
3use smol_str::SmolStr;
4
5use super::context::ParseContext;
6use super::match_result::{MatchResult, Matched, Span};
7use super::matchable::{Matchable, MatchableCacheKey, MatchableTrait, next_matchable_cache_key};
8use super::segments::ErasedSegment;
9use crate::dialects::syntax::{SyntaxKind, SyntaxSet};
10use crate::errors::SQLParseError;
11
12#[derive(Debug, Clone, PartialEq)]
13pub struct TypedParser {
14    template: SyntaxKind,
15    target_types: SyntaxSet,
16    kind: SyntaxKind,
17    optional: bool,
18    cache_key: MatchableCacheKey,
19}
20
21impl TypedParser {
22    pub fn new(template: SyntaxKind, kind: SyntaxKind) -> Self {
23        let target_types = SyntaxSet::new(&[template]);
24
25        Self {
26            template,
27            kind,
28            target_types,
29            optional: false,
30            cache_key: next_matchable_cache_key(),
31        }
32    }
33
34    pub fn is_first_match(&self, segment: &ErasedSegment) -> bool {
35        self.target_types.contains(segment.get_type())
36    }
37}
38
39impl MatchableTrait for TypedParser {
40    fn elements(&self) -> &[Matchable] {
41        &[]
42    }
43
44    fn simple(
45        &self,
46        parse_context: &ParseContext,
47        crumbs: Option<Vec<&str>>,
48    ) -> Option<(AHashSet<String>, SyntaxSet)> {
49        let _ = (parse_context, crumbs);
50        (AHashSet::new(), self.target_types.clone()).into()
51    }
52
53    fn match_segments(
54        &self,
55        segments: &[ErasedSegment],
56        idx: u32,
57        _parse_context: &mut ParseContext,
58    ) -> Result<MatchResult, SQLParseError> {
59        let segment = &segments[idx as usize];
60        if segment.is_type(self.template) {
61            return Ok(MatchResult {
62                span: Span {
63                    start: idx,
64                    end: idx + 1,
65                },
66                matched: Matched::Newtype(self.kind).into(),
67                insert_segments: Vec::new(),
68                child_matches: Vec::new(),
69            });
70        }
71
72        Ok(MatchResult::empty_at(idx))
73    }
74
75    fn cache_key(&self) -> MatchableCacheKey {
76        self.cache_key
77    }
78}
79
80#[derive(Debug, Clone, PartialEq)]
81pub struct CodeParser {
82    cache_key: MatchableCacheKey,
83}
84
85impl CodeParser {
86    pub fn new() -> Self {
87        Self {
88            cache_key: next_matchable_cache_key(),
89        }
90    }
91}
92
93impl Default for CodeParser {
94    fn default() -> Self {
95        Self::new()
96    }
97}
98
99impl MatchableTrait for CodeParser {
100    fn elements(&self) -> &[Matchable] {
101        &[]
102    }
103
104    fn simple(
105        &self,
106        _parse_context: &ParseContext,
107        _crumbs: Option<Vec<&str>>,
108    ) -> Option<(AHashSet<String>, SyntaxSet)> {
109        None
110    }
111
112    fn match_segments(
113        &self,
114        segments: &[ErasedSegment],
115        idx: u32,
116        _parse_context: &mut ParseContext,
117    ) -> Result<MatchResult, SQLParseError> {
118        if idx as usize >= segments.len() {
119            return Ok(MatchResult::empty_at(idx));
120        }
121
122        if segments[idx as usize].is_code() {
123            return Ok(MatchResult::from_span(idx, idx + 1));
124        }
125
126        Ok(MatchResult::empty_at(idx))
127    }
128
129    fn cache_key(&self) -> MatchableCacheKey {
130        self.cache_key
131    }
132}
133
134#[derive(Clone, Debug, PartialEq)]
135pub struct StringParser {
136    template: String,
137    simple: AHashSet<String>,
138    kind: SyntaxKind,
139    optional: bool,
140    cache_key: MatchableCacheKey,
141}
142
143impl StringParser {
144    pub fn new(template: &str, kind: SyntaxKind) -> StringParser {
145        let template_upper = template.to_uppercase();
146        let simple_set = [template_upper.clone()].into();
147
148        StringParser {
149            template: template_upper,
150            simple: simple_set,
151            kind,
152            optional: false,
153            cache_key: next_matchable_cache_key(),
154        }
155    }
156}
157
158impl MatchableTrait for StringParser {
159    fn elements(&self) -> &[Matchable] {
160        &[]
161    }
162
163    fn is_optional(&self) -> bool {
164        self.optional
165    }
166
167    fn simple(
168        &self,
169        _parse_context: &ParseContext,
170        _crumbs: Option<Vec<&str>>,
171    ) -> Option<(AHashSet<String>, SyntaxSet)> {
172        (self.simple.clone(), SyntaxSet::EMPTY).into()
173    }
174
175    fn match_segments(
176        &self,
177        segments: &[ErasedSegment],
178        idx: u32,
179        _parse_context: &mut ParseContext,
180    ) -> Result<MatchResult, SQLParseError> {
181        let segment = &segments[idx as usize];
182
183        if segment.is_code() && self.template.eq_ignore_ascii_case(segment.raw()) {
184            return Ok(MatchResult {
185                span: Span {
186                    start: idx,
187                    end: idx + 1,
188                },
189                matched: Matched::Newtype(self.kind).into(),
190                insert_segments: Vec::new(),
191                child_matches: Vec::new(),
192            });
193        }
194
195        Ok(MatchResult::empty_at(idx))
196    }
197
198    fn cache_key(&self) -> MatchableCacheKey {
199        self.cache_key
200    }
201}
202
203#[derive(Debug, Clone)]
204pub struct RegexParser {
205    pub template: Regex,
206    pub anti_template: Option<Regex>,
207    kind: SyntaxKind,
208    cache_key: MatchableCacheKey,
209}
210
211impl PartialEq for RegexParser {
212    fn eq(&self, other: &Self) -> bool {
213        self.template.as_str() == other.template.as_str()
214            && self
215                .anti_template
216                .as_ref()
217                .zip(other.anti_template.as_ref())
218                .is_some_and(|(lhs, rhs)| lhs.as_str() == rhs.as_str())
219            && self.kind == other.kind
220    }
221}
222
223impl RegexParser {
224    pub fn new(template: &str, kind: SyntaxKind) -> Self {
225        let template_pattern = Regex::new(&format!("(?i){template}")).unwrap();
226
227        Self {
228            template: template_pattern,
229            anti_template: None,
230            kind,
231            cache_key: next_matchable_cache_key(),
232        }
233    }
234
235    pub fn anti_template(mut self, anti_template: &str) -> Self {
236        self.anti_template = Regex::new(&format!("(?i){anti_template}")).unwrap().into();
237        self
238    }
239}
240
241impl MatchableTrait for RegexParser {
242    fn elements(&self) -> &[Matchable] {
243        &[]
244    }
245
246    fn is_optional(&self) -> bool {
247        unimplemented!()
248    }
249
250    fn simple(
251        &self,
252        _parse_context: &ParseContext,
253        _crumbs: Option<Vec<&str>>,
254    ) -> Option<(AHashSet<String>, SyntaxSet)> {
255        // Does this matcher support a uppercase hash matching route?
256        // Regex segment does NOT for now. We might need to later for efficiency.
257        None
258    }
259
260    fn match_segments(
261        &self,
262        segments: &[ErasedSegment],
263        idx: u32,
264        _parse_context: &mut ParseContext,
265    ) -> Result<MatchResult, SQLParseError> {
266        let segment = &segments[idx as usize];
267        let segment_raw_upper =
268            SmolStr::from_iter(segment.raw().chars().map(|ch| ch.to_ascii_uppercase()));
269        if let Some(result) = self.template.find(&segment_raw_upper).ok().flatten()
270            && result.as_str() == segment_raw_upper
271            && !self.anti_template.as_ref().is_some_and(|anti_template| {
272                anti_template
273                    .is_match(&segment_raw_upper)
274                    .unwrap_or_default()
275            })
276        {
277            return Ok(MatchResult {
278                span: Span {
279                    start: idx,
280                    end: idx + 1,
281                },
282                matched: Matched::Newtype(self.kind).into(),
283                insert_segments: Vec::new(),
284                child_matches: Vec::new(),
285            });
286        }
287
288        Ok(MatchResult::empty_at(idx))
289    }
290
291    fn cache_key(&self) -> MatchableCacheKey {
292        self.cache_key
293    }
294}
295
296#[derive(Clone, Debug, PartialEq)]
297pub struct MultiStringParser {
298    templates: AHashSet<String>,
299    simple: AHashSet<String>,
300    kind: SyntaxKind,
301    cache: MatchableCacheKey,
302}
303
304impl MultiStringParser {
305    pub fn new(templates: Vec<String>, kind: SyntaxKind) -> Self {
306        let templates = templates
307            .iter()
308            .map(|template| template.to_ascii_uppercase())
309            .collect::<AHashSet<String>>();
310
311        let _simple = templates.clone();
312
313        Self {
314            templates: templates.into_iter().collect(),
315            simple: _simple.into_iter().collect(),
316            kind,
317            cache: next_matchable_cache_key(),
318        }
319    }
320}
321
322impl MatchableTrait for MultiStringParser {
323    fn elements(&self) -> &[Matchable] {
324        &[]
325    }
326
327    fn is_optional(&self) -> bool {
328        todo!()
329    }
330
331    fn simple(
332        &self,
333        _parse_context: &ParseContext,
334        _crumbs: Option<Vec<&str>>,
335    ) -> Option<(AHashSet<String>, SyntaxSet)> {
336        (self.simple.clone(), SyntaxSet::EMPTY).into()
337    }
338
339    fn match_segments(
340        &self,
341        segments: &[ErasedSegment],
342        idx: u32,
343        _parse_context: &mut ParseContext,
344    ) -> Result<MatchResult, SQLParseError> {
345        let segment = &segments[idx as usize];
346
347        if segment.is_code() && self.templates.contains(&segment.raw().to_ascii_uppercase()) {
348            return Ok(MatchResult {
349                span: Span {
350                    start: idx,
351                    end: idx + 1,
352                },
353                matched: Matched::Newtype(self.kind).into(),
354                ..<_>::default()
355            });
356        }
357
358        Ok(MatchResult::empty_at(idx))
359    }
360
361    fn cache_key(&self) -> MatchableCacheKey {
362        self.cache
363    }
364}