sqruff_lib_core/dialects/
base.rs

1use std::borrow::Cow;
2use std::collections::hash_map::Entry;
3use std::fmt::Debug;
4
5use ahash::{AHashMap, AHashSet};
6
7use crate::dialects::init::DialectKind;
8use crate::dialects::syntax::SyntaxKind;
9use crate::helpers::{ToMatchable, capitalize};
10use crate::parser::lexer::{Lexer, Matcher};
11use crate::parser::matchable::Matchable;
12use crate::parser::parsers::StringParser;
13use crate::parser::types::DialectElementType;
14
/// A SQL dialect: its lexer matchers, its library of named grammar elements,
/// and its named keyword and bracket sets.
///
/// Equality between two dialects is decided by `name` alone (see the
/// `PartialEq` impl in this file).
#[derive(Debug, Clone, Default)]
pub struct Dialect {
    /// Identifies which dialect this is.
    pub name: DialectKind,
    /// Lexer matchers; `None` until `set_lexer_matchers` has been called.
    lexer_matchers: Option<Vec<Matcher>>,
    // TODO: Can we use PHF here? https://crates.io/crates/phf
    /// Named dialect elements (matchables or segment generators), keyed by name.
    library: AHashMap<Cow<'static, str>, DialectElementType>,
    /// Named sets of static strings (e.g. `reserved_keywords`,
    /// `unreserved_keywords`), keyed by label.
    sets: AHashMap<&'static str, AHashSet<&'static str>>,
    /// Bracket pair sets keyed by label. The accessor methods only accept the
    /// labels `bracket_pairs` and `angle_bracket_pairs`.
    pub bracket_collections: AHashMap<&'static str, AHashSet<BracketPair>>,
    /// Lexer built from `lexer_matchers` by `expand`; `None` before that.
    lexer: Option<Lexer>,
}
25
26impl PartialEq for Dialect {
27    fn eq(&self, other: &Self) -> bool {
28        self.name == other.name
29    }
30}
31
32impl Dialect {
33    pub fn new() -> Self {
34        Dialect {
35            name: DialectKind::Ansi,
36            ..Default::default()
37        }
38    }
39
40    pub fn name(&self) -> DialectKind {
41        self.name
42    }
43
44    pub fn add(
45        &mut self,
46        iter: impl IntoIterator<Item = (Cow<'static, str>, DialectElementType)> + Clone,
47    ) {
48        self.library.extend(iter);
49    }
50
51    pub fn grammar(&self, name: &str) -> Matchable {
52        match self
53            .library
54            .get(name)
55            .unwrap_or_else(|| panic!("not found {name}"))
56        {
57            DialectElementType::Matchable(matchable) => matchable.clone(),
58            DialectElementType::SegmentGenerator(_) => {
59                unreachable!("Attempted to fetch non grammar [{name}] with `Dialect::grammar`.")
60            }
61        }
62    }
63
64    #[track_caller]
65    pub fn replace_grammar(&mut self, name: &'static str, match_grammar: Matchable) {
66        match self
67            .library
68            .get_mut(name)
69            .unwrap_or_else(|| panic!("Failed to get mutable reference for {name}"))
70        {
71            DialectElementType::Matchable(matchable) => {
72                matchable.as_node_matcher().unwrap().match_grammar = match_grammar;
73            }
74            DialectElementType::SegmentGenerator(_) => {
75                unreachable!("Attempted to fetch non grammar [{name}] with `Dialect::grammar`.")
76            }
77        }
78    }
79
80    pub fn lexer_matchers(&self) -> &[Matcher] {
81        match &self.lexer_matchers {
82            Some(lexer_matchers) => lexer_matchers,
83            None => panic!("Lexing struct has not been set for dialect {self:?}"),
84        }
85    }
86
87    pub fn insert_lexer_matchers(&mut self, lexer_patch: Vec<Matcher>, before: &str) {
88        let mut buff = Vec::new();
89        let mut found = false;
90
91        if self.lexer_matchers.is_none() {
92            panic!("Lexer struct must be defined before it can be patched!");
93        }
94
95        for elem in self.lexer_matchers.take().unwrap() {
96            if elem.name() == before {
97                found = true;
98                for patch in lexer_patch.clone() {
99                    buff.push(patch);
100                }
101                buff.push(elem);
102            } else {
103                buff.push(elem);
104            }
105        }
106
107        if !found {
108            panic!("Lexer struct insert before '{before}' failed because tag never found.");
109        }
110
111        self.lexer_matchers = Some(buff);
112    }
113
114    pub fn patch_lexer_matchers(&mut self, lexer_patch: Vec<Matcher>) {
115        let mut buff = Vec::with_capacity(self.lexer_matchers.as_ref().map_or(0, Vec::len));
116        if self.lexer_matchers.is_none() {
117            panic!("Lexer struct must be defined before it can be patched!");
118        }
119
120        let patch_dict: AHashMap<&'static str, Matcher> = lexer_patch
121            .into_iter()
122            .map(|elem| (elem.name(), elem))
123            .collect();
124
125        for elem in self.lexer_matchers.take().unwrap() {
126            if let Some(patch) = patch_dict.get(elem.name()) {
127                buff.push(patch.clone());
128            } else {
129                buff.push(elem);
130            }
131        }
132
133        self.lexer_matchers = Some(buff);
134    }
135
136    pub fn set_lexer_matchers(&mut self, lexer_matchers: Vec<Matcher>) {
137        self.lexer_matchers = lexer_matchers.into();
138    }
139
140    pub fn sets(&self, label: &str) -> AHashSet<&'static str> {
141        match label {
142            "bracket_pairs" | "angle_bracket_pairs" => {
143                panic!("Use `bracket_sets` to retrieve {} set.", label);
144            }
145            _ => (),
146        }
147
148        self.sets.get(label).cloned().unwrap_or_default()
149    }
150
151    pub fn sets_mut(&mut self, label: &'static str) -> &mut AHashSet<&'static str> {
152        assert!(
153            label != "bracket_pairs" && label != "angle_bracket_pairs",
154            "Use `bracket_sets` to retrieve {} set.",
155            label
156        );
157
158        match self.sets.entry(label) {
159            Entry::Occupied(entry) => entry.into_mut(),
160            Entry::Vacant(entry) => entry.insert(<_>::default()),
161        }
162    }
163
164    pub fn update_keywords_set_from_multiline_string(
165        &mut self,
166        set_label: &'static str,
167        values: &'static str,
168    ) {
169        let keywords = values.lines().map(str::trim);
170        self.sets_mut(set_label).extend(keywords);
171    }
172
173    pub fn add_keyword_to_set(&mut self, set_label: &'static str, value: &'static str) {
174        self.sets_mut(set_label).insert(value);
175    }
176
177    pub fn bracket_sets(&self, label: &str) -> AHashSet<BracketPair> {
178        assert!(
179            label == "bracket_pairs" || label == "angle_bracket_pairs",
180            "Invalid bracket set. Consider using another identifier instead."
181        );
182
183        self.bracket_collections
184            .get(label)
185            .cloned()
186            .unwrap_or_default()
187    }
188
189    pub fn bracket_sets_mut(&mut self, label: &'static str) -> &mut AHashSet<BracketPair> {
190        assert!(
191            label == "bracket_pairs" || label == "angle_bracket_pairs",
192            "Invalid bracket set. Consider using another identifier instead."
193        );
194
195        self.bracket_collections.entry(label).or_default()
196    }
197
198    pub fn update_bracket_sets(&mut self, label: &'static str, pairs: Vec<BracketPair>) {
199        let set = self.bracket_sets_mut(label);
200        for pair in pairs {
201            set.insert(pair);
202        }
203    }
204
205    pub fn r#ref(&self, name: &str) -> Matchable {
206        match self.library.get(name) {
207            Some(DialectElementType::Matchable(matchable)) => matchable.clone(),
208            Some(DialectElementType::SegmentGenerator(_)) => {
209                panic!("Unexpected SegmentGenerator while fetching '{}'", name);
210            }
211            None => {
212                if let Some(keyword) = name.strip_suffix("KeywordSegment") {
213                    let keyword_tip = "\
214                        \n\nThe syntax in the query is not (yet?) supported. Try to \
215                        narrow down your query to a minimal, reproducible case and \
216                        raise an issue on GitHub.\n\n\
217                        Or, even better, see this guide on how to help contribute \
218                        keyword and/or dialect updates:\n\
219                        https://github.com/quarylabs/sqruff";
220                    panic!(
221                        "Grammar refers to the '{keyword}' keyword which was not found in the \
222                         dialect.{keyword_tip}",
223                    );
224                } else {
225                    panic!("Grammar refers to '{name}' which was not found in the dialect.",);
226                }
227            }
228        }
229    }
230
231    pub fn expand(&mut self) {
232        // Temporarily take ownership of 'library' from 'self' to avoid borrow checker
233        // errors during mutation.
234        let mut library = std::mem::take(&mut self.library);
235        for element in library.values_mut() {
236            if let DialectElementType::SegmentGenerator(generator) = element {
237                *element = DialectElementType::Matchable(generator.expand(self));
238            }
239        }
240        self.library = library;
241
242        for keyword_set in ["unreserved_keywords", "reserved_keywords"] {
243            if let Some(keywords) = self.sets.get(keyword_set) {
244                for kw in keywords {
245                    let n = format!("{}KeywordSegment", capitalize(kw));
246                    if !self.library.contains_key(n.as_str()) {
247                        let parser = StringParser::new(&kw.to_lowercase(), SyntaxKind::Keyword);
248
249                        self.library.insert(
250                            n.into(),
251                            DialectElementType::Matchable(parser.to_matchable()),
252                        );
253                    }
254                }
255            }
256        }
257
258        self.lexer = Lexer::new(self.lexer_matchers()).into();
259    }
260
261    pub fn lexer(&self) -> &Lexer {
262        self.lexer.as_ref().unwrap()
263    }
264}
265
/// One bracket definition stored in a dialect's bracket sets: three static
/// strings and a flag.
///
/// NOTE(review): presumably `(bracket type, start reference, end reference,
/// persists)` — the field meanings are not visible in this file; confirm
/// against the code that constructs and consumes these tuples.
pub type BracketPair = (&'static str, &'static str, &'static str, bool);