sqruff_lib/rules/convention/
cv10.rs

1use ahash::AHashMap;
2use regex::Regex;
3use sqruff_lib_core::dialects::init::DialectKind;
4use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
5use sqruff_lib_core::lint_fix::LintFix;
6use sqruff_lib_core::parser::segments::SegmentBuilder;
7use strum_macros::{AsRefStr, EnumString};
8
9use crate::core::config::Value;
10use crate::core::rules::context::RuleContext;
11use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
12use crate::core::rules::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
13
14#[derive(Debug, Copy, Clone, AsRefStr, EnumString, PartialEq, Default)]
15#[strum(serialize_all = "snake_case")]
16enum PreferredQuotedLiteralStyle {
17    #[default]
18    Consistent,
19    SingleQuotes,
20    DoubleQuotes,
21}
22
23impl PreferredQuotedLiteralStyle {
24    fn info(&self) -> QuoteInfo {
25        match self {
26            PreferredQuotedLiteralStyle::Consistent => unimplemented!(),
27            PreferredQuotedLiteralStyle::SingleQuotes => QuoteInfo {
28                preferred_quote_char: '\'',
29                alternate_quote_char: '"',
30            },
31            PreferredQuotedLiteralStyle::DoubleQuotes => QuoteInfo {
32                preferred_quote_char: '"',
33                alternate_quote_char: '\'',
34            },
35        }
36    }
37}
38
/// Pair of quote characters describing a concrete quoting preference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct QuoteInfo {
    /// Quote character literals should be delimited with.
    preferred_quote_char: char,
    /// The other quote character, i.e. the one to convert away from.
    alternate_quote_char: char,
}
43
44#[derive(Clone, Debug, Default)]
45pub struct RuleCV10 {
46    preferred_quoted_literal_style: PreferredQuotedLiteralStyle,
47    force_enable: bool,
48}
49
50impl Rule for RuleCV10 {
51    fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
52        Ok(RuleCV10 {
53            preferred_quoted_literal_style: config["preferred_quoted_literal_style"]
54                .as_string()
55                .unwrap()
56                .to_owned()
57                .parse()
58                .unwrap(),
59            force_enable: config["force_enable"].as_bool().unwrap(),
60        }
61        .erased())
62    }
63
64    fn name(&self) -> &'static str {
65        "convention.quoted_literals"
66    }
67
68    fn description(&self) -> &'static str {
69        "Consistent usage of preferred quotes for quoted literals."
70    }
71
72    fn long_description(&self) -> &'static str {
73        r#"
74**Anti-pattern**
75
76```sql
77select
78    "abc",
79    'abc',
80    "\"",
81    "abc" = 'abc'
82from foo
83```
84
85**Best practice**
86
87Ensure all quoted literals use preferred quotes, unless escaping can be reduced by using alternate quotes.
88
89```sql
90select
91    "abc",
92    "abc",
93    '"',
94    "abc" = "abc"
95from foo
96```P        
97"#
98    }
99
100    fn groups(&self) -> &'static [RuleGroups] {
101        &[RuleGroups::All, RuleGroups::Convention]
102    }
103
104    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
105        // TODO: "databricks", "hive", "mysql"
106        if !(self.force_enable
107            || matches!(
108                context.dialect.name,
109                DialectKind::Bigquery | DialectKind::Sparksql
110            ))
111        {
112            return Vec::new();
113        }
114
115        let preferred_quoted_literal_style =
116            if self.preferred_quoted_literal_style == PreferredQuotedLiteralStyle::Consistent {
117                let preferred_quoted_literal_style = context
118                    .try_get::<PreferredQuotedLiteralStyle>()
119                    .unwrap_or_else(|| {
120                        if context.segment.raw().ends_with('"') {
121                            PreferredQuotedLiteralStyle::DoubleQuotes
122                        } else {
123                            PreferredQuotedLiteralStyle::SingleQuotes
124                        }
125                    });
126
127                context.set(preferred_quoted_literal_style);
128                preferred_quoted_literal_style
129            } else {
130                self.preferred_quoted_literal_style
131            };
132
133        let info = preferred_quoted_literal_style.info();
134        let fixed_string = normalize_preferred_quoted_literal_style(
135            context.segment.raw().as_ref(),
136            info.preferred_quote_char,
137            info.alternate_quote_char,
138        );
139
140        if fixed_string != context.segment.raw().as_str() {
141            return vec![LintResult::new(
142                context.segment.clone().into(),
143                vec![LintFix::replace(
144                    context.segment.clone(),
145                    vec![
146                        SegmentBuilder::token(
147                            context.tables.next_id(),
148                            &fixed_string,
149                            SyntaxKind::QuotedLiteral,
150                        )
151                        .finish(),
152                    ],
153                    None,
154                )],
155                Some("".into()),
156                None,
157            )];
158        }
159
160        Vec::new()
161    }
162
163    fn is_fix_compatible(&self) -> bool {
164        true
165    }
166
167    fn crawl_behaviour(&self) -> Crawler {
168        SegmentSeekerCrawler::new(const { SyntaxSet::new(&[SyntaxKind::QuotedLiteral]) }).into()
169    }
170}
171
172// FIXME: avoid memory allocations
173fn normalize_preferred_quoted_literal_style(
174    s: &str,
175    preferred_quote_char: char,
176    alternate_quote_char: char,
177) -> String {
178    let mut s = s.to_string();
179    let trimmed = s.trim_start_matches(['r', 'b', 'R', 'B']);
180
181    let (orig_quote, new_quote) = if trimmed
182        .chars()
183        .take(3)
184        .eq(std::iter::repeat_n(preferred_quote_char, 3))
185    {
186        return s.to_string();
187    } else if trimmed.starts_with(preferred_quote_char) {
188        (
189            preferred_quote_char.to_string(),
190            alternate_quote_char.to_string(),
191        )
192    } else if trimmed
193        .chars()
194        .take(3)
195        .eq(std::iter::repeat_n(alternate_quote_char, 3))
196    {
197        (
198            std::iter::repeat_n(alternate_quote_char, 3).collect(),
199            std::iter::repeat_n(preferred_quote_char, 3).collect(),
200        )
201    } else if trimmed.starts_with(alternate_quote_char) {
202        (
203            alternate_quote_char.to_string(),
204            preferred_quote_char.to_string(),
205        )
206    } else {
207        return s.to_string();
208    };
209
210    let first_quote_pos = s.find(&orig_quote).unwrap_or_default();
211    let prefix = s[..first_quote_pos].to_string();
212    let unescaped_new_quote = Regex::new(&format!(r"(([^\\]|^)(\\\\)*){new_quote}")).unwrap();
213    let escaped_new_quote = Regex::new(&format!(r"([^\\]|^)\\((?:\\\\)*){new_quote}")).unwrap();
214    let escaped_orig_quote = Regex::new(&format!(r"([^\\]|^)\\((?:\\\\)*){orig_quote}")).unwrap();
215
216    let body_start = first_quote_pos + orig_quote.len();
217    let body_end = s.len() - orig_quote.len();
218
219    let mut body = s[body_start..body_end].to_string();
220    let mut new_body = if prefix.to_lowercase().contains("r") {
221        if unescaped_new_quote.find(&body).is_some() {
222            return s.to_string();
223        }
224        body.clone()
225    } else {
226        let mut new_body =
227            regex_sub_with_overlap(&escaped_new_quote, &format!(r"$1$2{new_quote}"), &body);
228        if new_body != body {
229            body = new_body.clone();
230            s = format!("{prefix}{orig_quote}{body}{orig_quote}");
231        }
232        new_body = regex_sub_with_overlap(
233            &escaped_orig_quote,
234            &format!(r"$1$2{orig_quote}"),
235            &new_body,
236        );
237        new_body = regex_sub_with_overlap(
238            &unescaped_new_quote,
239            &format!(r"$1\\{new_quote}"),
240            &new_body,
241        );
242
243        new_body
244    };
245
246    if new_quote
247        .chars()
248        .eq(std::iter::repeat_n(preferred_quote_char, 3))
249        && new_body.ends_with(preferred_quote_char)
250    {
251        let truncated_body = &new_body[..new_body.len() - 1];
252        new_body = format!("{}\\{}", truncated_body, preferred_quote_char);
253    }
254
255    let orig_escape_count = body.matches("\\").count();
256    let new_escape_count = new_body.matches("\\").count();
257    if new_escape_count > orig_escape_count {
258        return s.to_string();
259    }
260
261    if new_escape_count == orig_escape_count && orig_quote.starts_with(preferred_quote_char) {
262        s.to_string()
263    } else {
264        format!("{prefix}{new_quote}{new_body}{new_quote}")
265    }
266}
267
268fn regex_sub_with_overlap(regex: &Regex, replacement: &str, original: &str) -> String {
269    let first_pass = regex.replace_all(original, replacement);
270    let second_pass = regex.replace_all(&first_pass, replacement);
271    second_pass.to_string()
272}