Skip to main content

flowscope_core/linter/rules/
cv_009.rs

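//! LINT_CV_009: Blocked words.
//!
//! SQLFluff CV09 parity (current scope): flags statements that use blocked
//! placeholder words such as TODO/FIXME/foo/bar in table names, identifiers,
//! function names, or aliases. The word list can be replaced through the
//! `blocked_words` rule option, extra patterns can be supplied through
//! `blocked_regex`, and `match_source` additionally matches those patterns
//! against the raw SQL text.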
5
6use crate::extractors::extract_tables;
7use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::linter::visit::visit_expressions;
10use crate::types::{issue_codes, Issue};
11use regex::{Regex, RegexBuilder};
12use sqlparser::ast::{Expr, SelectItem, Statement};
13use std::collections::HashSet;
14
15use super::semantic_helpers::{table_factor_alias_name, visit_selects_in_statement};
16
17pub struct ConventionBlockedWords {
18    blocked_words: HashSet<String>,
19    blocked_regexes: Vec<Regex>,
20    match_source: bool,
21    ignore_templated_areas: bool,
22}
23
24impl ConventionBlockedWords {
25    pub fn from_config(config: &LintConfig) -> Self {
26        let blocked_words = configured_blocked_words(config)
27            .unwrap_or_else(default_blocked_words)
28            .into_iter()
29            .map(|word| normalized_token(&word))
30            .collect();
31
32        let blocked_regexes = configured_blocked_regexes(config);
33        let match_source = config
34            .rule_option_bool(issue_codes::LINT_CV_009, "match_source")
35            .unwrap_or(false);
36        let ignore_templated_areas = config
37            .core_option_bool("ignore_templated_areas")
38            .unwrap_or(true);
39
40        Self {
41            blocked_words,
42            blocked_regexes,
43            match_source,
44            ignore_templated_areas,
45        }
46    }
47}
48
49impl Default for ConventionBlockedWords {
50    fn default() -> Self {
51        Self {
52            blocked_words: default_blocked_words()
53                .into_iter()
54                .map(|word| normalized_token(&word))
55                .collect(),
56            blocked_regexes: Vec::new(),
57            match_source: false,
58            ignore_templated_areas: true,
59        }
60    }
61}
62
63impl LintRule for ConventionBlockedWords {
64    fn code(&self) -> &'static str {
65        issue_codes::LINT_CV_009
66    }
67
68    fn name(&self) -> &'static str {
69        "Blocked words"
70    }
71
72    fn description(&self) -> &'static str {
73        "Block a list of configurable words from being used."
74    }
75
76    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
77        let source_violation = if self.match_source && ctx.statement_index == 0 {
78            let source = if self.ignore_templated_areas {
79                mask_templated_areas(ctx.sql)
80            } else {
81                ctx.sql.to_string()
82            };
83            self.blocked_regexes
84                .iter()
85                .any(|regex| regex.is_match(&source))
86        } else {
87            false
88        };
89
90        if source_violation || statement_contains_blocked_word(statement, self) {
91            vec![Issue::warning(
92                issue_codes::LINT_CV_009,
93                "Blocked placeholder words detected (e.g., TODO/FIXME/foo/bar).",
94            )
95            .with_statement(ctx.statement_index)]
96        } else {
97            Vec::new()
98        }
99    }
100}
101
102fn configured_blocked_words(config: &LintConfig) -> Option<Vec<String>> {
103    if let Some(words) = config.rule_option_string_list(issue_codes::LINT_CV_009, "blocked_words") {
104        return Some(words);
105    }
106
107    config
108        .rule_option_str(issue_codes::LINT_CV_009, "blocked_words")
109        .map(|words| {
110            words
111                .split(',')
112                .map(str::trim)
113                .filter(|word| !word.is_empty())
114                .map(str::to_string)
115                .collect()
116        })
117}
118
119fn configured_blocked_regexes(config: &LintConfig) -> Vec<Regex> {
120    let mut patterns = Vec::new();
121
122    if let Some(list) = config.rule_option_string_list(issue_codes::LINT_CV_009, "blocked_regex") {
123        patterns.extend(list);
124    } else if let Some(pattern) = config.rule_option_str(issue_codes::LINT_CV_009, "blocked_regex")
125    {
126        patterns.push(pattern.to_string());
127    }
128
129    patterns
130        .into_iter()
131        .filter_map(|pattern| {
132            let trimmed = pattern.trim();
133            if trimmed.is_empty() {
134                None
135            } else {
136                RegexBuilder::new(trimmed)
137                    .case_insensitive(true)
138                    .build()
139                    .ok()
140            }
141        })
142        .collect()
143}
144
/// Built-in blocked words used when no `blocked_words` option is configured.
fn default_blocked_words() -> Vec<String> {
    ["TODO", "FIXME", "foo", "bar"]
        .iter()
        .map(|word| word.to_string())
        .collect()
}
153
154fn statement_contains_blocked_word(statement: &Statement, config: &ConventionBlockedWords) -> bool {
155    if extract_tables(std::slice::from_ref(statement))
156        .into_iter()
157        .any(|name| name_contains_blocked_word(&name, config))
158    {
159        return true;
160    }
161
162    let mut found = false;
163    visit_expressions(statement, &mut |expr| {
164        if found {
165            return;
166        }
167        if expr_contains_blocked_word(expr, config) {
168            found = true;
169        }
170    });
171    if found {
172        return true;
173    }
174
175    visit_selects_in_statement(statement, &mut |select| {
176        if found {
177            return;
178        }
179
180        for item in &select.projection {
181            if let SelectItem::ExprWithAlias { alias, .. } = item {
182                if token_is_blocked(&alias.value, config) {
183                    found = true;
184                    return;
185                }
186            }
187        }
188
189        for table in &select.from {
190            if table_factor_alias_name(&table.relation)
191                .is_some_and(|alias| token_is_blocked(alias, config))
192            {
193                found = true;
194                return;
195            }
196            for join in &table.joins {
197                if table_factor_alias_name(&join.relation)
198                    .is_some_and(|alias| token_is_blocked(alias, config))
199                {
200                    found = true;
201                    return;
202                }
203            }
204        }
205    });
206
207    found
208}
209
210fn expr_contains_blocked_word(expr: &Expr, config: &ConventionBlockedWords) -> bool {
211    match expr {
212        Expr::Identifier(ident) => token_is_blocked(&ident.value, config),
213        Expr::CompoundIdentifier(parts) => parts
214            .iter()
215            .any(|part| token_is_blocked(&part.value, config)),
216        Expr::Function(function) => name_contains_blocked_word(&function.name.to_string(), config),
217        _ => false,
218    }
219}
220
221fn name_contains_blocked_word(name: &str, config: &ConventionBlockedWords) -> bool {
222    name.split('.').any(|token| token_is_blocked(token, config))
223}
224
225fn token_is_blocked(token: &str, config: &ConventionBlockedWords) -> bool {
226    let normalized = normalized_token(token);
227    config.blocked_words.contains(&normalized)
228        || config
229            .blocked_regexes
230            .iter()
231            .any(|regex| regex.is_match(&normalized))
232}
233
/// Canonical form of a token used for comparisons.
///
/// Surrounding whitespace is trimmed first, then any leading/trailing quote
/// or bracket characters (`"`, `` ` ``, `'`, `[`, `]`) are stripped, and the
/// remainder is ASCII-uppercased.
fn normalized_token(token: &str) -> String {
    const QUOTE_CHARS: &[char] = &['"', '`', '\'', '[', ']'];

    let unquoted = token.trim().trim_matches(QUOTE_CHARS);
    unquoted.to_ascii_uppercase()
}
240
241fn mask_templated_areas(sql: &str) -> String {
242    let mut out = String::with_capacity(sql.len());
243    let mut index = 0usize;
244
245    while let Some((open_index, close_marker)) = find_next_template_open(sql, index) {
246        out.push_str(&sql[index..open_index]);
247        let marker_start = open_index + 2;
248        if let Some(close_offset) = sql[marker_start..].find(close_marker) {
249            let close_index = marker_start + close_offset + close_marker.len();
250            out.push_str(&mask_non_newlines(&sql[open_index..close_index]));
251            index = close_index;
252        } else {
253            out.push_str(&mask_non_newlines(&sql[open_index..]));
254            return out;
255        }
256    }
257
258    out.push_str(&sql[index..]);
259    out
260}
261
/// Finds the first template opening marker at or after byte offset `from`.
///
/// Returns the marker's byte offset in `sql` together with the closing
/// marker that ends it: `{{` → `}}`, `{%` → `%}`, `{#` → `#}`. Returns `None`
/// when no marker follows, or when `from` is past the end of `sql` or not on
/// a character boundary.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    let rest = sql.get(from..)?;

    // One left-to-right pass that stops at the first opener. Separate searches
    // per marker kind would each scan to the end whenever that kind is absent,
    // and this function is called once per templated region.
    // All markers are ASCII, so byte pairs can never match inside a multi-byte
    // character and the offsets found are valid `str` boundaries.
    rest.as_bytes()
        .windows(2)
        .enumerate()
        .find_map(|(offset, pair)| {
            let close = match pair {
                [b'{', b'{'] => "}}",
                [b'{', b'%'] => "%}",
                [b'{', b'#'] => "#}",
                _ => return None,
            };
            Some((from + offset, close))
        })
}
271
/// Replaces every character of `segment` except `\n` with a single space.
fn mask_non_newlines(segment: &str) -> String {
    let mut masked = String::with_capacity(segment.len());
    for ch in segment.chars() {
        match ch {
            '\n' => masked.push('\n'),
            _ => masked.push(' '),
        }
    }
    masked
}
278
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql;

    /// SQL used by the source-matching tests: the only occurrence of the
    /// blocked pattern sits inside a templated region.
    const TEMPLATED_SQL: &str = "SELECT * FROM {{ ref('deprecated_table') }}";

    /// Builds an enabled `LintConfig` with no disabled rules and the given
    /// `rule_configs` entries.
    fn config_with(entries: Vec<(&str, serde_json::Value)>) -> LintConfig {
        LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: entries
                .into_iter()
                .map(|(key, value)| (key.to_string(), value))
                .collect(),
        }
    }

    /// Config that blocks `ref('deprecated_` via source matching, with the
    /// core `ignore_templated_areas` option set as requested.
    fn deprecated_ref_config(ignore_templated_areas: bool) -> LintConfig {
        config_with(vec![
            (
                "core",
                serde_json::json!({"ignore_templated_areas": ignore_templated_areas}),
            ),
            (
                "convention.blocked_words",
                serde_json::json!({
                    "blocked_words": [],
                    "blocked_regex": "ref\\('deprecated_",
                    "match_source": true
                }),
            ),
        ])
    }

    /// Parses `parsed_sql` and checks its first statement as statement 0,
    /// presenting `source_sql` as the context's SQL text.
    fn check_first(
        rule: &ConventionBlockedWords,
        parsed_sql: &str,
        source_sql: &str,
    ) -> Vec<Issue> {
        let statements = parse_sql(parsed_sql).expect("parse");
        rule.check(
            &statements[0],
            &LintContext {
                sql: source_sql,
                statement_range: 0..source_sql.len(),
                statement_index: 0,
            },
        )
    }

    /// Runs the default rule over every statement parsed from `sql`.
    fn run(sql: &str) -> Vec<Issue> {
        let rule = ConventionBlockedWords::default();
        let statements = parse_sql(sql).expect("parse");
        let mut issues = Vec::new();
        for (index, statement) in statements.iter().enumerate() {
            issues.extend(rule.check(
                statement,
                &LintContext {
                    sql,
                    statement_range: 0..sql.len(),
                    statement_index: index,
                },
            ));
        }
        issues
    }

    /// Asserts that exactly one LINT_CV_009 issue was produced.
    fn assert_single_cv009(issues: &[Issue]) {
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_CV_009);
    }

    #[test]
    fn flags_blocked_word() {
        assert_single_cv009(&run("SELECT foo FROM t"));
    }

    #[test]
    fn does_not_flag_clean_identifier() {
        assert!(run("SELECT customer_id FROM t").is_empty());
    }

    #[test]
    fn does_not_flag_blocked_word_in_string_literal() {
        assert!(run("SELECT 'foo' AS note FROM t").is_empty());
    }

    #[test]
    fn flags_blocked_table_name() {
        assert_single_cv009(&run("SELECT id FROM foo"));
    }

    #[test]
    fn flags_blocked_projection_alias() {
        assert_single_cv009(&run("SELECT amount AS bar FROM t"));
    }

    #[test]
    fn flags_blocked_table_alias() {
        assert_single_cv009(&run(
            "SELECT foo.id FROM users foo JOIN orders o ON foo.id = o.user_id",
        ));
    }

    #[test]
    fn configured_blocked_words_override_default_list() {
        let config = config_with(vec![(
            "convention.blocked_words",
            serde_json::json!({"blocked_words": ["wip"]}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT foo, wip FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn configured_blocked_regex_matches_identifier() {
        let config = config_with(vec![(
            "LINT_CV_009",
            serde_json::json!({"blocked_words": [], "blocked_regex": "^TMP_"}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT tmp_value FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn blocked_regex_array_matches_identifier() {
        let config = config_with(vec![(
            "LINT_CV_009",
            serde_json::json!({"blocked_words": [], "blocked_regex": ["^TMP_", "^WIP_"]}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT wip_item FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn match_source_true_allows_raw_sql_regex_matching() {
        let config = config_with(vec![(
            "convention.blocked_words",
            serde_json::json!({"blocked_words": [], "blocked_regex": "TODO", "match_source": true}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT 'TODO' AS note FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn match_source_true_checks_full_source_in_statementless_mode() {
        let rule = ConventionBlockedWords::from_config(&deprecated_ref_config(false));
        // The parsed statement is a clean placeholder; only the raw source can match.
        let issues = check_first(&rule, "SELECT 1", TEMPLATED_SQL);
        assert_single_cv009(&issues);
    }

    #[test]
    fn match_source_true_respects_ignore_templated_areas_core_option() {
        let rule = ConventionBlockedWords::from_config(&deprecated_ref_config(true));
        // With templated areas masked, the pattern inside `{{ ... }}` is invisible.
        let issues = check_first(&rule, "SELECT 1", TEMPLATED_SQL);
        assert!(issues.is_empty());
    }
}
505}