1use crate::extractors::extract_tables;
7use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::linter::visit::visit_expressions;
10use crate::types::{issue_codes, Issue};
11use regex::{Regex, RegexBuilder};
12use sqlparser::ast::{Expr, SelectItem, Statement};
13use std::collections::HashSet;
14
15use super::semantic_helpers::{table_factor_alias_name, visit_selects_in_statement};
16
17pub struct ConventionBlockedWords {
18 blocked_words: HashSet<String>,
19 blocked_regexes: Vec<Regex>,
20 match_source: bool,
21 ignore_templated_areas: bool,
22}
23
24impl ConventionBlockedWords {
25 pub fn from_config(config: &LintConfig) -> Self {
26 let blocked_words = configured_blocked_words(config)
27 .unwrap_or_else(default_blocked_words)
28 .into_iter()
29 .map(|word| normalized_token(&word))
30 .collect();
31
32 let blocked_regexes = configured_blocked_regexes(config);
33 let match_source = config
34 .rule_option_bool(issue_codes::LINT_CV_009, "match_source")
35 .unwrap_or(false);
36 let ignore_templated_areas = config
37 .core_option_bool("ignore_templated_areas")
38 .unwrap_or(true);
39
40 Self {
41 blocked_words,
42 blocked_regexes,
43 match_source,
44 ignore_templated_areas,
45 }
46 }
47}
48
49impl Default for ConventionBlockedWords {
50 fn default() -> Self {
51 Self {
52 blocked_words: default_blocked_words()
53 .into_iter()
54 .map(|word| normalized_token(&word))
55 .collect(),
56 blocked_regexes: Vec::new(),
57 match_source: false,
58 ignore_templated_areas: true,
59 }
60 }
61}
62
63impl LintRule for ConventionBlockedWords {
64 fn code(&self) -> &'static str {
65 issue_codes::LINT_CV_009
66 }
67
68 fn name(&self) -> &'static str {
69 "Blocked words"
70 }
71
72 fn description(&self) -> &'static str {
73 "Block a list of configurable words from being used."
74 }
75
76 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
77 let source_violation = if self.match_source && ctx.statement_index == 0 {
78 let source = if self.ignore_templated_areas {
79 mask_templated_areas(ctx.sql)
80 } else {
81 ctx.sql.to_string()
82 };
83 self.blocked_regexes
84 .iter()
85 .any(|regex| regex.is_match(&source))
86 } else {
87 false
88 };
89
90 if source_violation || statement_contains_blocked_word(statement, self) {
91 vec![Issue::warning(
92 issue_codes::LINT_CV_009,
93 "Blocked placeholder words detected (e.g., TODO/FIXME/foo/bar).",
94 )
95 .with_statement(ctx.statement_index)]
96 } else {
97 Vec::new()
98 }
99 }
100}
101
102fn configured_blocked_words(config: &LintConfig) -> Option<Vec<String>> {
103 if let Some(words) = config.rule_option_string_list(issue_codes::LINT_CV_009, "blocked_words") {
104 return Some(words);
105 }
106
107 config
108 .rule_option_str(issue_codes::LINT_CV_009, "blocked_words")
109 .map(|words| {
110 words
111 .split(',')
112 .map(str::trim)
113 .filter(|word| !word.is_empty())
114 .map(str::to_string)
115 .collect()
116 })
117}
118
119fn configured_blocked_regexes(config: &LintConfig) -> Vec<Regex> {
120 let mut patterns = Vec::new();
121
122 if let Some(list) = config.rule_option_string_list(issue_codes::LINT_CV_009, "blocked_regex") {
123 patterns.extend(list);
124 } else if let Some(pattern) = config.rule_option_str(issue_codes::LINT_CV_009, "blocked_regex")
125 {
126 patterns.push(pattern.to_string());
127 }
128
129 patterns
130 .into_iter()
131 .filter_map(|pattern| {
132 let trimmed = pattern.trim();
133 if trimmed.is_empty() {
134 None
135 } else {
136 RegexBuilder::new(trimmed)
137 .case_insensitive(true)
138 .build()
139 .ok()
140 }
141 })
142 .collect()
143}
144
/// Built-in blocked words used when none are configured.
fn default_blocked_words() -> Vec<String> {
    ["TODO", "FIXME", "foo", "bar"]
        .iter()
        .copied()
        .map(String::from)
        .collect()
}
153
154fn statement_contains_blocked_word(statement: &Statement, config: &ConventionBlockedWords) -> bool {
155 if extract_tables(std::slice::from_ref(statement))
156 .into_iter()
157 .any(|name| name_contains_blocked_word(&name, config))
158 {
159 return true;
160 }
161
162 let mut found = false;
163 visit_expressions(statement, &mut |expr| {
164 if found {
165 return;
166 }
167 if expr_contains_blocked_word(expr, config) {
168 found = true;
169 }
170 });
171 if found {
172 return true;
173 }
174
175 visit_selects_in_statement(statement, &mut |select| {
176 if found {
177 return;
178 }
179
180 for item in &select.projection {
181 if let SelectItem::ExprWithAlias { alias, .. } = item {
182 if token_is_blocked(&alias.value, config) {
183 found = true;
184 return;
185 }
186 }
187 }
188
189 for table in &select.from {
190 if table_factor_alias_name(&table.relation)
191 .is_some_and(|alias| token_is_blocked(alias, config))
192 {
193 found = true;
194 return;
195 }
196 for join in &table.joins {
197 if table_factor_alias_name(&join.relation)
198 .is_some_and(|alias| token_is_blocked(alias, config))
199 {
200 found = true;
201 return;
202 }
203 }
204 }
205 });
206
207 found
208}
209
210fn expr_contains_blocked_word(expr: &Expr, config: &ConventionBlockedWords) -> bool {
211 match expr {
212 Expr::Identifier(ident) => token_is_blocked(&ident.value, config),
213 Expr::CompoundIdentifier(parts) => parts
214 .iter()
215 .any(|part| token_is_blocked(&part.value, config)),
216 Expr::Function(function) => name_contains_blocked_word(&function.name.to_string(), config),
217 _ => false,
218 }
219}
220
221fn name_contains_blocked_word(name: &str, config: &ConventionBlockedWords) -> bool {
222 name.split('.').any(|token| token_is_blocked(token, config))
223}
224
225fn token_is_blocked(token: &str, config: &ConventionBlockedWords) -> bool {
226 let normalized = normalized_token(token);
227 config.blocked_words.contains(&normalized)
228 || config
229 .blocked_regexes
230 .iter()
231 .any(|regex| regex.is_match(&normalized))
232}
233
/// Canonical form used for comparisons: surrounding whitespace is trimmed,
/// then any leading/trailing quote characters (`"`, `` ` ``, `'`, `[`, `]`)
/// are stripped, and the result is ASCII upper-cased.
fn normalized_token(token: &str) -> String {
    const QUOTE_CHARS: &[char] = &['"', '`', '\'', '[', ']'];

    let unquoted = token.trim().trim_matches(QUOTE_CHARS);
    unquoted.to_ascii_uppercase()
}
240
241fn mask_templated_areas(sql: &str) -> String {
242 let mut out = String::with_capacity(sql.len());
243 let mut index = 0usize;
244
245 while let Some((open_index, close_marker)) = find_next_template_open(sql, index) {
246 out.push_str(&sql[index..open_index]);
247 let marker_start = open_index + 2;
248 if let Some(close_offset) = sql[marker_start..].find(close_marker) {
249 let close_index = marker_start + close_offset + close_marker.len();
250 out.push_str(&mask_non_newlines(&sql[open_index..close_index]));
251 index = close_index;
252 } else {
253 out.push_str(&mask_non_newlines(&sql[open_index..]));
254 return out;
255 }
256 }
257
258 out.push_str(&sql[index..]);
259 out
260}
261
/// Finds the earliest template opening marker at or after byte offset `from`.
///
/// Returns the absolute byte index of the opener together with the closing
/// marker that pairs with it, or `None` when no opener remains or `from` is
/// not a valid slice start for `sql`.
fn find_next_template_open(sql: &str, from: usize) -> Option<(usize, &'static str)> {
    const DELIMITERS: [(&str, &str); 3] = [("{{", "}}"), ("{%", "%}"), ("{#", "#}")];

    let tail = sql.get(from..)?;
    let mut earliest: Option<(usize, &'static str)> = None;

    for (opener, closer) in DELIMITERS {
        let Some(offset) = tail.find(opener) else {
            continue;
        };
        let position = from + offset;
        // Strict comparison keeps the first candidate on a tie.
        if earliest.map_or(true, |(best, _)| position < best) {
            earliest = Some((position, closer));
        }
    }

    earliest
}
271
/// Replaces every character except `'\n'` with a single space, so the line
/// structure of `segment` is kept while its content is blanked out.
fn mask_non_newlines(segment: &str) -> String {
    let mut blanked = String::with_capacity(segment.len());
    for ch in segment.chars() {
        blanked.push(match ch {
            '\n' => '\n',
            _ => ' ',
        });
    }
    blanked
}
278
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql;

    /// Builds an enabled `LintConfig` containing only the given
    /// `(section, options)` entries.
    fn config_with(sections: Vec<(&str, serde_json::Value)>) -> LintConfig {
        LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: sections
                .into_iter()
                .map(|(section, options)| (section.to_string(), options))
                .collect::<std::collections::BTreeMap<_, _>>(),
        }
    }

    /// Parses `parsed_sql` and checks its first statement, using
    /// `source_sql` as the raw source in the lint context.
    fn check_first(
        rule: &ConventionBlockedWords,
        parsed_sql: &str,
        source_sql: &str,
    ) -> Vec<Issue> {
        let statements = parse_sql(parsed_sql).expect("parse");
        rule.check(
            &statements[0],
            &LintContext {
                sql: source_sql,
                statement_range: 0..source_sql.len(),
                statement_index: 0,
            },
        )
    }

    /// Asserts that exactly one `LINT_CV_009` issue was reported.
    #[track_caller]
    fn assert_single_cv_009(issues: &[Issue]) {
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_CV_009);
    }

    /// Runs the default rule over every statement parsed from `sql`.
    fn run(sql: &str) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = ConventionBlockedWords::default();
        let mut issues = Vec::new();
        for (index, statement) in statements.iter().enumerate() {
            issues.extend(rule.check(
                statement,
                &LintContext {
                    sql,
                    statement_range: 0..sql.len(),
                    statement_index: index,
                },
            ));
        }
        issues
    }

    #[test]
    fn flags_blocked_word() {
        assert_single_cv_009(&run("SELECT foo FROM t"));
    }

    #[test]
    fn does_not_flag_clean_identifier() {
        assert!(run("SELECT customer_id FROM t").is_empty());
    }

    #[test]
    fn does_not_flag_blocked_word_in_string_literal() {
        assert!(run("SELECT 'foo' AS note FROM t").is_empty());
    }

    #[test]
    fn flags_blocked_table_name() {
        assert_single_cv_009(&run("SELECT id FROM foo"));
    }

    #[test]
    fn flags_blocked_projection_alias() {
        assert_single_cv_009(&run("SELECT amount AS bar FROM t"));
    }

    #[test]
    fn flags_blocked_table_alias() {
        assert_single_cv_009(&run(
            "SELECT foo.id FROM users foo JOIN orders o ON foo.id = o.user_id",
        ));
    }

    #[test]
    fn configured_blocked_words_override_default_list() {
        let config = config_with(vec![(
            "convention.blocked_words",
            serde_json::json!({"blocked_words": ["wip"]}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT foo, wip FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn configured_blocked_regex_matches_identifier() {
        let config = config_with(vec![(
            "LINT_CV_009",
            serde_json::json!({"blocked_words": [], "blocked_regex": "^TMP_"}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT tmp_value FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn blocked_regex_array_matches_identifier() {
        let config = config_with(vec![(
            "LINT_CV_009",
            serde_json::json!({"blocked_words": [], "blocked_regex": ["^TMP_", "^WIP_"]}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT wip_item FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn match_source_true_allows_raw_sql_regex_matching() {
        let config = config_with(vec![(
            "convention.blocked_words",
            serde_json::json!({"blocked_words": [], "blocked_regex": "TODO", "match_source": true}),
        )]);
        let rule = ConventionBlockedWords::from_config(&config);
        let sql = "SELECT 'TODO' AS note FROM t";
        let issues = check_first(&rule, sql, sql);
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn match_source_true_checks_full_source_in_statementless_mode() {
        let config = config_with(vec![
            (
                "core",
                serde_json::json!({"ignore_templated_areas": false}),
            ),
            (
                "convention.blocked_words",
                serde_json::json!({
                    "blocked_words": [],
                    "blocked_regex": "ref\\('deprecated_",
                    "match_source": true
                }),
            ),
        ]);
        let rule = ConventionBlockedWords::from_config(&config);
        // The parsed statement is a stand-in; only the raw source carries
        // the blocked pattern.
        let source = "SELECT * FROM {{ ref('deprecated_table') }}";
        let issues = check_first(&rule, "SELECT 1", source);
        assert_single_cv_009(&issues);
    }

    #[test]
    fn match_source_true_respects_ignore_templated_areas_core_option() {
        let config = config_with(vec![
            (
                "core",
                serde_json::json!({"ignore_templated_areas": true}),
            ),
            (
                "convention.blocked_words",
                serde_json::json!({
                    "blocked_words": [],
                    "blocked_regex": "ref\\('deprecated_",
                    "match_source": true
                }),
            ),
        ]);
        let rule = ConventionBlockedWords::from_config(&config);
        // The pattern sits inside a templated area, which is masked out.
        let source = "SELECT * FROM {{ ref('deprecated_table') }}";
        let issues = check_first(&rule, "SELECT 1", source);
        assert!(issues.is_empty());
    }
}