flowscope_core/linter/rules/
rf_005.rs1use std::collections::HashSet;
7
8use crate::linter::config::LintConfig;
9use crate::linter::rule::{LintContext, LintRule};
10use crate::types::{issue_codes, Dialect, Issue};
11use regex::Regex;
12use sqlparser::ast::Statement;
13
14use super::identifier_candidates_helpers::{
15 collect_identifier_candidates, IdentifierCandidate, IdentifierPolicy,
16};
17
/// LINT_RF_005: flags identifiers containing characters outside the allowed
/// set ([A-Za-z0-9_], plus configured exceptions and dialect-specific extras).
pub struct ReferencesSpecialChars {
    // Which identifier kinds are checked when the identifier is quoted.
    quoted_policy: IdentifierPolicy,
    // Which identifier kinds are checked when the identifier is unquoted.
    unquoted_policy: IdentifierPolicy,
    // Extra characters the user explicitly allows inside identifiers.
    additional_allowed_characters: HashSet<char>,
    // When true, a literal space is treated as an allowed character.
    allow_space_in_identifier: bool,
    // Normalized (unquoted, uppercased) identifiers that are never flagged.
    ignore_words: HashSet<String>,
    // Optional regex matched against the raw (unquoted) identifier text;
    // matches are never flagged.
    ignore_words_regex: Option<Regex>,
}
26
27impl ReferencesSpecialChars {
28 pub fn from_config(config: &LintConfig) -> Self {
29 Self {
30 quoted_policy: IdentifierPolicy::from_config(
31 config,
32 issue_codes::LINT_RF_005,
33 "quoted_identifiers_policy",
34 "all",
35 ),
36 unquoted_policy: IdentifierPolicy::from_config(
37 config,
38 issue_codes::LINT_RF_005,
39 "unquoted_identifiers_policy",
40 "all",
41 ),
42 additional_allowed_characters: configured_additional_allowed_characters(config),
43 allow_space_in_identifier: config
44 .rule_option_bool(issue_codes::LINT_RF_005, "allow_space_in_identifier")
45 .unwrap_or(false),
46 ignore_words: configured_ignore_words(config)
47 .into_iter()
48 .map(|word| normalize_token(&word))
49 .collect(),
50 ignore_words_regex: config
51 .rule_option_str(issue_codes::LINT_RF_005, "ignore_words_regex")
52 .filter(|pattern| !pattern.trim().is_empty())
53 .and_then(|pattern| Regex::new(pattern).ok()),
54 }
55 }
56}
57
58impl Default for ReferencesSpecialChars {
59 fn default() -> Self {
60 Self {
61 quoted_policy: IdentifierPolicy::All,
62 unquoted_policy: IdentifierPolicy::All,
63 additional_allowed_characters: HashSet::new(),
64 allow_space_in_identifier: false,
65 ignore_words: HashSet::new(),
66 ignore_words_regex: None,
67 }
68 }
69}
70
71impl LintRule for ReferencesSpecialChars {
72 fn code(&self) -> &'static str {
73 issue_codes::LINT_RF_005
74 }
75
76 fn name(&self) -> &'static str {
77 "References special chars"
78 }
79
80 fn description(&self) -> &'static str {
81 "Do not use special characters in identifiers."
82 }
83
84 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
85 let dialect = ctx.dialect();
86 let has_special_chars = collect_identifier_candidates(statement)
87 .into_iter()
88 .any(|candidate| candidate_triggers_rule(&candidate, self, dialect))
89 || show_tblproperties_property_key_triggers_rule(ctx.statement_sql(), self, dialect);
90
91 if has_special_chars {
92 vec![Issue::warning(
93 issue_codes::LINT_RF_005,
94 "Identifier contains unsupported special characters.",
95 )
96 .with_statement(ctx.statement_index)]
97 } else {
98 Vec::new()
99 }
100 }
101}
102
/// Decide whether a single identifier candidate violates RF_005 under the
/// active dialect, honoring configured policies, ignore lists, and
/// dialect-specific extra characters.
fn candidate_triggers_rule(
    candidate: &IdentifierCandidate,
    rule: &ReferencesSpecialChars,
    dialect: Dialect,
) -> bool {
    // Configured ignore words / ignore regex suppress the check entirely.
    if is_ignored_token(&candidate.value, rule) {
        return false;
    }

    // Quoted and unquoted identifiers can be scoped by separate policies.
    let policy = if candidate.quoted {
        rule.quoted_policy
    } else {
        rule.unquoted_policy
    };
    if !policy.allows(candidate.kind) {
        return false;
    }

    // A "quoted" candidate whose value is still wrapped in single quotes is
    // a string literal rather than an identifier — never flag it.
    if candidate.quoted && candidate.value.starts_with('\'') && candidate.value.ends_with('\'') {
        return false;
    }

    // Backtick-quoted identifiers get dialect-specific handling.
    if candidate.quote_char == Some('`') {
        match dialect {
            Dialect::Bigquery => {
                let value = &candidate.value;
                // A '*' anywhere before the final position always triggers;
                // a trailing '*' is tolerated via the extras list below.
                // NOTE(review): `i + 1 < value.len()` compares a char's byte
                // index against the byte length, so only an ASCII-final '*'
                // is treated as trailing — presumably fine for identifiers;
                // confirm if multi-byte identifier chars are expected.
                let has_mid_star = value
                    .char_indices()
                    .any(|(i, ch)| ch == '*' && i + 1 < value.len());
                if has_mid_star {
                    return true;
                }
                // Backticked BigQuery names additionally allow '-', '.', '*'.
                return contains_disallowed_identifier_chars_with_extras(
                    value,
                    &rule.additional_allowed_characters,
                    rule.allow_space_in_identifier,
                    &['-', '.', '*'],
                );
            }
            Dialect::Databricks => {
                // Backticked Databricks identifiers are never flagged here.
                return false;
            }
            _ => {}
        }
    }

    // Unquoted BigQuery identifiers additionally allow '-' and '.'.
    if matches!(dialect, Dialect::Bigquery) && !candidate.quoted {
        return contains_disallowed_identifier_chars_with_extras(
            &candidate.value,
            &rule.additional_allowed_characters,
            rule.allow_space_in_identifier,
            &['-', '.'],
        );
    }

    // Unquoted Snowflake identifiers additionally allow '$'.
    if matches!(dialect, Dialect::Snowflake) && !candidate.quoted {
        return contains_disallowed_identifier_chars_with_extras(
            &candidate.value,
            &rule.additional_allowed_characters,
            rule.allow_space_in_identifier,
            &['$'],
        );
    }

    // Default: only [A-Za-z0-9_] plus configured extras (and space, if
    // allowed) are permitted.
    contains_disallowed_identifier_chars(
        &candidate.value,
        &rule.additional_allowed_characters,
        rule.allow_space_in_identifier,
    )
}
183
184fn show_tblproperties_property_key_triggers_rule(
185 sql: &str,
186 rule: &ReferencesSpecialChars,
187 dialect: Dialect,
188) -> bool {
189 if !matches!(dialect, Dialect::Databricks) {
190 return false;
191 }
192
193 let lowered = sql.to_ascii_lowercase();
196 if !lowered.contains("show tblproperties") {
197 return false;
198 }
199
200 let Some(open_paren) = sql.find('(') else {
201 return false;
202 };
203 let Some(close_rel) = sql[open_paren + 1..].find(')') else {
204 return false;
205 };
206 let inside = sql[open_paren + 1..open_paren + 1 + close_rel].trim();
207 if inside.len() < 2 || !inside.starts_with('\'') || !inside.ends_with('\'') {
208 return false;
209 }
210
211 let property_key = inside.trim_matches('\'');
212 if is_ignored_token(property_key, rule) {
213 return false;
214 }
215
216 contains_disallowed_identifier_chars_with_extras(
217 property_key,
218 &rule.additional_allowed_characters,
219 rule.allow_space_in_identifier,
220 &['.'],
221 )
222}
223
/// True when `ident` contains any character outside the base allowed set:
/// ASCII alphanumerics, '_', the configured extra characters, and — when
/// `allow_space` is set — the space character. Empty input never triggers.
fn contains_disallowed_identifier_chars(
    ident: &str,
    additional_allowed: &HashSet<char>,
    allow_space: bool,
) -> bool {
    let allowed = |ch: char| {
        ch.is_ascii_alphanumeric()
            || ch == '_'
            || (allow_space && ch == ' ')
            || additional_allowed.contains(&ch)
    };
    !ident.chars().all(allowed)
}
236
/// Like `contains_disallowed_identifier_chars`, but also accepts every
/// character listed in `extras` (used for dialect-specific characters such
/// as '-', '.', '*', or '$'). Empty input never triggers.
fn contains_disallowed_identifier_chars_with_extras(
    ident: &str,
    additional_allowed: &HashSet<char>,
    allow_space: bool,
    extras: &[char],
) -> bool {
    let allowed = |ch: char| {
        ch.is_ascii_alphanumeric()
            || ch == '_'
            || (allow_space && ch == ' ')
            || extras.contains(&ch)
            || additional_allowed.contains(&ch)
    };
    !ident.chars().all(allowed)
}
251
252fn configured_additional_allowed_characters(config: &LintConfig) -> HashSet<char> {
253 if let Some(values) =
254 config.rule_option_string_list(issue_codes::LINT_RF_005, "additional_allowed_characters")
255 {
256 let mut chars = HashSet::new();
257 for value in values {
258 chars.extend(value.chars());
259 }
260 return chars;
261 }
262
263 config
264 .rule_option_str(issue_codes::LINT_RF_005, "additional_allowed_characters")
265 .map(|value| {
266 value
267 .split(',')
268 .flat_map(|item| item.trim().chars())
269 .collect()
270 })
271 .unwrap_or_default()
272}
273
274fn configured_ignore_words(config: &LintConfig) -> Vec<String> {
275 if let Some(words) = config.rule_option_string_list(issue_codes::LINT_RF_005, "ignore_words") {
276 return words;
277 }
278
279 config
280 .rule_option_str(issue_codes::LINT_RF_005, "ignore_words")
281 .map(|words| {
282 words
283 .split(',')
284 .map(str::trim)
285 .filter(|word| !word.is_empty())
286 .map(str::to_string)
287 .collect()
288 })
289 .unwrap_or_default()
290}
291
292fn is_ignored_token(token: &str, rule: &ReferencesSpecialChars) -> bool {
293 let normalized = normalize_token(token);
294 if rule.ignore_words.contains(&normalized) {
296 return true;
297 }
298 if let Some(regex) = &rule.ignore_words_regex {
301 let raw = token
302 .trim()
303 .trim_matches(|ch| matches!(ch, '"' | '`' | '\'' | '[' | ']'));
304 if regex.is_match(raw) {
305 return true;
306 }
307 }
308 false
309}
310
/// Canonicalize an identifier for ignore-word comparison: strip surrounding
/// whitespace and quote characters ("", ``, '', []), then uppercase.
fn normalize_token(token: &str) -> String {
    const QUOTE_CHARS: &[char] = &['"', '`', '\'', '[', ']'];
    token.trim().trim_matches(QUOTE_CHARS).to_ascii_uppercase()
}
317
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::rule::with_active_dialect;
    use crate::parser::parse_sql;
    use crate::parser::parse_sql_with_dialect;
    use crate::types::Dialect;

    /// Lint `sql` with the default rule configuration.
    fn run(sql: &str) -> Vec<Issue> {
        run_with_config(sql, LintConfig::default())
    }

    /// Lint `sql` with an explicit configuration, collecting issues from
    /// every parsed statement.
    fn run_with_config(sql: &str, config: LintConfig) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = ReferencesSpecialChars::from_config(&config);
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        // Precise per-statement ranges are not needed by
                        // this rule, so every statement gets the full span.
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    /// Lint `sql` under a specific dialect with default rule settings.
    fn run_in_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
        let statements = parse_sql_with_dialect(sql, dialect).expect("parse");
        let rule = ReferencesSpecialChars::default();
        let mut issues = Vec::new();
        with_active_dialect(dialect, || {
            for (index, statement) in statements.iter().enumerate() {
                issues.extend(rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                ));
            }
        });
        issues
    }

    #[test]
    fn flags_quoted_identifier_with_hyphen() {
        let issues = run("SELECT \"bad-name\" FROM t");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_005);
    }

    #[test]
    fn does_not_flag_quoted_identifier_with_underscore() {
        let issues = run("SELECT \"good_name\" FROM t");
        assert!(issues.is_empty());
    }

    #[test]
    fn does_not_flag_double_quotes_inside_string_literal() {
        let issues = run("SELECT '\"bad-name\"' AS note FROM t");
        assert!(issues.is_empty());
    }

    // NOTE(review): some tests below key `rule_configs` by the name
    // "references.special_chars" and others by "LINT_RF_005" — presumably
    // LintConfig resolves both to this rule; confirm in the config lookup.
    #[test]
    fn additional_allowed_characters_permit_hyphen() {
        let issues = run_with_config(
            "SELECT \"bad-name\" FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.special_chars".to_string(),
                    serde_json::json!({"additional_allowed_characters": "-"}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn quoted_policy_none_skips_quoted_identifier_checks() {
        let issues = run_with_config(
            "SELECT \"bad-name\" FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "LINT_RF_005".to_string(),
                    serde_json::json!({"quoted_identifiers_policy": "none"}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn ignore_words_suppresses_configured_identifier() {
        let issues = run_with_config(
            "SELECT \"bad-name\" FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.special_chars".to_string(),
                    serde_json::json!({"ignore_words": ["bad-name"]}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn ignore_words_regex_suppresses_configured_identifier() {
        let issues = run_with_config(
            "SELECT \"bad-name\" FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "LINT_RF_005".to_string(),
                    serde_json::json!({"ignore_words_regex": "^bad-"}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    // The regex path intentionally matches the raw (case-preserved) token,
    // unlike ignore_words which is compared case-insensitively.
    #[test]
    fn ignore_words_regex_is_case_sensitive() {
        let issues = run_with_config(
            "SELECT \"bad-name\" FROM t",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "LINT_RF_005".to_string(),
                    serde_json::json!({"ignore_words_regex": "^BAD-"}),
                )]),
            },
        );
        assert_eq!(issues.len(), 1, "regex should be case-sensitive");
    }

    #[test]
    fn flags_create_table_column_with_space() {
        let issues = run("CREATE TABLE DBO.ColumnNames (\n    \"Internal Space\" INT\n)");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_005);
    }

    #[test]
    fn allow_space_in_identifier_permits_space() {
        let issues = run_with_config(
            "CREATE TABLE DBO.ColumnNames (\n    \"Internal Space\" INT\n)",
            LintConfig {
                enabled: true,
                disabled_rules: vec![],
                rule_configs: std::collections::BTreeMap::from([(
                    "references.special_chars".to_string(),
                    serde_json::json!({"allow_space_in_identifier": true}),
                )]),
            },
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn sparksql_show_tblproperties_allows_dot_in_property_key() {
        let issues = run_in_dialect(
            "SHOW TBLPROPERTIES customer ('created.date');",
            Dialect::Databricks,
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn sparksql_show_tblproperties_flags_wildcard_in_property_key() {
        let issues = run_in_dialect(
            "SHOW TBLPROPERTIES customer ('created.*');",
            Dialect::Databricks,
        );
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_RF_005);
    }
}