flowscope_core/linter/rules/cp_002.rs
1//! LINT_CP_002: Identifier capitalisation.
2//!
3//! SQLFluff CP02 parity (current scope): detect inconsistent identifier case.
4
5use std::collections::HashSet;
6
7use crate::linter::config::LintConfig;
8use crate::linter::rule::{LintContext, LintRule};
9use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
10use regex::Regex;
11use sqlparser::ast::{ObjectName, Statement};
12use sqlparser::keywords::Keyword;
13use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
14
15use super::capitalisation_policy_helpers::{
16    ignored_words_from_config, ignored_words_regex_from_config, token_is_ignored,
17    tokens_violate_policy, CapitalisationPolicy,
18};
19use super::identifier_candidates_helpers::{
20    collect_identifier_candidates, IdentifierKind, IdentifierPolicy,
21};
22
/// Rule state for LINT_CP_002: inconsistent capitalisation of unquoted
/// identifiers (SQLFluff CP02 parity).
pub struct CapitalisationIdentifiers {
    // Target case style, or `Consistent` (resolved per statement at check time).
    policy: CapitalisationPolicy,
    // Which unquoted identifiers are in scope (all, or alias-only variants).
    unquoted_policy: IdentifierPolicy,
    // Exact identifier values excluded from the check.
    ignore_words: HashSet<String>,
    // Optional pattern; matching identifier values are excluded from the check.
    ignore_words_regex: Option<Regex>,
}
29
30impl CapitalisationIdentifiers {
31    pub fn from_config(config: &LintConfig) -> Self {
32        Self {
33            policy: CapitalisationPolicy::from_rule_config(
34                config,
35                issue_codes::LINT_CP_002,
36                "extended_capitalisation_policy",
37            ),
38            unquoted_policy: IdentifierPolicy::from_config(
39                config,
40                issue_codes::LINT_CP_002,
41                "unquoted_identifiers_policy",
42                "all",
43            ),
44            ignore_words: ignored_words_from_config(config, issue_codes::LINT_CP_002),
45            ignore_words_regex: ignored_words_regex_from_config(config, issue_codes::LINT_CP_002),
46        }
47    }
48}
49
50impl Default for CapitalisationIdentifiers {
51    fn default() -> Self {
52        Self {
53            policy: CapitalisationPolicy::Consistent,
54            unquoted_policy: IdentifierPolicy::All,
55            ignore_words: HashSet::new(),
56            ignore_words_regex: None,
57        }
58    }
59}
60
impl LintRule for CapitalisationIdentifiers {
    fn code(&self) -> &'static str {
        issue_codes::LINT_CP_002
    }

    fn name(&self) -> &'static str {
        "Identifier capitalisation"
    }

    fn description(&self) -> &'static str {
        "Inconsistent capitalisation of unquoted identifiers."
    }

    /// Detect capitalisation violations in one statement and attach safe
    /// autofix patches where exact token positions can be recovered.
    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        // Databricks delta property keys in SET statements are exempt from
        // re-casing (see `databricks_case_sensitive_set_property`).
        if databricks_case_sensitive_set_property(statement, ctx.dialect()) {
            return Vec::new();
        }

        // Prefer AST-derived candidates; they honour quoting and the
        // configured unquoted-identifiers scope.
        let ast_identifiers = identifier_tokens(
            statement,
            self.unquoted_policy,
            &self.ignore_words,
            self.ignore_words_regex.as_ref(),
        );
        // Fall back to a purely lexical token scan only when the AST produced
        // nothing AND the policy covers all identifiers.
        let use_lexical_fallback =
            ast_identifiers.is_empty() && self.unquoted_policy == IdentifierPolicy::All;
        let identifiers = if use_lexical_fallback {
            lexical_identifier_tokens(
                ctx.statement_sql(),
                ctx.dialect(),
                &self.ignore_words,
                self.ignore_words_regex.as_ref(),
            )
        } else {
            ast_identifiers
        };

        if !tokens_violate_policy(&identifiers, self.policy) {
            return Vec::new();
        }

        // Compute replacement edits from the same source (lexical vs
        // AST-guided) that detection used, so positions stay in sync.
        let autofix_edits = if use_lexical_fallback {
            lexical_identifier_autofix_edits(
                ctx.statement_sql(),
                ctx.dialect(),
                self.policy,
                &self.ignore_words,
                self.ignore_words_regex.as_ref(),
            )
        } else {
            identifier_autofix_edits(
                ctx.statement_sql(),
                ctx.dialect(),
                self.policy,
                self.unquoted_policy,
                &self.ignore_words,
                self.ignore_words_regex.as_ref(),
                statement,
            )
        };

        // Emit one issue per violating identifier at its specific position
        // (SQLFluff reports per-identifier, not per-statement).
        if autofix_edits.is_empty() {
            // Detection found inconsistency but autofix couldn't locate exact
            // positions — fall back to a single statement-level issue.
            return vec![Issue::info(
                issue_codes::LINT_CP_002,
                "Identifiers use inconsistent capitalisation.",
            )
            .with_statement(ctx.statement_index)];
        }

        autofix_edits
            .into_iter()
            .map(|edit| {
                // Translate statement-relative offsets into a file-level span
                // and package the recased text as a safe autofix patch.
                let span = ctx.span_from_statement_offset(edit.start, edit.end);
                let patch = IssuePatchEdit::new(Span::new(span.start, span.end), edit.replacement);
                Issue::info(
                    issue_codes::LINT_CP_002,
                    "Identifiers use inconsistent capitalisation.",
                )
                .with_statement(ctx.statement_index)
                .with_span(span)
                .with_autofix_edits(IssueAutofixApplicability::Safe, vec![patch])
            })
            .collect()
    }
}
150
151fn identifier_tokens(
152    statement: &Statement,
153    unquoted_policy: IdentifierPolicy,
154    ignore_words: &HashSet<String>,
155    ignore_words_regex: Option<&Regex>,
156) -> Vec<String> {
157    collect_identifier_candidates(statement)
158        .into_iter()
159        .filter_map(|candidate| {
160            if candidate.quoted || !unquoted_policy.allows(candidate.kind) {
161                return None;
162            }
163
164            if token_is_ignored(candidate.value.as_str(), ignore_words, ignore_words_regex) {
165                return None;
166            }
167
168            Some(candidate.value)
169        })
170        .collect()
171}
172
/// A single CP02 autofix: replace the statement-SQL range `start..end` with
/// `replacement` (offsets are statement-relative; see `token_offsets`).
struct Cp002AutofixEdit {
    // Start offset of the identifier token within the statement's SQL text.
    start: usize,
    // End offset (exclusive) of the identifier token.
    end: usize,
    // The identifier rewritten in the target case.
    replacement: String,
}
178
/// Build recasing edits for the AST-guided (non-fallback) path.
///
/// Re-tokenizes the statement SQL, then for every unquoted word that is in
/// scope (per `unquoted_policy`, ignore lists, and the AST candidate set)
/// emits a replacement in the resolved target case. Returns an empty vec if
/// tokenization fails (no positional fixes are possible then).
fn identifier_autofix_edits(
    sql: &str,
    dialect: Dialect,
    policy: CapitalisationPolicy,
    unquoted_policy: IdentifierPolicy,
    ignore_words: &HashSet<String>,
    ignore_words_regex: Option<&Regex>,
    statement: &Statement,
) -> Vec<Cp002AutofixEdit> {
    let Some(tokens) = tokenized(sql, dialect) else {
        return Vec::new();
    };

    // Collect AST-level identifier candidates. Used for:
    // 1. Alias-only policies: only fix identifiers that match the policy filter.
    // 2. All policies: allow keyword-classified tokens through when they match
    //    AST identifiers (e.g. SHOW TBLPROPERTIES property names).
    let all_candidates = collect_identifier_candidates(statement);
    let ast_ident_set: HashSet<String> = all_candidates
        .iter()
        .filter(|c| !c.quoted)
        .filter(|c| !token_is_ignored(&c.value, ignore_words, ignore_words_regex))
        .map(|c| c.value.clone())
        .collect();

    // For alias-only policies, build the set of alias values that match the
    // policy filter (column aliases, table aliases, or both).
    // Additionally, build a set of column-alias-only and table-alias-only
    // values to distinguish them when a value appears as both types.
    let alias_set: Option<HashSet<String>> = if unquoted_policy != IdentifierPolicy::All {
        let names: HashSet<String> = all_candidates
            .iter()
            .filter(|c| !c.quoted && unquoted_policy.allows(c.kind))
            .filter(|c| !token_is_ignored(&c.value, ignore_words, ignore_words_regex))
            .map(|c| c.value.clone())
            .collect();
        Some(names)
    } else {
        None
    };

    // Build a per-value sequence of alias kinds from the AST (in order of
    // appearance) for position-based disambiguation.
    // E.g., if `lower_case` appears as ColumnAlias then TableAlias, we get:
    //   alias_kind_seq["lower_case"] = [ColumnAlias, TableAlias]
    let alias_kind_seq: std::collections::HashMap<String, Vec<IdentifierKind>> = {
        let mut seq: std::collections::HashMap<String, Vec<IdentifierKind>> =
            std::collections::HashMap::new();
        for c in &all_candidates {
            if c.quoted || token_is_ignored(&c.value, ignore_words, ignore_words_regex) {
                continue;
            }
            if matches!(
                c.kind,
                IdentifierKind::ColumnAlias | IdentifierKind::TableAlias
            ) {
                seq.entry(c.value.clone()).or_default().push(c.kind);
            }
        }
        seq
    };
    // Track per-value occurrence counters during the token scan.
    let mut alias_occurrence_counters: std::collections::HashMap<String, usize> =
        std::collections::HashMap::new();

    // Resolve the effective policy: for Consistent, determine the concrete
    // target case using SQLFluff's refutation algorithm.
    // When an alias-only policy is active, only consider tokens in alias
    // position (after AS keyword) for the consistent resolution.
    let effective_policy = if policy == CapitalisationPolicy::Consistent {
        if alias_set.is_some() {
            resolve_consistent_policy_alias_only(&tokens, ignore_words, ignore_words_regex)
        } else {
            resolve_consistent_policy(&tokens, ignore_words, ignore_words_regex, &ast_ident_set)
        }
    } else {
        policy
    };

    let mut edits = Vec::new();
    for (index, token) in tokens.iter().enumerate() {
        let Token::Word(word) = &token.token else {
            continue;
        };
        if word.quote_style.is_some() {
            continue;
        }
        // Skip words that are SQL keywords (not identifiers) unless they
        // appear in the AST-level identifier set (e.g. SHOW TBLPROPERTIES
        // property names that the tokenizer classifies as keywords but the
        // AST recognizes as identifiers).
        if word.keyword != Keyword::NoKeyword && !ast_ident_set.contains(&word.value) {
            continue;
        }
        if token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex) {
            continue;
        }

        // For alias-only policies, only fix tokens that are in alias
        // position (after AS keyword) to avoid modifying non-alias references
        // that happen to share the same name.
        if alias_set.is_some() {
            let prev_non_trivia = prev_non_trivia_index(&tokens, index);
            let is_after_as = prev_non_trivia
                .map(|pi| {
                    matches!(
                        &tokens[pi].token,
                        Token::Word(w) if w.keyword == Keyword::AS
                    )
                })
                .unwrap_or(false);
            if !is_after_as {
                continue;
            }
            // Disambiguate column vs table aliases using the AST-derived
            // alias kind sequence. For each after-AS occurrence of a value,
            // look up its n-th alias kind from the AST to determine whether
            // this specific occurrence is a column alias or table alias.
            let val = &word.value;
            let occurrence = alias_occurrence_counters.entry(val.clone()).or_insert(0);
            let kind = alias_kind_seq
                .get(val)
                .and_then(|kinds| kinds.get(*occurrence))
                .copied();
            *alias_occurrence_counters.get_mut(val).unwrap() += 1;
            // No AST kind for this occurrence (None) means we can't prove it
            // is an in-scope alias, so leave it untouched.
            match kind {
                Some(k) if !unquoted_policy.allows(k) => continue,
                None => continue,
                _ => {}
            }
        }

        // A word directly followed by `(` is treated as a function name, not
        // an identifier, and is out of scope for this rule.
        let next_index = next_non_trivia_index(&tokens, index + 1);
        let is_function_name = next_index
            .map(|next| matches!(tokens[next].token, Token::LParen))
            .unwrap_or(false);
        if is_function_name {
            continue;
        }

        let Some(replacement) = identifier_case_replacement(word.value.as_str(), effective_policy)
        else {
            continue;
        };
        // Already in the target case: nothing to fix.
        if replacement == word.value {
            continue;
        }

        let Some((start, end)) = token_offsets(sql, token) else {
            continue;
        };
        edits.push(Cp002AutofixEdit {
            start,
            end,
            replacement,
        });
    }

    edits
}
339
340fn lexical_identifier_tokens(
341    sql: &str,
342    dialect: Dialect,
343    ignore_words: &HashSet<String>,
344    ignore_words_regex: Option<&Regex>,
345) -> Vec<String> {
346    let Some(tokens) = tokenized(sql, dialect) else {
347        return Vec::new();
348    };
349    lexical_identifier_values_from_tokens(&tokens, ignore_words, ignore_words_regex)
350}
351
352fn lexical_identifier_autofix_edits(
353    sql: &str,
354    dialect: Dialect,
355    policy: CapitalisationPolicy,
356    ignore_words: &HashSet<String>,
357    ignore_words_regex: Option<&Regex>,
358) -> Vec<Cp002AutofixEdit> {
359    let Some(tokens) = tokenized(sql, dialect) else {
360        return Vec::new();
361    };
362    let lexical_identifiers =
363        lexical_identifier_values_from_tokens(&tokens, ignore_words, ignore_words_regex);
364    if lexical_identifiers.is_empty() {
365        return Vec::new();
366    }
367
368    let relevant_idents = lexical_identifiers.into_iter().collect::<HashSet<_>>();
369    let effective_policy = if policy == CapitalisationPolicy::Consistent {
370        resolve_consistent_policy(&tokens, ignore_words, ignore_words_regex, &relevant_idents)
371    } else {
372        policy
373    };
374
375    let mut edits = Vec::new();
376    for (index, token) in tokens.iter().enumerate() {
377        let Token::Word(word) = &token.token else {
378            continue;
379        };
380        if !is_lexical_identifier_candidate(&tokens, index, ignore_words, ignore_words_regex) {
381            continue;
382        }
383
384        let Some(replacement) = identifier_case_replacement(word.value.as_str(), effective_policy)
385        else {
386            continue;
387        };
388        if replacement == word.value {
389            continue;
390        }
391
392        let Some((start, end)) = token_offsets(sql, token) else {
393            continue;
394        };
395        edits.push(Cp002AutofixEdit {
396            start,
397            end,
398            replacement,
399        });
400    }
401
402    edits
403}
404
405fn lexical_identifier_values_from_tokens(
406    tokens: &[TokenWithSpan],
407    ignore_words: &HashSet<String>,
408    ignore_words_regex: Option<&Regex>,
409) -> Vec<String> {
410    let mut out = Vec::new();
411    for (index, token) in tokens.iter().enumerate() {
412        let Token::Word(word) = &token.token else {
413            continue;
414        };
415        if !is_lexical_identifier_candidate(tokens, index, ignore_words, ignore_words_regex) {
416            continue;
417        }
418        out.push(word.value.clone());
419    }
420    out
421}
422
423fn is_lexical_identifier_candidate(
424    tokens: &[TokenWithSpan],
425    index: usize,
426    ignore_words: &HashSet<String>,
427    ignore_words_regex: Option<&Regex>,
428) -> bool {
429    let Token::Word(word) = &tokens[index].token else {
430        return false;
431    };
432    if word.quote_style.is_some() || word.keyword != Keyword::NoKeyword {
433        return false;
434    }
435    if token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex) {
436        return false;
437    }
438    if is_placeholder_variable_word(tokens, index) {
439        return false;
440    }
441    if token_has_div_neighbor(tokens, index) {
442        return false;
443    }
444    if is_create_task_option_name(tokens, index) {
445        return false;
446    }
447
448    let next_index = next_non_trivia_index(tokens, index + 1);
449    if next_index
450        .map(|next| matches!(tokens[next].token, Token::LParen))
451        .unwrap_or(false)
452        && !is_copy_into_target_name(tokens, index)
453    {
454        return false;
455    }
456
457    true
458}
459
460fn token_has_div_neighbor(tokens: &[TokenWithSpan], index: usize) -> bool {
461    prev_non_trivia_index(tokens, index)
462        .map(|prev| matches!(tokens[prev].token, Token::Div))
463        .unwrap_or(false)
464        || next_non_trivia_index(tokens, index + 1)
465            .map(|next| matches!(tokens[next].token, Token::Div))
466            .unwrap_or(false)
467}
468
469fn is_placeholder_variable_word(tokens: &[TokenWithSpan], index: usize) -> bool {
470    let prev = prev_non_trivia_index(tokens, index).map(|idx| &tokens[idx].token);
471    let next = next_non_trivia_index(tokens, index + 1).map(|idx| &tokens[idx].token);
472
473    matches!(
474        prev,
475        Some(Token::Placeholder(_) | Token::Char('$') | Token::LBrace)
476    ) || matches!(next, Some(Token::RBrace))
477}
478
/// Heuristic: does the word at `word_index` name the target of a `COPY INTO`
/// statement? Walks backwards through non-trivia tokens looking for `INTO`
/// immediately preceded by `COPY`, aborting on tokens that indicate a
/// different clause context.
fn is_copy_into_target_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    let mut cursor = word_index;
    let mut steps = 0usize;

    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            // Found INTO: it's a COPY INTO target only if COPY comes before.
            Token::Word(word) if word.keyword == Keyword::INTO => {
                let Some(copy_idx) = prev_non_trivia_index(tokens, prev_idx) else {
                    return false;
                };
                return matches!(
                    &tokens[copy_idx].token,
                    Token::Word(copy_word) if copy_word.keyword == Keyword::COPY
                );
            }
            // Clause keywords that rule out a COPY INTO target position.
            Token::Word(word)
                if matches!(
                    word.keyword,
                    Keyword::FROM
                        | Keyword::SELECT
                        | Keyword::WHERE
                        | Keyword::JOIN
                        | Keyword::ON
                        | Keyword::HAVING
                ) =>
            {
                return false;
            }
            // Structural punctuation also ends the search unsuccessfully.
            Token::SemiColon | Token::Comma | Token::LParen | Token::RParen => return false,
            _ => {}
        }

        cursor = prev_idx;
        steps += 1;
        // Bounded backward scan keeps the worst case cheap on long statements.
        if steps > 48 {
            return false;
        }
    }

    false
}
520
/// Heuristic: is the word at `word_index` an option key (`name = value`) in
/// the header of a `CREATE TASK` statement (i.e. before its `AS` clause)?
fn is_create_task_option_name(tokens: &[TokenWithSpan], word_index: usize) -> bool {
    // Option keys are always immediately followed by `=`.
    let Some(next_idx) = next_non_trivia_index(tokens, word_index + 1) else {
        return false;
    };
    if !matches!(tokens[next_idx].token, Token::Eq) {
        return false;
    }

    let mut cursor = word_index;
    let mut saw_task = false;
    let mut steps = 0usize;

    // Walk backwards looking for `CREATE ... TASK` before hitting `AS` or a
    // statement boundary.
    while let Some(prev_idx) = prev_non_trivia_index(tokens, cursor) {
        match &tokens[prev_idx].token {
            // CREATE TASK option keys are before the AS clause.
            Token::Word(word) if word.keyword == Keyword::AS => return false,
            Token::Word(word) if word.keyword == Keyword::TASK => saw_task = true,
            Token::Word(word) if saw_task && word.keyword == Keyword::CREATE => return true,
            Token::SemiColon => return false,
            _ => {}
        }

        cursor = prev_idx;
        steps += 1;
        // Bounded backward scan guards against pathological statements.
        if steps > 128 {
            return false;
        }
    }

    false
}
552
553fn databricks_case_sensitive_set_property(statement: &Statement, dialect: Dialect) -> bool {
554    if dialect != Dialect::Databricks {
555        return false;
556    }
557
558    let Statement::Set(set_stmt) = statement else {
559        return false;
560    };
561
562    match set_stmt {
563        sqlparser::ast::Set::SingleAssignment { variable, .. } => {
564            is_databricks_delta_property_key(variable)
565        }
566        sqlparser::ast::Set::MultipleAssignments { assignments } => assignments
567            .iter()
568            .any(|assignment| is_databricks_delta_property_key(&assignment.name)),
569        _ => false,
570    }
571}
572
573fn is_databricks_delta_property_key(name: &ObjectName) -> bool {
574    let mut parts = Vec::with_capacity(name.0.len());
575    for part in &name.0 {
576        let Some(ident) = part.as_ident() else {
577            return false;
578        };
579        parts.push(ident.value.as_str());
580    }
581
582    // spark.databricks.delta.properties.<scope>.<property_name>
583    parts.len() >= 5
584        && parts[0].eq_ignore_ascii_case("spark")
585        && parts[1].eq_ignore_ascii_case("databricks")
586        && parts[2].eq_ignore_ascii_case("delta")
587        && parts[3].eq_ignore_ascii_case("properties")
588}
589
590fn identifier_case_replacement(value: &str, policy: CapitalisationPolicy) -> Option<String> {
591    match policy {
592        CapitalisationPolicy::Lower => Some(value.to_ascii_lowercase()),
593        CapitalisationPolicy::Upper => Some(value.to_ascii_uppercase()),
594        CapitalisationPolicy::Capitalise => Some(capitalise_ascii_token(value)),
595        CapitalisationPolicy::Pascal => Some(pascal_case(value)),
596        CapitalisationPolicy::Camel => Some(camel_case(value)),
597        CapitalisationPolicy::Snake => Some(snake_case(value)),
598        // Consistent should be resolved to a concrete policy before calling
599        // this function; if somehow it reaches here, fall back to lower.
600        CapitalisationPolicy::Consistent => Some(value.to_ascii_lowercase()),
601    }
602}
603
/// Uppercase the first ASCII letter of `value`, lowercase every later letter;
/// non-alphabetic characters pass through unchanged.
fn capitalise_ascii_token(value: &str) -> String {
    let mut pending_first_alpha = true;
    value
        .chars()
        .map(|ch| {
            if !ch.is_ascii_alphabetic() {
                ch
            } else if pending_first_alpha {
                pending_first_alpha = false;
                ch.to_ascii_uppercase()
            } else {
                ch.to_ascii_lowercase()
            }
        })
        .collect()
}
624
/// SQLFluff pascal-case fix: uppercase the first character of each "word"
/// (runs of ASCII alphanumerics separated by other characters, or the string
/// start); all other characters are left untouched.
fn pascal_case(value: &str) -> String {
    let mut word_start = true;
    value
        .chars()
        .map(|ch| {
            if !ch.is_ascii_alphanumeric() {
                // Separator: emit unchanged and mark a new word boundary.
                word_start = true;
                return ch;
            }
            let mapped = if word_start {
                ch.to_ascii_uppercase()
            } else {
                ch
            };
            word_start = false;
            mapped
        })
        .collect()
}
644
/// SQLFluff camel-case fix: lowercase the first character of each "word"
/// (runs of ASCII alphanumerics separated by other characters, or the string
/// start); all other characters are left untouched.
fn camel_case(value: &str) -> String {
    let mut word_start = true;
    value
        .chars()
        .map(|ch| {
            if !ch.is_ascii_alphanumeric() {
                // Separator: emit unchanged and mark a new word boundary.
                word_start = true;
                return ch;
            }
            let mapped = if word_start {
                ch.to_ascii_lowercase()
            } else {
                ch
            };
            word_start = false;
            mapped
        })
        .collect()
}
664
/// Convert an identifier to snake_case matching SQLFluff's multi-pass
/// behavior.
///
/// SQLFluff applies fixes iteratively: pass 1 lowercases all-uppercase
/// tokens, pass 2 inserts underscores at letter/digit boundaries in the
/// now-lowercase result. We produce the equivalent final output in a single
/// pass by inserting underscores at camelCase boundaries AND letter/digit
/// boundaries, then lowercasing.
fn snake_case(value: &str) -> String {
    let chars: Vec<char> = value.chars().collect();
    // All-uppercase tokens get no camelCase underscores — they are simply
    // lowercased, mirroring SQLFluff's first pass.
    let has_lower_letter = chars
        .iter()
        .any(|c| c.is_ascii_alphabetic() && c.is_ascii_lowercase());

    let mut result = String::with_capacity(value.len() + 4);
    let mut previous: Option<char> = None;
    for &current in &chars {
        if let Some(prev) = previous {
            // Each boundary check pushes an underscore independently (a char
            // can trigger more than one), matching the original multi-pass
            // underscore placement exactly.
            if has_lower_letter
                && current.is_ascii_uppercase()
                && (prev.is_ascii_lowercase() || prev.is_ascii_digit())
            {
                result.push('_');
            }
            if current.is_ascii_digit() && prev.is_ascii_alphabetic() {
                result.push('_');
            }
            if current.is_ascii_alphabetic() && prev.is_ascii_digit() {
                result.push('_');
            }
        }
        result.push(current);
        previous = Some(current);
    }
    result.to_ascii_lowercase()
}
701
/// Resolve the "consistent" policy to a concrete case by scanning identifier
/// tokens and using SQLFluff's refutation algorithm.
///
/// For each identifier, we eliminate case styles that are incompatible:
///   - camel, pascal, snake are always pre-refuted (never inferred)
///   - first letter lowercase -> refute upper, capitalise
///   - first letter lowercase && mixed -> refute lower
///   - first letter uppercase -> refute lower
///   - not all uppercase -> refute upper
///   - not capitalize() -> refute capitalise
///
/// Priority order (matching SQLFluff): upper, lower, capitalise.
/// If all are refuted, falls back to the latest possible case, or "upper".
/// `relevant_idents` controls which tokens participate in refutation.
/// For the "all" unquoted policy this is the full AST identifier set.
/// For alias-only policies this is only the alias identifier values.
fn resolve_consistent_policy(
    tokens: &[TokenWithSpan],
    ignore_words: &HashSet<String>,
    ignore_words_regex: Option<&Regex>,
    relevant_idents: &HashSet<String>,
) -> CapitalisationPolicy {
    // One bit per inferable case style; a set bit means "refuted".
    const UPPER: u8 = 0b001;
    const LOWER: u8 = 0b010;
    const CAPITALISE: u8 = 0b100;

    let mut refuted: u8 = 0;
    let mut latest_possible = CapitalisationPolicy::Upper; // default fallback

    for token in tokens {
        let Token::Word(word) = &token.token else {
            continue;
        };
        if word.quote_style.is_some() {
            continue;
        }
        // Only consider tokens whose value appears in the relevant set.
        if !relevant_idents.contains(&word.value) {
            continue;
        }
        if token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex) {
            continue;
        }

        let raw = &word.value;

        // Determine if the first capitalizable character is lowercase.
        // (A character is capitalizable when its two ASCII case forms differ.)
        let first_is_lower = raw
            .chars()
            .find(|c| c.to_ascii_lowercase() != c.to_ascii_uppercase())
            .is_some_and(|c| c != c.to_ascii_uppercase());

        if first_is_lower {
            refuted |= UPPER | CAPITALISE;
            if raw.as_str() != raw.to_ascii_lowercase() {
                refuted |= LOWER;
            }
        } else {
            refuted |= LOWER;
            if raw.as_str() != raw.to_ascii_uppercase() {
                refuted |= UPPER;
            }
            if raw.as_str() != capitalize_str(raw) {
                refuted |= CAPITALISE;
            }
        }

        // Track latest possible case before full refutation.
        let possible = !refuted;
        if possible & UPPER != 0 {
            latest_possible = CapitalisationPolicy::Upper;
        } else if possible & LOWER != 0 {
            latest_possible = CapitalisationPolicy::Lower;
        } else if possible & CAPITALISE != 0 {
            latest_possible = CapitalisationPolicy::Capitalise;
        }

        // If all refuted, we already have the answer.
        if refuted == (UPPER | LOWER | CAPITALISE) {
            break;
        }
    }

    if refuted != (UPPER | LOWER | CAPITALISE) {
        // Still consistent — pick the first non-refuted case.
        if refuted & UPPER == 0 {
            return CapitalisationPolicy::Upper;
        }
        if refuted & LOWER == 0 {
            return CapitalisationPolicy::Lower;
        }
        return CapitalisationPolicy::Capitalise;
    }

    latest_possible
}
798
/// Python-compatible `str.capitalize()`: the first character is uppercased
/// (full Unicode uppercasing, possibly multi-char); every following character
/// is ASCII-lowercased.
fn capitalize_str(s: &str) -> String {
    let mut chars = s.chars();
    let Some(first) = chars.next() else {
        return String::new();
    };
    first
        .to_uppercase()
        .chain(chars.map(|c| c.to_ascii_lowercase()))
        .collect()
}
816
/// Like `resolve_consistent_policy` but only considers tokens that appear
/// immediately after an AS keyword (alias position).
fn resolve_consistent_policy_alias_only(
    tokens: &[TokenWithSpan],
    ignore_words: &HashSet<String>,
    ignore_words_regex: Option<&Regex>,
) -> CapitalisationPolicy {
    // One bit per inferable case style; a set bit means "refuted".
    const UPPER: u8 = 0b001;
    const LOWER: u8 = 0b010;
    const CAPITALISE: u8 = 0b100;

    let mut refuted: u8 = 0;
    let mut latest_possible = CapitalisationPolicy::Upper;

    for (index, token) in tokens.iter().enumerate() {
        let Token::Word(word) = &token.token else {
            continue;
        };
        if word.quote_style.is_some() {
            continue;
        }
        // Only consider tokens in alias position (after AS keyword).
        let prev = prev_non_trivia_index(tokens, index);
        let is_after_as = prev
            .map(|pi| {
                matches!(
                    &tokens[pi].token,
                    Token::Word(w) if w.keyword == Keyword::AS
                )
            })
            .unwrap_or(false);
        if !is_after_as {
            continue;
        }
        if token_is_ignored(word.value.as_str(), ignore_words, ignore_words_regex) {
            continue;
        }

        let raw = &word.value;
        // Same refutation step as `resolve_consistent_policy`: classify the
        // token by its first capitalizable character and its overall casing.
        let first_is_lower = raw
            .chars()
            .find(|c| c.to_ascii_lowercase() != c.to_ascii_uppercase())
            .is_some_and(|c| c != c.to_ascii_uppercase());

        if first_is_lower {
            refuted |= UPPER | CAPITALISE;
            if raw.as_str() != raw.to_ascii_lowercase() {
                refuted |= LOWER;
            }
        } else {
            refuted |= LOWER;
            if raw.as_str() != raw.to_ascii_uppercase() {
                refuted |= UPPER;
            }
            if raw.as_str() != capitalize_str(raw) {
                refuted |= CAPITALISE;
            }
        }

        // Track the latest case style that was still possible.
        let possible = !refuted;
        if possible & UPPER != 0 {
            latest_possible = CapitalisationPolicy::Upper;
        } else if possible & LOWER != 0 {
            latest_possible = CapitalisationPolicy::Lower;
        } else if possible & CAPITALISE != 0 {
            latest_possible = CapitalisationPolicy::Capitalise;
        }

        // Everything refuted: the fallback answer is already determined.
        if refuted == (UPPER | LOWER | CAPITALISE) {
            break;
        }
    }

    if refuted != (UPPER | LOWER | CAPITALISE) {
        // Still consistent — pick the first non-refuted case, in SQLFluff's
        // priority order: upper, lower, capitalise.
        if refuted & UPPER == 0 {
            return CapitalisationPolicy::Upper;
        }
        if refuted & LOWER == 0 {
            return CapitalisationPolicy::Lower;
        }
        return CapitalisationPolicy::Capitalise;
    }

    latest_possible
}
902
903fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
904    let dialect = dialect.to_sqlparser_dialect();
905    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
906    tokenizer.tokenize_with_location().ok()
907}
908
909fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
910    while index < tokens.len() {
911        if !is_trivia_token(&tokens[index].token) {
912            return Some(index);
913        }
914        index += 1;
915    }
916    None
917}
918
919fn prev_non_trivia_index(tokens: &[TokenWithSpan], index: usize) -> Option<usize> {
920    let mut i = index;
921    while i > 0 {
922        i -= 1;
923        if !is_trivia_token(&tokens[i].token) {
924            return Some(i);
925        }
926    }
927    None
928}
929
930fn is_trivia_token(token: &Token) -> bool {
931    matches!(
932        token,
933        Token::Whitespace(
934            Whitespace::Space
935                | Whitespace::Newline
936                | Whitespace::Tab
937                | Whitespace::SingleLineComment { .. }
938                | Whitespace::MultiLineComment(_)
939        )
940    )
941}
942
943fn token_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
944    let start = line_col_to_offset(
945        sql,
946        token.span.start.line as usize,
947        token.span.start.column as usize,
948    )?;
949    let end = line_col_to_offset(
950        sql,
951        token.span.end.line as usize,
952        token.span.end.column as usize,
953    )?;
954    Some((start, end))
955}
956
/// Maps a 1-based (line, column) position to a byte offset into `sql`.
///
/// Columns count characters, offsets count bytes (per `char_indices`). A
/// position exactly one past the final character maps to `sql.len()`; any
/// other out-of-range position (including line/column 0) yields `None`.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    // Positions are 1-based; zero is never valid.
    if line.min(column) == 0 {
        return None;
    }

    let mut cursor = (1usize, 1usize); // (line, column) of the next char

    for (offset, ch) in sql.char_indices() {
        if cursor == (line, column) {
            return Some(offset);
        }
        if ch == '\n' {
            cursor = (cursor.0 + 1, 1);
        } else {
            cursor.1 += 1;
        }
    }

    // End-of-input position (one past the last character) is addressable.
    (cursor == (line, column)).then_some(sql.len())
}
984
#[cfg(test)]
mod tests {
    use super::*;
    use crate::linter::config::LintConfig;
    use crate::linter::rule::with_active_dialect;
    use crate::parser::{parse_sql, parse_sql_with_dialect};
    use crate::types::Dialect;
    use crate::types::IssueAutofixApplicability;

    /// Builds a `LintConfig` carrying a single per-rule settings object under
    /// `rule_key` (rule name or issue code). Replaces the identical struct
    /// literal that was previously copy-pasted into every policy test.
    fn config_with(rule_key: &str, settings: serde_json::Value) -> LintConfig {
        LintConfig {
            enabled: true,
            disabled_rules: vec![],
            rule_configs: std::collections::BTreeMap::from([(rule_key.to_string(), settings)]),
        }
    }

    /// Runs the rule over `sql` with the default config in the generic dialect.
    fn run(sql: &str) -> Vec<Issue> {
        run_with_config(sql, LintConfig::default())
    }

    fn run_with_config(sql: &str, config: LintConfig) -> Vec<Issue> {
        run_with_config_in_dialect(sql, Dialect::Generic, config)
    }

    /// Parses `sql` in `dialect` and collects the rule's issues across all
    /// parsed statements.
    fn run_with_config_in_dialect(sql: &str, dialect: Dialect, config: LintConfig) -> Vec<Issue> {
        let statements = parse_sql_with_dialect(sql, dialect).expect("parse");
        let rule = CapitalisationIdentifiers::from_config(&config);
        with_active_dialect(dialect, || {
            statements
                .iter()
                .enumerate()
                .flat_map(|(index, statement)| {
                    rule.check(
                        statement,
                        &LintContext {
                            sql,
                            statement_range: 0..sql.len(),
                            statement_index: index,
                        },
                    )
                })
                .collect()
        })
    }

    /// Exercises the lexical fallback path: the checked `sql` is unrelated to
    /// the placeholder statement, as happens for input the parser rejects.
    fn run_statementless_with_config_in_dialect(
        sql: &str,
        dialect: Dialect,
        config: LintConfig,
    ) -> Vec<Issue> {
        let placeholder = parse_sql("SELECT 1").expect("parse placeholder");
        let rule = CapitalisationIdentifiers::from_config(&config);
        with_active_dialect(dialect, || {
            rule.check(
                &placeholder[0],
                &LintContext {
                    sql,
                    statement_range: 0..sql.len(),
                    statement_index: 0,
                },
            )
        })
    }

    /// Applies one issue's autofix edits, right-to-left so earlier spans stay
    /// valid while later ones are replaced.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut out = sql.to_string();
        let mut edits = autofix.edits.clone();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    /// Applies every autofix edit from every issue, right-to-left.
    fn apply_all_autofixes(sql: &str, issues: &[Issue]) -> String {
        let mut edits: Vec<_> = issues
            .iter()
            .filter_map(|i| i.autofix.as_ref())
            .flat_map(|a| a.edits.clone())
            .collect();
        edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
        let mut out = sql.to_string();
        for edit in edits.into_iter().rev() {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        out
    }

    #[test]
    fn flags_mixed_identifier_case() {
        // Col refutes lower and upper, leaving capitalise. col then violates
        // capitalise, so the consistent policy resolves to capitalise.
        // Per-identifier reporting: col and t both violate capitalise.
        let sql = "SELECT Col, col FROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 2);
        assert!(issues.iter().all(|i| i.code == issue_codes::LINT_CP_002));
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_all_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT Col, Col FROM T");
    }

    #[test]
    fn does_not_flag_consistent_identifiers() {
        assert!(run("SELECT col_one, col_two FROM t").is_empty());
    }

    #[test]
    fn does_not_treat_table_valued_function_name_as_identifier() {
        // Postgres table-valued function names in FROM should be handled by
        // function/layout rules, not identifier-case consistency.
        assert!(run("SELECT x FROM FOO(1) AS x").is_empty());
    }

    #[test]
    fn does_not_flag_identifier_like_words_in_strings_or_comments() {
        let sql = "SELECT 'Col col' AS txt -- Col col\nFROM t";
        assert!(run(sql).is_empty());
    }

    #[test]
    fn upper_policy_flags_lowercase_identifier() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "upper"}),
        );
        // Both col and t violate upper policy.
        let issues = run_with_config("SELECT col FROM t", config);
        assert_eq!(issues.len(), 2);
    }

    #[test]
    fn uppercase_policy_emits_uppercase_autofix() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "upper"}),
        );
        let sql = "SELECT col FROM t";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 2);
        let fixed = apply_all_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT COL FROM T");
    }

    #[test]
    fn ignore_words_regex_excludes_identifiers_from_check() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"ignore_words_regex": "^col$"}),
        );
        let issues = run_with_config("SELECT Col, col FROM t", config);
        assert!(issues.is_empty());
    }

    #[test]
    fn aliases_policy_ignores_non_alias_identifiers() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"unquoted_identifiers_policy": "aliases"}),
        );
        let issues = run_with_config("SELECT Col AS alias FROM t", config);
        assert!(issues.is_empty());
    }

    #[test]
    fn column_alias_policy_flags_mixed_column_alias_case() {
        // Rule settings may also be keyed by the issue code.
        let config = config_with(
            "LINT_CP_002",
            serde_json::json!({"unquoted_identifiers_policy": "column_aliases"}),
        );
        let sql = "SELECT amount AS Col, amount AS col FROM t";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_CP_002);
        // With column_aliases policy, autofix targets only column alias tokens.
        // Col refutes lower/upper, leaving capitalise. col violates -> capitalise.
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT amount AS Col, amount AS Col FROM t");
    }

    #[test]
    fn consistent_policy_allows_single_letter_upper_with_capitalised_identifier() {
        let issues = run("SELECT A, Boo");
        assert!(issues.is_empty());
    }

    #[test]
    fn pascal_policy_allows_all_caps_identifier() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "pascal"}),
        );
        let issues = run_with_config("SELECT PASCALCASE", config);
        assert!(issues.is_empty());
    }

    #[test]
    fn databricks_tblproperties_mixed_case_property_is_flagged() {
        let issues = run_with_config_in_dialect(
            "SHOW TBLPROPERTIES customer (created.BY.user)",
            Dialect::Databricks,
            LintConfig::default(),
        );
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_CP_002);
    }

    #[test]
    fn databricks_tblproperties_lowercase_property_is_allowed() {
        let issues = run_with_config_in_dialect(
            "SHOW TBLPROPERTIES customer (created.by.user)",
            Dialect::Databricks,
            LintConfig::default(),
        );
        assert!(issues.is_empty());
    }

    #[test]
    fn databricks_tblproperties_capitalised_property_is_flagged() {
        // customer + Created + By + User: consistent resolves to capitalise
        // (Created sets the style), so at least customer violates. The exact
        // issue count depends on whether the lexical fallback includes more
        // tokens, so only non-emptiness and the code are asserted.
        let issues = run_with_config_in_dialect(
            "SHOW TBLPROPERTIES customer (Created.By.User)",
            Dialect::Databricks,
            LintConfig::default(),
        );
        assert!(!issues.is_empty());
        assert!(issues.iter().all(|i| i.code == issue_codes::LINT_CP_002));
    }

    #[test]
    fn databricks_set_delta_property_key_is_ignored() {
        let issues = run_with_config_in_dialect(
            "SET spark.databricks.delta.properties.defaults.enableChangeDataFeed = true;",
            Dialect::Databricks,
            LintConfig::default(),
        );
        assert!(
            issues.is_empty(),
            "databricks property keys are case-sensitive"
        );
    }

    #[test]
    fn flags_mixed_identifier_case_in_delete_predicate() {
        // Consistent resolves to capitalise (Col sets style).
        // col and t both violate capitalise → 2 violations.
        let issues = run("DELETE FROM t WHERE Col = col");
        assert_eq!(issues.len(), 2);
        assert!(issues.iter().all(|i| i.code == issue_codes::LINT_CP_002));
    }

    #[test]
    fn flags_mixed_identifier_case_in_update_assignment() {
        // Consistent resolves to capitalise (Col sets style).
        // col and t both violate capitalise → 2 violations.
        let issues = run("UPDATE t SET Col = col");
        assert_eq!(issues.len(), 2);
        assert!(issues.iter().all(|i| i.code == issue_codes::LINT_CP_002));
    }

    // -- SQLFluff parity: consistent policy direction --

    #[test]
    fn consistent_resolves_to_upper_when_pascal_refutes_capitalise() {
        // AppleFritter refutes lower, upper, capitalise -> all refuted.
        // latest_possible starts at upper (no lower tokens seen first).
        // Both identifiers violate upper → 2 violations.
        let sql = "SELECT AppleFritter, Banana";
        let issues = run(sql);
        assert_eq!(issues.len(), 2);
        let fixed = apply_all_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT APPLEFRITTER, BANANA");
    }

    #[test]
    fn consistent_resolves_to_upper_for_mixed_with_numbers() {
        // All three identifiers violate upper → 3 violations.
        let sql = "SELECT AppleFritter, Apple123fritter, Apple123Fritter";
        let issues = run(sql);
        assert_eq!(issues.len(), 3);
        let fixed = apply_all_autofixes(sql, &issues);
        assert_eq!(
            fixed,
            "SELECT APPLEFRITTER, APPLE123FRITTER, APPLE123FRITTER"
        );
    }

    #[test]
    fn consistent_resolves_to_lower_when_lowercase_first() {
        // a is lowercase -> refute upper, capitalise. FOO then violates.
        let sql = "SELECT a FROM FOO";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT a FROM foo");
    }

    #[test]
    fn consistent_resolves_to_upper_when_uppercase_first() {
        // B is upper -> refute lower. a and foo violate upper → 2 violations.
        let sql = "SELECT B, a FROM foo";
        let issues = run(sql);
        assert_eq!(issues.len(), 2);
        let fixed = apply_all_autofixes(sql, &issues);
        assert_eq!(fixed, "SELECT B, A FROM FOO");
    }

    // -- SQLFluff parity: pascal autofix --

    #[test]
    fn pascal_policy_fixes_lowercase_to_pascal() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "pascal"}),
        );
        let sql = "SELECT pascalcase";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT Pascalcase");
    }

    #[test]
    fn pascal_policy_fixes_underscored_to_pascal() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "pascal"}),
        );
        let sql = "SELECT pascal_case";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT Pascal_Case");
    }

    #[test]
    fn pascal_policy_fixes_upperfirst_underscored() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "pascal"}),
        );
        // pASCAL_CASE -> PASCAL_CASE (uppercase first letter of each word)
        let sql = "SELECT pASCAL_CASE";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT PASCAL_CASE");
    }

    #[test]
    fn pascal_policy_fixes_pascal_v_capitalise() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "pascal"}),
        );
        let sql = "SELECT AppleFritter, Banana_split";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT AppleFritter, Banana_Split");
    }

    // -- SQLFluff parity: camel autofix --

    #[test]
    fn camel_policy_fixes_capitalised_to_camel() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "camel"}),
        );
        let sql = "SELECT Camelcase";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT camelcase");
    }

    #[test]
    fn camel_policy_fixes_underscored_to_camel() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "camel"}),
        );
        let sql = "SELECT Camel_Case";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT camel_case");
    }

    #[test]
    fn camel_policy_fixes_partial_upper() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "camel"}),
        );
        let sql = "SELECT cAMEL_CASE";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT cAMEL_cASE");
    }

    // -- SQLFluff parity: snake autofix --

    #[test]
    fn snake_policy_fixes_camel_to_snake() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "snake"}),
        );
        let sql = "SELECT testColumn3";
        let issues = run_with_config_in_dialect(sql, Dialect::Mssql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT test_column_3");
    }

    #[test]
    fn snake_policy_fixes_all_upper_to_lower() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"extended_capitalisation_policy": "snake"}),
        );
        let sql = "SELECT TESTCOLUMN5";
        let issues = run_with_config_in_dialect(sql, Dialect::Mssql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT testcolumn_5");
    }

    // -- SQLFluff parity: alias-only autofix --

    #[test]
    fn aliases_policy_fixes_only_aliases() {
        let config = config_with(
            "capitalisation.identifiers",
            serde_json::json!({"unquoted_identifiers_policy": "aliases"}),
        );
        // low_case appears twice -> lower is consistent for aliases.
        // Table alias UPPER_CASE violates -> fix to lowercase.
        let sql =
            "SELECT UPPER_CASE AS low_case, PascalCase AS low_case FROM UPPER_CASE AS UPPER_CASE";
        let issues = run_with_config(sql, config);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(
            fixed,
            "SELECT UPPER_CASE AS low_case, PascalCase AS low_case FROM UPPER_CASE AS upper_case"
        );
    }

    // -- SQLFluff parity: TBLPROPERTIES autofix --

    #[test]
    fn sparksql_tblproperties_autofix_lowercases() {
        let sql = "SHOW TBLPROPERTIES customer (created.BY.user)";
        let issues = run_with_config_in_dialect(sql, Dialect::Databricks, LintConfig::default());
        assert_eq!(issues.len(), 1);
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert!(!autofix.edits.is_empty(), "should emit autofix edits");
    }

    #[test]
    fn statementless_fallback_fixes_copy_into_identifier_case() {
        let sql = "create task ${env}_ENT_LANDING.SCHEMA_NAME.TASK_NAME\nas\n    COPY INTO ${env}_ENT_LANDING.SCHEMA_NAME.ProblemHere(\n        ONE_OR_MORE_COLUMN_NAMES_HERE\n    )\n    FROM @${env}_ENT_COMMON.GLOBAL.FILEINGESTION_STAGE/file\n";
        let issues = run_statementless_with_config_in_dialect(
            sql,
            Dialect::Snowflake,
            LintConfig::default(),
        );
        assert!(!issues.is_empty(), "expected CP02 fallback issue");
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert!(fixed.contains(".PROBLEMHERE("), "fixed: {fixed}");
        assert!(
            fixed.contains("/file"),
            "path segment casing should be preserved: {fixed}"
        );
    }

    #[test]
    fn statementless_fallback_keeps_create_task_option_name_case() {
        let sql = "create task ${env}_ENT_LANDING.SCHEMA_NAME.TASK_NAME\n    schedule='${repl_cdc_schedule}'\nas\n    COPY INTO ${env}_ENT_LANDING.SCHEMA_NAME.ProblemHere(\n        ONE_OR_MORE_COLUMN_NAMES_HERE\n    )\n    FROM @${env}_ENT_COMMON.GLOBAL.FILEINGESTION_STAGE/file\n";
        let issues = run_statementless_with_config_in_dialect(
            sql,
            Dialect::Snowflake,
            LintConfig::default(),
        );
        assert!(!issues.is_empty(), "expected CP02 fallback issue");
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert!(
            fixed.contains("schedule='${repl_cdc_schedule}'"),
            "CREATE TASK option key should not be uppercased: {fixed}"
        );
        assert!(
            fixed.contains(".PROBLEMHERE("),
            "identifier case fix should still apply to COPY INTO target: {fixed}"
        );
    }
}