Skip to main content

sqrust_rules/lint/
duplicate_column_in_create.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2use sqlparser::ast::Statement;
3use std::collections::{HashMap, HashSet};
4
/// Lint rule that flags column names defined more than once (compared
/// case-insensitively) within a single `CREATE TABLE` statement.
///
/// The rule carries no state, so it is a zero-sized unit struct and is cheap
/// to copy or default-construct.
#[derive(Debug, Clone, Copy, Default)]
pub struct DuplicateColumnInCreate;
6
7impl Rule for DuplicateColumnInCreate {
8    fn name(&self) -> &'static str {
9        "Lint/DuplicateColumnInCreate"
10    }
11
12    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
13        if !ctx.parse_errors.is_empty() {
14            return Vec::new();
15        }
16
17        let mut diags = Vec::new();
18        for stmt in &ctx.statements {
19            if let Statement::CreateTable(create_table) = stmt {
20                let columns = &create_table.columns;
21
22                // Count occurrences of each column name (case-insensitive).
23                let mut seen: HashMap<String, usize> = HashMap::new();
24                // Track which duplicates were already reported (avoid duplicate reports for
25                // three-or-more occurrences of the same name).
26                let mut reported: HashSet<String> = HashSet::new();
27
28                for col_def in columns {
29                    let lower = col_def.name.value.to_lowercase();
30                    let count = seen.entry(lower.clone()).or_insert(0);
31                    *count += 1;
32
33                    if *count == 2 && !reported.contains(&lower) {
34                        reported.insert(lower.clone());
35
36                        // Find the position of the second occurrence of this column name in source.
37                        let (line, col) =
38                            find_second_occurrence(&ctx.source, &col_def.name.value);
39                        diags.push(Diagnostic {
40                            rule: "Lint/DuplicateColumnInCreate",
41                            message: format!(
42                                "Column '{}' is defined more than once in CREATE TABLE",
43                                lower
44                            ),
45                            line,
46                            col,
47                        });
48                    }
49                }
50            }
51        }
52        diags
53    }
54}
55
56/// Finds the second occurrence of `name` (case-insensitive, whole-word) in `source`
57/// and returns its 1-indexed (line, col). Falls back to (1, 1) if not found.
58fn find_second_occurrence(source: &str, name: &str) -> (usize, usize) {
59    let source_lower = source.to_lowercase();
60    let name_lower = name.to_lowercase();
61    let name_len = name_lower.len();
62    let bytes = source_lower.as_bytes();
63    let src_len = bytes.len();
64
65    let mut search_from = 0usize;
66    let mut occurrences_found = 0usize;
67
68    while search_from < src_len {
69        let Some(rel) = source_lower[search_from..].find(&name_lower) else {
70            break;
71        };
72        let abs = search_from + rel;
73
74        // Word-boundary check.
75        let before_ok = abs == 0 || {
76            let b = bytes[abs - 1];
77            !b.is_ascii_alphanumeric() && b != b'_'
78        };
79        let after = abs + name_len;
80        let after_ok = after >= src_len || {
81            let b = bytes[after];
82            !b.is_ascii_alphanumeric() && b != b'_'
83        };
84
85        if before_ok && after_ok {
86            occurrences_found += 1;
87            if occurrences_found == 2 {
88                return offset_to_line_col(source, abs);
89            }
90        }
91
92        search_from = abs + 1;
93    }
94
95    // Fallback: return position of first occurrence if second not found distinctly.
96    (1, 1)
97}
98
/// Converts a byte offset in `source` to a 1-indexed `(line, col)` pair.
///
/// `col` counts bytes, not characters, from the start of the line. The
/// function never panics: an offset past the end of `source` is clamped to the
/// end, and an offset that falls inside a multi-byte character is moved back
/// to the start of that character.
fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
    let mut offset = offset.min(source.len());
    // Offset 0 is always a char boundary, so this loop terminates.
    while !source.is_char_boundary(offset) {
        offset -= 1;
    }

    let before = &source[..offset];
    let line = before.bytes().filter(|&b| b == b'\n').count() + 1;
    // Distance from the last newline, or from the start of the text on line 1.
    let col = before.rfind('\n').map_or(offset, |p| offset - p - 1) + 1;
    (line, col)
}