Skip to main content

sqrust_rules/capitalisation/
mod.rs

1pub mod functions;
2pub mod keywords;
3pub mod literals;
4pub mod types;
5
/// Per-byte mask telling rules which parts of a SQL source are real code.
///
/// The mask is produced by [`SkipMap::build`] and holds one flag per byte of
/// the source it was built from. A flag is set when the byte lies inside a
/// string literal, a comment, a quoted identifier or a template block, i.e.
/// somewhere a rule must not inspect.
///
/// Rules walk the source by byte offset and call [`SkipMap::is_code`] to
/// decide whether the byte at that offset should be inspected.
#[derive(Debug, Clone)]
pub(crate) struct SkipMap {
    /// One entry per source byte; `true` means the byte is inside a skipped
    /// region and must be ignored by rules.
    skip: Vec<bool>,
}
20
21impl SkipMap {
22    pub(crate) fn build(source: &str) -> Self {
23        let bytes = source.as_bytes();
24        let len = source.len();
25        let mut skip = vec![false; len];
26
27        let mut i = 0;
28        while i < len {
29            // Line comment: -- ... end-of-line
30            if i + 1 < len && bytes[i] == b'-' && bytes[i + 1] == b'-' {
31                skip[i] = true;
32                skip[i + 1] = true;
33                i += 2;
34                while i < len && bytes[i] != b'\n' {
35                    skip[i] = true;
36                    i += 1;
37                }
38                // '\n' itself is not skipped so line numbers stay correct
39                continue;
40            }
41
42            // Block comment: /* ... */
43            if i + 1 < len && bytes[i] == b'/' && bytes[i + 1] == b'*' {
44                skip[i] = true;
45                skip[i + 1] = true;
46                i += 2;
47                while i < len {
48                    if i + 1 < len && bytes[i] == b'*' && bytes[i + 1] == b'/' {
49                        skip[i] = true;
50                        skip[i + 1] = true;
51                        i += 2;
52                        break;
53                    }
54                    skip[i] = true;
55                    i += 1;
56                }
57                continue;
58            }
59
60            // Single-quoted string: '...' with '' escape (SQL standard)
61            if bytes[i] == b'\'' {
62                skip[i] = true;
63                i += 1;
64                while i < len {
65                    if bytes[i] == b'\'' {
66                        skip[i] = true;
67                        i += 1;
68                        // '' is an escaped quote inside the string, not the end
69                        if i < len && bytes[i] == b'\'' {
70                            skip[i] = true;
71                            i += 1;
72                            continue;
73                        }
74                        break; // end of string
75                    }
76                    skip[i] = true;
77                    i += 1;
78                }
79                continue;
80            }
81
82            // Double-quoted identifier: "..."
83            if bytes[i] == b'"' {
84                skip[i] = true;
85                i += 1;
86                while i < len && bytes[i] != b'"' {
87                    skip[i] = true;
88                    i += 1;
89                }
90                if i < len {
91                    skip[i] = true; // closing "
92                    i += 1;
93                }
94                continue;
95            }
96
97            // Backtick identifier: `...`
98            if bytes[i] == b'`' {
99                skip[i] = true;
100                i += 1;
101                while i < len && bytes[i] != b'`' {
102                    skip[i] = true;
103                    i += 1;
104                }
105                if i < len {
106                    skip[i] = true; // closing `
107                    i += 1;
108                }
109                continue;
110            }
111
112            // Jinja/dbt template block: {{ ... }} and {% ... %} and {# ... #}
113            if i + 1 < len && bytes[i] == b'{' && (bytes[i + 1] == b'{' || bytes[i + 1] == b'%' || bytes[i + 1] == b'#') {
114                let closing_inner = match bytes[i + 1] {
115                    b'{' => b'}',
116                    b'%' => b'%',
117                    b'#' => b'#',
118                    _ => unreachable!(),
119                };
120                skip[i] = true;
121                skip[i + 1] = true;
122                i += 2;
123                while i < len {
124                    if bytes[i] == closing_inner && i + 1 < len && bytes[i + 1] == b'}' {
125                        skip[i] = true;
126                        skip[i + 1] = true;
127                        i += 2;
128                        break;
129                    }
130                    skip[i] = true;
131                    i += 1;
132                }
133                continue;
134            }
135
136            i += 1;
137        }
138
139        SkipMap { skip }
140    }
141
142    /// Returns `true` if the byte at `offset` is real SQL code (not inside a
143    /// string / comment / quoted identifier).
144    #[inline]
145    pub(crate) fn is_code(&self, offset: usize) -> bool {
146        !self.skip[offset]
147    }
148}
149
/// Tells whether the byte `ch` can be part of a word, i.e. whether it is an
/// ASCII letter, an ASCII digit or an underscore (`[a-zA-Z0-9_]`).
#[inline]
pub(crate) fn is_word_char(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}