// sqrust_rules/capitalisation/mod.rs

1pub mod functions;
2pub mod keywords;
3pub mod literals;
4pub mod types;
5
/// A per-byte mask over a SQL source string that marks which bytes are *not*
/// real SQL code.
///
/// A byte is masked (skipped) when it belongs to one of:
/// - a line comment: `--` up to, but not including, the terminating `\n`
/// - a block comment: `/* ... */`
/// - a single-quoted string: `'...'`, where `''` is an escaped quote
/// - a double-quoted identifier: `"..."`
/// - a backtick-quoted identifier: `` `...` ``
///
/// The delimiters themselves are masked as well. An unterminated block
/// comment, string, or quoted identifier masks everything up to the end of
/// the source.
///
/// Build the mask with [`SkipMap::build`] and query it with
/// [`SkipMap::is_code`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SkipMap {
    /// One entry per byte of the source; `true` means the byte is inside a
    /// string, comment, or quoted identifier and must be skipped.
    skip: Vec<bool>,
}

impl SkipMap {
    /// Scans `source` once and records, for every byte offset, whether that
    /// byte lies inside a comment, string literal, or quoted identifier.
    pub(crate) fn build(source: &str) -> Self {
        let bytes = source.as_bytes();
        let len = bytes.len();
        let mut skip = vec![false; len];

        let mut i = 0;
        while i < len {
            // Find the exclusive end of the skipped span starting at `i`,
            // or step over one byte of ordinary code.
            let end = match bytes[i] {
                b'-' if bytes.get(i + 1) == Some(&b'-') => Self::line_comment_end(bytes, i),
                b'/' if bytes.get(i + 1) == Some(&b'*') => Self::block_comment_end(bytes, i),
                b'\'' => Self::string_end(bytes, i),
                quote @ (b'"' | b'`') => Self::quoted_end(bytes, i, quote),
                _ => {
                    i += 1;
                    continue;
                }
            };
            skip[i..end].fill(true);
            i = end;
        }

        SkipMap { skip }
    }

    /// Returns `true` if the byte at `offset` is real SQL code (not inside a
    /// string / comment / quoted identifier).
    ///
    /// Offsets at or beyond the end of the source return `false` rather than
    /// panicking: there is no code byte there to inspect.
    #[inline]
    pub(crate) fn is_code(&self, offset: usize) -> bool {
        matches!(self.skip.get(offset), Some(false))
    }

    /// Exclusive end of the line comment whose `--` starts at `start`.
    ///
    /// The terminating `\n` is left outside the span so that it still counts
    /// as code and line numbers stay correct.
    fn line_comment_end(bytes: &[u8], start: usize) -> usize {
        let body = start + 2;
        bytes[body..]
            .iter()
            .position(|&b| b == b'\n')
            .map_or(bytes.len(), |off| body + off)
    }

    /// Exclusive end of the block comment whose `/*` starts at `start`.
    ///
    /// The closing `*/` is searched for only after the opener, so `/*/` does
    /// not close itself. An unterminated comment runs to the end of input.
    fn block_comment_end(bytes: &[u8], start: usize) -> usize {
        let body = start + 2;
        bytes[body..]
            .windows(2)
            .position(|pair| pair == b"*/")
            .map_or(bytes.len(), |off| body + off + 2)
    }

    /// Exclusive end of the single-quoted string whose opening `'` is at
    /// `start`. A doubled `''` is an escaped quote and does not end the
    /// string. An unterminated string runs to the end of input.
    fn string_end(bytes: &[u8], start: usize) -> usize {
        let mut i = start + 1;
        while i < bytes.len() {
            if bytes[i] != b'\'' {
                i += 1;
            } else if bytes.get(i + 1) == Some(&b'\'') {
                // '' is an escaped quote inside the string, not the end
                i += 2;
            } else {
                return i + 1; // closing quote
            }
        }
        bytes.len()
    }

    /// Exclusive end of the quoted identifier opened by `quote` at `start`
    /// (used for both `"..."` and `` `...` ``). An unterminated identifier
    /// runs to the end of input.
    fn quoted_end(bytes: &[u8], start: usize, quote: u8) -> usize {
        let body = start + 1;
        bytes[body..]
            .iter()
            .position(|&b| b == quote)
            .map_or(bytes.len(), |off| body + off + 1)
    }
}
125
/// Returns `true` if `ch` is an ASCII word character (`[a-zA-Z0-9_]`).
///
/// Any non-ASCII byte (for example a UTF-8 continuation byte) is not a word
/// character. The function is `const`, so it can also be used in constant
/// contexts.
#[inline]
pub(crate) const fn is_word_char(ch: u8) -> bool {
    matches!(ch, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}