// sql_lexer/lib.rs

1mod lexer;
2mod sanitizer;
3mod writer;
4
5#[derive(Debug, PartialEq)]
6pub enum Keyword {
7    Select,  // SELECT
8    From,    // FROM
9    Where,   // WHERE
10    And,     // AND
11    Or,      // OR
12    Update,  // UPDATE
13    Set,     // SET
14    Insert,  // INSERT
15    Into,    // INTO
16    Values,  // VALUES
17    Inner,   // INNER
18    Join,    // JOIN
19    On,      // ON
20    Limit,   // LIMIT
21    Offset,  // OFFSET
22    Between, // BETWEEN
23    Array,   // ARRAY
24    Other(BufferSlice),
25}
26
27#[derive(Debug, PartialEq)]
28pub enum Operator {
29    Arithmetic(ArithmeticOperator),
30    Logical(LogicalOperator),
31    Comparison(ComparisonOperator),
32    Bitwise(BitwiseOperator),
33    Json(JsonOperator),
34}
35
/// Arithmetic operators.
#[derive(Debug, PartialEq)]
pub enum ArithmeticOperator {
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `+`
    Plus,
    /// `-`
    Minus,
}

/// Operators that are spelled as words rather than symbols.
#[derive(Debug, PartialEq)]
pub enum LogicalOperator {
    /// `IN`
    In,
    /// `NOT`
    Not,
    /// `LIKE`
    Like,
    /// `ILIKE`
    Ilike,
    /// `RLIKE`
    Rlike,
    /// `GLOB`
    Glob,
    /// `MATCH`
    Match,
    /// `REGEXP`
    Regexp,
    /// `THEN`
    Then,
    /// `ELSE`
    Else,
}

/// Comparison operators.
///
/// NOTE(review): the previous comments listed `=>` for both
/// `GreaterThanOrEqual` and `EqualOrGreaterThan`, and `<=` for both
/// `LessThanOrEqual` and `EqualOrLessThan`. The symbols below follow the
/// variant names instead; confirm them against the lexer's operator table.
#[derive(Debug, PartialEq)]
pub enum ComparisonOperator {
    /// `=`
    Equal,
    /// `==`
    Equal2,
    /// `<=>`
    NullSafeEqual,
    /// `>=`
    GreaterThanOrEqual,
    /// `<=`
    LessThanOrEqual,
    /// `=>`
    EqualOrGreaterThan,
    /// `=<`
    EqualOrLessThan,
    /// `<>`
    EqualWithArrows,
    /// `!=`
    NotEqual,
    /// `>`
    GreaterThan,
    /// `<`
    LessThan,
}

/// Bitwise operators.
#[derive(Debug, PartialEq)]
pub enum BitwiseOperator {
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `&`
    And,
    /// `|`
    Or,
}

/// JSON path operators.
#[derive(Debug, PartialEq)]
pub enum JsonOperator {
    /// `#>`
    SpecifiedPath,
    /// `#>>`
    SpecifiedPathAsText,
}

88#[derive(Debug, PartialEq)]
89pub enum LiteralValueTypeIndicator {
90    Binary,               // BINARY
91    Date,                 // DATE
92    Time,                 // TIME
93    Timestamp,            // TIMESTAMP
94    X,                    // Hexadecimal literal
95    ZeroX,                // Hexadecimal literal
96    B,                    // Bit field
97    ZeroB,                // Bit field
98    N,                    // National character set
99    Charset(BufferSlice), // Character set
100}
101
/// A `start..end` range of byte offsets into the buffer of a [`Sql`].
///
/// `Sql::buffer_content` resolves a slice to text by indexing the buffer with
/// `start..end`, so `start` is inclusive and `end` is exclusive.
#[derive(Debug, PartialEq)]
pub struct BufferSlice {
    /// Offset of the first byte of the slice.
    pub start: usize,
    /// Offset one past the last byte of the slice.
    pub end: usize,
}

impl BufferSlice {
    /// Create a slice covering `start..end`. No validation is performed.
    pub fn new(start: usize, end: usize) -> BufferSlice {
        Self { start, end }
    }
}

114#[derive(Debug, PartialEq)]
115pub enum Token {
116    Operator(Operator),
117    Keyword(Keyword),
118    LiteralValueTypeIndicator(LiteralValueTypeIndicator),
119    Backticked(BufferSlice),
120    DoubleQuoted(BufferSlice),
121    SingleQuoted(BufferSlice),
122    Numeric(BufferSlice),
123    Comment(BufferSlice),
124    Space,
125    Newline,
126    Dot,
127    Comma,
128    Wildcard,
129    ParentheseOpen,
130    ParentheseClose,
131    SquareBracketOpen,
132    SquareBracketClose,
133    Colon,
134    Semicolon,
135    // Used by the sanitizer to replace values
136    Placeholder,
137    // Used by the sanitizer to replace repeated value lists
138    Ellipsis,
139    // Used by the sanitizer to remove tokens
140    None,
141    Null,
142    True,
143    False,
144    NumberedPlaceholder(BufferSlice),
145    Unknown(char),
146}
147
148#[derive(Debug, PartialEq)]
149pub struct Sql {
150    buf: String,
151    pub tokens: Vec<Token>,
152}
153
154impl Sql {
155    pub fn buffer_content(&self, pos: &BufferSlice) -> &str {
156        let len = self.buf.len();
157        if pos.end < pos.start || pos.start > len || pos.end > len {
158            // If the positions are out of bounds return a blank string
159            return "";
160        }
161        &self.buf[pos.start..pos.end]
162    }
163}
164
165/// Lex a sql string into a `Sql` struct that contains the original
166/// buffer and the tokens found.
167pub fn lex(buf: String) -> Sql {
168    lexer::SqlLexer::new(buf).lex()
169}
170
171/// Write a `Sql` struct back to a sql string.
172pub fn write(sql: Sql) -> String {
173    writer::SqlWriter::new(sql).write()
174}
175
176/// Sanitize a `Sql` struct
177pub fn sanitize(sql: Sql) -> Sql {
178    sanitizer::SqlSanitizer::new(sql).sanitize()
179}
180
181/// Returns a sanitized sql string
182pub fn sanitize_string(buf: String) -> String {
183    write(sanitize(lex(buf)))
184}
185
#[cfg(test)]
mod tests {
    use super::Sql;
    use super::{BufferSlice, ComparisonOperator, Keyword, Operator, Token};

    // A valid slice returns the matching part of the buffer.
    #[test]
    fn test_buffer_content() {
        let sql = Sql {
            buf: "SELECT `table`.* FROM `table` WHERE `id` = 'secret';".to_string(),
            tokens: Vec::new(),
        };
        let buffer_position = BufferSlice::new(17, 21);

        assert_eq!("FROM", sql.buffer_content(&buffer_position));
    }

    // Slice offsets are byte offsets, so multi-byte characters count for
    // more than one position.
    #[test]
    fn test_buffer_content_multibyte_characters() {
        let sql = Sql {
            buf: "\"hæld\" ; 'jæld' ; `tæld`".to_string(),
            tokens: Vec::new(),
        };

        assert_eq!("hæld", sql.buffer_content(&BufferSlice::new(1, 6)));
        assert_eq!("jæld", sql.buffer_content(&BufferSlice::new(11, 16)));
        assert_eq!("tæld", sql.buffer_content(&BufferSlice::new(21, 26)));
    }

    // A slice whose end lies before its start yields a blank string.
    #[test]
    fn test_buffer_content_wrong_order() {
        let sql = Sql {
            buf: "buffer content".to_string(),
            tokens: Vec::new(),
        };
        let buffer_position = BufferSlice::new(6, 1);

        assert_eq!("", sql.buffer_content(&buffer_position));
    }

    // A slice entirely past the end of the buffer yields a blank string.
    #[test]
    fn test_buffer_content_out_of_bounds() {
        let sql = Sql {
            buf: "buffer content".to_string(),
            tokens: Vec::new(),
        };
        let buffer_position = BufferSlice::new(100, 200);

        assert_eq!("", sql.buffer_content(&buffer_position));
    }

    // A slice that starts inside the buffer but ends past it yields a blank
    // string rather than a truncated one.
    #[test]
    fn test_buffer_content_out_of_bounds_partially() {
        let sql = Sql {
            buf: "buffer content".to_string(),
            tokens: Vec::new(),
        };
        let buffer_position = BufferSlice::new(0, 200);

        assert_eq!("", sql.buffer_content(&buffer_position));
    }

    #[test]
    fn test_lex() {
        let sql_buffer = "SELECT * FROM `table`";

        let expected = vec![
            Token::Keyword(Keyword::Select),
            Token::Space,
            Token::Wildcard,
            Token::Space,
            Token::Keyword(Keyword::From),
            Token::Space,
            Token::Backticked(BufferSlice::new(15, 20)),
        ];

        let sql = super::lex(sql_buffer.to_string());
        assert_eq!(sql.buf, sql_buffer);
        assert_eq!(sql.tokens, expected);
    }

    // Lexing and writing without sanitizing round-trips the input.
    #[test]
    fn test_write() {
        let sql_buffer = "SELECT * FROM `table`";
        assert_eq!(super::write(super::lex(sql_buffer.to_string())), sql_buffer);
    }

    // The numeric value `1` is replaced by a placeholder token.
    #[test]
    fn test_sanitize() {
        let sql = super::sanitize(super::lex(
            "SELECT * FROM `table` WHERE `id` = 1;".to_string(),
        ));

        let expected = vec![
            Token::Keyword(Keyword::Select),
            Token::Space,
            Token::Wildcard,
            Token::Space,
            Token::Keyword(Keyword::From),
            Token::Space,
            Token::Backticked(BufferSlice::new(15, 20)),
            Token::Space,
            Token::Keyword(Keyword::Where),
            Token::Space,
            Token::Backticked(BufferSlice::new(29, 31)),
            Token::Space,
            Token::Operator(Operator::Comparison(ComparisonOperator::Equal)),
            Token::Space,
            Token::Placeholder,
            Token::Semicolon,
        ];

        assert_eq!(sql.tokens, expected);
    }

    #[test]
    fn test_sanitize_string() {
        assert_eq!(
            super::sanitize_string("SELECT * FROM `table` WHERE id = 1;".to_string()),
            "SELECT * FROM `table` WHERE id = ?;"
        );
    }
}
305        );
306    }
307}