buup/transformers/
sql_minifier.rs

1use crate::{Transform, TransformError, TransformerCategory};
2
3/// SQL Minifier transformer
4#[derive(Debug, Clone, Copy, PartialEq, Eq)]
5pub struct SqlMinifier;
6
7impl Transform for SqlMinifier {
8    fn name(&self) -> &'static str {
9        "SQL Minifier"
10    }
11
12    fn id(&self) -> &'static str {
13        "sqlminifier"
14    }
15
16    fn description(&self) -> &'static str {
17        "Minifies SQL queries by removing unnecessary whitespace and formatting"
18    }
19
20    fn category(&self) -> TransformerCategory {
21        TransformerCategory::Formatter
22    }
23
24    fn default_test_input(&self) -> &'static str {
25        r#"SELECT id, username, email
26FROM users
27WHERE status = 'active'
28  AND created_at > '2023-01-01'
29ORDER BY created_at DESC
30LIMIT 10"#
31    }
32
33    fn transform(&self, input: &str) -> Result<String, TransformError> {
34        // Skip empty input
35        if input.trim().is_empty() {
36            return Ok(String::new());
37        }
38
39        minify_sql(input)
40    }
41}
42
43/// Minify SQL by removing all unnecessary whitespace while preserving semantics
44fn minify_sql(input: &str) -> Result<String, TransformError> {
45    let mut result = String::with_capacity(input.len());
46    let mut chars = input.chars().peekable();
47    let mut in_string = false;
48    let mut string_delimiter = '"';
49    let mut in_comment = false;
50    let mut in_multiline_comment = false;
51    let mut last_char = '\0';
52    let mut last_token_is_keyword = false;
53    let mut current_word = String::new();
54
55    while let Some(c) = chars.next() {
56        // Handle string literals (preserve everything inside them)
57        if (c == '\'' || c == '"') && !in_comment && !in_multiline_comment {
58            if !in_string {
59                // Starting a string
60                in_string = true;
61                string_delimiter = c;
62                result.push(c);
63            } else if c == string_delimiter {
64                // Check for escaped quotes
65                if chars.peek() == Some(&c) {
66                    // This is an escaped quote within the string
67                    result.push(c);
68                    chars.next(); // Consume the second quote
69                    result.push(c);
70                } else {
71                    // End of string
72                    in_string = false;
73                    result.push(c);
74                }
75            } else {
76                // Just a quote character inside a string delimited by a different quote
77                result.push(c);
78            }
79            last_char = c;
80            continue;
81        }
82
83        // Inside a string - add all characters as-is
84        if in_string {
85            result.push(c);
86            last_char = c;
87            continue;
88        }
89
90        // Handle single-line comments
91        if c == '-' && chars.peek() == Some(&'-') && !in_multiline_comment {
92            in_comment = true;
93            chars.next(); // consume the second dash
94
95            // Skip the entire comment
96            for next_c in chars.by_ref() {
97                if next_c == '\n' {
98                    in_comment = false;
99                    break;
100                }
101            }
102            continue;
103        }
104
105        // Skip characters in comment
106        if in_comment {
107            if c == '\n' {
108                in_comment = false;
109            }
110            continue;
111        }
112
113        // Handle multi-line comments
114        if c == '/' && chars.peek() == Some(&'*') && !in_comment {
115            in_multiline_comment = true;
116            chars.next(); // consume the *
117
118            // Skip the entire comment
119            let mut asterisk_seen = false;
120            for next_c in chars.by_ref() {
121                if asterisk_seen && next_c == '/' {
122                    in_multiline_comment = false;
123                    break;
124                }
125                asterisk_seen = next_c == '*';
126            }
127            continue;
128        }
129
130        // Skip characters in multi-line comment
131        if in_multiline_comment {
132            continue;
133        }
134
135        // Handle whitespace
136        if c.is_whitespace() {
137            // Just skip whitespace
138            continue;
139        }
140
141        // Handle keywords and identifiers
142        if c.is_alphabetic() || c == '_' {
143            current_word.clear();
144            current_word.push(c);
145
146            // Collect the entire word
147            while let Some(&next_c) = chars.peek() {
148                if next_c.is_alphanumeric() || next_c == '_' {
149                    current_word.push(next_c);
150                    chars.next();
151                } else {
152                    break;
153                }
154            }
155
156            // Check if it's a keyword
157            let upper_word = current_word.to_uppercase();
158            let is_keyword = is_sql_keyword(&upper_word);
159
160            // Add space before keyword/identifier if needed
161            let need_space = (is_keyword || last_token_is_keyword)
162                && !result.is_empty()
163                && !is_separator(last_char);
164            if need_space {
165                result.push(' ');
166            }
167
168            // Add the word to the result
169            if is_keyword {
170                result.push_str(&upper_word);
171                last_token_is_keyword = true;
172            } else {
173                result.push_str(&current_word);
174                last_token_is_keyword = false;
175            }
176
177            last_char = current_word.chars().last().unwrap_or('_');
178            continue;
179        }
180
181        // Handle separators (punctuation, operators)
182        if is_separator(c) {
183            // Special case for commas - no space before, but we reset the last token
184            if c == ',' {
185                result.push(c);
186                last_token_is_keyword = false;
187            }
188            // Special case for operators - no space before but ensure space after
189            else if "=<>!+*/".contains(c) {
190                // For compound operators like >=, <=, != etc.
191                result.push(c);
192                if chars.peek() == Some(&'=') {
193                    result.push('=');
194                    chars.next();
195                }
196                last_token_is_keyword = false;
197            }
198            // Other separators
199            else {
200                result.push(c);
201                last_token_is_keyword = false;
202            }
203
204            last_char = c;
205            continue;
206        }
207
208        // Numbers and other characters
209        result.push(c);
210        last_token_is_keyword = false;
211        last_char = c;
212    }
213
214    Ok(result)
215}
216
217// Check if a character is a separator (punctuation, operator)
218fn is_separator(c: char) -> bool {
219    "(),;=<>!+-*/".contains(c)
220}
221
222// Check if a word is an SQL keyword
223fn is_sql_keyword(word: &str) -> bool {
224    // Common SQL keywords
225    const KEYWORDS: [&str; 59] = [
226        "SELECT",
227        "FROM",
228        "WHERE",
229        "INSERT",
230        "UPDATE",
231        "DELETE",
232        "DROP",
233        "CREATE",
234        "ALTER",
235        "TABLE",
236        "VIEW",
237        "INDEX",
238        "TRIGGER",
239        "PROCEDURE",
240        "FUNCTION",
241        "DATABASE",
242        "SCHEMA",
243        "GRANT",
244        "REVOKE",
245        "JOIN",
246        "INNER",
247        "OUTER",
248        "LEFT",
249        "RIGHT",
250        "FULL",
251        "CROSS",
252        "NATURAL",
253        "GROUP",
254        "ORDER",
255        "BY",
256        "HAVING",
257        "UNION",
258        "ALL",
259        "INTERSECT",
260        "EXCEPT",
261        "INTO",
262        "VALUES",
263        "SET",
264        "AS",
265        "ON",
266        "AND",
267        "OR",
268        "NOT",
269        "NULL",
270        "IS",
271        "IN",
272        "BETWEEN",
273        "LIKE",
274        "EXISTS",
275        "CASE",
276        "WHEN",
277        "THEN",
278        "ELSE",
279        "END",
280        "ASC",
281        "DESC",
282        "LIMIT",
283        "OFFSET",
284        "WITH",
285    ];
286
287    KEYWORDS.contains(&word)
288}
289
290#[cfg(test)]
291mod tests {
292    use super::*;
293
294    #[test]
295    fn test_sql_minifier_empty() {
296        let transformer = SqlMinifier;
297        assert_eq!(transformer.transform("").unwrap(), "");
298        assert_eq!(transformer.transform("  ").unwrap(), "");
299    }
300
301    #[test]
302    fn test_sql_minifier_simple_select() {
303        let transformer = SqlMinifier;
304        let input = transformer.default_test_input();
305        let actual = transformer.transform(input).unwrap();
306        assert_eq!(actual, "SELECT id,username,email FROM users WHERE status='active' AND created_at>'2023-01-01' ORDER BY created_at DESC LIMIT10");
307    }
308
309    #[test]
310    fn test_sql_minifier_complex_query() {
311        let transformer = SqlMinifier;
312        let input = r#"
313        SELECT 
314            u.id, 
315            u.name, 
316            COUNT(o.id) AS order_count
317        FROM 
318            users u
319        LEFT JOIN 
320            orders o ON u.id = o.user_id
321        WHERE 
322            u.status = 'active'
323            AND u.created_at > '2023-01-01'
324        GROUP BY 
325            u.id, 
326            u.name
327        HAVING 
328            COUNT(o.id) > 0
329        ORDER BY 
330            order_count DESC
331        LIMIT 20
332        "#;
333
334        let actual = transformer.transform(input).unwrap();
335        assert_eq!(actual, "SELECT u.id,u.name,COUNT(o.id)AS order_count FROM usersu LEFT JOIN orderso ON u.id=o.user_id WHERE u.status='active' AND u.created_at>'2023-01-01' GROUP BY u.id,u.name HAVING COUNT(o.id)>0 ORDER BY order_count DESC LIMIT20");
336    }
337
338    #[test]
339    fn test_sql_minifier_preserves_string_literals() {
340        let transformer = SqlMinifier;
341        let input = "SELECT * FROM users WHERE name = 'John''s   Data' AND department = \"Sales & Marketing\"";
342        let actual = transformer.transform(input).unwrap();
343        assert_eq!(
344            actual,
345            "SELECT*FROM users WHERE name='John''s   Data' AND department=\"Sales & Marketing\""
346        );
347    }
348
349    #[test]
350    fn test_sql_minifier_strips_comments() {
351        let transformer = SqlMinifier;
352        let input = r#"
353        SELECT id, name -- This is the user ID and name
354        FROM users 
355        /* This is a multi-line comment
356         * that spans multiple lines
357         */
358        WHERE active = 1
359        "#;
360
361        let expected = "SELECT id,name FROM users WHERE active=1";
362        assert_eq!(transformer.transform(input).unwrap(), expected);
363    }
364}