mik_sql/validate/
expression.rs

1//! SQL expression validation for computed fields.
2
3/// Validate a SQL expression for computed fields.
4///
5/// Computed field expressions are dangerous because they're inserted directly
6/// into SQL. This function performs defense-in-depth validation to catch
7/// injection attempts, but **cannot provide complete protection**.
8///
9/// # Security Model
10///
11/// This validation is a safety net, not a security boundary. It catches:
12/// - Obvious injection patterns (comments, semicolons, SQL keywords)
13/// - Common attack vectors
14///
15/// It **cannot** catch:
16/// - All possible SQL injection variants
17/// - Database-specific syntax
18/// - Encoded or obfuscated attacks
19///
20/// **CRITICAL**: Only use computed fields with trusted expressions from code.
21/// Never pass user input to computed field expressions, even with validation.
22///
23/// # Valid expressions
24///
25/// - Simple field references: `first_name`, `price`
26/// - Arithmetic: `quantity * price`
27/// - String concatenation: `first_name || ' ' || last_name`
28/// - Functions: `COALESCE(nickname, name)`, `UPPER(name)`
29///
30/// # Invalid expressions (rejected)
31///
32/// - Comments: `--`, `/*`, `*/`
33/// - Statement terminators: `;`
34/// - SQL keywords: SELECT, INSERT, UPDATE, DELETE, DROP, etc.
35/// - System functions: `pg_`, `sqlite_`
36///
37/// # Examples
38///
39/// ```
40/// use mik_sql::is_valid_sql_expression;
41///
42/// assert!(is_valid_sql_expression("first_name || ' ' || last_name"));
43/// assert!(is_valid_sql_expression("quantity * price"));
44/// assert!(is_valid_sql_expression("COALESCE(nickname, name)"));
45///
46/// // Dangerous patterns are rejected
47/// assert!(!is_valid_sql_expression("1; DROP TABLE users"));
48/// assert!(!is_valid_sql_expression("name -- comment"));
49/// assert!(!is_valid_sql_expression("/* comment */ name"));
50/// ```
51#[inline]
52#[must_use]
53pub fn is_valid_sql_expression(s: &str) -> bool {
54    // Empty or oversized expressions are invalid
55    if s.is_empty() || s.len() > 1000 {
56        return false;
57    }
58
59    // No SQL comments
60    if s.contains("--") || s.contains("/*") || s.contains("*/") {
61        return false;
62    }
63
64    // No statement terminators
65    if s.contains(';') {
66        return false;
67    }
68
69    // No backticks (MySQL identifier quotes that could be used for injection)
70    if s.contains('`') {
71        return false;
72    }
73
74    // Check for dangerous SQL keywords using word boundary detection
75    let lower = s.to_ascii_lowercase();
76
77    // Dangerous DML/DDL keywords and functions
78    const DANGEROUS_KEYWORDS: &[&str] = &[
79        // DML/DDL statements
80        "select",
81        "insert",
82        "update",
83        "delete",
84        "drop",
85        "truncate",
86        "alter",
87        "create",
88        "grant",
89        "revoke",
90        "exec",
91        "execute",
92        "union",
93        "into",
94        "from",
95        "where",
96        "having",
97        "group",
98        "order",
99        "limit",
100        "offset",
101        "fetch",
102        "returning",
103        // Dangerous functions (timing attacks, DoS)
104        "sleep",
105        "benchmark",
106        "waitfor",
107        "pg_sleep",
108        "dbms_lock",
109        // File/network operations
110        "load_file",
111        "into_outfile",
112        "into_dumpfile",
113        // Encoding/conversion functions that could bypass keyword detection
114        "chr",
115        "char",
116        "ascii",
117        "unicode",
118        "hex",
119        "unhex",
120        "convert",
121        "cast",
122        "encode",
123        "decode",
124    ];
125
126    for keyword in DANGEROUS_KEYWORDS {
127        if contains_sql_keyword(&lower, keyword) {
128            return false;
129        }
130    }
131
132    // Block system catalog access patterns
133    if lower.contains("pg_")
134        || lower.contains("sqlite_")
135        || lower.contains("information_schema")
136        || lower.contains("sys.")
137    {
138        return false;
139    }
140
141    // Block hex escapes that could bypass other checks
142    if lower.contains("0x") || lower.contains("\\x") {
143        return false;
144    }
145
146    true
147}
148
149/// Check if a string contains a SQL keyword as a whole word.
150///
151/// This prevents false positives like "update" in "`last_updated`".
152#[inline]
153fn contains_sql_keyword(haystack: &str, keyword: &str) -> bool {
154    let bytes = haystack.as_bytes();
155    let kw_bytes = keyword.as_bytes();
156    let kw_len = kw_bytes.len();
157
158    if kw_len == 0 || bytes.len() < kw_len {
159        return false;
160    }
161
162    for i in 0..=(bytes.len() - kw_len) {
163        // Check if keyword matches at this position
164        if &bytes[i..i + kw_len] == kw_bytes {
165            // Check word boundaries (parentheses fix operator precedence: && binds tighter than ||)
166            let before_ok =
167                i == 0 || (!bytes[i - 1].is_ascii_alphanumeric() && bytes[i - 1] != b'_');
168            let after_ok = i + kw_len == bytes.len()
169                || (!bytes[i + kw_len].is_ascii_alphanumeric() && bytes[i + kw_len] != b'_');
170
171            if before_ok && after_ok {
172                return true;
173            }
174        }
175    }
176
177    false
178}
179
180/// Assert that a SQL expression is valid for computed fields.
181///
182/// # Panics
183///
184/// Panics if the expression contains dangerous patterns.
185#[inline]
186pub fn assert_valid_sql_expression(s: &str, context: &str) {
187    assert!(
188        is_valid_sql_expression(s),
189        "Invalid SQL expression for {context}: '{s}' contains dangerous patterns \
190             (comments, semicolons, or SQL keywords)"
191    );
192}
193
194#[cfg(test)]
195mod tests {
196    use super::*;
197
198    #[test]
199    fn test_valid_sql_expressions() {
200        // Valid expressions
201        assert!(is_valid_sql_expression("first_name || ' ' || last_name"));
202        assert!(is_valid_sql_expression("quantity * price"));
203        assert!(is_valid_sql_expression("COALESCE(nickname, name)"));
204        assert!(is_valid_sql_expression("age + 1"));
205        assert!(is_valid_sql_expression("CASE WHEN x > 0 THEN y ELSE z END"));
206        assert!(is_valid_sql_expression("price * 1.1"));
207        assert!(is_valid_sql_expression("UPPER(name)"));
208        assert!(is_valid_sql_expression("LENGTH(description)"));
209
210        // Word boundary detection - these contain keywords as substrings but should be allowed
211        assert!(is_valid_sql_expression("last_updated")); // contains "update"
212        assert!(is_valid_sql_expression("created_at")); // contains "create"
213        assert!(is_valid_sql_expression("selected_items")); // contains "select"
214        assert!(is_valid_sql_expression("deleted_at")); // contains "delete"
215        assert!(is_valid_sql_expression("order_total")); // contains "order"
216        assert!(is_valid_sql_expression("group_name")); // contains "group"
217        assert!(is_valid_sql_expression("from_date")); // contains "from"
218        assert!(is_valid_sql_expression("where_clause")); // contains "where"
219    }
220
221    #[test]
222    fn test_invalid_sql_expressions() {
223        // Empty
224        assert!(!is_valid_sql_expression(""));
225
226        // SQL comments
227        assert!(!is_valid_sql_expression("name -- comment"));
228        assert!(!is_valid_sql_expression("/* comment */ name"));
229        assert!(!is_valid_sql_expression("name */ attack"));
230
231        // Statement terminators
232        assert!(!is_valid_sql_expression("1; DROP TABLE users"));
233        assert!(!is_valid_sql_expression("name;"));
234
235        // Backticks
236        assert!(!is_valid_sql_expression("`table`"));
237
238        // SQL keywords as standalone words
239        assert!(!is_valid_sql_expression("(SELECT password)"));
240        assert!(!is_valid_sql_expression("INSERT INTO x"));
241        assert!(!is_valid_sql_expression("DELETE FROM x"));
242        assert!(!is_valid_sql_expression("DROP TABLE x"));
243        assert!(!is_valid_sql_expression("UPDATE SET y=1"));
244        assert!(!is_valid_sql_expression("UNION ALL"));
245        assert!(!is_valid_sql_expression("x FROM y"));
246        assert!(!is_valid_sql_expression("x WHERE y"));
247
248        // System catalog access
249        assert!(!is_valid_sql_expression("pg_catalog.pg_tables"));
250        assert!(!is_valid_sql_expression("sqlite_master"));
251        assert!(!is_valid_sql_expression("information_schema.tables"));
252
253        // Hex escapes
254        assert!(!is_valid_sql_expression("0x48454C4C4F"));
255        assert!(!is_valid_sql_expression("\\x48454C4C4F"));
256
257        // Dangerous functions (timing attacks, DoS)
258        assert!(!is_valid_sql_expression("SLEEP(10)"));
259        assert!(!is_valid_sql_expression("pg_sleep(5)"));
260        assert!(!is_valid_sql_expression("BENCHMARK(1000000, SHA1('test'))"));
261        assert!(!is_valid_sql_expression("WAITFOR DELAY '0:0:5'"));
262
263        // File operations
264        assert!(!is_valid_sql_expression("LOAD_FILE('/etc/passwd')"));
265    }
266
267    #[test]
268    #[should_panic(expected = "Invalid SQL expression")]
269    fn test_assert_valid_expression_panics() {
270        assert_valid_sql_expression("1; DROP TABLE users", "computed field");
271    }
272
273    // =========================================================================
274    // SQL INJECTION FUZZING TESTS
275    // =========================================================================
276
277    #[test]
278    fn test_sqli_classic_or_true() {
279        // Classic OR-based injection - these are blocked by comment/semicolon detection
280        assert!(!is_valid_sql_expression("' OR 1=1--")); // Blocked by --
281        assert!(!is_valid_sql_expression("1; OR 1=1")); // Blocked by ;
282    }
283
284    #[test]
285    fn test_sqli_drop_table() {
286        // DROP TABLE attacks
287        assert!(!is_valid_sql_expression("'; DROP TABLE users--"));
288        assert!(!is_valid_sql_expression("'; DROP TABLE users;--"));
289        assert!(!is_valid_sql_expression("1; DROP TABLE users"));
290        assert!(!is_valid_sql_expression("DROP TABLE users"));
291        assert!(!is_valid_sql_expression("drop table users"));
292        assert!(!is_valid_sql_expression("DrOp TaBlE users"));
293    }
294
295    #[test]
296    fn test_sqli_union_attacks() {
297        // UNION-based injection
298        assert!(!is_valid_sql_expression("' UNION SELECT * FROM users--"));
299        assert!(!is_valid_sql_expression(
300            "' UNION ALL SELECT password FROM users--"
301        ));
302        assert!(!is_valid_sql_expression("1 UNION SELECT 1,2,3"));
303        assert!(!is_valid_sql_expression(
304            "UNION SELECT username,password FROM admin"
305        ));
306        assert!(!is_valid_sql_expression("' union select null,null,null--"));
307    }
308
309    #[test]
310    fn test_sqli_comment_injection() {
311        // Comment-based attacks - blocked by comment detection
312        assert!(!is_valid_sql_expression("admin'--")); // SQL comment
313        assert!(!is_valid_sql_expression("admin'/*")); // Block comment start
314        assert!(!is_valid_sql_expression("*/; DROP TABLE users--")); // Block comment end + semicolon
315        assert!(!is_valid_sql_expression("1/**/OR/**/1=1")); // Block comments
316    }
317
318    #[test]
319    fn test_sqli_stacked_queries() {
320        // Stacked query attacks (semicolon-based)
321        assert!(!is_valid_sql_expression(
322            "; INSERT INTO users VALUES('hacker')"
323        ));
324        assert!(!is_valid_sql_expression("; UPDATE users SET role='admin'"));
325        assert!(!is_valid_sql_expression("; DELETE FROM users"));
326        assert!(!is_valid_sql_expression("1; SELECT * FROM passwords"));
327        assert!(!is_valid_sql_expression("'; TRUNCATE TABLE logs;--"));
328    }
329
330    #[test]
331    fn test_sqli_time_based_blind() {
332        // Time-based blind injection
333        assert!(!is_valid_sql_expression("SLEEP(5)"));
334        assert!(!is_valid_sql_expression("1 AND SLEEP(5)"));
335        assert!(!is_valid_sql_expression("pg_sleep(5)"));
336        assert!(!is_valid_sql_expression("1; SELECT pg_sleep(10)"));
337        assert!(!is_valid_sql_expression("BENCHMARK(10000000,SHA1('test'))"));
338        assert!(!is_valid_sql_expression("WAITFOR DELAY '0:0:5'"));
339        assert!(!is_valid_sql_expression("dbms_lock.sleep(5)"));
340    }
341
342    #[test]
343    fn test_sqli_file_operations() {
344        // File read/write attacks
345        assert!(!is_valid_sql_expression("LOAD_FILE('/etc/passwd')"));
346        assert!(!is_valid_sql_expression("load_file('/etc/shadow')"));
347        assert!(!is_valid_sql_expression(
348            "INTO OUTFILE '/var/www/shell.php'"
349        ));
350        assert!(!is_valid_sql_expression("INTO DUMPFILE '/tmp/data'"));
351        assert!(!is_valid_sql_expression("into_outfile('/tmp/x')"));
352        assert!(!is_valid_sql_expression("into_dumpfile('/tmp/x')"));
353    }
354
355    #[test]
356    fn test_sqli_system_catalog_access() {
357        // System catalog enumeration
358        assert!(!is_valid_sql_expression("pg_tables"));
359        assert!(!is_valid_sql_expression("pg_catalog.pg_tables"));
360        assert!(!is_valid_sql_expression("sqlite_master"));
361        assert!(!is_valid_sql_expression("information_schema.tables"));
362        assert!(!is_valid_sql_expression("sys.tables"));
363        assert!(!is_valid_sql_expression("SELECT FROM information_schema"));
364    }
365
366    #[test]
367    fn test_sqli_hex_encoding() {
368        // Hex-encoded attacks
369        assert!(!is_valid_sql_expression("0x27")); // Single quote
370        assert!(!is_valid_sql_expression("0x4F5220313D31")); // OR 1=1
371        assert!(!is_valid_sql_expression("\\x27"));
372        assert!(!is_valid_sql_expression("CHAR(0x27)"));
373    }
374
375    #[test]
376    fn test_sqli_keyword_boundary_detection() {
377        // These SHOULD be allowed - keywords as substrings of identifiers
378        assert!(is_valid_sql_expression("order_id")); // order
379        assert!(is_valid_sql_expression("reorder_count")); // order
380        assert!(is_valid_sql_expression("group_name")); // group
381        assert!(is_valid_sql_expression("ungroup")); // group
382        assert!(is_valid_sql_expression("from_date")); // from
383        assert!(is_valid_sql_expression("wherefrom")); // where, from
384        assert!(is_valid_sql_expression("selected_items")); // select
385        assert!(is_valid_sql_expression("preselect")); // select
386        assert!(is_valid_sql_expression("delete_flag")); // delete
387        assert!(is_valid_sql_expression("undelete")); // delete
388        assert!(is_valid_sql_expression("update_time")); // update
389        assert!(is_valid_sql_expression("last_updated")); // update
390
391        // These SHOULD be blocked - standalone keywords
392        assert!(!is_valid_sql_expression("ORDER BY name"));
393        assert!(!is_valid_sql_expression("GROUP BY id"));
394        assert!(!is_valid_sql_expression("FROM users"));
395        assert!(!is_valid_sql_expression("WHERE id=1"));
396        assert!(!is_valid_sql_expression("SELECT *"));
397        assert!(!is_valid_sql_expression("DELETE FROM"));
398        assert!(!is_valid_sql_expression("UPDATE SET"));
399    }
400
401    #[test]
402    fn test_sqli_case_variations() {
403        // Case variations of dangerous keywords
404        assert!(!is_valid_sql_expression("SELECT"));
405        assert!(!is_valid_sql_expression("select"));
406        assert!(!is_valid_sql_expression("SeLeCt"));
407        assert!(!is_valid_sql_expression("sElEcT"));
408
409        assert!(!is_valid_sql_expression("UNION"));
410        assert!(!is_valid_sql_expression("union"));
411        assert!(!is_valid_sql_expression("UnIoN"));
412
413        assert!(!is_valid_sql_expression("DROP"));
414        assert!(!is_valid_sql_expression("drop"));
415        assert!(!is_valid_sql_expression("DrOp"));
416    }
417
418    #[test]
419    fn test_sqli_whitespace_variations() {
420        // Whitespace-based evasion - these should still be caught
421        assert!(!is_valid_sql_expression("SELECT\t*"));
422        assert!(!is_valid_sql_expression("SELECT\n*"));
423        assert!(!is_valid_sql_expression("  SELECT  "));
424        assert!(!is_valid_sql_expression("DROP\t\tTABLE"));
425    }
426
427    #[test]
428    fn test_sqli_expression_length_limit() {
429        // Very long expressions should be rejected
430        let long_expr = "a".repeat(1001);
431        assert!(!is_valid_sql_expression(&long_expr));
432
433        // At limit should be OK
434        let at_limit = "a".repeat(1000);
435        assert!(is_valid_sql_expression(&at_limit));
436    }
437
438    #[test]
439    fn test_valid_safe_expressions() {
440        // Legitimate expressions that should be allowed
441        assert!(is_valid_sql_expression("first_name || ' ' || last_name"));
442        assert!(is_valid_sql_expression("price * quantity"));
443        assert!(is_valid_sql_expression("price * 1.15")); // With tax
444        assert!(is_valid_sql_expression(
445            "COALESCE(nickname, first_name, 'Anonymous')"
446        ));
447        assert!(is_valid_sql_expression("UPPER(TRIM(name))"));
448        assert!(is_valid_sql_expression("LENGTH(description)"));
449        assert!(is_valid_sql_expression("ABS(balance)"));
450        assert!(is_valid_sql_expression("ROUND(price, 2)"));
451        assert!(is_valid_sql_expression("LOWER(email)"));
452        assert!(is_valid_sql_expression("created_at + INTERVAL '1 day'"));
453        assert!(is_valid_sql_expression("age >= 18"));
454        assert!(is_valid_sql_expression("status = 'active'"));
455        assert!(is_valid_sql_expression("NOT is_deleted"));
456        assert!(is_valid_sql_expression("(price > 0) AND (quantity > 0)"));
457    }
458}