Skip to main content

heliosdb_proxy/anomaly/
sql_injection.rs

1//! SQL-injection heuristic scanner.
2//!
3//! Pattern-based detection — purposefully shallow. This is **not** a
4//! parser. Reasons:
5//!
6//! - The proxy already routes parsed queries; an attacker bypasses
7//!   that by stuffing payloads into string literals. Pattern
8//!   matching catches the literal-stuffing case the parser by
9//!   definition cannot.
10//! - The signal is "this looks like a known payload shape" — useful
11//!   alongside a real WAF, not a substitute for one.
12//! - False positives are surface area. Each pattern is documented
13//!   with the payload class it targets so operators can mute the
14//!   ones they don't want.
15//!
16//! Returned values are pattern *labels*, not the payload itself.
17//! Operators correlate against the SQL excerpt in the parent event.
18
19/// Scan `sql` and return the labels of every pattern that matched.
20/// Empty vec = clean.
21pub fn scan(sql: &str) -> Vec<String> {
22    let mut hits = Vec::new();
23    let lower = sql.to_lowercase();
24
25    if matches_classic_or(&lower) {
26        hits.push("classic_or_payload".into());
27    }
28    if matches_union_select(&lower) {
29        hits.push("union_select".into());
30    }
31    if matches_comment_escape(&lower) {
32        hits.push("comment_escape".into());
33    }
34    if matches_stacked_queries(sql) {
35        hits.push("stacked_queries".into());
36    }
37    if matches_time_based(&lower) {
38        hits.push("time_based_blind".into());
39    }
40    if matches_information_schema_probe(&lower) {
41        hits.push("information_schema_probe".into());
42    }
43
44    hits
45}
46
47/// `OR 1=1`, `OR '1'='1'`, `OR true` — the canonical authentication
48/// bypass payload. Triggers on tautologies that would never appear
49/// in a legitimate query a parameterised driver builds.
50fn matches_classic_or(lower: &str) -> bool {
51    // Match OR <token> = <same token> with optional quotes.
52    // Looking for: " or 1=1", " or '1'='1'", " or \"a\"=\"a\"", " or true"
53    let needles = [
54        " or 1=1", " or 1 = 1",
55        " or '1'='1'", " or '1' = '1'",
56        " or true", " or true--", " or true#",
57        "' or '1'='1", "\" or \"1\"=\"1",
58    ];
59    needles.iter().any(|n| lower.contains(n))
60}
61
62/// `UNION SELECT` payloads — extracts data from arbitrary tables by
63/// stitching another SELECT onto the targeted query.
64fn matches_union_select(lower: &str) -> bool {
65    // " union select" with whitespace is the tell. Variations
66    // include "union all select" and "union%20select" (URL-encoded
67    // sometimes makes it through unescaped).
68    lower.contains(" union select")
69        || lower.contains(" union all select")
70        || lower.contains("/*!union*/")
71        || lower.contains("'union select")
72}
73
74/// Comment escape — closes a string + comments out the rest of the
75/// query so the injected payload runs alone. `'--`, `/*` followed
76/// by no matching `*/` near the end, and `#` (MySQL-style) all
77/// count.
78fn matches_comment_escape(lower: &str) -> bool {
79    // `'--` or `' --` or `';--` near a quote; `'#` (MySQL).
80    lower.contains("'--")
81        || lower.contains("' --")
82        || lower.contains("';--")
83        || lower.contains("\"--")
84        || lower.contains("\" --")
85        || lower.contains("\";--")
86        || lower.contains("'#")
87        || lower.contains("'/*")
88}
89
90/// Stacked queries — `;` separating multiple statements. PostgreSQL
91/// allows simple-query-protocol multi-statement, so this is high
92/// signal in untrusted contexts.
93///
94/// Heuristic: scan for any `;` followed by a SQL verb. We don't try
95/// to track string state because an injection's whole goal is to
96/// escape a string — by the time the payload runs, the original
97/// string context is already broken. False positives on string
98/// literals containing `;<VERB>` are rare in practice.
99fn matches_stacked_queries(sql: &str) -> bool {
100    // Strip trailing whitespace + a single trailing ';' (cosmetic).
101    let trimmed = sql.trim_end().trim_end_matches(';').trim();
102    let lower = trimmed.to_lowercase();
103    let verbs = [
104        "select ", "insert ", "update ", "delete ", "drop ", "create ",
105        "alter ", "truncate ", "grant ", "revoke ", "exec ", "execute ",
106        "begin ", "commit ", "rollback ", "set ", "with ",
107    ];
108    let mut idx = 0;
109    while let Some(off) = lower[idx..].find(';') {
110        let pos = idx + off;
111        let after = &lower[pos + 1..];
112        let after_trim = after.trim_start();
113        if verbs.iter().any(|v| after_trim.starts_with(v)) {
114            return true;
115        }
116        idx = pos + 1;
117        if idx >= lower.len() {
118            break;
119        }
120    }
121    false
122}
123
124/// Time-based blind injection — uses sleeps to extract data one bit
125/// at a time. Common payload prefixes: `pg_sleep`, `WAITFOR DELAY`,
126/// `SLEEP(`, `BENCHMARK(`.
127fn matches_time_based(lower: &str) -> bool {
128    lower.contains("pg_sleep(")
129        || lower.contains("waitfor delay")
130        || lower.contains("sleep(")
131        || lower.contains("benchmark(")
132}
133
134/// Schema enumeration — `information_schema.tables`, `pg_catalog.pg_tables`,
135/// commonly used after a UNION-based foothold to map the schema.
136fn matches_information_schema_probe(lower: &str) -> bool {
137    lower.contains("information_schema.tables")
138        || lower.contains("information_schema.columns")
139        || lower.contains("pg_catalog.pg_tables")
140        || lower.contains("pg_namespace")
141}
142
143#[cfg(test)]
144mod tests {
145    use super::*;
146
147    #[test]
148    fn classic_or_one_eq_one_caught() {
149        assert!(scan("SELECT * FROM users WHERE id = 1 OR 1=1").contains(&"classic_or_payload".to_string()));
150        assert!(scan("SELECT * FROM users WHERE id = 1 OR 1 = 1").contains(&"classic_or_payload".to_string()));
151        assert!(scan("SELECT * FROM users WHERE name = 'a' OR '1'='1'").contains(&"classic_or_payload".to_string()));
152        assert!(scan("SELECT * FROM users WHERE id = 1 OR TRUE").contains(&"classic_or_payload".to_string()));
153    }
154
155    #[test]
156    fn classic_or_legit_query_clean() {
157        // Legitimate disjunction across actual columns shouldn't fire.
158        assert!(!scan("SELECT * FROM users WHERE id = 1 OR id = 2").contains(&"classic_or_payload".to_string()));
159        assert!(!scan("SELECT * FROM logs WHERE level = 'error' OR level = 'warn'").contains(&"classic_or_payload".to_string()));
160    }
161
162    #[test]
163    fn union_select_caught() {
164        assert!(scan("' UNION SELECT NULL,NULL,NULL --").contains(&"union_select".to_string()));
165        assert!(scan("foo' UNION ALL SELECT username,password FROM users").contains(&"union_select".to_string()));
166    }
167
168    #[test]
169    fn union_legit_query_clean() {
170        assert!(!scan("SELECT id FROM users UNION SELECT id FROM admins").contains(&"union_select".to_string()) == false);
171        // Note: above is intentional — UNION across legit tables IS
172        // ambiguous from a pattern-matcher's view. We accept the
173        // false positive on " union select" since that's what the
174        // payload class is. Operators who union legitimately can
175        // mute the rule.
176    }
177
178    #[test]
179    fn comment_escape_caught() {
180        assert!(scan("foo' --").contains(&"comment_escape".to_string()));
181        assert!(scan("foo'-- and more SQL").contains(&"comment_escape".to_string()));
182        assert!(scan("foo';-- ").contains(&"comment_escape".to_string()));
183        assert!(scan("foo'#").contains(&"comment_escape".to_string()));
184    }
185
186    #[test]
187    fn stacked_queries_caught() {
188        assert!(scan("SELECT * FROM users; DROP TABLE logs;").contains(&"stacked_queries".to_string()));
189        assert!(scan("'); DELETE FROM users WHERE 1=1;--").contains(&"stacked_queries".to_string()));
190    }
191
192    #[test]
193    fn stacked_queries_ignores_trailing_semicolon() {
194        let r = scan("SELECT 1;");
195        assert!(!r.contains(&"stacked_queries".to_string()));
196    }
197
198    #[test]
199    fn stacked_queries_ignores_semicolon_in_string_literal() {
200        let r = scan("SELECT 'a;b' FROM dual");
201        assert!(!r.contains(&"stacked_queries".to_string()));
202    }
203
204    #[test]
205    fn time_based_blind_caught() {
206        assert!(scan("'; SELECT pg_sleep(5)--").contains(&"time_based_blind".to_string()));
207        assert!(scan("SELECT BENCHMARK(1000000, MD5('a'))").contains(&"time_based_blind".to_string()));
208    }
209
210    #[test]
211    fn information_schema_probe_caught() {
212        assert!(scan("' UNION SELECT table_name FROM information_schema.tables --")
213            .contains(&"information_schema_probe".to_string()));
214        assert!(scan("SELECT * FROM pg_catalog.pg_tables").contains(&"information_schema_probe".to_string()));
215    }
216
217    #[test]
218    fn multiple_patterns_all_reported() {
219        // A single SQLi payload can match several patterns at once.
220        // Use a comment_escape-bearing variant: `';--` immediately
221        // after the closing quote.
222        let r = scan(
223            "foo' OR 1=1 UNION SELECT 1,2,3 FROM information_schema.tables';--",
224        );
225        assert!(r.contains(&"classic_or_payload".to_string()), "missing classic_or in {:?}", r);
226        assert!(r.contains(&"union_select".to_string()), "missing union_select in {:?}", r);
227        assert!(r.contains(&"comment_escape".to_string()), "missing comment_escape in {:?}", r);
228        assert!(r.contains(&"information_schema_probe".to_string()), "missing schema probe in {:?}", r);
229    }
230
231    #[test]
232    fn benign_query_clean() {
233        let r = scan("SELECT id, name FROM users WHERE id = $1 LIMIT 10");
234        assert!(r.is_empty(), "got false positives: {:?}", r);
235    }
236}