1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//! Regression tests pinning the four SQL-injection vectors audited in
//! issue #95. Each test asserts a structural property of the existing
//! design — they fail loudly if a future refactor introduces a SQL
//! string round-trip that would defeat the parser hardening.
//!
//! Companion document: `docs/security/sql-injection-audit.md`.
//!
//! These tests are pure parser / AST exercises — no runtime, no I/O —
//! so they add zero latency to the SELECT hot path and keep CI fast.
#![allow(clippy::needless_borrow)]
use reddb_server::storage::query::ast::{
CompareOp, CreatePolicyQuery, Expr, Filter, QueryExpr, TableQuery,
};
use reddb_server::storage::query::modes::parse_multi;
use reddb_server::storage::query::parser;
use reddb_server::storage::query::planner::shape::{
bind_parameterized_query, parameterize_query_expr,
};
use reddb_server::storage::schema::Value;
/// Return an owned copy of the WHERE expression stored in a `TableQuery`.
///
/// Only the `where_expr` slot is consulted. A query whose predicate lives
/// anywhere else (for example a legacy `filter` slot, if `TableQuery`
/// still carries one) is not handled here and hits the panic below.
///
/// # Panics
///
/// Panics when `tq.where_expr` is `None`.
fn extract_where_expr(tq: &TableQuery) -> Expr {
    match &tq.where_expr {
        // Clone so the caller can destructure the expression by value.
        Some(predicate) => predicate.clone(),
        None => panic!("table query has no where_expr after binding"),
    }
}
/// V1 — a bound string parameter is treated as a literal `Value::Text`,
/// never re-tokenised. A SQL-injection-shaped payload survives binding
/// as an opaque string in `Expr::Literal { value: Value::Text(...) }`.
///
/// Flow: parse a simple SELECT, lift its single literal into a parameter
/// slot, bind the injection payload into that slot, then walk the bound
/// AST and check that the right-hand side of the comparison is a literal
/// carrying the payload byte-for-byte.
#[test]
fn prepared_bound_string_is_treated_as_literal_not_sql() {
    // Parse a parameterisable SELECT shape. The literal `'placeholder'`
    // gets lifted to a `Parameter` slot during shape extraction.
    let parsed =
        parse_multi("SELECT * FROM users WHERE name = 'placeholder'").expect("parse base query");
    let parameterised = parameterize_query_expr(&parsed).expect("query is parameterisable");
    assert_eq!(
        parameterised.parameter_count, 1,
        "exactly one literal should have been parameterised"
    );

    // Bind the classic injection payload as a typed string Value.
    let payload = "'; DROP TABLE users; --";
    let bound = bind_parameterized_query(
        &parameterised.shape,
        &[Value::text(std::sync::Arc::<str>::from(payload))],
        parameterised.parameter_count,
    )
    .expect("bind succeeds");

    // Binding must hand back a table query; anything else means the
    // shape was rewritten into a different statement kind.
    let table = match bound {
        QueryExpr::Table(t) => t,
        other => panic!("expected Table, got {:?}", other),
    };

    // The WHERE clause must still be `<column> <op> <literal>`.
    let where_expr = extract_where_expr(&table);
    let (lhs, rhs) = match where_expr {
        Expr::BinaryOp { lhs, rhs, .. } => (lhs, rhs),
        other => panic!("expected BinaryOp, got {:?}", other),
    };
    assert!(
        matches!(*lhs, Expr::Column { .. }),
        "lhs should be a column reference"
    );
    let bound_value = match *rhs {
        Expr::Literal { value, .. } => value,
        other => panic!("rhs should be a Literal post-bind, got {:?}", other),
    };

    // Crucial: the entire injection payload survives as a single
    // text value — no re-tokenisation, no comment truncation.
    let s = match &bound_value {
        Value::Text(s) => s.as_ref(),
        other => panic!("bound value should be String, got {:?}", other),
    };
    assert_eq!(s, payload, "bound payload must round-trip byte-for-byte");
    assert!(
        s.contains("DROP TABLE"),
        "the injection text is preserved as opaque string content"
    );
}
/// V2 — a quoted string where an identifier is required must fail at
/// parse time. Per the audit, the lexer emits `Token::String` (not
/// `Token::Ident`) for quoted text, so `expect_ident` in
/// `parse_create_table_query` rejects the statement before any engine
/// sees it. This test only observes the outcome: `parser::parse`
/// returns an error for both quoting styles and succeeds for a plain
/// identifier.
#[test]
fn identifier_with_sql_metacharacters_is_rejected_at_parse() {
    // Double-quoted form: expected to be lexed as a string, not an identifier.
    let double_quoted_err = parser::parse(r#"CREATE TABLE "users; DROP TABLE x" (id INT)"#)
        .expect_err("must be a parse error");
    let lowered = double_quoted_err.to_string().to_lowercase();
    // Accept any wording that points at the identifier rule.
    let mentions_identifier_rule = ["ident", "identifier", "expected"]
        .iter()
        .any(|needle| lowered.contains(*needle));
    assert!(
        mentions_identifier_rule,
        "error must point at the identifier rule, got: {}",
        double_quoted_err
    );

    // Single-quoted form: same path.
    let single_quoted = parser::parse("CREATE TABLE 'evil; DROP TABLE x' (id INT)");
    assert!(
        single_quoted.is_err(),
        "single-quoted identifier must also be rejected"
    );

    // Sanity: a normal table name still parses, so the rejections above
    // are about quoting rather than CREATE TABLE being broken.
    let benign = parser::parse("CREATE TABLE legit_users (id INT)");
    assert!(
        benign.is_ok(),
        "non-injection identifier must parse: {:?}",
        benign
    );
}
/// V3 — a CREATE POLICY USING (...) body whose string literal contains
/// comment metacharacters (`'; --`) must land as a single
/// `Filter::Compare` node carrying a `Value::Text`, not as a truncated
/// SQL fragment. Per the audit, the body is never serialised back to SQL.
#[test]
fn rls_policy_body_with_comment_metacharacters_parses_as_literal() {
    // The USING body reads `tenant = '; -- '`: inside the SQL literal the
    // leading single quote is escaped by doubling it (`''`), so the
    // decoded value is `'; -- `.
    let sql = "CREATE POLICY p ON users FOR SELECT TO reader USING (tenant = '''; -- ')";
    let policy: CreatePolicyQuery = match parse_multi(sql).expect("parse CREATE POLICY") {
        QueryExpr::CreatePolicy(p) => p,
        other => panic!("expected CreatePolicy, got {:?}", other),
    };

    // Move the USING filter out of its box; `policy.name` and
    // `policy.table` remain usable for the assertions at the end.
    let using_filter: Filter = *policy.using;
    let literal = match using_filter {
        Filter::Compare {
            op: CompareOp::Eq,
            value,
            ..
        } => value,
        other => panic!("expected Filter::Compare, got {:?}", other),
    };

    let text = if let Value::Text(inner) = &literal {
        inner.as_ref()
    } else {
        panic!("policy value should be String, got {:?}", &literal)
    };
    assert_eq!(
        text, "'; -- ",
        "literal must preserve injection-shaped bytes verbatim"
    );

    // Structural pin: the audit relies on `Filter` having no `Display`
    // impl that renders to SQL, so the body cannot be re-parsed
    // downstream. Here we check that the policy name and table came
    // through as plain identifiers (no smuggling) and, above, that the
    // filter is exactly one Compare node rather than a simpler AST left
    // behind by a comment-truncated parse.
    assert_eq!(policy.name, "p");
    assert_eq!(policy.table, "users");
}
/// V4 — `ASK '...'` parses into a `QueryExpr::Ask` node whose free-text
/// content is the `question` string. Per the audit, the runtime executes
/// ASK as RAG (search context + LLM Q&A) and returns text in a result
/// column, so LLM output is never re-parsed as SQL. This test pins the
/// AST shape: a refactor that introduces a `synthesised_sql` (or
/// similar) field will need to revisit the audit.
#[test]
fn ask_path_does_not_re_execute_llm_output_as_sql() {
    // Parse `sql` and unwrap the Ask node; `context` becomes the expect
    // message so each call site keeps its own failure text.
    let parse_ask = |sql: &str, context: &str| match parse_multi(sql).expect(context) {
        QueryExpr::Ask(ask) => ask,
        other => panic!("expected Ask, got {:?}", other),
    };

    let plain = parse_ask("ASK 'who owns the users table'", "parse ASK");
    assert_eq!(plain.question, "who owns the users table");

    // Pin the Ask node's field set by name. Per the audit these are the
    // only fields the runtime feeds to `execute_ask`; none of them holds
    // synthesised SQL or flows back into the parser. Renaming or removing
    // one of them breaks compilation here.
    let _ = (
        &plain.question,
        &plain.collection,
        &plain.depth,
        &plain.limit,
        &plain.min_score,
        &plain.provider,
        &plain.model,
    );

    // Sanity: an injection-shaped question string is just text.
    let hostile = parse_ask("ASK '''; DROP TABLE users; --'", "parse ASK with payload");
    assert_eq!(
        hostile.question, "'; DROP TABLE users; --",
        "question text round-trips verbatim, no SQL parse on the payload"
    );
}