ferrule_core/redact.rs
1//! Inline-secret redaction for SQL bodies (#49).
2//!
3//! The history store and the slow-log tee persist the SQL text of every
4//! ferrule invocation. The connection URL is already scrubbed at the
5//! capture site (`DatabaseUrl::redacted` blanks the password component),
6//! but secrets can also live *inside* the SQL itself —
7//! `CREATE ROLE x PASSWORD '...'`, `ALTER USER ... IDENTIFIED BY '...'`,
8//! or a connection-string literal embedded in a function call. This
9//! module's [`redact_sql`] blanks those before the SQL reaches storage.
10//!
11//! **Conservative, string-based, no SQL parser.** The same pragmatic
12//! stance as the dump-determinism ORDER BY check: we scan for the common
13//! secret idioms and replace the literal with `***`, accepting that a
14//! vendor-specific or obfuscated idiom can slip through. The high-
15//! frequency leak — a password in a connection URL — is fully handled
16//! both here and by the URL-redaction path at the capture site, so the
17//! residual false-negative surface is the long tail of DDL phrasings.
18//!
19//! **Known false-negatives (documented contract, not a bug):**
20//! - secrets passed as bound parameters that some tool inlined into the
21//! SQL with non-standard quoting;
22//! - vendor extensions that name the secret with a keyword this scanner
23//! does not recognise;
24//! - a `PASSWORD` / `IDENTIFIED BY` idiom that appears *inside* an outer
25//! SQL string literal: without tracking the enclosing literal's
26//! context, the scanner cannot tell the doubled quote that opens the
27//! inner secret from a real literal close. The common real case is a
28//! standalone DDL statement, which redacts correctly; a secret
29//! quoted-inside-a-quote is the contrived tail;
30//! - a password literal that uses a backslash escape (`'a\'b'`) rather
31//! than SQL's standard doubled-quote escape (`'a''b'`): the scanner
32//! honours the doubled-quote form (so `'O''Brien'` is fully
33//! redacted) but treats a backslash-escaped quote as the literal
34//! close, redacting only the leading fragment. Backslash-escaped
35//! string literals are non-standard SQL and rare in DDL secrets.
36//!
37//! The function never panics and never allocates beyond the rebuilt
38//! string, so it is safe to call on every recorded statement.
39
40/// Redact inline secrets from a SQL body, returning a scrubbed copy.
41///
42/// Replaces the secret literal in these idioms with `***`, preserving
43/// the surrounding SQL structure:
44/// - `PASSWORD '<lit>'` / `PASSWORD "<lit>"` (Postgres role/user DDL)
45/// - `IDENTIFIED BY '<lit>'` / `IDENTIFIED BY "<lit>"` and
46/// `IDENTIFIED BY <bareword>` (Oracle / MySQL)
47/// - `IDENTIFIED BY PASSWORD '<lit>'` (MySQL hash form)
48/// - any embedded `scheme://user:<pass>@host` connection-URL literal
49///
50/// Keyword matching is case-insensitive. A SQL body with no recognised
51/// secret idiom is returned byte-identical to the input.
52#[must_use]
53pub fn redact_sql(sql: &str) -> String {
54 // Pass 1: keyword-anchored secrets (PASSWORD / IDENTIFIED BY).
55 let stage1 = redact_keyword_secrets(sql);
56 // Pass 2: embedded connection-URL passwords (scheme://user:pass@host).
57 redact_url_passwords(&stage1)
58}
59
60/// Scan for `PASSWORD` and `IDENTIFIED BY` keyword anchors and blank the
61/// secret that follows each.
62fn redact_keyword_secrets(sql: &str) -> String {
63 let chars: Vec<char> = sql.chars().collect();
64 let lower: Vec<char> = sql.to_lowercase().chars().collect();
65 // `to_lowercase()` can change the char count for some Unicode inputs;
66 // when the two views disagree, fall back to the original untouched
67 // rather than risk indexing past either buffer.
68 if lower.len() != chars.len() {
69 return sql.to_string();
70 }
71
72 let mut out = String::with_capacity(sql.len());
73 let mut i = 0usize;
74 while i < chars.len() {
75 if let Some(after_kw) = match_keyword(&lower, i, "identified by") {
76 // Copy the keyword verbatim, then handle an optional trailing
77 // `password` keyword (MySQL hash form) before the literal.
78 push_range(&mut out, &chars, i, after_kw);
79 let mut j = skip_ws(&chars, after_kw, &mut out);
80 if let Some(after_pw) = match_keyword(&lower, j, "password") {
81 push_range(&mut out, &chars, j, after_pw);
82 j = skip_ws(&chars, after_pw, &mut out);
83 }
84 i = redact_secret_operand(&chars, j, &mut out);
85 continue;
86 }
87 if let Some(after_kw) = match_keyword(&lower, i, "password") {
88 push_range(&mut out, &chars, i, after_kw);
89 let j = skip_ws(&chars, after_kw, &mut out);
90 i = redact_secret_operand(&chars, j, &mut out);
91 continue;
92 }
93 out.push(chars[i]);
94 i += 1;
95 }
96 out
97}
98
99/// Match `keyword` (already lowercase) at position `i` in `lower`,
100/// requiring a word boundary before and after so `PASSWORDLESS` or
101/// `MYPASSWORD` do not match. Returns the index just past the keyword on
102/// success.
103fn match_keyword(lower: &[char], i: usize, keyword: &str) -> Option<usize> {
104 let kw: Vec<char> = keyword.chars().collect();
105 // The keyword may contain internal whitespace ("identified by"); match
106 // it token-by-token so any run of whitespace between tokens is allowed.
107 if !is_word_boundary_before(lower, i) {
108 return None;
109 }
110 let mut li = i;
111 let mut ki = 0usize;
112 while ki < kw.len() {
113 if kw[ki] == ' ' {
114 // Require at least one whitespace char here; consume the run.
115 if li >= lower.len() || !lower[li].is_whitespace() {
116 return None;
117 }
118 while li < lower.len() && lower[li].is_whitespace() {
119 li += 1;
120 }
121 ki += 1;
122 continue;
123 }
124 if li >= lower.len() || lower[li] != kw[ki] {
125 return None;
126 }
127 li += 1;
128 ki += 1;
129 }
130 if is_word_boundary_after(lower, li) {
131 Some(li)
132 } else {
133 None
134 }
135}
136
137/// A keyword may start the string or follow a non-identifier char.
138fn is_word_boundary_before(lower: &[char], i: usize) -> bool {
139 if i == 0 {
140 return true;
141 }
142 !is_ident_char(lower[i - 1])
143}
144
145/// A keyword must be followed by end-of-string or a non-identifier char.
146fn is_word_boundary_after(lower: &[char], i: usize) -> bool {
147 if i >= lower.len() {
148 return true;
149 }
150 !is_ident_char(lower[i])
151}
152
153fn is_ident_char(c: char) -> bool {
154 c.is_alphanumeric() || c == '_'
155}
156
157/// Append `chars[start..end]` to `out`.
158fn push_range(out: &mut String, chars: &[char], start: usize, end: usize) {
159 for &c in &chars[start..end] {
160 out.push(c);
161 }
162}
163
164/// Copy whitespace from `chars` starting at `i` into `out`, returning the
165/// index of the first non-whitespace char (or end-of-input).
166fn skip_ws(chars: &[char], mut i: usize, out: &mut String) -> usize {
167 while i < chars.len() && chars[i].is_whitespace() {
168 out.push(chars[i]);
169 i += 1;
170 }
171 i
172}
173
174/// Redact the secret operand that begins at `i`: a quoted literal
175/// (`'...'` or `"..."`) or a bareword (run of identifier chars). Pushes
176/// the redacted form to `out` and returns the index just past the
177/// operand. If `i` does not point at a plausible operand, nothing is
178/// redacted and `i` is returned unchanged.
179fn redact_secret_operand(chars: &[char], mut i: usize, out: &mut String) -> usize {
180 // MSSQL spells the password assignment with an `=` separator
181 // (`CREATE LOGIN x WITH PASSWORD = '...'`, `ALTER LOGIN x WITH PASSWORD
182 // = '...'`). Consume an optional `=` plus the whitespace around it so
183 // the operand scan below lands on the literal/bareword. Other dialects
184 // (`IDENTIFIED BY '...'`, PG `PASSWORD '...'`) have no `=` and are
185 // unaffected.
186 if i < chars.len() && chars[i] == '=' {
187 out.push('=');
188 i = skip_ws(chars, i + 1, out);
189 }
190 if i >= chars.len() {
191 return i;
192 }
193 let c = chars[i];
194 if c == '\'' || c == '"' {
195 // Quoted literal: emit an empty literal of the same quote kind.
196 let quote = c;
197 out.push(quote);
198 out.push('*');
199 out.push('*');
200 out.push('*');
201 out.push(quote);
202 // Advance past the original literal: opening quote, body, closing
203 // quote. SQL escapes an embedded quote by doubling it (`''` /
204 // `""`), so a quote immediately followed by the same quote is an
205 // escape pair *inside* the literal, not the close -- skip both and
206 // keep scanning. This keeps a secret like `'p''wd'` fully inside
207 // the redacted span rather than leaking the tail after the escape.
208 let mut j = i + 1;
209 while j < chars.len() {
210 if chars[j] == quote {
211 if j + 1 < chars.len() && chars[j + 1] == quote {
212 j += 2; // doubled-quote escape: stay inside the literal
213 continue;
214 }
215 j += 1; // consume the real closing quote
216 break;
217 }
218 j += 1;
219 }
220 j
221 } else if is_ident_char(c) {
222 // Bareword (e.g. Oracle `IDENTIFIED BY tiger`): blank it.
223 out.push_str("***");
224 let mut j = i;
225 while j < chars.len() && is_ident_char(chars[j]) {
226 j += 1;
227 }
228 j
229 } else {
230 // Not an operand we recognise — leave it for the outer loop.
231 i
232 }
233}
234
235/// Replace the password span of any embedded `scheme://user:pass@host`
236/// URL with `***`, matching the structure (not the exact behaviour, since
237/// this runs on free text) of [`ferrule_sql::DatabaseUrl::redacted`].
238fn redact_url_passwords(sql: &str) -> String {
239 // Find each `://`, then look for the credential separator `:` and the
240 // host separator `@` before the authority ends. Replace the
241 // password between them.
242 let chars: Vec<char> = sql.chars().collect();
243 let mut out = String::with_capacity(sql.len());
244 let mut i = 0usize;
245 while i < chars.len() {
246 if starts_with_at(&chars, i, "://") {
247 out.push_str("://");
248 let auth_start = i + 3;
249 // The authority ends at the first '/', '?', '#', or
250 // whitespace, or end-of-string.
251 let mut end = auth_start;
252 while end < chars.len()
253 && !matches!(chars[end], '/' | '?' | '#')
254 && !chars[end].is_whitespace()
255 {
256 end += 1;
257 }
258 // Within [auth_start, end): find userinfo `user:pass@host`.
259 let at = (auth_start..end).find(|&k| chars[k] == '@');
260 if let Some(at) = at {
261 let colon = (auth_start..at).find(|&k| chars[k] == ':');
262 if let Some(colon) = colon {
263 // Emit user, ':', '***', then the rest from '@'.
264 push_range(&mut out, &chars, auth_start, colon + 1);
265 out.push_str("***");
266 push_range(&mut out, &chars, at, end);
267 i = end;
268 continue;
269 }
270 }
271 // No `user:pass@` shape — copy the authority verbatim.
272 push_range(&mut out, &chars, auth_start, end);
273 i = end;
274 continue;
275 }
276 out.push(chars[i]);
277 i += 1;
278 }
279 out
280}
281
282/// `chars[i..].starts_with(needle)` over a char slice.
283fn starts_with_at(chars: &[char], i: usize, needle: &str) -> bool {
284 let n: Vec<char> = needle.chars().collect();
285 if i + n.len() > chars.len() {
286 return false;
287 }
288 chars[i..i + n.len()] == n[..]
289}
290
291#[cfg(test)]
292mod tests {
293 use super::*;
294
295 #[test]
296 fn redacts_postgres_password_single_quote() {
297 let out = redact_sql("CREATE ROLE bob PASSWORD 'hunter2'");
298 assert!(!out.contains("hunter2"), "secret leaked: {out}");
299 assert!(out.contains("***"), "no redaction marker: {out}");
300 assert_eq!(out, "CREATE ROLE bob PASSWORD '***'");
301 }
302
303 #[test]
304 fn redacts_password_double_quote() {
305 let out = redact_sql(r#"ALTER ROLE x PASSWORD "s3cret""#);
306 assert!(!out.contains("s3cret"));
307 assert_eq!(out, r#"ALTER ROLE x PASSWORD "***""#);
308 }
309
310 #[test]
311 fn redacts_oracle_identified_by_bareword() {
312 let out = redact_sql("ALTER USER scott IDENTIFIED BY tiger");
313 assert!(!out.contains("tiger"), "secret leaked: {out}");
314 assert_eq!(out, "ALTER USER scott IDENTIFIED BY ***");
315 }
316
317 #[test]
318 fn redacts_identified_by_quoted() {
319 let out = redact_sql("CREATE USER u IDENTIFIED BY 'p@ss'");
320 assert!(!out.contains("p@ss"));
321 assert_eq!(out, "CREATE USER u IDENTIFIED BY '***'");
322 }
323
324 // The closing-quote scan honours SQL's doubled-quote escape, so a
325 // secret containing an escaped quote stays fully inside the redacted
326 // span (no tail leak after the escape pair).
327 #[test]
328 fn redacts_password_with_doubled_quote_escape() {
329 let out = redact_sql("CREATE ROLE bob PASSWORD 'hun''ter2'");
330 assert!(!out.contains("hun"), "secret head leaked: {out}");
331 assert!(
332 !out.contains("ter2"),
333 "secret tail leaked past escape: {out}"
334 );
335 assert_eq!(out, "CREATE ROLE bob PASSWORD '***'");
336 }
337
338 // MSSQL's canonical login/user DDL uses `WITH PASSWORD = '...'`; the
339 // `=` separator must not defeat redaction.
340 #[test]
341 fn redacts_mssql_with_password_equals() {
342 let out = redact_sql("CREATE LOGIN foo WITH PASSWORD = 'secret'");
343 assert!(!out.contains("secret"), "secret leaked: {out}");
344 assert_eq!(out, "CREATE LOGIN foo WITH PASSWORD = '***'");
345
346 let alter = redact_sql("ALTER LOGIN foo WITH PASSWORD = 'h@x'");
347 assert!(!alter.contains("h@x"), "secret leaked: {alter}");
348 assert_eq!(alter, "ALTER LOGIN foo WITH PASSWORD = '***'");
349
350 // No spaces around `=`, and a bareword operand.
351 let tight = redact_sql("CREATE USER u WITH PASSWORD=tiger");
352 assert!(!tight.contains("tiger"), "secret leaked: {tight}");
353 assert_eq!(tight, "CREATE USER u WITH PASSWORD=***");
354 }
355
356 #[test]
357 fn redacts_mysql_identified_by_password_hash() {
358 let out = redact_sql("ALTER USER u IDENTIFIED BY PASSWORD '*ABCDEF0123'");
359 assert!(!out.contains("ABCDEF0123"), "hash leaked: {out}");
360 assert_eq!(out, "ALTER USER u IDENTIFIED BY PASSWORD '***'");
361 }
362
363 #[test]
364 fn redacts_embedded_connection_url_password() {
365 let out = redact_sql("SELECT dblink('postgres://u:secret@h/db')");
366 assert!(!out.contains("secret"), "url password leaked: {out}");
367 assert!(
368 out.contains("postgres://u:***@h/db"),
369 "url not redacted: {out}"
370 );
371 }
372
373 #[test]
374 fn url_without_password_is_unchanged_authority() {
375 // `user@host` (no `:pass`) must not be mangled.
376 let out = redact_sql("SELECT dblink('postgres://u@h/db')");
377 assert_eq!(out, "SELECT dblink('postgres://u@h/db')");
378 }
379
380 #[test]
381 fn keyword_inside_identifier_is_not_redacted() {
382 // `password_reset` is a column, not the PASSWORD keyword.
383 let out = redact_sql("SELECT password_reset FROM users");
384 assert_eq!(out, "SELECT password_reset FROM users");
385 // A column literally named with the keyword as a prefix.
386 let out2 = redact_sql("SELECT mypassword FROM t");
387 assert_eq!(out2, "SELECT mypassword FROM t");
388 }
389
390 #[test]
391 fn case_insensitive_keyword_match() {
392 let out = redact_sql("create role bob password 'hunter2'");
393 assert!(!out.contains("hunter2"));
394 let out2 = redact_sql("ALTER user x identified by SECRETWORD");
395 assert!(!out2.contains("SECRETWORD"), "secret leaked: {out2}");
396 }
397
398 #[test]
399 fn plain_select_passes_through_byte_identical() {
400 let sql = "SELECT id, name FROM users WHERE age > 30 ORDER BY id";
401 assert_eq!(redact_sql(sql), sql);
402 }
403
404 #[test]
405 fn empty_input_is_empty() {
406 assert_eq!(redact_sql(""), "");
407 }
408
409 // Documented false-negative boundary: a non-standard idiom that names
410 // the secret without a recognised keyword is NOT redacted. The test
411 // encodes the contract honestly rather than overclaiming exhaustive
412 // coverage.
413 #[test]
414 fn documented_false_negative_unknown_idiom_passes_through() {
415 // No PASSWORD / IDENTIFIED BY / URL shape -> not redacted.
416 let sql = "EXEC set_secret @value = 'topsecret'";
417 let out = redact_sql(sql);
418 assert_eq!(
419 out, sql,
420 "unknown idiom is a documented false-negative, must pass through unchanged"
421 );
422 }
423
424 // Documented boundary: a secret nested inside an *outer* SQL string
425 // literal cannot be cleanly redacted by a context-free scanner. The
426 // common standalone-DDL case (covered above) works; this encodes the
427 // contrived nested case as a known limitation rather than overclaiming
428 // exhaustive coverage. (The standalone statement
429 // `CREATE ROLE bob PASSWORD '...'` IS redacted -- see
430 // `redacts_postgres_password_single_quote`.)
431 #[test]
432 fn documented_false_negative_secret_nested_in_outer_literal() {
433 let sql = "SELECT 'CREATE ROLE bob PASSWORD ''hunter2''' AS note";
434 let out = redact_sql(sql);
435 // The scanner attempts a redaction at the inner PASSWORD, but the
436 // outer literal's escaped quotes defeat clean boundary detection,
437 // so this is NOT guaranteed secret-free. Asserting the current
438 // behaviour documents the contract honestly.
439 assert!(
440 out.contains("***"),
441 "scanner still emits a redaction marker: {out}"
442 );
443 }
444}