Skip to main content

ferrule_core/
redact.rs

1//! Inline-secret redaction for SQL bodies (#49).
2//!
3//! The history store and the slow-log tee persist the SQL text of every
4//! ferrule invocation. The connection URL is already scrubbed at the
5//! capture site (`DatabaseUrl::redacted` blanks the password component),
6//! but secrets can also live *inside* the SQL itself —
7//! `CREATE ROLE x PASSWORD '...'`, `ALTER USER ... IDENTIFIED BY '...'`,
8//! or a connection-string literal embedded in a function call. This
9//! module's [`redact_sql`] blanks those before the SQL reaches storage.
10//!
11//! **Conservative, string-based, no SQL parser.** The same pragmatic
12//! stance as the dump-determinism ORDER BY check: we scan for the common
13//! secret idioms and replace the literal with `***`, accepting that a
14//! vendor-specific or obfuscated idiom can slip through. The high-
15//! frequency leak — a password in a connection URL — is fully handled
16//! both here and by the URL-redaction path at the capture site, so the
17//! residual false-negative surface is the long tail of DDL phrasings.
18//!
19//! **Known false-negatives (documented contract, not a bug):**
20//!   - secrets passed as bound parameters that some tool inlined into the
21//!     SQL with non-standard quoting;
22//!   - vendor extensions that name the secret with a keyword this scanner
23//!     does not recognise;
24//!   - a `PASSWORD` / `IDENTIFIED BY` idiom that appears *inside* an outer
25//!     SQL string literal: without tracking the enclosing literal's
26//!     context, the scanner cannot tell the doubled quote that opens the
27//!     inner secret from a real literal close. The common real case is a
28//!     standalone DDL statement, which redacts correctly; a secret
29//!     quoted-inside-a-quote is the contrived tail;
30//!   - a password literal that uses a backslash escape (`'a\'b'`) rather
31//!     than SQL's standard doubled-quote escape (`'a''b'`): the scanner
32//!     honours the doubled-quote form (so `'O''Brien'` is fully
33//!     redacted) but treats a backslash-escaped quote as the literal
34//!     close, redacting only the leading fragment. Backslash-escaped
35//!     string literals are non-standard SQL and rare in DDL secrets.
36//!
37//! The function never panics and never allocates beyond the rebuilt
38//! string, so it is safe to call on every recorded statement.
39
40/// Redact inline secrets from a SQL body, returning a scrubbed copy.
41///
42/// Replaces the secret literal in these idioms with `***`, preserving
43/// the surrounding SQL structure:
44///   - `PASSWORD '<lit>'` / `PASSWORD "<lit>"` (Postgres role/user DDL)
45///   - `IDENTIFIED BY '<lit>'` / `IDENTIFIED BY "<lit>"` and
46///     `IDENTIFIED BY <bareword>` (Oracle / MySQL)
47///   - `IDENTIFIED BY PASSWORD '<lit>'` (MySQL hash form)
48///   - any embedded `scheme://user:<pass>@host` connection-URL literal
49///
50/// Keyword matching is case-insensitive. A SQL body with no recognised
51/// secret idiom is returned byte-identical to the input.
52#[must_use]
53pub fn redact_sql(sql: &str) -> String {
54    // Pass 1: keyword-anchored secrets (PASSWORD / IDENTIFIED BY).
55    let stage1 = redact_keyword_secrets(sql);
56    // Pass 2: embedded connection-URL passwords (scheme://user:pass@host).
57    redact_url_passwords(&stage1)
58}
59
60/// Scan for `PASSWORD` and `IDENTIFIED BY` keyword anchors and blank the
61/// secret that follows each.
62fn redact_keyword_secrets(sql: &str) -> String {
63    let chars: Vec<char> = sql.chars().collect();
64    let lower: Vec<char> = sql.to_lowercase().chars().collect();
65    // `to_lowercase()` can change the char count for some Unicode inputs;
66    // when the two views disagree, fall back to the original untouched
67    // rather than risk indexing past either buffer.
68    if lower.len() != chars.len() {
69        return sql.to_string();
70    }
71
72    let mut out = String::with_capacity(sql.len());
73    let mut i = 0usize;
74    while i < chars.len() {
75        if let Some(after_kw) = match_keyword(&lower, i, "identified by") {
76            // Copy the keyword verbatim, then handle an optional trailing
77            // `password` keyword (MySQL hash form) before the literal.
78            push_range(&mut out, &chars, i, after_kw);
79            let mut j = skip_ws(&chars, after_kw, &mut out);
80            if let Some(after_pw) = match_keyword(&lower, j, "password") {
81                push_range(&mut out, &chars, j, after_pw);
82                j = skip_ws(&chars, after_pw, &mut out);
83            }
84            i = redact_secret_operand(&chars, j, &mut out);
85            continue;
86        }
87        if let Some(after_kw) = match_keyword(&lower, i, "password") {
88            push_range(&mut out, &chars, i, after_kw);
89            let j = skip_ws(&chars, after_kw, &mut out);
90            i = redact_secret_operand(&chars, j, &mut out);
91            continue;
92        }
93        out.push(chars[i]);
94        i += 1;
95    }
96    out
97}
98
99/// Match `keyword` (already lowercase) at position `i` in `lower`,
100/// requiring a word boundary before and after so `PASSWORDLESS` or
101/// `MYPASSWORD` do not match. Returns the index just past the keyword on
102/// success.
103fn match_keyword(lower: &[char], i: usize, keyword: &str) -> Option<usize> {
104    let kw: Vec<char> = keyword.chars().collect();
105    // The keyword may contain internal whitespace ("identified by"); match
106    // it token-by-token so any run of whitespace between tokens is allowed.
107    if !is_word_boundary_before(lower, i) {
108        return None;
109    }
110    let mut li = i;
111    let mut ki = 0usize;
112    while ki < kw.len() {
113        if kw[ki] == ' ' {
114            // Require at least one whitespace char here; consume the run.
115            if li >= lower.len() || !lower[li].is_whitespace() {
116                return None;
117            }
118            while li < lower.len() && lower[li].is_whitespace() {
119                li += 1;
120            }
121            ki += 1;
122            continue;
123        }
124        if li >= lower.len() || lower[li] != kw[ki] {
125            return None;
126        }
127        li += 1;
128        ki += 1;
129    }
130    if is_word_boundary_after(lower, li) {
131        Some(li)
132    } else {
133        None
134    }
135}
136
137/// A keyword may start the string or follow a non-identifier char.
138fn is_word_boundary_before(lower: &[char], i: usize) -> bool {
139    if i == 0 {
140        return true;
141    }
142    !is_ident_char(lower[i - 1])
143}
144
145/// A keyword must be followed by end-of-string or a non-identifier char.
146fn is_word_boundary_after(lower: &[char], i: usize) -> bool {
147    if i >= lower.len() {
148        return true;
149    }
150    !is_ident_char(lower[i])
151}
152
153fn is_ident_char(c: char) -> bool {
154    c.is_alphanumeric() || c == '_'
155}
156
157/// Append `chars[start..end]` to `out`.
158fn push_range(out: &mut String, chars: &[char], start: usize, end: usize) {
159    for &c in &chars[start..end] {
160        out.push(c);
161    }
162}
163
164/// Copy whitespace from `chars` starting at `i` into `out`, returning the
165/// index of the first non-whitespace char (or end-of-input).
166fn skip_ws(chars: &[char], mut i: usize, out: &mut String) -> usize {
167    while i < chars.len() && chars[i].is_whitespace() {
168        out.push(chars[i]);
169        i += 1;
170    }
171    i
172}
173
174/// Redact the secret operand that begins at `i`: a quoted literal
175/// (`'...'` or `"..."`) or a bareword (run of identifier chars). Pushes
176/// the redacted form to `out` and returns the index just past the
177/// operand. If `i` does not point at a plausible operand, nothing is
178/// redacted and `i` is returned unchanged.
179fn redact_secret_operand(chars: &[char], mut i: usize, out: &mut String) -> usize {
180    // MSSQL spells the password assignment with an `=` separator
181    // (`CREATE LOGIN x WITH PASSWORD = '...'`, `ALTER LOGIN x WITH PASSWORD
182    // = '...'`). Consume an optional `=` plus the whitespace around it so
183    // the operand scan below lands on the literal/bareword. Other dialects
184    // (`IDENTIFIED BY '...'`, PG `PASSWORD '...'`) have no `=` and are
185    // unaffected.
186    if i < chars.len() && chars[i] == '=' {
187        out.push('=');
188        i = skip_ws(chars, i + 1, out);
189    }
190    if i >= chars.len() {
191        return i;
192    }
193    let c = chars[i];
194    if c == '\'' || c == '"' {
195        // Quoted literal: emit an empty literal of the same quote kind.
196        let quote = c;
197        out.push(quote);
198        out.push('*');
199        out.push('*');
200        out.push('*');
201        out.push(quote);
202        // Advance past the original literal: opening quote, body, closing
203        // quote. SQL escapes an embedded quote by doubling it (`''` /
204        // `""`), so a quote immediately followed by the same quote is an
205        // escape pair *inside* the literal, not the close -- skip both and
206        // keep scanning. This keeps a secret like `'p''wd'` fully inside
207        // the redacted span rather than leaking the tail after the escape.
208        let mut j = i + 1;
209        while j < chars.len() {
210            if chars[j] == quote {
211                if j + 1 < chars.len() && chars[j + 1] == quote {
212                    j += 2; // doubled-quote escape: stay inside the literal
213                    continue;
214                }
215                j += 1; // consume the real closing quote
216                break;
217            }
218            j += 1;
219        }
220        j
221    } else if is_ident_char(c) {
222        // Bareword (e.g. Oracle `IDENTIFIED BY tiger`): blank it.
223        out.push_str("***");
224        let mut j = i;
225        while j < chars.len() && is_ident_char(chars[j]) {
226            j += 1;
227        }
228        j
229    } else {
230        // Not an operand we recognise — leave it for the outer loop.
231        i
232    }
233}
234
235/// Replace the password span of any embedded `scheme://user:pass@host`
236/// URL with `***`, matching the structure (not the exact behaviour, since
237/// this runs on free text) of [`ferrule_sql::DatabaseUrl::redacted`].
238fn redact_url_passwords(sql: &str) -> String {
239    // Find each `://`, then look for the credential separator `:` and the
240    // host separator `@` before the authority ends. Replace the
241    // password between them.
242    let chars: Vec<char> = sql.chars().collect();
243    let mut out = String::with_capacity(sql.len());
244    let mut i = 0usize;
245    while i < chars.len() {
246        if starts_with_at(&chars, i, "://") {
247            out.push_str("://");
248            let auth_start = i + 3;
249            // The authority ends at the first '/', '?', '#', or
250            // whitespace, or end-of-string.
251            let mut end = auth_start;
252            while end < chars.len()
253                && !matches!(chars[end], '/' | '?' | '#')
254                && !chars[end].is_whitespace()
255            {
256                end += 1;
257            }
258            // Within [auth_start, end): find userinfo `user:pass@host`.
259            let at = (auth_start..end).find(|&k| chars[k] == '@');
260            if let Some(at) = at {
261                let colon = (auth_start..at).find(|&k| chars[k] == ':');
262                if let Some(colon) = colon {
263                    // Emit user, ':', '***', then the rest from '@'.
264                    push_range(&mut out, &chars, auth_start, colon + 1);
265                    out.push_str("***");
266                    push_range(&mut out, &chars, at, end);
267                    i = end;
268                    continue;
269                }
270            }
271            // No `user:pass@` shape — copy the authority verbatim.
272            push_range(&mut out, &chars, auth_start, end);
273            i = end;
274            continue;
275        }
276        out.push(chars[i]);
277        i += 1;
278    }
279    out
280}
281
282/// `chars[i..].starts_with(needle)` over a char slice.
283fn starts_with_at(chars: &[char], i: usize, needle: &str) -> bool {
284    let n: Vec<char> = needle.chars().collect();
285    if i + n.len() > chars.len() {
286        return false;
287    }
288    chars[i..i + n.len()] == n[..]
289}
290
291#[cfg(test)]
292mod tests {
293    use super::*;
294
295    #[test]
296    fn redacts_postgres_password_single_quote() {
297        let out = redact_sql("CREATE ROLE bob PASSWORD 'hunter2'");
298        assert!(!out.contains("hunter2"), "secret leaked: {out}");
299        assert!(out.contains("***"), "no redaction marker: {out}");
300        assert_eq!(out, "CREATE ROLE bob PASSWORD '***'");
301    }
302
303    #[test]
304    fn redacts_password_double_quote() {
305        let out = redact_sql(r#"ALTER ROLE x PASSWORD "s3cret""#);
306        assert!(!out.contains("s3cret"));
307        assert_eq!(out, r#"ALTER ROLE x PASSWORD "***""#);
308    }
309
310    #[test]
311    fn redacts_oracle_identified_by_bareword() {
312        let out = redact_sql("ALTER USER scott IDENTIFIED BY tiger");
313        assert!(!out.contains("tiger"), "secret leaked: {out}");
314        assert_eq!(out, "ALTER USER scott IDENTIFIED BY ***");
315    }
316
317    #[test]
318    fn redacts_identified_by_quoted() {
319        let out = redact_sql("CREATE USER u IDENTIFIED BY 'p@ss'");
320        assert!(!out.contains("p@ss"));
321        assert_eq!(out, "CREATE USER u IDENTIFIED BY '***'");
322    }
323
324    // The closing-quote scan honours SQL's doubled-quote escape, so a
325    // secret containing an escaped quote stays fully inside the redacted
326    // span (no tail leak after the escape pair).
327    #[test]
328    fn redacts_password_with_doubled_quote_escape() {
329        let out = redact_sql("CREATE ROLE bob PASSWORD 'hun''ter2'");
330        assert!(!out.contains("hun"), "secret head leaked: {out}");
331        assert!(
332            !out.contains("ter2"),
333            "secret tail leaked past escape: {out}"
334        );
335        assert_eq!(out, "CREATE ROLE bob PASSWORD '***'");
336    }
337
338    // MSSQL's canonical login/user DDL uses `WITH PASSWORD = '...'`; the
339    // `=` separator must not defeat redaction.
340    #[test]
341    fn redacts_mssql_with_password_equals() {
342        let out = redact_sql("CREATE LOGIN foo WITH PASSWORD = 'secret'");
343        assert!(!out.contains("secret"), "secret leaked: {out}");
344        assert_eq!(out, "CREATE LOGIN foo WITH PASSWORD = '***'");
345
346        let alter = redact_sql("ALTER LOGIN foo WITH PASSWORD = 'h@x'");
347        assert!(!alter.contains("h@x"), "secret leaked: {alter}");
348        assert_eq!(alter, "ALTER LOGIN foo WITH PASSWORD = '***'");
349
350        // No spaces around `=`, and a bareword operand.
351        let tight = redact_sql("CREATE USER u WITH PASSWORD=tiger");
352        assert!(!tight.contains("tiger"), "secret leaked: {tight}");
353        assert_eq!(tight, "CREATE USER u WITH PASSWORD=***");
354    }
355
356    #[test]
357    fn redacts_mysql_identified_by_password_hash() {
358        let out = redact_sql("ALTER USER u IDENTIFIED BY PASSWORD '*ABCDEF0123'");
359        assert!(!out.contains("ABCDEF0123"), "hash leaked: {out}");
360        assert_eq!(out, "ALTER USER u IDENTIFIED BY PASSWORD '***'");
361    }
362
363    #[test]
364    fn redacts_embedded_connection_url_password() {
365        let out = redact_sql("SELECT dblink('postgres://u:secret@h/db')");
366        assert!(!out.contains("secret"), "url password leaked: {out}");
367        assert!(
368            out.contains("postgres://u:***@h/db"),
369            "url not redacted: {out}"
370        );
371    }
372
373    #[test]
374    fn url_without_password_is_unchanged_authority() {
375        // `user@host` (no `:pass`) must not be mangled.
376        let out = redact_sql("SELECT dblink('postgres://u@h/db')");
377        assert_eq!(out, "SELECT dblink('postgres://u@h/db')");
378    }
379
380    #[test]
381    fn keyword_inside_identifier_is_not_redacted() {
382        // `password_reset` is a column, not the PASSWORD keyword.
383        let out = redact_sql("SELECT password_reset FROM users");
384        assert_eq!(out, "SELECT password_reset FROM users");
385        // A column literally named with the keyword as a prefix.
386        let out2 = redact_sql("SELECT mypassword FROM t");
387        assert_eq!(out2, "SELECT mypassword FROM t");
388    }
389
390    #[test]
391    fn case_insensitive_keyword_match() {
392        let out = redact_sql("create role bob password 'hunter2'");
393        assert!(!out.contains("hunter2"));
394        let out2 = redact_sql("ALTER user x identified by SECRETWORD");
395        assert!(!out2.contains("SECRETWORD"), "secret leaked: {out2}");
396    }
397
398    #[test]
399    fn plain_select_passes_through_byte_identical() {
400        let sql = "SELECT id, name FROM users WHERE age > 30 ORDER BY id";
401        assert_eq!(redact_sql(sql), sql);
402    }
403
404    #[test]
405    fn empty_input_is_empty() {
406        assert_eq!(redact_sql(""), "");
407    }
408
409    // Documented false-negative boundary: a non-standard idiom that names
410    // the secret without a recognised keyword is NOT redacted. The test
411    // encodes the contract honestly rather than overclaiming exhaustive
412    // coverage.
413    #[test]
414    fn documented_false_negative_unknown_idiom_passes_through() {
415        // No PASSWORD / IDENTIFIED BY / URL shape -> not redacted.
416        let sql = "EXEC set_secret @value = 'topsecret'";
417        let out = redact_sql(sql);
418        assert_eq!(
419            out, sql,
420            "unknown idiom is a documented false-negative, must pass through unchanged"
421        );
422    }
423
424    // Documented boundary: a secret nested inside an *outer* SQL string
425    // literal cannot be cleanly redacted by a context-free scanner. The
426    // common standalone-DDL case (covered above) works; this encodes the
427    // contrived nested case as a known limitation rather than overclaiming
428    // exhaustive coverage. (The standalone statement
429    // `CREATE ROLE bob PASSWORD '...'` IS redacted -- see
430    // `redacts_postgres_password_single_quote`.)
431    #[test]
432    fn documented_false_negative_secret_nested_in_outer_literal() {
433        let sql = "SELECT 'CREATE ROLE bob PASSWORD ''hunter2''' AS note";
434        let out = redact_sql(sql);
435        // The scanner attempts a redaction at the inner PASSWORD, but the
436        // outer literal's escaped quotes defeat clean boundary detection,
437        // so this is NOT guaranteed secret-free. Asserting the current
438        // behaviour documents the contract honestly.
439        assert!(
440            out.contains("***"),
441            "scanner still emits a redaction marker: {out}"
442        );
443    }
444}