ferrule-core 0.2.0-alpha

Result formatters, output rendering, and credential-resolution glue layered on the ferrule-sql driver core — the shared library behind the ferrule database CLI.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
//! Inline-secret redaction for SQL bodies (#49).
//!
//! The history store and the slow-log tee persist the SQL text of every
//! ferrule invocation. The connection URL is already scrubbed at the
//! capture site (`DatabaseUrl::redacted` blanks the password component),
//! but secrets can also live *inside* the SQL itself —
//! `CREATE ROLE x PASSWORD '...'`, `ALTER USER ... IDENTIFIED BY '...'`,
//! or a connection-string literal embedded in a function call. This
//! module's [`redact_sql`] blanks those before the SQL reaches storage.
//!
//! **Conservative, string-based, no SQL parser.** The same pragmatic
//! stance as the dump-determinism ORDER BY check: we scan for the common
//! secret idioms and replace the literal with `***`, accepting that a
//! vendor-specific or obfuscated idiom can slip through. The high-
//! frequency leak — a password in a connection URL — is fully handled
//! both here and by the URL-redaction path at the capture site, so the
//! residual false-negative surface is the long tail of DDL phrasings.
//!
//! **Known false-negatives (documented contract, not a bug):**
//!   - secrets passed as bound parameters that some tool inlined into the
//!     SQL with non-standard quoting;
//!   - vendor extensions that name the secret with a keyword this scanner
//!     does not recognise;
//!   - a `PASSWORD` / `IDENTIFIED BY` idiom that appears *inside* an outer
//!     SQL string literal: without tracking the enclosing literal's
//!     context, the scanner cannot tell the doubled quote that opens the
//!     inner secret from a real literal close. The common real case is a
//!     standalone DDL statement, which redacts correctly; a secret
//!     quoted-inside-a-quote is the contrived tail;
//!   - a password literal that uses a backslash escape (`'a\'b'`) rather
//!     than SQL's standard doubled-quote escape (`'a''b'`): the scanner
//!     honours the doubled-quote form (so `'O''Brien'` is fully
//!     redacted) but treats a backslash-escaped quote as the literal
//!     close, redacting only the leading fragment. Backslash-escaped
//!     string literals are non-standard SQL and rare in DDL secrets.
//!
//! The function never panics and never allocates beyond the rebuilt
//! string, so it is safe to call on every recorded statement.

/// Redact inline secrets from a SQL body, returning a scrubbed copy.
///
/// Replaces the secret literal in these idioms with `***`, preserving
/// the surrounding SQL structure:
///   - `PASSWORD '<lit>'` / `PASSWORD "<lit>"` (Postgres role/user DDL)
///   - `IDENTIFIED BY '<lit>'` / `IDENTIFIED BY "<lit>"` and
///     `IDENTIFIED BY <bareword>` (Oracle / MySQL)
///   - `IDENTIFIED BY PASSWORD '<lit>'` (MySQL hash form)
///   - any embedded `scheme://user:<pass>@host` connection-URL literal
///
/// Keyword matching is case-insensitive. A SQL body with no recognised
/// secret idiom is returned byte-identical to the input.
#[must_use]
pub fn redact_sql(sql: &str) -> String {
    // Pass 1: keyword-anchored secrets (PASSWORD / IDENTIFIED BY).
    let stage1 = redact_keyword_secrets(sql);
    // Pass 2: embedded connection-URL passwords (scheme://user:pass@host).
    redact_url_passwords(&stage1)
}

/// Scan for `PASSWORD` and `IDENTIFIED BY` keyword anchors and blank the
/// secret that follows each.
fn redact_keyword_secrets(sql: &str) -> String {
    let chars: Vec<char> = sql.chars().collect();
    let lower: Vec<char> = sql.to_lowercase().chars().collect();
    // `to_lowercase()` can change the char count for some Unicode inputs;
    // when the two views disagree, fall back to the original untouched
    // rather than risk indexing past either buffer.
    if lower.len() != chars.len() {
        return sql.to_string();
    }

    let mut out = String::with_capacity(sql.len());
    let mut i = 0usize;
    while i < chars.len() {
        if let Some(after_kw) = match_keyword(&lower, i, "identified by") {
            // Copy the keyword verbatim, then handle an optional trailing
            // `password` keyword (MySQL hash form) before the literal.
            push_range(&mut out, &chars, i, after_kw);
            let mut j = skip_ws(&chars, after_kw, &mut out);
            if let Some(after_pw) = match_keyword(&lower, j, "password") {
                push_range(&mut out, &chars, j, after_pw);
                j = skip_ws(&chars, after_pw, &mut out);
            }
            i = redact_secret_operand(&chars, j, &mut out);
            continue;
        }
        if let Some(after_kw) = match_keyword(&lower, i, "password") {
            push_range(&mut out, &chars, i, after_kw);
            let j = skip_ws(&chars, after_kw, &mut out);
            i = redact_secret_operand(&chars, j, &mut out);
            continue;
        }
        out.push(chars[i]);
        i += 1;
    }
    out
}

/// Match `keyword` (already lowercase) at position `i` in `lower`,
/// requiring a word boundary before and after so `PASSWORDLESS` or
/// `MYPASSWORD` do not match. Returns the index just past the keyword on
/// success.
fn match_keyword(lower: &[char], i: usize, keyword: &str) -> Option<usize> {
    let kw: Vec<char> = keyword.chars().collect();
    // The keyword may contain internal whitespace ("identified by"); match
    // it token-by-token so any run of whitespace between tokens is allowed.
    if !is_word_boundary_before(lower, i) {
        return None;
    }
    let mut li = i;
    let mut ki = 0usize;
    while ki < kw.len() {
        if kw[ki] == ' ' {
            // Require at least one whitespace char here; consume the run.
            if li >= lower.len() || !lower[li].is_whitespace() {
                return None;
            }
            while li < lower.len() && lower[li].is_whitespace() {
                li += 1;
            }
            ki += 1;
            continue;
        }
        if li >= lower.len() || lower[li] != kw[ki] {
            return None;
        }
        li += 1;
        ki += 1;
    }
    if is_word_boundary_after(lower, li) {
        Some(li)
    } else {
        None
    }
}

/// A keyword may start the string or follow a non-identifier char.
fn is_word_boundary_before(lower: &[char], i: usize) -> bool {
    if i == 0 {
        return true;
    }
    !is_ident_char(lower[i - 1])
}

/// A keyword must be followed by end-of-string or a non-identifier char.
fn is_word_boundary_after(lower: &[char], i: usize) -> bool {
    if i >= lower.len() {
        return true;
    }
    !is_ident_char(lower[i])
}

fn is_ident_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_'
}

/// Append `chars[start..end]` to `out`.
fn push_range(out: &mut String, chars: &[char], start: usize, end: usize) {
    for &c in &chars[start..end] {
        out.push(c);
    }
}

/// Copy whitespace from `chars` starting at `i` into `out`, returning the
/// index of the first non-whitespace char (or end-of-input).
fn skip_ws(chars: &[char], mut i: usize, out: &mut String) -> usize {
    while i < chars.len() && chars[i].is_whitespace() {
        out.push(chars[i]);
        i += 1;
    }
    i
}

/// Redact the secret operand that begins at `i`: a quoted literal
/// (`'...'` or `"..."`) or a bareword (run of identifier chars). Pushes
/// the redacted form to `out` and returns the index just past the
/// operand. If `i` does not point at a plausible operand, nothing is
/// redacted and `i` is returned unchanged.
fn redact_secret_operand(chars: &[char], mut i: usize, out: &mut String) -> usize {
    // MSSQL spells the password assignment with an `=` separator
    // (`CREATE LOGIN x WITH PASSWORD = '...'`, `ALTER LOGIN x WITH PASSWORD
    // = '...'`). Consume an optional `=` plus the whitespace around it so
    // the operand scan below lands on the literal/bareword. Other dialects
    // (`IDENTIFIED BY '...'`, PG `PASSWORD '...'`) have no `=` and are
    // unaffected.
    if i < chars.len() && chars[i] == '=' {
        out.push('=');
        i = skip_ws(chars, i + 1, out);
    }
    if i >= chars.len() {
        return i;
    }
    let c = chars[i];
    if c == '\'' || c == '"' {
        // Quoted literal: emit an empty literal of the same quote kind.
        let quote = c;
        out.push(quote);
        out.push('*');
        out.push('*');
        out.push('*');
        out.push(quote);
        // Advance past the original literal: opening quote, body, closing
        // quote. SQL escapes an embedded quote by doubling it (`''` /
        // `""`), so a quote immediately followed by the same quote is an
        // escape pair *inside* the literal, not the close -- skip both and
        // keep scanning. This keeps a secret like `'p''wd'` fully inside
        // the redacted span rather than leaking the tail after the escape.
        let mut j = i + 1;
        while j < chars.len() {
            if chars[j] == quote {
                if j + 1 < chars.len() && chars[j + 1] == quote {
                    j += 2; // doubled-quote escape: stay inside the literal
                    continue;
                }
                j += 1; // consume the real closing quote
                break;
            }
            j += 1;
        }
        j
    } else if is_ident_char(c) {
        // Bareword (e.g. Oracle `IDENTIFIED BY tiger`): blank it.
        out.push_str("***");
        let mut j = i;
        while j < chars.len() && is_ident_char(chars[j]) {
            j += 1;
        }
        j
    } else {
        // Not an operand we recognise — leave it for the outer loop.
        i
    }
}

/// Replace the password span of any embedded `scheme://user:pass@host`
/// URL with `***`, matching the structure (not the exact behaviour, since
/// this runs on free text) of [`ferrule_sql::DatabaseUrl::redacted`].
fn redact_url_passwords(sql: &str) -> String {
    // Find each `://`, then look for the credential separator `:` and the
    // host separator `@` before the authority ends. Replace the
    // password between them.
    let chars: Vec<char> = sql.chars().collect();
    let mut out = String::with_capacity(sql.len());
    let mut i = 0usize;
    while i < chars.len() {
        if starts_with_at(&chars, i, "://") {
            out.push_str("://");
            let auth_start = i + 3;
            // The authority ends at the first '/', '?', '#', or
            // whitespace, or end-of-string.
            let mut end = auth_start;
            while end < chars.len()
                && !matches!(chars[end], '/' | '?' | '#')
                && !chars[end].is_whitespace()
            {
                end += 1;
            }
            // Within [auth_start, end): find userinfo `user:pass@host`.
            let at = (auth_start..end).find(|&k| chars[k] == '@');
            if let Some(at) = at {
                let colon = (auth_start..at).find(|&k| chars[k] == ':');
                if let Some(colon) = colon {
                    // Emit user, ':', '***', then the rest from '@'.
                    push_range(&mut out, &chars, auth_start, colon + 1);
                    out.push_str("***");
                    push_range(&mut out, &chars, at, end);
                    i = end;
                    continue;
                }
            }
            // No `user:pass@` shape — copy the authority verbatim.
            push_range(&mut out, &chars, auth_start, end);
            i = end;
            continue;
        }
        out.push(chars[i]);
        i += 1;
    }
    out
}

/// `chars[i..].starts_with(needle)` over a char slice.
fn starts_with_at(chars: &[char], i: usize, needle: &str) -> bool {
    let n: Vec<char> = needle.chars().collect();
    if i + n.len() > chars.len() {
        return false;
    }
    chars[i..i + n.len()] == n[..]
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn redacts_postgres_password_single_quote() {
        let out = redact_sql("CREATE ROLE bob PASSWORD 'hunter2'");
        assert!(!out.contains("hunter2"), "secret leaked: {out}");
        assert!(out.contains("***"), "no redaction marker: {out}");
        assert_eq!(out, "CREATE ROLE bob PASSWORD '***'");
    }

    #[test]
    fn redacts_password_double_quote() {
        let out = redact_sql(r#"ALTER ROLE x PASSWORD "s3cret""#);
        assert!(!out.contains("s3cret"));
        assert_eq!(out, r#"ALTER ROLE x PASSWORD "***""#);
    }

    #[test]
    fn redacts_oracle_identified_by_bareword() {
        let out = redact_sql("ALTER USER scott IDENTIFIED BY tiger");
        assert!(!out.contains("tiger"), "secret leaked: {out}");
        assert_eq!(out, "ALTER USER scott IDENTIFIED BY ***");
    }

    #[test]
    fn redacts_identified_by_quoted() {
        let out = redact_sql("CREATE USER u IDENTIFIED BY 'p@ss'");
        assert!(!out.contains("p@ss"));
        assert_eq!(out, "CREATE USER u IDENTIFIED BY '***'");
    }

    // The closing-quote scan honours SQL's doubled-quote escape, so a
    // secret containing an escaped quote stays fully inside the redacted
    // span (no tail leak after the escape pair).
    #[test]
    fn redacts_password_with_doubled_quote_escape() {
        let out = redact_sql("CREATE ROLE bob PASSWORD 'hun''ter2'");
        assert!(!out.contains("hun"), "secret head leaked: {out}");
        assert!(
            !out.contains("ter2"),
            "secret tail leaked past escape: {out}"
        );
        assert_eq!(out, "CREATE ROLE bob PASSWORD '***'");
    }

    // MSSQL's canonical login/user DDL uses `WITH PASSWORD = '...'`; the
    // `=` separator must not defeat redaction.
    #[test]
    fn redacts_mssql_with_password_equals() {
        let out = redact_sql("CREATE LOGIN foo WITH PASSWORD = 'secret'");
        assert!(!out.contains("secret"), "secret leaked: {out}");
        assert_eq!(out, "CREATE LOGIN foo WITH PASSWORD = '***'");

        let alter = redact_sql("ALTER LOGIN foo WITH PASSWORD = 'h@x'");
        assert!(!alter.contains("h@x"), "secret leaked: {alter}");
        assert_eq!(alter, "ALTER LOGIN foo WITH PASSWORD = '***'");

        // No spaces around `=`, and a bareword operand.
        let tight = redact_sql("CREATE USER u WITH PASSWORD=tiger");
        assert!(!tight.contains("tiger"), "secret leaked: {tight}");
        assert_eq!(tight, "CREATE USER u WITH PASSWORD=***");
    }

    #[test]
    fn redacts_mysql_identified_by_password_hash() {
        let out = redact_sql("ALTER USER u IDENTIFIED BY PASSWORD '*ABCDEF0123'");
        assert!(!out.contains("ABCDEF0123"), "hash leaked: {out}");
        assert_eq!(out, "ALTER USER u IDENTIFIED BY PASSWORD '***'");
    }

    #[test]
    fn redacts_embedded_connection_url_password() {
        let out = redact_sql("SELECT dblink('postgres://u:secret@h/db')");
        assert!(!out.contains("secret"), "url password leaked: {out}");
        assert!(
            out.contains("postgres://u:***@h/db"),
            "url not redacted: {out}"
        );
    }

    #[test]
    fn url_without_password_is_unchanged_authority() {
        // `user@host` (no `:pass`) must not be mangled.
        let out = redact_sql("SELECT dblink('postgres://u@h/db')");
        assert_eq!(out, "SELECT dblink('postgres://u@h/db')");
    }

    #[test]
    fn keyword_inside_identifier_is_not_redacted() {
        // `password_reset` is a column, not the PASSWORD keyword.
        let out = redact_sql("SELECT password_reset FROM users");
        assert_eq!(out, "SELECT password_reset FROM users");
        // A column literally named with the keyword as a prefix.
        let out2 = redact_sql("SELECT mypassword FROM t");
        assert_eq!(out2, "SELECT mypassword FROM t");
    }

    #[test]
    fn case_insensitive_keyword_match() {
        let out = redact_sql("create role bob password 'hunter2'");
        assert!(!out.contains("hunter2"));
        let out2 = redact_sql("ALTER user x identified by SECRETWORD");
        assert!(!out2.contains("SECRETWORD"), "secret leaked: {out2}");
    }

    #[test]
    fn plain_select_passes_through_byte_identical() {
        let sql = "SELECT id, name FROM users WHERE age > 30 ORDER BY id";
        assert_eq!(redact_sql(sql), sql);
    }

    #[test]
    fn empty_input_is_empty() {
        assert_eq!(redact_sql(""), "");
    }

    // Documented false-negative boundary: a non-standard idiom that names
    // the secret without a recognised keyword is NOT redacted. The test
    // encodes the contract honestly rather than overclaiming exhaustive
    // coverage.
    #[test]
    fn documented_false_negative_unknown_idiom_passes_through() {
        // No PASSWORD / IDENTIFIED BY / URL shape -> not redacted.
        let sql = "EXEC set_secret @value = 'topsecret'";
        let out = redact_sql(sql);
        assert_eq!(
            out, sql,
            "unknown idiom is a documented false-negative, must pass through unchanged"
        );
    }

    // Documented boundary: a secret nested inside an *outer* SQL string
    // literal cannot be cleanly redacted by a context-free scanner. The
    // common standalone-DDL case (covered above) works; this encodes the
    // contrived nested case as a known limitation rather than overclaiming
    // exhaustive coverage. (The standalone statement
    // `CREATE ROLE bob PASSWORD '...'` IS redacted -- see
    // `redacts_postgres_password_single_quote`.)
    #[test]
    fn documented_false_negative_secret_nested_in_outer_literal() {
        let sql = "SELECT 'CREATE ROLE bob PASSWORD ''hunter2''' AS note";
        let out = redact_sql(sql);
        // The scanner attempts a redaction at the inner PASSWORD, but the
        // outer literal's escaped quotes defeat clean boundary detection,
        // so this is NOT guaranteed secret-free. Asserting the current
        // behaviour documents the contract honestly.
        assert!(
            out.contains("***"),
            "scanner still emits a redaction marker: {out}"
        );
    }
}