/// Returns a copy of `sql` in which literal values are replaced by `?`.
///
/// The text is consumed left to right. At every position the current byte
/// (together with the byte after it where needed) selects one scanner; the
/// scanner appends its rendering of the token to the output and returns the
/// index at which scanning resumes:
///
/// * `--` and `/*` start comments,
/// * `"` and `` ` `` start quoted identifiers,
/// * `'` starts a string literal,
/// * `$` starts a positional parameter or a dollar-quoted string,
/// * `0x`/`0X` followed by a hex digit starts a hex literal,
/// * any other ASCII digit starts a numeric literal,
/// * an ASCII letter or `_` starts an identifier or keyword.
///
/// Every other character (including all non-ASCII characters) is copied to
/// the output unchanged.
pub(crate) fn obfuscate(sql: &str) -> String {
    let bytes = sql.as_bytes();
    let mut out = String::with_capacity(sql.len());
    let mut pos = 0;
    while let Some(&current) = bytes.get(pos) {
        let next = bytes.get(pos + 1).copied();
        // Arm order matters: the hex arm must be tried before the generic
        // digit arm, and falls through to it when no hex digit follows `0x`.
        pos = match (current, next) {
            (b'-', Some(b'-')) => scan_line_comment(sql, bytes, pos, &mut out),
            (b'/', Some(b'*')) => scan_block_comment(sql, bytes, pos, &mut out),
            (b'"' | b'`', _) => scan_quoted_identifier(sql, bytes, pos, current, &mut out),
            (b'\'', _) => scan_string_literal(bytes, pos, &mut out),
            (b'$', _) => scan_dollar(sql, bytes, pos, &mut out),
            (b'0', Some(b'x' | b'X'))
                if bytes.get(pos + 2).is_some_and(u8::is_ascii_hexdigit) =>
            {
                scan_hex_literal(bytes, pos, &mut out)
            }
            (b'0'..=b'9', _) => scan_numeric_literal(bytes, pos, &mut out),
            (b'a'..=b'z' | b'A'..=b'Z' | b'_', _) => scan_identifier(sql, bytes, pos, &mut out),
            _ => {
                // Copy one whole character so multi-byte UTF-8 sequences are
                // never split.
                let ch = sql[pos..].chars().next().expect("i < bytes.len()");
                out.push(ch);
                pos + ch.len_utf8()
            }
        };
    }
    out
}
/// Copies a `--` line comment to `out` unchanged.
///
/// `start` is the index of the first `-`. The comment runs up to, but not
/// including, the next `\n` (or to the end of input). Returns the index at
/// which scanning resumes, i.e. the position of that newline or `bytes.len()`.
fn scan_line_comment(sql: &str, bytes: &[u8], start: usize, out: &mut String) -> usize {
    let body = start + 2;
    let end = bytes[body..]
        .iter()
        .position(|&byte| byte == b'\n')
        .map_or(bytes.len(), |offset| body + offset);
    out.push_str(&sql[start..end]);
    end
}
/// Copies a `/* ... */` block comment to `out` unchanged.
///
/// `start` is the index of the opening `/`. The comment ends just after the
/// first `*/` found at or beyond `start + 2`; nesting is not tracked. When no
/// terminator exists the rest of the input is treated as comment text.
/// Returns the index at which scanning resumes.
fn scan_block_comment(sql: &str, bytes: &[u8], start: usize, out: &mut String) -> usize {
    let body = start + 2;
    let end = bytes
        .get(body..)
        .unwrap_or(&[])
        .windows(2)
        .position(|pair| matches!(pair, [b'*', b'/']))
        .map_or(bytes.len(), |offset| body + offset + 2);
    out.push_str(&sql[start..end]);
    end
}
/// Copies a quoted identifier to `out` unchanged.
///
/// `start` is the index of the opening delimiter and `quote` is that
/// delimiter byte (`"` or `` ` `` at the call sites in this file). Inside the
/// identifier a doubled delimiter is an escaped delimiter and does not end
/// it. If the closing delimiter is missing, everything up to the end of input
/// is copied.
///
/// Returns the index at which scanning resumes: just past the closing
/// delimiter, or `bytes.len()` when unterminated.
fn scan_quoted_identifier(
    sql: &str,
    bytes: &[u8],
    start: usize,
    quote: u8,
    out: &mut String,
) -> usize {
    let mut i = start + 1;
    while i < bytes.len() {
        if bytes[i] != quote {
            i += 1;
        } else if bytes.get(i + 1).copied() == Some(quote) {
            // Doubled delimiter: an escaped quote that stays inside the name.
            i += 2;
        } else {
            // Lone delimiter: closes the identifier.
            i += 1;
            break;
        }
    }
    out.push_str(&sql[start..i]);
    i
}
/// Consumes a single-quoted string literal and emits one `?` in its place.
///
/// `start` is the index of the opening `'`. Two escape forms keep the scan
/// inside the literal: a backslash, which skips the byte after it, and a
/// doubled `''`. An unterminated literal extends to the end of input.
///
/// Returns the index at which scanning resumes: just past the closing quote,
/// or `bytes.len()` when unterminated.
fn scan_string_literal(bytes: &[u8], start: usize, out: &mut String) -> usize {
    let mut cursor = start + 1;
    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            // Skip the escaped byte too, but never step beyond the input when
            // the backslash is the very last byte.
            b'\\' => cursor = (cursor + 2).min(bytes.len()),
            b'\'' if bytes.get(cursor + 1) == Some(&b'\'') => cursor += 2,
            b'\'' => {
                cursor += 1;
                break;
            }
            _ => cursor += 1,
        }
    }
    out.push('?');
    cursor
}
/// Handles a token that begins with `$` at index `start`.
///
/// Three outcomes are possible:
///
/// * `$` followed by digits is a positional parameter (`$1`, `$42`); it is
///   copied to `out` unchanged.
/// * `$tag$` (the tag may be empty; otherwise it starts with an ASCII letter
///   or `_` and continues with ASCII letters, digits or `_`) opens a
///   dollar-quoted string. Everything through the matching `$tag$` closer —
///   or through the end of input when there is none — is replaced by one `?`.
/// * Anything else: the lone `$` is copied and scanning resumes right after
///   it.
///
/// Returns the index at which scanning resumes.
fn scan_dollar(sql: &str, bytes: &[u8], start: usize, out: &mut String) -> usize {
    let after_dollar = start + 1;

    // Positional parameter: keep `$` plus the whole digit run.
    if bytes.get(after_dollar).is_some_and(u8::is_ascii_digit) {
        let digit_count = bytes[after_dollar..]
            .iter()
            .take_while(|byte| byte.is_ascii_digit())
            .count();
        let end = after_dollar + digit_count;
        out.push_str(&sql[start..end]);
        return end;
    }

    // Measure the optional tag that follows the opening `$`.
    let tag_len = match bytes.get(after_dollar) {
        Some(first) if first.is_ascii_alphabetic() || *first == b'_' => {
            1 + bytes[after_dollar + 1..]
                .iter()
                .take_while(|byte| byte.is_ascii_alphanumeric() || **byte == b'_')
                .count()
        }
        _ => 0,
    };
    let tag_end = after_dollar + tag_len;

    // Without a second `$` closing the opener this is just a stray `$`.
    if bytes.get(tag_end) != Some(&b'$') {
        out.push('$');
        return after_dollar;
    }

    // Look for the closer `$tag$` anywhere in the body.
    let tag = &bytes[after_dollar..tag_end];
    let closer_len = tag.len() + 2;
    let body_start = tag_end + 1;
    let end = bytes[body_start..]
        .windows(closer_len)
        .position(|window| {
            window[0] == b'$'
                && window[closer_len - 1] == b'$'
                && &window[1..closer_len - 1] == tag
        })
        .map_or(bytes.len(), |offset| body_start + offset + closer_len);

    // Terminated or not, the whole dollar-quoted region becomes one `?`.
    out.push('?');
    end
}
/// Consumes a hexadecimal literal and emits one `?` in its place.
///
/// `start` is the index of the leading `0`; the two-byte `0x`/`0X` prefix is
/// skipped without inspection, so the caller must already have matched it.
/// The hex digits that follow are consumed, together with single `_`
/// digit-group separators that sit between two hex digits (for example
/// `0xFFFF_FFFF`). A trailing or doubled underscore is not part of the
/// literal and is left for the caller to scan.
///
/// Returns the index at which scanning resumes.
fn scan_hex_literal(bytes: &[u8], start: usize, out: &mut String) -> usize {
    let first_digit = start + 2;
    let mut i = first_digit;
    loop {
        match bytes.get(i) {
            Some(b) if b.is_ascii_hexdigit() => i += 1,
            // A separator only counts when hex digits surround it; otherwise
            // the digits after it would be emitted verbatim as `_FFFF`.
            Some(b'_')
                if i > first_digit && bytes.get(i + 1).is_some_and(u8::is_ascii_hexdigit) =>
            {
                i += 2;
            }
            _ => break,
        }
    }
    out.push('?');
    i
}
/// Consumes a decimal numeric literal and emits one `?` in its place.
///
/// `start` is the index of the first digit (the caller has already matched
/// it). The literal consists of an integer part, an optional `.` with an
/// optional fraction, and an optional exponent (`e`/`E`, optional sign, at
/// least one digit). An `e` that is not followed by a digit is not part of
/// the number and is left for the caller to scan.
///
/// Single `_` digit-group separators between two digits (for example
/// `1_500_000` or `1.618_034`) belong to the literal, so the digits after the
/// separator are not emitted verbatim.
///
/// Returns the index at which scanning resumes.
fn scan_numeric_literal(bytes: &[u8], start: usize, out: &mut String) -> usize {
    // Integer part: the first digit is known, so a separator may follow at once.
    let mut i = skip_digit_run(bytes, start + 1, true);

    // Optional fraction. A separator directly after the `.` is not accepted.
    if bytes.get(i) == Some(&b'.') {
        i = skip_digit_run(bytes, i + 1, false);
    }

    // Optional exponent; only committed to once a digit has been seen.
    if matches!(bytes.get(i), Some(b'e' | b'E')) {
        let mut j = i + 1;
        if matches!(bytes.get(j), Some(b'+' | b'-')) {
            j += 1;
        }
        if bytes.get(j).is_some_and(u8::is_ascii_digit) {
            i = skip_digit_run(bytes, j + 1, true);
        }
    }

    out.push('?');
    i
}

/// Advances from `start` over ASCII digits and returns the first index that
/// is not part of the run.
///
/// A single `_` is stepped over only when a digit precedes it (`after_digit`
/// tells whether that already holds at `start`) and a digit follows it.
fn skip_digit_run(bytes: &[u8], start: usize, mut after_digit: bool) -> usize {
    let mut i = start;
    loop {
        match bytes.get(i) {
            Some(b) if b.is_ascii_digit() => {
                i += 1;
                after_digit = true;
            }
            Some(b'_') if after_digit && bytes.get(i + 1).is_some_and(u8::is_ascii_digit) => {
                // Consume the separator together with the digit behind it.
                i += 2;
            }
            _ => return i,
        }
    }
}
/// Consumes a bare word — an identifier or keyword — starting at `start`.
///
/// The first byte is taken as-is (the caller has already matched it); the
/// word then extends over ASCII letters, digits and `_`. The boolean keywords
/// `true` and `false`, compared case-insensitively, are replaced by `?`;
/// every other word is copied to `out` unchanged.
///
/// Returns the index at which scanning resumes.
fn scan_identifier(sql: &str, bytes: &[u8], start: usize, out: &mut String) -> usize {
    let tail_len = bytes[start + 1..]
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || **byte == b'_')
        .count();
    let end = start + 1 + tail_len;
    let word = &sql[start..end];

    let is_boolean = ["true", "false"]
        .iter()
        .any(|keyword| word.eq_ignore_ascii_case(keyword));
    if is_boolean {
        out.push('?');
    } else {
        out.push_str(word);
    }
    end
}
#[cfg(test)]
mod tests {
use super::obfuscate;
// Obfuscates `input` and asserts that the result equals `expected`; the
// failure message echoes the input so the offending case is easy to spot.
fn check(input: &str, expected: &str) {
    let actual = obfuscate(input);
    assert_eq!(actual, expected, "input: {input:?}");
}
// ---- Single-quoted string literals: each becomes exactly one `?`. ----

// A plain string literal collapses to a single placeholder.
#[test]
fn string_simple() {
check("'alice'", "?");
}
// An empty literal is still one literal, hence one placeholder.
#[test]
fn string_empty() {
check("''", "?");
}
// A doubled `''` inside the literal is an escaped quote, not a terminator.
#[test]
fn string_doubled_quote_escape() {
check("'it''s'", "?");
}
// A backslash escapes the quote that follows it.
#[test]
fn string_backslash_escape() {
check(r"'it\'s'", "?");
}
// An escaped backslash must not also escape the closing quote.
#[test]
fn string_escaped_backslash() {
check(r"'\\'", "?");
}
// Text between two literals (here the `||` operator) is preserved.
#[test]
fn string_multiple_in_expression() {
check("'a' || 'b'", "? || ?");
}
// A literal with no closing quote swallows the rest of the input.
#[test]
fn string_unterminated() {
check("'unterminated", "?");
}
// A backslash as the very last byte must not push the scan past the input.
#[test]
fn string_trailing_backslash_at_eof() {
check("'\\", "?");
}
// ---- Numeric literals: decimal, exponent and hexadecimal forms. ----

// A run of digits is one literal.
#[test]
fn number_integer() {
check("42", "?");
}
// A lone `0` is a numeric literal, not the start of a hex literal.
#[test]
fn number_zero() {
check("0", "?");
}
// The fractional part belongs to the same literal.
#[test]
fn number_decimal() {
check("3.14", "?");
}
// Lowercase exponent marker after a fraction.
#[test]
fn number_exponent_lowercase() {
check("1.5e10", "?");
}
// Uppercase exponent marker with a negative sign.
#[test]
fn number_exponent_uppercase_signed() {
check("2E-5", "?");
}
// Exponent with an explicit `+` sign.
#[test]
fn number_exponent_explicit_plus() {
check("1e+3", "?");
}
// An `e` with no digits after it is not an exponent: only the `1` is
// replaced and the `e` is emitted as an ordinary word.
#[test]
fn number_exponent_without_digits_falls_back() {
check("1e", "?e");
}
// Hex literal with a lowercase `x` prefix.
#[test]
fn number_hex_lowercase() {
check("0xFF", "?");
}
// Hex literal with an uppercase `X` prefix.
#[test]
fn number_hex_uppercase_prefix() {
check("0XAB", "?");
}
// `0x` not followed by a hex digit is not a hex literal: the `0` is a
// number and `xZZ` is kept as a word.
#[test]
fn number_hex_no_digits() {
check("0xZZ", "?xZZ");
}
// Same fallback when the input ends right after the `0x` prefix.
#[test]
fn number_hex_prefix_at_eof() {
check("0x", "?x");
}
// Signs are never folded into the literal; the next tests pin that for
// unary, binary and keyword-adjacent positions.
#[test]
fn number_unary_minus_preserves_sign() {
check("WHERE x = -1", "WHERE x = -?");
}
#[test]
fn number_unary_plus_preserves_sign() {
check("WHERE x = +1", "WHERE x = +?");
}
#[test]
fn number_binary_minus_preserves_sign() {
check("SELECT a-1 FROM t", "SELECT a-? FROM t");
}
#[test]
fn number_subtract_inside_parens() {
check("SELECT (1-1)", "SELECT (?-?)");
}
#[test]
fn number_signed_in_between() {
check("BETWEEN 1 AND -2", "BETWEEN ? AND -?");
}
#[test]
fn number_signed_after_then() {
check("THEN -2", "THEN -?");
}
// Digits that are part of an identifier are not numeric literals.
#[test]
fn number_identifier_with_trailing_digit() {
check("col1", "col1");
}
// A leading `.` is copied as punctuation; only the digits are replaced.
#[test]
fn number_leading_dot() {
check(".5", ".?");
}
// ---- Boolean keywords are replaced; NULL and look-alike words are not. ----

// `true` / `false` are matched case-insensitively and replaced.
#[test]
fn boolean_true_lowercase() {
check("true", "?");
}
#[test]
fn boolean_true_uppercase() {
check("TRUE", "?");
}
#[test]
fn boolean_true_mixed_case() {
check("True", "?");
}
#[test]
fn boolean_false_lowercase() {
check("false", "?");
}
#[test]
fn boolean_false_uppercase() {
check("FALSE", "?");
}
// NULL is kept verbatim in either case.
#[test]
fn null_lowercase() {
check("null", "null");
}
#[test]
fn null_uppercase() {
check("NULL", "NULL");
}
// Only whole words count: identifiers that merely start with a boolean
// keyword are preserved.
#[test]
fn boolean_substring_in_identifier_preserved() {
check("TRUE_COLUMN", "TRUE_COLUMN");
}
#[test]
fn boolean_truthy_preserved() {
check("truthy", "truthy");
}
#[test]
fn boolean_falsey_preserved() {
check("falsey", "falsey");
}
// ---- Quoted identifiers are names, not values, so they are preserved. ----

// Double-quoted identifier, including the embedded space.
#[test]
fn ansi_quoted_identifier_preserved() {
check("\"my table\"", "\"my table\"");
}
// Backtick-quoted identifier.
#[test]
fn backtick_quoted_identifier_preserved() {
check("`col`", "`col`");
}
// A doubled delimiter inside the identifier is an escape, not a terminator.
#[test]
fn ansi_quoted_with_doubled_escape() {
check("\"with\"\"quote\"", "\"with\"\"quote\"");
}
#[test]
fn backtick_quoted_with_doubled_escape() {
check("`with``tick`", "`with``tick`");
}
// With no closing delimiter the remainder of the input is copied as-is.
#[test]
fn ansi_quoted_unterminated_preserved() {
check("\"abc", "\"abc");
}
// Quoted identifiers and a string literal in one statement: only the
// string literal is replaced.
#[test]
fn quoted_identifiers_with_string_literal() {
check(
"SELECT \"name\" FROM users WHERE \"name\" = 'alice'",
"SELECT \"name\" FROM users WHERE \"name\" = ?",
);
}
// ---- `$`: dollar-quoted strings, positional parameters and stray `$`. ----

// `$$ ... $$` (empty tag) is a string literal.
#[test]
fn dollar_quoted_empty_tag() {
check("$$body$$", "?");
}
// With a tag, single quotes inside the body do not end or split the literal.
#[test]
fn dollar_quoted_with_tag_containing_inner_quotes() {
check("$tag$body with 'quotes'$tag$", "?");
}
// Positional parameters are placeholders already and are kept verbatim.
#[test]
fn dollar_positional_param_one_digit() {
check("$1", "$1");
}
#[test]
fn dollar_positional_param_multi_digit() {
check("$42", "$42");
}
// An opener without a matching closer swallows the rest of the input.
#[test]
fn dollar_quoted_unterminated() {
check("$$ab", "?");
}
// A partial closer (`$ta`) does not terminate a `$tag$` literal.
#[test]
fn dollar_quoted_unterminated_with_tag() {
check("$tag$body$ta", "?");
}
// Two literals back to back yield two placeholders; the closer of the
// first must not be mistaken for the opener of the second.
#[test]
fn dollar_quoted_adjacent() {
check("$$a$$$$b$$", "??");
}
// `$word` with no second `$` is not an opener; everything is preserved.
#[test]
fn dollar_lone_with_no_opener_preserved() {
check("$body", "$body");
}
// ---- Comments are copied verbatim, including any literals inside them. ----

// A `--` comment ends at the newline; scanning of the next line is normal.
#[test]
fn line_comment_preserves_content() {
check("-- secret 42\nSELECT 1", "-- secret 42\nSELECT ?");
}
// A `--` comment with no trailing newline runs to the end of input.
#[test]
fn line_comment_at_eof() {
check("-- trailing", "-- trailing");
}
// A block comment ends at `*/`; the text after it is scanned normally.
#[test]
fn block_comment_preserves_content() {
check("/* secret 42 */ SELECT 1", "/* secret 42 */ SELECT ?");
}
// A block comment with no terminator runs to the end of input.
#[test]
fn block_comment_unterminated() {
check("/* unterminated", "/* unterminated");
}
// The shortest possible block comment, directly followed by a keyword.
#[test]
fn block_comment_empty() {
check("/**/SELECT 1", "/**/SELECT ?");
}
// ---- End-to-end examples on complete statements. ----

// String and integer literals inside a VALUES list are both replaced.
#[test]
fn golden_insert_with_inline_literals() {
check(
"INSERT INTO users (name, age) VALUES ('alice', 42)",
"INSERT INTO users (name, age) VALUES (?, ?)",
);
}
// Whitespace, including newlines and indentation, is copied unchanged.
#[test]
fn golden_preserves_whitespace_and_newlines() {
check(
"SELECT *\n FROM users\n WHERE id = 1",
"SELECT *\n FROM users\n WHERE id = ?",
);
}
// The `E` prefix is scanned as an ordinary word and kept; only the quoted
// part is replaced.
#[test]
fn golden_postgres_e_string_prefix() {
check(r"E'a\nb'", "E?");
}
// Non-ASCII characters in a bare word are copied through unchanged.
#[test]
fn non_ascii_identifier_preserved() {
check("WHERE café = 'value'", "WHERE café = ?");
}
// Property-based tests: SQL-like fragments are assembled from small token
// strategies and general invariants of `obfuscate` are checked on them.
mod proptests {
use super::super::obfuscate;
use proptest::prelude::*;
// Marker text planted inside literals; it must never show up in the output.
const SENTINEL: &str = "XSECRETX";
// Zero to five spaces, tabs or newlines.
fn whitespace() -> impl Strategy<Value = String> {
"[ \t\n]{0,5}".prop_map(String::from)
}
// Lowercase bare identifier (digits allowed after the first character),
// excluding the boolean keywords because those are replaced, not preserved.
fn ident_lower() -> impl Strategy<Value = String> {
"[a-z_][a-z0-9_]{0,7}"
.prop_map(String::from)
.prop_filter("exclude TRUE/FALSE which the obfuscator replaces", |s| {
!s.eq_ignore_ascii_case("true") && !s.eq_ignore_ascii_case("false")
})
}
// Double-quoted identifier with no embedded quote characters.
fn ansi_quoted_ident() -> impl Strategy<Value = String> {
"[a-z0-9 _]{0,8}".prop_map(|inner| format!("\"{inner}\""))
}
// Backtick-quoted identifier with no embedded backticks.
fn backtick_quoted_ident() -> impl Strategy<Value = String> {
"[a-z0-9 _]{0,8}".prop_map(|inner| format!("`{inner}`"))
}
// Single punctuation characters that the obfuscator copies through. `-`,
// `/`, `'` and `$` are absent from the list; those can start other tokens.
fn safe_punct() -> impl Strategy<Value = String> {
prop::sample::select(vec![",", ";", "=", "<", ">", "+", "*", "(", ")"])
.prop_map(String::from)
}
// Integer literal of one to five digits.
fn integer() -> impl Strategy<Value = String> {
"[0-9]{1,5}".prop_map(String::from)
}
// Decimal literal with digits on both sides of the point.
fn decimal() -> impl Strategy<Value = String> {
"[0-9]{1,3}\\.[0-9]{1,3}".prop_map(String::from)
}
// Hex literal with either prefix case and one to four hex digits.
fn hex_literal() -> impl Strategy<Value = String> {
"0[xX][0-9a-fA-F]{1,4}".prop_map(String::from)
}
// Single-quoted string with no quotes or backslashes inside.
fn string_literal_plain() -> impl Strategy<Value = String> {
"[a-z0-9 _]{0,8}".prop_map(|inner| format!("'{inner}'"))
}
// Terminated dollar-quoted string with a non-empty tag and a `$`-free body.
fn dollar_quoted_plain() -> impl Strategy<Value = String> {
(
"[a-z_][a-z0-9_]{0,3}".prop_map(String::from),
"[a-z0-9 _]{0,8}".prop_map(String::from),
)
.prop_map(|(tag, body)| format!("${tag}${body}${tag}$"))
}
// `--` comment, always terminated by a newline.
fn line_comment() -> impl Strategy<Value = String> {
"[a-z0-9 _]{0,15}".prop_map(|inner| format!("--{inner}\n"))
}
// Terminated `/* ... */` comment.
fn block_comment() -> impl Strategy<Value = String> {
"[a-z0-9 _]{0,15}".prop_map(|inner| format!("/*{inner}*/"))
}
// Boolean keywords in a few casings.
fn boolean_kw() -> impl Strategy<Value = String> {
prop::sample::select(vec!["TRUE", "FALSE", "true", "false", "True", "False"])
.prop_map(String::from)
}
// Any one of the token kinds above.
fn token_any() -> impl Strategy<Value = String> {
prop_oneof![
ident_lower(),
whitespace(),
ansi_quoted_ident(),
backtick_quoted_ident(),
safe_punct(),
integer(),
decimal(),
hex_literal(),
string_literal_plain(),
dollar_quoted_plain(),
line_comment(),
block_comment(),
boolean_kw(),
]
}
// Up to 15 arbitrary tokens concatenated with no separator, so tokens may
// abut one another.
fn fragment_any() -> impl Strategy<Value = String> {
prop::collection::vec(token_any(), 0..16).prop_map(|tokens| tokens.concat())
}
// Only token kinds the obfuscator is expected to copy through unchanged.
fn token_preservable() -> impl Strategy<Value = String> {
prop_oneof![
ident_lower(),
whitespace(),
ansi_quoted_ident(),
backtick_quoted_ident(),
safe_punct(),
Just("NULL".to_string()),
Just("null".to_string()),
]
}
// Up to 15 preservable tokens concatenated.
fn fragment_preservable() -> impl Strategy<Value = String> {
prop::collection::vec(token_preservable(), 0..16).prop_map(|tokens| tokens.concat())
}
// String literal whose content is the sentinel.
fn marked_string() -> impl Strategy<Value = String> {
Just(format!("'{SENTINEL}'"))
}
// Dollar-quoted literal (tag may be empty) whose body is the sentinel.
fn marked_dollar() -> impl Strategy<Value = String> {
"[a-z_]{0,3}".prop_map(|tag| format!("${tag}${SENTINEL}${tag}$"))
}
// Preservable tokens mixed with sentinel-carrying literals.
fn token_marked() -> impl Strategy<Value = String> {
prop_oneof![
ident_lower(),
whitespace(),
ansi_quoted_ident(),
backtick_quoted_ident(),
safe_punct(),
marked_string(),
marked_dollar(),
]
}
// Up to 11 marked tokens concatenated.
fn fragment_marked() -> impl Strategy<Value = String> {
prop::collection::vec(token_marked(), 0..12).prop_map(|tokens| tokens.concat())
}
// Variants of the identifier strategies that never contain a digit, so any
// digit in the output of `no_digit_leak` must have come from a literal.
fn ident_lower_no_digits() -> impl Strategy<Value = String> {
"[a-z_]{1,8}"
.prop_map(String::from)
.prop_filter("exclude TRUE/FALSE which the obfuscator replaces", |s| {
!s.eq_ignore_ascii_case("true") && !s.eq_ignore_ascii_case("false")
})
}
fn ansi_quoted_no_digits() -> impl Strategy<Value = String> {
"[a-z _]{0,8}".prop_map(|inner| format!("\"{inner}\""))
}
fn backtick_quoted_no_digits() -> impl Strategy<Value = String> {
"[a-z _]{0,8}".prop_map(|inner| format!("`{inner}`"))
}
// Tokens whose preserved forms are digit-free, plus numeric and sentinel
// literals that are expected to be replaced.
fn token_digit_free() -> impl Strategy<Value = String> {
prop_oneof![
ident_lower_no_digits(),
whitespace(),
ansi_quoted_no_digits(),
backtick_quoted_no_digits(),
safe_punct(),
Just("NULL".to_string()),
integer(),
decimal(),
hex_literal(),
marked_string(),
marked_dollar(),
]
}
// Unlike the other fragments, tokens are joined with a single space —
// presumably so a numeric token cannot fuse with a preceding identifier,
// whose trailing digits the obfuscator would keep.
fn fragment_digit_free() -> impl Strategy<Value = String> {
prop::collection::vec(token_digit_free(), 0..16).prop_map(|tokens| tokens.join(" "))
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(256))]
// Arbitrary bytes, lossily converted to UTF-8, must never cause a panic.
#[test]
fn no_panic_on_random_bytes(bytes in prop::collection::vec(any::<u8>(), 0..256)) {
let s = String::from_utf8_lossy(&bytes).into_owned();
let _ = obfuscate(&s);
}
// Structured fragments must never cause a panic either.
#[test]
fn no_panic_on_structured_fragments(s in fragment_any()) {
let _ = obfuscate(&s);
}
// Obfuscating already-obfuscated text changes nothing.
#[test]
fn idempotent(s in fragment_any()) {
let once = obfuscate(&s);
let twice = obfuscate(&once);
prop_assert_eq!(once, twice);
}
// The output is never longer (in bytes) than the input.
#[test]
fn length_monotonic(s in fragment_any()) {
let out = obfuscate(&s);
prop_assert!(out.len() <= s.len());
}
// Input made only of preservable tokens comes back unchanged.
#[test]
fn preservable_round_trip(s in fragment_preservable()) {
prop_assert_eq!(obfuscate(&s), s);
}
// The sentinel placed inside literals never survives obfuscation.
#[test]
fn no_leak_through_literals(s in fragment_marked()) {
let out = obfuscate(&s);
prop_assert!(
!out.contains(SENTINEL),
"sentinel leaked: input={s:?} output={out:?}"
);
}
// No ASCII digit from a numeric literal survives obfuscation.
#[test]
fn no_digit_leak(s in fragment_digit_free()) {
let out = obfuscate(&s);
prop_assert!(
!out.chars().any(|c| c.is_ascii_digit()),
"digit leaked: input={s:?} output={out:?}"
);
}
}
}
}