use std::collections::HashMap;
use std::sync::Arc;
/// Parsed parameter lists waiting to be substituted into a later SQL record.
///
/// `compute_normalized` inserts and looks up entries using the record's
/// `(sess_id, statement)` pair as the key; the value is the shared, parsed
/// parameter list of the most recent `PARAMS(...)` record stored under that key.
pub type ParamBuffer = HashMap<(String, String), Arc<Vec<ParamValue>>>;
/// One parameter value taken from a `PARAMS(...)` record, held in the exact
/// textual form that gets spliced into the SQL statement.
#[derive(Debug, Clone)]
pub enum ParamValue {
    /// A single-quoted literal; the stored text is emitted verbatim.
    Quoted(String),
    /// An unquoted token (for example a number); emitted verbatim.
    Bare(String),
    /// An absent value; emitted as the keyword `NULL`.
    Null,
}

impl ParamValue {
    /// Returns the SQL text that replaces a placeholder for this value.
    ///
    /// `Quoted` and `Bare` yield their stored text unchanged, `Null` yields
    /// the literal `NULL`.
    fn as_sql(&self) -> &str {
        match self {
            Self::Null => "NULL",
            Self::Quoted(text) => text.as_str(),
            Self::Bare(text) => text.as_str(),
        }
    }
}
/// Parses the body of a `PARAMS(...)={...}` record into its parameter values.
///
/// The entry list is everything between the first `={` and a final `}` that
/// must be the last character of `body`. Entries are read one after another
/// with `parse_one_entry`; whitespace between entries is ignored and a single
/// separating comma after an entry is accepted but not required.
///
/// Returns `None` when `={` is missing, when `body` does not end with `}`, or
/// when any entry fails to parse. An empty list (`={}`) yields `Some(vec![])`.
#[must_use]
pub fn parse_params(body: &str) -> Option<Vec<ParamValue>> {
    let open = memchr::memmem::find(body.as_bytes(), b"={")?;
    let entries = body[open + 2..].strip_suffix('}')?;

    let mut parsed = Vec::new();
    let mut cursor = entries.trim_start();
    while !cursor.is_empty() {
        let (value, after) = parse_one_entry(cursor)?;
        parsed.push(value);

        // Step over the optional separator before the next entry.
        let after = after.trim_start();
        cursor = match after.strip_prefix(',') {
            Some(next) => next.trim_start(),
            None => after,
        };
    }
    Some(parsed)
}
/// Parses one parenthesised entry at the start of `s` and returns its value
/// together with the text that follows the closing `)`.
///
/// An entry has three comma-separated fields; the first two are skipped
/// without inspection and only the third is turned into a [`ParamValue`]:
///
/// * a field starting with `'` is read as a quoted literal, where `''` inside
///   the literal stands for an embedded quote; the stored text keeps its quotes;
/// * any other field runs up to the first `)`; after trimming, empty text
///   becomes [`ParamValue::Null`] and anything else [`ParamValue::Bare`].
///
/// Returns `None` if `s` does not start with `(`, a comma or the closing `)`
/// is missing, or a quoted literal is never terminated.
fn parse_one_entry(s: &str) -> Option<(ParamValue, &str)> {
    let body = s.strip_prefix('(')?;

    // Skip the first two fields; only the third carries the value.
    let first_comma = memchr::memchr(b',', body.as_bytes())?;
    let after_first = body[first_comma + 1..].trim_start();
    let second_comma = memchr::memchr(b',', after_first.as_bytes())?;
    let data = after_first[second_comma + 1..].trim_start();

    if !data.starts_with('\'') {
        // Unquoted value: everything up to the first `)`.
        let close = memchr::memchr(b')', data.as_bytes())?;
        let value = match data[..close].trim() {
            "" => ParamValue::Null,
            raw => ParamValue::Bare(String::from(raw)),
        };
        return Some((value, &data[close + 1..]));
    }

    // Quoted value: find the terminating quote, treating `''` as an escape.
    let raw = data.as_bytes();
    let mut end = 1;
    loop {
        end += memchr::memchr(b'\'', &raw[end..])? + 1;
        if raw.get(end) == Some(&b'\'') {
            end += 1;
        } else {
            break;
        }
    }
    // `end` sits right after an ASCII quote, so it is a valid char boundary.
    let (literal, after) = data.split_at(end);
    let tail = after.trim_start().strip_prefix(')')?;
    Some((ParamValue::Quoted(String::from(literal)), tail))
}
/// Counts the parameter placeholders in `sql`, ignoring single-quoted literals.
///
/// Two placeholder styles are recognised outside of string literals:
/// `?` (counted one by one) and `:N` where `N` is a run of ASCII digits.
///
/// Returns `(count, colon_style)`:
/// * if any `:N` with `N > 0` was seen, `(largest N, true)`;
/// * otherwise `(number of '?', false)`.
///
/// A `:` that is not followed by a digit is skipped. Inside a literal `''` is
/// treated as an escaped quote, and an unterminated literal swallows the rest
/// of the statement. Ordinals too large for `usize` saturate.
#[inline]
#[must_use]
pub fn count_placeholders(sql: &str) -> (usize, bool) {
    let bytes = sql.as_bytes();
    let len = bytes.len();
    let mut pos = 0;
    let mut positional = 0usize;
    let mut highest_ordinal = 0usize;

    // `pos` never exceeds `len`, so the slice below is always in bounds and
    // becomes empty (ending the scan) once the input is exhausted.
    while let Some(offset) = memchr::memchr3(b'\'', b'?', b':', &bytes[pos..]) {
        pos += offset;
        match bytes[pos] {
            b'\'' => {
                // Skip the whole string literal.
                pos += 1;
                loop {
                    match memchr::memchr(b'\'', &bytes[pos..]) {
                        None => {
                            pos = len;
                            break;
                        }
                        Some(close) => {
                            pos += close + 1;
                            if bytes.get(pos) == Some(&b'\'') {
                                // Doubled quote: still inside the literal.
                                pos += 1;
                            } else {
                                break;
                            }
                        }
                    }
                }
            }
            b'?' => {
                positional += 1;
                pos += 1;
            }
            b':' => {
                let digits_start = pos + 1;
                let digit_count = bytes[digits_start..]
                    .iter()
                    .take_while(|b| b.is_ascii_digit())
                    .count();
                if digit_count == 0 {
                    pos += 1;
                } else {
                    let digits_end = digits_start + digit_count;
                    let ordinal = bytes[digits_start..digits_end]
                        .iter()
                        .fold(0usize, |acc, &digit| {
                            acc.saturating_mul(10)
                                .saturating_add(usize::from(digit - b'0'))
                        });
                    highest_ordinal = highest_ordinal.max(ordinal);
                    pos = digits_end;
                }
            }
            // memchr3 only reports the three bytes matched above.
            _ => unreachable!(),
        }
    }

    if highest_ordinal > 0 {
        (highest_ordinal, true)
    } else {
        (positional, false)
    }
}
/// Writes `sql` into `out` with its placeholders replaced by `params`.
///
/// `out` is cleared first. With `colon_style == false` each `?` outside a
/// string literal takes the next value in order; with `colon_style == true`
/// each `:N` takes the value at index `N - 1`. Placeholders that have no
/// matching value are copied through unchanged, as is a `:` not followed by a
/// digit. Single-quoted literals (with `''` as an escaped quote) are copied
/// verbatim, and an unterminated literal copies the remainder of `sql`.
///
/// When `params` is empty the statement is copied as-is.
#[inline]
fn apply_params_into(sql: &str, params: &[ParamValue], colon_style: bool, out: &mut Vec<u8>) {
    out.clear();
    let src = sql.as_bytes();
    if params.is_empty() {
        out.extend_from_slice(src);
        return;
    }

    // Each substitution replaces at least one byte, hence the `- 1` per value.
    let growth: usize = params
        .iter()
        .map(|p| p.as_sql().len().saturating_sub(1))
        .sum();
    out.reserve(sql.len() + growth);

    let marker = if colon_style { b':' } else { b'?' };
    let end = src.len();
    let mut pos = 0;
    let mut next_param = 0usize;

    while pos < end {
        let Some(offset) = memchr::memchr2(b'\'', marker, &src[pos..]) else {
            out.extend_from_slice(&src[pos..]);
            break;
        };
        // Plain text in front of the next point of interest.
        out.extend_from_slice(&src[pos..pos + offset]);
        pos += offset;

        if src[pos] == b'\'' {
            // Locate the end of the literal, then copy it in one piece.
            let literal_start = pos;
            pos += 1;
            loop {
                let Some(close) = memchr::memchr(b'\'', &src[pos..]) else {
                    pos = end;
                    break;
                };
                pos += close + 1;
                if pos < end && src[pos] == b'\'' {
                    pos += 1;
                } else {
                    break;
                }
            }
            out.extend_from_slice(&src[literal_start..pos]);
        } else if colon_style {
            let digits_start = pos + 1;
            let digit_count = src[digits_start..]
                .iter()
                .take_while(|b| b.is_ascii_digit())
                .count();
            if digit_count == 0 {
                out.push(b':');
                pos += 1;
            } else {
                let digits_end = digits_start + digit_count;
                let ordinal = src[digits_start..digits_end]
                    .iter()
                    .fold(0usize, |acc, &digit| {
                        acc.saturating_mul(10)
                            .saturating_add(usize::from(digit - b'0'))
                    });
                // Ordinals are 1-based; `:0` and out-of-range ordinals stay as written.
                match ordinal.checked_sub(1).and_then(|idx| params.get(idx)) {
                    Some(value) => out.extend_from_slice(value.as_sql().as_bytes()),
                    None => out.extend_from_slice(&src[pos..digits_end]),
                }
                pos = digits_end;
            }
        } else {
            match params.get(next_param) {
                Some(value) => out.extend_from_slice(value.as_sql().as_bytes()),
                None => out.push(b'?'),
            }
            next_param += 1;
            pos += 1;
        }
    }
}
/// Test-only wrapper around `apply_params_into` that renders into a fresh
/// buffer and hands the result back as an owned `String`.
///
/// Panics if the rendered bytes are not valid UTF-8.
#[cfg(test)]
fn apply_params(sql: &str, params: &[ParamValue], colon_style: bool) -> String {
    let mut rendered: Vec<u8> = Vec::new();
    apply_params_into(sql, params, colon_style, &mut rendered);
    let text = String::from_utf8(rendered);
    text.expect("apply_params produced invalid UTF-8")
}
/// Processes one log record for parameter substitution.
///
/// * A record without a tag whose body starts with `PARAMS(` is parsed, and on
///   success its values are stored in `buffer` under the record's
///   `(sess_id, statement)` key. Such records always yield `None`.
/// * A record tagged `INS`, `DEL`, `UPD` or `SEL` whose SQL contains
///   placeholders is rendered into `scratch` with the buffered values for the
///   same key, and the rendered statement is returned as a borrow of `scratch`.
///
/// `placeholder_override`, when set, forces the placeholder style used for
/// substitution (`true` = `:N`, `false` = `?`) instead of the detected one.
///
/// Returns `None` when the tag is not one of the four above, the SQL has no
/// placeholders, no values are buffered for the key, or the number of buffered
/// values differs from the placeholder count (a warning is logged in that case).
///
/// # Panics
///
/// Panics if the rendered statement is not valid UTF-8. Substitution only
/// splices whole `&str` values at ASCII positions of `pm_sql`, so this
/// indicates a bug in `apply_params_into`.
pub fn compute_normalized<'a>(
    record: &dm_database_parser_sqllog::Sqllog,
    pm_sql: &str,
    buffer: &mut ParamBuffer,
    placeholder_override: Option<bool>,
    scratch: &'a mut Vec<u8>,
) -> Option<&'a str> {
    // Untagged records can only contribute parameters; they never produce SQL.
    let Some(tag) = record.tag.as_deref() else {
        if pm_sql.starts_with("PARAMS(") {
            if let Some(params) = parse_params(pm_sql) {
                buffer.insert(
                    (record.sess_id.clone(), record.statement.clone()),
                    Arc::new(params),
                );
            }
        }
        return None;
    };
    if !matches!(tag, "INS" | "DEL" | "UPD" | "SEL") {
        return None;
    }

    let (placeholder_count, detected_colon) = count_placeholders(pm_sql);
    if placeholder_count == 0 {
        return None;
    }

    let key = (record.sess_id.clone(), record.statement.clone());
    // Borrow the buffered values directly; `scratch` is a separate allocation,
    // so no `Arc` clone is needed to keep the borrow checker happy.
    let params = buffer.get(&key)?;

    // NOTE(review): `placeholder_count` always reflects the *detected* style,
    // even when the override selects the other one — confirm this is intended.
    let colon_style = placeholder_override.unwrap_or(detected_colon);

    if params.len() != placeholder_count {
        log::warn!(
            "replace_parameters: param count mismatch (params={}, placeholders={}) for sql: {}",
            params.len(),
            placeholder_count,
            // Limit the logged statement to its first 80 characters.
            pm_sql
                .char_indices()
                .nth(80)
                .map_or(pm_sql, |(i, _)| &pm_sql[..i])
        );
        return None;
    }

    apply_params_into(pm_sql, params.as_slice(), colon_style, scratch);
    Some(std::str::from_utf8(scratch).expect("apply_params_into produced invalid UTF-8"))
}
/// Unit tests for parameter parsing, placeholder counting and substitution.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an unquoted parameter value.
    fn bare(s: &str) -> ParamValue {
        ParamValue::Bare(String::from(s))
    }

    /// Builds a quoted parameter value; `s` must already include its quotes.
    fn quoted(s: &str) -> ParamValue {
        ParamValue::Quoted(String::from(s))
    }

    // ---- parse_params ----

    #[test]
    fn test_parse_single_varchar() {
        let params = parse_params("PARAMS(SEQNO, TYPE, DATA)={(0, VARCHAR, 'SM')}").unwrap();
        assert_eq!(params.len(), 1);
        assert_eq!(params[0].as_sql(), "'SM'");
    }

    #[test]
    fn test_parse_mixed_types() {
        let params = parse_params(
            "PARAMS(SEQNO, TYPE, DATA)={(0, DEC, 3), (1, VARCHAR, 'send ok'), (2, DEC, 0), (3, INTEGER, 42)}",
        )
        .unwrap();
        assert_eq!(params.len(), 4);
        assert_eq!(params[0].as_sql(), "3");
        assert_eq!(params[1].as_sql(), "'send ok'");
        assert_eq!(params[2].as_sql(), "0");
        assert_eq!(params[3].as_sql(), "42");
    }

    #[test]
    fn test_parse_blob_empty() {
        let params = parse_params("PARAMS(SEQNO, TYPE, DATA)={(0, DEC, 1), (1, BLOB, )}").unwrap();
        assert_eq!(params.len(), 2);
        assert_eq!(params[0].as_sql(), "1");
        assert_eq!(params[1].as_sql(), "NULL");
    }

    #[test]
    fn test_parse_quoted_with_escaped_quote() {
        let params = parse_params("PARAMS(SEQNO, TYPE, DATA)={(0, VARCHAR, 'O''Brien')}").unwrap();
        assert_eq!(params[0].as_sql(), "'O''Brien'");
    }

    #[test]
    fn test_parse_invalid_returns_none() {
        assert!(parse_params("not a params record").is_none());
    }

    // ---- apply_params, `?` style ----

    #[test]
    fn test_apply_single_string_param() {
        let params = vec![quoted("'3USJ29'")];
        let result = apply_params("WHERE code = ?", &params, false);
        assert_eq!(result, "WHERE code = '3USJ29'");
    }

    #[test]
    fn test_apply_numeric_param() {
        let params = vec![bare("42")];
        let result = apply_params("WHERE id = ?", &params, false);
        assert_eq!(result, "WHERE id = 42");
    }

    #[test]
    fn test_apply_null_param() {
        let params = vec![ParamValue::Null];
        let result = apply_params("WHERE tag = ?", &params, false);
        assert_eq!(result, "WHERE tag = NULL");
    }

    #[test]
    fn test_apply_multiple_params() {
        let params = vec![bare("2370075"), quoted("'SJ-1'"), ParamValue::Null];
        let result = apply_params("VALUES (?, ?, ?)", &params, false);
        assert_eq!(result, "VALUES (2370075, 'SJ-1', NULL)");
    }

    #[test]
    fn test_apply_no_placeholders() {
        let params = vec![bare("1")];
        let result = apply_params("SELECT 1", &params, false);
        assert_eq!(result, "SELECT 1");
    }

    #[test]
    fn test_apply_skip_literal_contents() {
        let params = vec![quoted("'real'")];
        let result = apply_params("WHERE a = '?' AND b = ?", &params, false);
        assert_eq!(result, "WHERE a = '?' AND b = 'real'");
    }

    #[test]
    fn test_apply_insert_with_function() {
        let params = vec![bare("1"), quoted("'hello'"), bare("99")];
        let result = apply_params(
            "INSERT INTO t VALUES (?,current_timestamp,?,?)",
            &params,
            false,
        );
        assert_eq!(
            result,
            "INSERT INTO t VALUES (1,current_timestamp,'hello',99)"
        );
    }

    #[test]
    fn test_apply_chinese_in_param() {
        // Multi-byte UTF-8 in a value must survive the byte-level splice.
        let params = vec![quoted("'张三'")];
        let result = apply_params("WHERE name = ?", &params, false);
        assert_eq!(result, "WHERE name = '张三'");
    }

    // ---- apply_params, `:N` style ----

    #[test]
    fn test_apply_colon_style_basic() {
        let params = vec![bare("10"), quoted("'abc'")];
        let result = apply_params("WHERE id = :1 AND code = :2", &params, true);
        assert_eq!(result, "WHERE id = 10 AND code = 'abc'");
    }

    #[test]
    fn test_apply_colon_style_out_of_order() {
        let params = vec![bare("1"), bare("2"), bare("3")];
        let result = apply_params("SELECT :3, :1, :2", &params, true);
        assert_eq!(result, "SELECT 3, 1, 2");
    }

    // ---- count_placeholders ----

    #[test]
    fn test_count_placeholders_question() {
        let (count, colon_style) = count_placeholders("WHERE a = ? AND b = ?");
        assert_eq!(count, 2);
        assert!(!colon_style);
    }

    #[test]
    fn test_count_placeholders_colon() {
        let (count, colon_style) = count_placeholders("WHERE a = :1 AND b = :2 AND c = :3");
        assert_eq!(count, 3);
        assert!(colon_style);
    }

    #[test]
    fn test_count_placeholders_skips_literals() {
        let (count, colon_style) = count_placeholders("WHERE a = '?' AND b = ?");
        assert_eq!(count, 1);
        assert!(!colon_style);
    }

    #[test]
    fn test_count_placeholders_none() {
        let (count, colon_style) = count_placeholders("SELECT 1");
        assert_eq!(count, 0);
        assert!(!colon_style);
    }

    #[test]
    fn test_count_placeholders_unclosed_string() {
        let (count, _) = count_placeholders("SELECT 'unclosed");
        assert_eq!(count, 0);
    }

    #[test]
    fn test_count_placeholders_escaped_quote() {
        let (count, _) = count_placeholders("WHERE name = 'O''Brien' AND id = ?");
        assert_eq!(count, 1);
    }

    #[test]
    fn test_count_placeholders_colon_not_followed_by_digit() {
        let (count, colon_style) = count_placeholders("SELECT a::text");
        assert_eq!(count, 0);
        assert!(!colon_style);
    }

    // ---- apply_params edge cases ----

    #[test]
    fn test_apply_params_empty_params_returns_sql_unchanged() {
        let result = apply_params("SELECT * FROM t", &[], false);
        assert_eq!(result, "SELECT * FROM t");
    }

    #[test]
    fn test_apply_params_with_string_literal_verbatim_copy() {
        let params = vec![bare("42")];
        let result = apply_params("WHERE code = '?' AND id = ?", &params, false);
        assert_eq!(result, "WHERE code = '?' AND id = 42");
    }

    #[test]
    fn test_apply_params_escaped_quote_in_literal() {
        let params = vec![bare("1")];
        let result = apply_params("WHERE name = 'O''Brien' AND id = ?", &params, false);
        assert_eq!(result, "WHERE name = 'O''Brien' AND id = 1");
    }

    #[test]
    fn test_apply_params_unclosed_string_literal() {
        let params = vec![bare("1")];
        let result = apply_params("SELECT 'unclosed", &params, false);
        assert_eq!(result, "SELECT 'unclosed");
    }
}