dm_database_sqllog2db/stats/
normalize.rs1#[must_use]
18pub fn normalize_sql(sql: &str) -> String {
19 let bytes = sql.as_bytes();
20 let len = bytes.len();
21 let mut output = Vec::with_capacity(len);
22 let mut cursor = 0usize;
23 let mut prev_was_ident_char = false;
24
25 while cursor < len {
26 let byte = bytes[cursor];
27 match byte {
28 b'\'' => {
29 cursor = skip_string_literal(bytes, cursor + 1, len);
30 output.push(b'?');
31 prev_was_ident_char = false;
32 }
33 byte_val if byte_val.is_ascii_digit() && !prev_was_ident_char => {
34 cursor = skip_number_literal(bytes, cursor, len);
35 output.push(b'?');
36 prev_was_ident_char = false;
37 }
38 b'-' | b'+'
39 if !prev_was_ident_char
40 && cursor + 1 < len
41 && bytes[cursor + 1].is_ascii_digit() =>
42 {
43 cursor = skip_number_literal(bytes, cursor + 1, len);
45 output.push(b'?');
46 prev_was_ident_char = false;
47 }
48 _ => {
49 output.push(byte);
50 prev_was_ident_char = byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'$';
51 cursor += 1;
52 }
53 }
54 }
55
56 String::from_utf8(output).expect("normalize_sql produced invalid UTF-8")
57}
58
59fn skip_string_literal(bytes: &[u8], start: usize, len: usize) -> usize {
65 let mut cursor = start;
66 loop {
67 let Some(relative_pos) = memchr::memchr(b'\'', &bytes[cursor..]) else {
68 return len;
70 };
71 cursor += relative_pos + 1;
72 if cursor < len && bytes[cursor] == b'\'' {
73 cursor += 1;
75 } else {
76 return cursor;
78 }
79 }
80}
81
82fn skip_number_literal(bytes: &[u8], start: usize, len: usize) -> usize {
88 let mut cursor = start;
89 while cursor < len && bytes[cursor].is_ascii_digit() {
91 cursor += 1;
92 }
93 if cursor + 1 < len && bytes[cursor] == b'.' && bytes[cursor + 1].is_ascii_digit() {
95 cursor += 1; while cursor < len && bytes[cursor].is_ascii_digit() {
97 cursor += 1;
98 }
99 }
100 cursor
101}
102
103#[cfg(test)]
104mod tests {
105 use super::*;
106
107 #[test]
108 fn test_basic_where_number_and_string() {
109 assert_eq!(
110 normalize_sql("SELECT * FROM t WHERE id = 42 AND name = 'alice'"),
111 "SELECT * FROM t WHERE id = ? AND name = ?"
112 );
113 }
114
115 #[test]
116 fn test_multiple_numeric_literals() {
117 assert_eq!(
118 normalize_sql("INSERT INTO t VALUES (1, 2, 3)"),
119 "INSERT INTO t VALUES (?, ?, ?)"
120 );
121 }
122
123 #[test]
124 fn test_escaped_quote_in_string() {
125 assert_eq!(normalize_sql("WHERE name = 'O''Brien'"), "WHERE name = ?");
126 }
127
128 #[test]
129 fn test_no_literals_unchanged() {
130 let sql_with_placeholder = "SELECT col FROM t WHERE id = ?";
131 assert_eq!(normalize_sql(sql_with_placeholder), sql_with_placeholder);
132
133 let sql_plain = "SELECT col FROM t";
134 assert_eq!(normalize_sql(sql_plain), sql_plain);
135 }
136
137 #[test]
138 fn test_insert_multiple_columns_with_float() {
139 assert_eq!(
140 normalize_sql("INSERT INTO orders (id, name, amount) VALUES (100, 'test', 3.14)"),
141 "INSERT INTO orders (id, name, amount) VALUES (?, ?, ?)"
142 );
143 }
144
145 #[test]
146 fn test_identifier_with_digits_not_replaced() {
147 assert_eq!(
148 normalize_sql("SELECT col1, table2 FROM t WHERE id = 1"),
149 "SELECT col1, table2 FROM t WHERE id = ?"
150 );
151 }
152
153 #[test]
154 fn test_unclosed_string_does_not_panic() {
155 let result = normalize_sql("SELECT 'unclosed");
157 assert_eq!(result, "SELECT ?");
158 }
159}