heliosdb_proxy/cache/
normalizer.rs1use once_cell::sync::Lazy;
10use regex::Regex;
11use std::collections::hash_map::DefaultHasher;
12use std::hash::{Hash, Hasher};
13
14#[derive(Debug, Clone)]
16pub struct NormalizedQuery {
17 pub fingerprint: String,
19
20 pub hash: u64,
22
23 pub tables: Vec<String>,
25
26 pub parameters: Vec<String>,
28}
29
30impl NormalizedQuery {
31 pub fn fingerprint(&self) -> &str {
33 &self.fingerprint
34 }
35
36 pub fn hash(&self) -> u64 {
38 self.hash
39 }
40
41 pub fn tables(&self) -> &[String] {
43 &self.tables
44 }
45}
46
47#[derive(Debug, Clone)]
49pub struct QueryNormalizer {
50 preserve_order: bool,
52}
53
54static STRING_LITERAL: Lazy<Regex> = Lazy::new(|| Regex::new(r#"'(?:[^'\\]|\\.)*'"#).unwrap());
56
57#[allow(dead_code)]
58static DOUBLE_QUOTED: Lazy<Regex> = Lazy::new(|| Regex::new(r#""(?:[^"\\]|\\.)*""#).unwrap());
59
60static NUMBER_LITERAL: Lazy<Regex> =
61 Lazy::new(|| Regex::new(r"\b\d+(?:\.\d+)?(?:e[+-]?\d+)?\b").unwrap());
62
63static WHITESPACE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s+").unwrap());
64
65static TABLE_PATTERN: Lazy<Regex> = Lazy::new(|| {
66 Regex::new(r"(?i)(?:FROM|JOIN|INTO|UPDATE|TABLE)\s+([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)?)").unwrap()
67});
68
69static HINT_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"/\*[^*]*\*/").unwrap());
70
71static COMMENT_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"--[^\n]*").unwrap());
72
73impl QueryNormalizer {
74 pub fn new() -> Self {
76 Self {
77 preserve_order: true,
78 }
79 }
80
81 pub fn unordered() -> Self {
83 Self {
84 preserve_order: false,
85 }
86 }
87
88 pub fn normalize(&self, sql: &str) -> NormalizedQuery {
90 let mut parameters = Vec::new();
91
92 let sql = HINT_PATTERN.replace_all(sql, "");
94 let sql = COMMENT_PATTERN.replace_all(&sql, "");
95
96 let tables = self.extract_tables(&sql);
98
99 let sql = STRING_LITERAL.replace_all(&sql, |caps: ®ex::Captures| {
101 let value = caps.get(0).unwrap().as_str();
102 let inner = &value[1..value.len() - 1];
104 parameters.push(inner.to_string());
105 "?"
106 });
107
108 let sql = NUMBER_LITERAL.replace_all(&sql, |caps: ®ex::Captures| {
110 let value = caps.get(0).unwrap().as_str();
111 parameters.push(value.to_string());
112 "?"
113 });
114
115 let sql = WHITESPACE.replace_all(&sql, " ");
117
118 let fingerprint = sql.trim().to_uppercase();
120
121 let mut hasher = DefaultHasher::new();
123 fingerprint.hash(&mut hasher);
124 if self.preserve_order {
125 for param in ¶meters {
127 param.hash(&mut hasher);
128 }
129 }
130 let hash = hasher.finish();
131
132 NormalizedQuery {
133 fingerprint,
134 hash,
135 tables,
136 parameters,
137 }
138 }
139
140 fn extract_tables(&self, sql: &str) -> Vec<String> {
142 let mut tables = Vec::new();
143
144 for cap in TABLE_PATTERN.captures_iter(sql) {
145 if let Some(table_match) = cap.get(1) {
146 let table = table_match.as_str().to_lowercase();
147 let table_name = table.split('.').next_back().unwrap_or(&table);
149 if !tables.contains(&table_name.to_string()) {
150 tables.push(table_name.to_string());
151 }
152 }
153 }
154
155 tables
156 }
157
158 pub fn fingerprint(&self, sql: &str) -> String {
160 let sql = HINT_PATTERN.replace_all(sql, "");
162 let sql = COMMENT_PATTERN.replace_all(&sql, "");
163
164 let sql = STRING_LITERAL.replace_all(&sql, "?");
166 let sql = NUMBER_LITERAL.replace_all(&sql, "?");
167
168 let sql = WHITESPACE.replace_all(&sql, " ");
170
171 sql.trim().to_uppercase()
172 }
173
174 pub fn are_equivalent(&self, sql1: &str, sql2: &str) -> bool {
176 self.fingerprint(sql1) == self.fingerprint(sql2)
177 }
178}
179
180impl Default for QueryNormalizer {
181 fn default() -> Self {
182 Self::new()
183 }
184}
185
186pub fn quick_fingerprint(sql: &str) -> u64 {
188 let normalized = QueryNormalizer::new().fingerprint(sql);
189 let mut hasher = DefaultHasher::new();
190 normalized.hash(&mut hasher);
191 hasher.finish()
192}
193
194pub fn extract_tables(sql: &str) -> Vec<String> {
196 let normalizer = QueryNormalizer::new();
197 normalizer.extract_tables(sql)
198}
199
200#[cfg(test)]
201mod tests {
202 use super::*;
203
204 #[test]
205 fn test_normalize_simple_query() {
206 let normalizer = QueryNormalizer::new();
207 let query = "SELECT * FROM users WHERE id = 123";
208 let normalized = normalizer.normalize(query);
209
210 assert_eq!(normalized.fingerprint, "SELECT * FROM USERS WHERE ID = ?");
211 assert_eq!(normalized.parameters, vec!["123"]);
212 assert_eq!(normalized.tables, vec!["users"]);
213 }
214
215 #[test]
216 fn test_normalize_string_literals() {
217 let normalizer = QueryNormalizer::new();
218 let query = "SELECT * FROM users WHERE name = 'John Doe'";
219 let normalized = normalizer.normalize(query);
220
221 assert_eq!(normalized.fingerprint, "SELECT * FROM USERS WHERE NAME = ?");
222 assert_eq!(normalized.parameters, vec!["John Doe"]);
223 }
224
225 #[test]
226 fn test_normalize_multiple_parameters() {
227 let normalizer = QueryNormalizer::new();
228 let query = "SELECT * FROM users WHERE age > 18 AND status = 'active' AND score < 100";
229 let normalized = normalizer.normalize(query);
230
231 assert_eq!(
232 normalized.fingerprint,
233 "SELECT * FROM USERS WHERE AGE > ? AND STATUS = ? AND SCORE < ?"
234 );
235 assert_eq!(normalized.parameters.len(), 3);
236 }
237
238 #[test]
239 fn test_extract_tables_join() {
240 let normalizer = QueryNormalizer::new();
241 let query = "SELECT u.*, o.* FROM users u JOIN orders o ON u.id = o.user_id";
242 let normalized = normalizer.normalize(query);
243
244 assert!(normalized.tables.contains(&"users".to_string()));
245 assert!(normalized.tables.contains(&"orders".to_string()));
246 }
247
248 #[test]
249 fn test_normalize_removes_comments() {
250 let normalizer = QueryNormalizer::new();
251 let query = "/* helios:cache_ttl=60 */ SELECT * FROM users -- inline comment\nWHERE id = 1";
252 let normalized = normalizer.normalize(query);
253
254 assert_eq!(normalized.fingerprint, "SELECT * FROM USERS WHERE ID = ?");
255 }
256
257 #[test]
258 fn test_normalize_whitespace() {
259 let normalizer = QueryNormalizer::new();
260 let query1 = "SELECT * FROM users WHERE id=1";
261 let query2 = "SELECT * FROM users WHERE id=1";
262
263 assert_eq!(
264 normalizer.fingerprint(query1),
265 normalizer.fingerprint(query2)
266 );
267 }
268
269 #[test]
270 fn test_equivalent_queries() {
271 let normalizer = QueryNormalizer::new();
272
273 let query1 = "SELECT * FROM users WHERE id = 123";
275 let query2 = "SELECT * FROM users WHERE id = 456";
276
277 assert!(normalizer.are_equivalent(query1, query2));
278
279 let query3 = "SELECT * FROM users WHERE name = 'test'";
281 assert!(!normalizer.are_equivalent(query1, query3));
282 }
283
284 #[test]
285 fn test_hash_consistency() {
286 let normalizer = QueryNormalizer::new();
287
288 let query1 = "SELECT * FROM users WHERE id = 1";
289 let query2 = "SELECT * FROM users WHERE id = 1";
290
291 let norm1 = normalizer.normalize(query1);
292 let norm2 = normalizer.normalize(query2);
293
294 assert_eq!(norm1.hash, norm2.hash);
295 }
296
297 #[test]
298 fn test_hash_different_params() {
299 let normalizer = QueryNormalizer::new();
300
301 let query1 = "SELECT * FROM users WHERE id = 1";
303 let query2 = "SELECT * FROM users WHERE id = 2";
304
305 let norm1 = normalizer.normalize(query1);
306 let norm2 = normalizer.normalize(query2);
307
308 assert_ne!(norm1.hash, norm2.hash);
309 }
310
311 #[test]
312 fn test_unordered_normalizer() {
313 let normalizer = QueryNormalizer::unordered();
314
315 let query1 = "SELECT * FROM users WHERE id = 1";
317 let query2 = "SELECT * FROM users WHERE id = 2";
318
319 let norm1 = normalizer.normalize(query1);
320 let norm2 = normalizer.normalize(query2);
321
322 assert_eq!(norm1.fingerprint, norm2.fingerprint);
324 }
325
326 #[test]
327 fn test_extract_tables_various() {
328 let normalizer = QueryNormalizer::new();
329
330 let queries = vec![
331 ("INSERT INTO users VALUES (1)", vec!["users"]),
332 ("UPDATE products SET price = 10", vec!["products"]),
333 ("DELETE FROM orders WHERE id = 1", vec!["orders"]),
334 ("SELECT * FROM schema.table", vec!["table"]),
335 ("TABLE users", vec!["users"]),
336 ];
337
338 for (sql, expected_tables) in queries {
339 let normalized = normalizer.normalize(sql);
340 for table in expected_tables {
341 assert!(
342 normalized.tables.contains(&table.to_string()),
343 "Query '{}' should contain table '{}'",
344 sql,
345 table
346 );
347 }
348 }
349 }
350
351 #[test]
352 fn test_decimal_numbers() {
353 let normalizer = QueryNormalizer::new();
354 let query = "SELECT * FROM products WHERE price < 99.99 AND rating > 4.5";
355 let normalized = normalizer.normalize(query);
356
357 assert!(normalized.parameters.contains(&"99.99".to_string()));
358 assert!(normalized.parameters.contains(&"4.5".to_string()));
359 }
360
361 #[test]
362 fn test_scientific_notation() {
363 let normalizer = QueryNormalizer::new();
364 let query = "SELECT * FROM data WHERE value = 1e10";
365 let normalized = normalizer.normalize(query);
366
367 assert!(normalized.fingerprint.contains("VALUE = ?"));
368 }
369
370 #[test]
371 fn test_quick_fingerprint() {
372 let hash1 = quick_fingerprint("SELECT * FROM users WHERE id = 1");
373 let hash2 = quick_fingerprint("SELECT * FROM users WHERE id = 2");
374
375 assert_eq!(hash1, hash2);
377 }
378}