heliosdb_proxy/analytics/
fingerprinter.rs1use std::collections::HashSet;
6use std::hash::{Hash, Hasher};
7
8use regex::Regex;
9
10#[derive(Debug)]
12pub struct QueryFingerprinter {
13 string_literal_re: Regex,
15 numeric_literal_re: Regex,
17 in_list_re: Regex,
19 whitespace_re: Regex,
21 uuid_re: Regex,
23 hex_re: Regex,
25}
26
27impl QueryFingerprinter {
28 pub fn new() -> Self {
30 Self {
31 string_literal_re: Regex::new(r"'[^']*'").expect("Invalid regex"),
32 numeric_literal_re: Regex::new(r"\b\d+(\.\d+)?\b").expect("Invalid regex"),
33 in_list_re: Regex::new(r"(?i)IN\s*\([^)]+\)").expect("Invalid regex"),
34 whitespace_re: Regex::new(r"\s+").expect("Invalid regex"),
35 uuid_re: Regex::new(
36 r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}",
37 )
38 .expect("Invalid regex"),
39 hex_re: Regex::new(r"0x[0-9a-fA-F]+").expect("Invalid regex"),
40 }
41 }
42
43 pub fn fingerprint(&self, query: &str) -> QueryFingerprint {
45 let normalized = self.normalize(query);
46 let hash = self.compute_hash(&normalized);
47
48 QueryFingerprint {
49 hash,
50 normalized: normalized.clone(),
51 tables: self.extract_tables(query),
52 operation: self.detect_operation(query),
53 original_length: query.len(),
54 }
55 }
56
57 pub fn normalize(&self, query: &str) -> String {
59 let mut normalized = query.to_string();
60
61 normalized = self.uuid_re.replace_all(&normalized, "?").to_string();
63
64 normalized = self.hex_re.replace_all(&normalized, "?").to_string();
66
67 normalized = self
69 .string_literal_re
70 .replace_all(&normalized, "?")
71 .to_string();
72
73 normalized = self
75 .numeric_literal_re
76 .replace_all(&normalized, "?")
77 .to_string();
78
79 normalized = self
81 .in_list_re
82 .replace_all(&normalized, "IN (?)")
83 .to_string();
84
85 normalized = self.whitespace_re.replace_all(&normalized, " ").to_string();
87
88 normalized.trim().to_lowercase()
89 }
90
91 fn compute_hash(&self, normalized: &str) -> u64 {
93 use std::collections::hash_map::DefaultHasher;
94 let mut hasher = DefaultHasher::new();
95 normalized.hash(&mut hasher);
96 hasher.finish()
97 }
98
99 fn extract_tables(&self, query: &str) -> Vec<String> {
101 let query_upper = query.to_uppercase();
102 let mut tables = HashSet::new();
103
104 if let Some(from_pos) = query_upper.find("FROM") {
106 let after_from = &query[from_pos + 4..];
107 if let Some(table) = self.extract_first_identifier(after_from) {
108 tables.insert(table);
109 }
110 }
111
112 for keyword in [
114 "JOIN",
115 "INNER JOIN",
116 "LEFT JOIN",
117 "RIGHT JOIN",
118 "OUTER JOIN",
119 ] {
120 let mut search_pos = 0;
121 while let Some(pos) = query_upper[search_pos..].find(keyword) {
122 let absolute_pos = search_pos + pos + keyword.len();
123 if absolute_pos < query.len() {
124 let after_join = &query[absolute_pos..];
125 if let Some(table) = self.extract_first_identifier(after_join) {
126 tables.insert(table);
127 }
128 }
129 search_pos = absolute_pos;
130 }
131 }
132
133 if let Some(pos) = query_upper.find("INSERT INTO") {
135 let after_insert = &query[pos + 11..];
136 if let Some(table) = self.extract_first_identifier(after_insert) {
137 tables.insert(table);
138 }
139 }
140
141 if let Some(pos) = query_upper.find("UPDATE") {
143 let after_update = &query[pos + 6..];
144 if let Some(table) = self.extract_first_identifier(after_update) {
145 tables.insert(table);
146 }
147 }
148
149 if let Some(pos) = query_upper.find("DELETE FROM") {
151 let after_delete = &query[pos + 11..];
152 if let Some(table) = self.extract_first_identifier(after_delete) {
153 tables.insert(table);
154 }
155 }
156
157 tables.into_iter().collect()
158 }
159
160 fn extract_first_identifier(&self, s: &str) -> Option<String> {
162 let trimmed = s.trim();
163 let mut chars = trimmed.chars().peekable();
164
165 while chars.peek().map(|c| c.is_whitespace()).unwrap_or(false) {
167 chars.next();
168 }
169
170 let mut ident = String::new();
172 while let Some(&c) = chars.peek() {
173 if c.is_alphanumeric() || c == '_' || c == '.' || c == '"' {
174 ident.push(c);
175 chars.next();
176 } else {
177 break;
178 }
179 }
180
181 if ident.is_empty() {
182 None
183 } else {
184 let cleaned = ident.replace('"', "").to_lowercase();
186 Some(cleaned)
187 }
188 }
189
190 #[allow(clippy::if_same_then_else)]
192 fn detect_operation(&self, query: &str) -> OperationType {
193 let trimmed = query.trim().to_uppercase();
194
195 if trimmed.starts_with("SELECT") {
196 OperationType::Select
197 } else if trimmed.starts_with("INSERT") {
198 OperationType::Insert
199 } else if trimmed.starts_with("UPDATE") {
200 OperationType::Update
201 } else if trimmed.starts_with("DELETE") {
202 OperationType::Delete
203 } else if trimmed.starts_with("CREATE") {
204 OperationType::Ddl
205 } else if trimmed.starts_with("ALTER") {
206 OperationType::Ddl
207 } else if trimmed.starts_with("DROP") {
208 OperationType::Ddl
209 } else if trimmed.starts_with("BEGIN") || trimmed.starts_with("START TRANSACTION") {
210 OperationType::Transaction
211 } else if trimmed.starts_with("COMMIT") || trimmed.starts_with("ROLLBACK") {
212 OperationType::Transaction
213 } else if trimmed.starts_with("SET") {
214 OperationType::Utility
215 } else if trimmed.starts_with("EXPLAIN") {
216 OperationType::Utility
217 } else if trimmed.starts_with("ANALYZE") {
218 OperationType::Utility
219 } else {
220 OperationType::Other
221 }
222 }
223}
224
225impl Default for QueryFingerprinter {
226 fn default() -> Self {
227 Self::new()
228 }
229}
230
231#[derive(Debug, Clone)]
233pub struct QueryFingerprint {
234 pub hash: u64,
236
237 pub normalized: String,
239
240 pub tables: Vec<String>,
242
243 pub operation: OperationType,
245
246 pub original_length: usize,
248}
249
250impl QueryFingerprint {
251 pub fn short_id(&self) -> String {
253 format!("{:016x}", self.hash)
254 }
255}
256
257#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
259pub enum OperationType {
260 Select,
261 Insert,
262 Update,
263 Delete,
264 Ddl,
265 Transaction,
266 Utility,
267 Other,
268}
269
270impl std::fmt::Display for OperationType {
271 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272 match self {
273 OperationType::Select => write!(f, "SELECT"),
274 OperationType::Insert => write!(f, "INSERT"),
275 OperationType::Update => write!(f, "UPDATE"),
276 OperationType::Delete => write!(f, "DELETE"),
277 OperationType::Ddl => write!(f, "DDL"),
278 OperationType::Transaction => write!(f, "TRANSACTION"),
279 OperationType::Utility => write!(f, "UTILITY"),
280 OperationType::Other => write!(f, "OTHER"),
281 }
282 }
283}
284
285#[cfg(test)]
286mod tests {
287 use super::*;
288
289 #[test]
290 fn test_fingerprinter_new() {
291 let fp = QueryFingerprinter::new();
292 assert!(fp.string_literal_re.is_match("'hello'"));
293 }
294
295 #[test]
296 fn test_normalize_string_literals() {
297 let fp = QueryFingerprinter::new();
298
299 let normalized = fp.normalize("SELECT * FROM users WHERE name = 'Alice'");
300 assert_eq!(normalized, "select * from users where name = ?");
301 }
302
303 #[test]
304 fn test_normalize_numeric_literals() {
305 let fp = QueryFingerprinter::new();
306
307 let normalized = fp.normalize("SELECT * FROM users WHERE id = 123 AND age > 25");
308 assert_eq!(normalized, "select * from users where id = ? and age > ?");
309 }
310
311 #[test]
312 fn test_normalize_in_list() {
313 let fp = QueryFingerprinter::new();
314
315 let normalized = fp.normalize("SELECT * FROM users WHERE id IN (1, 2, 3, 4, 5)");
316 assert_eq!(normalized, "select * from users where id in (?)");
317 }
318
319 #[test]
320 fn test_normalize_uuid() {
321 let fp = QueryFingerprinter::new();
322
323 let normalized =
324 fp.normalize("SELECT * FROM users WHERE id = 'a1b2c3d4-e5f6-7890-abcd-ef1234567890'");
325 assert!(normalized.contains("?"));
326 }
327
328 #[test]
329 fn test_same_fingerprint_different_values() {
330 let fp = QueryFingerprinter::new();
331
332 let fp1 = fp.fingerprint("SELECT * FROM users WHERE id = 1");
333 let fp2 = fp.fingerprint("SELECT * FROM users WHERE id = 2");
334
335 assert_eq!(fp1.hash, fp2.hash);
336 assert_eq!(fp1.normalized, fp2.normalized);
337 }
338
339 #[test]
340 fn test_different_fingerprint_different_queries() {
341 let fp = QueryFingerprinter::new();
342
343 let fp1 = fp.fingerprint("SELECT * FROM users WHERE id = 1");
344 let fp2 = fp.fingerprint("SELECT * FROM orders WHERE id = 1");
345
346 assert_ne!(fp1.hash, fp2.hash);
347 }
348
349 #[test]
350 fn test_extract_tables() {
351 let fp = QueryFingerprinter::new();
352
353 let result = fp.fingerprint("SELECT * FROM users WHERE id = 1");
354 assert!(result.tables.contains(&"users".to_string()));
355
356 let result = fp.fingerprint("SELECT * FROM users u JOIN orders o ON u.id = o.user_id");
357 assert!(result.tables.contains(&"users".to_string()));
358 assert!(result.tables.contains(&"orders".to_string()));
359 }
360
361 #[test]
362 fn test_detect_operation() {
363 let fp = QueryFingerprinter::new();
364
365 assert_eq!(
366 fp.detect_operation("SELECT * FROM users"),
367 OperationType::Select
368 );
369 assert_eq!(
370 fp.detect_operation("INSERT INTO users VALUES (1)"),
371 OperationType::Insert
372 );
373 assert_eq!(
374 fp.detect_operation("UPDATE users SET name = 'Bob'"),
375 OperationType::Update
376 );
377 assert_eq!(
378 fp.detect_operation("DELETE FROM users WHERE id = 1"),
379 OperationType::Delete
380 );
381 assert_eq!(
382 fp.detect_operation("CREATE TABLE foo (id INT)"),
383 OperationType::Ddl
384 );
385 assert_eq!(fp.detect_operation("BEGIN"), OperationType::Transaction);
386 }
387
388 #[test]
389 fn test_operation_display() {
390 assert_eq!(OperationType::Select.to_string(), "SELECT");
391 assert_eq!(OperationType::Insert.to_string(), "INSERT");
392 }
393}