heliosdb_proxy/analytics/
fingerprinter.rs1use std::collections::HashSet;
6use std::hash::{Hash, Hasher};
7
8use regex::Regex;
9
10#[derive(Debug)]
12pub struct QueryFingerprinter {
13 string_literal_re: Regex,
15 numeric_literal_re: Regex,
17 in_list_re: Regex,
19 whitespace_re: Regex,
21 uuid_re: Regex,
23 hex_re: Regex,
25}
26
27impl QueryFingerprinter {
28 pub fn new() -> Self {
30 Self {
31 string_literal_re: Regex::new(r"'[^']*'").expect("Invalid regex"),
32 numeric_literal_re: Regex::new(r"\b\d+(\.\d+)?\b").expect("Invalid regex"),
33 in_list_re: Regex::new(r"(?i)IN\s*\([^)]+\)").expect("Invalid regex"),
34 whitespace_re: Regex::new(r"\s+").expect("Invalid regex"),
35 uuid_re: Regex::new(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}").expect("Invalid regex"),
36 hex_re: Regex::new(r"0x[0-9a-fA-F]+").expect("Invalid regex"),
37 }
38 }
39
40 pub fn fingerprint(&self, query: &str) -> QueryFingerprint {
42 let normalized = self.normalize(query);
43 let hash = self.compute_hash(&normalized);
44
45 QueryFingerprint {
46 hash,
47 normalized: normalized.clone(),
48 tables: self.extract_tables(query),
49 operation: self.detect_operation(query),
50 original_length: query.len(),
51 }
52 }
53
54 pub fn normalize(&self, query: &str) -> String {
56 let mut normalized = query.to_string();
57
58 normalized = self.uuid_re.replace_all(&normalized, "?").to_string();
60
61 normalized = self.hex_re.replace_all(&normalized, "?").to_string();
63
64 normalized = self.string_literal_re.replace_all(&normalized, "?").to_string();
66
67 normalized = self.numeric_literal_re.replace_all(&normalized, "?").to_string();
69
70 normalized = self.in_list_re.replace_all(&normalized, "IN (?)").to_string();
72
73 normalized = self.whitespace_re.replace_all(&normalized, " ").to_string();
75
76 normalized.trim().to_lowercase()
77 }
78
79 fn compute_hash(&self, normalized: &str) -> u64 {
81 use std::collections::hash_map::DefaultHasher;
82 let mut hasher = DefaultHasher::new();
83 normalized.hash(&mut hasher);
84 hasher.finish()
85 }
86
87 fn extract_tables(&self, query: &str) -> Vec<String> {
89 let query_upper = query.to_uppercase();
90 let mut tables = HashSet::new();
91
92 if let Some(from_pos) = query_upper.find("FROM") {
94 let after_from = &query[from_pos + 4..];
95 if let Some(table) = self.extract_first_identifier(after_from) {
96 tables.insert(table);
97 }
98 }
99
100 for keyword in ["JOIN", "INNER JOIN", "LEFT JOIN", "RIGHT JOIN", "OUTER JOIN"] {
102 let mut search_pos = 0;
103 while let Some(pos) = query_upper[search_pos..].find(keyword) {
104 let absolute_pos = search_pos + pos + keyword.len();
105 if absolute_pos < query.len() {
106 let after_join = &query[absolute_pos..];
107 if let Some(table) = self.extract_first_identifier(after_join) {
108 tables.insert(table);
109 }
110 }
111 search_pos = absolute_pos;
112 }
113 }
114
115 if let Some(pos) = query_upper.find("INSERT INTO") {
117 let after_insert = &query[pos + 11..];
118 if let Some(table) = self.extract_first_identifier(after_insert) {
119 tables.insert(table);
120 }
121 }
122
123 if let Some(pos) = query_upper.find("UPDATE") {
125 let after_update = &query[pos + 6..];
126 if let Some(table) = self.extract_first_identifier(after_update) {
127 tables.insert(table);
128 }
129 }
130
131 if let Some(pos) = query_upper.find("DELETE FROM") {
133 let after_delete = &query[pos + 11..];
134 if let Some(table) = self.extract_first_identifier(after_delete) {
135 tables.insert(table);
136 }
137 }
138
139 tables.into_iter().collect()
140 }
141
142 fn extract_first_identifier(&self, s: &str) -> Option<String> {
144 let trimmed = s.trim();
145 let mut chars = trimmed.chars().peekable();
146
147 while chars.peek().map(|c| c.is_whitespace()).unwrap_or(false) {
149 chars.next();
150 }
151
152 let mut ident = String::new();
154 while let Some(&c) = chars.peek() {
155 if c.is_alphanumeric() || c == '_' || c == '.' || c == '"' {
156 ident.push(c);
157 chars.next();
158 } else {
159 break;
160 }
161 }
162
163 if ident.is_empty() {
164 None
165 } else {
166 let cleaned = ident.replace('"', "").to_lowercase();
168 Some(cleaned)
169 }
170 }
171
172 fn detect_operation(&self, query: &str) -> OperationType {
174 let trimmed = query.trim().to_uppercase();
175
176 if trimmed.starts_with("SELECT") {
177 OperationType::Select
178 } else if trimmed.starts_with("INSERT") {
179 OperationType::Insert
180 } else if trimmed.starts_with("UPDATE") {
181 OperationType::Update
182 } else if trimmed.starts_with("DELETE") {
183 OperationType::Delete
184 } else if trimmed.starts_with("CREATE") {
185 OperationType::Ddl
186 } else if trimmed.starts_with("ALTER") {
187 OperationType::Ddl
188 } else if trimmed.starts_with("DROP") {
189 OperationType::Ddl
190 } else if trimmed.starts_with("BEGIN") || trimmed.starts_with("START TRANSACTION") {
191 OperationType::Transaction
192 } else if trimmed.starts_with("COMMIT") || trimmed.starts_with("ROLLBACK") {
193 OperationType::Transaction
194 } else if trimmed.starts_with("SET") {
195 OperationType::Utility
196 } else if trimmed.starts_with("EXPLAIN") {
197 OperationType::Utility
198 } else if trimmed.starts_with("ANALYZE") {
199 OperationType::Utility
200 } else {
201 OperationType::Other
202 }
203 }
204}
205
206impl Default for QueryFingerprinter {
207 fn default() -> Self {
208 Self::new()
209 }
210}
211
212#[derive(Debug, Clone)]
214pub struct QueryFingerprint {
215 pub hash: u64,
217
218 pub normalized: String,
220
221 pub tables: Vec<String>,
223
224 pub operation: OperationType,
226
227 pub original_length: usize,
229}
230
231impl QueryFingerprint {
232 pub fn short_id(&self) -> String {
234 format!("{:016x}", self.hash)
235 }
236}
237
238#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
240pub enum OperationType {
241 Select,
242 Insert,
243 Update,
244 Delete,
245 Ddl,
246 Transaction,
247 Utility,
248 Other,
249}
250
251impl std::fmt::Display for OperationType {
252 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
253 match self {
254 OperationType::Select => write!(f, "SELECT"),
255 OperationType::Insert => write!(f, "INSERT"),
256 OperationType::Update => write!(f, "UPDATE"),
257 OperationType::Delete => write!(f, "DELETE"),
258 OperationType::Ddl => write!(f, "DDL"),
259 OperationType::Transaction => write!(f, "TRANSACTION"),
260 OperationType::Utility => write!(f, "UTILITY"),
261 OperationType::Other => write!(f, "OTHER"),
262 }
263 }
264}
265
266#[cfg(test)]
267mod tests {
268 use super::*;
269
270 #[test]
271 fn test_fingerprinter_new() {
272 let fp = QueryFingerprinter::new();
273 assert!(fp.string_literal_re.is_match("'hello'"));
274 }
275
276 #[test]
277 fn test_normalize_string_literals() {
278 let fp = QueryFingerprinter::new();
279
280 let normalized = fp.normalize("SELECT * FROM users WHERE name = 'Alice'");
281 assert_eq!(normalized, "select * from users where name = ?");
282 }
283
284 #[test]
285 fn test_normalize_numeric_literals() {
286 let fp = QueryFingerprinter::new();
287
288 let normalized = fp.normalize("SELECT * FROM users WHERE id = 123 AND age > 25");
289 assert_eq!(normalized, "select * from users where id = ? and age > ?");
290 }
291
292 #[test]
293 fn test_normalize_in_list() {
294 let fp = QueryFingerprinter::new();
295
296 let normalized = fp.normalize("SELECT * FROM users WHERE id IN (1, 2, 3, 4, 5)");
297 assert_eq!(normalized, "select * from users where id in (?)");
298 }
299
300 #[test]
301 fn test_normalize_uuid() {
302 let fp = QueryFingerprinter::new();
303
304 let normalized = fp.normalize("SELECT * FROM users WHERE id = 'a1b2c3d4-e5f6-7890-abcd-ef1234567890'");
305 assert!(normalized.contains("?"));
306 }
307
308 #[test]
309 fn test_same_fingerprint_different_values() {
310 let fp = QueryFingerprinter::new();
311
312 let fp1 = fp.fingerprint("SELECT * FROM users WHERE id = 1");
313 let fp2 = fp.fingerprint("SELECT * FROM users WHERE id = 2");
314
315 assert_eq!(fp1.hash, fp2.hash);
316 assert_eq!(fp1.normalized, fp2.normalized);
317 }
318
319 #[test]
320 fn test_different_fingerprint_different_queries() {
321 let fp = QueryFingerprinter::new();
322
323 let fp1 = fp.fingerprint("SELECT * FROM users WHERE id = 1");
324 let fp2 = fp.fingerprint("SELECT * FROM orders WHERE id = 1");
325
326 assert_ne!(fp1.hash, fp2.hash);
327 }
328
329 #[test]
330 fn test_extract_tables() {
331 let fp = QueryFingerprinter::new();
332
333 let result = fp.fingerprint("SELECT * FROM users WHERE id = 1");
334 assert!(result.tables.contains(&"users".to_string()));
335
336 let result = fp.fingerprint("SELECT * FROM users u JOIN orders o ON u.id = o.user_id");
337 assert!(result.tables.contains(&"users".to_string()));
338 assert!(result.tables.contains(&"orders".to_string()));
339 }
340
341 #[test]
342 fn test_detect_operation() {
343 let fp = QueryFingerprinter::new();
344
345 assert_eq!(fp.detect_operation("SELECT * FROM users"), OperationType::Select);
346 assert_eq!(fp.detect_operation("INSERT INTO users VALUES (1)"), OperationType::Insert);
347 assert_eq!(fp.detect_operation("UPDATE users SET name = 'Bob'"), OperationType::Update);
348 assert_eq!(fp.detect_operation("DELETE FROM users WHERE id = 1"), OperationType::Delete);
349 assert_eq!(fp.detect_operation("CREATE TABLE foo (id INT)"), OperationType::Ddl);
350 assert_eq!(fp.detect_operation("BEGIN"), OperationType::Transaction);
351 }
352
353 #[test]
354 fn test_operation_display() {
355 assert_eq!(OperationType::Select.to_string(), "SELECT");
356 assert_eq!(OperationType::Insert.to_string(), "INSERT");
357 }
358}