1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4 Assignment, AssignmentTarget, ConflictTarget, Delete, Distinct, DoUpdate, Expr, GroupByExpr,
5 Ident, Insert, JoinConstraint, JoinOperator, LimitClause, ObjectName, ObjectNamePart, Offset,
6 OnConflict, OnConflictAction, OnInsert, OrderBy, OrderByKind, Query, SelectItem, SetExpr,
7 Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
8};
9use sqlparser::dialect::{Dialect, GenericDialect};
10use sqlparser::parser::Parser;
11use sqlparser::tokenizer::Span;
12use std::collections::HashMap;
13use std::ops::ControlFlow;
14
15pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
27 fingerprint_many(vec![input], dialect).join(" ")
28}
29
30pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
43 let dialect = dialect.unwrap_or(&GenericDialect {});
44
45 let mut visitor = FingerprintingVisitor::new();
46
47 input
48 .iter()
49 .map(|sql| match Parser::parse_sql(dialect, sql) {
50 Ok(mut ast) => {
51 for stmt in &mut ast {
52 let _ = stmt.visit(&mut visitor);
53 }
54
55 ast.into_iter()
56 .map(|stmt| stmt.to_string())
57 .collect::<Vec<_>>()
58 .join(" ")
59 }
60 Err(_) => sql.to_string(),
61 })
62 .collect()
63}
64
65struct FingerprintingVisitor {
66 savepoint_ids: HashMap<String, String>,
67}
68
69impl FingerprintingVisitor {
70 fn new() -> Self {
71 FingerprintingVisitor {
72 savepoint_ids: HashMap::new(),
73 }
74 }
75
76 fn visit_select(&mut self, select: &mut sqlparser::ast::Select) {
77 if !select.projection.is_empty() {
78 if let Some(item) = select.projection.first_mut() {
79 match item {
80 SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
81 *item = SelectItem::UnnamedExpr(placeholder_value());
82 }
83 _ => {}
84 }
85 }
86 select.projection.truncate(1);
87 }
88
89 if let Some(Distinct::On(exprs)) = &mut select.distinct {
90 if !exprs.is_empty() {
91 *exprs = vec![placeholder_value()];
92 }
93 };
94
95 for table_with_joins in &mut select.from {
96 for join in &mut table_with_joins.joins {
97 match &mut join.join_operator {
98 JoinOperator::Join(constraint)
99 | JoinOperator::Inner(constraint)
100 | JoinOperator::Left(constraint)
101 | JoinOperator::LeftOuter(constraint)
102 | JoinOperator::Right(constraint)
103 | JoinOperator::RightOuter(constraint)
104 | JoinOperator::FullOuter(constraint)
105 | JoinOperator::Semi(constraint)
106 | JoinOperator::LeftSemi(constraint)
107 | JoinOperator::RightSemi(constraint)
108 | JoinOperator::Anti(constraint)
109 | JoinOperator::LeftAnti(constraint)
110 | JoinOperator::RightAnti(constraint) => {
111 if let JoinConstraint::On(expr) = constraint {
112 *expr = placeholder_value();
113 }
114 }
115 _ => {}
116 }
117 }
118 }
119
120 if let Some(selection) = &mut select.selection {
121 *selection = placeholder_value();
122 }
123
124 if let GroupByExpr::Expressions(col_names, ..) = &mut select.group_by {
125 if !col_names.is_empty() {
126 *col_names = vec![placeholder_value()];
127 }
128 }
129 }
130}
131
132impl VisitorMut for FingerprintingVisitor {
133 type Break = ();
134
135 fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
136 match stmt {
137 Statement::Savepoint { name } => {
138 let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
139 self.savepoint_ids
140 .insert(name.value.clone(), savepoint_id.clone());
141 *name = Ident::new(savepoint_id);
142 }
143 Statement::ReleaseSavepoint { name } => {
144 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
145 *name = Ident::new(savepoint_id);
146 }
147 }
148 Statement::Rollback {
149 savepoint: Some(name),
150 ..
151 } => {
152 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
153 *name = Ident::new(savepoint_id);
154 }
155 }
156 Statement::Declare { stmts } => {
157 for stmt in stmts {
158 if !stmt.names.is_empty() {
159 stmt.names = vec![Ident::new("...")];
160 }
161 }
162 }
163 Statement::Insert(Insert {
164 columns,
165 source,
166 on,
167 returning,
168 ..
169 }) => {
170 if !columns.is_empty() {
171 *columns = vec![Ident::new("...")];
172 }
173 if let Some(source) = source {
174 if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
175 values.rows = vec![vec![placeholder_value()]];
176 }
177 }
178 if let Some(OnInsert::OnConflict(OnConflict {
179 conflict_target,
180 action,
181 })) = on
182 {
183 if let Some(ConflictTarget::Columns(columns)) = conflict_target {
184 if !columns.is_empty() {
185 *columns = vec![Ident::new("...")];
186 }
187 }
188 if let OnConflictAction::DoUpdate(DoUpdate {
189 assignments,
190 selection,
191 }) = action
192 {
193 if !assignments.is_empty() {
194 *assignments = vec![Assignment {
195 target: AssignmentTarget::ColumnName(ObjectName(vec![
196 ObjectNamePart::Identifier(Ident::new("...")),
197 ])),
198 value: placeholder_value(),
199 }];
200 }
201 if let Some(selection) = selection {
202 *selection = placeholder_value();
203 }
204 }
205 }
206 if let Some(returning) = returning {
207 if !returning.is_empty() {
208 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
209 }
210 }
211 }
212 Statement::Update {
213 assignments,
214 selection,
215 returning,
216 ..
217 } => {
218 if !assignments.is_empty() {
219 *assignments = vec![sqlparser::ast::Assignment {
220 target: AssignmentTarget::ColumnName(ObjectName(vec![
221 ObjectNamePart::Identifier(Ident::new("...")),
222 ])),
223 value: placeholder_value(),
224 }];
225 }
226 if let Some(selection) = selection {
227 *selection = placeholder_value();
228 }
229 if let Some(returning) = returning {
230 if !returning.is_empty() {
231 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
232 }
233 }
234 }
235 Statement::Delete(Delete {
236 selection,
237 returning,
238 ..
239 }) => {
240 if let Some(selection) = selection {
241 *selection = placeholder_value();
242 }
243 if let Some(returning) = returning {
244 if !returning.is_empty() {
245 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
246 }
247 }
248 }
249 _ => {}
250 }
251 ControlFlow::Continue(())
252 }
253
254 fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
255 match query.body.as_mut() {
256 SetExpr::Select(select) => {
257 self.visit_select(select);
258 }
259 SetExpr::SetOperation { left, right, .. } => {
260 let mut stack = vec![left.as_mut(), right.as_mut()];
263 while let Some(set_expr) = stack.pop() {
264 match set_expr {
265 SetExpr::Select(select) => {
266 self.visit_select(select);
267 }
268 SetExpr::SetOperation { left, right, .. } => {
269 stack.push(left.as_mut());
271 stack.push(right.as_mut());
272 }
273 _ => {}
274 }
275 }
276 }
277 _ => {}
278 }
279 if let Some(order_by) = &mut query.order_by {
280 let OrderBy { kind, .. } = order_by;
281 if let OrderByKind::Expressions(expressions) = kind {
282 if !expressions.is_empty() {
283 if let Some(expr) = expressions.first_mut() {
284 expr.expr = placeholder_value();
285 }
286 expressions.truncate(1);
287 }
288 }
289 }
290 if let Some(limit_clause) = &mut query.limit_clause {
291 match limit_clause {
292 LimitClause::LimitOffset {
293 limit,
294 offset,
295 limit_by,
296 } => {
297 if let Some(limit_value) = limit {
298 *limit_value = placeholder_value();
299 }
300 if let Some(Offset { value, .. }) = offset {
301 *value = placeholder_value();
302 }
303 if !limit_by.is_empty() {
304 *limit_by = vec![placeholder_value()];
305 }
306 }
307 LimitClause::OffsetCommaLimit { offset, limit } => {
309 *offset = placeholder_value();
310 *limit = placeholder_value();
311 }
312 }
313 }
314 ControlFlow::Continue(())
315 }
316
317 fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
318 for part in _relation.0.iter_mut() {
319 match part {
320 ObjectNamePart::Identifier(ident) => {
321 maybe_unquote_ident(ident);
322 }
323 }
324 }
325 ControlFlow::Continue(())
326 }
327
328 fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
329 match _expr {
330 Expr::Identifier(ident) => {
331 maybe_unquote_ident(ident);
332 }
333 Expr::CompoundIdentifier(idents) => {
334 for ident in idents {
335 maybe_unquote_ident(ident);
336 }
337 }
338 _ => {}
339 }
340 ControlFlow::Continue(())
341 }
342}
343
344fn placeholder_value() -> Expr {
345 Expr::Value(ValueWithSpan {
346 value: Value::Placeholder("...".to_string()),
347 span: Span::empty(),
348 })
349}
350
351fn maybe_unquote_ident(ident: &mut Ident) {
352 let Ident {
353 value, quote_style, ..
354 } = ident;
355
356 if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
357 *quote_style = None;
358 }
359}
360
361#[cfg(test)]
362mod tests {
363 use super::*;
364
365 #[test]
366 fn test_fingerprint_one() {
367 let result = fingerprint_one("SELECT 123", None);
368 assert_eq!(result, "SELECT ...");
369 }
370
371 #[test]
372 fn test_empty() {
373 let result = fingerprint_many(vec![""], None);
374 assert_eq!(result, vec![""]);
375 }
376
377 #[test]
378 fn test_unparsable() {
379 let result = fingerprint_many(vec!["SELECT SELECT SELECT SELECT"], None);
380 assert_eq!(result, vec!["SELECT SELECT SELECT SELECT"]);
381 }
382
383 #[test]
384 fn test_comments_dropped() {
385 let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
386 assert_eq!(result, vec!["SELECT ..."]);
387 }
388
389 #[test]
390 fn test_savepoint() {
391 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
392 assert_eq!(result, vec!["SAVEPOINT s1"]);
393 }
394
395 #[test]
396 fn test_multiple_savepoints() {
397 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
398 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
399 }
400
401 #[test]
402 fn test_duplicate_savepoints() {
403 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
404 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
405 }
406
407 #[test]
408 fn test_release_savepoints() {
409 let result = fingerprint_many(
410 vec![
411 "SAVEPOINT \"s1234\"",
412 "RELEASE SAVEPOINT \"s1234\"",
413 "SAVEPOINT \"s2345\"",
414 "RELEASE SAVEPOINT \"s2345\"",
415 ],
416 None,
417 );
418 assert_eq!(
419 result,
420 vec![
421 "SAVEPOINT s1",
422 "RELEASE SAVEPOINT s1",
423 "SAVEPOINT s2",
424 "RELEASE SAVEPOINT s2"
425 ]
426 );
427 }
428
429 #[test]
430 fn test_rollback_savepoint() {
431 let result = fingerprint_many(
432 vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
433 None,
434 );
435 assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
436 }
437
438 #[test]
439 fn test_select() {
440 let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
441 assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
442 }
443
444 #[test]
445 fn test_select_single_value() {
446 let result = fingerprint_many(vec!["SELECT 1"], None);
447 assert_eq!(result, vec!["SELECT ..."]);
448 }
449
450 #[test]
451 fn test_select_distinct_on() {
452 let result = fingerprint_many(vec!["SELECT DISTINCT ON (a, b) c FROM d"], None);
453 assert_eq!(result, vec!["SELECT DISTINCT ON (...) ... FROM d"]);
454 }
455
456 #[test]
457 fn test_select_with_from_quoted() {
458 let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
459 assert_eq!(result, vec!["SELECT ... FROM c.d"]);
460 }
461
462 #[test]
463 fn test_select_with_from_join() {
464 let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
465 assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
466 }
467
468 #[test]
469 fn test_select_with_from_inner_join_quoted() {
470 let result = fingerprint_many(
471 vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
472 None,
473 );
474 assert_eq!(result, vec!["SELECT ... FROM c INNER JOIN d ON ..."]);
475 }
476
477 #[test]
478 fn test_select_with_from_left_outer_join_quoted() {
479 let result = fingerprint_many(
480 vec!["SELECT a, b FROM c LEFT OUTER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
481 None,
482 );
483 assert_eq!(result, vec!["SELECT ... FROM c LEFT OUTER JOIN d ON ..."]);
484 }
485
486 #[test]
487 fn test_select_with_group_by() {
488 let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
489 assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
490 }
491
492 #[test]
493 fn test_select_with_order_by() {
494 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
495 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
496 }
497
498 #[test]
499 fn test_select_with_order_by_more() {
500 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
501 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
502 }
503
504 #[test]
505 fn test_select_with_limit_offset() {
506 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
507 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
508 }
509
510 #[test]
511 fn test_clickhouse_select_with_limit_by() {
512 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 BY c"], None);
513 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... BY ..."]);
514 }
515
516 #[test]
517 fn test_mysql_select_with_limit_comma() {
518 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21, 101"], None);
519 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ..., ..."]);
520 }
521
522 #[test]
523 fn test_select_union() {
524 let result = fingerprint_many(vec!["SELECT a, b FROM c UNION SELECT a, b FROM d"], None);
525 assert_eq!(result, vec!["SELECT ... FROM c UNION SELECT ... FROM d"]);
526 }
527
528 #[test]
529 fn test_select_union_parenthesized() {
530 let result = fingerprint_many(
531 vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
532 None,
533 );
534 assert_eq!(
535 result,
536 vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
537 );
538 }
539
540 #[test]
541 fn test_select_union_all() {
542 let result = fingerprint_many(
543 vec!["SELECT a, b FROM c UNION ALL SELECT a, b FROM d"],
544 None,
545 );
546 assert_eq!(
547 result,
548 vec!["SELECT ... FROM c UNION ALL SELECT ... FROM d"]
549 );
550 }
551
552 #[test]
553 fn test_select_union_all_parenthesized() {
554 let result = fingerprint_many(
555 vec!["(SELECT a, b FROM c) UNION ALL (SELECT a, b FROM d)"],
556 None,
557 );
558 assert_eq!(
559 result,
560 vec!["(SELECT ... FROM c) UNION ALL (SELECT ... FROM d)"]
561 );
562 }
563
564 #[test]
565 fn test_select_except() {
566 let result = fingerprint_many(vec!["SELECT a, b FROM c EXCEPT SELECT a, b FROM d"], None);
567 assert_eq!(result, vec!["SELECT ... FROM c EXCEPT SELECT ... FROM d"]);
568 }
569
570 #[test]
571 fn test_select_except_parenthesized() {
572 let result = fingerprint_many(
573 vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
574 None,
575 );
576 assert_eq!(
577 result,
578 vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
579 );
580 }
581
582 #[test]
583 fn test_select_intersect() {
584 let result = fingerprint_many(
585 vec!["SELECT a, b FROM c INTERSECT SELECT a, b FROM d"],
586 None,
587 );
588 assert_eq!(
589 result,
590 vec!["SELECT ... FROM c INTERSECT SELECT ... FROM d"]
591 );
592 }
593
594 #[test]
595 fn test_select_intersect_parenthesized() {
596 let result = fingerprint_many(
597 vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
598 None,
599 );
600 assert_eq!(
601 result,
602 vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
603 );
604 }
605
606 #[test]
607 fn test_select_union_triple() {
608 let result = fingerprint_many(
609 vec!["SELECT a, b FROM c UNION SELECT a, b FROM d UNION SELECT a, b FROM e"],
610 None,
611 );
612 assert_eq!(
613 result,
614 vec!["SELECT ... FROM c UNION SELECT ... FROM d UNION SELECT ... FROM e"]
615 );
616 }
617
618 #[test]
619 fn test_select_union_triple_parenthesized() {
620 let result = fingerprint_many(
621 vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d) UNION (SELECT a, b FROM e)"],
622 None,
623 );
624 assert_eq!(
625 result,
626 vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d) UNION (SELECT ... FROM e)"]
627 );
628 }
629
630 #[test]
631 fn test_with_recursive_select() {
632 let result = fingerprint_many(
633 vec!["WITH RECURSIVE t AS (SELECT a, b FROM c WHERE d = 12345) SELECT * FROM t"],
634 None,
635 );
636 assert_eq!(
637 result,
638 vec!["WITH RECURSIVE t AS (SELECT ... FROM c WHERE ...) SELECT * FROM t"]
639 );
640 }
641
642 #[test]
643 fn test_with_recursive_select_union() {
644 let result = fingerprint_many(
645 vec!["WITH RECURSIVE t AS (SELECT a FROM b UNION SELECT a FROM c) SELECT * FROM t"],
646 None,
647 );
648
649 assert_eq!(
650 result,
651 vec!["WITH RECURSIVE t AS (SELECT ... FROM b UNION SELECT ... FROM c) SELECT * FROM t"],
652 );
653 }
654
655 #[test]
656 fn test_declare_cursor() {
657 let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
658 assert_eq!(
659 result,
660 vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
661 );
662 }
663
664 #[test]
665 fn test_insert() {
666 let result = fingerprint_many(
667 vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
668 None,
669 );
670 assert_eq!(
671 result,
672 vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
673 );
674 }
675
676 #[test]
677 fn test_insert_select() {
678 let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
679 assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
680 }
681
682 #[test]
683 fn test_insert_on_conflict() {
684 let result = fingerprint_many(
685 vec![
686 "INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c",
687 ],
688 None,
689 );
690 assert_eq!(
691 result,
692 vec![
693 "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
694 ]
695 );
696 }
697
698 #[test]
699 fn test_update() {
700 let result = fingerprint_many(
701 vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
702 None,
703 );
704 assert_eq!(
705 result,
706 vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
707 );
708 }
709
710 #[test]
711 fn test_delete() {
712 let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
713 assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
714 }
715}