sql_fingerprint/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4    Assignment, AssignmentTarget, ConflictTarget, Delete, Distinct, DoUpdate, Expr, GroupByExpr,
5    Ident, Insert, JoinConstraint, JoinOperator, LimitClause, ObjectName, ObjectNamePart, Offset,
6    OnConflict, OnConflictAction, OnInsert, OrderBy, OrderByKind, Query, SelectItem, SetExpr,
7    Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
8};
9use sqlparser::dialect::{Dialect, GenericDialect};
10use sqlparser::parser::Parser;
11use sqlparser::tokenizer::Span;
12use std::collections::HashMap;
13use std::ops::ControlFlow;
14
15/// Fingerprint a single SQL string.
16///
17/// Unparsable SQL is returned as-is.
18///
19/// # Example
20/// ```
21/// use sql_fingerprint::fingerprint_one;
22///
23/// let result = fingerprint_one("SELECT a, b FROM c ORDER BY b", None);
24/// assert_eq!(result, "SELECT ... FROM c ORDER BY ...");
25/// ```
26pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
27    fingerprint_many(vec![input], dialect).join(" ")
28}
29
30/// Fingerprint multiple SQL strings.
31/// Doing so for a batch of strings allows sharing some state, such as savepoint ID aliases.
32///
33/// Unparsable SQL is returned as-is.
34///
35/// # Example
36/// ```
37/// use sql_fingerprint::fingerprint_many;
38///
39/// let result = fingerprint_many(vec!["SELECT a, b FROM c", "SELECT b, c FROM d"], None);
40/// assert_eq!(result, vec!["SELECT ... FROM c", "SELECT ... FROM d"]);
41/// ```
42pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
43    let dialect = dialect.unwrap_or(&GenericDialect {});
44
45    let mut visitor = FingerprintingVisitor::new();
46
47    input
48        .iter()
49        .map(|sql| match Parser::parse_sql(dialect, sql) {
50            Ok(mut ast) => {
51                for stmt in &mut ast {
52                    let _ = stmt.visit(&mut visitor);
53                }
54
55                ast.into_iter()
56                    .map(|stmt| stmt.to_string())
57                    .collect::<Vec<_>>()
58                    .join(" ")
59            }
60            Err(_) => sql.to_string(),
61        })
62        .collect()
63}
64
65struct FingerprintingVisitor {
66    savepoint_ids: HashMap<String, String>,
67}
68
69impl FingerprintingVisitor {
70    fn new() -> Self {
71        FingerprintingVisitor {
72            savepoint_ids: HashMap::new(),
73        }
74    }
75
76    fn visit_select(&mut self, select: &mut sqlparser::ast::Select) {
77        if !select.projection.is_empty() {
78            if let Some(item) = select.projection.first_mut() {
79                match item {
80                    SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
81                        *item = SelectItem::UnnamedExpr(placeholder_value());
82                    }
83                    _ => {}
84                }
85            }
86            select.projection.truncate(1);
87        }
88
89        if let Some(Distinct::On(exprs)) = &mut select.distinct {
90            if !exprs.is_empty() {
91                *exprs = vec![placeholder_value()];
92            }
93        };
94
95        for table_with_joins in &mut select.from {
96            for join in &mut table_with_joins.joins {
97                match &mut join.join_operator {
98                    JoinOperator::Join(constraint)
99                    | JoinOperator::Inner(constraint)
100                    | JoinOperator::Left(constraint)
101                    | JoinOperator::LeftOuter(constraint)
102                    | JoinOperator::Right(constraint)
103                    | JoinOperator::RightOuter(constraint)
104                    | JoinOperator::FullOuter(constraint)
105                    | JoinOperator::Semi(constraint)
106                    | JoinOperator::LeftSemi(constraint)
107                    | JoinOperator::RightSemi(constraint)
108                    | JoinOperator::Anti(constraint)
109                    | JoinOperator::LeftAnti(constraint)
110                    | JoinOperator::RightAnti(constraint) => {
111                        if let JoinConstraint::On(expr) = constraint {
112                            *expr = placeholder_value();
113                        }
114                    }
115                    _ => {}
116                }
117            }
118        }
119
120        if let Some(selection) = &mut select.selection {
121            *selection = placeholder_value();
122        }
123
124        if let GroupByExpr::Expressions(col_names, ..) = &mut select.group_by {
125            if !col_names.is_empty() {
126                *col_names = vec![placeholder_value()];
127            }
128        }
129    }
130}
131
132impl VisitorMut for FingerprintingVisitor {
133    type Break = ();
134
135    fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
136        match stmt {
137            Statement::Savepoint { name } => {
138                let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
139                self.savepoint_ids
140                    .insert(name.value.clone(), savepoint_id.clone());
141                *name = Ident::new(savepoint_id);
142            }
143            Statement::ReleaseSavepoint { name } => {
144                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
145                    *name = Ident::new(savepoint_id);
146                }
147            }
148            Statement::Rollback {
149                savepoint: Some(name),
150                ..
151            } => {
152                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
153                    *name = Ident::new(savepoint_id);
154                }
155            }
156            Statement::Declare { stmts } => {
157                for stmt in stmts {
158                    if !stmt.names.is_empty() {
159                        stmt.names = vec![Ident::new("...")];
160                    }
161                }
162            }
163            Statement::Insert(Insert {
164                columns,
165                source,
166                on,
167                returning,
168                ..
169            }) => {
170                if !columns.is_empty() {
171                    *columns = vec![Ident::new("...")];
172                }
173                if let Some(source) = source {
174                    if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
175                        values.rows = vec![vec![placeholder_value()]];
176                    }
177                }
178                if let Some(OnInsert::OnConflict(OnConflict {
179                    conflict_target,
180                    action,
181                })) = on
182                {
183                    if let Some(ConflictTarget::Columns(columns)) = conflict_target {
184                        if !columns.is_empty() {
185                            *columns = vec![Ident::new("...")];
186                        }
187                    }
188                    if let OnConflictAction::DoUpdate(DoUpdate {
189                        assignments,
190                        selection,
191                    }) = action
192                    {
193                        if !assignments.is_empty() {
194                            *assignments = vec![Assignment {
195                                target: AssignmentTarget::ColumnName(ObjectName(vec![
196                                    ObjectNamePart::Identifier(Ident::new("...")),
197                                ])),
198                                value: placeholder_value(),
199                            }];
200                        }
201                        if let Some(selection) = selection {
202                            *selection = placeholder_value();
203                        }
204                    }
205                }
206                if let Some(returning) = returning {
207                    if !returning.is_empty() {
208                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
209                    }
210                }
211            }
212            Statement::Update {
213                assignments,
214                selection,
215                returning,
216                ..
217            } => {
218                if !assignments.is_empty() {
219                    *assignments = vec![sqlparser::ast::Assignment {
220                        target: AssignmentTarget::ColumnName(ObjectName(vec![
221                            ObjectNamePart::Identifier(Ident::new("...")),
222                        ])),
223                        value: placeholder_value(),
224                    }];
225                }
226                if let Some(selection) = selection {
227                    *selection = placeholder_value();
228                }
229                if let Some(returning) = returning {
230                    if !returning.is_empty() {
231                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
232                    }
233                }
234            }
235            Statement::Delete(Delete {
236                selection,
237                returning,
238                ..
239            }) => {
240                if let Some(selection) = selection {
241                    *selection = placeholder_value();
242                }
243                if let Some(returning) = returning {
244                    if !returning.is_empty() {
245                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
246                    }
247                }
248            }
249            _ => {}
250        }
251        ControlFlow::Continue(())
252    }
253
254    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
255        match query.body.as_mut() {
256            SetExpr::Select(select) => {
257                self.visit_select(select);
258            }
259            SetExpr::SetOperation { left, right, .. } => {
260                // push left and right into a double-ended queue to visit them,
261                // expnading left and right as required.
262                let mut stack = vec![left.as_mut(), right.as_mut()];
263                while let Some(set_expr) = stack.pop() {
264                    match set_expr {
265                        SetExpr::Select(select) => {
266                            self.visit_select(select);
267                        }
268                        SetExpr::SetOperation { left, right, .. } => {
269                            // Push left and right onto the stack for further processing.
270                            stack.push(left.as_mut());
271                            stack.push(right.as_mut());
272                        }
273                        _ => {}
274                    }
275                }
276            }
277            _ => {}
278        }
279        if let Some(order_by) = &mut query.order_by {
280            let OrderBy { kind, .. } = order_by;
281            if let OrderByKind::Expressions(expressions) = kind {
282                if !expressions.is_empty() {
283                    if let Some(expr) = expressions.first_mut() {
284                        expr.expr = placeholder_value();
285                    }
286                    expressions.truncate(1);
287                }
288            }
289        }
290        if let Some(limit_clause) = &mut query.limit_clause {
291            match limit_clause {
292                LimitClause::LimitOffset {
293                    limit,
294                    offset,
295                    limit_by,
296                } => {
297                    if let Some(limit_value) = limit {
298                        *limit_value = placeholder_value();
299                    }
300                    if let Some(Offset { value, .. }) = offset {
301                        *value = placeholder_value();
302                    }
303                    if !limit_by.is_empty() {
304                        *limit_by = vec![placeholder_value()];
305                    }
306                }
307                // MySQL specific, needs testing!Ó
308                LimitClause::OffsetCommaLimit { offset, limit } => {
309                    *offset = placeholder_value();
310                    *limit = placeholder_value();
311                }
312            }
313        }
314        ControlFlow::Continue(())
315    }
316
317    fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
318        for part in _relation.0.iter_mut() {
319            match part {
320                ObjectNamePart::Identifier(ident) => {
321                    maybe_unquote_ident(ident);
322                }
323            }
324        }
325        ControlFlow::Continue(())
326    }
327
328    fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
329        match _expr {
330            Expr::Identifier(ident) => {
331                maybe_unquote_ident(ident);
332            }
333            Expr::CompoundIdentifier(idents) => {
334                for ident in idents {
335                    maybe_unquote_ident(ident);
336                }
337            }
338            _ => {}
339        }
340        ControlFlow::Continue(())
341    }
342}
343
344fn placeholder_value() -> Expr {
345    Expr::Value(ValueWithSpan {
346        value: Value::Placeholder("...".to_string()),
347        span: Span::empty(),
348    })
349}
350
351fn maybe_unquote_ident(ident: &mut Ident) {
352    let Ident {
353        value, quote_style, ..
354    } = ident;
355
356    if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
357        *quote_style = None;
358    }
359}
360
361#[cfg(test)]
362mod tests {
363    use super::*;
364
365    #[test]
366    fn test_fingerprint_one() {
367        let result = fingerprint_one("SELECT 123", None);
368        assert_eq!(result, "SELECT ...");
369    }
370
371    #[test]
372    fn test_empty() {
373        let result = fingerprint_many(vec![""], None);
374        assert_eq!(result, vec![""]);
375    }
376
377    #[test]
378    fn test_unparsable() {
379        let result = fingerprint_many(vec!["SELECT  SELECT  SELECT  SELECT"], None);
380        assert_eq!(result, vec!["SELECT  SELECT  SELECT  SELECT"]);
381    }
382
383    #[test]
384    fn test_comments_dropped() {
385        let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
386        assert_eq!(result, vec!["SELECT ..."]);
387    }
388
389    #[test]
390    fn test_savepoint() {
391        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
392        assert_eq!(result, vec!["SAVEPOINT s1"]);
393    }
394
395    #[test]
396    fn test_multiple_savepoints() {
397        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
398        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
399    }
400
401    #[test]
402    fn test_duplicate_savepoints() {
403        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
404        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
405    }
406
407    #[test]
408    fn test_release_savepoints() {
409        let result = fingerprint_many(
410            vec![
411                "SAVEPOINT \"s1234\"",
412                "RELEASE SAVEPOINT \"s1234\"",
413                "SAVEPOINT \"s2345\"",
414                "RELEASE SAVEPOINT \"s2345\"",
415            ],
416            None,
417        );
418        assert_eq!(
419            result,
420            vec![
421                "SAVEPOINT s1",
422                "RELEASE SAVEPOINT s1",
423                "SAVEPOINT s2",
424                "RELEASE SAVEPOINT s2"
425            ]
426        );
427    }
428
429    #[test]
430    fn test_rollback_savepoint() {
431        let result = fingerprint_many(
432            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
433            None,
434        );
435        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
436    }
437
438    #[test]
439    fn test_select() {
440        let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
441        assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
442    }
443
444    #[test]
445    fn test_select_single_value() {
446        let result = fingerprint_many(vec!["SELECT 1"], None);
447        assert_eq!(result, vec!["SELECT ..."]);
448    }
449
450    #[test]
451    fn test_select_distinct_on() {
452        let result = fingerprint_many(vec!["SELECT DISTINCT ON (a, b) c FROM d"], None);
453        assert_eq!(result, vec!["SELECT DISTINCT ON (...) ... FROM d"]);
454    }
455
456    #[test]
457    fn test_select_with_from_quoted() {
458        let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
459        assert_eq!(result, vec!["SELECT ... FROM c.d"]);
460    }
461
462    #[test]
463    fn test_select_with_from_join() {
464        let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
465        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
466    }
467
468    #[test]
469    fn test_select_with_from_inner_join_quoted() {
470        let result = fingerprint_many(
471            vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
472            None,
473        );
474        assert_eq!(result, vec!["SELECT ... FROM c INNER JOIN d ON ..."]);
475    }
476
477    #[test]
478    fn test_select_with_from_left_outer_join_quoted() {
479        let result = fingerprint_many(
480            vec!["SELECT a, b FROM c LEFT OUTER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
481            None,
482        );
483        assert_eq!(result, vec!["SELECT ... FROM c LEFT OUTER JOIN d ON ..."]);
484    }
485
486    #[test]
487    fn test_select_with_group_by() {
488        let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
489        assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
490    }
491
492    #[test]
493    fn test_select_with_order_by() {
494        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
495        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
496    }
497
498    #[test]
499    fn test_select_with_order_by_more() {
500        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
501        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
502    }
503
504    #[test]
505    fn test_select_with_limit_offset() {
506        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
507        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
508    }
509
510    #[test]
511    fn test_clickhouse_select_with_limit_by() {
512        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 BY c"], None);
513        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... BY ..."]);
514    }
515
516    #[test]
517    fn test_mysql_select_with_limit_comma() {
518        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21, 101"], None);
519        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ..., ..."]);
520    }
521
522    #[test]
523    fn test_select_union() {
524        let result = fingerprint_many(vec!["SELECT a, b FROM c UNION SELECT a, b FROM d"], None);
525        assert_eq!(result, vec!["SELECT ... FROM c UNION SELECT ... FROM d"]);
526    }
527
528    #[test]
529    fn test_select_union_parenthesized() {
530        let result = fingerprint_many(
531            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
532            None,
533        );
534        assert_eq!(
535            result,
536            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
537        );
538    }
539
540    #[test]
541    fn test_select_union_all() {
542        let result = fingerprint_many(
543            vec!["SELECT a, b FROM c UNION ALL SELECT a, b FROM d"],
544            None,
545        );
546        assert_eq!(
547            result,
548            vec!["SELECT ... FROM c UNION ALL SELECT ... FROM d"]
549        );
550    }
551
552    #[test]
553    fn test_select_union_all_parenthesized() {
554        let result = fingerprint_many(
555            vec!["(SELECT a, b FROM c) UNION ALL (SELECT a, b FROM d)"],
556            None,
557        );
558        assert_eq!(
559            result,
560            vec!["(SELECT ... FROM c) UNION ALL (SELECT ... FROM d)"]
561        );
562    }
563
564    #[test]
565    fn test_select_except() {
566        let result = fingerprint_many(vec!["SELECT a, b FROM c EXCEPT SELECT a, b FROM d"], None);
567        assert_eq!(result, vec!["SELECT ... FROM c EXCEPT SELECT ... FROM d"]);
568    }
569
570    #[test]
571    fn test_select_except_parenthesized() {
572        let result = fingerprint_many(
573            vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
574            None,
575        );
576        assert_eq!(
577            result,
578            vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
579        );
580    }
581
582    #[test]
583    fn test_select_intersect() {
584        let result = fingerprint_many(
585            vec!["SELECT a, b FROM c INTERSECT SELECT a, b FROM d"],
586            None,
587        );
588        assert_eq!(
589            result,
590            vec!["SELECT ... FROM c INTERSECT SELECT ... FROM d"]
591        );
592    }
593
594    #[test]
595    fn test_select_intersect_parenthesized() {
596        let result = fingerprint_many(
597            vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
598            None,
599        );
600        assert_eq!(
601            result,
602            vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
603        );
604    }
605
606    #[test]
607    fn test_select_union_triple() {
608        let result = fingerprint_many(
609            vec!["SELECT a, b FROM c UNION SELECT a, b FROM d UNION SELECT a, b FROM e"],
610            None,
611        );
612        assert_eq!(
613            result,
614            vec!["SELECT ... FROM c UNION SELECT ... FROM d UNION SELECT ... FROM e"]
615        );
616    }
617
618    #[test]
619    fn test_select_union_triple_parenthesized() {
620        let result = fingerprint_many(
621            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d) UNION (SELECT a, b FROM e)"],
622            None,
623        );
624        assert_eq!(
625            result,
626            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d) UNION (SELECT ... FROM e)"]
627        );
628    }
629
630    #[test]
631    fn test_with_recursive_select() {
632        let result = fingerprint_many(
633            vec!["WITH RECURSIVE t AS (SELECT a, b FROM c WHERE d = 12345) SELECT * FROM t"],
634            None,
635        );
636        assert_eq!(
637            result,
638            vec!["WITH RECURSIVE t AS (SELECT ... FROM c WHERE ...) SELECT * FROM t"]
639        );
640    }
641
642    #[test]
643    fn test_with_recursive_select_union() {
644        let result = fingerprint_many(
645            vec!["WITH RECURSIVE t AS (SELECT a FROM b UNION SELECT a FROM c) SELECT * FROM t"],
646            None,
647        );
648
649        assert_eq!(
650            result,
651            vec!["WITH RECURSIVE t AS (SELECT ... FROM b UNION SELECT ... FROM c) SELECT * FROM t"],
652        );
653    }
654
655    #[test]
656    fn test_declare_cursor() {
657        let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
658        assert_eq!(
659            result,
660            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
661        );
662    }
663
664    #[test]
665    fn test_insert() {
666        let result = fingerprint_many(
667            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
668            None,
669        );
670        assert_eq!(
671            result,
672            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
673        );
674    }
675
676    #[test]
677    fn test_insert_select() {
678        let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
679        assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
680    }
681
682    #[test]
683    fn test_insert_on_conflict() {
684        let result = fingerprint_many(
685            vec![
686                "INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c",
687            ],
688            None,
689        );
690        assert_eq!(
691            result,
692            vec![
693                "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
694            ]
695        );
696    }
697
698    #[test]
699    fn test_update() {
700        let result = fingerprint_many(
701            vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
702            None,
703        );
704        assert_eq!(
705            result,
706            vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
707        );
708    }
709
710    #[test]
711    fn test_delete() {
712        let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
713        assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
714    }
715}