sql_fingerprint/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4    Assignment, AssignmentTarget, ConflictTarget, Delete, Distinct, DoUpdate, Expr, GroupByExpr,
5    Ident, Insert, JoinConstraint, JoinOperator, ObjectName, ObjectNamePart, Offset, OnConflict,
6    OnConflictAction, OnInsert, OrderBy, OrderByKind, Query, SelectItem, SetExpr, Statement, Value,
7    ValueWithSpan, VisitMut, VisitorMut,
8};
9use sqlparser::dialect::{Dialect, GenericDialect};
10use sqlparser::parser::Parser;
11use sqlparser::tokenizer::Span;
12use std::collections::HashMap;
13use std::ops::ControlFlow;
14
15/// Fingerprint a single SQL string.
16///
17/// Unparseable SQL is returned as-is.
18///
19/// # Example
20/// ```
21/// use sql_fingerprint::fingerprint_one;
22///
23/// let result = fingerprint_one("SELECT a, b FROM c ORDER BY b", None);
24/// assert_eq!(result, "SELECT ... FROM c ORDER BY ...");
25/// ```
26pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
27    fingerprint_many(vec![input], dialect).join(" ")
28}
29
30/// Fingerprint multiple SQL strings.
31/// Doing so for a batch of strings allows sharing some state, such as savepoint ID aliases.
32///
33/// Unparseable SQL is returned as-is.
34///
35/// # Example
36/// ```
37/// use sql_fingerprint::fingerprint_many;
38///
39/// let result = fingerprint_many(vec!["SELECT a, b FROM c", "SELECT b, c FROM d"], None);
40/// assert_eq!(result, vec!["SELECT ... FROM c", "SELECT ... FROM d"]);
41/// ```
42pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
43    let dialect = dialect.unwrap_or(&GenericDialect {});
44
45    let mut savepoint_visitor = SavepointVisitor::new();
46
47    input
48        .iter()
49        .map(|sql| match Parser::parse_sql(dialect, sql) {
50            Ok(mut ast) => {
51                for stmt in &mut ast {
52                    stmt.visit(&mut savepoint_visitor);
53                }
54
55                ast.into_iter()
56                    .map(|stmt| stmt.to_string())
57                    .collect::<Vec<_>>()
58                    .join(" ")
59            }
60            Err(_) => sql.to_string(),
61        })
62        .collect()
63}
64
65struct SavepointVisitor {
66    savepoint_ids: HashMap<String, String>,
67}
68
69impl SavepointVisitor {
70    fn new() -> Self {
71        SavepointVisitor {
72            savepoint_ids: HashMap::new(),
73        }
74    }
75}
76
77impl VisitorMut for SavepointVisitor {
78    type Break = ();
79
80    fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
81        match stmt {
82            Statement::Savepoint { name } => {
83                let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
84                self.savepoint_ids
85                    .insert(name.value.clone(), savepoint_id.clone());
86                *name = Ident::new(savepoint_id);
87            }
88            Statement::ReleaseSavepoint { name } => {
89                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
90                    *name = Ident::new(savepoint_id);
91                }
92            }
93            Statement::Rollback {
94                savepoint: Some(name),
95                ..
96            } => {
97                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
98                    *name = Ident::new(savepoint_id);
99                }
100            }
101            Statement::Declare { stmts } => {
102                for stmt in stmts {
103                    if stmt.names.len() > 0 {
104                        stmt.names = vec![Ident::new("...")];
105                    }
106                }
107            }
108            Statement::Insert(Insert {
109                columns,
110                source,
111                on,
112                returning,
113                ..
114            }) => {
115                if columns.len() > 0 {
116                    *columns = vec![Ident::new("...")];
117                }
118                if let Some(source) = source {
119                    if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
120                        values.rows = vec![vec![placeholder_value()]];
121                    }
122                }
123                if let Some(on) = on {
124                    match on {
125                        OnInsert::OnConflict(OnConflict {
126                            conflict_target,
127                            action,
128                        }) => {
129                            if let Some(conflict_target) = conflict_target {
130                                match conflict_target {
131                                    ConflictTarget::Columns(columns) => {
132                                        if columns.len() > 0 {
133                                            *columns = vec![Ident::new("...")];
134                                        }
135                                    }
136                                    _ => {}
137                                }
138                            }
139                            if let OnConflictAction::DoUpdate(DoUpdate {
140                                assignments,
141                                selection,
142                            }) = action
143                            {
144                                if assignments.len() > 0 {
145                                    *assignments = vec![Assignment {
146                                        target: AssignmentTarget::ColumnName(ObjectName(vec![
147                                            ObjectNamePart::Identifier(Ident::new("...")),
148                                        ])),
149                                        value: placeholder_value(),
150                                    }];
151                                }
152                                if let Some(selection) = selection {
153                                    *selection = placeholder_value();
154                                }
155                            }
156                        }
157                        _ => {}
158                    }
159                }
160                if let Some(returning) = returning {
161                    if returning.len() > 0 {
162                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
163                    }
164                }
165            }
166            Statement::Update {
167                assignments,
168                selection,
169                returning,
170                ..
171            } => {
172                if assignments.len() > 0 {
173                    *assignments = vec![sqlparser::ast::Assignment {
174                        target: AssignmentTarget::ColumnName(ObjectName(vec![
175                            ObjectNamePart::Identifier(Ident::new("...")),
176                        ])),
177                        value: placeholder_value(),
178                    }];
179                }
180                if let Some(selection) = selection {
181                    *selection = placeholder_value();
182                }
183                if let Some(returning) = returning {
184                    if returning.len() > 0 {
185                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
186                    }
187                }
188            }
189            Statement::Delete(Delete {
190                selection,
191                returning,
192                ..
193            }) => {
194                if let Some(selection) = selection {
195                    *selection = placeholder_value();
196                }
197                if let Some(returning) = returning {
198                    if returning.len() > 0 {
199                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
200                    }
201                }
202            }
203            _ => {}
204        }
205        ControlFlow::Continue(())
206    }
207
208    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
209        if let SetExpr::Select(select) = query.body.as_mut() {
210            if select.projection.len() > 0 {
211                if let Some(item) = select.projection.first_mut() {
212                    match item {
213                        SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
214                            *item = SelectItem::UnnamedExpr(placeholder_value());
215                        }
216                        _ => {}
217                    }
218                }
219                select.projection.truncate(1);
220            }
221
222            if let Some(distinct) = &mut select.distinct {
223                match distinct {
224                    Distinct::On(exprs) => {
225                        if exprs.len() > 0 {
226                            *exprs = vec![placeholder_value()];
227                        }
228                    }
229                    _ => {}
230                }
231            };
232
233            for table_with_joins in &mut select.from {
234                for join in &mut table_with_joins.joins {
235                    match &mut join.join_operator {
236                        JoinOperator::Join(constraint)
237                        | JoinOperator::Inner(constraint)
238                        | JoinOperator::Left(constraint)
239                        | JoinOperator::LeftOuter(constraint)
240                        | JoinOperator::Right(constraint)
241                        | JoinOperator::RightOuter(constraint)
242                        | JoinOperator::FullOuter(constraint)
243                        | JoinOperator::Semi(constraint)
244                        | JoinOperator::LeftSemi(constraint)
245                        | JoinOperator::RightSemi(constraint)
246                        | JoinOperator::Anti(constraint)
247                        | JoinOperator::LeftAnti(constraint)
248                        | JoinOperator::RightAnti(constraint) => match constraint {
249                            JoinConstraint::On(expr) => {
250                                *expr = placeholder_value();
251                            }
252                            _ => {}
253                        },
254                        _ => {}
255                    }
256                }
257            }
258
259            if let Some(selection) = &mut select.selection {
260                *selection = placeholder_value();
261            }
262
263            match &mut select.group_by {
264                GroupByExpr::Expressions(col_names, ..) => {
265                    if col_names.len() > 0 {
266                        *col_names = vec![placeholder_value()];
267                    }
268                }
269                _ => {}
270            }
271        }
272        if let Some(order_by) = &mut query.order_by {
273            let OrderBy { kind, .. } = order_by;
274            if let OrderByKind::Expressions(expressions) = kind {
275                if expressions.len() > 0 {
276                    if let Some(expr) = expressions.first_mut() {
277                        expr.expr = placeholder_value();
278                    }
279                    expressions.truncate(1);
280                }
281            }
282        }
283        if let Some(limit) = &mut query.limit {
284            *limit = placeholder_value();
285        }
286        if let Some(Offset { value, .. }) = &mut query.offset {
287            *value = placeholder_value();
288        }
289        ControlFlow::Continue(())
290    }
291
292    fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
293        for part in _relation.0.iter_mut() {
294            match part {
295                ObjectNamePart::Identifier(ident) => {
296                    maybe_unquote_ident(ident);
297                }
298            }
299        }
300        ControlFlow::Continue(())
301    }
302
303    fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
304        match _expr {
305            Expr::Identifier(ident) => {
306                maybe_unquote_ident(ident);
307            }
308            Expr::CompoundIdentifier(idents) => {
309                for ident in idents {
310                    maybe_unquote_ident(ident);
311                }
312            }
313            _ => {}
314        }
315        ControlFlow::Continue(())
316    }
317}
318
319fn placeholder_value() -> Expr {
320    Expr::Value(ValueWithSpan {
321        value: Value::Placeholder("...".to_string()),
322        span: Span::empty(),
323    })
324}
325
326fn maybe_unquote_ident(ident: &mut Ident) -> () {
327    let Ident {
328        value, quote_style, ..
329    } = ident;
330
331    if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
332        *quote_style = None;
333    }
334}
335
336#[cfg(test)]
337mod tests {
338    use super::*;
339
340    #[test]
341    fn test_fingerprint_one() {
342        let result = fingerprint_one("SELECT 123", None);
343        assert_eq!(result, "SELECT ...");
344    }
345
346    #[test]
347    fn test_empty() {
348        let result = fingerprint_many(vec![""], None);
349        assert_eq!(result, vec![""]);
350    }
351
352    #[test]
353    fn test_unparseable() {
354        let result = fingerprint_many(vec!["SELECT  SELECT  SELECT  SELECT"], None);
355        assert_eq!(result, vec!["SELECT  SELECT  SELECT  SELECT"]);
356    }
357
358    #[test]
359    fn test_comments_dropped() {
360        let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
361        assert_eq!(result, vec!["SELECT ..."]);
362    }
363
364    #[test]
365    fn test_savepoint() {
366        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
367        assert_eq!(result, vec!["SAVEPOINT s1"]);
368    }
369
370    #[test]
371    fn test_multiple_savepoints() {
372        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
373        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
374    }
375
376    #[test]
377    fn test_duplicate_savepoints() {
378        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
379        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
380    }
381
382    #[test]
383    fn test_release_savepoints() {
384        let result = fingerprint_many(
385            vec![
386                "SAVEPOINT \"s1234\"",
387                "RELEASE SAVEPOINT \"s1234\"",
388                "SAVEPOINT \"s2345\"",
389                "RELEASE SAVEPOINT \"s2345\"",
390            ],
391            None,
392        );
393        assert_eq!(
394            result,
395            vec![
396                "SAVEPOINT s1",
397                "RELEASE SAVEPOINT s1",
398                "SAVEPOINT s2",
399                "RELEASE SAVEPOINT s2"
400            ]
401        );
402    }
403
404    #[test]
405    fn test_rollback_savepoint() {
406        let result = fingerprint_many(
407            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
408            None,
409        );
410        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
411    }
412
413    #[test]
414    fn test_select() {
415        let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
416        assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
417    }
418
419    #[test]
420    fn test_select_single_value() {
421        let result = fingerprint_many(vec!["SELECT 1"], None);
422        assert_eq!(result, vec!["SELECT ..."]);
423    }
424
425    #[test]
426    fn test_select_distinct_on() {
427        let result = fingerprint_many(vec!["SELECT DISTINCT ON (a, b) c FROM d"], None);
428        assert_eq!(result, vec!["SELECT DISTINCT ON (...) ... FROM d"]);
429    }
430
431    #[test]
432    fn test_select_with_from_quoted() {
433        let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
434        assert_eq!(result, vec!["SELECT ... FROM c.d"]);
435    }
436
437    #[test]
438    fn test_select_with_from_join() {
439        let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
440        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
441    }
442
443    #[test]
444    fn test_select_with_from_inner_join_quoted() {
445        let result = fingerprint_many(
446            vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
447            None,
448        );
449        assert_eq!(result, vec!["SELECT ... FROM c INNER JOIN d ON ..."]);
450    }
451
452    #[test]
453    fn test_select_with_from_left_outer_join_quoted() {
454        let result = fingerprint_many(
455            vec!["SELECT a, b FROM c LEFT OUTER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
456            None,
457        );
458        assert_eq!(result, vec!["SELECT ... FROM c LEFT OUTER JOIN d ON ..."]);
459    }
460
461    #[test]
462    fn test_select_with_group_by() {
463        let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
464        assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
465    }
466
467    #[test]
468    fn test_select_with_order_by() {
469        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
470        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
471    }
472
473    #[test]
474    fn test_select_with_order_by_more() {
475        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
476        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
477    }
478
479    #[test]
480    fn test_select_with_limit_offset() {
481        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
482        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
483    }
484
485    #[test]
486    fn test_select_union() {
487        let result = fingerprint_many(
488            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
489            None,
490        );
491        assert_eq!(
492            result,
493            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
494        );
495    }
496
497    #[test]
498    fn test_select_except() {
499        let result = fingerprint_many(
500            vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
501            None,
502        );
503        assert_eq!(
504            result,
505            vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
506        );
507    }
508    #[test]
509    fn test_select_intersect() {
510        let result = fingerprint_many(
511            vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
512            None,
513        );
514        assert_eq!(
515            result,
516            vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
517        );
518    }
519
520    #[test]
521    fn test_declare_cursor() {
522        let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
523        assert_eq!(
524            result,
525            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
526        );
527    }
528
529    #[test]
530    fn test_insert() {
531        let result = fingerprint_many(
532            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
533            None,
534        );
535        assert_eq!(
536            result,
537            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
538        );
539    }
540
541    #[test]
542    fn test_insert_select() {
543        let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
544        assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
545    }
546
547    #[test]
548    fn test_insert_on_conflict() {
549        let result = fingerprint_many(
550            vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
551            None,
552        );
553        assert_eq!(
554            result,
555            vec![
556                "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
557            ]
558        );
559    }
560
561    #[test]
562    fn test_update() {
563        let result = fingerprint_many(
564            vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
565            None,
566        );
567        assert_eq!(
568            result,
569            vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
570        );
571    }
572
573    #[test]
574    fn test_delete() {
575        let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
576        assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
577    }
578}