sql_fingerprint/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4    Assignment, AssignmentTarget, ConflictTarget, Delete, DoUpdate, Expr, GroupByExpr, Ident,
5    Insert, JoinConstraint, JoinOperator, ObjectName, ObjectNamePart, Offset, OnConflict,
6    OnConflictAction, OnInsert, OrderBy, OrderByKind, Query, SelectItem, SetExpr, Statement, Value,
7    ValueWithSpan, VisitMut, VisitorMut,
8};
9use sqlparser::dialect::{Dialect, GenericDialect};
10use sqlparser::parser::Parser;
11use sqlparser::tokenizer::Span;
12use std::collections::HashMap;
13use std::ops::ControlFlow;
14
15/// Fingerprint a single SQL string.
16///
17/// Unparseable SQL is returned as-is.
18///
19/// # Example
20/// ```
21/// use sql_fingerprint::fingerprint_one;
22///
23/// let result = fingerprint_one("SELECT a, b FROM c ORDER BY b", None);
24/// assert_eq!(result, "SELECT ... FROM c ORDER BY ...");
25/// ```
26pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
27    fingerprint_many(vec![input], dialect).join(" ")
28}
29
30/// Fingerprint multiple SQL strings.
31/// Doing so for a batch of strings allows sharing some state, such as savepoint ID aliases.
32///
33/// Unparseable SQL is returned as-is.
34///
35/// # Example
36/// ```
37/// use sql_fingerprint::fingerprint_many;
38///
39/// let result = fingerprint_many(vec!["SELECT a, b FROM c", "SELECT b, c FROM d"], None);
40/// assert_eq!(result, vec!["SELECT ... FROM c", "SELECT ... FROM d"]);
41/// ```
42pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
43    let dialect = dialect.unwrap_or(&GenericDialect {});
44
45    let mut savepoint_visitor = SavepointVisitor::new();
46
47    input
48        .iter()
49        .map(|sql| match Parser::parse_sql(dialect, sql) {
50            Ok(mut ast) => {
51                for stmt in &mut ast {
52                    stmt.visit(&mut savepoint_visitor);
53                }
54
55                ast.into_iter()
56                    .map(|stmt| stmt.to_string())
57                    .collect::<Vec<_>>()
58                    .join(" ")
59            }
60            Err(_) => sql.to_string(),
61        })
62        .collect()
63}
64
65struct SavepointVisitor {
66    savepoint_ids: HashMap<String, String>,
67}
68
69impl SavepointVisitor {
70    fn new() -> Self {
71        SavepointVisitor {
72            savepoint_ids: HashMap::new(),
73        }
74    }
75}
76
77impl VisitorMut for SavepointVisitor {
78    type Break = ();
79
80    fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
81        match stmt {
82            Statement::Savepoint { name } => {
83                let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
84                self.savepoint_ids
85                    .insert(name.value.clone(), savepoint_id.clone());
86                *name = Ident::new(savepoint_id);
87            }
88            Statement::ReleaseSavepoint { name } => {
89                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
90                    *name = Ident::new(savepoint_id);
91                }
92            }
93            Statement::Rollback {
94                savepoint: Some(name),
95                ..
96            } => {
97                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
98                    *name = Ident::new(savepoint_id);
99                }
100            }
101            Statement::Declare { stmts } => {
102                for stmt in stmts {
103                    if stmt.names.len() > 0 {
104                        stmt.names = vec![Ident::new("...")];
105                    }
106                }
107            }
108            Statement::Insert(Insert {
109                columns,
110                source,
111                on,
112                returning,
113                ..
114            }) => {
115                if columns.len() > 0 {
116                    *columns = vec![Ident::new("...")];
117                }
118                if let Some(source) = source {
119                    if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
120                        values.rows = vec![vec![placeholder_value()]];
121                    }
122                }
123                if let Some(on) = on {
124                    match on {
125                        OnInsert::OnConflict(OnConflict {
126                            conflict_target,
127                            action,
128                        }) => {
129                            if let Some(conflict_target) = conflict_target {
130                                match conflict_target {
131                                    ConflictTarget::Columns(columns) => {
132                                        if columns.len() > 0 {
133                                            *columns = vec![Ident::new("...")];
134                                        }
135                                    }
136                                    _ => {}
137                                }
138                            }
139                            if let OnConflictAction::DoUpdate(DoUpdate {
140                                assignments,
141                                selection,
142                            }) = action
143                            {
144                                if assignments.len() > 0 {
145                                    *assignments = vec![Assignment {
146                                        target: AssignmentTarget::ColumnName(ObjectName(vec![
147                                            ObjectNamePart::Identifier(Ident::new("...")),
148                                        ])),
149                                        value: placeholder_value(),
150                                    }];
151                                }
152                                if let Some(selection) = selection {
153                                    *selection = placeholder_value();
154                                }
155                            }
156                        }
157                        _ => {}
158                    }
159                }
160                if let Some(returning) = returning {
161                    if returning.len() > 0 {
162                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
163                    }
164                }
165            }
166            Statement::Update {
167                assignments,
168                selection,
169                returning,
170                ..
171            } => {
172                if assignments.len() > 0 {
173                    *assignments = vec![sqlparser::ast::Assignment {
174                        target: AssignmentTarget::ColumnName(ObjectName(vec![
175                            ObjectNamePart::Identifier(Ident::new("...")),
176                        ])),
177                        value: placeholder_value(),
178                    }];
179                }
180                if let Some(selection) = selection {
181                    *selection = placeholder_value();
182                }
183                if let Some(returning) = returning {
184                    if returning.len() > 0 {
185                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
186                    }
187                }
188            }
189            Statement::Delete(Delete {
190                selection,
191                returning,
192                ..
193            }) => {
194                if let Some(selection) = selection {
195                    *selection = placeholder_value();
196                }
197                if let Some(returning) = returning {
198                    if returning.len() > 0 {
199                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
200                    }
201                }
202            }
203            _ => {}
204        }
205        ControlFlow::Continue(())
206    }
207
208    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
209        if let SetExpr::Select(select) = query.body.as_mut() {
210            if select.projection.len() > 0 {
211                if let Some(item) = select.projection.first_mut() {
212                    match item {
213                        SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
214                            *item = SelectItem::UnnamedExpr(placeholder_value());
215                        }
216                        _ => {}
217                    }
218                }
219                select.projection.truncate(1);
220            }
221
222            for table_with_joins in &mut select.from {
223                for join in &mut table_with_joins.joins {
224                    match &mut join.join_operator {
225                        JoinOperator::Join(constraint)
226                        | JoinOperator::Inner(constraint)
227                        | JoinOperator::Left(constraint)
228                        | JoinOperator::LeftOuter(constraint)
229                        | JoinOperator::Right(constraint)
230                        | JoinOperator::RightOuter(constraint)
231                        | JoinOperator::FullOuter(constraint)
232                        | JoinOperator::Semi(constraint)
233                        | JoinOperator::LeftSemi(constraint)
234                        | JoinOperator::RightSemi(constraint)
235                        | JoinOperator::Anti(constraint)
236                        | JoinOperator::LeftAnti(constraint)
237                        | JoinOperator::RightAnti(constraint) => match constraint {
238                            JoinConstraint::On(expr) => {
239                                *expr = placeholder_value();
240                            }
241                            _ => {}
242                        },
243                        _ => {}
244                    }
245                }
246            }
247
248            if let Some(selection) = &mut select.selection {
249                *selection = placeholder_value();
250            }
251
252            match &mut select.group_by {
253                GroupByExpr::Expressions(col_names, ..) => {
254                    if col_names.len() > 0 {
255                        *col_names = vec![placeholder_value()];
256                    }
257                }
258                _ => {}
259            }
260        }
261        if let Some(order_by) = &mut query.order_by {
262            let OrderBy { kind, .. } = order_by;
263            if let OrderByKind::Expressions(expressions) = kind {
264                if expressions.len() > 0 {
265                    if let Some(expr) = expressions.first_mut() {
266                        expr.expr = placeholder_value();
267                    }
268                    expressions.truncate(1);
269                }
270            }
271        }
272        if let Some(limit) = &mut query.limit {
273            *limit = placeholder_value();
274        }
275        if let Some(Offset { value, .. }) = &mut query.offset {
276            *value = placeholder_value();
277        }
278        ControlFlow::Continue(())
279    }
280
281    fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
282        for part in _relation.0.iter_mut() {
283            match part {
284                ObjectNamePart::Identifier(ident) => {
285                    maybe_unquote_ident(ident);
286                }
287            }
288        }
289        ControlFlow::Continue(())
290    }
291
292    fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
293        match _expr {
294            Expr::Identifier(ident) => {
295                maybe_unquote_ident(ident);
296            }
297            Expr::CompoundIdentifier(idents) => {
298                for ident in idents {
299                    maybe_unquote_ident(ident);
300                }
301            }
302            _ => {}
303        }
304        ControlFlow::Continue(())
305    }
306}
307
308fn placeholder_value() -> Expr {
309    Expr::Value(ValueWithSpan {
310        value: Value::Placeholder("...".to_string()),
311        span: Span::empty(),
312    })
313}
314
315fn maybe_unquote_ident(ident: &mut Ident) -> () {
316    let Ident {
317        value, quote_style, ..
318    } = ident;
319
320    if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
321        *quote_style = None;
322    }
323}
324
325#[cfg(test)]
326mod tests {
327    use super::*;
328
329    #[test]
330    fn test_fingerprint_one() {
331        let result = fingerprint_one("SELECT 123", None);
332        assert_eq!(result, "SELECT ...");
333    }
334
335    #[test]
336    fn test_empty() {
337        let result = fingerprint_many(vec![""], None);
338        assert_eq!(result, vec![""]);
339    }
340
341    #[test]
342    fn test_unparseable() {
343        let result = fingerprint_many(vec!["SELECT  SELECT  SELECT  SELECT"], None);
344        assert_eq!(result, vec!["SELECT  SELECT  SELECT  SELECT"]);
345    }
346
347    #[test]
348    fn test_comments_dropped() {
349        let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
350        assert_eq!(result, vec!["SELECT ..."]);
351    }
352
353    #[test]
354    fn test_savepoint() {
355        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
356        assert_eq!(result, vec!["SAVEPOINT s1"]);
357    }
358
359    #[test]
360    fn test_multiple_savepoints() {
361        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
362        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
363    }
364
365    #[test]
366    fn test_duplicate_savepoints() {
367        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
368        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
369    }
370
371    #[test]
372    fn test_release_savepoints() {
373        let result = fingerprint_many(
374            vec![
375                "SAVEPOINT \"s1234\"",
376                "RELEASE SAVEPOINT \"s1234\"",
377                "SAVEPOINT \"s2345\"",
378                "RELEASE SAVEPOINT \"s2345\"",
379            ],
380            None,
381        );
382        assert_eq!(
383            result,
384            vec![
385                "SAVEPOINT s1",
386                "RELEASE SAVEPOINT s1",
387                "SAVEPOINT s2",
388                "RELEASE SAVEPOINT s2"
389            ]
390        );
391    }
392
393    #[test]
394    fn test_rollback_savepoint() {
395        let result = fingerprint_many(
396            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
397            None,
398        );
399        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
400    }
401
402    #[test]
403    fn test_select() {
404        let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
405        assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
406    }
407
408    #[test]
409    fn test_select_single_value() {
410        let result = fingerprint_many(vec!["SELECT 1"], None);
411        assert_eq!(result, vec!["SELECT ..."]);
412    }
413
414    #[test]
415    fn test_select_with_from_quoted() {
416        let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
417        assert_eq!(result, vec!["SELECT ... FROM c.d"]);
418    }
419
420    #[test]
421    fn test_select_with_from_join() {
422        let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
423        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
424    }
425
426    #[test]
427    fn test_select_with_from_inner_join_quoted() {
428        let result = fingerprint_many(
429            vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
430            None,
431        );
432        assert_eq!(result, vec!["SELECT ... FROM c INNER JOIN d ON ..."]);
433    }
434
435    #[test]
436    fn test_select_with_from_left_outer_join_quoted() {
437        let result = fingerprint_many(
438            vec!["SELECT a, b FROM c LEFT OUTER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
439            None,
440        );
441        assert_eq!(result, vec!["SELECT ... FROM c LEFT OUTER JOIN d ON ..."]);
442    }
443
444    #[test]
445    fn test_select_with_group_by() {
446        let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
447        assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
448    }
449
450    #[test]
451    fn test_select_with_order_by() {
452        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
453        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
454    }
455
456    #[test]
457    fn test_select_with_order_by_more() {
458        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
459        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
460    }
461
462    #[test]
463    fn test_select_with_limit_offset() {
464        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
465        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
466    }
467
468    #[test]
469    fn test_select_union() {
470        let result = fingerprint_many(
471            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
472            None,
473        );
474        assert_eq!(
475            result,
476            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
477        );
478    }
479
480    #[test]
481    fn test_select_except() {
482        let result = fingerprint_many(
483            vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
484            None,
485        );
486        assert_eq!(
487            result,
488            vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
489        );
490    }
491    #[test]
492    fn test_select_intersect() {
493        let result = fingerprint_many(
494            vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
495            None,
496        );
497        assert_eq!(
498            result,
499            vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
500        );
501    }
502
503    #[test]
504    fn test_declare_cursor() {
505        let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
506        assert_eq!(
507            result,
508            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
509        );
510    }
511
512    #[test]
513    fn test_insert() {
514        let result = fingerprint_many(
515            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
516            None,
517        );
518        assert_eq!(
519            result,
520            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
521        );
522    }
523
524    #[test]
525    fn test_insert_select() {
526        let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
527        assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
528    }
529
530    #[test]
531    fn test_insert_on_conflict() {
532        let result = fingerprint_many(
533            vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
534            None,
535        );
536        assert_eq!(
537            result,
538            vec![
539                "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
540            ]
541        );
542    }
543
544    #[test]
545    fn test_update() {
546        let result = fingerprint_many(
547            vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
548            None,
549        );
550        assert_eq!(
551            result,
552            vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
553        );
554    }
555
556    #[test]
557    fn test_delete() {
558        let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
559        assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
560    }
561}