sql_fingerprint/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4    Assignment, AssignmentTarget, ConflictTarget, Delete, DoUpdate, Expr, Ident, Insert,
5    ObjectName, ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy,
6    OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
7};
8use sqlparser::dialect::{Dialect, GenericDialect};
9use sqlparser::parser::Parser;
10use sqlparser::tokenizer::Span;
11use std::collections::HashMap;
12use std::ops::ControlFlow;
13
14/// Fingerprint a single SQL string.
15///
16/// Unparseable SQL is returned as-is.
17///
18/// # Example
19/// ```
20/// use sql_fingerprint::fingerprint_one;
21///
22/// let result = fingerprint_one("SELECT a, b FROM c ORDER BY b", None);
23/// assert_eq!(result, "SELECT ... FROM c ORDER BY ...");
24/// ```
25pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
26    fingerprint_many(vec![input], dialect).join(" ")
27}
28
29/// Fingerprint multiple SQL strings.
30/// Doing so for a batch of strings allows sharing some state, such as savepoint ID aliases.
31///
32/// Unparseable SQL is returned as-is.
33///
34/// # Example
35/// ```
36/// use sql_fingerprint::fingerprint_many;
37///
38/// let result = fingerprint_many(vec!["SELECT a, b FROM c", "SELECT b, c FROM d"], None);
39/// assert_eq!(result, vec!["SELECT ... FROM c", "SELECT ... FROM d"]);
40/// ```
41pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
42    let dialect = dialect.unwrap_or(&GenericDialect {});
43
44    let mut savepoint_visitor = SavepointVisitor::new();
45
46    input
47        .iter()
48        .map(|sql| match Parser::parse_sql(dialect, sql) {
49            Ok(mut ast) => {
50                for stmt in &mut ast {
51                    stmt.visit(&mut savepoint_visitor);
52                }
53
54                ast.into_iter()
55                    .map(|stmt| stmt.to_string())
56                    .collect::<Vec<_>>()
57                    .join(" ")
58            }
59            Err(_) => sql.to_string(),
60        })
61        .collect()
62}
63
64struct SavepointVisitor {
65    savepoint_ids: HashMap<String, String>,
66}
67
68impl SavepointVisitor {
69    fn new() -> Self {
70        SavepointVisitor {
71            savepoint_ids: HashMap::new(),
72        }
73    }
74}
75
76impl VisitorMut for SavepointVisitor {
77    type Break = ();
78
79    fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
80        match stmt {
81            Statement::Savepoint { name } => {
82                let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
83                self.savepoint_ids
84                    .insert(name.value.clone(), savepoint_id.clone());
85                *name = Ident::new(savepoint_id);
86            }
87            Statement::ReleaseSavepoint { name } => {
88                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
89                    *name = Ident::new(savepoint_id);
90                }
91            }
92            Statement::Rollback {
93                savepoint: Some(name),
94                ..
95            } => {
96                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
97                    *name = Ident::new(savepoint_id);
98                }
99            }
100            Statement::Declare { stmts } => {
101                for stmt in stmts {
102                    if stmt.names.len() > 0 {
103                        stmt.names = vec![Ident::new("...")];
104                    }
105                }
106            }
107            Statement::Insert(Insert {
108                columns,
109                source,
110                on,
111                returning,
112                ..
113            }) => {
114                if columns.len() > 0 {
115                    *columns = vec![Ident::new("...")];
116                }
117                if let Some(source) = source {
118                    if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
119                        values.rows = vec![vec![placeholder_value()]];
120                    }
121                }
122                if let Some(on) = on {
123                    match on {
124                        OnInsert::OnConflict(OnConflict {
125                            conflict_target,
126                            action,
127                        }) => {
128                            if let Some(conflict_target) = conflict_target {
129                                match conflict_target {
130                                    ConflictTarget::Columns(columns) => {
131                                        if columns.len() > 0 {
132                                            *columns = vec![Ident::new("...")];
133                                        }
134                                    }
135                                    _ => {}
136                                }
137                            }
138                            if let OnConflictAction::DoUpdate(DoUpdate {
139                                assignments,
140                                selection,
141                            }) = action
142                            {
143                                if assignments.len() > 0 {
144                                    *assignments = vec![Assignment {
145                                        target: AssignmentTarget::ColumnName(ObjectName(vec![
146                                            ObjectNamePart::Identifier(Ident::new("...")),
147                                        ])),
148                                        value: placeholder_value(),
149                                    }];
150                                }
151                                if let Some(selection) = selection {
152                                    *selection = placeholder_value();
153                                }
154                            }
155                        }
156                        _ => {}
157                    }
158                }
159                if let Some(returning) = returning {
160                    if returning.len() > 0 {
161                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
162                    }
163                }
164            }
165            Statement::Update {
166                assignments,
167                selection,
168                returning,
169                ..
170            } => {
171                if assignments.len() > 0 {
172                    *assignments = vec![sqlparser::ast::Assignment {
173                        target: AssignmentTarget::ColumnName(ObjectName(vec![
174                            ObjectNamePart::Identifier(Ident::new("...")),
175                        ])),
176                        value: placeholder_value(),
177                    }];
178                }
179                if let Some(selection) = selection {
180                    *selection = placeholder_value();
181                }
182                if let Some(returning) = returning {
183                    if returning.len() > 0 {
184                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
185                    }
186                }
187            }
188            Statement::Delete(Delete {
189                selection,
190                returning,
191                ..
192            }) => {
193                if let Some(selection) = selection {
194                    *selection = placeholder_value();
195                }
196                if let Some(returning) = returning {
197                    if returning.len() > 0 {
198                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
199                    }
200                }
201            }
202            _ => {}
203        }
204        ControlFlow::Continue(())
205    }
206
207    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
208        if let SetExpr::Select(select) = query.body.as_mut() {
209            if select.projection.len() > 0 {
210                if let Some(item) = select.projection.first_mut() {
211                    match item {
212                        SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
213                            *item = SelectItem::UnnamedExpr(placeholder_value());
214                        }
215                        _ => {}
216                    }
217                }
218                select.projection.truncate(1);
219            }
220
221            if let Some(selection) = &mut select.selection {
222                *selection = placeholder_value();
223            }
224        }
225        if let Some(order_by) = &mut query.order_by {
226            let OrderBy { kind, .. } = order_by;
227            if let OrderByKind::Expressions(expressions) = kind {
228                if expressions.len() > 0 {
229                    if let Some(expr) = expressions.first_mut() {
230                        expr.expr = placeholder_value();
231                    }
232                    expressions.truncate(1);
233                }
234            }
235        }
236        if let Some(limit) = &mut query.limit {
237            *limit = placeholder_value();
238        }
239        if let Some(Offset { value, .. }) = &mut query.offset {
240            *value = placeholder_value();
241        }
242        ControlFlow::Continue(())
243    }
244
245    fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
246        for part in _relation.0.iter_mut() {
247            match part {
248                ObjectNamePart::Identifier(ident) => {
249                    maybe_unquote_ident(ident);
250                }
251            }
252        }
253        ControlFlow::Continue(())
254    }
255
256    fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
257        match _expr {
258            Expr::Identifier(ident) => {
259                maybe_unquote_ident(ident);
260            }
261            Expr::CompoundIdentifier(idents) => {
262                for ident in idents {
263                    maybe_unquote_ident(ident);
264                }
265            }
266            _ => {}
267        }
268        ControlFlow::Continue(())
269    }
270}
271
272fn placeholder_value() -> Expr {
273    Expr::Value(ValueWithSpan {
274        value: Value::Placeholder("...".to_string()),
275        span: Span::empty(),
276    })
277}
278
279fn maybe_unquote_ident(ident: &mut Ident) -> () {
280    let Ident {
281        value, quote_style, ..
282    } = ident;
283
284    if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
285        *quote_style = None;
286    }
287}
288
289#[cfg(test)]
290mod tests {
291    use super::*;
292
293    #[test]
294    fn test_fingerprint_one() {
295        let result = fingerprint_one("SELECT 123", None);
296        assert_eq!(result, "SELECT ...");
297    }
298
299    #[test]
300    fn test_empty() {
301        let result = fingerprint_many(vec![""], None);
302        assert_eq!(result, vec![""]);
303    }
304
305    #[test]
306    fn test_unparseable() {
307        let result = fingerprint_many(vec!["SELECT  SELECT  SELECT  SELECT"], None);
308        assert_eq!(result, vec!["SELECT  SELECT  SELECT  SELECT"]);
309    }
310
311    #[test]
312    fn test_comments_dropped() {
313        let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
314        assert_eq!(result, vec!["SELECT ..."]);
315    }
316
317    #[test]
318    fn test_savepoint() {
319        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
320        assert_eq!(result, vec!["SAVEPOINT s1"]);
321    }
322
323    #[test]
324    fn test_multiple_savepoints() {
325        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
326        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
327    }
328
329    #[test]
330    fn test_duplicate_savepoints() {
331        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
332        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
333    }
334
335    #[test]
336    fn test_release_savepoints() {
337        let result = fingerprint_many(
338            vec![
339                "SAVEPOINT \"s1234\"",
340                "RELEASE SAVEPOINT \"s1234\"",
341                "SAVEPOINT \"s2345\"",
342                "RELEASE SAVEPOINT \"s2345\"",
343            ],
344            None,
345        );
346        assert_eq!(
347            result,
348            vec![
349                "SAVEPOINT s1",
350                "RELEASE SAVEPOINT s1",
351                "SAVEPOINT s2",
352                "RELEASE SAVEPOINT s2"
353            ]
354        );
355    }
356
357    #[test]
358    fn test_rollback_savepoint() {
359        let result = fingerprint_many(
360            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
361            None,
362        );
363        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
364    }
365
366    #[test]
367    fn test_select() {
368        let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
369        assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
370    }
371
372    #[test]
373    fn test_select_single_value() {
374        let result = fingerprint_many(vec!["SELECT 1"], None);
375        assert_eq!(result, vec!["SELECT ..."]);
376    }
377
378    #[test]
379    fn test_select_with_from_quoted() {
380        let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
381        assert_eq!(result, vec!["SELECT ... FROM c.d"]);
382    }
383
384    #[test]
385    fn test_select_with_from_join() {
386        let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
387        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
388    }
389
390    #[test]
391    fn test_select_with_from_join_quoted() {
392        let result = fingerprint_many(
393            vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
394            None,
395        );
396        assert_eq!(
397            result,
398            vec!["SELECT ... FROM c INNER JOIN d ON (d.a = c.a)"]
399        );
400    }
401
402    #[test]
403    fn test_select_with_order_by() {
404        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
405        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
406    }
407
408    #[test]
409    fn test_select_with_order_by_more() {
410        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
411        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
412    }
413
414    #[test]
415    fn test_select_with_limit_offset() {
416        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
417        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
418    }
419
420    #[test]
421    fn test_select_union() {
422        let result = fingerprint_many(
423            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
424            None,
425        );
426        assert_eq!(
427            result,
428            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
429        );
430    }
431
432    #[test]
433    fn test_select_except() {
434        let result = fingerprint_many(
435            vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
436            None,
437        );
438        assert_eq!(
439            result,
440            vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
441        );
442    }
443    #[test]
444    fn test_select_intersect() {
445        let result = fingerprint_many(
446            vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
447            None,
448        );
449        assert_eq!(
450            result,
451            vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
452        );
453    }
454
455    #[test]
456    fn test_declare_cursor() {
457        let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
458        assert_eq!(
459            result,
460            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
461        );
462    }
463
464    #[test]
465    fn test_insert() {
466        let result = fingerprint_many(
467            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
468            None,
469        );
470        assert_eq!(
471            result,
472            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
473        );
474    }
475
476    #[test]
477    fn test_insert_select() {
478        let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
479        assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
480    }
481
482    #[test]
483    fn test_insert_on_conflict() {
484        let result = fingerprint_many(
485            vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
486            None,
487        );
488        assert_eq!(
489            result,
490            vec![
491                "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
492            ]
493        );
494    }
495
496    #[test]
497    fn test_update() {
498        let result = fingerprint_many(
499            vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
500            None,
501        );
502        assert_eq!(
503            result,
504            vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
505        );
506    }
507
508    #[test]
509    fn test_delete() {
510        let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
511        assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
512    }
513}