sql_fingerprint/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4    Assignment, AssignmentTarget, ConflictTarget, Delete, DoUpdate, Expr, GroupByExpr, Ident,
5    Insert, ObjectName, ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy,
6    OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
7};
8use sqlparser::dialect::{Dialect, GenericDialect};
9use sqlparser::parser::Parser;
10use sqlparser::tokenizer::Span;
11use std::collections::HashMap;
12use std::ops::ControlFlow;
13
14/// Fingerprint a single SQL string.
15///
16/// Unparseable SQL is returned as-is.
17///
18/// # Example
19/// ```
20/// use sql_fingerprint::fingerprint_one;
21///
22/// let result = fingerprint_one("SELECT a, b FROM c ORDER BY b", None);
23/// assert_eq!(result, "SELECT ... FROM c ORDER BY ...");
24/// ```
25pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
26    fingerprint_many(vec![input], dialect).join(" ")
27}
28
29/// Fingerprint multiple SQL strings.
30/// Doing so for a batch of strings allows sharing some state, such as savepoint ID aliases.
31///
32/// Unparseable SQL is returned as-is.
33///
34/// # Example
35/// ```
36/// use sql_fingerprint::fingerprint_many;
37///
38/// let result = fingerprint_many(vec!["SELECT a, b FROM c", "SELECT b, c FROM d"], None);
39/// assert_eq!(result, vec!["SELECT ... FROM c", "SELECT ... FROM d"]);
40/// ```
41pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
42    let dialect = dialect.unwrap_or(&GenericDialect {});
43
44    let mut savepoint_visitor = SavepointVisitor::new();
45
46    input
47        .iter()
48        .map(|sql| match Parser::parse_sql(dialect, sql) {
49            Ok(mut ast) => {
50                for stmt in &mut ast {
51                    stmt.visit(&mut savepoint_visitor);
52                }
53
54                ast.into_iter()
55                    .map(|stmt| stmt.to_string())
56                    .collect::<Vec<_>>()
57                    .join(" ")
58            }
59            Err(_) => sql.to_string(),
60        })
61        .collect()
62}
63
64struct SavepointVisitor {
65    savepoint_ids: HashMap<String, String>,
66}
67
68impl SavepointVisitor {
69    fn new() -> Self {
70        SavepointVisitor {
71            savepoint_ids: HashMap::new(),
72        }
73    }
74}
75
76impl VisitorMut for SavepointVisitor {
77    type Break = ();
78
79    fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
80        match stmt {
81            Statement::Savepoint { name } => {
82                let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
83                self.savepoint_ids
84                    .insert(name.value.clone(), savepoint_id.clone());
85                *name = Ident::new(savepoint_id);
86            }
87            Statement::ReleaseSavepoint { name } => {
88                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
89                    *name = Ident::new(savepoint_id);
90                }
91            }
92            Statement::Rollback {
93                savepoint: Some(name),
94                ..
95            } => {
96                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
97                    *name = Ident::new(savepoint_id);
98                }
99            }
100            Statement::Declare { stmts } => {
101                for stmt in stmts {
102                    if stmt.names.len() > 0 {
103                        stmt.names = vec![Ident::new("...")];
104                    }
105                }
106            }
107            Statement::Insert(Insert {
108                columns,
109                source,
110                on,
111                returning,
112                ..
113            }) => {
114                if columns.len() > 0 {
115                    *columns = vec![Ident::new("...")];
116                }
117                if let Some(source) = source {
118                    if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
119                        values.rows = vec![vec![placeholder_value()]];
120                    }
121                }
122                if let Some(on) = on {
123                    match on {
124                        OnInsert::OnConflict(OnConflict {
125                            conflict_target,
126                            action,
127                        }) => {
128                            if let Some(conflict_target) = conflict_target {
129                                match conflict_target {
130                                    ConflictTarget::Columns(columns) => {
131                                        if columns.len() > 0 {
132                                            *columns = vec![Ident::new("...")];
133                                        }
134                                    }
135                                    _ => {}
136                                }
137                            }
138                            if let OnConflictAction::DoUpdate(DoUpdate {
139                                assignments,
140                                selection,
141                            }) = action
142                            {
143                                if assignments.len() > 0 {
144                                    *assignments = vec![Assignment {
145                                        target: AssignmentTarget::ColumnName(ObjectName(vec![
146                                            ObjectNamePart::Identifier(Ident::new("...")),
147                                        ])),
148                                        value: placeholder_value(),
149                                    }];
150                                }
151                                if let Some(selection) = selection {
152                                    *selection = placeholder_value();
153                                }
154                            }
155                        }
156                        _ => {}
157                    }
158                }
159                if let Some(returning) = returning {
160                    if returning.len() > 0 {
161                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
162                    }
163                }
164            }
165            Statement::Update {
166                assignments,
167                selection,
168                returning,
169                ..
170            } => {
171                if assignments.len() > 0 {
172                    *assignments = vec![sqlparser::ast::Assignment {
173                        target: AssignmentTarget::ColumnName(ObjectName(vec![
174                            ObjectNamePart::Identifier(Ident::new("...")),
175                        ])),
176                        value: placeholder_value(),
177                    }];
178                }
179                if let Some(selection) = selection {
180                    *selection = placeholder_value();
181                }
182                if let Some(returning) = returning {
183                    if returning.len() > 0 {
184                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
185                    }
186                }
187            }
188            Statement::Delete(Delete {
189                selection,
190                returning,
191                ..
192            }) => {
193                if let Some(selection) = selection {
194                    *selection = placeholder_value();
195                }
196                if let Some(returning) = returning {
197                    if returning.len() > 0 {
198                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
199                    }
200                }
201            }
202            _ => {}
203        }
204        ControlFlow::Continue(())
205    }
206
207    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
208        if let SetExpr::Select(select) = query.body.as_mut() {
209            if select.projection.len() > 0 {
210                if let Some(item) = select.projection.first_mut() {
211                    match item {
212                        SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
213                            *item = SelectItem::UnnamedExpr(placeholder_value());
214                        }
215                        _ => {}
216                    }
217                }
218                select.projection.truncate(1);
219            }
220
221            if let Some(selection) = &mut select.selection {
222                *selection = placeholder_value();
223            }
224
225            match &mut select.group_by {
226                GroupByExpr::Expressions(col_names, ..) => {
227                    if col_names.len() > 0 {
228                        *col_names = vec![placeholder_value()];
229                    }
230                }
231                _ => {}
232            }
233        }
234        if let Some(order_by) = &mut query.order_by {
235            let OrderBy { kind, .. } = order_by;
236            if let OrderByKind::Expressions(expressions) = kind {
237                if expressions.len() > 0 {
238                    if let Some(expr) = expressions.first_mut() {
239                        expr.expr = placeholder_value();
240                    }
241                    expressions.truncate(1);
242                }
243            }
244        }
245        if let Some(limit) = &mut query.limit {
246            *limit = placeholder_value();
247        }
248        if let Some(Offset { value, .. }) = &mut query.offset {
249            *value = placeholder_value();
250        }
251        ControlFlow::Continue(())
252    }
253
254    fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
255        for part in _relation.0.iter_mut() {
256            match part {
257                ObjectNamePart::Identifier(ident) => {
258                    maybe_unquote_ident(ident);
259                }
260            }
261        }
262        ControlFlow::Continue(())
263    }
264
265    fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
266        match _expr {
267            Expr::Identifier(ident) => {
268                maybe_unquote_ident(ident);
269            }
270            Expr::CompoundIdentifier(idents) => {
271                for ident in idents {
272                    maybe_unquote_ident(ident);
273                }
274            }
275            _ => {}
276        }
277        ControlFlow::Continue(())
278    }
279}
280
281fn placeholder_value() -> Expr {
282    Expr::Value(ValueWithSpan {
283        value: Value::Placeholder("...".to_string()),
284        span: Span::empty(),
285    })
286}
287
288fn maybe_unquote_ident(ident: &mut Ident) -> () {
289    let Ident {
290        value, quote_style, ..
291    } = ident;
292
293    if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
294        *quote_style = None;
295    }
296}
297
298#[cfg(test)]
299mod tests {
300    use super::*;
301
302    #[test]
303    fn test_fingerprint_one() {
304        let result = fingerprint_one("SELECT 123", None);
305        assert_eq!(result, "SELECT ...");
306    }
307
308    #[test]
309    fn test_empty() {
310        let result = fingerprint_many(vec![""], None);
311        assert_eq!(result, vec![""]);
312    }
313
314    #[test]
315    fn test_unparseable() {
316        let result = fingerprint_many(vec!["SELECT  SELECT  SELECT  SELECT"], None);
317        assert_eq!(result, vec!["SELECT  SELECT  SELECT  SELECT"]);
318    }
319
320    #[test]
321    fn test_comments_dropped() {
322        let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
323        assert_eq!(result, vec!["SELECT ..."]);
324    }
325
326    #[test]
327    fn test_savepoint() {
328        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
329        assert_eq!(result, vec!["SAVEPOINT s1"]);
330    }
331
332    #[test]
333    fn test_multiple_savepoints() {
334        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
335        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
336    }
337
338    #[test]
339    fn test_duplicate_savepoints() {
340        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
341        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
342    }
343
344    #[test]
345    fn test_release_savepoints() {
346        let result = fingerprint_many(
347            vec![
348                "SAVEPOINT \"s1234\"",
349                "RELEASE SAVEPOINT \"s1234\"",
350                "SAVEPOINT \"s2345\"",
351                "RELEASE SAVEPOINT \"s2345\"",
352            ],
353            None,
354        );
355        assert_eq!(
356            result,
357            vec![
358                "SAVEPOINT s1",
359                "RELEASE SAVEPOINT s1",
360                "SAVEPOINT s2",
361                "RELEASE SAVEPOINT s2"
362            ]
363        );
364    }
365
366    #[test]
367    fn test_rollback_savepoint() {
368        let result = fingerprint_many(
369            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
370            None,
371        );
372        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
373    }
374
375    #[test]
376    fn test_select() {
377        let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
378        assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
379    }
380
381    #[test]
382    fn test_select_single_value() {
383        let result = fingerprint_many(vec!["SELECT 1"], None);
384        assert_eq!(result, vec!["SELECT ..."]);
385    }
386
387    #[test]
388    fn test_select_with_from_quoted() {
389        let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
390        assert_eq!(result, vec!["SELECT ... FROM c.d"]);
391    }
392
393    #[test]
394    fn test_select_with_from_join() {
395        let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
396        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
397    }
398
399    #[test]
400    fn test_select_with_from_join_quoted() {
401        let result = fingerprint_many(
402            vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
403            None,
404        );
405        assert_eq!(
406            result,
407            vec!["SELECT ... FROM c INNER JOIN d ON (d.a = c.a)"]
408        );
409    }
410
411    #[test]
412    fn test_select_with_group_by() {
413        let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
414        assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
415    }
416
417    #[test]
418    fn test_select_with_order_by() {
419        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
420        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
421    }
422
423    #[test]
424    fn test_select_with_order_by_more() {
425        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
426        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
427    }
428
429    #[test]
430    fn test_select_with_limit_offset() {
431        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
432        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
433    }
434
435    #[test]
436    fn test_select_union() {
437        let result = fingerprint_many(
438            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
439            None,
440        );
441        assert_eq!(
442            result,
443            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
444        );
445    }
446
447    #[test]
448    fn test_select_except() {
449        let result = fingerprint_many(
450            vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
451            None,
452        );
453        assert_eq!(
454            result,
455            vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
456        );
457    }
458    #[test]
459    fn test_select_intersect() {
460        let result = fingerprint_many(
461            vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
462            None,
463        );
464        assert_eq!(
465            result,
466            vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
467        );
468    }
469
470    #[test]
471    fn test_declare_cursor() {
472        let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
473        assert_eq!(
474            result,
475            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
476        );
477    }
478
479    #[test]
480    fn test_insert() {
481        let result = fingerprint_many(
482            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
483            None,
484        );
485        assert_eq!(
486            result,
487            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
488        );
489    }
490
491    #[test]
492    fn test_insert_select() {
493        let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
494        assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
495    }
496
497    #[test]
498    fn test_insert_on_conflict() {
499        let result = fingerprint_many(
500            vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
501            None,
502        );
503        assert_eq!(
504            result,
505            vec![
506                "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
507            ]
508        );
509    }
510
511    #[test]
512    fn test_update() {
513        let result = fingerprint_many(
514            vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
515            None,
516        );
517        assert_eq!(
518            result,
519            vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
520        );
521    }
522
523    #[test]
524    fn test_delete() {
525        let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
526        assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
527    }
528}