sql_fingerprint/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4    AssignmentTarget, Delete, Expr, Ident, Insert, ObjectName, ObjectNamePart, Offset, OrderBy,
5    OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
6};
7use sqlparser::dialect::{Dialect, GenericDialect};
8use sqlparser::parser::Parser;
9use sqlparser::tokenizer::Span;
10use std::collections::HashMap;
11use std::ops::ControlFlow;
12
13/// Fingerprint a single SQL string.
14///
15/// Unparseable SQL is returned as-is.
16///
17/// # Example
18/// ```
19/// use sql_fingerprint::fingerprint_one;
20///
21/// let result = fingerprint_one("SELECT a, b FROM c ORDER BY b", None);
22/// assert_eq!(result, "SELECT ... FROM c ORDER BY ...");
23/// ```
24pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
25    fingerprint_many(vec![input], dialect).join(" ")
26}
27
28/// Fingerprint multiple SQL strings.
29/// Doing so for a batch of strings allows sharing some state, such as savepoint ID aliases.
30///
31/// Unparseable SQL is returned as-is.
32///
33/// # Example
34/// ```
35/// use sql_fingerprint::fingerprint_many;
36///
37/// let result = fingerprint_many(vec!["SELECT a, b FROM c", "SELECT b, c FROM d"], None);
38/// assert_eq!(result, vec!["SELECT ... FROM c", "SELECT ... FROM d"]);
39/// ```
40pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
41    let dialect = dialect.unwrap_or(&GenericDialect {});
42
43    let mut savepoint_visitor = SavepointVisitor::new();
44
45    input
46        .iter()
47        .map(|sql| match Parser::parse_sql(dialect, sql) {
48            Ok(mut ast) => {
49                for stmt in &mut ast {
50                    stmt.visit(&mut savepoint_visitor);
51                }
52
53                ast.into_iter()
54                    .map(|stmt| stmt.to_string())
55                    .collect::<Vec<_>>()
56                    .join(" ")
57            }
58            Err(_) => sql.to_string(),
59        })
60        .collect()
61}
62
63struct SavepointVisitor {
64    savepoint_ids: HashMap<String, String>,
65}
66
67impl SavepointVisitor {
68    fn new() -> Self {
69        SavepointVisitor {
70            savepoint_ids: HashMap::new(),
71        }
72    }
73}
74
75impl VisitorMut for SavepointVisitor {
76    type Break = ();
77
78    fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
79        match stmt {
80            Statement::Savepoint { name } => {
81                let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
82                self.savepoint_ids
83                    .insert(name.value.clone(), savepoint_id.clone());
84                *name = Ident::new(savepoint_id);
85            }
86            Statement::ReleaseSavepoint { name } => {
87                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
88                    *name = Ident::new(savepoint_id);
89                }
90            }
91            Statement::Rollback {
92                savepoint: Some(name),
93                ..
94            } => {
95                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
96                    *name = Ident::new(savepoint_id);
97                }
98            }
99            Statement::Declare { stmts } => {
100                for stmt in stmts {
101                    if stmt.names.len() > 0 {
102                        stmt.names = vec![Ident::new("...")];
103                    }
104                }
105            }
106            Statement::Insert(Insert {
107                columns,
108                source,
109                returning,
110                ..
111            }) => {
112                if columns.len() > 0 {
113                    *columns = vec![Ident::new("...")];
114                }
115                if let Some(source) = source {
116                    if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
117                        values.rows = vec![vec![placeholder_value()]];
118                    }
119                }
120                if let Some(returning) = returning {
121                    if returning.len() > 0 {
122                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
123                    }
124                }
125            }
126            Statement::Update {
127                assignments,
128                selection,
129                returning,
130                ..
131            } => {
132                if assignments.len() > 0 {
133                    *assignments = vec![sqlparser::ast::Assignment {
134                        target: AssignmentTarget::ColumnName(ObjectName(vec![
135                            ObjectNamePart::Identifier(Ident::new("...")),
136                        ])),
137                        value: placeholder_value(),
138                    }];
139                }
140                if let Some(selection) = selection {
141                    *selection = placeholder_value();
142                }
143                if let Some(returning) = returning {
144                    if returning.len() > 0 {
145                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
146                    }
147                }
148            }
149            Statement::Delete(Delete {
150                selection,
151                returning,
152                ..
153            }) => {
154                if let Some(selection) = selection {
155                    *selection = placeholder_value();
156                }
157                if let Some(returning) = returning {
158                    if returning.len() > 0 {
159                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
160                    }
161                }
162            }
163            _ => {}
164        }
165        ControlFlow::Continue(())
166    }
167
168    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
169        if let SetExpr::Select(select) = query.body.as_mut() {
170            if select.projection.len() > 0 {
171                if let Some(item) = select.projection.first_mut() {
172                    match item {
173                        SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
174                            *item = SelectItem::UnnamedExpr(placeholder_value());
175                        }
176                        _ => {}
177                    }
178                }
179                select.projection.truncate(1);
180            }
181
182            if let Some(selection) = &mut select.selection {
183                *selection = placeholder_value();
184            }
185        }
186        if let Some(order_by) = &mut query.order_by {
187            let OrderBy { kind, .. } = order_by;
188            if let OrderByKind::Expressions(expressions) = kind {
189                if expressions.len() > 0 {
190                    if let Some(expr) = expressions.first_mut() {
191                        expr.expr = placeholder_value();
192                    }
193                    expressions.truncate(1);
194                }
195            }
196        }
197        if let Some(limit) = &mut query.limit {
198            *limit = placeholder_value();
199        }
200        if let Some(Offset { value, .. }) = &mut query.offset {
201            *value = placeholder_value();
202        }
203        ControlFlow::Continue(())
204    }
205
206    fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
207        for part in _relation.0.iter_mut() {
208            match part {
209                ObjectNamePart::Identifier(ident) => {
210                    maybe_unquote_ident(ident);
211                }
212            }
213        }
214        ControlFlow::Continue(())
215    }
216
217    fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
218        match _expr {
219            Expr::Identifier(ident) => {
220                maybe_unquote_ident(ident);
221            }
222            Expr::CompoundIdentifier(idents) => {
223                for ident in idents {
224                    maybe_unquote_ident(ident);
225                }
226            }
227            _ => {}
228        }
229        ControlFlow::Continue(())
230    }
231}
232
233fn placeholder_value() -> Expr {
234    Expr::Value(ValueWithSpan {
235        value: Value::Placeholder("...".to_string()),
236        span: Span::empty(),
237    })
238}
239
240fn maybe_unquote_ident(ident: &mut Ident) -> () {
241    let Ident {
242        value, quote_style, ..
243    } = ident;
244
245    if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
246        *quote_style = None;
247    }
248}
249
250#[cfg(test)]
251mod tests {
252    use super::*;
253
254    #[test]
255    fn test_fingerprint_one() {
256        let result = fingerprint_one("SELECT 123", None);
257        assert_eq!(result, "SELECT ...");
258    }
259
260    #[test]
261    fn test_empty() {
262        let result = fingerprint_many(vec![""], None);
263        assert_eq!(result, vec![""]);
264    }
265
266    #[test]
267    fn test_unparseable() {
268        let result = fingerprint_many(vec!["SELECT  SELECT  SELECT  SELECT"], None);
269        assert_eq!(result, vec!["SELECT  SELECT  SELECT  SELECT"]);
270    }
271
272    #[test]
273    fn test_comments_dropped() {
274        let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
275        assert_eq!(result, vec!["SELECT ..."]);
276    }
277
278    #[test]
279    fn test_savepoint() {
280        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
281        assert_eq!(result, vec!["SAVEPOINT s1"]);
282    }
283
284    #[test]
285    fn test_multiple_savepoints() {
286        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
287        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
288    }
289
290    #[test]
291    fn test_duplicate_savepoints() {
292        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
293        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
294    }
295
296    #[test]
297    fn test_release_savepoints() {
298        let result = fingerprint_many(
299            vec![
300                "SAVEPOINT \"s1234\"",
301                "RELEASE SAVEPOINT \"s1234\"",
302                "SAVEPOINT \"s2345\"",
303                "RELEASE SAVEPOINT \"s2345\"",
304            ],
305            None,
306        );
307        assert_eq!(
308            result,
309            vec![
310                "SAVEPOINT s1",
311                "RELEASE SAVEPOINT s1",
312                "SAVEPOINT s2",
313                "RELEASE SAVEPOINT s2"
314            ]
315        );
316    }
317
318    #[test]
319    fn test_rollback_savepoint() {
320        let result = fingerprint_many(
321            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
322            None,
323        );
324        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
325    }
326
327    #[test]
328    fn test_select() {
329        let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
330        assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
331    }
332
333    #[test]
334    fn test_select_single_value() {
335        let result = fingerprint_many(vec!["SELECT 1"], None);
336        assert_eq!(result, vec!["SELECT ..."]);
337    }
338
339    #[test]
340    fn test_select_with_from_quoted() {
341        let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
342        assert_eq!(result, vec!["SELECT ... FROM c.d"]);
343    }
344
345    #[test]
346    fn test_select_with_from_join() {
347        let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
348        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
349    }
350
351    #[test]
352    fn test_select_with_from_join_quoted() {
353        let result = fingerprint_many(
354            vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
355            None,
356        );
357        assert_eq!(
358            result,
359            vec!["SELECT ... FROM c INNER JOIN d ON (d.a = c.a)"]
360        );
361    }
362
363    #[test]
364    fn test_select_with_order_by() {
365        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
366        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
367    }
368
369    #[test]
370    fn test_select_with_order_by_more() {
371        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
372        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
373    }
374
375    #[test]
376    fn test_select_with_limit_offset() {
377        let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
378        assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
379    }
380
381    #[test]
382    fn test_select_union() {
383        let result = fingerprint_many(
384            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
385            None,
386        );
387        assert_eq!(
388            result,
389            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
390        );
391    }
392
393    #[test]
394    fn test_select_except() {
395        let result = fingerprint_many(
396            vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
397            None,
398        );
399        assert_eq!(
400            result,
401            vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
402        );
403    }
404    #[test]
405    fn test_select_intersect() {
406        let result = fingerprint_many(
407            vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
408            None,
409        );
410        assert_eq!(
411            result,
412            vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
413        );
414    }
415
416    #[test]
417    fn test_declare_cursor() {
418        let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
419        assert_eq!(
420            result,
421            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
422        );
423    }
424
425    #[test]
426    fn test_insert() {
427        let result = fingerprint_many(
428            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
429            None,
430        );
431        assert_eq!(
432            result,
433            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
434        );
435    }
436
437    #[test]
438    fn test_insert_select() {
439        let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
440        assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
441    }
442
443    #[test]
444    fn test_update() {
445        let result = fingerprint_many(
446            vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
447            None,
448        );
449        assert_eq!(
450            result,
451            vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
452        );
453    }
454
455    #[test]
456    fn test_delete() {
457        let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
458        assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
459    }
460}