sql_fingerprint/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4    AssignmentTarget, Delete, Expr, Ident, Insert, ObjectName, ObjectNamePart, OrderBy,
5    OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
6};
7use sqlparser::dialect::{Dialect, GenericDialect};
8use sqlparser::parser::Parser;
9use sqlparser::tokenizer::Span;
10use std::collections::HashMap;
11use std::ops::ControlFlow;
12
13/// Fingerprint a single SQL string.
14///
15/// # Example
16/// ```
17/// use sql_fingerprint::fingerprint_one;
18///
19/// let result = fingerprint_one("SELECT a, b FROM c ORDER BY b", None);
20/// assert_eq!(result, "SELECT ... FROM c ORDER BY ...");
21/// ```
22pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
23    fingerprint_many(vec![input], dialect).join(" ")
24}
25
26/// Fingerprint multiple SQL strings.
27/// Doing so for a batch of strings allows sharing some state, such as savepoint ID aliases.
28///
29/// # Example
30/// ```
31/// use sql_fingerprint::fingerprint_many;
32///
33/// let result = fingerprint_many(vec!["SELECT a, b FROM c", "SELECT b, c FROM d"], None);
34/// assert_eq!(result, vec!["SELECT ... FROM c", "SELECT ... FROM d"]);
35/// ```
36pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
37    let dialect = dialect.unwrap_or(&GenericDialect {});
38
39    let mut savepoint_visitor = SavepointVisitor::new();
40
41    input
42        .iter()
43        .map(|sql| {
44            let mut ast = Parser::parse_sql(dialect, sql).unwrap();
45
46            for stmt in &mut ast {
47                stmt.visit(&mut savepoint_visitor);
48            }
49
50            ast.into_iter()
51                .map(|stmt| stmt.to_string())
52                .collect::<Vec<_>>()
53                .join(" ")
54        })
55        .collect()
56}
57
58struct SavepointVisitor {
59    savepoint_ids: HashMap<String, String>,
60}
61
62impl SavepointVisitor {
63    fn new() -> Self {
64        SavepointVisitor {
65            savepoint_ids: HashMap::new(),
66        }
67    }
68}
69
70impl VisitorMut for SavepointVisitor {
71    type Break = ();
72
73    fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
74        match stmt {
75            Statement::Savepoint { name } => {
76                let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
77                self.savepoint_ids
78                    .insert(name.value.clone(), savepoint_id.clone());
79                *name = Ident::new(savepoint_id);
80            }
81            Statement::ReleaseSavepoint { name } => {
82                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
83                    *name = Ident::new(savepoint_id);
84                }
85            }
86            Statement::Rollback {
87                savepoint: Some(name),
88                ..
89            } => {
90                if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
91                    *name = Ident::new(savepoint_id);
92                }
93            }
94            Statement::Declare { stmts } => {
95                for stmt in stmts {
96                    if stmt.names.len() > 0 {
97                        stmt.names = vec![Ident::new("...")];
98                    }
99                }
100            }
101            Statement::Insert(Insert {
102                columns,
103                source,
104                returning,
105                ..
106            }) => {
107                if columns.len() > 0 {
108                    *columns = vec![Ident::new("...")];
109                }
110                if let Some(source) = source {
111                    if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
112                        values.rows = vec![vec![placeholder_value()]];
113                    }
114                }
115                if let Some(returning) = returning {
116                    if returning.len() > 0 {
117                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
118                    }
119                }
120            }
121            Statement::Update {
122                assignments,
123                selection,
124                returning,
125                ..
126            } => {
127                if assignments.len() > 0 {
128                    *assignments = vec![sqlparser::ast::Assignment {
129                        target: AssignmentTarget::ColumnName(ObjectName(vec![
130                            ObjectNamePart::Identifier(Ident::new("...")),
131                        ])),
132                        value: placeholder_value(),
133                    }];
134                }
135                if let Some(selection) = selection {
136                    *selection = placeholder_value();
137                }
138                if let Some(returning) = returning {
139                    if returning.len() > 0 {
140                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
141                    }
142                }
143            }
144            Statement::Delete(Delete {
145                selection,
146                returning,
147                ..
148            }) => {
149                if let Some(selection) = selection {
150                    *selection = placeholder_value();
151                }
152                if let Some(returning) = returning {
153                    if returning.len() > 0 {
154                        *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
155                    }
156                }
157            }
158            _ => {}
159        }
160        ControlFlow::Continue(())
161    }
162
163    fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
164        if let SetExpr::Select(select) = query.body.as_mut() {
165            if select.projection.len() > 0 {
166                if let Some(item) = select.projection.first_mut() {
167                    match item {
168                        SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
169                            *item = SelectItem::UnnamedExpr(placeholder_value());
170                        }
171                        _ => {}
172                    }
173                }
174                select.projection.truncate(1);
175            }
176        }
177        if let Some(order_by) = &mut query.order_by {
178            let OrderBy { kind, .. } = order_by;
179            if let OrderByKind::Expressions(expressions) = kind {
180                if expressions.len() > 0 {
181                    if let Some(expr) = expressions.first_mut() {
182                        expr.expr = placeholder_value();
183                    }
184                    expressions.truncate(1);
185                }
186            }
187        }
188        ControlFlow::Continue(())
189    }
190
191    fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
192        for part in _relation.0.iter_mut() {
193            match part {
194                ObjectNamePart::Identifier(ident) => {
195                    if ident.value.chars().all(|c| c.is_alphanumeric() || c == '_') {
196                        ident.quote_style = None;
197                    }
198                }
199            }
200        }
201        ControlFlow::Continue(())
202    }
203}
204
205fn placeholder_value() -> Expr {
206    Expr::Value(ValueWithSpan {
207        value: Value::Placeholder("...".to_string()),
208        span: Span::empty(),
209    })
210}
211
212#[cfg(test)]
213mod tests {
214    use super::*;
215
216    #[test]
217    fn test_fingerprint_one() {
218        let result = fingerprint_one("SELECT 123", None);
219        assert_eq!(result, "SELECT ...");
220    }
221
222    #[test]
223    fn test_empty() {
224        let result = fingerprint_many(vec![""], None);
225        assert_eq!(result, vec![""]);
226    }
227
228    #[test]
229    fn test_comments_dropped() {
230        let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
231        assert_eq!(result, vec!["SELECT ..."]);
232    }
233
234    #[test]
235    fn test_savepoint() {
236        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
237        assert_eq!(result, vec!["SAVEPOINT s1"]);
238    }
239
240    #[test]
241    fn test_multiple_savepoints() {
242        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
243        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
244    }
245
246    #[test]
247    fn test_duplicate_savepoints() {
248        let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
249        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
250    }
251
252    #[test]
253    fn test_release_savepoints() {
254        let result = fingerprint_many(
255            vec![
256                "SAVEPOINT \"s1234\"",
257                "RELEASE SAVEPOINT \"s1234\"",
258                "SAVEPOINT \"s2345\"",
259                "RELEASE SAVEPOINT \"s2345\"",
260            ],
261            None,
262        );
263        assert_eq!(
264            result,
265            vec![
266                "SAVEPOINT s1",
267                "RELEASE SAVEPOINT s1",
268                "SAVEPOINT s2",
269                "RELEASE SAVEPOINT s2"
270            ]
271        );
272    }
273
274    #[test]
275    fn test_rollback_savepoint() {
276        let result = fingerprint_many(
277            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
278            None,
279        );
280        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
281    }
282
283    #[test]
284    fn test_select() {
285        let result = fingerprint_many(vec!["SELECT 1"], None);
286        assert_eq!(result, vec!["SELECT ..."]);
287    }
288
289    #[test]
290    fn test_select_with_from() {
291        let result = fingerprint_many(vec!["SELECT a, b FROM c"], None);
292        assert_eq!(result, vec!["SELECT ... FROM c"]);
293    }
294
295    #[test]
296    fn test_select_with_from_quoted() {
297        let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
298        assert_eq!(result, vec!["SELECT ... FROM c.d"]);
299    }
300
301    #[test]
302    fn test_select_with_from_join() {
303        let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
304        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
305    }
306
307    #[test]
308    fn test_select_with_order_by() {
309        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
310        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
311    }
312
313    #[test]
314    fn test_select_with_order_by_more() {
315        let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
316        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
317    }
318
319    #[test]
320    fn test_select_union() {
321        let result = fingerprint_many(
322            vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
323            None,
324        );
325        assert_eq!(
326            result,
327            vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
328        );
329    }
330
331    #[test]
332    fn test_select_except() {
333        let result = fingerprint_many(
334            vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
335            None,
336        );
337        assert_eq!(
338            result,
339            vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
340        );
341    }
342    #[test]
343    fn test_select_intersect() {
344        let result = fingerprint_many(
345            vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
346            None,
347        );
348        assert_eq!(
349            result,
350            vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
351        );
352    }
353
354    #[test]
355    fn test_declare_cursor() {
356        let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
357        assert_eq!(
358            result,
359            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
360        );
361    }
362
363    #[test]
364    fn test_insert() {
365        let result = fingerprint_many(
366            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
367            None,
368        );
369        assert_eq!(
370            result,
371            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
372        );
373    }
374
375    #[test]
376    fn test_insert_select() {
377        let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
378        assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
379    }
380
381    #[test]
382    fn test_update() {
383        let result = fingerprint_many(
384            vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
385            None,
386        );
387        assert_eq!(
388            result,
389            vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
390        );
391    }
392
393    #[test]
394    fn test_delete() {
395        let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
396        assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
397    }
398}