sql_fingerprint/
lib.rs

1use sqlparser::ast::{
2    Expr, Ident, OrderBy, OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan,
3};
4use sqlparser::dialect::{Dialect, GenericDialect};
5use sqlparser::parser::Parser;
6use sqlparser::tokenizer::Span;
7use std::collections::HashMap;
8
9pub fn fingerprint(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
10    let dialect = dialect.unwrap_or(&GenericDialect {});
11    let mut savepoint_simple_ids: HashMap<String, String> = HashMap::new();
12    input
13        .iter()
14        .map(|sql| {
15            let ast = Parser::parse_sql(dialect, sql).unwrap();
16            ast.into_iter()
17                .map(|mut stmt| simplify_statement(&mut stmt, &mut savepoint_simple_ids))
18                .collect::<Vec<_>>()
19                .join(" ")
20        })
21        .collect()
22}
23
24fn simplify_statement(
25    stmt: &mut Statement,
26    savepoint_simple_ids: &mut HashMap<String, String>,
27) -> String {
28    match stmt {
29        Statement::Savepoint { name } => {
30            let savepoint_id = format!("s{}", savepoint_simple_ids.len() + 1);
31            savepoint_simple_ids.insert(name.value.clone(), savepoint_id.clone());
32            *name = Ident::new(savepoint_id);
33        }
34        Statement::ReleaseSavepoint { name } => {
35            let savepoint_id = savepoint_simple_ids.get(&name.value).unwrap().clone();
36            *name = Ident::new(savepoint_id);
37        }
38        Statement::Rollback {
39            savepoint: Some(name),
40            ..
41        } => {
42            let savepoint_id = savepoint_simple_ids.get(&name.value).unwrap().clone();
43            *name = Ident::new(savepoint_id);
44        }
45        Statement::Query(query) => {
46            simplify_query(query);
47        }
48        Statement::Declare { stmts } => {
49            for stmt in stmts {
50                if stmt.names.len() > 0 {
51                    stmt.names = vec![Ident::new("...")];
52                }
53                if let Some(for_query) = &mut stmt.for_query {
54                    simplify_query(for_query);
55                }
56            }
57        }
58        Statement::Insert(insert) => {
59            if insert.columns.len() > 0 {
60                insert.columns = vec![Ident::new("...")];
61            }
62            if let Some(source) = &mut insert.source {
63                if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
64                    values.rows = vec![vec![Expr::Value(ValueWithSpan {
65                        value: Value::Placeholder("...".to_string()),
66                        span: Span::empty(),
67                    })]];
68                }
69            }
70            if let Some(returning) = &mut insert.returning {
71                if returning.len() > 0 {
72                    *returning = vec![SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan {
73                        value: Value::Placeholder("...".to_string()),
74                        span: Span::empty(),
75                    }))];
76                }
77            }
78        }
79        _ => {}
80    }
81    stmt.to_string()
82}
83
84fn simplify_query(query: &mut Query) {
85    if let SetExpr::Select(select) = query.body.as_mut() {
86        if select.projection.len() > 0 {
87            if let Some(item) = select.projection.first_mut() {
88                match item {
89                    SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
90                        *item = SelectItem::UnnamedExpr(Expr::Value(ValueWithSpan {
91                            value: Value::Placeholder("...".to_string()),
92                            span: Span::empty(),
93                        }));
94                    }
95                    _ => {}
96                }
97            }
98            select.projection.truncate(1);
99        }
100    }
101    if let Some(order_by) = &mut query.order_by {
102        let OrderBy { kind, .. } = order_by;
103        if let OrderByKind::Expressions(expressions) = kind {
104            if expressions.len() > 0 {
105                if let Some(expr) = expressions.first_mut() {
106                    expr.expr = Expr::Value(ValueWithSpan {
107                        value: Value::Placeholder("...".to_string()),
108                        span: Span::empty(),
109                    });
110                }
111                expressions.truncate(1);
112            }
113        }
114    }
115}
116
117#[cfg(test)]
118mod tests {
119    use super::*;
120
121    #[test]
122    fn test_comments_dropped() {
123        let result = fingerprint(vec!["SELECT 123 /* magic value */"], None);
124        assert_eq!(result, vec!["SELECT ..."]);
125    }
126
127    #[test]
128    fn test_savepoint() {
129        let result = fingerprint(vec!["SAVEPOINT \"s1234\""], None);
130        assert_eq!(result, vec!["SAVEPOINT s1"]);
131    }
132
133    #[test]
134    fn test_multiple_savepoints() {
135        let result = fingerprint(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
136        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
137    }
138
139    #[test]
140    fn test_duplicate_savepoints() {
141        let result = fingerprint(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
142        assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
143    }
144
145    #[test]
146    fn test_release_savepoints() {
147        let result = fingerprint(
148            vec![
149                "SAVEPOINT \"s1234\"",
150                "RELEASE SAVEPOINT \"s1234\"",
151                "SAVEPOINT \"s2345\"",
152                "RELEASE SAVEPOINT \"s2345\"",
153            ],
154            None,
155        );
156        assert_eq!(
157            result,
158            vec![
159                "SAVEPOINT s1",
160                "RELEASE SAVEPOINT s1",
161                "SAVEPOINT s2",
162                "RELEASE SAVEPOINT s2"
163            ]
164        );
165    }
166
167    #[test]
168    fn test_rollback_savepoint() {
169        let result = fingerprint(
170            vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
171            None,
172        );
173        assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
174    }
175
176    #[test]
177    fn test_select() {
178        let result = fingerprint(vec!["SELECT 1"], None);
179        assert_eq!(result, vec!["SELECT ..."]);
180    }
181
182    #[test]
183    fn test_select_with_from() {
184        let result = fingerprint(vec!["SELECT a, b FROM c"], None);
185        assert_eq!(result, vec!["SELECT ... FROM c"]);
186    }
187
188    #[test]
189    fn test_select_with_from_join() {
190        let result = fingerprint(vec!["SELECT a, b FROM c JOIN d"], None);
191        assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
192    }
193
194    #[test]
195    fn test_select_with_order_by() {
196        let result = fingerprint(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
197        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
198    }
199
200    #[test]
201    fn test_select_with_order_by_more() {
202        let result = fingerprint(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
203        assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
204    }
205
206    #[test]
207    fn test_declare_cursor() {
208        let result = fingerprint(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
209        assert_eq!(
210            result,
211            vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
212        );
213    }
214
215    #[test]
216    fn test_insert() {
217        let result = fingerprint(
218            vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
219            None,
220        );
221        assert_eq!(
222            result,
223            vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
224        );
225    }
226}