1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4 AssignmentTarget, Delete, Expr, Ident, Insert, ObjectName, ObjectNamePart, Offset, OrderBy,
5 OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
6};
7use sqlparser::dialect::{Dialect, GenericDialect};
8use sqlparser::parser::Parser;
9use sqlparser::tokenizer::Span;
10use std::collections::HashMap;
11use std::ops::ControlFlow;
12
13pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
25 fingerprint_many(vec![input], dialect).join(" ")
26}
27
28pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
41 let dialect = dialect.unwrap_or(&GenericDialect {});
42
43 let mut savepoint_visitor = SavepointVisitor::new();
44
45 input
46 .iter()
47 .map(|sql| match Parser::parse_sql(dialect, sql) {
48 Ok(mut ast) => {
49 for stmt in &mut ast {
50 stmt.visit(&mut savepoint_visitor);
51 }
52
53 ast.into_iter()
54 .map(|stmt| stmt.to_string())
55 .collect::<Vec<_>>()
56 .join(" ")
57 }
58 Err(_) => sql.to_string(),
59 })
60 .collect()
61}
62
63struct SavepointVisitor {
64 savepoint_ids: HashMap<String, String>,
65}
66
67impl SavepointVisitor {
68 fn new() -> Self {
69 SavepointVisitor {
70 savepoint_ids: HashMap::new(),
71 }
72 }
73}
74
75impl VisitorMut for SavepointVisitor {
76 type Break = ();
77
78 fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
79 match stmt {
80 Statement::Savepoint { name } => {
81 let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
82 self.savepoint_ids
83 .insert(name.value.clone(), savepoint_id.clone());
84 *name = Ident::new(savepoint_id);
85 }
86 Statement::ReleaseSavepoint { name } => {
87 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
88 *name = Ident::new(savepoint_id);
89 }
90 }
91 Statement::Rollback {
92 savepoint: Some(name),
93 ..
94 } => {
95 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
96 *name = Ident::new(savepoint_id);
97 }
98 }
99 Statement::Declare { stmts } => {
100 for stmt in stmts {
101 if stmt.names.len() > 0 {
102 stmt.names = vec![Ident::new("...")];
103 }
104 }
105 }
106 Statement::Insert(Insert {
107 columns,
108 source,
109 returning,
110 ..
111 }) => {
112 if columns.len() > 0 {
113 *columns = vec![Ident::new("...")];
114 }
115 if let Some(source) = source {
116 if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
117 values.rows = vec![vec![placeholder_value()]];
118 }
119 }
120 if let Some(returning) = returning {
121 if returning.len() > 0 {
122 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
123 }
124 }
125 }
126 Statement::Update {
127 assignments,
128 selection,
129 returning,
130 ..
131 } => {
132 if assignments.len() > 0 {
133 *assignments = vec![sqlparser::ast::Assignment {
134 target: AssignmentTarget::ColumnName(ObjectName(vec![
135 ObjectNamePart::Identifier(Ident::new("...")),
136 ])),
137 value: placeholder_value(),
138 }];
139 }
140 if let Some(selection) = selection {
141 *selection = placeholder_value();
142 }
143 if let Some(returning) = returning {
144 if returning.len() > 0 {
145 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
146 }
147 }
148 }
149 Statement::Delete(Delete {
150 selection,
151 returning,
152 ..
153 }) => {
154 if let Some(selection) = selection {
155 *selection = placeholder_value();
156 }
157 if let Some(returning) = returning {
158 if returning.len() > 0 {
159 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
160 }
161 }
162 }
163 _ => {}
164 }
165 ControlFlow::Continue(())
166 }
167
168 fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
169 if let SetExpr::Select(select) = query.body.as_mut() {
170 if select.projection.len() > 0 {
171 if let Some(item) = select.projection.first_mut() {
172 match item {
173 SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
174 *item = SelectItem::UnnamedExpr(placeholder_value());
175 }
176 _ => {}
177 }
178 }
179 select.projection.truncate(1);
180 }
181
182 if let Some(selection) = &mut select.selection {
183 *selection = placeholder_value();
184 }
185 }
186 if let Some(order_by) = &mut query.order_by {
187 let OrderBy { kind, .. } = order_by;
188 if let OrderByKind::Expressions(expressions) = kind {
189 if expressions.len() > 0 {
190 if let Some(expr) = expressions.first_mut() {
191 expr.expr = placeholder_value();
192 }
193 expressions.truncate(1);
194 }
195 }
196 }
197 if let Some(limit) = &mut query.limit {
198 *limit = placeholder_value();
199 }
200 if let Some(Offset { value, .. }) = &mut query.offset {
201 *value = placeholder_value();
202 }
203 ControlFlow::Continue(())
204 }
205
206 fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
207 for part in _relation.0.iter_mut() {
208 match part {
209 ObjectNamePart::Identifier(ident) => {
210 maybe_unquote_ident(ident);
211 }
212 }
213 }
214 ControlFlow::Continue(())
215 }
216
217 fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
218 match _expr {
219 Expr::Identifier(ident) => {
220 maybe_unquote_ident(ident);
221 }
222 Expr::CompoundIdentifier(idents) => {
223 for ident in idents {
224 maybe_unquote_ident(ident);
225 }
226 }
227 _ => {}
228 }
229 ControlFlow::Continue(())
230 }
231}
232
233fn placeholder_value() -> Expr {
234 Expr::Value(ValueWithSpan {
235 value: Value::Placeholder("...".to_string()),
236 span: Span::empty(),
237 })
238}
239
240fn maybe_unquote_ident(ident: &mut Ident) -> () {
241 let Ident {
242 value, quote_style, ..
243 } = ident;
244
245 if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
246 *quote_style = None;
247 }
248}
249
250#[cfg(test)]
251mod tests {
252 use super::*;
253
254 #[test]
255 fn test_fingerprint_one() {
256 let result = fingerprint_one("SELECT 123", None);
257 assert_eq!(result, "SELECT ...");
258 }
259
260 #[test]
261 fn test_empty() {
262 let result = fingerprint_many(vec![""], None);
263 assert_eq!(result, vec![""]);
264 }
265
266 #[test]
267 fn test_unparseable() {
268 let result = fingerprint_many(vec!["SELECT SELECT SELECT SELECT"], None);
269 assert_eq!(result, vec!["SELECT SELECT SELECT SELECT"]);
270 }
271
272 #[test]
273 fn test_comments_dropped() {
274 let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
275 assert_eq!(result, vec!["SELECT ..."]);
276 }
277
278 #[test]
279 fn test_savepoint() {
280 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
281 assert_eq!(result, vec!["SAVEPOINT s1"]);
282 }
283
284 #[test]
285 fn test_multiple_savepoints() {
286 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
287 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
288 }
289
290 #[test]
291 fn test_duplicate_savepoints() {
292 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
293 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
294 }
295
296 #[test]
297 fn test_release_savepoints() {
298 let result = fingerprint_many(
299 vec![
300 "SAVEPOINT \"s1234\"",
301 "RELEASE SAVEPOINT \"s1234\"",
302 "SAVEPOINT \"s2345\"",
303 "RELEASE SAVEPOINT \"s2345\"",
304 ],
305 None,
306 );
307 assert_eq!(
308 result,
309 vec![
310 "SAVEPOINT s1",
311 "RELEASE SAVEPOINT s1",
312 "SAVEPOINT s2",
313 "RELEASE SAVEPOINT s2"
314 ]
315 );
316 }
317
318 #[test]
319 fn test_rollback_savepoint() {
320 let result = fingerprint_many(
321 vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
322 None,
323 );
324 assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
325 }
326
327 #[test]
328 fn test_select() {
329 let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
330 assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
331 }
332
333 #[test]
334 fn test_select_single_value() {
335 let result = fingerprint_many(vec!["SELECT 1"], None);
336 assert_eq!(result, vec!["SELECT ..."]);
337 }
338
339 #[test]
340 fn test_select_with_from_quoted() {
341 let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
342 assert_eq!(result, vec!["SELECT ... FROM c.d"]);
343 }
344
345 #[test]
346 fn test_select_with_from_join() {
347 let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
348 assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
349 }
350
351 #[test]
352 fn test_select_with_from_join_quoted() {
353 let result = fingerprint_many(
354 vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
355 None,
356 );
357 assert_eq!(
358 result,
359 vec!["SELECT ... FROM c INNER JOIN d ON (d.a = c.a)"]
360 );
361 }
362
363 #[test]
364 fn test_select_with_order_by() {
365 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
366 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
367 }
368
369 #[test]
370 fn test_select_with_order_by_more() {
371 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
372 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
373 }
374
375 #[test]
376 fn test_select_with_limit_offset() {
377 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
378 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
379 }
380
381 #[test]
382 fn test_select_union() {
383 let result = fingerprint_many(
384 vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
385 None,
386 );
387 assert_eq!(
388 result,
389 vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
390 );
391 }
392
393 #[test]
394 fn test_select_except() {
395 let result = fingerprint_many(
396 vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
397 None,
398 );
399 assert_eq!(
400 result,
401 vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
402 );
403 }
404 #[test]
405 fn test_select_intersect() {
406 let result = fingerprint_many(
407 vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
408 None,
409 );
410 assert_eq!(
411 result,
412 vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
413 );
414 }
415
416 #[test]
417 fn test_declare_cursor() {
418 let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
419 assert_eq!(
420 result,
421 vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
422 );
423 }
424
425 #[test]
426 fn test_insert() {
427 let result = fingerprint_many(
428 vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
429 None,
430 );
431 assert_eq!(
432 result,
433 vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
434 );
435 }
436
437 #[test]
438 fn test_insert_select() {
439 let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
440 assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
441 }
442
443 #[test]
444 fn test_update() {
445 let result = fingerprint_many(
446 vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
447 None,
448 );
449 assert_eq!(
450 result,
451 vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
452 );
453 }
454
455 #[test]
456 fn test_delete() {
457 let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
458 assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
459 }
460}