1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4 Assignment, AssignmentTarget, ConflictTarget, Delete, DoUpdate, Expr, Ident, Insert,
5 ObjectName, ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy,
6 OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
7};
8use sqlparser::dialect::{Dialect, GenericDialect};
9use sqlparser::parser::Parser;
10use sqlparser::tokenizer::Span;
11use std::collections::HashMap;
12use std::ops::ControlFlow;
13
14pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
26 fingerprint_many(vec![input], dialect).join(" ")
27}
28
29pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
42 let dialect = dialect.unwrap_or(&GenericDialect {});
43
44 let mut savepoint_visitor = SavepointVisitor::new();
45
46 input
47 .iter()
48 .map(|sql| match Parser::parse_sql(dialect, sql) {
49 Ok(mut ast) => {
50 for stmt in &mut ast {
51 stmt.visit(&mut savepoint_visitor);
52 }
53
54 ast.into_iter()
55 .map(|stmt| stmt.to_string())
56 .collect::<Vec<_>>()
57 .join(" ")
58 }
59 Err(_) => sql.to_string(),
60 })
61 .collect()
62}
63
64struct SavepointVisitor {
65 savepoint_ids: HashMap<String, String>,
66}
67
68impl SavepointVisitor {
69 fn new() -> Self {
70 SavepointVisitor {
71 savepoint_ids: HashMap::new(),
72 }
73 }
74}
75
76impl VisitorMut for SavepointVisitor {
77 type Break = ();
78
79 fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
80 match stmt {
81 Statement::Savepoint { name } => {
82 let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
83 self.savepoint_ids
84 .insert(name.value.clone(), savepoint_id.clone());
85 *name = Ident::new(savepoint_id);
86 }
87 Statement::ReleaseSavepoint { name } => {
88 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
89 *name = Ident::new(savepoint_id);
90 }
91 }
92 Statement::Rollback {
93 savepoint: Some(name),
94 ..
95 } => {
96 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
97 *name = Ident::new(savepoint_id);
98 }
99 }
100 Statement::Declare { stmts } => {
101 for stmt in stmts {
102 if stmt.names.len() > 0 {
103 stmt.names = vec![Ident::new("...")];
104 }
105 }
106 }
107 Statement::Insert(Insert {
108 columns,
109 source,
110 on,
111 returning,
112 ..
113 }) => {
114 if columns.len() > 0 {
115 *columns = vec![Ident::new("...")];
116 }
117 if let Some(source) = source {
118 if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
119 values.rows = vec![vec![placeholder_value()]];
120 }
121 }
122 if let Some(on) = on {
123 match on {
124 OnInsert::OnConflict(OnConflict {
125 conflict_target,
126 action,
127 }) => {
128 if let Some(conflict_target) = conflict_target {
129 match conflict_target {
130 ConflictTarget::Columns(columns) => {
131 if columns.len() > 0 {
132 *columns = vec![Ident::new("...")];
133 }
134 }
135 _ => {}
136 }
137 }
138 if let OnConflictAction::DoUpdate(DoUpdate {
139 assignments,
140 selection,
141 }) = action
142 {
143 if assignments.len() > 0 {
144 *assignments = vec![Assignment {
145 target: AssignmentTarget::ColumnName(ObjectName(vec![
146 ObjectNamePart::Identifier(Ident::new("...")),
147 ])),
148 value: placeholder_value(),
149 }];
150 }
151 if let Some(selection) = selection {
152 *selection = placeholder_value();
153 }
154 }
155 }
156 _ => {}
157 }
158 }
159 if let Some(returning) = returning {
160 if returning.len() > 0 {
161 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
162 }
163 }
164 }
165 Statement::Update {
166 assignments,
167 selection,
168 returning,
169 ..
170 } => {
171 if assignments.len() > 0 {
172 *assignments = vec![sqlparser::ast::Assignment {
173 target: AssignmentTarget::ColumnName(ObjectName(vec![
174 ObjectNamePart::Identifier(Ident::new("...")),
175 ])),
176 value: placeholder_value(),
177 }];
178 }
179 if let Some(selection) = selection {
180 *selection = placeholder_value();
181 }
182 if let Some(returning) = returning {
183 if returning.len() > 0 {
184 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
185 }
186 }
187 }
188 Statement::Delete(Delete {
189 selection,
190 returning,
191 ..
192 }) => {
193 if let Some(selection) = selection {
194 *selection = placeholder_value();
195 }
196 if let Some(returning) = returning {
197 if returning.len() > 0 {
198 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
199 }
200 }
201 }
202 _ => {}
203 }
204 ControlFlow::Continue(())
205 }
206
207 fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
208 if let SetExpr::Select(select) = query.body.as_mut() {
209 if select.projection.len() > 0 {
210 if let Some(item) = select.projection.first_mut() {
211 match item {
212 SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
213 *item = SelectItem::UnnamedExpr(placeholder_value());
214 }
215 _ => {}
216 }
217 }
218 select.projection.truncate(1);
219 }
220
221 if let Some(selection) = &mut select.selection {
222 *selection = placeholder_value();
223 }
224 }
225 if let Some(order_by) = &mut query.order_by {
226 let OrderBy { kind, .. } = order_by;
227 if let OrderByKind::Expressions(expressions) = kind {
228 if expressions.len() > 0 {
229 if let Some(expr) = expressions.first_mut() {
230 expr.expr = placeholder_value();
231 }
232 expressions.truncate(1);
233 }
234 }
235 }
236 if let Some(limit) = &mut query.limit {
237 *limit = placeholder_value();
238 }
239 if let Some(Offset { value, .. }) = &mut query.offset {
240 *value = placeholder_value();
241 }
242 ControlFlow::Continue(())
243 }
244
245 fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
246 for part in _relation.0.iter_mut() {
247 match part {
248 ObjectNamePart::Identifier(ident) => {
249 maybe_unquote_ident(ident);
250 }
251 }
252 }
253 ControlFlow::Continue(())
254 }
255
256 fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
257 match _expr {
258 Expr::Identifier(ident) => {
259 maybe_unquote_ident(ident);
260 }
261 Expr::CompoundIdentifier(idents) => {
262 for ident in idents {
263 maybe_unquote_ident(ident);
264 }
265 }
266 _ => {}
267 }
268 ControlFlow::Continue(())
269 }
270}
271
272fn placeholder_value() -> Expr {
273 Expr::Value(ValueWithSpan {
274 value: Value::Placeholder("...".to_string()),
275 span: Span::empty(),
276 })
277}
278
279fn maybe_unquote_ident(ident: &mut Ident) -> () {
280 let Ident {
281 value, quote_style, ..
282 } = ident;
283
284 if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
285 *quote_style = None;
286 }
287}
288
289#[cfg(test)]
290mod tests {
291 use super::*;
292
293 #[test]
294 fn test_fingerprint_one() {
295 let result = fingerprint_one("SELECT 123", None);
296 assert_eq!(result, "SELECT ...");
297 }
298
299 #[test]
300 fn test_empty() {
301 let result = fingerprint_many(vec![""], None);
302 assert_eq!(result, vec![""]);
303 }
304
305 #[test]
306 fn test_unparseable() {
307 let result = fingerprint_many(vec!["SELECT SELECT SELECT SELECT"], None);
308 assert_eq!(result, vec!["SELECT SELECT SELECT SELECT"]);
309 }
310
311 #[test]
312 fn test_comments_dropped() {
313 let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
314 assert_eq!(result, vec!["SELECT ..."]);
315 }
316
317 #[test]
318 fn test_savepoint() {
319 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
320 assert_eq!(result, vec!["SAVEPOINT s1"]);
321 }
322
323 #[test]
324 fn test_multiple_savepoints() {
325 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
326 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
327 }
328
329 #[test]
330 fn test_duplicate_savepoints() {
331 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
332 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
333 }
334
335 #[test]
336 fn test_release_savepoints() {
337 let result = fingerprint_many(
338 vec![
339 "SAVEPOINT \"s1234\"",
340 "RELEASE SAVEPOINT \"s1234\"",
341 "SAVEPOINT \"s2345\"",
342 "RELEASE SAVEPOINT \"s2345\"",
343 ],
344 None,
345 );
346 assert_eq!(
347 result,
348 vec![
349 "SAVEPOINT s1",
350 "RELEASE SAVEPOINT s1",
351 "SAVEPOINT s2",
352 "RELEASE SAVEPOINT s2"
353 ]
354 );
355 }
356
357 #[test]
358 fn test_rollback_savepoint() {
359 let result = fingerprint_many(
360 vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
361 None,
362 );
363 assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
364 }
365
366 #[test]
367 fn test_select() {
368 let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
369 assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
370 }
371
372 #[test]
373 fn test_select_single_value() {
374 let result = fingerprint_many(vec!["SELECT 1"], None);
375 assert_eq!(result, vec!["SELECT ..."]);
376 }
377
378 #[test]
379 fn test_select_with_from_quoted() {
380 let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
381 assert_eq!(result, vec!["SELECT ... FROM c.d"]);
382 }
383
384 #[test]
385 fn test_select_with_from_join() {
386 let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
387 assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
388 }
389
390 #[test]
391 fn test_select_with_from_join_quoted() {
392 let result = fingerprint_many(
393 vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
394 None,
395 );
396 assert_eq!(
397 result,
398 vec!["SELECT ... FROM c INNER JOIN d ON (d.a = c.a)"]
399 );
400 }
401
402 #[test]
403 fn test_select_with_order_by() {
404 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
405 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
406 }
407
408 #[test]
409 fn test_select_with_order_by_more() {
410 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
411 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
412 }
413
414 #[test]
415 fn test_select_with_limit_offset() {
416 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
417 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
418 }
419
420 #[test]
421 fn test_select_union() {
422 let result = fingerprint_many(
423 vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
424 None,
425 );
426 assert_eq!(
427 result,
428 vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
429 );
430 }
431
432 #[test]
433 fn test_select_except() {
434 let result = fingerprint_many(
435 vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
436 None,
437 );
438 assert_eq!(
439 result,
440 vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
441 );
442 }
443 #[test]
444 fn test_select_intersect() {
445 let result = fingerprint_many(
446 vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
447 None,
448 );
449 assert_eq!(
450 result,
451 vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
452 );
453 }
454
455 #[test]
456 fn test_declare_cursor() {
457 let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
458 assert_eq!(
459 result,
460 vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
461 );
462 }
463
464 #[test]
465 fn test_insert() {
466 let result = fingerprint_many(
467 vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
468 None,
469 );
470 assert_eq!(
471 result,
472 vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
473 );
474 }
475
476 #[test]
477 fn test_insert_select() {
478 let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
479 assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
480 }
481
482 #[test]
483 fn test_insert_on_conflict() {
484 let result = fingerprint_many(
485 vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
486 None,
487 );
488 assert_eq!(
489 result,
490 vec![
491 "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
492 ]
493 );
494 }
495
496 #[test]
497 fn test_update() {
498 let result = fingerprint_many(
499 vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
500 None,
501 );
502 assert_eq!(
503 result,
504 vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
505 );
506 }
507
508 #[test]
509 fn test_delete() {
510 let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
511 assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
512 }
513}