1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4 Assignment, AssignmentTarget, ConflictTarget, Delete, Distinct, DoUpdate, Expr, GroupByExpr,
5 Ident, Insert, JoinConstraint, JoinOperator, ObjectName, ObjectNamePart, Offset, OnConflict,
6 OnConflictAction, OnInsert, OrderBy, OrderByKind, Query, SelectItem, SetExpr, Statement, Value,
7 ValueWithSpan, VisitMut, VisitorMut,
8};
9use sqlparser::dialect::{Dialect, GenericDialect};
10use sqlparser::parser::Parser;
11use sqlparser::tokenizer::Span;
12use std::collections::HashMap;
13use std::ops::ControlFlow;
14
15pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
27 fingerprint_many(vec![input], dialect).join(" ")
28}
29
30pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
43 let dialect = dialect.unwrap_or(&GenericDialect {});
44
45 let mut savepoint_visitor = SavepointVisitor::new();
46
47 input
48 .iter()
49 .map(|sql| match Parser::parse_sql(dialect, sql) {
50 Ok(mut ast) => {
51 for stmt in &mut ast {
52 stmt.visit(&mut savepoint_visitor);
53 }
54
55 ast.into_iter()
56 .map(|stmt| stmt.to_string())
57 .collect::<Vec<_>>()
58 .join(" ")
59 }
60 Err(_) => sql.to_string(),
61 })
62 .collect()
63}
64
65struct SavepointVisitor {
66 savepoint_ids: HashMap<String, String>,
67}
68
69impl SavepointVisitor {
70 fn new() -> Self {
71 SavepointVisitor {
72 savepoint_ids: HashMap::new(),
73 }
74 }
75}
76
77impl VisitorMut for SavepointVisitor {
78 type Break = ();
79
80 fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
81 match stmt {
82 Statement::Savepoint { name } => {
83 let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
84 self.savepoint_ids
85 .insert(name.value.clone(), savepoint_id.clone());
86 *name = Ident::new(savepoint_id);
87 }
88 Statement::ReleaseSavepoint { name } => {
89 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
90 *name = Ident::new(savepoint_id);
91 }
92 }
93 Statement::Rollback {
94 savepoint: Some(name),
95 ..
96 } => {
97 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
98 *name = Ident::new(savepoint_id);
99 }
100 }
101 Statement::Declare { stmts } => {
102 for stmt in stmts {
103 if stmt.names.len() > 0 {
104 stmt.names = vec![Ident::new("...")];
105 }
106 }
107 }
108 Statement::Insert(Insert {
109 columns,
110 source,
111 on,
112 returning,
113 ..
114 }) => {
115 if columns.len() > 0 {
116 *columns = vec![Ident::new("...")];
117 }
118 if let Some(source) = source {
119 if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
120 values.rows = vec![vec![placeholder_value()]];
121 }
122 }
123 if let Some(on) = on {
124 match on {
125 OnInsert::OnConflict(OnConflict {
126 conflict_target,
127 action,
128 }) => {
129 if let Some(conflict_target) = conflict_target {
130 match conflict_target {
131 ConflictTarget::Columns(columns) => {
132 if columns.len() > 0 {
133 *columns = vec![Ident::new("...")];
134 }
135 }
136 _ => {}
137 }
138 }
139 if let OnConflictAction::DoUpdate(DoUpdate {
140 assignments,
141 selection,
142 }) = action
143 {
144 if assignments.len() > 0 {
145 *assignments = vec![Assignment {
146 target: AssignmentTarget::ColumnName(ObjectName(vec![
147 ObjectNamePart::Identifier(Ident::new("...")),
148 ])),
149 value: placeholder_value(),
150 }];
151 }
152 if let Some(selection) = selection {
153 *selection = placeholder_value();
154 }
155 }
156 }
157 _ => {}
158 }
159 }
160 if let Some(returning) = returning {
161 if returning.len() > 0 {
162 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
163 }
164 }
165 }
166 Statement::Update {
167 assignments,
168 selection,
169 returning,
170 ..
171 } => {
172 if assignments.len() > 0 {
173 *assignments = vec![sqlparser::ast::Assignment {
174 target: AssignmentTarget::ColumnName(ObjectName(vec![
175 ObjectNamePart::Identifier(Ident::new("...")),
176 ])),
177 value: placeholder_value(),
178 }];
179 }
180 if let Some(selection) = selection {
181 *selection = placeholder_value();
182 }
183 if let Some(returning) = returning {
184 if returning.len() > 0 {
185 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
186 }
187 }
188 }
189 Statement::Delete(Delete {
190 selection,
191 returning,
192 ..
193 }) => {
194 if let Some(selection) = selection {
195 *selection = placeholder_value();
196 }
197 if let Some(returning) = returning {
198 if returning.len() > 0 {
199 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
200 }
201 }
202 }
203 _ => {}
204 }
205 ControlFlow::Continue(())
206 }
207
208 fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
209 if let SetExpr::Select(select) = query.body.as_mut() {
210 if select.projection.len() > 0 {
211 if let Some(item) = select.projection.first_mut() {
212 match item {
213 SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
214 *item = SelectItem::UnnamedExpr(placeholder_value());
215 }
216 _ => {}
217 }
218 }
219 select.projection.truncate(1);
220 }
221
222 if let Some(distinct) = &mut select.distinct {
223 match distinct {
224 Distinct::On(exprs) => {
225 if exprs.len() > 0 {
226 *exprs = vec![placeholder_value()];
227 }
228 }
229 _ => {}
230 }
231 };
232
233 for table_with_joins in &mut select.from {
234 for join in &mut table_with_joins.joins {
235 match &mut join.join_operator {
236 JoinOperator::Join(constraint)
237 | JoinOperator::Inner(constraint)
238 | JoinOperator::Left(constraint)
239 | JoinOperator::LeftOuter(constraint)
240 | JoinOperator::Right(constraint)
241 | JoinOperator::RightOuter(constraint)
242 | JoinOperator::FullOuter(constraint)
243 | JoinOperator::Semi(constraint)
244 | JoinOperator::LeftSemi(constraint)
245 | JoinOperator::RightSemi(constraint)
246 | JoinOperator::Anti(constraint)
247 | JoinOperator::LeftAnti(constraint)
248 | JoinOperator::RightAnti(constraint) => match constraint {
249 JoinConstraint::On(expr) => {
250 *expr = placeholder_value();
251 }
252 _ => {}
253 },
254 _ => {}
255 }
256 }
257 }
258
259 if let Some(selection) = &mut select.selection {
260 *selection = placeholder_value();
261 }
262
263 match &mut select.group_by {
264 GroupByExpr::Expressions(col_names, ..) => {
265 if col_names.len() > 0 {
266 *col_names = vec![placeholder_value()];
267 }
268 }
269 _ => {}
270 }
271 }
272 if let Some(order_by) = &mut query.order_by {
273 let OrderBy { kind, .. } = order_by;
274 if let OrderByKind::Expressions(expressions) = kind {
275 if expressions.len() > 0 {
276 if let Some(expr) = expressions.first_mut() {
277 expr.expr = placeholder_value();
278 }
279 expressions.truncate(1);
280 }
281 }
282 }
283 if let Some(limit) = &mut query.limit {
284 *limit = placeholder_value();
285 }
286 if let Some(Offset { value, .. }) = &mut query.offset {
287 *value = placeholder_value();
288 }
289 ControlFlow::Continue(())
290 }
291
292 fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
293 for part in _relation.0.iter_mut() {
294 match part {
295 ObjectNamePart::Identifier(ident) => {
296 maybe_unquote_ident(ident);
297 }
298 }
299 }
300 ControlFlow::Continue(())
301 }
302
303 fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
304 match _expr {
305 Expr::Identifier(ident) => {
306 maybe_unquote_ident(ident);
307 }
308 Expr::CompoundIdentifier(idents) => {
309 for ident in idents {
310 maybe_unquote_ident(ident);
311 }
312 }
313 _ => {}
314 }
315 ControlFlow::Continue(())
316 }
317}
318
319fn placeholder_value() -> Expr {
320 Expr::Value(ValueWithSpan {
321 value: Value::Placeholder("...".to_string()),
322 span: Span::empty(),
323 })
324}
325
326fn maybe_unquote_ident(ident: &mut Ident) -> () {
327 let Ident {
328 value, quote_style, ..
329 } = ident;
330
331 if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
332 *quote_style = None;
333 }
334}
335
336#[cfg(test)]
337mod tests {
338 use super::*;
339
340 #[test]
341 fn test_fingerprint_one() {
342 let result = fingerprint_one("SELECT 123", None);
343 assert_eq!(result, "SELECT ...");
344 }
345
346 #[test]
347 fn test_empty() {
348 let result = fingerprint_many(vec![""], None);
349 assert_eq!(result, vec![""]);
350 }
351
352 #[test]
353 fn test_unparseable() {
354 let result = fingerprint_many(vec!["SELECT SELECT SELECT SELECT"], None);
355 assert_eq!(result, vec!["SELECT SELECT SELECT SELECT"]);
356 }
357
358 #[test]
359 fn test_comments_dropped() {
360 let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
361 assert_eq!(result, vec!["SELECT ..."]);
362 }
363
364 #[test]
365 fn test_savepoint() {
366 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
367 assert_eq!(result, vec!["SAVEPOINT s1"]);
368 }
369
370 #[test]
371 fn test_multiple_savepoints() {
372 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
373 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
374 }
375
376 #[test]
377 fn test_duplicate_savepoints() {
378 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
379 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
380 }
381
382 #[test]
383 fn test_release_savepoints() {
384 let result = fingerprint_many(
385 vec![
386 "SAVEPOINT \"s1234\"",
387 "RELEASE SAVEPOINT \"s1234\"",
388 "SAVEPOINT \"s2345\"",
389 "RELEASE SAVEPOINT \"s2345\"",
390 ],
391 None,
392 );
393 assert_eq!(
394 result,
395 vec![
396 "SAVEPOINT s1",
397 "RELEASE SAVEPOINT s1",
398 "SAVEPOINT s2",
399 "RELEASE SAVEPOINT s2"
400 ]
401 );
402 }
403
404 #[test]
405 fn test_rollback_savepoint() {
406 let result = fingerprint_many(
407 vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
408 None,
409 );
410 assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
411 }
412
413 #[test]
414 fn test_select() {
415 let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
416 assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
417 }
418
419 #[test]
420 fn test_select_single_value() {
421 let result = fingerprint_many(vec!["SELECT 1"], None);
422 assert_eq!(result, vec!["SELECT ..."]);
423 }
424
425 #[test]
426 fn test_select_distinct_on() {
427 let result = fingerprint_many(vec!["SELECT DISTINCT ON (a, b) c FROM d"], None);
428 assert_eq!(result, vec!["SELECT DISTINCT ON (...) ... FROM d"]);
429 }
430
431 #[test]
432 fn test_select_with_from_quoted() {
433 let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
434 assert_eq!(result, vec!["SELECT ... FROM c.d"]);
435 }
436
437 #[test]
438 fn test_select_with_from_join() {
439 let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
440 assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
441 }
442
443 #[test]
444 fn test_select_with_from_inner_join_quoted() {
445 let result = fingerprint_many(
446 vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
447 None,
448 );
449 assert_eq!(result, vec!["SELECT ... FROM c INNER JOIN d ON ..."]);
450 }
451
452 #[test]
453 fn test_select_with_from_left_outer_join_quoted() {
454 let result = fingerprint_many(
455 vec!["SELECT a, b FROM c LEFT OUTER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
456 None,
457 );
458 assert_eq!(result, vec!["SELECT ... FROM c LEFT OUTER JOIN d ON ..."]);
459 }
460
461 #[test]
462 fn test_select_with_group_by() {
463 let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
464 assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
465 }
466
467 #[test]
468 fn test_select_with_order_by() {
469 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
470 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
471 }
472
473 #[test]
474 fn test_select_with_order_by_more() {
475 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
476 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
477 }
478
479 #[test]
480 fn test_select_with_limit_offset() {
481 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
482 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
483 }
484
485 #[test]
486 fn test_select_union() {
487 let result = fingerprint_many(
488 vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
489 None,
490 );
491 assert_eq!(
492 result,
493 vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
494 );
495 }
496
497 #[test]
498 fn test_select_except() {
499 let result = fingerprint_many(
500 vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
501 None,
502 );
503 assert_eq!(
504 result,
505 vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
506 );
507 }
508 #[test]
509 fn test_select_intersect() {
510 let result = fingerprint_many(
511 vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
512 None,
513 );
514 assert_eq!(
515 result,
516 vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
517 );
518 }
519
520 #[test]
521 fn test_declare_cursor() {
522 let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
523 assert_eq!(
524 result,
525 vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
526 );
527 }
528
529 #[test]
530 fn test_insert() {
531 let result = fingerprint_many(
532 vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
533 None,
534 );
535 assert_eq!(
536 result,
537 vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
538 );
539 }
540
541 #[test]
542 fn test_insert_select() {
543 let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
544 assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
545 }
546
547 #[test]
548 fn test_insert_on_conflict() {
549 let result = fingerprint_many(
550 vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
551 None,
552 );
553 assert_eq!(
554 result,
555 vec![
556 "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
557 ]
558 );
559 }
560
561 #[test]
562 fn test_update() {
563 let result = fingerprint_many(
564 vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
565 None,
566 );
567 assert_eq!(
568 result,
569 vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
570 );
571 }
572
573 #[test]
574 fn test_delete() {
575 let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
576 assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
577 }
578}