1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4 Assignment, AssignmentTarget, ConflictTarget, Delete, DoUpdate, Expr, GroupByExpr, Ident,
5 Insert, JoinConstraint, JoinOperator, ObjectName, ObjectNamePart, Offset, OnConflict,
6 OnConflictAction, OnInsert, OrderBy, OrderByKind, Query, SelectItem, SetExpr, Statement, Value,
7 ValueWithSpan, VisitMut, VisitorMut,
8};
9use sqlparser::dialect::{Dialect, GenericDialect};
10use sqlparser::parser::Parser;
11use sqlparser::tokenizer::Span;
12use std::collections::HashMap;
13use std::ops::ControlFlow;
14
15pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
27 fingerprint_many(vec![input], dialect).join(" ")
28}
29
30pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
43 let dialect = dialect.unwrap_or(&GenericDialect {});
44
45 let mut savepoint_visitor = SavepointVisitor::new();
46
47 input
48 .iter()
49 .map(|sql| match Parser::parse_sql(dialect, sql) {
50 Ok(mut ast) => {
51 for stmt in &mut ast {
52 stmt.visit(&mut savepoint_visitor);
53 }
54
55 ast.into_iter()
56 .map(|stmt| stmt.to_string())
57 .collect::<Vec<_>>()
58 .join(" ")
59 }
60 Err(_) => sql.to_string(),
61 })
62 .collect()
63}
64
65struct SavepointVisitor {
66 savepoint_ids: HashMap<String, String>,
67}
68
69impl SavepointVisitor {
70 fn new() -> Self {
71 SavepointVisitor {
72 savepoint_ids: HashMap::new(),
73 }
74 }
75}
76
77impl VisitorMut for SavepointVisitor {
78 type Break = ();
79
80 fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
81 match stmt {
82 Statement::Savepoint { name } => {
83 let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
84 self.savepoint_ids
85 .insert(name.value.clone(), savepoint_id.clone());
86 *name = Ident::new(savepoint_id);
87 }
88 Statement::ReleaseSavepoint { name } => {
89 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
90 *name = Ident::new(savepoint_id);
91 }
92 }
93 Statement::Rollback {
94 savepoint: Some(name),
95 ..
96 } => {
97 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
98 *name = Ident::new(savepoint_id);
99 }
100 }
101 Statement::Declare { stmts } => {
102 for stmt in stmts {
103 if stmt.names.len() > 0 {
104 stmt.names = vec![Ident::new("...")];
105 }
106 }
107 }
108 Statement::Insert(Insert {
109 columns,
110 source,
111 on,
112 returning,
113 ..
114 }) => {
115 if columns.len() > 0 {
116 *columns = vec![Ident::new("...")];
117 }
118 if let Some(source) = source {
119 if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
120 values.rows = vec![vec![placeholder_value()]];
121 }
122 }
123 if let Some(on) = on {
124 match on {
125 OnInsert::OnConflict(OnConflict {
126 conflict_target,
127 action,
128 }) => {
129 if let Some(conflict_target) = conflict_target {
130 match conflict_target {
131 ConflictTarget::Columns(columns) => {
132 if columns.len() > 0 {
133 *columns = vec![Ident::new("...")];
134 }
135 }
136 _ => {}
137 }
138 }
139 if let OnConflictAction::DoUpdate(DoUpdate {
140 assignments,
141 selection,
142 }) = action
143 {
144 if assignments.len() > 0 {
145 *assignments = vec![Assignment {
146 target: AssignmentTarget::ColumnName(ObjectName(vec![
147 ObjectNamePart::Identifier(Ident::new("...")),
148 ])),
149 value: placeholder_value(),
150 }];
151 }
152 if let Some(selection) = selection {
153 *selection = placeholder_value();
154 }
155 }
156 }
157 _ => {}
158 }
159 }
160 if let Some(returning) = returning {
161 if returning.len() > 0 {
162 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
163 }
164 }
165 }
166 Statement::Update {
167 assignments,
168 selection,
169 returning,
170 ..
171 } => {
172 if assignments.len() > 0 {
173 *assignments = vec![sqlparser::ast::Assignment {
174 target: AssignmentTarget::ColumnName(ObjectName(vec![
175 ObjectNamePart::Identifier(Ident::new("...")),
176 ])),
177 value: placeholder_value(),
178 }];
179 }
180 if let Some(selection) = selection {
181 *selection = placeholder_value();
182 }
183 if let Some(returning) = returning {
184 if returning.len() > 0 {
185 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
186 }
187 }
188 }
189 Statement::Delete(Delete {
190 selection,
191 returning,
192 ..
193 }) => {
194 if let Some(selection) = selection {
195 *selection = placeholder_value();
196 }
197 if let Some(returning) = returning {
198 if returning.len() > 0 {
199 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
200 }
201 }
202 }
203 _ => {}
204 }
205 ControlFlow::Continue(())
206 }
207
208 fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
209 if let SetExpr::Select(select) = query.body.as_mut() {
210 if select.projection.len() > 0 {
211 if let Some(item) = select.projection.first_mut() {
212 match item {
213 SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
214 *item = SelectItem::UnnamedExpr(placeholder_value());
215 }
216 _ => {}
217 }
218 }
219 select.projection.truncate(1);
220 }
221
222 for table_with_joins in &mut select.from {
223 for join in &mut table_with_joins.joins {
224 match &mut join.join_operator {
225 JoinOperator::Join(constraint)
226 | JoinOperator::Inner(constraint)
227 | JoinOperator::Left(constraint)
228 | JoinOperator::LeftOuter(constraint)
229 | JoinOperator::Right(constraint)
230 | JoinOperator::RightOuter(constraint)
231 | JoinOperator::FullOuter(constraint)
232 | JoinOperator::Semi(constraint)
233 | JoinOperator::LeftSemi(constraint)
234 | JoinOperator::RightSemi(constraint)
235 | JoinOperator::Anti(constraint)
236 | JoinOperator::LeftAnti(constraint)
237 | JoinOperator::RightAnti(constraint) => match constraint {
238 JoinConstraint::On(expr) => {
239 *expr = placeholder_value();
240 }
241 _ => {}
242 },
243 _ => {}
244 }
245 }
246 }
247
248 if let Some(selection) = &mut select.selection {
249 *selection = placeholder_value();
250 }
251
252 match &mut select.group_by {
253 GroupByExpr::Expressions(col_names, ..) => {
254 if col_names.len() > 0 {
255 *col_names = vec![placeholder_value()];
256 }
257 }
258 _ => {}
259 }
260 }
261 if let Some(order_by) = &mut query.order_by {
262 let OrderBy { kind, .. } = order_by;
263 if let OrderByKind::Expressions(expressions) = kind {
264 if expressions.len() > 0 {
265 if let Some(expr) = expressions.first_mut() {
266 expr.expr = placeholder_value();
267 }
268 expressions.truncate(1);
269 }
270 }
271 }
272 if let Some(limit) = &mut query.limit {
273 *limit = placeholder_value();
274 }
275 if let Some(Offset { value, .. }) = &mut query.offset {
276 *value = placeholder_value();
277 }
278 ControlFlow::Continue(())
279 }
280
281 fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
282 for part in _relation.0.iter_mut() {
283 match part {
284 ObjectNamePart::Identifier(ident) => {
285 maybe_unquote_ident(ident);
286 }
287 }
288 }
289 ControlFlow::Continue(())
290 }
291
292 fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
293 match _expr {
294 Expr::Identifier(ident) => {
295 maybe_unquote_ident(ident);
296 }
297 Expr::CompoundIdentifier(idents) => {
298 for ident in idents {
299 maybe_unquote_ident(ident);
300 }
301 }
302 _ => {}
303 }
304 ControlFlow::Continue(())
305 }
306}
307
308fn placeholder_value() -> Expr {
309 Expr::Value(ValueWithSpan {
310 value: Value::Placeholder("...".to_string()),
311 span: Span::empty(),
312 })
313}
314
315fn maybe_unquote_ident(ident: &mut Ident) -> () {
316 let Ident {
317 value, quote_style, ..
318 } = ident;
319
320 if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
321 *quote_style = None;
322 }
323}
324
325#[cfg(test)]
326mod tests {
327 use super::*;
328
329 #[test]
330 fn test_fingerprint_one() {
331 let result = fingerprint_one("SELECT 123", None);
332 assert_eq!(result, "SELECT ...");
333 }
334
335 #[test]
336 fn test_empty() {
337 let result = fingerprint_many(vec![""], None);
338 assert_eq!(result, vec![""]);
339 }
340
341 #[test]
342 fn test_unparseable() {
343 let result = fingerprint_many(vec!["SELECT SELECT SELECT SELECT"], None);
344 assert_eq!(result, vec!["SELECT SELECT SELECT SELECT"]);
345 }
346
347 #[test]
348 fn test_comments_dropped() {
349 let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
350 assert_eq!(result, vec!["SELECT ..."]);
351 }
352
353 #[test]
354 fn test_savepoint() {
355 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
356 assert_eq!(result, vec!["SAVEPOINT s1"]);
357 }
358
359 #[test]
360 fn test_multiple_savepoints() {
361 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
362 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
363 }
364
365 #[test]
366 fn test_duplicate_savepoints() {
367 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
368 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
369 }
370
371 #[test]
372 fn test_release_savepoints() {
373 let result = fingerprint_many(
374 vec![
375 "SAVEPOINT \"s1234\"",
376 "RELEASE SAVEPOINT \"s1234\"",
377 "SAVEPOINT \"s2345\"",
378 "RELEASE SAVEPOINT \"s2345\"",
379 ],
380 None,
381 );
382 assert_eq!(
383 result,
384 vec![
385 "SAVEPOINT s1",
386 "RELEASE SAVEPOINT s1",
387 "SAVEPOINT s2",
388 "RELEASE SAVEPOINT s2"
389 ]
390 );
391 }
392
393 #[test]
394 fn test_rollback_savepoint() {
395 let result = fingerprint_many(
396 vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
397 None,
398 );
399 assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
400 }
401
402 #[test]
403 fn test_select() {
404 let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
405 assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
406 }
407
408 #[test]
409 fn test_select_single_value() {
410 let result = fingerprint_many(vec!["SELECT 1"], None);
411 assert_eq!(result, vec!["SELECT ..."]);
412 }
413
414 #[test]
415 fn test_select_with_from_quoted() {
416 let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
417 assert_eq!(result, vec!["SELECT ... FROM c.d"]);
418 }
419
420 #[test]
421 fn test_select_with_from_join() {
422 let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
423 assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
424 }
425
426 #[test]
427 fn test_select_with_from_inner_join_quoted() {
428 let result = fingerprint_many(
429 vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
430 None,
431 );
432 assert_eq!(result, vec!["SELECT ... FROM c INNER JOIN d ON ..."]);
433 }
434
435 #[test]
436 fn test_select_with_from_left_outer_join_quoted() {
437 let result = fingerprint_many(
438 vec!["SELECT a, b FROM c LEFT OUTER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
439 None,
440 );
441 assert_eq!(result, vec!["SELECT ... FROM c LEFT OUTER JOIN d ON ..."]);
442 }
443
444 #[test]
445 fn test_select_with_group_by() {
446 let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
447 assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
448 }
449
450 #[test]
451 fn test_select_with_order_by() {
452 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
453 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
454 }
455
456 #[test]
457 fn test_select_with_order_by_more() {
458 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
459 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
460 }
461
462 #[test]
463 fn test_select_with_limit_offset() {
464 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
465 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
466 }
467
468 #[test]
469 fn test_select_union() {
470 let result = fingerprint_many(
471 vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
472 None,
473 );
474 assert_eq!(
475 result,
476 vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
477 );
478 }
479
480 #[test]
481 fn test_select_except() {
482 let result = fingerprint_many(
483 vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
484 None,
485 );
486 assert_eq!(
487 result,
488 vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
489 );
490 }
491 #[test]
492 fn test_select_intersect() {
493 let result = fingerprint_many(
494 vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
495 None,
496 );
497 assert_eq!(
498 result,
499 vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
500 );
501 }
502
503 #[test]
504 fn test_declare_cursor() {
505 let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
506 assert_eq!(
507 result,
508 vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
509 );
510 }
511
512 #[test]
513 fn test_insert() {
514 let result = fingerprint_many(
515 vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
516 None,
517 );
518 assert_eq!(
519 result,
520 vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
521 );
522 }
523
524 #[test]
525 fn test_insert_select() {
526 let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
527 assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
528 }
529
530 #[test]
531 fn test_insert_on_conflict() {
532 let result = fingerprint_many(
533 vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
534 None,
535 );
536 assert_eq!(
537 result,
538 vec![
539 "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
540 ]
541 );
542 }
543
544 #[test]
545 fn test_update() {
546 let result = fingerprint_many(
547 vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
548 None,
549 );
550 assert_eq!(
551 result,
552 vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
553 );
554 }
555
556 #[test]
557 fn test_delete() {
558 let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
559 assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
560 }
561}