1#![doc = include_str!("../README.md")]
2
3use sqlparser::ast::{
4 Assignment, AssignmentTarget, ConflictTarget, Delete, DoUpdate, Expr, GroupByExpr, Ident,
5 Insert, ObjectName, ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy,
6 OrderByKind, Query, SelectItem, SetExpr, Statement, Value, ValueWithSpan, VisitMut, VisitorMut,
7};
8use sqlparser::dialect::{Dialect, GenericDialect};
9use sqlparser::parser::Parser;
10use sqlparser::tokenizer::Span;
11use std::collections::HashMap;
12use std::ops::ControlFlow;
13
14pub fn fingerprint_one(input: &str, dialect: Option<&dyn Dialect>) -> String {
26 fingerprint_many(vec![input], dialect).join(" ")
27}
28
29pub fn fingerprint_many(input: Vec<&str>, dialect: Option<&dyn Dialect>) -> Vec<String> {
42 let dialect = dialect.unwrap_or(&GenericDialect {});
43
44 let mut savepoint_visitor = SavepointVisitor::new();
45
46 input
47 .iter()
48 .map(|sql| match Parser::parse_sql(dialect, sql) {
49 Ok(mut ast) => {
50 for stmt in &mut ast {
51 stmt.visit(&mut savepoint_visitor);
52 }
53
54 ast.into_iter()
55 .map(|stmt| stmt.to_string())
56 .collect::<Vec<_>>()
57 .join(" ")
58 }
59 Err(_) => sql.to_string(),
60 })
61 .collect()
62}
63
64struct SavepointVisitor {
65 savepoint_ids: HashMap<String, String>,
66}
67
68impl SavepointVisitor {
69 fn new() -> Self {
70 SavepointVisitor {
71 savepoint_ids: HashMap::new(),
72 }
73 }
74}
75
76impl VisitorMut for SavepointVisitor {
77 type Break = ();
78
79 fn pre_visit_statement(&mut self, stmt: &mut Statement) -> ControlFlow<Self::Break> {
80 match stmt {
81 Statement::Savepoint { name } => {
82 let savepoint_id = format!("s{}", self.savepoint_ids.len() + 1);
83 self.savepoint_ids
84 .insert(name.value.clone(), savepoint_id.clone());
85 *name = Ident::new(savepoint_id);
86 }
87 Statement::ReleaseSavepoint { name } => {
88 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
89 *name = Ident::new(savepoint_id);
90 }
91 }
92 Statement::Rollback {
93 savepoint: Some(name),
94 ..
95 } => {
96 if let Some(savepoint_id) = self.savepoint_ids.get(&name.value).cloned() {
97 *name = Ident::new(savepoint_id);
98 }
99 }
100 Statement::Declare { stmts } => {
101 for stmt in stmts {
102 if stmt.names.len() > 0 {
103 stmt.names = vec![Ident::new("...")];
104 }
105 }
106 }
107 Statement::Insert(Insert {
108 columns,
109 source,
110 on,
111 returning,
112 ..
113 }) => {
114 if columns.len() > 0 {
115 *columns = vec![Ident::new("...")];
116 }
117 if let Some(source) = source {
118 if let SetExpr::Values(values) = source.as_mut().body.as_mut() {
119 values.rows = vec![vec![placeholder_value()]];
120 }
121 }
122 if let Some(on) = on {
123 match on {
124 OnInsert::OnConflict(OnConflict {
125 conflict_target,
126 action,
127 }) => {
128 if let Some(conflict_target) = conflict_target {
129 match conflict_target {
130 ConflictTarget::Columns(columns) => {
131 if columns.len() > 0 {
132 *columns = vec![Ident::new("...")];
133 }
134 }
135 _ => {}
136 }
137 }
138 if let OnConflictAction::DoUpdate(DoUpdate {
139 assignments,
140 selection,
141 }) = action
142 {
143 if assignments.len() > 0 {
144 *assignments = vec![Assignment {
145 target: AssignmentTarget::ColumnName(ObjectName(vec![
146 ObjectNamePart::Identifier(Ident::new("...")),
147 ])),
148 value: placeholder_value(),
149 }];
150 }
151 if let Some(selection) = selection {
152 *selection = placeholder_value();
153 }
154 }
155 }
156 _ => {}
157 }
158 }
159 if let Some(returning) = returning {
160 if returning.len() > 0 {
161 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
162 }
163 }
164 }
165 Statement::Update {
166 assignments,
167 selection,
168 returning,
169 ..
170 } => {
171 if assignments.len() > 0 {
172 *assignments = vec![sqlparser::ast::Assignment {
173 target: AssignmentTarget::ColumnName(ObjectName(vec![
174 ObjectNamePart::Identifier(Ident::new("...")),
175 ])),
176 value: placeholder_value(),
177 }];
178 }
179 if let Some(selection) = selection {
180 *selection = placeholder_value();
181 }
182 if let Some(returning) = returning {
183 if returning.len() > 0 {
184 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
185 }
186 }
187 }
188 Statement::Delete(Delete {
189 selection,
190 returning,
191 ..
192 }) => {
193 if let Some(selection) = selection {
194 *selection = placeholder_value();
195 }
196 if let Some(returning) = returning {
197 if returning.len() > 0 {
198 *returning = vec![SelectItem::UnnamedExpr(placeholder_value())];
199 }
200 }
201 }
202 _ => {}
203 }
204 ControlFlow::Continue(())
205 }
206
207 fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
208 if let SetExpr::Select(select) = query.body.as_mut() {
209 if select.projection.len() > 0 {
210 if let Some(item) = select.projection.first_mut() {
211 match item {
212 SelectItem::UnnamedExpr(_) | SelectItem::ExprWithAlias { .. } => {
213 *item = SelectItem::UnnamedExpr(placeholder_value());
214 }
215 _ => {}
216 }
217 }
218 select.projection.truncate(1);
219 }
220
221 if let Some(selection) = &mut select.selection {
222 *selection = placeholder_value();
223 }
224
225 match &mut select.group_by {
226 GroupByExpr::Expressions(col_names, ..) => {
227 if col_names.len() > 0 {
228 *col_names = vec![placeholder_value()];
229 }
230 }
231 _ => {}
232 }
233 }
234 if let Some(order_by) = &mut query.order_by {
235 let OrderBy { kind, .. } = order_by;
236 if let OrderByKind::Expressions(expressions) = kind {
237 if expressions.len() > 0 {
238 if let Some(expr) = expressions.first_mut() {
239 expr.expr = placeholder_value();
240 }
241 expressions.truncate(1);
242 }
243 }
244 }
245 if let Some(limit) = &mut query.limit {
246 *limit = placeholder_value();
247 }
248 if let Some(Offset { value, .. }) = &mut query.offset {
249 *value = placeholder_value();
250 }
251 ControlFlow::Continue(())
252 }
253
254 fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow<Self::Break> {
255 for part in _relation.0.iter_mut() {
256 match part {
257 ObjectNamePart::Identifier(ident) => {
258 maybe_unquote_ident(ident);
259 }
260 }
261 }
262 ControlFlow::Continue(())
263 }
264
265 fn pre_visit_expr(&mut self, _expr: &mut Expr) -> ControlFlow<Self::Break> {
266 match _expr {
267 Expr::Identifier(ident) => {
268 maybe_unquote_ident(ident);
269 }
270 Expr::CompoundIdentifier(idents) => {
271 for ident in idents {
272 maybe_unquote_ident(ident);
273 }
274 }
275 _ => {}
276 }
277 ControlFlow::Continue(())
278 }
279}
280
281fn placeholder_value() -> Expr {
282 Expr::Value(ValueWithSpan {
283 value: Value::Placeholder("...".to_string()),
284 span: Span::empty(),
285 })
286}
287
288fn maybe_unquote_ident(ident: &mut Ident) -> () {
289 let Ident {
290 value, quote_style, ..
291 } = ident;
292
293 if value.chars().all(|c| c.is_alphanumeric() || c == '_') {
294 *quote_style = None;
295 }
296}
297
298#[cfg(test)]
299mod tests {
300 use super::*;
301
302 #[test]
303 fn test_fingerprint_one() {
304 let result = fingerprint_one("SELECT 123", None);
305 assert_eq!(result, "SELECT ...");
306 }
307
308 #[test]
309 fn test_empty() {
310 let result = fingerprint_many(vec![""], None);
311 assert_eq!(result, vec![""]);
312 }
313
314 #[test]
315 fn test_unparseable() {
316 let result = fingerprint_many(vec!["SELECT SELECT SELECT SELECT"], None);
317 assert_eq!(result, vec!["SELECT SELECT SELECT SELECT"]);
318 }
319
320 #[test]
321 fn test_comments_dropped() {
322 let result = fingerprint_many(vec!["SELECT 123 /* magic value */"], None);
323 assert_eq!(result, vec!["SELECT ..."]);
324 }
325
326 #[test]
327 fn test_savepoint() {
328 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\""], None);
329 assert_eq!(result, vec!["SAVEPOINT s1"]);
330 }
331
332 #[test]
333 fn test_multiple_savepoints() {
334 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s3456\""], None);
335 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
336 }
337
338 #[test]
339 fn test_duplicate_savepoints() {
340 let result = fingerprint_many(vec!["SAVEPOINT \"s1234\"", "SAVEPOINT \"s1234\""], None);
341 assert_eq!(result, vec!["SAVEPOINT s1", "SAVEPOINT s2"]);
342 }
343
344 #[test]
345 fn test_release_savepoints() {
346 let result = fingerprint_many(
347 vec![
348 "SAVEPOINT \"s1234\"",
349 "RELEASE SAVEPOINT \"s1234\"",
350 "SAVEPOINT \"s2345\"",
351 "RELEASE SAVEPOINT \"s2345\"",
352 ],
353 None,
354 );
355 assert_eq!(
356 result,
357 vec![
358 "SAVEPOINT s1",
359 "RELEASE SAVEPOINT s1",
360 "SAVEPOINT s2",
361 "RELEASE SAVEPOINT s2"
362 ]
363 );
364 }
365
366 #[test]
367 fn test_rollback_savepoint() {
368 let result = fingerprint_many(
369 vec!["SAVEPOINT \"s1234\"", "ROLLBACK TO SAVEPOINT \"s1234\""],
370 None,
371 );
372 assert_eq!(result, vec!["SAVEPOINT s1", "ROLLBACK TO SAVEPOINT s1"]);
373 }
374
375 #[test]
376 fn test_select() {
377 let result = fingerprint_many(vec!["SELECT a, b FROM c WHERE a = b"], None);
378 assert_eq!(result, vec!["SELECT ... FROM c WHERE ..."]);
379 }
380
381 #[test]
382 fn test_select_single_value() {
383 let result = fingerprint_many(vec!["SELECT 1"], None);
384 assert_eq!(result, vec!["SELECT ..."]);
385 }
386
387 #[test]
388 fn test_select_with_from_quoted() {
389 let result = fingerprint_many(vec!["SELECT a, b FROM \"c\".\"d\""], None);
390 assert_eq!(result, vec!["SELECT ... FROM c.d"]);
391 }
392
393 #[test]
394 fn test_select_with_from_join() {
395 let result = fingerprint_many(vec!["SELECT a, b FROM c JOIN d"], None);
396 assert_eq!(result, vec!["SELECT ... FROM c JOIN d"]);
397 }
398
399 #[test]
400 fn test_select_with_from_join_quoted() {
401 let result = fingerprint_many(
402 vec!["SELECT a, b FROM c INNER JOIN d ON (\"d\".\"a\" = \"c\".\"a\")"],
403 None,
404 );
405 assert_eq!(
406 result,
407 vec!["SELECT ... FROM c INNER JOIN d ON (d.a = c.a)"]
408 );
409 }
410
411 #[test]
412 fn test_select_with_group_by() {
413 let result = fingerprint_many(vec!["SELECT a, b FROM c GROUP BY a, b"], None);
414 assert_eq!(result, vec!["SELECT ... FROM c GROUP BY ..."]);
415 }
416
417 #[test]
418 fn test_select_with_order_by() {
419 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a, b DESC"], None);
420 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ..."]);
421 }
422
423 #[test]
424 fn test_select_with_order_by_more() {
425 let result = fingerprint_many(vec!["SELECT a, b FROM c ORDER BY a ASC, b DESC"], None);
426 assert_eq!(result, vec!["SELECT ... FROM c ORDER BY ... ASC"]);
427 }
428
429 #[test]
430 fn test_select_with_limit_offset() {
431 let result = fingerprint_many(vec!["SELECT a FROM b LIMIT 21 OFFSET 101 ROWS"], None);
432 assert_eq!(result, vec!["SELECT ... FROM b LIMIT ... OFFSET ... ROWS"]);
433 }
434
435 #[test]
436 fn test_select_union() {
437 let result = fingerprint_many(
438 vec!["(SELECT a, b FROM c) UNION (SELECT a, b FROM d)"],
439 None,
440 );
441 assert_eq!(
442 result,
443 vec!["(SELECT ... FROM c) UNION (SELECT ... FROM d)"]
444 );
445 }
446
447 #[test]
448 fn test_select_except() {
449 let result = fingerprint_many(
450 vec!["(SELECT a, b FROM c) EXCEPT (SELECT a, b FROM d)"],
451 None,
452 );
453 assert_eq!(
454 result,
455 vec!["(SELECT ... FROM c) EXCEPT (SELECT ... FROM d)"]
456 );
457 }
458 #[test]
459 fn test_select_intersect() {
460 let result = fingerprint_many(
461 vec!["(SELECT a, b FROM c) INTERSECT (SELECT a, b FROM d)"],
462 None,
463 );
464 assert_eq!(
465 result,
466 vec!["(SELECT ... FROM c) INTERSECT (SELECT ... FROM d)"]
467 );
468 }
469
470 #[test]
471 fn test_declare_cursor() {
472 let result = fingerprint_many(vec!["DECLARE c CURSOR FOR SELECT a, b FROM c join d"], None);
473 assert_eq!(
474 result,
475 vec!["DECLARE ... CURSOR FOR SELECT ... FROM c JOIN d"]
476 );
477 }
478
479 #[test]
480 fn test_insert() {
481 let result = fingerprint_many(
482 vec!["INSERT INTO c (a, b) VALUES (1, 2), (3, 4) RETURNING d"],
483 None,
484 );
485 assert_eq!(
486 result,
487 vec!["INSERT INTO c (...) VALUES (...) RETURNING ..."]
488 );
489 }
490
491 #[test]
492 fn test_insert_select() {
493 let result = fingerprint_many(vec!["INSERT INTO a (b, c) SELECT d FROM e"], None);
494 assert_eq!(result, vec!["INSERT INTO a (...) SELECT ... FROM e"]);
495 }
496
497 #[test]
498 fn test_insert_on_conflict() {
499 let result = fingerprint_many(
500 vec!["INSERT INTO a (b, c) VALUES (1, 2) ON CONFLICT(\"a\", \"b\") DO UPDATE SET \"d\" = EXCLUDED.d WHERE e = f RETURNING b, c"],
501 None,
502 );
503 assert_eq!(
504 result,
505 vec![
506 "INSERT INTO a (...) VALUES (...) ON CONFLICT(...) DO UPDATE SET ... = ... WHERE ... RETURNING ..."
507 ]
508 );
509 }
510
511 #[test]
512 fn test_update() {
513 let result = fingerprint_many(
514 vec!["UPDATE a SET b = 1, c = 2 WHERE d = 3 RETURNING e"],
515 None,
516 );
517 assert_eq!(
518 result,
519 vec!["UPDATE a SET ... = ... WHERE ... RETURNING ..."]
520 );
521 }
522
523 #[test]
524 fn test_delete() {
525 let result = fingerprint_many(vec!["DELETE FROM a WHERE b = 1 RETURNING c"], None);
526 assert_eq!(result, vec!["DELETE FROM a WHERE ... RETURNING ..."]);
527 }
528}