sql_insight/
normalizer.rs

1//! A Normalizer that converts SQL queries to a canonical form.
2//!
3//! See [`normalize`](crate::normalize()) as the entry point for normalizing SQL.
4
5use std::ops::ControlFlow;
6
7use crate::error::Error;
8use sqlparser::ast::{Expr, VisitMut, VisitorMut};
9use sqlparser::ast::{Query, SetExpr, Value};
10use sqlparser::dialect::Dialect;
11use sqlparser::parser::Parser;
12use std::ops::DerefMut;
13
14/// Convenience function to normalize SQL with default options.
15///
16/// ## Example
17///
18/// ```rust
19/// use sql_insight::sqlparser::dialect::GenericDialect;
20///
21/// let dialect = GenericDialect {};
22/// let sql = "SELECT a FROM t1 WHERE b = 1 AND c in (2, 3) AND d LIKE '%foo'";
23/// let result = sql_insight::normalize(&dialect, sql).unwrap();
24/// assert_eq!(result, ["SELECT a FROM t1 WHERE b = ? AND c IN (?, ?) AND d LIKE ?"]);
25/// ```
26pub fn normalize(dialect: &dyn Dialect, sql: &str) -> Result<Vec<String>, Error> {
27    Normalizer::normalize(dialect, sql, NormalizerOptions::new())
28}
29
30/// Convenience function to normalize SQL with options.
31///
32/// ## Example
33///
34/// ```rust
35/// use sql_insight::sqlparser::dialect::GenericDialect;
36/// use sql_insight::NormalizerOptions;
37///
38/// let dialect = GenericDialect {};
39/// let sql = "SELECT a FROM t1 WHERE b = 1 AND c in (2, 3, 4)";
40/// let result = sql_insight::normalize_with_options(&dialect, sql, NormalizerOptions::new().with_unify_in_list(true)).unwrap();
41/// assert_eq!(result, ["SELECT a FROM t1 WHERE b = ? AND c IN (...)"]);
42/// ```
43pub fn normalize_with_options(
44    dialect: &dyn Dialect,
45    sql: &str,
46    options: NormalizerOptions,
47) -> Result<Vec<String>, Error> {
48    Normalizer::normalize(dialect, sql, options)
49}
50
51/// Options for normalizing SQL.
52#[derive(Default, Clone)]
53pub struct NormalizerOptions {
54    /// Unify IN lists to a single form when all elements are literal values.
55    /// For example, `IN (1, 2, 3)` becomes `IN (...)`.
56    pub unify_in_list: bool,
57    /// Unify VALUES lists to a single form when all elements are literal values.
58    /// For example, `VALUES (1, 2, 3), (4, 5, 6)` becomes `VALUES (...)`.
59    pub unify_values: bool,
60}
61
62impl NormalizerOptions {
63    pub fn new() -> Self {
64        Self::default()
65    }
66
67    pub fn with_unify_in_list(mut self, unify_in_list: bool) -> Self {
68        self.unify_in_list = unify_in_list;
69        self
70    }
71
72    pub fn with_unify_values(mut self, unify_values: bool) -> Self {
73        self.unify_values = unify_values;
74        self
75    }
76}
77
78/// A visitor for SQL AST nodes that normalizes SQL queries.
79#[derive(Default)]
80pub struct Normalizer {
81    pub options: NormalizerOptions,
82}
83
84impl VisitorMut for Normalizer {
85    type Break = ();
86
87    fn post_visit_query(&mut self, query: &mut Query) -> ControlFlow<Self::Break> {
88        if let SetExpr::Values(values) = query.body.deref_mut() {
89            if self.options.unify_values {
90                let rows = &mut values.rows;
91                if rows.is_empty()
92                    || rows.iter().all(|row| {
93                        row.is_empty() || row.iter().all(|expr| matches!(expr, Expr::Value(_)))
94                    })
95                {
96                    *rows = vec![vec![Expr::Value(Value::Placeholder("...".into()))]];
97                }
98            }
99        }
100        ControlFlow::Continue(())
101    }
102
103    fn pre_visit_expr(&mut self, expr: &mut Expr) -> ControlFlow<Self::Break> {
104        if let Expr::Value(value) = expr {
105            *value = Value::Placeholder("?".into());
106        }
107        ControlFlow::Continue(())
108    }
109
110    fn post_visit_expr(&mut self, expr: &mut Expr) -> ControlFlow<Self::Break> {
111        match expr {
112            Expr::InList { list, .. } if self.options.unify_in_list => {
113                if list.is_empty() || list.iter().all(|expr| matches!(expr, Expr::Value(_))) {
114                    *list = vec![Expr::Value(Value::Placeholder("...".into()))];
115                }
116            }
117            _ => {}
118        }
119        ControlFlow::Continue(())
120    }
121}
122
123impl Normalizer {
124    pub fn new() -> Self {
125        Self::default()
126    }
127
128    pub fn with_options(mut self, options: NormalizerOptions) -> Self {
129        self.options = options;
130        self
131    }
132
133    /// Normalize SQL.
134    pub fn normalize(
135        dialect: &dyn Dialect,
136        sql: &str,
137        options: NormalizerOptions,
138    ) -> Result<Vec<String>, Error> {
139        let mut statements = Parser::parse_sql(dialect, sql)?;
140        statements.visit(&mut Self::new().with_options(options));
141        Ok(statements
142            .into_iter()
143            .map(|statement| statement.to_string())
144            .collect::<Vec<String>>())
145    }
146}
147
148#[cfg(test)]
149mod tests {
150    use super::*;
151    use crate::test_utils::all_dialects;
152
153    fn assert_normalize(
154        sql: &str,
155        expected: Vec<String>,
156        dialects: Vec<Box<dyn Dialect>>,
157        options: NormalizerOptions,
158    ) {
159        for dialect in dialects {
160            let result = Normalizer::normalize(dialect.as_ref(), sql, options.clone()).unwrap();
161            assert_eq!(result, expected, "Failed for dialect: {dialect:?}")
162        }
163    }
164
165    #[test]
166    fn test_single_sql() {
167        let sql = "SELECT a FROM t1 WHERE b = 1 AND c in (2, (select * from b)) AND d LIKE '%foo'";
168        let expected = vec![
169            "SELECT a FROM t1 WHERE b = ? AND c IN (?, (SELECT * FROM b)) AND d LIKE ?".into(),
170        ];
171        assert_normalize(sql, expected, all_dialects(), NormalizerOptions::new());
172    }
173
174    #[test]
175    fn test_multiple_sql() {
176        let sql = "INSERT INTO t2 (a) VALUES (4); UPDATE t1 SET a = 1 WHERE b = 2; DELETE FROM t3 WHERE c = 3";
177        let expected = vec![
178            "INSERT INTO t2 (a) VALUES (?)".into(),
179            "UPDATE t1 SET a = ? WHERE b = ?".into(),
180            "DELETE FROM t3 WHERE c = ?".into(),
181        ];
182        assert_normalize(sql, expected, all_dialects(), NormalizerOptions::new());
183    }
184
185    #[test]
186    fn test_sql_with_in_list_without_unify_in_list_option() {
187        let sql = "SELECT a FROM t1 WHERE b = 1 AND c in (2, 3, 4)";
188        let expected = vec!["SELECT a FROM t1 WHERE b = ? AND c IN (?, ?, ?)".into()];
189        assert_normalize(sql, expected, all_dialects(), NormalizerOptions::new());
190    }
191
192    #[test]
193    fn test_sql_with_in_list_with_unify_in_list_option() {
194        let sql = "SELECT a FROM t1 WHERE b = 1 AND c in (2, 3, NULL)";
195        let expected = vec!["SELECT a FROM t1 WHERE b = ? AND c IN (...)".into()];
196        assert_normalize(
197            sql,
198            expected,
199            all_dialects(),
200            NormalizerOptions::new().with_unify_in_list(true),
201        );
202    }
203
204    #[test]
205    fn test_sql_with_in_list_with_unify_in_list_option_when_not_all_elements_are_literal_values() {
206        let sql = "SELECT a FROM t1 WHERE b = 1 AND c in (2, (SELECT * FROM t2 WHERE d IN (3, COALESCE(e, 5))))";
207        let expected = vec!["SELECT a FROM t1 WHERE b = ? AND c IN (?, (SELECT * FROM t2 WHERE d IN (?, COALESCE(e, ?))))".into()];
208        assert_normalize(
209            sql,
210            expected,
211            all_dialects(),
212            NormalizerOptions::new().with_unify_in_list(true),
213        );
214    }
215
216    #[test]
217    fn test_sql_with_values_without_unify_values_option() {
218        let sql = "INSERT INTO t1 (a, b, c) VALUES (1, 2, 3), (4, 5, 6), (7, 8, 9)";
219        let expected =
220            vec!["INSERT INTO t1 (a, b, c) VALUES (?, ?, ?), (?, ?, ?), (?, ?, ?)".into()];
221        assert_normalize(sql, expected, all_dialects(), NormalizerOptions::new());
222    }
223
224    #[test]
225    fn test_sql_with_values_with_unify_values_option() {
226        let sql = "INSERT INTO t1 (a, b, c) VALUES (1, 2, 3), (4, 5, 6), (7, 8, 9)";
227        let expected = vec!["INSERT INTO t1 (a, b, c) VALUES (...)".into()];
228        assert_normalize(
229            sql,
230            expected,
231            all_dialects(),
232            NormalizerOptions::new().with_unify_values(true),
233        );
234    }
235
236    #[test]
237    fn test_sql_with_values_with_row_constructor_with_unify_values_option() {
238        let sql = "INSERT INTO t1 (a, b, c) VALUES ROW(1, 2, 3), ROW(4, 5, 6), ROW(7, 8, 9)";
239        let expected = vec!["INSERT INTO t1 (a, b, c) VALUES ROW(...)".into()];
240        assert_normalize(
241            sql,
242            expected,
243            all_dialects(),
244            NormalizerOptions::new().with_unify_values(true),
245        );
246    }
247
248    #[test]
249    fn test_sql_with_values_with_unify_values_option_when_not_all_elements_are_literal_values() {
250        let sql = "INSERT INTO t1 (a, b, c) VALUES (1, 2, 3), (4, 5, 6), (7, (SELECT * FROM t2 WHERE d = 9))";
251        let expected = vec!["INSERT INTO t1 (a, b, c) VALUES (?, ?, ?), (?, ?, ?), (?, (SELECT * FROM t2 WHERE d = ?))".into()];
252        assert_normalize(
253            sql,
254            expected,
255            all_dialects(),
256            NormalizerOptions::new().with_unify_values(true),
257        );
258    }
259}