Skip to main content

spark_sql_parser/
lib.rs

//! Parse SQL into a [sqlparser] AST.
//!
//! Supports a Spark-style subset of single-statement SQL: SELECT queries plus many DDL
//! and utility statements (CREATE/ALTER/DROP TABLE/VIEW/FUNCTION/SCHEMA, CREATE DATABASE,
//! SHOW, INSERT, DESCRIBE, SET, RESET, CACHE, EXPLAIN, etc.).
//!
//! # SELECT and query compatibility
//!
//! Any statement that [sqlparser] parses as a `Query` (e.g. `SELECT`, `WITH ... SELECT`)
//! is accepted. Clause support is determined by [sqlparser] and the dialect in use
//! (this crate uses [GenericDialect](sqlparser::dialect::GenericDialect)).
//!
//! ## Known gaps
//!
//! Spark-specific query clauses such as `DISTRIBUTE BY`, `CLUSTER BY`, and `SORT BY`
//! may not be recognized by the parser, or may be rejected; the behavior depends on
//! the upstream dialect and parser. Use single-statement queries only (one statement
//! per call).
19
20use sqlparser::ast::Statement;
21use sqlparser::dialect::GenericDialect;
22use sqlparser::parser::Parser;
23use thiserror::Error;
24
25/// Error returned when SQL parsing or validation fails.
26#[derive(Error, Debug)]
27#[error("{0}")]
28pub struct ParseError(String);
29
30/// Parse a single SQL statement (SELECT or DDL: CREATE SCHEMA / CREATE DATABASE / DROP TABLE/VIEW/SCHEMA).
31///
32/// Returns the [sqlparser::ast::Statement] on success. Only one statement per call;
33/// run one statement at a time.
34pub fn parse_sql(query: &str) -> Result<Statement, ParseError> {
35    let dialect = GenericDialect {};
36    let stmts = Parser::parse_sql(&dialect, query).map_err(|e| {
37        ParseError(format!(
38            "SQL parse error: {}. Hint: supported statements include SELECT, CREATE TABLE/VIEW/FUNCTION/SCHEMA/DATABASE, DROP TABLE/VIEW/SCHEMA.",
39            e
40        ))
41    })?;
42    if stmts.len() != 1 {
43        return Err(ParseError(format!(
44            "SQL: expected exactly one statement, got {}. Hint: run one statement at a time.",
45            stmts.len()
46        )));
47    }
48    let stmt = stmts.into_iter().next().expect("len == 1");
49    match &stmt {
50        Statement::Query(_) => {}
51        Statement::CreateSchema { .. } | Statement::CreateDatabase { .. } => {}
52        Statement::CreateTable(_) | Statement::CreateView(_) | Statement::CreateFunction(_) => {}
53        Statement::AlterTable(_) | Statement::AlterView { .. } | Statement::AlterSchema(_) => {}
54        Statement::Drop {
55            object_type:
56                sqlparser::ast::ObjectType::Table
57                | sqlparser::ast::ObjectType::View
58                | sqlparser::ast::ObjectType::Schema,
59            ..
60        } => {}
61        Statement::DropFunction(_) => {}
62        Statement::Use(_) | Statement::Truncate(_) | Statement::Declare { .. } => {}
63        Statement::ShowTables { .. }
64        | Statement::ShowDatabases { .. }
65        | Statement::ShowSchemas { .. }
66        | Statement::ShowFunctions { .. }
67        | Statement::ShowColumns { .. }
68        | Statement::ShowViews { .. }
69        | Statement::ShowCreate { .. } => {}
70        Statement::Insert(_) | Statement::Directory { .. } | Statement::LoadData { .. } => {}
71        Statement::ExplainTable { .. } => {}
72        Statement::Set(_) | Statement::Reset(_) => {}
73        Statement::Cache { .. } | Statement::UNCache { .. } => {}
74        Statement::Explain { .. } => {}
75        _ => {
76            return Err(ParseError(format!(
77                "SQL: statement type not supported, got {:?}.",
78                stmt
79            )));
80        }
81    }
82    Ok(stmt)
83}
84
#[cfg(test)]
mod tests {
    use super::*;
    use sqlparser::ast::{ObjectType, Statement};

    /// Parse `sql` and assert that `check` accepts the resulting statement.
    ///
    /// Panics with the parse error if `parse_sql` rejects the input.
    fn assert_parses_to<F>(sql: &str, check: F)
    where
        F: FnOnce(&Statement) -> bool,
    {
        match parse_sql(sql) {
            Ok(stmt) => assert!(check(&stmt), "expected match for: {sql}"),
            Err(e) => panic!("parse_sql failed: {e}"),
        }
    }

    // Expands every `name: "sql" => pattern` row into a `#[test] fn name()` that checks
    // the SQL text parses to a statement matching the pattern.
    macro_rules! parse_tests {
        ($($name:ident: $sql:literal => $expected:pat),+ $(,)?) => {
            $(
                #[test]
                fn $name() {
                    assert_parses_to($sql, |s| matches!(s, $expected));
                }
            )+
        };
    }

    // --- Error handling ---

    #[test]
    fn error_multiple_statements() {
        let ParseError(msg) = parse_sql("SELECT 1; SELECT 2").unwrap_err();
        assert!(msg.contains("expected exactly one statement"));
        assert!(msg.contains("2"));
    }

    #[test]
    fn error_zero_statements() {
        let ParseError(msg) = parse_sql("").unwrap_err();
        // Either outcome is acceptable: the parser may yield zero statements or fail outright.
        let accepted = ["expected exactly one statement", "parse error"];
        assert!(accepted.iter().any(|needle| msg.contains(*needle)));
    }

    #[test]
    fn error_unsupported_statement_type() {
        // UPDATE is parsed by sqlparser but not in our whitelist
        let ParseError(msg) = parse_sql("UPDATE t SET x = 1").unwrap_err();
        assert!(msg.contains("not supported"));
    }

    #[test]
    fn error_syntax() {
        let ParseError(msg) = parse_sql("SELECT FROM").unwrap_err();
        assert!(!msg.is_empty());
    }

    // --- Queries ---

    parse_tests! {
        query_select_simple: "SELECT 1" => Statement::Query(_),
        query_select_with_from: "SELECT a FROM t" => Statement::Query(_),
        query_with_cte: "WITH cte AS (SELECT 1) SELECT * FROM cte" => Statement::Query(_),
        query_create_schema: "CREATE SCHEMA s" => Statement::CreateSchema { .. },
        query_create_database: "CREATE DATABASE d" => Statement::CreateDatabase { .. },
    }

    // --- DDL: CREATE (issue #652) ---

    parse_tests! {
        test_issue_652_create_table: "CREATE TABLE t (a INT)" => Statement::CreateTable(_),
        test_issue_652_create_view: "CREATE VIEW v AS SELECT 1" => Statement::CreateView(_),
        test_issue_652_create_function:
            "CREATE FUNCTION f() AS 'com.example.UDF'" => Statement::CreateFunction(_),
    }

    // --- DDL: ALTER (issue #653) ---

    parse_tests! {
        test_issue_653_alter_table: "ALTER TABLE t ADD COLUMN c INT" => Statement::AlterTable(_),
        test_issue_653_alter_view: "ALTER VIEW v AS SELECT 1" => Statement::AlterView { .. },
        test_issue_653_alter_schema: "ALTER SCHEMA db RENAME TO db2" => Statement::AlterSchema(_),
    }

    // --- DDL: DROP (issue #654) ---

    #[test]
    fn test_issue_654_drop_table() {
        let stmt = parse_sql("DROP TABLE t").unwrap();
        let is_drop_table = matches!(
            &stmt,
            Statement::Drop {
                object_type: ObjectType::Table,
                ..
            }
        );
        assert!(is_drop_table, "expected Drop Table: {stmt:?}");
    }

    #[test]
    fn test_issue_654_drop_view() {
        let stmt = parse_sql("DROP VIEW v").unwrap();
        let is_drop_view = matches!(
            &stmt,
            Statement::Drop {
                object_type: ObjectType::View,
                ..
            }
        );
        assert!(is_drop_view, "expected Drop View: {stmt:?}");
    }

    #[test]
    fn test_issue_654_drop_schema() {
        let stmt = parse_sql("DROP SCHEMA s").unwrap();
        let is_drop_schema = matches!(
            &stmt,
            Statement::Drop {
                object_type: ObjectType::Schema,
                ..
            }
        );
        assert!(is_drop_schema, "expected Drop Schema: {stmt:?}");
    }

    parse_tests! {
        test_issue_654_drop_function: "DROP FUNCTION f" => Statement::DropFunction(_),
    }

    // --- Utility: USE, TRUNCATE, DECLARE (issue #655) ---

    parse_tests! {
        test_issue_655_use: "USE db1" => Statement::Use(_),
        test_issue_655_truncate: "TRUNCATE TABLE t" => Statement::Truncate(_),
        test_issue_655_declare: "DECLARE c CURSOR FOR SELECT 1" => Statement::Declare { .. },
    }

    // --- SHOW (issue #656) ---

    parse_tests! {
        test_issue_656_show_tables: "SHOW TABLES" => Statement::ShowTables { .. },
        test_issue_656_show_databases: "SHOW DATABASES" => Statement::ShowDatabases { .. },
        test_issue_656_show_schemas: "SHOW SCHEMAS" => Statement::ShowSchemas { .. },
        test_issue_656_show_functions: "SHOW FUNCTIONS" => Statement::ShowFunctions { .. },
        test_issue_656_show_columns: "SHOW COLUMNS FROM t" => Statement::ShowColumns { .. },
        test_issue_656_show_views: "SHOW VIEWS" => Statement::ShowViews { .. },
        test_issue_656_show_create_table: "SHOW CREATE TABLE t" => Statement::ShowCreate { .. },
    }

    // --- INSERT / DIRECTORY (issue #657) ---

    parse_tests! {
        test_issue_657_insert: "INSERT INTO t SELECT 1" => Statement::Insert(_),
        test_issue_657_directory:
            "INSERT OVERWRITE DIRECTORY '/path' SELECT 1" => Statement::Directory { .. },
    }

    // --- DESCRIBE (issue #658) ---

    parse_tests! {
        test_issue_658_describe_table: "DESCRIBE t" => Statement::ExplainTable { .. },
    }

    // --- SET, RESET, CACHE, UNCACHE (issue #659) ---

    parse_tests! {
        test_issue_659_set: "SET x = 1" => Statement::Set(_),
        test_issue_659_reset: "RESET x" => Statement::Reset(_),
        test_issue_659_cache: "CACHE TABLE t" => Statement::Cache { .. },
        test_issue_659_uncache: "UNCACHE TABLE t" => Statement::UNCache { .. },
        test_issue_659_uncache_if_exists: "UNCACHE TABLE IF EXISTS t" => Statement::UNCache { .. },
    }

    // --- EXPLAIN (issue #660) ---

    parse_tests! {
        test_issue_660_explain: "EXPLAIN SELECT 1" => Statement::Explain { .. },
    }
}