Skip to main content

spark_sql_parser/
lib.rs

1//! Parse SQL into [sqlparser] AST.
2//!
3//! Supports a Spark-style subset: single-statement SELECT, CREATE SCHEMA/DATABASE,
4//! and DROP TABLE/VIEW/SCHEMA, plus many DDL and utility statements (CREATE/ALTER/DROP
5//! TABLE/VIEW/FUNCTION/SCHEMA, SHOW, INSERT, DESCRIBE, SET, RESET, CACHE, EXPLAIN, etc.).
6//!
7//! # SELECT and query compatibility
8//!
9//! Any statement that [sqlparser] parses as a `Query` (e.g. `SELECT`, `WITH ... SELECT`)
10//! is accepted. Clause support is determined by [sqlparser] and the dialect in use
11//! (this crate uses [GenericDialect](sqlparser::dialect::GenericDialect)).
12//!
13//! ## Known gaps
14//!
15//! Spark-specific query clauses such as `DISTRIBUTE BY`, `CLUSTER BY`, `SORT BY`
16//! may not be recognized by the parser or may be rejected; behavior depends on
17//! the upstream dialect and parser. Use single-statement queries only (one statement
18//! per call).
19
20use sqlparser::ast::Statement;
21use sqlparser::dialect::GenericDialect;
22use sqlparser::parser::Parser;
23use thiserror::Error;
24
25/// Error returned when SQL parsing or validation fails.
26#[derive(Error, Debug)]
27#[error("{0}")]
28pub struct ParseError(String);
29
30/// Parse a single SQL statement (SELECT or DDL: CREATE SCHEMA / CREATE DATABASE / DROP TABLE/VIEW/SCHEMA).
31///
32/// Returns the [sqlparser::ast::Statement] on success. Only one statement per call;
33/// run one statement at a time.
34pub fn parse_sql(query: &str) -> Result<Statement, ParseError> {
35    let dialect = GenericDialect {};
36    let stmts = Parser::parse_sql(&dialect, query).map_err(|e| {
37        ParseError(format!(
38            "SQL parse error: {}. Hint: supported statements include SELECT, CREATE TABLE/VIEW/FUNCTION/SCHEMA/DATABASE, DROP TABLE/VIEW/SCHEMA.",
39            e
40        ))
41    })?;
42    if stmts.len() != 1 {
43        return Err(ParseError(format!(
44            "SQL: expected exactly one statement, got {}. Hint: run one statement at a time.",
45            stmts.len()
46        )));
47    }
48    let stmt = stmts.into_iter().next().expect("len == 1");
49    match &stmt {
50        Statement::Query(_) => {}
51        Statement::CreateSchema { .. } | Statement::CreateDatabase { .. } => {}
52        Statement::CreateTable(_) | Statement::CreateView(_) | Statement::CreateFunction(_) => {}
53        Statement::AlterTable(_) | Statement::AlterView { .. } | Statement::AlterSchema(_) => {}
54        Statement::Drop {
55            object_type:
56                sqlparser::ast::ObjectType::Table
57                | sqlparser::ast::ObjectType::View
58                | sqlparser::ast::ObjectType::Schema,
59            ..
60        } => {}
61        Statement::DropFunction(_) => {}
62        Statement::Use(_) | Statement::Truncate(_) | Statement::Declare { .. } => {}
63        Statement::ShowTables { .. }
64        | Statement::ShowDatabases { .. }
65        | Statement::ShowSchemas { .. }
66        | Statement::ShowFunctions { .. }
67        | Statement::ShowColumns { .. }
68        | Statement::ShowViews { .. }
69        | Statement::ShowCreate { .. } => {}
70        Statement::Insert(_) | Statement::Directory { .. } | Statement::LoadData { .. } => {}
71        Statement::Update(_) | Statement::Delete(_) => {}
72        Statement::ExplainTable { .. } => {}
73        Statement::Set(_) | Statement::Reset(_) => {}
74        Statement::Cache { .. } | Statement::UNCache { .. } => {}
75        Statement::Explain { .. } => {}
76        _ => {
77            return Err(ParseError(format!(
78                "SQL: statement type not supported, got {:?}.",
79                stmt
80            )));
81        }
82    }
83    Ok(stmt)
84}
85
#[cfg(test)]
mod tests {
    use super::*;
    use sqlparser::ast::{ObjectType, Statement};

    /// Parse `sql` and assert that `check` accepts the resulting statement.
    ///
    /// Panics with the parse error when `parse_sql` fails, or with the SQL text
    /// when `check` returns `false`.
    fn assert_parses_to(sql: &str, check: impl FnOnce(&Statement) -> bool) {
        match parse_sql(sql) {
            Ok(parsed) => assert!(check(&parsed), "expected match for: {sql}"),
            Err(e) => panic!("parse_sql failed: {e}"),
        }
    }

    // --- Error handling ---

    /// Two statements in one call are rejected and the count is reported.
    #[test]
    fn error_multiple_statements() {
        let ParseError(message) = parse_sql("SELECT 1; SELECT 2").unwrap_err();
        assert!(message.contains("expected exactly one statement"));
        assert!(message.contains("2"));
    }

    /// Empty input is rejected, either as a count error or as a parse error.
    #[test]
    fn error_zero_statements() {
        let ParseError(message) = parse_sql("").unwrap_err();
        let recognised = ["expected exactly one statement", "parse error"]
            .iter()
            .any(|needle| message.contains(*needle));
        assert!(recognised);
    }

    /// A statement that parses but is not whitelisted is rejected.
    #[test]
    fn error_unsupported_statement_type() {
        // COMMIT is parsed by sqlparser but not in our whitelist
        let ParseError(message) = parse_sql("COMMIT").unwrap_err();
        assert!(message.contains("not supported"));
    }

    /// Malformed SQL yields a non-empty error message.
    #[test]
    fn error_syntax() {
        let ParseError(message) = parse_sql("SELECT FROM").unwrap_err();
        assert!(!message.is_empty());
    }

    // --- Queries ---

    /// A bare `SELECT` is a query.
    #[test]
    fn query_select_simple() {
        assert_parses_to("SELECT 1", |stmt| matches!(stmt, Statement::Query(_)));
    }

    /// `SELECT ... FROM` is a query.
    #[test]
    fn query_select_with_from() {
        assert_parses_to("SELECT a FROM t", |stmt| {
            matches!(stmt, Statement::Query(_))
        });
    }

    /// A CTE-prefixed `SELECT` is a query.
    #[test]
    fn query_with_cte() {
        assert_parses_to("WITH cte AS (SELECT 1) SELECT * FROM cte", |stmt| {
            matches!(stmt, Statement::Query(_))
        });
    }

    /// `CREATE SCHEMA` is accepted.
    #[test]
    fn query_create_schema() {
        assert_parses_to("CREATE SCHEMA s", |stmt| {
            matches!(stmt, Statement::CreateSchema { .. })
        });
    }

    /// `CREATE DATABASE` is accepted.
    #[test]
    fn query_create_database() {
        assert_parses_to("CREATE DATABASE d", |stmt| {
            matches!(stmt, Statement::CreateDatabase { .. })
        });
    }

    // --- DDL: CREATE (issue #652) ---

    /// `CREATE TABLE` is accepted.
    #[test]
    fn test_issue_652_create_table() {
        assert_parses_to("CREATE TABLE t (a INT)", |stmt| {
            matches!(stmt, Statement::CreateTable(_))
        });
    }

    /// `CREATE VIEW` is accepted.
    #[test]
    fn test_issue_652_create_view() {
        assert_parses_to("CREATE VIEW v AS SELECT 1", |stmt| {
            matches!(stmt, Statement::CreateView(_))
        });
    }

    /// `CREATE FUNCTION` is accepted.
    #[test]
    fn test_issue_652_create_function() {
        assert_parses_to("CREATE FUNCTION f() AS 'com.example.UDF'", |stmt| {
            matches!(stmt, Statement::CreateFunction(_))
        });
    }

    // --- DDL: ALTER (issue #653) ---

    /// `ALTER TABLE` is accepted.
    #[test]
    fn test_issue_653_alter_table() {
        assert_parses_to("ALTER TABLE t ADD COLUMN c INT", |stmt| {
            matches!(stmt, Statement::AlterTable(_))
        });
    }

    /// `ALTER VIEW` is accepted.
    #[test]
    fn test_issue_653_alter_view() {
        assert_parses_to("ALTER VIEW v AS SELECT 1", |stmt| {
            matches!(stmt, Statement::AlterView { .. })
        });
    }

    /// `ALTER SCHEMA` is accepted.
    #[test]
    fn test_issue_653_alter_schema() {
        assert_parses_to("ALTER SCHEMA db RENAME TO db2", |stmt| {
            matches!(stmt, Statement::AlterSchema(_))
        });
    }

    // --- DDL: DROP (issue #654) ---

    /// `DROP TABLE` yields a `Drop` with `ObjectType::Table`.
    #[test]
    fn test_issue_654_drop_table() {
        let stmt = parse_sql("DROP TABLE t").unwrap();
        let is_drop_table = matches!(
            &stmt,
            Statement::Drop {
                object_type: ObjectType::Table,
                ..
            }
        );
        assert!(is_drop_table, "expected Drop Table: {stmt:?}");
    }

    /// `DROP VIEW` yields a `Drop` with `ObjectType::View`.
    #[test]
    fn test_issue_654_drop_view() {
        let stmt = parse_sql("DROP VIEW v").unwrap();
        let is_drop_view = matches!(
            &stmt,
            Statement::Drop {
                object_type: ObjectType::View,
                ..
            }
        );
        assert!(is_drop_view, "expected Drop View: {stmt:?}");
    }

    /// `DROP SCHEMA` yields a `Drop` with `ObjectType::Schema`.
    #[test]
    fn test_issue_654_drop_schema() {
        let stmt = parse_sql("DROP SCHEMA s").unwrap();
        let is_drop_schema = matches!(
            &stmt,
            Statement::Drop {
                object_type: ObjectType::Schema,
                ..
            }
        );
        assert!(is_drop_schema, "expected Drop Schema: {stmt:?}");
    }

    /// `DROP FUNCTION` is accepted.
    #[test]
    fn test_issue_654_drop_function() {
        assert_parses_to("DROP FUNCTION f", |stmt| {
            matches!(stmt, Statement::DropFunction(_))
        });
    }

    // --- Utility: USE, TRUNCATE, DECLARE (issue #655) ---

    /// `USE` is accepted.
    #[test]
    fn test_issue_655_use() {
        assert_parses_to("USE db1", |stmt| matches!(stmt, Statement::Use(_)));
    }

    /// `TRUNCATE TABLE` is accepted.
    #[test]
    fn test_issue_655_truncate() {
        assert_parses_to("TRUNCATE TABLE t", |stmt| {
            matches!(stmt, Statement::Truncate(_))
        });
    }

    /// `DECLARE ... CURSOR` is accepted.
    #[test]
    fn test_issue_655_declare() {
        assert_parses_to("DECLARE c CURSOR FOR SELECT 1", |stmt| {
            matches!(stmt, Statement::Declare { .. })
        });
    }

    // --- SHOW (issue #656) ---

    /// `SHOW TABLES` is accepted.
    #[test]
    fn test_issue_656_show_tables() {
        assert_parses_to("SHOW TABLES", |stmt| {
            matches!(stmt, Statement::ShowTables { .. })
        });
    }

    /// `SHOW DATABASES` is accepted.
    #[test]
    fn test_issue_656_show_databases() {
        assert_parses_to("SHOW DATABASES", |stmt| {
            matches!(stmt, Statement::ShowDatabases { .. })
        });
    }

    /// `SHOW SCHEMAS` is accepted.
    #[test]
    fn test_issue_656_show_schemas() {
        assert_parses_to("SHOW SCHEMAS", |stmt| {
            matches!(stmt, Statement::ShowSchemas { .. })
        });
    }

    /// `SHOW FUNCTIONS` is accepted.
    #[test]
    fn test_issue_656_show_functions() {
        assert_parses_to("SHOW FUNCTIONS", |stmt| {
            matches!(stmt, Statement::ShowFunctions { .. })
        });
    }

    /// `SHOW COLUMNS FROM` is accepted.
    #[test]
    fn test_issue_656_show_columns() {
        assert_parses_to("SHOW COLUMNS FROM t", |stmt| {
            matches!(stmt, Statement::ShowColumns { .. })
        });
    }

    /// `SHOW VIEWS` is accepted.
    #[test]
    fn test_issue_656_show_views() {
        assert_parses_to("SHOW VIEWS", |stmt| {
            matches!(stmt, Statement::ShowViews { .. })
        });
    }

    /// `SHOW CREATE TABLE` is accepted.
    #[test]
    fn test_issue_656_show_create_table() {
        assert_parses_to("SHOW CREATE TABLE t", |stmt| {
            matches!(stmt, Statement::ShowCreate { .. })
        });
    }

    // --- INSERT / DIRECTORY (issue #657) ---

    /// `INSERT INTO ... SELECT` is accepted.
    #[test]
    fn test_issue_657_insert() {
        assert_parses_to("INSERT INTO t SELECT 1", |stmt| {
            matches!(stmt, Statement::Insert(_))
        });
    }

    /// `INSERT OVERWRITE DIRECTORY` is accepted as a `Directory` statement.
    #[test]
    fn test_issue_657_directory() {
        assert_parses_to("INSERT OVERWRITE DIRECTORY '/path' SELECT 1", |stmt| {
            matches!(stmt, Statement::Directory { .. })
        });
    }

    // --- DESCRIBE (issue #658) ---

    /// `DESCRIBE <table>` is accepted as an `ExplainTable` statement.
    #[test]
    fn test_issue_658_describe_table() {
        assert_parses_to("DESCRIBE t", |stmt| {
            matches!(stmt, Statement::ExplainTable { .. })
        });
    }

    // --- SET, RESET, CACHE, UNCACHE (issue #659) ---

    /// `SET` is accepted.
    #[test]
    fn test_issue_659_set() {
        assert_parses_to("SET x = 1", |stmt| matches!(stmt, Statement::Set(_)));
    }

    /// `RESET` is accepted.
    #[test]
    fn test_issue_659_reset() {
        assert_parses_to("RESET x", |stmt| matches!(stmt, Statement::Reset(_)));
    }

    /// `CACHE TABLE` is accepted.
    #[test]
    fn test_issue_659_cache() {
        assert_parses_to("CACHE TABLE t", |stmt| {
            matches!(stmt, Statement::Cache { .. })
        });
    }

    /// `UNCACHE TABLE` is accepted.
    #[test]
    fn test_issue_659_uncache() {
        assert_parses_to("UNCACHE TABLE t", |stmt| {
            matches!(stmt, Statement::UNCache { .. })
        });
    }

    /// `UNCACHE TABLE IF EXISTS` is accepted.
    #[test]
    fn test_issue_659_uncache_if_exists() {
        assert_parses_to("UNCACHE TABLE IF EXISTS t", |stmt| {
            matches!(stmt, Statement::UNCache { .. })
        });
    }

    // --- EXPLAIN (issue #660) ---

    /// `EXPLAIN <query>` is accepted.
    #[test]
    fn test_issue_660_explain() {
        assert_parses_to("EXPLAIN SELECT 1", |stmt| {
            matches!(stmt, Statement::Explain { .. })
        });
    }
}