Skip to main content

surql_parser/
lib.rs

1//! surql-parser — Standalone SurrealQL parser extracted from SurrealDB.
2//!
3//! Provides a complete SurrealQL parser without depending on the SurrealDB engine.
4//! Useful for building tools, linters, formatters, IDE extensions, and migration systems.
5//!
6//! # Quick Start
7//!
8//! ```
9//! let ast = surql_parser::parse("SELECT name, age FROM user WHERE age > 18").unwrap();
10//! assert!(!ast.expressions.is_empty());
11//! ```
12//!
13//! # Sync with SurrealDB
14//!
15//! Parser source is auto-extracted from SurrealDB via `tools/transform/`.
16//! See UPSTREAM_SYNC.md for details.
17
18#[macro_use]
19extern crate tracing;
20
21pub mod compat;
22pub mod config;
23pub mod error;
24
25#[cfg(feature = "build")]
26pub mod build;
27
28#[allow(
29	clippy::useless_conversion,
30	clippy::large_enum_variant,
31	clippy::match_single_binding,
32	clippy::needless_borrow
33)]
34pub mod upstream;
35
36pub mod builtins_generated;
37pub mod diff;
38pub mod doc_urls;
39pub mod filesystem;
40pub mod formatting;
41pub mod keywords;
42pub mod lint;
43pub mod params;
44pub mod recovery;
45pub mod schema_graph;
46pub mod schema_lookup;
47
48// Re-export for backward compat
49pub use error::{Error, Result};
50pub use filesystem::*;
51pub use keywords::all_keywords;
52pub use params::*;
53pub use schema_lookup::*;
54
55// ─── Public API ───
56
57/// Parse a SurrealQL query string into an AST.
58///
59/// Returns a list of top-level expressions (statements).
60///
61/// # Example
62///
63/// ```
64/// let ast = surql_parser::parse("CREATE user SET name = 'Alice'").unwrap();
65/// assert_eq!(ast.expressions.len(), 1);
66/// ```
67pub fn parse(input: &str) -> Result<Ast> {
68	upstream::syn::parse(input).map_err(|e| Error::Parse(e.to_string()))
69}
70
71/// Parse a SurrealQL query with custom parser settings.
72pub fn parse_with_settings(input: &str, settings: ParserSettings) -> Result<Ast> {
73	upstream::syn::parse_with_settings(input.as_bytes(), settings, async |parser, stk| {
74		parser.parse_query(stk).await
75	})
76	.map_err(|e| Error::Parse(e.to_string()))
77}
78
79/// Parse a SurrealQL type annotation (e.g., `record<user>`, `option<string>`).
80pub fn parse_kind(input: &str) -> Result<Kind> {
81	upstream::syn::kind(input).map_err(|e| Error::Parse(e.to_string()))
82}
83
84/// Check if a string could be a reserved keyword in certain contexts.
85pub fn is_reserved_keyword(s: &str) -> bool {
86	upstream::syn::could_be_reserved_keyword(s)
87}
88
89// ─── Schema Extraction ───
90
91/// All definitions found in a SurrealQL file.
92///
93/// Use `extract_definitions()` to get this from a .surql file.
94/// This is the primary tool for migration systems and schema analyzers.
95use upstream::sql::statements::define;
96
97#[derive(Debug, Default)]
98pub struct SchemaDefinitions {
99	pub namespaces: Vec<statements::DefineNamespaceStatement>,
100	pub databases: Vec<define::DefineDatabaseStatement>,
101	pub tables: Vec<statements::DefineTableStatement>,
102	pub fields: Vec<statements::DefineFieldStatement>,
103	pub indexes: Vec<statements::DefineIndexStatement>,
104	pub functions: Vec<statements::DefineFunctionStatement>,
105	pub analyzers: Vec<define::DefineAnalyzerStatement>,
106	pub events: Vec<statements::DefineEventStatement>,
107	pub params: Vec<define::DefineParamStatement>,
108	pub users: Vec<define::DefineUserStatement>,
109	pub accesses: Vec<define::DefineAccessStatement>,
110	/// Current NS/DB context from USE statements (tracked during extraction)
111	pub current_ns: Option<String>,
112	pub current_db: Option<String>,
113}
114
115/// Extract all DEFINE statements from a SurrealQL string.
116///
117/// Useful for schema analysis, migration tools, and validation.
118///
119/// # Example
120///
121/// ```
122/// let defs = surql_parser::extract_definitions("
123///     DEFINE TABLE user SCHEMAFULL;
124///     DEFINE FIELD name ON user TYPE string;
125///     DEFINE FIELD age ON user TYPE int DEFAULT 0;
126///     DEFINE INDEX email_idx ON user FIELDS email UNIQUE;
127///     DEFINE FUNCTION fn::greet($name: string) { RETURN 'Hello, ' + $name; };
128/// ").unwrap();
129///
130/// assert_eq!(defs.tables.len(), 1);
131/// assert_eq!(defs.fields.len(), 2);
132/// assert_eq!(defs.indexes.len(), 1);
133/// assert_eq!(defs.functions.len(), 1);
134/// ```
135pub fn extract_definitions(input: &str) -> Result<SchemaDefinitions> {
136	let ast = parse(input)?;
137	extract_definitions_from_ast(&ast.expressions)
138}
139
140/// Extract definitions from pre-parsed statements (e.g., from error-recovering parser).
141pub fn extract_definitions_from_ast(
142	stmts: &[upstream::sql::ast::TopLevelExpr],
143) -> Result<SchemaDefinitions> {
144	let mut defs = SchemaDefinitions::default();
145
146	for top in stmts {
147		// Track USE NS/DB context changes
148		if let upstream::sql::ast::TopLevelExpr::Use(use_stmt) = top {
149			let (ns, db) = extract_use_context(use_stmt);
150			if let Some(ns) = ns {
151				defs.current_ns = Some(ns);
152			}
153			if let Some(db) = db {
154				defs.current_db = Some(db);
155			}
156			continue;
157		}
158
159		if let upstream::sql::ast::TopLevelExpr::Expr(Expr::Define(stmt)) = top {
160			use define::DefineStatement as DS;
161			match stmt.as_ref() {
162				DS::Namespace(s) => defs.namespaces.push(s.clone()),
163				DS::Database(s) => defs.databases.push(s.clone()),
164				DS::Table(s) => defs.tables.push(s.clone()),
165				DS::Field(s) => defs.fields.push(s.clone()),
166				DS::Index(s) => defs.indexes.push(s.clone()),
167				DS::Function(s) => defs.functions.push(s.clone()),
168				DS::Analyzer(s) => defs.analyzers.push(s.clone()),
169				DS::Event(s) => defs.events.push(s.clone()),
170				DS::Param(s) => defs.params.push(s.clone()),
171				DS::User(s) => defs.users.push(s.clone()),
172				DS::Access(s) => defs.accesses.push(s.clone()),
173				_ => {}
174			}
175		}
176	}
177
178	Ok(defs)
179}
180
181fn extract_use_context(
182	use_stmt: &upstream::sql::statements::r#use::UseStatement,
183) -> (Option<String>, Option<String>) {
184	use surrealdb_types::{SqlFormat, ToSql};
185	use upstream::sql::statements::r#use::UseStatement;
186
187	fn expr_to_string(expr: &Expr) -> String {
188		let mut s = String::new();
189		expr.fmt_sql(&mut s, SqlFormat::SingleLine);
190		s
191	}
192
193	match use_stmt {
194		UseStatement::Ns(ns) => (Some(expr_to_string(ns)), None),
195		UseStatement::Db(db) => (None, Some(expr_to_string(db))),
196		UseStatement::NsDb(ns, db) => (Some(expr_to_string(ns)), Some(expr_to_string(db))),
197		UseStatement::Default => (None, None),
198	}
199}
200
201/// List all function names defined in a SurrealQL string.
202///
203/// # Example
204///
205/// ```
206/// let fns = surql_parser::list_functions("
207///     DEFINE FUNCTION fn::greet($name: string) { RETURN 'Hello, ' + $name; };
208///     DEFINE FUNCTION fn::add($a: int, $b: int) { RETURN $a + $b; };
209/// ").unwrap();
210///
211/// assert_eq!(fns, vec!["greet", "add"]);
212/// ```
213pub fn list_functions(input: &str) -> Result<Vec<String>> {
214	let defs = extract_definitions(input)?;
215	Ok(defs
216		.functions
217		.iter()
218		.map(|f| {
219			use surrealdb_types::{SqlFormat, ToSql};
220			let mut name = String::new();
221			f.name.fmt_sql(&mut name, SqlFormat::SingleLine);
222			name
223		})
224		.collect())
225}
226
227/// List all table names defined in a SurrealQL string.
228///
229/// # Example
230///
231/// ```
232/// let tables = surql_parser::list_tables("
233///     DEFINE TABLE user SCHEMAFULL;
234///     DEFINE TABLE post SCHEMALESS;
235///     SELECT * FROM user;
236/// ").unwrap();
237///
238/// assert_eq!(tables, vec!["user", "post"]);
239/// ```
240pub fn list_tables(input: &str) -> Result<Vec<String>> {
241	let defs = extract_definitions(input)?;
242	Ok(defs
243		.tables
244		.iter()
245		.map(|t| {
246			use surrealdb_types::{SqlFormat, ToSql};
247			let mut name = String::new();
248			t.name.fmt_sql(&mut name, SqlFormat::SingleLine);
249			name
250		})
251		.collect())
252}
253
254/// Format an AST back to SurrealQL string.
255///
256/// # Example
257///
258/// ```
259/// let ast = surql_parser::parse("SELECT * FROM user").unwrap();
260/// let sql = surql_parser::format(&ast);
261/// assert!(sql.contains("SELECT"));
262/// ```
263pub fn format(ast: &Ast) -> String {
264	use surrealdb_types::{SqlFormat, ToSql};
265	let mut buf = String::new();
266	ast.fmt_sql(&mut buf, SqlFormat::SingleLine);
267	buf
268}
269
270// ─── Diagnostics ───
271
/// A single parse error together with the source range it refers to.
///
/// All positions are 1-indexed.
#[derive(Debug, Clone)]
pub struct ParseDiagnostic {
	/// Human-readable description of the problem.
	pub message: String,
	/// Line on which the range starts (1-indexed).
	pub line: usize,
	/// Column at which the range starts (1-indexed, counted in chars).
	pub column: usize,
	/// Line on which the range ends (1-indexed).
	pub end_line: usize,
	/// Column at which the range ends (1-indexed).
	pub end_column: usize,
}
285
286/// Parse SurrealQL and return structured diagnostics on error.
287///
288/// Unlike [`parse()`], this function returns diagnostics with precise
289/// source positions suitable for LSP and IDE integration.
290///
291/// # Example
292///
293/// ```
294/// let result = surql_parser::parse_for_diagnostics("SELEC * FROM user");
295/// assert!(result.is_err());
296/// let diags = result.unwrap_err();
297/// assert!(!diags.is_empty());
298/// assert_eq!(diags[0].line, 1);
299/// ```
300pub fn parse_for_diagnostics(input: &str) -> std::result::Result<Ast, Vec<ParseDiagnostic>> {
301	use upstream::syn::error::Location;
302	use upstream::syn::token::Span;
303
304	let bytes = input.as_bytes();
305	if bytes.len() > u32::MAX as usize {
306		return Err(vec![ParseDiagnostic {
307			message: "Query too large".into(),
308			line: 1,
309			column: 1,
310			end_line: 1,
311			end_column: 1,
312		}]);
313	}
314
315	let settings = upstream::syn::settings_from_capabilities(&compat::Capabilities::all());
316	let mut parser = upstream::syn::parser::Parser::new_with_settings(bytes, settings);
317	let mut stack = reblessive::Stack::new();
318
319	match stack.enter(|stk| parser.parse_query(stk)).finish() {
320		Ok(ast) => Ok(ast),
321		Err(syntax_error) => {
322			// Collect spans from the error chain
323			let mut spans: Vec<Span> = Vec::new();
324			let err = syntax_error.update_spans(|span| {
325				spans.push(*span);
326			});
327
328			// Get rendered error messages
329			let rendered = err.render_on(input);
330
331			let message = rendered.errors.join(": ");
332
333			let mut diags: Vec<ParseDiagnostic> = spans
334				.iter()
335				.map(|span| {
336					let range = Location::range_of_span(input, *span);
337					ParseDiagnostic {
338						message: message.clone(),
339						line: range.start.line,
340						column: range.start.column,
341						end_line: range.end.line,
342						end_column: range.end.column,
343					}
344				})
345				.collect();
346
347			// If no spans but there are error messages, create a fallback diagnostic
348			if diags.is_empty() && !message.is_empty() {
349				diags.push(ParseDiagnostic {
350					message,
351					line: 1,
352					column: 1,
353					end_line: 1,
354					end_column: 1,
355				});
356			}
357
358			Err(diags)
359		}
360	}
361}
362
363// ─── Re-exports ───
364
365/// The parsed AST (list of top-level statements).
366pub use upstream::sql::Ast;
367
368/// A single expression in the AST.
369pub use upstream::sql::expression::Expr;
370
371/// Parser configuration settings.
372pub use upstream::syn::ParserSettings;
373
374/// SurrealQL type annotation (e.g., `string`, `record<user>`, `array<int>`).
375pub use upstream::sql::Kind;
376
377/// An identifier path (e.g., `user.name`, `->knows->person`).
378pub use upstream::sql::Idiom;
379
380/// A SurrealQL statement (SELECT, CREATE, DEFINE, etc.).
381pub use upstream::sql::statements;
382
383/// Syntax error type from the upstream parser.
384pub use upstream::syn::error as syntax_error;
385
386pub use recovery::parse_with_recovery;
387pub use schema_graph::{DependencyNode, SchemaGraph};
388
389// ─── Built-in Function Lookup ───
390
391/// Look up a built-in SurrealQL function by its full name (e.g., `"string::len"`).
392pub fn builtin_function(name: &str) -> Option<&'static builtins_generated::BuiltinFn> {
393	use std::collections::HashMap;
394	use std::sync::LazyLock;
395
396	static INDEX: LazyLock<HashMap<&'static str, &'static builtins_generated::BuiltinFn>> =
397		LazyLock::new(|| {
398			builtins_generated::BUILTINS
399				.iter()
400				.map(|f| (f.name, f))
401				.collect()
402		});
403
404	INDEX.get(name).copied()
405}
406
407/// Return all built-in functions in a given namespace (e.g., `"string"` returns all `string::*`).
408pub fn builtins_in_namespace(ns: &str) -> Vec<&'static builtins_generated::BuiltinFn> {
409	let prefix = format!("{ns}::");
410	builtins_generated::BUILTINS
411		.iter()
412		.filter(|f| f.name.starts_with(&prefix))
413		.collect()
414}