// surql_parser/recovery.rs
1//! Error-recovering parser — parses valid statements even when some are broken.
2//!
3//! Splits the source into statement chunks by scanning for `;` tokens,
4//! then parses each chunk independently. Returns partial results (successful
5//! ASTs) alongside diagnostics for failed chunks.
6//!
7//! This is essential for LSP — the document is *always* invalid while typing,
8//! but the LSP needs to provide completions/hover for the valid parts.
9//!
10//! # Example
11//!
12//! ```
13//! let (stmts, diags) = surql_parser::parse_with_recovery(
14//!     "SELECT * FROM user; SELEC broken; DEFINE TABLE post SCHEMAFULL"
15//! );
16//! assert_eq!(stmts.len(), 2); // first and third succeeded
17//! assert_eq!(diags.len(), 1); // second failed
18//! ```
19
20use crate::ParseDiagnostic;
21use crate::upstream::sql::ast::TopLevelExpr;
22use crate::upstream::syn::lexer::Lexer;
23use crate::upstream::syn::token::{Delim, TokenKind};
24
25/// Parse a SurrealQL document with error recovery.
26///
27/// Returns a tuple of:
28/// - Successfully parsed statements
29/// - Diagnostics for statements that failed to parse
30pub fn parse_with_recovery(source: &str) -> (Vec<TopLevelExpr>, Vec<ParseDiagnostic>) {
31	if source.trim().is_empty() {
32		return (Vec::new(), Vec::new());
33	}
34
35	let chunks = split_into_chunks(source);
36	let mut all_stmts = Vec::new();
37	let mut all_diags = Vec::new();
38
39	for chunk in &chunks {
40		let chunk_source = &source[chunk.start..chunk.end];
41		if chunk_source.trim().is_empty() {
42			continue;
43		}
44
45		// Single parse — parse_for_diagnostics returns AST on success, structured errors on failure
46		match crate::parse_for_diagnostics(chunk_source) {
47			Ok(ast) => {
48				all_stmts.extend(ast.expressions);
49			}
50			Err(diags) => {
51				let (base_line, base_col) = byte_offset_to_line_col(source, chunk.start);
52				for mut d in diags {
53					if d.line == 1 {
54						d.column += base_col;
55						d.end_column += base_col;
56					}
57					d.line += base_line;
58					d.end_line += base_line;
59					all_diags.push(d);
60				}
61			}
62		}
63	}
64
65	(all_stmts, all_diags)
66}
67
/// A byte range representing a statement chunk in the source.
struct Chunk {
	// Inclusive byte offset of the chunk's first byte in the source.
	start: usize,
	// Exclusive byte offset one past the chunk's last byte.
	end: usize,
}
73
74/// Split source into statement chunks by tokenizing and finding `;` boundaries.
75/// Respects brace depth — semicolons inside `{ }` blocks are not split points.
76fn split_into_chunks(source: &str) -> Vec<Chunk> {
77	let bytes = source.as_bytes();
78	if bytes.len() > u32::MAX as usize {
79		return vec![Chunk {
80			start: 0,
81			end: source.len(),
82		}];
83	}
84
85	let lexer = Lexer::new(bytes);
86	let mut chunks = Vec::new();
87	let mut chunk_start = 0;
88	let mut brace_depth: u32 = 0;
89
90	for token in lexer {
91		match token.kind {
92			TokenKind::OpenDelim(Delim::Brace) => brace_depth += 1,
93			TokenKind::CloseDelim(Delim::Brace) => brace_depth = brace_depth.saturating_sub(1),
94			TokenKind::SemiColon if brace_depth == 0 => {
95				let semi_end = token.span.offset as usize + token.span.len as usize;
96				if chunk_start < token.span.offset as usize {
97					chunks.push(Chunk {
98						start: chunk_start,
99						end: token.span.offset as usize,
100					});
101				}
102				chunk_start = semi_end;
103			}
104			_ => {}
105		}
106	}
107
108	if chunk_start < source.len() {
109		let remaining = source[chunk_start..].trim();
110		if !remaining.is_empty() {
111			chunks.push(Chunk {
112				start: chunk_start,
113				end: source.len(),
114			});
115		}
116	}
117
118	if chunks.is_empty() && !source.trim().is_empty() {
119		chunks.push(Chunk {
120			start: 0,
121			end: source.len(),
122		});
123	}
124
125	chunks
126}
127
/// Convert a byte offset to (0-indexed line, 0-indexed byte column).
///
/// Offsets past the end of `source` are clamped to the end. Offsets that fall
/// inside a multi-byte UTF-8 character are clamped down to the nearest char
/// boundary — the original code would panic on the slice in that case.
fn byte_offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
	let mut offset = offset.min(source.len());
	// Slicing a &str at a non-boundary offset panics; walk back to safety.
	while offset > 0 && !source.is_char_boundary(offset) {
		offset -= 1;
	}
	let before = &source[..offset];
	let line = before.matches('\n').count();
	// Column is the byte distance from the last newline (or from the start
	// of the source when the offset is on the first line).
	let col = before.rfind('\n').map(|i| offset - i - 1).unwrap_or(offset);
	(line, col)
}