surql_parser/upstream/syn/parser/mod.rs
use self::token_buffer::TokenBuffer;
63use crate::upstream::sql;
64use crate::upstream::syn::error::{SyntaxError, bail};
65use crate::upstream::syn::lexer::Lexer;
66use crate::upstream::syn::lexer::compound::CompoundToken;
67use crate::upstream::syn::token::{Span, Token, TokenKind, t};
68use bytes::BytesMut;
69use reblessive::{Stack, Stk};
70mod basic;
71mod builtin;
72mod expression;
73mod function;
74mod idiom;
75mod kind;
76pub mod mac;
77mod object;
78mod prime;
79mod record_id;
80mod stmt;
81mod token;
82mod token_buffer;
83mod value;
84use super::error::{RenderedError, syntax_error};
85#[cfg(feature = "arbitrary")]
86pub use builtin::{PATHS, PathKind};
87pub(crate) use mac::{enter_object_recursion, enter_query_recursion, unexpected};
88pub type ParseResult<T> = Result<T, SyntaxError>;
/// The outcome of a partial (streaming) parse attempt, where the input
/// buffer may not yet contain a complete value.
///
/// NOTE(review): the unit of `used` (bytes vs. tokens) is not evident from
/// this file — confirm against the code producing this type.
#[derive(Debug)]
pub enum PartialResult<T> {
    /// The input ended before a complete value could be parsed; the caller
    /// should supply more data and retry.
    MoreData,
    /// Nothing of substance was parsed.
    Empty {
        /// How much of the input was consumed.
        used: usize,
    },
    /// A value was parsed successfully.
    Ok {
        /// The parsed value.
        value: T,
        /// How much of the input was consumed.
        used: usize,
    },
    /// Parsing failed with a syntax error.
    Err {
        /// The error that was encountered.
        err: SyntaxError,
        /// How much of the input was consumed before failing.
        used: usize,
    },
}
/// Configuration options for [`Parser`].
#[derive(Clone, Debug)]
pub struct ParserSettings {
    /// Use legacy parsing rules for strand (string) literals.
    /// Defaults to `false`.
    pub legacy_strands: bool,
    /// Be more lenient about what is accepted inside a record id.
    /// Defaults to `true`.
    // NOTE(review): the precise leniency is defined in the record_id
    // module — confirm before documenting further.
    pub flexible_record_id: bool,
    /// Maximum nesting depth allowed when parsing objects.
    /// Defaults to `100`.
    pub object_recursion_limit: usize,
    /// Maximum nesting depth allowed for (sub)queries.
    /// Defaults to `20`.
    pub query_recursion_limit: usize,
    /// Enable file-related syntax (experimental). Defaults to `false`.
    pub files_enabled: bool,
    /// Enable SurrealISM syntax (experimental). Defaults to `false`.
    pub surrealism_enabled: bool,
}
130impl Default for ParserSettings {
131 fn default() -> Self {
132 ParserSettings {
133 legacy_strands: false,
134 flexible_record_id: true,
135 object_recursion_limit: 100,
136 query_recursion_limit: 20,
137 files_enabled: false,
138 surrealism_enabled: false,
139 }
140 }
141}
142impl ParserSettings {
143 pub fn default_with_experimental(enabled: bool) -> Self {
144 ParserSettings {
145 files_enabled: enabled,
146 surrealism_enabled: enabled,
147 ..Self::default()
148 }
149 }
150}
/// A SurrealQL parser operating over a byte slice of source text.
pub struct Parser<'a> {
    /// The lexer producing tokens from the source bytes.
    lexer: Lexer<'a>,
    /// Span of the most recently consumed token.
    last_span: Span,
    /// Lookahead buffer holding up to 4 already-lexed tokens.
    token_buffer: TokenBuffer<4>,
    /// Whether a table name may currently be parsed as a field.
    // NOTE(review): exact semantics are defined by the idiom/prime modules —
    // confirm before relying on this description.
    pub table_as_field: bool,
    /// Configuration controlling parser behaviour.
    settings: ParserSettings,
    /// Scratch buffer reused when unescaping identifiers, strings and
    /// regexes, to avoid reallocating per call.
    // NOTE(review): field name has a typo ("unscape" → "unescape"); renaming
    // would touch every use site, so it is only flagged here.
    unscape_buffer: Vec<u8>,
}
impl<'a> Parser<'a> {
    /// Create a parser over `source` using [`ParserSettings::default`].
    pub fn new(source: &'a [u8]) -> Self {
        Parser::new_with_settings(source, ParserSettings::default())
    }

    /// Create a parser over `source`, toggling the experimental feature set
    /// via [`ParserSettings::default_with_experimental`].
    pub fn new_with_experimental(source: &'a [u8], enabled: bool) -> Self {
        Parser::new_with_settings(source, ParserSettings::default_with_experimental(enabled))
    }

    /// Create a parser over `source` with explicit settings.
    pub fn new_with_settings(source: &'a [u8], settings: ParserSettings) -> Self {
        Parser {
            lexer: Lexer::new(source),
            last_span: Span::empty(),
            token_buffer: TokenBuffer::new(),
            table_as_field: true,
            settings,
            unscape_buffer: Vec::new(),
        }
    }

    /// Replace the parser's settings, builder-style.
    pub fn with_settings(mut self, settings: ParserSettings) -> Self {
        self.settings = settings;
        self
    }

    /// Consume and return the next token, taking it from the peek buffer if
    /// one was already lexed, otherwise pulling a fresh one from the lexer.
    /// Records the token's span as the last consumed span.
    #[expect(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Token {
        let res = self
            .token_buffer
            .pop()
            .unwrap_or_else(|| self.lexer.next_token());
        self.last_span = res.span;
        res
    }

    /// Consume and return the next token only if its span directly follows
    /// the previously consumed token (see [`Parser::peek_whitespace`]);
    /// otherwise leave it buffered and return `None`.
    pub fn next_whitespace(&mut self) -> Option<Token> {
        if let Some(x) = self.peek_whitespace() {
            self.pop_peek();
            return Some(x);
        }
        None
    }

    /// NOTE(review): despite the name, this returns `true` when the peek
    /// buffer is EMPTY (i.e. nothing has been peeked). The name suggests the
    /// inverted condition (`!is_empty()`) — confirm the intended semantics
    /// against all callers before changing.
    pub fn has_peek(&self) -> bool {
        self.token_buffer.is_empty()
    }

    /// Consume the token that was previously peeked.
    ///
    /// # Panics
    /// Panics if no token is buffered; only call this after a successful
    /// peek.
    pub fn pop_peek(&mut self) -> Token {
        let res = self.token_buffer.pop().expect("token buffer is non-empty");
        self.last_span = res.span;
        res
    }

    /// Return the next token without consuming it, lexing and buffering it
    /// if it was not already peeked.
    pub fn peek(&mut self) -> Token {
        let Some(x) = self.token_buffer.first() else {
            let res = self.lexer.next_token();
            self.token_buffer.push(res);
            return res;
        };
        x
    }

    /// Peek the next token, but only return it when its span directly
    /// follows the last consumed span (per `Span::follows_from`), i.e. no
    /// whitespace separates them. Returns `None` otherwise; the token stays
    /// buffered either way.
    pub fn peek_whitespace(&mut self) -> Option<Token> {
        let token = if let Some(x) = self.token_buffer.first() {
            x
        } else {
            let token = self.lexer.next_token();
            self.token_buffer.push(token);
            token
        };
        if !token.span.follows_from(&self.last_span) {
            return None;
        }
        Some(token)
    }

    /// Convenience: the kind of the next token, without consuming it.
    pub fn peek_kind(&mut self) -> TokenKind {
        self.peek().kind
    }

    /// Peek the token `at` positions ahead (0 = next token), filling the
    /// buffer with freshly lexed tokens as needed.
    ///
    /// NOTE(review): the buffer holds at most 4 tokens (`TokenBuffer<4>`),
    /// so callers must keep `at < 4` — confirm `TokenBuffer::push` behaviour
    /// on overflow.
    pub fn peek_token_at(&mut self, at: u8) -> Token {
        for _ in self.token_buffer.len()..=at {
            let r = self.lexer.next_token();
            self.token_buffer.push(r);
        }
        self.token_buffer.at(at).expect("token exists at index")
    }

    /// Peek one token past the next one.
    pub fn peek1(&mut self) -> Token {
        self.peek_token_at(1)
    }

    /// Peek two tokens past the next one.
    pub fn peek2(&mut self) -> Token {
        self.peek_token_at(2)
    }

    /// Whitespace-sensitive variant of [`Parser::peek_token_at`]: returns
    /// the token `AT` positions ahead only if it directly follows the token
    /// before it (no intervening whitespace); `None` otherwise.
    pub fn peek_whitespace_token_at<const AT: u8>(&mut self) -> Option<Token> {
        // Compile-time guard: the lookahead buffer only holds 4 tokens.
        const { assert!(AT < 4, "Peeking more then 4 tokens is not supported") };
        if AT == 0 {
            return self.peek_whitespace();
        }
        // Fill the buffer up to and including index AT.
        for _ in self.token_buffer.len()..=AT {
            let res = self.lexer.next_token();
            self.token_buffer.push(res);
        }
        let Some(token) = self.token_buffer.at(AT) else {
            unreachable!()
        };
        let Some(prev_token) = self.token_buffer.at(AT - 1) else {
            unreachable!()
        };
        if !token.span.follows_from(&prev_token.span) {
            return None;
        }
        Some(token)
    }

    /// Whitespace-sensitive peek one token ahead.
    pub fn peek_whitespace1(&mut self) -> Option<Token> {
        self.peek_whitespace_token_at::<1>()
    }

    /// Whitespace-sensitive peek two tokens ahead.
    pub fn peek_whitespace2(&mut self) -> Option<Token> {
        self.peek_whitespace_token_at::<2>()
    }

    /// The span of the next buffered token, or the last consumed span when
    /// nothing is buffered. Useful for error reporting at "here".
    pub fn recent_span(&mut self) -> Span {
        self.token_buffer
            .first()
            .map(|x| x.span)
            .unwrap_or(self.last_span)
    }

    /// The span of the most recently consumed token.
    pub fn last_span(&mut self) -> Span {
        self.last_span
    }

    /// Error unless the next token is EOF, i.e. all input was consumed.
    pub fn assert_finished(&mut self) -> ParseResult<()> {
        let p = self.peek();
        if p.kind != TokenKind::Eof {
            bail!("Unexpected token `{}`, expected no more tokens", p.kind, @ p.span);
        }
        Ok(())
    }

    /// If the next token has kind `token`, consume it and return `true`;
    /// otherwise leave it buffered and return `false`.
    pub fn eat(&mut self, token: TokenKind) -> bool {
        let peek = self.peek();
        if token == peek.kind {
            self.token_buffer.pop();
            self.last_span = peek.span;
            true
        } else {
            false
        }
    }

    /// Whitespace-sensitive [`Parser::eat`]: only consumes the token when it
    /// directly follows the previously consumed token.
    pub fn eat_whitespace(&mut self, token: TokenKind) -> bool {
        let Some(peek) = self.peek_whitespace() else {
            return false;
        };
        if token == peek.kind {
            self.token_buffer.pop();
            self.last_span = peek.span;
            true
        } else {
            false
        }
    }

    /// Consume the expected closing delimiter `kind`, or error pointing at
    /// `should_close`, the span of the opening delimiter it should match.
    fn expect_closing_delimiter(&mut self, kind: TokenKind, should_close: Span) -> ParseResult<()> {
        let peek = self.peek();
        if peek.kind != kind {
            bail!(
                "Unexpected token `{}` expected delimiter `{kind}`", peek.kind, @ self
                .recent_span(), @ should_close => "expected this delimiter to close"
            );
        }
        self.pop_peek();
        Ok(())
    }

    /// Rewind the parser to just after `span`, discarding any buffered
    /// lookahead tokens.
    pub fn backup_after(&mut self, span: Span) {
        self.token_buffer.clear();
        self.lexer.backup_after(span);
    }

    /// Parse a full query: a list of top-level statements forming an AST.
    pub async fn parse_query(&mut self, stk: &mut Stk) -> ParseResult<sql::Ast> {
        let statements = self.parse_stmt_list(stk).await?;
        Ok(sql::Ast {
            expressions: statements,
        })
    }

    /// Parse a single top-level statement.
    async fn parse_statement(&mut self, stk: &mut Stk) -> ParseResult<sql::TopLevelExpr> {
        self.parse_top_level_expr(stk).await
    }

    /// Parse a single expression.
    pub async fn parse_expr(&mut self, stk: &mut Stk) -> ParseResult<sql::Expr> {
        self.parse_expr_start(stk).await
    }

    /// Re-lex starting from `start` as a compound token using `f`, updating
    /// the last consumed span to the compound token's span on success.
    pub fn lex_compound<F, R>(
        &mut self,
        start: Token,
        f: F,
    ) -> Result<CompoundToken<R>, SyntaxError>
    where
        F: Fn(&mut Lexer, Token) -> Result<R, SyntaxError>,
    {
        let res = self.lexer.lex_compound(start, f)?;
        self.last_span = res.span;
        Ok(res)
    }

    /// The raw source text covered by `span`.
    pub fn span_str(&self, span: Span) -> &str {
        self.lexer.span_str(span)
    }

    /// Unescape the identifier covered by `span`, reusing the internal
    /// scratch buffer.
    pub fn unescape_ident_span(&mut self, span: Span) -> Result<&str, SyntaxError> {
        let str = self.lexer.span_str(span);
        Lexer::unescape_ident_span(str, span, &mut self.unscape_buffer)
    }

    /// Unescape the string literal covered by `span`, reusing the internal
    /// scratch buffer.
    pub fn unescape_string_span(&mut self, span: Span) -> Result<&str, SyntaxError> {
        let str = self.lexer.span_str(span);
        Lexer::unescape_string_span(str, span, &mut self.unscape_buffer)
    }

    /// Unescape the regex literal covered by `span`, reusing the internal
    /// scratch buffer.
    pub fn unescape_regex_span(&mut self, span: Span) -> Result<&str, SyntaxError> {
        let str = self.lexer.span_str(span);
        Lexer::unescape_regex_span(str, span, &mut self.unscape_buffer)
    }

    /// Run `cb` speculatively: when it returns `Ok(None)` the parser is
    /// rewound to where it was before the call; `Ok(Some(_))` commits the
    /// consumed tokens, and errors are propagated without rewinding.
    pub async fn speculate<T, F>(&mut self, stk: &mut Stk, cb: F) -> ParseResult<Option<T>>
    where
        F: AsyncFnOnce(&mut Stk, &mut Parser) -> ParseResult<Option<T>>,
    {
        // Remember where we were so we can rewind on a `None` outcome.
        let backup = self.last_span();
        match cb(stk, self).await {
            Ok(Some(x)) => Ok(Some(x)),
            Ok(None) => {
                self.backup_after(backup);
                Ok(None)
            }
            Err(e) => Err(e),
        }
    }
}
/// Incrementally parses top-level statements out of a growing byte buffer,
/// consuming each statement's bytes from the front of the buffer as it is
/// parsed.
pub struct StatementStream {
    /// Reblessive stack used to drive the recursive parser without
    /// overflowing the native call stack.
    stack: Stack,
    /// Settings applied to every parser instance created per call.
    settings: ParserSettings,
    /// Column offset of the input consumed so far; added to reported error
    /// locations so they refer to the original input.
    col_offset: usize,
    /// Line offset of the input consumed so far; added to reported error
    /// locations so they refer to the original input.
    line_offset: usize,
}
impl StatementStream {
    /// Create a statement stream with default parser settings.
    #[expect(clippy::new_without_default)]
    pub fn new() -> Self {
        Self::new_with_settings(ParserSettings::default())
    }

    /// Create a statement stream with explicit parser settings.
    pub fn new_with_settings(settings: ParserSettings) -> Self {
        StatementStream {
            stack: Stack::new(),
            settings,
            col_offset: 0,
            line_offset: 0,
        }
    }

    /// Advance the stored line/column offsets over `bytes`, which were just
    /// removed from the front of the input buffer.
    fn accumulate_line_col(&mut self, bytes: &[u8]) {
        // `enumerate().last()` yields the zero-based index of the final line
        // (== number of line breaks crossed) together with its text.
        let (line_num, remaining) = std::str::from_utf8(bytes)
            .expect("parser validated utf8")
            .lines()
            .enumerate()
            .last()
            .unwrap_or((0, ""));
        self.line_offset += line_num;
        if line_num > 0 {
            self.col_offset = 0;
        }
        // NOTE(review): `str::lines` yields no empty final line for input
        // ending in `\n`, so a chunk ending with a newline leaves col_offset
        // at the previous line's length instead of 0 — confirm whether
        // consumed chunks can end in a newline here.
        self.col_offset += remaining.chars().count();
    }

    /// Attempt to parse a single top-level statement from `buffer`.
    ///
    /// Returns `Ok(None)` when the buffer does not yet hold a complete
    /// statement (more data is needed). On success or definite error, the
    /// consumed bytes are split off the front of `buffer` and the stored
    /// line/column offsets are advanced.
    pub fn parse_partial(
        &mut self,
        buffer: &mut BytesMut,
    ) -> Result<Option<sql::TopLevelExpr>, RenderedError> {
        // Spans carry 32-bit offsets, so never hand the parser more than
        // u32::MAX bytes at a time.
        let mut slice = &**buffer;
        if slice.len() > u32::MAX as usize {
            slice = &slice[..u32::MAX as usize];
        }
        let mut parser = Parser::new_with_settings(slice, self.settings.clone());
        // Skip over empty statements.
        while parser.eat(t!(";")) {}
        // If leading `;`s were skipped while the buffer exceeds the 4GB
        // window, drop the consumed prefix and restart so the window covers
        // fresh input.
        if parser.peek().span.offset != 0 && buffer.len() > u32::MAX as usize {
            let eaten = buffer.split_to(parser.peek().span.offset as usize);
            self.accumulate_line_col(&eaten);
            slice = &**buffer;
            if slice.len() > u32::MAX as usize {
                slice = &slice[..u32::MAX as usize];
            }
            parser = Parser::new_with_settings(slice, self.settings.clone());
        }
        if parser.peek().is_eof() {
            return Ok(None);
        }
        let res = self.stack.enter(|stk| parser.parse_statement(stk)).finish();
        if parser.peek().is_eof() {
            // The statement ran to the end of the window: either it is
            // incomplete (wait for more data), or it exceeded the 4GB
            // window and can never be parsed.
            if buffer.len() > u32::MAX as usize {
                let error = syntax_error!(
                    "Cannot parse query, statement exceeded maximum size of 4GB", @
                    parser.last_span()
                );
                return Err(error
                    .render_on_bytes(buffer)
                    .offset_location(self.line_offset, self.col_offset));
            }
            return Ok(None);
        }
        if !parser.eat(t!(";")) {
            let peek = parser.next();
            // NOTE(review): when the offending token is the final one before
            // EOF it may still be the partial start of a longer token or
            // statement, so wait for more data instead of erroring — confirm
            // this is the intended reading of `peek1().is_eof()` here.
            if parser.peek1().is_eof() {
                return Ok(None);
            }
            // Prefer reporting the parse error itself over the
            // missing-semicolon diagnostic.
            if let Err(e) = res {
                return Err(e
                    .render_on_bytes(slice)
                    .offset_location(self.line_offset, self.col_offset));
            }
            let error = syntax_error!(
                "Unexpected token `{}` expected the query to end.", peek.kind.as_str(), @
                peek.span => "maybe forgot a semicolon after the previous statement?"
            );
            return Err(error
                .render_on_bytes(slice)
                .offset_location(self.line_offset, self.col_offset));
        }
        // Consume any trailing empty statements.
        while parser.eat(t!(";")) {}
        // Remove the parsed statement's bytes from the buffer and advance
        // the line/column offsets for future error reports.
        let eaten = buffer.split_to(parser.last_span().after_offset() as usize);
        let res = res.map(Some).map_err(|e| {
            e.render_on_bytes(&eaten)
                .offset_location(self.line_offset, self.col_offset)
        });
        self.accumulate_line_col(&eaten);
        res
    }

    /// Parse a single statement from `buffer`, treating the buffer as the
    /// complete remaining input (no further data will arrive), so a
    /// statement that reaches EOF without a `;` is accepted.
    pub fn parse_complete(
        &mut self,
        buffer: &mut BytesMut,
    ) -> Result<Option<sql::TopLevelExpr>, RenderedError> {
        // Spans carry 32-bit offsets, so cap the window at u32::MAX bytes.
        let mut slice = &**buffer;
        if slice.len() > u32::MAX as usize {
            slice = &slice[..u32::MAX as usize];
        }
        let mut parser = Parser::new_with_settings(slice, self.settings.clone());
        // Skip over empty statements.
        while parser.eat(t!(";")) {}
        if parser.peek().is_eof() {
            buffer.clear();
            return Ok(None);
        }
        match self.stack.enter(|stk| parser.parse_statement(stk)).finish() {
            Ok(x) => {
                // A statement must end with `;` or EOF.
                if !parser.peek().is_eof() && !parser.eat(t!(";")) {
                    let peek = parser.peek();
                    let error = syntax_error!(
                        "Unexpected token `{}` expected the query to end.", peek.kind
                        .as_str(), @ peek.span =>
                        "maybe forgot a semicolon after the previous statement?"
                    );
                    return Err(error
                        .render_on_bytes(slice)
                        .offset_location(self.line_offset, self.col_offset));
                }
                // Consume the statement's bytes and advance the offsets.
                let eaten = buffer.split_to(parser.last_span().after_offset() as usize);
                self.accumulate_line_col(&eaten);
                Ok(Some(x))
            }
            Err(e) => Err(e
                .render_on_bytes(slice)
                .offset_location(self.line_offset, self.col_offset)),
        }
    }
}