1use crate::ast::*;
2use crate::lexer::{Token, TokenKind};
3use thiserror::Error;
4
5#[derive(Debug, Error)]
6pub enum ParseError {
7 #[error("Unexpected token {0:?} at position {1}")]
8 Unexpected(TokenKind, usize),
9 #[error("Expected {0} but got {1:?} at position {2}")]
10 Expected(String, TokenKind, usize),
11 #[error("t_sda_selector_not_static: selector not static")]
12 SelectorNotStatic,
13 #[error("t_sda_duplicate_label_in_selector: duplicate label")]
14 DuplicateLabelInSelector,
15 #[error("t_sda_reserved_placeholder: reserved placeholder")]
16 ReservedPlaceholder,
17 #[error("t_sda_invalid_map_key: invalid map key")]
18 InvalidMapKey,
19 #[error("t_sda_invalid_bagkv_key: invalid bagkv key")]
20 InvalidBagkvKey,
21 #[error("Invalid bytes literal '{literal}' at position {pos}: {reason}")]
22 InvalidBytesLiteral { literal: String, pos: usize, reason: String },
23 #[error("Unexpected end of input")]
24 UnexpectedEof,
25}
26
27struct Parser {
28 tokens: Vec<Token>,
29 pos: usize,
30}
31
32impl Parser {
33 fn new(tokens: Vec<Token>) -> Self {
34 Self { tokens, pos: 0 }
35 }
36
37 fn peek(&self) -> &TokenKind {
38 &self.tokens[self.pos].kind
39 }
40
41 fn peek_pos(&self) -> usize {
42 self.tokens[self.pos].pos
43 }
44
45 fn peek_next(&self) -> &TokenKind {
46 if self.pos + 1 < self.tokens.len() {
47 &self.tokens[self.pos + 1].kind
48 } else {
49 &TokenKind::Eof
50 }
51 }
52
53 fn advance(&mut self) -> &Token {
54 let token = &self.tokens[self.pos];
55 if self.pos + 1 < self.tokens.len() {
56 self.pos += 1;
57 }
58 token
59 }
60
61 fn expect(&mut self, expected: TokenKind) -> Result<(), ParseError> {
62 let token = self.peek().clone();
63 let pos = self.peek_pos();
64 if token == expected {
65 self.advance();
66 Ok(())
67 } else {
68 Err(ParseError::Expected(format!("{expected:?}"), token, pos))
69 }
70 }
71
72 fn expect_ident(&mut self) -> Result<String, ParseError> {
73 let token = self.peek().clone();
74 let pos = self.peek_pos();
75 if let TokenKind::Ident(name) = token {
76 self.advance();
77 Ok(name)
78 } else {
79 Err(ParseError::Expected("identifier".to_string(), token, pos))
80 }
81 }
82
83 fn expected_generator_expr_error(&self) -> ParseError {
84 ParseError::Expected(
85 "generator expression `name in collection`".to_string(),
86 self.peek().clone(),
87 self.peek_pos(),
88 )
89 }
90
91 fn parse_program(&mut self) -> Result<Program, ParseError> {
92 let mut stmts = Vec::new();
93 while *self.peek() != TokenKind::Eof {
94 stmts.push(self.parse_stmt()?);
95 }
96 Ok(Program { stmts })
97 }
98
99 fn parse_stmt(&mut self) -> Result<Stmt, ParseError> {
100 if *self.peek() == TokenKind::Let {
101 self.advance();
102 if *self.peek() == TokenKind::Placeholder {
103 return Err(ParseError::ReservedPlaceholder);
104 }
105 let name = self.expect_ident()?;
106 self.expect(TokenKind::Eq)?;
107 let expr = self.parse_expr()?;
108 self.expect(TokenKind::Semi)?;
109 Ok(Stmt::Let(name, expr))
110 } else {
111 let expr = self.parse_expr()?;
112 self.expect(TokenKind::Semi)?;
113 Ok(Stmt::Expr(expr))
114 }
115 }
116
117 fn parse_expr(&mut self) -> Result<Expr, ParseError> {
118 self.parse_pipe()
119 }
120
121 fn parse_pipe(&mut self) -> Result<Expr, ParseError> {
122 let mut lhs = self.parse_or()?;
123 while *self.peek() == TokenKind::Pipe {
124 self.advance();
125 let rhs = self.parse_or()?;
126 lhs = Expr::Pipe(Box::new(lhs), Box::new(rhs));
127 }
128 Ok(lhs)
129 }
130
131 fn parse_or(&mut self) -> Result<Expr, ParseError> {
132 let mut lhs = self.parse_and()?;
133 while *self.peek() == TokenKind::Or {
134 self.advance();
135 let rhs = self.parse_and()?;
136 lhs = Expr::BinOp(BinOpKind::Or, Box::new(lhs), Box::new(rhs));
137 }
138 Ok(lhs)
139 }
140
141 fn parse_and(&mut self) -> Result<Expr, ParseError> {
142 let mut lhs = self.parse_not()?;
143 while *self.peek() == TokenKind::And {
144 self.advance();
145 let rhs = self.parse_not()?;
146 lhs = Expr::BinOp(BinOpKind::And, Box::new(lhs), Box::new(rhs));
147 }
148 Ok(lhs)
149 }
150
151 fn parse_not(&mut self) -> Result<Expr, ParseError> {
152 if *self.peek() == TokenKind::Not {
153 self.advance();
154 let expr = self.parse_not()?;
155 Ok(Expr::UnOp(UnOpKind::Not, Box::new(expr)))
156 } else {
157 self.parse_cmp()
158 }
159 }
160
161 fn parse_cmp(&mut self) -> Result<Expr, ParseError> {
162 let lhs = self.parse_setish()?;
163 let op = match self.peek() {
164 TokenKind::Eq => Some(BinOpKind::Eq),
165 TokenKind::Neq => Some(BinOpKind::Neq),
166 TokenKind::Lt => Some(BinOpKind::Lt),
167 TokenKind::Le => Some(BinOpKind::Le),
168 TokenKind::Gt => Some(BinOpKind::Gt),
169 TokenKind::Ge => Some(BinOpKind::Ge),
170 _ => None,
171 };
172 if let Some(op) = op {
173 self.advance();
174 let rhs = self.parse_setish()?;
175 Ok(Expr::BinOp(op, Box::new(lhs), Box::new(rhs)))
176 } else {
177 Ok(lhs)
178 }
179 }
180
181 fn parse_setish(&mut self) -> Result<Expr, ParseError> {
182 let mut lhs = self.parse_add()?;
183 loop {
184 let op = match self.peek() {
185 TokenKind::Union => Some(BinOpKind::Union),
186 TokenKind::Inter => Some(BinOpKind::Inter),
187 TokenKind::Diff => Some(BinOpKind::Diff),
188 TokenKind::BUnion => Some(BinOpKind::BUnion),
189 TokenKind::BDiff => Some(BinOpKind::BDiff),
190 TokenKind::In => Some(BinOpKind::In),
191 _ => None,
192 };
193 if let Some(op) = op {
194 self.advance();
195 let rhs = self.parse_add()?;
196 lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
197 } else {
198 break;
199 }
200 }
201 Ok(lhs)
202 }
203
204 fn parse_add(&mut self) -> Result<Expr, ParseError> {
205 let mut lhs = self.parse_mul()?;
206 loop {
207 let op = match self.peek() {
208 TokenKind::Plus => Some(BinOpKind::Add),
209 TokenKind::Minus => Some(BinOpKind::Sub),
210 TokenKind::Concat => Some(BinOpKind::Concat),
211 _ => None,
212 };
213 if let Some(op) = op {
214 self.advance();
215 let rhs = self.parse_mul()?;
216 lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
217 } else {
218 break;
219 }
220 }
221 Ok(lhs)
222 }
223
224 fn parse_mul(&mut self) -> Result<Expr, ParseError> {
225 let mut lhs = self.parse_unary()?;
226 loop {
227 let op = match self.peek() {
228 TokenKind::Star => Some(BinOpKind::Mul),
229 TokenKind::Slash => Some(BinOpKind::Div),
230 _ => None,
231 };
232 if let Some(op) = op {
233 self.advance();
234 let rhs = self.parse_unary()?;
235 lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
236 } else {
237 break;
238 }
239 }
240 Ok(lhs)
241 }
242
243 fn parse_unary(&mut self) -> Result<Expr, ParseError> {
244 if *self.peek() == TokenKind::Minus {
245 self.advance();
246 let expr = self.parse_unary()?;
247 Ok(Expr::UnOp(UnOpKind::Neg, Box::new(expr)))
248 } else {
249 self.parse_postfix()
250 }
251 }
252
253 fn is_selector_access(&self) -> bool {
254 if self.pos + 2 < self.tokens.len() {
255 let next = &self.tokens[self.pos + 1].kind;
256 let after = &self.tokens[self.pos + 2].kind;
257 matches!(next, TokenKind::Ident(_) | TokenKind::Str(_)) && *after == TokenKind::Gt
258 } else {
259 false
260 }
261 }
262
263 fn parse_postfix(&mut self) -> Result<Expr, ParseError> {
264 let mut expr = self.parse_primary()?;
265 loop {
266 match self.peek() {
267 TokenKind::Lt if self.is_selector_access() => {
268 self.advance();
269 let selector = match self.peek().clone() {
270 TokenKind::Ident(name) => {
271 self.advance();
272 name
273 }
274 TokenKind::Str(s) => {
275 self.advance();
276 s
277 }
278 token => {
279 let pos = self.peek_pos();
280 return Err(ParseError::Expected("selector".to_string(), token, pos));
281 }
282 };
283 self.expect(TokenKind::Gt)?;
284 let mode = match self.peek() {
285 TokenKind::QMark => {
286 self.advance();
287 SelectMode::Optional
288 }
289 TokenKind::Bang => {
290 self.advance();
291 SelectMode::Required
292 }
293 _ => SelectMode::Plain,
294 };
295 expr = Expr::Select(Box::new(expr), selector, mode);
296 }
297 TokenKind::SelL => {
298 self.advance();
299 let selector = match self.peek().clone() {
300 TokenKind::Ident(name) => {
301 self.advance();
302 name
303 }
304 TokenKind::Str(s) => {
305 self.advance();
306 s
307 }
308 token => {
309 let pos = self.peek_pos();
310 return Err(ParseError::Expected("selector".to_string(), token, pos));
311 }
312 };
313 self.expect(TokenKind::SelR)?;
314 let mode = match self.peek() {
315 TokenKind::QMark => {
316 self.advance();
317 SelectMode::Optional
318 }
319 TokenKind::Bang => {
320 self.advance();
321 SelectMode::Required
322 }
323 _ => SelectMode::Plain,
324 };
325 expr = Expr::Select(Box::new(expr), selector, mode);
326 }
327 TokenKind::LParen => {
328 self.advance();
329 let mut args = Vec::new();
330 if *self.peek() != TokenKind::RParen {
331 args.push(self.parse_expr()?);
332 while *self.peek() == TokenKind::Comma {
333 self.advance();
334 args.push(self.parse_expr()?);
335 }
336 }
337 self.expect(TokenKind::RParen)?;
338 expr = Expr::Call(Box::new(expr), args);
339 }
340 _ => break,
341 }
342 }
343 Ok(expr)
344 }
345
346 fn parse_primary(&mut self) -> Result<Expr, ParseError> {
347 match self.peek().clone() {
348 TokenKind::Null => {
349 self.advance();
350 Ok(Expr::Null)
351 }
352 TokenKind::True => {
353 self.advance();
354 Ok(Expr::Bool(true))
355 }
356 TokenKind::False => {
357 self.advance();
358 Ok(Expr::Bool(false))
359 }
360 TokenKind::Num(n) => {
361 self.advance();
362 Ok(Expr::Num(n))
363 }
364 TokenKind::Str(s) => {
365 self.advance();
366 Ok(Expr::Str(s))
367 }
368 TokenKind::Bytes => {
369 let pos = self.peek_pos();
370 self.advance();
371 self.expect(TokenKind::LParen)?;
372 let literal = match self.peek().clone() {
373 TokenKind::Str(s) => {
374 self.advance();
375 s
376 }
377 token => {
378 let pos = self.peek_pos();
379 return Err(ParseError::Expected("string literal".to_string(), token, pos));
380 }
381 };
382 self.expect(TokenKind::RParen)?;
383 let bytes = parse_bytes_literal(&literal).map_err(|reason| ParseError::InvalidBytesLiteral {
384 literal,
385 pos,
386 reason,
387 })?;
388 Ok(Expr::Bytes(bytes))
389 }
390 TokenKind::Placeholder => {
391 if *self.peek_next() == TokenKind::FatArrow {
392 return Err(ParseError::ReservedPlaceholder);
393 }
394 self.advance();
395 Ok(Expr::Placeholder)
396 }
397 TokenKind::Ident(name) => {
398 if *self.peek_next() == TokenKind::FatArrow {
399 self.advance();
400 self.advance();
401 let body = self.parse_expr()?;
402 Ok(Expr::Lambda(name, Box::new(body)))
403 } else {
404 self.advance();
405 Ok(Expr::Ident(name))
406 }
407 }
408 TokenKind::LParen => {
409 self.advance();
410 let expr = self.parse_expr()?;
411 self.expect(TokenKind::RParen)?;
412 Ok(expr)
413 }
414 TokenKind::Seq => {
415 self.advance();
416 self.expect(TokenKind::LBrack)?;
417 let items = self.parse_expr_list(TokenKind::RBrack)?;
418 Ok(Expr::Seq(items))
419 }
420 TokenKind::Set => {
421 self.advance();
422 self.expect(TokenKind::LBrace)?;
423 let items = self.parse_expr_list(TokenKind::RBrace)?;
424 Ok(Expr::Set(items))
425 }
426 TokenKind::Bag => {
427 self.advance();
428 self.expect(TokenKind::LBrace)?;
429 let items = self.parse_expr_list(TokenKind::RBrace)?;
430 Ok(Expr::Bag(items))
431 }
432 TokenKind::Map => {
433 self.advance();
434 self.expect(TokenKind::LBrace)?;
435 let mut entries = Vec::new();
436 if *self.peek() != TokenKind::RBrace {
437 entries.push(self.parse_map_entry()?);
438 while *self.peek() == TokenKind::Comma {
439 self.advance();
440 if *self.peek() == TokenKind::RBrace {
441 break;
442 }
443 entries.push(self.parse_map_entry()?);
444 }
445 }
446 self.expect(TokenKind::RBrace)?;
447 Ok(Expr::Map(entries))
448 }
449 TokenKind::Prod => {
450 self.advance();
451 self.expect(TokenKind::LBrace)?;
452 let mut fields = Vec::new();
453 if *self.peek() != TokenKind::RBrace {
454 fields.push(self.parse_prod_field()?);
455 while *self.peek() == TokenKind::Comma {
456 self.advance();
457 if *self.peek() == TokenKind::RBrace {
458 break;
459 }
460 fields.push(self.parse_prod_field()?);
461 }
462 }
463 self.expect(TokenKind::RBrace)?;
464 Ok(Expr::Prod(fields))
465 }
466 TokenKind::BagKV => {
467 self.advance();
468 self.expect(TokenKind::LBrace)?;
469 let mut entries = Vec::new();
470 if *self.peek() != TokenKind::RBrace {
471 entries.push(self.parse_bagkv_entry()?);
472 while *self.peek() == TokenKind::Comma {
473 self.advance();
474 if *self.peek() == TokenKind::RBrace {
475 break;
476 }
477 entries.push(self.parse_bagkv_entry()?);
478 }
479 }
480 self.expect(TokenKind::RBrace)?;
481 Ok(Expr::BagKV(entries))
482 }
483 TokenKind::Some => {
484 self.advance();
485 self.expect(TokenKind::LParen)?;
486 let expr = self.parse_expr()?;
487 self.expect(TokenKind::RParen)?;
488 Ok(Expr::Some_(Box::new(expr)))
489 }
490 TokenKind::None => {
491 self.advance();
492 Ok(Expr::None_)
493 }
494 TokenKind::Ok => {
495 self.advance();
496 self.expect(TokenKind::LParen)?;
497 let expr = self.parse_expr()?;
498 self.expect(TokenKind::RParen)?;
499 Ok(Expr::Ok_(Box::new(expr)))
500 }
501 TokenKind::Fail => {
502 self.advance();
503 self.expect(TokenKind::LParen)?;
504 let code = self.parse_expr()?;
505 self.expect(TokenKind::Comma)?;
506 let msg = self.parse_expr()?;
507 self.expect(TokenKind::RParen)?;
508 Ok(Expr::Fail_(Box::new(code), Box::new(msg)))
509 }
510 TokenKind::LBrace => {
511 self.advance();
512 if let Some(err) = self.detect_static_selector_error() {
513 return Err(err);
514 }
515 self.parse_comprehension()
516 }
517 token => {
518 let pos = self.peek_pos();
519 Err(ParseError::Unexpected(token, pos))
520 }
521 }
522 }
523
524 fn parse_comprehension(&mut self) -> Result<Expr, ParseError> {
525 if *self.peek() == TokenKind::Yield {
526 self.advance();
527 let yield_expr = self.parse_expr()?;
528 self.expect(TokenKind::Bar)?;
529 let (binding, collection) = self.parse_generator_expr()?;
530 let pred = if *self.peek() == TokenKind::Bar {
531 self.advance();
532 Some(Box::new(self.parse_expr()?))
533 } else {
534 None
535 };
536 self.expect(TokenKind::RBrace)?;
537 return Ok(Expr::Comprehension {
538 yield_expr: Some(Box::new(yield_expr)),
539 binding,
540 collection: Box::new(collection),
541 pred,
542 });
543 }
544
545 let first_expr = self.parse_expr()?;
546 if let Some((binding, collection)) = Self::decompose_generator_expr(first_expr.clone()) {
547 let pred = if *self.peek() == TokenKind::Bar {
548 self.advance();
549 Some(Box::new(self.parse_expr()?))
550 } else {
551 None
552 };
553 self.expect(TokenKind::RBrace)?;
554 return Ok(Expr::Comprehension {
555 yield_expr: None,
556 binding,
557 collection: Box::new(collection),
558 pred,
559 });
560 }
561
562 if matches!(first_expr, Expr::BinOp(BinOpKind::In, _, _)) {
563 return Err(self.expected_generator_expr_error());
564 }
565
566 self.expect(TokenKind::Bar)?;
567 let (binding, collection) = self.parse_generator_expr()?;
568 let pred = if *self.peek() == TokenKind::Bar {
569 self.advance();
570 Some(Box::new(self.parse_expr()?))
571 } else {
572 None
573 };
574 self.expect(TokenKind::RBrace)?;
575 Ok(Expr::Comprehension {
576 yield_expr: Some(Box::new(first_expr)),
577 binding,
578 collection: Box::new(collection),
579 pred,
580 })
581 }
582
583 fn parse_generator_expr(&mut self) -> Result<(String, Expr), ParseError> {
584 let expr = self.parse_expr()?;
585 Self::decompose_generator_expr(expr).ok_or_else(|| self.expected_generator_expr_error())
586 }
587
588 fn decompose_generator_expr(expr: Expr) -> Option<(String, Expr)> {
589 match expr {
590 Expr::BinOp(BinOpKind::In, lhs, rhs) => match *lhs {
591 Expr::Ident(name) => Some((name, *rhs)),
592 _ => None,
593 },
594 _ => None,
595 }
596 }
597
598 fn parse_expr_list(&mut self, close: TokenKind) -> Result<Vec<Expr>, ParseError> {
599 let mut items = Vec::new();
600 if *self.peek() != close {
601 items.push(self.parse_expr()?);
602 while *self.peek() == TokenKind::Comma {
603 self.advance();
604 if *self.peek() == close {
605 break;
606 }
607 items.push(self.parse_expr()?);
608 }
609 }
610 self.expect(close)?;
611 Ok(items)
612 }
613
614 fn parse_map_entry(&mut self) -> Result<(String, Expr), ParseError> {
615 let key = match self.peek().clone() {
616 TokenKind::Str(s) => {
617 self.advance();
618 s
619 }
620 _ => return Err(ParseError::InvalidMapKey),
621 };
622 self.expect(TokenKind::Arrow)?;
623 let value = self.parse_expr()?;
624 Ok((key, value))
625 }
626
627 fn parse_bagkv_entry(&mut self) -> Result<(String, Expr), ParseError> {
628 let key = match self.peek().clone() {
629 TokenKind::Str(s) => {
630 self.advance();
631 s
632 }
633 TokenKind::Ident(name) => {
634 self.advance();
635 name
636 }
637 _ => return Err(ParseError::InvalidBagkvKey),
638 };
639 self.expect(TokenKind::Arrow)?;
640 let value = self.parse_expr()?;
641 Ok((key, value))
642 }
643
644 fn parse_prod_field(&mut self) -> Result<(String, Expr), ParseError> {
645 let name = self.expect_ident()?;
646 self.expect(TokenKind::Colon)?;
647 let value = self.parse_expr()?;
648 Ok((name, value))
649 }
650
651 fn detect_static_selector_error(&self) -> Option<ParseError> {
652 let mut labels = Vec::new();
653 let mut idx = self.pos;
654 while idx < self.tokens.len() {
655 match &self.tokens[idx].kind {
656 TokenKind::Ident(name) => labels.push(name.clone()),
657 TokenKind::Str(s) => labels.push(s.clone()),
658 TokenKind::RBrace => break,
659 _ => return None,
660 }
661 idx += 1;
662 }
663
664 if idx >= self.tokens.len() || self.tokens[idx].kind != TokenKind::RBrace || labels.is_empty() {
665 return None;
666 }
667
668 let mut seen = std::collections::BTreeSet::new();
669 for label in labels {
670 if !seen.insert(label) {
671 return Some(ParseError::DuplicateLabelInSelector);
672 }
673 }
674
675 Some(ParseError::SelectorNotStatic)
676 }
677}
678
679pub fn parse(tokens: Vec<Token>) -> Result<Program, ParseError> {
680 let mut parser = Parser::new(tokens);
681 parser.parse_program()
682}
683
684fn parse_bytes_literal(src: &str) -> Result<Vec<u8>, String> {
685 if src.len() % 2 != 0 {
686 return Err("expected even-length base16 string".to_string());
687 }
688
689 let mut out = Vec::with_capacity(src.len() / 2);
690 let mut chars = src.chars();
691 while let (Some(hi), Some(lo)) = (chars.next(), chars.next()) {
692 let hi = hi
693 .to_digit(16)
694 .ok_or_else(|| "expected base16 digits only".to_string())?;
695 let lo = lo
696 .to_digit(16)
697 .ok_or_else(|| "expected base16 digits only".to_string())?;
698 out.push(((hi << 4) | lo) as u8);
699 }
700 Ok(out)
701}