Skip to main content

sda_lib/
parser.rs

1use crate::ast::*;
2use crate::lexer::{Token, TokenKind};
3use thiserror::Error;
4
5#[derive(Debug, Error)]
6pub enum ParseError {
7    #[error("Unexpected token {0:?} at position {1}")]
8    Unexpected(TokenKind, usize),
9    #[error("Expected {0} but got {1:?} at position {2}")]
10    Expected(String, TokenKind, usize),
11    #[error("t_sda_selector_not_static: selector not static")]
12    SelectorNotStatic,
13    #[error("t_sda_duplicate_label_in_selector: duplicate label")]
14    DuplicateLabelInSelector,
15    #[error("t_sda_reserved_placeholder: reserved placeholder")]
16    ReservedPlaceholder,
17    #[error("t_sda_invalid_map_key: invalid map key")]
18    InvalidMapKey,
19    #[error("t_sda_invalid_bagkv_key: invalid bagkv key")]
20    InvalidBagkvKey,
21    #[error("Invalid bytes literal '{literal}' at position {pos}: {reason}")]
22    InvalidBytesLiteral { literal: String, pos: usize, reason: String },
23    #[error("Unexpected end of input")]
24    UnexpectedEof,
25}
26
27struct Parser {
28    tokens: Vec<Token>,
29    pos: usize,
30}
31
32impl Parser {
33    fn new(tokens: Vec<Token>) -> Self {
34        Self { tokens, pos: 0 }
35    }
36
37    fn peek(&self) -> &TokenKind {
38        &self.tokens[self.pos].kind
39    }
40
41    fn peek_pos(&self) -> usize {
42        self.tokens[self.pos].pos
43    }
44
45    fn peek_next(&self) -> &TokenKind {
46        if self.pos + 1 < self.tokens.len() {
47            &self.tokens[self.pos + 1].kind
48        } else {
49            &TokenKind::Eof
50        }
51    }
52
53    fn advance(&mut self) -> &Token {
54        let token = &self.tokens[self.pos];
55        if self.pos + 1 < self.tokens.len() {
56            self.pos += 1;
57        }
58        token
59    }
60
61    fn expect(&mut self, expected: TokenKind) -> Result<(), ParseError> {
62        let token = self.peek().clone();
63        let pos = self.peek_pos();
64        if token == expected {
65            self.advance();
66            Ok(())
67        } else {
68            Err(ParseError::Expected(format!("{expected:?}"), token, pos))
69        }
70    }
71
72    fn expect_ident(&mut self) -> Result<String, ParseError> {
73        let token = self.peek().clone();
74        let pos = self.peek_pos();
75        if let TokenKind::Ident(name) = token {
76            self.advance();
77            Ok(name)
78        } else {
79            Err(ParseError::Expected("identifier".to_string(), token, pos))
80        }
81    }
82
83    fn expected_generator_expr_error(&self) -> ParseError {
84        ParseError::Expected(
85            "generator expression `name in collection`".to_string(),
86            self.peek().clone(),
87            self.peek_pos(),
88        )
89    }
90
91    fn parse_program(&mut self) -> Result<Program, ParseError> {
92        let mut stmts = Vec::new();
93        while *self.peek() != TokenKind::Eof {
94            stmts.push(self.parse_stmt()?);
95        }
96        Ok(Program { stmts })
97    }
98
99    fn parse_stmt(&mut self) -> Result<Stmt, ParseError> {
100        if *self.peek() == TokenKind::Let {
101            self.advance();
102            if *self.peek() == TokenKind::Placeholder {
103                return Err(ParseError::ReservedPlaceholder);
104            }
105            let name = self.expect_ident()?;
106            self.expect(TokenKind::Eq)?;
107            let expr = self.parse_expr()?;
108            self.expect(TokenKind::Semi)?;
109            Ok(Stmt::Let(name, expr))
110        } else {
111            let expr = self.parse_expr()?;
112            self.expect(TokenKind::Semi)?;
113            Ok(Stmt::Expr(expr))
114        }
115    }
116
117    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
118        self.parse_pipe()
119    }
120
121    fn parse_pipe(&mut self) -> Result<Expr, ParseError> {
122        let mut lhs = self.parse_or()?;
123        while *self.peek() == TokenKind::Pipe {
124            self.advance();
125            let rhs = self.parse_or()?;
126            lhs = Expr::Pipe(Box::new(lhs), Box::new(rhs));
127        }
128        Ok(lhs)
129    }
130
131    fn parse_or(&mut self) -> Result<Expr, ParseError> {
132        let mut lhs = self.parse_and()?;
133        while *self.peek() == TokenKind::Or {
134            self.advance();
135            let rhs = self.parse_and()?;
136            lhs = Expr::BinOp(BinOpKind::Or, Box::new(lhs), Box::new(rhs));
137        }
138        Ok(lhs)
139    }
140
141    fn parse_and(&mut self) -> Result<Expr, ParseError> {
142        let mut lhs = self.parse_not()?;
143        while *self.peek() == TokenKind::And {
144            self.advance();
145            let rhs = self.parse_not()?;
146            lhs = Expr::BinOp(BinOpKind::And, Box::new(lhs), Box::new(rhs));
147        }
148        Ok(lhs)
149    }
150
151    fn parse_not(&mut self) -> Result<Expr, ParseError> {
152        if *self.peek() == TokenKind::Not {
153            self.advance();
154            let expr = self.parse_not()?;
155            Ok(Expr::UnOp(UnOpKind::Not, Box::new(expr)))
156        } else {
157            self.parse_cmp()
158        }
159    }
160
161    fn parse_cmp(&mut self) -> Result<Expr, ParseError> {
162        let lhs = self.parse_setish()?;
163        let op = match self.peek() {
164            TokenKind::Eq => Some(BinOpKind::Eq),
165            TokenKind::Neq => Some(BinOpKind::Neq),
166            TokenKind::Lt => Some(BinOpKind::Lt),
167            TokenKind::Le => Some(BinOpKind::Le),
168            TokenKind::Gt => Some(BinOpKind::Gt),
169            TokenKind::Ge => Some(BinOpKind::Ge),
170            _ => None,
171        };
172        if let Some(op) = op {
173            self.advance();
174            let rhs = self.parse_setish()?;
175            Ok(Expr::BinOp(op, Box::new(lhs), Box::new(rhs)))
176        } else {
177            Ok(lhs)
178        }
179    }
180
181    fn parse_setish(&mut self) -> Result<Expr, ParseError> {
182        let mut lhs = self.parse_add()?;
183        loop {
184            let op = match self.peek() {
185                TokenKind::Union => Some(BinOpKind::Union),
186                TokenKind::Inter => Some(BinOpKind::Inter),
187                TokenKind::Diff => Some(BinOpKind::Diff),
188                TokenKind::BUnion => Some(BinOpKind::BUnion),
189                TokenKind::BDiff => Some(BinOpKind::BDiff),
190                TokenKind::In => Some(BinOpKind::In),
191                _ => None,
192            };
193            if let Some(op) = op {
194                self.advance();
195                let rhs = self.parse_add()?;
196                lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
197            } else {
198                break;
199            }
200        }
201        Ok(lhs)
202    }
203
204    fn parse_add(&mut self) -> Result<Expr, ParseError> {
205        let mut lhs = self.parse_mul()?;
206        loop {
207            let op = match self.peek() {
208                TokenKind::Plus => Some(BinOpKind::Add),
209                TokenKind::Minus => Some(BinOpKind::Sub),
210                TokenKind::Concat => Some(BinOpKind::Concat),
211                _ => None,
212            };
213            if let Some(op) = op {
214                self.advance();
215                let rhs = self.parse_mul()?;
216                lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
217            } else {
218                break;
219            }
220        }
221        Ok(lhs)
222    }
223
224    fn parse_mul(&mut self) -> Result<Expr, ParseError> {
225        let mut lhs = self.parse_unary()?;
226        loop {
227            let op = match self.peek() {
228                TokenKind::Star => Some(BinOpKind::Mul),
229                TokenKind::Slash => Some(BinOpKind::Div),
230                _ => None,
231            };
232            if let Some(op) = op {
233                self.advance();
234                let rhs = self.parse_unary()?;
235                lhs = Expr::BinOp(op, Box::new(lhs), Box::new(rhs));
236            } else {
237                break;
238            }
239        }
240        Ok(lhs)
241    }
242
243    fn parse_unary(&mut self) -> Result<Expr, ParseError> {
244        if *self.peek() == TokenKind::Minus {
245            self.advance();
246            let expr = self.parse_unary()?;
247            Ok(Expr::UnOp(UnOpKind::Neg, Box::new(expr)))
248        } else {
249            self.parse_postfix()
250        }
251    }
252
253    fn is_selector_access(&self) -> bool {
254        if self.pos + 2 < self.tokens.len() {
255            let next = &self.tokens[self.pos + 1].kind;
256            let after = &self.tokens[self.pos + 2].kind;
257            matches!(next, TokenKind::Ident(_) | TokenKind::Str(_)) && *after == TokenKind::Gt
258        } else {
259            false
260        }
261    }
262
263    fn parse_postfix(&mut self) -> Result<Expr, ParseError> {
264        let mut expr = self.parse_primary()?;
265        loop {
266            match self.peek() {
267                TokenKind::Lt if self.is_selector_access() => {
268                    self.advance();
269                    let selector = match self.peek().clone() {
270                        TokenKind::Ident(name) => {
271                            self.advance();
272                            name
273                        }
274                        TokenKind::Str(s) => {
275                            self.advance();
276                            s
277                        }
278                        token => {
279                            let pos = self.peek_pos();
280                            return Err(ParseError::Expected("selector".to_string(), token, pos));
281                        }
282                    };
283                    self.expect(TokenKind::Gt)?;
284                    let mode = match self.peek() {
285                        TokenKind::QMark => {
286                            self.advance();
287                            SelectMode::Optional
288                        }
289                        TokenKind::Bang => {
290                            self.advance();
291                            SelectMode::Required
292                        }
293                        _ => SelectMode::Plain,
294                    };
295                    expr = Expr::Select(Box::new(expr), selector, mode);
296                }
297                TokenKind::SelL => {
298                    self.advance();
299                    let selector = match self.peek().clone() {
300                        TokenKind::Ident(name) => {
301                            self.advance();
302                            name
303                        }
304                        TokenKind::Str(s) => {
305                            self.advance();
306                            s
307                        }
308                        token => {
309                            let pos = self.peek_pos();
310                            return Err(ParseError::Expected("selector".to_string(), token, pos));
311                        }
312                    };
313                    self.expect(TokenKind::SelR)?;
314                    let mode = match self.peek() {
315                        TokenKind::QMark => {
316                            self.advance();
317                            SelectMode::Optional
318                        }
319                        TokenKind::Bang => {
320                            self.advance();
321                            SelectMode::Required
322                        }
323                        _ => SelectMode::Plain,
324                    };
325                    expr = Expr::Select(Box::new(expr), selector, mode);
326                }
327                TokenKind::LParen => {
328                    self.advance();
329                    let mut args = Vec::new();
330                    if *self.peek() != TokenKind::RParen {
331                        args.push(self.parse_expr()?);
332                        while *self.peek() == TokenKind::Comma {
333                            self.advance();
334                            args.push(self.parse_expr()?);
335                        }
336                    }
337                    self.expect(TokenKind::RParen)?;
338                    expr = Expr::Call(Box::new(expr), args);
339                }
340                _ => break,
341            }
342        }
343        Ok(expr)
344    }
345
346    fn parse_primary(&mut self) -> Result<Expr, ParseError> {
347        match self.peek().clone() {
348            TokenKind::Null => {
349                self.advance();
350                Ok(Expr::Null)
351            }
352            TokenKind::True => {
353                self.advance();
354                Ok(Expr::Bool(true))
355            }
356            TokenKind::False => {
357                self.advance();
358                Ok(Expr::Bool(false))
359            }
360            TokenKind::Num(n) => {
361                self.advance();
362                Ok(Expr::Num(n))
363            }
364            TokenKind::Str(s) => {
365                self.advance();
366                Ok(Expr::Str(s))
367            }
368            TokenKind::Bytes => {
369                let pos = self.peek_pos();
370                self.advance();
371                self.expect(TokenKind::LParen)?;
372                let literal = match self.peek().clone() {
373                    TokenKind::Str(s) => {
374                        self.advance();
375                        s
376                    }
377                    token => {
378                        let pos = self.peek_pos();
379                        return Err(ParseError::Expected("string literal".to_string(), token, pos));
380                    }
381                };
382                self.expect(TokenKind::RParen)?;
383                let bytes = parse_bytes_literal(&literal).map_err(|reason| ParseError::InvalidBytesLiteral {
384                    literal,
385                    pos,
386                    reason,
387                })?;
388                Ok(Expr::Bytes(bytes))
389            }
390            TokenKind::Placeholder => {
391                if *self.peek_next() == TokenKind::FatArrow {
392                    return Err(ParseError::ReservedPlaceholder);
393                }
394                self.advance();
395                Ok(Expr::Placeholder)
396            }
397            TokenKind::Ident(name) => {
398                if *self.peek_next() == TokenKind::FatArrow {
399                    self.advance();
400                    self.advance();
401                    let body = self.parse_expr()?;
402                    Ok(Expr::Lambda(name, Box::new(body)))
403                } else {
404                    self.advance();
405                    Ok(Expr::Ident(name))
406                }
407            }
408            TokenKind::LParen => {
409                self.advance();
410                let expr = self.parse_expr()?;
411                self.expect(TokenKind::RParen)?;
412                Ok(expr)
413            }
414            TokenKind::Seq => {
415                self.advance();
416                self.expect(TokenKind::LBrack)?;
417                let items = self.parse_expr_list(TokenKind::RBrack)?;
418                Ok(Expr::Seq(items))
419            }
420            TokenKind::Set => {
421                self.advance();
422                self.expect(TokenKind::LBrace)?;
423                let items = self.parse_expr_list(TokenKind::RBrace)?;
424                Ok(Expr::Set(items))
425            }
426            TokenKind::Bag => {
427                self.advance();
428                self.expect(TokenKind::LBrace)?;
429                let items = self.parse_expr_list(TokenKind::RBrace)?;
430                Ok(Expr::Bag(items))
431            }
432            TokenKind::Map => {
433                self.advance();
434                self.expect(TokenKind::LBrace)?;
435                let mut entries = Vec::new();
436                if *self.peek() != TokenKind::RBrace {
437                    entries.push(self.parse_map_entry()?);
438                    while *self.peek() == TokenKind::Comma {
439                        self.advance();
440                        if *self.peek() == TokenKind::RBrace {
441                            break;
442                        }
443                        entries.push(self.parse_map_entry()?);
444                    }
445                }
446                self.expect(TokenKind::RBrace)?;
447                Ok(Expr::Map(entries))
448            }
449            TokenKind::Prod => {
450                self.advance();
451                self.expect(TokenKind::LBrace)?;
452                let mut fields = Vec::new();
453                if *self.peek() != TokenKind::RBrace {
454                    fields.push(self.parse_prod_field()?);
455                    while *self.peek() == TokenKind::Comma {
456                        self.advance();
457                        if *self.peek() == TokenKind::RBrace {
458                            break;
459                        }
460                        fields.push(self.parse_prod_field()?);
461                    }
462                }
463                self.expect(TokenKind::RBrace)?;
464                Ok(Expr::Prod(fields))
465            }
466            TokenKind::BagKV => {
467                self.advance();
468                self.expect(TokenKind::LBrace)?;
469                let mut entries = Vec::new();
470                if *self.peek() != TokenKind::RBrace {
471                    entries.push(self.parse_bagkv_entry()?);
472                    while *self.peek() == TokenKind::Comma {
473                        self.advance();
474                        if *self.peek() == TokenKind::RBrace {
475                            break;
476                        }
477                        entries.push(self.parse_bagkv_entry()?);
478                    }
479                }
480                self.expect(TokenKind::RBrace)?;
481                Ok(Expr::BagKV(entries))
482            }
483            TokenKind::Some => {
484                self.advance();
485                self.expect(TokenKind::LParen)?;
486                let expr = self.parse_expr()?;
487                self.expect(TokenKind::RParen)?;
488                Ok(Expr::Some_(Box::new(expr)))
489            }
490            TokenKind::None => {
491                self.advance();
492                Ok(Expr::None_)
493            }
494            TokenKind::Ok => {
495                self.advance();
496                self.expect(TokenKind::LParen)?;
497                let expr = self.parse_expr()?;
498                self.expect(TokenKind::RParen)?;
499                Ok(Expr::Ok_(Box::new(expr)))
500            }
501            TokenKind::Fail => {
502                self.advance();
503                self.expect(TokenKind::LParen)?;
504                let code = self.parse_expr()?;
505                self.expect(TokenKind::Comma)?;
506                let msg = self.parse_expr()?;
507                self.expect(TokenKind::RParen)?;
508                Ok(Expr::Fail_(Box::new(code), Box::new(msg)))
509            }
510            TokenKind::LBrace => {
511                self.advance();
512                if let Some(err) = self.detect_static_selector_error() {
513                    return Err(err);
514                }
515                self.parse_comprehension()
516            }
517            token => {
518                let pos = self.peek_pos();
519                Err(ParseError::Unexpected(token, pos))
520            }
521        }
522    }
523
524    fn parse_comprehension(&mut self) -> Result<Expr, ParseError> {
525        if *self.peek() == TokenKind::Yield {
526            self.advance();
527            let yield_expr = self.parse_expr()?;
528            self.expect(TokenKind::Bar)?;
529            let (binding, collection) = self.parse_generator_expr()?;
530            let pred = if *self.peek() == TokenKind::Bar {
531                self.advance();
532                Some(Box::new(self.parse_expr()?))
533            } else {
534                None
535            };
536            self.expect(TokenKind::RBrace)?;
537            return Ok(Expr::Comprehension {
538                yield_expr: Some(Box::new(yield_expr)),
539                binding,
540                collection: Box::new(collection),
541                pred,
542            });
543        }
544
545        let first_expr = self.parse_expr()?;
546        if let Some((binding, collection)) = Self::decompose_generator_expr(first_expr.clone()) {
547            let pred = if *self.peek() == TokenKind::Bar {
548                self.advance();
549                Some(Box::new(self.parse_expr()?))
550            } else {
551                None
552            };
553            self.expect(TokenKind::RBrace)?;
554            return Ok(Expr::Comprehension {
555                yield_expr: None,
556                binding,
557                collection: Box::new(collection),
558                pred,
559            });
560        }
561
562        if matches!(first_expr, Expr::BinOp(BinOpKind::In, _, _)) {
563            return Err(self.expected_generator_expr_error());
564        }
565
566        self.expect(TokenKind::Bar)?;
567        let (binding, collection) = self.parse_generator_expr()?;
568        let pred = if *self.peek() == TokenKind::Bar {
569            self.advance();
570            Some(Box::new(self.parse_expr()?))
571        } else {
572            None
573        };
574        self.expect(TokenKind::RBrace)?;
575        Ok(Expr::Comprehension {
576            yield_expr: Some(Box::new(first_expr)),
577            binding,
578            collection: Box::new(collection),
579            pred,
580        })
581    }
582
583    fn parse_generator_expr(&mut self) -> Result<(String, Expr), ParseError> {
584        let expr = self.parse_expr()?;
585        Self::decompose_generator_expr(expr).ok_or_else(|| self.expected_generator_expr_error())
586    }
587
588    fn decompose_generator_expr(expr: Expr) -> Option<(String, Expr)> {
589        match expr {
590            Expr::BinOp(BinOpKind::In, lhs, rhs) => match *lhs {
591                Expr::Ident(name) => Some((name, *rhs)),
592                _ => None,
593            },
594            _ => None,
595        }
596    }
597
598    fn parse_expr_list(&mut self, close: TokenKind) -> Result<Vec<Expr>, ParseError> {
599        let mut items = Vec::new();
600        if *self.peek() != close {
601            items.push(self.parse_expr()?);
602            while *self.peek() == TokenKind::Comma {
603                self.advance();
604                if *self.peek() == close {
605                    break;
606                }
607                items.push(self.parse_expr()?);
608            }
609        }
610        self.expect(close)?;
611        Ok(items)
612    }
613
614    fn parse_map_entry(&mut self) -> Result<(String, Expr), ParseError> {
615        let key = match self.peek().clone() {
616            TokenKind::Str(s) => {
617                self.advance();
618                s
619            }
620            _ => return Err(ParseError::InvalidMapKey),
621        };
622        self.expect(TokenKind::Arrow)?;
623        let value = self.parse_expr()?;
624        Ok((key, value))
625    }
626
627    fn parse_bagkv_entry(&mut self) -> Result<(String, Expr), ParseError> {
628        let key = match self.peek().clone() {
629            TokenKind::Str(s) => {
630                self.advance();
631                s
632            }
633            TokenKind::Ident(name) => {
634                self.advance();
635                name
636            }
637            _ => return Err(ParseError::InvalidBagkvKey),
638        };
639        self.expect(TokenKind::Arrow)?;
640        let value = self.parse_expr()?;
641        Ok((key, value))
642    }
643
644    fn parse_prod_field(&mut self) -> Result<(String, Expr), ParseError> {
645        let name = self.expect_ident()?;
646        self.expect(TokenKind::Colon)?;
647        let value = self.parse_expr()?;
648        Ok((name, value))
649    }
650
651    fn detect_static_selector_error(&self) -> Option<ParseError> {
652        let mut labels = Vec::new();
653        let mut idx = self.pos;
654        while idx < self.tokens.len() {
655            match &self.tokens[idx].kind {
656                TokenKind::Ident(name) => labels.push(name.clone()),
657                TokenKind::Str(s) => labels.push(s.clone()),
658                TokenKind::RBrace => break,
659                _ => return None,
660            }
661            idx += 1;
662        }
663
664        if idx >= self.tokens.len() || self.tokens[idx].kind != TokenKind::RBrace || labels.is_empty() {
665            return None;
666        }
667
668        let mut seen = std::collections::BTreeSet::new();
669        for label in labels {
670            if !seen.insert(label) {
671                return Some(ParseError::DuplicateLabelInSelector);
672            }
673        }
674
675        Some(ParseError::SelectorNotStatic)
676    }
677}
678
679pub fn parse(tokens: Vec<Token>) -> Result<Program, ParseError> {
680    let mut parser = Parser::new(tokens);
681    parser.parse_program()
682}
683
684fn parse_bytes_literal(src: &str) -> Result<Vec<u8>, String> {
685    if src.len() % 2 != 0 {
686        return Err("expected even-length base16 string".to_string());
687    }
688
689    let mut out = Vec::with_capacity(src.len() / 2);
690    let mut chars = src.chars();
691    while let (Some(hi), Some(lo)) = (chars.next(), chars.next()) {
692        let hi = hi
693            .to_digit(16)
694            .ok_or_else(|| "expected base16 digits only".to_string())?;
695        let lo = lo
696            .to_digit(16)
697            .ok_or_else(|| "expected base16 digits only".to_string())?;
698        out.push(((hi << 4) | lo) as u8);
699    }
700    Ok(out)
701}