Skip to main content

graphcal_compiler/syntax/parser/
mod.rs

1use std::sync::Arc;
2
3use miette::{Diagnostic, NamedSource, SourceSpan};
4use thiserror::Error;
5
6use crate::syntax::ast::{Expr, Ident, IdentPath};
7use crate::syntax::comments::SourceMetadata;
8use crate::syntax::lexer::Lexer;
9use crate::syntax::names::NameAtom;
10use crate::syntax::span::Span;
11use crate::syntax::token::Token;
12
13mod compound;
14mod decl;
15mod expr;
16mod table;
17mod type_expr;
18
/// Rich parse error with miette diagnostics.
///
/// Every variant carries the source it refers to (`src`) and the byte span
/// to highlight (`span`), so a variant can be rendered on its own. Each
/// variant has a stable diagnostic code of the form `graphcal::PNNN`.
#[derive(Debug, Clone, Error, Diagnostic)]
pub enum ParseError {
    /// A token was found where something else was required.
    ///
    /// `expected` is a human-readable description shown in the help text;
    /// `found` is the text of the offending token.
    #[error("unexpected token `{found}`")]
    #[diagnostic(code(graphcal::P001), help("expected {expected}"))]
    UnexpectedToken {
        expected: String,
        found: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("here")]
        span: SourceSpan,
    },

    /// The input ended while `expected` was still required.
    #[error("unexpected end of file")]
    #[diagnostic(code(graphcal::P002), help("expected {expected}"))]
    UnexpectedEof {
        expected: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("here")]
        span: SourceSpan,
    },

    /// A numeric literal could not be accepted; `reason` becomes the label
    /// attached to the literal's span.
    #[error("invalid number literal")]
    #[diagnostic(code(graphcal::P003))]
    InvalidNumber {
        reason: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("{reason}")]
        span: SourceSpan,
    },

    /// A table row has `got` values while the header has `expected` columns.
    #[error("table row has {got} value(s), but the header has {expected} column(s)")]
    #[diagnostic(code(graphcal::P004))]
    TableRowLengthMismatch {
        expected: usize,
        got: usize,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this row has {got} value(s)")]
        span: SourceSpan,
    },

    /// A domain constraint used a key other than `min` or `max`.
    #[error("unknown domain constraint key `{key}`")]
    #[diagnostic(
        code(graphcal::P005),
        help("valid domain constraint keys are `min` and `max`")
    )]
    InvalidDomainBoundKey {
        key: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("unknown key")]
        span: SourceSpan,
    },

    /// The source contains a character that is not part of the grammar.
    #[error("stray character in source")]
    #[diagnostic(
        code(graphcal::P006),
        help("remove or replace this character; it is not part of the graphcal grammar")
    )]
    UnknownToken {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("stray character")]
        span: SourceSpan,
    },

    /// The slot tuple of a multi-decl has `tuple_count` entries, but the
    /// multi-decl declares `slot_count` slots. The message pluralizes
    /// "entry"/"slot" according to the counts.
    #[error(
        "multi-decl slot tuple has {tuple_count} entr{}, but the multi-decl declares {slot_count} slot{}",
        if *tuple_count == 1 { "y" } else { "ies" },
        if *slot_count == 1 { "" } else { "s" }
    )]
    #[diagnostic(
        code(graphcal::P007),
        help(
            "the slot tuple in `table[..., (…)]` must contain exactly one entry per declared slot"
        )
    )]
    MultiDeclTupleArity {
        slot_count: usize,
        tuple_count: usize,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("slot tuple here")]
        span: SourceSpan,
    },

    /// The header row of a multi-decl has `header_count` cells, but the
    /// multi-decl declares `slot_count` slots.
    #[error(
        "multi-decl header row has {header_count} cell{}, but the multi-decl declares {slot_count} slot{}",
        if *header_count == 1 { "" } else { "s" },
        if *slot_count == 1 { "" } else { "s" }
    )]
    #[diagnostic(
        code(graphcal::P008),
        help("the header row (`: _, _, …;`) must have exactly one cell per slot")
    )]
    MultiDeclHeaderArity {
        slot_count: usize,
        header_count: usize,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("header row here")]
        span: SourceSpan,
    },

    /// The multi-decl row labelled `row_label` has `got` values, but the
    /// multi-decl declares `slot_count` slots.
    #[error(
        "multi-decl row `{row_label}` has {got} value(s), but the multi-decl declares {slot_count} slot{}",
        if *slot_count == 1 { "" } else { "s" }
    )]
    #[diagnostic(
        code(graphcal::P009),
        help("each row must have exactly one value per slot")
    )]
    MultiDeclRowArity {
        slot_count: usize,
        got: usize,
        row_label: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this row has {got} value(s)")]
        span: SourceSpan,
    },

    /// A multi-decl was written with only one slot.
    #[error("multi-decl requires at least two slots")]
    #[diagnostic(
        code(graphcal::P010),
        help(
            "for a single declaration, use the regular `param`/`node`/`const node` form without a trailing comma"
        )
    )]
    MultiDeclSingleSlot {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("single slot here")]
        span: SourceSpan,
    },

    /// A multi-decl was written without any shared (row) axis.
    #[error("multi-decl requires at least one shared axis")]
    #[diagnostic(
        code(graphcal::P011),
        help("declare the row axis in `table[SharedAxis, (…)]`")
    )]
    MultiDeclNoSharedAxis {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("missing shared axis")]
        span: SourceSpan,
    },

    /// A multi-decl shape that is not supported yet; `reason` is used
    /// verbatim as the error message.
    #[error("{reason}")]
    #[diagnostic(
        code(graphcal::P012),
        help(
            "this multi-decl shape is scheduled for a later version; see issue #481 for the incremental plan"
        )
    )]
    MultiDeclUnsupportedShape {
        reason: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("here")]
        span: SourceSpan,
    },

    /// An inline DAG call was not followed by a `.<out>` projection.
    #[error("inline DAG call requires `.<out>` projection")]
    #[diagnostic(
        code(graphcal::P014),
        help(
            "add `.<output_name>` after the call; an instantiated DAG without a projection is not a node"
        )
    )]
    InlineDagCallMissingProjection {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("expected `.<out>` projection here")]
        span: SourceSpan,
    },

    /// Nesting exceeded [`MAX_NESTING_DEPTH`]; the help text quotes the limit.
    #[error("expression nesting is too deep")]
    #[diagnostic(
        code(graphcal::P015),
        help("the parser limits nesting to {MAX_NESTING_DEPTH} levels; simplify the expression")
    )]
    TooDeeplyNested {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("nesting exceeds the limit here")]
        span: SourceSpan,
    },

    /// A unit reference used more than one `alias.` qualifier.
    #[error("unit reference path is too deep")]
    #[diagnostic(
        code(graphcal::P017),
        help(
            "unit references are at most `alias.unit` — a bare name for local, selectively imported, or prelude units, or one module-alias qualifier for module-imported units"
        )
    )]
    UnitReferenceTooDeep {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("at most one `alias.` qualifier is allowed here")]
        span: SourceSpan,
    },

    /// A term was raised to the power zero (`^0`).
    #[error("`^0` exponent has no effect")]
    #[diagnostic(
        code(graphcal::P016),
        help(
            "a zero power erases its term; remove the term (or the exponent) instead of raising to zero"
        )
    )]
    ZeroExponent {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("exponent must be a non-zero integer")]
        span: SourceSpan,
    },

    /// The field named `field` appears more than once in `context`.
    #[error("duplicate `{field}` in {context}")]
    #[diagnostic(
        code(graphcal::P018),
        help("each field may appear at most once; remove or rename the duplicate")
    )]
    DuplicatePlotField {
        field: String,
        context: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("duplicate field here")]
        span: SourceSpan,
    },

    /// A plot declaration has an empty or missing `encode:` block.
    #[error("plot declaration has no encoding channels")]
    #[diagnostic(
        code(graphcal::P019),
        help(
            "add an `encode:` block with at least one channel, e.g. `encode: {{ x: ..., y: ... }}`"
        )
    )]
    MissingPlotEncoding {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this plot has an empty or missing `encode:` block")]
        span: SourceSpan,
    },

    /// A composition declaration (named by `kind` in the message) has an
    /// empty or missing `plots:` list.
    #[error("{kind} declaration has no plots")]
    #[diagnostic(
        code(graphcal::P020),
        help("add a non-empty `plots:` list, e.g. `plots: [my_plot]`")
    )]
    EmptyCompositionPlots {
        kind: &'static str,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this {kind} has an empty or missing `plots:` list")]
        span: SourceSpan,
    },
}
281
/// Maximum nesting depth for recursive grammar productions (expressions,
/// unary chains, type expressions).
///
/// The recursive-descent parser consumes one or more stack frames per
/// nesting level; without a bound, pathological input like 100k nested
/// parentheses overflows the stack and aborts the process (including the
/// LSP server). The limit is far above any realistic engineering program —
/// note that left-nested operator *chains* (`1.0 + 1.0 + …`) are parsed
/// iteratively and are not limited by this bound.
///
/// Once the current depth has reached this value, entering another level
/// fails with [`ParseError::TooDeeplyNested`].
pub const MAX_NESTING_DEPTH: usize = 256;
292
293impl ParseError {
294    /// Return the `NamedSource` embedded in this error.
295    ///
296    /// Every variant carries the file's name and full source text via miette's
297    /// `#[source_code]` field. Exposing it as a typed accessor lets diagnostic
298    /// emitters pair the error's offsets with the exact source they index into
299    /// — instead of inferring (name, source) from external context, which can
300    /// silently desynchronize when an imported file is the origin.
301    #[must_use]
302    pub const fn named_source(&self) -> &NamedSource<Arc<String>> {
303        match self {
304            Self::UnexpectedToken { src, .. }
305            | Self::UnexpectedEof { src, .. }
306            | Self::InvalidNumber { src, .. }
307            | Self::TableRowLengthMismatch { src, .. }
308            | Self::InvalidDomainBoundKey { src, .. }
309            | Self::UnknownToken { src, .. }
310            | Self::MultiDeclTupleArity { src, .. }
311            | Self::MultiDeclHeaderArity { src, .. }
312            | Self::MultiDeclRowArity { src, .. }
313            | Self::MultiDeclSingleSlot { src, .. }
314            | Self::MultiDeclNoSharedAxis { src, .. }
315            | Self::MultiDeclUnsupportedShape { src, .. }
316            | Self::InlineDagCallMissingProjection { src, .. }
317            | Self::TooDeeplyNested { src, .. }
318            | Self::ZeroExponent { src, .. }
319            | Self::UnitReferenceTooDeep { src, .. }
320            | Self::DuplicatePlotField { src, .. }
321            | Self::MissingPlotEncoding { src, .. }
322            | Self::EmptyCompositionPlots { src, .. } => src,
323        }
324    }
325}
326
/// Recursive-descent parser over a single source text.
///
/// Besides the token stream, it keeps the source text and its name so that
/// every [`ParseError`] it builds can embed a `NamedSource`.
pub struct Parser<'src> {
    /// Lexer producing tokens from the borrowed source text.
    pub(super) lexer: Lexer<'src>,
    /// Owned copy of the source text, shared with diagnostics through `Arc`.
    pub(super) source: Arc<String>,
    /// Name under which the source is reported in diagnostics.
    pub(super) source_name: String,
    /// Current nesting depth of recursive grammar productions; bounded by
    /// [`MAX_NESTING_DEPTH`] via [`Self::with_depth`].
    depth: usize,
}
335
336impl<'src> Parser<'src> {
337    #[must_use]
338    pub fn new(source: &'src str) -> Self {
339        Self {
340            lexer: Lexer::new(source),
341            source: Arc::new(source.to_string()),
342            source_name: "input".to_string(),
343            depth: 0,
344        }
345    }
346
347    #[must_use]
348    pub fn with_name(source: &'src str, name: &str) -> Self {
349        Self {
350            lexer: Lexer::new(source),
351            source: Arc::new(source.to_string()),
352            source_name: name.to_string(),
353            depth: 0,
354        }
355    }
356
357    /// Run `f` one nesting level deeper, erroring out once the depth budget
358    /// is exhausted instead of overflowing the stack.
359    ///
360    /// Within the budget, the stack is grown on demand (`stacker`): the
361    /// recursive-descent frames for [`MAX_NESTING_DEPTH`] levels exceed the
362    /// default stack of secondary threads (tests, LSP workers) in debug
363    /// builds, so the bound alone would not prevent an abort.
364    pub(super) fn with_depth<T>(
365        &mut self,
366        f: impl FnOnce(&mut Self) -> Result<T, ParseError>,
367    ) -> Result<T, ParseError> {
368        if self.depth >= MAX_NESTING_DEPTH {
369            let span = self.lexer.peek_with_span().map(|(_, span)| span);
370            return Err(ParseError::TooDeeplyNested {
371                src: self.named_source(),
372                span: span
373                    .unwrap_or_else(|| Span::new(self.lexer.source_len(), 0))
374                    .into(),
375            });
376        }
377        self.depth += 1;
378        let result = crate::stack::with_stack_growth(|| f(self));
379        self.depth -= 1;
380        result
381    }
382
383    #[must_use]
384    pub fn into_source_metadata(self) -> SourceMetadata {
385        self.lexer.into_source_metadata()
386    }
387
388    pub(super) fn named_source(&self) -> NamedSource<Arc<String>> {
389        crate::syntax::named_source(&self.source_name, Arc::clone(&self.source))
390    }
391
392    pub(super) fn unexpected_token(&self, expected: &str, found: &str, span: Span) -> ParseError {
393        ParseError::UnexpectedToken {
394            expected: expected.to_string(),
395            found: found.to_string(),
396            src: self.named_source(),
397            span: span.into(),
398        }
399    }
400
401    /// Build a duplicate-field error for plot/figure/layer block parsing.
402    pub(super) fn duplicate_plot_field(
403        &self,
404        field: &str,
405        context: &str,
406        span: Span,
407    ) -> ParseError {
408        ParseError::DuplicatePlotField {
409            field: field.to_string(),
410            context: context.to_string(),
411            src: self.named_source(),
412            span: span.into(),
413        }
414    }
415
416    pub(super) fn unexpected_eof(&self, expected: &str) -> ParseError {
417        ParseError::UnexpectedEof {
418            expected: expected.to_string(),
419            src: self.named_source(),
420            span: Span::new(self.lexer.source_len(), 0).into(),
421        }
422    }
423
424    /// Consume any remaining tokens and, if the lexer encountered an unrecognized
425    /// character at any point, replace `result` with a `ParseError::UnknownToken`
426    /// pointing at the first such span.
427    ///
428    /// A stray character is a root-cause lex-level failure; it should eclipse any
429    /// downstream parse error that was caused by the character having been
430    /// silently skipped.
431    fn finalize<T>(&mut self, result: Result<T, ParseError>) -> Result<T, ParseError> {
432        while self.lexer.peek().is_some() {
433            self.lexer.next_token();
434        }
435        if let Some(span) = self.lexer.first_error_span() {
436            return Err(ParseError::UnknownToken {
437                src: self.named_source(),
438                span: span.into(),
439            });
440        }
441        result
442    }
443
444    /// Consume the next token, returning an error if the lexer is exhausted.
445    ///
446    /// Use this after `peek()` has confirmed `Some`.
447    pub(super) fn advance(&mut self) -> Result<(Token, Span), ParseError> {
448        self.lexer
449            .next_token()
450            .ok_or_else(|| self.unexpected_eof("token"))
451    }
452
453    /// Parse a finite `f64` literal from already-normalized token text.
454    pub(super) fn parse_finite_f64_literal(
455        &self,
456        text: &str,
457        span: Span,
458    ) -> Result<f64, ParseError> {
459        let value: f64 =
460            text.parse()
461                .map_err(|e: std::num::ParseFloatError| ParseError::InvalidNumber {
462                    reason: e.to_string(),
463                    src: self.named_source(),
464                    span: span.into(),
465                })?;
466        if value.is_finite() {
467            Ok(value)
468        } else {
469            Err(ParseError::InvalidNumber {
470                reason: "floating-point literal must be finite".to_string(),
471                src: self.named_source(),
472                span: span.into(),
473            })
474        }
475    }
476
477    /// Parse a single expression from the source string.
478    ///
479    /// Expects the entire input to be consumed; returns an error if there
480    /// are trailing tokens after the expression.
481    ///
482    /// # Errors
483    ///
484    /// Returns a [`ParseError`] if the source is not a valid expression
485    /// or if there are unexpected trailing tokens.
486    pub fn parse_single_expr(&mut self) -> Result<Expr, ParseError> {
487        let result = self.parse_single_expr_inner();
488        self.finalize(result)
489    }
490
491    fn parse_single_expr_inner(&mut self) -> Result<Expr, ParseError> {
492        let expr = self.parse_expr()?;
493        if let Some((tok, span)) = self.lexer.peek_with_span() {
494            let tok = *tok;
495            return Err(self.unexpected_token("end of input", &tok.to_string(), span));
496        }
497        Ok(expr)
498    }
499
500    /// Parse a standalone unit expression (e.g., `m/s^2`, `kg * m / s^2`).
501    ///
502    /// Expects the entire input to be consumed; returns an error if there
503    /// are trailing tokens after the unit expression.
504    ///
505    /// # Errors
506    ///
507    /// Returns a [`ParseError`] if the source is not a valid unit expression.
508    pub fn parse_standalone_unit_expr(
509        &mut self,
510    ) -> Result<crate::syntax::ast::UnitExpr, ParseError> {
511        let result = self.parse_standalone_unit_expr_inner();
512        self.finalize(result)
513    }
514
515    fn parse_standalone_unit_expr_inner(
516        &mut self,
517    ) -> Result<crate::syntax::ast::UnitExpr, ParseError> {
518        let expr = self.parse_unit_expr()?;
519        if let Some((tok, span)) = self.lexer.peek_with_span() {
520            let tok = *tok;
521            return Err(self.unexpected_token("end of input", &tok.to_string(), span));
522        }
523        Ok(expr)
524    }
525
526    /// Parse a standalone dimension expression (e.g., `Length / Time`).
527    ///
528    /// Expects the entire input to be consumed; returns an error if there
529    /// are trailing tokens after the dimension expression.
530    ///
531    /// # Errors
532    ///
533    /// Returns a [`ParseError`] if the source is not a valid dimension expression.
534    pub fn parse_standalone_dim_expr(&mut self) -> Result<crate::syntax::ast::DimExpr, ParseError> {
535        let result = self.parse_standalone_dim_expr_inner();
536        self.finalize(result)
537    }
538
539    fn parse_standalone_dim_expr_inner(
540        &mut self,
541    ) -> Result<crate::syntax::ast::DimExpr, ParseError> {
542        let expr = self.parse_dim_expr()?;
543        if let Some((tok, span)) = self.lexer.peek_with_span() {
544            let tok = *tok;
545            return Err(self.unexpected_token("end of input", &tok.to_string(), span));
546        }
547        Ok(expr)
548    }
549
550    /// Parse the full source file into a [`File`](crate::syntax::ast::File) AST node.
551    ///
552    /// # Errors
553    ///
554    /// Returns a [`ParseError`] if the source contains invalid syntax.
555    pub fn parse_file(&mut self) -> Result<crate::syntax::ast::File, ParseError> {
556        let result = self.parse_file_inner();
557        self.finalize(result)
558    }
559
560    fn parse_file_inner(&mut self) -> Result<crate::syntax::ast::File, ParseError> {
561        let mut declarations = Vec::new();
562        while self.lexer.peek().is_some() {
563            declarations.push(self.parse_declaration()?);
564        }
565        Ok(crate::syntax::ast::File { declarations })
566    }
567
568    // --- Helper methods ---
569
570    pub(super) fn expect(&mut self, expected: Token) -> Result<(Token, Span), ParseError> {
571        let expected_str = format!("`{expected}`");
572        match self.lexer.next_token() {
573            Some((tok, span)) if tok == expected => Ok((tok, span)),
574            Some((tok, span)) => Err(self.unexpected_token(&expected_str, &tok.to_string(), span)),
575            None => Err(self.unexpected_eof(&expected_str)),
576        }
577    }
578
579    /// Parse a comma-separated list of items until `end_token` is peeked.
580    ///
581    /// Supports trailing commas. Does **not** consume the `end_token`.
582    pub(super) fn parse_comma_separated<T>(
583        &mut self,
584        end_token: Token,
585        mut parse_item: impl FnMut(&mut Self) -> Result<T, ParseError>,
586    ) -> Result<Vec<T>, ParseError> {
587        let mut items = Vec::new();
588        loop {
589            if self.lexer.peek() == Some(&end_token) {
590                break;
591            }
592            items.push(parse_item(self)?);
593            if self.lexer.peek() == Some(&Token::Comma) {
594                self.lexer.next_token();
595            } else {
596                break;
597            }
598        }
599        Ok(items)
600    }
601
602    /// Parse any identifier regardless of casing.
603    pub(super) fn parse_any_ident(&mut self) -> Result<Ident, ParseError> {
604        match self.lexer.next_token() {
605            Some((Token::Ident, span)) => Ok(Ident {
606                name: NameAtom::new_unchecked_for_parser(self.lexer.slice_at(span).to_string()),
607                span,
608            }),
609            Some((tok, span)) => Err(self.unexpected_token("identifier", &tok.to_string(), span)),
610            None => Err(self.unexpected_eof("identifier")),
611        }
612    }
613
614    /// Parse a non-empty dot-separated identifier path.
615    pub(super) fn parse_ident_path(&mut self) -> Result<IdentPath, ParseError> {
616        let first = self.parse_any_ident()?;
617        let mut rest = Vec::new();
618        while self.lexer.peek() == Some(&Token::Dot)
619            && self.lexer.peek_second() == Some(&Token::Ident)
620        {
621            self.lexer.next_token(); // consume `.`
622            rest.push(self.parse_any_ident()?);
623        }
624        Ok(IdentPath::new(crate::syntax::non_empty::NonEmpty::new(
625            first, rest,
626        )))
627    }
628}
629
#[cfg(test)]
mod tests {
    use crate::syntax::parser::{ParseError, Parser};

    /// Parse `input` as a whole file and return the error it must produce.
    fn parse_file_err(input: &str) -> ParseError {
        Parser::new(input)
            .parse_file()
            .expect_err("expected parse error")
    }

    #[test]
    fn stray_character_in_source_surfaces_as_unknown_token() {
        let input = "param x = 1.0; §";
        let err = parse_file_err(input);
        if let ParseError::UnknownToken { span, .. } = &err {
            // The reported span must cover exactly the stray character.
            let start: usize = span.offset();
            let flagged = &input[start..start + span.len()];
            assert_eq!(flagged, "§");
        } else {
            panic!("expected UnknownToken, got {err:?}");
        }
    }

    #[test]
    fn stray_character_preempts_other_parse_errors() {
        // The trailing `+` alone would make the parse fail with
        // UnexpectedToken, but the `§` earlier in the source is the root
        // cause and is what must be reported.
        let err = parse_file_err("param x = §1.0 +");
        assert!(
            matches!(err, ParseError::UnknownToken { .. }),
            "expected UnknownToken, got {err:?}"
        );
    }
}