1use std::sync::Arc;
2
3use miette::{Diagnostic, NamedSource, SourceSpan};
4use thiserror::Error;
5
6use crate::syntax::ast::{Expr, Ident, IdentPath};
7use crate::syntax::comments::SourceMetadata;
8use crate::syntax::lexer::Lexer;
9use crate::syntax::names::NameAtom;
10use crate::syntax::span::Span;
11use crate::syntax::token::Token;
12
13mod compound;
14mod decl;
15mod expr;
16mod table;
17mod type_expr;
18
/// Errors reported by the parser.
///
/// Every variant carries the source text (`src`) and a labelled `span` so the
/// error can be rendered as a `miette` diagnostic. Each variant has its own
/// `graphcal::P0xx` diagnostic code.
#[derive(Debug, Clone, Error, Diagnostic)]
pub enum ParseError {
    /// A token was found where something else was expected.
    ///
    /// `expected` is shown in the help text, `found` in the message.
    /// Built by `Parser::unexpected_token`.
    #[error("unexpected token `{found}`")]
    #[diagnostic(code(graphcal::P001), help("expected {expected}"))]
    UnexpectedToken {
        expected: String,
        found: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("here")]
        span: SourceSpan,
    },

    /// The input ended while `expected` was still required.
    ///
    /// Built by `Parser::unexpected_eof`, which uses a zero-length span at the
    /// end of the source.
    #[error("unexpected end of file")]
    #[diagnostic(code(graphcal::P002), help("expected {expected}"))]
    UnexpectedEof {
        expected: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("here")]
        span: SourceSpan,
    },

    /// A number literal could not be accepted; `reason` becomes the span label.
    ///
    /// `Parser::parse_finite_f64_literal` raises this both for text that does
    /// not parse as `f64` and for values that parse to a non-finite number.
    #[error("invalid number literal")]
    #[diagnostic(code(graphcal::P003))]
    InvalidNumber {
        reason: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("{reason}")]
        span: SourceSpan,
    },

    /// A table row has `got` values while the header has `expected` columns.
    #[error("table row has {got} value(s), but the header has {expected} column(s)")]
    #[diagnostic(code(graphcal::P004))]
    TableRowLengthMismatch {
        expected: usize,
        got: usize,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this row has {got} value(s)")]
        span: SourceSpan,
    },

    /// A domain constraint used a key other than `min` or `max`.
    #[error("unknown domain constraint key `{key}`")]
    #[diagnostic(
        code(graphcal::P005),
        help("valid domain constraint keys are `min` and `max`")
    )]
    InvalidDomainBoundKey {
        key: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("unknown key")]
        span: SourceSpan,
    },

    /// The lexer recorded an error span (a character outside the grammar).
    ///
    /// Raised by `Parser::finalize`, where it takes precedence over any other
    /// parse result.
    #[error("stray character in source")]
    #[diagnostic(
        code(graphcal::P006),
        help("remove or replace this character; it is not part of the graphcal grammar")
    )]
    UnknownToken {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("stray character")]
        span: SourceSpan,
    },

    /// The slot tuple of a multi-decl has `tuple_count` entries but the
    /// multi-decl declares `slot_count` slots.
    #[error(
        "multi-decl slot tuple has {tuple_count} entr{}, but the multi-decl declares {slot_count} slot{}",
        if *tuple_count == 1 { "y" } else { "ies" },
        if *slot_count == 1 { "" } else { "s" }
    )]
    #[diagnostic(
        code(graphcal::P007),
        help(
            "the slot tuple in `table[..., (…)]` must contain exactly one entry per declared slot"
        )
    )]
    MultiDeclTupleArity {
        slot_count: usize,
        tuple_count: usize,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("slot tuple here")]
        span: SourceSpan,
    },

    /// The header row of a multi-decl has `header_count` cells but the
    /// multi-decl declares `slot_count` slots.
    #[error(
        "multi-decl header row has {header_count} cell{}, but the multi-decl declares {slot_count} slot{}",
        if *header_count == 1 { "" } else { "s" },
        if *slot_count == 1 { "" } else { "s" }
    )]
    #[diagnostic(
        code(graphcal::P008),
        help("the header row (`: _, _, …;`) must have exactly one cell per slot")
    )]
    MultiDeclHeaderArity {
        slot_count: usize,
        header_count: usize,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("header row here")]
        span: SourceSpan,
    },

    /// The multi-decl row labelled `row_label` has `got` values but the
    /// multi-decl declares `slot_count` slots.
    #[error(
        "multi-decl row `{row_label}` has {got} value(s), but the multi-decl declares {slot_count} slot{}",
        if *slot_count == 1 { "" } else { "s" }
    )]
    #[diagnostic(
        code(graphcal::P009),
        help("each row must have exactly one value per slot")
    )]
    MultiDeclRowArity {
        slot_count: usize,
        got: usize,
        row_label: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this row has {got} value(s)")]
        span: SourceSpan,
    },

    /// A multi-decl was written with only one slot; at least two are required.
    #[error("multi-decl requires at least two slots")]
    #[diagnostic(
        code(graphcal::P010),
        help(
            "for a single declaration, use the regular `param`/`node`/`const node` form without a trailing comma"
        )
    )]
    MultiDeclSingleSlot {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("single slot here")]
        span: SourceSpan,
    },

    /// A multi-decl was written without any shared (row) axis.
    #[error("multi-decl requires at least one shared axis")]
    #[diagnostic(
        code(graphcal::P011),
        help("declare the row axis in `table[SharedAxis, (…)]`")
    )]
    MultiDeclNoSharedAxis {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("missing shared axis")]
        span: SourceSpan,
    },

    /// A multi-decl shape that is not supported yet; `reason` is the whole
    /// error message.
    #[error("{reason}")]
    #[diagnostic(
        code(graphcal::P012),
        help(
            "this multi-decl shape is scheduled for a later version; see issue #481 for the incremental plan"
        )
    )]
    MultiDeclUnsupportedShape {
        reason: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("here")]
        span: SourceSpan,
    },

    /// An inline DAG call was not followed by a `.<out>` projection.
    #[error("inline DAG call requires `.<out>` projection")]
    #[diagnostic(
        code(graphcal::P014),
        help(
            "add `.<output_name>` after the call; an instantiated DAG without a projection is not a node"
        )
    )]
    InlineDagCallMissingProjection {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("expected `.<out>` projection here")]
        span: SourceSpan,
    },

    /// Nesting reached `MAX_NESTING_DEPTH`; raised by `Parser::with_depth`.
    #[error("expression nesting is too deep")]
    #[diagnostic(
        code(graphcal::P015),
        help("the parser limits nesting to {MAX_NESTING_DEPTH} levels; simplify the expression")
    )]
    TooDeeplyNested {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("nesting exceeds the limit here")]
        span: SourceSpan,
    },

    /// A unit reference used more than one `alias.` qualifier.
    #[error("unit reference path is too deep")]
    #[diagnostic(
        code(graphcal::P017),
        help(
            "unit references are at most `alias.unit` — a bare name for local, selectively imported, or prelude units, or one module-alias qualifier for module-imported units"
        )
    )]
    UnitReferenceTooDeep {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("at most one `alias.` qualifier is allowed here")]
        span: SourceSpan,
    },

    /// An exponent of `^0` was written; the exponent must be a non-zero integer.
    #[error("`^0` exponent has no effect")]
    #[diagnostic(
        code(graphcal::P016),
        help(
            "a zero power erases its term; remove the term (or the exponent) instead of raising to zero"
        )
    )]
    ZeroExponent {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("exponent must be a non-zero integer")]
        span: SourceSpan,
    },

    /// The field `field` appears more than once in `context`.
    ///
    /// Built by `Parser::duplicate_plot_field`.
    #[error("duplicate `{field}` in {context}")]
    #[diagnostic(
        code(graphcal::P018),
        help("each field may appear at most once; remove or rename the duplicate")
    )]
    DuplicatePlotField {
        field: String,
        context: String,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("duplicate field here")]
        span: SourceSpan,
    },

    /// A plot declaration has an empty or missing `encode:` block.
    #[error("plot declaration has no encoding channels")]
    #[diagnostic(
        code(graphcal::P019),
        help(
            "add an `encode:` block with at least one channel, e.g. `encode: {{ x: ..., y: ... }}`"
        )
    )]
    MissingPlotEncoding {
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this plot has an empty or missing `encode:` block")]
        span: SourceSpan,
    },

    /// A composition declaration (named by `kind`) has an empty or missing
    /// `plots:` list.
    #[error("{kind} declaration has no plots")]
    #[diagnostic(
        code(graphcal::P020),
        help("add a non-empty `plots:` list, e.g. `plots: [my_plot]`")
    )]
    EmptyCompositionPlots {
        kind: &'static str,
        #[source_code]
        src: NamedSource<Arc<String>>,
        #[label("this {kind} has an empty or missing `plots:` list")]
        span: SourceSpan,
    },
}
281
/// Upper bound on the parser's nesting counter.
///
/// Once `Parser::depth` reaches this value, `Parser::with_depth` returns
/// `ParseError::TooDeeplyNested` instead of running its closure. The same
/// value is interpolated into that diagnostic's help text.
pub const MAX_NESTING_DEPTH: usize = 256;
292
293impl ParseError {
294 #[must_use]
302 pub const fn named_source(&self) -> &NamedSource<Arc<String>> {
303 match self {
304 Self::UnexpectedToken { src, .. }
305 | Self::UnexpectedEof { src, .. }
306 | Self::InvalidNumber { src, .. }
307 | Self::TableRowLengthMismatch { src, .. }
308 | Self::InvalidDomainBoundKey { src, .. }
309 | Self::UnknownToken { src, .. }
310 | Self::MultiDeclTupleArity { src, .. }
311 | Self::MultiDeclHeaderArity { src, .. }
312 | Self::MultiDeclRowArity { src, .. }
313 | Self::MultiDeclSingleSlot { src, .. }
314 | Self::MultiDeclNoSharedAxis { src, .. }
315 | Self::MultiDeclUnsupportedShape { src, .. }
316 | Self::InlineDagCallMissingProjection { src, .. }
317 | Self::TooDeeplyNested { src, .. }
318 | Self::ZeroExponent { src, .. }
319 | Self::UnitReferenceTooDeep { src, .. }
320 | Self::DuplicatePlotField { src, .. }
321 | Self::MissingPlotEncoding { src, .. }
322 | Self::EmptyCompositionPlots { src, .. } => src,
323 }
324 }
325}
326
/// Parser state over a single source string borrowed for `'src`.
pub struct Parser<'src> {
    /// Token stream the parser peeks at and consumes from.
    pub(super) lexer: Lexer<'src>,
    /// Owned copy of the source text, shared through `Arc` with the `src`
    /// field of every diagnostic this parser produces.
    pub(super) source: Arc<String>,
    /// Name attached to the source in diagnostics; `"input"` when the parser
    /// is created with `Parser::new`.
    pub(super) source_name: String,
    /// Current nesting level; incremented and decremented by `with_depth`
    /// and compared against `MAX_NESTING_DEPTH`.
    depth: usize,
}
335
336impl<'src> Parser<'src> {
337 #[must_use]
338 pub fn new(source: &'src str) -> Self {
339 Self {
340 lexer: Lexer::new(source),
341 source: Arc::new(source.to_string()),
342 source_name: "input".to_string(),
343 depth: 0,
344 }
345 }
346
347 #[must_use]
348 pub fn with_name(source: &'src str, name: &str) -> Self {
349 Self {
350 lexer: Lexer::new(source),
351 source: Arc::new(source.to_string()),
352 source_name: name.to_string(),
353 depth: 0,
354 }
355 }
356
357 pub(super) fn with_depth<T>(
365 &mut self,
366 f: impl FnOnce(&mut Self) -> Result<T, ParseError>,
367 ) -> Result<T, ParseError> {
368 if self.depth >= MAX_NESTING_DEPTH {
369 let span = self.lexer.peek_with_span().map(|(_, span)| span);
370 return Err(ParseError::TooDeeplyNested {
371 src: self.named_source(),
372 span: span
373 .unwrap_or_else(|| Span::new(self.lexer.source_len(), 0))
374 .into(),
375 });
376 }
377 self.depth += 1;
378 let result = crate::stack::with_stack_growth(|| f(self));
379 self.depth -= 1;
380 result
381 }
382
383 #[must_use]
384 pub fn into_source_metadata(self) -> SourceMetadata {
385 self.lexer.into_source_metadata()
386 }
387
388 pub(super) fn named_source(&self) -> NamedSource<Arc<String>> {
389 crate::syntax::named_source(&self.source_name, Arc::clone(&self.source))
390 }
391
392 pub(super) fn unexpected_token(&self, expected: &str, found: &str, span: Span) -> ParseError {
393 ParseError::UnexpectedToken {
394 expected: expected.to_string(),
395 found: found.to_string(),
396 src: self.named_source(),
397 span: span.into(),
398 }
399 }
400
401 pub(super) fn duplicate_plot_field(
403 &self,
404 field: &str,
405 context: &str,
406 span: Span,
407 ) -> ParseError {
408 ParseError::DuplicatePlotField {
409 field: field.to_string(),
410 context: context.to_string(),
411 src: self.named_source(),
412 span: span.into(),
413 }
414 }
415
416 pub(super) fn unexpected_eof(&self, expected: &str) -> ParseError {
417 ParseError::UnexpectedEof {
418 expected: expected.to_string(),
419 src: self.named_source(),
420 span: Span::new(self.lexer.source_len(), 0).into(),
421 }
422 }
423
424 fn finalize<T>(&mut self, result: Result<T, ParseError>) -> Result<T, ParseError> {
432 while self.lexer.peek().is_some() {
433 self.lexer.next_token();
434 }
435 if let Some(span) = self.lexer.first_error_span() {
436 return Err(ParseError::UnknownToken {
437 src: self.named_source(),
438 span: span.into(),
439 });
440 }
441 result
442 }
443
444 pub(super) fn advance(&mut self) -> Result<(Token, Span), ParseError> {
448 self.lexer
449 .next_token()
450 .ok_or_else(|| self.unexpected_eof("token"))
451 }
452
453 pub(super) fn parse_finite_f64_literal(
455 &self,
456 text: &str,
457 span: Span,
458 ) -> Result<f64, ParseError> {
459 let value: f64 =
460 text.parse()
461 .map_err(|e: std::num::ParseFloatError| ParseError::InvalidNumber {
462 reason: e.to_string(),
463 src: self.named_source(),
464 span: span.into(),
465 })?;
466 if value.is_finite() {
467 Ok(value)
468 } else {
469 Err(ParseError::InvalidNumber {
470 reason: "floating-point literal must be finite".to_string(),
471 src: self.named_source(),
472 span: span.into(),
473 })
474 }
475 }
476
477 pub fn parse_single_expr(&mut self) -> Result<Expr, ParseError> {
487 let result = self.parse_single_expr_inner();
488 self.finalize(result)
489 }
490
491 fn parse_single_expr_inner(&mut self) -> Result<Expr, ParseError> {
492 let expr = self.parse_expr()?;
493 if let Some((tok, span)) = self.lexer.peek_with_span() {
494 let tok = *tok;
495 return Err(self.unexpected_token("end of input", &tok.to_string(), span));
496 }
497 Ok(expr)
498 }
499
500 pub fn parse_standalone_unit_expr(
509 &mut self,
510 ) -> Result<crate::syntax::ast::UnitExpr, ParseError> {
511 let result = self.parse_standalone_unit_expr_inner();
512 self.finalize(result)
513 }
514
515 fn parse_standalone_unit_expr_inner(
516 &mut self,
517 ) -> Result<crate::syntax::ast::UnitExpr, ParseError> {
518 let expr = self.parse_unit_expr()?;
519 if let Some((tok, span)) = self.lexer.peek_with_span() {
520 let tok = *tok;
521 return Err(self.unexpected_token("end of input", &tok.to_string(), span));
522 }
523 Ok(expr)
524 }
525
526 pub fn parse_standalone_dim_expr(&mut self) -> Result<crate::syntax::ast::DimExpr, ParseError> {
535 let result = self.parse_standalone_dim_expr_inner();
536 self.finalize(result)
537 }
538
539 fn parse_standalone_dim_expr_inner(
540 &mut self,
541 ) -> Result<crate::syntax::ast::DimExpr, ParseError> {
542 let expr = self.parse_dim_expr()?;
543 if let Some((tok, span)) = self.lexer.peek_with_span() {
544 let tok = *tok;
545 return Err(self.unexpected_token("end of input", &tok.to_string(), span));
546 }
547 Ok(expr)
548 }
549
550 pub fn parse_file(&mut self) -> Result<crate::syntax::ast::File, ParseError> {
556 let result = self.parse_file_inner();
557 self.finalize(result)
558 }
559
560 fn parse_file_inner(&mut self) -> Result<crate::syntax::ast::File, ParseError> {
561 let mut declarations = Vec::new();
562 while self.lexer.peek().is_some() {
563 declarations.push(self.parse_declaration()?);
564 }
565 Ok(crate::syntax::ast::File { declarations })
566 }
567
568 pub(super) fn expect(&mut self, expected: Token) -> Result<(Token, Span), ParseError> {
571 let expected_str = format!("`{expected}`");
572 match self.lexer.next_token() {
573 Some((tok, span)) if tok == expected => Ok((tok, span)),
574 Some((tok, span)) => Err(self.unexpected_token(&expected_str, &tok.to_string(), span)),
575 None => Err(self.unexpected_eof(&expected_str)),
576 }
577 }
578
579 pub(super) fn parse_comma_separated<T>(
583 &mut self,
584 end_token: Token,
585 mut parse_item: impl FnMut(&mut Self) -> Result<T, ParseError>,
586 ) -> Result<Vec<T>, ParseError> {
587 let mut items = Vec::new();
588 loop {
589 if self.lexer.peek() == Some(&end_token) {
590 break;
591 }
592 items.push(parse_item(self)?);
593 if self.lexer.peek() == Some(&Token::Comma) {
594 self.lexer.next_token();
595 } else {
596 break;
597 }
598 }
599 Ok(items)
600 }
601
602 pub(super) fn parse_any_ident(&mut self) -> Result<Ident, ParseError> {
604 match self.lexer.next_token() {
605 Some((Token::Ident, span)) => Ok(Ident {
606 name: NameAtom::new_unchecked_for_parser(self.lexer.slice_at(span).to_string()),
607 span,
608 }),
609 Some((tok, span)) => Err(self.unexpected_token("identifier", &tok.to_string(), span)),
610 None => Err(self.unexpected_eof("identifier")),
611 }
612 }
613
614 pub(super) fn parse_ident_path(&mut self) -> Result<IdentPath, ParseError> {
616 let first = self.parse_any_ident()?;
617 let mut rest = Vec::new();
618 while self.lexer.peek() == Some(&Token::Dot)
619 && self.lexer.peek_second() == Some(&Token::Ident)
620 {
621 self.lexer.next_token(); rest.push(self.parse_any_ident()?);
623 }
624 Ok(IdentPath::new(crate::syntax::non_empty::NonEmpty::new(
625 first, rest,
626 )))
627 }
628}
629
#[cfg(test)]
mod tests {
    use crate::syntax::parser::{ParseError, Parser};

    /// A character outside the grammar after an otherwise valid declaration
    /// is reported as `UnknownToken`, and the span covers exactly that
    /// character.
    #[test]
    fn stray_character_in_source_surfaces_as_unknown_token() {
        let input = "param x = 1.0; §";
        let err = Parser::new(input)
            .parse_file()
            .expect_err("expected parse error");
        let span = match err {
            ParseError::UnknownToken { span, .. } => span,
            other => panic!("expected UnknownToken, got {other:?}"),
        };
        // The span is in bytes, so slicing the input must yield the stray
        // character itself.
        let byte_start: usize = span.offset();
        let flagged = &input[byte_start..byte_start + span.len()];
        assert_eq!(flagged, "§");
    }

    /// When the input has both a stray character and a dangling `+`, the
    /// stray-character diagnostic is the one returned.
    #[test]
    fn stray_character_preempts_other_parse_errors() {
        let input = "param x = §1.0 +";
        let err = Parser::new(input)
            .parse_file()
            .expect_err("expected parse error");
        let is_unknown_token = matches!(err, ParseError::UnknownToken { .. });
        assert!(is_unknown_token, "expected UnknownToken, got {err:?}");
    }
}