1#![forbid(unsafe_code)]
28
29pub mod ast;
30pub mod cst;
31pub mod directive;
32pub mod fast_path;
33pub mod lex;
34pub(crate) mod lower;
42pub mod rewrite;
43pub mod syntax;
44pub mod token;
45
46pub use fast_path::parse_document_fast;
47
48pub use token::*;
49
/// Error returned by [`parse_document`] when a source cannot be turned into
/// a single root node.
///
/// Both variants carry the `offset` at which the problem was detected;
/// [`ParseDocumentError::source_span`] converts it into a span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseDocumentError {
    /// The input could not be parsed; `message` describes the failure.
    ///
    /// Note that `Display` prints only `message` for this variant, not the
    /// offset.
    Parse { offset: usize, message: String },
    /// Input was left over after the root expression; `remaining` holds the
    /// leftover text and `offset` is the byte position where it starts.
    TrailingInput { offset: usize, remaining: String },
}

impl std::fmt::Display for ParseDocumentError {
    /// Renders a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::TrailingInput { offset, remaining } => f.write_fmt(format_args!(
                "trailing input at byte {offset}: {remaining:?}"
            )),
            Self::Parse { message, .. } => f.write_fmt(format_args!("parse error: {message}")),
        }
    }
}

/// Marker impl: the error carries no underlying `source()`.
impl std::error::Error for ParseDocumentError {}
68
69impl ParseDocumentError {
70 pub fn source_span(&self) -> Option<miette::SourceSpan> {
71 match self {
72 Self::Parse { offset, .. } => Some((*offset, 1).into()),
73 Self::TrailingInput { offset, remaining } => {
74 Some((*offset, remaining.len().max(1)).into())
75 }
76 }
77 }
78}
79
80pub fn parse_document(source: &str) -> Result<Node, ParseDocumentError> {
99 let parse = cst::parse_cst(source);
100 lower::lower_document(&parse, source)
101}
102
/// A single problem reported by [`parse_document_recovering`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseDiagnostic {
    /// Human-readable description of the problem.
    pub message: String,
    /// Region of the source the problem applies to.
    pub range: TokenRange,
}
111
/// Result of [`parse_document_recovering`]: whatever could be lowered, plus
/// the problems found along the way.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// Root node(s) of the document. `parse_document_recovering` stores
    /// either the lowered root or an `Expr::Missing` placeholder here.
    pub nodes: Vec<Node>,
    /// Diagnostics collected while parsing and lowering; empty for clean
    /// input.
    pub diagnostics: Vec<ParseDiagnostic>,
}
128
129pub fn parse_document_recovering(source: &str) -> ParsedDocument {
146 let _scope = lower::RecoveringScope::enter();
147 let parse = cst::parse_cst(source);
148 let mut nodes: Vec<Node> = Vec::new();
149 let mut diagnostics: Vec<ParseDiagnostic> = Vec::new();
150
151 for err in &parse.errors {
155 let end = (err.offset + 1).min(source.len().max(err.offset));
156 diagnostics.push(ParseDiagnostic {
157 message: err.message.clone(),
158 range: lower::range_from_offsets(source, err.offset, end),
159 });
160 }
161
162 if let Some(doc) = ast::document_of(parse.syntax()) {
163 if doc.root_expr().is_some() {
172 if let Some(node) = lower::lower_document_node_v2(&doc, source) {
173 nodes.push(node);
174 } else {
175 let end_offset = source.len();
180 nodes.push(Node {
181 id: NodeId::alloc(),
182 expr: std::sync::Arc::new(Expr::Missing),
183 decorators: Vec::new(),
184 directives: Vec::new(),
185 type_hint: None,
186 range: lower::range_from_offsets(source, 0, end_offset),
187 doc_comment: None,
188 });
189 if parse.errors.is_empty() {
190 diagnostics.push(ParseDiagnostic {
191 message: "could not lower CST to legacy Node".to_string(),
192 range: lower::range_from_offsets(source, 0, end_offset),
193 });
194 }
195 }
196 } else {
197 let end_offset = source.len();
203 nodes.push(Node {
204 id: NodeId::alloc(),
205 expr: std::sync::Arc::new(Expr::Missing),
206 decorators: Vec::new(),
207 directives: Vec::new(),
208 type_hint: None,
209 range: lower::range_from_offsets(source, 0, end_offset),
210 doc_comment: None,
211 });
212 if parse.errors.is_empty() {
213 diagnostics.push(ParseDiagnostic {
214 message: "empty document".to_string(),
215 range: lower::range_from_offsets(source, 0, 0),
216 });
217 }
218 }
219 }
220
221 ParsedDocument { nodes, diagnostics }
222}
223
/// Collects the comments at the very start of `source`.
///
/// ASCII whitespace, `// line` comments and `/* block */` comments are
/// consumed repeatedly until something else is found.
///
/// Returns a pair:
///
/// * the trimmed comment bodies joined with `'\n'`, or `None` when no
///   comment was seen (an empty comment still counts and contributes an
///   empty string);
/// * the byte offset of the first byte that is neither whitespace nor part
///   of a leading comment (`source.len()` if nothing else follows).
///
/// An unterminated block comment extends to the end of the input and keeps
/// all of its text.
pub fn parse_leading_comments(source: &str) -> (Option<String>, usize) {
    let mut comments: Vec<String> = Vec::new();
    // Always a suffix of `source`, so the consumed length can be derived
    // from it at the end.
    let mut rest = source;

    loop {
        rest = rest.trim_start_matches(|c: char| c.is_ascii_whitespace());

        if let Some(body) = rest.strip_prefix("//") {
            // A line comment runs up to (not including) the line break.
            let end = body
                .find(|c: char| c == '\n' || c == '\r')
                .unwrap_or(body.len());
            comments.push(body[..end].trim().to_string());
            rest = &body[end..];
        } else if let Some(body) = rest.strip_prefix("/*") {
            match body.find("*/") {
                Some(end) => {
                    comments.push(body[..end].trim().to_string());
                    rest = &body[end + 2..];
                }
                None => {
                    // Unterminated: take everything. Slicing via `find`
                    // keeps us on char boundaries even for non-ASCII text.
                    comments.push(body.trim().to_string());
                    rest = &body[body.len()..];
                }
            }
        } else {
            break;
        }
    }

    let joined = if comments.is_empty() {
        None
    } else {
        Some(comments.join("\n"))
    };
    (joined, source.len() - rest.len())
}
273
274pub fn combine_ranges(start: TokenRange, end: TokenRange) -> TokenRange {
278 TokenRange {
279 start: start.start,
280 end: end.end,
281 }
282}
283
284pub(crate) fn position_at_source(source: &str, offset: usize) -> TokenPosition {
285 let offset = offset.min(source.len());
286 let end = if source.is_char_boundary(offset) {
287 offset
288 } else {
289 let mut boundary = offset;
290 while boundary > 0 && !source.is_char_boundary(boundary) {
291 boundary -= 1;
292 }
293 boundary
294 };
295
296 let mut line = 1u32;
297 let mut column = 1usize;
298 let mut chars = source[..end].chars().peekable();
299 while let Some(ch) = chars.next() {
300 match ch {
301 '\r' => {
302 if chars.peek() == Some(&'\n') {
303 chars.next();
304 }
305 line += 1;
306 column = 1;
307 }
308 '\n' => {
309 line += 1;
310 column = 1;
311 }
312 _ => column += 1,
313 }
314 }
315
316 TokenPosition {
317 line,
318 column,
319 offset,
320 }
321}
322
323pub fn child_nodes(node: &Node) -> Vec<&Node> {
328 let mut out = Vec::new();
329 match &*node.expr {
330 Expr::Dict(pairs) => {
331 for (_, value) in pairs {
332 out.push(value);
333 }
334 }
335 Expr::List(items) => out.extend(items.iter()),
336 Expr::Tuple(items) => out.extend(items.iter()),
337 Expr::Spread(inner) => out.push(inner),
338 Expr::Comprehension {
339 element,
340 iterable,
341 condition,
342 ..
343 } => {
344 out.push(element);
345 out.push(iterable);
346 if let Some(cond) = condition {
347 out.push(cond);
348 }
349 }
350 Expr::Binary(_, l, r) => {
351 out.push(l);
352 out.push(r);
353 }
354 Expr::Unary(_, inner) => out.push(inner),
355 Expr::Ternary { cond, then, els } => {
356 out.push(cond);
357 out.push(then);
358 out.push(els);
359 }
360 Expr::FnCall { args, .. } => {
361 for arg in args {
362 out.push(&arg.value);
363 }
364 }
365 Expr::FString(parts) => {
366 for part in parts {
367 if let crate::FStringPart::Interpolation(n) = part {
368 out.push(n);
369 }
370 }
371 }
372 Expr::Where { expr, bindings } => {
373 out.push(expr);
374 out.push(bindings);
375 }
376 Expr::Match { expr, arms } => {
377 out.push(expr);
378 for (pat, body) in arms {
379 out.push(pat);
380 out.push(body);
381 }
382 }
383 Expr::Closure { body, .. } => out.push(body),
384 Expr::VariantCtor { body, .. } => out.push(body),
385 Expr::VariantPattern { .. } => {}
386 Expr::Reference { .. }
387 | Expr::Variable(_)
388 | Expr::Type(_)
389 | Expr::Wildcard
390 | Expr::Missing
391 | Expr::Bool(_)
392 | Expr::Int(_)
393 | Expr::Float(_)
394 | Expr::String(_) => {}
395 }
396 out
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    // ---- strict `parse_document`: comments, trivia, errors ----

    /// Leading block and line comments before the root are accepted.
    #[test]
    fn test_comments() {
        let src = r##"/* hello world */
// this is a test file
{}"##;
        let node = parse_document(src).unwrap();
        assert!(matches!(*node.expr, Expr::Dict(_)));
    }

    /// Comments and whitespace after the root are not "trailing input".
    #[test]
    fn test_parse_document_accepts_trailing_trivia() {
        assert!(parse_document("{ a: 1 } // trailing\n /* ok */").is_ok());
    }

    /// A second token after the root is rejected, and the error pinpoints it.
    #[test]
    fn test_parse_document_rejects_trailing_tokens() {
        let err = parse_document("{ a: 1 } true").unwrap_err();
        assert!(matches!(
            err,
            ParseDocumentError::TrailingInput {
                offset: 9,
                ref remaining
            } if remaining == "true"
        ));
        assert_eq!(err.source_span(), Some((9, 4).into()));
    }

    /// Syntax errors surface as `Parse` and always carry a span.
    #[test]
    fn test_parse_document_reports_parse_error_span() {
        let err = parse_document("{ a: }").unwrap_err();
        assert!(matches!(err, ParseDocumentError::Parse { .. }));
        assert!(err.source_span().is_some());
    }

    /// Ranges carry 1-based line/column information.
    #[test]
    fn test_token_range_has_line_and_column() {
        // The two-space indent before `answer` is load-bearing: it places the
        // key at column 3 and the value `42` at column 11, as asserted below.
        let node = parse_document("// leading\n{\n  answer: 42\n}\n").unwrap();
        assert_eq!(node.range.start.line, 2);
        assert_eq!(node.range.start.column, 1);
        assert_eq!(node.range.end.line, 4);
        assert_eq!(node.range.end.column, 2);

        let Expr::Dict(pairs) = &*node.expr else {
            panic!("Expected dict")
        };
        let TokenKey::String(_, key_range, _) = &pairs[0].0 else {
            panic!("Expected string key")
        };
        assert_eq!(key_range.start.line, 3);
        assert_eq!(key_range.start.column, 3);
        assert_eq!(pairs[0].1.range.start.line, 3);
        assert_eq!(pairs[0].1.range.start.column, 11);
    }

    // ---- root shapes ----

    /// Quoted and bare keys, with a leading comment and a trailing comma.
    #[test]
    fn test_simple_root() {
        let node = parse_document(r#"{ "a": 1 }"#).unwrap();
        let Expr::Dict(pairs) = &*node.expr else {
            panic!("expected Dict, got {:?}", node.expr)
        };
        assert_eq!(pairs.len(), 1);

        let node = parse_document("// comment \n {foo: 1, bar: 2,}").unwrap();
        let Expr::Dict(pairs) = &*node.expr else {
            panic!("expected Dict, got {:?}", node.expr)
        };
        assert_eq!(pairs.len(), 2);
    }

    /// Dict values are full expressions, not just literals.
    #[test]
    fn test_expr_integration() {
        let node = parse_document(r#"{ "a": 1 != 2 }"#).unwrap();
        let Expr::Dict(pairs) = &*node.expr else {
            panic!("expected Dict, got {:?}", node.expr)
        };
        assert!(matches!(*pairs[0].1.expr, Expr::Binary(Operator::Ne, _, _)));
    }

    /// A decorator preceded by a comment attaches to the root node.
    #[test]
    fn test_comment_decorator_integration() {
        let node = parse_document(
            r###"
 // foo decorator
 @foo
 { "a": 1 }"###,
        )
        .unwrap();
        assert_eq!(node.decorators.len(), 1);
        assert_eq!(node.decorators[0].path[0].to_string_key(), "foo");
    }

    // ---- tuples vs. parenthesised grouping ----

    #[test]
    fn test_tuple_two_element() {
        let node = parse_document("(1, \"a\")").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => {
                assert_eq!(items.len(), 2);
                assert!(matches!(*items[0].expr, Expr::Int(1)));
                assert!(matches!(*items[1].expr, Expr::String(_)));
            }
            other => panic!("expected Tuple, got {other:?}"),
        }
    }

    /// A trailing comma is what turns a single parenthesised value into a tuple.
    #[test]
    fn test_tuple_one_element_trailing_comma() {
        let node = parse_document("(42,)").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => {
                assert_eq!(items.len(), 1);
                assert!(matches!(*items[0].expr, Expr::Int(42)));
            }
            other => panic!("expected 1-tuple, got {other:?}"),
        }
    }

    #[test]
    fn test_unit_tuple() {
        let node = parse_document("()").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => assert!(items.is_empty()),
            other => panic!("expected unit tuple, got {other:?}"),
        }
    }

    /// Parentheses without a comma only group; they must not create a tuple.
    #[test]
    fn test_paren_grouping_is_not_tuple() {
        let node = parse_document("(1 + 2)").unwrap();
        assert!(
            matches!(&*node.expr, Expr::Binary(Operator::Add, _, _)),
            "grouping must not produce a Tuple: {:?}",
            node.expr
        );
    }

    #[test]
    fn test_nested_tuple() {
        let node = parse_document("((1, 2), 3)").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => {
                assert_eq!(items.len(), 2);
                assert!(matches!(*items[0].expr, Expr::Tuple(_)));
                assert!(matches!(*items[1].expr, Expr::Int(3)));
            }
            other => panic!("expected nested tuple, got {other:?}"),
        }
    }

    // ---- lists and the expression kinds they may contain ----

    #[test]
    fn test_list_integration() {
        let node = parse_document(r#"[1, 2, 3]"#).unwrap();
        let Expr::List(elements) = &*node.expr else {
            panic!("expected List, got {:?}", node.expr)
        };
        assert_eq!(elements.len(), 3);
    }

    #[test]
    fn test_ref_dict() {
        let node = parse_document(r#"{ "a": &sibling.b, "b": 2 }"#).unwrap();
        let Expr::Dict(pairs) = &*node.expr else {
            panic!("expected Dict, got {:?}", node.expr)
        };
        assert_eq!(pairs.len(), 2);
        assert!(matches!(
            *pairs[0].1.expr,
            Expr::Reference {
                base: RefBase::Sibling,
                ..
            }
        ));
    }

    #[test]
    fn test_ref_list() {
        let node = parse_document(r#"[&sibling.b[1], 2]"#).unwrap();
        let Expr::List(elements) = &*node.expr else {
            panic!("expected List, got {:?}", node.expr)
        };
        assert_eq!(elements.len(), 2);
    }

    #[test]
    fn test_var_list() {
        let node = parse_document(r#"[a, 2]"#).unwrap();
        let Expr::List(elements) = &*node.expr else {
            panic!("expected List, got {:?}", node.expr)
        };
        assert!(matches!(*elements[0].expr, Expr::Variable(_)));
    }

    #[test]
    fn test_fn_call_list() {
        let node = parse_document(r#"[f({a: 1}), 2]"#).unwrap();
        let Expr::List(elements) = &*node.expr else {
            panic!("expected List, got {:?}", node.expr)
        };
        assert!(matches!(*elements[0].expr, Expr::FnCall { .. }));
    }

    #[test]
    fn test_fmt_string_list() {
        let node = parse_document(r#"[f"a ${ &sibling.b[1] }", "b"]"#).unwrap();
        let Expr::List(elements) = &*node.expr else {
            panic!("expected List, got {:?}", node.expr)
        };
        assert!(matches!(*elements[0].expr, Expr::FString(_)));
    }

    #[test]
    fn test_root_ref_in_fmt_string_dict() {
        assert!(parse_document(r#"{ "a": f"a ${ &root.b[0] }", "b": [0, 1] }"#).is_ok());
    }

    /// Comments directly above a dict entry become that entry's doc comment;
    /// consecutive line comments are joined with a newline.
    #[test]
    fn test_doc_comment_extraction() {
        let src = r#"{
 // line 1
 // line 2
 a: 1,
 /* block */
 b: 2
 }"#;
        let node = parse_document(src).unwrap();
        let Expr::Dict(pairs) = &*node.expr else {
            panic!("expected Dict, got {:?}", node.expr)
        };
        assert_eq!(pairs[0].1.doc_comment.as_deref(), Some("line 1\nline 2"));
        assert_eq!(pairs[1].1.doc_comment.as_deref(), Some("block"));
    }

    // ---- any expression may be the document root ----

    #[test]
    fn test_root_accepts_atomic_literals() {
        let node = parse_document("42").unwrap();
        assert!(matches!(*node.expr, Expr::Int(42)));

        let node = parse_document(r#""hello""#).unwrap();
        assert!(matches!(*node.expr, Expr::String(_)));

        let node = parse_document("true").unwrap();
        assert!(matches!(*node.expr, Expr::Bool(true)));

        // `null` lowers to `Expr::Missing`.
        let node = parse_document("null").unwrap();
        assert!(matches!(*node.expr, Expr::Missing));
    }

    #[test]
    fn test_root_accepts_binary_expression() {
        let node = parse_document("1 + 2").unwrap();
        assert!(matches!(*node.expr, Expr::Binary(Operator::Add, _, _)));
    }

    #[test]
    fn test_root_accepts_variant_constructor() {
        let node = parse_document("Result.Ok { value: 1 }").unwrap();
        assert!(matches!(*node.expr, Expr::VariantCtor { .. }));
    }

    #[test]
    fn test_root_accepts_fn_call() {
        let node = parse_document("range(0, 10)").unwrap();
        assert!(matches!(*node.expr, Expr::FnCall { .. }));
    }

    #[test]
    fn test_root_dict_and_list_still_work() {
        let node = parse_document("{ a: 1 }").unwrap();
        assert!(matches!(*node.expr, Expr::Dict(_)));

        let node = parse_document("[1, 2, 3]").unwrap();
        assert!(matches!(*node.expr, Expr::List(_)));
    }

    /// Empty, whitespace-only and unterminated inputs are errors in strict mode.
    #[test]
    fn test_root_rejects_garbage() {
        assert!(parse_document("").is_err());
        assert!(parse_document(" \n\t ").is_err());
        assert!(parse_document("{ bad syntax").is_err());
    }

    // ---- `parse_document_recovering` ----

    #[test]
    fn recovering_clean_input_yields_one_node_no_diagnostics() {
        let result = parse_document_recovering("{ a: 1, b: 2 }");
        assert_eq!(result.nodes.len(), 1);
        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
        let Expr::Dict(pairs) = &*result.nodes[0].expr else {
            panic!("expected Dict root");
        };
        assert_eq!(pairs.len(), 2);
    }

    /// Smoke test: none of these partial inputs may panic. The results are
    /// deliberately not inspected.
    #[test]
    fn recovering_never_errs_on_partial_inputs() {
        for src in &[
            "#", "&", "@", "{", "{a:", "{ ?", "}", "[", "(", "f\"hi ${", "", " ", "\n\t",
        ] {
            let result = parse_document_recovering(src);
            let _ = result.nodes;
            let _ = result.diagnostics;
        }
    }

    #[test]
    fn recovering_reports_diagnostic_for_unterminated_dict() {
        let result = parse_document_recovering("{ a: ");
        assert!(
            !result.diagnostics.is_empty(),
            "expected at least one diagnostic for unterminated dict"
        );
        for diag in &result.diagnostics {
            // The input is five bytes long; no diagnostic may start past it.
            assert!(
                diag.range.start.offset <= 5,
                "diagnostic offset out of range: {:?}",
                diag
            );
        }
    }

    /// An empty source yields a `Missing` placeholder plus a diagnostic.
    #[test]
    fn recovering_includes_empty_document_diagnostic() {
        let result = parse_document_recovering("");
        assert_eq!(result.nodes.len(), 1);
        assert!(matches!(&*result.nodes[0].expr, Expr::Missing));
        assert!(!result.diagnostics.is_empty());
    }

    #[test]
    fn recovering_completes_partial_for_lone_hash() {
        let result = parse_document_recovering("#");
        assert!(!result.diagnostics.is_empty());
    }

    #[test]
    fn recovering_completes_partial_for_lone_amp() {
        let result = parse_document_recovering("&");
        assert!(!result.diagnostics.is_empty());
    }

    /// Invariant: recovering mode always hands back something to work with.
    #[test]
    fn recovering_always_yields_at_least_one_node() {
        for src in [
            "@",
            "#",
            "&",
            "{",
            "{ @",
            "{ x: 1, @ }",
            "[",
            "}",
            "{ a:",
            "{ ?",
            "f\"hi ${",
            "(",
            "",
        ] {
            let r = parse_document_recovering(src);
            assert!(
                !r.nodes.is_empty(),
                "expected at least one partial node for src {:?}, got 0",
                src
            );
        }
    }

    /// A stray `@` inside a dict must not discard the fields parsed before it.
    #[test]
    fn recovering_at_decorator_keeps_sibling_fields() {
        let r = parse_document_recovering("{ fmt: (v) => v + 1, @ y: 2 }");
        assert_eq!(r.nodes.len(), 1, "expected partial Dict root");
        match &*r.nodes[0].expr {
            Expr::Dict(fields) => {
                let has_fmt = fields.iter().any(|(k, _)| {
                    matches!(
                        k,
                        TokenKey::String(s, _, _) if s == "fmt"
                    )
                });
                assert!(
                    has_fmt,
                    "expected the `fmt` sibling to survive partial lowering, got {:?}",
                    fields
                );
            }
            other => panic!("expected Dict root, got {:?}", other),
        }
    }
}