#![forbid(unsafe_code)]
pub mod ast;
pub mod cst;
pub mod directive;
pub mod fast_path;
pub mod lex;
pub(crate) mod lower;
pub mod rewrite;
pub mod syntax;
pub mod token;
pub use fast_path::parse_document_fast;
pub use token::*;
/// Error produced by [`parse_document`] when the source cannot be turned into
/// a single root node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseDocumentError {
/// The input failed to parse. `offset` is the position used as the start of
/// the reported span and `message` is the human-readable description.
Parse { offset: usize, message: String },
/// A root expression was parsed but further input followed it. `offset` is
/// the byte position where the leftover input begins and `remaining` is the
/// leftover text (e.g. `"true"` at offset 9 for `{ a: 1 } true`).
TrailingInput { offset: usize, remaining: String },
}
impl std::fmt::Display for ParseDocumentError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Parse { message, .. } => write!(f, "parse error: {message}"),
Self::TrailingInput { offset, remaining } => {
write!(f, "trailing input at byte {offset}: {remaining:?}")
}
}
}
}
// Marker impl: nothing is overridden, so the default `std::error::Error`
// methods apply (in particular, no underlying `source()` is reported).
impl std::error::Error for ParseDocumentError {}
impl ParseDocumentError {
    /// Returns the source span this error should be highlighted at.
    ///
    /// * `Parse` yields a one-unit span starting at its `offset`.
    /// * `TrailingInput` yields a span starting at its `offset` and covering
    ///   the byte length of `remaining`, but never less than one so the span
    ///   is always non-empty.
    ///
    /// Both current variants produce `Some(..)`.
    pub fn source_span(&self) -> Option<miette::SourceSpan> {
        let (start, len) = match self {
            Self::TrailingInput { offset, remaining } => (*offset, remaining.len().max(1)),
            Self::Parse { offset, .. } => (*offset, 1),
        };
        Some(miette::SourceSpan::from((start, len)))
    }
}
/// Parses `source` into a single root [`Node`].
///
/// The text is first parsed into a CST by `cst::parse_cst`, and that CST is
/// then handed to `lower::lower_document` together with the original text.
///
/// # Errors
///
/// Returns whatever [`ParseDocumentError`] `lower::lower_document` reports.
/// The tests in this file exercise `Parse` for malformed or empty input and
/// `TrailingInput` for extra tokens after the root expression.
pub fn parse_document(source: &str) -> Result<Node, ParseDocumentError> {
    lower::lower_document(&cst::parse_cst(source), source)
}
/// A single problem reported by [`parse_document_recovering`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseDiagnostic {
/// Human-readable description of the problem.
pub message: String,
/// Location in the source text the problem is attached to.
pub range: TokenRange,
}
/// Result of [`parse_document_recovering`]: whatever nodes could be produced,
/// plus the diagnostics collected along the way.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
/// Root nodes recovered from the input (a placeholder `Expr::Missing` node
/// is used when the root could not be lowered or the document is empty).
pub nodes: Vec<Node>,
/// Problems found while parsing; empty for clean input.
pub diagnostics: Vec<ParseDiagnostic>,
}
/// Parses `source` without ever returning an error, reporting problems as
/// diagnostics instead.
///
/// Steps:
/// 1. Enter a `lower::RecoveringScope` for the duration of the call
///    (presumably switches lowering into a tolerant mode; confirm in `lower`).
/// 2. Parse the text into a CST and turn every CST error into a
///    [`ParseDiagnostic`] covering at most one unit starting at the error's
///    offset (clamped to the end of the source).
/// 3. If a document node exists:
///    * with a root expression that lowers successfully, push that node;
///    * otherwise push a placeholder node whose expression is `Expr::Missing`
///      and whose range spans the whole source. When the CST itself reported
///      no errors, an explanatory diagnostic is added so the failure is never
///      silent.
///
/// NOTE(review): when `ast::document_of` yields `None`, no node is pushed at
/// all; confirm whether that can happen in practice.
pub fn parse_document_recovering(source: &str) -> ParsedDocument {
    let _scope = lower::RecoveringScope::enter();
    let parse = cst::parse_cst(source);
    let source_len = source.len();

    let mut diagnostics: Vec<ParseDiagnostic> = Vec::new();
    for syntax_error in &parse.errors {
        let start = syntax_error.offset;
        // One unit wide, but never past the end of the source (and never
        // before `start` if the error sits beyond the end).
        let end = (start + 1).min(source_len.max(start));
        diagnostics.push(ParseDiagnostic {
            message: syntax_error.message.clone(),
            range: lower::range_from_offsets(source, start, end),
        });
    }

    // Placeholder root used whenever no real node can be produced.
    let missing_root = || Node {
        id: NodeId::alloc(),
        expr: std::sync::Arc::new(Expr::Missing),
        decorators: Vec::new(),
        directives: Vec::new(),
        type_hint: None,
        range: lower::range_from_offsets(source, 0, source_len),
        doc_comment: None,
    };

    let mut nodes: Vec<Node> = Vec::new();
    if let Some(doc) = ast::document_of(parse.syntax()) {
        // `Some((message, end))` describes the diagnostic to emit alongside
        // the placeholder; `None` means a real node was pushed.
        let fallback = if doc.root_expr().is_none() {
            Some(("empty document", 0))
        } else if let Some(lowered) = lower::lower_document_node_v2(&doc, source) {
            nodes.push(lowered);
            None
        } else {
            Some(("could not lower CST to legacy Node", source_len))
        };

        if let Some((message, diagnostic_end)) = fallback {
            nodes.push(missing_root());
            // Only explain the placeholder if the CST errors have not
            // already done so.
            if parse.errors.is_empty() {
                diagnostics.push(ParseDiagnostic {
                    message: message.to_string(),
                    range: lower::range_from_offsets(source, 0, diagnostic_end),
                });
            }
        }
    }

    ParsedDocument { nodes, diagnostics }
}
/// Collects the comments that appear before the first real content of
/// `source`.
///
/// Starting at the beginning of the text, ASCII whitespace, `// line`
/// comments and `/* block */` comments are consumed repeatedly until
/// something else is found.
///
/// Returns `(comments, offset)` where:
/// * `comments` is `None` if no comment was seen, otherwise the trimmed text
///   of every comment (delimiters removed) joined with `'\n'`;
/// * `offset` is the byte index of the first character that is neither
///   whitespace nor part of a leading comment (`source.len()` if there is
///   none). It always lies on a UTF-8 character boundary.
///
/// An unterminated block comment swallows the rest of the input; its full
/// remaining text is kept as the comment body.
pub fn parse_leading_comments(source: &str) -> (Option<String>, usize) {
    let mut comments: Vec<String> = Vec::new();
    let mut pos = 0;

    loop {
        // Skip ASCII whitespace between comments.
        let rest = &source[pos..];
        let content = rest.trim_start_matches(|c: char| c.is_ascii_whitespace());
        pos += rest.len() - content.len();

        if let Some(body) = content.strip_prefix("//") {
            // A line comment runs up to, but not including, the line break.
            let line_len = body
                .find(|c: char| c == '\n' || c == '\r')
                .unwrap_or(body.len());
            comments.push(body[..line_len].trim().to_string());
            pos += 2 + line_len;
        } else if let Some(body) = content.strip_prefix("/*") {
            match body.find("*/") {
                Some(close) => {
                    comments.push(body[..close].trim().to_string());
                    // Opening delimiter + body + closing delimiter.
                    pos += 2 + close + 2;
                }
                None => {
                    // Unterminated: keep everything up to end of input.
                    // Slicing through `find`/`strip_prefix` stays on char
                    // boundaries, so multi-byte text cannot cause a panic.
                    comments.push(body.trim().to_string());
                    pos = source.len();
                }
            }
        } else {
            break;
        }
    }

    let joined = if comments.is_empty() {
        None
    } else {
        Some(comments.join("\n"))
    };
    (joined, pos)
}
/// Builds the range that begins where `start` begins and finishes where `end`
/// finishes.
///
/// No ordering check is performed; the two positions are taken as given.
pub fn combine_ranges(start: TokenRange, end: TokenRange) -> TokenRange {
    let TokenRange {
        start: span_start, ..
    } = start;
    let TokenRange { end: span_end, .. } = end;
    TokenRange {
        start: span_start,
        end: span_end,
    }
}
/// Converts a byte `offset` into a 1-based line/column position in `source`.
///
/// * `offset` is clamped to `source.len()`; the clamped value is what ends up
///   in the returned `offset` field.
/// * If the clamped offset falls inside a multi-byte character, line and
///   column are computed for the start of that character.
/// * Lines are separated by `\n`, `\r`, or `\r\n` (the pair counts once).
/// * Columns count characters, not bytes, and restart at 1 after each break.
pub(crate) fn position_at_source(source: &str, offset: usize) -> TokenPosition {
    let offset = offset.min(source.len());

    // Walk back to the nearest char boundary; index 0 always qualifies.
    let prefix_end = (0..=offset)
        .rev()
        .find(|&idx| source.is_char_boundary(idx))
        .unwrap_or(0);

    let mut line = 1u32;
    let mut column = 1usize;
    let mut previous_was_cr = false;
    for ch in source[..prefix_end].chars() {
        match ch {
            // Second half of a CRLF pair: the break was already counted.
            '\n' if previous_was_cr => {}
            '\r' | '\n' => {
                line += 1;
                column = 1;
            }
            _ => column += 1,
        }
        previous_was_cr = ch == '\r';
    }

    TokenPosition {
        line,
        column,
        offset,
    }
}
/// Lists the nodes directly nested inside `node`'s expression.
///
/// Only one level is returned; callers recurse themselves if they need the
/// whole tree. Children appear in the order the expression stores them
/// (e.g. condition, then-branch, else-branch for a ternary; pattern before
/// body for each match arm). Dict keys, f-string literal segments and
/// `VariantPattern` contents contribute no children.
pub fn child_nodes(node: &Node) -> Vec<&Node> {
    let mut children = Vec::new();
    match &*node.expr {
        // Leaves: nothing nested to report.
        Expr::Reference { .. }
        | Expr::Variable(_)
        | Expr::Type(_)
        | Expr::Wildcard
        | Expr::Missing
        | Expr::Bool(_)
        | Expr::Int(_)
        | Expr::Float(_)
        | Expr::String(_)
        | Expr::VariantPattern { .. } => {}

        // Exactly one child.
        Expr::Spread(inner) => children.push(inner),
        Expr::Unary(_, operand) => children.push(operand),
        Expr::Closure { body, .. } => children.push(body),
        Expr::VariantCtor { body, .. } => children.push(body),

        // Fixed number of children.
        Expr::Binary(_, lhs, rhs) => {
            children.push(lhs);
            children.push(rhs);
        }
        Expr::Ternary { cond, then, els } => {
            children.push(cond);
            children.push(then);
            children.push(els);
        }
        Expr::Where { expr, bindings } => {
            children.push(expr);
            children.push(bindings);
        }
        Expr::Comprehension {
            element,
            iterable,
            condition,
            ..
        } => {
            children.push(element);
            children.push(iterable);
            // The filter clause is optional.
            if let Some(filter) = condition {
                children.push(filter);
            }
        }

        // Variable number of children.
        Expr::List(items) => {
            for item in items.iter() {
                children.push(item);
            }
        }
        Expr::Tuple(items) => {
            for item in items.iter() {
                children.push(item);
            }
        }
        Expr::Dict(pairs) => {
            // Keys are not nodes; only the values are children.
            for (_, value) in pairs {
                children.push(value);
            }
        }
        Expr::FnCall { args, .. } => {
            for arg in args {
                children.push(&arg.value);
            }
        }
        Expr::FString(parts) => {
            // Only interpolated segments carry a node.
            for part in parts {
                if let crate::FStringPart::Interpolation(embedded) = part {
                    children.push(embedded);
                }
            }
        }
        Expr::Match { expr, arms } => {
            children.push(expr);
            for (pattern, arm_body) in arms {
                children.push(pattern);
                children.push(arm_body);
            }
        }
    }
    children
}
/// Integration-style tests for the public parsing entry points of this crate.
#[cfg(test)]
mod tests {
    use super::*;

    // Leading block and line comments before the root are accepted.
    #[test]
    fn test_comments() {
        let src = r##"/* hello world */
// this is a test file
{}"##;
        let node = parse_document(src).unwrap();
        assert!(matches!(*node.expr, Expr::Dict(_)));
    }

    // Comments and whitespace after the root are not "trailing input".
    #[test]
    fn test_parse_document_accepts_trailing_trivia() {
        assert!(parse_document("{ a: 1 } // trailing\n /* ok */").is_ok());
    }

    // Real tokens after the root are reported with their offset and text.
    #[test]
    fn test_parse_document_rejects_trailing_tokens() {
        let err = parse_document("{ a: 1 } true").unwrap_err();
        assert!(matches!(
            err,
            ParseDocumentError::TrailingInput {
                offset: 9,
                ref remaining
            } if remaining == "true"
        ));
        assert_eq!(err.source_span(), Some((9, 4).into()));
    }

    // A syntax error surfaces as `Parse` and carries a span.
    #[test]
    fn test_parse_document_reports_parse_error_span() {
        let err = parse_document("{ a: }").unwrap_err();
        assert!(matches!(err, ParseDocumentError::Parse { .. }));
        assert!(err.source_span().is_some());
    }

    // Ranges carry 1-based line/column information. The dict body is indented
    // by two spaces so the key starts at column 3 and the value at column 11.
    #[test]
    fn test_token_range_has_line_and_column() {
        let node = parse_document("// leading\n{\n  answer: 42\n}\n").unwrap();
        assert_eq!(node.range.start.line, 2);
        assert_eq!(node.range.start.column, 1);
        assert_eq!(node.range.end.line, 4);
        assert_eq!(node.range.end.column, 2);
        if let Expr::Dict(pairs) = &*node.expr {
            let TokenKey::String(_, key_range, _) = &pairs[0].0 else {
                panic!("Expected string key")
            };
            assert_eq!(key_range.start.line, 3);
            assert_eq!(key_range.start.column, 3);
            assert_eq!(pairs[0].1.range.start.line, 3);
            assert_eq!(pairs[0].1.range.start.column, 11);
        } else {
            panic!("Expected dict")
        }
    }

    // Quoted and bare keys, leading comment, trailing comma.
    #[test]
    fn test_simple_root() {
        let node = parse_document(r#"{ "a": 1 }"#).unwrap();
        if let Expr::Dict(pairs) = &*node.expr {
            assert_eq!(pairs.len(), 1);
        } else {
            panic!()
        }
        let node = parse_document("// comment \n {foo: 1, bar: 2,}").unwrap();
        if let Expr::Dict(pairs) = &*node.expr {
            assert_eq!(pairs.len(), 2);
        } else {
            panic!()
        }
    }

    // Dict values may be arbitrary expressions.
    #[test]
    fn test_expr_integration() {
        let node = parse_document(r#"{ "a": 1 != 2 }"#).unwrap();
        if let Expr::Dict(pairs) = &*node.expr {
            assert!(matches!(*pairs[0].1.expr, Expr::Binary(Operator::Ne, _, _)));
        } else {
            panic!()
        }
    }

    // A comment between the start of input and a decorator is tolerated.
    #[test]
    fn test_comment_decorator_integration() {
        let node = parse_document(
            r###"
// foo decorator
@foo
{ "a": 1 }"###,
        )
        .unwrap();
        assert_eq!(node.decorators.len(), 1);
        assert_eq!(node.decorators[0].path[0].to_string_key(), "foo");
    }

    #[test]
    fn test_tuple_two_element() {
        let node = parse_document("(1, \"a\")").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => {
                assert_eq!(items.len(), 2);
                assert!(matches!(*items[0].expr, Expr::Int(1)));
                assert!(matches!(*items[1].expr, Expr::String(_)));
            }
            other => panic!("expected Tuple, got {other:?}"),
        }
    }

    // A trailing comma is what distinguishes a 1-tuple from grouping.
    #[test]
    fn test_tuple_one_element_trailing_comma() {
        let node = parse_document("(42,)").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => {
                assert_eq!(items.len(), 1);
                assert!(matches!(*items[0].expr, Expr::Int(42)));
            }
            other => panic!("expected 1-tuple, got {other:?}"),
        }
    }

    #[test]
    fn test_unit_tuple() {
        let node = parse_document("()").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => assert!(items.is_empty()),
            other => panic!("expected unit tuple, got {other:?}"),
        }
    }

    #[test]
    fn test_paren_grouping_is_not_tuple() {
        let node = parse_document("(1 + 2)").unwrap();
        assert!(
            matches!(&*node.expr, Expr::Binary(Operator::Add, _, _)),
            "grouping must not produce a Tuple: {:?}",
            node.expr
        );
    }

    #[test]
    fn test_nested_tuple() {
        let node = parse_document("((1, 2), 3)").unwrap();
        match &*node.expr {
            Expr::Tuple(items) => {
                assert_eq!(items.len(), 2);
                assert!(matches!(*items[0].expr, Expr::Tuple(_)));
                assert!(matches!(*items[1].expr, Expr::Int(3)));
            }
            other => panic!("expected nested tuple, got {other:?}"),
        }
    }

    #[test]
    fn test_list_integration() {
        let node = parse_document(r#"[1, 2, 3]"#).unwrap();
        if let Expr::List(elements) = &*node.expr {
            assert_eq!(elements.len(), 3);
        } else {
            panic!()
        }
    }

    // `&sibling.…` parses as a reference with a sibling base.
    #[test]
    fn test_ref_dict() {
        let node = parse_document(r#"{ "a": &sibling.b, "b": 2 }"#).unwrap();
        if let Expr::Dict(pairs) = &*node.expr {
            assert_eq!(pairs.len(), 2);
            assert!(matches!(
                *pairs[0].1.expr,
                Expr::Reference {
                    base: RefBase::Sibling,
                    ..
                }
            ));
        } else {
            panic!()
        }
    }

    #[test]
    fn test_ref_list() {
        let node = parse_document(r#"[&sibling.b[1], 2]"#).unwrap();
        if let Expr::List(elements) = &*node.expr {
            assert_eq!(elements.len(), 2);
        } else {
            panic!()
        }
    }

    #[test]
    fn test_var_list() {
        let node = parse_document(r#"[a, 2]"#).unwrap();
        if let Expr::List(elements) = &*node.expr {
            assert!(matches!(*elements[0].expr, Expr::Variable(_)));
        } else {
            panic!()
        }
    }

    #[test]
    fn test_fn_call_list() {
        let node = parse_document(r#"[f({a: 1}), 2]"#).unwrap();
        if let Expr::List(elements) = &*node.expr {
            assert!(matches!(*elements[0].expr, Expr::FnCall { .. }));
        } else {
            panic!()
        }
    }

    #[test]
    fn test_fmt_string_list() {
        let node = parse_document(r#"[f"a ${ &sibling.b[1] }", "b"]"#).unwrap();
        if let Expr::List(elements) = &*node.expr {
            assert!(matches!(*elements[0].expr, Expr::FString(_)));
        } else {
            panic!()
        }
    }

    #[test]
    fn test_root_ref_in_fmt_string_dict() {
        assert!(parse_document(r#"{ "a": f"a ${ &root.b[0] }", "b": [0, 1] }"#).is_ok());
    }

    // Comments immediately preceding a field become that field's doc comment.
    #[test]
    fn test_doc_comment_extraction() {
        let src = r#"{
// line 1
// line 2
a: 1,
/* block */
b: 2
}"#;
        let node = parse_document(src).unwrap();
        if let Expr::Dict(pairs) = &*node.expr {
            assert_eq!(pairs[0].1.doc_comment.as_deref(), Some("line 1\nline 2"));
            assert_eq!(pairs[1].1.doc_comment.as_deref(), Some("block"));
        } else {
            panic!()
        }
    }

    // The root does not have to be a container.
    #[test]
    fn test_root_accepts_atomic_literals() {
        let node = parse_document("42").unwrap();
        assert!(matches!(*node.expr, Expr::Int(42)));
        let node = parse_document(r#""hello""#).unwrap();
        assert!(matches!(*node.expr, Expr::String(_)));
        let node = parse_document("true").unwrap();
        assert!(matches!(*node.expr, Expr::Bool(true)));
        let node = parse_document("null").unwrap();
        assert!(matches!(*node.expr, Expr::Missing));
    }

    #[test]
    fn test_root_accepts_binary_expression() {
        let node = parse_document("1 + 2").unwrap();
        assert!(matches!(*node.expr, Expr::Binary(Operator::Add, _, _)));
    }

    #[test]
    fn test_root_accepts_variant_constructor() {
        let node = parse_document("Result.Ok { value: 1 }").unwrap();
        assert!(matches!(*node.expr, Expr::VariantCtor { .. }));
    }

    #[test]
    fn test_root_accepts_fn_call() {
        let node = parse_document("range(0, 10)").unwrap();
        assert!(matches!(*node.expr, Expr::FnCall { .. }));
    }

    #[test]
    fn test_root_dict_and_list_still_work() {
        let node = parse_document("{ a: 1 }").unwrap();
        assert!(matches!(*node.expr, Expr::Dict(_)));
        let node = parse_document("[1, 2, 3]").unwrap();
        assert!(matches!(*node.expr, Expr::List(_)));
    }

    // Empty, whitespace-only and unterminated input are all errors in strict mode.
    #[test]
    fn test_root_rejects_garbage() {
        assert!(parse_document("").is_err());
        assert!(parse_document(" \n\t ").is_err());
        assert!(parse_document("{ bad syntax").is_err());
    }

    #[test]
    fn recovering_clean_input_yields_one_node_no_diagnostics() {
        let result = parse_document_recovering("{ a: 1, b: 2 }");
        assert_eq!(result.nodes.len(), 1);
        assert!(result.diagnostics.is_empty(), "{:?}", result.diagnostics);
        if let Expr::Dict(pairs) = &*result.nodes[0].expr {
            assert_eq!(pairs.len(), 2);
        } else {
            panic!("expected Dict root");
        }
    }

    // Smoke test: the recovering parser must not panic on fragments.
    #[test]
    fn recovering_never_errs_on_partial_inputs() {
        for src in &[
            "#", "&", "@", "{", "{a:", "{ ?", "}", "[", "(", "f\"hi ${", "", " ", "\n\t",
        ] {
            let result = parse_document_recovering(src);
            let _ = result.nodes;
            let _ = result.diagnostics;
        }
    }

    #[test]
    fn recovering_reports_diagnostic_for_unterminated_dict() {
        let result = parse_document_recovering("{ a: ");
        assert!(
            !result.diagnostics.is_empty(),
            "expected at least one diagnostic for unterminated dict"
        );
        for diag in &result.diagnostics {
            assert!(
                diag.range.start.offset <= 5,
                "diagnostic offset out of range: {:?}",
                diag
            );
        }
    }

    // Empty input yields a placeholder node plus a diagnostic.
    #[test]
    fn recovering_includes_empty_document_diagnostic() {
        let result = parse_document_recovering("");
        assert_eq!(result.nodes.len(), 1);
        assert!(matches!(&*result.nodes[0].expr, Expr::Missing));
        assert!(!result.diagnostics.is_empty());
    }

    #[test]
    fn recovering_completes_partial_for_lone_hash() {
        let result = parse_document_recovering("#");
        assert!(!result.diagnostics.is_empty());
    }

    #[test]
    fn recovering_completes_partial_for_lone_amp() {
        let result = parse_document_recovering("&");
        assert!(!result.diagnostics.is_empty());
    }

    // Callers rely on there always being a root node to inspect.
    #[test]
    fn recovering_always_yields_at_least_one_node() {
        for src in [
            "@",
            "#",
            "&",
            "{",
            "{ @",
            "{ x: 1, @ }",
            "[",
            "}",
            "{ a:",
            "{ ?",
            "f\"hi ${",
            "(",
            "",
        ] {
            let r = parse_document_recovering(src);
            assert!(
                !r.nodes.is_empty(),
                "expected at least one partial node for src {:?}, got 0",
                src
            );
        }
    }

    // A dangling `@` must not discard the fields that parsed fine before it.
    #[test]
    fn recovering_at_decorator_keeps_sibling_fields() {
        let r = parse_document_recovering("{ fmt: (v) => v + 1, @ y: 2 }");
        assert_eq!(r.nodes.len(), 1, "expected partial Dict root");
        match &*r.nodes[0].expr {
            Expr::Dict(fields) => {
                let has_fmt = fields.iter().any(|(k, _)| {
                    matches!(
                        k,
                        TokenKey::String(s, _, _) if s == "fmt"
                    )
                });
                assert!(
                    has_fmt,
                    "expected the `fmt` sibling to survive partial lowering, got {:?}",
                    fields
                );
            }
            other => panic!("expected Dict root, got {:?}", other),
        }
    }
}