use std::sync::Arc;
use crate::lower::range_from_offsets;
use crate::token::{
Directive, DirectiveBody, DirectiveMainParam, Expr, Node, NodeId, Operator, TypeNode,
};
/// Attempts to parse `source` with a restricted, hand-written grammar.
///
/// The accepted shape is:
///
/// 1. optional leading whitespace (a `//` or `/*` comment makes the function bail),
/// 2. a `#main(` directive whose parameters are `Type name` pairs with a scalar
///    type (`Int`, `Float`, `Bool`, `String`), closed by `)`,
/// 3. an optional `-> Type` return annotation with a scalar type,
/// 4. a line break,
/// 5. a single-line body made of literals, plain variables, `!`, binary
///    operators and at most one top-level ternary,
/// 6. nothing but whitespace until the end of input.
///
/// Returns `Some(Node)` describing the document when the input fits this shape
/// and `None` for everything else. `None` means "not handled here", not
/// necessarily "invalid input"; callers presumably fall back to the full
/// parser (see the tests, which compare against `parse_document`).
pub fn parse_document_fast(source: &str) -> Option<Node> {
    let mut p = FastParser::new(source);
    p.skip_ws()?;
    // Remember where the directive begins *before* consuming it. Deriving the
    // start from the position after `(` is wrong as soon as inline whitespace
    // separates `#main` from `(`, because that whitespace is not part of the
    // fixed `"#main("` length.
    let directive_start_offset = p.pos;
    if !p.eat_str("#main") {
        return None;
    }
    p.skip_inline_ws();
    if !p.eat_char(b'(') {
        return None;
    }
    let params = p.parse_main_params()?;
    if !p.eat_char(b')') {
        return None;
    }
    p.skip_inline_ws();
    // NOTE(review): without a return type the directive range ends after any
    // inline whitespace that follows `)`; with one it ends right after the
    // type. Presumably this mirrors the full parser's spans - confirm before
    // changing.
    let mut directive_end_offset = p.pos;
    let return_type = if p.peek_str("->") {
        p.pos += 2;
        p.skip_inline_ws();
        let t = p.parse_scalar_type()?;
        directive_end_offset = p.pos;
        Some(t)
    } else {
        None
    };
    p.skip_inline_ws();
    // The directive must sit on its own line.
    if !p.eat_newline() {
        return None;
    }
    p.skip_ws()?;
    let body_start = p.pos;
    let body_expr = p.parse_trivial_expr()?;
    // Anything other than whitespace after the body is outside the fast path.
    p.skip_trailing()?;
    // Use the end of the last token so trailing whitespace is not part of the
    // body or document range.
    let body_end = p.pos_after_last_token;
    let directive_range = range_from_offsets(source, directive_start_offset, directive_end_offset);
    let body_range = range_from_offsets(source, body_start, body_end);
    let doc_range = range_from_offsets(source, directive_start_offset, body_end);
    let directive = Directive {
        name: "main".to_string(),
        body: DirectiveBody::Main {
            params,
            return_type,
        },
        range: directive_range,
    };
    Some(Node {
        id: NodeId::alloc(),
        expr: Arc::new(body_expr),
        decorators: Vec::new(),
        directives: vec![directive],
        type_hint: None,
        range: doc_range,
        doc_comment: None,
    })
    // Sanity check: the computed body range must not point past the input.
    .filter(|_| body_range.end.offset <= source.len())
}
/// Cursor state for the fast-path parser over one source string.
struct FastParser<'a> {
    /// The full source text; sliced to obtain identifier, string and number text
    /// and passed to `range_from_offsets` when building ranges.
    source: &'a str,
    /// Byte view of `source` (`source.as_bytes()`), used for byte-wise scanning.
    bytes: &'a [u8],
    /// Current byte offset of the cursor into `bytes`.
    pos: usize,
    /// Byte offset just past the most recently parsed expression token. Used as
    /// the end of expression ranges so that whitespace skipped while looking for
    /// the next operator is not included.
    pos_after_last_token: usize,
}
impl<'a> FastParser<'a> {
fn new(source: &'a str) -> Self {
Self {
source,
bytes: source.as_bytes(),
pos: 0,
pos_after_last_token: 0,
}
}
fn skip_ws(&mut self) -> Option<()> {
while self.pos < self.bytes.len() {
let b = self.bytes[self.pos];
if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' {
self.pos += 1;
} else if b == b'/'
&& self.pos + 1 < self.bytes.len()
&& (self.bytes[self.pos + 1] == b'/' || self.bytes[self.pos + 1] == b'*')
{
return None;
} else {
break;
}
}
Some(())
}
fn skip_inline_ws(&mut self) {
while self.pos < self.bytes.len() {
let b = self.bytes[self.pos];
if b == b' ' || b == b'\t' {
self.pos += 1;
} else {
break;
}
}
}
fn skip_trailing(&mut self) -> Option<()> {
while self.pos < self.bytes.len() {
let b = self.bytes[self.pos];
if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' {
self.pos += 1;
} else {
return None;
}
}
Some(())
}
fn eat_str(&mut self, s: &str) -> bool {
if self.bytes.len() - self.pos >= s.len()
&& &self.bytes[self.pos..self.pos + s.len()] == s.as_bytes()
{
self.pos += s.len();
true
} else {
false
}
}
fn peek_str(&self, s: &str) -> bool {
self.bytes.len() - self.pos >= s.len()
&& &self.bytes[self.pos..self.pos + s.len()] == s.as_bytes()
}
fn eat_char(&mut self, c: u8) -> bool {
if self.pos < self.bytes.len() && self.bytes[self.pos] == c {
self.pos += 1;
true
} else {
false
}
}
fn eat_newline(&mut self) -> bool {
if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\r' {
self.pos += 1;
if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\n' {
self.pos += 1;
}
true
} else if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\n' {
self.pos += 1;
true
} else {
false
}
}
fn parse_main_params(&mut self) -> Option<Vec<DirectiveMainParam>> {
let mut params = Vec::new();
self.skip_inline_ws();
if self.peek_str(")") {
return Some(params);
}
loop {
self.skip_inline_ws();
let type_node = self.parse_scalar_type()?;
self.skip_inline_ws();
let name_start = self.pos;
let name = self.parse_identifier()?;
let name_end = self.pos;
let name_range = range_from_offsets(self.source, name_start, name_end);
params.push(DirectiveMainParam {
name,
name_range,
type_node,
});
self.skip_inline_ws();
if self.peek_str(",") {
self.pos += 1;
continue;
} else {
break;
}
}
Some(params)
}
fn parse_scalar_type(&mut self) -> Option<TypeNode> {
let start = self.pos;
let name = self.parse_identifier()?;
if !matches!(name.as_str(), "Int" | "Float" | "Bool" | "String") {
return None;
}
if self.pos < self.bytes.len() && matches!(self.bytes[self.pos], b'?' | b'<' | b'.') {
return None;
}
let end = self.pos;
Some(TypeNode {
path: vec![name],
generics: Vec::new(),
is_optional: false,
range: range_from_offsets(self.source, start, end),
variant_fields: None,
doc_comment: None,
})
}
fn parse_identifier(&mut self) -> Option<String> {
let start = self.pos;
if start >= self.bytes.len() {
return None;
}
let first = self.bytes[start];
if !(first.is_ascii_alphabetic() || first == b'_') {
return None;
}
self.pos += 1;
while self.pos < self.bytes.len() {
let b = self.bytes[self.pos];
if b.is_ascii_alphanumeric() || b == b'_' {
self.pos += 1;
} else {
break;
}
}
Some(self.source[start..self.pos].to_string())
}
fn parse_trivial_expr(&mut self) -> Option<Expr> {
self.parse_ternary()
}
fn parse_ternary(&mut self) -> Option<Expr> {
let start = self.pos;
let cond_expr = self.parse_binary(0)?;
let cond_end = self.pos_after_last_token;
self.skip_inline_ws();
if self.peek_str("?") && !self.peek_str("??") {
self.pos += 1;
self.skip_inline_ws();
let then_start = self.pos;
let then_expr = self.parse_binary(0)?;
let then_end = self.pos_after_last_token;
self.skip_inline_ws();
if !self.eat_char(b':') {
return None;
}
self.skip_inline_ws();
let els_start = self.pos;
let els_expr = self.parse_binary(0)?;
let els_end = self.pos_after_last_token;
Some(Expr::Ternary {
cond: Node {
id: NodeId::alloc(),
expr: Arc::new(cond_expr),
decorators: Vec::new(),
directives: Vec::new(),
type_hint: None,
range: range_from_offsets(self.source, start, cond_end),
doc_comment: None,
},
then: Node {
id: NodeId::alloc(),
expr: Arc::new(then_expr),
decorators: Vec::new(),
directives: Vec::new(),
type_hint: None,
range: range_from_offsets(self.source, then_start, then_end),
doc_comment: None,
},
els: Node {
id: NodeId::alloc(),
expr: Arc::new(els_expr),
decorators: Vec::new(),
directives: Vec::new(),
type_hint: None,
range: range_from_offsets(self.source, els_start, els_end),
doc_comment: None,
},
})
} else {
Some(cond_expr)
}
}
fn parse_binary(&mut self, min_prec: u8) -> Option<Expr> {
let lhs_start = self.pos;
let mut lhs = self.parse_unary()?;
let mut lhs_end = self.pos_after_last_token;
loop {
self.skip_inline_ws();
let Some((op, prec)) = self.peek_binary_op() else {
break;
};
if prec < min_prec {
break;
}
let op_len = op_str(op).len();
self.pos += op_len;
self.skip_inline_ws();
let rhs_start = self.pos;
let rhs = self.parse_binary(prec + 1)?;
let rhs_end = self.pos_after_last_token;
let lhs_node = Node {
id: NodeId::alloc(),
expr: Arc::new(lhs),
decorators: Vec::new(),
directives: Vec::new(),
type_hint: None,
range: range_from_offsets(self.source, lhs_start, lhs_end),
doc_comment: None,
};
let rhs_node = Node {
id: NodeId::alloc(),
expr: Arc::new(rhs),
decorators: Vec::new(),
directives: Vec::new(),
type_hint: None,
range: range_from_offsets(self.source, rhs_start, rhs_end),
doc_comment: None,
};
lhs = Expr::Binary(op, lhs_node, rhs_node);
lhs_end = rhs_end;
}
self.pos_after_last_token = lhs_end;
Some(lhs)
}
fn parse_unary(&mut self) -> Option<Expr> {
self.skip_inline_ws();
if self.pos < self.bytes.len() && self.bytes[self.pos] == b'!' {
self.pos += 1;
self.skip_inline_ws();
let inner_start = self.pos;
let inner = self.parse_unary()?;
let inner_end = self.pos_after_last_token;
return Some(Expr::Unary(
Operator::Not,
Node {
id: NodeId::alloc(),
expr: Arc::new(inner),
decorators: Vec::new(),
directives: Vec::new(),
type_hint: None,
range: range_from_offsets(self.source, inner_start, inner_end),
doc_comment: None,
},
));
}
self.parse_leaf()
}
fn parse_leaf(&mut self) -> Option<Expr> {
self.skip_inline_ws();
let start = self.pos;
if start >= self.bytes.len() {
return None;
}
let b = self.bytes[start];
if b == b'-' {
return None;
}
if b.is_ascii_digit() {
return self.parse_number(start);
}
if b.is_ascii_alphabetic() || b == b'_' {
let name = self.parse_identifier()?;
self.pos_after_last_token = self.pos;
return Some(match name.as_str() {
"true" => Expr::Bool(true),
"false" => Expr::Bool(false),
"null" => return None,
_ => {
if self.pos < self.bytes.len()
&& matches!(self.bytes[self.pos], b'.' | b'(' | b'[')
{
return None;
}
let name_range = range_from_offsets(self.source, start, self.pos);
Expr::Variable(vec![crate::token::TokenKey::String(
name, name_range, false,
)])
}
});
}
if b == b'"' {
self.pos += 1;
let content_start = self.pos;
while self.pos < self.bytes.len() {
let c = self.bytes[self.pos];
if c == b'\\' || c == b'\n' || c == b'\r' {
return None;
}
if c == b'"' {
let s = self.source[content_start..self.pos].to_string();
self.pos += 1;
self.pos_after_last_token = self.pos;
return Some(Expr::String(s));
}
self.pos += 1;
}
return None;
}
if b == b'(' {
return None;
}
None
}
fn parse_number(&mut self, start: usize) -> Option<Expr> {
let mut saw_dot = false;
let mut saw_exp = false;
while self.pos < self.bytes.len() {
let c = self.bytes[self.pos];
if c.is_ascii_digit() {
self.pos += 1;
} else if c == b'.' && !saw_dot && !saw_exp {
if self.pos + 1 < self.bytes.len() && self.bytes[self.pos + 1].is_ascii_digit() {
saw_dot = true;
self.pos += 1;
} else {
break;
}
} else if (c == b'e' || c == b'E') && !saw_exp {
saw_exp = true;
self.pos += 1;
if self.pos < self.bytes.len()
&& (self.bytes[self.pos] == b'+' || self.bytes[self.pos] == b'-')
{
self.pos += 1;
}
} else {
break;
}
}
let text = &self.source[start..self.pos];
self.pos_after_last_token = self.pos;
if saw_dot || saw_exp {
let v: f64 = text.parse().ok()?;
Some(Expr::Float(ordered_float::OrderedFloat(v)))
} else {
let v: i64 = text.parse().ok()?;
Some(Expr::Int(v))
}
}
fn peek_binary_op(&self) -> Option<(Operator, u8)> {
if self.pos >= self.bytes.len() {
return None;
}
let b = self.bytes[self.pos];
if self.peek_str("==") {
return Some((Operator::Eq, 4));
}
if self.peek_str("!=") {
return Some((Operator::Ne, 4));
}
if self.peek_str("<=") {
return Some((Operator::Le, 5));
}
if self.peek_str(">=") {
return Some((Operator::Ge, 5));
}
match b {
b'+' => Some((Operator::Add, 6)),
b'-' => {
Some((Operator::Sub, 6))
}
b'*' => Some((Operator::Mul, 7)),
b'/' => {
if self.pos + 1 < self.bytes.len()
&& (self.bytes[self.pos + 1] == b'/' || self.bytes[self.pos + 1] == b'*')
{
None
} else {
Some((Operator::Div, 7))
}
}
b'%' => Some((Operator::Mod, 7)),
b'<' => Some((Operator::Lt, 5)),
b'>' => Some((Operator::Gt, 5)),
_ => None,
}
}
}
/// Returns the source spelling of `op`.
///
/// The length of the returned string is how far the parser advances past the
/// operator. Operators that `peek_binary_op` never yields map to the empty
/// string.
fn op_str(op: Operator) -> &'static str {
    match op {
        // Two-byte comparison operators.
        Operator::Eq => "==",
        Operator::Ne => "!=",
        Operator::Le => "<=",
        Operator::Ge => ">=",
        // One-byte comparison operators.
        Operator::Lt => "<",
        Operator::Gt => ">",
        // Arithmetic operators.
        Operator::Add => "+",
        Operator::Sub => "-",
        Operator::Mul => "*",
        Operator::Div => "/",
        Operator::Mod => "%",
        // Not produced as binary operators by the fast path.
        Operator::And
        | Operator::Or
        | Operator::Not
        | Operator::Pipe
        | Operator::Concat => "",
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse_document;

    /// Asserts that the two nodes compare equal.
    fn assert_eq_modulo_ids(a: &Node, b: &Node) {
        assert_eq!(a, b, "fast vs slow path Node mismatch");
    }

    /// Parses `src` with both parsers and asserts that they agree.
    fn assert_fast_matches_slow(src: &str) {
        let fast = parse_document_fast(src).expect("fast path must accept");
        let slow = parse_document(src).expect("slow path must accept");
        assert_eq_modulo_ids(&fast, &slow);
    }

    /// Asserts that the fast path declines to handle `src`.
    fn assert_fast_rejects(src: &str) {
        assert!(parse_document_fast(src).is_none());
    }

    /// Asserts that the fast path declines `src` while the full parser accepts it.
    fn assert_fast_bails_but_slow_accepts(src: &str) {
        assert!(parse_document_fast(src).is_none());
        assert!(parse_document(src).is_ok());
    }

    // --- Inputs both parsers must agree on -------------------------------

    #[test]
    fn fast_path_matches_slow_path_on_w11_shape() {
        assert_fast_matches_slow("#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_int_literal_body() {
        assert_fast_matches_slow("#main(Int x) -> Int\n42\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_multi_param() {
        assert_fast_matches_slow("#main(Int x, Int y) -> Int\nx * y + 7\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_ternary() {
        assert_fast_matches_slow("#main(Int x) -> Int\nx > 0 ? x : 0\n");
    }

    #[test]
    fn fast_path_matches_slow_path_with_no_return_type() {
        assert_fast_matches_slow("#main(Int x)\nx + 1\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_string_literal_body() {
        assert_fast_matches_slow("#main(String s) -> String\n\"hello\"\n");
    }

    // --- Inputs the fast path must decline -------------------------------

    #[test]
    fn fast_path_rejects_leading_comment() {
        assert_fast_rejects("// hello\n#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_rejects_decorator() {
        assert_fast_rejects("@brand(X)\n#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_rejects_import_directive() {
        assert_fast_rejects("#import std from \"std/string\"\n#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_rejects_list_body() {
        assert_fast_rejects("#main(Int x) -> Int\n[1, 2, 3]\n");
    }

    #[test]
    fn fast_path_rejects_fn_call_body() {
        assert_fast_rejects("#main(Int x) -> Int\nabs(x)\n");
    }

    #[test]
    fn fast_path_rejects_generic_param_type() {
        assert_fast_rejects("#main(List<Int> xs) -> Int\n0\n");
    }

    #[test]
    fn fast_path_rejects_optional_param_type() {
        assert_fast_rejects("#main(Int? x) -> Int\n0\n");
    }

    #[test]
    fn fast_path_rejects_trailing_garbage() {
        assert_fast_rejects("#main(Int x) -> Int\nx + 1\nextra\n");
    }

    // --- Inputs only the full parser handles -----------------------------

    #[test]
    fn fast_path_bails_on_negative_number_literal() {
        assert_fast_bails_but_slow_accepts("#main(Int x) -> Int\n-1\n");
    }

    #[test]
    fn fast_path_bails_on_parenthesised_subexpression() {
        assert_fast_bails_but_slow_accepts("#main(Int x) -> Int\n(x + 1) * 2\n");
    }
}