1use std::sync::Arc;
29
30use crate::lower::range_from_offsets;
31use crate::token::{
32 Directive, DirectiveBody, DirectiveMainParam, Expr, Node, NodeId, Operator, TypeNode,
33};
34
35pub fn parse_document_fast(source: &str) -> Option<Node> {
47 let mut p = FastParser::new(source);
48 p.skip_ws()?;
49 if !p.eat_str("#main") {
52 return None;
53 }
54 p.skip_inline_ws();
55 if !p.eat_char(b'(') {
56 return None;
57 }
58 let directive_start_offset = p.pos - "#main(".len();
62 let params = p.parse_main_params()?;
63 if !p.eat_char(b')') {
64 return None;
65 }
66 p.skip_inline_ws();
67 let mut directive_end_offset = p.pos; let return_type = if p.peek_str("->") {
69 p.pos += 2;
70 p.skip_inline_ws();
71 let t = p.parse_scalar_type()?;
72 directive_end_offset = p.pos; Some(t)
74 } else {
75 None
76 };
77 p.skip_inline_ws();
82 if !p.eat_newline() {
83 return None;
84 }
85 p.skip_ws()?;
86 let body_start = p.pos;
87 let body_expr = p.parse_trivial_expr()?;
88 p.skip_trailing()?;
90 let body_end = p.pos_after_last_token;
91
92 let directive_range = range_from_offsets(source, directive_start_offset, directive_end_offset);
93 let body_range = range_from_offsets(source, body_start, body_end);
94 let doc_range = range_from_offsets(source, directive_start_offset, body_end);
95 let directive = Directive {
96 name: "main".to_string(),
97 body: DirectiveBody::Main {
98 params,
99 return_type,
100 },
101 range: directive_range,
102 };
103 Some(Node {
104 id: NodeId::alloc(),
105 expr: Arc::new(body_expr),
106 decorators: Vec::new(),
107 directives: vec![directive],
108 type_hint: None,
109 range: doc_range,
110 doc_comment: None,
111 })
112 .filter(|_| {
113 body_range.end.offset <= source.len()
117 })
118}
119
/// Cursor-based scanner over one source string, used by `parse_document_fast`.
struct FastParser<'a> {
    /// The full input text; sliced when identifiers, numbers and string literals are
    /// extracted, and passed to `range_from_offsets` when ranges are built.
    source: &'a str,
    /// Byte view of `source` (set from `source.as_bytes()` in `new`), used for all
    /// byte-level lookahead.
    bytes: &'a [u8],
    /// Current read position, as a byte offset into `bytes`.
    pos: usize,
    /// Byte offset just past the most recently parsed expression token. Unlike `pos`
    /// it is not moved by the whitespace-skipping helpers.
    pos_after_last_token: usize,
}
133
134impl<'a> FastParser<'a> {
135 fn new(source: &'a str) -> Self {
136 Self {
137 source,
138 bytes: source.as_bytes(),
139 pos: 0,
140 pos_after_last_token: 0,
141 }
142 }
143
144 fn skip_ws(&mut self) -> Option<()> {
149 while self.pos < self.bytes.len() {
150 let b = self.bytes[self.pos];
151 if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' {
152 self.pos += 1;
153 } else if b == b'/'
154 && self.pos + 1 < self.bytes.len()
155 && (self.bytes[self.pos + 1] == b'/' || self.bytes[self.pos + 1] == b'*')
156 {
157 return None;
159 } else {
160 break;
161 }
162 }
163 Some(())
164 }
165
166 fn skip_inline_ws(&mut self) {
167 while self.pos < self.bytes.len() {
168 let b = self.bytes[self.pos];
169 if b == b' ' || b == b'\t' {
170 self.pos += 1;
171 } else {
172 break;
173 }
174 }
175 }
176
177 fn skip_trailing(&mut self) -> Option<()> {
180 while self.pos < self.bytes.len() {
181 let b = self.bytes[self.pos];
182 if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' {
183 self.pos += 1;
184 } else {
185 return None;
186 }
187 }
188 Some(())
189 }
190
191 fn eat_str(&mut self, s: &str) -> bool {
192 if self.bytes.len() - self.pos >= s.len()
193 && &self.bytes[self.pos..self.pos + s.len()] == s.as_bytes()
194 {
195 self.pos += s.len();
196 true
197 } else {
198 false
199 }
200 }
201
202 fn peek_str(&self, s: &str) -> bool {
203 self.bytes.len() - self.pos >= s.len()
204 && &self.bytes[self.pos..self.pos + s.len()] == s.as_bytes()
205 }
206
207 fn eat_char(&mut self, c: u8) -> bool {
208 if self.pos < self.bytes.len() && self.bytes[self.pos] == c {
209 self.pos += 1;
210 true
211 } else {
212 false
213 }
214 }
215
216 fn eat_newline(&mut self) -> bool {
218 if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\r' {
219 self.pos += 1;
220 if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\n' {
221 self.pos += 1;
222 }
223 true
224 } else if self.pos < self.bytes.len() && self.bytes[self.pos] == b'\n' {
225 self.pos += 1;
226 true
227 } else {
228 false
229 }
230 }
231
232 fn parse_main_params(&mut self) -> Option<Vec<DirectiveMainParam>> {
233 let mut params = Vec::new();
234 self.skip_inline_ws();
235 if self.peek_str(")") {
240 return Some(params);
241 }
242 loop {
243 self.skip_inline_ws();
244 let type_node = self.parse_scalar_type()?;
245 self.skip_inline_ws();
246 let name_start = self.pos;
247 let name = self.parse_identifier()?;
248 let name_end = self.pos;
249 let name_range = range_from_offsets(self.source, name_start, name_end);
250 params.push(DirectiveMainParam {
251 name,
252 name_range,
253 type_node,
254 });
255 self.skip_inline_ws();
256 if self.peek_str(",") {
257 self.pos += 1;
258 continue;
259 } else {
260 break;
261 }
262 }
263 Some(params)
264 }
265
266 fn parse_scalar_type(&mut self) -> Option<TypeNode> {
269 let start = self.pos;
270 let name = self.parse_identifier()?;
271 if !matches!(name.as_str(), "Int" | "Float" | "Bool" | "String") {
272 return None;
273 }
274 if self.pos < self.bytes.len() && matches!(self.bytes[self.pos], b'?' | b'<' | b'.') {
277 return None;
278 }
279 let end = self.pos;
280 Some(TypeNode {
281 path: vec![name],
282 generics: Vec::new(),
283 is_optional: false,
284 range: range_from_offsets(self.source, start, end),
285 variant_fields: None,
286 doc_comment: None,
287 })
288 }
289
290 fn parse_identifier(&mut self) -> Option<String> {
293 let start = self.pos;
294 if start >= self.bytes.len() {
295 return None;
296 }
297 let first = self.bytes[start];
298 if !(first.is_ascii_alphabetic() || first == b'_') {
299 return None;
300 }
301 self.pos += 1;
302 while self.pos < self.bytes.len() {
303 let b = self.bytes[self.pos];
304 if b.is_ascii_alphanumeric() || b == b'_' {
305 self.pos += 1;
306 } else {
307 break;
308 }
309 }
310 Some(self.source[start..self.pos].to_string())
311 }
312
313 fn parse_trivial_expr(&mut self) -> Option<Expr> {
318 self.parse_ternary()
319 }
320
321 fn parse_ternary(&mut self) -> Option<Expr> {
322 let start = self.pos;
323 let cond_expr = self.parse_binary(0)?;
324 let cond_end = self.pos_after_last_token;
325 self.skip_inline_ws();
326 if self.peek_str("?") && !self.peek_str("??") {
327 self.pos += 1;
328 self.skip_inline_ws();
329 let then_start = self.pos;
330 let then_expr = self.parse_binary(0)?;
331 let then_end = self.pos_after_last_token;
332 self.skip_inline_ws();
333 if !self.eat_char(b':') {
334 return None;
335 }
336 self.skip_inline_ws();
337 let els_start = self.pos;
338 let els_expr = self.parse_binary(0)?;
339 let els_end = self.pos_after_last_token;
340 Some(Expr::Ternary {
341 cond: Node {
342 id: NodeId::alloc(),
343 expr: Arc::new(cond_expr),
344 decorators: Vec::new(),
345 directives: Vec::new(),
346 type_hint: None,
347 range: range_from_offsets(self.source, start, cond_end),
348 doc_comment: None,
349 },
350 then: Node {
351 id: NodeId::alloc(),
352 expr: Arc::new(then_expr),
353 decorators: Vec::new(),
354 directives: Vec::new(),
355 type_hint: None,
356 range: range_from_offsets(self.source, then_start, then_end),
357 doc_comment: None,
358 },
359 els: Node {
360 id: NodeId::alloc(),
361 expr: Arc::new(els_expr),
362 decorators: Vec::new(),
363 directives: Vec::new(),
364 type_hint: None,
365 range: range_from_offsets(self.source, els_start, els_end),
366 doc_comment: None,
367 },
368 })
369 } else {
370 Some(cond_expr)
371 }
372 }
373
374 fn parse_binary(&mut self, min_prec: u8) -> Option<Expr> {
375 let lhs_start = self.pos;
376 let mut lhs = self.parse_unary()?;
377 let mut lhs_end = self.pos_after_last_token;
378 loop {
379 self.skip_inline_ws();
380 let Some((op, prec)) = self.peek_binary_op() else {
381 break;
382 };
383 if prec < min_prec {
384 break;
385 }
386 let op_len = op_str(op).len();
388 self.pos += op_len;
389 self.skip_inline_ws();
390 let rhs_start = self.pos;
391 let rhs = self.parse_binary(prec + 1)?;
392 let rhs_end = self.pos_after_last_token;
393 let lhs_node = Node {
394 id: NodeId::alloc(),
395 expr: Arc::new(lhs),
396 decorators: Vec::new(),
397 directives: Vec::new(),
398 type_hint: None,
399 range: range_from_offsets(self.source, lhs_start, lhs_end),
400 doc_comment: None,
401 };
402 let rhs_node = Node {
403 id: NodeId::alloc(),
404 expr: Arc::new(rhs),
405 decorators: Vec::new(),
406 directives: Vec::new(),
407 type_hint: None,
408 range: range_from_offsets(self.source, rhs_start, rhs_end),
409 doc_comment: None,
410 };
411 lhs = Expr::Binary(op, lhs_node, rhs_node);
412 lhs_end = rhs_end;
413 }
417 self.pos_after_last_token = lhs_end;
419 Some(lhs)
420 }
421
422 fn parse_unary(&mut self) -> Option<Expr> {
423 self.skip_inline_ws();
424 if self.pos < self.bytes.len() && self.bytes[self.pos] == b'!' {
428 self.pos += 1;
429 self.skip_inline_ws();
430 let inner_start = self.pos;
431 let inner = self.parse_unary()?;
432 let inner_end = self.pos_after_last_token;
433 return Some(Expr::Unary(
434 Operator::Not,
435 Node {
436 id: NodeId::alloc(),
437 expr: Arc::new(inner),
438 decorators: Vec::new(),
439 directives: Vec::new(),
440 type_hint: None,
441 range: range_from_offsets(self.source, inner_start, inner_end),
442 doc_comment: None,
443 },
444 ));
445 }
446 self.parse_leaf()
451 }
452
453 fn parse_leaf(&mut self) -> Option<Expr> {
454 self.skip_inline_ws();
455 let start = self.pos;
456 if start >= self.bytes.len() {
457 return None;
458 }
459 let b = self.bytes[start];
460 if b == b'-' {
466 return None;
467 }
468 if b.is_ascii_digit() {
469 return self.parse_number(start);
470 }
471 if b.is_ascii_alphabetic() || b == b'_' {
473 let name = self.parse_identifier()?;
474 self.pos_after_last_token = self.pos;
475 return Some(match name.as_str() {
476 "true" => Expr::Bool(true),
477 "false" => Expr::Bool(false),
478 "null" => return None,
479 _ => {
480 if self.pos < self.bytes.len()
484 && matches!(self.bytes[self.pos], b'.' | b'(' | b'[')
485 {
486 return None;
487 }
488 let name_range = range_from_offsets(self.source, start, self.pos);
489 Expr::Variable(vec![crate::token::TokenKey::String(
490 name, name_range, false,
491 )])
492 }
493 });
494 }
495 if b == b'"' {
498 self.pos += 1;
499 let content_start = self.pos;
500 while self.pos < self.bytes.len() {
501 let c = self.bytes[self.pos];
502 if c == b'\\' || c == b'\n' || c == b'\r' {
503 return None;
504 }
505 if c == b'"' {
506 let s = self.source[content_start..self.pos].to_string();
507 self.pos += 1;
508 self.pos_after_last_token = self.pos;
509 return Some(Expr::String(s));
510 }
511 self.pos += 1;
512 }
513 return None;
514 }
515 if b == b'(' {
523 return None;
524 }
525 None
526 }
527
528 fn parse_number(&mut self, start: usize) -> Option<Expr> {
529 let mut saw_dot = false;
531 let mut saw_exp = false;
532 while self.pos < self.bytes.len() {
533 let c = self.bytes[self.pos];
534 if c.is_ascii_digit() {
535 self.pos += 1;
536 } else if c == b'.' && !saw_dot && !saw_exp {
537 if self.pos + 1 < self.bytes.len() && self.bytes[self.pos + 1].is_ascii_digit() {
539 saw_dot = true;
540 self.pos += 1;
541 } else {
542 break;
543 }
544 } else if (c == b'e' || c == b'E') && !saw_exp {
545 saw_exp = true;
546 self.pos += 1;
547 if self.pos < self.bytes.len()
548 && (self.bytes[self.pos] == b'+' || self.bytes[self.pos] == b'-')
549 {
550 self.pos += 1;
551 }
552 } else {
553 break;
554 }
555 }
556 let text = &self.source[start..self.pos];
557 self.pos_after_last_token = self.pos;
558 if saw_dot || saw_exp {
559 let v: f64 = text.parse().ok()?;
560 Some(Expr::Float(ordered_float::OrderedFloat(v)))
561 } else {
562 let v: i64 = text.parse().ok()?;
563 Some(Expr::Int(v))
564 }
565 }
566
567 fn peek_binary_op(&self) -> Option<(Operator, u8)> {
572 if self.pos >= self.bytes.len() {
573 return None;
574 }
575 let b = self.bytes[self.pos];
576 if self.peek_str("==") {
578 return Some((Operator::Eq, 4));
579 }
580 if self.peek_str("!=") {
581 return Some((Operator::Ne, 4));
582 }
583 if self.peek_str("<=") {
584 return Some((Operator::Le, 5));
585 }
586 if self.peek_str(">=") {
587 return Some((Operator::Ge, 5));
588 }
589 match b {
592 b'+' => Some((Operator::Add, 6)),
593 b'-' => {
594 Some((Operator::Sub, 6))
597 }
598 b'*' => Some((Operator::Mul, 7)),
599 b'/' => {
600 if self.pos + 1 < self.bytes.len()
602 && (self.bytes[self.pos + 1] == b'/' || self.bytes[self.pos + 1] == b'*')
603 {
604 None
605 } else {
606 Some((Operator::Div, 7))
607 }
608 }
609 b'%' => Some((Operator::Mod, 7)),
610 b'<' => Some((Operator::Lt, 5)),
611 b'>' => Some((Operator::Gt, 5)),
612 _ => None,
613 }
614 }
615}
616
617fn op_str(op: Operator) -> &'static str {
618 match op {
619 Operator::Add => "+",
620 Operator::Sub => "-",
621 Operator::Mul => "*",
622 Operator::Div => "/",
623 Operator::Mod => "%",
624 Operator::Eq => "==",
625 Operator::Ne => "!=",
626 Operator::Lt => "<",
627 Operator::Gt => ">",
628 Operator::Le => "<=",
629 Operator::Ge => ">=",
630 Operator::And | Operator::Or | Operator::Not | Operator::Pipe | Operator::Concat => {
631 ""
634 }
635 }
636}
637
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse_document;

    /// Asserts that the two nodes compare equal.
    ///
    /// NOTE(review): the name implies node ids are ignored; that must come from
    /// `Node`'s `PartialEq`, since this helper is a plain `assert_eq!` — confirm.
    fn assert_eq_modulo_ids(a: &Node, b: &Node) {
        assert_eq!(a, b, "fast vs slow path Node mismatch");
    }

    /// Parses `src` with both parsers and requires identical results.
    fn assert_fast_matches_slow(src: &str) {
        let fast = parse_document_fast(src).expect("fast path must accept");
        let slow = parse_document(src).expect("slow path must accept");
        assert_eq_modulo_ids(&fast, &slow);
    }

    /// Requires the fast path to decline `src`.
    fn assert_fast_rejects(src: &str) {
        assert!(parse_document_fast(src).is_none());
    }

    #[test]
    fn fast_path_matches_slow_path_on_w11_shape() {
        assert_fast_matches_slow("#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_int_literal_body() {
        assert_fast_matches_slow("#main(Int x) -> Int\n42\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_multi_param() {
        assert_fast_matches_slow("#main(Int x, Int y) -> Int\nx * y + 7\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_ternary() {
        assert_fast_matches_slow("#main(Int x) -> Int\nx > 0 ? x : 0\n");
    }

    #[test]
    fn fast_path_rejects_leading_comment() {
        assert_fast_rejects("// hello\n#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_rejects_decorator() {
        assert_fast_rejects("@brand(X)\n#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_rejects_import_directive() {
        assert_fast_rejects("#import std from \"std/string\"\n#main(Int x) -> Int\nx + 1\n");
    }

    #[test]
    fn fast_path_rejects_list_body() {
        assert_fast_rejects("#main(Int x) -> Int\n[1, 2, 3]\n");
    }

    #[test]
    fn fast_path_rejects_fn_call_body() {
        assert_fast_rejects("#main(Int x) -> Int\nabs(x)\n");
    }

    #[test]
    fn fast_path_rejects_generic_param_type() {
        assert_fast_rejects("#main(List<Int> xs) -> Int\n0\n");
    }

    #[test]
    fn fast_path_rejects_optional_param_type() {
        assert_fast_rejects("#main(Int? x) -> Int\n0\n");
    }

    #[test]
    fn fast_path_rejects_trailing_garbage() {
        assert_fast_rejects("#main(Int x) -> Int\nx + 1\nextra\n");
    }

    #[test]
    fn fast_path_matches_slow_path_with_no_return_type() {
        assert_fast_matches_slow("#main(Int x)\nx + 1\n");
    }

    #[test]
    fn fast_path_matches_slow_path_on_string_literal_body() {
        assert_fast_matches_slow("#main(String s) -> String\n\"hello\"\n");
    }

    #[test]
    fn fast_path_bails_on_negative_number_literal() {
        // The fast path declines, while the full parser still accepts the input.
        let src = "#main(Int x) -> Int\n-1\n";
        assert_fast_rejects(src);
        assert!(parse_document(src).is_ok());
    }

    #[test]
    fn fast_path_bails_on_parenthesised_subexpression() {
        // The fast path declines, while the full parser still accepts the input.
        let src = "#main(Int x) -> Int\n(x + 1) * 2\n";
        assert_fast_rejects(src);
        assert!(parse_document(src).is_ok());
    }
}