leekscript_core/
parser.rs

//! Parser entry points: each grammar is built once and cached, then used to parse source.
2
3use std::sync::OnceLock;
4
5use sipha::engine::{Engine, ParseError, ParseOutput, RecoverMultiResult};
6use sipha::incremental::reparse as sipha_reparse;
7pub use sipha::incremental::TextEdit;
8use sipha::insn::ParseGraph;
9use sipha::parsed_doc::ParsedDoc;
10use sipha::red::SyntaxNode;
11use sipha::types::Span;
12
13use crate::grammar::{
14    build_expression_grammar, build_grammar, build_program_grammar, build_signature_grammar,
15};
16
17type BuiltAndGraph = (sipha::builder::BuiltGraph, ParseGraph);
18
19fn cache_grammar<F>(storage: &'static OnceLock<BuiltAndGraph>, build: F) -> &'static BuiltAndGraph
20where
21    F: FnOnce() -> sipha::builder::BuiltGraph,
22{
23    storage.get_or_init(|| {
24        let grammar = build();
25        let graph = grammar.as_graph();
26        (grammar, graph)
27    })
28}
29
/// Generates `fn $name() -> &'static BuiltAndGraph`, an accessor that builds its grammar
/// with `$build` on the first call and returns the cached pair on every later call.
macro_rules! cached_grammar_fn {
    ($name:ident, $build:ident) => {
        fn $name() -> &'static BuiltAndGraph {
            // Declared inside the generated function, so each accessor has its own cell.
            static CELL: OnceLock<BuiltAndGraph> = OnceLock::new();
            cache_grammar(&CELL, $build)
        }
    };
}
38
39cached_grammar_fn!(token_stream_built_and_graph, build_grammar);
40cached_grammar_fn!(expression_built_and_graph, build_expression_grammar);
41cached_grammar_fn!(program_built_and_graph, build_program_grammar);
42cached_grammar_fn!(signature_built_and_graph, build_signature_grammar);
43
44/// Single place for engine creation and parse; returns raw `ParseOutput`.
45fn run_parse(
46    source: &str,
47    get_graph: fn() -> &'static BuiltAndGraph,
48) -> Result<ParseOutput, ParseError> {
49    let (_, graph) = get_graph();
50    let mut engine = Engine::new().with_memo();
51    engine.parse(graph, source.as_bytes())
52}
53
54fn parse_to_syntax_root(
55    source: &str,
56    get_graph: fn() -> &'static BuiltAndGraph,
57) -> Result<Option<SyntaxNode>, ParseError> {
58    let out = run_parse(source, get_graph)?;
59    Ok(out.syntax_root(source.as_bytes()))
60}
61
62fn parse_to_output(
63    source: &str,
64    get_graph: fn() -> &'static BuiltAndGraph,
65) -> Result<ParseOutput, ParseError> {
66    run_parse(source, get_graph)
67}
68
69/// Parse source as a token stream (Phase 1 lexer).
70///
71/// Returns the sipha parse output; use `.syntax_root(source.as_bytes())` to get
72/// the root syntax node, or `.tree_events` for the raw event list.
73pub fn parse_tokens(source: &str) -> Result<ParseOutput, ParseError> {
74    run_parse(source, token_stream_built_and_graph)
75}
76
77/// Parse source as a program (Phase 3/4: list of statements).
78///
79/// Returns the program root node (`NODE_ROOT` with statement children).
80/// For token stream only, use [`parse_tokens`].
81pub fn parse(source: &str) -> Result<Option<SyntaxNode>, ParseError> {
82    parse_to_syntax_root(source, program_built_and_graph)
83}
84
85/// Parse source as a single expression (Phase 2).
86///
87/// Uses a dedicated expression grammar (primary: number, string, identifier, parenthesized expr).
88pub fn parse_expression(source: &str) -> Result<Option<SyntaxNode>, ParseError> {
89    parse_to_syntax_root(source, expression_built_and_graph)
90}
91
92/// Parse source as a signature file (function/class/global declarations only).
93///
94/// Returns the root node (`NodeSigFile`) whose children are sig items.
95/// Use for loading stdlib or other API signature definitions.
96pub fn parse_signatures(source: &str) -> Result<Option<SyntaxNode>, ParseError> {
97    parse_to_syntax_root(source, signature_built_and_graph)
98}
99
100/// Parse source and return a [`ParsedDoc`]: source bytes, line index, and syntax root.
101///
102/// Use this when you need offset-to-line/column, [`ParsedDoc::node_at_offset`],
103/// [`ParsedDoc::token_at_offset`], or formatted diagnostics. Returns `None` if
104/// the parse produced no or invalid tree events.
105pub fn parse_to_doc(source: &str) -> Result<Option<ParsedDoc>, ParseError> {
106    let out = parse_to_output(source, program_built_and_graph)?;
107    Ok(ParsedDoc::new(source.as_bytes().to_vec(), &out))
108}
109
110/// Parse in recovering mode: on failure, returns the partial output and the error.
111///
112/// Returns `Ok(out)` on full success; `Err((partial, e))` on failure, with
113/// `partial` containing tree events and `consumed` up to the error position.
114/// Use `partial.syntax_root(source.as_bytes())` to try to build a partial tree
115/// (may be `None` if events are not well-nested). Use for IDE or multi-error reporting.
116pub fn parse_recovering(source: &str) -> Result<ParseOutput, (ParseOutput, ParseError)> {
117    let (_, graph) = program_built_and_graph();
118    let mut engine = Engine::new().with_memo();
119    engine.parse_recovering(graph, source.as_bytes())
120}
121
122/// Parse in multi-error recovery mode: on statement failures, skip to the next sync point
123/// (e.g. `;`, `}`, or statement-start keyword) and continue, collecting up to `max_errors` errors.
124///
125/// Requires the program grammar to use [`recover_until`](sipha::builder::GrammarBuilder::recover_until)
126/// (used for `program` and `block`). Returns `Ok(output)` on full success; `Err(RecoverMultiResult { partial, errors })`
127/// when at least one parse error was collected. Use the partial output's syntax root for a best-effort
128/// tree and convert each error to diagnostics for IDE or batch reporting.
129pub fn parse_recovering_multi(
130    source: &str,
131    max_errors: usize,
132) -> Result<ParseOutput, RecoverMultiResult> {
133    let (_, graph) = program_built_and_graph();
134    let mut engine = Engine::new().with_memo();
135    engine.parse_recovering_multi(graph, source.as_bytes(), max_errors)
136}
137
138/// Literal table for the program grammar (used for parsing full programs).
139///
140/// Use with [`parse_error_to_miette`] so that "expected literal#n" in diagnostics
141/// is resolved to the actual token text (e.g. `"var"`, `"function"`).
142#[must_use]
143pub fn program_literals() -> &'static sipha::insn::LiteralTable {
144    &program_built_and_graph().1.literals
145}
146
147/// Rule names for the program grammar (used for diagnostics).
148///
149/// Use with [`parse_error_to_miette`] so that "expected rule#n" shows as the rule name.
150#[must_use]
151pub fn program_rule_names() -> &'static [&'static str] {
152    program_built_and_graph().1.rule_names
153}
154
155/// Reparse after a text edit, reusing unchanged parts of the tree.
156///
157/// Takes the previous source, the previous syntax root (from [`parse`]), and an edit.
158/// Returns the new syntax root, or `None` if the new parse produced no root.
159/// Use for incremental updates in editors or formatters.
160pub fn reparse(
161    old_source: &str,
162    old_root: &SyntaxNode,
163    edit: &TextEdit,
164) -> Result<Option<SyntaxNode>, ParseError> {
165    let (_, graph) = program_built_and_graph();
166    let mut engine = Engine::new().with_memo();
167    sipha_reparse(&mut engine, graph, old_source.as_bytes(), old_root, edit)
168}
169
170/// Apply a text edit and reparse incrementally, or fall back to full parse.
171///
172/// Applies `edit` to `old_source` to obtain the new source. If `old_root` is `Some`,
173/// tries [`reparse`]; on success returns the new root. On reparse failure or when
174/// `old_root` is `None`, runs [`parse`](parse) on the new source and returns the result.
175/// Use in LSP or editors to update the tree after a single range-based change.
176#[must_use]
177pub fn reparse_or_parse(
178    old_source: &str,
179    old_root: Option<&SyntaxNode>,
180    edit: &TextEdit,
181) -> Option<SyntaxNode> {
182    let new_source_bytes = edit.apply(old_source.as_bytes());
183    let new_source = std::str::from_utf8(&new_source_bytes).ok()?;
184    if let Some(root) = old_root {
185        if let Ok(Some(new_root)) = reparse(old_source, root, edit) {
186            return Some(new_root);
187        }
188    }
189    parse(new_source).ok().and_then(std::convert::identity)
190}
191
192/// Expected labels for the program grammar (used for diagnostics).
193#[must_use]
194pub fn program_expected_labels() -> &'static [&'static str] {
195    program_built_and_graph().1.expected_labels
196}
197
198/// Convert a parse error into semantic diagnostics for LSP or other tooling.
199///
200/// Returns a single-element vec for [`ParseError::NoMatch`] (with message from
201/// the program grammar's literals/rule names) or [`ParseError::BadGraph`].
202/// Use when the main program failed to parse so that diagnostics include the
203/// parse error without duplicating conversion logic in the LSP.
204#[must_use]
205pub fn parse_error_to_diagnostics(
206    parse_err: &ParseError,
207    source: &str,
208) -> Vec<sipha::error::SemanticDiagnostic> {
209    let source_bytes = source.as_bytes();
210    let line_index = sipha::line_index::LineIndex::new(source_bytes);
211    let (span, message) = match parse_err {
212        ParseError::NoMatch(diag) => {
213            let message = diag.format_with_source(
214                source_bytes,
215                &line_index,
216                Some(program_literals()),
217                Some(program_rule_names()),
218                Some(program_expected_labels()),
219            );
220            (Span::new(diag.furthest, diag.furthest), message)
221        }
222        ParseError::BadGraph => (Span::new(0, 0), "malformed parse graph".to_string()),
223    };
224    vec![sipha::error::SemanticDiagnostic {
225        span,
226        message,
227        severity: sipha::error::Severity::Error,
228        code: Some("parse_error".to_string()),
229        file_id: None,
230        related: vec![],
231    }]
232}
233
234/// Convert a parse error into a [`miette::Report`] with source snippet and resolved literals.
235///
236/// Uses the **program** grammar's literal and rule-name tables so that expected
237/// tokens and rules show as readable text (e.g. `"var"`, `statement`). Returns
238/// `None` for [`ParseError::BadGraph`](sipha::engine::ParseError::BadGraph).
239///
240/// Use when the error came from [`parse`]. For [`parse_expression`] or
241/// [`parse_tokens`], use the corresponding graph literals via sipha directly
242/// if you need miette reports.
243#[must_use]
244pub fn parse_error_to_miette(
245    e: &ParseError,
246    source: &str,
247    filename: &str,
248) -> Option<miette::Report> {
249    e.to_miette_report(
250        source,
251        filename,
252        Some(program_literals()),
253        Some(program_rule_names()),
254        Some(program_expected_labels()),
255    )
256}
257
#[cfg(test)]
mod tests {
    use sipha::red::SyntaxElement;
    use sipha::types::IntoSyntaxKind;

    use crate::syntax::Kind;

    use super::{
        parse, parse_error_to_miette, parse_expression, parse_recovering_multi, parse_tokens,
        reparse, TextEdit,
    };

    // ─── Shared helpers ──────────────────────────────────────────────────────

    /// Parses `source` as a program and asserts it contains at least one `NodeFunctionDecl`.
    fn assert_parse_function_decl(source: &str, test_name: &str) {
        let root = parse(source).unwrap().expect(test_name);
        let decls = root.find_all_nodes(Kind::NodeFunctionDecl.into_syntax_kind());
        assert!(
            !decls.is_empty(),
            "{test_name}: expected at least one NodeFunctionDecl in {source:?}"
        );
    }

    /// Parses `source` as a program and asserts it contains at least one `NodeTypeExpr`.
    ///
    /// `root_msg` is the panic message used when no root is produced; `missing_msg` is
    /// the assertion message used when no type expression is found.
    fn assert_program_has_type_expr(source: &str, root_msg: &str, missing_msg: &str) {
        let root = parse(source).unwrap().expect(root_msg);
        let type_exprs = root.find_all_nodes(Kind::NodeTypeExpr.into_syntax_kind());
        assert!(!type_exprs.is_empty(), "{missing_msg}");
    }

    // ─── Token stream ────────────────────────────────────────────────────────

    #[test]
    fn parse_tokens_valid() {
        let source = "var x = 42";
        let output = parse_tokens(source).unwrap();
        assert!(
            output.syntax_root(source.as_bytes()).is_some(),
            "token stream should produce a root"
        );
    }

    #[test]
    fn parse_tokens_invalid() {
        assert!(
            parse_tokens("'unterminated string").is_err(),
            "unterminated string should fail"
        );
    }

    // ─── Parser edge cases: malformed or ambiguous inputs ────────────────────

    #[test]
    fn parse_edge_unterminated_double_quote_string() {
        let outcome = parse(r#"return "hello"#);
        assert!(
            outcome.is_err(),
            "unterminated double-quote string should fail"
        );
    }

    #[test]
    fn parse_edge_unterminated_single_quote_string() {
        let outcome = parse("return 'x");
        assert!(
            outcome.is_err(),
            "unterminated single-quote string should fail"
        );
    }

    #[test]
    fn parse_edge_empty_input() {
        let outcome = parse("");
        assert!(outcome.is_ok(), "empty input should not panic");
        // Whether the root is `None` or an empty node depends on the grammar; either is fine.
        let _ = outcome.unwrap();
    }

    #[test]
    fn parse_edge_only_whitespace() {
        assert!(parse("   \n\t  ").is_ok());
    }

    #[test]
    fn parse_edge_incomplete_binary_op() {
        assert!(
            parse("return 1 + ").is_err(),
            "incomplete expression after + should fail"
        );
    }

    #[test]
    fn parse_edge_unclosed_paren() {
        assert!(
            parse("return (1 + 2").is_err(),
            "unclosed parenthesis should fail"
        );
    }

    #[test]
    fn parse_edge_unclosed_brace() {
        // The parser may fail or recover here; the test only guards against a panic.
        let _ = parse("function f() { return 1;");
    }

    #[test]
    fn parse_edge_odd_operator_sequence() {
        // The grammar may reject or accept this; the test only guards against a panic.
        let _ = parse("return 1 * * 2;");
    }

    #[test]
    fn parse_edge_recovery_produces_partial_tree() {
        let source = "var x = 1; return ( ; var y = 2;";
        // `Ok` means the parse succeeded outright. On `Err`, the partial output should
        // still yield a syntax root for downstream use.
        if let Err(failure) = parse_recovering_multi(source, 5) {
            assert!(
                failure.partial.syntax_root(source.as_bytes()).is_some(),
                "recovery Err should yield partial syntax root"
            );
        }
    }

    // ─── Expressions and programs ────────────────────────────────────────────

    #[test]
    fn parse_expression_valid() {
        let root = parse_expression("1").unwrap();
        assert!(root.is_some(), "simple expression should parse");
    }

    #[test]
    fn parse_expression_invalid() {
        // Either an error or a missing root is acceptable; only a real root is a failure.
        assert!(!matches!(parse_expression("1 + "), Ok(Some(_))));
    }

    #[test]
    fn parse_valid_program() {
        let root = parse("return 1 + 2").unwrap().expect("root");
        assert_eq!(root.kind_as::<Kind>(), Some(Kind::NodeRoot));

        // Only node children count as statements; tokens are skipped.
        let first_statement = root.children().find_map(|child| {
            if let SyntaxElement::Node(node) = child {
                Some(node)
            } else {
                None
            }
        });
        let Some(first_statement) = first_statement else {
            panic!("root should have statement children");
        };
        assert_eq!(
            first_statement.kind_as::<Kind>(),
            Some(Kind::NodeReturnStmt),
            "first statement should be return"
        );
    }

    #[test]
    fn parse_invalid_program() {
        // An unclosed parenthesis should be rejected.
        assert!(
            parse("return (").is_err(),
            "invalid program should return parse error"
        );
    }

    #[test]
    fn parse_recovering_multi_collects_multiple_errors() {
        // Two invalid statements: "return (" and "var x = "; recovery skips to the next
        // sync point after each one.
        let source = "return ( ; var x = ";
        let failure = parse_recovering_multi(source, 10)
            .expect_err("recovery should return Err with collected errors");
        let error_count = failure.errors.len();
        assert!(
            error_count >= 2,
            "expected at least 2 parse errors, got {}",
            error_count
        );
    }

    #[test]
    fn parse_error_to_miette_produces_report() {
        let source = "return (";
        let filename = "test.leek";
        let err = parse(source).unwrap_err();

        let report = parse_error_to_miette(&err, source, filename)
            .expect("NoMatch parse error should produce a miette report");
        let rendered = format!("{report:?}");
        assert!(
            rendered.contains("expected") || rendered.contains("test.leek"),
            "report should contain expected tokens or filename: {:?}",
            rendered
        );
    }

    #[test]
    fn assert_parse_sexp() {
        use sipha_diff::{assert_parse_eq, syntax_node_to_sexp, SexpOptions};

        let options = SexpOptions {
            kind_to_name: Some(|k| Some(crate::syntax::kind_name(k))),
            ..SexpOptions::semantic_only()
        };
        let source = "1";
        let root = parse_expression(source).unwrap().expect("root");
        let expected = syntax_node_to_sexp(&root, &options);
        assert_parse_eq(parse_expression(source), source, &expected, &options);
        assert!(
            expected.contains("EXPR"),
            "readable kind names in S-expression"
        );
    }

    #[test]
    fn reparse_after_edit() {
        let old_source = "var x = 1;";
        let old_root = parse(old_source).unwrap().expect("root");
        // Replace the byte range 8..9 of the old source with "2".
        let edit = TextEdit {
            start: 8,
            end: 9,
            new_text: vec![b'2'],
        };
        let new_root = reparse(old_source, &old_root, &edit)
            .unwrap()
            .expect("reparse should yield root");
        let new_text = new_root.collect_text();
        assert!(
            new_text.contains("2"),
            "edited content in reparsed tree: {:?}",
            new_text
        );
    }

    // binary_expr_rhs_field test lives in leekscript-analysis (uses analysis::binary_expr_rhs).

    // ─── Function declaration forms (plan: ensure all shapes parse) ───────────

    #[test]
    fn parse_function_untyped_params_no_return() {
        assert_parse_function_decl("function a(b, c) {}", "untyped params, no return");
    }

    #[test]
    fn parse_function_untyped_params_arrow_return() {
        assert_parse_function_decl("function a(b, c) -> void {}", "untyped params, -> void");
    }

    #[test]
    fn parse_function_untyped_params_fat_arrow_return() {
        assert_parse_function_decl("function a(b, c) => void {}", "untyped params, => void");
    }

    #[test]
    fn parse_function_mixed_params_fat_arrow_return() {
        assert_parse_function_decl(
            "function a(integer b, c) => void {}",
            "mixed params, => void",
        );
    }

    #[test]
    fn parse_function_no_params() {
        assert_parse_function_decl("function a() {}", "no params");
    }

    #[test]
    fn parse_function_typed_params_arrow_return() {
        assert_parse_function_decl(
            "function a(integer x, integer y) -> integer {}",
            "typed params, -> integer",
        );
    }

    // ─── Function type in type position (Function<...>) ────────────────────────

    #[test]
    fn parse_program_with_function_type_two_args_return() {
        assert_program_has_type_expr(
            "var f = null as Function<integer, integer => void>;",
            "Function<integer, integer => void>",
            "expected NodeTypeExpr for Function<...> type",
        );
    }

    #[test]
    fn parse_program_with_function_type_zero_params() {
        assert_program_has_type_expr(
            "var f = null as Function< => void>;",
            "Function< => void>",
            "expected NodeTypeExpr for Function< => void>",
        );
    }

    #[test]
    fn parse_program_with_function_type_one_param() {
        assert_program_has_type_expr(
            "var f = null as Function<integer => void>;",
            "Function<integer => void>",
            "expected NodeTypeExpr for Function<integer => void>",
        );
    }

    #[test]
    fn parse_program_with_function_type_three_params() {
        assert_program_has_type_expr(
            "var f = null as Function<integer, string, real => boolean>;",
            "Function<integer, string, real => boolean>",
            "expected NodeTypeExpr for Function<...>",
        );
    }

    // ─── Anonymous function (function(a, b) { ... }) ───────────────────────────

    #[test]
    fn parse_anonymous_function() {
        let source = "var sum = function(a, b) { return a + b; };";
        let root = parse(source).unwrap().expect("anonymous function");
        let anon_fns = root.find_all_nodes(Kind::NodeAnonFn.into_syntax_kind());
        assert!(
            !anon_fns.is_empty(),
            "expected NodeAnonFn for function(a, b) {{ ... }}"
        );
    }
}
leekscript_core/parser.rs

leekscript_core/
parser.rs