// oak_core/parser/mod.rs
1#![doc = include_str!("readme.md")]
2
3/// Content-based caching for parsed results.
4pub mod cache;
5/// Pratt parser implementation for operator precedence parsing.
6pub mod pratt;
7/// Parser memory pool management.
8pub mod session;
9/// Internal parser state and checkpointing.
10pub mod state;
11
12pub use self::{
13 cache::{CachingParseSession, ContentCache},
14 pratt::{Associativity, OperatorInfo, Pratt, PrattParser, binary, postfix, unary},
15 session::{ParseCache, ParseSession},
16 state::{ParserState, deep_clone_node},
17};
18
19pub use triomphe::Arc;
20
21pub use crate::{
22 Language, Lexer,
23 errors::{OakDiagnostics, OakError},
24 source::{Source, TextEdit},
25 tree::GreenNode,
26};
27
28/// The output of a parsing operation, containing the result and diagnostics.
29pub type ParseOutput<'a, L: Language> = OakDiagnostics<&'a GreenNode<'a, L>>;
30
/// Core parser trait that defines the interface for language parsers.
///
/// This trait is responsible for converting a stream of tokens into a green tree
/// (a lossless, immutable syntax tree). It supports incremental parsing by
/// taking previous edits and using a cache for reuse.
///
/// # Usage Scenario
///
/// The `Parser` is typically used after lexical analysis to:
/// 1. Take a sequence of tokens produced by a [`Lexer`].
/// 2. Build a [`GreenNode`] tree representing the hierarchical structure of the source.
/// 3. Handle incremental updates by reusing nodes from a previous [`GreenNode`] tree.
///
/// # Incremental Parsing
///
/// The `parse` method should ideally be able to reuse nodes from a previous
/// parse if the source has only changed partially. This is facilitated by
/// the [`ParseCache`] and the provided [`TextEdit`]s.
pub trait Parser<L: Language + Send + Sync>
where
    // Every lexed token kind must be convertible into a tree element kind so
    // the parser can place tokens directly into the green tree.
    L::ElementType: From<L::TokenType>,
{
    /// The core parsing entry point for converting tokens into a syntax tree.
    ///
    /// This method orchestrates the parsing process. It performs lexical analysis
    /// (if not already cached) and then builds a green tree structure. It handles
    /// incremental reuse automatically if the cache contains a previous tree.
    ///
    /// # Arguments
    ///
    /// * `text` - The source text to parse.
    /// * `edits` - A slice of [`TextEdit`]s representing changes since the last parse.
    ///   Used for incremental parsing.
    /// * `cache` - The [`ParseCache`] for resources, incremental reuse, and diagnostics.
    ///
    /// # Returns
    ///
    /// A [`ParseOutput`] containing the root [`GreenNode`] and any diagnostics.
    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>;
}
71
/// Standalone parsing function that delegates a complete parse to the parser.
///
/// This is a convenience function for performing a parse in one call.
///
/// NOTE(review): the `_lexer` argument is currently unused — this function
/// delegates straight to [`Parser::parse`], which is documented to perform
/// lexing itself when the cache holds no lex output. Confirm whether the
/// lexer should instead pre-populate the cache here, as [`parse_with_lexer`]
/// does, or whether the parameter should be kept only for API symmetry.
pub fn parse<'a, L, P, Lex, S>(parser: &P, _lexer: &Lex, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>
where
    L: Language + Send + Sync,
    L::ElementType: From<L::TokenType>,
    P: Parser<L>,
    Lex: Lexer<L>,
    S: Source + ?Sized,
{
    parser.parse(text, edits, cache)
}
86
87/// Standalone parsing function that performs a complete parse without incremental reuse.
88///
89/// This is a convenience function for parsing a source from scratch.
90pub fn parse_one_pass<'a, L, P, S>(parser: &P, text: &'a S, cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>
91where
92 L: Language + Send + Sync,
93 L::ElementType: From<L::TokenType>,
94 P: Parser<L>,
95 S: Source + ?Sized,
96{
97 parser.parse(text, &[], cache)
98}
99
/// Standalone parsing function intended for parallel parsing of large files.
///
/// Chunked parallel parsing needs two capabilities that do not exist yet:
/// sub-slicing a [`Source`] into independent chunks, and merging the resulting
/// [`GreenNode`] trees. Until both are available, this function performs a
/// single full parse, which is correct and strictly cheaper than the previous
/// placeholder: that version spawned one task per 1MB chunk, had every task
/// parse the *entire* text, and then discarded all results but the first. It
/// also required cloning the cache (no `Clone` bound exists on
/// `impl ParseCache<L>`) and returned borrows of closure-local caches, neither
/// of which can compile.
///
/// # Returns
///
/// A [`ParseOutput`] containing the root [`GreenNode`] and any diagnostics.
#[cfg(feature = "parallel")]
pub fn parse_parallel<'a, L, P, S>(parser: &P, text: &'a S, cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>
where
    L: Language + Send + Sync,
    L::ElementType: From<L::TokenType>,
    // `Sync` is kept so the signature stays stable for callers once real
    // parallel chunking lands.
    P: Parser<L> + Sync,
    S: Source + ?Sized,
{
    // TODO(parallel): split `text` into chunks, parse them with rayon, and
    // merge the chunk trees once Source sub-slicing and tree merging exist.
    parse_one_pass(parser, text, cache)
}
151
/// This function handles the boilerplate of preparing the cache, ensuring lexing is performed,
/// setting up the parser state, and committing the result.
///
/// # Arguments
///
/// * `lexer` - Runs only when the cache holds no lex output.
/// * `text` - The source text to parse.
/// * `edits` - Changes since the last parse, forwarded for incremental reuse.
/// * `cache` - Supplies the arena, any previous tree, and cached lex output.
/// * `run` - The language-specific parse routine, driven against the prepared [`ParserState`].
pub fn parse_with_lexer<'a, L, S, Lex>(lexer: &Lex, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<L>, run: impl FnOnce(&mut ParserState<'a, L, S>) -> Result<&'a GreenNode<'a, L>, OakError>) -> ParseOutput<'a, L>
where
    L: Language + Send + Sync,
    L::ElementType: From<L::TokenType>,
    S: Source + ?Sized,
    Lex: Lexer<L>,
{
    // 1. Get Lexing Result (Auto-lex if missing).
    //    `.cloned()` ends the immutable borrow of `cache` so it can be
    //    mutably borrowed again by `set_lex_output` below.
    let lex_out = match cache.lex_output().cloned() {
        Some(out) => out,
        None => {
            let out = lexer.lex(text, edits, cache);
            cache.set_lex_output(out.clone());
            out
        }
    };

    // Size the state's buffers from the previous tree when one exists
    // (floor of 1024 either way).
    let capacity_hint = cache.old_tree().map(|old| old.children.len().max(1024)).unwrap_or(1024);

    // 2. Initialize Parser State
    // Safety: We transmute the arena and old tree to 'a to satisfy the borrow checker.
    // The ParseCache guarantees that the arena and old tree live long enough.
    let arena: &'a crate::memory::arena::SyntaxArena = unsafe { std::mem::transmute(cache.arena()) };
    let mut st = ParserState::new(arena, lex_out, text, capacity_hint);

    if let Some(old) = cache.old_tree() {
        // SAFETY: same contract as the arena transmute above — the old tree
        // is owned by the cache, which outlives `'a`.
        let old: &'a GreenNode<'a, L> = unsafe { std::mem::transmute(old) };
        st.set_incremental(old, edits);
    }

    // 3. Run Parser Logic
    let result = run(&mut st);
    let output = st.finish(result);

    // 4. Commit Generation — only a successful root becomes the reusable
    // "old tree" for the next incremental parse.
    if let Ok(root) = output.result {
        cache.commit_generation(root);
    }

    output
}