// oak_core/parser/mod.rs
1#![doc = include_str!("readme.md")]
2
3/// Content-based caching for parsed results.
4pub mod cache;
5/// Pratt parser implementation for operator precedence parsing.
6pub mod pratt;
7/// Parser memory pool management.
8pub mod session;
9/// Internal parser state and checkpointing.
10pub mod state;
11
12pub use self::{
13 cache::{CachingParseSession, ContentCache},
14 pratt::{Associativity, OperatorInfo, Pratt, PrattParser, binary, postfix, unary},
15 session::{ParseCache, ParseSession},
16 state::{ParserState, deep_clone_node},
17};
18
19pub use triomphe::Arc;
20
21pub use crate::{
22 Language, Lexer,
23 errors::{OakDiagnostics, OakError},
24 source::{Source, TextEdit},
25 tree::GreenNode,
26};
27
28/// The output of a parsing operation, containing the result and diagnostics.
29pub type ParseOutput<'a, L: Language> = OakDiagnostics<&'a GreenNode<'a, L>>;
30
/// Core parser trait that defines the interface for language parsers.
///
/// This trait is responsible for converting a stream of tokens into a green tree
/// (a lossless, immutable syntax tree). It supports incremental parsing by
/// taking previous edits and using a cache for reuse.
///
/// # Usage Scenario
///
/// The `Parser` is typically used after lexical analysis to:
/// 1. Take a sequence of tokens produced by a [`Lexer`].
/// 2. Build a [`GreenNode`] tree representing the hierarchical structure of the source.
/// 3. Handle incremental updates by reusing nodes from a previous [`GreenNode`] tree.
///
/// # Incremental Parsing
///
/// The `parse` method should ideally be able to reuse nodes from a previous
/// parse if the source has only changed partially. This is facilitated by
/// the [`ParseCache`] and the provided [`TextEdit`]s.
pub trait Parser<L: Language + Send + Sync>
where
    // Every lexed token kind must be convertible into a tree element kind so
    // the parser can place tokens directly into the green tree.
    L::ElementType: From<L::TokenType>,
{
    /// The core parsing entry point for converting tokens into a syntax tree.
    ///
    /// This method orchestrates the parsing process. It performs lexical analysis
    /// (if not already cached) and then builds a green tree structure. It handles
    /// incremental reuse automatically if the cache contains a previous tree.
    ///
    /// # Arguments
    ///
    /// * `text` - The source text to parse.
    /// * `edits` - A slice of [`TextEdit`]s representing changes since the last parse.
    ///   Used for incremental parsing.
    /// * `cache` - The [`ParseCache`] for resources, incremental reuse, and diagnostics.
    ///
    /// # Returns
    ///
    /// A [`ParseOutput`] containing the root [`GreenNode`] and any diagnostics.
    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>;
}
71
/// Standalone parsing function that delegates a complete parse to the parser.
///
/// This is a convenience function for performing a parse in one call.
///
/// NOTE(review): the `_lexer` argument is currently unused — this function
/// delegates straight to [`Parser::parse`], which is documented to perform
/// lexing itself when the cache holds no lex output. Confirm whether the
/// lexer should instead pre-populate the cache here, as [`parse_with_lexer`]
/// does, or whether the parameter should be kept only for API symmetry.
pub fn parse<'a, L, P, Lex, S>(parser: &P, _lexer: &Lex, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>
where
    L: Language + Send + Sync,
    L::ElementType: From<L::TokenType>,
    P: Parser<L>,
    Lex: Lexer<L>,
    S: Source + ?Sized,
{
    parser.parse(text, edits, cache)
}
86
87/// Standalone parsing function that performs a complete parse without incremental reuse.
88///
89/// This is a convenience function for parsing a source from scratch.
90pub fn parse_one_pass<'a, L, P, S>(parser: &P, text: &'a S, cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>
91where
92 L: Language + Send + Sync,
93 L::ElementType: From<L::TokenType>,
94 P: Parser<L>,
95 S: Source + ?Sized,
96{
97 parser.parse(text, &[], cache)
98}
99
/// Standalone parsing function intended for parallel parsing of large files.
///
/// Chunked parallel parsing needs two capabilities that do not exist yet:
/// sub-slicing a [`Source`] into independent chunks, and merging the resulting
/// [`GreenNode`] trees. Until both are available, this function performs a
/// single full parse, which is correct and strictly cheaper than the previous
/// placeholder: that version spawned one task per 1MB chunk, had every task
/// parse the *entire* text, and then discarded all results but the first. It
/// also required cloning the cache (no `Clone` bound exists on
/// `impl ParseCache<L>`) and returned borrows of closure-local caches, neither
/// of which can compile.
///
/// # Returns
///
/// A [`ParseOutput`] containing the root [`GreenNode`] and any diagnostics.
#[cfg(feature = "parallel")]
pub fn parse_parallel<'a, L, P, S>(parser: &P, text: &'a S, cache: &'a mut impl ParseCache<L>) -> ParseOutput<'a, L>
where
    L: Language + Send + Sync,
    L::ElementType: From<L::TokenType>,
    // `Sync` is kept so the signature stays stable for callers once real
    // parallel chunking lands.
    P: Parser<L> + Sync,
    S: Source + ?Sized,
{
    // TODO(parallel): split `text` into chunks, parse them with rayon, and
    // merge the chunk trees once Source sub-slicing and tree merging exist.
    parse_one_pass(parser, text, cache)
}
151
/// This function handles the boilerplate of preparing the cache, ensuring lexing is performed,
/// setting up the parser state, and committing the result.
///
/// # Arguments
///
/// * `lexer` - Runs only when the cache holds no lex output.
/// * `text` - The source text to parse.
/// * `edits` - Changes since the last parse, forwarded for incremental reuse.
/// * `cache` - Supplies the arena, any previous tree, and cached lex output.
/// * `run` - The language-specific parse routine, driven against the prepared [`ParserState`].
pub fn parse_with_lexer<'a, L, S, Lex>(lexer: &Lex, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<L>, run: impl FnOnce(&mut ParserState<'a, L, S>) -> Result<&'a GreenNode<'a, L>, OakError>) -> ParseOutput<'a, L>
where
    L: Language + Send + Sync,
    L::ElementType: From<L::TokenType>,
    S: Source + ?Sized,
    Lex: Lexer<L>,
{
    // 1. Get Lexing Result (Auto-lex if missing).
    //    `.cloned()` ends the immutable borrow of `cache` so it can be
    //    mutably borrowed again by `set_lex_output` below.
    let lex_out = match cache.lex_output().cloned() {
        Some(out) => out,
        None => {
            let out = lexer.lex(text, edits, cache);
            cache.set_lex_output(out.clone());
            out
        }
    };

    // Size the state's buffers from the previous tree when one exists
    // (floor of 1024 either way).
    let capacity_hint = cache.old_tree().map(|old| old.children.len().max(1024)).unwrap_or(1024);

    // 2. Initialize Parser State
    // Safety: We transmute the arena and old tree to 'a to satisfy the borrow checker.
    // The ParseCache guarantees that the arena and old tree live long enough.
    let arena: &'a crate::memory::arena::SyntaxArena = unsafe { std::mem::transmute(cache.arena()) };
    let mut st = ParserState::new(arena, lex_out, text, capacity_hint);

    if let Some(old) = cache.old_tree() {
        // SAFETY: same contract as the arena transmute above — the old tree
        // is owned by the cache, which outlives `'a`.
        let old: &'a GreenNode<'a, L> = unsafe { std::mem::transmute(old) };
        st.set_incremental(old, edits);
    }

    // 3. Run Parser Logic
    let result = run(&mut st);
    let output = st.finish(result);

    // 4. Commit Generation — only a successful root becomes the reusable
    // "old tree" for the next incremental parse.
    if let Ok(root) = output.result {
        cache.commit_generation(root);
    }

    output
}