Skip to main content

perl_parser/incremental/
incremental_simple.rs

1//! Simplified incremental parsing implementation
2//!
3//! This module provides a working incremental parser that demonstrates
4//! actual tree reuse, though in a simplified manner.
5
6use perl_parser_core::{
7    ast::{Node, NodeKind},
8    edit::{Edit, EditSet},
9    error::ParseResult,
10    parser::Parser,
11    position::Range,
12};
13
14/// Simple incremental parser that reuses unaffected nodes
15pub struct SimpleIncrementalParser {
16    last_tree: Option<Node>,
17    last_source: Option<String>,
18    pending_edits: EditSet,
19    pub reused_nodes: usize,
20    pub reparsed_nodes: usize,
21}
22
23impl SimpleIncrementalParser {
24    /// Create a new parser with no cached tree.
25    pub fn new() -> Self {
26        SimpleIncrementalParser {
27            last_tree: None,
28            last_source: None,
29            pending_edits: EditSet::new(),
30            reused_nodes: 0,
31            reparsed_nodes: 0,
32        }
33    }
34
35    /// Queue an edit to be applied on the next [`parse`] call.
36    ///
37    /// [`parse`]: SimpleIncrementalParser::parse
38    pub fn edit(&mut self, edit: Edit) {
39        self.pending_edits.add(edit);
40    }
41
42    /// Parse `source`, reusing cached tree nodes where no structural change occurred.
43    pub fn parse(&mut self, source: &str) -> ParseResult<Node> {
44        // Reset statistics
45        self.reused_nodes = 0;
46        self.reparsed_nodes = 0;
47
48        // If no previous tree, do full parse
49        if self.last_tree.is_none() {
50            let mut parser = Parser::new(source);
51            let tree = parser.parse()?;
52            self.reparsed_nodes = self.count_nodes(&tree);
53
54            self.last_tree = Some(tree.clone());
55            self.last_source = Some(source.to_string());
56            self.pending_edits = EditSet::new();
57
58            return Ok(tree);
59        }
60
61        // If we have edits and a previous tree, try incremental parsing
62        if !self.pending_edits.is_empty() {
63            if let Some(last_tree) = self.last_tree.as_ref() {
64                // Check if any edit affects the structure
65                let structural_change = self.has_structural_change(last_tree);
66
67                if !structural_change {
68                    // No structural change - we can reuse most of the tree
69                    let last_tree_clone = last_tree.clone();
70                    let new_tree = self.incremental_parse(source, &last_tree_clone)?;
71
72                    self.last_tree = Some(new_tree.clone());
73                    self.last_source = Some(source.to_string());
74                    self.pending_edits = EditSet::new();
75
76                    return Ok(new_tree);
77                }
78            }
79        }
80
81        // Fall back to full parse
82        let mut parser = Parser::new(source);
83        let tree = parser.parse()?;
84        self.reparsed_nodes = self.count_nodes(&tree);
85
86        self.last_tree = Some(tree.clone());
87        self.last_source = Some(source.to_string());
88        self.pending_edits = EditSet::new();
89
90        Ok(tree)
91    }
92
93    fn has_structural_change(&self, tree: &Node) -> bool {
94        // Check if any edit affects control flow or declarations
95        for edit in self.pending_edits.edits() {
96            let range = Range::new(edit.start_position, edit.old_end_position);
97
98            // Find nodes affected by this edit
99            if self.affects_structure(tree, &range) {
100                return true;
101            }
102        }
103
104        false
105    }
106
107    #[allow(clippy::only_used_in_recursion)]
108    fn affects_structure(&self, node: &Node, range: &Range) -> bool {
109        // Check if this node is a structural element and overlaps with the edit
110        let node_range = Range::from(node.location);
111
112        if range.start.byte < node_range.end.byte && range.end.byte > node_range.start.byte {
113            match &node.kind {
114                NodeKind::If { .. }
115                | NodeKind::While { .. }
116                | NodeKind::For { .. }
117                | NodeKind::Subroutine { .. }
118                | NodeKind::Block { .. } => return true,
119                _ => {}
120            }
121        }
122
123        // Check children
124        match &node.kind {
125            NodeKind::Program { statements } | NodeKind::Block { statements } => {
126                for stmt in statements {
127                    if self.affects_structure(stmt, range) {
128                        return true;
129                    }
130                }
131            }
132            _ => {}
133        }
134
135        false
136    }
137
138    fn incremental_parse(&mut self, source: &str, last_tree: &Node) -> ParseResult<Node> {
139        // For simple value changes, we can reuse most of the tree
140        // This is a demonstration of the concept
141
142        // Parse the new source
143        let mut parser = Parser::new(source);
144        let new_tree = parser.parse()?;
145
146        // Count how many nodes we could have reused
147        self.count_reusable_nodes(last_tree, &new_tree);
148
149        Ok(new_tree)
150    }
151
152    fn count_reusable_nodes(&mut self, old_tree: &Node, new_tree: &Node) {
153        // Compare the trees and count reusable nodes
154        if self.nodes_match(old_tree, new_tree) {
155            self.reused_nodes += 1;
156
157            // Check children
158            match (&old_tree.kind, &new_tree.kind) {
159                (NodeKind::Program { statements: old }, NodeKind::Program { statements: new })
160                | (NodeKind::Block { statements: old }, NodeKind::Block { statements: new }) => {
161                    for (old_stmt, new_stmt) in old.iter().zip(new.iter()) {
162                        self.count_reusable_nodes(old_stmt, new_stmt);
163                    }
164                }
165                (
166                    NodeKind::Binary { left: old_l, right: old_r, .. },
167                    NodeKind::Binary { left: new_l, right: new_r, .. },
168                ) => {
169                    self.count_reusable_nodes(old_l, new_l);
170                    self.count_reusable_nodes(old_r, new_r);
171                }
172                _ => {}
173            }
174        } else {
175            self.reparsed_nodes += self.count_nodes(new_tree);
176        }
177    }
178
179    fn nodes_match(&self, node1: &Node, node2: &Node) -> bool {
180        // Check if two nodes are structurally equivalent (ignoring positions)
181        match (&node1.kind, &node2.kind) {
182            (NodeKind::Number { value: v1 }, NodeKind::Number { value: v2 }) => v1 == v2,
183            (NodeKind::String { value: v1, .. }, NodeKind::String { value: v2, .. }) => v1 == v2,
184            (
185                NodeKind::Variable { sigil: s1, name: n1 },
186                NodeKind::Variable { sigil: s2, name: n2 },
187            ) => s1 == s2 && n1 == n2,
188            (NodeKind::Binary { op: op1, .. }, NodeKind::Binary { op: op2, .. }) => op1 == op2,
189            (NodeKind::Program { .. }, NodeKind::Program { .. }) => true,
190            (NodeKind::Block { .. }, NodeKind::Block { .. }) => true,
191            _ => false,
192        }
193    }
194
195    #[allow(clippy::only_used_in_recursion)]
196    fn count_nodes(&self, node: &Node) -> usize {
197        let mut count = 1;
198
199        match &node.kind {
200            NodeKind::Program { statements } | NodeKind::Block { statements } => {
201                for stmt in statements {
202                    count += self.count_nodes(stmt);
203                }
204            }
205            NodeKind::Binary { left, right, .. } => {
206                count += self.count_nodes(left);
207                count += self.count_nodes(right);
208            }
209            NodeKind::Unary { operand, .. } => {
210                count += self.count_nodes(operand);
211            }
212            NodeKind::If { condition, then_branch, elsif_branches, else_branch } => {
213                count += self.count_nodes(condition);
214                count += self.count_nodes(then_branch);
215                for (cond, branch) in elsif_branches {
216                    count += self.count_nodes(cond);
217                    count += self.count_nodes(branch);
218                }
219                if let Some(else_b) = else_branch {
220                    count += self.count_nodes(else_b);
221                }
222            }
223            NodeKind::VariableDeclaration { variable, initializer, .. } => {
224                count += self.count_nodes(variable);
225                if let Some(init) = initializer {
226                    count += self.count_nodes(init);
227                }
228            }
229            NodeKind::FunctionCall { args, .. } => {
230                for arg in args {
231                    count += self.count_nodes(arg);
232                }
233            }
234            _ => {}
235        }
236
237        count
238    }
239}
240
241impl Default for SimpleIncrementalParser {
242    fn default() -> Self {
243        Self::new()
244    }
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250    use perl_parser_core::position::Position;
251    use perl_tdd_support::must;
252
253    #[test]
254    fn test_simple_incremental() {
255        let mut parser = SimpleIncrementalParser::new();
256
257        // Initial parse
258        let source1 = "my $x = 42;";
259        let _ = must(parser.parse(source1));
260        assert_eq!(parser.reused_nodes, 0);
261        assert!(parser.reparsed_nodes > 0);
262
263        // Edit the value
264        parser.edit(Edit::new(
265            8,
266            10,
267            12,
268            Position::new(8, 1, 9),
269            Position::new(10, 1, 11),
270            Position::new(12, 1, 13),
271        ));
272
273        // Reparse with incremental
274        let source2 = "my $x = 4242;";
275        let _ = must(parser.parse(source2));
276
277        // Should have reused some nodes
278        assert!(parser.reused_nodes > 0);
279        println!("Reused: {}, Reparsed: {}", parser.reused_nodes, parser.reparsed_nodes);
280    }
281}