Skip to main content

perl_lexer/
checkpoint.rs

1//! Lexer checkpointing for incremental parsing
2//!
3//! This module provides checkpointing functionality for the Perl lexer,
4//! allowing it to save and restore its state for incremental parsing.
5
6use crate::{LexerMode, Position};
7use std::fmt;
8
9/// A checkpoint that captures the complete lexer state
10#[derive(Debug, Clone, PartialEq)]
11pub struct LexerCheckpoint {
12    /// Current position in the input
13    pub position: usize,
14    /// Current lexer mode (`ExpectTerm`, `ExpectOperator`, etc.)
15    pub mode: LexerMode,
16    /// Stack for nested delimiters in s{}{} constructs
17    pub delimiter_stack: Vec<char>,
18    /// Whether we're inside prototype parens after 'sub'
19    pub in_prototype: bool,
20    /// Paren depth to track when we exit prototype
21    pub prototype_depth: usize,
22    /// Current position with line/column tracking
23    pub current_pos: Position,
24    /// Additional context for complex states
25    pub context: CheckpointContext,
26}
27
28/// Additional context that may be needed for certain lexer states
29#[derive(Debug, Clone, PartialEq)]
30pub enum CheckpointContext {
31    /// Normal lexing
32    Normal,
33    /// Inside a heredoc (tracks the terminator)
34    Heredoc { terminator: String, is_interpolated: bool },
35    /// Inside a format body
36    Format { start_position: usize },
37    /// Inside a regex or substitution
38    Regex { delimiter: char, flags_position: Option<usize> },
39    /// Inside a quote-like operator
40    QuoteLike { operator: String, delimiter: char, is_paired: bool },
41}
42
43impl LexerCheckpoint {
44    /// Create a new checkpoint with default values
45    pub fn new() -> Self {
46        Self {
47            position: 0,
48            mode: LexerMode::ExpectTerm,
49            delimiter_stack: Vec::new(),
50            in_prototype: false,
51            prototype_depth: 0,
52            current_pos: Position::start(),
53            context: CheckpointContext::Normal,
54        }
55    }
56
57    /// Create a checkpoint at a specific position
58    pub fn at_position(position: usize) -> Self {
59        Self { position, ..Self::new() }
60    }
61
62    /// Check if this checkpoint is at the start of input
63    pub fn is_at_start(&self) -> bool {
64        self.position == 0
65    }
66
67    /// Calculate the difference between two checkpoints
68    pub fn diff(&self, other: &Self) -> CheckpointDiff {
69        CheckpointDiff {
70            position_delta: self.position as isize - other.position as isize,
71            mode_changed: self.mode != other.mode,
72            delimiter_stack_changed: self.delimiter_stack != other.delimiter_stack,
73            prototype_state_changed: self.in_prototype != other.in_prototype
74                || self.prototype_depth != other.prototype_depth,
75            context_changed: self.context != other.context,
76        }
77    }
78
79    /// Apply an edit to this checkpoint
80    pub fn apply_edit(&mut self, start: usize, old_len: usize, new_len: usize) {
81        if self.position > start {
82            if self.position >= start + old_len {
83                // Checkpoint is after the edit
84                self.position = self.position - old_len + new_len;
85            } else {
86                // Checkpoint is inside the edit - invalidate
87                self.position = start;
88                self.mode = LexerMode::ExpectTerm;
89                self.delimiter_stack.clear();
90                self.in_prototype = false;
91                self.prototype_depth = 0;
92                self.context = CheckpointContext::Normal;
93            }
94        }
95
96        // Update position tracking
97        // In a real implementation, we'd update line/column based on the edit
98    }
99
100    /// Validate that this checkpoint is valid for the given input
101    pub fn is_valid_for(&self, input: &str) -> bool {
102        self.position <= input.len()
103    }
104}
105
106impl Default for LexerCheckpoint {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
112impl fmt::Display for LexerCheckpoint {
113    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114        write!(
115            f,
116            "Checkpoint@{} mode={:?} delims={} proto={}",
117            self.position,
118            self.mode,
119            self.delimiter_stack.len(),
120            self.in_prototype
121        )
122    }
123}
124
125/// Represents the difference between two checkpoints
126#[derive(Debug)]
127pub struct CheckpointDiff {
128    pub position_delta: isize,
129    pub mode_changed: bool,
130    pub delimiter_stack_changed: bool,
131    pub prototype_state_changed: bool,
132    pub context_changed: bool,
133}
134
135impl CheckpointDiff {
136    /// Check if any state changed besides position
137    pub fn has_state_changes(&self) -> bool {
138        self.mode_changed
139            || self.delimiter_stack_changed
140            || self.prototype_state_changed
141            || self.context_changed
142    }
143}
144
145/// Trait for types that support checkpointing
146pub trait Checkpointable {
147    /// Create a checkpoint of the current state
148    fn checkpoint(&self) -> LexerCheckpoint;
149
150    /// Restore state from a checkpoint
151    fn restore(&mut self, checkpoint: &LexerCheckpoint);
152
153    /// Check if we can restore to a given checkpoint
154    fn can_restore(&self, checkpoint: &LexerCheckpoint) -> bool;
155}
156
157/// A checkpoint cache for efficient incremental parsing
158pub struct CheckpointCache {
159    /// Cached checkpoints at various positions
160    checkpoints: Vec<(usize, LexerCheckpoint)>,
161    /// Maximum number of checkpoints to cache
162    max_checkpoints: usize,
163}
164
165impl CheckpointCache {
166    /// Create a new checkpoint cache
167    pub fn new(max_checkpoints: usize) -> Self {
168        Self { checkpoints: Vec::new(), max_checkpoints }
169    }
170
171    /// Add a checkpoint to the cache
172    pub fn add(&mut self, checkpoint: LexerCheckpoint) {
173        let position = checkpoint.position;
174
175        // Remove any existing checkpoint at this position
176        self.checkpoints.retain(|(pos, _)| *pos != position);
177
178        // Add the new checkpoint
179        self.checkpoints.push((position, checkpoint));
180
181        // Sort by position
182        self.checkpoints.sort_by_key(|(pos, _)| *pos);
183
184        // Trim to max size
185        if self.checkpoints.len() > self.max_checkpoints {
186            // Keep checkpoints evenly distributed
187            let total = self.checkpoints.len();
188            let step = total as f64 / self.max_checkpoints as f64;
189            let mut kept = Vec::new();
190            for i in 0..self.max_checkpoints {
191                let idx = (i as f64 * step) as usize;
192                if idx < total {
193                    kept.push(self.checkpoints[idx].clone());
194                }
195            }
196            self.checkpoints = kept;
197        }
198    }
199
200    /// Find the nearest checkpoint before a given position
201    pub fn find_before(&self, position: usize) -> Option<&LexerCheckpoint> {
202        self.checkpoints.iter().rev().find(|(pos, _)| *pos <= position).map(|(_, cp)| cp)
203    }
204
205    /// Clear all cached checkpoints
206    pub fn clear(&mut self) {
207        self.checkpoints.clear();
208    }
209
210    /// Apply an edit to all cached checkpoints
211    pub fn apply_edit(&mut self, start: usize, old_len: usize, new_len: usize) {
212        // Update all checkpoints
213        for (pos, checkpoint) in &mut self.checkpoints {
214            checkpoint.apply_edit(start, old_len, new_len);
215            *pos = checkpoint.position;
216        }
217
218        // Remove invalid checkpoints
219        self.checkpoints
220            .retain(|(_, cp)| !matches!(cp.context, CheckpointContext::Normal) || cp.position > 0);
221    }
222}
223
224#[cfg(test)]
225mod tests {
226    use super::*;
227
228    #[test]
229    fn test_checkpoint_creation() {
230        let cp = LexerCheckpoint::new();
231        assert_eq!(cp.position, 0);
232        assert_eq!(cp.mode, LexerMode::ExpectTerm);
233        assert!(cp.delimiter_stack.is_empty());
234    }
235
236    #[test]
237    fn test_checkpoint_diff() {
238        let cp1 = LexerCheckpoint::at_position(10);
239        let mut cp2 = cp1.clone();
240        cp2.position = 20;
241        cp2.mode = LexerMode::ExpectOperator;
242
243        let diff = cp2.diff(&cp1);
244        assert_eq!(diff.position_delta, 10);
245        assert!(diff.mode_changed);
246        assert!(!diff.delimiter_stack_changed);
247    }
248
249    #[test]
250    fn test_checkpoint_edit() {
251        let mut cp = LexerCheckpoint::at_position(50);
252
253        // Edit before checkpoint
254        cp.apply_edit(10, 5, 10);
255        assert_eq!(cp.position, 55); // Shifted by +5
256
257        // Edit after checkpoint
258        let mut cp2 = LexerCheckpoint::at_position(50);
259        cp2.apply_edit(60, 10, 5);
260        assert_eq!(cp2.position, 50); // No change
261
262        // Edit containing checkpoint
263        let mut cp3 = LexerCheckpoint::at_position(50);
264        cp3.apply_edit(45, 10, 5);
265        assert_eq!(cp3.position, 45); // Reset to edit start
266    }
267
268    #[test]
269    fn test_checkpoint_cache() -> std::result::Result<(), Box<dyn std::error::Error>> {
270        let mut cache = CheckpointCache::new(3);
271
272        // Add checkpoints
273        cache.add(LexerCheckpoint::at_position(10));
274        cache.add(LexerCheckpoint::at_position(20));
275        cache.add(LexerCheckpoint::at_position(30));
276        cache.add(LexerCheckpoint::at_position(40));
277
278        // Should keep 3 evenly distributed
279        assert_eq!(cache.checkpoints.len(), 3);
280
281        // Find nearest before position 25
282        let cp = cache.find_before(25).ok_or("Expected checkpoint before position 25")?;
283        assert_eq!(cp.position, 20);
284        Ok(())
285    }
286}