ass_editor/core/
incremental.rs

1//! Incremental parsing integration with ass-core
2//!
3//! Provides efficient incremental parsing by leveraging Script::parse_partial()
4//! to achieve <1ms edit times and <5ms reparse times. Tracks deltas for proper
5//! undo/redo integration and maintains consistency with the rope structure.
6
7use super::{Range, Result};
8use crate::core::errors::EditorError;
9use ass_core::parser::{script::ScriptDeltaOwned, Script};
10
11#[cfg(feature = "std")]
12use std::borrow::Cow;
13
14#[cfg(not(feature = "std"))]
15use alloc::{borrow::Cow, format, string::String, string::ToString};
16
17#[cfg(not(feature = "std"))]
18use alloc::vec::Vec;
19
20/// Represents a change to the document with delta tracking
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct DocumentChange<'a> {
23    /// The range that was affected
24    pub range: Range,
25
26    /// The new text that replaced the range
27    pub new_text: Cow<'a, str>,
28
29    /// The old text that was replaced (for undo)
30    pub old_text: Cow<'a, str>,
31
32    /// Timestamp when change occurred (for grouping)
33    #[cfg(feature = "std")]
34    pub timestamp: std::time::Instant,
35
36    /// Change ID for tracking (monotonic counter)
37    pub change_id: u64,
38}
39
40/// Manages incremental parsing state and delta accumulation
41#[derive(Debug)]
42pub struct IncrementalParser {
43    /// Last successfully parsed script (cached)
44    cached_script: Option<String>,
45
46    /// Accumulated changes since last full parse (owned to persist)
47    pending_changes: Vec<DocumentChange<'static>>,
48
49    /// Next change ID
50    next_change_id: u64,
51
52    /// Threshold for triggering full reparse (in bytes changed)
53    reparse_threshold: usize,
54
55    /// Total bytes changed since last full parse
56    bytes_changed: usize,
57}
58
59impl Default for IncrementalParser {
60    fn default() -> Self {
61        Self::new()
62    }
63}
64
65impl IncrementalParser {
66    /// Create a new incremental parser
67    pub fn new() -> Self {
68        Self {
69            cached_script: None,
70            pending_changes: Vec::new(),
71            next_change_id: 1,
72            reparse_threshold: 10_000, // 10KB of changes triggers full reparse
73            bytes_changed: 0,
74        }
75    }
76
77    /// Set the reparse threshold in bytes
78    pub fn set_reparse_threshold(&mut self, threshold: usize) {
79        self.reparse_threshold = threshold;
80    }
81
82    /// Initialize the cache with the current document content
83    /// This primes the incremental parser for efficient subsequent edits
84    pub fn initialize_cache(&mut self, content: &str) {
85        self.cached_script = Some(content.to_string());
86        self.pending_changes.clear();
87        self.bytes_changed = 0;
88    }
89
90    /// Check if there's a cached script available
91    pub fn has_cached_script(&self) -> bool {
92        self.cached_script.is_some()
93    }
94
95    /// Execute a function with the cached script (avoids re-parsing)
96    pub fn with_cached_script<F, R>(&self, f: F) -> Result<R>
97    where
98        F: FnOnce(&Script) -> Result<R>,
99    {
100        let cached = self
101            .cached_script
102            .as_ref()
103            .ok_or_else(|| EditorError::command_failed("No cached script available"))?;
104        let script = Script::parse(cached).map_err(EditorError::from)?;
105        f(&script)
106    }
107
108    /// Apply a change incrementally, returning the delta
109    pub fn apply_change(
110        &mut self,
111        document_text: &str,
112        range: Range,
113        new_text: &str,
114    ) -> Result<ScriptDeltaOwned> {
115        // If we don't have a cached script or too many changes accumulated, do full parse
116        if self.cached_script.is_none() || self.bytes_changed >= self.reparse_threshold {
117            return self.full_reparse(document_text);
118        }
119
120        // Validate range
121        if range.end.offset > document_text.len() || range.start.offset > range.end.offset {
122            return Err(EditorError::InvalidRange {
123                start: range.start.offset,
124                end: range.end.offset,
125                length: document_text.len(),
126            });
127        }
128
129        // Check if we're already on valid UTF-8 boundaries
130        let start_is_valid = range.start.offset == 0
131            || range.start.offset == document_text.len()
132            || document_text.is_char_boundary(range.start.offset);
133        let end_is_valid = range.end.offset == 0
134            || range.end.offset == document_text.len()
135            || document_text.is_char_boundary(range.end.offset);
136
137        if !start_is_valid || !end_is_valid {
138            // The range is not on valid UTF-8 boundaries - this is an error
139            // We should not silently adjust the range as it will cause undo/redo issues
140            return Err(EditorError::command_failed(
141                "Edit range is not on valid UTF-8 character boundaries",
142            ));
143        }
144
145        // Get the old text being replaced
146        let old_text = &document_text[range.start.offset..range.end.offset];
147        let (start_byte, end_byte) = (range.start.offset, range.end.offset);
148
149        // Track the change (convert to owned for storage)
150        let change = DocumentChange {
151            range,
152            new_text: Cow::Owned(new_text.to_string()),
153            old_text: Cow::Owned(old_text.to_string()),
154            #[cfg(feature = "std")]
155            timestamp: std::time::Instant::now(),
156            change_id: self.next_change_id,
157        };
158        self.next_change_id += 1;
159
160        // Update bytes changed counter
161        let change_size = new_text.len().abs_diff(old_text.len());
162        self.bytes_changed += change_size;
163
164        // Store the change for potential rollback
165        self.pending_changes.push(change);
166
167        // Convert editor Range to std::ops::Range for parse_partial
168        // Use the corrected boundaries from old_text extraction
169        let byte_range = start_byte..end_byte;
170
171        // Parse the cached script first to get a Script instance
172        let cached = self.cached_script.as_ref().ok_or_else(|| {
173            EditorError::command_failed("Cached script unavailable for incremental parsing")
174        })?;
175        let script = Script::parse(cached).map_err(EditorError::from)?;
176
177        // Apply incremental parsing
178        match script.parse_partial(byte_range, new_text) {
179            Ok(delta) => {
180                // Update cached script with the change
181                self.update_cached_script(range, new_text)?;
182                Ok(delta)
183            }
184            Err(_e) => {
185                // Fall back to full reparse on error
186                self.pending_changes.pop(); // Remove failed change
187                self.bytes_changed -= change_size;
188
189                // Log the error for debugging
190                #[cfg(feature = "std")]
191                eprintln!("Incremental parse failed, falling back to full parse: {_e}");
192
193                self.full_reparse(document_text)
194            }
195        }
196    }
197
198    /// Force a full reparse of the document
199    pub fn full_reparse(&mut self, content: &str) -> Result<ScriptDeltaOwned> {
200        // Parse the entire document
201        let new_script = Script::parse(content).map_err(EditorError::from)?;
202
203        // If we had a previous script, calculate delta
204        let delta = if let Some(cached_content) = &self.cached_script {
205            let old_script = Script::parse(cached_content).map_err(EditorError::from)?;
206
207            // Calculate sections that changed
208            let delta = ass_core::parser::calculate_delta(&old_script, &new_script);
209
210            // Convert to owned format
211            let mut owned_delta = ScriptDeltaOwned {
212                added: Vec::new(),
213                modified: Vec::new(),
214                removed: Vec::new(),
215                new_issues: new_script.issues().to_vec(),
216            };
217
218            // Convert added sections
219            for section in delta.added {
220                owned_delta.added.push(format!("{section:?}"));
221            }
222
223            // Convert modified sections
224            for (idx, section) in delta.modified {
225                owned_delta.modified.push((idx, format!("{section:?}")));
226            }
227
228            // Copy removed indices
229            owned_delta.removed = delta.removed;
230
231            owned_delta
232        } else {
233            // First parse - everything is "added"
234            ScriptDeltaOwned {
235                added: new_script
236                    .sections()
237                    .iter()
238                    .map(|s| format!("{s:?}"))
239                    .collect(),
240                modified: Vec::new(),
241                removed: Vec::new(),
242                new_issues: new_script.issues().to_vec(),
243            }
244        };
245
246        // Update cache
247        self.cached_script = Some(content.to_string());
248        self.pending_changes.clear();
249        self.bytes_changed = 0;
250
251        Ok(delta)
252    }
253
254    /// Clear all cached state
255    pub fn clear_cache(&mut self) {
256        self.cached_script = None;
257        self.pending_changes.clear();
258        self.bytes_changed = 0;
259        self.next_change_id = 1;
260    }
261
262    /// Get accumulated changes since last full parse
263    pub fn pending_changes(&self) -> &[DocumentChange<'static>] {
264        &self.pending_changes
265    }
266
267    /// Check if a full reparse is recommended
268    pub fn should_reparse(&self) -> bool {
269        self.bytes_changed >= self.reparse_threshold || self.pending_changes.len() > 50
270    }
271
272    /// Update the cached script content with a change
273    fn update_cached_script(&mut self, range: Range, new_text: &str) -> Result<()> {
274        if let Some(cached) = &mut self.cached_script {
275            // Validate boundaries
276            if range.start.offset > cached.len() || range.end.offset > cached.len() {
277                return Err(EditorError::InvalidRange {
278                    start: range.start.offset,
279                    end: range.end.offset,
280                    length: cached.len(),
281                });
282            }
283
284            // Ensure we're on valid UTF-8 boundaries
285            if !cached.is_char_boundary(range.start.offset)
286                || !cached.is_char_boundary(range.end.offset)
287            {
288                return Err(EditorError::command_failed(
289                    "Cache update range is not on valid UTF-8 character boundaries",
290                ));
291            }
292
293            // Build the new content
294            let mut result = String::with_capacity(
295                cached.len() - (range.end.offset - range.start.offset) + new_text.len(),
296            );
297
298            // Copy content before the change
299            result.push_str(&cached[..range.start.offset]);
300
301            // Insert new text
302            result.push_str(new_text);
303
304            // Copy content after the change
305            if range.end.offset < cached.len() {
306                result.push_str(&cached[range.end.offset..]);
307            }
308
309            *cached = result;
310        }
311
312        Ok(())
313    }
314}
315
/// Unit tests for the incremental parser's bookkeeping and fallback paths.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::Position;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;

    // This test uses nothing feature-specific, so it runs in every
    // configuration (it was previously gated on `not(feature = "std")`).
    #[test]
    fn test_incremental_parser_creation() {
        let parser = IncrementalParser::new();
        assert!(parser.cached_script.is_none());
        assert!(parser.pending_changes.is_empty());
        assert_eq!(parser.bytes_changed, 0);
    }

    #[test]
    fn test_document_change_tracking() {
        let change = DocumentChange {
            range: Range::new(Position::new(0), Position::new(5)),
            new_text: Cow::Borrowed("Hello"),
            old_text: Cow::Borrowed("World"),
            #[cfg(feature = "std")]
            timestamp: std::time::Instant::now(),
            change_id: 1,
        };

        assert_eq!(change.new_text, "Hello");
        assert_eq!(change.old_text, "World");
        assert_eq!(change.change_id, 1);
    }

    #[test]
    fn test_should_reparse_threshold() {
        let mut parser = IncrementalParser::new();
        parser.set_reparse_threshold(100);

        assert!(!parser.should_reparse());

        parser.bytes_changed = 101;
        assert!(parser.should_reparse());
    }

    #[test]
    fn test_clear_cache() {
        let mut parser = IncrementalParser::new();
        parser.cached_script = Some("test".to_string());
        parser.bytes_changed = 100;
        parser.next_change_id = 5;

        parser.clear_cache();

        assert!(parser.cached_script.is_none());
        assert_eq!(parser.bytes_changed, 0);
        assert_eq!(parser.next_change_id, 1);
    }

    #[test]
    fn test_error_recovery() {
        let mut parser = IncrementalParser::new();

        // Test full reparse on first use (no cached script)
        let content = "[Script Info]\nTitle: Test";
        let result = parser.apply_change(
            content,
            Range::new(Position::new(0), Position::new(5)),
            "New",
        );
        assert!(result.is_ok());
        assert!(parser.cached_script.is_some());

        // Test threshold-based full reparse
        parser.set_reparse_threshold(10);
        parser.bytes_changed = 11;
        let result = parser.apply_change(
            content,
            Range::new(Position::new(0), Position::new(5)),
            "Changed",
        );
        assert!(result.is_ok());
        assert_eq!(parser.bytes_changed, 0); // Reset after full reparse
    }
}
397}