sqry_core/ast/
incremental_parse.rs

1//! Incremental parsing support for tree-sitter.
2//!
3//! This module provides utilities for incremental AST parsing using tree-sitter's
4//! `InputEdit` API. When a file changes, instead of re-parsing from scratch,
5//! tree-sitter can reuse unchanged portions of the old parse tree.
6//!
7//! # Architecture
8//!
9//! - `InputEditCalculator`: Calculates byte/line/column edits between file versions
10//! - `TreeCache`: LRU cache storing recently parsed trees for reuse
11//! - `IncrementalParser`: High-level wrapper coordinating incremental parsing
12//!
13//! # Performance
14//!
15//! Incremental parsing typically provides 5-10x speedup for small edits compared
16//! to full re-parsing, as tree-sitter reuses unchanged AST subtrees.
17
18use lru::LruCache;
19use std::num::NonZeroUsize;
20use std::path::{Path, PathBuf};
21use std::sync::{Mutex, MutexGuard};
22use tree_sitter::{InputEdit, Point, Tree};
23
24/// Calculates `InputEdit` describing changes between two file versions.
25///
26/// The calculator finds the common prefix and suffix between old and new content,
27/// then computes the byte offsets and line/column positions for the changed region.
28///
29/// # Algorithm
30///
31/// 1. Find longest common prefix → start of edit
32/// 2. Find longest common suffix → end of edit
33/// 3. Convert byte offsets to (line, column) via `byte_to_point()`
34/// 4. Build `InputEdit` with old/new positions
35///
36/// # Example
37///
38/// ```ignore
39/// let old = b"fn foo() {}\n";
40/// let new = b"fn foo() { return 42; }\n";
41/// let edit = InputEditCalculator::calculate(old, new);
42/// // edit describes: inserted " return 42;" at position (0, 10)
43/// ```
44pub struct InputEditCalculator;
45
46impl InputEditCalculator {
47    /// Calculate the `InputEdit` between old and new file content.
48    ///
49    /// Returns an `InputEdit` that can be passed to `tree.edit()` to inform
50    /// tree-sitter about the changes, enabling incremental re-parsing.
51    ///
52    /// # Edge Cases
53    ///
54    /// - Empty old → full insert
55    /// - Empty new → full delete
56    /// - Identical content → zero-width edit at start
57    #[must_use]
58    pub fn calculate(old_content: &[u8], new_content: &[u8]) -> InputEdit {
59        // Handle empty cases
60        if old_content.is_empty() && new_content.is_empty() {
61            return InputEdit {
62                start_byte: 0,
63                old_end_byte: 0,
64                new_end_byte: 0,
65                start_position: Point { row: 0, column: 0 },
66                old_end_position: Point { row: 0, column: 0 },
67                new_end_position: Point { row: 0, column: 0 },
68            };
69        }
70
71        if old_content.is_empty() {
72            // Full insert
73            let new_end_position = Self::byte_to_point(new_content, new_content.len());
74            return InputEdit {
75                start_byte: 0,
76                old_end_byte: 0,
77                new_end_byte: new_content.len(),
78                start_position: Point { row: 0, column: 0 },
79                old_end_position: Point { row: 0, column: 0 },
80                new_end_position,
81            };
82        }
83
84        if new_content.is_empty() {
85            // Full delete
86            let old_end_position = Self::byte_to_point(old_content, old_content.len());
87            return InputEdit {
88                start_byte: 0,
89                old_end_byte: old_content.len(),
90                new_end_byte: 0,
91                start_position: Point { row: 0, column: 0 },
92                old_end_position,
93                new_end_position: Point { row: 0, column: 0 },
94            };
95        }
96
97        // Find common prefix
98        let prefix_len = Self::find_common_prefix(old_content, new_content);
99
100        // Find common suffix (in the remaining content after prefix)
101        let old_remaining = &old_content[prefix_len..];
102        let new_remaining = &new_content[prefix_len..];
103        let suffix_len = Self::find_common_suffix(old_remaining, new_remaining);
104
105        // Calculate byte positions
106        let start_byte = prefix_len;
107        let old_end_byte = prefix_len + old_remaining.len() - suffix_len;
108        let new_end_byte = prefix_len + new_remaining.len() - suffix_len;
109
110        // Convert to (line, column) positions
111        let start_position = Self::byte_to_point(old_content, start_byte);
112        let old_end_position = Self::byte_to_point(old_content, old_end_byte);
113        let new_end_position = Self::byte_to_point(new_content, new_end_byte);
114
115        InputEdit {
116            start_byte,
117            old_end_byte,
118            new_end_byte,
119            start_position,
120            old_end_position,
121            new_end_position,
122        }
123    }
124
125    /// Find the length of the common prefix between two byte slices.
126    fn find_common_prefix(a: &[u8], b: &[u8]) -> usize {
127        a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count()
128    }
129
130    /// Find the length of the common suffix between two byte slices.
131    fn find_common_suffix(a: &[u8], b: &[u8]) -> usize {
132        a.iter()
133            .rev()
134            .zip(b.iter().rev())
135            .take_while(|(x, y)| x == y)
136            .count()
137    }
138
139    /// Convert a byte offset to a (line, column) position.
140    ///
141    /// Lines are 0-indexed. Columns count UTF-8 bytes (not characters).
142    /// Newlines are '\n' (LF) - '\r' is treated as regular character.
143    fn byte_to_point(content: &[u8], offset: usize) -> Point {
144        let mut row = 0;
145        let mut column = 0;
146
147        for (i, &byte) in content.iter().enumerate() {
148            if i >= offset {
149                break;
150            }
151            if byte == b'\n' {
152                row += 1;
153                column = 0;
154            } else {
155                column += 1;
156            }
157        }
158
159        Point { row, column }
160    }
161}
162
163/// Cached parse tree with metadata.
164///
165/// Stores a tree-sitter `Tree` along with the content hash it was parsed from.
166/// This allows validating cache hits and detecting when re-parsing is needed.
167#[derive(Debug)]
168struct CachedTree {
169    /// The parsed tree
170    tree: Tree,
171    /// Hash of the content this tree was parsed from
172    hash: u64,
173}
174
175impl CachedTree {
176    fn new(tree: Tree, hash: u64) -> Self {
177        Self { tree, hash }
178    }
179}
180
181/// LRU cache for parsed trees to enable incremental parsing.
182///
183/// Stores recently parsed trees so that when a file changes, we can:
184/// 1. Retrieve the old tree from cache
185/// 2. Calculate the `InputEdit` describing the change
186/// 3. Call `tree.edit(InputEdit)` to inform tree-sitter
187/// 4. Re-parse with the edited tree (tree-sitter reuses unchanged subtrees)
188///
189/// # Capacity
190///
191/// Default capacity is 100 trees. Assuming ~10-50KB per tree, this uses ~1-5MB memory.
192/// Oldest trees are evicted when capacity is exceeded (LRU policy).
193///
194/// # Thread Safety
195///
196/// The cache is wrapped in a `Mutex` for thread-safe access. Lock contention should
197/// be minimal as operations are fast (hash lookups, no disk I/O).
198///
199/// # Example
200///
201/// ```ignore
202/// let cache = TreeCache::with_default_capacity();
203/// cache.insert(&path, tree, hash);
204/// if let Some((old_tree, old_hash)) = cache.get(&path) {
205///     // Use old_tree for incremental parsing
206/// }
207/// ```
208pub struct TreeCache {
209    cache: Mutex<LruCache<PathBuf, CachedTree>>,
210}
211
212impl TreeCache {
213    /// Default cache capacity (number of trees).
214    pub const DEFAULT_CAPACITY: usize = 100;
215
216    /// Create a new tree cache with the specified capacity.
217    ///
218    /// # Arguments
219    ///
220    /// * `capacity` - Maximum number of trees to cache (must be > 0)
221    ///
222    /// # Panics
223    ///
224    /// Panics if capacity is 0.
225    #[must_use]
226    pub fn new(capacity: usize) -> Self {
227        let capacity = NonZeroUsize::new(capacity).expect("capacity must be > 0");
228        Self {
229            cache: Mutex::new(LruCache::new(capacity)),
230        }
231    }
232
233    fn lock_cache(&self) -> MutexGuard<'_, LruCache<PathBuf, CachedTree>> {
234        self.cache
235            .lock()
236            .unwrap_or_else(std::sync::PoisonError::into_inner)
237    }
238
239    /// Create a new tree cache with default capacity (100 trees).
240    #[must_use]
241    pub fn with_default_capacity() -> Self {
242        Self::new(Self::DEFAULT_CAPACITY)
243    }
244
245    /// Insert a tree into the cache.
246    ///
247    /// # Arguments
248    ///
249    /// * `path` - Absolute path to the file
250    /// * `tree` - The parsed tree
251    /// * `hash` - Hash of the content the tree was parsed from
252    ///
253    /// If the cache is at capacity, the least recently used tree is evicted.
254    pub fn insert(&self, path: &Path, tree: Tree, hash: u64) {
255        let mut cache = self.lock_cache();
256        cache.put(path.to_path_buf(), CachedTree::new(tree, hash));
257    }
258
259    /// Retrieve a tree from the cache.
260    ///
261    /// # Arguments
262    ///
263    /// * `path` - Absolute path to the file
264    ///
265    /// # Returns
266    ///
267    /// `Some((tree, hash))` if the tree is cached, `None` otherwise.
268    /// The returned tree is cloned (tree-sitter trees are cheap to clone).
269    ///
270    /// # Note
271    ///
272    /// Accessing a tree marks it as recently used (LRU update).
273    pub fn get(&self, path: &Path) -> Option<(Tree, u64)> {
274        let mut cache = self.lock_cache();
275        cache
276            .get(path)
277            .map(|cached| (cached.tree.clone(), cached.hash))
278    }
279
280    /// Remove a tree from the cache.
281    ///
282    /// # Arguments
283    ///
284    /// * `path` - Absolute path to the file
285    ///
286    /// # Returns
287    ///
288    /// `true` if the tree was present and removed, `false` if not found.
289    pub fn remove(&self, path: &Path) -> bool {
290        let mut cache = self.lock_cache();
291        cache.pop(path).is_some()
292    }
293
294    /// Clear all trees from the cache.
295    pub fn clear(&self) {
296        let mut cache = self.lock_cache();
297        cache.clear();
298    }
299
300    /// Get the current number of cached trees.
301    pub fn len(&self) -> usize {
302        let cache = self.lock_cache();
303        cache.len()
304    }
305
306    /// Check if the cache is empty.
307    pub fn is_empty(&self) -> bool {
308        self.len() == 0
309    }
310}
311
312/// High-level incremental parser coordinating tree caching and re-parsing.
313///
314/// Combines `TreeCache`, `InputEditCalculator`, and tree-sitter's incremental
315/// parsing to minimize re-parsing overhead when files change.
316///
317/// # Workflow
318///
319/// 1. Check if old tree is cached for the file
320/// 2. If cached and content changed:
321///    - Calculate `InputEdit` describing changes
322///    - Call `tree.edit(InputEdit)` to inform tree-sitter
323///    - Re-parse with the edited tree (tree-sitter reuses unchanged subtrees)
324/// 3. If not cached or edit fails:
325///    - Fall back to full parse
326/// 4. Cache the resulting tree for future incremental parses
327///
328/// # Performance
329///
330/// - **Cache hit + small edit**: 5-10x faster than full parse
331/// - **Cache miss**: Same as full parse (no overhead)
332/// - **Incremental parse failure**: Automatic fallback to full parse
333///
334/// # Thread Safety
335///
336/// The internal `TreeCache` uses a `Mutex`, so `IncrementalParser` is `Send + Sync`.
337/// Multiple threads can share the same parser instance safely.
338///
339/// # Example
340///
341/// ```ignore
342/// use sqry_core::ast::IncrementalParser;
343///
344/// let parser = IncrementalParser::with_default_capacity();
345///
346/// // First parse (cache miss)
347/// let tree1 = parser.parse(&plugin, &path, &content1, None)?;
348///
349/// // Second parse after edit (cache hit, incremental)
350/// let tree2 = parser.parse(&plugin, &path, &content2, Some(&content1))?;
351/// // ↑ 5-10x faster than full re-parse
352/// ```
353pub struct IncrementalParser {
354    cache: TreeCache,
355}
356
357impl IncrementalParser {
358    /// Create a new incremental parser with the specified cache capacity.
359    ///
360    /// # Arguments
361    ///
362    /// * `capacity` - Maximum number of trees to cache (must be > 0)
363    ///
364    /// # Panics
365    ///
366    /// Panics if capacity is 0.
367    #[must_use]
368    pub fn new(capacity: usize) -> Self {
369        Self {
370            cache: TreeCache::new(capacity),
371        }
372    }
373
374    /// Create a new incremental parser with default cache capacity (100 trees).
375    #[must_use]
376    pub fn with_default_capacity() -> Self {
377        Self {
378            cache: TreeCache::with_default_capacity(),
379        }
380    }
381
382    /// Parse content using incremental parsing when possible.
383    ///
384    /// # Arguments
385    ///
386    /// * `plugin` - Language plugin providing `parse_ast()` method
387    /// * `path` - Absolute path to the file (used as cache key)
388    /// * `new_content` - Current file content
389    /// * `old_content` - Previous file content (optional, for `InputEdit` calculation)
390    ///
391    /// # Returns
392    ///
393    /// Parsed tree-sitter `Tree`. The tree is also cached for future incremental parses.
394    ///
395    /// # Incremental Parsing
396    ///
397    /// Incremental parsing is used when:
398    /// 1. Old tree is cached for this path
399    /// 2. `old_content` is provided
400    /// 3. Content actually changed (different hash)
401    ///
402    /// Otherwise falls back to full parse (no performance penalty).
403    ///
404    /// # Errors
405    ///
406    /// Returns error if parsing fails (both incremental and fallback).
407    pub fn parse<P>(
408        &self,
409        plugin: &P,
410        path: &Path,
411        new_content: &[u8],
412        old_content: Option<&[u8]>,
413    ) -> Result<Tree, crate::plugin::error::ParseError>
414    where
415        P: crate::plugin::LanguagePlugin + ?Sized,
416    {
417        // Calculate hash of new content
418        let new_hash = Self::hash_content(new_content);
419
420        // Try incremental parse if we have old tree and old content
421        if let (Some(old_content), Some((old_tree, old_hash))) = (old_content, self.cache.get(path))
422        {
423            // Only do incremental parse if content actually changed
424            if new_hash == old_hash {
425                // Content unchanged - return cached tree
426                return Ok(old_tree);
427            }
428            match Self::parse_incremental(plugin, &old_tree, old_content, new_content) {
429                Ok(tree) => {
430                    // Incremental parse succeeded - cache and return
431                    self.cache.insert(path, tree.clone(), new_hash);
432                    return Ok(tree);
433                }
434                Err(_) => {
435                    // Incremental parse failed - fall through to full parse
436                    log::debug!(
437                        "Incremental parse failed for {}, falling back to full parse",
438                        path.display()
439                    );
440                }
441            }
442        }
443
444        // Fall back to full parse (cache miss or incremental failed)
445        let tree = plugin.parse_ast(new_content)?;
446        self.cache.insert(path, tree.clone(), new_hash);
447        Ok(tree)
448    }
449
450    /// Perform incremental parse using tree-sitter's `InputEdit` API.
451    ///
452    /// # Arguments
453    ///
454    /// * `plugin` - Language plugin providing `parse_ast()` method
455    /// * `old_tree` - Previously parsed tree
456    /// * `old_content` - Previous file content
457    /// * `new_content` - Current file content
458    ///
459    /// # Returns
460    ///
461    /// New tree if incremental parse succeeds, error otherwise.
462    ///
463    /// # Algorithm
464    ///
465    /// 1. Calculate `InputEdit` describing changes between old/new content
466    /// 2. Clone old tree and call `tree.edit(InputEdit)`
467    /// 3. Re-parse with edited tree as hint to tree-sitter
468    /// 4. Tree-sitter reuses unchanged AST subtrees internally
469    fn parse_incremental<P>(
470        plugin: &P,
471        old_tree: &Tree,
472        old_content: &[u8],
473        new_content: &[u8],
474    ) -> Result<Tree, crate::plugin::error::ParseError>
475    where
476        P: crate::plugin::LanguagePlugin + ?Sized,
477    {
478        // Calculate edit describing changes
479        let edit = InputEditCalculator::calculate(old_content, new_content);
480
481        // Clone tree and apply edit
482        let mut edited_tree = old_tree.clone();
483        edited_tree.edit(&edit);
484
485        // Re-parse with edited tree as hint
486        // Note: We need to use tree-sitter Parser directly here
487        let mut parser = tree_sitter::Parser::new();
488        parser
489            .set_language(&plugin.language())
490            .map_err(|e| crate::plugin::error::ParseError::LanguageSetFailed(e.to_string()))?;
491
492        parser
493            .parse(new_content, Some(&edited_tree))
494            .ok_or(crate::plugin::error::ParseError::TreeSitterFailed)
495    }
496
497    /// Calculate `XXHash3` hash of content.
498    fn hash_content(content: &[u8]) -> u64 {
499        use std::hash::{Hash, Hasher};
500        let mut hasher = std::collections::hash_map::DefaultHasher::new();
501        content.hash(&mut hasher);
502        hasher.finish()
503    }
504
505    /// Clear the tree cache.
506    pub fn clear_cache(&self) {
507        self.cache.clear();
508    }
509
510    /// Get the number of cached trees.
511    pub fn cache_len(&self) -> usize {
512        self.cache.len()
513    }
514}
515
516#[cfg(test)]
517mod tests {
518    use super::*;
519
520    #[test]
521    fn test_calculate_single_line_edit() {
522        let old = b"fn foo() {}";
523        let new = b"fn bar() {}";
524        let edit = InputEditCalculator::calculate(old, new);
525
526        assert_eq!(edit.start_byte, 3); // After "fn "
527        assert_eq!(edit.old_end_byte, 6); // After "foo"
528        assert_eq!(edit.new_end_byte, 6); // After "bar"
529        assert_eq!(edit.start_position, Point { row: 0, column: 3 });
530        assert_eq!(edit.old_end_position, Point { row: 0, column: 6 });
531        assert_eq!(edit.new_end_position, Point { row: 0, column: 6 });
532    }
533
534    #[test]
535    fn test_calculate_multiline_insert() {
536        let old = b"line1\nline3\n";
537        let new = b"line1\nline2\nline3\n";
538        let edit = InputEditCalculator::calculate(old, new);
539
540        // Common prefix: "line1\nline" (10 bytes) - stops at '3' vs '2'
541        // Remaining: old="3\n" (2), new="2\nline3\n" (8), suffix="3\n" reversed (2)
542        assert_eq!(edit.start_byte, 10); // After "line1\nline"
543        assert_eq!(edit.old_end_byte, 10); // Zero-width in old (entire "3\n" is suffix)
544        assert_eq!(edit.new_end_byte, 16); // After "line1\nline2\nline"
545        assert_eq!(edit.start_position, Point { row: 1, column: 4 });
546        assert_eq!(edit.old_end_position, Point { row: 1, column: 4 });
547        assert_eq!(edit.new_end_position, Point { row: 2, column: 4 });
548    }
549
550    #[test]
551    fn test_calculate_multiline_delete() {
552        let old = b"line1\nline2\nline3\n";
553        let new = b"line1\nline3\n";
554        let edit = InputEditCalculator::calculate(old, new);
555
556        // Common prefix: "line1\nline" (10 bytes) - stops at '2' vs '3'
557        // Remaining: old="2\nline3\n" (8), new="3\n" (2), suffix="3\n" reversed (2)
558        assert_eq!(edit.start_byte, 10); // After "line1\nline"
559        assert_eq!(edit.old_end_byte, 16); // After "line1\nline2\nline"
560        assert_eq!(edit.new_end_byte, 10); // Zero-width in new (entire "3\n" is suffix)
561        assert_eq!(edit.start_position, Point { row: 1, column: 4 });
562        assert_eq!(edit.old_end_position, Point { row: 2, column: 4 });
563        assert_eq!(edit.new_end_position, Point { row: 1, column: 4 });
564    }
565
566    #[test]
567    fn test_calculate_empty_to_content() {
568        let old = b"";
569        let new = b"hello\nworld\n";
570        let edit = InputEditCalculator::calculate(old, new);
571
572        assert_eq!(edit.start_byte, 0);
573        assert_eq!(edit.old_end_byte, 0);
574        assert_eq!(edit.new_end_byte, 12);
575        assert_eq!(edit.start_position, Point { row: 0, column: 0 });
576        assert_eq!(edit.old_end_position, Point { row: 0, column: 0 });
577        assert_eq!(edit.new_end_position, Point { row: 2, column: 0 });
578    }
579
580    #[test]
581    fn test_calculate_content_to_empty() {
582        let old = b"hello\nworld\n";
583        let new = b"";
584        let edit = InputEditCalculator::calculate(old, new);
585
586        assert_eq!(edit.start_byte, 0);
587        assert_eq!(edit.old_end_byte, 12);
588        assert_eq!(edit.new_end_byte, 0);
589        assert_eq!(edit.start_position, Point { row: 0, column: 0 });
590        assert_eq!(edit.old_end_position, Point { row: 2, column: 0 });
591        assert_eq!(edit.new_end_position, Point { row: 0, column: 0 });
592    }
593
594    #[test]
595    fn test_calculate_no_change() {
596        let old = b"unchanged";
597        let new = b"unchanged";
598        let edit = InputEditCalculator::calculate(old, new);
599
600        assert_eq!(edit.start_byte, 9); // End of content
601        assert_eq!(edit.old_end_byte, 9);
602        assert_eq!(edit.new_end_byte, 9);
603        assert_eq!(edit.start_position, Point { row: 0, column: 9 });
604        assert_eq!(edit.old_end_position, Point { row: 0, column: 9 });
605        assert_eq!(edit.new_end_position, Point { row: 0, column: 9 });
606    }
607
608    #[test]
609    fn test_byte_to_point_multiline() {
610        let content = b"line1\nline2\nline3\n";
611
612        assert_eq!(
613            InputEditCalculator::byte_to_point(content, 0),
614            Point { row: 0, column: 0 }
615        );
616        assert_eq!(
617            InputEditCalculator::byte_to_point(content, 5),
618            Point { row: 0, column: 5 }
619        );
620        assert_eq!(
621            InputEditCalculator::byte_to_point(content, 6),
622            Point { row: 1, column: 0 }
623        );
624        assert_eq!(
625            InputEditCalculator::byte_to_point(content, 12),
626            Point { row: 2, column: 0 }
627        );
628    }
629
630    #[test]
631    fn test_find_common_prefix() {
632        assert_eq!(InputEditCalculator::find_common_prefix(b"abc", b"abx"), 2);
633        assert_eq!(InputEditCalculator::find_common_prefix(b"abc", b"xyz"), 0);
634        assert_eq!(InputEditCalculator::find_common_prefix(b"abc", b"abc"), 3);
635        assert_eq!(InputEditCalculator::find_common_prefix(b"", b"abc"), 0);
636    }
637
638    #[test]
639    fn test_find_common_suffix() {
640        assert_eq!(InputEditCalculator::find_common_suffix(b"abc", b"xbc"), 2);
641        assert_eq!(InputEditCalculator::find_common_suffix(b"abc", b"xyz"), 0);
642        assert_eq!(InputEditCalculator::find_common_suffix(b"abc", b"abc"), 3);
643        assert_eq!(InputEditCalculator::find_common_suffix(b"", b"abc"), 0);
644    }
645
646    // TreeCache tests
647
648    fn create_dummy_tree() -> Tree {
649        use tree_sitter::Parser;
650        let mut parser = Parser::new();
651        parser
652            .set_language(&tree_sitter_rust::LANGUAGE.into())
653            .expect("set language");
654        parser.parse("fn main() {}", None).expect("parse")
655    }
656
657    #[test]
658    fn test_tree_cache_insert_and_get() {
659        let cache = TreeCache::with_default_capacity();
660        let path = PathBuf::from("/test/file.rs");
661        let tree = create_dummy_tree();
662        let hash = 0x1234_5678_90ab_cdef;
663
664        // Insert tree
665        cache.insert(&path, tree, hash);
666        assert_eq!(cache.len(), 1);
667        assert!(!cache.is_empty());
668
669        // Retrieve tree
670        let result = cache.get(&path);
671        assert!(result.is_some());
672        let (cached_tree, cached_hash) = result.unwrap();
673        assert_eq!(cached_hash, hash);
674        // Tree should be valid (has a root node)
675        assert!(!cached_tree.root_node().kind().is_empty());
676    }
677
678    #[test]
679    fn test_tree_cache_miss() {
680        let cache = TreeCache::with_default_capacity();
681        let path = PathBuf::from("/test/file.rs");
682
683        // Cache miss
684        assert!(cache.get(&path).is_none());
685        assert!(cache.is_empty());
686    }
687
688    #[test]
689    fn test_tree_cache_remove() {
690        let cache = TreeCache::with_default_capacity();
691        let path = PathBuf::from("/test/file.rs");
692        let tree = create_dummy_tree();
693
694        cache.insert(&path, tree, 0x123);
695        assert_eq!(cache.len(), 1);
696
697        // Remove
698        assert!(cache.remove(&path));
699        assert_eq!(cache.len(), 0);
700        assert!(cache.get(&path).is_none());
701
702        // Remove non-existent
703        assert!(!cache.remove(&path));
704    }
705
706    #[test]
707    fn test_tree_cache_clear() {
708        let cache = TreeCache::with_default_capacity();
709        let path1 = PathBuf::from("/test/file1.rs");
710        let path2 = PathBuf::from("/test/file2.rs");
711
712        cache.insert(&path1, create_dummy_tree(), 0x123);
713        cache.insert(&path2, create_dummy_tree(), 0x456);
714        assert_eq!(cache.len(), 2);
715
716        cache.clear();
717        assert_eq!(cache.len(), 0);
718        assert!(cache.is_empty());
719        assert!(cache.get(&path1).is_none());
720        assert!(cache.get(&path2).is_none());
721    }
722
723    #[test]
724    fn test_tree_cache_lru_eviction() {
725        let cache = TreeCache::new(2); // Small cache
726        let path1 = PathBuf::from("/test/file1.rs");
727        let path2 = PathBuf::from("/test/file2.rs");
728        let path3 = PathBuf::from("/test/file3.rs");
729
730        // Fill cache
731        cache.insert(&path1, create_dummy_tree(), 0x111);
732        cache.insert(&path2, create_dummy_tree(), 0x222);
733        assert_eq!(cache.len(), 2);
734
735        // Insert 3rd tree - should evict path1 (LRU)
736        cache.insert(&path3, create_dummy_tree(), 0x333);
737        assert_eq!(cache.len(), 2);
738        assert!(cache.get(&path1).is_none()); // Evicted
739        assert!(cache.get(&path2).is_some()); // Still present
740        assert!(cache.get(&path3).is_some()); // Newly added
741    }
742
743    #[test]
744    fn test_tree_cache_lru_access_updates() {
745        let cache = TreeCache::new(2); // Small cache
746        let path1 = PathBuf::from("/test/file1.rs");
747        let path2 = PathBuf::from("/test/file2.rs");
748        let path3 = PathBuf::from("/test/file3.rs");
749
750        // Fill cache
751        cache.insert(&path1, create_dummy_tree(), 0x111);
752        cache.insert(&path2, create_dummy_tree(), 0x222);
753
754        // Access path1 to mark it as recently used
755        assert!(cache.get(&path1).is_some());
756
757        // Insert path3 - should evict path2 (now LRU), not path1
758        cache.insert(&path3, create_dummy_tree(), 0x333);
759        assert_eq!(cache.len(), 2);
760        assert!(cache.get(&path1).is_some()); // Still present (was accessed)
761        assert!(cache.get(&path2).is_none()); // Evicted (LRU)
762        assert!(cache.get(&path3).is_some()); // Newly added
763    }
764
765    #[test]
766    fn test_tree_cache_thread_safety() {
767        use std::sync::Arc;
768        use std::thread;
769
770        let cache = Arc::new(TreeCache::with_default_capacity());
771        let mut handles = vec![];
772
773        // Spawn multiple threads inserting trees
774        for i in 0_u64..10 {
775            let cache_clone = Arc::clone(&cache);
776            let handle = thread::spawn(move || {
777                let path = PathBuf::from(format!("/test/file{i}.rs"));
778                cache_clone.insert(&path, create_dummy_tree(), i);
779            });
780            handles.push(handle);
781        }
782
783        // Wait for all threads
784        for handle in handles {
785            handle.join().unwrap();
786        }
787
788        // Verify all trees inserted
789        assert_eq!(cache.len(), 10);
790    }
791
792    // IncrementalParser tests
793
794    // Simple mock plugin for testing
795    struct MockPlugin;
796
797    impl crate::plugin::LanguagePlugin for MockPlugin {
798        fn metadata(&self) -> crate::plugin::LanguageMetadata {
799            crate::plugin::LanguageMetadata {
800                id: "mock",
801                name: "Mock",
802                version: "1.0.0",
803                author: "test",
804                description: "Mock plugin for testing",
805                tree_sitter_version: "0.24",
806            }
807        }
808
809        fn extensions(&self) -> &'static [&'static str] {
810            &["mock"]
811        }
812
813        fn language(&self) -> tree_sitter::Language {
814            tree_sitter_rust::LANGUAGE.into()
815        }
816
817        fn parse_ast(&self, content: &[u8]) -> Result<Tree, crate::plugin::error::ParseError> {
818            use tree_sitter::Parser;
819            let mut parser = Parser::new();
820            parser
821                .set_language(&self.language())
822                .map_err(|e| crate::plugin::error::ParseError::LanguageSetFailed(e.to_string()))?;
823            parser
824                .parse(content, None)
825                .ok_or(crate::plugin::error::ParseError::TreeSitterFailed)
826        }
827
828        fn extract_scopes(
829            &self,
830            _tree: &Tree,
831            _content: &[u8],
832            _file_path: &Path,
833        ) -> Result<Vec<crate::ast::Scope>, crate::plugin::error::ScopeError> {
834            Ok(vec![])
835        }
836    }
837
838    #[test]
839    fn test_incremental_parser_cache_miss() {
840        let parser = IncrementalParser::with_default_capacity();
841        let plugin = MockPlugin;
842        let path = PathBuf::from("/test/file.rs");
843        let content = b"fn foo() {}";
844
845        // First parse (cache miss)
846        let tree = parser.parse(&plugin, &path, content, None).unwrap();
847        assert!(!tree.root_node().kind().is_empty());
848        assert_eq!(parser.cache_len(), 1);
849    }
850
851    #[test]
852    fn test_incremental_parser_cache_hit_unchanged() {
853        let parser = IncrementalParser::with_default_capacity();
854        let plugin = MockPlugin;
855        let path = PathBuf::from("/test/file.rs");
856        let content = b"fn foo() {}";
857
858        // First parse
859        let tree1 = parser.parse(&plugin, &path, content, None).unwrap();
860
861        // Second parse with same content (cache hit, no change)
862        let tree2 = parser
863            .parse(&plugin, &path, content, Some(content))
864            .unwrap();
865
866        // Should return same tree (content unchanged)
867        assert_eq!(tree1.root_node().kind(), tree2.root_node().kind());
868        assert_eq!(parser.cache_len(), 1);
869    }
870
871    #[test]
872    fn test_incremental_parser_incremental_parse() {
873        let parser = IncrementalParser::with_default_capacity();
874        let plugin = MockPlugin;
875        let path = PathBuf::from("/test/file.rs");
876        let old_content = b"fn foo() {}";
877        let new_content = b"fn bar() {}";
878
879        // First parse
880        let tree1 = parser.parse(&plugin, &path, old_content, None).unwrap();
881        assert_eq!(parser.cache_len(), 1);
882
883        // Second parse after edit (incremental parse)
884        let tree2 = parser
885            .parse(&plugin, &path, new_content, Some(old_content))
886            .unwrap();
887
888        // Both trees should be valid
889        assert!(!tree1.root_node().kind().is_empty());
890        assert!(!tree2.root_node().kind().is_empty());
891        // Cache should still have 1 entry (same path)
892        assert_eq!(parser.cache_len(), 1);
893    }
894
895    #[test]
896    fn test_incremental_parser_multiline_edit() {
897        let parser = IncrementalParser::with_default_capacity();
898        let plugin = MockPlugin;
899        let path = PathBuf::from("/test/file.rs");
900
901        let old_content = b"fn foo() {\n    println!(\"hello\");\n}";
902        let new_content = b"fn foo() {\n    println!(\"world\");\n    println!(\"test\");\n}";
903
904        // First parse
905        parser.parse(&plugin, &path, old_content, None).unwrap();
906
907        // Second parse with multiline edit (incremental)
908        let tree = parser
909            .parse(&plugin, &path, new_content, Some(old_content))
910            .unwrap();
911
912        // Tree should be valid
913        assert!(!tree.root_node().kind().is_empty());
914        assert_eq!(parser.cache_len(), 1);
915    }
916
917    #[test]
918    fn test_incremental_parser_clear_cache() {
919        let parser = IncrementalParser::with_default_capacity();
920        let plugin = MockPlugin;
921        let path1 = PathBuf::from("/test/file1.rs");
922        let path2 = PathBuf::from("/test/file2.rs");
923
924        parser.parse(&plugin, &path1, b"fn foo() {}", None).unwrap();
925        parser.parse(&plugin, &path2, b"fn bar() {}", None).unwrap();
926        assert_eq!(parser.cache_len(), 2);
927
928        parser.clear_cache();
929        assert_eq!(parser.cache_len(), 0);
930    }
931
932    #[test]
933    fn test_incremental_parser_different_files() {
934        let parser = IncrementalParser::with_default_capacity();
935        let plugin = MockPlugin;
936        let path1 = PathBuf::from("/test/file1.rs");
937        let path2 = PathBuf::from("/test/file2.rs");
938
939        // Parse two different files
940        parser.parse(&plugin, &path1, b"fn foo() {}", None).unwrap();
941        parser.parse(&plugin, &path2, b"fn bar() {}", None).unwrap();
942
943        // Both should be cached
944        assert_eq!(parser.cache_len(), 2);
945
946        // Edit first file (should use incremental parse)
947        parser
948            .parse(&plugin, &path1, b"fn baz() {}", Some(b"fn foo() {}"))
949            .unwrap();
950
951        // Still 2 entries
952        assert_eq!(parser.cache_len(), 2);
953    }
954
955    /// ACCEPTANCE TEST: Verify incremental parsing produces identical results to full parsing
956    /// This is the core correctness guarantee of incremental parsing.
957    #[test]
958    fn test_acceptance_incremental_equals_full_parse() {
959        let parser = IncrementalParser::with_default_capacity();
960        let plugin = MockPlugin;
961        let path = PathBuf::from("/test/file.rs");
962
963        let original_content = b"fn foo() { let x = 1; }";
964        let modified_content = b"fn foo() { let x = 2; let y = 3; }";
965
966        // Full parse of original
967        let tree1 = parser
968            .parse(&plugin, &path, original_content, None)
969            .unwrap();
970
971        // Incremental parse (with old content)
972        let tree2_incremental = parser
973            .parse(&plugin, &path, modified_content, Some(original_content))
974            .unwrap();
975
976        // Full parse of modified (for comparison)
977        parser.clear_cache(); // Clear cache to force full parse
978        let tree2_full = parser
979            .parse(&plugin, &path, modified_content, None)
980            .unwrap();
981
982        // CRITICAL: Incremental and full parse MUST produce identical trees
983        assert_eq!(
984            tree2_incremental.root_node().to_sexp(),
985            tree2_full.root_node().to_sexp(),
986            "Incremental parsing MUST produce identical AST to full parsing"
987        );
988
989        // Both should have valid trees
990        assert!(!tree1.root_node().kind().is_empty());
991        assert!(!tree2_incremental.root_node().kind().is_empty());
992        assert!(!tree2_full.root_node().kind().is_empty());
993    }
994
995    /// ACCEPTANCE TEST: Verify fallback behavior when incremental parsing fails
996    /// Tests that the system gracefully degrades to full parsing on errors.
997    #[test]
998    fn test_acceptance_fallback_on_incremental_failure() {
999        let parser = IncrementalParser::with_default_capacity();
1000        let plugin = MockPlugin;
1001        let path = PathBuf::from("/test/file.rs");
1002
1003        let original_content = b"fn foo() {}";
1004        let modified_content = b"fn bar() {}";
1005
1006        // Parse original
1007        let tree1 = parser
1008            .parse(&plugin, &path, original_content, None)
1009            .unwrap();
1010        assert!(!tree1.root_node().kind().is_empty());
1011
1012        // Provide corrupted "old content" that doesn't match cached tree
1013        // This should trigger fallback to full parse (instead of crashing)
1014        let corrupted_old_content = b"this is not the old content";
1015
1016        let result = parser.parse(
1017            &plugin,
1018            &path,
1019            modified_content,
1020            Some(corrupted_old_content),
1021        );
1022
1023        // Should succeed (via fallback to full parse)
1024        assert!(result.is_ok(), "Should fall back to full parse on mismatch");
1025        let tree2 = result.unwrap();
1026        assert!(!tree2.root_node().kind().is_empty());
1027
1028        // Should still cache the result
1029        assert_eq!(parser.cache_len(), 1);
1030    }
1031}
sqry_core/ast/incremental_parse.rs

sqry_core/ast/
incremental_parse.rs