use lru::LruCache;
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
use std::sync::{Mutex, MutexGuard};
use tree_sitter::{InputEdit, Point, Tree};
/// Computes the minimal contiguous [`InputEdit`] describing the difference
/// between two byte buffers, for feeding tree-sitter's incremental parser.
pub struct InputEditCalculator;

impl InputEditCalculator {
    /// Calculate the single edit that transforms `old_content` into
    /// `new_content`.
    ///
    /// The edit spans from the end of the longest common prefix to the start
    /// of the longest common suffix. All offsets and `Point` columns are in
    /// bytes, per tree-sitter convention.
    #[must_use]
    pub fn calculate(old_content: &[u8], new_content: &[u8]) -> InputEdit {
        // No special-casing of empty inputs is required: with an empty
        // buffer the prefix and suffix lengths both come out as 0, which
        // produces exactly the full-insert / full-delete / no-op edits.
        let prefix_len = Self::find_common_prefix(old_content, new_content);
        let old_remaining = &old_content[prefix_len..];
        let new_remaining = &new_content[prefix_len..];
        // The suffix is computed on the post-prefix remainders so it can
        // never double-count bytes already claimed by the prefix.
        let suffix_len = Self::find_common_suffix(old_remaining, new_remaining);
        let start_byte = prefix_len;
        let old_end_byte = prefix_len + old_remaining.len() - suffix_len;
        let new_end_byte = prefix_len + new_remaining.len() - suffix_len;
        InputEdit {
            start_byte,
            old_end_byte,
            new_end_byte,
            start_position: Self::byte_to_point(old_content, start_byte),
            old_end_position: Self::byte_to_point(old_content, old_end_byte),
            new_end_position: Self::byte_to_point(new_content, new_end_byte),
        }
    }

    /// Length in bytes of the longest common prefix of `a` and `b`.
    fn find_common_prefix(a: &[u8], b: &[u8]) -> usize {
        a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count()
    }

    /// Length in bytes of the longest common suffix of `a` and `b`.
    fn find_common_suffix(a: &[u8], b: &[u8]) -> usize {
        a.iter()
            .rev()
            .zip(b.iter().rev())
            .take_while(|(x, y)| x == y)
            .count()
    }

    /// Convert a byte offset into a row/column [`Point`].
    ///
    /// Columns are counted in bytes (tree-sitter convention). Offsets past
    /// the end of `content` are clamped to the end of the buffer.
    fn byte_to_point(content: &[u8], offset: usize) -> Point {
        let prefix = &content[..offset.min(content.len())];
        // Row: newlines before the offset. Column: bytes since the last
        // newline (or since the start of the buffer on the first line).
        let row = prefix.iter().filter(|&&b| b == b'\n').count();
        let column = prefix.iter().rev().take_while(|&&b| b != b'\n').count();
        Point { row, column }
    }
}
/// A parsed syntax tree paired with the hash of the source bytes it was
/// parsed from, as stored in [`TreeCache`].
#[derive(Debug)]
struct CachedTree {
    /// The parsed tree-sitter tree.
    tree: Tree,
    /// Hash of the content that produced `tree` (see `hash_content`).
    hash: u64,
}

impl CachedTree {
    /// Bundle `tree` with the `hash` of the content it was parsed from.
    fn new(tree: Tree, hash: u64) -> Self {
        CachedTree { tree, hash }
    }
}
/// Thread-safe LRU cache mapping file paths to parsed trees plus the hash
/// of the content each tree was parsed from.
pub struct TreeCache {
    cache: Mutex<LruCache<PathBuf, CachedTree>>,
}

impl TreeCache {
    /// Default maximum number of cached trees before LRU eviction.
    pub const DEFAULT_CAPACITY: usize = 100;

    /// Create a cache holding at most `capacity` entries.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is zero.
    #[must_use]
    pub fn new(capacity: usize) -> Self {
        let capacity = NonZeroUsize::new(capacity).expect("capacity must be > 0");
        Self {
            cache: Mutex::new(LruCache::new(capacity)),
        }
    }

    /// Create a cache with [`Self::DEFAULT_CAPACITY`] entries.
    #[must_use]
    pub fn with_default_capacity() -> Self {
        Self::new(Self::DEFAULT_CAPACITY)
    }

    /// Acquire the inner lock, recovering from poisoning (a panic in another
    /// thread while holding the lock) by reusing the inner value anyway.
    fn lock_cache(&self) -> MutexGuard<'_, LruCache<PathBuf, CachedTree>> {
        self.cache
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Insert or replace the entry for `path`, marking it most recently used.
    pub fn insert(&self, path: &Path, tree: Tree, hash: u64) {
        self.lock_cache()
            .put(path.to_path_buf(), CachedTree::new(tree, hash));
    }

    /// Look up `path`, returning a clone of the cached tree and its content
    /// hash; a hit also marks the entry most recently used.
    pub fn get(&self, path: &Path) -> Option<(Tree, u64)> {
        self.lock_cache()
            .get(path)
            .map(|entry| (entry.tree.clone(), entry.hash))
    }

    /// Remove the entry for `path`; returns `true` if one was present.
    pub fn remove(&self, path: &Path) -> bool {
        self.lock_cache().pop(path).is_some()
    }

    /// Drop every cached entry.
    pub fn clear(&self) {
        self.lock_cache().clear();
    }

    /// Number of entries currently cached.
    pub fn len(&self) -> usize {
        self.lock_cache().len()
    }

    /// `true` when the cache holds no entries.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Parser wrapper that caches syntax trees per file path and reuses them via
/// tree-sitter's incremental parsing, falling back to a full parse whenever
/// reuse would be unsafe or fails.
pub struct IncrementalParser {
    cache: TreeCache,
}

impl IncrementalParser {
    /// Create a parser whose tree cache holds at most `capacity` entries.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is zero (see [`TreeCache::new`]).
    #[must_use]
    pub fn new(capacity: usize) -> Self {
        Self {
            cache: TreeCache::new(capacity),
        }
    }

    /// Create a parser using [`TreeCache::DEFAULT_CAPACITY`].
    #[must_use]
    pub fn with_default_capacity() -> Self {
        Self {
            cache: TreeCache::with_default_capacity(),
        }
    }

    /// Parse `new_content` for `path`.
    ///
    /// Strategy, in order:
    /// 1. If the cached tree's hash matches `new_content`, return the cached
    ///    tree unchanged.
    /// 2. If `old_content` is supplied and its hash matches the cached
    ///    tree's hash, attempt an incremental re-parse seeded with the
    ///    cached tree.
    /// 3. Otherwise (cache miss, stale `old_content`, or incremental
    ///    failure), perform a full parse.
    ///
    /// The resulting tree is stored in the cache before being returned.
    ///
    /// # Errors
    ///
    /// Returns a [`crate::plugin::error::ParseError`] if the full parse
    /// fails.
    pub fn parse<P>(
        &self,
        plugin: &P,
        path: &Path,
        new_content: &[u8],
        old_content: Option<&[u8]>,
    ) -> Result<Tree, crate::plugin::error::ParseError>
    where
        P: crate::plugin::LanguagePlugin + ?Sized,
    {
        let new_hash = Self::hash_content(new_content);
        if let (Some(old_content), Some((old_tree, old_hash))) = (old_content, self.cache.get(path))
        {
            if new_hash == old_hash {
                // Content is unchanged; the cached tree is still valid.
                return Ok(old_tree);
            }
            // Only reuse the cached tree when the caller-supplied old content
            // is actually what that tree was parsed from. Otherwise the
            // computed InputEdit would be applied against the wrong base tree
            // and tree-sitter could silently produce an incorrect AST.
            if Self::hash_content(old_content) == old_hash {
                match Self::parse_incremental(plugin, &old_tree, old_content, new_content) {
                    Ok(tree) => {
                        self.cache.insert(path, tree.clone(), new_hash);
                        return Ok(tree);
                    }
                    Err(_) => {
                        log::debug!(
                            "Incremental parse failed for {}, falling back to full parse",
                            path.display()
                        );
                    }
                }
            } else {
                log::debug!(
                    "Stale old content for {}, falling back to full parse",
                    path.display()
                );
            }
        }
        // Full-parse path: cache miss, stale old content, or incremental failure.
        let tree = plugin.parse_ast(new_content)?;
        self.cache.insert(path, tree.clone(), new_hash);
        Ok(tree)
    }

    /// Re-parse `new_content` seeded with `old_tree` (which must have been
    /// parsed from `old_content`), applying the computed edit first so
    /// tree-sitter can reuse unchanged subtrees.
    fn parse_incremental<P>(
        plugin: &P,
        old_tree: &Tree,
        old_content: &[u8],
        new_content: &[u8],
    ) -> Result<Tree, crate::plugin::error::ParseError>
    where
        P: crate::plugin::LanguagePlugin + ?Sized,
    {
        let edit = InputEditCalculator::calculate(old_content, new_content);
        // Edit a clone so the cached tree stays untouched if parsing fails.
        let mut edited_tree = old_tree.clone();
        edited_tree.edit(&edit);
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&plugin.language())
            .map_err(|e| crate::plugin::error::ParseError::LanguageSetFailed(e.to_string()))?;
        parser
            .parse(new_content, Some(&edited_tree))
            .ok_or(crate::plugin::error::ParseError::TreeSitterFailed)
    }

    /// Hash file content with std's `DefaultHasher` (fast, not
    /// cryptographic — used only for change detection within one process).
    fn hash_content(content: &[u8]) -> u64 {
        use std::hash::{Hash, Hasher};
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        content.hash(&mut hasher);
        hasher.finish()
    }

    /// Drop every cached tree.
    pub fn clear_cache(&self) {
        self.cache.clear();
    }

    /// Number of trees currently cached.
    pub fn cache_len(&self) -> usize {
        self.cache.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_single_line_edit() {
        let old = b"fn foo() {}";
        let new = b"fn bar() {}";
        let edit = InputEditCalculator::calculate(old, new);
        assert_eq!(edit.start_byte, 3);
        assert_eq!(edit.old_end_byte, 6);
        assert_eq!(edit.new_end_byte, 6);
        assert_eq!(edit.start_position, Point { row: 0, column: 3 });
        assert_eq!(edit.old_end_position, Point { row: 0, column: 6 });
        assert_eq!(edit.new_end_position, Point { row: 0, column: 6 });
    }

    #[test]
    fn test_calculate_multiline_insert() {
        let old = b"line1\nline3\n";
        let new = b"line1\nline2\nline3\n";
        let edit = InputEditCalculator::calculate(old, new);
        assert_eq!(edit.start_byte, 10);
        assert_eq!(edit.old_end_byte, 10);
        assert_eq!(edit.new_end_byte, 16);
        assert_eq!(edit.start_position, Point { row: 1, column: 4 });
        assert_eq!(edit.old_end_position, Point { row: 1, column: 4 });
        assert_eq!(edit.new_end_position, Point { row: 2, column: 4 });
    }

    #[test]
    fn test_calculate_multiline_delete() {
        let old = b"line1\nline2\nline3\n";
        let new = b"line1\nline3\n";
        let edit = InputEditCalculator::calculate(old, new);
        assert_eq!(edit.start_byte, 10);
        assert_eq!(edit.old_end_byte, 16);
        assert_eq!(edit.new_end_byte, 10);
        assert_eq!(edit.start_position, Point { row: 1, column: 4 });
        assert_eq!(edit.old_end_position, Point { row: 2, column: 4 });
        assert_eq!(edit.new_end_position, Point { row: 1, column: 4 });
    }

    #[test]
    fn test_calculate_empty_to_content() {
        let old = b"";
        let new = b"hello\nworld\n";
        let edit = InputEditCalculator::calculate(old, new);
        assert_eq!(edit.start_byte, 0);
        assert_eq!(edit.old_end_byte, 0);
        assert_eq!(edit.new_end_byte, 12);
        assert_eq!(edit.start_position, Point { row: 0, column: 0 });
        assert_eq!(edit.old_end_position, Point { row: 0, column: 0 });
        assert_eq!(edit.new_end_position, Point { row: 2, column: 0 });
    }

    #[test]
    fn test_calculate_content_to_empty() {
        let old = b"hello\nworld\n";
        let new = b"";
        let edit = InputEditCalculator::calculate(old, new);
        assert_eq!(edit.start_byte, 0);
        assert_eq!(edit.old_end_byte, 12);
        assert_eq!(edit.new_end_byte, 0);
        assert_eq!(edit.start_position, Point { row: 0, column: 0 });
        assert_eq!(edit.old_end_position, Point { row: 2, column: 0 });
        assert_eq!(edit.new_end_position, Point { row: 0, column: 0 });
    }

    #[test]
    fn test_calculate_no_change() {
        // Identical buffers collapse to a zero-length edit at the end.
        let old = b"unchanged";
        let new = b"unchanged";
        let edit = InputEditCalculator::calculate(old, new);
        assert_eq!(edit.start_byte, 9);
        assert_eq!(edit.old_end_byte, 9);
        assert_eq!(edit.new_end_byte, 9);
        assert_eq!(edit.start_position, Point { row: 0, column: 9 });
        assert_eq!(edit.old_end_position, Point { row: 0, column: 9 });
        assert_eq!(edit.new_end_position, Point { row: 0, column: 9 });
    }

    #[test]
    fn test_byte_to_point_multiline() {
        let content = b"line1\nline2\nline3\n";
        assert_eq!(
            InputEditCalculator::byte_to_point(content, 0),
            Point { row: 0, column: 0 }
        );
        assert_eq!(
            InputEditCalculator::byte_to_point(content, 5),
            Point { row: 0, column: 5 }
        );
        assert_eq!(
            InputEditCalculator::byte_to_point(content, 6),
            Point { row: 1, column: 0 }
        );
        assert_eq!(
            InputEditCalculator::byte_to_point(content, 12),
            Point { row: 2, column: 0 }
        );
    }

    #[test]
    fn test_find_common_prefix() {
        assert_eq!(InputEditCalculator::find_common_prefix(b"abc", b"abx"), 2);
        assert_eq!(InputEditCalculator::find_common_prefix(b"abc", b"xyz"), 0);
        assert_eq!(InputEditCalculator::find_common_prefix(b"abc", b"abc"), 3);
        assert_eq!(InputEditCalculator::find_common_prefix(b"", b"abc"), 0);
    }

    #[test]
    fn test_find_common_suffix() {
        assert_eq!(InputEditCalculator::find_common_suffix(b"abc", b"xbc"), 2);
        assert_eq!(InputEditCalculator::find_common_suffix(b"abc", b"xyz"), 0);
        assert_eq!(InputEditCalculator::find_common_suffix(b"abc", b"abc"), 3);
        assert_eq!(InputEditCalculator::find_common_suffix(b"", b"abc"), 0);
    }

    /// Parse a trivial Rust snippet to obtain a real `Tree` for cache tests.
    fn create_dummy_tree() -> Tree {
        use tree_sitter::Parser;
        let mut parser = Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .expect("set language");
        parser.parse("fn main() {}", None).expect("parse")
    }

    #[test]
    fn test_tree_cache_insert_and_get() {
        let cache = TreeCache::with_default_capacity();
        let path = PathBuf::from("/test/file.rs");
        let tree = create_dummy_tree();
        let hash = 0x1234_5678_90ab_cdef;
        cache.insert(&path, tree, hash);
        assert_eq!(cache.len(), 1);
        assert!(!cache.is_empty());
        let result = cache.get(&path);
        assert!(result.is_some());
        let (cached_tree, cached_hash) = result.unwrap();
        assert_eq!(cached_hash, hash);
        assert!(!cached_tree.root_node().kind().is_empty());
    }

    #[test]
    fn test_tree_cache_miss() {
        let cache = TreeCache::with_default_capacity();
        let path = PathBuf::from("/test/file.rs");
        assert!(cache.get(&path).is_none());
        assert!(cache.is_empty());
    }

    #[test]
    fn test_tree_cache_remove() {
        let cache = TreeCache::with_default_capacity();
        let path = PathBuf::from("/test/file.rs");
        let tree = create_dummy_tree();
        cache.insert(&path, tree, 0x123);
        assert_eq!(cache.len(), 1);
        assert!(cache.remove(&path));
        assert_eq!(cache.len(), 0);
        assert!(cache.get(&path).is_none());
        // Removing a second time reports the entry as absent.
        assert!(!cache.remove(&path));
    }

    #[test]
    fn test_tree_cache_clear() {
        let cache = TreeCache::with_default_capacity();
        let path1 = PathBuf::from("/test/file1.rs");
        let path2 = PathBuf::from("/test/file2.rs");
        cache.insert(&path1, create_dummy_tree(), 0x123);
        cache.insert(&path2, create_dummy_tree(), 0x456);
        assert_eq!(cache.len(), 2);
        cache.clear();
        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());
        assert!(cache.get(&path1).is_none());
        assert!(cache.get(&path2).is_none());
    }

    #[test]
    fn test_tree_cache_lru_eviction() {
        let cache = TreeCache::new(2);
        let path1 = PathBuf::from("/test/file1.rs");
        let path2 = PathBuf::from("/test/file2.rs");
        let path3 = PathBuf::from("/test/file3.rs");
        cache.insert(&path1, create_dummy_tree(), 0x111);
        cache.insert(&path2, create_dummy_tree(), 0x222);
        assert_eq!(cache.len(), 2);
        // Third insert evicts the least recently used entry (path1).
        cache.insert(&path3, create_dummy_tree(), 0x333);
        assert_eq!(cache.len(), 2);
        assert!(cache.get(&path1).is_none());
        assert!(cache.get(&path2).is_some());
        assert!(cache.get(&path3).is_some());
    }

    #[test]
    fn test_tree_cache_lru_access_updates() {
        let cache = TreeCache::new(2);
        let path1 = PathBuf::from("/test/file1.rs");
        let path2 = PathBuf::from("/test/file2.rs");
        let path3 = PathBuf::from("/test/file3.rs");
        cache.insert(&path1, create_dummy_tree(), 0x111);
        cache.insert(&path2, create_dummy_tree(), 0x222);
        // Touching path1 makes path2 the LRU candidate for eviction.
        assert!(cache.get(&path1).is_some());
        cache.insert(&path3, create_dummy_tree(), 0x333);
        assert_eq!(cache.len(), 2);
        assert!(cache.get(&path1).is_some());
        assert!(cache.get(&path2).is_none());
        assert!(cache.get(&path3).is_some());
    }

    #[test]
    fn test_tree_cache_thread_safety() {
        use std::sync::Arc;
        use std::thread;
        let cache = Arc::new(TreeCache::with_default_capacity());
        let mut handles = vec![];
        for i in 0_u64..10 {
            let cache_clone = Arc::clone(&cache);
            let handle = thread::spawn(move || {
                let path = PathBuf::from(format!("/test/file{i}.rs"));
                cache_clone.insert(&path, create_dummy_tree(), i);
            });
            handles.push(handle);
        }
        for handle in handles {
            handle.join().unwrap();
        }
        assert_eq!(cache.len(), 10);
    }

    /// Minimal `LanguagePlugin` backed by the Rust grammar, used to drive
    /// `IncrementalParser` without a real plugin implementation.
    struct MockPlugin;

    impl crate::plugin::LanguagePlugin for MockPlugin {
        fn metadata(&self) -> crate::plugin::LanguageMetadata {
            crate::plugin::LanguageMetadata {
                id: "mock",
                name: "Mock",
                version: "1.0.0",
                author: "test",
                description: "Mock plugin for testing",
                tree_sitter_version: "0.24",
            }
        }

        fn extensions(&self) -> &'static [&'static str] {
            &["mock"]
        }

        fn language(&self) -> tree_sitter::Language {
            tree_sitter_rust::LANGUAGE.into()
        }

        fn parse_ast(&self, content: &[u8]) -> Result<Tree, crate::plugin::error::ParseError> {
            use tree_sitter::Parser;
            let mut parser = Parser::new();
            parser
                .set_language(&self.language())
                .map_err(|e| crate::plugin::error::ParseError::LanguageSetFailed(e.to_string()))?;
            parser
                .parse(content, None)
                .ok_or(crate::plugin::error::ParseError::TreeSitterFailed)
        }

        fn extract_scopes(
            &self,
            _tree: &Tree,
            _content: &[u8],
            _file_path: &Path,
        ) -> Result<Vec<crate::ast::Scope>, crate::plugin::error::ScopeError> {
            Ok(vec![])
        }
    }

    #[test]
    fn test_incremental_parser_cache_miss() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path = PathBuf::from("/test/file.rs");
        let content = b"fn foo() {}";
        let tree = parser.parse(&plugin, &path, content, None).unwrap();
        assert!(!tree.root_node().kind().is_empty());
        assert_eq!(parser.cache_len(), 1);
    }

    #[test]
    fn test_incremental_parser_cache_hit_unchanged() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path = PathBuf::from("/test/file.rs");
        let content = b"fn foo() {}";
        let tree1 = parser.parse(&plugin, &path, content, None).unwrap();
        let tree2 = parser
            .parse(&plugin, &path, content, Some(content))
            .unwrap();
        assert_eq!(tree1.root_node().kind(), tree2.root_node().kind());
        assert_eq!(parser.cache_len(), 1);
    }

    #[test]
    fn test_incremental_parser_incremental_parse() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path = PathBuf::from("/test/file.rs");
        let old_content = b"fn foo() {}";
        let new_content = b"fn bar() {}";
        let tree1 = parser.parse(&plugin, &path, old_content, None).unwrap();
        assert_eq!(parser.cache_len(), 1);
        let tree2 = parser
            .parse(&plugin, &path, new_content, Some(old_content))
            .unwrap();
        assert!(!tree1.root_node().kind().is_empty());
        assert!(!tree2.root_node().kind().is_empty());
        assert_eq!(parser.cache_len(), 1);
    }

    #[test]
    fn test_incremental_parser_multiline_edit() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path = PathBuf::from("/test/file.rs");
        let old_content = b"fn foo() {\n    println!(\"hello\");\n}";
        let new_content = b"fn foo() {\n    println!(\"world\");\n    println!(\"test\");\n}";
        parser.parse(&plugin, &path, old_content, None).unwrap();
        let tree = parser
            .parse(&plugin, &path, new_content, Some(old_content))
            .unwrap();
        assert!(!tree.root_node().kind().is_empty());
        assert_eq!(parser.cache_len(), 1);
    }

    #[test]
    fn test_incremental_parser_clear_cache() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path1 = PathBuf::from("/test/file1.rs");
        let path2 = PathBuf::from("/test/file2.rs");
        parser.parse(&plugin, &path1, b"fn foo() {}", None).unwrap();
        parser.parse(&plugin, &path2, b"fn bar() {}", None).unwrap();
        assert_eq!(parser.cache_len(), 2);
        parser.clear_cache();
        assert_eq!(parser.cache_len(), 0);
    }

    #[test]
    fn test_incremental_parser_different_files() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path1 = PathBuf::from("/test/file1.rs");
        let path2 = PathBuf::from("/test/file2.rs");
        parser.parse(&plugin, &path1, b"fn foo() {}", None).unwrap();
        parser.parse(&plugin, &path2, b"fn bar() {}", None).unwrap();
        assert_eq!(parser.cache_len(), 2);
        parser
            .parse(&plugin, &path1, b"fn baz() {}", Some(b"fn foo() {}"))
            .unwrap();
        assert_eq!(parser.cache_len(), 2);
    }

    #[test]
    fn test_acceptance_incremental_equals_full_parse() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path = PathBuf::from("/test/file.rs");
        let original_content = b"fn foo() { let x = 1; }";
        let modified_content = b"fn foo() { let x = 2; let y = 3; }";
        let tree1 = parser
            .parse(&plugin, &path, original_content, None)
            .unwrap();
        let tree2_incremental = parser
            .parse(&plugin, &path, modified_content, Some(original_content))
            .unwrap();
        // Clear the cache so the second parse of the same content is forced
        // down the full-parse path.
        parser.clear_cache();
        let tree2_full = parser
            .parse(&plugin, &path, modified_content, None)
            .unwrap();
        assert_eq!(
            tree2_incremental.root_node().to_sexp(),
            tree2_full.root_node().to_sexp(),
            "Incremental parsing MUST produce identical AST to full parsing"
        );
        assert!(!tree1.root_node().kind().is_empty());
        assert!(!tree2_incremental.root_node().kind().is_empty());
        assert!(!tree2_full.root_node().kind().is_empty());
    }

    #[test]
    fn test_acceptance_fallback_on_incremental_failure() {
        let parser = IncrementalParser::with_default_capacity();
        let plugin = MockPlugin;
        let path = PathBuf::from("/test/file.rs");
        let original_content = b"fn foo() {}";
        let modified_content = b"fn bar() {}";
        let tree1 = parser
            .parse(&plugin, &path, original_content, None)
            .unwrap();
        assert!(!tree1.root_node().kind().is_empty());
        // Supplying old content that does not match what the cached tree was
        // parsed from must still succeed via the full-parse fallback.
        let corrupted_old_content = b"this is not the old content";
        let result = parser.parse(
            &plugin,
            &path,
            modified_content,
            Some(corrupted_old_content),
        );
        assert!(result.is_ok(), "Should fall back to full parse on mismatch");
        let tree2 = result.unwrap();
        assert!(!tree2.root_node().kind().is_empty());
        assert_eq!(parser.cache_len(), 1);
    }
}