use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use syntext::index::overlay::{compute_delete_set, OverlayView};
use syntext::IndexError;
fn dirty(files: &[(&str, &[u8])]) -> Vec<(PathBuf, Arc<[u8]>)> {
files
.iter()
.map(|(p, c)| (PathBuf::from(p), Arc::from(*c)))
.collect()
}
#[test]
fn overlay_single_file_add() {
    // One dirty file becomes exactly one overlay doc carrying the starting doc_id.
    let ov = OverlayView::build(10, dirty(&[("src/main.rs", b"fn parse_query() { }")])).unwrap();
    assert_eq!(ov.docs.len(), 1);
    let doc = &ov.docs[0];
    assert_eq!(doc.doc_id, 10);
    assert_eq!(doc.path, Path::new("src/main.rs"));
    assert!(!ov.gram_index.is_empty(), "overlay should have grams");
}
#[test]
fn overlay_multiple_files() {
    // Doc ids are handed out sequentially starting at the supplied base count,
    // and next_doc_id points one past the last assigned id.
    let files = dirty(&[("a.rs", b"fn alpha() {}"), ("b.rs", b"fn beta() {}")]);
    let ov = OverlayView::build(5, files).unwrap();
    assert_eq!(ov.docs.len(), 2);
    assert_eq!(ov.docs[0].doc_id, 5);
    assert_eq!(ov.docs[1].doc_id, 6);
    assert_eq!(ov.next_doc_id, 7);
}
#[test]
fn overlay_empty() {
    // Building with no dirty files yields an empty view; next_doc_id stays at base.
    let ov = OverlayView::build(100, Vec::new()).unwrap();
    assert!(ov.docs.is_empty());
    assert!(ov.gram_index.is_empty());
    assert_eq!(ov.next_doc_id, 100);
}
#[test]
fn overlay_doc_lookup_by_id() {
    // get_doc resolves only ids that actually belong to the overlay.
    let ov = OverlayView::build(0, dirty(&[("test.rs", b"hello world")])).unwrap();
    assert!(ov.get_doc(0).is_some());
    assert!(ov.get_doc(1).is_none());
}
#[test]
fn overlay_doc_lookup_by_path() {
    // Path lookup hits both indexed files and misses an unknown path.
    let ov = OverlayView::build(0, dirty(&[("a.rs", b"aaa"), ("b.rs", b"bbb")])).unwrap();
    for present in ["a.rs", "b.rs"] {
        assert!(ov.get_doc_by_path(Path::new(present)).is_some());
    }
    assert!(ov.get_doc_by_path(Path::new("c.rs")).is_none());
}
#[test]
fn overlay_rebuild_replaces_content() {
    // Rebuilding with new content for the same path must replace both the
    // stored bytes and the gram set.
    //
    // Collect gram keys into HashSets: HashMap iteration order is unspecified,
    // so comparing unsorted key Vecs with assert_ne! could pass spuriously when
    // the two key *sets* are identical but merely iterated in different orders.
    let ov1 = OverlayView::build(10, dirty(&[("file.rs", b"fn old_function() {}")])).unwrap();
    let grams_v1: HashSet<u64> = ov1.gram_index.keys().copied().collect();
    let ov2 = OverlayView::build(10, dirty(&[("file.rs", b"fn new_function() {}")])).unwrap();
    let grams_v2: HashSet<u64> = ov2.gram_index.keys().copied().collect();
    assert_eq!(
        std::str::from_utf8(&ov2.docs[0].content).unwrap(),
        "fn new_function() {}"
    );
    assert_ne!(grams_v1, grams_v2, "gram sets should differ after modify");
}
#[test]
fn overlay_delete_removes_file() {
    // A rebuild that omits a previously-dirty file drops its doc and its grams.
    let before = OverlayView::build(10, dirty(&[("file.rs", b"fn something() {}")])).unwrap();
    assert_eq!(before.docs.len(), 1);
    let after = OverlayView::build(10, Vec::new()).unwrap();
    assert_eq!(after.docs.len(), 0);
    assert!(after.gram_index.is_empty());
}
#[test]
fn snapshot_isolation_via_arc() {
    // A reader holding an Arc snapshot must keep seeing the old content even
    // after a newer overlay has been built for the same path.
    let first = Arc::new(OverlayView::build(0, dirty(&[("file.rs", b"version one")])).unwrap());
    let reader_snap = Arc::clone(&first);
    let _second = Arc::new(OverlayView::build(0, dirty(&[("file.rs", b"version two")])).unwrap());
    let seen = std::str::from_utf8(&reader_snap.docs[0].content).unwrap();
    assert_eq!(seen, "version one");
}
#[test]
fn incremental_reuses_unchanged_content() {
    // An incremental rebuild must share the content Arc of untouched docs
    // rather than cloning the bytes.
    let base = OverlayView::build(
        10,
        dirty(&[("a.rs", b"aaa content"), ("b.rs", b"bbb content")]),
    )
    .unwrap();
    let doc_a = base
        .docs
        .iter()
        .find(|d| d.path == Path::new("a.rs"))
        .unwrap();
    let ptr_a_before = Arc::as_ptr(&doc_a.content);
    let changed: HashSet<PathBuf> = [PathBuf::from("b.rs")].into();
    let gone: HashSet<PathBuf> = HashSet::new();
    let updated = dirty(&[("b.rs", b"bbb updated")]);
    let rebuilt = OverlayView::build_incremental(10, &base, updated, &changed, &gone).unwrap();
    assert_eq!(rebuilt.docs.len(), 2);
    let new_a = rebuilt
        .docs
        .iter()
        .find(|d| d.path == Path::new("a.rs"))
        .unwrap();
    assert!(
        std::ptr::eq(Arc::as_ptr(&new_a.content), ptr_a_before),
        "unchanged doc should share Arc, not clone"
    );
    let new_b = rebuilt
        .docs
        .iter()
        .find(|d| d.path == Path::new("b.rs"))
        .unwrap();
    assert_eq!(std::str::from_utf8(&new_b.content).unwrap(), "bbb updated");
}
#[test]
fn incremental_removes_deleted() {
    // Deleted paths vanish from docs and from path lookup.
    let base = OverlayView::build(10, dirty(&[("a.rs", b"aaa"), ("b.rs", b"bbb")])).unwrap();
    let changed: HashSet<PathBuf> = HashSet::new();
    let gone: HashSet<PathBuf> = [PathBuf::from("b.rs")].into();
    let rebuilt = OverlayView::build_incremental(10, &base, Vec::new(), &changed, &gone).unwrap();
    assert_eq!(rebuilt.docs.len(), 1);
    assert_eq!(rebuilt.docs[0].path, Path::new("a.rs"));
    assert!(rebuilt.get_doc_by_path(Path::new("b.rs")).is_none());
}
#[test]
fn incremental_from_empty_old_overlay() {
    // Starting from an empty overlay, the incremental path behaves like a fresh build.
    let base = OverlayView::empty();
    let changed: HashSet<PathBuf> = [PathBuf::from("new.rs")].into();
    let gone: HashSet<PathBuf> = HashSet::new();
    let files = dirty(&[("new.rs", b"fn new() {}")]);
    let rebuilt = OverlayView::build_incremental(5, &base, files, &changed, &gone).unwrap();
    assert_eq!(rebuilt.docs.len(), 1);
    assert_eq!(rebuilt.docs[0].path, Path::new("new.rs"));
    assert_eq!(rebuilt.docs[0].doc_id, 5);
}
#[test]
fn compute_delete_set_marks_all_base_docs_for_invalidated_paths() {
    // Every base doc_id belonging to a modified path must be marked for
    // deletion; untouched paths stay live. `src/missing.rs` is deleted but has
    // no base docs, which must be a harmless no-op.
    let mut base_path_doc_ids = HashMap::new();
    base_path_doc_ids.insert(PathBuf::from("src/main.rs"), vec![1, 7]);
    base_path_doc_ids.insert(PathBuf::from("src/lib.rs"), vec![3]);
    // Use the `HashSet` imported at the top of the file instead of the
    // fully-qualified path, matching every other test in this file.
    let modified: HashSet<PathBuf> = [PathBuf::from("src/main.rs")].into();
    let deleted: HashSet<PathBuf> = [PathBuf::from("src/missing.rs")].into();
    let delete_set = compute_delete_set(
        &base_path_doc_ids,
        &modified,
        &deleted,
        &roaring::RoaringBitmap::new(),
    );
    assert!(delete_set.contains(1));
    assert!(delete_set.contains(7));
    assert!(!delete_set.contains(3));
}
#[test]
fn overlay_build_stores_base_doc_count() {
    // The base_doc_count passed to build is recorded on the resulting view.
    let view = OverlayView::build(42, dirty(&[("a.rs", b"fn a() {}")])).unwrap();
    assert_eq!(view.base_doc_count, 42);
}
#[test]
fn overlay_empty_base_doc_count_is_zero() {
    // An empty view carries a zero base doc count.
    let view = OverlayView::empty();
    assert_eq!(view.base_doc_count, 0);
}
#[test]
fn overlay_build_returns_doc_id_overflow() {
    // Adding even one doc at base u32::MAX cannot allocate a doc_id; build
    // must report the structured overflow error rather than panic or wrap.
    let result = OverlayView::build(u32::MAX, dirty(&[("a.rs", b"fn a() {}")]));
    let is_overflow = matches!(
        result,
        Err(IndexError::DocIdOverflow {
            base_doc_count: u32::MAX,
            overlay_docs: 1,
        })
    );
    assert!(is_overflow, "overflow should return a structured error");
}
#[test]
fn incremental_reuses_cached_grams() {
    // A doc untouched by the delta keeps its precomputed gram list.
    let base = OverlayView::build(10, dirty(&[("a.rs", b"fn alpha_function() {}")])).unwrap();
    let cached_grams = base
        .docs
        .iter()
        .find(|d| d.path == Path::new("a.rs"))
        .unwrap()
        .grams
        .clone();
    assert!(!cached_grams.is_empty(), "should have grams");
    let changed: HashSet<PathBuf> = [PathBuf::from("b.rs")].into();
    let gone: HashSet<PathBuf> = HashSet::new();
    let files = dirty(&[("b.rs", b"fn beta() {}")]);
    let rebuilt = OverlayView::build_incremental(10, &base, files, &changed, &gone).unwrap();
    let reused = rebuilt
        .docs
        .iter()
        .find(|d| d.path == Path::new("a.rs"))
        .unwrap();
    assert_eq!(
        reused.grams, cached_grams,
        "reused doc should have same cached grams"
    );
}
#[test]
fn delta_unchanged_files_keep_doc_ids() {
    // The delta path must not renumber docs whose content did not change.
    let base = OverlayView::build(
        10,
        dirty(&[("a.rs", b"fn alpha() {}"), ("b.rs", b"fn beta() {}")]),
    )
    .unwrap();
    let id_before = base
        .docs
        .iter()
        .find(|d| d.path == Path::new("a.rs"))
        .unwrap()
        .doc_id;
    let changed: HashSet<PathBuf> = [PathBuf::from("b.rs")].into();
    let gone: HashSet<PathBuf> = HashSet::new();
    let files = dirty(&[("b.rs", b"fn beta_v2() {}")]);
    let rebuilt = OverlayView::build_incremental(10, &base, files, &changed, &gone).unwrap();
    let id_after = rebuilt
        .docs
        .iter()
        .find(|d| d.path == Path::new("a.rs"))
        .unwrap()
        .doc_id;
    assert_eq!(
        id_after, id_before,
        "unchanged doc keeps its doc_id on delta path"
    );
}
#[test]
fn delta_gram_index_matches_full_rebuild() {
    // Equivalence check: applying a delta (b.rs modified) to an old overlay
    // must produce the same gram index as rebuilding everything from scratch
    // with the final file contents.
    let old = OverlayView::build(
        10,
        dirty(&[("a.rs", b"fn alpha() {}"), ("b.rs", b"fn beta() {}")]),
    )
    .unwrap();
    let newly_changed: HashSet<PathBuf> = [PathBuf::from("b.rs")].into();
    let removed: HashSet<PathBuf> = HashSet::new();
    let new_b = dirty(&[("b.rs", b"fn beta_v2() {}")]);
    let delta =
        OverlayView::build_incremental(10, &old, new_b.clone(), &newly_changed, &removed).unwrap();
    // Full rebuild with the same final state as the delta above.
    let all_files = dirty(&[("a.rs", b"fn alpha() {}"), ("b.rs", b"fn beta_v2() {}")]);
    let full = OverlayView::build(10, all_files).unwrap();
    // Key sets must match exactly; sort both because HashMap key order is
    // unspecified.
    let mut delta_keys: Vec<u64> = delta.gram_index.keys().copied().collect();
    let mut full_keys: Vec<u64> = full.gram_index.keys().copied().collect();
    delta_keys.sort_unstable();
    full_keys.sort_unstable();
    assert_eq!(
        delta_keys, full_keys,
        "gram_index keys must match full rebuild"
    );
    // Posting lists are compared by *path* rather than doc_id, since the delta
    // and full-rebuild paths may assign different ids to the same file.
    for (&hash, delta_ids) in &delta.gram_index {
        let mut d = delta_ids.clone();
        d.sort_unstable();
        let delta_paths: std::collections::BTreeSet<PathBuf> = d
            .iter()
            .filter_map(|&id| delta.get_doc(id).map(|doc| doc.path.clone()))
            .collect();
        let full_ids = full.gram_index.get(&hash).cloned().unwrap_or_default();
        let full_paths: std::collections::BTreeSet<PathBuf> = full_ids
            .iter()
            .filter_map(|&id| full.get_doc(id).map(|doc| doc.path.clone()))
            .collect();
        assert_eq!(
            delta_paths, full_paths,
            "posting list paths must match for gram {hash:#x}"
        );
    }
}
#[test]
fn delta_deletion_removes_grams() {
    // After deleting a.rs, any gram whose posting list was exactly [5] (i.e.
    // unique to a.rs, the first overlay doc at base 5) must disappear from the
    // index. The previous version silently skipped all assertions when no such
    // gram was found, letting the test pass vacuously; now the unique gram is
    // required to exist.
    let old = OverlayView::build(
        5,
        dirty(&[
            ("a.rs", b"fn unique_zzzq() {}"),
            ("b.rs", b"fn common() {}"),
        ]),
    )
    .unwrap();
    let zzzq_hash = old
        .gram_index
        .keys()
        .find(|&&h| {
            let ids = &old.gram_index[&h];
            ids == &[5]
        })
        .copied()
        .expect("a.rs should contribute at least one gram unique to doc 5");
    let removed: HashSet<PathBuf> = [PathBuf::from("a.rs")].into();
    let newly_changed: HashSet<PathBuf> = HashSet::new();
    let inc = OverlayView::build_incremental(5, &old, vec![], &newly_changed, &removed).unwrap();
    assert!(
        !inc.gram_index.contains_key(&zzzq_hash),
        "gram unique to deleted file should be removed from index"
    );
}
#[test]
fn delta_posting_lists_sorted_after_new_doc() {
    // Appending a new doc via the delta path must keep every posting list sorted.
    let base = OverlayView::build(0, dirty(&[("a.rs", b"fn shared_token() {}")])).unwrap();
    let changed: HashSet<PathBuf> = [PathBuf::from("b.rs")].into();
    let gone: HashSet<PathBuf> = HashSet::new();
    let files = dirty(&[("b.rs", b"fn shared_token() {}")]);
    let rebuilt = OverlayView::build_incremental(0, &base, files, &changed, &gone).unwrap();
    for ids in rebuilt.gram_index.values() {
        let mut expected = ids.clone();
        expected.sort_unstable();
        assert_eq!(ids, &expected, "posting list must be sorted");
    }
}
#[test]
fn incremental_base_changed_reassigns_doc_ids() {
    // When the base index grows (10 -> 20), overlay doc ids must be rebased.
    let base = OverlayView::build(10, dirty(&[("a.rs", b"fn alpha() {}")])).unwrap();
    let id_before = base.docs[0].doc_id;
    let changed: HashSet<PathBuf> = HashSet::new();
    let gone: HashSet<PathBuf> = HashSet::new();
    let rebuilt = OverlayView::build_incremental(20, &base, Vec::new(), &changed, &gone).unwrap();
    let id_after = rebuilt.docs[0].doc_id;
    assert_ne!(
        id_after, id_before,
        "doc_id must be reassigned when base_doc_count grows"
    );
    assert_eq!(id_after, 20, "first doc starts at new base_doc_count");
}
#[test]
fn commit_does_not_clone_base_doc_to_file_id() {
    use syntext::index::Index;
    use syntext::Config;
    use tempfile::TempDir;
    // Committing an overlay-only change must not rebuild the base mapping:
    // the Arc behind base_doc_to_file_id should be pointer-identical in
    // snapshots taken before and after the commit.
    let repo = TempDir::new().unwrap();
    let index_dir = TempDir::new().unwrap();
    for i in 0..10u8 {
        let path = repo.path().join(format!("base_{i}.rs"));
        std::fs::write(&path, format!("fn f{i}() {{}}\n")).unwrap();
    }
    let config = Config {
        index_dir: index_dir.path().to_path_buf(),
        repo_root: repo.path().to_path_buf(),
        ..Config::default()
    };
    let index = Index::build(config).unwrap();
    let snap_before = index.snapshot();
    let ptr_before = Arc::as_ptr(&snap_before.base_doc_to_file_id);
    let added = repo.path().join("new.rs");
    std::fs::write(&added, b"fn beta() {}\n").unwrap();
    index.notify_change(&added).unwrap();
    index.commit_batch().unwrap();
    let snap_after = index.snapshot();
    let ptr_after = Arc::as_ptr(&snap_after.base_doc_to_file_id);
    assert_eq!(
        ptr_before, ptr_after,
        "base_doc_to_file_id Arc must be shared across commits"
    );
    drop(index);
}