use std::fs;
use std::path::{Path, PathBuf};
use ripvec_core::embed::SearchConfig;
use ripvec_core::encoder::ripvec::dense::{DEFAULT_MODEL_REPO, StaticEncoder};
use ripvec_core::encoder::ripvec::index::RipvecIndex;
use ripvec_core::hybrid::SearchMode;
use ripvec_core::profile::Profiler;
/// Chooses where the encoder model is loaded from.
///
/// Returns the value of the `RIPVEC_SEMBLE_MODEL_PATH` environment variable
/// when it can be read; otherwise (unset, or not valid Unicode) falls back to
/// `DEFAULT_MODEL_REPO`.
fn resolve_model_source() -> String {
    match std::env::var("RIPVEC_SEMBLE_MODEL_PATH") {
        Ok(overridden) => overridden,
        Err(_) => DEFAULT_MODEL_REPO.to_string(),
    }
}
/// Returns the single, process-wide mutex that callers hold while loading the
/// encoder.
///
/// Every call hands back a reference to the same `Mutex<()>`, so any two
/// callers holding its guard exclude one another.
fn download_lock() -> &'static std::sync::Mutex<()> {
    // `Mutex::new` is a `const fn`, so the lock can live in a plain static
    // without any lazy-initialisation wrapper.
    static DOWNLOAD_GATE: std::sync::Mutex<()> = std::sync::Mutex::new(());
    &DOWNLOAD_GATE
}
/// Writes the three-file fixture project (`src/lib.rs`, `src/util.rs`,
/// `README.md`) underneath `root`, creating parent directories as needed.
///
/// # Panics
///
/// Panics if a directory cannot be created or a file cannot be written.
fn build_test_corpus(root: &Path) {
    // (path relative to `root`, file contents)
    const CORPUS: [(&str, &str); 3] = [
        (
            "src/lib.rs",
            "pub fn one() -> u32 { 1 }\npub fn two() -> u32 { 2 }\n",
        ),
        ("src/util.rs", "pub fn helper(x: u32) -> u32 { x + 1 }\n"),
        (
            "README.md",
            "# Test corpus\nAn empty test project for reconcile tests.\n",
        ),
    ];

    for &(relative, body) in CORPUS.iter() {
        let target = root.join(relative);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).unwrap();
        }
        fs::write(&target, body).unwrap();
    }
}
/// Builds a `RipvecIndex` over `root` with the static encoder and a fixed
/// hybrid-mode `SearchConfig` (no extension filters, no ignore patterns).
///
/// The encoder is loaded from `resolve_model_source()` while holding
/// `download_lock()`, so concurrent tests load it one at a time.
///
/// # Panics
///
/// Panics if the encoder cannot be loaded or the index cannot be built.
fn load_index(root: &Path) -> RipvecIndex {
    let source = resolve_model_source();
    let encoder = {
        // The mutex protects no data (`()`), so poisoning signals nothing
        // worth aborting for. Recover the guard so that one test panicking
        // during the load does not make every other test fail with an
        // unrelated `PoisonError`.
        let _guard = download_lock()
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        StaticEncoder::from_pretrained(&source).expect("encoder load")
        // `_guard` is released here, before the index build starts.
    };
    let cfg = SearchConfig {
        batch_size: 32,
        max_tokens: 512,
        chunk: ripvec_core::chunk::ChunkConfig {
            max_chunk_bytes: 4096,
            window_size: 2048,
            window_overlap: 512,
        },
        text_mode: false,
        cascade_dim: None,
        file_type: None,
        exclude_extensions: Vec::new(),
        include_extensions: Vec::new(),
        ignore_patterns: Vec::new(),
        scope: ripvec_core::embed::Scope::All,
        mode: SearchMode::Hybrid,
    };
    RipvecIndex::from_root(root, encoder, &cfg, &Profiler::noop(), None, 0.0)
        .expect("RipvecIndex build")
}
/// Looks up the manifest key whose path ends with `filename`.
///
/// Returns a clone of the first matching key in the manifest's key iteration
/// order, or `None` when no key matches.
fn manifest_path_for(index: &RipvecIndex, filename: &str) -> Option<PathBuf> {
    for candidate in index.manifest().files.keys() {
        if candidate.ends_with(filename) {
            return Some(candidate.clone());
        }
    }
    None
}
/// A freshly built index must list all three corpus files in its manifest,
/// and every file that produced a chunk must be matched by some manifest key.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn manifest_populated_at_build_time() {
    let workspace = tempfile::TempDir::new().unwrap();
    build_test_corpus(workspace.path());
    let index = load_index(workspace.path());
    let manifest = index.manifest();

    let tracked = manifest.len();
    assert_eq!(
        tracked, 3,
        "manifest should track all 3 corpus files; got {}",
        tracked
    );

    // De-duplicate: a single file may contribute several chunks.
    let mut chunk_files = std::collections::HashSet::new();
    for chunk in index.chunks().iter() {
        chunk_files.insert(chunk.file_path.as_str());
    }

    for chunk_file in chunk_files {
        // Textual suffix comparison between the manifest key and the chunk's
        // recorded file path.
        let exists_in_manifest = manifest
            .files
            .keys()
            .map(|key| key.to_string_lossy())
            .any(|key| key.ends_with(chunk_file));
        assert!(
            exists_in_manifest,
            "chunk file {chunk_file:?} must also exist in manifest"
        );
    }
}
/// With no filesystem changes after the build, the diff must be empty.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn diff_empty_immediately_after_build() {
    let workspace = tempfile::TempDir::new().unwrap();
    let root = workspace.path();
    build_test_corpus(root);
    let index = load_index(root);

    let diff = index.diff_against_filesystem();

    let (dirty_count, new_count, deleted_count) =
        (diff.dirty.len(), diff.new.len(), diff.deleted.len());
    assert!(
        diff.is_empty(),
        "fresh index against unchanged FS must yield empty diff; got dirty={} new={} deleted={}",
        dirty_count,
        new_count,
        deleted_count
    );
}
/// A file created after the build must be the only entry in `diff.new`, with
/// nothing reported as dirty or deleted.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn diff_detects_added_file() {
    let workspace = tempfile::TempDir::new().unwrap();
    let root = workspace.path();
    build_test_corpus(root);
    let index = load_index(root);

    // Add a file the index has never seen.
    fs::write(root.join("src/added.rs"), "pub fn fresh() {}\n").unwrap();

    let diff = index.diff_against_filesystem();

    assert!(
        diff.dirty.is_empty(),
        "no dirty expected; got {:?}",
        diff.dirty
    );
    assert!(
        diff.deleted.is_empty(),
        "no deleted expected; got {:?}",
        diff.deleted
    );
    assert_eq!(
        diff.new.len(),
        1,
        "added.rs must appear in new; got {:?}",
        diff.new
    );
    let added = &diff.new[0];
    assert!(
        added.ends_with("src/added.rs"),
        "new path {:?} must end with src/added.rs",
        added
    );
}
/// Removing an indexed file from disk must yield exactly one `deleted` entry
/// and nothing dirty or new.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn diff_detects_deleted_file() {
    let tmp = tempfile::TempDir::new().unwrap();
    build_test_corpus(tmp.path());
    let index = load_index(tmp.path());
    // Use the path exactly as the manifest recorded it.
    let util = manifest_path_for(&index, "src/util.rs").expect("util.rs in manifest");
    fs::remove_file(&util).unwrap();

    let diff = index.diff_against_filesystem();

    // Each assertion prints the offending list so a failure is diagnosable
    // from the test output alone.
    assert!(
        diff.dirty.is_empty(),
        "no dirty expected; got {:?}",
        diff.dirty
    );
    assert!(diff.new.is_empty(), "no new expected; got {:?}", diff.new);
    assert_eq!(
        diff.deleted.len(),
        1,
        "util.rs must appear in deleted; got {:?}",
        diff.deleted
    );
    assert!(
        diff.deleted[0].ends_with("src/util.rs"),
        "deleted path {:?} must end with src/util.rs",
        diff.deleted[0]
    );
}
/// Rewriting an indexed file with different content must yield exactly one
/// `dirty` entry and nothing new or deleted.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn diff_detects_real_content_change() {
    let tmp = tempfile::TempDir::new().unwrap();
    build_test_corpus(tmp.path());
    let index = load_index(tmp.path());
    let util = manifest_path_for(&index, "src/util.rs").expect("util.rs in manifest");

    // NOTE(review): the pause is presumably there so the rewritten file gets
    // a later modification time than the one seen at build time — confirm
    // against the diff implementation.
    std::thread::sleep(std::time::Duration::from_millis(20));
    fs::write(&util, "pub fn helper(x: u32) -> u32 { x * 2 }\n").unwrap();

    let diff = index.diff_against_filesystem();

    assert!(diff.new.is_empty(), "no new expected; got {:?}", diff.new);
    assert!(
        diff.deleted.is_empty(),
        "no deleted; got {:?}",
        diff.deleted
    );
    // Print the actual list/path so a failure shows what was reported.
    assert_eq!(
        diff.dirty.len(),
        1,
        "util.rs edit must be dirty; got {:?}",
        diff.dirty
    );
    assert!(
        diff.dirty[0].ends_with("src/util.rs"),
        "dirty path {:?} must end with src/util.rs",
        diff.dirty[0]
    );
}
/// Rewriting an indexed file with byte-identical content must leave the diff
/// empty.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn diff_ignores_touched_but_unchanged() {
    let workspace = tempfile::TempDir::new().unwrap();
    let root = workspace.path();
    build_test_corpus(root);
    let index = load_index(root);
    let util_path = manifest_path_for(&index, "src/util.rs").expect("util.rs in manifest");

    // Read the current content, pause briefly, then write the very same
    // content back.
    let unchanged = fs::read_to_string(&util_path).unwrap();
    let pause = std::time::Duration::from_millis(20);
    std::thread::sleep(pause);
    fs::write(&util_path, &unchanged).unwrap();

    let diff = index.diff_against_filesystem();
    assert!(
        diff.is_empty(),
        "touch-with-same-content must yield empty diff; got dirty={:?} new={:?} deleted={:?}",
        diff.dirty,
        diff.new,
        diff.deleted
    );
}
/// Editing one file, deleting another and adding a third between build and
/// diff must put each path in its own bucket, for a total of three entries.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn diff_handles_simultaneous_add_edit_delete() {
    let tmp = tempfile::TempDir::new().unwrap();
    build_test_corpus(tmp.path());
    let index = load_index(tmp.path());
    let lib = manifest_path_for(&index, "src/lib.rs").expect("lib.rs in manifest");
    let util = manifest_path_for(&index, "src/util.rs").expect("util.rs in manifest");

    std::thread::sleep(std::time::Duration::from_millis(20));
    // Edit lib.rs ...
    fs::write(&lib, "pub fn renamed() -> u32 { 99 }\n").unwrap();
    // ... delete util.rs ...
    fs::remove_file(&util).unwrap();
    // ... and add a brand-new file.
    fs::write(tmp.path().join("src/added.rs"), "pub fn novel() {}\n").unwrap();

    let diff = index.diff_against_filesystem();

    // Every assertion prints the offending list or path so a failure shows
    // which bucket went wrong.
    assert_eq!(
        diff.dirty.len(),
        1,
        "expected 1 dirty (lib.rs); got {:?}",
        diff.dirty
    );
    assert!(
        diff.dirty[0].ends_with("src/lib.rs"),
        "dirty path {:?} must end with src/lib.rs",
        diff.dirty[0]
    );
    assert_eq!(
        diff.deleted.len(),
        1,
        "expected 1 deleted (util.rs); got {:?}",
        diff.deleted
    );
    assert!(
        diff.deleted[0].ends_with("src/util.rs"),
        "deleted path {:?} must end with src/util.rs",
        diff.deleted[0]
    );
    assert_eq!(
        diff.new.len(),
        1,
        "expected 1 new (added.rs); got {:?}",
        diff.new
    );
    assert!(
        diff.new[0].ends_with("src/added.rs"),
        "new path {:?} must end with src/added.rs",
        diff.new[0]
    );
    assert_eq!(diff.total(), 3, "expected 3 changes in total");
}
/// An index built with `exclude_extensions = ["json"]` must not report a
/// newly added `.json` file in `diff.new`, while a newly added `.rs` file
/// must be reported.
#[test]
#[ignore = "requires Model2Vec download (~32 MB on first run)"]
fn diff_honors_walk_options_for_added_files() {
    let tmp = tempfile::TempDir::new().unwrap();
    build_test_corpus(tmp.path());
    let source = resolve_model_source();
    let encoder = {
        // The mutex protects no data (`()`), so poisoning signals nothing
        // worth aborting for. Recover the guard so that a panic in another
        // test's load does not fail this one with an unrelated `PoisonError`.
        let _guard = download_lock()
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        StaticEncoder::from_pretrained(&source).expect("encoder load")
        // `_guard` is released here, before the index build starts.
    };
    // Same settings as `load_index`, except that `.json` files are excluded.
    let cfg = SearchConfig {
        batch_size: 32,
        max_tokens: 512,
        chunk: ripvec_core::chunk::ChunkConfig {
            max_chunk_bytes: 4096,
            window_size: 2048,
            window_overlap: 512,
        },
        text_mode: false,
        cascade_dim: None,
        file_type: None,
        exclude_extensions: vec!["json".to_string()],
        include_extensions: Vec::new(),
        ignore_patterns: Vec::new(),
        scope: ripvec_core::embed::Scope::All,
        mode: SearchMode::Hybrid,
    };
    let index = RipvecIndex::from_root(tmp.path(), encoder, &cfg, &Profiler::noop(), None, 0.0)
        .expect("build");

    // One file that the exclusion should hide, one that it should not.
    fs::write(tmp.path().join("data.json"), "{\"x\": 1}\n").unwrap();
    fs::write(tmp.path().join("src/included.rs"), "fn x() {}\n").unwrap();

    let diff = index.diff_against_filesystem();
    assert!(
        diff.new.iter().all(|p| !p.ends_with("data.json")),
        "excluded .json must not appear in diff.new: {:?}",
        diff.new
    );
    assert!(
        diff.new.iter().any(|p| p.ends_with("src/included.rs")),
        "included .rs must appear in diff.new: {:?}",
        diff.new
    );
}