use super::to_corpus_relative_path;
/// Checks that `to_corpus_relative_path` produces the same string as the
/// inline `strip_prefix` / fall-back / `display` expression for a file that
/// lives underneath the root.
#[test]
fn to_corpus_relative_path_agrees_with_batch_loop() {
    use std::path::Path;

    let corpus_root = Path::new("/repo/root");
    let source_file = Path::new("/repo/root/src/lib.rs");

    // Reference computation: strip the root when possible, otherwise keep the
    // path untouched, then render it through `Display`.
    let reference = match source_file.strip_prefix(corpus_root) {
        Ok(relative) => relative.display().to_string(),
        Err(_) => source_file.display().to_string(),
    };

    assert_eq!(to_corpus_relative_path(corpus_root, source_file), reference);
}
/// Models the disk-existence guard: a corpus-relative path that still resolves
/// to a file under the root must not be treated as prunable.
///
/// NOTE(review): this only exercises the filesystem existence check itself;
/// no production prune routine is called from here.
#[test]
fn disk_existence_guard_skips_live_file() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    // Create the "live" file directly under the temporary root.
    std::fs::write(root.join("live.rs"), "fn live() {}").unwrap();

    // Resolve the corpus-relative name back to an absolute path.
    let relative_name = "live.rs";
    let resolved = root.join(relative_name);
    assert!(resolved.exists(), "test setup: live.rs must exist on disk");

    // The guard keeps every entry whose file is still on disk.
    let still_on_disk = resolved.exists();
    assert!(
        still_on_disk,
        "disk-existence guard must prevent pruning a file still present on disk"
    );
}
/// Stores three chunks spread over two files and checks that
/// `list_indexed_files` reports each file path exactly once.
#[test]
fn list_indexed_files_returns_distinct_paths() {
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::core::corpus::CorpusStore;

    // Minimal one-line Rust code chunk belonging to `file`, identified by `id`.
    fn make_chunk(file: &str, id: &str) -> RawChunk {
        RawChunk {
            id: id.to_string(),
            file: file.to_string(),
            start_line: 1,
            end_line: 1,
            content: format!("fn {id}() {{}}"),
            function_name: None,
            language: Some("rust".to_string()),
            chunk_type: ChunkType::Code,
            calls: Vec::new(),
            inherits_from: Vec::new(),
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: Vec::new(),
            nlp_keywords: Vec::new(),
            nlp_code_refs: Vec::new(),
            virtual_terms: Vec::new(),
        }
    }

    let workspace = tempfile::tempdir().unwrap();
    let store = CorpusStore::open(&workspace.path().join("index.redb")).unwrap();

    // Two chunks share `src/a.rs`; one belongs to `src/b.rs`.
    let batch = [
        make_chunk("src/a.rs", "a:1:10"),
        make_chunk("src/a.rs", "a:11:20"),
        make_chunk("src/b.rs", "b:1:10"),
    ];
    store.upsert_chunks(&batch).unwrap();

    // Sort so the comparison does not depend on the order the store returns.
    let mut listed = store.list_indexed_files().unwrap();
    listed.sort_unstable();

    let expected = vec![String::from("src/a.rs"), String::from("src/b.rs")];
    assert_eq!(
        listed, expected,
        "#848: list_indexed_files must return each file exactly once"
    );
}
/// Pre-fix model for #848: copying a live store into a fresh staging store
/// without any prune step leaves the entry for `deleted.rs` in the staged
/// corpus.
#[test]
fn deleted_file_chunks_persist_without_prune_pass() {
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::core::corpus::CorpusStore;

    // Minimal one-line Rust code chunk belonging to `file`, identified by `id`.
    fn make_chunk(file: &str, id: &str) -> RawChunk {
        RawChunk {
            id: id.to_string(),
            file: file.to_string(),
            start_line: 1,
            end_line: 1,
            content: format!("fn {id}() {{}}"),
            function_name: None,
            language: Some("rust".to_string()),
            chunk_type: ChunkType::Code,
            calls: Vec::new(),
            inherits_from: Vec::new(),
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: Vec::new(),
            nlp_keywords: Vec::new(),
            nlp_code_refs: Vec::new(),
            virtual_terms: Vec::new(),
        }
    }

    let workspace = tempfile::tempdir().unwrap();
    let live_db = workspace.path().join("pre848_live.redb");
    let staging_db = workspace.path().join("pre848_staging.redb");

    // Seed the live store with two files, then let the handle drop.
    {
        let seeded = CorpusStore::open(&live_db).unwrap();
        let batch = [
            make_chunk("kept.rs", "kept:1:10"),
            make_chunk("deleted.rs", "deleted:1:10"),
        ];
        seeded.upsert_chunks(&batch).unwrap();
        seeded
            .upsert_file_hashes(&[("kept.rs", "aa"), ("deleted.rs", "bb")])
            .unwrap();
    }

    // Copy everything into a fresh staging store; no prune step is applied.
    {
        let source = CorpusStore::open(&live_db).unwrap();
        let target = CorpusStore::open_fresh(&staging_db).unwrap();
        target.copy_all_from(&source).unwrap();
    }

    // Reopen the staging store and confirm the stale file is still listed.
    let reopened = CorpusStore::open(&staging_db).unwrap();
    let listed = reopened.list_indexed_files().unwrap();
    let stale_entry_survives = listed.iter().any(|f| f == "deleted.rs");
    assert!(
        stale_entry_survives,
        "PRE-FIX #848 model: deleted.rs MUST still be present without a prune pass \
         (proving the bug exists and the fix is needed)"
    );
}
/// Post-fix model for #848: after copying the live store into staging, the
/// test replays the prune steps by hand (set difference against the walked
/// files, then chunk / entity / file-hash deletion) and checks that only
/// `deleted.rs` disappears from the reopened staging store.
#[test]
fn prune_pass_removes_deleted_file_from_staged_corpus() {
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::core::corpus::CorpusStore;
    use std::collections::HashSet;

    // Minimal one-line Rust code chunk belonging to `file`, identified by `id`.
    fn make_chunk(file: &str, id: &str) -> RawChunk {
        RawChunk {
            id: id.to_string(),
            file: file.to_string(),
            start_line: 1,
            end_line: 1,
            content: format!("fn {id}() {{}}"),
            function_name: None,
            language: Some("rust".to_string()),
            chunk_type: ChunkType::Code,
            calls: Vec::new(),
            inherits_from: Vec::new(),
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: Vec::new(),
            nlp_keywords: Vec::new(),
            nlp_code_refs: Vec::new(),
            virtual_terms: Vec::new(),
        }
    }

    let workspace = tempfile::tempdir().unwrap();
    let live_db = workspace.path().join("post848_live.redb");
    let staging_db = workspace.path().join("post848_staging.redb");

    // Seed the live store with chunks, entities and hashes for both files.
    {
        let seeded = CorpusStore::open(&live_db).unwrap();
        let batch = [
            make_chunk("kept.rs", "kept:1:10"),
            make_chunk("deleted.rs", "deleted:1:10"),
        ];
        seeded.upsert_chunks(&batch).unwrap();
        seeded
            .upsert_entities(&[
                ("kept.rs".to_string(), Vec::new()),
                ("deleted.rs".to_string(), Vec::new()),
            ])
            .unwrap();
        seeded
            .upsert_file_hashes(&[("kept.rs", "aa"), ("deleted.rs", "bb")])
            .unwrap();
    }

    // Stage a full copy of the live store; the live handle drops at block end.
    let staged = {
        let source = CorpusStore::open(&live_db).unwrap();
        let target = CorpusStore::open_fresh(&staging_db).unwrap();
        target.copy_all_from(&source).unwrap();
        target
    };

    // Only `kept.rs` was "walked"; anything indexed but not walked is stale.
    let mut walked: HashSet<String> = HashSet::new();
    walked.insert("kept.rs".to_string());

    let mut stale_files: Vec<String> = Vec::new();
    for indexed_file in staged.list_indexed_files().unwrap() {
        if !walked.contains(&indexed_file) {
            stale_files.push(indexed_file);
        }
    }
    assert_eq!(
        stale_files,
        vec!["deleted.rs".to_string()],
        "#848: set-difference must identify deleted.rs as stale"
    );

    // Gather the ids of every chunk that belongs to the stale file.
    let mut stale_chunk_ids: Vec<String> = Vec::new();
    for stored in staged.load_all_chunks().unwrap() {
        if stored.file == "deleted.rs" {
            stale_chunk_ids.push(stored.id);
        }
    }

    // Prune chunks, entities and the file-hash entry, in that order.
    staged.delete_chunks(&stale_chunk_ids).unwrap();
    staged.delete_entities("deleted.rs").unwrap();
    staged
        .delete_file_hash_entries(&["deleted.rs".to_string()])
        .unwrap();

    // Release the staging handle before reopening the same database file.
    drop(staged);
    let reopened = CorpusStore::open(&staging_db).unwrap();

    let files_after = reopened.list_indexed_files().unwrap();
    let is_listed = |name: &str| files_after.iter().any(|f| f == name);
    assert!(
        !is_listed("deleted.rs"),
        "#848 POST-FIX: deleted.rs must be absent from the promoted corpus \
         after the prune pass; found files: {:?}",
        files_after
    );
    assert!(
        is_listed("kept.rs"),
        "#848 POST-FIX: kept.rs must still be present in the promoted corpus"
    );

    let hashes = reopened.load_file_hashes().unwrap();
    let has_hash = |name: &str| hashes.iter().any(|(f, _)| f == name);
    assert!(
        !has_hash("deleted.rs"),
        "#848 POST-FIX: file-hash entry for deleted.rs must be removed"
    );
    assert!(
        has_hash("kept.rs"),
        "#848 POST-FIX: file-hash entry for kept.rs must still be present"
    );
}