use super::to_corpus_relative_path;
/// Checks that `to_corpus_relative_path` produces the same string as an inline
/// "strip the root prefix, fall back to the full path" computation for a path
/// that lives under the root.
///
/// NOTE(review): per the test name, the inline computation presumably mirrors
/// what the batch loop does — confirm against that code.
#[test]
fn to_corpus_relative_path_agrees_with_batch_loop() {
    use std::path::Path;

    let root = Path::new("/repo/root");
    let path = Path::new("/repo/root/src/lib.rs");

    // Reference result: the root-relative path when the prefix matches,
    // otherwise the path unchanged.
    let expected = match path.strip_prefix(root) {
        Ok(relative) => relative.display().to_string(),
        Err(_) => path.display().to_string(),
    };

    assert_eq!(to_corpus_relative_path(root, path), expected);
}
/// Models the disk-existence guard: a corpus-relative path that resolves to a
/// file still present on disk must not be selected for pruning.
///
/// NOTE(review): the guard is reproduced inline here (`exists()` on the joined
/// path); no production function is invoked by this test.
#[test]
fn disk_existence_guard_skips_live_file() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    // Create the file that the guard is expected to protect.
    std::fs::write(root.join("live.rs"), "fn live() {}").unwrap();

    // Resolve the corpus-relative name against the temporary root.
    let corpus_relative = "live.rs";
    let resolved = root.join(corpus_relative);
    assert!(resolved.exists(), "test setup: live.rs must exist on disk");

    // A file is only a prune candidate when it is missing from disk.
    let still_on_disk = resolved.exists();
    assert!(
        still_on_disk,
        "disk-existence guard must prevent pruning a file still present on disk"
    );
}
/// #848: after upserting two chunks for `src/a.rs` and one for `src/b.rs`,
/// `list_indexed_files` must report each file path exactly once.
#[test]
fn list_indexed_files_returns_distinct_paths() {
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::core::corpus::CorpusStore;

    /// Builds a one-line Rust code chunk with the given file and id; every
    /// other field is left empty / `None` / zero.
    fn make_chunk(file: &str, id: &str) -> RawChunk {
        RawChunk {
            id: id.to_owned(),
            file: file.to_owned(),
            start_line: 1,
            end_line: 1,
            content: format!("fn {id}() {{}}"),
            function_name: None,
            language: Some(String::from("rust")),
            chunk_type: ChunkType::Code,
            calls: vec![],
            inherits_from: vec![],
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: vec![],
            nlp_keywords: vec![],
            nlp_code_refs: vec![],
            virtual_terms: vec![],
        }
    }

    let workdir = tempfile::tempdir().unwrap();
    let index_path = workdir.path().join("index.redb");
    let store = CorpusStore::open(&index_path).unwrap();

    // Two chunks share `src/a.rs`, so a naive listing would repeat that path.
    let seeded = [
        make_chunk("src/a.rs", "a:1:10"),
        make_chunk("src/a.rs", "a:11:20"),
        make_chunk("src/b.rs", "b:1:10"),
    ];
    store.upsert_chunks(&seeded).unwrap();

    // Sort so the comparison does not depend on the listing order.
    let mut files = store.list_indexed_files().unwrap();
    files.sort();

    let expected = vec![String::from("src/a.rs"), String::from("src/b.rs")];
    assert_eq!(
        files, expected,
        "#848: list_indexed_files must return each file exactly once"
    );
}
/// Pre-fix model for #848: a live store holding `kept.rs` and `deleted.rs` is
/// copied into a staging store with `copy_all_from` and nothing is deleted
/// afterwards. After reopening the staging store, `deleted.rs` must still be
/// listed, which demonstrates why a prune pass is required.
#[test]
fn deleted_file_chunks_persist_without_prune_pass() {
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::core::corpus::CorpusStore;

    /// Builds a one-line Rust code chunk with the given file and id; every
    /// other field is left empty / `None` / zero.
    fn make_chunk(file: &str, id: &str) -> RawChunk {
        RawChunk {
            id: id.to_owned(),
            file: file.to_owned(),
            start_line: 1,
            end_line: 1,
            content: format!("fn {id}() {{}}"),
            function_name: None,
            language: Some(String::from("rust")),
            chunk_type: ChunkType::Code,
            calls: vec![],
            inherits_from: vec![],
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: vec![],
            nlp_keywords: vec![],
            nlp_code_refs: vec![],
            virtual_terms: vec![],
        }
    }

    let workdir = tempfile::tempdir().unwrap();

    // Seed the live store, then close it before it is reopened for the copy.
    let live_path = workdir.path().join("pre848_live.redb");
    {
        let seed_store = CorpusStore::open(&live_path).unwrap();
        let seeded_chunks = [
            make_chunk("kept.rs", "kept:1:10"),
            make_chunk("deleted.rs", "deleted:1:10"),
        ];
        seed_store.upsert_chunks(&seeded_chunks).unwrap();
        seed_store
            .upsert_file_hashes(&[("kept.rs", "aa"), ("deleted.rs", "bb")])
            .unwrap();
    }

    // Copy live -> staging and deliberately skip any pruning.
    let staging_path = workdir.path().join("pre848_staging.redb");
    {
        let source = CorpusStore::open(&live_path).unwrap();
        let target = CorpusStore::open_fresh(&staging_path).unwrap();
        target.copy_all_from(&source).unwrap();
    }

    // Inspect the staging store through a fresh handle.
    let staged = CorpusStore::open(&staging_path).unwrap();
    let listed = staged.list_indexed_files().unwrap();
    let stale_entry_survives = listed.iter().any(|path| path == "deleted.rs");
    assert!(
        stale_entry_survives,
        "PRE-FIX #848 model: deleted.rs MUST still be present without a prune pass \
         (proving the bug exists and the fix is needed)"
    );
}
/// Post-fix model for #848: after copying the live store into staging, the
/// files that are indexed but were not walked are identified by set
/// difference, and their chunks, entities and file-hash entries are deleted.
/// Reopening the staging store must then show `kept.rs` (and its hash) but no
/// trace of `deleted.rs` in the file listing or the file hashes.
#[test]
fn prune_pass_removes_deleted_file_from_staged_corpus() {
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::core::corpus::CorpusStore;

    /// Builds a one-line Rust code chunk with the given file and id; every
    /// other field is left empty / `None` / zero.
    fn make_chunk(file: &str, id: &str) -> RawChunk {
        RawChunk {
            id: id.to_owned(),
            file: file.to_owned(),
            start_line: 1,
            end_line: 1,
            content: format!("fn {id}() {{}}"),
            function_name: None,
            language: Some(String::from("rust")),
            chunk_type: ChunkType::Code,
            calls: vec![],
            inherits_from: vec![],
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: vec![],
            nlp_keywords: vec![],
            nlp_code_refs: vec![],
            virtual_terms: vec![],
        }
    }

    let workdir = tempfile::tempdir().unwrap();

    // Seed the live store with chunks, entities and hashes for both files,
    // then close it before it is reopened for the copy.
    let live_path = workdir.path().join("post848_live.redb");
    {
        let seed_store = CorpusStore::open(&live_path).unwrap();
        let seeded_chunks = [
            make_chunk("kept.rs", "kept:1:10"),
            make_chunk("deleted.rs", "deleted:1:10"),
        ];
        seed_store.upsert_chunks(&seeded_chunks).unwrap();
        seed_store
            .upsert_entities(&[
                ("kept.rs".to_string(), Vec::new()),
                ("deleted.rs".to_string(), Vec::new()),
            ])
            .unwrap();
        seed_store
            .upsert_file_hashes(&[("kept.rs", "aa"), ("deleted.rs", "bb")])
            .unwrap();
    }

    // Copy live -> staging; the live handle is released once the copy is done
    // while the staging handle stays open for the prune steps.
    let staging_path = workdir.path().join("post848_staging.redb");
    let live = CorpusStore::open(&live_path).unwrap();
    let staging = CorpusStore::open_fresh(&staging_path).unwrap();
    staging.copy_all_from(&live).unwrap();
    drop(live);

    // Only kept.rs was "walked"; anything indexed beyond that is stale.
    let mut walked_set: std::collections::HashSet<String> = std::collections::HashSet::new();
    walked_set.insert("kept.rs".to_string());

    let mut stale_files: Vec<String> = Vec::new();
    for indexed_file in staging.list_indexed_files().unwrap() {
        if !walked_set.contains(&indexed_file) {
            stale_files.push(indexed_file);
        }
    }
    assert_eq!(
        stale_files,
        vec!["deleted.rs".to_string()],
        "#848: set-difference must identify deleted.rs as stale"
    );

    // Prune step 1: remove every chunk that belongs to the stale file.
    let mut stale_chunk_ids: Vec<String> = Vec::new();
    for stored in staging.load_all_chunks().unwrap() {
        if stored.file == "deleted.rs" {
            stale_chunk_ids.push(stored.id);
        }
    }
    staging.delete_chunks(&stale_chunk_ids).unwrap();

    // Prune steps 2 and 3: remove its entities and its file-hash entry.
    staging.delete_entities("deleted.rs").unwrap();
    staging
        .delete_file_hash_entries(&["deleted.rs".to_string()])
        .unwrap();

    // Close the staging handle before inspecting the result via a new one.
    drop(staging);
    let reopened = CorpusStore::open(&staging_path).unwrap();

    let files_after = reopened.list_indexed_files().unwrap();
    let deleted_still_listed = files_after.iter().any(|path| path == "deleted.rs");
    assert!(
        !deleted_still_listed,
        "#848 POST-FIX: deleted.rs must be absent from the promoted corpus \
         after the prune pass; found files: {:?}",
        files_after
    );
    let kept_still_listed = files_after.iter().any(|path| path == "kept.rs");
    assert!(
        kept_still_listed,
        "#848 POST-FIX: kept.rs must still be present in the promoted corpus"
    );

    let hashes = reopened.load_file_hashes().unwrap();
    let deleted_hash_present = hashes.iter().any(|(file, _)| file == "deleted.rs");
    assert!(
        !deleted_hash_present,
        "#848 POST-FIX: file-hash entry for deleted.rs must be removed"
    );
    let kept_hash_present = hashes.iter().any(|(file, _)| file == "kept.rs");
    assert!(
        kept_hash_present,
        "#848 POST-FIX: file-hash entry for kept.rs must still be present"
    );
}
/// #855: when a file shrinks from three chunks to one, re-indexing it must not
/// leave the two vanished chunks behind.
///
/// The test runs three staging copies of the same live store:
/// 1. a plain copy, to confirm the setup (three chunks for `shrunk.rs`);
/// 2. the pre-fix model (upsert only), which must still hold three chunks,
///    demonstrating the orphan-chunk bug;
/// 3. the post-fix model (delete the file's old chunks, then upsert), which
///    must hold exactly the one new chunk with the new content.
#[test]
fn changed_file_orphan_chunks_removed_before_reinsert() {
    use crate::core::chunker::{ChunkType, RawChunk};
    use crate::core::corpus::CorpusStore;

    /// Builds a one-line Rust code chunk with the given file, id and content;
    /// every other field is left empty / `None` / zero.
    fn make_chunk(file: &str, id: &str, content: &str) -> RawChunk {
        RawChunk {
            id: id.to_owned(),
            file: file.to_owned(),
            start_line: 1,
            end_line: 1,
            content: content.to_owned(),
            function_name: None,
            language: Some(String::from("rust")),
            chunk_type: ChunkType::Code,
            calls: vec![],
            inherits_from: vec![],
            chunk_depth: 0,
            parent_chunk_id: None,
            child_chunk_ids: vec![],
            nlp_keywords: vec![],
            nlp_code_refs: vec![],
            virtual_terms: vec![],
        }
    }

    // Loads all chunks from `store` and keeps those belonging to shrunk.rs.
    let shrunk_chunks = |store: &CorpusStore| {
        store
            .load_all_chunks()
            .unwrap()
            .into_iter()
            .filter(|c| c.file == "shrunk.rs")
            .collect::<Vec<_>>()
    };

    let workdir = tempfile::tempdir().unwrap();

    // Live store: shrunk.rs starts out with three chunks and an old hash.
    let live_path = workdir.path().join("855_live.redb");
    {
        let seed_store = CorpusStore::open(&live_path).unwrap();
        let original_chunks = [
            make_chunk("shrunk.rs", "shrunk:fn_a", "fn fn_a() {}"),
            make_chunk("shrunk.rs", "shrunk:fn_b", "fn fn_b() {}"),
            make_chunk("shrunk.rs", "shrunk:fn_c", "fn fn_c() {}"),
        ];
        seed_store.upsert_chunks(&original_chunks).unwrap();
        seed_store
            .upsert_file_hashes(&[("shrunk.rs", "old_hash")])
            .unwrap();
    }

    // --- Setup check: a plain copy carries all three chunks over. ---
    let staging_path = workdir.path().join("855_staging.redb");
    {
        let source = CorpusStore::open(&live_path).unwrap();
        let target = CorpusStore::open_fresh(&staging_path).unwrap();
        target.copy_all_from(&source).unwrap();

        let listed = target.list_indexed_files().unwrap();
        let shrunk_listed = listed.iter().any(|path| path == "shrunk.rs");
        assert!(
            shrunk_listed,
            "#855 setup: staging must contain shrunk.rs after copy_all_from"
        );

        let initial_chunks = shrunk_chunks(&target).len();
        assert_eq!(
            initial_chunks, 3,
            "#855 setup: staging must start with 3 chunks for shrunk.rs"
        );
    }

    // --- Pre-fix model: upsert the single new chunk without deleting. ---
    let prefix_staging_path = workdir.path().join("855_prefix_staging.redb");
    {
        let source = CorpusStore::open(&live_path).unwrap();
        let target = CorpusStore::open_fresh(&prefix_staging_path).unwrap();
        target.copy_all_from(&source).unwrap();
        target
            .upsert_chunks(&[make_chunk(
                "shrunk.rs",
                "shrunk:fn_a",
                "fn fn_a_new() {}",
            )])
            .unwrap();
    }
    let prefix = CorpusStore::open(&prefix_staging_path).unwrap();
    let prefix_chunks = shrunk_chunks(&prefix);
    assert_eq!(
        prefix_chunks.len(),
        3,
        "#855 PRE-FIX model: upsert-only must leave 3 chunks (1 new + 2 orphan), \
         proving the orphan-chunk bug exists"
    );
    let prefix_ids: Vec<&str> = prefix_chunks.iter().map(|c| c.id.as_str()).collect();
    assert!(
        prefix_ids.contains(&"shrunk:fn_b"),
        "#855 PRE-FIX model: orphan chunk shrunk:fn_b must survive upsert-only"
    );
    assert!(
        prefix_ids.contains(&"shrunk:fn_c"),
        "#855 PRE-FIX model: orphan chunk shrunk:fn_c must survive upsert-only"
    );

    // --- Post-fix model: delete the file's old chunks, then upsert. ---
    let postfix_staging_path = workdir.path().join("855_postfix_staging.redb");
    {
        let source = CorpusStore::open(&live_path).unwrap();
        let target = CorpusStore::open_fresh(&postfix_staging_path).unwrap();
        target.copy_all_from(&source).unwrap();

        let old_chunk_ids: Vec<String> = shrunk_chunks(&target)
            .into_iter()
            .map(|c| c.id)
            .collect();
        target.delete_chunks(&old_chunk_ids).unwrap();

        target
            .upsert_chunks(&[make_chunk(
                "shrunk.rs",
                "shrunk:fn_a",
                "fn fn_a_new() {}",
            )])
            .unwrap();
    }
    let postfix = CorpusStore::open(&postfix_staging_path).unwrap();
    let postfix_chunks = shrunk_chunks(&postfix);
    assert_eq!(
        postfix_chunks.len(),
        1,
        "#855 POST-FIX: delete-then-insert must leave exactly 1 chunk for shrunk.rs; \
         found: {:?}",
        postfix_chunks.iter().map(|c| &c.id).collect::<Vec<_>>()
    );

    // The length check above guarantees index 0 exists.
    let survivor = &postfix_chunks[0];
    assert_eq!(
        survivor.id, "shrunk:fn_a",
        "#855 POST-FIX: the surviving chunk must be the newly inserted one"
    );
    assert_eq!(
        survivor.content, "fn fn_a_new() {}",
        "#855 POST-FIX: the surviving chunk must have the NEW content, not stale content"
    );
    assert!(
        postfix_chunks.iter().all(|c| c.id != "shrunk:fn_b"),
        "#855 POST-FIX: orphan chunk shrunk:fn_b must be removed by delete-then-insert"
    );
    assert!(
        postfix_chunks.iter().all(|c| c.id != "shrunk:fn_c"),
        "#855 POST-FIX: orphan chunk shrunk:fn_c must be removed by delete-then-insert"
    );
}