use std::fs;
use std::path::{Path, PathBuf};
use aft::cache_freshness::{self, FreshnessVerdict};
use aft::semantic_index::{SemanticIndex, SemanticIndexFingerprint};
use crate::test_helpers::{init_test_logger, take_logs};
fn build_test_index(project_root: &Path) -> (SemanticIndex, PathBuf) {
let source_file = project_root.join("src/lib.rs");
fs::create_dir_all(source_file.parent().expect("source parent")).expect("create src dir");
fs::write(
&source_file,
"pub fn handle_request(token: &str) -> bool {\n !token.is_empty()\n}\n\npub fn normalize_user_id(input: &str) -> String {\n input.trim().to_lowercase()\n}\n",
)
.expect("write source file");
let files = vec![source_file.clone()];
let mut embed = |texts: Vec<String>| {
Ok::<Vec<Vec<f32>>, String>(
texts
.into_iter()
.map(|text| {
if text.contains("handle_request") {
vec![1.0, 0.0, 0.0, 0.0]
} else if text.contains("normalize_user_id") {
vec![0.0, 1.0, 0.0, 0.0]
} else {
vec![0.0, 0.0, 1.0, 0.0]
}
})
.collect(),
)
};
let index = SemanticIndex::build(project_root, &files, &mut embed, 16)
.expect("build semantic index with stub embeddings");
(index, source_file)
}
fn push_string(buf: &mut Vec<u8>, value: &str) {
buf.extend_from_slice(&(value.len() as u32).to_le_bytes());
buf.extend_from_slice(value.as_bytes());
}
fn build_v1_index_bytes(file: &Path) -> Vec<u8> {
let mut bytes = Vec::new();
let file_str = file.to_string_lossy();
bytes.push(1u8);
bytes.extend_from_slice(&3u32.to_le_bytes());
bytes.extend_from_slice(&1u32.to_le_bytes());
bytes.extend_from_slice(&1u32.to_le_bytes());
push_string(&mut bytes, &file_str);
bytes.extend_from_slice(&0u64.to_le_bytes());
push_string(&mut bytes, &file_str);
push_string(&mut bytes, "legacy_symbol");
bytes.push(0u8);
bytes.extend_from_slice(&1u32.to_le_bytes());
bytes.extend_from_slice(&3u32.to_le_bytes());
bytes.push(1u8);
push_string(&mut bytes, "fn legacy_symbol() {}");
push_string(
&mut bytes,
"file:src/lib.rs kind:function name:legacy_symbol",
);
for value in [0.1f32, 0.2, 0.3] {
bytes.extend_from_slice(&value.to_le_bytes());
}
bytes
}
#[test]
fn write_and_read_roundtrip_preserves_semantic_entries() {
let project = tempfile::tempdir().expect("create project dir");
let storage = tempfile::tempdir().expect("create storage dir");
let (index, source_file) = build_test_index(project.path());
index.write_to_disk(storage.path(), "roundtrip-project");
let restored = SemanticIndex::read_from_disk(
storage.path(),
"roundtrip-project",
project.path(),
false,
None,
)
.expect("restore semantic index from disk");
assert_eq!(restored.len(), index.len());
assert_eq!(restored.dimension(), index.dimension());
let request_results = restored.search(&[1.0, 0.0, 0.0, 0.0], 2);
assert!(!request_results.is_empty());
assert_eq!(request_results[0].name, "handle_request");
assert_eq!(request_results[0].file, source_file);
assert!(request_results[0].snippet.contains("handle_request"));
let normalize_results = restored.search(&[0.0, 1.0, 0.0, 0.0], 2);
assert!(!normalize_results.is_empty());
assert_eq!(normalize_results[0].name, "normalize_user_id");
}
#[test]
fn read_from_nonexistent_path_returns_none() {
let storage = tempfile::tempdir().expect("create storage dir");
let restored = SemanticIndex::read_from_disk(
storage.path(),
"missing-project",
storage.path(),
false,
None,
);
assert!(restored.is_none());
}
#[test]
fn read_from_corrupt_file_returns_none_and_logs_warning() {
init_test_logger();
let storage = tempfile::tempdir().expect("create storage dir");
let semantic_dir = storage.path().join("semantic").join("corrupt-project");
fs::create_dir_all(&semantic_dir).expect("create semantic dir");
let semantic_file = semantic_dir.join("semantic.bin");
fs::write(&semantic_file, b"corrupt").expect("write corrupt semantic file");
let restored = SemanticIndex::read_from_disk(
storage.path(),
"corrupt-project",
storage.path(),
false,
None,
);
assert!(restored.is_none());
assert!(
!semantic_file.exists(),
"corrupt semantic file should be removed after read failure"
);
let logs = take_logs();
assert!(
logs.iter()
.any(|line| line.contains("corrupt semantic index")),
"expected corrupt-index warning, got {logs:?}"
);
}
#[test]
fn semantic_cache_inconsistent_lengths_rebuilds() {
init_test_logger();
let storage = tempfile::tempdir().expect("create storage dir");
let semantic_dir = storage.path().join("semantic").join("drift-project");
fs::create_dir_all(&semantic_dir).expect("create semantic dir");
let semantic_file = semantic_dir.join("semantic.bin");
let source = storage.path().join("src/lib.rs");
let mut bytes = Vec::new();
bytes.push(6u8);
bytes.extend_from_slice(&1u32.to_le_bytes()); bytes.extend_from_slice(&1u32.to_le_bytes()); bytes.extend_from_slice(&0u32.to_le_bytes()); bytes.extend_from_slice(&0u32.to_le_bytes()); push_string(&mut bytes, &source.to_string_lossy());
push_string(&mut bytes, "drift_symbol");
bytes.push(0u8);
bytes.extend_from_slice(&1u32.to_le_bytes());
bytes.extend_from_slice(&1u32.to_le_bytes());
bytes.push(1u8);
push_string(&mut bytes, "fn drift_symbol() {}");
push_string(
&mut bytes,
"file:src/lib.rs kind:function name:drift_symbol",
);
bytes.extend_from_slice(&1.0f32.to_le_bytes());
fs::write(&semantic_file, bytes).expect("write inconsistent semantic cache");
assert!(SemanticIndex::read_from_disk(
storage.path(),
"drift-project",
storage.path(),
false,
None
)
.is_none());
assert!(
!semantic_file.exists(),
"bad semantic cache should be removed"
);
}
#[test]
fn live_refresh_retries_deferred_new_file_after_deletion_frees_capacity() {
let project = tempfile::tempdir().expect("create project dir");
let old_file = project.path().join("src/old.rs");
let new_file = project.path().join("src/new.rs");
fs::create_dir_all(old_file.parent().expect("source parent")).expect("create src dir");
fs::write(&old_file, "pub fn old_anchor() -> usize { 1 }\n").expect("write old file");
fs::write(&new_file, "pub fn new_anchor() -> usize { 2 }\n").expect("write new file");
let mut embed = |texts: Vec<String>| {
Ok::<Vec<Vec<f32>>, String>(
texts
.into_iter()
.map(|text| {
if text.contains("new_anchor") {
vec![0.0, 1.0, 0.0, 0.0]
} else {
vec![1.0, 0.0, 0.0, 0.0]
}
})
.collect(),
)
};
let mut index = SemanticIndex::build(
project.path(),
std::slice::from_ref(&old_file),
&mut embed,
16,
)
.expect("build initial semantic index");
assert_eq!(index.indexed_file_count(), 1);
let mut progress = |_done: usize, _total: usize| {};
index
.refresh_invalidated_files(
project.path(),
std::slice::from_ref(&new_file),
&mut embed,
16,
1,
&mut progress,
)
.expect("defer new file at cap");
let deferred_results = index.search(&[0.0, 1.0, 0.0, 0.0], 5);
assert!(
deferred_results
.iter()
.all(|result| result.name != "new_anchor"),
"new file should be deferred while the cap is full: {deferred_results:?}"
);
fs::remove_file(&old_file).expect("delete old file");
index
.refresh_invalidated_files(
project.path(),
std::slice::from_ref(&old_file),
&mut embed,
16,
1,
&mut progress,
)
.expect("retry deferred file after deletion");
let results = index.search(&[0.0, 1.0, 0.0, 0.0], 1);
assert_eq!(results.len(), 1);
assert_eq!(results[0].file, new_file);
assert!(
results[0].snippet.contains("new_anchor"),
"deferred file should be indexed after capacity frees: {results:?}"
);
}
#[test]
fn stale_file_detected_after_deletion() {
let project = tempfile::tempdir().expect("create project dir");
let storage = tempfile::tempdir().expect("create storage dir");
let (index, source_file) = build_test_index(project.path());
index.write_to_disk(storage.path(), "stale-project");
fs::remove_file(&source_file).expect("remove indexed source file");
let restored =
SemanticIndex::read_from_disk(storage.path(), "stale-project", project.path(), false, None)
.expect("restore semantic index from disk");
assert!(
restored.is_file_stale(&source_file),
"deleted file should be detected as stale"
);
}
#[test]
fn semantic_stale_check_detects_same_mtime_same_size_content_change() {
let project = tempfile::tempdir().expect("create project dir");
let storage = tempfile::tempdir().expect("create storage dir");
let source_file = project.path().join("src/lib.rs");
fs::create_dir_all(source_file.parent().expect("source parent")).expect("create src dir");
fs::write(
&source_file,
"pub fn handle_request(token: &str) -> bool {
!token.is_empty()
}
",
)
.expect("write source file");
let fixed_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 123_000_000);
filetime::set_file_mtime(&source_file, fixed_mtime).expect("set fixed mtime");
let files = vec![source_file.clone()];
let mut embed = |texts: Vec<String>| {
Ok::<Vec<Vec<f32>>, String>(
texts
.into_iter()
.map(|_| vec![1.0, 0.0, 0.0, 0.0])
.collect(),
)
};
let index =
SemanticIndex::build(project.path(), &files, &mut embed, 16).expect("build semantic index");
let freshness = cache_freshness::collect(&source_file).expect("collect source freshness");
index.write_to_disk(storage.path(), "same-metadata-project");
let mut restored = SemanticIndex::read_from_disk(
storage.path(),
"same-metadata-project",
project.path(),
false,
None,
)
.expect("restore semantic index from disk");
assert!(
!restored.is_file_stale(&source_file),
"freshly restored file should start hot"
);
let mut bytes = fs::read(&source_file).expect("read source bytes");
let bang = bytes
.iter()
.position(|byte| *byte == b'!')
.expect("fixture contains negation byte");
bytes[bang] = b' ';
fs::write(&source_file, &bytes).expect("rewrite source with same size");
filetime::set_file_mtime(
&source_file,
filetime::FileTime::from_system_time(freshness.mtime),
)
.expect("restore original mtime");
assert_eq!(
cache_freshness::verify_file(&source_file, &freshness),
FreshnessVerdict::HotFresh,
"non-strict freshness misses same-size/same-mtime content edits"
);
assert!(
restored.is_file_stale(&source_file),
"semantic staleness must hash-check same-size/same-mtime edits"
);
let mut refreshed_chunks = 0usize;
let mut refresh_embed = |texts: Vec<String>| {
refreshed_chunks += texts.len();
Ok::<Vec<Vec<f32>>, String>(
texts
.into_iter()
.map(|_| vec![1.0, 0.0, 0.0, 0.0])
.collect(),
)
};
let mut progress = |_done: usize, _total: usize| {};
let summary = restored
.refresh_stale_files(
project.path(),
&files,
&mut refresh_embed,
16,
&mut progress,
)
.expect("strict refresh should re-embed stale file");
assert_eq!(summary.changed, 1);
assert_eq!(summary.added, 0);
assert_eq!(summary.deleted, 0);
assert!(refreshed_chunks > 0, "changed file should be re-embedded");
assert!(
!restored.is_file_stale(&source_file),
"refreshed file should become fresh again"
);
}
#[test]
fn read_from_disk_rebuilds_v1_cache_when_fingerprint_is_expected() {
let storage = tempfile::tempdir().expect("create storage dir");
let legacy_file = storage.path().join("src/lib.rs");
fs::create_dir_all(legacy_file.parent().expect("legacy parent")).expect("create src dir");
fs::write(&legacy_file, "pub fn legacy_symbol() {}\n").expect("write legacy source file");
let v1_bytes = build_v1_index_bytes(&legacy_file);
let restored = SemanticIndex::from_bytes(&v1_bytes, storage.path())
.expect("parse v1 semantic index bytes");
assert!(restored.fingerprint().is_none());
let semantic_dir = storage.path().join("semantic").join("v1-project");
fs::create_dir_all(&semantic_dir).expect("create semantic dir");
let semantic_file = semantic_dir.join("semantic.bin");
fs::write(&semantic_file, &v1_bytes).expect("write v1 semantic index file");
let expected_fingerprint = SemanticIndexFingerprint {
backend: "fastembed".to_string(),
model: "all-MiniLM-L6-v2".to_string(),
base_url: "none".to_string(),
dimension: 3,
chunking_version: 2,
}
.as_string();
assert!(SemanticIndex::read_from_disk(
storage.path(),
"v1-project",
storage.path(),
false,
Some(&expected_fingerprint)
)
.is_none());
assert!(
!semantic_file.exists(),
"legacy semantic cache should be deleted after fingerprint mismatch"
);
}
#[test]
fn write_roundtrip_preserves_subsecond_mtime_precision() {
let project = tempfile::tempdir().expect("create project dir");
let storage = tempfile::tempdir().expect("create storage dir");
let (index, source_file) = build_test_index(project.path());
let live_mtime = fs::metadata(&source_file)
.expect("stat source file")
.modified()
.expect("read live mtime");
let live_nanos = live_mtime
.duration_since(std::time::UNIX_EPOCH)
.expect("mtime >= epoch")
.subsec_nanos();
if live_nanos == 0 {
eprintln!(
"skipping subsecond roundtrip assertion: filesystem does not report subsecond mtime \
(live nanos == 0). Test still validates staleness on whole-second mtimes."
);
}
index.write_to_disk(storage.path(), "subsec-project");
let restored = SemanticIndex::read_from_disk(
storage.path(),
"subsec-project",
project.path(),
false,
None,
)
.expect("restore semantic index from disk");
assert!(
!restored.is_file_stale(&source_file),
"unchanged file flagged stale after disk round-trip — mtime precision lost"
);
assert!(
!restored.is_file_stale(&source_file),
"no file should be stale after a fresh round-trip"
);
}
#[test]
fn read_from_disk_rebuilds_v2_cache_for_v4_snippets() {
let project = tempfile::tempdir().expect("create project dir");
let storage = tempfile::tempdir().expect("create storage dir");
let (index, source_file) = build_test_index(project.path());
let fingerprint = SemanticIndexFingerprint {
backend: "fastembed".to_string(),
model: "all-MiniLM-L6-v2".to_string(),
base_url: "none".to_string(),
dimension: 4,
chunking_version: 2,
};
let fp_str = fingerprint.as_string();
let fp_bytes = fp_str.as_bytes();
let mut bytes = Vec::new();
bytes.push(2u8); bytes.extend_from_slice(&4u32.to_le_bytes()); bytes.extend_from_slice(&(index.len() as u32).to_le_bytes()); bytes.extend_from_slice(&(fp_bytes.len() as u32).to_le_bytes());
bytes.extend_from_slice(fp_bytes);
bytes.extend_from_slice(&1u32.to_le_bytes());
push_string(&mut bytes, &source_file.to_string_lossy());
bytes.extend_from_slice(&0u64.to_le_bytes());
let v3_bytes = index.to_bytes();
push_string(&mut bytes, &source_file.to_string_lossy());
push_string(&mut bytes, "legacy_sym");
bytes.push(0u8); bytes.extend_from_slice(&1u32.to_le_bytes()); bytes.extend_from_slice(&3u32.to_le_bytes()); bytes.push(1u8); push_string(&mut bytes, "fn legacy_sym() {}");
push_string(&mut bytes, "file:src kind:function name:legacy_sym");
for value in [0.1f32, 0.2, 0.3, 0.4] {
bytes.extend_from_slice(&value.to_le_bytes());
}
bytes[5..9].copy_from_slice(&1u32.to_le_bytes());
let _ = v3_bytes;
let semantic_dir = storage.path().join("semantic").join("v2-project");
fs::create_dir_all(&semantic_dir).expect("create semantic dir");
fs::write(semantic_dir.join("semantic.bin"), &bytes).expect("write v2 cache");
assert!(SemanticIndex::read_from_disk(
storage.path(),
"v2-project",
project.path(),
false,
Some(&fp_str)
)
.is_none());
assert!(!semantic_dir.join("semantic.bin").exists());
}
#[test]
fn from_bytes_rejects_corrupt_v3_cache_payloads() {
fn build_v3_with_mtime(secs: u64, nanos: u32) -> Vec<u8> {
let fingerprint = SemanticIndexFingerprint {
backend: "fastembed".to_string(),
model: "all-MiniLM-L6-v2".to_string(),
base_url: "none".to_string(),
dimension: 4,
chunking_version: 2,
};
let fp_bytes = fingerprint.as_string().into_bytes();
let mut bytes = Vec::new();
bytes.push(3u8); bytes.extend_from_slice(&4u32.to_le_bytes()); bytes.extend_from_slice(&0u32.to_le_bytes()); bytes.extend_from_slice(&(fp_bytes.len() as u32).to_le_bytes());
bytes.extend_from_slice(&fp_bytes);
bytes.extend_from_slice(&1u32.to_le_bytes());
push_string(&mut bytes, "/tmp/corrupt.rs");
bytes.extend_from_slice(&secs.to_le_bytes());
bytes.extend_from_slice(&nanos.to_le_bytes());
bytes
}
let bad_nanos = build_v3_with_mtime(0, 2_000_000_000);
let root = tempfile::tempdir().expect("semantic cache root");
let err = SemanticIndex::from_bytes(&bad_nanos, root.path())
.expect_err("V3 with nanos >= 1e9 must be rejected");
assert!(
err.contains("nanos") && err.contains("1_000_000_000"),
"nanos-overflow error should explain the rejection: {err}"
);
let overflow = build_v3_with_mtime(u64::MAX, 0);
let err = SemanticIndex::from_bytes(&overflow, root.path())
.expect_err("V3 with secs=u64::MAX must be rejected");
assert!(
err.contains("overflows SystemTime"),
"SystemTime-overflow error should explain the rejection: {err}"
);
let boundary = build_v3_with_mtime(1_700_000_000, 999_999_999);
let _ = SemanticIndex::from_bytes(&boundary, root.path())
.expect("V3 with nanos=999_999_999 must load cleanly");
}