#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
use tempfile::tempdir;
use super::inverted_index::SparseInvertedIndex;
use super::persistence::*;
use super::types::SparseVector;
/// Test helper: hands `pairs` to [`SparseVector::new`] and returns the result.
///
/// The tests in this module use each pair as `(term id, weight)`.
fn make_vector(pairs: Vec<(u32, f32)>) -> SparseVector {
    SparseVector::new(pairs)
}
/// Replaying a WAL of upserts must rebuild an index whose postings match an
/// index that was populated directly with the same vectors.
///
/// Every document carries term 1 with a constant weight and term 2 with a
/// per-document weight. Both posting lists are compared: term 2 is the one
/// that would expose a weight being altered on its way through the log.
#[test]
fn test_wal_write_and_replay() {
    let dir = tempdir().unwrap();
    let wal_path = dir.path().join("sparse.wal");

    // Reference index: populated directly while the same operations are logged.
    let index1 = SparseInvertedIndex::new();
    for i in 0..100u64 {
        let v = make_vector(vec![(1, 1.0), (2, 0.5 + i as f32 * 0.01)]);
        index1.insert(i, &v);
        wal_append_upsert(&wal_path, i, &v).unwrap();
    }

    // Index under test: populated only by replaying the log.
    let index2 = SparseInvertedIndex::new();
    let count = wal_replay(&wal_path, &index2).unwrap();
    assert_eq!(count, 100);
    assert_eq!(index2.doc_count(), 100);

    for term in [1u32, 2] {
        let p1 = index1.get_all_postings(term);
        let p2 = index2.get_all_postings(term);
        // Pin the expected size so the pairwise loop cannot pass on two empty lists.
        assert_eq!(p1.len(), 100, "term {term} should be present in every document");
        assert_eq!(p1.len(), p2.len(), "term {term} posting counts differ");
        for (a, b) in p1.iter().zip(p2.iter()) {
            assert_eq!(a.doc_id, b.doc_id);
            assert!((a.weight - b.weight).abs() < f32::EPSILON);
        }
    }
}
/// Stray bytes after a well-formed WAL record must not prevent that record
/// from being replayed, and must not be counted as an entry themselves.
#[test]
fn test_wal_truncated_entry() {
    use std::io::Write;

    let tmp = tempdir().unwrap();
    let log = tmp.path().join("sparse.wal");

    // One valid upsert for document 42 ...
    wal_append_upsert(&log, 42, &make_vector(vec![(1, 1.0)])).unwrap();

    // ... followed by five raw bytes appended straight to the file. The file
    // handle is a temporary, so it is closed before the replay below.
    std::fs::OpenOptions::new()
        .append(true)
        .open(&log)
        .unwrap()
        .write_all(&[0xFF, 0x00, 0xAA, 0xBB, 0xCC])
        .unwrap();

    let replayed = SparseInvertedIndex::new();
    let applied = wal_replay(&log, &replayed).unwrap();
    assert_eq!(applied, 1);
    assert_eq!(replayed.doc_count(), 1);

    let term_one = replayed.get_all_postings(1);
    assert_eq!(term_one.len(), 1);
    assert_eq!(term_one[0].doc_id, 42);
}
/// A delete record in the WAL must be applied during replay: the replay count
/// includes it, and the deleted document is absent from the rebuilt index.
#[test]
fn test_wal_delete_replay() {
    let tmp = tempdir().unwrap();
    let log = tmp.path().join("sparse.wal");
    let doc = make_vector(vec![(1, 1.0)]);

    // Log: upsert 1, upsert 2, delete 1.
    wal_append_upsert(&log, 1, &doc).unwrap();
    wal_append_upsert(&log, 2, &doc).unwrap();
    wal_append_delete(&log, 1).unwrap();

    let rebuilt = SparseInvertedIndex::new();
    let applied = wal_replay(&log, &rebuilt).unwrap();

    // All three records are counted, but only document 2 survives.
    assert_eq!(applied, 3);
    assert_eq!(rebuilt.doc_count(), 1);

    let survivors = rebuilt.get_all_postings(1);
    assert_eq!(survivors.len(), 1);
    assert_eq!(survivors[0].doc_id, 2);
}
/// Compacting an index to disk and loading it back must preserve the document
/// count and the postings (ids and weights) of a sampled term.
#[test]
fn test_compaction_round_trip() {
    let dir = tempdir().unwrap();

    // 500 documents, each with one term in 0..50 carrying a per-document
    // weight and one term in 100..120 carrying a constant weight.
    let index1 = SparseInvertedIndex::new();
    for i in 0..500u64 {
        let v = make_vector(vec![
            (i as u32 % 50, 1.0 + (i as f32) * 0.001),
            (100 + i as u32 % 20, 0.5),
        ]);
        index1.insert(i, &v);
    }

    compact(dir.path(), &index1).unwrap();

    let index2 = load_from_disk(dir.path())
        .unwrap()
        .expect("load_from_disk should return Some after a successful compaction");
    assert_eq!(index2.doc_count(), 500);

    let p1 = index1.get_all_postings(5);
    let p2 = index2.get_all_postings(5);
    // Term 5 is carried by the ten documents with i % 50 == 5. Pinning the
    // size keeps the pairwise loop from passing on two empty lists.
    assert_eq!(p1.len(), 10);
    assert_eq!(p1.len(), p2.len());
    for (a, b) in p1.iter().zip(p2.iter()) {
        assert_eq!(a.doc_id, b.doc_id);
        assert!((a.weight - b.weight).abs() < f32::EPSILON);
    }
}
/// Loading from a directory that contains no persisted files succeeds and
/// yields `None`.
#[test]
fn test_empty_directory_returns_none() {
    let empty = tempdir().unwrap();
    assert!(load_from_disk(empty.path()).unwrap().is_none());
}
/// Simulates a restart: a compacted snapshot of 50 documents plus 10 further
/// upserts that exist only in the WAL. `load_from_disk` must return an index
/// that reflects both sources.
#[test]
fn test_full_restart_simulation() {
    let dir = tempdir().unwrap();
    let wal_path = dir.path().join("sparse.wal");

    // Documents 0..50 (terms 1 and 2) go into the compacted snapshot.
    let index1 = SparseInvertedIndex::new();
    for i in 0..50u64 {
        let v = make_vector(vec![(1, 1.0), (2, 2.0)]);
        index1.insert(i, &v);
    }
    compact(dir.path(), &index1).unwrap();

    // Documents 50..60 (terms 1 and 3) are written to the WAL only.
    for i in 50..60u64 {
        let v = make_vector(vec![(1, 3.0), (3, 1.0)]);
        wal_append_upsert(&wal_path, i, &v).unwrap();
    }

    let index2 = load_from_disk(dir.path())
        .unwrap()
        .expect("load_from_disk should return Some when a snapshot and WAL exist");
    assert_eq!(index2.doc_count(), 60);

    // Term 1 is in both sources, term 2 only in the snapshot, term 3 only in
    // the WAL.
    let p1 = index2.get_all_postings(1);
    assert_eq!(p1.len(), 60);
    let p2 = index2.get_all_postings(2);
    assert_eq!(p2.len(), 50);
    let p3 = index2.get_all_postings(3);
    assert_eq!(p3.len(), 10);
}
/// After compaction, the `sparse.meta` file must decode to a `SparseMeta`
/// carrying the expected version, document count and term count.
#[test]
fn test_meta_contains_correct_values() {
    let tmp = tempdir().unwrap();
    let populated = SparseInvertedIndex::new();

    // 25 documents; terms are i % 5 (0..=4) plus the shared term 10, which
    // gives six distinct terms.
    (0..25u64).for_each(|i| {
        let doc = make_vector(vec![(i as u32 % 5, 1.0), (10, 0.5)]);
        populated.insert(i, &doc);
    });

    compact(tmp.path(), &populated).unwrap();

    let meta_file = tmp.path().join("sparse.meta");
    let raw = std::fs::read(meta_file).unwrap();
    let meta: SparseMeta = postcard::from_bytes(&raw).unwrap();

    assert_eq!(meta.version, 1);
    assert_eq!(meta.doc_count, 25);
    assert_eq!(meta.term_count, 6);
}
/// Replaying a WAL path that does not exist is not an error; it reports zero
/// replayed entries.
#[test]
fn test_wal_missing_file_returns_zero() {
    let tmp = tempdir().unwrap();
    let absent = tmp.path().join("nonexistent.wal");

    let target = SparseInvertedIndex::new();
    let applied = wal_replay(&absent, &target).unwrap();

    assert_eq!(applied, 0);
}
/// Simulates a crash part-way through compaction: the directory holds a WAL
/// and a leftover `sparse.idx.tmp` with junk content, but no `sparse.meta`.
/// `load_from_disk` must still rebuild the index from the WAL and must leave
/// the temporary file in place.
#[test]
fn test_partial_compaction_crash_recovery() {
    let dir = tempdir().unwrap();
    let wal_path = dir.path().join("sparse.wal");

    // Five documents, document i carrying only term i.
    for i in 0..5u64 {
        let v = make_vector(vec![(i as u32, 1.0 + i as f32 * 0.1)]);
        wal_append_upsert(&wal_path, i, &v).unwrap();
    }

    // Leftover from the interrupted compaction.
    let tmp_path = dir.path().join("sparse.idx.tmp");
    std::fs::write(&tmp_path, b"garbage partial write").unwrap();
    assert!(!dir.path().join("sparse.meta").exists());

    let index = load_from_disk(dir.path())
        .unwrap()
        .expect("WAL-only load should return Some after partial compaction crash");
    assert_eq!(index.doc_count(), 5);

    for i in 0..5u64 {
        let postings = index.get_all_postings(i as u32);
        assert_eq!(
            postings.len(),
            1,
            "term {i} should have exactly one posting"
        );
        assert_eq!(postings[0].doc_id, i);
    }

    assert!(
        tmp_path.exists(),
        "load_from_disk must not remove stale .tmp artefacts"
    );
}
/// A non-empty WAL must be zero bytes long once `compact` has run.
#[test]
fn test_compaction_truncates_wal() {
    let tmp = tempdir().unwrap();
    let log = tmp.path().join("sparse.wal");
    // Current on-disk size of the WAL file, in bytes.
    let wal_len = || std::fs::metadata(&log).unwrap().len();

    let live = SparseInvertedIndex::new();
    let doc = make_vector(vec![(1, 1.0)]);
    live.insert(0, &doc);
    wal_append_upsert(&log, 0, &doc).unwrap();
    assert!(wal_len() > 0);

    compact(tmp.path(), &live).unwrap();
    assert_eq!(wal_len(), 0);
}