use std::fs::OpenOptions;
use std::io::{Seek, Write};
use std::path::{Path, PathBuf};
use crc32fast::Hasher as Crc32Hasher;
use frankensearch_core::{SearchError, SearchResult};
use half::f16;
use tracing::{debug, warn};
use crate::Quantization;
const WAL_MAGIC: [u8; 4] = *b"FWAL";
const WAL_VERSION: u16 = 1;
const BATCH_MAGIC: [u8; 4] = *b"FWB1";
const WAL_HEADER_SIZE: usize = 20;
#[derive(Debug, Clone)]
pub struct WalConfig {
pub compaction_threshold: usize,
pub compaction_ratio: f64,
pub fsync_on_write: bool,
}
impl Default for WalConfig {
fn default() -> Self {
Self {
compaction_threshold: 1000,
compaction_ratio: 0.10,
fsync_on_write: true,
}
}
}
#[derive(Debug, Clone)]
pub(crate) struct WalEntry {
pub doc_id: String,
#[allow(dead_code)]
pub doc_id_hash: u64,
pub embedding: Vec<f32>,
}
#[derive(Debug, Clone)]
pub struct CompactionStats {
pub main_records_before: usize,
pub wal_records: usize,
pub total_records_after: usize,
pub elapsed_ms: f64,
}
pub(crate) const WAL_INDEX_BIT: usize = 1_usize << (usize::BITS - 1);
pub(crate) const fn is_wal_index(index: usize) -> bool {
index & WAL_INDEX_BIT != 0
}
pub(crate) const fn to_wal_index(wal_pos: usize) -> usize {
wal_pos | WAL_INDEX_BIT
}
pub(crate) const fn from_wal_index(tagged: usize) -> usize {
tagged & !WAL_INDEX_BIT
}
#[must_use]
pub fn wal_path_for(fsvi_path: &Path) -> PathBuf {
let mut p = fsvi_path.as_os_str().to_os_string();
p.push(".wal");
PathBuf::from(p)
}
pub(crate) fn read_wal(
path: &Path,
expected_dimension: usize,
quantization: Quantization,
) -> SearchResult<(Vec<WalEntry>, u8, u64)> {
if !path.exists() {
return Ok((Vec::new(), 0, 0));
}
let data = std::fs::read(path)?;
if data.len() < WAL_HEADER_SIZE {
warn!(path = %path.display(), len = data.len(), "WAL file too small, ignoring");
return Ok((Vec::new(), 0, 0));
}
parse_wal_bytes(&data, expected_dimension, quantization, path)
}
fn parse_wal_bytes(
data: &[u8],
expected_dimension: usize,
quantization: Quantization,
path: &Path,
) -> SearchResult<(Vec<WalEntry>, u8, u64)> {
if data[..4] != WAL_MAGIC {
return Err(wal_corrupted(path, "bad magic bytes"));
}
let version = u16::from_le_bytes([data[4], data[5]]);
if version != WAL_VERSION {
return Err(wal_corrupted(
path,
format!("version mismatch: expected {WAL_VERSION}, got {version}"),
));
}
let dimension = u32::from_le_bytes([data[6], data[7], data[8], data[9]]) as usize;
if dimension != expected_dimension {
return Err(wal_corrupted(
path,
format!("dimension mismatch: expected {expected_dimension}, got {dimension}"),
));
}
let quant_byte = data[10];
let wal_quant = Quantization::from_wire(quant_byte, path)?;
if wal_quant != quantization {
return Err(wal_corrupted(path, "quantization mismatch"));
}
let compaction_gen = data[11];
let header_crc_stored = u32::from_le_bytes([data[16], data[17], data[18], data[19]]);
let header_crc_computed = crc32_of(&data[..16]);
if header_crc_stored != header_crc_computed {
return Err(wal_corrupted(path, "header CRC mismatch"));
}
let mut entries = Vec::new();
let mut cursor = WAL_HEADER_SIZE;
let vector_bytes = dimension * quantization.bytes_per_element();
while cursor + 8 <= data.len() {
if let Ok((batch_entries, batch_len)) =
parse_batch(&data[cursor..], dimension, vector_bytes, quantization)
{
entries.extend(batch_entries);
cursor += batch_len;
} else {
warn!(
path = %path.display(),
offset = cursor,
entries_recovered = entries.len(),
"WAL batch corrupt or truncated, discarding remainder"
);
break;
}
}
debug!(path = %path.display(), entries = entries.len(), "loaded WAL entries");
Ok((entries, compaction_gen, cursor as u64))
}
fn parse_batch(
data: &[u8],
dimension: usize,
vector_bytes: usize,
quantization: Quantization,
) -> Result<(Vec<WalEntry>, usize), ()> {
if data.len() < 8 {
return Err(());
}
if data[..4] != BATCH_MAGIC {
return Err(());
}
let entry_count = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
let min_entry_bytes = 2_usize.saturating_add(vector_bytes).max(1);
let max_possible = data.len().saturating_sub(8) / min_entry_bytes;
let mut cursor: usize = 8;
let mut entries = Vec::with_capacity(entry_count.min(max_possible));
for _ in 0..entry_count {
if cursor.checked_add(2).is_none_or(|end| end > data.len()) {
return Err(());
}
let doc_id_len = u16::from_le_bytes([data[cursor], data[cursor + 1]]) as usize;
cursor += 2;
let needed = doc_id_len.checked_add(vector_bytes).ok_or(())?;
if cursor
.checked_add(needed)
.is_none_or(|end| end > data.len())
{
return Err(());
}
let doc_id = std::str::from_utf8(&data[cursor..cursor + doc_id_len])
.map_err(|_| ())?
.to_owned();
cursor += doc_id_len;
let embedding = decode_vector(
&data[cursor..cursor + vector_bytes],
dimension,
quantization,
);
cursor += vector_bytes;
entries.push(WalEntry {
doc_id_hash: crate::fnv1a_hash(doc_id.as_bytes()),
doc_id,
embedding,
});
}
if cursor + 4 > data.len() {
return Err(());
}
let stored_crc = u32::from_le_bytes([
data[cursor],
data[cursor + 1],
data[cursor + 2],
data[cursor + 3],
]);
let computed_crc = crc32_of(&data[..cursor]);
if stored_crc != computed_crc {
return Err(());
}
cursor += 4;
Ok((entries, cursor))
}
fn decode_vector(bytes: &[u8], dimension: usize, quantization: Quantization) -> Vec<f32> {
let vec = match quantization {
Quantization::F16 => bytes
.chunks_exact(2)
.take(dimension)
.map(|chunk| f16::from_le_bytes([chunk[0], chunk[1]]).to_f32())
.collect(),
Quantization::F32 => bytes
.chunks_exact(4)
.take(dimension)
.map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
.collect(),
};
debug_assert_eq!(
Vec::<f32>::len(&vec),
dimension,
"decode_vector produced {actual} elements, expected {dimension}",
actual = Vec::<f32>::len(&vec),
);
vec
}
fn wal_entry_count(entries_len: usize) -> SearchResult<u32> {
u32::try_from(entries_len).map_err(|_| SearchError::InvalidConfig {
field: "wal_batch_entries".to_owned(),
value: entries_len.to_string(),
reason: "WAL batch entry count exceeds u32 maximum".to_owned(),
})
}
fn wal_doc_id_len(doc_bytes_len: usize) -> SearchResult<u16> {
u16::try_from(doc_bytes_len).map_err(|_| SearchError::InvalidConfig {
field: "doc_id_length".to_owned(),
value: doc_bytes_len.to_string(),
reason: format!("document ID length {doc_bytes_len} exceeds WAL u16 limit (65535 bytes)"),
})
}
fn wal_dimension(dimension: usize) -> SearchResult<u32> {
u32::try_from(dimension).map_err(|_| SearchError::InvalidConfig {
field: "dimension".to_owned(),
value: dimension.to_string(),
reason: "vector dimension exceeds u32 maximum for WAL header".to_owned(),
})
}
pub(crate) fn append_wal_batch(
wal_path: &Path,
entries: &[WalEntry],
dimension: usize,
quantization: Quantization,
compaction_gen: u8,
fsync: bool,
) -> SearchResult<()> {
let created = OpenOptions::new()
.write(true)
.create_new(true)
.open(wal_path);
let mut file = match created {
Ok(mut file) => {
write_wal_header(&mut file, dimension, quantization, compaction_gen)?;
file
}
Err(error) if error.kind() == std::io::ErrorKind::AlreadyExists => {
let mut file = OpenOptions::new()
.write(true)
.create(true)
.truncate(false)
.open(wal_path)?;
let existing_len = file.metadata()?.len();
if existing_len < WAL_HEADER_SIZE as u64 {
warn!(
path = %wal_path.display(),
existing_len,
"WAL file smaller than header; truncating and writing fresh header"
);
file.set_len(0)?;
file.seek(std::io::SeekFrom::Start(0))?;
write_wal_header(&mut file, dimension, quantization, compaction_gen)?;
} else {
file.seek(std::io::SeekFrom::End(0))?;
}
file
}
Err(error) => return Err(error.into()),
};
let mut batch = Vec::new();
batch.extend_from_slice(&BATCH_MAGIC);
let entry_count = wal_entry_count(entries.len())?;
batch.extend_from_slice(&entry_count.to_le_bytes());
for entry in entries {
let doc_bytes = entry.doc_id.as_bytes();
let doc_id_len = wal_doc_id_len(doc_bytes.len())?;
batch.extend_from_slice(&doc_id_len.to_le_bytes());
batch.extend_from_slice(doc_bytes);
encode_vector(&mut batch, &entry.embedding, quantization);
}
let crc = crc32_of(&batch);
batch.extend_from_slice(&crc.to_le_bytes());
file.write_all(&batch)?;
if fsync {
file.sync_all()?;
}
debug!(
path = %wal_path.display(),
batch_entries = entries.len(),
batch_bytes = batch.len(),
"appended WAL batch"
);
Ok(())
}
fn write_wal_header(
writer: &mut impl Write,
dimension: usize,
quantization: Quantization,
compaction_gen: u8,
) -> SearchResult<()> {
let mut header = Vec::with_capacity(WAL_HEADER_SIZE);
header.extend_from_slice(&WAL_MAGIC);
header.extend_from_slice(&WAL_VERSION.to_le_bytes());
let dim_u32 = wal_dimension(dimension)?;
header.extend_from_slice(&dim_u32.to_le_bytes());
#[allow(clippy::cast_possible_truncation)]
header.push(quantization as u8);
header.push(compaction_gen);
header.extend_from_slice(&[0u8; 4]); let crc = crc32_of(&header);
header.extend_from_slice(&crc.to_le_bytes());
writer.write_all(&header)?;
Ok(())
}
fn encode_vector(buf: &mut Vec<u8>, embedding: &[f32], quantization: Quantization) {
match quantization {
Quantization::F16 => {
for &val in embedding {
buf.extend_from_slice(&f16::from_f32(val).to_le_bytes());
}
}
Quantization::F32 => {
for &val in embedding {
buf.extend_from_slice(&val.to_le_bytes());
}
}
}
}
pub(crate) fn remove_wal(path: &Path) -> SearchResult<()> {
if path.exists() {
std::fs::remove_file(path)?;
}
Ok(())
}
fn crc32_of(data: &[u8]) -> u32 {
let mut hasher = Crc32Hasher::new();
hasher.update(data);
hasher.finalize()
}
fn wal_corrupted(path: &Path, detail: impl Into<String>) -> SearchError {
SearchError::IndexCorrupted {
path: path.to_path_buf(),
detail: detail.into(),
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::{SystemTime, UNIX_EPOCH};
fn temp_wal_path(name: &str) -> PathBuf {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_nanos();
std::env::temp_dir().join(format!(
"frankensearch-wal-{name}-{}-{now}.fsvi.wal",
std::process::id()
))
}
fn make_entry(doc_id: &str, base: f32, dim: usize) -> WalEntry {
WalEntry {
doc_id: doc_id.to_owned(),
doc_id_hash: crate::fnv1a_hash(doc_id.as_bytes()),
embedding: vec![base; dim],
}
}
#[test]
fn roundtrip_single_batch() {
let path = temp_wal_path("roundtrip-single");
let dim = 4;
let entries = vec![make_entry("doc-0", 1.0, dim), make_entry("doc-1", 2.0, dim)];
append_wal_batch(&path, &entries, dim, Quantization::F16, 0, false).unwrap();
let (loaded, generation, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(generation, 0);
assert_eq!(loaded.len(), 2);
assert_eq!(loaded[0].doc_id, "doc-0");
assert_eq!(loaded[1].doc_id, "doc-1");
assert!((loaded[0].embedding[0] - 1.0).abs() < 0.01);
assert!((loaded[1].embedding[0] - 2.0).abs() < 0.01);
std::fs::remove_file(&path).ok();
}
#[test]
fn roundtrip_multiple_batches() {
let path = temp_wal_path("roundtrip-multi");
let dim = 4;
append_wal_batch(
&path,
&[make_entry("doc-0", 1.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
append_wal_batch(
&path,
&[make_entry("doc-1", 2.0, dim), make_entry("doc-2", 3.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
let (loaded, generation, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(generation, 0);
assert_eq!(loaded.len(), 3);
assert_eq!(loaded[0].doc_id, "doc-0");
assert_eq!(loaded[1].doc_id, "doc-1");
assert_eq!(loaded[2].doc_id, "doc-2");
std::fs::remove_file(&path).ok();
}
#[test]
fn truncated_batch_is_discarded() {
let path = temp_wal_path("truncated");
let dim = 4;
append_wal_batch(
&path,
&[make_entry("doc-good", 1.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
let mut data = std::fs::read(&path).unwrap();
data.extend_from_slice(&BATCH_MAGIC);
data.extend_from_slice(&1_u32.to_le_bytes()); data.extend_from_slice(&[0xFF; 3]); std::fs::write(&path, &data).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(loaded.len(), 1, "only the good batch should survive");
assert_eq!(loaded[0].doc_id, "doc-good");
std::fs::remove_file(&path).ok();
}
#[test]
fn crc_corrupted_batch_is_discarded() {
let path = temp_wal_path("crc-corrupt");
let dim = 4;
append_wal_batch(
&path,
&[make_entry("doc-good", 1.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
append_wal_batch(
&path,
&[make_entry("doc-bad", 2.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
let mut data = std::fs::read(&path).unwrap();
let last_byte = data.len() - 1;
data[last_byte] ^= 0xFF;
std::fs::write(&path, &data).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(
loaded.len(),
1,
"corrupted second batch should be discarded"
);
assert_eq!(loaded[0].doc_id, "doc-good");
std::fs::remove_file(&path).ok();
}
#[test]
fn nonexistent_wal_returns_empty() {
let path = temp_wal_path("nonexistent");
let (loaded, _, _) = read_wal(&path, 4, Quantization::F16).unwrap();
assert!(loaded.is_empty());
}
#[test]
fn dimension_mismatch_is_error() {
let path = temp_wal_path("dim-mismatch");
append_wal_batch(
&path,
&[make_entry("doc-0", 1.0, 4)],
4,
Quantization::F16,
0,
false,
)
.unwrap();
let result = read_wal(&path, 8, Quantization::F16);
assert!(result.is_err());
std::fs::remove_file(&path).ok();
}
#[test]
fn f32_quantization_roundtrip() {
let path = temp_wal_path("f32-quant");
let dim = 4;
let entries = vec![make_entry("doc-0", 0.123_456, dim)];
append_wal_batch(&path, &entries, dim, Quantization::F32, 0, false).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F32).unwrap();
assert_eq!(loaded.len(), 1);
assert!(
(loaded[0].embedding[0] - 0.123_456).abs() < f32::EPSILON,
"f32 should round-trip exactly"
);
std::fs::remove_file(&path).ok();
}
#[test]
fn wal_path_derivation() {
let fsvi = Path::new("/data/index.fsvi");
let wal = wal_path_for(fsvi);
assert_eq!(wal.to_str().unwrap(), "/data/index.fsvi.wal");
}
#[test]
fn remove_wal_is_idempotent() {
let path = temp_wal_path("remove-idem");
remove_wal(&path).unwrap(); std::fs::write(&path, b"dummy").unwrap();
remove_wal(&path).unwrap(); assert!(!path.exists());
remove_wal(&path).unwrap(); }
#[test]
fn index_tagging_roundtrip() {
let pos = 42;
let tagged = to_wal_index(pos);
assert!(is_wal_index(tagged));
assert!(!is_wal_index(pos));
assert_eq!(from_wal_index(tagged), pos);
}
#[test]
fn wal_config_default_values() {
let config = WalConfig::default();
assert_eq!(config.compaction_threshold, 1000);
assert!((config.compaction_ratio - 0.10).abs() < f64::EPSILON);
assert!(config.fsync_on_write);
}
#[test]
fn wal_config_debug_clone() {
let config = WalConfig::default();
let cloned = config.clone();
assert_eq!(cloned.compaction_threshold, config.compaction_threshold);
let debug = format!("{config:?}");
assert!(debug.contains("WalConfig"));
}
#[test]
fn compaction_stats_debug_clone() {
let stats = CompactionStats {
main_records_before: 100,
wal_records: 10,
total_records_after: 110,
elapsed_ms: 42.5,
};
let cloned = stats.clone();
assert_eq!(cloned.main_records_before, 100);
assert_eq!(cloned.wal_records, 10);
assert_eq!(cloned.total_records_after, 110);
assert!((cloned.elapsed_ms - 42.5).abs() < f64::EPSILON);
let debug = format!("{stats:?}");
assert!(debug.contains("CompactionStats"));
}
#[test]
fn wal_too_small_returns_empty() {
let path = temp_wal_path("too-small");
std::fs::write(&path, [0u8; 10]).unwrap();
let (loaded, _, _) = read_wal(&path, 4, Quantization::F16).unwrap();
assert!(loaded.is_empty());
std::fs::remove_file(&path).ok();
}
#[test]
fn bad_magic_bytes_is_error() {
let path = temp_wal_path("bad-magic");
let mut header = vec![0xDE, 0xAD, 0xBE, 0xEF]; header.extend_from_slice(&WAL_VERSION.to_le_bytes());
header.extend_from_slice(&4_u32.to_le_bytes());
header.push(Quantization::F16 as u8);
header.push(0); header.extend_from_slice(&[0u8; 4]);
let crc = crc32_of(&header);
header.extend_from_slice(&crc.to_le_bytes());
std::fs::write(&path, &header).unwrap();
let result = read_wal(&path, 4, Quantization::F16);
assert!(result.is_err());
let err = format!("{}", result.unwrap_err());
assert!(err.contains("magic"));
std::fs::remove_file(&path).ok();
}
#[test]
fn version_mismatch_is_error() {
let path = temp_wal_path("version-mismatch");
let mut header = Vec::new();
header.extend_from_slice(&WAL_MAGIC);
header.extend_from_slice(&99_u16.to_le_bytes()); header.extend_from_slice(&4_u32.to_le_bytes());
header.push(Quantization::F16 as u8);
header.push(0); header.extend_from_slice(&[0u8; 4]);
let crc = crc32_of(&header);
header.extend_from_slice(&crc.to_le_bytes());
std::fs::write(&path, &header).unwrap();
let result = read_wal(&path, 4, Quantization::F16);
assert!(result.is_err());
let err = format!("{}", result.unwrap_err());
assert!(err.contains("version"));
std::fs::remove_file(&path).ok();
}
#[test]
fn quantization_mismatch_is_error() {
let path = temp_wal_path("quant-mismatch");
append_wal_batch(
&path,
&[make_entry("doc-0", 1.0, 4)],
4,
Quantization::F16,
0,
false,
)
.unwrap();
let result = read_wal(&path, 4, Quantization::F32);
assert!(result.is_err());
let err = format!("{}", result.unwrap_err());
assert!(err.contains("quantization"));
std::fs::remove_file(&path).ok();
}
#[test]
fn header_crc_mismatch_is_error() {
let path = temp_wal_path("header-crc");
append_wal_batch(
&path,
&[make_entry("doc-0", 1.0, 4)],
4,
Quantization::F16,
0,
false,
)
.unwrap();
let mut data = std::fs::read(&path).unwrap();
data[16] ^= 0xFF;
std::fs::write(&path, &data).unwrap();
let result = read_wal(&path, 4, Quantization::F16);
assert!(result.is_err());
let err = format!("{}", result.unwrap_err());
assert!(err.contains("CRC"));
std::fs::remove_file(&path).ok();
}
#[test]
fn empty_batch_roundtrip() {
let path = temp_wal_path("empty-batch");
let dim = 4;
append_wal_batch(&path, &[], dim, Quantization::F16, 0, false).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert!(loaded.is_empty());
std::fs::remove_file(&path).ok();
}
#[test]
fn wal_entry_doc_id_hash_matches_fnv1a() {
let entry = make_entry("hello", 1.0, 4);
assert_eq!(entry.doc_id_hash, crate::fnv1a_hash(b"hello"));
}
#[test]
fn wal_entry_clone() {
let entry = make_entry("test-doc", 0.5, 8);
#[allow(clippy::redundant_clone)]
let cloned = entry.clone();
assert_eq!(cloned.doc_id, "test-doc");
assert_eq!(cloned.embedding.len(), 8);
assert!((cloned.embedding[0] - 0.5).abs() < f32::EPSILON);
}
#[test]
fn index_tagging_zero() {
let tagged = to_wal_index(0);
assert!(is_wal_index(tagged));
assert_eq!(from_wal_index(tagged), 0);
}
#[test]
fn index_tagging_large_value() {
let pos = (1_usize << (usize::BITS - 2)) - 1;
let tagged = to_wal_index(pos);
assert!(is_wal_index(tagged));
assert_eq!(from_wal_index(tagged), pos);
}
#[test]
fn is_wal_index_false_for_normal_values() {
assert!(!is_wal_index(0));
assert!(!is_wal_index(1));
assert!(!is_wal_index(usize::MAX >> 1));
}
#[test]
fn crc32_of_empty() {
let crc = crc32_of(&[]);
assert_eq!(crc, 0); }
#[test]
fn crc32_of_known_data() {
let crc = crc32_of(b"hello");
assert_ne!(crc, 0);
assert_eq!(crc, crc32_of(b"hello"));
}
#[test]
fn oversized_doc_id_returns_error() {
let path = temp_wal_path("oversized-docid");
let dim = 4;
let big_id = "x".repeat(70_000);
let entries = vec![WalEntry {
doc_id: big_id,
doc_id_hash: 0,
embedding: vec![1.0; dim],
}];
let result = append_wal_batch(&path, &entries, dim, Quantization::F16, 0, false);
assert!(result.is_err(), "should reject doc_id exceeding u16 length");
let err = result.unwrap_err();
assert!(
err.to_string().contains("doc_id_length"),
"error should mention doc_id_length: {err}"
);
std::fs::remove_file(&path).ok();
}
#[test]
fn oversized_batch_entry_count_returns_error() {
let result = wal_entry_count(usize::MAX);
assert!(result.is_err(), "should reject entry count exceeding u32");
let err = result.unwrap_err();
assert!(
err.to_string().contains("wal_batch_entries"),
"error should mention wal_batch_entries: {err}"
);
}
#[test]
fn oversized_dimension_returns_error() {
let path = temp_wal_path("oversized-dimension");
let result = append_wal_batch(&path, &[], usize::MAX, Quantization::F16, 0, false);
assert!(result.is_err(), "should reject dimension exceeding u32");
let err = result.unwrap_err();
assert!(
err.to_string().contains("dimension"),
"error should mention dimension: {err}"
);
std::fs::remove_file(&path).ok();
}
#[test]
fn wal_corrupted_produces_index_corrupted_error() {
let err = wal_corrupted(Path::new("/test/path.fsvi.wal"), "test detail");
match err {
SearchError::IndexCorrupted { path, detail } => {
assert_eq!(path, Path::new("/test/path.fsvi.wal"));
assert_eq!(detail, "test detail");
}
other => panic!("expected IndexCorrupted, got {other:?}"),
}
}
#[test]
fn wal_path_for_various_inputs() {
let wal = wal_path_for(Path::new("index.fsvi"));
assert_eq!(wal, PathBuf::from("index.fsvi.wal"));
let wal = wal_path_for(Path::new("/abs/path/data.fsvi"));
assert_eq!(wal, PathBuf::from("/abs/path/data.fsvi.wal"));
}
#[test]
fn append_with_fsync_succeeds() {
let path = temp_wal_path("fsync-test");
let dim = 4;
append_wal_batch(
&path,
&[make_entry("doc-sync", 1.0, dim)],
dim,
Quantization::F16,
0,
true,
)
.unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].doc_id, "doc-sync");
std::fs::remove_file(&path).ok();
}
#[test]
fn append_preserves_existing_header() {
let path = temp_wal_path("preserve-header");
let dim = 4;
append_wal_batch(
&path,
&[make_entry("doc-0", 1.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
let data_after_first = std::fs::read(&path).unwrap();
let header_bytes = &data_after_first[..WAL_HEADER_SIZE];
append_wal_batch(
&path,
&[make_entry("doc-1", 2.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
let data_after_second = std::fs::read(&path).unwrap();
assert_eq!(&data_after_second[..WAL_HEADER_SIZE], header_bytes);
assert!(data_after_second.len() > data_after_first.len());
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(loaded.len(), 2);
std::fs::remove_file(&path).ok();
}
#[test]
fn bad_batch_magic_discards_batch() {
let path = temp_wal_path("bad-batch-magic");
let dim = 4;
append_wal_batch(
&path,
&[make_entry("doc-good", 1.0, dim)],
dim,
Quantization::F16,
0,
false,
)
.unwrap();
let mut data = std::fs::read(&path).unwrap();
data.extend_from_slice(&[0xFF, 0xFF, 0xFF, 0xFF]); data.extend_from_slice(&1_u32.to_le_bytes());
std::fs::write(&path, &data).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].doc_id, "doc-good");
std::fs::remove_file(&path).ok();
}
#[test]
fn f16_quantization_has_limited_precision() {
let path = temp_wal_path("f16-precision");
let dim = 2;
let entries = vec![WalEntry {
doc_id: "precise".to_owned(),
doc_id_hash: crate::fnv1a_hash(b"precise"),
embedding: vec![0.123_456, -0.987_654],
}];
append_wal_batch(&path, &entries, dim, Quantization::F16, 0, false).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(loaded.len(), 1);
assert!((loaded[0].embedding[0] - 0.123_456).abs() < 0.001);
assert!((loaded[0].embedding[1] + 0.987_654).abs() < 0.001);
std::fs::remove_file(&path).ok();
}
#[test]
fn f32_quantization_exact_roundtrip() {
let path = temp_wal_path("f32-exact");
let dim = 3;
let val = std::f32::consts::PI;
let entries = vec![WalEntry {
doc_id: "pi-doc".to_owned(),
doc_id_hash: crate::fnv1a_hash(b"pi-doc"),
embedding: vec![val, -val, 0.0],
}];
append_wal_batch(&path, &entries, dim, Quantization::F32, 0, false).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F32).unwrap();
assert!((loaded[0].embedding[0] - val).abs() < f32::EPSILON);
assert!((loaded[0].embedding[1] + val).abs() < f32::EPSILON);
assert!(loaded[0].embedding[2].abs() < f32::EPSILON);
std::fs::remove_file(&path).ok();
}
#[test]
fn append_to_preexisting_empty_file_writes_header() {
let path = temp_wal_path("pre-empty");
let dim = 3;
std::fs::File::create(&path).unwrap();
assert_eq!(std::fs::metadata(&path).unwrap().len(), 0);
let entries = vec![WalEntry {
doc_id: "doc-0".to_owned(),
doc_id_hash: crate::fnv1a_hash(b"doc-0"),
embedding: vec![1.0, 2.0, 3.0],
}];
append_wal_batch(&path, &entries, dim, Quantization::F16, 0, false).unwrap();
let (loaded, _, _) = read_wal(&path, dim, Quantization::F16).unwrap();
assert_eq!(loaded.len(), 1);
assert_eq!(loaded[0].doc_id, "doc-0");
std::fs::remove_file(&path).ok();
}
}