use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use hnsw_rs::api::AnnT;
use solo_core::{Error, Result};
use crate::vector_index::{HnswIndex, load_inner_from_basename};
/// Basename of the live snapshot pair: the pair `load` reads and the name
/// `save` renames the freshly dumped pair to.
pub const LIVE_BASENAME: &str = "hnsw_episodes";
/// Basename of the backup pair: `save` copies the existing live pair here
/// before promoting a new one, and `load_bak` reads it.
pub const BAK_BASENAME: &str = "hnsw_episodes_bak";
/// Basename `save` dumps the index under before renaming it to the live name.
pub const TMP_BASENAME: &str = "hnsw_episodes_tmp";
/// Suffix appended to a basename to form the data file name (see `data_path`).
// NOTE(review): presumably matches the file name hnsw_rs `file_dump` writes — confirm.
const DATA_SUFFIX: &str = ".hnsw.data";
/// Suffix appended to a basename to form the graph file name (see `graph_path`).
// NOTE(review): presumably matches the file name hnsw_rs `file_dump` writes — confirm.
const GRAPH_SUFFIX: &str = ".hnsw.graph";
/// Returns the path of the data file of the snapshot pair `basename` in `dir`,
/// i.e. `dir` joined with `basename` followed by `DATA_SUFFIX`.
fn data_path(dir: &Path, basename: &str) -> PathBuf {
    dir.join(format!("{basename}{DATA_SUFFIX}"))
}
/// Returns the path of the graph file of the snapshot pair `basename` in `dir`,
/// i.e. `dir` joined with `basename` followed by `GRAPH_SUFFIX`.
fn graph_path(dir: &Path, basename: &str) -> PathBuf {
    dir.join(format!("{basename}{GRAPH_SUFFIX}"))
}
/// Reports whether both files of the snapshot pair `basename` are present in `dir`.
///
/// Returns `true` only when the data file and the graph file both exist as
/// regular files. A pair with one file missing counts as absent.
pub fn pair_exists(dir: &Path, basename: &str) -> bool {
    // `all` stops at the first miss, so the graph file is only probed when the
    // data file is present.
    [data_path(dir, basename), graph_path(dir, basename)]
        .iter()
        .all(|member| member.is_file())
}
/// Removes the live, backup and temporary snapshot pairs from `dir`, in that order.
///
/// Files that are already absent are skipped silently (see `remove_pair`).
///
/// # Errors
///
/// Stops at the first pair whose removal fails and returns a vector-index
/// error naming that pair and `dir`. Later pairs are left untouched.
pub fn delete_all_pairs(dir: &Path) -> Result<()> {
    [LIVE_BASENAME, BAK_BASENAME, TMP_BASENAME]
        .iter()
        .try_for_each(|&basename| {
            remove_pair(dir, basename).map_err(|e| {
                Error::vector_index(format!(
                    "delete snapshot pair {basename:?} in {dir:?}: {e}"
                ))
            })
        })
}
/// Deletes the data file and then the graph file of the pair `basename` in `dir`.
///
/// A file that does not exist is not an error, so removing an absent pair
/// succeeds.
///
/// # Errors
///
/// Returns the first I/O error other than `NotFound`. When the data file
/// cannot be removed, the graph file is not attempted.
fn remove_pair(dir: &Path, basename: &str) -> io::Result<()> {
    for target in [data_path(dir, basename), graph_path(dir, basename)] {
        if let Err(failure) = fs::remove_file(&target) {
            // An already-missing file is the desired end state.
            if failure.kind() != io::ErrorKind::NotFound {
                return Err(failure);
            }
        }
    }
    Ok(())
}
/// Copies the pair `src_basename` to `dst_basename` inside `dir`, data file
/// first and graph file second.
///
/// # Errors
///
/// Returns the first I/O error from `fs::copy`. If the graph copy fails, the
/// data file that was already copied is left in place.
fn copy_pair(dir: &Path, src_basename: &str, dst_basename: &str) -> io::Result<()> {
    let data_from = data_path(dir, src_basename);
    let data_to = data_path(dir, dst_basename);
    fs::copy(&data_from, &data_to)?;

    let graph_from = graph_path(dir, src_basename);
    let graph_to = graph_path(dir, dst_basename);
    // The byte count returned by `fs::copy` is not needed.
    fs::copy(&graph_from, &graph_to).map(|_bytes| ())
}
fn rename_pair(dir: &Path, src_basename: &str, dst_basename: &str) -> io::Result<()> {
fs::rename(
data_path(dir, src_basename),
data_path(dir, dst_basename),
)?;
fs::rename(
graph_path(dir, src_basename),
graph_path(dir, dst_basename),
)?;
Ok(())
}
/// Opens the existing file at `path` for writing and calls `sync_all` on it.
///
/// The file is neither created nor truncated.
///
/// # Errors
///
/// Returns the I/O error from opening the file (for example `NotFound`) or
/// from `sync_all`.
fn fsync_file(path: &Path) -> io::Result<()> {
    fs::OpenOptions::new()
        .write(true)
        .open(path)
        .and_then(|handle| handle.sync_all())
}
/// Syncs both files of the pair `basename` in `dir`: the data file, then the
/// graph file.
///
/// # Errors
///
/// Returns the first I/O error from `fsync_file`. The graph file is not
/// synced when syncing the data file fails.
fn fsync_pair(dir: &Path, basename: &str) -> io::Result<()> {
    fsync_file(&data_path(dir, basename))
        .and_then(|()| fsync_file(&graph_path(dir, basename)))
}
/// Opens the directory `dir` read-only and calls `sync_all` on it.
///
/// `save` calls this after creating, copying or renaming snapshot files.
///
/// # Errors
///
/// Returns the I/O error from opening the directory or from `sync_all`.
#[cfg(unix)]
fn fsync_dir(dir: &Path) -> io::Result<()> {
    fs::File::open(dir).and_then(|handle| handle.sync_all())
}
/// Non-unix stand-in for the directory sync: does nothing and always succeeds.
#[cfg(not(unix))]
fn fsync_dir(_dir: &Path) -> io::Result<()> {
    Ok(())
}
pub fn save(idx: &HnswIndex, dir: &Path) -> Result<()> {
if idx.raw_len() == 0 {
tracing::debug!(?dir, "snapshot::save: index is empty; skipping");
return Ok(());
}
fs::create_dir_all(dir)
.map_err(|e| Error::vector_index(format!("create snapshot dir {dir:?}: {e}")))?;
remove_pair(dir, TMP_BASENAME)
.map_err(|e| Error::vector_index(format!("clean stale tmp pair: {e}")))?;
idx.inner()
.file_dump(dir, TMP_BASENAME)
.map_err(|e| Error::vector_index(format!("Hnsw::file_dump: {e}")))?;
fsync_pair(dir, TMP_BASENAME)
.map_err(|e| Error::vector_index(format!("fsync tmp pair: {e}")))?;
fsync_dir(dir)
.map_err(|e| Error::vector_index(format!("fsync dir post-tmp: {e}")))?;
if pair_exists(dir, LIVE_BASENAME) {
remove_pair(dir, BAK_BASENAME)
.map_err(|e| Error::vector_index(format!("clean prior bak: {e}")))?;
copy_pair(dir, LIVE_BASENAME, BAK_BASENAME)
.map_err(|e| Error::vector_index(format!("copy live→bak: {e}")))?;
fsync_pair(dir, BAK_BASENAME)
.map_err(|e| Error::vector_index(format!("fsync bak pair: {e}")))?;
fsync_dir(dir)
.map_err(|e| Error::vector_index(format!("fsync dir post-bak: {e}")))?;
}
rename_pair(dir, TMP_BASENAME, LIVE_BASENAME)
.map_err(|e| Error::vector_index(format!("rename tmp→live: {e}")))?;
fsync_dir(dir)
.map_err(|e| Error::vector_index(format!("fsync dir post-promote: {e}")))?;
tracing::debug!(?dir, "HNSW snapshot saved");
Ok(())
}
pub fn load(dir: &Path) -> Result<HnswIndex> {
if !pair_exists(dir, LIVE_BASENAME) {
return Err(Error::vector_index(format!(
"live HNSW snapshot pair missing in {dir:?}"
)));
}
load_inner_from_basename(dir, LIVE_BASENAME)
}
pub fn load_bak(dir: &Path) -> Result<HnswIndex> {
if !pair_exists(dir, BAK_BASENAME) {
return Err(Error::vector_index(format!(
"backup HNSW snapshot pair missing in {dir:?}"
)));
}
load_inner_from_basename(dir, BAK_BASENAME)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vector_index::{HnswIndex, HnswParams};
    use solo_core::VectorIndex;

    /// Builds a deterministic, L2-normalised vector of `dim` components from `seed`.
    fn unit_vec(seed: u32, dim: usize) -> Vec<f32> {
        let phase = (seed as f32) * 0.123;
        let mut components: Vec<f32> = (0..dim)
            .map(|i| (phase + i as f32 * 0.317).sin())
            .collect();
        // The clamp keeps the division finite for a zero vector.
        let norm: f32 = components
            .iter()
            .map(|c| c * c)
            .sum::<f32>()
            .sqrt()
            .max(1e-9);
        components.iter_mut().for_each(|c| *c /= norm);
        components
    }

    /// Adds `n` vectors to `idx` with ids `1..=n`; id `k` gets `unit_vec(k, dim)`.
    fn populate(idx: &HnswIndex, n: usize, dim: usize) {
        (0..n).for_each(|i| {
            idx.add(i as i64 + 1, &unit_vec(i as u32 + 1, dim)).unwrap();
        });
    }

    /// Creates a default-parameter index of dimension `dim` holding `n` vectors.
    fn populated_index(n: usize, dim: usize) -> HnswIndex {
        let idx = HnswIndex::new(dim, HnswParams::default());
        populate(&idx, n, dim);
        idx
    }

    /// A saved index can be loaded back and still finds the expected nearest id.
    #[test]
    fn save_then_load_roundtrip_preserves_search_recall() {
        let scratch = tempfile::TempDir::new().unwrap();
        let dim = 16;
        let original = populated_index(20, dim);

        save(&original, scratch.path()).unwrap();
        assert!(pair_exists(scratch.path(), LIVE_BASENAME));

        let restored = load(scratch.path()).unwrap();
        assert_eq!(restored.dim(), dim);
        assert_eq!(restored.len(), 20);

        let query = unit_vec(5, dim);
        let hits = restored.search(&query, 3).unwrap();
        assert!(!hits.is_empty());
        assert_eq!(hits[0].0, 5);
    }

    /// The second save keeps the first snapshot as the backup pair.
    #[test]
    fn second_save_promotes_previous_to_bak() {
        let scratch = tempfile::TempDir::new().unwrap();
        let dim = 8;

        let first = populated_index(5, dim);
        save(&first, scratch.path()).unwrap();
        assert!(pair_exists(scratch.path(), LIVE_BASENAME));
        assert!(!pair_exists(scratch.path(), BAK_BASENAME));

        let second = populated_index(9, dim);
        save(&second, scratch.path()).unwrap();
        assert!(pair_exists(scratch.path(), LIVE_BASENAME));
        assert!(pair_exists(scratch.path(), BAK_BASENAME));

        let live = load(scratch.path()).unwrap();
        assert_eq!(live.len(), 9);
        let bak = load_bak(scratch.path()).unwrap();
        assert_eq!(bak.len(), 5);
    }

    /// With a garbage live graph file, `load` fails while `load_bak` still works.
    #[test]
    fn corrupt_live_falls_back_to_bak() {
        let scratch = tempfile::TempDir::new().unwrap();
        let dim = 8;

        let first = populated_index(7, dim);
        save(&first, scratch.path()).unwrap();
        let second = populated_index(11, dim);
        save(&second, scratch.path()).unwrap();

        let live_graph = graph_path(scratch.path(), LIVE_BASENAME);
        std::fs::write(&live_graph, b"GARBAGE").unwrap();

        assert!(load(scratch.path()).is_err());
        let bak = load_bak(scratch.path()).unwrap();
        assert_eq!(bak.len(), 7);
    }

    /// Loading from an empty directory reports "missing" and does not panic.
    #[test]
    fn missing_snapshot_returns_error_not_panic() {
        let scratch = tempfile::TempDir::new().unwrap();

        let live_err = load(scratch.path()).unwrap_err();
        assert!(live_err.to_string().contains("missing"));

        let bak_err = load_bak(scratch.path()).unwrap_err();
        assert!(bak_err.to_string().contains("missing"));
    }

    /// Saving an index that never held an entry succeeds and writes no files.
    #[test]
    fn save_on_empty_index_is_noop() {
        let scratch = tempfile::TempDir::new().unwrap();
        let empty = HnswIndex::new(8, HnswParams::default());
        assert_eq!(empty.len(), 0);

        save(&empty, scratch.path()).expect("empty save must succeed");

        assert!(!pair_exists(scratch.path(), LIVE_BASENAME));
        assert!(!pair_exists(scratch.path(), TMP_BASENAME));
    }

    /// An index whose entries were all removed is still written, and the
    /// restored index reports the removed entries in `len`.
    #[test]
    fn save_persists_when_all_visible_entries_are_tombstoned() {
        let scratch = tempfile::TempDir::new().unwrap();
        let dim = 8usize;
        let idx = HnswIndex::new(dim, HnswParams::default());

        for id in 1..=3i64 {
            idx.add(id, &unit_vec(id as u32, dim)).unwrap();
        }
        for id in 1..=3i64 {
            idx.remove(id).unwrap();
        }
        assert_eq!(idx.len(), 0);

        save(&idx, scratch.path()).expect("save must succeed");
        assert!(
            pair_exists(scratch.path(), LIVE_BASENAME),
            "snapshot must be written even when all entries are tombstoned"
        );

        let restored = load(scratch.path()).unwrap();
        assert_eq!(restored.len(), 3, "graph entries restored intact");
    }

    /// Leftover tmp files from an earlier run do not survive a successful save.
    #[test]
    fn stale_tmp_files_get_cleaned_on_save() {
        let scratch = tempfile::TempDir::new().unwrap();
        for leftover in [
            data_path(scratch.path(), TMP_BASENAME),
            graph_path(scratch.path(), TMP_BASENAME),
        ] {
            std::fs::write(leftover, b"stale").unwrap();
        }

        let idx = populated_index(3, 8);
        save(&idx, scratch.path()).unwrap();

        assert!(!pair_exists(scratch.path(), TMP_BASENAME));
        assert!(pair_exists(scratch.path(), LIVE_BASENAME));
    }
}