use std::collections::HashMap;
use std::io::{Read, Write};
use std::path::Path;
use crate::config::AnnIndexConfig;
use crate::error::{JammiError, Result};
use crate::index::VectorIndex;
/// Format version stored as the first four bytes (little-endian `u32`) of the `.rowmap` file.
/// `SidecarIndex::save` writes it and `SidecarIndex::load` rejects any other value.
const ROWMAP_VERSION: u32 = 1;
fn index_options(dimensions: usize, ann: &AnnIndexConfig) -> usearch::IndexOptions {
usearch::IndexOptions {
dimensions,
metric: usearch::MetricKind::Cos,
quantization: usearch::ScalarKind::F32,
connectivity: ann.connectivity,
expansion_add: ann.build_expansion,
expansion_search: ann.search_expansion,
..Default::default()
}
}
/// A USearch index paired with the bookkeeping needed to translate between caller-supplied
/// string row ids and the integer keys stored in the backend.
pub struct SidecarIndex {
/// Vector length enforced by `add` and recorded in the manifest written by `save`.
dimensions: usize,
/// The backend index; vectors are stored under keys that are positions in `row_map`.
index: usearch::Index,
/// Row ids in insertion order: element `i` is the id stored under backend key `i`.
row_map: Vec<String>,
/// Reverse lookup from a row id to its backend key.
row_index: HashMap<String, u64>,
/// Initialised to `false` by `new`; set to `true` by `build` and by `load`.
built: bool,
}
impl SidecarIndex {
/// Creates an empty index for vectors of length `dimensions`, configured from `ann`.
///
/// # Errors
/// Returns `JammiError::Other` when the backend fails to create the index.
pub fn new(dimensions: usize, ann: &AnnIndexConfig) -> Result<Self> {
    let options = index_options(dimensions, ann);
    match usearch::Index::new(&options) {
        Ok(index) => Ok(Self {
            dimensions,
            index,
            row_map: Vec::new(),
            row_index: HashMap::new(),
            built: false,
        }),
        Err(e) => Err(JammiError::Other(format!("USearch index creation: {e}"))),
    }
}
/// Looks up the vector stored for `row_id`.
///
/// Returns `Ok(None)` when the id is unknown to this index or when the backend exports
/// nothing for its key. Otherwise the exported values are cut down to at most
/// `dimensions` entries and returned.
///
/// # Errors
/// Returns `JammiError::Other` when the backend export call fails.
pub fn get(&self, row_id: &str) -> Result<Option<Vec<f32>>> {
    let key = match self.row_index.get(row_id) {
        Some(key) => *key,
        None => return Ok(None),
    };

    let mut vector: Vec<f32> = Vec::new();
    let exported = self
        .index
        .export(key, &mut vector)
        .map_err(|e| JammiError::Other(format!("USearch get: {e}")))?;

    if exported == 0 {
        return Ok(None);
    }

    // Keep only the first `dimensions` values of whatever the backend exported.
    vector.truncate(self.dimensions);
    Ok(Some(vector))
}
/// Persists the index as three sibling files derived from `base_path` with
/// `Path::with_extension`: `.usearch` (the backend graph), `.rowmap` (the row ids) and
/// `.manifest.json` (metadata).
///
/// The rowmap layout is a little-endian `u32` format version followed by one record per
/// row id, in key order: a little-endian `u32` byte length and then the id's UTF-8 bytes.
///
/// # Errors
/// Returns `JammiError::Other` when the `.usearch` path is not valid UTF-8, when the
/// backend fails to save, or when a row id is too long for the `u32` length prefix.
/// I/O and JSON serialisation failures are propagated through `?`.
pub fn save(&self, base_path: &Path) -> Result<()> {
    let usearch_path = base_path.with_extension("usearch");
    // The backend takes a `&str`. A path that cannot be represented must be reported;
    // falling back to an empty string would hand the backend an empty file name.
    let usearch_path_str = usearch_path.to_str().ok_or_else(|| {
        JammiError::Other(format!(
            "USearch save: path is not valid UTF-8: {}",
            usearch_path.display()
        ))
    })?;
    self.index
        .save(usearch_path_str)
        .map_err(|e| JammiError::Other(format!("USearch save: {e}")))?;

    let rowmap_path = base_path.with_extension("rowmap");
    // Buffer the many small length/payload writes instead of issuing one write per field.
    let mut file = std::io::BufWriter::new(std::fs::File::create(&rowmap_path)?);
    file.write_all(&ROWMAP_VERSION.to_le_bytes())?;
    for id in &self.row_map {
        let bytes = id.as_bytes();
        // A plain `as u32` would silently wrap and corrupt every following record.
        let len = u32::try_from(bytes.len()).map_err(|_| {
            JammiError::Other(format!(
                "Row id of {} bytes does not fit the rowmap u32 length prefix",
                bytes.len()
            ))
        })?;
        file.write_all(&len.to_le_bytes())?;
        file.write_all(bytes)?;
    }
    // Flush explicitly: an error during the implicit flush on drop would be lost.
    file.flush()?;

    let manifest_path = base_path.with_extension("manifest.json");
    let manifest = serde_json::json!({
        "version": 1,
        "dimensions": self.dimensions,
        "count": self.row_map.len(),
        "metric": "cosine",
        "backend": "usearch",
        "backend_version": crate::index::backend_version(),
        "files": {
            "index": usearch_path.file_name().and_then(|n| n.to_str()),
            "rowmap": rowmap_path.file_name().and_then(|n| n.to_str()),
        },
        "created_at": chrono::Utc::now().to_rfc3339(),
    });
    std::fs::write(&manifest_path, serde_json::to_string_pretty(&manifest)?)?;
    Ok(())
}
pub fn load(base_path: &Path, ann: &AnnIndexConfig) -> Result<Self> {
let manifest_path = base_path.with_extension("manifest.json");
let manifest_str = std::fs::read_to_string(&manifest_path)?;
let manifest: serde_json::Value = serde_json::from_str(&manifest_str)?;
let dimensions = manifest["dimensions"]
.as_u64()
.ok_or_else(|| JammiError::Other("Missing dimensions in manifest".into()))?
as usize;
let rowmap_path = base_path.with_extension("rowmap");
let mut file = std::fs::File::open(&rowmap_path)?;
let mut version_bytes = [0u8; 4];
file.read_exact(&mut version_bytes)?;
let version = u32::from_le_bytes(version_bytes);
if version != ROWMAP_VERSION {
return Err(JammiError::Other(format!(
"Unknown rowmap version {version}, expected {ROWMAP_VERSION}"
)));
}
let mut row_map = Vec::new();
loop {
let mut len_bytes = [0u8; 4];
match file.read_exact(&mut len_bytes) {
Ok(()) => {}
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break,
Err(e) => return Err(e.into()),
}
let len = u32::from_le_bytes(len_bytes) as usize;
let mut buf = vec![0u8; len];
file.read_exact(&mut buf)?;
row_map.push(
String::from_utf8(buf)
.map_err(|e| JammiError::Other(format!("Invalid UTF-8 in rowmap: {e}")))?,
);
}
let index = usearch::Index::new(&index_options(dimensions, &AnnIndexConfig::default()))
.map_err(|e| JammiError::Other(format!("USearch index creation for load: {e}")))?;
let usearch_path = base_path.with_extension("usearch");
index
.load(usearch_path.to_str().unwrap_or_default())
.map_err(|e| JammiError::Other(format!("USearch load: {e}")))?;
if ann.search_expansion != 0 {
index.change_expansion_search(ann.search_expansion);
}
let row_index = row_map
.iter()
.enumerate()
.map(|(key, id)| (id.clone(), key as u64))
.collect();
Ok(Self {
dimensions,
index,
row_map,
row_index,
built: true,
})
}
}
impl VectorIndex for SidecarIndex {
/// Inserts `vector` under `row_id`.
///
/// The vector is stored in the backend under the next sequential key, which is the current
/// length of `row_map`. Adding an id that is already present stores a second vector under
/// a new key; `row_index` then points at the newer key while `row_map` keeps both entries.
///
/// # Errors
/// Returns `JammiError::Other` when `vector.len()` differs from the index dimensions, or
/// when the backend fails to reserve capacity or to add the vector.
fn add(&mut self, row_id: &str, vector: &[f32]) -> Result<()> {
    if vector.len() != self.dimensions {
        return Err(JammiError::Other(format!(
            "Vector dimension mismatch: expected {}, got {}",
            self.dimensions,
            vector.len()
        )));
    }
    let key = self.row_map.len() as u64;

    let capacity = self.index.capacity();
    if capacity <= self.index.size() {
        // Grow geometrically. Growing by a single slot makes every insert past the first
        // 64 call `reserve` again; doubling keeps the number of calls logarithmic.
        let new_cap = capacity.saturating_mul(2).max(64);
        self.index
            .reserve(new_cap)
            .map_err(|e| JammiError::Other(format!("USearch reserve: {e}")))?;
    }

    self.index
        .add(key, vector)
        .map_err(|e| JammiError::Other(format!("USearch add: {e}")))?;

    // Update the bookkeeping only after the backend accepted the vector, so a failed add
    // leaves `row_map` and `row_index` untouched.
    self.row_map.push(row_id.to_string());
    self.row_index.insert(row_id.to_string(), key);
    Ok(())
}
/// Marks the index as built by setting the `built` flag; always returns `Ok(())`.
/// Vectors are handed to the backend directly by `add`, so nothing else happens here.
fn build(&mut self) -> Result<()> {
self.built = true;
Ok(())
}
/// Returns up to `k` matches for `query` as `(row_id, distance)` pairs, in the order and
/// with the distances reported by the backend.
///
/// An empty index or `k == 0` yields an empty result without calling the backend. `k` is
/// capped at the number of stored rows. Matches whose key has no entry in `row_map` are
/// dropped.
///
/// # Errors
/// Returns `JammiError::Other` when `query.len()` differs from the index dimensions, or
/// when the backend search fails.
fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
    if k == 0 || self.row_map.is_empty() {
        return Ok(Vec::new());
    }
    // Enforce the same length contract as `add`: the backend is given only the slice, so a
    // query of the wrong length must be rejected before it gets there.
    if query.len() != self.dimensions {
        return Err(JammiError::Other(format!(
            "Query dimension mismatch: expected {}, got {}",
            self.dimensions,
            query.len()
        )));
    }

    let actual_k = k.min(self.row_map.len());
    let matches = self
        .index
        .search(query, actual_k)
        .map_err(|e| JammiError::Other(format!("USearch search: {e}")))?;

    let results: Vec<(String, f32)> = matches
        .keys
        .iter()
        .zip(matches.distances.iter())
        .filter_map(|(&key, &dist)| {
            // Checked conversion: a key that does not fit in `usize` cannot be a valid
            // position, and must not be wrapped onto some unrelated row.
            let idx = usize::try_from(key).ok()?;
            self.row_map.get(idx).map(|id| (id.clone(), dist))
        })
        .collect();
    Ok(results)
}
/// Trait entry point for persistence; forwards to the inherent `SidecarIndex::save`,
/// passing `path` as its `base_path`.
fn save(&self, path: &Path) -> Result<()> {
SidecarIndex::save(self, path)
}
/// Number of row ids recorded in `row_map`, i.e. one per successful `add` (or one per
/// rowmap record after `load`).
fn len(&self) -> usize {
self.row_map.len()
}
}
// Unit tests for how `AnnIndexConfig` knobs reach the backend index on creation and on load.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
// Values the tests below expect the backend to report for an index created from
// `AnnIndexConfig::default()`.
const USEARCH_DEFAULT_CONNECTIVITY: usize = 16;
const USEARCH_DEFAULT_EXPANSION_ADD: usize = 128;
const USEARCH_DEFAULT_EXPANSION_SEARCH: usize = 64;
/// Non-default knobs passed to `SidecarIndex::new` must be reported back by the backend.
#[test]
fn knobs_map_onto_a_freshly_built_graph() {
let ann = AnnIndexConfig {
connectivity: 32,
build_expansion: 200,
search_expansion: 100,
};
let idx = SidecarIndex::new(8, &ann).unwrap();
assert_eq!(idx.index.connectivity(), 32);
assert_eq!(idx.index.expansion_add(), 200);
assert_eq!(idx.index.expansion_search(), 100);
}
/// An index created from `AnnIndexConfig::default()` must report the expected backend
/// defaults for all three knobs.
#[test]
fn default_config_reproduces_backend_defaults() {
let idx = SidecarIndex::new(8, &AnnIndexConfig::default()).unwrap();
assert_eq!(idx.index.connectivity(), USEARCH_DEFAULT_CONNECTIVITY);
assert_eq!(idx.index.expansion_add(), USEARCH_DEFAULT_EXPANSION_ADD);
assert_eq!(
idx.index.expansion_search(),
USEARCH_DEFAULT_EXPANSION_SEARCH
);
}
/// Saves a one-vector index, then loads it twice: once with `search_expansion: 77`, which
/// must be applied, and once with the default config, which must leave the backend default.
#[test]
fn load_reapplies_search_expansion_only() {
let dir = tempdir().unwrap();
let base = dir.path().join("knob_roundtrip");
let build = AnnIndexConfig {
connectivity: 32,
build_expansion: 200,
search_expansion: 0,
};
let mut idx = SidecarIndex::new(4, &build).unwrap();
idx.add("a", &[1.0, 0.0, 0.0, 0.0]).unwrap();
idx.build().unwrap();
idx.save(&base).unwrap();
// Load with an explicit, non-zero search expansion.
let loaded = SidecarIndex::load(
&base,
&AnnIndexConfig {
search_expansion: 77,
..AnnIndexConfig::default()
},
)
.unwrap();
assert_eq!(
loaded.index.expansion_search(),
77,
"search_expansion must be re-applied on load"
);
// Load again with the default config; the backend default is expected to remain.
let loaded_default = SidecarIndex::load(&base, &AnnIndexConfig::default()).unwrap();
assert_eq!(
loaded_default.index.expansion_search(),
USEARCH_DEFAULT_EXPANSION_SEARCH
);
}
}