use std::collections::HashMap;
use std::io::{Read, Write};
use std::path::Path;
use crate::error::{JammiError, Result};
use crate::index::VectorIndex;
/// Format version of the `.rowmap` sidecar file. `save` writes it as the
/// first four bytes (little-endian) and `load` rejects any other value.
const ROWMAP_VERSION: u32 = 1;
/// Vector index backed by a `usearch::Index`, together with the bookkeeping
/// needed to translate between caller-supplied string row IDs and the `u64`
/// keys stored inside the USearch index.
pub struct SidecarIndex {
/// Expected vector length; `add` rejects vectors of any other length.
dimensions: usize,
/// Underlying USearch index (created with the cosine metric and `f32` scalars).
index: usearch::Index,
/// Row IDs in insertion order; the position of an ID is its USearch key.
row_map: Vec<String>,
/// Reverse lookup from row ID to USearch key.
row_index: HashMap<String, u64>,
/// `false` for a freshly created index; set to `true` by `build` and by `load`.
built: bool,
}
impl SidecarIndex {
/// Creates an empty index for vectors of length `dimensions`.
///
/// The underlying USearch index is configured with the cosine metric and
/// `f32` scalars; every other option keeps its USearch default.
///
/// # Errors
/// Returns `JammiError::Other` when USearch fails to create the index.
pub fn new(dimensions: usize) -> Result<Self> {
    let options = usearch::IndexOptions {
        dimensions,
        metric: usearch::MetricKind::Cos,
        quantization: usearch::ScalarKind::F32,
        ..Default::default()
    };
    let index = match usearch::Index::new(&options) {
        Ok(created) => created,
        Err(e) => {
            return Err(JammiError::Other(format!("USearch index creation: {e}")));
        }
    };
    let row_map = Vec::new();
    let row_index = HashMap::new();
    Ok(Self {
        dimensions,
        index,
        row_map,
        row_index,
        built: false,
    })
}
/// Looks up the stored vector for `row_id`.
///
/// Returns `Ok(None)` when the row ID is unknown to this index or when
/// USearch reports no vector under the corresponding key.
///
/// # Errors
/// Returns `JammiError::Other` when the USearch export call fails.
pub fn get(&self, row_id: &str) -> Result<Option<Vec<f32>>> {
    let key = match self.row_index.get(row_id) {
        Some(&key) => key,
        None => return Ok(None),
    };

    let mut vector = Vec::new();
    let match_count = self
        .index
        .export(key, &mut vector)
        .map_err(|e| JammiError::Other(format!("USearch get: {e}")))?;

    Ok(if match_count == 0 {
        None
    } else {
        // Keep only the first vector's worth of values.
        vector.truncate(self.dimensions);
        Some(vector)
    })
}
/// Persists the index as three sibling files derived from `base_path`
/// via `Path::with_extension` (so any extension already on `base_path`
/// is replaced):
///
/// * `<base>.usearch` – the USearch index itself,
/// * `<base>.rowmap` – `ROWMAP_VERSION` as a little-endian `u32`, followed
///   by one record per row ID: little-endian `u32` byte length, then the
///   UTF-8 bytes,
/// * `<base>.manifest.json` – dimensions, row count, metric, backend, the
///   two file names above and a creation timestamp.
///
/// # Errors
/// * `JammiError::Other` if the index path is not valid UTF-8 (USearch takes
///   a `&str` path), if USearch fails to save, or if a row ID is longer than
///   `u32::MAX` bytes.
/// * I/O and JSON serialization errors are propagated.
pub fn save(&self, base_path: &Path) -> Result<()> {
    let usearch_path = base_path.with_extension("usearch");
    // Refuse non-UTF-8 paths instead of silently saving to the empty path "".
    let usearch_path_str = usearch_path.to_str().ok_or_else(|| {
        JammiError::Other(format!(
            "Index path is not valid UTF-8: {}",
            usearch_path.display()
        ))
    })?;
    self.index
        .save(usearch_path_str)
        .map_err(|e| JammiError::Other(format!("USearch save: {e}")))?;

    let rowmap_path = base_path.with_extension("rowmap");
    // Buffer the many small writes; an unbuffered `File` costs two syscalls per row.
    let mut writer = std::io::BufWriter::new(std::fs::File::create(&rowmap_path)?);
    writer.write_all(&ROWMAP_VERSION.to_le_bytes())?;
    for id in &self.row_map {
        let bytes = id.as_bytes();
        // The on-disk length prefix is 32 bits wide; fail rather than truncate.
        let len = u32::try_from(bytes.len()).map_err(|_| {
            JammiError::Other(format!(
                "Row id too long for rowmap: {} bytes",
                bytes.len()
            ))
        })?;
        writer.write_all(&len.to_le_bytes())?;
        writer.write_all(bytes)?;
    }
    // Flush explicitly: errors during the implicit flush on drop are discarded.
    writer.flush()?;

    let manifest_path = base_path.with_extension("manifest.json");
    let manifest = serde_json::json!({
        "version": 1,
        "dimensions": self.dimensions,
        "count": self.row_map.len(),
        "metric": "cosine",
        "backend": "usearch",
        "files": {
            "index": usearch_path.file_name().and_then(|n| n.to_str()),
            "rowmap": rowmap_path.file_name().and_then(|n| n.to_str()),
        },
        "created_at": chrono::Utc::now().to_rfc3339(),
    });
    std::fs::write(&manifest_path, serde_json::to_string_pretty(&manifest)?)?;
    Ok(())
}
/// Loads an index previously written by `save` from the three sibling
/// files `<base>.manifest.json`, `<base>.rowmap` and `<base>.usearch`.
///
/// The returned index has `built` set to `true`.
///
/// # Errors
/// * I/O and JSON parse errors are propagated; a rowmap that ends in the
///   middle of a record is reported as an `UnexpectedEof` I/O error.
/// * `JammiError::Other` if the manifest lacks a usable `dimensions` value,
///   the rowmap version is not `ROWMAP_VERSION`, a row ID is not valid
///   UTF-8, the manifest's `count` (when present) disagrees with the number
///   of rowmap entries, the index path is not valid UTF-8, or USearch fails.
pub fn load(base_path: &Path) -> Result<Self> {
    let manifest_path = base_path.with_extension("manifest.json");
    let manifest_str = std::fs::read_to_string(&manifest_path)?;
    let manifest: serde_json::Value = serde_json::from_str(&manifest_str)?;
    let raw_dimensions = manifest["dimensions"]
        .as_u64()
        .ok_or_else(|| JammiError::Other("Missing dimensions in manifest".into()))?;
    // Checked conversion: a plain `as usize` would truncate on 32-bit targets.
    let dimensions = usize::try_from(raw_dimensions).map_err(|_| {
        JammiError::Other(format!(
            "Manifest dimensions {raw_dimensions} out of range"
        ))
    })?;

    let rowmap_path = base_path.with_extension("rowmap");
    let mut reader = std::io::BufReader::new(std::fs::File::open(&rowmap_path)?);
    let mut version_bytes = [0u8; 4];
    reader.read_exact(&mut version_bytes)?;
    let version = u32::from_le_bytes(version_bytes);
    if version != ROWMAP_VERSION {
        return Err(JammiError::Other(format!(
            "Unknown rowmap version {version}, expected {ROWMAP_VERSION}"
        )));
    }

    let mut row_map = Vec::new();
    loop {
        // Read the 4-byte length prefix by hand so that a clean end of file
        // (zero bytes available) can be told apart from a truncated prefix.
        let mut len_bytes = [0u8; 4];
        let mut filled = 0;
        while filled < len_bytes.len() {
            match reader.read(&mut len_bytes[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e.into()),
            }
        }
        if filled == 0 {
            break;
        }
        if filled < len_bytes.len() {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "rowmap ends inside a length prefix",
            )
            .into());
        }

        // Read at most `expected` bytes without pre-allocating them, so a
        // corrupt length prefix cannot force a multi-gigabyte allocation.
        let expected = u64::from(u32::from_le_bytes(len_bytes));
        let mut buf = Vec::new();
        let read = reader.by_ref().take(expected).read_to_end(&mut buf)?;
        if u64::try_from(read).ok() != Some(expected) {
            return Err(std::io::Error::new(
                std::io::ErrorKind::UnexpectedEof,
                "rowmap ends inside a row id",
            )
            .into());
        }
        row_map.push(
            String::from_utf8(buf)
                .map_err(|e| JammiError::Other(format!("Invalid UTF-8 in rowmap: {e}")))?,
        );
    }

    // `save` records the row count in the manifest; when it is present, use
    // it to catch a rowmap that was cut off exactly on a record boundary.
    if let Some(expected_count) = manifest["count"].as_u64() {
        if u64::try_from(row_map.len()).ok() != Some(expected_count) {
            return Err(JammiError::Other(format!(
                "Rowmap entry count {} does not match manifest count {expected_count}",
                row_map.len()
            )));
        }
    }

    let index = usearch::Index::new(&usearch::IndexOptions {
        dimensions,
        metric: usearch::MetricKind::Cos,
        quantization: usearch::ScalarKind::F32,
        ..Default::default()
    })
    .map_err(|e| JammiError::Other(format!("USearch index creation for load: {e}")))?;

    let usearch_path = base_path.with_extension("usearch");
    // Refuse non-UTF-8 paths instead of silently loading from the empty path "".
    let usearch_path_str = usearch_path.to_str().ok_or_else(|| {
        JammiError::Other(format!(
            "Index path is not valid UTF-8: {}",
            usearch_path.display()
        ))
    })?;
    index
        .load(usearch_path_str)
        .map_err(|e| JammiError::Other(format!("USearch load: {e}")))?;

    // Rebuild the reverse lookup: the position in `row_map` is the USearch key.
    let row_index = row_map
        .iter()
        .enumerate()
        .map(|(key, id)| (id.clone(), key as u64))
        .collect();

    Ok(Self {
        dimensions,
        index,
        row_map,
        row_index,
        built: true,
    })
}
}
impl VectorIndex for SidecarIndex {
/// Inserts `vector` under `row_id`.
///
/// The USearch key assigned to the vector is the current number of rows,
/// i.e. its position in `row_map`.
///
/// Note: adding a `row_id` that already exists stores an additional vector
/// under a new key; the ID-to-key lookup then points at the newest entry
/// while the earlier vector stays in the index.
///
/// # Errors
/// Returns `JammiError::Other` when `vector.len()` differs from the index
/// dimensions, or when USearch fails to reserve capacity or add the vector.
fn add(&mut self, row_id: &str, vector: &[f32]) -> Result<()> {
    if vector.len() != self.dimensions {
        return Err(JammiError::Other(format!(
            "Vector dimension mismatch: expected {}, got {}",
            self.dimensions,
            vector.len()
        )));
    }
    let key = self.row_map.len() as u64;
    if self.index.capacity() <= self.index.size() {
        // Grow geometrically. Growing by a single slot would call `reserve`
        // on every insert once the initial 64 slots are used up.
        let new_cap = self.index.capacity().saturating_mul(2).max(64);
        self.index
            .reserve(new_cap)
            .map_err(|e| JammiError::Other(format!("USearch reserve: {e}")))?;
    }
    self.index
        .add(key, vector)
        .map_err(|e| JammiError::Other(format!("USearch add: {e}")))?;
    // Update the bookkeeping only after the vector is safely in the index.
    self.row_map.push(row_id.to_string());
    self.row_index.insert(row_id.to_string(), key);
    Ok(())
}
/// Marks the index as built by setting the `built` flag.
///
/// No other work happens here: vectors are inserted into the USearch index
/// directly by `add`.
fn build(&mut self) -> Result<()> {
self.built = true;
Ok(())
}
/// Returns up to `k` nearest rows for `query` as `(row_id, distance)` pairs,
/// in the order USearch reports them. Distances are the values USearch
/// returns for this index, which is created with the cosine metric.
///
/// An empty index or `k == 0` yields an empty result without querying
/// USearch. Matches whose key has no entry in `row_map` are skipped.
///
/// # Errors
/// Returns `JammiError::Other` when `query.len()` differs from the index
/// dimensions (checked only for a non-empty index) or when the USearch
/// search call fails.
fn search(&self, query: &[f32], k: usize) -> Result<Vec<(String, f32)>> {
    if k == 0 || self.row_map.is_empty() {
        return Ok(Vec::new());
    }
    // Mirror the check in `add`: never hand a wrong-sized slice to USearch.
    if query.len() != self.dimensions {
        return Err(JammiError::Other(format!(
            "Query dimension mismatch: expected {}, got {}",
            self.dimensions,
            query.len()
        )));
    }
    let actual_k = k.min(self.row_map.len());
    let matches = self
        .index
        .search(query, actual_k)
        .map_err(|e| JammiError::Other(format!("USearch search: {e}")))?;
    let results: Vec<(String, f32)> = matches
        .keys
        .iter()
        .zip(matches.distances.iter())
        .filter_map(|(&key, &dist)| {
            // Checked conversion: a key that does not fit in `usize` cannot
            // be a `row_map` position, so it is skipped like any unknown key.
            let idx = usize::try_from(key).ok()?;
            self.row_map.get(idx).map(|id| (id.clone(), dist))
        })
        .collect();
    Ok(results)
}
/// Trait entry point for persistence; forwards to the inherent
/// `SidecarIndex::save` with `path` as the base path.
fn save(&self, path: &Path) -> Result<()> {
SidecarIndex::save(self, path)
}
/// Trait entry point for loading; forwards to the inherent
/// `SidecarIndex::load` with `path` as the base path.
fn load(path: &Path) -> Result<Self> {
SidecarIndex::load(path)
}
/// Number of rows added to the index (the length of `row_map`).
fn len(&self) -> usize {
self.row_map.len()
}
}