use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
/// One index entry as it is stored in the on-disk registry.
///
/// Entries are wrapped in [`IndexRegistryFile`] for (de)serialization. Every
/// list field below is optional in the file: it deserializes to an empty
/// `Vec` when absent (`serde(default)`) and is left out of the serialized
/// output when empty (`skip_serializing_if = "Vec::is_empty"`).
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct PersistedIndex {
/// Identifier of the index. The registry upsert/remove helpers in this file
/// match entries on this value, and the per-index data directory name is
/// derived from it.
pub id: String,
/// NOTE(review): presumably the root directory that was indexed — confirm
/// against the code that builds these entries.
pub root_path: PathBuf,
/// NOTE(review): presumably paths to include when indexing; the exact
/// semantics are not visible in this file — confirm.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub include_paths: Vec<String>,
/// NOTE(review): presumably glob patterns to exclude; the glob dialect is
/// not visible in this file — confirm.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub exclude_globs: Vec<String>,
/// NOTE(review): presumably file extensions to consider; whether a leading
/// dot is expected is not visible in this file — confirm.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub extensions: Vec<String>,
/// NOTE(review): meaning not visible in this file; presumably
/// project-specific vocabulary used by search — confirm.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub domain_terms: Vec<String>,
/// NOTE(review): meaning not visible in this file; presumably a path-based
/// filter applied to results or indexing — confirm.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub path_filter: Vec<String>,
}
/// Top-level document shape of the registry file read and written by the
/// `*_index_registry_at` helpers in this file.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct IndexRegistryFile {
/// All persisted entries. Serialized under the key `index` (so a TOML
/// document holds them as `[[index]]` tables) and defaults to an empty list
/// when that key is absent.
#[serde(default, rename = "index")]
pub indexes: Vec<PersistedIndex>,
}
/// Returns the `trusty-search` directory under the platform's data-local
/// directory, creating it (and any missing parents) first.
///
/// # Errors
///
/// Fails when `dirs::data_local_dir()` yields nothing or when the directory
/// cannot be created.
pub fn data_dir() -> Result<PathBuf> {
    let base = dirs::data_local_dir().context("could not determine data-local directory")?;
    let app_dir = base.join("trusty-search");
    std::fs::create_dir_all(&app_dir).context("create trusty-search data dir")?;
    Ok(app_dir)
}
/// Returns the location of `indexes.toml` inside [`data_dir`].
///
/// # Errors
///
/// Propagates any failure from [`data_dir`].
pub fn indexes_toml_path() -> Result<PathBuf> {
    data_dir().map(|base| base.join("indexes.toml"))
}
/// Returns the directory `<data_dir>/indexes/<sanitized id>` for one index,
/// creating it (and any missing parents) first.
///
/// The id is passed through `sanitize_id` before being used as a directory
/// name.
///
/// # Errors
///
/// Propagates failures from [`data_dir`] and from directory creation.
pub fn index_data_dir(index_id: &str) -> Result<PathBuf> {
    let mut per_index = data_dir()?;
    per_index.push("indexes");
    per_index.push(sanitize_id(index_id));
    std::fs::create_dir_all(&per_index).context("create per-index data dir")?;
    Ok(per_index)
}
/// Maps an index id to a string that is safe to use as a single directory
/// name.
///
/// ASCII letters, digits, `.`, `_` and `-` are kept; every other character
/// (including path separators) becomes `_`.
///
/// An id that would come out empty or consist solely of dots (`""`, `"."`,
/// `".."`, ...) is rewritten to underscores instead. Joined onto a parent
/// directory, those names would resolve to the parent itself or to its
/// parent, so creating or recursively deleting "the index directory" would
/// touch directories shared by every index.
fn sanitize_id(id: &str) -> String {
    let mapped: String = id
        .chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') {
                c
            } else {
                '_'
            }
        })
        .collect();

    if mapped.is_empty() {
        return "_".to_owned();
    }
    if mapped.chars().all(|c| c == '.') {
        // All characters are ASCII dots, so the byte length is the dot count.
        return "_".repeat(mapped.len());
    }
    mapped
}
/// Returns the path of the `hnsw.usearch` file inside the index's data
/// directory.
///
/// Calls [`index_data_dir`], so the directory is created as a side effect.
///
/// # Errors
///
/// Propagates any failure from [`index_data_dir`].
pub fn hnsw_path(index_id: &str) -> Result<PathBuf> {
    index_data_dir(index_id).map(|dir| dir.join("hnsw.usearch"))
}
/// Returns the path of the `chunks.json` file inside the index's data
/// directory.
///
/// Calls [`index_data_dir`], so the directory is created as a side effect.
///
/// # Errors
///
/// Propagates any failure from [`index_data_dir`].
pub fn chunks_path(index_id: &str) -> Result<PathBuf> {
    let mut target = index_data_dir(index_id)?;
    target.push("chunks.json");
    Ok(target)
}
/// Loads every registry entry from the default `indexes.toml` location.
///
/// Thin wrapper around [`load_index_registry_at`] using
/// [`indexes_toml_path`].
///
/// # Errors
///
/// Propagates failures from resolving the path or reading the file.
pub fn load_index_registry() -> Result<Vec<PersistedIndex>> {
    let registry_path = indexes_toml_path()?;
    load_index_registry_at(&registry_path)
}
/// Reads the registry file at `path` and returns its entries.
///
/// * A missing file yields an empty list.
/// * A file that cannot be parsed as TOML is logged with `tracing::warn!` and
///   also yields an empty list rather than an error.
///
/// # Errors
///
/// Returns an error only when reading the file fails for a reason other than
/// the file not existing.
pub(crate) fn load_index_registry_at(path: &Path) -> Result<Vec<PersistedIndex>> {
    let raw = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(err) => {
            if err.kind() == std::io::ErrorKind::NotFound {
                return Ok(Vec::new());
            }
            return Err(err).context("read indexes.toml");
        }
    };

    let parsed = toml::from_str::<IndexRegistryFile>(&raw).map(|file| file.indexes);
    Ok(parsed.unwrap_or_else(|e| {
        tracing::warn!(
            "indexes.toml at {} is corrupt ({e}); starting with empty registry",
            path.display()
        );
        Vec::new()
    }))
}
/// Writes `entries` to the default `indexes.toml` location, replacing the
/// previous contents.
///
/// Thin wrapper around [`save_index_registry_at`] using
/// [`indexes_toml_path`].
///
/// # Errors
///
/// Propagates failures from resolving the path or writing the file.
pub fn save_index_registry(entries: &[PersistedIndex]) -> Result<()> {
    indexes_toml_path().and_then(|registry_path| save_index_registry_at(&registry_path, entries))
}
/// Serializes `entries` as pretty TOML and stores them at `path`.
///
/// The document is first written to a sibling file (the same path with its
/// extension replaced by `toml.tmp`) and then renamed over `path`, so `path`
/// is only ever replaced by a fully written file.
///
/// # Errors
///
/// Fails when serialization, the temporary write, or the rename fails.
pub(crate) fn save_index_registry_at(path: &Path, entries: &[PersistedIndex]) -> Result<()> {
    let snapshot = IndexRegistryFile {
        indexes: Vec::from(entries),
    };
    let body = toml::to_string_pretty(&snapshot).context("serialize indexes.toml")?;

    let staging = path.with_extension("toml.tmp");
    std::fs::write(&staging, body).context("write indexes.toml tmp")?;
    std::fs::rename(&staging, path).context("rename indexes.toml")
}
/// Inserts `entry` into the default registry file, or replaces the existing
/// entry that has the same `id`.
///
/// Thin wrapper around [`upsert_index_registry_entry_at`] using
/// [`indexes_toml_path`].
///
/// # Errors
///
/// Propagates failures from resolving the path, loading, or saving.
pub fn upsert_index_registry_entry(entry: PersistedIndex) -> Result<()> {
    let registry_path = indexes_toml_path()?;
    upsert_index_registry_entry_at(&registry_path, entry)
}
/// Inserts `entry` into the registry file at `path`, or overwrites the first
/// stored entry whose `id` equals `entry.id`, then saves the whole registry.
///
/// # Errors
///
/// Propagates failures from [`load_index_registry_at`] and
/// [`save_index_registry_at`].
pub(crate) fn upsert_index_registry_entry_at(path: &Path, entry: PersistedIndex) -> Result<()> {
    let mut registry = load_index_registry_at(path)?;
    match registry.iter().position(|stored| stored.id == entry.id) {
        Some(slot) => registry[slot] = entry,
        None => registry.push(entry),
    }
    save_index_registry_at(path, &registry)
}
/// Removes the entry with the given `id` from the default registry file.
///
/// Thin wrapper around [`remove_index_registry_entry_at`] using
/// [`indexes_toml_path`].
///
/// # Errors
///
/// Propagates failures from resolving the path, loading, or saving.
pub fn remove_index_registry_entry(id: &str) -> Result<()> {
    indexes_toml_path().and_then(|registry_path| remove_index_registry_entry_at(&registry_path, id))
}
pub(crate) fn remove_index_registry_entry_at(path: &Path, id: &str) -> Result<()> {
let mut entries = load_index_registry_at(path)?;
let before = entries.len();
entries.retain(|e| e.id != id);
if entries.len() == before {
return Ok(());
}
save_index_registry_at(path, &entries)
}
pub fn remove_index_data_dir(index_id: &str) -> Result<()> {
let dir = data_dir()?.join("indexes").join(sanitize_id(index_id));
if dir.exists() {
std::fs::remove_dir_all(&dir).with_context(|| format!("remove {}", dir.display()))?;
}
Ok(())
}
/// Reports whether `path` currently refers to a regular file.
///
/// `Path::is_file` already returns `false` when the path does not exist or
/// its metadata cannot be read, so no separate existence check is made.
pub fn has_persisted_hnsw(path: &Path) -> bool {
    path.is_file()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a registry entry with the given id and root path; every list
    /// field keeps its default (empty) value.
    fn entry(id: &str, root: &str) -> PersistedIndex {
        PersistedIndex {
            id: id.into(),
            root_path: PathBuf::from(root),
            ..Default::default()
        }
    }

    /// Safe characters survive; separators and spaces become underscores.
    #[test]
    fn sanitize_strips_unsafe_chars() {
        let cases = [
            ("good-name_1.0", "good-name_1.0"),
            ("../escape", ".._escape"),
            ("with spaces/slash", "with_spaces_slash"),
        ];
        for &(input, expected) in &cases {
            assert_eq!(sanitize_id(input), expected);
        }
    }

    /// Serializing and re-parsing the registry document preserves entries.
    #[test]
    fn registry_file_serde_roundtrip() {
        let original = IndexRegistryFile {
            indexes: vec![entry("a", "/tmp/a"), entry("b", "/tmp/b")],
        };
        let encoded = toml::to_string_pretty(&original).unwrap();
        let decoded: IndexRegistryFile = toml::from_str(&encoded).unwrap();
        assert_eq!(decoded.indexes, original.indexes);
    }

    /// Removing an entry is written to disk; removing it again is a no-op.
    #[test]
    fn remove_index_persists_to_toml() {
        let tmp = tempfile::NamedTempFile::new().unwrap();
        let path = tmp.path();

        upsert_index_registry_entry_at(path, entry("keep", "/tmp/keep")).unwrap();
        upsert_index_registry_entry_at(path, entry("drop", "/tmp/drop")).unwrap();
        assert_eq!(load_index_registry_at(path).unwrap().len(), 2);

        remove_index_registry_entry_at(path, "drop").unwrap();
        let restored = load_index_registry_at(path).unwrap();
        assert_eq!(restored.len(), 1);
        assert_eq!(restored[0].id, "keep");
        assert!(restored.iter().all(|e| e.id != "drop"));

        // Second removal of the same id must leave the registry as it was.
        remove_index_registry_entry_at(path, "drop").unwrap();
        assert_eq!(load_index_registry_at(path).unwrap().len(), 1);
    }

    /// Upserting the same id twice keeps one entry holding the newer data.
    #[test]
    fn upsert_index_dedupes_on_id() {
        let tmp = tempfile::NamedTempFile::new().unwrap();
        let path = tmp.path();

        upsert_index_registry_entry_at(path, entry("proj", "/old")).unwrap();
        upsert_index_registry_entry_at(path, entry("proj", "/new")).unwrap();

        let entries = load_index_registry_at(path).unwrap();
        assert_eq!(entries.len(), 1, "duplicate [[index]] block written");
        assert_eq!(entries[0].root_path, PathBuf::from("/new"));
    }

    /// In-memory version of the upsert rule: a matching id is updated in
    /// place rather than appended.
    #[test]
    fn registry_upsert_idempotent_unit() {
        let mut entries = vec![entry("a", "/old")];
        let new = entry("a", "/new");

        match entries.iter().position(|e| e.id == new.id) {
            Some(slot) => entries[slot].root_path = new.root_path.clone(),
            None => entries.push(new),
        }

        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].root_path, PathBuf::from("/new"));
    }
}