use crate::indexing::facade::IndexFacade;
use crate::storage::{DataSource, IndexMetadata};
use crate::{IndexError, IndexResult, Settings};
use std::path::PathBuf;
use std::sync::Arc;
/// Handle for loading and saving index state rooted at a single directory.
///
/// All on-disk locations used by this type are derived from `base_path`:
/// the `tantivy` and `semantic` subdirectories are joined onto it, and index
/// metadata is loaded from / saved to it directly.
#[derive(Debug)]
pub struct IndexPersistence {
/// Root directory under which the index data and metadata live.
base_path: PathBuf,
}
impl IndexPersistence {
pub fn new(base_path: PathBuf) -> Self {
Self { base_path }
}
/// Returns the `semantic` subdirectory of the base path.
fn semantic_path(&self) -> PathBuf {
    let mut path = self.base_path.clone();
    path.push("semantic");
    path
}
/// Loads an [`IndexFacade`] from disk and attempts to load semantic search data.
///
/// Thin wrapper around `load_facade_impl` with semantic loading enabled.
///
/// # Errors
/// Returns whatever error `load_facade_impl` reports.
#[must_use = "Load errors should be handled appropriately"]
pub fn load_facade(&self, settings: Arc<Settings>) -> IndexResult<IndexFacade> {
    let load_semantic = true;
    self.load_facade_impl(settings, load_semantic)
}
/// Loads an [`IndexFacade`] from disk without loading semantic search itself.
///
/// Thin wrapper around `load_facade_impl` with semantic loading disabled
/// ("lite mode").
///
/// # Errors
/// Returns whatever error `load_facade_impl` reports.
#[must_use = "Load errors should be handled appropriately"]
pub fn load_facade_lite(&self, settings: Arc<Settings>) -> IndexResult<IndexFacade> {
    let load_semantic = false;
    self.load_facade_impl(settings, load_semantic)
}
/// Saves `metadata` under the base path, then updates the project registry.
///
/// The registry update is best-effort: a failure there is logged at debug
/// level and does not affect the returned result.
///
/// # Errors
/// Returns the error from `metadata.save` if the metadata cannot be saved.
fn persist_metadata(&self, metadata: &IndexMetadata) -> IndexResult<()> {
    metadata.save(&self.base_path)?;

    match self.update_project_registry(metadata) {
        Ok(()) => {}
        Err(err) => {
            tracing::debug!(
                target: "persistence",
                "Skipped project registry update: {err}"
            );
        }
    }

    Ok(())
}
fn load_facade_impl(
&self,
settings: Arc<Settings>,
load_semantic: bool,
) -> IndexResult<IndexFacade> {
let metadata = IndexMetadata::load(&self.base_path).ok();
let tantivy_path = self.base_path.join("tantivy");
if !tantivy_path.join("meta.json").exists() {
return Err(IndexError::FileRead {
path: tantivy_path,
source: std::io::Error::new(
std::io::ErrorKind::NotFound,
"Tantivy index not found",
),
});
}
let mut facade = IndexFacade::new(settings)?;
if let Some(ref meta) = metadata {
let fresh_symbol_count = facade.symbol_count();
let fresh_file_count = facade.file_count();
match &meta.data_source {
DataSource::Tantivy {
path, doc_count, ..
} => {
tracing::info!(
"[persistence] loaded facade from Tantivy index: {} ({} documents)",
path.display(),
doc_count
);
}
DataSource::Fresh => {
tracing::info!("[persistence] created fresh facade");
}
}
tracing::info!(
"[persistence] facade contains {fresh_symbol_count} symbols from {fresh_file_count} files"
);
}
if load_semantic {
let semantic_path = self.semantic_path();
tracing::debug!(
"[persistence] semantic path computed as: {}",
semantic_path.display()
);
match facade.load_semantic_search(&semantic_path) {
Ok(true) => {
tracing::debug!("[persistence] loaded semantic search for facade");
}
Ok(false) => {
tracing::debug!("[persistence] no semantic data found (this is optional)");
}
Err(IndexError::SemanticSearch(
crate::semantic::SemanticSearchError::DimensionMismatch {
ref suggestion, ..
},
)) => {
tracing::error!(
"[persistence] semantic search disabled — index incompatible: {suggestion}"
);
}
Err(e) => {
tracing::warn!("[persistence] failed to load semantic search: {e}");
}
}
} else {
tracing::debug!("[persistence] skipping semantic search (lite mode)");
let semantic_path = self.semantic_path();
if semantic_path.join("metadata.json").exists() {
match facade.load_semantic_metadata_snapshot(&semantic_path) {
Ok(true) => {
tracing::debug!(
"[persistence] loaded semantic metadata snapshot for lite facade"
);
}
Ok(false) => {}
Err(e) => {
tracing::warn!(
"[persistence] failed to load semantic metadata snapshot: {e}"
);
}
}
}
}
if let Some(ref meta) = metadata {
if let Some(ref stored_paths) = meta.indexed_paths {
facade.set_indexed_paths(stored_paths.clone());
tracing::debug!(
"[persistence] restored {} indexed paths from metadata",
stored_paths.len()
);
}
}
Ok(facade)
}
#[must_use = "Save errors should be handled to ensure data is persisted"]
pub fn save_facade(&self, facade: &IndexFacade) -> IndexResult<()> {
let mut metadata =
IndexMetadata::load(&self.base_path).unwrap_or_else(|_| IndexMetadata::new());
metadata.update_counts(facade.symbol_count() as u32, facade.file_count());
let indexed_paths: Vec<PathBuf> = facade.get_indexed_paths().iter().cloned().collect();
tracing::debug!(
"[persistence] saving {} indexed paths to metadata",
indexed_paths.len()
);
metadata.update_indexed_paths(indexed_paths);
metadata.data_source = DataSource::Tantivy {
path: self.base_path.join("tantivy"),
doc_count: facade.document_count().unwrap_or(0),
timestamp: crate::indexing::get_utc_timestamp(),
};
self.persist_metadata(&metadata)?;
if facade.has_semantic_search() {
let semantic_path = self.semantic_path();
std::fs::create_dir_all(&semantic_path).map_err(|e| {
IndexError::General(format!("Failed to create semantic directory: {e}"))
})?;
facade
.save_semantic_search(&semantic_path)
.map_err(|e| IndexError::General(format!("Failed to save semantic search: {e}")))?;
}
Ok(())
}
/// Reports whether `tantivy/meta.json` exists under the base path.
pub fn exists(&self) -> bool {
    self.base_path
        .join("tantivy")
        .join("meta.json")
        .exists()
}
/// Removes the `tantivy` directory under the base path and recreates it empty.
///
/// Does nothing when the directory does not exist. A failed removal is
/// retried up to `MAX_ATTEMPTS` times with a short sleep in between (200 ms
/// for a permission-denied error on Windows, 100 ms otherwise). If the
/// directory turns out to be gone already when removal reports `NotFound`,
/// that counts as success rather than a failure to retry.
///
/// # Errors
/// Returns the last `remove_dir_all` error once the retries are used up, or
/// the error from recreating the directory.
pub fn clear(&self) -> Result<(), std::io::Error> {
    const MAX_ATTEMPTS: u32 = 3;

    let tantivy_path = self.base_path.join("tantivy");
    if !tantivy_path.exists() {
        return Ok(());
    }

    let mut attempts = 0;
    loop {
        match std::fs::remove_dir_all(&tantivy_path) {
            Ok(()) => break,
            // The directory vanished between the `exists` check (or a
            // previous attempt) and this call. The goal state is already
            // reached, so retrying could only fail again.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound && !tantivy_path.exists() => {
                break;
            }
            Err(e) if attempts < MAX_ATTEMPTS => {
                attempts += 1;
                #[cfg(windows)]
                {
                    if e.kind() == std::io::ErrorKind::PermissionDenied {
                        eprintln!(
                            "Attempt {attempts}/{MAX_ATTEMPTS}: Windows permission denied ({e}), retrying after delay..."
                        );
                        std::thread::sleep(std::time::Duration::from_millis(200));
                        continue;
                    }
                }
                eprintln!(
                    "Attempt {attempts}/{MAX_ATTEMPTS}: Failed to remove directory ({e}), retrying..."
                );
                std::thread::sleep(std::time::Duration::from_millis(100));
            }
            Err(e) => return Err(e),
        }
    }

    std::fs::create_dir_all(&tantivy_path)?;

    // NOTE(review): presumably gives Windows time to settle after the
    // delete/recreate before callers reopen the directory — confirm.
    #[cfg(windows)]
    {
        std::thread::sleep(std::time::Duration::from_millis(100));
    }

    Ok(())
}
/// Copies counts from `metadata` into the matching project registry entry.
///
/// The project id is read from the `.project-id` file inside the directory
/// named by `crate::init::local_dir_name()`. If that file does not exist, or
/// the registry has no project with that id, nothing is changed and `Ok(())`
/// is returned. On a match, the entry's symbol count, file count and
/// last-modified value are updated (plus the document count when the data
/// source is `DataSource::Tantivy`) and the registry is saved.
///
/// # Errors
/// * `IndexError::FileRead` if `.project-id` exists but cannot be read.
/// * `IndexError::General` if the registry cannot be loaded or saved.
fn update_project_registry(&self, metadata: &IndexMetadata) -> IndexResult<()> {
    let local_dir = crate::init::local_dir_name();
    let project_id_path = PathBuf::from(local_dir).join(".project-id");
    if !project_id_path.exists() {
        return Ok(());
    }

    let raw_project_id =
        std::fs::read_to_string(&project_id_path).map_err(|e| IndexError::FileRead {
            path: project_id_path,
            source: e,
        })?;
    // Strip surrounding whitespace: a trailing newline (as added by many
    // editors and shell redirects) would otherwise make the id lookup miss
    // silently and the registry would never be updated.
    let project_id = raw_project_id.trim().to_string();

    let mut registry = crate::init::ProjectRegistry::load()
        .map_err(|e| IndexError::General(format!("Failed to load project registry: {e}")))?;

    if let Some(project) = registry.find_project_by_id_mut(&project_id) {
        project.symbol_count = metadata.symbol_count;
        project.file_count = metadata.file_count;
        project.last_modified = metadata.last_modified;
        if let DataSource::Tantivy { doc_count, .. } = &metadata.data_source {
            project.doc_count = *doc_count;
        }
        // Only write the registry back when an entry was actually changed.
        registry.save().map_err(|e| {
            IndexError::General(format!("Failed to save project registry: {e}"))
        })?;
    }

    Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::semantic::SemanticMetadata;
use crate::storage::DocumentIndex;
use tempfile::TempDir;
// Test helper: true when `metadata.json` exists in the semantic directory.
fn has_semantic_data(persistence: &IndexPersistence) -> bool {
    let marker = persistence.semantic_path().join("metadata.json");
    marker.exists()
}
// `exists` is false for an empty base directory and becomes true once
// `tantivy/meta.json` has been written.
#[test]
fn test_exists() {
    let root = TempDir::new().unwrap();
    let persistence = IndexPersistence::new(root.path().to_path_buf());
    assert!(!persistence.exists());

    let index_dir = root.path().join("tantivy");
    std::fs::create_dir_all(&index_dir).unwrap();
    let meta_file = index_dir.join("meta.json");
    std::fs::write(meta_file, "{}").unwrap();

    assert!(persistence.exists());
}
// The semantic path is `<base>/semantic`, and semantic data is detected only
// after `metadata.json` has been created inside it.
#[test]
fn test_semantic_paths() {
    let root = TempDir::new().unwrap();
    let persistence = IndexPersistence::new(root.path().to_path_buf());

    let semantic_dir = persistence.semantic_path();
    assert_eq!(semantic_dir, root.path().join("semantic"));
    assert!(!has_semantic_data(&persistence));

    std::fs::create_dir_all(&semantic_dir).unwrap();
    let metadata_file = semantic_dir.join("metadata.json");
    std::fs::write(metadata_file, "{}").unwrap();

    assert!(has_semantic_data(&persistence));
}
// Lite loading must expose the saved semantic metadata (backend, model,
// dimension, embedding count) without enabling semantic search itself.
#[test]
fn test_load_facade_lite_preserves_semantic_metadata_snapshot() {
    let root = TempDir::new().unwrap();
    let base = root.path().to_path_buf();
    let persistence = IndexPersistence::new(base.clone());
    let settings = Settings {
        index_path: base,
        ..Settings::default()
    };

    // Creating the document index puts a Tantivy index on disk for the loader.
    DocumentIndex::new(root.path().join("tantivy"), &settings).unwrap();

    let semantic_dir = root.path().join("semantic");
    std::fs::create_dir_all(&semantic_dir).unwrap();
    let metadata =
        SemanticMetadata::new_remote("snowflake-arctic-embed:latest".to_string(), 1024, 42);
    metadata.save(&semantic_dir).unwrap();

    let loaded = persistence.load_facade_lite(Arc::new(settings)).unwrap();
    let snapshot = loaded
        .get_semantic_metadata()
        .expect("snapshot should load in lite mode");

    assert_eq!(snapshot.backend, metadata.backend);
    assert_eq!(snapshot.model_name, metadata.model_name);
    assert_eq!(snapshot.dimension, metadata.dimension);
    assert_eq!(
        loaded.semantic_search_embedding_count(),
        metadata.embedding_count
    );
    assert!(!loaded.has_semantic_search());
}
}