use std::path::Path;
use anyhow::{Context, Result};
use async_trait::async_trait;
use crate::core::registry::IndexHandle;
use super::Migration;
/// Migration M004 (schema version 3 → 4): rewrites chunk file paths that are
/// still absolute into paths relative to the index root (issue #674).
pub struct M004RepairAbsoluteFilePaths;
#[async_trait]
impl Migration for M004RepairAbsoluteFilePaths {
    /// Schema version this migration upgrades from.
    fn source_version(&self) -> u32 {
        3
    }

    /// Schema version reached once this migration has been applied.
    fn target_version(&self) -> u32 {
        4
    }

    /// Short human-readable summary of what the migration does.
    fn description(&self) -> &'static str {
        "M004: repair any remaining absolute chunk file paths (issue #674)"
    }

    /// Rewrites every chunk whose `file` is an absolute path under
    /// `index.root_path` so that `file` (and the path prefix embedded in the
    /// chunk id, when present) becomes root-relative.
    ///
    /// Steps:
    /// 1. Load all chunks from the corpus store (no-op if there is none).
    /// 2. For each absolute path under the root, compute the relative path and
    ///    the re-keyed id. Absolute paths outside the root are logged and
    ///    left untouched.
    /// 3. Upsert the rewritten chunks, then delete the rows stored under the
    ///    old ids.
    /// 4. Ask the indexer to refresh its live indices from the corpus.
    ///
    /// # Errors
    ///
    /// Returns an error if loading, upserting or deleting chunks fails, or if
    /// one of the blocking tasks panics. A failed live-index refresh is only
    /// logged as a warning and does not fail the migration.
    async fn apply(&self, index: &IndexHandle) -> Result<(), anyhow::Error> {
        // Scope the read guard so it is released before the blocking work.
        let (corpus, root_path) = {
            let indexer = index.indexer.read().await;
            (indexer.corpus_store(), index.root_path.clone())
        };
        let Some(corpus) = corpus else {
            tracing::debug!(
                index_id = %index.id,
                "M004: no durable corpus, skipping"
            );
            return Ok(());
        };

        let all_chunks = tokio::task::spawn_blocking({
            let corpus = std::sync::Arc::clone(&corpus);
            move || corpus.load_all_chunks()
        })
        .await
        .context("M004: load_all_chunks task panicked")?
        .context("M004: failed to load chunks from corpus")?;

        let mut to_upsert = Vec::new();
        let mut ids_to_delete: Vec<String> = Vec::new();
        for mut chunk in all_chunks {
            if !Path::new(&chunk.file).is_absolute() {
                continue;
            }
            let rel_str = match Path::new(&chunk.file).strip_prefix(&root_path) {
                Ok(rel) => rel.to_string_lossy().into_owned(),
                Err(_) => {
                    tracing::warn!(
                        index_id = %index.id,
                        file = %chunk.file,
                        root = %root_path.display(),
                        "M004: chunk file is absolute but not under root_path; skipping"
                    );
                    continue;
                }
            };
            let new_id = reconstruct_id(&chunk.id, &chunk.file, &rel_str);
            // Only schedule the old id for deletion when the chunk is actually
            // re-keyed. If the id is unchanged (it did not embed the absolute
            // path), the upsert below overwrites the row in place, and
            // deleting that same id afterwards would drop the repaired chunk.
            if new_id != chunk.id {
                ids_to_delete.push(std::mem::replace(&mut chunk.id, new_id));
            }
            chunk.file = rel_str;
            to_upsert.push(chunk);
        }

        if to_upsert.is_empty() {
            tracing::info!(
                index_id = %index.id,
                "M004: all chunk file paths already relative, nothing to do"
            );
            return Ok(());
        }
        tracing::info!(
            index_id = %index.id,
            count = to_upsert.len(),
            "M004: rewriting absolute chunk file paths to root-relative"
        );

        // Write the new rows before removing the old ones — presumably so a
        // failure between the two steps leaves duplicates rather than gaps.
        let upsert_corpus = std::sync::Arc::clone(&corpus);
        tokio::task::spawn_blocking(move || upsert_corpus.upsert_chunks(&to_upsert))
            .await
            .context("M004: upsert task panicked")?
            .context("M004: failed to upsert rewritten chunks")?;

        if !ids_to_delete.is_empty() {
            let delete_corpus = std::sync::Arc::clone(&corpus);
            tokio::task::spawn_blocking(move || delete_corpus.delete_chunks(&ids_to_delete))
                .await
                .context("M004: delete task panicked")?
                .context("M004: failed to delete old absolute-keyed chunk rows")?;
        }

        {
            let indexer = index.indexer.read().await;
            // Best effort: the corpus is already repaired at this point, so a
            // refresh failure is reported but does not fail the migration.
            if let Err(e) = indexer.refresh_live_indices_from_corpus().await {
                tracing::warn!(
                    index_id = %index.id,
                    "M004: live-index refresh failed ({e}) — \
                     BM25 may be stale until next daemon restart"
                );
            }
        }
        tracing::info!(
            index_id = %index.id,
            "M004: path repair complete (redb + live BM25 + chunks map synced)"
        );
        Ok(())
    }
}
/// Re-keys a chunk id after its file path has been made root-relative.
///
/// When `old_id` begins with `old_file`, that leading path is swapped for
/// `rel_file` and the remainder of the id is kept verbatim. Ids that do not
/// begin with `old_file` are returned unchanged.
fn reconstruct_id(old_id: &str, old_file: &str, rel_file: &str) -> String {
    match old_id.strip_prefix(old_file) {
        Some(tail) => [rel_file, tail].concat(),
        None => old_id.to_owned(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// The migration upgrades from version 3 to version 4.
    #[test]
    fn test_m004_from_target_version() {
        let migration = M004RepairAbsoluteFilePaths;
        let (from, to) = (migration.source_version(), migration.target_version());
        assert_eq!(from, 3);
        assert_eq!(to, 4);
    }

    /// The description is present and names both the migration and the issue.
    #[test]
    fn test_m004_description_non_empty() {
        let text = M004RepairAbsoluteFilePaths.description();
        assert!(!text.is_empty());
        assert!(text.contains("M004"), "description should include 'M004'");
        assert!(text.contains("#674"), "description should include '#674'");
    }

    /// Source and target versions differ by exactly one.
    #[test]
    fn test_m004_advances_exactly_one_version() {
        let migration = M004RepairAbsoluteFilePaths;
        let step = migration.target_version() - migration.source_version();
        assert_eq!(step, 1, "each migration must advance exactly one version");
    }

    /// An id that starts with the absolute file path is re-keyed to the
    /// relative path, keeping its suffix.
    #[test]
    fn test_m004_reconstruct_id_standard() {
        let rel_file = "src/lib.rs";
        let old_file = "/mnt/efs/data/repos/proj/src/lib.rs";
        let old_id = [old_file, ":42:78"].concat();
        assert_eq!(
            reconstruct_id(&old_id, old_file, rel_file),
            "src/lib.rs:42:78"
        );
    }

    /// An id that does not start with the file path comes back unchanged.
    #[test]
    fn test_m004_reconstruct_id_no_match_passthrough() {
        let untouched = "some::qualified::id";
        assert_eq!(
            reconstruct_id(untouched, "/unexpected/prefix", "rel"),
            untouched
        );
    }

    /// Stripping the root from a path beneath it yields the relative path.
    #[test]
    fn test_m004_rewrite_logic_strip_prefix() {
        let root = Path::new("/mnt/efs/data/repos/proj");
        let absolute = Path::new("/mnt/efs/data/repos/proj/src/lib.rs");
        let relative = absolute.strip_prefix(root).unwrap();
        assert_eq!(relative.display().to_string(), "src/lib.rs");
    }

    /// Stripping the root from a path outside it is an error.
    #[test]
    fn test_m004_rewrite_logic_non_root_path_errors() {
        let root = Path::new("/mnt/efs/data/repos/proj");
        let outside = Path::new("/tmp/other/file.rs");
        assert!(
            outside.strip_prefix(root).is_err(),
            "path outside root must not be rewritten"
        );
    }

    /// `apply` returns `Ok` for a handle built around a freshly constructed
    /// indexer (which the test expects to have no corpus store).
    #[tokio::test]
    async fn test_m004_apply_no_corpus_is_ok() {
        use crate::core::indexer::CodeIndexer;
        use crate::core::registry::{IndexHandle, IndexId};
        use std::sync::Arc;
        use tokio::sync::RwLock;

        let shared_indexer = Arc::new(RwLock::new(CodeIndexer::new(
            "m004-test",
            "/tmp/m004-test",
        )));
        let handle = IndexHandle::bare(
            IndexId::new("m004-test"),
            shared_indexer,
            PathBuf::from("/tmp/m004-test"),
        );

        let result = M004RepairAbsoluteFilePaths.apply(&handle).await;
        assert!(
            result.is_ok(),
            "no-corpus apply must be Ok, got: {result:?}"
        );
    }
}