use std::collections::HashSet;
use anyhow::{Context, Result};
use async_trait::async_trait;
use regex::Regex;
use crate::core::chunker::ChunkType;
use crate::core::registry::IndexHandle;
use super::Migration;
/// Migration M001 (version 0 → 1): re-indexes Rust files that contain
/// `pub const` / `pub static` items so that they are re-chunked as
/// `ChunkType::Constant` (issue #143).
///
/// The type carries no state; all behavior lives in its `Migration` impl.
pub struct M001PerPubConstRust;
#[async_trait]
impl Migration for M001PerPubConstRust {
    /// Returns `0`, the version this migration upgrades from.
    fn source_version(&self) -> u32 {
        0
    }

    /// Returns `1`, the version this migration upgrades to.
    fn target_version(&self) -> u32 {
        1
    }

    /// Returns the one-line summary of this migration.
    fn description(&self) -> &'static str {
        "M001: re-chunk Rust pub const/static → ChunkType::Constant (issue #143)"
    }

    /// Re-indexes every Rust file that textually contains `pub const` or
    /// `pub static` but has no `ChunkType::Constant` chunk in the corpus yet.
    ///
    /// Steps:
    /// 1. Fetch the corpus store from the indexer; if there is none, return
    ///    `Ok(())` without doing anything.
    /// 2. Load all chunks on a blocking thread and derive the `.rs` files that
    ///    have no `Constant` chunk. Files that already have one are skipped.
    /// 3. Read each remaining file from `index.root_path` and keep those that
    ///    match the pre-filter regex. The regex is purely textual, so it can
    ///    also match inside comments; unreadable files are logged and skipped.
    /// 4. Feed the kept files through `parse_and_embed_files` and
    ///    `commit_parsed_batch` in batches of at most `BATCH_SIZE` files.
    ///
    /// # Errors
    ///
    /// Returns an error when the blocking load task cannot be joined, when
    /// `load_all_chunks` fails, or when parsing/embedding or committing any
    /// batch fails. This function performs no rollback of batches committed
    /// before the failing one.
    async fn apply(&self, index: &IndexHandle) -> Result<(), anyhow::Error> {
        /// Maximum number of files handed to the indexer per batch.
        const BATCH_SIZE: usize = 64;

        // Hold the read guard only long enough to grab the corpus handle.
        let (corpus, root_path) = {
            let indexer = index.indexer.read().await;
            (indexer.corpus_store(), index.root_path.clone())
        };
        let Some(corpus) = corpus else {
            tracing::debug!(
                index_id = %index.id,
                "M001: no durable corpus, skipping"
            );
            return Ok(());
        };

        // Work out which files still need attention, then let the (potentially
        // large) chunk list drop before any file is read or re-embedded.
        let pending: Vec<String> = {
            let all_chunks = tokio::task::spawn_blocking(move || corpus.load_all_chunks())
                .await
                .context("M001: load_all_chunks task panicked")?
                .context("M001: failed to load chunks from corpus")?;

            // Single pass over borrowed paths: no per-chunk String clones.
            let mut rust_files: HashSet<&str> = HashSet::new();
            let mut with_constant: HashSet<&str> = HashSet::new();
            for chunk in all_chunks.iter() {
                let file = chunk.file.as_str();
                if !file.ends_with(".rs") {
                    continue;
                }
                rust_files.insert(file);
                if chunk.chunk_type == ChunkType::Constant {
                    with_constant.insert(file);
                }
            }

            let mut paths: Vec<String> = rust_files
                .difference(&with_constant)
                .map(|path| (*path).to_owned())
                .collect();
            // Sorted so the processing order (and batch contents) is
            // reproducible between runs instead of following hash order.
            paths.sort_unstable();
            paths
        };

        // The pattern is a fixed literal, so compilation cannot fail at runtime.
        let pub_const_re = Regex::new(r"\bpub\s+(const|static)\b").expect("valid pub-const regex");

        let mut candidates: Vec<(String, String)> = Vec::new();
        for file_path in pending {
            let abs_path = root_path.join(&file_path);
            let content = match tokio::fs::read_to_string(&abs_path).await {
                Ok(c) => c,
                Err(e) => {
                    // Best effort: a file that cannot be read must not abort
                    // the whole migration.
                    tracing::warn!(
                        index_id = %index.id,
                        path = %abs_path.display(),
                        "M001: cannot read file, skipping ({e})"
                    );
                    continue;
                }
            };
            if pub_const_re.is_match(&content) {
                candidates.push((file_path, content));
            }
        }

        if candidates.is_empty() {
            tracing::info!(
                index_id = %index.id,
                "M001: no candidate Rust files found, nothing to do"
            );
            return Ok(());
        }
        tracing::info!(
            index_id = %index.id,
            count = candidates.len(),
            "M001: re-indexing Rust files with pub const/static"
        );

        // Move files out of `candidates` batch by batch instead of cloning
        // them, so each file's content exists in memory only once.
        let mut remaining = candidates.into_iter();
        for batch_idx in 0usize.. {
            let files: Vec<(String, String)> = remaining.by_ref().take(BATCH_SIZE).collect();
            if files.is_empty() {
                break;
            }

            // The read guard is taken separately for each phase, so it is
            // released between parsing/embedding and committing.
            let parsed = {
                let indexer = index.indexer.read().await;
                indexer
                    .parse_and_embed_files(files)
                    .await
                    .with_context(|| {
                        format!("M001: parse_and_embed_files failed on batch {batch_idx}")
                    })?
            };
            {
                let indexer = index.indexer.read().await;
                // NOTE(review): the meaning of the `true` flag is defined by
                // `commit_parsed_batch` and is not visible from this file.
                indexer
                    .commit_parsed_batch(parsed, true)
                    .await
                    .with_context(|| {
                        format!("M001: commit_parsed_batch failed on batch {batch_idx}")
                    })?;
            }
            tracing::debug!(
                index_id = %index.id,
                batch = batch_idx,
                "M001: committed batch"
            );
        }

        tracing::info!(
            index_id = %index.id,
            "M001: re-indexing complete"
        );
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The migration goes from version 0 to version 1.
    #[test]
    fn test_m001_from_target_version() {
        let migration = M001PerPubConstRust;
        let versions = (migration.source_version(), migration.target_version());
        assert_eq!(versions, (0, 1));
    }

    /// The description is present and names the migration.
    #[test]
    fn test_m001_description_non_empty() {
        let text = M001PerPubConstRust.description();
        assert!(!text.is_empty());
        assert!(text.contains("M001"), "description should include 'M001'");
    }

    /// Pins what the textual pre-filter accepts and rejects.
    #[test]
    fn test_m001_pre_filter_regex() {
        let re = Regex::new(r"\bpub\s+(const|static)\b").unwrap();

        let expected_hits = [
            "pub const MAX_SIZE: usize = 100;",
            "pub static GREETING: &str = \"hello\";",
            " pub const NESTED: u32 = 42;",
            "pub static mut COUNTER: u32 = 0;",
            // The filter is purely textual, so commented-out items match too.
            "// pub const IN_COMMENT: u32 = 1;",
        ];
        let expected_misses = [
            "const PRIVATE: usize = 1;",
            "pub fn my_function() {}",
            "let x = 5;",
        ];

        for sample in expected_hits {
            assert!(re.is_match(sample), "should match: {sample:?}");
        }
        for sample in expected_misses {
            assert!(!re.is_match(sample), "should not match: {sample:?}");
        }
    }

    /// `apply` succeeds as a no-op on a bare handle. This relies on
    /// `CodeIndexer::new` yielding an indexer without a corpus store, as the
    /// test name and assertion message indicate.
    #[tokio::test]
    async fn test_m001_apply_no_corpus_is_ok() {
        use crate::core::indexer::CodeIndexer;
        use crate::core::registry::{IndexHandle, IndexId};
        use std::path::PathBuf;
        use std::sync::Arc;
        use tokio::sync::RwLock;

        let shared_indexer = Arc::new(RwLock::new(CodeIndexer::new(
            "m001-test",
            "/tmp/m001-test",
        )));
        let handle = IndexHandle::bare(
            IndexId::new("m001-test"),
            shared_indexer,
            PathBuf::from("/tmp/m001-test"),
        );

        let result = M001PerPubConstRust.apply(&handle).await;
        assert!(
            result.is_ok(),
            "no-corpus apply must be Ok, got: {result:?}"
        );
    }

    /// Source and target versions differ by exactly one.
    #[test]
    fn test_m001_advances_exactly_one_version() {
        let migration = M001PerPubConstRust;
        let step = migration.target_version() - migration.source_version();
        assert_eq!(
            step,
            1,
            "each migration must advance exactly one version"
        );
    }
}