use anyhow::{Context, Result};
use chrono::Utc;
use rmcp_memex::embeddings::{EmbeddingClient, EmbeddingConfig};
use rmcp_memex::storage::{ChromaDocument, StorageManager};
use serde::Deserialize;
use serde_json::{Map, Value};
use std::fs;
use std::path::{Path, PathBuf};
use uuid::Uuid;
use walkdir::WalkDir;
use crate::cli::MemexOptions;
use crate::snapshot::{Snapshot, project_cache_dir};
#[derive(Debug, Deserialize)]
struct AnalysisReport {
analysis: Vec<AnalysisRun>,
}
#[derive(Debug, Deserialize)]
struct AnalysisRun {
#[serde(rename = "aiViews")]
ai_views: AiViews,
}
#[derive(Debug, Deserialize)]
struct AiViews {
#[serde(rename = "deadSymbols")]
dead_symbols: Option<Vec<DeadSymbol>>,
#[serde(rename = "ciSummary")]
ci_summary: Option<CiSummary>,
}
#[derive(Debug, Deserialize)]
struct DeadSymbol {
name: String,
paths: Vec<String>,
#[serde(rename = "publicSurface")]
public_surface: bool,
}
#[derive(Debug, Deserialize)]
struct CiSummary {
#[serde(rename = "duplicateClustersCount")]
duplicate_count: usize,
#[serde(rename = "topClusters")]
top_clusters: Option<Vec<DuplicateCluster>>,
}
#[derive(Debug, Deserialize)]
struct DuplicateCluster {
#[serde(rename = "symbolName")]
symbol_name: String,
size: usize,
severity: String,
}
pub async fn run_memex(
opts: &MemexOptions,
json_output: bool,
verbose: bool,
) -> Result<usize, String> {
run_memex_inner(opts, json_output, verbose)
.await
.map_err(|e| e.to_string())
}
async fn run_memex_inner(opts: &MemexOptions, _json_output: bool, verbose: bool) -> Result<usize> {
let analysis_path = resolve_analysis_path(opts)?;
let analysis_path = analysis_path
.canonicalize()
.context("Failed to resolve analysis path - file may not exist or path is invalid")?;
if verbose {
eprintln!("[memex] Reading analysis report from: {:?}", analysis_path);
}
let content = fs::read_to_string(&analysis_path).context("Failed to read analysis.json")?;
let report: AnalysisReport =
serde_json::from_str(&content).context("Failed to parse analysis.json")?;
let project_id = opts.project_id.as_deref().unwrap_or("unknown_project");
let raw_docs = prepare_documents(&report, project_id);
if raw_docs.is_empty() {
if verbose {
eprintln!("[memex] No indexable content found in analysis report.");
}
return Ok(0);
}
if verbose {
eprintln!("[memex] Found {} items to index.", raw_docs.len());
}
if verbose {
eprintln!("[memex] Initializing embedding model...");
}
let mut embedder = EmbeddingClient::new(&EmbeddingConfig::default())
.await
.context("Failed to initialize embedding client")?;
let db_path = opts
.db_path
.clone()
.unwrap_or_else(|| "~/.rmcp_servers/rmcp_memex/lancedb".to_string());
if verbose {
eprintln!("[memex] Opening vector storage at: {}", db_path);
}
let storage = StorageManager::new(512, &db_path)
.await
.context("Failed to open storage")?;
let texts: Vec<String> = raw_docs.iter().map(|(_, text)| text.clone()).collect();
if verbose {
eprintln!(
"[memex] Generating embeddings for {} documents...",
texts.len()
);
}
let embeddings = embedder
.embed_batch(&texts)
.await
.context("Failed to generate embeddings")?;
let mut chroma_docs = Vec::new();
for (i, (metadata_str, text)) in raw_docs.into_iter().enumerate() {
let embedding = embeddings[i].clone();
let meta_json = parse_metadata_string(&metadata_str);
chroma_docs.push(ChromaDocument::new_flat(
Uuid::new_v4().to_string(),
opts.namespace.clone(),
embedding,
Value::Object(meta_json),
text,
));
}
let doc_count = chroma_docs.len();
if verbose {
eprintln!("[memex] Writing to storage...");
}
storage
.add_to_store(chroma_docs)
.await
.context("Failed to save documents to LanceDB")?;
Ok(doc_count)
}
fn find_analysis_json(path: &Path) -> Result<PathBuf> {
for entry in WalkDir::new(path) {
let entry = entry?;
if entry.file_name() == "analysis.json" {
return Ok(entry.path().to_path_buf());
}
}
anyhow::bail!("No analysis.json found in {:?}", path)
}
fn resolve_analysis_path(opts: &MemexOptions) -> Result<PathBuf> {
if opts.report_path.is_file() {
return Ok(opts.report_path.clone());
}
if opts.report_path.is_dir() {
return find_analysis_json(&opts.report_path);
}
if opts.report_path.as_path() == Path::new(".loctree") {
let cwd = std::env::current_dir().context("Failed to read current working directory")?;
let root = Snapshot::find_loctree_root(&cwd).unwrap_or(cwd);
let base_dir = project_cache_dir(&root);
let candidates = [
Snapshot::artifacts_dir(&root).join("analysis.json"),
base_dir.join("analysis.json"),
base_dir.join("latest").join("analysis.json"),
];
for candidate in candidates {
if candidate.is_file() {
return Ok(candidate);
}
}
anyhow::bail!(
"No analysis.json found for this project. Run `loct auto` first to generate artifacts."
);
}
anyhow::bail!(
"Report path does not exist (or is not a directory/file): {:?}",
opts.report_path
)
}
fn parse_metadata_string(meta_str: &str) -> Map<String, Value> {
let mut map = Map::new();
for part in meta_str.split('|') {
if let Some((k, v)) = part.split_once(':') {
map.insert(k.to_string(), Value::String(v.to_string()));
}
}
map.insert(
"indexed_at".to_string(),
Value::String(Utc::now().to_rfc3339()),
);
map
}
fn prepare_documents(report: &AnalysisReport, project_id: &str) -> Vec<(String, String)> {
let mut docs = Vec::new();
for run in &report.analysis {
if let Some(dead_symbols) = &run.ai_views.dead_symbols {
for dead in dead_symbols {
let context = format!(
"Dead Code Detection: Symbol '{}' is defined in {:?} but appears unused. Public Surface: {}.",
dead.name, dead.paths, dead.public_surface
);
let metadata =
format!("type:dead_code|project:{}|symbol:{}", project_id, dead.name);
docs.push((metadata, context));
}
}
if let Some(summary) = &run.ai_views.ci_summary {
let summary_context = format!(
"Code Duplication Summary: {} duplicate clusters detected.",
summary.duplicate_count
);
let summary_meta = format!("type:duplication_summary|project:{}", project_id);
docs.push((summary_meta, summary_context));
if let Some(clusters) = &summary.top_clusters {
for cluster in clusters {
let context = format!(
"Code Duplication: Symbol '{}' appears {} times. Severity: {}.",
cluster.symbol_name, cluster.size, cluster.severity
);
let metadata = format!(
"type:duplication|project:{}|symbol:{}",
project_id, cluster.symbol_name
);
docs.push((metadata, context));
}
}
}
}
docs
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_prepare_documents_dead_code() {
let report = AnalysisReport {
analysis: vec![AnalysisRun {
ai_views: AiViews {
dead_symbols: Some(vec![DeadSymbol {
name: "GhostFunc".to_string(),
paths: vec!["src/ghost.rs".to_string()],
public_surface: false,
}]),
ci_summary: None,
},
}],
};
let docs = prepare_documents(&report, "test-proj");
assert_eq!(docs.len(), 1);
let (meta, text) = &docs[0];
assert!(meta.contains("type:dead_code"));
assert!(meta.contains("project:test-proj"));
assert!(meta.contains("symbol:GhostFunc"));
assert!(text.contains("Dead Code Detection"));
assert!(text.contains("GhostFunc"));
}
#[test]
fn test_metadata_parser() {
let raw = "type:dead_code|project:abc|symbol:Foo";
let map = parse_metadata_string(raw);
assert_eq!(map.get("type").unwrap(), "dead_code");
assert_eq!(map.get("project").unwrap(), "abc");
assert_eq!(map.get("symbol").unwrap(), "Foo");
assert!(map.contains_key("indexed_at"));
}
}