mod commands;
use anyhow::Result;
use avocado_core::{compiler, db::Database, embedding, span, Artifact, CompilerConfig};
use clap::{Parser, Subcommand};
use console::style;
use indicatif::{ProgressBar, ProgressStyle, MultiProgress};
use sha2::Digest;
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use uuid::Uuid;
// Top-level command-line definition for the `avocado` binary.
//
// Regular `//` comments are used on purpose: clap turns `///` doc comments into
// help text, and this type sets its `about` text explicitly.
#[derive(Parser)]
#[command(
    name = "avocado",
    version,
    about = "AvocadoDB - Deterministic context compilation",
    long_about = None
)]
struct Cli {
    // The subcommand selected on the command line.
    #[command(subcommand)]
    command: Commands,
}
// Subcommands of the `avocado` CLI.
//
// `///` doc comments below are rendered by clap as `--help` text, so they are
// written for end users and only describe behaviour visible in `main`.
#[derive(Subcommand)]
enum Commands {
    /// Create the database file (and its parent directory) if it does not exist
    Init {
        /// Location of the database file
        #[arg(short, long, default_value = ".avocado/db.sqlite")]
        path: PathBuf,
    },
    /// Read files, split them into spans, embed the spans and store them in the database
    Ingest {
        /// File or directory to ingest
        path: PathBuf,
        /// Descend into subdirectories when PATH is a directory
        #[arg(short, long)]
        recursive: bool,
        /// Location of the database file
        #[arg(short, long, default_value = ".avocado/db.sqlite")]
        db_path: PathBuf,
    },
    /// Compile a context for a query, via the server by default or in-process with --local
    Compile {
        /// Query to compile a context for
        query: String,
        /// Backend name forwarded to the server (not used with --local)
        #[arg(long)]
        backend: Option<String>,
        /// Base URL of the server (not used with --local)
        #[arg(long, default_value = "http://localhost:8765")]
        url: String,
        /// Compile in-process against the local database instead of calling the server
        #[arg(long, default_value_t = false)]
        local: bool,
        /// Token budget for the compiled context
        #[arg(short, long, default_value = "8000")]
        budget: usize,
        /// Print the working set as JSON instead of the human-readable summary
        #[arg(short, long)]
        json: bool,
        /// Request explain data for the compilation
        #[arg(short, long)]
        explain: bool,
        /// Render the visual pipeline inspector (implies --explain)
        #[arg(short, long)]
        visual: bool,
        /// Location of the database file (only used with --local)
        #[arg(short, long, default_value = ".avocado/db.sqlite")]
        db_path: PathBuf,
    },
    /// Print artifact, span and token counts plus a token-count histogram
    Stats {
        /// Location of the database file
        #[arg(short, long, default_value = ".avocado/db.sqlite")]
        db_path: PathBuf,
    },
    /// Delete all data from the database
    Clear {
        /// Location of the database file
        #[arg(short, long, default_value = ".avocado/db.sqlite")]
        db_path: PathBuf,
        /// Skip the interactive confirmation prompt
        #[arg(short, long)]
        yes: bool,
    },
    /// Run the bundled ask.py helper script with python3
    Ask {
        /// Query passed to the script
        query: String,
        /// Value passed to the script as --llm
        #[arg(long, default_value = "auto")]
        llm: String,
        /// Value passed to the script as --budget
        #[arg(short, long, default_value = "8000")]
        budget: usize,
        /// Value passed to the script as --max-tokens
        #[arg(long, default_value = "150")]
        max_tokens: usize,
        /// Value passed to the script as --url
        #[arg(long, default_value = "http://localhost:8765")]
        url: String,
        /// Pass --json to the script
        #[arg(short, long)]
        json: bool,
    },
    // Dispatched to `commands::run_benchmark(verbose)`; its behaviour is defined in the
    // `commands` module, which is not visible from this file.
    Benchmark {
        #[arg(short, long)]
        verbose: bool,
    },
    // Dispatched to `commands::recommend_model(corpus_size, use_case)`; the meaning of both
    // options is defined there — NOTE(review): add help text once confirmed.
    Recommend {
        #[arg(long)]
        corpus_size: Option<usize>,
        #[arg(long)]
        use_case: Option<String>,
    },
    /// Start `avocado-server` and wait until its health endpoint answers
    Serve {
        /// Use a remote embedding service (requires --embed-url)
        #[arg(long, default_value_t = false)]
        gpu: bool,
        /// URL of the remote embedding service (used with --gpu)
        #[arg(long)]
        embed_url: Option<String>,
        /// Embedding model name exported to the server (used with --gpu)
        #[arg(long, default_value = "BAAI/bge-large-en-v1.5")]
        model: String,
        /// Embedding dimension exported to the server (used with --gpu)
        #[arg(long, default_value_t = 1024)]
        dim: usize,
        /// Base URL at which the started server is polled for /health
        #[arg(long, default_value = "http://127.0.0.1:8765")]
        url: String,
        /// Send a warm-up request to the embedding service once the server is healthy
        /// (used with --gpu); disable with --prewarm=false
        // A plain `bool` flag defaulting to `true` could never be switched off. Taking an
        // optional `=BOOL` value keeps bare `--prewarm` working while allowing
        // `--prewarm=false`.
        #[arg(
            long,
            default_value_t = true,
            action = clap::ArgAction::Set,
            num_args = 0..=1,
            require_equals = true,
            default_missing_value = "true"
        )]
        prewarm: bool,
    },
    // Session management; subcommands and handling live in the `commands` module.
    Session {
        #[command(subcommand)]
        command: commands::SessionCommands,
    },
}
#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
let cli = Cli::parse();
match cli.command {
Commands::Init { path } => {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?;
}
let _db = Database::new(&path)?;
println!("✓ Initialized AvocadoDB at {}", path.display());
}
Commands::Ingest {
path,
recursive,
db_path,
} => {
let db = Database::new(&db_path)?;
let files = if path.is_dir() {
collect_files(&path, recursive)?
} else {
vec![path]
};
println!(
"{} Ingesting {} {}...\n",
style("🥑").green(),
files.len(),
if files.len() == 1 { "file" } else { "files" }
);
let multi = MultiProgress::new();
let overall_pb = multi.add(ProgressBar::new(files.len() as u64));
overall_pb.set_style(
ProgressStyle::default_bar()
.template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} files ({msg})")
.unwrap()
.progress_chars("#>-")
);
overall_pb.set_message("starting");
let mut total_spans = 0;
let mut successful = 0;
let mut failed = 0;
for (_idx, file_path) in files.iter().enumerate() {
let file_result: Result<usize, anyhow::Error> = async {
let content = match fs::read_to_string(file_path) {
Ok(c) => c,
Err(_e) => {
return Ok(0);
}
};
let artifact_id = Uuid::new_v4().to_string();
let content_hash = format!("{:x}", sha2::Sha256::digest(content.as_bytes()));
let artifact = Artifact {
id: artifact_id.clone(),
path: file_path.display().to_string(),
content: content.clone(),
content_hash,
metadata: None,
created_at: chrono::Utc::now(),
};
if let Err(_e) = db.insert_artifact(&artifact) {
return Ok(0);
}
let mut spans = match span::extract_spans(&content, &artifact_id) {
Ok(s) => s,
Err(_e) => {
return Ok(0);
}
};
let file_name = file_path.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown");
overall_pb.set_message(format!("{} ({} spans)", file_name, spans.len()));
let embed_pb = multi.add(ProgressBar::new(spans.len() as u64));
embed_pb.set_style(
ProgressStyle::default_bar()
.template(" {spinner:.green} Embedding: [{bar:30.cyan/blue}] {pos}/{len} spans")
.unwrap()
.progress_chars("=>-")
);
let batch_size = 100;
let mut all_embeddings = Vec::new();
{
let texts: Vec<&str> = spans.iter().map(|s| s.text.as_str()).collect();
for text_batch in texts.chunks(batch_size) {
let embeddings = match embedding::embed_batch(text_batch.to_vec(), None, None).await {
Ok(e) => e,
Err(_e) => {
embed_pb.finish_and_clear();
return Ok(0);
}
};
all_embeddings.extend(embeddings);
embed_pb.inc(text_batch.len() as u64);
}
}
for (span, emb) in spans.iter_mut().zip(all_embeddings.iter()) {
span.embedding = Some(emb.clone());
span.embedding_model = Some(embedding::embedding_model().to_string());
}
embed_pb.finish_and_clear();
if let Err(_e) = db.insert_spans(&spans) {
return Ok(0);
}
Ok(spans.len())
}.await;
match file_result {
Ok(span_count) => {
if span_count > 0 {
total_spans += span_count;
successful += 1;
} else {
failed += 1;
}
}
Err(_e) => {
failed += 1;
}
}
overall_pb.inc(1);
}
overall_pb.finish_with_message(format!("{} spans created", total_spans));
println!(
"\n{} Indexed {} files → {} spans ({} successful, {} failed/skipped)",
style("✓").green().bold(),
style(successful).cyan().bold(),
style(total_spans).cyan().bold(),
style(successful).green(),
style(failed).yellow()
);
}
Commands::Compile {
query,
url,
local,
budget,
json,
explain,
visual,
db_path,
backend,
} => {
let explain = explain || visual;
if !json {
println!(
"{} Compiling context for: {}\n",
style("🥑").green(),
style(&query).cyan().bold()
);
}
if !local {
let project = std::env::current_dir()
.unwrap_or_else(|_| PathBuf::from("."))
.canonicalize()
.unwrap_or_else(|_| PathBuf::from("."));
let client = reqwest::Client::new();
let resp = client
.post(format!("{}/compile", url.trim_end_matches('/')))
.json(&serde_json::json!({
"query": query,
"token_budget": budget,
"project": project.to_string_lossy(),
"backend": backend,
"explain": explain,
}))
.send()
.await?;
if !resp.status().is_success() {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
anyhow::bail!("Server error {}: {}", status, text);
}
let v: serde_json::Value = resp.json().await?;
let ws = v.get("working_set").cloned().unwrap_or(v);
if json {
println!("{}", serde_json::to_string_pretty(&ws)?);
} else {
if visual {
print_visual_pipeline_json(&ws, &query, budget);
} else {
if let Some(text) = ws.get("text").and_then(|x| x.as_str()) {
println!("{}", text);
println!("\n{}", style("─".repeat(60)).dim());
}
let tokens = ws.get("tokens_used").and_then(|x| x.as_u64()).unwrap_or(0);
let spans = ws.get("citations").and_then(|c| c.as_array()).map(|a| a.len()).unwrap_or(0);
let utilization = if budget > 0 { ((tokens as f32 / budget as f32) * 100.0) as usize } else { 0 };
println!("{} {} / {} ({}%)", style("Tokens: ").bold(), style(tokens).cyan().bold(), style(budget).dim(), utilization);
println!("{} {} spans", style("Compiled: ").bold(), style(spans).cyan().bold());
}
}
return Ok(());
}
let db = Database::new(&db_path)?;
let spinner = if !json {
let sp = ProgressBar::new_spinner();
sp.set_style(
ProgressStyle::default_spinner()
.template("{spinner:.green} {msg}")
.unwrap()
);
sp.set_message("Loading vector index...");
sp.enable_steady_tick(std::time::Duration::from_millis(100));
Some(sp)
} else {
None
};
if let Some(sp) = &spinner {
sp.set_message("Building/loading vector index (this may take 1-2 min for large repos)...");
}
let index = db.get_vector_index()?;
if let Some(sp) = &spinner {
sp.set_message("Compiling context...");
}
let config = CompilerConfig {
token_budget: budget,
..Default::default()
};
if let Some(sp) = &spinner {
sp.set_message("Compiling context (embedding query + hybrid search)...");
}
let working_set = compiler::compile_with_options(&query, config, &db, index.as_ref(), None, explain).await?;
if let Some(sp) = spinner {
sp.finish_and_clear();
}
if json {
println!("{}", serde_json::to_string_pretty(&working_set)?);
} else if visual {
print_visual_pipeline(&working_set, &query, budget);
} else {
println!("{}", working_set.text);
println!("\n{}", style("─".repeat(60)).dim());
let utilization = (working_set.tokens_used as f32 / budget as f32 * 100.0) as usize;
println!(
"{} {} / {} ({utilization}%)",
style("Tokens: ").bold(),
style(working_set.tokens_used).cyan().bold(),
style(budget).dim(),
);
println!(
"{} {} spans",
style("Compiled: ").bold(),
style(working_set.citations.len()).cyan().bold()
);
println!(
"{} {}ms {}",
style("Time: ").bold(),
style(working_set.compilation_time_ms).cyan().bold(),
if working_set.compilation_time_ms < 500 {
style("✓").green()
} else {
style("⚠").yellow()
}
);
println!(
"{} {}",
style("Hash: ").bold(),
style(&working_set.deterministic_hash()[..16]).dim()
);
println!();
}
}
Commands::Stats { db_path } => {
let db = Database::new(&db_path)?;
let (artifacts, spans, tokens) = db.get_stats()?;
let avg_tokens_per_span = if spans > 0 {
tokens / spans
} else {
0
};
let avg_spans_per_artifact = if artifacts > 0 {
spans / artifacts
} else {
0
};
println!("\n{}", style("╔══════════════════════════════════════════════════════════════╗").cyan());
println!("{}", style("║ AvocadoDB Database Statistics ║").cyan());
println!("{}", style("╚══════════════════════════════════════════════════════════════╝").cyan());
println!();
println!(" {} {}",
style("Artifacts:").bold(),
style(format!("{}", artifacts)).cyan().bold()
);
println!(" {} {}",
style("Spans: ").bold(),
style(format!("{}", spans)).cyan().bold()
);
println!(" {} {}",
style("Tokens: ").bold(),
style(format!("{}", tokens)).cyan().bold()
);
println!();
println!(" {} {}",
style("Avg tokens/span: ").dim(),
style(format!("{}", avg_tokens_per_span)).yellow()
);
println!(" {} {}",
style("Avg spans/artifact:").dim(),
style(format!("{}", avg_spans_per_artifact)).yellow()
);
println!();
if spans > 0 {
println!("{}", style(" Token Distribution:").bold());
println!();
let all_spans = db.get_all_spans()?;
let mut buckets = vec![0; 5];
let bucket_size = 200;
for span in &all_spans {
let bucket_idx = (span.token_count / bucket_size).min(4);
buckets[bucket_idx] += 1;
}
let max_count = *buckets.iter().max().unwrap_or(&1);
for (i, &count) in buckets.iter().enumerate() {
let range = if i == 4 {
format!("{}+", i * bucket_size)
} else {
format!("{}-{}", i * bucket_size, (i + 1) * bucket_size)
};
let bar_length = if max_count > 0 {
(count as f32 / max_count as f32 * 40.0) as usize
} else {
0
};
let bar = "█".repeat(bar_length);
let percentage = if spans > 0 {
(count as f32 / spans as f32 * 100.0) as usize
} else {
0
};
println!(
" {:>8} tokens │{:<40}│ {} ({}%)",
style(range).dim(),
style(bar).cyan(),
style(format!("{:>4}", count)).yellow(),
style(format!("{:>2}", percentage)).dim()
);
}
println!();
}
if spans < 10 {
println!(" {} Small corpus - good for testing", style("ℹ").blue());
} else if spans < 1000 {
println!(" {} Optimal size for Phase 1", style("✓").green());
} else if spans < 10000 {
println!(" {} Large corpus - consider monitoring performance", style("⚠").yellow());
} else {
println!(" {} Very large corpus - Phase 2 HNSW recommended", style("⚠").yellow());
}
println!();
}
Commands::Clear { db_path, yes } => {
if !yes {
print!("Are you sure? This will delete all data. (y/N): ");
use std::io::{self, Write};
io::stdout().flush()?;
let mut input = String::new();
io::stdin().read_line(&mut input)?;
if !input.trim().eq_ignore_ascii_case("y") {
println!("Cancelled");
return Ok(());
}
}
let db = Database::new(&db_path)?;
db.clear()?;
println!("✓ Cleared all data");
}
Commands::Ask {
query,
llm,
budget,
max_tokens,
url,
json,
} => {
let mut script_paths = vec![
PathBuf::from("avocado-cli/scripts/ask.py"),
PathBuf::from("../avocado-cli/scripts/ask.py"),
];
if let Ok(exe_path) = std::env::current_exe() {
if let Some(exe_dir) = exe_path.parent() {
script_paths.push(exe_dir.join("scripts").join("ask.py"));
script_paths.push(exe_dir.parent().unwrap_or(exe_dir).join("avocado-cli").join("scripts").join("ask.py"));
}
}
let script = script_paths
.iter()
.find(|p| p.exists())
.ok_or_else(|| {
anyhow::anyhow!(
"Could not find ask.py script. Tried: {}. \
Please ensure the script exists in avocado-cli/scripts/ask.py",
script_paths.iter()
.map(|p| p.display().to_string())
.collect::<Vec<_>>()
.join(", ")
)
})?;
let mut cmd = Command::new("python3");
cmd.arg(&script);
cmd.arg(&query);
cmd.arg("--url").arg(&url);
cmd.arg("--budget").arg(budget.to_string());
cmd.arg("--llm").arg(&llm);
cmd.arg("--max-tokens").arg(max_tokens.to_string());
if json {
cmd.arg("--json");
}
let output = cmd.output()?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
eprintln!("{}", stderr);
return Err(anyhow::anyhow!("Ask command failed"));
}
let stdout = String::from_utf8_lossy(&output.stdout);
print!("{}", stdout);
}
Commands::Benchmark { verbose } => {
commands::run_benchmark(verbose).await?;
}
Commands::Recommend {
corpus_size,
use_case,
} => {
commands::recommend_model(corpus_size, use_case.as_deref())?;
}
Commands::Serve {
gpu,
embed_url,
model,
dim,
url,
prewarm,
} => {
let mut server_cmd = Command::new("avocado-server");
server_cmd.env("RUST_LOG", "info");
if gpu {
let remote = embed_url.clone().unwrap_or_default();
if remote.is_empty() {
anyhow::bail!("--embed-url is required when using --gpu");
}
server_cmd
.env("AVOCADODB_EMBEDDING_PROVIDER", "remote")
.env("AVOCADODB_EMBEDDING_MODEL", &model)
.env("AVOCADODB_EMBEDDING_DIM", dim.to_string())
.env("AVOCADODB_EMBEDDING_URL", &remote)
.env("AVOCADODB_FORBID_FALLBACKS", "1");
} else {
server_cmd
.env("AVOCADODB_EMBEDDING_PROVIDER", "local")
.env("AVOCADODB_FORBID_FALLBACKS", "1");
}
let mut child = match server_cmd.spawn() {
Ok(c) => c,
Err(e) => {
let mut alt = Command::new(
std::env::current_exe()
.ok()
.and_then(|p| p.parent().map(|d| d.to_path_buf()))
.unwrap_or_else(|| PathBuf::from(".")) .join("avocado-server"),
);
alt.envs(server_cmd.get_envs().filter_map(|(k, v)| {
v.map(|vv| (k.to_os_string(), vv.to_os_string()))
}));
alt.env("RUST_LOG", "info");
alt.spawn().map_err(|_| e)?
}
};
let client = reqwest::blocking::Client::new();
let start = std::time::Instant::now();
loop {
if let Ok(Some(status)) = child.try_wait() {
anyhow::bail!("avocado-server exited prematurely with status {}", status);
}
match client.get(format!("{}/health", url.trim_end_matches('/'))).send() {
Ok(resp) if resp.status().is_success() => break,
_ => std::thread::sleep(std::time::Duration::from_millis(300)),
}
if start.elapsed() > std::time::Duration::from_secs(30) {
anyhow::bail!("timeout waiting for server health at {}/health", url);
}
}
if gpu && prewarm {
if let Some(remote) = embed_url {
let _ = client
.post(remote)
.json(&serde_json::json!({"inputs": ["warmup 1","warmup 2","warmup 3"]}))
.send();
}
}
println!(
"✓ Avocado server ready at {} ({})",
url,
if gpu { "remote embeddings (GPU)" } else { "local CPU embeddings" }
);
}
Commands::Session { command } => {
commands::handle_session_command(command).await?;
}
}
Ok(())
}
/// Collects the files under `dir`, in sorted path order.
///
/// Entries whose name matches a well-known VCS, dependency, build-output, cache or editor
/// directory name are skipped. The check is by name only, so a regular file with one of
/// those names (for example `.env`) is skipped as well. Subdirectories are only descended
/// into when `recursive` is true. `Path::is_file` / `Path::is_dir` follow symbolic links.
///
/// Entries are sorted per directory because `read_dir` yields them in a
/// platform-dependent order; sorting makes the result reproducible.
///
/// # Errors
/// Returns the first I/O error hit while reading `dir` or any visited subdirectory.
fn collect_files(dir: &std::path::Path, recursive: bool) -> Result<Vec<PathBuf>> {
    const SKIPPED_NAMES: &[&str] = &[
        ".git", ".svn", ".hg",
        "node_modules", ".node_modules",
        "venv", ".venv", "env", ".env",
        "__pycache__", ".pytest_cache",
        "target", "build", "dist", "out",
        ".next", ".cache", ".tox",
        "vendor", ".bundle",
        ".idea", ".vscode", ".vs",
        ".avocado",
    ];

    let mut entries = fs::read_dir(dir)?
        .map(|entry| entry.map(|e| e.path()))
        .collect::<std::io::Result<Vec<PathBuf>>>()?;
    entries.sort();

    let mut files = Vec::new();
    for path in entries {
        let skipped = path
            .file_name()
            .and_then(|n| n.to_str())
            .map_or(false, |n| SKIPPED_NAMES.contains(&n));
        if skipped {
            continue;
        }
        if path.is_file() {
            files.push(path);
        } else if recursive && path.is_dir() {
            files.extend(collect_files(&path, recursive)?);
        }
    }
    Ok(files)
}
fn print_visual_pipeline(ws: &avocado_core::WorkingSet, query: &str, budget: usize) {
let explain = ws.explain.as_ref();
let manifest = ws.manifest.as_ref();
println!("\n{}", style("╔══════════════════════════════════════════════════════════════════════════════╗").cyan());
println!("{}", style("║ 🥑 AvocadoDB Visual Pipeline Inspector ║").cyan());
println!("{}", style("╚══════════════════════════════════════════════════════════════════════════════╝").cyan());
println!("\n {} {}", style("Query:").bold(), style(query).yellow());
if let Some(m) = manifest {
println!(" {} {}", style("Context Hash:").bold(), style(&m.context_hash[..24]).dim());
}
println!();
let (sem_count, lex_count, fused_count, mmr_count, packed_count, final_count) = if let Some(e) = explain {
(
e.semantic_candidates.len(),
e.lexical_candidates.len(),
e.fused_candidates.len(),
e.mmr_candidates.len(),
e.packed_candidates.len(),
e.final_candidates.len(),
)
} else {
(0, 0, 0, 0, 0, ws.citations.len())
};
let timing = explain.map(|e| &e.timing);
println!(" {}", style("┌─────────────────────────────────────────────────────────────────────────────┐").dim());
println!(" {} {} {} {}",
style("│").dim(),
style("RETRIEVAL PIPELINE").bold().cyan(),
style("(deterministic)").dim(),
style("│").dim()
);
println!(" {}", style("└─────────────────────────────────────────────────────────────────────────────┘").dim());
println!();
let sem_time = timing.map(|t| t.semantic_search_ms).unwrap_or(0);
let lex_time = timing.map(|t| t.lexical_search_ms).unwrap_or(0);
let fusion_time = timing.map(|t| t.fusion_ms).unwrap_or(0);
let mmr_time = timing.map(|t| t.mmr_ms).unwrap_or(0);
let pack_time = timing.map(|t| t.packing_ms).unwrap_or(0);
println!(" ┌──────────────────┐ ┌──────────────────┐");
println!(" │ {} {} │ │ {} {} │",
style("SEMANTIC").cyan().bold(),
style("🔍").dim(),
style("LEXICAL").magenta().bold(),
style("📝").dim()
);
println!(" │ {} {} │ │ {} {} │",
style(format!("{:>4} candidates", sem_count)).white(),
style(format!("{}ms", sem_time)).dim(),
style(format!("{:>4} candidates", lex_count)).white(),
style(format!("{}ms", lex_time)).dim()
);
println!(" │ (HNSW top-50) │ │ (BM25 keyword) │");
println!(" └────────┬─────────┘ └─────────┬────────┘");
println!(" │ │");
println!(" └───────────────────┬────────────────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("RRF FUSION").yellow().bold(),
style("⚡").dim()
);
println!(" │ {} {} │",
style(format!("{:>4} combined", fused_count)).white(),
style(format!("{}ms", fusion_time)).dim()
);
println!(" │ (reciprocal rank) │");
println!(" └──────────┬───────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("MMR DIVERSITY").green().bold(),
style("🎯").dim()
);
println!(" │ {} {} │",
style(format!("{:>4} selected", mmr_count)).white(),
style(format!("{}ms", mmr_time)).dim()
);
println!(" │ (λ=0.5 balance) │");
println!(" └──────────┬───────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("TOKEN PACK").blue().bold(),
style("📦").dim()
);
println!(" │ {} {} │",
style(format!("{:>4} packed", packed_count)).white(),
style(format!("{}ms", pack_time)).dim()
);
println!(" │ (budget: {}) │", budget);
println!(" └──────────┬───────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("FINAL ORDER").bold().white().on_green(),
style("✓").green()
);
println!(" │ {} spans │",
style(format!("{:>4}", final_count)).cyan().bold()
);
println!(" │ (deterministic ✓) │");
println!(" └──────────────────────┘");
println!();
let total_time = timing.map(|t| t.total_ms).unwrap_or(ws.compilation_time_ms);
let utilization = (ws.tokens_used as f32 / budget as f32 * 100.0) as usize;
println!(" {}", style("─".repeat(78)).dim());
println!();
println!(" {} {} / {} tokens ({}% utilization)",
style("Budget:").bold(),
style(ws.tokens_used).cyan().bold(),
style(budget).dim(),
style(utilization).yellow()
);
println!(" {} {}ms total {}",
style("Time:").bold(),
style(total_time).cyan().bold(),
if total_time < 500 { style("(fast ✓)").green() } else { style("(consider caching)").yellow() }
);
if let Some(m) = manifest {
println!(" {} {} (same query → same results)",
style("Hash:").bold(),
style(&m.context_hash[..32]).dim()
);
}
println!();
if !ws.citations.is_empty() {
println!(" {} (top 5):", style("Citations").bold());
for (i, cite) in ws.citations.iter().take(5).enumerate() {
println!(" {}. {} {}",
style(i + 1).dim(),
style(&cite.artifact_path).cyan(),
style(format!(":{}–{}", cite.start_line, cite.end_line)).dim()
);
}
if ws.citations.len() > 5 {
println!(" {} more...", style(format!("... and {}", ws.citations.len() - 5)).dim());
}
}
println!();
}
fn print_visual_pipeline_json(ws: &serde_json::Value, query: &str, budget: usize) {
println!("\n{}", style("╔══════════════════════════════════════════════════════════════════════════════╗").cyan());
println!("{}", style("║ 🥑 AvocadoDB Visual Pipeline Inspector ║").cyan());
println!("{}", style("╚══════════════════════════════════════════════════════════════════════════════╝").cyan());
println!("\n {} {}", style("Query:").bold(), style(query).yellow());
if let Some(hash) = ws.get("manifest").and_then(|m| m.get("context_hash")).and_then(|h| h.as_str()) {
println!(" {} {}", style("Context Hash:").bold(), style(&hash[..hash.len().min(24)]).dim());
}
println!();
let explain = ws.get("explain");
let (sem_count, lex_count, fused_count, mmr_count, packed_count) = if let Some(e) = explain {
(
e.get("semantic_candidates").and_then(|a| a.as_array()).map(|a| a.len()).unwrap_or(0),
e.get("lexical_candidates").and_then(|a| a.as_array()).map(|a| a.len()).unwrap_or(0),
e.get("fused_candidates").and_then(|a| a.as_array()).map(|a| a.len()).unwrap_or(0),
e.get("mmr_selected").and_then(|a| a.as_array()).map(|a| a.len()).unwrap_or(0),
e.get("packed_spans").and_then(|a| a.as_array()).map(|a| a.len()).unwrap_or(0),
)
} else {
(0, 0, 0, 0, 0)
};
let final_count = ws.get("citations").and_then(|c| c.as_array()).map(|a| a.len()).unwrap_or(0);
let timing = explain.and_then(|e| e.get("timing"));
let sem_time = timing.and_then(|t| t.get("semantic_search_ms")).and_then(|v| v.as_u64()).unwrap_or(0);
let lex_time = timing.and_then(|t| t.get("lexical_search_ms")).and_then(|v| v.as_u64()).unwrap_or(0);
let fusion_time = timing.and_then(|t| t.get("fusion_ms")).and_then(|v| v.as_u64()).unwrap_or(0);
let mmr_time = timing.and_then(|t| t.get("mmr_ms")).and_then(|v| v.as_u64()).unwrap_or(0);
let pack_time = timing.and_then(|t| t.get("packing_ms")).and_then(|v| v.as_u64()).unwrap_or(0);
let total_time = timing.and_then(|t| t.get("total_ms")).and_then(|v| v.as_u64())
.or_else(|| ws.get("compilation_time_ms").and_then(|v| v.as_u64()))
.unwrap_or(0);
println!(" {}", style("┌─────────────────────────────────────────────────────────────────────────────┐").dim());
println!(" {} {} {} {}",
style("│").dim(),
style("RETRIEVAL PIPELINE").bold().cyan(),
style("(deterministic)").dim(),
style("│").dim()
);
println!(" {}", style("└─────────────────────────────────────────────────────────────────────────────┘").dim());
println!();
println!(" ┌──────────────────┐ ┌──────────────────┐");
println!(" │ {} {} │ │ {} {} │",
style("SEMANTIC").cyan().bold(),
style("🔍").dim(),
style("LEXICAL").magenta().bold(),
style("📝").dim()
);
println!(" │ {} {} │ │ {} {} │",
style(format!("{:>4} candidates", sem_count)).white(),
style(format!("{}ms", sem_time)).dim(),
style(format!("{:>4} candidates", lex_count)).white(),
style(format!("{}ms", lex_time)).dim()
);
println!(" │ (HNSW top-50) │ │ (BM25 keyword) │");
println!(" └────────┬─────────┘ └─────────┬────────┘");
println!(" │ │");
println!(" └───────────────────┬────────────────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("RRF FUSION").yellow().bold(),
style("⚡").dim()
);
println!(" │ {} {} │",
style(format!("{:>4} combined", fused_count)).white(),
style(format!("{}ms", fusion_time)).dim()
);
println!(" │ (reciprocal rank) │");
println!(" └──────────┬───────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("MMR DIVERSITY").green().bold(),
style("🎯").dim()
);
println!(" │ {} {} │",
style(format!("{:>4} selected", mmr_count)).white(),
style(format!("{}ms", mmr_time)).dim()
);
println!(" │ (λ=0.5 balance) │");
println!(" └──────────┬───────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("TOKEN PACK").blue().bold(),
style("📦").dim()
);
println!(" │ {} {} │",
style(format!("{:>4} packed", packed_count)).white(),
style(format!("{}ms", pack_time)).dim()
);
println!(" │ (budget: {}) │", budget);
println!(" └──────────┬───────────┘");
println!(" │");
println!(" ▼");
println!(" ┌──────────────────────┐");
println!(" │ {} {} │",
style("FINAL ORDER").bold().white().on_green(),
style("✓").green()
);
println!(" │ {} spans │",
style(format!("{:>4}", final_count)).cyan().bold()
);
println!(" │ (deterministic ✓) │");
println!(" └──────────────────────┘");
println!();
let tokens_used = ws.get("tokens_used").and_then(|v| v.as_u64()).unwrap_or(0);
let utilization = if budget > 0 { (tokens_used as f32 / budget as f32 * 100.0) as usize } else { 0 };
println!(" {}", style("─".repeat(78)).dim());
println!();
println!(" {} {} / {} tokens ({}% utilization)",
style("Budget:").bold(),
style(tokens_used).cyan().bold(),
style(budget).dim(),
style(utilization).yellow()
);
println!(" {} {}ms total {}",
style("Time:").bold(),
style(total_time).cyan().bold(),
if total_time < 500 { style("(fast ✓)").green() } else { style("(consider caching)").yellow() }
);
if let Some(hash) = ws.get("manifest").and_then(|m| m.get("context_hash")).and_then(|h| h.as_str()) {
println!(" {} {} (same query → same results)",
style("Hash:").bold(),
style(&hash[..hash.len().min(32)]).dim()
);
}
println!();
if let Some(citations) = ws.get("citations").and_then(|c| c.as_array()) {
if !citations.is_empty() {
println!(" {} (top 5):", style("Citations").bold());
for (i, cite) in citations.iter().take(5).enumerate() {
let path = cite.get("artifact_path").and_then(|p| p.as_str()).unwrap_or("?");
let start = cite.get("start_line").and_then(|l| l.as_u64()).unwrap_or(0);
let end = cite.get("end_line").and_then(|l| l.as_u64()).unwrap_or(0);
println!(" {}. {} {}",
style(i + 1).dim(),
style(path).cyan(),
style(format!(":{}–{}", start, end)).dim()
);
}
if citations.len() > 5 {
println!(" {} more...", style(format!("... and {}", citations.len() - 5)).dim());
}
}
}
println!();
}