use std::path::PathBuf;
use std::process::ExitCode;
use anyhow::{Context, Result};
use clap::{Parser, Subcommand};
use dci_tool::eval::{self, Comparison, DciRetriever, EvalConfig, Retriever, VectorRetriever};
use dci_tool::{CorpusRoot, Limits};
use rig_core::client::{EmbeddingsClient, ProviderClient};
use rig_core::providers::openai;
use rig_retrieval_evals::Qrels;
// Top-level command-line arguments for the `dci-eval` binary.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros turn
// `///` doc comments into help text, so adding them would change `--help` output.
#[derive(Debug, Parser)]
#[command(name = "dci-eval", version, about)]
struct Cli {
// Which subcommand to execute; see `Command`.
#[command(subcommand)]
command: Command,
// Cut-off copied into `EvalConfig::k` by `run`. Defaults to 10 and is declared
// `global`, like the two flags below.
#[arg(long, default_value_t = 10, global = true)]
k: usize,
// When set, `run` also builds an OpenAI-embedding `VectorRetriever` over the
// corpus and returns a `Comparison` report instead of the DCI-only report.
#[arg(long, global = true)]
baseline_openai: bool,
// Inverted into `Limits::respect_gitignore` by `run`: passing this flag sets
// `respect_gitignore` to false.
#[arg(long, global = true)]
no_gitignore: bool,
}
// Subcommands understood by `dci-eval`.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros turn
// `///` doc comments into help text, so adding them would change `--help` output.
#[derive(Debug, Subcommand)]
enum Command {
// Evaluate against an existing corpus directory and a qrels file.
Run {
// Path opened with `CorpusRoot::with_limits`.
#[arg(short, long)]
corpus: PathBuf,
// Path loaded with `Qrels::load_jsonl`.
#[arg(short, long)]
qrels: PathBuf,
},
// Generate a synthetic corpus with `eval::synth::generate`, then evaluate on it.
Synthetic {
// Output directory; created if missing, then opened as the corpus root.
#[arg(short, long)]
out: PathBuf,
// Forwarded as `SyntheticLogConfig::files`.
#[arg(long, default_value_t = 8)]
files: usize,
// Forwarded as `SyntheticLogConfig::lines_per_file`.
#[arg(long, default_value_t = 500)]
lines: usize,
// Forwarded as `SyntheticLogConfig::needles`.
#[arg(long, default_value_t = 12)]
needles: usize,
// Forwarded as `SyntheticLogConfig::seed` after an `as u64` cast.
#[arg(long, default_value_t = 1)]
seed: usize,
},
// Prepare a BEIR dataset with `eval::beir::prepare`; no evaluation is run.
BeirPrep {
// Dataset path handed to `eval::beir::prepare`.
#[arg(short, long)]
dataset: PathBuf,
// Output directory; created if missing before preparation starts.
#[arg(short, long)]
out: PathBuf,
// Split name handed to `eval::beir::prepare`; defaults to "test".
#[arg(long, default_value = "test")]
split: String,
},
}
// Process entry point: sets up logging, parses the command line, and prints
// either the resulting Markdown (stdout) or the error chain (stderr).
#[tokio::main]
async fn main() -> ExitCode {
    // Honour the default filter environment variable; fall back to `info`.
    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));

    // Logs go to stderr so stdout carries only the report.
    tracing_subscriber::fmt()
        .with_env_filter(filter)
        .with_writer(std::io::stderr)
        .init();

    let cli = Cli::parse();
    let markdown = match run(cli).await {
        Ok(text) => text,
        Err(failure) => {
            // `{:#}` prints the whole anyhow context chain on one line.
            eprintln!("error: {failure:#}");
            return ExitCode::FAILURE;
        }
    };

    println!("{markdown}");
    ExitCode::SUCCESS
}
async fn run(cli: Cli) -> Result<String> {
if let Command::BeirPrep {
dataset,
out,
split,
} = &cli.command
{
std::fs::create_dir_all(out)
.with_context(|| format!("creating output dir {}", out.display()))?;
let prepared = eval::beir::prepare(dataset, out, split)
.map_err(|e| anyhow::anyhow!("preparing BEIR dataset: {e}"))?;
return Ok(format!(
"Prepared {} documents and {} queries.\n\nCorpus: {}\nQrels: {}\n\nRun it:\n \
dci-eval --k 10 run --corpus {} --qrels {}",
prepared.documents,
prepared.queries,
prepared.corpus_dir.display(),
prepared.qrels_path.display(),
prepared.corpus_dir.display(),
prepared.qrels_path.display(),
));
}
let cfg = EvalConfig {
k: cli.k,
dataset_id: match &cli.command {
Command::Run { corpus, .. } => format!("corpus:{}", corpus.display()),
Command::Synthetic { .. } => "synthetic-logs".to_string(),
Command::BeirPrep { .. } => String::new(),
},
..EvalConfig::default()
};
let limits = Limits {
respect_gitignore: !cli.no_gitignore,
..Limits::default()
};
let (corpus, qrels) = match cli.command {
Command::Run { corpus, qrels } => {
let root = CorpusRoot::with_limits(&corpus, limits)
.with_context(|| format!("opening corpus at {}", corpus.display()))?;
let qrels = Qrels::load_jsonl(&qrels)
.with_context(|| format!("loading qrels from {}", qrels.display()))?;
(root, qrels)
}
Command::Synthetic {
out,
files,
lines,
needles,
seed,
} => {
std::fs::create_dir_all(&out)
.with_context(|| format!("creating output dir {}", out.display()))?;
let gen_cfg = eval::synth::SyntheticLogConfig {
files,
lines_per_file: lines,
needles,
seed: seed as u64,
};
let qrels = eval::synth::generate(&out, &gen_cfg)
.map_err(|e| anyhow::anyhow!("generating synthetic corpus: {e}"))?;
let root = CorpusRoot::with_limits(&out, limits)
.with_context(|| format!("opening generated corpus at {}", out.display()))?;
(root, qrels)
}
Command::BeirPrep { .. } => {
return Err(anyhow::anyhow!("beir-prep is handled before evaluation"));
}
};
let dci = DciRetriever::new(corpus.clone());
if cli.baseline_openai {
let client = openai::Client::from_env().context("initializing OpenAI client")?;
let model = client.embedding_model(openai::embedding::TEXT_EMBEDDING_3_SMALL);
let baseline = VectorRetriever::build(&corpus, model)
.await
.map_err(|e| anyhow::anyhow!("building vector baseline: {e}"))?;
let comparison = Comparison::run(&dci, &baseline, &qrels, &cfg)
.await
.map_err(|e| anyhow::anyhow!("running comparison: {e}"))?;
Ok(comparison.to_markdown())
} else {
let report = eval::evaluate(&dci, &qrels, &cfg)
.await
.map_err(|e| anyhow::anyhow!("evaluating: {e}"))?;
Ok(format!(
"# DCI retrieval — {} ({} queries)\n\n{}",
dci.name(),
qrels.queries.len(),
report.to_markdown()
))
}
}