dci-tool 0.1.0

Direct Corpus Interaction: a sandboxed, ripgrep-backed corpus-search toolset and agent for cyber-focused LLM agents, built on rig.
Documentation
//! `dci-eval` — benchmark Direct Corpus Interaction retrieval quality.
//!
//! Scores a DCI retriever against BEIR-style qrels, optionally head-to-head with
//! an OpenAI-embeddings vector baseline over the same corpus. Also generates and
//! evaluates a synthetic "needle in a haystack" log corpus for the cyber use
//! case. Reports are printed as Markdown.

use std::path::PathBuf;
use std::process::ExitCode;

use anyhow::{Context, Result};
use clap::{Parser, Subcommand};
use dci_tool::eval::{self, Comparison, DciRetriever, EvalConfig, Retriever, VectorRetriever};
use dci_tool::{CorpusRoot, Limits};
use rig_core::client::{EmbeddingsClient, ProviderClient};
use rig_core::providers::openai;
use rig_retrieval_evals::Qrels;

// Top-level command line for the `dci-eval` binary.
//
// NOTE: the `///` comments on the fields below are picked up by clap's derive
// macro as `--help` text, so they are user-facing strings; maintainer notes in
// this type are deliberately written as plain `//` comments instead.
/// Benchmark DCI retrieval against ground-truth qrels.
#[derive(Debug, Parser)]
#[command(name = "dci-eval", version, about)]
struct Cli {
    // The selected subcommand; `run` dispatches on it.
    #[command(subcommand)]
    command: Command,

    // Copied into `EvalConfig::k` by `run`. Declared `global`, so clap accepts
    // it for every subcommand.
    /// Retrieval/metric cut-off depth k.
    #[arg(long, default_value_t = 10, global = true)]
    k: usize,

    // When set, `run` builds a `VectorRetriever` over the same corpus using
    // OpenAI's `TEXT_EMBEDDING_3_SMALL` model and reports a `Comparison`
    // instead of the DCI-only report.
    /// Also evaluate an OpenAI-embeddings vector baseline (needs OPENAI_API_KEY).
    #[arg(long, global = true)]
    baseline_openai: bool,

    // Negated into `Limits::respect_gitignore` by `run`.
    /// Do not honor `.gitignore` while searching (recommended for log corpora).
    #[arg(long, global = true)]
    no_gitignore: bool,
}

// Subcommands of `dci-eval`.
//
// NOTE: the `///` comments below are picked up by clap's derive macro as help
// text, so they are user-facing strings; maintainer notes are written as plain
// `//` comments.
#[derive(Debug, Subcommand)]
enum Command {
    // `run` opens `corpus` with `CorpusRoot::with_limits` and loads `qrels`
    // with `Qrels::load_jsonl`, then evaluates.
    /// Evaluate a corpus against an existing qrels JSONL file.
    Run {
        /// Path to the corpus directory.
        #[arg(short, long)]
        corpus: PathBuf,
        /// Path to the BEIR-style qrels JSONL file.
        #[arg(short, long)]
        qrels: PathBuf,
    },
    // `run` creates `out`, maps the remaining fields onto
    // `eval::synth::SyntheticLogConfig`, and evaluates the qrels that
    // `eval::synth::generate` returns.
    /// Generate a synthetic log corpus and evaluate DCI on it.
    Synthetic {
        /// Directory to write the generated corpus into (created if absent).
        #[arg(short, long)]
        out: PathBuf,
        /// Number of log files.
        #[arg(long, default_value_t = 8)]
        files: usize,
        // Becomes `SyntheticLogConfig::lines_per_file`.
        /// Lines per file.
        #[arg(long, default_value_t = 500)]
        lines: usize,
        /// Number of injected IOC needles (one gold query each).
        #[arg(long, default_value_t = 12)]
        needles: usize,
        // Parsed as `usize` and cast to `u64` when the generator config is built.
        /// Generator seed.
        #[arg(long, default_value_t = 1)]
        seed: usize,
    },
    // Handled by an early return in `run`: it calls `eval::beir::prepare` and
    // returns a summary string without running any evaluation.
    /// Materialize a downloaded BEIR/BRIGHT dataset into a DCI corpus + qrels.
    ///
    /// Expects `<dataset>/corpus.jsonl`, `<dataset>/queries.jsonl`, and
    /// `<dataset>/qrels/<split>.tsv`. Writes `<out>/corpus/` and
    /// `<out>/qrels.jsonl`, then prints the `dci-eval run` command to use them.
    BeirPrep {
        /// Path to the downloaded dataset directory.
        #[arg(short, long)]
        dataset: PathBuf,
        // `run` creates this directory before calling `eval::beir::prepare`.
        /// Output directory for the materialized corpus + qrels.
        #[arg(short, long)]
        out: PathBuf,
        /// Qrels split to use (`qrels/<split>.tsv`).
        #[arg(long, default_value = "test")]
        split: String,
    },
}

/// Process entry point.
///
/// Installs a `tracing` subscriber that logs to stderr (filter taken from the
/// environment, falling back to `info`), parses the command line, runs the
/// selected command and writes the resulting report to stdout.
///
/// Returns [`ExitCode::FAILURE`] when the command fails or the report cannot be
/// written; the error is printed to stderr with its full context chain.
#[tokio::main]
async fn main() -> ExitCode {
    use std::io::Write as _;

    tracing_subscriber::fmt()
        .with_env_filter(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
        )
        // Keep diagnostics off stdout so the report can be piped or redirected.
        .with_writer(std::io::stderr)
        .init();

    let markdown = match run(Cli::parse()).await {
        Ok(markdown) => markdown,
        Err(err) => {
            // `{:#}` renders the whole anyhow context chain on one line.
            eprintln!("error: {err:#}");
            return ExitCode::FAILURE;
        }
    };

    // `println!` panics if stdout cannot be written, which happens whenever the
    // reader goes away early (e.g. `dci-eval … | head`). Write explicitly so a
    // closed pipe ends the process quietly instead of with a panic message.
    let mut stdout = std::io::stdout().lock();
    match writeln!(stdout, "{markdown}").and_then(|()| stdout.flush()) {
        Ok(()) => ExitCode::SUCCESS,
        // The consumer stopped reading on purpose; nothing went wrong on our side.
        Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => ExitCode::SUCCESS,
        Err(err) => {
            eprintln!("error: writing report to stdout: {err}");
            ExitCode::FAILURE
        }
    }
}

async fn run(cli: Cli) -> Result<String> {
    // BEIR/BRIGHT preparation does not evaluate; it materializes and exits.
    if let Command::BeirPrep {
        dataset,
        out,
        split,
    } = &cli.command
    {
        std::fs::create_dir_all(out)
            .with_context(|| format!("creating output dir {}", out.display()))?;
        let prepared = eval::beir::prepare(dataset, out, split)
            .map_err(|e| anyhow::anyhow!("preparing BEIR dataset: {e}"))?;
        return Ok(format!(
            "Prepared {} documents and {} queries.\n\nCorpus: {}\nQrels:  {}\n\nRun it:\n  \
             dci-eval --k 10 run --corpus {} --qrels {}",
            prepared.documents,
            prepared.queries,
            prepared.corpus_dir.display(),
            prepared.qrels_path.display(),
            prepared.corpus_dir.display(),
            prepared.qrels_path.display(),
        ));
    }

    let cfg = EvalConfig {
        k: cli.k,
        dataset_id: match &cli.command {
            Command::Run { corpus, .. } => format!("corpus:{}", corpus.display()),
            Command::Synthetic { .. } => "synthetic-logs".to_string(),
            // BeirPrep returns early above; this branch is never evaluated.
            Command::BeirPrep { .. } => String::new(),
        },
        ..EvalConfig::default()
    };
    let limits = Limits {
        respect_gitignore: !cli.no_gitignore,
        ..Limits::default()
    };

    let (corpus, qrels) = match cli.command {
        Command::Run { corpus, qrels } => {
            let root = CorpusRoot::with_limits(&corpus, limits)
                .with_context(|| format!("opening corpus at {}", corpus.display()))?;
            let qrels = Qrels::load_jsonl(&qrels)
                .with_context(|| format!("loading qrels from {}", qrels.display()))?;
            (root, qrels)
        }
        Command::Synthetic {
            out,
            files,
            lines,
            needles,
            seed,
        } => {
            std::fs::create_dir_all(&out)
                .with_context(|| format!("creating output dir {}", out.display()))?;
            let gen_cfg = eval::synth::SyntheticLogConfig {
                files,
                lines_per_file: lines,
                needles,
                seed: seed as u64,
            };
            let qrels = eval::synth::generate(&out, &gen_cfg)
                .map_err(|e| anyhow::anyhow!("generating synthetic corpus: {e}"))?;
            let root = CorpusRoot::with_limits(&out, limits)
                .with_context(|| format!("opening generated corpus at {}", out.display()))?;
            (root, qrels)
        }
        Command::BeirPrep { .. } => {
            return Err(anyhow::anyhow!("beir-prep is handled before evaluation"));
        }
    };

    let dci = DciRetriever::new(corpus.clone());

    if cli.baseline_openai {
        let client = openai::Client::from_env().context("initializing OpenAI client")?;
        let model = client.embedding_model(openai::embedding::TEXT_EMBEDDING_3_SMALL);
        let baseline = VectorRetriever::build(&corpus, model)
            .await
            .map_err(|e| anyhow::anyhow!("building vector baseline: {e}"))?;
        let comparison = Comparison::run(&dci, &baseline, &qrels, &cfg)
            .await
            .map_err(|e| anyhow::anyhow!("running comparison: {e}"))?;
        Ok(comparison.to_markdown())
    } else {
        let report = eval::evaluate(&dci, &qrels, &cfg)
            .await
            .map_err(|e| anyhow::anyhow!("evaluating: {e}"))?;
        Ok(format!(
            "# DCI retrieval — {} ({} queries)\n\n{}",
            dci.name(),
            qrels.queries.len(),
            report.to_markdown()
        ))
    }
}