dirpack 0.2.0

Budgeted directory indexes for AI coding agents with tree-sitter signatures
Documentation
//! Evaluation harness for allocation quality metrics.

use std::collections::{BTreeSet, HashMap};
use std::path::Path;
use std::time::Instant;

use serde::Serialize;

use crate::budget::BudgetTarget;
use crate::config::Config;
use crate::packer;
use crate::scanner;
use crate::scanner::entry::FileEntry;
use crate::tokenizer;

/// Result of evaluating one repository at several token budgets.
#[derive(Debug, Serialize)]
pub struct EvalReport {
    /// Lossy string form of the path that was passed to `evaluate`.
    pub repo: String,
    /// One metrics record per requested budget, in the order the budgets were given.
    pub budgets: Vec<EvalMetrics>,
}

/// Allocation-quality metrics for a single pack run at one token budget.
#[derive(Debug, Serialize)]
pub struct EvalMetrics {
    /// Token budget that was requested for this run.
    pub target_tokens: usize,
    /// Token count of the packed output, as reported by `tokenizer::count_tokens`.
    pub actual_tokens: usize,
    /// Wall-clock time of the pack call, in whole milliseconds.
    pub elapsed_ms: u64,
    /// `actual_tokens` divided by the elapsed seconds; `0.0` when no time elapsed.
    pub tokens_per_sec: f64,
    /// `(actual - target) / target` when the output exceeds the budget, otherwise `0.0`.
    pub overshoot_ratio: f64,
    /// Token count of the tree segments of the output, re-joined with `|`.
    pub tree_tokens: usize,
    /// `tree_tokens` divided by `target_tokens`.
    pub tree_ratio: f64,
    /// Entry-point file names that were found while scanning the repository.
    pub entry_points_expected: Vec<String>,
    /// Expected entry points that were not found in the packed output.
    pub entry_points_missing: Vec<String>,
    /// Fraction of expected entry points found in the output; `1.0` when none are expected.
    pub entry_point_coverage: f64,
    /// Fraction of top-level directories seen in the tree that own at least one
    /// signature segment; `None` when the tree yields no top-level directories.
    pub coverage_spread: Option<f64>,
    /// Largest per-top-level-directory signature count divided by the mean count;
    /// `None` when there are no signature segments.
    pub lopsidedness: Option<f64>,
    /// Number of signature segments that carry a non-empty path.
    pub signature_files: usize,
    /// Number of distinct two-component path prefixes among the signature segments.
    pub path_diversity: usize,
}

/// Packs the repository at `path` once per entry in `budgets` and reports
/// allocation-quality metrics for every run.
///
/// The repository is scanned a single time (with the default [`Config`]) to
/// discover which entry-point files exist; each pack run is then timed on its
/// own and scored against that list. Results keep the order of `budgets`.
///
/// The boolean flags and the trailing `None` handed to `scanner::scan` and
/// `packer::pack` are passed through unchanged; their meaning is defined by
/// those functions.
pub fn evaluate(path: &Path, budgets: &[usize]) -> EvalReport {
    let config = Config::default();
    let scanned = scanner::scan(path, &config, true);
    let entry_points = find_entry_points(&scanned);

    let per_budget = budgets
        .iter()
        .map(|&target| {
            // Only the pack call itself is timed; metric computation is excluded.
            let started = Instant::now();
            let packed =
                packer::pack(path, &config, BudgetTarget::Tokens(target), true, true, None);
            let elapsed = started.elapsed();

            compute_metrics(&packed.output, target, &entry_points, elapsed)
        })
        .collect();

    EvalReport {
        repo: path.to_string_lossy().into_owned(),
        budgets: per_budget,
    }
}

/// Derives every [`EvalMetrics`] field from one packed `output`.
///
/// * `output` - the packed, `|`-separated text produced for this budget.
/// * `target` - the token budget that was requested.
/// * `entry_points` - file names expected to be mentioned in `output`.
/// * `elapsed` - how long the pack call took.
///
/// NOTE(review): `target` is used as a divisor for `overshoot_ratio` and
/// `tree_ratio`, so a zero budget yields non-finite ratios.
fn compute_metrics(
    output: &str,
    target: usize,
    entry_points: &[String],
    elapsed: std::time::Duration,
) -> EvalMetrics {
    let actual_tokens = tokenizer::count_tokens(output);

    // Throughput is reported as zero when the timer registered no elapsed time.
    let tokens_per_sec = match elapsed.as_secs_f64() {
        secs if secs > 0.0 => actual_tokens as f64 / secs,
        _ => 0.0,
    };

    // Tokens beyond the budget, relative to the budget; zero when within it.
    let excess = actual_tokens.saturating_sub(target);
    let overshoot_ratio = if excess == 0 {
        0.0
    } else {
        excess as f64 / target as f64
    };

    let parsed = parse_pipe(output);
    let tree_text = parsed.tree_segments.join("|");
    let tree_tokens = tokenizer::count_tokens(&tree_text);

    let (expected, missing, coverage) = entry_point_coverage(output, entry_points);
    let (coverage_spread, lopsidedness, signature_files, path_diversity) =
        detail_distribution(&parsed.tree_segments, &parsed.signature_segments);

    EvalMetrics {
        target_tokens: target,
        actual_tokens,
        elapsed_ms: elapsed.as_millis() as u64,
        tokens_per_sec,
        overshoot_ratio,
        tree_tokens,
        tree_ratio: tree_tokens as f64 / target as f64,
        entry_points_expected: expected,
        entry_points_missing: missing,
        entry_point_coverage: coverage,
        coverage_spread,
        lopsidedness,
        signature_files,
        path_diversity,
    }
}

/// The `|`-separated segments of a packed output, sorted by kind.
struct ParsedPipe {
    /// Segments containing `:{`, in output order.
    tree_segments: Vec<String>,
    /// Remaining segments that contain a `:`, in output order.
    signature_segments: Vec<String>,
}

/// Splits a packed output on `|` and classifies each segment.
///
/// Segments starting with `[`, `root:` or `IMPORTANT:` are dropped. Of the
/// rest, a segment containing `:{` is a tree segment, any other segment
/// containing `:` is a signature segment, and segments without a colon are
/// ignored.
fn parse_pipe(output: &str) -> ParsedPipe {
    // Prefixes that mark segments which are neither tree nor signature data.
    const SKIPPED_PREFIXES: [&str; 3] = ["[", "root:", "IMPORTANT:"];

    let mut parsed = ParsedPipe {
        tree_segments: Vec::new(),
        signature_segments: Vec::new(),
    };

    let payload = output
        .split('|')
        .filter(|segment| !SKIPPED_PREFIXES.iter().any(|prefix| segment.starts_with(*prefix)));

    for segment in payload {
        // The `:{` test must come first: tree segments also contain a plain `:`.
        let bucket = if segment.contains(":{") {
            &mut parsed.tree_segments
        } else if segment.contains(':') {
            &mut parsed.signature_segments
        } else {
            continue;
        };
        bucket.push(segment.to_owned());
    }

    parsed
}

/// Measures how many expected entry-point files are mentioned in the packed output.
///
/// Returns `(expected, missing, coverage)`:
/// * `expected` - a copy of `entry_points`;
/// * `missing` - the expected names that `output` does not mention, in input order;
/// * `coverage` - `(expected - missing) / expected`, or `1.0` when nothing is expected.
///
/// A name counts as mentioned only when it appears as a complete file name.
/// A bare substring test would over-report coverage: `index.ts` would be
/// satisfied by `index.tsx`, `main.rs` by `domain.rs`, and `app.py` by
/// `webapp.py`.
fn entry_point_coverage(output: &str, entry_points: &[String]) -> (Vec<String>, Vec<String>, f64) {
    let expected = entry_points.to_vec();
    if expected.is_empty() {
        return (expected, Vec::new(), 1.0);
    }

    let missing: Vec<String> = expected
        .iter()
        .filter(|name| !output_mentions_file(output, name))
        .cloned()
        .collect();

    let coverage = (expected.len() - missing.len()) as f64 / expected.len() as f64;

    (expected, missing, coverage)
}

/// Returns `true` for characters that can continue a file-name stem or extension.
fn is_file_name_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_' || c == '-'
}

/// Returns `true` when `name` occurs in `output` as a whole file name, i.e. at
/// least one occurrence is not glued to further file-name characters on either side.
fn output_mentions_file(output: &str, name: &str) -> bool {
    output.match_indices(name).any(|(start, hit)| {
        // A preceding name character (or a dot, as in `foo.main.rs`) means the
        // hit is only the tail of a longer file name.
        let glued_before = output[..start]
            .chars()
            .next_back()
            .map_or(false, |c| c == '.' || is_file_name_char(c));

        // A following name character means the hit is only the head of a longer
        // name (`index.ts` inside `index.tsx`). A dot only counts when more name
        // follows it (`lib.rs.orig`), so sentence-ending punctuation is tolerated.
        let mut rest = output[start + hit.len()..].chars();
        let glued_after = match rest.next() {
            Some('.') => rest.next().map_or(false, is_file_name_char),
            Some(c) => is_file_name_char(c),
            None => false,
        };

        !glued_before && !glued_after
    })
}

/// Summarises how signature detail is spread across the directories of the tree.
///
/// Each signature segment is read as `path:rest`; segments with an empty path
/// are skipped. Returns, in order:
/// * coverage spread - fraction of top-level directories (from the tree
///   segments) that own at least one signature segment, `None` if the tree
///   yields no top-level directories;
/// * lopsidedness - largest per-top-level-directory signature count divided by
///   the mean count, `None` if there are no usable signature segments;
/// * the number of usable signature segments;
/// * the number of distinct two-component path prefixes among them.
fn detail_distribution(
    tree_segments: &[String],
    signature_segments: &[String],
) -> (Option<f64>, Option<f64>, usize, usize) {
    let top_dirs = extract_top_dirs(tree_segments);

    let mut per_top_dir: HashMap<String, usize> = HashMap::new();
    let mut two_level_prefixes: BTreeSet<String> = BTreeSet::new();
    let mut signature_files = 0usize;

    let paths = signature_segments
        .iter()
        // Everything before the first ':' is the path; a colon-less segment is all path.
        .map(|seg| seg.split_once(':').map_or(seg.as_str(), |(path, _)| path))
        .filter(|path| !path.is_empty());

    for path in paths {
        signature_files += 1;
        *per_top_dir.entry(top_dir(path)).or_default() += 1;
        two_level_prefixes.insert(path_prefix(path, 2));
    }

    let coverage_spread = (!top_dirs.is_empty()).then(|| {
        let covered = top_dirs
            .iter()
            .filter(|dir| per_top_dir.contains_key(*dir))
            .count();
        covered as f64 / top_dirs.len() as f64
    });

    // `max()` is `None` exactly when no directory received any signatures.
    let lopsidedness = per_top_dir.values().copied().max().and_then(|peak| {
        let total: usize = per_top_dir.values().sum();
        let mean = total as f64 / per_top_dir.len() as f64;
        if mean == 0.0 {
            None
        } else {
            Some(peak as f64 / mean)
        }
    });

    (
        coverage_spread,
        lopsidedness,
        signature_files,
        two_level_prefixes.len(),
    )
}

/// Collects the top-level directory names mentioned by the tree segments.
///
/// A tree segment has the form `label:{items}`. For the special label `dirs`
/// every non-empty comma-separated item is taken as a directory name; for any
/// other label the first path component of the label itself is used (when it
/// is non-empty). Segments without `:{` are ignored.
fn extract_top_dirs(tree_segments: &[String]) -> BTreeSet<String> {
    let mut found = BTreeSet::new();

    let labelled = tree_segments
        .iter()
        .filter_map(|segment| segment.split_once(":{"));

    for (label, body) in labelled {
        match label {
            "dirs" => found.extend(
                body.trim_end_matches('}')
                    .split(',')
                    .filter(|name| !name.is_empty())
                    .map(String::from),
            ),
            other => {
                let top = top_dir(other);
                if !top.is_empty() {
                    found.insert(top);
                }
            }
        }
    }

    found
}

/// Returns the first `/`-separated component of `path`, or all of `path` when
/// it contains no `/`.
fn top_dir(path: &str) -> String {
    let head = match path.split_once('/') {
        Some((first, _)) => first,
        None => path,
    };
    head.to_owned()
}

/// Returns the first `depth` `/`-separated components of `path`, joined by `/`.
///
/// Paths with fewer components are returned whole, and a `depth` of zero also
/// yields the whole path.
fn path_prefix(path: &str, depth: usize) -> String {
    let leading: Vec<&str> = path.split('/').take(depth).collect();
    if leading.is_empty() {
        // Only reachable with `depth == 0`: splitting always yields one component.
        return path.to_owned();
    }
    leading.join("/")
}

/// Lists the well-known entry-point file names present among `entries`.
///
/// Directories are skipped. Only the bare file name is compared, so the same
/// name occurring in several directories is reported once. The result is
/// sorted and free of duplicates.
fn find_entry_points(entries: &[FileEntry]) -> Vec<String> {
    let known_names = [
        "Cargo.toml",
        "pyproject.toml",
        "package.json",
        "main.rs",
        "lib.rs",
        "index.ts",
        "index.tsx",
        "main.py",
        "app.py",
        "__init__.py",
    ];

    // Going through a BTreeSet gives both de-duplication and a stable order.
    let unique: BTreeSet<String> = entries
        .iter()
        .filter(|entry| !entry.is_dir)
        .filter_map(|entry| {
            let name = entry.file_name();
            known_names.contains(&name).then(|| name.to_string())
        })
        .collect();

    unique.into_iter().collect()
}

// Unit tests for the pure path helpers of this module.
#[cfg(test)]
mod tests {
    use super::*;

    // `top_dir` yields the first `/`-separated component, and the whole input
    // when there is no separator.
    #[test]
    fn test_top_dir() {
        assert_eq!(top_dir("src/main.rs"), "src");
        assert_eq!(top_dir("Cargo.toml"), "Cargo.toml");
    }

    // `path_prefix` keeps at most `depth` leading components; shorter paths
    // come back unchanged.
    #[test]
    fn test_path_prefix() {
        assert_eq!(path_prefix("src/format/mod.rs", 2), "src/format");
        assert_eq!(path_prefix("main.rs", 2), "main.rs");
    }
}