dci-tool 0.1.0

Direct Corpus Interaction: a sandboxed, ripgrep-backed corpus-search toolset and agent for cyber-focused LLM agents, built on rig.
Documentation
//! Integration tests for the corpus engine and tools against a fixture corpus.
#![allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::indexing_slicing,
    clippy::panic
)]

use std::path::PathBuf;
use std::time::Duration;

use dci_tool::engine::{self, FindQuery, SearchQuery};
use dci_tool::sandbox::{CorpusRoot, Limits};

/// Path to the fixture corpus: `tests/fixtures` under the crate's manifest
/// directory (taken from `CARGO_MANIFEST_DIR` at compile time).
fn fixtures() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("tests/fixtures");
    dir
}

/// Opens the fixture directory as a [`CorpusRoot`] via `CorpusRoot::new`.
///
/// Panics with "fixture corpus" if the root cannot be constructed.
fn corpus() -> CorpusRoot {
    let root_dir = fixtures();
    CorpusRoot::new(root_dir).expect("fixture corpus")
}

/// Opens the fixture directory as a [`CorpusRoot`] using caller-supplied
/// `limits` instead of the ones `CorpusRoot::new` would pick.
///
/// Panics with "fixture corpus" if the root cannot be constructed.
fn corpus_with(limits: Limits) -> CorpusRoot {
    let root_dir = fixtures();
    CorpusRoot::with_limits(root_dir, limits).expect("fixture corpus")
}

/// An unrestricted search for a literal IP must report hits from more than
/// one fixture file.
#[test]
fn search_finds_matches_across_files() {
    let query = SearchQuery {
        pattern: "203\\.0\\.113\\.7".to_owned(),
        path_glob: None,
        case_insensitive: false,
        context_lines: 0,
        max_results: None,
    };
    let outcome = engine::search(&corpus(), &query).expect("search");

    assert!(!outcome.hits.is_empty());
    // No context lines were requested, so every hit must be a real match.
    assert!(outcome.hits.iter().all(|hit| hit.is_match));

    // The brute-force IP appears in the auth log and the notes.
    let seen_in = |suffix: &str| outcome.hits.iter().any(|hit| hit.path.ends_with(suffix));
    assert!(seen_in("auth.log"));
    assert!(seen_in("notes.md"));
}

/// With a `**/*.log` path glob, every reported hit must come from a `.log`
/// file, and there must be at least one.
#[test]
fn search_respects_path_glob() {
    let root = corpus();
    let query = SearchQuery {
        pattern: "10\\.0\\.0\\.5".to_owned(),
        path_glob: Some("**/*.log".to_owned()),
        case_insensitive: false,
        context_lines: 0,
        max_results: None,
    };

    let outcome = engine::search(&root, &query).expect("search");

    assert!(!outcome.hits.is_empty());
    for hit in &outcome.hits {
        assert!(hit.path.ends_with(".log"));
    }
}

/// Requesting one line of context must yield both matching and non-matching
/// (context) hits.
#[test]
fn search_captures_context_lines() {
    let query = SearchQuery {
        pattern: "Accepted password".to_owned(),
        path_glob: Some("**/*.log".to_owned()),
        case_insensitive: false,
        context_lines: 1,
        max_results: None,
    };
    let outcome = engine::search(&corpus(), &query).expect("search");

    // Split the hits by `is_match`: we need at least one matched line plus
    // at least one surrounding context line.
    let (matched, context): (Vec<_>, Vec<_>) =
        outcome.hits.iter().partition(|hit| hit.is_match);
    assert!(!matched.is_empty());
    assert!(!context.is_empty());
}

/// A `max_results` cap of 2 must return exactly two hits and set the
/// `truncated` flag.
#[test]
fn search_truncates_at_max_results() {
    let capped = SearchQuery {
        pattern: "ssh2".to_owned(),
        path_glob: None,
        case_insensitive: false,
        context_lines: 0,
        max_results: Some(2),
    };
    let root = corpus();

    let outcome = engine::search(&root, &capped).expect("search");

    assert_eq!(outcome.hits.len(), 2);
    assert!(outcome.truncated);
}

/// Repeating the same capped search must return the same ordered hits each
/// time.
#[test]
fn parallel_search_is_deterministic_across_runs() {
    let query = SearchQuery {
        pattern: r"\d".to_owned(),
        path_glob: None,
        case_insensitive: false,
        context_lines: 0,
        max_results: Some(5),
    };
    // Each run opens a fresh corpus root, exactly like an independent caller.
    let run = || engine::search(&corpus(), &query).expect("search");

    // The parallel walk must yield identical ordered results every run,
    // regardless of thread scheduling.
    let baseline = run();
    for _ in 0..8 {
        let repeat = run();
        assert_eq!(
            baseline.hits, repeat.hits,
            "search results must be deterministic"
        );
    }
}

/// A `*.py` glob must locate `app.py` and must not return anything that does
/// not end in `.py`.
#[test]
fn find_by_extension_glob() {
    let query = FindQuery {
        glob: "*.py".to_owned(),
        max_results: None,
    };
    let found = engine::find(&corpus(), &query).expect("find");

    let has_app = found.paths.iter().any(|path| path.ends_with("app.py"));
    let only_python = found.paths.iter().all(|path| path.ends_with(".py"));
    assert!(has_app);
    assert!(only_python);
}

/// A capped `find` must keep the lexicographically smallest matches, in
/// sorted order, and make the same selection on every run.
#[test]
fn find_truncation_selects_deterministically() {
    // Match every file, but cap below the total so the parallel walk must
    // truncate. The surviving subset must be the lexicographically smallest
    // paths and identical on every run, never thread-scheduling dependent.
    let query = FindQuery {
        glob: "**/*".to_string(),
        max_results: Some(2),
    };

    let first = engine::find(&corpus(), &query).expect("find");
    assert_eq!(first.paths.len(), 2, "cap must bound the result");
    assert!(first.truncated, "more matches than cap implies truncation");
    // Cap is smaller than the full match set, so order then truncate must
    // yield the two smallest paths in sorted order.
    let mut sorted = first.paths.clone();
    sorted.sort();
    assert_eq!(first.paths, sorted, "results must be sorted");

    // Being sorted is not enough: any two paths in order would pass the check
    // above. Build the expected selection from an uncapped reference walk and
    // require the capped result to be exactly its sorted prefix.
    let everything = engine::find(
        &corpus(),
        &FindQuery {
            glob: "**/*".to_string(),
            max_results: None,
        },
    )
    .expect("find");
    assert!(
        !everything.truncated,
        "uncapped reference walk must cover the whole fixture corpus"
    );
    let mut smallest = everything.paths;
    smallest.sort();
    smallest.truncate(2);
    assert_eq!(
        first.paths, smallest,
        "truncation must keep the lexicographically smallest paths"
    );

    for _ in 0..8 {
        let again = engine::find(&corpus(), &query).expect("find");
        assert_eq!(
            first.paths, again.paths,
            "truncated find selection must be deterministic"
        );
    }
}

/// With a zero timeout, both `search` and `find` must still return `Ok` but
/// flag their result as truncated.
#[test]
fn exhausted_time_budget_stops_walk_and_flags_truncation() {
    // `Duration::ZERO` as the wall-clock budget means the cooperative deadline
    // has already passed before the first file is visited, so each walk is
    // expected to stop early and say so rather than run to completion.
    let spent = corpus_with(Limits {
        timeout: Duration::ZERO,
        ..Limits::default()
    });

    let any_digit = SearchQuery {
        pattern: r"\d".to_owned(),
        path_glob: None,
        case_insensitive: false,
        context_lines: 0,
        max_results: None,
    };
    let searched = engine::search(&spent, &any_digit).expect("search");
    assert!(
        searched.truncated,
        "an exhausted time budget must flag the search as truncated"
    );

    let every_path = FindQuery {
        glob: "**/*".to_owned(),
        max_results: None,
    };
    let found = engine::find(&spent, &every_path).expect("find");
    assert!(
        found.truncated,
        "an exhausted time budget must flag the find as truncated"
    );
}

/// Reading lines 1 to 3 of `src/app.py` must return three numbered lines,
/// starting with `import os`, and report that more content follows.
#[test]
fn read_range_returns_numbered_window() {
    let root = corpus();
    let window = engine::read_range(&root, "src/app.py", Some(1), Some(3)).expect("read");

    assert_eq!(window.lines.len(), 3);

    // The length check above guarantees index 0 exists.
    let head = &window.lines[0];
    assert_eq!(head.line, 1);
    assert_eq!(head.text, "import os");

    assert!(window.more_below);
}

/// Relative paths that climb out of the corpus root must be rejected with
/// `PathEscape` or `NotFound`, never read.
#[test]
fn read_range_rejects_path_escape() {
    // The corpus root is `<manifest>/tests/fixtures`, so:
    // - `../Cargo.toml` points at `<manifest>/tests/Cargo.toml`, which is not
    //   expected to exist and can be rejected simply for being missing;
    // - `../../Cargo.toml` is the crate manifest itself, a real file outside
    //   the root, so only genuine escape handling can reject it.
    // Probing both keeps the original case and covers a file that exists.
    for escaping in ["../Cargo.toml", "../../Cargo.toml"] {
        let err = engine::read_range(&corpus(), escaping, None, None).unwrap_err();
        assert!(
            matches!(
                err,
                dci_tool::DciError::PathEscape { .. } | dci_tool::DciError::NotFound { .. }
            ),
            "reading {} must be rejected as an escape, got {:?}",
            escaping,
            err
        );
    }
}

/// Listing the corpus root must include the known fixture entries and place
/// every directory before the first file.
#[test]
fn list_dir_root_lists_entries() {
    let result = engine::list_dir(&corpus(), None).expect("list");
    let names: Vec<&str> = result.entries.iter().map(|e| e.name.as_str()).collect();
    assert!(names.contains(&"logs"));
    assert!(names.contains(&"src"));
    assert!(names.contains(&"notes.md"));

    // Directories sort before files. The root is asserted above to hold both
    // directories (`logs`, `src`) and a file (`notes.md`), so both lookups
    // must succeed. Requiring that stops the ordering check from being
    // silently skipped if a kind label ever fails to match.
    let first_file = result
        .entries
        .iter()
        .position(|e| e.kind == "file")
        .expect("root listing must contain an entry of kind \"file\"");
    let last_dir = result
        .entries
        .iter()
        .rposition(|e| e.kind == "dir")
        .expect("root listing must contain an entry of kind \"dir\"");
    assert!(
        last_dir < first_file,
        "directories must sort before files (last dir at {}, first file at {})",
        last_dir,
        first_file
    );
}

/// The same search must skip the ignored content when `respect_gitignore` is
/// on and find it when the flag is off.
#[test]
fn gitignore_is_honored_when_enabled() {
    let query = SearchQuery {
        pattern: "ignored_token".to_owned(),
        path_glob: None,
        case_insensitive: false,
        context_lines: 0,
        max_results: None,
    };
    // Runs the shared query against a corpus that differs only in the
    // gitignore switch; every other limit stays at its default.
    let search_with = |respect_gitignore: bool| {
        let root = corpus_with(Limits {
            respect_gitignore,
            ..Limits::default()
        });
        engine::search(&root, &query).expect("search")
    };

    let respected = search_with(true);
    assert!(
        respected.hits.is_empty(),
        "ignored/ should be excluded when gitignore is respected"
    );

    let unrestricted = search_with(false);
    let reached_ignored = unrestricted
        .hits
        .iter()
        .any(|hit| hit.path.contains("ignored"));
    assert!(
        reached_ignored,
        "ignored/ should be searched when gitignore is disabled"
    );
}