car-search 0.25.0

External code discovery + indexing for Common Agent Runtime
//! Integration tests for `ReferenceMiner` implementations.
//!
//! The `LocalCloneSource` tests run offline using a tempdir fixture.
//! The live `GitHubSource` search test is gated on `GITHUB_TOKEN` — skipped if
//! absent so CI without credentials still passes. The missing-token test runs
//! unconditionally.

use car_search::reference_miner::{
    GitHubSource, LocalCloneSource, MiningFilters, MiningQuery, MiningScope, MultiSource,
    ReferenceMiner,
};
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;

/// Write `contents` to `path`, creating any missing parent directories first.
///
/// Test-only helper: panics (via `expect`) if the directories or the file
/// cannot be created.
fn write(path: &std::path::Path, contents: &str) {
    // A path without a parent component needs no directory setup.
    path.parent()
        .map_or(Ok(()), |dir| fs::create_dir_all(dir))
        .expect("create parent dirs");
    fs::write(path, contents).expect("write fixture file");
}

/// Build a tempdir containing three fake repos, each with one license file
/// and one source file:
///
/// - `repo_a`: MIT `LICENSE`; source mentions "token bucket rate limiter".
/// - `repo_b`: Apache-2.0 `LICENSE`; source is unrelated to the query terms.
/// - `repo_c`: GPL-3.0 `COPYING`; source mentions the query terms (used by
///   the license-filtering test).
///
/// Returns the `TempDir` whose path is the fixture root.
fn build_fixture() -> tempfile::TempDir {
    let fixture = tempfile::tempdir().expect("tempdir");

    // (repo directory, path inside the repo, file contents), written in order.
    let layout: [(&str, &str, &str); 6] = [
        (
            "repo_a",
            "LICENSE",
            "MIT License\n\nPermission is hereby granted...\n",
        ),
        (
            "repo_a",
            "src/limiter.rs",
            "//! A token bucket rate limiter.\n\
             pub struct RateLimiter { tokens: f64 }\n\
             impl RateLimiter {\n    pub fn acquire(&mut self) -> bool { self.tokens >= 1.0 }\n}\n",
        ),
        (
            "repo_b",
            "LICENSE",
            "                                 Apache License\n                           Version 2.0, January 2004\n",
        ),
        (
            "repo_b",
            "src/unrelated.rs",
            "pub fn hello() -> &'static str { \"hi\" }\n",
        ),
        (
            "repo_c",
            "COPYING",
            "GNU General Public License\nVersion 3, 29 June 2007\n",
        ),
        (
            "repo_c",
            "src/also_limiter.rs",
            "// another token bucket rate limiter impl\npub fn acquire_token() {}\n",
        ),
    ];

    for (repo, rel_path, contents) in layout {
        write(&fixture.path().join(repo).join(rel_path), contents);
    }

    fixture
}

/// Wrap the fixture directory's path in a one-element list of search roots.
fn roots_from(td: &tempfile::TempDir) -> Vec<PathBuf> {
    let fixture_root = PathBuf::from(td.path());
    Vec::from([fixture_root])
}

/// A plain local search over the fixture must return at least two hits,
/// ordered by descending score, with a "limiter" file on top.
#[tokio::test]
async fn local_source_returns_ranked_results() {
    let td = build_fixture();
    let src = LocalCloneSource::new(roots_from(&td));
    let filters = MiningFilters {
        max_results: 10,
        ..Default::default()
    };
    let q = MiningQuery {
        query: "token bucket rate limiter".into(),
        scope: MiningScope::Local,
        filters,
    };

    let hits = src.search(&q).await.expect("search ok");
    let found = hits.len();
    assert!(
        found >= 2,
        "expected at least two hits, got {}: {:?}",
        found,
        hits.iter().map(|h| &h.path).collect::<Vec<_>>()
    );

    // Every adjacent pair must be in non-increasing score order.
    assert!(
        hits.windows(2).all(|pair| pair[0].score >= pair[1].score),
        "not sorted descending: {:?}",
        hits
    );

    // The winner has to be one of the limiter files, never unrelated.rs.
    let best = &hits[0];
    let top_path = &best.path;
    assert!(
        top_path.contains("limiter"),
        "top hit was {top_path:?}, expected limiter.rs"
    );
    assert!(best.score > 0.0);
}

/// With an MIT/Apache-2.0 allowlist, every hit must carry one of those two
/// licenses and nothing from the GPL-licensed `repo_c` may appear.
#[tokio::test]
async fn local_source_filters_by_license_allowlist() {
    let td = build_fixture();
    let src = LocalCloneSource::new(roots_from(&td));
    let filters = MiningFilters {
        max_results: 10,
        license_allowlist: vec!["MIT".into(), "Apache-2.0".into()],
        ..Default::default()
    };
    let q = MiningQuery {
        query: "token bucket rate limiter".into(),
        scope: MiningScope::Local,
        filters,
    };

    let hits = src.search(&q).await.expect("search ok");
    assert!(!hits.is_empty());

    for hit in &hits {
        // A hit without a detected license must not survive an allowlist.
        let Some(lic) = hit.license.as_deref() else {
            panic!("license_allowlist must drop unknown-license hits");
        };
        assert!(
            matches!(lic, "MIT" | "Apache-2.0"),
            "unexpected license {lic} in filtered result"
        );
    }

    // The GPL-3.0 repo should be excluded entirely.
    let gpl_leaked = hits.iter().any(|h| h.repo.contains("repo_c"));
    assert!(
        !gpl_leaked,
        "GPL-3.0 repo leaked through MIT/Apache allowlist: {:?}",
        hits.iter().map(|h| &h.repo).collect::<Vec<_>>()
    );
}

/// With `languages = ["rust"]`, only `.rs` files may be returned even though
/// a Python file in the fixture also contains the query terms.
#[tokio::test]
async fn local_source_filters_by_language() {
    let td = build_fixture();
    // Plant a .py decoy whose text matches the query; the filter must drop it.
    let decoy = td.path().join("repo_a/scripts/limiter_script.py");
    write(&decoy, "# token bucket rate limiter script\nprint('hi')\n");

    let src = LocalCloneSource::new(roots_from(&td));
    let filters = MiningFilters {
        max_results: 10,
        languages: vec!["rust".into()],
        ..Default::default()
    };
    let q = MiningQuery {
        query: "token bucket rate limiter".into(),
        scope: MiningScope::Local,
        filters,
    };

    let hits = src.search(&q).await.expect("search ok");
    assert!(!hits.is_empty());

    // Report the first non-Rust path, if any slipped through.
    if let Some(stray) = hits.iter().find(|h| !h.path.ends_with(".rs")) {
        panic!("language=rust filter let through {}", stray.path);
    }
}

/// A whitespace-only query must be rejected with `MiningError::InvalidQuery`
/// rather than producing results.
#[tokio::test]
async fn local_source_rejects_empty_query() {
    let td = build_fixture();
    let src = LocalCloneSource::new(roots_from(&td));

    // Three spaces: non-empty as a string, but carries no search terms.
    let q = MiningQuery {
        query: "   ".into(),
        scope: MiningScope::Local,
        filters: Default::default(),
    };

    let res = src.search(&q).await;
    assert!(matches!(
        res,
        Err(car_search::reference_miner::MiningError::InvalidQuery(_))
    ));
}

/// `MultiSource` over two miners that scan the same fixture must return a
/// non-empty result set with no repeated `(repo, path, commit)` triple.
#[tokio::test]
async fn multi_source_composes_local_miners() {
    let td = build_fixture();
    // Both miners point at the same fixture to exercise the dedupe path.
    let first: Arc<dyn ReferenceMiner> = Arc::new(LocalCloneSource::new(roots_from(&td)));
    let second: Arc<dyn ReferenceMiner> = Arc::new(LocalCloneSource::new(roots_from(&td)));
    let multi = MultiSource::new(vec![first, second]);

    let filters = MiningFilters {
        max_results: 10,
        ..Default::default()
    };
    let q = MiningQuery {
        query: "token bucket rate limiter".into(),
        scope: MiningScope::All,
        filters,
    };
    let hits = multi.search(&q).await.expect("search ok");

    // Collect the identity of every hit, then compare the count before and
    // after removing adjacent duplicates from the sorted list.
    let mut keys = Vec::with_capacity(hits.len());
    for hit in &hits {
        keys.push((hit.repo.clone(), hit.path.clone(), hit.commit.clone()));
    }
    keys.sort();
    let total = keys.len();
    keys.dedup();
    let unique = keys.len();
    assert_eq!(total, unique, "multi source failed to dedupe");
    assert!(!hits.is_empty());
}

/// Serializes the tests that read or mutate the process-wide `GITHUB_TOKEN`
/// environment variable.
///
/// The test harness runs tests on parallel threads by default, so without
/// this lock the token-removal test could clear the variable between the live
/// test's presence check and its `from_env` call, making the live test fail
/// or skip spuriously. This only coordinates the tests in this file; it does
/// not make environment mutation safe with respect to other threads.
static GITHUB_TOKEN_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Without `GITHUB_TOKEN`, `GitHubSource::from_env` must return a clean
/// `MiningError::Unavailable` instead of panicking.
///
/// Runs unconditionally. The variable is removed only while the env lock is
/// held and is restored afterwards, even if `from_env` panics.
#[tokio::test]
async fn github_source_requires_token() {
    // A poisoned lock only means another env test panicked; the `()` payload
    // cannot be left inconsistent, so continue with the inner guard.
    let _env_lock = GITHUB_TOKEN_ENV_LOCK
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    /// Puts the saved token back when dropped, including during unwinding.
    struct RestoreToken(Option<String>);

    impl Drop for RestoreToken {
        fn drop(&mut self) {
            if let Some(token) = self.0.take() {
                std::env::set_var("GITHUB_TOKEN", token);
            }
        }
    }

    // Declared after the lock guard, so it is dropped (and the token
    // restored) before the lock is released.
    let restore = RestoreToken(std::env::var("GITHUB_TOKEN").ok());
    std::env::remove_var("GITHUB_TOKEN");
    let res = GitHubSource::from_env();
    // Restore before asserting so a failed assertion never leaves the
    // variable unset.
    drop(restore);

    assert!(matches!(
        res,
        Err(car_search::reference_miner::MiningError::Unavailable(_))
    ));
}

/// Live search against the real GitHub API.
///
/// Gated on `GITHUB_TOKEN`: when the variable is absent the test prints a
/// notice and returns early, so CI without credentials still passes.
#[tokio::test]
async fn github_source_live_search() {
    // Check for the token and build the source under the env lock so the
    // token-removal test cannot interleave between the two steps. The guard
    // is released before the first `.await`.
    // NOTE(review): assumes `from_env` captures the token at construction —
    // confirm; if the source re-reads the environment during `search`, the
    // lock would need to cover that call as well.
    let src = {
        let _env_lock = GITHUB_TOKEN_ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        if std::env::var("GITHUB_TOKEN").is_err() {
            eprintln!("skipping github_source_live_search: GITHUB_TOKEN not set");
            return;
        }
        GitHubSource::from_env().expect("from_env with token")
    };

    let q = MiningQuery {
        query: "token bucket rate limiter".into(),
        scope: MiningScope::Remote,
        filters: MiningFilters {
            max_results: 3,
            languages: vec!["rust".into()],
            ..Default::default()
        },
    };
    let hits = src.search(&q).await.expect("github search ok");
    assert!(!hits.is_empty(), "expected at least one github hit");
    for h in &hits {
        assert!(h.repo.starts_with("github.com/"));
        assert!(
            h.path.ends_with(".rs"),
            "language filter failed: {}",
            h.path
        );
        assert!(h.score > 0.0);
    }
}