use std::time::Instant;
use anyhow::{Context, Result};
use clap::Args;
use time::OffsetDateTime;
use crate::api::deps_dev::Client as DepsDevClient;
use crate::api::github::Client as GhClient;
use crate::api::github::GithubError;
use crate::api::osv::Client as OsvClient;
use crate::api::scorecard::Client as ScorecardClient;
use crate::config;
use crate::models::{
Category, ModuleResult, ModuleWeights, RepositoryContext, RepositorySummary, TrustReport,
};
use crate::reports::json_report;
use crate::scoring::{aggregate, overall_confidence};
use crate::storage::Cache;
use crate::utils::{ratelimit::RateLimiter, repo_url};
// Command-line arguments for the scan command, parsed by clap's derive API.
//
// NOTE: plain `//` comments are used on purpose. With clap's derive macros,
// `///` doc comments are turned into help text, so adding them would change
// what `--help` prints.
#[derive(Debug, Args)]
pub struct ScanArgs {
// Repository identifier (positional). `execute` runs it through
// `repo_url::parse` and builds `https://github.com/{full_name}` from the result.
pub repo: String,
// Scan mode; defaults to `Mode::Standard`.
#[arg(long, value_enum, default_value_t = Mode::Standard)]
pub mode: Mode,
// Comma-separated module names to run. Passed to `select_modules` as the
// "enabled" list; when absent or empty the default module set is used.
#[arg(long, value_delimiter = ',')]
pub modules: Option<Vec<String>>,
// Comma-separated module names to remove from the selection.
#[arg(long, value_delimiter = ',')]
pub skip_modules: Option<Vec<String>>,
// Directory that receives the report files; `execute` creates it if missing.
#[arg(long, default_value = "./repo-trust-reports")]
pub output: std::path::PathBuf,
// Comma-separated output formats. Used by `resolve_formats` only when it is
// non-empty and `--json` is not set; otherwise config defaults apply.
#[arg(long, value_delimiter = ',', value_enum)]
pub format: Vec<Format>,
// Path to a custom weights file, loaded via `crate::scoring::weights::load`.
// When absent, the weights from the loaded config are used.
#[arg(long)]
pub weights: Option<std::path::PathBuf>,
// Scoring version override; must parse as SemVer. Defaults to
// `crate::SCORING_VERSION`.
#[arg(long)]
pub scoring_version: Option<String>,
// GitHub token. Also read from the `GITHUB_TOKEN` environment variable
// (`hide_env_values` keeps the value out of help output). `execute` falls
// back to `cfg.github.resolve_token()` when this is `None`.
#[arg(long, env = "GITHUB_TOKEN", hide_env_values = true)]
pub token: Option<String>,
// Explicit RNG seed. When absent, `execute` derives one from the repository
// name and the scoring version.
#[arg(long)]
pub seed: Option<u64>,
// When set, `execute` deletes this repository's cache entries before scanning.
#[arg(long)]
pub refresh: bool,
// NOTE(review): not read anywhere in the visible part of this file — confirm
// where (or whether) per-module cache refresh is implemented.
#[arg(long)]
pub refresh_module: Option<String>,
// NOTE(review): not read anywhere in the visible part of this file.
#[arg(long)]
pub debug: bool,
// Suppresses the terminal report, the "wrote ..." lines and the final
// score line printed by `execute`.
#[arg(long)]
pub quiet: bool,
// Disables colour in the terminal report (passed negated to the terminal writer).
#[arg(long)]
pub no_color: bool,
// Forces the format list to exactly `[Format::Json]`, overriding `--format`
// and the config defaults (see `resolve_formats`).
#[arg(long)]
pub json: bool,
// Hidden flag / `REPO_TRUST_API_BASE_URL`: when set, the same base URL is
// applied to the GitHub, Scorecard, OSV and deps.dev clients.
#[arg(long, hide = true, env = "REPO_TRUST_API_BASE_URL")]
pub api_base_url: Option<String>,
// Hidden flag / `REPO_TRUST_SNAPSHOT_AT`: fixed snapshot timestamp, parsed
// as ISO 8601. When absent, the current UTC time is used.
#[arg(long, hide = true, env = "REPO_TRUST_SNAPSHOT_AT")]
pub snapshot_at: Option<String>,
}
// Scan mode accepted by `--mode`.
//
// In the visible code the value is only threaded through: into the
// repository context, the report (`crate::models::Mode`), the report cache key
// and the final summary line. What each mode changes about the analysis is
// decided elsewhere.
//
// NOTE: `//` rather than `///` is deliberate — clap's `ValueEnum` derive uses
// doc comments as help text for the possible values.
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
pub enum Mode {
Quick,
Standard,
Deep,
}
impl Mode {
fn as_str(self) -> &'static str {
match self {
Self::Quick => "quick",
Self::Standard => "standard",
Self::Deep => "deep",
}
}
}
// Output formats accepted by `--format`.
//
// How `execute` handles each variant:
// - `Terminal`: rendered to stdout unless `--quiet` is set.
// - `Json` / `Md` / `Csv`: written to `<output>/<owner>_<repo>.{json,md,csv}`.
// - `Sarif`: accepted but currently skipped with a warning.
//
// NOTE: `//` rather than `///` is deliberate — clap's `ValueEnum` derive uses
// doc comments as help text for the possible values.
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
pub enum Format {
Terminal,
Json,
Md,
Csv,
Sarif,
}
/// Runs a full scan for the repository named in `args` and writes the report.
///
/// Steps, in order: parse the repository identifier, load config and weights,
/// open the cache (invalidating it for this repository when `--refresh` is
/// set), build the API clients, run the selected modules one after another,
/// aggregate the score, fetch the repository summary from GitHub, store the
/// JSON report in the cache, and emit every requested output format.
///
/// Returns `Ok(0)` on success.
///
/// # Errors
///
/// Returns an error when the repository identifier, scoring version or
/// snapshot timestamp cannot be parsed; when config, weights or cache cannot
/// be loaded; when any selected module fails; when the GitHub repository
/// lookup fails; or when writing the cache entry or an output file fails.
pub async fn execute(args: ScanArgs) -> Result<u8> {
// Wall-clock start; used for `runtime_seconds` in the report.
let started = Instant::now();
tracing::info!(repo = %args.repo, mode = ?args.mode, "scan starting");
let full_name = repo_url::parse(&args.repo).context("invalid repo identifier")?;
let canonical_url = url::Url::parse(&format!("https://github.com/{full_name}"))?;
let cfg = config::load::<()>(None).context("loading config")?;
// CLI/env token takes precedence; the config is only a fallback.
let token = args.token.clone().or_else(|| cfg.github.resolve_token());
if token.is_none() {
tracing::warn!("no GitHub token configured; running unauthenticated (60 req/h limit)");
}
// `--weights` file overrides the weights from the config.
let weights = if let Some(p) = &args.weights {
crate::scoring::weights::load(p).context("loading custom weights")?
} else {
ModuleWeights::from(cfg.weights)
};
let cache_path = cfg.cache.resolved_path();
let cache = Cache::open(&cache_path).context("opening cache")?;
// `--refresh`: drop this repository's cached entries before any client
// is constructed on top of the cache.
if args.refresh {
let n = cache.delete_by_repo(&full_name)?;
tracing::info!(invalidated = n, "cache invalidated for repo");
}
// One HTTP client and one cache handle, cloned into each API client.
let http = crate::api::client::build()?;
let limiter = RateLimiter::default();
let mut github = GhClient::new(http.clone(), cache.clone(), limiter, token);
let mut scorecard = ScorecardClient::new(http.clone(), cache.clone());
let mut osv = OsvClient::new(http.clone(), cache.clone());
let mut deps_dev = DepsDevClient::new(http.clone(), cache.clone());
// Hidden override: point all four clients at the same base URL.
if let Some(base) = args.api_base_url.as_deref() {
github = github.with_base_url(base);
scorecard = scorecard.with_base_url(base);
osv = osv.with_base_url(base);
deps_dev = deps_dev.with_base_url(base);
}
let scoring_version = match &args.scoring_version {
Some(s) => semver::Version::parse(s).context("invalid scoring version")?,
// The crate constant is expected to be valid SemVer; a failure here is a bug.
None => semver::Version::parse(crate::SCORING_VERSION)
.expect("crate SCORING_VERSION is valid SemVer"),
};
// Without `--seed`, the seed is derived from repository name + scoring version.
let rng_seed = args.seed.unwrap_or_else(|| {
crate::utils::sampling::derive_seed(&full_name, &scoring_version.to_string())
});
// Snapshot time: explicit ISO 8601 override, otherwise "now" in UTC.
let snapshot_at = match &args.snapshot_at {
Some(s) => {
time::OffsetDateTime::parse(s, &time::format_description::well_known::Iso8601::DEFAULT)
.context("--snapshot-at must be ISO 8601 (e.g. 2026-05-03T12:00:00Z)")?
},
None => OffsetDateTime::now_utc(),
};
// Shared context handed to every module. `github` is cloned because the
// original client is used again below for the repository summary.
let ctx = RepositoryContext {
full_name: full_name.clone(),
canonical_url,
mode: args.mode,
scoring_version: scoring_version.clone(),
weights,
rng_seed,
snapshot_at,
cache,
github: github.clone(),
scorecard,
osv,
deps_dev,
};
let selected = select_modules(args.modules.as_ref(), args.skip_modules.as_ref());
let mut module_results: Vec<ModuleResult> = Vec::new();
let mut all_evidence = Vec::new();
use crate::modules::TrustModule;
// Modules run sequentially, in selection order. Unknown names are skipped
// (logged at debug level only); the first failing module aborts the scan.
for name in &selected {
let result = match name.as_str() {
"activity" => {
let m = crate::modules::activity::ActivityModule;
Some(m.run(&ctx).await)
},
"maintainers" => {
let m = crate::modules::maintainers::MaintainersModule;
Some(m.run(&ctx).await)
},
"security" => {
let m = crate::modules::security::SecurityModule;
Some(m.run(&ctx).await)
},
"stars" => {
let m = crate::modules::stars::StarsModule;
Some(m.run(&ctx).await)
},
"adoption" => {
let m = crate::modules::adoption::AdoptionModule;
Some(m.run(&ctx).await)
},
other => {
tracing::debug!(module = other, "unknown module name; skipping");
None
},
};
if let Some(res) = result {
let (r, ev) = res.with_context(|| format!("module '{name}' failed"))?;
module_results.push(r);
all_evidence.extend(ev);
}
}
let overall_score = aggregate(&module_results, &ctx.weights);
let overall_conf = overall_confidence(&module_results, &ctx.weights);
let category = Category::from_score(overall_score);
// Strengths/concerns are computed with a limit argument of 3.
let (top_strengths, top_concerns) =
crate::scoring::explain::top_strengths_and_concerns(&all_evidence, 3);
let (owner, name) = full_name
.split_once('/')
.ok_or_else(|| anyhow::anyhow!("invalid full_name"))?;
// The repository summary is fetched only after all modules have run.
let summary = match github.get_repo(owner, name).await {
Ok(r) => RepositorySummary {
full_name: r.full_name,
url: r.html_url,
default_branch: r.default_branch,
primary_language: r.language,
stars: r.stargazers_count,
snapshot_at,
},
Err(e) => {
// `map_github_error` currently always yields `None`, so the
// original error is returned unchanged.
return Err(map_github_error(&e).unwrap_or(e));
},
};
// Runtime is captured here, i.e. before the report files are written.
let runtime_seconds = started.elapsed().as_secs_f64();
// Sort evidence by (module, code) so the report order does not depend on
// the order in which modules produced it.
let mut evidence_sorted = all_evidence;
evidence_sorted.sort_by(|a, b| {
(a.module.as_str(), a.code.as_str()).cmp(&(b.module.as_str(), b.code.as_str()))
});
let report = TrustReport {
schema_version: crate::REPORT_SCHEMA_VERSION.to_string(),
repository: summary,
overall_score,
overall_confidence: overall_conf,
category,
// Translate the CLI enum into the model enum variant by variant.
mode: match args.mode {
Mode::Quick => crate::models::Mode::Quick,
Mode::Standard => crate::models::Mode::Standard,
Mode::Deep => crate::models::Mode::Deep,
},
modules: module_results,
evidence: evidence_sorted,
top_strengths,
top_concerns,
caveats: Vec::new(),
scoring_version: scoring_version.to_string(),
weights_used: ctx.weights,
snapshot_at,
runtime_seconds: crate::utils::time::round6(runtime_seconds),
};
std::fs::create_dir_all(&args.output)
.with_context(|| format!("creating output dir {:?}", args.output))?;
// File stem for report files: `owner/repo` becomes `owner_repo`.
let safe = full_name.replace('/', "_");
let formats = resolve_formats(&args, &cfg.output.default_formats);
// The JSON report is always stored in the cache, regardless of the
// requested output formats.
let json_bytes = serde_json::to_vec(&report)?;
ctx.cache.put_report(
&full_name,
args.mode.as_str(),
&scoring_version.to_string(),
&json_bytes,
)?;
// Paths of files written, reported to the user afterwards.
let mut wrote: Vec<std::path::PathBuf> = Vec::new();
for fmt in &formats {
match fmt {
Format::Json => {
let p = args.output.join(format!("{safe}.json"));
json_report::write(&report, &p)?;
wrote.push(p);
},
Format::Md => {
let p = args.output.join(format!("{safe}.md"));
crate::reports::markdown_report::write(&report, &p)?;
wrote.push(p);
},
Format::Csv => {
let p = args.output.join(format!("{safe}.csv"));
crate::reports::csv_report::write(&report, &p)?;
wrote.push(p);
},
Format::Terminal => {
// Terminal output goes to stdout and is suppressed by `--quiet`.
if !args.quiet {
let stdout = std::io::stdout();
let mut handle = stdout.lock();
crate::reports::terminal::write(&report, &mut handle, !args.no_color)?;
}
},
Format::Sarif => {
tracing::warn!("SARIF output deferred to v1.1; skipping");
},
}
}
if !args.quiet {
for p in &wrote {
println!("wrote {}", p.display());
}
// NOTE(review): the second placeholder prints the scan mode label,
// not a maximum score — confirm this is the intended wording.
println!(
"score {} / {}, confidence {:?}",
report.overall_score,
mode_label(args.mode),
report.overall_confidence,
);
}
Ok(0)
}
/// Decides which output formats to emit.
///
/// Precedence, highest first:
/// 1. `--json` — exactly `[Format::Json]`.
/// 2. A non-empty `--format` list — used as given.
/// 3. `default_formats` from the config — unrecognised names are dropped
///    (logged at debug level).
///
/// If the config yields no recognised format, JSON is used as the fallback.
fn resolve_formats(args: &ScanArgs, default_formats: &[String]) -> Vec<Format> {
    if args.json {
        return vec![Format::Json];
    }
    if !args.format.is_empty() {
        return args.format.clone();
    }

    let mut resolved: Vec<Format> = default_formats
        .iter()
        .filter_map(|entry| match entry.as_str() {
            "terminal" => Some(Format::Terminal),
            "json" => Some(Format::Json),
            "md" | "markdown" => Some(Format::Md),
            "csv" => Some(Format::Csv),
            "sarif" => Some(Format::Sarif),
            other => {
                tracing::debug!(format = other, "unknown format in config; skipping");
                None
            },
        })
        .collect();

    // Never return an empty list: fall back to JSON.
    if resolved.is_empty() {
        resolved.push(Format::Json);
    }
    resolved
}
fn mode_label(m: Mode) -> &'static str {
m.as_str()
}
/// Resolves the ordered list of module names to run.
///
/// * `enabled` — names requested via `--modules`. When `None`, empty, or
///   containing only blank entries, the built-in default set is used:
///   `stars`, `activity`, `maintainers`, `adoption`, `security`.
/// * `skipped` — names to remove from the selection (`--skip-modules`).
///
/// Names are trimmed of surrounding whitespace and blank entries are ignored,
/// so `--modules "stars, activity"` selects `activity` rather than the
/// unknown name `" activity"`. Duplicates are dropped, keeping the first
/// occurrence, so a module listed twice is not run (and scored) twice.
/// Relative order is otherwise preserved.
///
/// Unknown names are *not* filtered here; they are returned as-is and left to
/// the caller to handle.
fn select_modules(enabled: Option<&Vec<String>>, skipped: Option<&Vec<String>>) -> Vec<String> {
    const DEFAULT_MODULES: [&str; 5] = ["stars", "activity", "maintainers", "adoption", "security"];

    /// Trims every entry and discards the ones that end up empty.
    fn cleaned(list: Option<&Vec<String>>) -> Vec<&str> {
        list.into_iter()
            .flatten()
            .map(|s| s.trim())
            .filter(|s| !s.is_empty())
            .collect()
    }

    let requested = cleaned(enabled);
    let skip = cleaned(skipped);

    // An explicit request wins; otherwise fall back to the default set.
    let candidates: &[&str] = if requested.is_empty() {
        &DEFAULT_MODULES
    } else {
        &requested
    };

    // The lists are tiny (a handful of names), so linear scans are fine and
    // keep the original ordering without extra data structures.
    let mut selected: Vec<String> = Vec::with_capacity(candidates.len());
    for &name in candidates {
        let is_skipped = skip.contains(&name);
        let is_duplicate = selected.iter().any(|s| s == name);
        if !is_skipped && !is_duplicate {
            selected.push(name.to_owned());
        }
    }
    selected
}
fn map_github_error(e: &anyhow::Error) -> Option<anyhow::Error> {
let _ = e;
None
}
#[must_use]
pub fn exit_code_for(error: &anyhow::Error) -> u8 {
match error.downcast_ref::<GithubError>() {
Some(GithubError::NotFound) => 2,
Some(GithubError::Unauthorized) => 3,
Some(GithubError::Forbidden(_)) => 4,
_ => 1,
}
}