pub mod analyzer;
mod report;
use std::error::Error;
use std::path::Path;
use crate::git::GitRepo;
use crate::util::parse_since;
use crate::walk::{self, WalkConfig};
use crate::report_helpers;
use analyzer::{FileOwnership, aggregate_by_author, compute_bus_factor, compute_ownership};
use report::{
print_bus_factor_json, print_bus_factor_report, print_json, print_report, print_summary_json,
print_summary_report,
};
/// Reports whether `path` names a lockfile or machine-generated artifact
/// that should be left out of the ownership analysis.
///
/// Only the final path component is inspected, and the comparison is
/// case-sensitive. A path with no final component (for example `..` or an
/// empty path), or whose file name is not valid UTF-8, is never considered
/// generated.
fn is_generated(path: &Path) -> bool {
    // Dependency lockfiles, matched on the exact file name.
    const LOCKFILES: [&str; 9] = [
        "Cargo.lock",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Gemfile.lock",
        "poetry.lock",
        "composer.lock",
        "Pipfile.lock",
        "go.sum",
    ];
    // File-name endings of minified, bundled, or protobuf-generated output.
    const GENERATED_SUFFIXES: [&str; 5] = [".min.js", ".min.css", ".bundle.js", ".pb.go", "_pb2.py"];

    path.file_name()
        .and_then(|os_name| os_name.to_str())
        .map_or(false, |name| {
            LOCKFILES.iter().any(|&lockfile| lockfile == name)
                || GENERATED_SUFFIXES
                    .iter()
                    .any(|&suffix| name.ends_with(suffix))
                || name.contains(".generated.")
        })
}
/// Options controlling what [`run`] computes and how it prints the result.
///
/// Every field is either a plain value or a borrowed string, so the struct
/// is cheap to copy and can be printed with `{:?}` for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct KnowledgeOptions<'a> {
    /// Emit JSON instead of the human-readable report.
    pub json: bool,
    /// Maximum number of entries to print.
    pub top: usize,
    /// Sort key: `"diffusion"` and `"risk"` are recognised; any other value
    /// selects the default ordering.
    pub sort_by: &'a str,
    /// Optional time-window expression, parsed with `parse_since`. When set,
    /// the repository is queried for recent authors.
    pub since: Option<&'a str>,
    /// Keep only files flagged with `knowledge_loss`.
    pub risk_only: bool,
    /// Print a per-author summary instead of the per-file listing.
    pub summary: bool,
    /// Print the bus-factor report instead of the per-file or per-author
    /// output.
    pub bus_factor: bool,
    /// Case-insensitive substring matched against each file's primary owner
    /// name and email; files matching neither are dropped.
    pub author: Option<&'a str>,
}
fn sort_results(results: &mut [FileOwnership], sort_by: &str) {
match sort_by {
"diffusion" => results.sort_by(|a, b| b.contributors.cmp(&a.contributors)),
"risk" => results.sort_by(|a, b| {
a.risk.sort_key().cmp(&b.risk.sort_key()).then_with(|| {
b.ownership_pct
.partial_cmp(&a.ownership_pct)
.unwrap_or(std::cmp::Ordering::Equal)
})
}),
_ => {
results.sort_by(|a, b| {
b.ownership_pct
.partial_cmp(&a.ownership_pct)
.unwrap_or(std::cmp::Ordering::Equal)
});
}
}
}
/// Runs the ownership analysis for the tree described by `cfg` and prints
/// the report selected by `opts`.
///
/// Steps, in order:
/// 1. Open the git repository containing `cfg.path`.
/// 2. If `opts.since` is set, parse it and fetch the repository's recent
///    authors; otherwise use an empty author set.
/// 3. Walk the source files, skipping those `is_generated` rejects, blame
///    each one, accumulate lines per author, and compute per-file ownership.
/// 4. Apply the `author` and `risk_only` filters, then sort.
/// 5. Print exactly one of: the bus-factor report, the per-author summary,
///    or the per-file listing — as JSON when `opts.json` is set.
///
/// The bus factor is computed from the line counts of every blamed file,
/// not only the files that survive the filters in step 4.
///
/// # Errors
///
/// Returns an error when the repository cannot be opened, `opts.since`
/// fails to parse, the recent-author or walk-prefix lookups fail, or the
/// selected printer reports a failure. A file whose blame fails is not an
/// error: a warning goes to stderr and the file is skipped.
pub fn run(cfg: &WalkConfig<'_>, opts: &KnowledgeOptions<'_>) -> Result<(), Box<dyn Error>> {
    use std::collections::{HashMap, HashSet};

    let git_repo = GitRepo::open(cfg.path)
        .map_err(|e| format!("not a git repository (or any parent): {e}"))?;

    let since_ts = opts.since.map(parse_since).transpose()?;
    // Only query the repository for recent authors when a window was given.
    let recent_authors = if since_ts.is_none() {
        HashSet::new()
    } else {
        git_repo.recent_authors(since_ts)?
    };

    let (walk_root, walk_prefix) = git_repo.walk_prefix(cfg.path)?;

    let mut results: Vec<FileOwnership> = Vec::new();
    let mut author_lines: HashMap<String, usize> = HashMap::new();

    for (file_path, spec) in walk::source_files(&walk_root, cfg.exclude_tests(), cfg.filter) {
        if is_generated(&file_path) {
            continue;
        }

        let rel_path = GitRepo::to_git_path(&walk_root, &walk_prefix, &file_path);
        let blames = match git_repo.blame_file(&rel_path) {
            Err(e) => {
                // Best effort: one unblameable file must not abort the run.
                eprintln!("warning: blame {}: {e}", rel_path.display());
                continue;
            }
            Ok(found) => found,
        };

        for blame in &blames {
            *author_lines.entry(blame.author.clone()).or_default() += blame.lines;
        }
        results.push(compute_ownership(
            rel_path,
            spec.name,
            &blames,
            &recent_authors,
        ));
    }

    if let Some(author_filter) = opts.author {
        let needle = author_filter.to_lowercase();
        results.retain(|file| {
            let owner = file.primary_owner.to_lowercase();
            owner.contains(&needle) || file.primary_email.to_lowercase().contains(&needle)
        });
    }
    if opts.risk_only {
        results.retain(|file| file.knowledge_loss);
    }
    sort_results(&mut results, opts.sort_by);

    if opts.bus_factor {
        let bus_factor = compute_bus_factor(&author_lines, 80.0);
        if opts.json {
            return print_bus_factor_json(&bus_factor);
        }
        print_bus_factor_report(&bus_factor);
        return Ok(());
    }

    if !opts.summary {
        return report_helpers::output_results(
            &mut results,
            opts.top,
            opts.json,
            print_json,
            print_report,
        );
    }

    let mut authors = aggregate_by_author(&results);
    match opts.sort_by {
        "diffusion" => authors.sort_by(|lhs, rhs| rhs.total_lines.cmp(&lhs.total_lines)),
        "risk" => {
            authors.sort_by(|lhs, rhs| lhs.worst_risk.sort_key().cmp(&rhs.worst_risk.sort_key()))
        }
        _ => authors.sort_by(|lhs, rhs| rhs.files_owned.cmp(&lhs.files_owned)),
    }

    // Clamp to the available rows so `top` may exceed the author count.
    let shown = &authors[..authors.len().min(opts.top)];
    if opts.json {
        return print_summary_json(shown);
    }
    print_summary_report(shown);
    Ok(())
}
#[cfg(test)]
#[path = "mod_test.rs"]
mod tests;