use anyhow::{bail, Result};
use std::collections::{HashMap, HashSet};
use std::io::{self, BufRead, IsTerminal, Write};
use std::path::Path;
use crate::cache::storage::CACHE_DIR;
use crate::collector::Collector;
use crate::snapshot::TimeWindow;
const CONFIG_FILE: &str = "barad-dur.toml";
const TRANSLATION_EXTENSIONS: &[&str] = &[
"resx", "po", "pot", "xlf", "xliff", "strings", "arb", "lproj",
];
const GENERATED_PATTERNS: &[&str] = &[".generated.", ".designer.", ".g.cs"];
const VENDOR_DIRS: &[&str] = &["vendor", "third_party", "node_modules", "bower_components"];
const I18N_DIRS: &[&str] = &["i18n", "l10n", "locales", "translations"];
/// Summary of a repository scan, used to pre-populate the generated config.
#[derive(Debug, Default)]
pub struct ScanResult {
/// Suggested exclude globs paired with the number of scanned files each one
/// matched; `scan_repo` drops globs that matched nothing.
pub exclude_patterns: Vec<(String, usize)>,
/// Number of files returned by the collector.
pub total_files: usize,
/// Number of commits returned by the collector.
pub total_commits: usize,
/// Number of authors reported by the collector's commit collection.
pub distinct_authors: usize,
/// Set by `scan_repo` when the repository has more than 10 000 files; the
/// generated config then carries a hint about `skip_blame`.
pub suggest_skip_blame: bool,
}
/// Suggests exclude globs for the given repository-relative file paths.
///
/// Four categories are checked, and patterns are returned in this order:
///
/// 1. `*.<ext>` for every entry of `TRANSLATION_EXTENSIONS` that occurs as a
///    (case-insensitively compared) file extension,
/// 2. `*<substr>*` for every entry of `GENERATED_PATTERNS` contained in a path,
/// 3. `<dir>/**` for every entry of `VENDOR_DIRS` found as a leading or inner
///    path component,
/// 4. `**/<dir>/**` for every entry of `I18N_DIRS` found the same way.
///
/// Within each category the order follows the constant's declaration order,
/// so the result is deterministic for a given input. The returned list
/// contains no duplicates and is empty when nothing matches.
pub fn detect_exclude_patterns(file_paths: &[&str]) -> Vec<String> {
    /// True when `dir` is the first component of a path or appears as an
    /// inner `/dir/` component. The needles are built once per directory
    /// instead of once per inspected path.
    fn any_path_in_dir(file_paths: &[&str], dir: &str) -> bool {
        let leading = format!("{}/", dir);
        let nested = format!("/{}/", dir);
        file_paths
            .iter()
            .any(|p| p.starts_with(leading.as_str()) || p.contains(nested.as_str()))
    }

    let mut patterns: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    // Appends a pattern unless an identical one was already emitted.
    let mut push_unique = |pattern: String| {
        if seen.insert(pattern.clone()) {
            patterns.push(pattern);
        }
    };

    // Tally the translation extensions that actually occur (lower-cased).
    let mut ext_counts: HashMap<String, usize> = HashMap::new();
    for path in file_paths {
        if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
            let ext_lower = ext.to_lowercase();
            if TRANSLATION_EXTENSIONS.contains(&ext_lower.as_str()) {
                *ext_counts.entry(ext_lower).or_insert(0) += 1;
            }
        }
    }
    // Walk the constant rather than the map: `HashMap` iteration order is
    // arbitrary, which would make the generated config differ between runs.
    for ext in TRANSLATION_EXTENSIONS {
        if ext_counts.contains_key(*ext) {
            push_unique(format!("*.{}", ext));
        }
    }

    for gen_pat in GENERATED_PATTERNS {
        if file_paths.iter().any(|p| p.contains(*gen_pat)) {
            push_unique(format!("*{}*", gen_pat));
        }
    }

    for dir in VENDOR_DIRS {
        if any_path_in_dir(file_paths, dir) {
            push_unique(format!("{}/**", dir));
        }
    }

    for dir in I18N_DIRS {
        if any_path_in_dir(file_paths, dir) {
            push_unique(format!("**/{}/**", dir));
        }
    }

    patterns
}
/// Scans the repository at `repo_path` and gathers the figures the config
/// generator needs.
///
/// Opens a `Collector` with the default `TimeWindow`, collects files and
/// commits, asks `detect_exclude_patterns` for candidate globs and counts how
/// many collected paths each glob matches. Globs that match no file are
/// dropped.
///
/// # Errors
///
/// Propagates any error from opening the collector or from collecting files
/// or commits.
pub fn scan_repo(repo_path: &Path) -> Result<ScanResult> {
    let collector = Collector::open(repo_path, TimeWindow::default())?;
    let files = collector.collect_files()?;
    let collection = collector.collect_commits()?;

    // Paths are compared as (lossily converted) strings from here on.
    let mut file_paths: Vec<String> = Vec::new();
    for file in files.iter() {
        file_paths.push(file.path.to_string_lossy().to_string());
    }
    let file_refs: Vec<&str> = file_paths.iter().map(String::as_str).collect();

    let mut exclude_patterns: Vec<(String, usize)> = Vec::new();
    for pattern in detect_exclude_patterns(&file_refs) {
        let mut matched = 0usize;
        for path in &file_refs {
            if glob_match::glob_match(&pattern, path) {
                matched += 1;
            }
        }
        // A candidate that matches nothing is not worth suggesting.
        if matched > 0 {
            exclude_patterns.push((pattern, matched));
        }
    }

    let total_files = files.len();
    Ok(ScanResult {
        exclude_patterns,
        total_files,
        total_commits: collection.commits.len(),
        distinct_authors: collection.authors.len(),
        suggest_skip_blame: total_files > 10_000,
    })
}
/// Renders the configuration file for `scan` using the non-interactive
/// defaults: a `"6months"` window, detected excludes enabled, `"cli"` output
/// and `auto_open` switched off.
pub fn generate_toml(scan: &ScanResult) -> String {
    let default_since = "6months";
    let default_format = "cli";
    let include_detected_excludes = true;
    let auto_open = false;
    generate_toml_inner(
        scan,
        default_since,
        include_detected_excludes,
        default_format,
        auto_open,
    )
}
/// Renders the full `barad-dur.toml` document.
///
/// * `scan` – scan figures used for the skip-blame hint, the detected exclude
///   patterns and the author-count comment.
/// * `since` – value written to `analysis.since`.
/// * `use_detected_excludes` – when `true` and `scan.exclude_patterns` is not
///   empty, the detected globs are written to `exclude.patterns` (preceded by
///   a comment listing their match counts); otherwise the list is empty.
/// * `format` – value written to `output.format`.
/// * `auto_open` – value written to `output.auto_open`.
///
/// `since`, `format` and every pattern are written as escaped TOML basic
/// strings. They can originate from free-form wizard input, and an unescaped
/// `"` or `\` would otherwise yield a file that no longer parses. Values
/// without such characters are rendered exactly as before.
fn generate_toml_inner(
    scan: &ScanResult,
    since: &str,
    use_detected_excludes: bool,
    format: &str,
    auto_open: bool,
) -> String {
    /// Quotes `value` as a TOML basic string, escaping `"`, `\` and control
    /// characters.
    fn toml_basic_string(value: &str) -> String {
        let mut quoted = String::with_capacity(value.len() + 2);
        quoted.push('"');
        for ch in value.chars() {
            match ch {
                '"' => quoted.push_str("\\\""),
                '\\' => quoted.push_str("\\\\"),
                '\n' => quoted.push_str("\\n"),
                '\r' => quoted.push_str("\\r"),
                '\t' => quoted.push_str("\\t"),
                c if c.is_control() => quoted.push_str(&format!("\\u{:04X}", c as u32)),
                c => quoted.push(c),
            }
        }
        quoted.push('"');
        quoted
    }

    let mut out = String::new();
    out.push_str("# .repository-analysis/barad-dur.toml — Barad-dur repository configuration\n");
    out.push_str(
        "# All barad-dur files live in .repository-analysis/ (config, blame cache, history).\n",
    );
    out.push_str("# Generated by `barad-dur init`. Edit freely.\n");
    out.push_str("# CLI flags override these values when explicitly passed.\n\n");

    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("# Analysis scope\n");
    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("[analysis]\n\n");
    out.push_str("# How far back to look at git history.\n");
    out.push_str(&format!("since = {}\n", toml_basic_string(since)));
    if scan.suggest_skip_blame {
        // Only a hint: the emitted value stays `false` either way.
        out.push_str(&format!(
            "\n# Large repo ({} files) — consider skip_blame = true for faster iteration\n",
            scan.total_files
        ));
    }
    out.push_str("skip_blame = false\n\n");

    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("# File exclusions\n");
    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("[exclude]\n\n");
    out.push_str("use_defaults = true\n");
    if use_detected_excludes && !scan.exclude_patterns.is_empty() {
        out.push_str("\n# Detected patterns:\n");
        for (pattern, count) in &scan.exclude_patterns {
            out.push_str(&format!("# {} ({} files)\n", pattern, count));
        }
        let patterns: Vec<String> = scan
            .exclude_patterns
            .iter()
            .map(|(p, _)| toml_basic_string(p))
            .collect();
        out.push_str(&format!("patterns = [{}]\n\n", patterns.join(", ")));
    } else {
        out.push_str("patterns = []\n\n");
    }

    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("# Category weights (must sum to 100)\n");
    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("[weights]\n\n");
    if scan.distinct_authors > 0 {
        out.push_str(&format!(
            "# {} distinct authors detected in the time window\n",
            scan.distinct_authors
        ));
    }
    out.push_str("health = 30\n");
    out.push_str("team = 30\n");
    out.push_str("evolution = 20\n");
    out.push_str("hygiene = 20\n\n");

    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("# Scoring thresholds\n");
    out.push_str("# ──────────────────────────────────────────────────\n\n");
    out.push_str("[thresholds.health]\n");
    out.push_str("max_complexity = 20\n");
    out.push_str("hotspot_top_n = 10\n");
    out.push_str("coupling_min_commits = 5\n\n");
    out.push_str("[thresholds.team]\n");
    out.push_str("silo_max_owners = 1\n");
    out.push_str("activity_window_days = 30\n\n");
    out.push_str("[thresholds.evolution]\n");
    out.push_str("growth_baseline_months = 3\n");
    out.push_str("refactor_ratio_target = 0.1\n\n");
    out.push_str("[thresholds.hygiene]\n");
    out.push_str("min_message_length = 10\n");
    out.push_str("max_message_length = 72\n\n");

    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("# Output preferences\n");
    out.push_str("# ──────────────────────────────────────────────────\n");
    out.push_str("[output]\n\n");
    out.push_str(&format!("format = {}\n", toml_basic_string(format)));
    out.push_str(&format!("auto_open = {}\n", auto_open));
    out
}
/// Asks `question` on stderr, showing `default` in brackets, and reads one
/// line from stdin.
///
/// Returns the trimmed answer, or `default` when the answer is blank or the
/// read fails. A failed stderr flush is ignored.
fn prompt(question: &str, default: &str) -> String {
    eprint!(" ? {} [{}]: ", question, default);
    let _ = io::stderr().flush();

    let mut line = String::new();
    match io::stdin().lock().read_line(&mut line) {
        Err(_) => default.to_string(),
        Ok(_) => match line.trim() {
            "" => default.to_string(),
            answer => answer.to_string(),
        },
    }
}
/// Asks a yes/no `question` via `prompt`.
///
/// `y`/`yes` and `n`/`no` are accepted case-insensitively; any other answer,
/// including a blank one, yields `default_yes`. The bracketed hint is `Y/n`
/// or `y/N` depending on the default.
fn prompt_yn(question: &str, default_yes: bool) -> bool {
    let hint = match default_yes {
        true => "Y/n",
        false => "y/N",
    };
    let normalized = prompt(question, hint).to_lowercase();
    if normalized == "y" || normalized == "yes" {
        return true;
    }
    if normalized == "n" || normalized == "no" {
        return false;
    }
    // A blank answer comes back as the hint itself and lands here too.
    default_yes
}
/// Prints the wizard banner, asks for simple or advanced mode and delegates
/// to the matching wizard.
///
/// Any answer starting with `a`/`A` selects the advanced wizard; everything
/// else, including the default `S`, selects the simple one.
fn run_wizard(scan: &ScanResult) -> Result<String> {
    eprintln!("\n barad-dur config wizard");
    eprintln!(" ───────────────────────\n");

    let mode = prompt("Configuration mode: [S]imple or [A]dvanced", "S");
    match mode.to_lowercase().starts_with('a') {
        true => run_advanced_wizard(scan),
        false => run_simple_wizard(scan),
    }
}
/// Short interactive flow: asks for the analysis window, whether to apply the
/// detected excludes (only when there are any) and the output format, then
/// renders the config with `auto_open` disabled.
fn run_simple_wizard(scan: &ScanResult) -> Result<String> {
    eprintln!(
        " Detected: {} commits, {} files",
        scan.total_commits, scan.total_files
    );

    let since = prompt("Analysis window", "6months");

    // Without candidates there is nothing to ask about.
    let mut use_detected_excludes = false;
    if !scan.exclude_patterns.is_empty() {
        eprintln!(" Detected exclude candidates:");
        scan.exclude_patterns
            .iter()
            .for_each(|(pattern, count)| eprintln!(" - {} ({} files)", pattern, count));
        use_detected_excludes = prompt_yn("Exclude these from analysis?", true);
    }

    let format = prompt("Default output format (cli/html/json)", "cli");

    let rendered = generate_toml_inner(scan, &since, use_detected_excludes, &format, false);
    Ok(rendered)
}
/// Longer interactive flow: like the simple wizard, but also reports the
/// author count and asks about weights, thresholds and auto-opening HTML
/// reports before rendering the config.
fn run_advanced_wizard(scan: &ScanResult) -> Result<String> {
    eprintln!(
        " Detected: {} commits, {} files, {} authors",
        scan.total_commits, scan.total_files, scan.distinct_authors
    );

    let since = prompt("Analysis window", "6months");

    // Without candidates there is nothing to ask about.
    let mut use_detected_excludes = false;
    if !scan.exclude_patterns.is_empty() {
        eprintln!(" Detected exclude candidates:");
        scan.exclude_patterns
            .iter()
            .for_each(|(pattern, count)| eprintln!(" - {} ({} files)", pattern, count));
        use_detected_excludes = prompt_yn("Add these exclusions?", true);
    }

    // NOTE(review): the next two answers are read and then discarded; the
    // rendered config always carries the default weights and thresholds.
    let _adjust_weights = prompt_yn("Adjust category weights? (default: 30/30/20/20)", false);
    let _skip_thresholds = prompt_yn(
        "Skip detailed threshold configuration? (use defaults)",
        true,
    );

    let format = prompt("Default output format (cli/html/json)", "cli");
    let auto_open = prompt_yn("Auto-open HTML reports in browser?", false);

    let rendered = generate_toml_inner(scan, &since, use_detected_excludes, &format, auto_open);
    Ok(rendered)
}
/// Creates `<target>/<CACHE_DIR>/barad-dur.toml`.
///
/// The repository is scanned first. The config is then produced either by the
/// interactive wizard (when `interactive` is set and stdin is a terminal) or
/// from the auto-detected defaults. Requesting interactive mode without a
/// terminal prints a warning and falls back to the defaults.
///
/// # Errors
///
/// Fails when the config file already exists and `force` is `false`, when the
/// scan or the wizard fails, or when the directory or file cannot be written.
pub fn run_init(target: &Path, force: bool, interactive: bool) -> Result<()> {
    let config_dir = target.join(CACHE_DIR);
    let config_path = config_dir.join(CONFIG_FILE);

    let already_present = config_path.exists();
    if already_present && !force {
        bail!(
            "Config already exists at {}. Use --force to overwrite.",
            config_path.display()
        );
    }

    eprintln!(" Scanning repository...");
    let scan = scan_repo(target)?;

    let toml_content = if !interactive {
        generate_toml(&scan)
    } else if io::stdin().is_terminal() {
        run_wizard(&scan)?
    } else {
        eprintln!("Warning: stdin is not a terminal, falling back to auto-detect mode.");
        generate_toml(&scan)
    };

    std::fs::create_dir_all(&config_dir)?;
    std::fs::write(&config_path, &toml_content)?;
    eprintln!(" Config written to {}", config_path.display());
    Ok(())
}
// Unit tests for pattern detection and TOML rendering. None of them touch the
// filesystem, a git repository or stdin.
#[cfg(test)]
mod tests {
use super::*;
// Translation file extensions (`.resx`, `.po`) produce extension patterns.
#[test]
fn detect_translation_extensions() {
let files = vec!["src/main.rs", "Resources/Strings.resx", "i18n/fr.po"];
let patterns = detect_exclude_patterns(&files);
assert!(patterns.iter().any(|p| p.contains("resx")));
assert!(patterns.iter().any(|p| p.contains("po")));
}
// A nested `i18n/` directory is detected even though `.ts` is not a
// translation extension.
#[test]
fn detect_i18n_directories() {
let files = vec!["src/assets/i18n/en.ts", "src/assets/i18n/fr.ts"];
let patterns = detect_exclude_patterns(&files);
assert!(patterns.iter().any(|p| p.contains("i18n")));
}
// Paths containing `.generated.` yield a generated-code pattern.
#[test]
fn detect_generated_code() {
let files = vec!["Models/Foo.generated.cs", "Views/Bar.designer.cs"];
let patterns = detect_exclude_patterns(&files);
assert!(patterns.iter().any(|p| p.contains("generated")));
}
// Top-level vendor directories are detected.
#[test]
fn detect_vendor_dirs() {
let files = vec!["vendor/lib/foo.go", "node_modules/pkg/index.js"];
let patterns = detect_exclude_patterns(&files);
assert!(patterns.iter().any(|p| p.contains("vendor")));
assert!(patterns.iter().any(|p| p.contains("node_modules")));
}
// Ordinary source trees produce no suggestions at all.
#[test]
fn detect_no_false_positives() {
let files = vec!["src/main.rs", "src/lib.rs", "tests/test.rs"];
let patterns = detect_exclude_patterns(&files);
assert!(patterns.is_empty());
}
// The default document has the expected sections and parses as TOML.
#[test]
fn generate_toml_is_valid() {
let scan = ScanResult::default();
let toml_str = generate_toml(&scan);
assert!(toml_str.contains("[analysis]"));
assert!(toml_str.contains("[weights]"));
assert!(toml_str.contains("since ="));
assert!(toml_str.contains("[output]"));
assert!(toml_str.parse::<toml::Value>().is_ok());
}
// Detected patterns and their match counts appear in the output, which
// still parses as TOML.
#[test]
fn generate_toml_with_detected_patterns() {
let scan = ScanResult {
exclude_patterns: vec![("*.resx".to_string(), 5), ("**/i18n/**".to_string(), 3)],
..Default::default()
};
let toml_str = generate_toml(&scan);
assert!(toml_str.contains("*.resx"));
assert!(toml_str.contains("**/i18n/**"));
assert!(toml_str.contains("5 files"));
assert!(toml_str.parse::<toml::Value>().is_ok());
}
// With `suggest_skip_blame` set, the hint comment mentions the file count.
#[test]
fn generate_toml_suggests_skip_blame_for_large_repos() {
let scan = ScanResult {
total_files: 15_000,
suggest_skip_blame: true,
..Default::default()
};
let toml_str = generate_toml(&scan);
assert!(toml_str.contains("skip_blame"));
assert!(toml_str.contains("15000 files"));
}
}