#![warn(missing_docs)]
mod commands;
use std::path::PathBuf;
use clap::{Args, Parser, Subcommand, ValueEnum};
use tracing_subscriber::EnvFilter;
use tga::core::config::{Config, ConfigValidator};
use tga::core::db::Database;
use crate::commands::aliases::AliasesArgs;
use crate::commands::author::AuthorArgs;
use crate::commands::backfill::BackfillArgs;
use crate::commands::deployments::DeploymentsCollectArgs;
use crate::commands::dora::DoraArgs;
use crate::commands::incidents::IncidentsCollectArgs;
use crate::commands::install::InstallArgs;
use crate::commands::override_cmd::OverrideArgs;
use crate::commands::pr_metrics::PrMetricsArgs;
use crate::commands::rules::RulesArgs;
// Top-level command-line interface for the `tga` binary: global options plus
// the selected subcommand.
//
// Plain `//` comments are used on this clap-derived type on purpose: clap
// derive turns `///` doc comments into help text, which would change output.
#[derive(Parser, Debug)]
#[command(
name = "tga",
about = "trusty-git-analytics — developer productivity analytics",
long_about = "trusty-git-analytics — developer productivity analytics.\n\n\
Three-stage pipeline: collect → classify → report. Run `tga analyze` \
for the full pipeline, or invoke each stage individually.\n\n\
Architecture decisions are documented in docs/adr/. See \
docs/adr/README.md for the format and process.",
version,
propagate_version = true
)]
struct Cli {
// `-c` / `--config`: path of the configuration file (default `config.yaml`).
// `run` loads it when the path exists and otherwise falls back to
// `Config::default()` with a warning.
#[arg(short, long, default_value = "config.yaml", global = true)]
config: PathBuf,
// `-d` / `--database`: path handed to `Database::open` (default `tga.db`).
#[arg(short, long, default_value = "tga.db", global = true)]
database: PathBuf,
// `-v` (repeatable): occurrence count. `run` maps 0 → WARN, 1 → INFO,
// 2 → DEBUG and 3 or more → TRACE, but only when `--log` is absent.
#[arg(short, long, action = clap::ArgAction::Count, global = true)]
verbose: u8,
// `--log LEVEL`: explicit log level; takes precedence over the `-v` count.
#[arg(long, value_name = "LEVEL", global = true)]
log: Option<LogLevel>,
// The subcommand to execute; dispatched in `run`.
#[command(subcommand)]
command: Commands,
}
// Log levels accepted by the global `--log` option.
//
// `rename_all = "lower"` makes the accepted command-line spellings the
// lowercase variant names (`error`, `warn`, `info`, `debug`, `trace`).
// Each variant converts to the `tracing::Level` constant of the same name via
// the `From<LogLevel>` impl in this file.
#[derive(Copy, Clone, Debug, ValueEnum)]
#[clap(rename_all = "lower")]
enum LogLevel {
Error,
Warn,
Info,
Debug,
Trace,
}
impl From<LogLevel> for tracing::Level {
fn from(l: LogLevel) -> Self {
match l {
LogLevel::Error => tracing::Level::ERROR,
LogLevel::Warn => tracing::Level::WARN,
LogLevel::Info => tracing::Level::INFO,
LogLevel::Debug => tracing::Level::DEBUG,
LogLevel::Trace => tracing::Level::TRACE,
}
}
}
// All `tga` subcommands. `run` dispatches each variant to the handler module
// named in the comment next to it.
//
// Plain `//` comments are used on purpose: clap derive turns `///` doc
// comments on variants into subcommand help text.
#[derive(Subcommand, Debug)]
enum Commands {
// Handled by `commands::author::run`.
Author(AuthorArgs),
// Full pipeline (collect → classify → report); `commands::analyze::run`.
Analyze(AnalyzeArgs),
// Stage 1; `commands::collect::run`.
Collect(CollectArgs),
// Stage 2; `commands::classify::run`.
Classify(ClassifyArgs),
// Stage 3; `commands::report::run`.
Report(ReportArgs),
// Handled by `commands::pr_metrics::run`.
PrMetrics(PrMetricsArgs),
// Handled by `commands::install::run`. Dispatched early in `run`, before
// configuration validation and before the database is opened.
Install(InstallArgs),
// Handled by `commands::aliases::run`.
Aliases(AliasesArgs),
// Handled by `commands::backfill::run`.
Backfill(BackfillArgs),
// Handled by `commands::override_cmd::run`.
Override(OverrideArgs),
// Handled by `commands::rules::run`.
Rules(RulesArgs),
// Wrapper for the nested `deployments` subcommands.
Deployments(DeploymentsSubcommandArgs),
// Wrapper for the nested `incidents` subcommands.
Incidents(IncidentsSubcommandArgs),
// Handled by `commands::dora::run`.
Dora(DoraArgs),
}
// Argument wrapper for `Commands::Deployments`; it only carries the nested
// subcommand selection.
#[derive(Args, Debug)]
pub struct DeploymentsSubcommandArgs {
// The nested subcommand chosen by the user.
#[command(subcommand)]
pub subcommand: DeploymentsSubcommand,
}
// Nested subcommands under `Commands::Deployments`.
#[derive(Subcommand, Debug)]
pub enum DeploymentsSubcommand {
// Dispatched by `run` to `commands::deployments::run`.
Collect(DeploymentsCollectArgs),
}
// Argument wrapper for `Commands::Incidents`; it only carries the nested
// subcommand selection.
#[derive(Args, Debug)]
pub struct IncidentsSubcommandArgs {
// The nested subcommand chosen by the user.
#[command(subcommand)]
pub subcommand: IncidentsSubcommand,
}
// Nested subcommands under `Commands::Incidents`.
#[derive(Subcommand, Debug)]
pub enum IncidentsSubcommand {
// Dispatched by `run` to `commands::incidents::run`.
Collect(IncidentsCollectArgs),
}
// Arguments for `tga analyze`, which runs collect → classify → report.
//
// Plain `//` comments are used on purpose: clap derive turns `///` doc
// comments into help text, which would change the command's output.
//
// The NOTE in `long_about` lists `tga collect --branch main` BEFORE
// `tga analyze --skip-collect`: the branch-restricted collection has to
// populate the database before the remaining stages read it, which is also
// what the TIPS section below recommends.
#[derive(Args, Debug)]
#[command(
    about = "Run the full pipeline: collect → classify → report.",
    long_about = "Run all three stages (collect, classify, report) in sequence against the\n\
    configured repositories.\n\n\
    This is the normal production command for routine analytics runs. Individual\n\
    stages can be invoked separately (tga collect / tga classify / tga report) when\n\
    you need surgical control over a single step.\n\n\
    NOTE: --branch is available via `tga collect` when running stages individually.\n\
    Use `tga collect --branch main && tga analyze --skip-collect` for branched runs.",
    after_help = "EXAMPLES:\n\
    # Standard weekly run (collect last 4 weeks, classify, report)\n\
    tga analyze --weeks 4\n\n\
    # Full history refresh after upgrading (re-collects all weeks)\n\
    tga analyze --force\n\n\
    # Skip slow collection; re-classify and regenerate reports only\n\
    tga analyze --skip-collect\n\n\
    TIPS:\n\
    - Run `tga collect --branch main --force` first to restrict the corpus.\n\
    - Use --dry-run to preview collection work without database writes."
)]
pub struct AnalyzeArgs {
    // `--skip-collect`: skip the collection stage (see the help examples).
    #[arg(long)]
    pub skip_collect: bool,
    // `--skip-classify`: presumably skips the classification stage — the
    // handler lives in `commands::analyze`; confirm there.
    #[arg(long)]
    pub skip_classify: bool,
    // `-o` / `--output`: optional output path for generated reports
    // (NOTE(review): exact semantics are defined in `commands::analyze`).
    #[arg(short, long)]
    pub output: Option<PathBuf>,
    // `-f` / `--force`: the help text describes this as re-collecting all weeks.
    #[arg(long, short = 'f', default_value_t = false)]
    pub force: bool,
    // `--weeks N`: week-count window; mutually exclusive with `--from` / `--to`.
    #[arg(long, value_name = "N", conflicts_with_all = ["from", "to"])]
    pub weeks: Option<u32>,
    // `--from DATE`: kept as the raw string; mutually exclusive with `--weeks`.
    #[arg(long, value_name = "DATE", conflicts_with = "weeks")]
    pub from: Option<String>,
    // `--to DATE`: kept as the raw string; mutually exclusive with `--weeks`.
    #[arg(long, value_name = "DATE", conflicts_with = "weeks")]
    pub to: Option<String>,
    // `--no-fetch`: presumably disables fetching from remotes during
    // collection — TODO confirm in `commands::analyze`.
    #[arg(long, default_value_t = false)]
    pub no_fetch: bool,
    // `--dry-run`: per the help text, previews collection work without
    // database writes.
    #[arg(long, default_value_t = false)]
    pub dry_run: bool,
    // `--validate-only`: forwarded to `run_validation`; on a valid
    // configuration the process reports success and stops before any work.
    #[arg(long, default_value_t = false)]
    pub validate_only: bool,
    // `--no-validate`: forwarded to `run_validation`; skips the configuration
    // pre-flight checks.
    #[arg(long, default_value_t = false)]
    pub no_validate: bool,
}
// Arguments for `tga collect` (Stage 1 of the pipeline).
//
// Plain `//` comments are used on purpose: clap derive turns `///` doc
// comments into help text, which would change the command's output.
#[derive(Args, Debug)]
#[command(
about = "Collect commits from git repositories into the database (Stage 1).",
long_about = "Walk configured git repositories and persist commit metadata, diff statistics,\n\
and ticket references into the SQLite database.\n\n\
tga 2.0.0 changed the default revwalk to cover ALL local branches and remote\n\
tracking refs (refs/heads/* + refs/remotes/origin/*). Use --head-only to restore\n\
the legacy HEAD-only walk, or --branch to restrict to specific branch names.\n\n\
Typical workflow:\n\
tga collect --weeks 4 # collect last 4 weeks across all repos\n\
tga collect --repos myrepo # collect only one repo\n\
tga collect --force # re-collect all weeks (e.g. after upgrading)\n\
tga classify # run Stage 2 after collection",
after_help = "EXAMPLES:\n\
# First-time setup: collect all history, then classify\n\
tga collect && tga classify && tga report\n\n\
# Incremental re-run scoped to a specific repo and branch\n\
tga collect --repos my-service --branch main --weeks 2\n\n\
# Recover missing branch commits after upgrading from tga <= 1.5.4\n\
tga collect --force\n\n\
TIPS:\n\
- Run `tga classify` immediately after `tga collect` to keep the DB in sync.\n\
- Use `--weeks 4 --force` for routine weekly re-runs to refresh recent data.\n\
- `--branch` is collect-only; commits in the DB do not carry branch attribution."
)]
pub struct CollectArgs {
// `--repos a,b,...`: comma-separated repository names to restrict the run to.
#[arg(long, value_delimiter = ',')]
pub repos: Vec<String>,
// `--branch NAME[,NAME…]`: comma-separated branch names to restrict the walk
// to; mutually exclusive with `--head-only`.
#[arg(
long,
value_delimiter = ',',
conflicts_with = "head_only",
value_name = "NAME[,NAME…]"
)]
pub branch: Vec<String>,
// `--since`: accepted but hidden from `--help` (`hide = true`).
// NOTE(review): looks like a legacy spelling of `--from` — confirm in
// `commands::collect`. Unlike `--from`, it declares no conflict with `--weeks`.
#[arg(long, hide = true)]
pub since: Option<String>,
// `--until`: accepted but hidden from `--help` (`hide = true`).
// NOTE(review): looks like a legacy spelling of `--to` — confirm in
// `commands::collect`. Unlike `--to`, it declares no conflict with `--weeks`.
#[arg(long, hide = true)]
pub until: Option<String>,
// `--from DATE`: kept as the raw string; mutually exclusive with `--weeks`.
#[arg(long, value_name = "DATE", conflicts_with = "weeks")]
pub from: Option<String>,
// `--to DATE`: kept as the raw string; mutually exclusive with `--weeks`.
#[arg(long, value_name = "DATE", conflicts_with = "weeks")]
pub to: Option<String>,
// `-f` / `--force`: the help text describes this as re-collecting all weeks.
#[arg(long, short = 'f', default_value_t = false)]
pub force: bool,
// `--weeks N`: week-count window; mutually exclusive with `--from` / `--to`.
#[arg(long, value_name = "N", conflicts_with_all = ["from", "to"])]
pub weeks: Option<u32>,
// `--no-fetch`: presumably disables fetching from remotes — TODO confirm in
// `commands::collect`.
#[arg(long, default_value_t = false)]
pub no_fetch: bool,
// `--strict-fetch`: fetch-related switch; semantics are defined in
// `commands::collect` (not visible here).
#[arg(long, default_value_t = false)]
pub strict_fetch: bool,
// `--verbose-fetch`: fetch-related switch; semantics are defined in
// `commands::collect` (not visible here).
#[arg(long, default_value_t = false)]
pub verbose_fetch: bool,
// `--dry-run`: presumably previews the work without database writes, as the
// `analyze` help states for its own flag — TODO confirm.
#[arg(long, default_value_t = false)]
pub dry_run: bool,
// `--force-refresh-prs`: semantics are defined in `commands::collect`
// (not visible here).
#[arg(long, default_value_t = false)]
pub force_refresh_prs: bool,
// `--skip-tag-reachability`: semantics are defined in `commands::collect`
// (not visible here).
#[arg(long, default_value_t = false)]
pub skip_tag_reachability: bool,
// `--validate-only`: forwarded to `run_validation`; on a valid configuration
// the process reports success and stops before any work.
#[arg(long, default_value_t = false)]
pub validate_only: bool,
// `--no-validate`: forwarded to `run_validation`; skips the configuration
// pre-flight checks.
#[arg(long, default_value_t = false)]
pub no_validate: bool,
// `--head-only`: per the help text, restores the legacy HEAD-only walk;
// mutually exclusive with `--branch`.
#[arg(long, default_value_t = false, conflicts_with = "branch")]
pub head_only: bool,
}
// Arguments for `tga classify` (Stage 2 of the pipeline).
//
// Plain `//` comments are used on purpose: clap derive turns `///` doc
// comments into help text, which would change the command's output.
#[derive(Args, Debug)]
#[command(
about = "Classify collected commits using the four-tier cascade (Stage 2).",
long_about = "Run the classification cascade over commits already in the database.\n\n\
The cascade applies rules in this order:\n\
Tier 0 -- manual overrides (tga override add)\n\
Tier 1 -- external ticket sources (JIRA, GitHub Issues, Linear, ADO)\n\
Tier 2 -- commit-message regex rules (built-in + custom --rules file)\n\
Tier 3 -- LLM fallback (requires --use-llm or config.classification.use_llm)\n\n\
By default, commits that already have a classification are skipped for\n\
efficiency. Pass --force to re-classify a slice (e.g. after a rule update).\n\n\
NOTE: --branch is collect-only. Commits in the DB do not carry branch\n\
attribution after the walk, so there is no branch filter here.",
after_help = "EXAMPLES:\n\
# Classify all unclassified commits (normal incremental run)\n\
tga classify\n\n\
# Re-classify commits in the last 8 weeks after updating rules\n\
tga classify --force --since 2026-01-01\n\n\
# Re-classify only the last 4 weeks for one repo\n\
tga classify --force --repos my-service --weeks 4\n\n\
TIPS:\n\
- After updating your rules file, run `tga classify --force` to reprocess.\n\
- Use `--no-external` in CI to skip network calls to JIRA/GitHub Issues."
)]
pub struct ClassifyArgs {
// `--repos a,b,...`: comma-separated repository names to restrict the run to.
#[arg(long, value_delimiter = ',')]
pub repos: Vec<String>,
// `--weeks N`: week-count window; mutually exclusive with `--since` / `--until`.
#[arg(long, value_name = "N", conflicts_with_all = ["since", "until"])]
pub weeks: Option<u32>,
// `--since DATE`: kept as the raw string; mutually exclusive with `--weeks`.
#[arg(long, value_name = "DATE", conflicts_with = "weeks")]
pub since: Option<String>,
// `--until DATE`: kept as the raw string; mutually exclusive with `--weeks`.
#[arg(long, value_name = "DATE", conflicts_with = "weeks")]
pub until: Option<String>,
// `--rules PATH`: custom rules file (Tier 2 in the help text).
#[arg(long)]
pub rules: Option<PathBuf>,
// `--use-llm`: enables the Tier 3 LLM fallback described in the help text.
#[arg(long)]
pub use_llm: bool,
// `--backfill-complexity`: semantics are defined in `commands::classify`
// (not visible here).
#[arg(long)]
pub backfill_complexity: bool,
// `-f` / `--force`: re-classify commits that already have a classification.
#[arg(long, short = 'f', default_value_t = false)]
pub force: bool,
// `--no-external`: per the help text, skips network calls to external
// ticket sources such as JIRA / GitHub Issues.
#[arg(long, default_value_t = false)]
pub no_external: bool,
}
// Arguments for `tga report` (Stage 3 of the pipeline).
//
// Plain `//` comments are used on purpose: clap derive turns `///` doc
// comments into help text, which would change the command's output.
#[derive(Args, Debug)]
#[command(
about = "Generate productivity reports from classified commits (Stage 3).",
long_about = "Produce CSV, JSON, and/or Markdown reports from the classified commits\n\
already in the database. Reports aggregate metrics by author, week, and category.\n\n\
Use --author to drill down to a single engineer's output.\n\
Use --formats to select one or more output formats.\n\n\
NOTE: --branch and --repos are collect-level concepts. The report reads\n\
whatever is in the database; filter at collection time if needed.",
after_help = "EXAMPLES:\n\
# Generate all formats for the full team\n\
tga report --formats csv,json,markdown\n\n\
# Per-engineer drill-down\n\
tga report --author alice@example.com\n\n\
# Write reports to a custom directory\n\
tga report --output ./reports --formats markdown\n\n\
TIPS:\n\
- Use `tga aliases list` to find canonical email addresses for --author.\n\
- Reports cover all classified data in the DB; re-run classify first if\n\
recent commits are unclassified."
)]
pub struct ReportArgs {
// `-o` / `--output`: optional directory the reports are written to (see the
// "custom directory" example in the help text).
#[arg(short, long)]
pub output: Option<PathBuf>,
// `--formats a,b,...`: comma-separated output formats; the help example uses
// `csv,json,markdown`. Kept as raw strings here.
#[arg(long, value_delimiter = ',')]
pub formats: Vec<String>,
// `--author EMAIL`: restrict the report to a single engineer.
#[arg(long, value_name = "EMAIL")]
pub author: Option<String>,
}
/// Runs the configuration pre-flight checks and tells the caller whether to
/// stop before doing any real work.
///
/// * `config` – configuration handed to `ConfigValidator`.
/// * `no_validate` – skip the checks entirely.
/// * `validate_only` – the user only wants the checks, not the command.
///
/// Returns `Ok(true)` when the caller should exit successfully right away and
/// `Ok(false)` when it should carry on with the command.
///
/// # Errors
///
/// Returns an error when validation ran and reported at least one problem;
/// every problem is printed to stderr first.
fn run_validation(config: &Config, no_validate: bool, validate_only: bool) -> anyhow::Result<bool> {
    match (no_validate, validate_only) {
        // Both flags given: skipping wins, but there is still nothing to run.
        (true, true) => {
            tracing::warn!("--no-validate overrides --validate-only; exiting without checks");
            return Ok(true);
        }
        (true, false) => {
            tracing::debug!("--no-validate: skipping configuration pre-flight checks");
            return Ok(false);
        }
        (false, _) => {}
    }

    let errors = ConfigValidator::new(config).validate();
    if !errors.is_empty() {
        eprintln!("Configuration validation found {} error(s):", errors.len());
        for e in &errors {
            eprintln!(" - {e}");
        }
        return Err(anyhow::anyhow!(
            "configuration validation failed ({} error(s)); use --no-validate to skip",
            errors.len()
        ));
    }

    // Clean configuration: stop here only if validation was all that was asked.
    if validate_only {
        println!("Configuration OK.");
    }
    Ok(validate_only)
}
/// Help metadata parsed from the `help.yaml` file embedded at compile time.
///
/// Initialised on first access; `run` passes it to
/// `trusty_common::help::print_suggestion_hint`.
static HELP: std::sync::LazyLock<trusty_common::help::HelpConfig> =
    std::sync::LazyLock::new(load_bundled_help);

/// Parses the embedded `help.yaml`.
///
/// Panics if the bundled file cannot be loaded, since that means the binary
/// itself was built from a broken file.
fn load_bundled_help() -> trusty_common::help::HelpConfig {
    let bundled_yaml = include_str!("../help.yaml");
    trusty_common::help::load_help(bundled_yaml).expect("tga help.yaml is bundled and valid")
}
/// Process entry point: builds a multi-threaded Tokio runtime, drives [`run`]
/// to completion on it, and returns whatever [`run`] produced.
///
/// # Errors
///
/// Fails if the runtime cannot be built, or with the error returned by [`run`].
fn main() -> anyhow::Result<()> {
    let mut builder = tokio::runtime::Builder::new_multi_thread();
    builder.enable_all();
    let rt = builder.build()?;

    let outcome = rt.block_on(run());

    // Zero timeout: tear the runtime down without waiting on leftover tasks.
    rt.shutdown_timeout(std::time::Duration::ZERO);
    outcome
}
async fn run() -> anyhow::Result<()> {
let argv: Vec<String> = std::env::args().collect();
let cli = match Cli::try_parse() {
Ok(cli) => cli,
Err(e) => {
e.print().ok();
if matches!(
e.kind(),
clap::error::ErrorKind::InvalidSubcommand | clap::error::ErrorKind::UnknownArgument
) {
trusty_common::help::print_suggestion_hint(&argv, &HELP);
}
std::process::exit(e.exit_code());
}
};
let level: tracing::Level = if let Some(l) = cli.log {
l.into()
} else {
match cli.verbose {
0 => tracing::Level::WARN,
1 => tracing::Level::INFO,
2 => tracing::Level::DEBUG,
_ => tracing::Level::TRACE,
}
};
let env_filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level.to_string()));
tracing_subscriber::fmt().with_env_filter(env_filter).init();
let config = if cli.config.exists() {
tracing::info!(path = %cli.config.display(), "loading config");
Config::load(&cli.config)?
} else {
tracing::warn!(
"config file {} not found, using defaults",
cli.config.display()
);
Config::default()
};
if let Commands::Install(args) = cli.command {
return commands::install::run(config, args);
}
let should_short_circuit = match &cli.command {
Commands::Analyze(args) => run_validation(&config, args.no_validate, args.validate_only)?,
Commands::Collect(args) => run_validation(&config, args.no_validate, args.validate_only)?,
_ => false,
};
if should_short_circuit {
return Ok(());
}
tracing::info!(path = %cli.database.display(), "opening database");
let mut db = Database::open(&cli.database)?;
match cli.command {
Commands::Author(args) => commands::author::run(config, &db, args)?,
Commands::Analyze(args) => commands::analyze::run(config, &mut db, args).await?,
Commands::Collect(args) => commands::collect::run(config, &mut db, args).await?,
Commands::Classify(args) => commands::classify::run(config, &mut db, args).await?,
Commands::Report(args) => commands::report::run(config, &db, args)?,
Commands::PrMetrics(args) => commands::pr_metrics::run(config, &db, args)?,
Commands::Aliases(args) => commands::aliases::run(config, &mut db, args)?,
Commands::Backfill(args) => commands::backfill::run(config, &mut db, args).await?,
Commands::Override(args) => commands::override_cmd::run(config, &mut db, args)?,
Commands::Rules(args) => commands::rules::run(config, &db, args)?,
Commands::Deployments(args) => match args.subcommand {
DeploymentsSubcommand::Collect(a) => {
commands::deployments::run(config, &mut db, a).await?
}
},
Commands::Incidents(args) => match args.subcommand {
IncidentsSubcommand::Collect(a) => commands::incidents::run(config, &mut db, a)?,
},
Commands::Dora(args) => commands::dora::run(config, &mut db, args)?,
Commands::Install(_) => unreachable!("install dispatched above"),
}
Ok(())
}