use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, Utc};
use clap::{Parser, ValueEnum};
use git2::{Commit, ObjectType, Oid, Repository, Tree};
use serde::Serialize;
use std::collections::{BTreeSet, HashMap};
use std::io;
use std::path::{Component, Path, PathBuf};
use tokei::{Config, LanguageType};
#[derive(ValueEnum, Clone, Debug, Copy)]
#[clap(rename_all = "kebab_case")]
enum OutputFormat {
Csv,
}
// Command-line arguments, parsed by clap in `main`.
//
// Plain `//` comments are used instead of `///` because clap's derive macro
// turns doc comments into `--help` text, which would change program output.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
// Positional argument; forwarded to `write_csv` as the repository path.
// Defaults to "." when omitted.
#[arg(default_value = ".")]
path: String,
// Output format, selectable with the short or long flag derived from the
// field name. Defaults to `OutputFormat::Csv`.
#[arg(short, long, default_value_t = OutputFormat::Csv, value_enum)]
format: OutputFormat,
}
/// Line counts for a single commit, as assembled by `build_commit_stat`.
#[derive(Debug)]
struct CommitStat {
/// Commit id rendered as a string.
commit: String,
/// Timestamp reported by `Commit::time()`, converted to a UTC date-time.
date: DateTime<Utc>,
/// Total line count of the commit's tree (taken from `TreeStat::total`).
total: usize,
/// Line count per language (taken from `TreeStat::langs`).
langs: HashMap<LanguageType, usize>,
}
/// Aggregated line counts for one root tree.
///
/// Values of this type are cached per tree id in `collect_commit_stats`, so
/// commits that share a tree reuse the same result.
#[derive(Debug, Clone)]
struct TreeStat {
/// Sum of `BlobStat::total_lines` over every blob that was counted.
total: usize,
/// Per-language sum of `BlobStat::top_lines`; a language is only inserted
/// once a blob contributes a non-zero count.
langs: HashMap<LanguageType, usize>,
}
/// Line counts for one parsed blob, cached per `(blob id, language)` pair.
#[derive(Debug, Clone, Copy)]
struct BlobStat {
/// `lines()` of the parse result as returned by tokei, before summarising.
/// This is the value attributed to the blob's language.
top_lines: usize,
/// `lines()` of the parse result after `summarise()`.
/// NOTE(review): presumably this additionally includes lines of embedded
/// languages merged in by tokei — confirm against the tokei documentation.
total_lines: usize,
}
/// One CSV data row describing a single commit.
///
/// Fields are serialized in declaration order, so reordering them changes the
/// column order of the output.
#[derive(Debug, Serialize)]
struct CsvRow {
/// Commit id as a string.
commit: String,
/// Commit timestamp in UTC.
date: DateTime<Utc>,
/// Total line count for the commit.
total: usize,
/// One count per language column, built in `write_csv` in the same order as
/// the language names of the header row (0 when the commit lacks a language).
langs: Vec<usize>,
}
/// The CSV header, written by `write_csv` as an ordinary record because the
/// writer's automatic header generation is switched off there.
///
/// Field order mirrors `CsvRow` so header and data columns line up.
#[derive(Serialize)]
struct HeaderRow {
/// Title of the commit-id column.
commit: &'static str,
/// Title of the timestamp column.
date: &'static str,
/// Title of the total-lines column.
total: &'static str,
/// One title per language column, produced from each language's string form.
langs: Vec<String>,
}
fn main() -> Result<()> {
let args = Args::parse();
match args.format {
OutputFormat::Csv => write_csv(&args.path),
}
}
fn write_csv(repo_path: &str) -> Result<()> {
let repo = open_repository(repo_path)?;
let mut stats = collect_commit_stats(&repo)?;
stats.sort_by(|a, b| a.date.cmp(&b.date).then(a.commit.cmp(&b.commit)));
let language_columns = collect_language_columns(&stats);
let mut writer = csv::WriterBuilder::new()
.has_headers(false)
.from_writer(io::stdout());
writer
.serialize(HeaderRow {
commit: "Commit",
date: "Time",
total: "Total",
langs: language_columns.iter().map(ToString::to_string).collect(),
})
.context("failed to write CSV header row")?;
for stat in stats {
writer
.serialize(CsvRow {
commit: stat.commit,
date: stat.date,
total: stat.total,
langs: language_columns
.iter()
.map(|language| stat.langs.get(language).copied().unwrap_or(0))
.collect(),
})
.context("failed to write CSV data row")?;
}
writer.flush().context("failed to flush the CSV writer")?;
Ok(())
}
/// Opens the Git repository located at `repo_path`.
///
/// # Errors
///
/// Returns the underlying git2 error, annotated with the path that could not
/// be opened.
fn open_repository(repo_path: &str) -> Result<Repository> {
    let describe_failure = || format!("failed to open the repository at {repo_path}");
    let repo = Repository::open(repo_path).with_context(describe_failure)?;
    Ok(repo)
}
/// Walks every commit reachable from `HEAD` and computes its line statistics.
///
/// Results are returned in the order the revision walk yields the commits.
/// Two caches are shared across the walk: one keyed by root tree id, so
/// commits pointing at an identical tree are only measured once, and one for
/// parsed blobs that is handed down to `compute_tree_stat`.
///
/// # Errors
///
/// Fails on the first commit whose id, commit object, tree, or statistics
/// cannot be obtained; no partial result is returned.
fn collect_commit_stats(repo: &Repository) -> Result<Vec<CommitStat>> {
    let mut revwalk = repo.revwalk().context("failed to create revision walk")?;
    revwalk
        .push_head()
        .context("failed to start revision walk at HEAD")?;

    let tokei_config = Config::default();
    let mut tree_cache: HashMap<Oid, TreeStat> = HashMap::new();
    let mut blob_cache: HashMap<(Oid, LanguageType), BlobStat> = HashMap::new();

    // Collecting into `Result<Vec<_>>` stops at the first failing commit.
    revwalk
        .map(|oid_result| -> Result<CommitStat> {
            let oid = oid_result.context("failed to iterate commit IDs in the revision walk")?;
            let commit = repo
                .find_commit(oid)
                .with_context(|| format!("failed to find commit {oid}"))?;

            let tree_id = commit.tree_id();
            let tree_stat = match tree_cache.get(&tree_id).cloned() {
                Some(hit) => hit,
                None => {
                    let tree = commit.tree().with_context(|| {
                        format!("failed to load tree for commit {}", commit.id())
                    })?;
                    let fresh = compute_tree_stat(repo, &tree, &tokei_config, &mut blob_cache)?;
                    tree_cache.insert(tree_id, fresh.clone());
                    fresh
                }
            };

            build_commit_stat(&commit, tree_stat)
        })
        .collect()
}
/// Computes the aggregated line statistics for a root `tree`.
///
/// Starts from empty counters and lets `accumulate_tree_stat` fill them,
/// beginning at the empty relative path. `blob_cache` is passed through so
/// already parsed blobs are not parsed again.
///
/// # Errors
///
/// Propagates any error raised while walking the tree.
fn compute_tree_stat(
    repo: &Repository,
    tree: &Tree<'_>,
    config: &Config,
    blob_cache: &mut HashMap<(Oid, LanguageType), BlobStat>,
) -> Result<TreeStat> {
    let mut stat = TreeStat {
        total: 0,
        langs: HashMap::new(),
    };
    let root = Path::new("");
    accumulate_tree_stat(
        repo,
        tree,
        root,
        config,
        blob_cache,
        &mut stat.langs,
        &mut stat.total,
    )?;
    Ok(stat)
}
/// Recursively walks `tree` and adds the line counts of every recognised blob
/// to `langs` and `total`.
///
/// * `base` is the path of `tree` relative to the root tree (empty for the
///   root itself); entry names are appended to it to form each entry's path.
/// * `blob_cache` memoises parse results per `(blob id, language)` pair.
/// * `langs` receives each blob's `top_lines` under its language; `total`
///   receives each blob's `total_lines`.
///
/// An entry is skipped when `entry.name()` yields no name, when
/// `should_ignore_path` rejects its path, when it is neither a tree nor a
/// blob, or when no language can be determined for its path.
///
/// NOTE(review): `LanguageType::from_path` receives a repository-relative
/// path. If tokei falls back to inspecting the file on disk (e.g. shebang
/// detection for extension-less names), it would look at the working
/// directory rather than at this blob — confirm against the tokei sources.
///
/// # Errors
///
/// Fails if a subtree or blob cannot be loaded from `repo`.
fn accumulate_tree_stat(
    repo: &Repository,
    tree: &Tree<'_>,
    base: &Path,
    config: &Config,
    blob_cache: &mut HashMap<(Oid, LanguageType), BlobStat>,
    langs: &mut HashMap<LanguageType, usize>,
    total: &mut usize,
) -> Result<()> {
    for entry in tree.iter() {
        let Some(name) = entry.name() else {
            continue;
        };

        // Joining onto the empty root path yields just `name`.
        let path = base.join(name);
        if should_ignore_path(&path, config) {
            continue;
        }

        match entry.kind() {
            Some(ObjectType::Blob) => {
                let Some(language) = LanguageType::from_path(&path, config) else {
                    continue;
                };

                let blob_id = entry.id();
                let cache_key = (blob_id, language);
                let counts = match blob_cache.get(&cache_key).copied() {
                    Some(hit) => hit,
                    None => {
                        let blob = repo.find_blob(blob_id).with_context(|| {
                            format!("failed to load blob for {}", path.display())
                        })?;
                        let parsed = language.parse_from_slice(blob.content(), config);
                        let fresh = BlobStat {
                            top_lines: parsed.lines(),
                            total_lines: parsed.summarise().lines(),
                        };
                        blob_cache.insert(cache_key, fresh);
                        fresh
                    }
                };

                *total += counts.total_lines;
                // Avoid creating map entries for languages with no lines.
                if counts.top_lines != 0 {
                    *langs.entry(language).or_default() += counts.top_lines;
                }
            }
            Some(ObjectType::Tree) => {
                let subtree = repo
                    .find_tree(entry.id())
                    .with_context(|| format!("failed to load subtree for {}", path.display()))?;
                accumulate_tree_stat(repo, &subtree, &path, config, blob_cache, langs, total)?;
            }
            // Any other entry kind contributes nothing.
            _ => {}
        }
    }
    Ok(())
}
/// Reports whether `path` should be left out of the statistics.
///
/// A path is ignored when any of its normal components is named `.git` or
/// `target`, or — unless `config.hidden` is `Some(true)` — starts with a dot.
/// Non-normal components (such as `..` or a root) never cause a match.
fn should_ignore_path(path: &Path, config: &Config) -> bool {
    let skip_hidden = !config.hidden.unwrap_or(false);

    for component in path.components() {
        let Component::Normal(name) = component else {
            continue;
        };
        if name == ".git" || name == "target" {
            return true;
        }
        if skip_hidden && name.to_string_lossy().starts_with('.') {
            return true;
        }
    }
    false
}
fn build_commit_stat(commit: &Commit<'_>, tree_stat: TreeStat) -> Result<CommitStat> {
let date = DateTime::from_timestamp(commit.time().seconds(), 0)
.ok_or_else(|| anyhow!("failed to convert the timestamp for commit {}", commit.id()))?;
Ok(CommitStat {
commit: commit.id().to_string(),
date,
total: tree_stat.total,
langs: tree_stat.langs,
})
}
/// Returns every language that occurs in at least one commit, without
/// duplicates and in the ordering of `LanguageType` (as produced by a
/// `BTreeSet`).
fn collect_language_columns(stats: &[CommitStat]) -> Vec<LanguageType> {
    let unique: BTreeSet<LanguageType> = stats
        .iter()
        .flat_map(|stat| stat.langs.keys().copied())
        .collect();
    unique.into_iter().collect()
}