use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, Utc};
use clap::{Parser, ValueEnum};
use git2::{Commit, ObjectType, Oid, Repository};
use ignore::gitignore::{Gitignore, GitignoreBuilder};
use rustc_hash::{FxHashMap, FxHashSet};
use serde::Serialize;
use std::collections::BTreeSet;
use std::hash::{Hash, Hasher};
use std::io::{self, IsTerminal, Write};
use std::path::{Component, Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use terminal_size::{Height, Width, terminal_size};
use textplots::{
AxisBuilder, Chart, LabelBuilder, LabelFormat, LineStyle, Plot, Shape, TickDisplay,
TickDisplayBuilder,
};
use tokei::{Config, LanguageType};
/// Languages left out of the line counts unless the user opts them back in
/// with `--include-language` (see `build_excluded_languages`).
const DEFAULT_EXCLUDED_LANGUAGES: &[LanguageType] = &[LanguageType::Sql, LanguageType::Json];
// Output formats selectable through `--format`.
// Plain `//` comments are used on purpose: clap's derive macros turn `///`
// doc comments into help text, which would change the `--help` output.
#[derive(ValueEnum, Clone, Debug, Copy)]
#[clap(rename_all = "kebab_case")]
enum OutputFormat {
// Comma-separated values; currently the only variant.
Csv,
}
// Command-line arguments.
// Plain `//` comments are used on purpose: clap's derive macros turn `///`
// doc comments into help text, which would change the `--help` output.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
// Path of the repository to analyse; defaults to the current directory.
#[arg(default_value = ".")]
path: String,
// Requested output format (`-f` / `--format`); defaults to CSV.
#[arg(short, long, default_value_t = OutputFormat::Csv, value_enum)]
format: OutputFormat,
// Languages to exclude in addition to `DEFAULT_EXCLUDED_LANGUAGES`;
// several names may be given separated by commas.
#[arg(
long = "exclude-language",
value_name = "LANGUAGE",
value_parser = parse_language,
value_delimiter = ','
)]
exclude_languages: Vec<LanguageType>,
// Languages to keep; `build_excluded_languages` removes these from the
// exclusion set last, so they win over defaults and `--exclude-language`.
#[arg(
long = "include-language",
value_name = "LANGUAGE",
value_parser = parse_language,
value_delimiter = ','
)]
include_languages: Vec<LanguageType>,
}
/// Line statistics for one commit, as produced by `build_commit_stat`.
#[derive(Debug)]
struct CommitStat {
/// Commit id rendered as a string.
commit: String,
/// Commit time converted to UTC.
date: DateTime<Utc>,
/// Total number of counted lines in the commit's tree.
total: usize,
/// Counted lines per language.
langs: FxHashMap<LanguageType, usize>,
}
/// Aggregated line statistics for one git tree, including all of its subtrees.
#[derive(Debug, Clone)]
struct TreeStat {
/// Sum of the counted lines of every blob below the tree.
total: usize,
/// Counted lines per language; languages with zero lines are not inserted.
langs: FxHashMap<LanguageType, usize>,
}
/// Cached result of parsing a single blob as a particular language.
#[derive(Debug, Clone, Copy)]
struct BlobStat {
/// Line count reported by tokei for the blob (`lines()` of the parse result).
lines: usize,
}
/// Ignore rules in effect for a directory: everything collected from the
/// ignore files of the directory itself and of its ancestors.
#[derive(Clone, Default)]
struct IgnoreState {
/// Rolling hash over the ignore files loaded so far (see
/// `extend_context_hash`); used as part of cache keys.
context_hash: u64,
/// Compiled matchers, outermost directory first.
matchers: Vec<Arc<Gitignore>>,
/// Pre-filter hints, pushed in step with `matchers`.
hints: Vec<Arc<IgnoreHints>>,
}
/// Cheap summary of one ignore file, consulted by `path_might_match_ignore`
/// to skip the full matcher when no pattern could apply to a path.
#[derive(Clone, Default)]
struct IgnoreHints {
/// Final segments of wildcard-free patterns; compared against the entry name
/// and against every path component. The map with unit values acts as a set.
dir_names: FxHashMap<String, ()>,
/// Final segments of wildcard-free patterns that are not directory-only;
/// compared against the names of non-directory entries.
file_names: FxHashMap<String, ()>,
/// Patterns whose final segment contains exactly one `*`.
wildcard_patterns: Vec<WildcardPattern>,
/// Set when some pattern could not be summarised (for example `?`, `[`, or
/// several `*`); the full matcher is then always consulted.
has_complex: bool,
}
/// A pattern segment of the form `<prefix>*<suffix>`.
#[derive(Clone)]
struct WildcardPattern {
/// Text before the `*` (may be empty).
prefix: String,
/// Text after the `*` (may be empty).
suffix: String,
/// True when the pattern ended in `/` and so applies to directories only.
only_dir: bool,
}
/// Parsed form of one ignore file, shared through `Arc` so cached entries can
/// be pushed onto several `IgnoreState`s without copying.
#[derive(Clone)]
struct IgnoreFileData {
/// Compiled matcher built from the file's lines.
matcher: Arc<Gitignore>,
/// Pre-filter hints derived from the same lines.
hints: Arc<IgnoreHints>,
}
/// One CSV data row, built per commit in `write_csv`.
/// NOTE(review): the column order presumably follows the field order here —
/// keep it aligned with `HeaderRow`.
#[derive(Debug, Serialize)]
struct CsvRow {
/// Commit id.
commit: String,
/// Commit time in UTC.
date: DateTime<Utc>,
/// Total counted lines.
total: usize,
/// One count per language column, in the order of the header's `langs`.
langs: Vec<usize>,
}
/// The CSV header row; it is serialized by hand in `write_csv` because the
/// writer is created with automatic headers turned off.
#[derive(Serialize)]
struct HeaderRow {
/// Title of the commit id column.
commit: &'static str,
/// Title of the commit time column.
date: &'static str,
/// Title of the total column.
total: &'static str,
/// Titles of the per-language columns.
langs: Vec<String>,
}
/// Program entry point: parses the command line and dispatches on the
/// requested output format.
fn main() -> Result<()> {
    let cli = Args::parse();
    // Exhaustive on purpose: a new `OutputFormat` variant must be handled here.
    match cli.format {
        OutputFormat::Csv => write_output(&cli),
    }
}
/// Collects the statistics of every commit and prints them.
///
/// Commits are ordered by date, with the commit id as tie-breaker. When stdout
/// is a terminal a chart is drawn; otherwise CSV is written.
///
/// # Errors
/// Propagates failures from opening the repository, walking its history, and
/// writing the output.
fn write_output(args: &Args) -> Result<()> {
    let repo = open_repository(&args.path)?;
    let excluded = build_excluded_languages(&args.exclude_languages, &args.include_languages);

    let mut stats = collect_commit_stats(&repo, &excluded)?;
    stats.sort_by(|left, right| {
        left.date
            .cmp(&right.date)
            .then_with(|| left.commit.cmp(&right.commit))
    });

    if !io::stdout().is_terminal() {
        return write_csv(stats);
    }
    write_terminal_graph(&stats)
}
fn write_csv(stats: Vec<CommitStat>) -> Result<()> {
let language_columns = collect_language_columns(&stats);
let mut writer = csv::WriterBuilder::new()
.has_headers(false)
.from_writer(io::stdout());
writer
.serialize(HeaderRow {
commit: "Commit",
date: "Time",
total: "Total",
langs: language_columns.iter().map(ToString::to_string).collect(),
})
.context("failed to write CSV header row")?;
for stat in stats {
writer
.serialize(CsvRow {
commit: stat.commit,
date: stat.date,
total: stat.total,
langs: language_columns
.iter()
.map(|language| stat.langs.get(language).copied().unwrap_or(0))
.collect(),
})
.context("failed to write CSV data row")?;
}
writer.flush().context("failed to flush the CSV writer")?;
Ok(())
}
/// clap value parser for language names: delegates to `LanguageType`'s
/// `FromStr` implementation and reports a failure as a plain string.
fn parse_language(value: &str) -> std::result::Result<LanguageType, String> {
    match LanguageType::from_str(value) {
        Ok(language) => Ok(language),
        Err(err) => Err(err.to_string()),
    }
}
/// Builds the set of languages to leave out of the counts.
///
/// The result is `DEFAULT_EXCLUDED_LANGUAGES` plus `additional_excludes`,
/// minus everything listed in `includes`; an include therefore wins over both
/// the defaults and an explicit exclude.
fn build_excluded_languages(
    additional_excludes: &[LanguageType],
    includes: &[LanguageType],
) -> FxHashSet<LanguageType> {
    DEFAULT_EXCLUDED_LANGUAGES
        .iter()
        .copied()
        .chain(additional_excludes.iter().copied())
        .filter(|language| !includes.contains(language))
        .collect()
}
fn write_terminal_graph(stats: &[CommitStat]) -> Result<()> {
let mut stdout = io::stdout().lock();
if stats.is_empty() {
writeln!(stdout, "No commits found.").context("failed to write terminal output")?;
return Ok(());
}
let points: Vec<(f32, f32)> = stats
.iter()
.enumerate()
.map(|(index, stat)| (index as f32, stat.total as f32))
.collect();
let (width, height) = match terminal_size() {
Some((Width(width), Height(height))) => (
u32::from(width).saturating_sub(16).max(16) * 2,
u32::from(height).saturating_sub(5).max(8) * 2,
),
None => (320, 192),
};
let xmax = points.len().saturating_sub(1).max(1) as f32;
let ymax = stats
.iter()
.map(|stat| stat.total)
.max()
.unwrap_or(0)
.max(1) as f32;
let start_date = stats
.first()
.map(|stat| stat.date.format("%Y-%m-%d").to_string())
.unwrap_or_default();
let end_date = stats
.last()
.map(|stat| stat.date.format("%Y-%m-%d").to_string())
.unwrap_or_default();
drop(stdout);
let shape = if points.len() == 1 {
Shape::Points(&points)
} else {
Shape::Lines(&points)
};
let frame = {
let mut chart = Chart::new_with_y_range(width, height, 0.0, xmax, 0.0, ymax);
let chart_ref = chart
.x_axis_style(LineStyle::Solid)
.y_axis_style(LineStyle::Solid)
.x_label_format(LabelFormat::Custom(Box::new(move |value| {
format_x_axis_tick(value, xmax, &start_date, &end_date)
})))
.y_label_format(LabelFormat::Custom(Box::new(format_count_tick)))
.y_tick_display(TickDisplay::Sparse)
.lineplot(&shape);
chart_ref.borders();
chart_ref.axis();
chart_ref.figures();
chart_ref.to_string()
};
println!("{frame}");
Ok(())
}
/// Formats one x-axis tick label.
///
/// Ticks at or below zero show `start_date`, the tick at `xmax` shows
/// `end_date`, and every other tick shows the commit index rounded to an
/// integer.
///
/// The end point is detected with a tolerance that scales with `xmax`: a fixed
/// `f32::EPSILON` is smaller than the spacing between neighbouring `f32`
/// values once `xmax` exceeds 2, so a tick one rounding step away from `xmax`
/// would otherwise lose its date label.
fn format_x_axis_tick(value: f32, xmax: f32, start_date: &str, end_date: &str) -> String {
    // A few units in the last place at the magnitude of `xmax`.
    let tolerance = 4.0 * f32::EPSILON * xmax.abs().max(1.0);
    if value <= 0.0 {
        start_date.to_string()
    } else if (value - xmax).abs() <= tolerance {
        end_date.to_string()
    } else {
        format!("{value:.0}")
    }
}
/// Formats a line count for the y axis: plain integers below 1000, then one
/// decimal with a `K` or `M` suffix. Negative and NaN inputs are shown as `0`.
fn format_count_tick(value: f32) -> String {
    let value = value.round().max(0.0);
    // Counts from 999_950 upwards would print as "1000.0K" once rounded to one
    // decimal, so they are promoted to the "M" form.
    if value >= 999_950.0 {
        format!("{:.1}M", value / 1_000_000.0)
    } else if value >= 1_000.0 {
        format!("{:.1}K", value / 1_000.0)
    } else {
        format!("{}", value as usize)
    }
}
/// Opens the git repository located at `repo_path`.
///
/// # Errors
/// Returns the git2 error, annotated with the path, when the repository
/// cannot be opened.
fn open_repository(repo_path: &str) -> Result<Repository> {
    let repo = Repository::open(repo_path)
        .with_context(|| format!("failed to open the repository at {repo_path}"))?;
    Ok(repo)
}
/// Key of the per-tree statistics cache.
///
/// The counts for a tree depend on the tree object itself and on the ignore
/// rules inherited from its ancestors, which are matched against the tree's
/// full path. The key therefore holds the tree id, the inherited ignore
/// context hash and, only when ancestor matchers exist, the hash of the
/// tree's path. Without inherited matchers the path is left out so identical
/// trees at different locations still share one entry.
type TreeCacheKey = (Oid, u64, Option<u64>);

/// Walks every commit reachable from `HEAD` and computes its line statistics.
///
/// All caches live for the whole walk, so trees, blobs and ignore files that
/// are shared between commits are processed once.
///
/// # Errors
/// Returns an error when the revision walk cannot be created or iterated, or
/// when a commit, tree or blob cannot be loaded.
fn collect_commit_stats(
    repo: &Repository,
    excluded_languages: &FxHashSet<LanguageType>,
) -> Result<Vec<CommitStat>> {
    let mut revwalk = repo.revwalk().context("failed to create revision walk")?;
    revwalk
        .push_head()
        .context("failed to start revision walk at HEAD")?;

    let mut stats = Vec::new();
    let tokei_config = Config::default();
    let mut tree_cache: FxHashMap<TreeCacheKey, TreeStat> = FxHashMap::default();
    let mut ignore_file_cache: FxHashMap<(Oid, u64), Arc<IgnoreFileData>> = FxHashMap::default();
    let mut ignore_decision_cache: FxHashMap<(u64, u64, bool), bool> = FxHashMap::default();
    let mut blob_cache: FxHashMap<(Oid, LanguageType), BlobStat> = FxHashMap::default();
    let mut shebang_cache: FxHashMap<Oid, Option<LanguageType>> = FxHashMap::default();
    let mut shebang_line_cache: FxHashMap<String, Option<LanguageType>> = FxHashMap::default();
    let root_ignore_state = IgnoreState::default();

    for oid_result in revwalk {
        let oid = oid_result.context("failed to iterate commit IDs in the revision walk")?;
        let commit = repo
            .find_commit(oid)
            .with_context(|| format!("failed to find commit {oid}"))?;
        let tree_stat = compute_tree_stat(
            repo,
            commit.tree_id(),
            Path::new(""),
            &root_ignore_state,
            &tokei_config,
            excluded_languages,
            &mut tree_cache,
            &mut blob_cache,
            &mut ignore_file_cache,
            &mut ignore_decision_cache,
            &mut shebang_cache,
            &mut shebang_line_cache,
        )?;
        stats.push(build_commit_stat(&commit, tree_stat)?);
    }
    Ok(stats)
}

/// Computes the line statistics of the tree `tree_id`, located at `base`
/// inside the commit, recursing into subtrees.
///
/// `ignore_state` carries the ignore rules inherited from ancestor
/// directories; `.gitignore` and `.ignore` files found directly in this tree
/// are added before its entries are examined. Blobs whose language cannot be
/// determined, or whose language is in `excluded_languages`, are skipped.
///
/// Results are memoized in `tree_cache` under a [`TreeCacheKey`], so a cached
/// entry is reused only when the tree *and* the ignore rules that apply to it
/// are the same.
///
/// # Errors
/// Returns an error when a tree or blob cannot be loaded or an ignore matcher
/// cannot be built.
fn compute_tree_stat(
    repo: &Repository,
    tree_id: Oid,
    base: &Path,
    ignore_state: &IgnoreState,
    config: &Config,
    excluded_languages: &FxHashSet<LanguageType>,
    tree_cache: &mut FxHashMap<TreeCacheKey, TreeStat>,
    blob_cache: &mut FxHashMap<(Oid, LanguageType), BlobStat>,
    ignore_file_cache: &mut FxHashMap<(Oid, u64), Arc<IgnoreFileData>>,
    ignore_decision_cache: &mut FxHashMap<(u64, u64, bool), bool>,
    shebang_cache: &mut FxHashMap<Oid, Option<LanguageType>>,
    shebang_line_cache: &mut FxHashMap<String, Option<LanguageType>>,
) -> Result<TreeStat> {
    // Keying by tree id alone would hand back counts computed under different
    // ignore rules (for instance after an ancestor `.gitignore` changed).
    let cache_key: TreeCacheKey = (
        tree_id,
        ignore_state.context_hash,
        (!ignore_state.matchers.is_empty()).then(|| hash_path(base)),
    );
    if let Some(cached) = tree_cache.get(&cache_key) {
        return Ok(cached.clone());
    }

    let tree = repo
        .find_tree(tree_id)
        .with_context(|| format!("failed to load tree {tree_id}"))?;

    // Extend the inherited rules with the ignore files of this directory.
    let mut active_ignore_state = ignore_state.clone();
    for ignore_name in [".gitignore", ".ignore"] {
        if let Some((ignore_blob_id, ignore_file)) =
            load_ignore_file_for_dir(repo, &tree, base, ignore_name, ignore_file_cache)?
        {
            active_ignore_state.context_hash = extend_context_hash(
                active_ignore_state.context_hash,
                ignore_blob_id,
                hash_path(base),
            );
            active_ignore_state
                .matchers
                .push(Arc::clone(&ignore_file.matcher));
            active_ignore_state
                .hints
                .push(Arc::clone(&ignore_file.hints));
        }
    }

    let mut langs = FxHashMap::default();
    let mut total = 0;
    for entry in &tree {
        // Entries whose name is not valid UTF-8 are skipped.
        let Some(name) = entry.name() else {
            continue;
        };
        let path = if base.as_os_str().is_empty() {
            PathBuf::from(name)
        } else {
            base.join(name)
        };
        let is_dir = matches!(entry.kind(), Some(ObjectType::Tree));

        let ignore_key = (active_ignore_state.context_hash, hash_path(&path), is_dir);
        let should_ignore = if let Some(cached) = ignore_decision_cache.get(&ignore_key) {
            *cached
        } else {
            let computed = should_ignore_path(&path, is_dir, &active_ignore_state, config);
            ignore_decision_cache.insert(ignore_key, computed);
            computed
        };
        if should_ignore {
            continue;
        }

        match entry.kind() {
            Some(ObjectType::Tree) => {
                let subtree_stat = compute_tree_stat(
                    repo,
                    entry.id(),
                    &path,
                    &active_ignore_state,
                    config,
                    excluded_languages,
                    tree_cache,
                    blob_cache,
                    ignore_file_cache,
                    ignore_decision_cache,
                    shebang_cache,
                    shebang_line_cache,
                )?;
                total += subtree_stat.total;
                for (language, lines) in &subtree_stat.langs {
                    *langs.entry(*language).or_insert(0) += *lines;
                }
            }
            Some(ObjectType::Blob) => {
                // Keeps a blob loaded for shebang detection so it is not
                // fetched a second time for parsing.
                let mut loaded_blob = None;
                // NOTE(review): tokei's `from_path` may fall back to reading a
                // shebang from the file at `path` on disk for extension-less
                // names; confirm it cannot pick up working-tree content.
                let language = if let Some(language) = LanguageType::from_path(&path, config) {
                    language
                } else {
                    let detected = if let Some(cached) = shebang_cache.get(&entry.id()) {
                        *cached
                    } else {
                        let blob = repo.find_blob(entry.id()).with_context(|| {
                            format!("failed to load blob for {}", path.display())
                        })?;
                        let detected =
                            detect_language_from_shebang(blob.content(), shebang_line_cache);
                        shebang_cache.insert(entry.id(), detected);
                        loaded_blob = Some(blob);
                        detected
                    };
                    let Some(language) = detected else {
                        continue;
                    };
                    language
                };
                if excluded_languages.contains(&language) {
                    continue;
                }

                let key = (entry.id(), language);
                let blob_stat = if let Some(cached) = blob_cache.get(&key) {
                    *cached
                } else {
                    let blob = if let Some(blob) = loaded_blob {
                        blob
                    } else {
                        repo.find_blob(entry.id()).with_context(|| {
                            format!("failed to load blob for {}", path.display())
                        })?
                    };
                    let parsed = language.parse_from_slice(blob.content(), config);
                    let computed = BlobStat {
                        lines: parsed.lines(),
                    };
                    blob_cache.insert(key, computed);
                    computed
                };
                total += blob_stat.lines;
                if blob_stat.lines > 0 {
                    *langs.entry(language).or_insert(0) += blob_stat.lines;
                }
            }
            // Anything that is neither a tree nor a blob is not counted.
            _ => {}
        }
    }

    let tree_stat = TreeStat { total, langs };
    tree_cache.insert(cache_key, tree_stat.clone());
    Ok(tree_stat)
}
/// Loads the ignore file called `ignore_name` that sits directly in `tree`,
/// if there is one.
///
/// Returns the blob id of the file together with its compiled matcher and
/// hints, or `None` when the tree has no such entry or the entry is not a
/// blob. Parsed files are cached per (blob id, directory path hash), because
/// the matcher is rooted at the directory that contains the file. Lines the
/// matcher builder rejects are skipped.
///
/// # Errors
/// Returns an error when the blob cannot be loaded or the matcher cannot be
/// built.
fn load_ignore_file_for_dir(
    repo: &Repository,
    tree: &git2::Tree<'_>,
    base: &Path,
    ignore_name: &str,
    ignore_file_cache: &mut FxHashMap<(Oid, u64), Arc<IgnoreFileData>>,
) -> Result<Option<(Oid, Arc<IgnoreFileData>)>> {
    let ignore_blob_id = match tree.get_name(ignore_name) {
        Some(entry) if matches!(entry.kind(), Some(ObjectType::Blob)) => entry.id(),
        _ => return Ok(None),
    };

    let key = (ignore_blob_id, hash_path(base));
    if let Some(hit) = ignore_file_cache.get(&key) {
        return Ok(Some((ignore_blob_id, Arc::clone(hit))));
    }

    let blob = repo
        .find_blob(ignore_blob_id)
        .with_context(|| format!("failed to load ignore file blob {ignore_blob_id}"))?;

    // All matchers live under a synthetic "ROOT" directory; `should_ignore_path`
    // prefixes the paths it tests in the same way.
    let mut root = PathBuf::from("ROOT");
    if !base.as_os_str().is_empty() {
        root.push(base);
    }
    let source_path = root.join(ignore_name);

    let mut builder = GitignoreBuilder::new(&root);
    let mut hints = IgnoreHints::default();
    let text = String::from_utf8_lossy(blob.content());
    for line in text.lines() {
        // Best effort: a line the builder rejects is simply not added.
        let _ = builder.add_line(Some(source_path.clone()), line);
        update_ignore_hints(&mut hints, line);
    }
    let matcher = builder.build().with_context(|| {
        format!(
            "failed to build ignore matcher for {}",
            source_path.display()
        )
    })?;

    let data = Arc::new(IgnoreFileData {
        matcher: Arc::new(matcher),
        hints: Arc::new(hints),
    });
    ignore_file_cache.insert(key, Arc::clone(&data));
    Ok(Some((ignore_blob_id, data)))
}
/// Records in `hints` what the ignore-file `line` could match.
///
/// The hints are a cheap pre-filter for the real matcher, so they must never
/// be narrower than the pattern: whenever a line cannot be summarised
/// faithfully, `has_complex` is set and the matcher is always consulted.
///
/// Only the final path segment of a pattern is summarised:
/// * no `*`: the segment is stored as a directory name and, unless the
///   pattern ends in `/`, as a file name too;
/// * exactly one `*`: the text before and after it is stored as a wildcard;
/// * anything else (`?`, brackets, braces, several `*`, leading whitespace):
///   marked complex.
fn update_ignore_hints(hints: &mut IgnoreHints, line: &str) {
    if line.starts_with('#') {
        return;
    }
    // Trailing whitespace is insignificant unless the final space is escaped.
    let mut line = if line.ends_with("\\ ") {
        line
    } else {
        line.trim_end()
    };
    if line.is_empty() {
        return;
    }
    // Leading whitespace is not summarised; defer to the matcher instead.
    if line.starts_with(char::is_whitespace) {
        hints.has_complex = true;
        return;
    }

    if line.starts_with("\\!") || line.starts_with("\\#") {
        // Escaped literal `!`/`#`: drop only the backslash so the character
        // stays part of the name.
        line = &line[1..];
    } else if let Some(stripped) = line.strip_prefix('!') {
        // Negated patterns are summarised like ordinary ones.
        line = stripped;
    }

    let mut only_dir = false;
    if let Some(stripped) = line.strip_suffix('/') {
        only_dir = true;
        line = stripped;
    }
    line = line.trim_start_matches('/');
    if line.is_empty() {
        return;
    }

    // Backslash escapes are dropped so the hint holds the literal characters.
    let last_segment = line.rsplit('/').next().unwrap_or(line).replace('\\', "");
    if last_segment.is_empty() {
        return;
    }

    let has_unknown_wildcards =
        last_segment.contains(|c: char| matches!(c, '?' | '[' | ']' | '{' | '}'));
    if has_unknown_wildcards {
        hints.has_complex = true;
        return;
    }

    let wildcard_count = last_segment.matches('*').count();
    if wildcard_count == 0 {
        if only_dir {
            hints.dir_names.insert(last_segment, ());
        } else {
            hints.dir_names.insert(last_segment.clone(), ());
            hints.file_names.insert(last_segment, ());
        }
        return;
    }
    if wildcard_count > 1 {
        hints.has_complex = true;
        return;
    }

    let mut parts = last_segment.splitn(2, '*');
    let prefix = parts.next().unwrap_or_default();
    let suffix = parts.next().unwrap_or_default();
    hints.wildcard_patterns.push(WildcardPattern {
        prefix: prefix.to_string(),
        suffix: suffix.to_string(),
        only_dir,
    });
}
/// Cheap pre-check: could any ignore file in `ignore_state` match `path`?
///
/// Returns `true` as soon as one set of hints is marked complex, names the
/// entry (or one of the path's components) literally, or has a single-`*`
/// pattern fitting the entry name. A `false` result lets the caller skip the
/// full matcher.
///
/// When the entry name is missing or not valid UTF-8, only the `has_complex`
/// flag of each hint set is considered.
fn path_might_match_ignore(path: &Path, is_dir: bool, ignore_state: &IgnoreState) -> bool {
    let leaf = path.file_name().and_then(|name| name.to_str());

    ignore_state.hints.iter().any(|hints| {
        if hints.has_complex {
            return true;
        }
        let Some(leaf) = leaf else {
            return false;
        };

        let named_directly = hints.dir_names.contains_key(leaf)
            || (!is_dir && hints.file_names.contains_key(leaf));
        if named_directly {
            return true;
        }

        let fits_wildcard = hints.wildcard_patterns.iter().any(|pattern| {
            (is_dir || !pattern.only_dir)
                // The length check stops prefix and suffix from overlapping.
                && leaf.len() >= pattern.prefix.len() + pattern.suffix.len()
                && leaf.starts_with(pattern.prefix.as_str())
                && leaf.ends_with(pattern.suffix.as_str())
        });
        if fits_wildcard {
            return true;
        }

        // A literal name may also refer to any directory along the path.
        path.components().any(|component| match component {
            Component::Normal(name) => name
                .to_str()
                .is_some_and(|name| hints.dir_names.contains_key(name)),
            _ => false,
        })
    })
}
/// Hashes `path` with a fresh `FxHasher`; the result is used in cache keys.
fn hash_path(path: &Path) -> u64 {
    let mut state = rustc_hash::FxHasher::default();
    Hash::hash(path, &mut state);
    state.finish()
}
/// Folds one more ignore file into an ignore context hash.
///
/// The new hash covers, in this order, the previous `context_hash`, the bytes
/// of `ignore_blob_id`, and `path_hash` (the hash of the directory holding the
/// ignore file).
fn extend_context_hash(context_hash: u64, ignore_blob_id: Oid, path_hash: u64) -> u64 {
    let mut state = rustc_hash::FxHasher::default();
    // A tuple hashes its fields one after another, in order.
    (context_hash, ignore_blob_id.as_bytes(), path_hash).hash(&mut state);
    state.finish()
}
/// Guesses the language of a file from the first line of `content`.
///
/// Returns `None` when the first line is empty or not valid UTF-8 (these two
/// cases are not cached). Otherwise the line is looked up in
/// `shebang_line_cache`, and on a miss it is resolved and the outcome,
/// including a negative one, is stored in the cache.
fn detect_language_from_shebang(
    content: &[u8],
    shebang_line_cache: &mut FxHashMap<String, Option<LanguageType>>,
) -> Option<LanguageType> {
    let line_end = content
        .iter()
        .position(|byte| *byte == b'\n')
        .unwrap_or(content.len());
    let first_line = std::str::from_utf8(&content[..line_end])
        .ok()?
        .trim_end_matches('\r');
    if first_line.is_empty() {
        return None;
    }

    if let Some(known) = shebang_line_cache.get(first_line) {
        return *known;
    }
    let detected = resolve_shebang_line(first_line);
    shebang_line_cache.insert(first_line.to_string(), detected);
    detected
}

/// Resolves a single first line to a language without touching any cache.
///
/// An exact match against the shebangs listed by each language is tried
/// first. For `#!/usr/bin/env` lines the line is written to a temporary file
/// so that `LanguageType::from_shebang`, which takes a path, can inspect it.
fn resolve_shebang_line(first_line: &str) -> Option<LanguageType> {
    for (language, _) in LanguageType::list() {
        let listed = language
            .shebangs()
            .iter()
            .any(|shebang| *shebang == first_line);
        if listed {
            return Some(*language);
        }
    }

    if !first_line.starts_with("#!/usr/bin/env") {
        return None;
    }
    let mut temp = tempfile::NamedTempFile::new().ok()?;
    writeln!(temp, "{first_line}").ok()?;
    LanguageType::from_shebang(temp.path())
}
/// Decides whether `path` is left out of the statistics.
///
/// A path is ignored when any of its components is `.git` or `target`, or
/// starts with a dot while hidden files are not enabled in `config`.
/// Otherwise the ignore matchers are applied in order, so that a later
/// matcher overrides an earlier verdict, both for ignoring and for
/// whitelisting. The matchers are skipped altogether when the hints say no
/// pattern can apply.
fn should_ignore_path(
    path: &Path,
    is_dir: bool,
    ignore_state: &IgnoreState,
    config: &Config,
) -> bool {
    let skip_hidden = !config.hidden.unwrap_or(false);
    for component in path.components() {
        let Component::Normal(name) = component else {
            continue;
        };
        if name == ".git"
            || name == "target"
            || (skip_hidden && name.to_string_lossy().starts_with('.'))
        {
            return true;
        }
    }

    if ignore_state.matchers.is_empty() || !path_might_match_ignore(path, is_dir, ignore_state) {
        return false;
    }

    // Matchers are rooted under the synthetic "ROOT" directory.
    let rooted_path = Path::new("ROOT").join(path);
    ignore_state.matchers.iter().fold(false, |ignored, matcher| {
        let verdict = matcher.matched_path_or_any_parents(&rooted_path, is_dir);
        if verdict.is_ignore() {
            true
        } else if verdict.is_whitelist() {
            false
        } else {
            ignored
        }
    })
}
fn build_commit_stat(commit: &Commit<'_>, tree_stat: TreeStat) -> Result<CommitStat> {
let date = DateTime::from_timestamp(commit.time().seconds(), 0)
.ok_or_else(|| anyhow!("failed to convert the timestamp for commit {}", commit.id()))?;
Ok(CommitStat {
commit: commit.id().to_string(),
date,
total: tree_stat.total,
langs: tree_stat.langs,
})
}
/// Returns every language that occurs in at least one commit, without
/// duplicates and in the languages' `Ord` order; this fixes the order of the
/// per-language CSV columns.
fn collect_language_columns(stats: &[CommitStat]) -> Vec<LanguageType> {
    let ordered: BTreeSet<LanguageType> = stats
        .iter()
        .flat_map(|stat| stat.langs.keys().copied())
        .collect();
    ordered.into_iter().collect()
}
// Unit tests for the pure helpers: exclusion-set construction and x-axis
// label formatting.
#[cfg(test)]
mod tests {
use super::*;
// With no CLI overrides the exclusion set contains the built-in defaults.
#[test]
fn default_excluded_languages_include_sql_and_json() {
let excluded = build_excluded_languages(&[], &[]);
assert!(excluded.contains(&LanguageType::Sql));
assert!(excluded.contains(&LanguageType::Json));
}
// An explicit include removes a default exclusion and leaves the others alone.
#[test]
fn include_languages_remove_default_excludes() {
let excluded = build_excluded_languages(&[], &[LanguageType::Sql]);
assert!(!excluded.contains(&LanguageType::Sql));
assert!(excluded.contains(&LanguageType::Json));
}
// Extra excludes are added on top of the defaults rather than replacing them.
#[test]
fn additional_excludes_are_added_to_defaults() {
let excluded = build_excluded_languages(&[LanguageType::Rust], &[]);
assert!(excluded.contains(&LanguageType::Rust));
assert!(excluded.contains(&LanguageType::Sql));
assert!(excluded.contains(&LanguageType::Json));
}
// The first and last x-axis ticks show dates instead of commit indexes.
#[test]
fn x_axis_endpoint_labels_use_dates() {
let start = "2024-01-01";
let end = "2024-02-03";
assert_eq!(format_x_axis_tick(0.0, 12.0, start, end), start);
assert_eq!(format_x_axis_tick(12.0, 12.0, start, end), end);
}
}