locoti 0.1.0

A fork of Locot. A simple utility for tracking lines of code over time in a Git repository. It outputs CSV for visualization and analysis.
use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, Utc};
use clap::{Parser, ValueEnum};
use git2::{Commit, Repository, build::CheckoutBuilder};
use serde::Serialize;
use std::collections::{BTreeSet, HashMap};
use std::io;
use std::path::Path;
use tempfile::{TempDir, tempdir};
use tokei::{Config, LanguageType, Languages};

// Output formats selectable through the `--format` / `-f` command-line option.
//
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into user-visible help text.
#[derive(ValueEnum, Clone, Debug, Copy)]
#[clap(rename_all = "kebab_case")]
enum OutputFormat {
    // Comma-separated values written to standard output; currently the only
    // supported format.
    Csv,
}

// Command-line arguments, parsed by clap's derive API.
//
// NOTE: the `///` comments below are picked up by clap as help text, so they
// are part of the program's output. Maintainer notes use `//` instead.
/// Count lines of code over time.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    // No `short`/`long` is given, so clap treats this as a positional
    // argument. It is handed unchanged to the clone step, and defaults to the
    // current directory.
    /// Path to the repository
    #[arg(default_value = ".")]
    path: String,

    // Selects the writer that `main` dispatches to; defaults to CSV.
    /// Output format
    #[arg(short, long, default_value_t = OutputFormat::Csv, value_enum)]
    format: OutputFormat,
}

/// Line-count measurements taken for a single commit.
#[derive(Debug)]
struct CommitStat {
    /// The commit ID rendered as a string.
    commit: String,
    /// The commit's timestamp, converted from its seconds value into a UTC
    /// date-time.
    date: DateTime<Utc>,
    /// Total number of lines as reported by tokei's aggregate over all
    /// detected languages.
    total: usize,
    /// Line count per language. Languages with zero lines are not stored, so
    /// a missing key means "no lines of that language in this commit".
    langs: HashMap<LanguageType, usize>,
}

/// One data record of the CSV output.
///
/// The record is written through serde, so the field order here is the
/// column order of the output and must stay aligned with [`HeaderRow`].
#[derive(Debug, Serialize)]
struct CsvRow {
    /// Commit ID as a string.
    commit: String,
    /// Commit timestamp in UTC.
    date: DateTime<Utc>,
    /// Total line count for the commit.
    total: usize,
    /// One line count per language column, in the same order as the language
    /// names in the header row; `0` where the commit has no lines of that
    /// language.
    langs: Vec<usize>,
}

/// The manually written header record of the CSV output.
///
/// The CSV writer is created with automatic headers disabled, and this struct
/// is serialized first instead. Its fields mirror [`CsvRow`] one-to-one so the
/// titles line up with the data columns.
#[derive(Serialize)]
struct HeaderRow {
    /// Title of the commit ID column.
    commit: &'static str,
    /// Title of the timestamp column.
    date: &'static str,
    /// Title of the total-lines column.
    total: &'static str,
    /// One title per language column (the language's display name).
    langs: Vec<String>,
}

fn main() -> Result<()> {
    let args = Args::parse();
    match args.format {
        OutputFormat::Csv => write_csv(&args.path),
    }
}

fn write_csv(repo_path: &str) -> Result<()> {
    let (_tmp_dir, repo) = clone_to_temp(repo_path)?;
    let workdir = repo
        .workdir()
        .context("failed to locate the working directory for the temporary clone")?;

    let mut stats = collect_commit_stats(&repo, workdir)?;
    stats.sort_by(|a, b| a.date.cmp(&b.date).then(a.commit.cmp(&b.commit)));

    let language_columns = collect_language_columns(&stats);
    let mut writer = csv::WriterBuilder::new()
        .has_headers(false)
        .from_writer(io::stdout());

    writer
        .serialize(HeaderRow {
            commit: "Commit",
            date: "Time",
            total: "Total",
            langs: language_columns.iter().map(ToString::to_string).collect(),
        })
        .context("failed to write CSV header row")?;

    for stat in stats {
        writer
            .serialize(CsvRow {
                commit: stat.commit,
                date: stat.date,
                total: stat.total,
                langs: language_columns
                    .iter()
                    .map(|language| stat.langs.get(language).copied().unwrap_or(0))
                    .collect(),
            })
            .context("failed to write CSV data row")?;
    }

    writer.flush().context("failed to flush the CSV writer")?;
    Ok(())
}

/// Clones the repository at `repo_path` (including submodules, via
/// `clone_recurse`) into a freshly created temporary directory.
///
/// Returns the temporary-directory guard together with the opened clone. The
/// caller must keep the guard alive for as long as the clone is used.
///
/// # Errors
///
/// Fails if the temporary directory cannot be created or the clone fails.
fn clone_to_temp(repo_path: &str) -> Result<(TempDir, Repository)> {
    let tmp_dir = tempdir().context("failed to create temporary directory")?;

    Repository::clone_recurse(repo_path, tmp_dir.path())
        .with_context(|| format!("failed to clone the repository from {repo_path}"))
        .map(|repo| (tmp_dir, repo))
}

/// Measures every commit reachable from `HEAD`.
///
/// For each commit ID produced by the revision walk, the commit is checked out
/// into `workdir`, the files there are counted, and the result is turned into
/// a [`CommitStat`]. The returned vector is in revision-walk order; no sorting
/// is applied here.
///
/// # Errors
///
/// Stops at, and returns, the first failure from the walk, the commit lookup,
/// the checkout, or the construction of the statistic.
fn collect_commit_stats(repo: &Repository, workdir: &Path) -> Result<Vec<CommitStat>> {
    let tokei_config = Config::default();

    let mut walker = repo.revwalk().context("failed to create revision walk")?;
    walker
        .push_head()
        .context("failed to start revision walk at HEAD")?;

    // Collecting into `Result<Vec<_>>` short-circuits on the first error, so
    // no further commits are checked out after a failure.
    walker
        .map(|step| -> Result<CommitStat> {
            let oid = step.context("failed to iterate commit IDs in the revision walk")?;
            let commit = repo
                .find_commit(oid)
                .with_context(|| format!("failed to find commit {oid}"))?;

            checkout_commit(repo, &commit)?;
            build_commit_stat(&commit, count_languages(workdir, &tokei_config))
        })
        .collect()
}

/// Puts the working directory of `repo` into the state of `commit` and points
/// a detached `HEAD` at it.
///
/// The checkout is forced and removes untracked files.
///
/// # Errors
///
/// Fails if the commit's tree cannot be loaded, the checkout fails, or `HEAD`
/// cannot be detached.
fn checkout_commit(repo: &Repository, commit: &Commit<'_>) -> Result<()> {
    let commit_id = commit.id();

    let tree = commit
        .tree()
        .with_context(|| format!("failed to load tree for commit {}", commit_id))?;

    let mut checkout_opts = CheckoutBuilder::new();
    checkout_opts.force();
    checkout_opts.remove_untracked(true);

    repo.checkout_tree(tree.as_object(), Some(&mut checkout_opts))
        .with_context(|| format!("failed to check out the tree for commit {}", commit_id))?;

    repo.set_head_detached(commit_id)
        .with_context(|| format!("failed to detach HEAD at commit {}", commit_id))
}

/// Runs tokei over `workdir` and returns the per-language statistics it
/// gathers, passing `.git` and `target` as ignore patterns.
fn count_languages(workdir: &Path, config: &Config) -> Languages {
    let roots = [workdir];
    let ignored = [".git", "target"];

    let mut languages = Languages::new();
    languages.get_statistics(&roots, &ignored, config);
    languages
}

fn build_commit_stat(commit: &Commit<'_>, languages: Languages) -> Result<CommitStat> {
    let date = DateTime::from_timestamp(commit.time().seconds(), 0)
        .ok_or_else(|| anyhow!("failed to convert the timestamp for commit {}", commit.id()))?;

    let mut per_language = HashMap::new();
    for (language_type, language) in &languages {
        let lines = language.lines();
        if lines > 0 {
            per_language.insert(*language_type, lines);
        }
    }

    Ok(CommitStat {
        commit: commit.id().to_string(),
        date,
        total: languages.total().lines(),
        langs: per_language,
    })
}

/// Returns every language that appears in at least one of `stats`, without
/// duplicates and in the sorted order of `LanguageType`.
///
/// The result defines the language columns of the output.
fn collect_language_columns(stats: &[CommitStat]) -> Vec<LanguageType> {
    // A `BTreeSet` both removes duplicates and yields a deterministic order.
    let unique: BTreeSet<LanguageType> = stats
        .iter()
        .flat_map(|stat| stat.langs.keys().copied())
        .collect();

    unique.into_iter().collect()
}