//! locoti 0.1.1
//!
//! A fork of Locot. A simple utility for tracking lines of code over time in a Git repository.
//! Outputs CSV for visualization and analysis.
use anyhow::{Context, Result, anyhow};
use chrono::{DateTime, Utc};
use clap::{Parser, ValueEnum};
use git2::{Commit, ObjectType, Oid, Repository, Tree};
use serde::Serialize;
use std::collections::{BTreeSet, HashMap};
use std::io;
use std::path::{Component, Path, PathBuf};
use tokei::{Config, LanguageType};

// Output formats that can be selected on the command line (see `Args::format`).
// Plain `//` comments are used here on purpose: clap's derive macros presumably turn `///`
// comments into help text, which would change the CLI output.
#[derive(ValueEnum, Clone, Debug, Copy)]
#[clap(rename_all = "kebab_case")]
enum OutputFormat {
    // Comma-separated values; `main` dispatches this variant to `write_csv`.
    Csv,
}

// Command-line arguments, parsed in `main` through clap's derive API.
// NOTE(review): the `///` comments on this struct and its fields are presumably consumed by
// clap as help text, so rewording them changes what `--help` prints.
/// Count lines of code over time.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Args {
    // Declared without `short`/`long`, with "." as the default value.
    /// Path to the repository
    #[arg(default_value = ".")]
    path: String,

    // Declared with `short` and `long`; defaults to `OutputFormat::Csv`, the only variant.
    /// Output format
    #[arg(short, long, default_value_t = OutputFormat::Csv, value_enum)]
    format: OutputFormat,
}

/// Line counts for the tree of a single commit, as produced by `build_commit_stat`.
#[derive(Debug)]
struct CommitStat {
    /// Commit ID rendered as a string.
    commit: String,
    /// Commit time, converted from the seconds value reported by the commit.
    date: DateTime<Utc>,
    /// Overall line count of the commit's tree (copied from `TreeStat::total`).
    total: usize,
    /// Line count per detected language (copied from `TreeStat::langs`).
    langs: HashMap<LanguageType, usize>,
}

/// Aggregated line counts for one Git tree; cached per tree ID in `collect_commit_stats`.
#[derive(Debug, Clone)]
struct TreeStat {
    /// Sum of `BlobStat::total_lines` over every counted blob in the tree.
    total: usize,
    /// Sum of `BlobStat::top_lines` per language; languages with a zero count are not inserted.
    langs: HashMap<LanguageType, usize>,
}

/// Line counts for one parsed blob; cached per (blob ID, language) pair.
///
/// NOTE(review): presumably `top_lines` covers only the blob's own language while
/// `total_lines` also includes embedded languages — confirm against tokei's `CodeStats`.
#[derive(Debug, Clone, Copy)]
struct BlobStat {
    /// Result of `lines()` on the parse result; added to the per-language count.
    top_lines: usize,
    /// Result of `summarise().lines()` on the parse result; added to the overall total.
    total_lines: usize,
}

/// One CSV data row, serialized by `write_csv` for each commit.
///
/// The field order matters: it mirrors `HeaderRow`, so the columns line up with the header.
#[derive(Debug, Serialize)]
struct CsvRow {
    /// Commit ID.
    commit: String,
    /// Commit time.
    date: DateTime<Utc>,
    /// Overall line count for the commit.
    total: usize,
    /// One count per language column, in header order; 0 when the commit lacks that language.
    langs: Vec<usize>,
}

/// The CSV header row, serialized manually by `write_csv` (the writer's automatic headers
/// are turned off there).
///
/// The field order matters: it mirrors `CsvRow`, so the titles line up with the data columns.
#[derive(Serialize)]
struct HeaderRow {
    /// Title of the commit ID column.
    commit: &'static str,
    /// Title of the commit time column.
    date: &'static str,
    /// Title of the overall line count column.
    total: &'static str,
    /// One title per language column.
    langs: Vec<String>,
}

fn main() -> Result<()> {
    let args = Args::parse();
    match args.format {
        OutputFormat::Csv => write_csv(&args.path),
    }
}

fn write_csv(repo_path: &str) -> Result<()> {
    let repo = open_repository(repo_path)?;
    let mut stats = collect_commit_stats(&repo)?;
    stats.sort_by(|a, b| a.date.cmp(&b.date).then(a.commit.cmp(&b.commit)));

    let language_columns = collect_language_columns(&stats);
    let mut writer = csv::WriterBuilder::new()
        .has_headers(false)
        .from_writer(io::stdout());

    writer
        .serialize(HeaderRow {
            commit: "Commit",
            date: "Time",
            total: "Total",
            langs: language_columns.iter().map(ToString::to_string).collect(),
        })
        .context("failed to write CSV header row")?;

    for stat in stats {
        writer
            .serialize(CsvRow {
                commit: stat.commit,
                date: stat.date,
                total: stat.total,
                langs: language_columns
                    .iter()
                    .map(|language| stat.langs.get(language).copied().unwrap_or(0))
                    .collect(),
            })
            .context("failed to write CSV data row")?;
    }

    writer.flush().context("failed to flush the CSV writer")?;
    Ok(())
}

/// Opens the Git repository located at `repo_path`.
///
/// # Errors
///
/// Returns the underlying error, annotated with the offending path, when opening fails.
fn open_repository(repo_path: &str) -> Result<Repository> {
    let opened = Repository::open(repo_path);
    opened.with_context(|| format!("failed to open the repository at {repo_path}"))
}

/// Walks the history reachable from `HEAD` and computes line statistics for every commit.
///
/// Results are returned in revision-walk order. Two caches keep the work down: one keyed by
/// tree ID (commits sharing a tree reuse the whole result) and one keyed by blob ID and
/// language (unchanged files are parsed only once).
///
/// # Errors
///
/// Fails if the revision walk cannot be set up or advanced, if a commit or its tree cannot be
/// loaded, or if computing a tree's statistics or a commit's timestamp fails.
fn collect_commit_stats(repo: &Repository) -> Result<Vec<CommitStat>> {
    let tokei_config = Config::default();
    let mut tree_cache: HashMap<Oid, TreeStat> = HashMap::new();
    let mut blob_cache: HashMap<(Oid, LanguageType), BlobStat> = HashMap::new();

    let mut revwalk = repo.revwalk().context("failed to create revision walk")?;
    revwalk
        .push_head()
        .context("failed to start revision walk at HEAD")?;

    let mut stats = Vec::new();
    for oid_result in revwalk {
        let oid = oid_result.context("failed to iterate commit IDs in the revision walk")?;
        let commit = repo
            .find_commit(oid)
            .with_context(|| format!("failed to find commit {oid}"))?;
        let tree_id = commit.tree_id();

        let tree_stat = match tree_cache.get(&tree_id) {
            Some(hit) => hit.clone(),
            None => {
                let tree = commit
                    .tree()
                    .with_context(|| format!("failed to load tree for commit {}", commit.id()))?;
                let fresh = compute_tree_stat(repo, &tree, &tokei_config, &mut blob_cache)?;
                // Keep one copy in the cache and hand the other to the commit record.
                tree_cache.insert(tree_id, fresh.clone());
                fresh
            }
        };

        stats.push(build_commit_stat(&commit, tree_stat)?);
    }

    Ok(stats)
}

/// Computes the line statistics of `tree`, starting from an empty relative path.
///
/// `blob_cache` is shared across calls so blobs that were already parsed are not parsed again.
///
/// # Errors
///
/// Propagates any error raised while traversing the tree.
fn compute_tree_stat(
    repo: &Repository,
    tree: &Tree<'_>,
    config: &Config,
    blob_cache: &mut HashMap<(Oid, LanguageType), BlobStat>,
) -> Result<TreeStat> {
    let mut stat = TreeStat {
        total: 0,
        langs: HashMap::new(),
    };
    let root = Path::new("");

    accumulate_tree_stat(
        repo,
        tree,
        root,
        config,
        blob_cache,
        &mut stat.langs,
        &mut stat.total,
    )?;

    Ok(stat)
}

fn accumulate_tree_stat(
    repo: &Repository,
    tree: &Tree<'_>,
    base: &Path,
    config: &Config,
    blob_cache: &mut HashMap<(Oid, LanguageType), BlobStat>,
    langs: &mut HashMap<LanguageType, usize>,
    total: &mut usize,
) -> Result<()> {
    for entry in tree {
        let Some(name) = entry.name() else {
            continue;
        };
        let path = if base.as_os_str().is_empty() {
            PathBuf::from(name)
        } else {
            base.join(name)
        };

        if should_ignore_path(&path, config) {
            continue;
        }

        match entry.kind() {
            Some(ObjectType::Tree) => {
                let subtree = repo
                    .find_tree(entry.id())
                    .with_context(|| format!("failed to load subtree for {}", path.display()))?;
                accumulate_tree_stat(repo, &subtree, &path, config, blob_cache, langs, total)?;
            }
            Some(ObjectType::Blob) => {
                let Some(language) = LanguageType::from_path(&path, config) else {
                    continue;
                };
                let key = (entry.id(), language);
                let blob_stat = if let Some(cached) = blob_cache.get(&key) {
                    *cached
                } else {
                    let blob = repo
                        .find_blob(entry.id())
                        .with_context(|| format!("failed to load blob for {}", path.display()))?;
                    let parsed = language.parse_from_slice(blob.content(), config);
                    let computed = BlobStat {
                        top_lines: parsed.lines(),
                        total_lines: parsed.summarise().lines(),
                    };
                    blob_cache.insert(key, computed);
                    computed
                };

                *total += blob_stat.total_lines;
                if blob_stat.top_lines > 0 {
                    *langs.entry(language).or_insert(0) += blob_stat.top_lines;
                }
            }
            _ => {}
        }
    }

    Ok(())
}

/// Reports whether `path` must be left out of the statistics.
///
/// A path is ignored when any of its normal components is `.git` or `target`, or starts with
/// a dot while hidden files are not enabled in `config` (`config.hidden` unset or `false`).
fn should_ignore_path(path: &Path, config: &Config) -> bool {
    let skip_hidden = !config.hidden.unwrap_or(false);

    for component in path.components() {
        // Only ordinary name components are inspected.
        let Component::Normal(segment) = component else {
            continue;
        };

        if segment == ".git" || segment == "target" {
            return true;
        }
        if skip_hidden && segment.to_string_lossy().starts_with('.') {
            return true;
        }
    }

    false
}

fn build_commit_stat(commit: &Commit<'_>, tree_stat: TreeStat) -> Result<CommitStat> {
    let date = DateTime::from_timestamp(commit.time().seconds(), 0)
        .ok_or_else(|| anyhow!("failed to convert the timestamp for commit {}", commit.id()))?;

    Ok(CommitStat {
        commit: commit.id().to_string(),
        date,
        total: tree_stat.total,
        langs: tree_stat.langs,
    })
}

/// Returns every language that appears in at least one commit, without duplicates.
///
/// The languages come back in the ascending order defined by `LanguageType`'s `Ord`
/// implementation, which fixes the order of the language columns in the output.
fn collect_language_columns(stats: &[CommitStat]) -> Vec<LanguageType> {
    let unique: BTreeSet<LanguageType> = stats
        .iter()
        .flat_map(|stat| stat.langs.keys().copied())
        .collect();

    unique.into_iter().collect()
}