open-kioku-git 2.0.1

Git repository helpers for Open Kioku indexing and analysis.
Documentation
use open_kioku_errors::{OkError, Result};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;

/// One directed "changed together" relationship between two files, aggregated
/// over the commits scanned by [`cochange_records`].
///
/// Every unordered pair of files yields two records, one in each direction.
#[derive(Debug, Clone, PartialEq)]
pub struct CochangeRecord {
    /// The file this record is about, as printed by `git log --name-only`.
    pub path: PathBuf,
    /// The file that appeared in the same commit(s) as `path`.
    pub cochanged_path: PathBuf,
    /// Number of scanned commits that contained both files.
    pub commit_count: usize,
    /// Sum of the per-commit recency weights of those commits; commits listed
    /// earlier in the `git log` output contribute more.
    pub recency_weight: f32,
    /// Whether `cochanged_path` looks like a test file.
    pub test_corun: bool,
    /// Ids of the commits that contained both files, capped at the first five
    /// encountered in the scan.
    pub commits: Vec<String>,
}

/// Finds the project root that contains `start`.
///
/// `start` is canonicalized, then `start` itself and each of its ancestors are
/// checked in turn for a `.git` entry or an `ok.toml` file. The first directory
/// that has either one is returned. When no ancestor qualifies, the
/// canonicalized `start` is returned unchanged.
///
/// # Errors
///
/// Propagates the I/O error raised when `start` cannot be canonicalized (for
/// example because it does not exist).
pub fn discover_root(start: impl AsRef<Path>) -> Result<PathBuf> {
    // Canonicalize exactly once: the same value seeds the upward walk and
    // serves as the fallback, so the result cannot change (or start failing)
    // if the filesystem is modified while we are searching.
    let origin = start.as_ref().canonicalize()?;
    let found = origin
        .ancestors()
        .find(|dir| dir.join(".git").exists() || dir.join("ok.toml").exists())
        .map(Path::to_path_buf);
    Ok(found.unwrap_or(origin))
}

/// Returns the name of the branch that `HEAD` points at, if any.
///
/// Reads `<root>/.git/HEAD`; when its contents begin with `ref: refs/heads/`
/// the remainder is returned with surrounding whitespace trimmed. Yields
/// `None` when the file cannot be read or does not start with that prefix
/// (for instance when `HEAD` holds a raw commit id).
pub fn branch(root: impl AsRef<Path>) -> Option<String> {
    let head_file = root.as_ref().join(".git").join("HEAD");
    let contents = fs::read_to_string(head_file).ok()?;
    contents
        .strip_prefix("ref: refs/heads/")
        .map(|name| name.trim().to_string())
}

/// Returns the commit id that `HEAD` currently resolves to, if it can be read.
///
/// * When `<root>/.git/HEAD` does not start with `ref: `, its trimmed contents
///   are returned directly (detached `HEAD`).
/// * Otherwise the referenced name is looked up first as a loose ref file under
///   `.git/`, and then in `.git/packed-refs`, which is where git keeps refs
///   after `git pack-refs` / `git gc` has removed the loose files.
///
/// Returns `None` when `HEAD` cannot be read or the reference cannot be
/// resolved by either lookup.
pub fn commit(root: impl AsRef<Path>) -> Option<String> {
    let git_dir = root.as_ref().join(".git");
    let head = fs::read_to_string(git_dir.join("HEAD")).ok()?;
    if !head.starts_with("ref: ") {
        return Some(head.trim().to_string());
    }
    let reference = head.trim().strip_prefix("ref: ")?;

    // A loose ref file takes precedence over a packed entry of the same name.
    if let Ok(value) = fs::read_to_string(git_dir.join(reference)) {
        return Some(value.trim().to_string());
    }

    // Fall back to packed refs. Each entry is `<id> <refname>`; the `#` header
    // line never matches a ref name, and `^<id>` peel lines contain no space,
    // so both are skipped by the lookup below.
    let packed = fs::read_to_string(git_dir.join("packed-refs")).ok()?;
    packed.lines().find_map(|line| {
        let (id, name) = line.split_once(' ')?;
        (name.trim() == reference).then(|| id.to_string())
    })
}

/// Resolves `root` through [`discover_root`] and returns the resulting path.
///
/// # Errors
///
/// Propagates any error from [`discover_root`], and returns
/// `OkError::Repository` when the resolved path does not exist on disk.
pub fn require_repo(root: impl AsRef<Path>) -> Result<PathBuf> {
    let resolved = discover_root(root)?;
    if resolved.exists() {
        Ok(resolved)
    } else {
        Err(OkError::Repository(format!(
            "repository root does not exist: {}",
            resolved.display()
        )))
    }
}

/// Mines recent git history for files that were changed in the same commit.
///
/// Runs `git log --name-only` in `root` over at most `max_commits` commits and
/// emits one [`CochangeRecord`] per *ordered* pair of distinct files that
/// appeared together in a commit.
///
/// * Commits that touch fewer than two, or more than `max_files_per_commit`,
///   eligible files are ignored.
/// * Each contributing commit adds `1 / (1 + idx / 25)` to the pair's
///   `recency_weight`, where `idx` is the commit's position in the `git log`
///   output (0 for the first commit listed).
/// * At most five commit ids are retained per pair.
///
/// The result is sorted by descending `recency_weight`, then descending
/// `commit_count`, then by `path` and `cochanged_path`.
///
/// An empty vector is returned when `root` has no `.git` entry, when
/// `max_commits` is 0, when `max_files_per_commit` is below 2, or when git
/// exits with a non-zero status.
///
/// # Errors
///
/// Returns `OkError::Repository` when the `git` process cannot be started.
pub fn cochange_records(
    root: impl AsRef<Path>,
    max_commits: usize,
    max_files_per_commit: usize,
) -> Result<Vec<CochangeRecord>> {
    let root = root.as_ref();
    if !root.join(".git").exists() || max_commits == 0 || max_files_per_commit < 2 {
        return Ok(Vec::new());
    }
    let output = Command::new("git")
        .arg("-C")
        .arg(root)
        // With git's default `core.quotePath=true`, non-ASCII file names are
        // printed as C-quoted, octal-escaped strings, which would end up as
        // bogus paths in the records. Ask for the raw bytes instead.
        .arg("-c")
        .arg("core.quotepath=false")
        .arg("log")
        .arg(format!("--max-count={max_commits}"))
        .arg("--name-only")
        .arg("--pretty=format:commit:%H")
        .output()
        .map_err(|err| OkError::Repository(format!("git history scan failed: {err}")))?;
    if !output.status.success() {
        // Best effort: a git failure is treated as "no history available".
        return Ok(Vec::new());
    }

    // Parse the log into (commit id, changed files) groups. A `commit:` line
    // opens a new group; every other eligible line is a file of that group.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut commits = Vec::new();
    let mut current_sha: Option<String> = None;
    let mut current_files = Vec::new();
    for line in stdout.lines() {
        if let Some(sha) = line.strip_prefix("commit:") {
            push_commit(&mut commits, current_sha.take(), &mut current_files);
            current_sha = Some(sha.trim().to_string());
        } else {
            let path = line.trim();
            if is_history_path(path) {
                current_files.push(PathBuf::from(path));
            }
        }
    }
    // Flush the final group, which has no following `commit:` line.
    push_commit(&mut commits, current_sha, &mut current_files);

    let mut pairs: HashMap<(PathBuf, PathBuf), CochangeRecord> = HashMap::new();
    for (idx, (sha, mut files)) in commits.into_iter().enumerate() {
        files.sort();
        files.dedup();
        if files.len() < 2 || files.len() > max_files_per_commit {
            continue;
        }
        // `idx` counts every parsed commit, including skipped ones, so the
        // weight reflects the commit's position in the log output.
        let recency_weight = 1.0 / (1.0 + idx as f32 / 25.0);
        for left in &files {
            for right in &files {
                if left == right {
                    continue;
                }
                let key = (left.clone(), right.clone());
                let entry = pairs.entry(key).or_insert_with(|| CochangeRecord {
                    path: left.clone(),
                    cochanged_path: right.clone(),
                    commit_count: 0,
                    recency_weight: 0.0,
                    // Depends only on `right`, which is part of the key, so it
                    // is fixed for the lifetime of the entry.
                    test_corun: is_test_path(right),
                    commits: Vec::new(),
                });
                entry.commit_count += 1;
                entry.recency_weight += recency_weight;
                if entry.commits.len() < 5 {
                    entry.commits.push(sha.clone());
                }
            }
        }
    }

    let mut records = pairs.into_values().collect::<Vec<_>>();
    records.sort_by(|a, b| {
        b.recency_weight
            .partial_cmp(&a.recency_weight)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| b.commit_count.cmp(&a.commit_count))
            .then_with(|| a.path.cmp(&b.path))
            .then_with(|| a.cochanged_path.cmp(&b.cochanged_path))
    });
    Ok(records)
}

/// Appends `(sha, files)` to `commits`, leaving `files` empty afterwards.
///
/// Does nothing — and leaves `files` untouched — when `sha` is `None`.
fn push_commit(
    commits: &mut Vec<(String, Vec<PathBuf>)>,
    sha: Option<String>,
    files: &mut Vec<PathBuf>,
) {
    let id = match sha {
        Some(id) => id,
        None => return,
    };
    // Move the accumulated paths out so the caller can reuse `files`.
    let changed = std::mem::take(files);
    commits.push((id, changed));
}

/// Decides whether a line of `git log --name-only` output should be recorded
/// as a changed file.
///
/// Rejects empty strings, entries ending in `/`, anything under `.git/` or
/// `.ok/`, and entries containing `=>` (presumably git's rename notation —
/// confirm against the log format in use).
fn is_history_path(path: &str) -> bool {
    if path.is_empty() || path.ends_with('/') {
        return false;
    }
    let internal = [".git/", ".ok/"]
        .iter()
        .any(|prefix| path.starts_with(*prefix));
    !internal && !path.contains("=>")
}

/// Heuristically decides whether `path` refers to a test file.
///
/// The check is ASCII-case-insensitive and matches when the path
///
/// * lies inside a `test/` or `tests/` directory at any depth, including one
///   at the very start of the path (repository-relative paths have no leading
///   slash, so a top-level `tests/…` must be matched by prefix), or
/// * ends in `_test.rs`, `_test.go`, `.test.ts`, `.spec.ts`, `test.java` or
///   `tests.java`.
fn is_test_path(path: &Path) -> bool {
    let value = path.to_string_lossy().to_ascii_lowercase();
    value.starts_with("test/")
        || value.starts_with("tests/")
        || value.contains("/test/")
        || value.contains("/tests/")
        || value.ends_with("_test.rs")
        || value.ends_with("_test.go")
        || value.ends_with(".test.ts")
        || value.ends_with(".spec.ts")
        || value.ends_with("test.java")
        || value.ends_with("tests.java")
}

#[cfg(test)]
mod tests {
    use super::cochange_records;
    use std::path::Path;
    use std::process::Command;

    /// Runs `git -C <root> <args…>` and panics unless it exits successfully.
    fn run(root: &Path, args: &[&str]) {
        let mut git = Command::new("git");
        git.arg("-C").arg(root).args(args);
        let status = git.status().unwrap();
        assert!(status.success(), "git {args:?} failed");
    }

    /// Writes `content` to `root/path`, creating parent directories as needed.
    fn write(root: &Path, path: &str, content: &str) {
        let target = root.join(path);
        std::fs::create_dir_all(target.parent().unwrap()).unwrap();
        std::fs::write(target, content).unwrap();
    }

    /// Two commits, each adding a source file together with its test file: the
    /// pair from the later commit must carry the larger recency weight.
    #[test]
    fn cochange_records_apply_recency_and_test_corun() {
        let dir = tempfile::tempdir().unwrap();
        let repo = dir.path();
        run(repo, &["init"]);
        run(repo, &["config", "user.email", "test@example.com"]);
        run(repo, &["config", "user.name", "Test User"]);

        // (source path, source body, test path, test body, commit message)
        let history = [
            (
                "src/old.rs",
                "fn old() {}\n",
                "tests/old_test.rs",
                "#[test] fn old_test() {}\n",
                "old pair",
            ),
            (
                "src/new.rs",
                "fn new() {}\n",
                "tests/new_test.rs",
                "#[test] fn new_test() {}\n",
                "new pair",
            ),
        ];
        for &(source, source_body, test, test_body, message) in history.iter() {
            write(repo, source, source_body);
            write(repo, test, test_body);
            run(repo, &["add", "."]);
            run(repo, &["commit", "-m", message]);
        }

        let records = cochange_records(repo, 20, 10).unwrap();
        let new_pair = records
            .iter()
            .find(|r| {
                r.path == Path::new("src/new.rs")
                    && r.cochanged_path == Path::new("tests/new_test.rs")
            })
            .unwrap();
        let old_pair = records
            .iter()
            .find(|r| {
                r.path == Path::new("src/old.rs")
                    && r.cochanged_path == Path::new("tests/old_test.rs")
            })
            .unwrap();

        assert!(new_pair.test_corun);
        assert!(new_pair.recency_weight > old_pair.recency_weight);
        assert_eq!(new_pair.commit_count, 1);
    }
}