gobby-code 1.3.3

Fast Rust CLI for Gobby's code index — AST-aware search, symbol navigation, and dependency graph
Documentation
use std::collections::{BTreeMap, btree_map::Entry};
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
use std::process::{ExitStatus, Stdio};
use std::time::{Duration, Instant};

use anyhow::Context;
use sha2::{Digest, Sha256};
use wait_timeout::ChildExt;

use super::{
    CachedBlameSummary, OwnershipContributor, OwnershipMeta, OwnershipOptions, OwnershipStatus,
};
use crate::index::hasher;

/// Result of attempting to blame a single file, as produced by
/// `blame_file_contributors_with_timeout`.
#[derive(Debug)]
enum BlameContributorsOutcome {
    /// Blame finished and its output was parsed into contributors.
    Success(Vec<OwnershipContributor>),
    /// The blame process did not finish within the allotted time.
    Timeout,
    /// Blame could not be run, or it exited unsuccessfully.
    Error(anyhow::Error),
}

/// Builds a map from file path to its blame-derived contributors.
///
/// Returns an empty map when no git repository (or no resolvable `HEAD`) can
/// be found from `project_root`; otherwise sets `status.blame_available`.
///
/// For every entry in `files`, in order:
/// * files whose content cannot be hashed are skipped;
/// * a cached summary in `meta.files` with a matching content hash is reused;
/// * otherwise `git blame` is run, and a successful result is stored back
///   into `meta.files` as well as in the returned map.
///
/// Processing stops early, with `status.partial` set, once
/// `options.blame_timeout` has elapsed, once `options.blame_file_cap`
/// uncached files have already been attempted, or when a blame run times out.
/// A failing blame run marks `status.partial` and `status.blame_errors` but
/// does not stop the loop.
pub(super) fn derived_owners_for_files(
    project_root: &Path,
    files: &[String],
    meta: &mut OwnershipMeta,
    options: &OwnershipOptions,
    status: &mut OwnershipStatus,
) -> BTreeMap<String, Vec<OwnershipContributor>> {
    let mut owners = BTreeMap::new();

    let repo = match gix::discover(project_root) {
        Ok(repo) => repo,
        Err(_) => return owners,
    };
    let head = match repo.head_id() {
        Ok(id) => id.detach(),
        Err(_) => return owners,
    };
    status.blame_available = true;

    let clock = Instant::now();
    // Number of files that missed the cache and therefore needed a blame run.
    let mut uncached_attempts = 0usize;

    for file in files {
        if clock.elapsed() >= options.blame_timeout {
            status.partial = true;
            break;
        }

        let file_hash = match content_hash(project_root, file) {
            Some(hash) => hash,
            None => continue,
        };

        // Reuse the cached summary only while the file content is unchanged.
        match meta.files.get(file) {
            Some(entry) if entry.content_hash == file_hash => {
                owners.insert(file.clone(), entry.contributors.clone());
                continue;
            }
            _ => {}
        }

        if uncached_attempts >= options.blame_file_cap {
            status.partial = true;
            break;
        }
        uncached_attempts += 1;

        // Each blame run only gets whatever is left of the overall budget.
        let budget = options.blame_timeout.saturating_sub(clock.elapsed());
        let contributors =
            match blame_file_contributors_with_timeout(project_root, head, file, budget) {
                BlameContributorsOutcome::Success(contributors) => contributors,
                BlameContributorsOutcome::Timeout => {
                    status.partial = true;
                    break;
                }
                BlameContributorsOutcome::Error(error) => {
                    log::debug!("codewiki ownership git blame failed for `{file}`: {error}");
                    status.partial = true;
                    status.blame_errors = true;
                    continue;
                }
            };

        meta.files.insert(
            file.clone(),
            CachedBlameSummary {
                content_hash: file_hash,
                contributors: contributors.clone(),
            },
        );
        owners.insert(file.clone(), contributors);
    }

    owners
}

/// Returns the content hash of `file` (resolved against `project_root`), or
/// `None` when `hasher::file_content_hash` reports an error.
pub(super) fn content_hash(project_root: &Path, file: &str) -> Option<String> {
    let absolute = project_root.join(file);
    match hasher::file_content_hash(&absolute) {
        Ok(hash) => Some(hash),
        Err(_) => None,
    }
}

fn blame_file_contributors_with_timeout(
    project_root: &Path,
    head: gix::ObjectId,
    file: &str,
    timeout: Duration,
) -> BlameContributorsOutcome {
    match blame_file_contributors(project_root, head, file, timeout) {
        Ok(Some(contributors)) => BlameContributorsOutcome::Success(contributors),
        Ok(None) => BlameContributorsOutcome::Timeout,
        Err(error) => BlameContributorsOutcome::Error(error),
    }
}

/// Captured result of one finished `git blame` invocation.
pub(super) struct GitBlameOutput {
    /// Exit status reported by the git process.
    status: ExitStatus,
    /// Everything the process wrote to stdout, decoded lossily as UTF-8.
    stdout: String,
    /// Everything the process wrote to stderr, decoded lossily as UTF-8.
    stderr: String,
}

/// Blames `file` at revision `head` using the `git` executable found on the
/// search path and summarises the result.
///
/// Returns `Ok(None)` when the blame process did not finish within `timeout`.
///
/// # Errors
///
/// Fails when the process cannot be run or exits unsuccessfully; in the
/// latter case the trimmed stderr text, if any, is appended to the message.
fn blame_file_contributors(
    project_root: &Path,
    head: gix::ObjectId,
    file: &str,
    timeout: Duration,
) -> anyhow::Result<Option<Vec<OwnershipContributor>>> {
    let revision = head.to_string();
    let output = match git_blame_output_with_timeout(
        Path::new("git"),
        project_root,
        &revision,
        file,
        timeout,
    )? {
        Some(output) => output,
        None => return Ok(None),
    };

    if output.status.success() {
        return Ok(Some(parse_git_blame_porcelain(&output.stdout)));
    }

    match output.stderr.trim() {
        "" => anyhow::bail!("git blame failed for `{file}`"),
        stderr => anyhow::bail!("git blame failed for `{file}`: {stderr}"),
    }
}

/// Runs `<git> -C <project_root> blame --line-porcelain <head> -- <file>` and
/// waits at most `timeout` for it to finish.
///
/// Returns `Ok(Some(_))` with the exit status and captured stdout/stderr when
/// the process finished in time, and `Ok(None)` when it had to be killed
/// because the timeout expired.
///
/// # Errors
///
/// Fails when the temp files cannot be created or duplicated, when the
/// process cannot be spawned, when waiting on it fails, or when the captured
/// output cannot be read back.
pub(super) fn git_blame_output_with_timeout(
    git: &Path,
    project_root: &Path,
    head: &str,
    file: &str,
    timeout: Duration,
) -> anyhow::Result<Option<GitBlameOutput>> {
    // Output goes to anonymous temp files instead of pipes; presumably so the
    // child can never stall on a full pipe while we are only waiting on it.
    let mut stdout_file = tempfile::tempfile().context("create git blame stdout temp file")?;
    let mut stderr_file = tempfile::tempfile().context("create git blame stderr temp file")?;
    let mut child = std::process::Command::new(git)
        .arg("-C")
        .arg(project_root)
        .args(["blame", "--line-porcelain", head, "--", file])
        .stdout(Stdio::from(stdout_file.try_clone()?))
        .stderr(Stdio::from(stderr_file.try_clone()?))
        .spawn()
        .with_context(|| format!("spawn git blame for `{file}`"))?;
    let status = match child.wait_timeout(timeout) {
        Ok(Some(status)) => status,
        Ok(None) => {
            // Deadline passed: stop the process and reap it. Failures here
            // are ignored because there is nothing further to clean up.
            let _ = child.kill();
            let _ = child.wait();
            return Ok(None);
        }
        Err(error) => {
            // Dropping a `Child` neither kills nor reaps it, so a failed wait
            // must not leave git running in the background.
            let _ = child.kill();
            let _ = child.wait();
            return Err(error.into());
        }
    };
    Ok(Some(GitBlameOutput {
        status,
        stdout: read_tempfile(&mut stdout_file)?,
        stderr: read_tempfile(&mut stderr_file)?,
    }))
}

/// Reads the whole of `file` from its beginning and returns it as text,
/// replacing any invalid UTF-8 sequences with the replacement character.
///
/// # Errors
///
/// Fails when seeking to the start or reading the file fails.
fn read_tempfile(file: &mut std::fs::File) -> anyhow::Result<String> {
    let mut raw = Vec::new();
    file.seek(SeekFrom::Start(0))
        .and_then(|_| file.read_to_end(&mut raw))?;
    let text = match String::from_utf8(raw) {
        // Already valid UTF-8: reuse the buffer as-is.
        Ok(text) => text,
        Err(invalid) => String::from_utf8_lossy(invalid.as_bytes()).into_owned(),
    };
    Ok(text)
}

/// Summarises `git blame --line-porcelain` output into per-contributor line
/// counts.
///
/// `author ` and `author-mail ` header lines update the identity that the
/// following tab-prefixed content line is attributed to; every other line is
/// ignored. Contributors are grouped by `contributor_id`, ordered by line
/// count (descending), then name, then id, and only the first five are kept.
fn parse_git_blame_porcelain(raw: &str) -> Vec<OwnershipContributor> {
    // contributor id -> (display name, email, attributed line count)
    let mut tallies: BTreeMap<String, (String, Option<String>, usize)> = BTreeMap::new();
    let mut author: Option<String> = None;
    let mut mail: Option<String> = None;

    for line in raw.lines() {
        if let Some(value) = line.strip_prefix("author ") {
            author = Some(value.to_string());
        } else if let Some(value) = line.strip_prefix("author-mail ") {
            mail = git_blame_email(value);
        } else if line.starts_with('\t') {
            // A tab-prefixed line is attributed to the latest identity seen.
            let name = match &author {
                Some(known) => known.clone(),
                None => "Unknown".to_string(),
            };
            let email = mail.clone();
            let id = contributor_id(&name, email.as_deref());
            match tallies.entry(id) {
                Entry::Vacant(slot) => {
                    slot.insert((name, email, 1));
                }
                Entry::Occupied(slot) => {
                    let (known_name, known_email, count) = slot.into_mut();
                    retain_deterministic_identity(known_name, known_email, &name, &email);
                    *count += 1;
                }
            }
        }
    }

    let mut ranked: Vec<OwnershipContributor> = Vec::with_capacity(tallies.len());
    for (contributor_id, (name, email, lines)) in tallies {
        ranked.push(OwnershipContributor {
            contributor_id,
            name,
            email,
            lines,
        });
    }
    // Most lines first; ties broken by name and then by contributor id.
    ranked.sort_by(|left, right| {
        (right.lines, &left.name, &left.contributor_id).cmp(&(
            left.lines,
            &right.name,
            &right.contributor_id,
        ))
    });
    ranked.truncate(5);
    ranked
}

/// Normalises the value of an `author-mail` line.
///
/// Surrounding whitespace is trimmed and, when the value is wrapped in a
/// matching `<`…`>` pair, the brackets are removed. Returns `None` when
/// nothing is left.
fn git_blame_email(value: &str) -> Option<String> {
    let raw = value.trim();
    // Only strip the brackets when both the opening and closing one exist.
    let address = match raw.strip_prefix('<') {
        Some(rest) => rest.strip_suffix('>').unwrap_or(raw),
        None => raw,
    };
    if address.is_empty() {
        None
    } else {
        Some(address.to_owned())
    }
}

/// Derives a stable identifier for a contributor.
///
/// The trimmed email is used as the identity when it is present and
/// non-empty, otherwise the name. The identity is trimmed, ASCII-lowercased,
/// hashed with SHA-256, and returned as lowercase hexadecimal.
fn contributor_id(name: &str, email: Option<&str>) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let key = match email.map(str::trim) {
        Some(address) if !address.is_empty() => address,
        _ => name,
    };
    let normalized = key.trim().to_ascii_lowercase();
    let digest = Sha256::digest(normalized.as_bytes());

    // Two lowercase hex digits per digest byte, high nibble first.
    let mut hex = String::new();
    for &byte in digest.iter() {
        hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}

/// Merges a newly seen `name`/`email` pair into the stored identity so the
/// outcome does not depend on the order in which pairs are seen.
///
/// The stored name is replaced when `name` sorts before it. The stored email
/// is replaced when `email` is present and either nothing is stored yet or
/// the new value sorts before the stored one. Name and email are compared
/// independently of each other.
pub(super) fn retain_deterministic_identity(
    stored_name: &mut String,
    stored_email: &mut Option<String>,
    name: &str,
    email: &Option<String>,
) {
    if stored_name.as_str() > name {
        stored_name.clear();
        stored_name.push_str(name);
    }

    // A missing incoming email never changes what is stored.
    let Some(candidate) = email else {
        return;
    };
    let should_replace = match stored_email.as_deref() {
        Some(current) => candidate.as_str() < current,
        None => true,
    };
    if should_replace {
        *stored_email = Some(candidate.clone());
    }
}