git-cloak 0.0.1

The Invisible Layer for Your Repositories - Manage private, untracked files across Git clones.
use serde::de;
use sha2::{Digest, Sha256};
use std::path::Path;
use std::process::Command;
use thiserror::Error;

/// Maximum number of leading characters of the root commit hash kept in a `git_…` PID.
const GIT_HASH_SHORT_LEN: usize = 16;
/// Maximum number of leading hex characters of the path's SHA-256 digest kept in a `path_…` PID.
const PATH_HASH_SHORT_LEN: usize = 7;

/// Identifier of a project, tagged by how it was derived.
///
/// Both variants carry the *full* serialized string, prefix included
/// (e.g. `"git_abc123"` or `"path_def456"`), so `as_str` can hand it out without allocating.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Pid {
    /// Identifier whose serialized form starts with `git_`.
    Git(String),
    /// Identifier whose serialized form starts with `path_`.
    Path(String),
}

impl Pid {
    /// Borrow the serialized form of this PID, prefix included.
    pub fn as_str(&self) -> &str {
        // Both variants wrap the same payload type, so a single irrefutable pattern suffices.
        let (Pid::Git(inner) | Pid::Path(inner)) = self;
        inner
    }

    /// Name of the variant: `"git"` or `"path"`.
    pub fn kind(&self) -> &'static str {
        match *self {
            Self::Git(..) => "git",
            Self::Path(..) => "path",
        }
    }

    /// Rebuild a `Pid` from its serialized string form (e.g. `"git_abc123"` or `"path_def456"`).
    ///
    /// Returns `None` when `s` starts with neither `git_` nor `path_`, or when nothing
    /// follows the prefix. The text after the prefix is not validated any further.
    pub fn parse(s: &str) -> Option<Pid> {
        // True when `s` begins with `prefix` and at least one more byte follows it.
        let has_payload = |prefix: &str| {
            s.strip_prefix(prefix)
                .map_or(false, |rest| !rest.is_empty())
        };

        if has_payload("git_") {
            Some(Pid::Git(s.to_owned()))
        } else if has_payload("path_") {
            Some(Pid::Path(s.to_owned()))
        } else {
            None
        }
    }
}

impl std::fmt::Display for Pid {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}

/// Serializes a `Pid` as a plain string (its `as_str` form).
impl serde::Serialize for Pid {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}

impl<'de> serde::Deserialize<'de> for Pid {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let s = String::deserialize(deserializer)?;
        Pid::parse(&s).ok_or_else(|| de::Error::custom(format!("invalid PID: {s}")))
    }
}

/// Errors that can occur while computing a [`Pid`].
#[derive(Debug, Error)]
pub enum PidError {
    /// `gix::discover` could not find or open a repository for the given directory.
    #[error("failed to discover git repository: {0}")]
    GitDiscover(#[from] gix::discover::Error),
    /// The repository has no commit reachable from `HEAD`, so there is no root commit to hash.
    #[error("repository HEAD is unborn — no commits yet")]
    UnbornHead,
    /// Invoking `git` did not yield a usable result; the payload is a human-readable
    /// description such as the spawn error or git's stderr.
    #[error("git command failed: {0}")]
    GitCommand(String),
    /// A path-based PID was requested for a path that is not absolute; the payload is that path.
    #[error("path is not absolute: {0}")]
    RelativePath(String),
}

/// Resolve the PID for `dir`, preferring a Git-derived identity.
///
/// 1. Tries [`compute_git_pid`]; on success the result is a `Pid::Git("git_<root_hash_short>")`.
/// 2. If that fails for *any* reason, the git error is discarded and [`compute_path_pid`]
///    is used instead, giving a `Pid::Path("path_<sha256_short>")`.
///
/// # Errors
///
/// Only errors from the path-based fallback are returned (e.g. `dir` is not absolute).
pub fn compute(dir: &Path) -> Result<Pid, PidError> {
    compute_git_pid(dir).or_else(|_git_err| compute_path_pid(dir))
}

/// Compute a Git-based PID from the repository's root (parentless) commit hash.
///
/// The repository is located with `gix::discover`, then `git rev-list --max-parents=0 HEAD`
/// is run against its git directory to list the parentless commits reachable from `HEAD`.
/// This walks the history reachable from `HEAD`, so its cost grows with history size.
/// Returns `git_<first 16 hex chars of root commit hash>`.
///
/// NOTE(review): in a shallow clone the shallow-boundary commits look parentless to
/// `rev-list`, so the PID may differ from that of a full clone — confirm whether that matters.
///
/// # Errors
///
/// * [`PidError::GitDiscover`] — no repository could be discovered from `dir`.
/// * [`PidError::UnbornHead`] — `HEAD` does not resolve to a commit, or no root was listed.
/// * [`PidError::GitCommand`] — `git` could not be spawned, exited unsuccessfully for another
///   reason, or printed something that is not a hex object id.
pub fn compute_git_pid(dir: &Path) -> Result<Pid, PidError> {
    let repo = gix::discover(dir)?;
    let git_dir = repo.git_dir().to_owned();

    // The child deliberately inherits this process's working directory: the git directory
    // reported by gix may be relative to it, and switching the child into `dir` would make
    // git resolve a relative GIT_DIR against the wrong base.
    let output = Command::new("git")
        .args(["rev-list", "--max-parents=0", "HEAD"]) // TODO situ: check if there is a better way to do this, using gix directly
        .env("GIT_DIR", &git_dir)
        // Force untranslated messages so the stderr matching below works under any user locale.
        .env("LC_ALL", "C")
        .output()
        .map_err(|e| PidError::GitCommand(e.to_string()))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        // Messages git prints when HEAD does not point at any commit yet.
        if stderr.contains("unknown revision") || stderr.contains("bad default revision") {
            return Err(PidError::UnbornHead);
        }
        return Err(PidError::GitCommand(stderr.into_owned()));
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    // Multiple roots possible; rev-list prints newest first, so the last non-blank line is
    // the chronologically earliest root.
    let hash = stdout
        .lines()
        .rev()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .ok_or(PidError::UnbornHead)?;

    // Guard against unexpected output; this also guarantees the byte slice below is on
    // character boundaries.
    if !hash.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Err(PidError::GitCommand(format!(
            "unexpected `git rev-list` output: {hash}"
        )));
    }

    let short = &hash[..GIT_HASH_SHORT_LEN.min(hash.len())];
    Ok(Pid::Git(format!("git_{short}")))
}

/// Derive a path-based PID by hashing the textual form of an absolute path.
///
/// The path is converted with `to_string_lossy` (so non-UTF-8 components are replaced,
/// not preserved), hashed with SHA-256, hex-encoded, and truncated.
/// Returns `path_<first 7 hex chars of SHA256>`.
///
/// # Errors
///
/// [`PidError::RelativePath`] when `dir` is not absolute. The path is hashed as given;
/// it is not canonicalized or checked for existence.
pub fn compute_path_pid(dir: &Path) -> Result<Pid, PidError> {
    if dir.is_absolute() {
        let digest = Sha256::digest(dir.to_string_lossy().as_bytes());
        let encoded = hex::encode(digest);
        let keep = encoded.len().min(PATH_HASH_SHORT_LEN);
        let short = &encoded[..keep];
        Ok(Pid::Path(format!("path_{short}")))
    } else {
        Err(PidError::RelativePath(dir.display().to_string()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Directory containing this crate's manifest; the git-based tests expect it to be
    /// inside a git checkout with at least one commit.
    fn manifest_dir() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
    }

    // --- git-based PIDs -------------------------------------------------------------------

    #[test]
    fn git_pid_format() {
        // Computed for the crate's own checkout, so the git branch should win.
        let pid = compute(&manifest_dir()).unwrap();
        if let Pid::Git(s) = &pid {
            assert!(s.starts_with("git_"));
            // Prefix plus the truncated root hash.
            assert_eq!(s.len(), "git_".len() + GIT_HASH_SHORT_LEN);
        } else {
            panic!("expected Git PID, got {:?}", pid);
        }
    }

    #[test]
    fn git_pid_is_stable() {
        let dir = manifest_dir();
        let first = compute_git_pid(&dir).unwrap();
        let second = compute_git_pid(&dir).unwrap();
        assert_eq!(first, second);
    }

    // --- path-based PIDs ------------------------------------------------------------------

    #[test]
    fn path_pid_format() {
        // TODO situ use temp dir generator crate
        let pid = compute_path_pid(Path::new("/tmp/some-non-git-dir")).unwrap();
        if let Pid::Path(s) = &pid {
            assert!(s.starts_with("path_"));
            assert_eq!(s.len(), "path_".len() + PATH_HASH_SHORT_LEN);
        } else {
            panic!("expected Path PID, got {:?}", pid);
        }
    }

    #[test]
    fn path_pid_is_stable() {
        let target = Path::new("/foo/bar");
        let first = compute_path_pid(target).unwrap();
        let second = compute_path_pid(target).unwrap();
        assert_eq!(first, second);
    }

    #[test]
    fn path_pid_differs_for_different_paths() {
        let bar = compute_path_pid(Path::new("/foo/bar")).unwrap();
        let baz = compute_path_pid(Path::new("/foo/baz")).unwrap();
        assert_ne!(bar, baz);
    }

    #[test]
    fn path_pid_rejects_relative() {
        assert!(compute_path_pid(Path::new("relative/path")).is_err());
    }

    // --- parsing and serialization --------------------------------------------------------

    #[test]
    fn parse_git_pid() {
        assert_eq!(
            Pid::parse("git_abc123"),
            Some(Pid::Git("git_abc123".into()))
        );
    }

    #[test]
    fn parse_path_pid() {
        assert_eq!(
            Pid::parse("path_def456"),
            Some(Pid::Path("path_def456".into()))
        );
    }

    #[test]
    fn parse_rejects_invalid() {
        // Empty input, unknown prefix, and known prefixes with nothing after them.
        for bad in &["", "unknown_abc", "git_", "path_"] {
            assert!(Pid::parse(bad).is_none());
        }
    }

    #[test]
    fn serde_round_trip() {
        let original = Pid::Git("git_abc123def456ab".into());
        let json = serde_json::to_string(&original).unwrap();
        assert_eq!(json, "\"git_abc123def456ab\"");
        let restored: Pid = serde_json::from_str(&json).unwrap();
        assert_eq!(restored, original);
    }
}