agent-source-repository 0.1.0

Agent Source Repository: a local context registry for coding agents
Documentation
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

use anyhow::{bail, Context, Result};
use once_cell::sync::Lazy;
use regex::Regex;

/// Matches the leading `user@host:` part of an scp-style remote such as `git@example.com:`.
///
/// Only the prefix is matched; what follows the colon is inspected by the caller.
static SCP_GIT_URL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[\w.-]+@[\w.-]+:").unwrap());

/// Scheme prefixes that mark a source string as a Git URL (compared case-sensitively).
const GIT_URL_SCHEMES: &[&str] = &[
    "https://",
    "http://",
    "ssh://",
    "git://",
    "git+ssh://",
    "file://",
];

/// Upper bound on how long a `git clone` may run before the child process is killed.
const GIT_CLONE_TIMEOUT: Duration = Duration::from_secs(120);
/// Sleep between successive checks on whether the child process has exited.
const GIT_POLL_INTERVAL: Duration = Duration::from_millis(100);

/// Reports whether `path` should be treated as a Git remote rather than a local path.
///
/// A string qualifies when it starts with one of `GIT_URL_SCHEMES`, or when it has an
/// scp-style `user@host:` prefix whose remainder does not begin with `/`.
pub(crate) fn is_git_url(path: &str) -> bool {
    let has_known_scheme = GIT_URL_SCHEMES
        .iter()
        .any(|&scheme| path.starts_with(scheme));
    if has_known_scheme {
        return true;
    }

    match SCP_GIT_URL_RE.find(path) {
        // Everything after `user@host:` must not start with a slash.
        Some(prefix) => !path[prefix.end()..].starts_with('/'),
        None => false,
    }
}

/// A directory of sources: either an existing local directory or a temporary Git clone.
pub(crate) struct SourceTree {
    /// Directory that holds the sources.
    root: PathBuf,
    /// Clone directory that is removed when the value is dropped; `None` for local paths.
    temp_dir: Option<PathBuf>,
}

impl SourceTree {
    pub(crate) fn from_source(source: &str, ref_: Option<&str>) -> Result<Self> {
        if is_git_url(source) {
            Self::from_git(source, ref_)
        } else {
            Self::from_path(source)
        }
    }

    pub(crate) fn from_path(path: impl AsRef<Path>) -> Result<Self> {
        let path = path.as_ref();
        if !path.exists() {
            bail!("Path does not exist: {}", path.display());
        }
        if !path.is_dir() {
            bail!("Path is not a directory: {}", path.display());
        }

        Ok(Self {
            root: path.canonicalize().context("Failed to resolve path")?,
            temp_dir: None,
        })
    }

    pub(crate) fn from_git(url: &str, ref_: Option<&str>) -> Result<Self> {
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .context("system time is before UNIX epoch")?
            .as_nanos();
        let tmp_dir =
            std::env::temp_dir().join(format!("asr-clone-{}-{unique}", std::process::id()));
        let stderr_path = tmp_dir.with_extension("git-stderr");
        std::fs::create_dir_all(&tmp_dir)?;

        let mut cmd = Command::new("git");
        cmd.args(["clone", "--depth", "1"]);
        if let Some(r) = ref_ {
            cmd.args(["--branch", r]);
        }
        cmd.args(["--", url, &tmp_dir.to_string_lossy()]);
        cmd.stdin(std::process::Stdio::null());
        cmd.stdout(std::process::Stdio::null());
        cmd.stderr(std::process::Stdio::from(
            std::fs::File::create(&stderr_path).context("failed to create git stderr log")?,
        ));

        let output =
            output_with_timeout(&mut cmd, GIT_CLONE_TIMEOUT, &stderr_path).inspect_err(|_| {
                let _ = std::fs::remove_dir_all(&tmp_dir);
                let _ = std::fs::remove_file(&stderr_path);
            })?;
        let _ = std::fs::remove_file(&stderr_path);

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            let _ = std::fs::remove_dir_all(&tmp_dir);
            bail!("git clone failed for {url:?}:\n{}", stderr.trim());
        }

        Ok(Self {
            root: tmp_dir.canonicalize().unwrap_or_else(|_| tmp_dir.clone()),
            temp_dir: Some(tmp_dir),
        })
    }

    pub(crate) fn root(&self) -> &Path {
        &self.root
    }

    pub(crate) fn is_temporary(&self) -> bool {
        self.temp_dir.is_some()
    }
}

/// Spawns `cmd` and polls it every `GIT_POLL_INTERVAL` until it exits or `timeout` elapses.
///
/// On exit, the returned [`Output`] carries the exit status, an empty `stdout`, and whatever
/// bytes can be read from `stderr_path` (empty if the file cannot be read). The caller is
/// presumably expected to have redirected the child's stderr to that file.
///
/// # Errors
///
/// Fails when the process cannot be spawned, when polling it fails, or when `timeout` is
/// reached; in the last case the child is killed and reaped, and any non-blank content of
/// `stderr_path` is appended to the message.
fn output_with_timeout(cmd: &mut Command, timeout: Duration, stderr_path: &Path) -> Result<Output> {
    let mut child = match cmd.spawn() {
        Ok(child) => child,
        Err(e) => bail!("git is not installed or not on PATH: {e}"),
    };
    let started = Instant::now();

    loop {
        let exit = child
            .try_wait()
            .context("failed while waiting for git clone")?;
        if let Some(status) = exit {
            return Ok(Output {
                status,
                stdout: Vec::new(),
                stderr: std::fs::read(stderr_path).unwrap_or_default(),
            });
        }

        // Still running and still within budget: wait a little and poll again.
        if started.elapsed() < timeout {
            std::thread::sleep(GIT_POLL_INTERVAL);
            continue;
        }

        // Out of time: stop the child and reap it before reporting.
        let _ = child.kill();
        let _ = child.wait();
        let stderr_hint = match std::fs::read_to_string(stderr_path) {
            Ok(text) if !text.trim().is_empty() => format!("\nstderr: {}", text.trim()),
            _ => String::new(),
        };
        bail!(
            "git clone timed out after {} seconds{stderr_hint}",
            timeout.as_secs()
        );
    }
}

impl Drop for SourceTree {
    /// Removes the temporary clone directory, if there is one; removal errors are ignored.
    fn drop(&mut self) {
        let Some(clone_dir) = self.temp_dir.as_deref() else {
            return;
        };
        let _ = std::fs::remove_dir_all(clone_dir);
    }
}

#[cfg(test)]
mod tests {
    use super::output_with_timeout;
    use std::process::Command;
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    /// Builds a timestamped path in the system temp directory for a captured-stderr file.
    fn stderr_path(name: &str) -> std::path::PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time should be after unix epoch")
            .as_nanos();
        let file_name = format!("asr-source-tree-{name}-{nanos}.stderr");
        std::env::temp_dir().join(file_name)
    }

    /// A command that outlives its timeout must yield an explicit "timed out" error.
    #[test]
    fn command_timeout_kills_slow_process() {
        use std::process::Stdio;

        let log_path = stderr_path("timeout");
        let log_file = std::fs::File::create(&log_path).expect("stderr file should be created");

        let mut sleeper = Command::new("sleep");
        sleeper.arg("2");
        sleeper.stdin(Stdio::null());
        sleeper.stdout(Stdio::null());
        sleeper.stderr(Stdio::from(log_file));

        let outcome = output_with_timeout(&mut sleeper, Duration::from_millis(10), &log_path);
        let err = outcome.expect_err("slow command should time out");
        let _ = std::fs::remove_file(log_path);

        let message = err.to_string();
        assert!(
            message.contains("timed out"),
            "timeout error should be explicit: {message}"
        );
    }
}