Skip to main content

stormchaser_engine/
git_cache.rs

1use anyhow::{Context, Result};
2use git2::{
3    build::{CheckoutBuilder, RepoBuilder},
4    Repository,
5};
6use sha2::{Digest, Sha256};
7use std::path::{Path, PathBuf};
8use tracing::{debug, info};
9
/// On-disk cache of git repositories, keyed by repository URL and revision.
///
/// Each `(repo_url, rev)` pair gets its own directory at
/// `<base_dir>/checkouts/<url hash>/<rev hash>`, where both hashes are the
/// first 16 hex characters of the SHA-256 of the respective string.
#[derive(Debug, Clone)]
pub struct GitCache {
    /// Root directory under which the `checkouts` tree is created.
    base_dir: PathBuf,
}
14
15impl GitCache {
16    /// New.
17    pub fn new<P: AsRef<Path>>(base_dir: P) -> Self {
18        Self {
19            base_dir: base_dir.as_ref().to_path_buf(),
20        }
21    }
22
23    /// Returns the target directory for a repo/rev pair.
24    fn get_target_dir(&self, repo_url: &str, rev: &str) -> PathBuf {
25        let repo_hash = self.hash_url(repo_url);
26        let rev_hash = self.hash_rev(rev);
27        self.base_dir
28            .join("checkouts")
29            .join(&repo_hash)
30            .join(&rev_hash)
31    }
32
33    /// Initializes a repository cache entry if it doesn't exist.
34    /// Uses metadata-only clone (no initial checkout).
35    pub fn init_repo(&self, repo_url: &str, rev: &str) -> Result<PathBuf> {
36        let target_dir = self.get_target_dir(repo_url, rev);
37
38        if target_dir.exists() {
39            return Ok(target_dir);
40        }
41
42        info!(
43            "Initializing metadata-only cache for {} at rev {} in {:?}",
44            repo_url, rev, target_dir
45        );
46
47        std::fs::create_dir_all(&target_dir)?;
48
49        // Clone WITHOUT checkout
50        let fetch_opts = git2::FetchOptions::new();
51        // We could add depth(1) here but it can be problematic with specific refs/shas
52
53        let repo = RepoBuilder::new()
54            .fetch_options(fetch_opts)
55            .with_checkout(CheckoutBuilder::new()) // Empty checkout
56            .clone(repo_url, &target_dir)
57            .with_context(|| format!("Failed to clone metadata for {}", repo_url))?;
58
59        // Resolve and set HEAD to the revision
60        self.checkout_revision_metadata(&repo, rev)?;
61
62        Ok(target_dir)
63    }
64
65    /// Ensures specific files/folders are present in the working directory.
66    /// This can be called multiple times to add more files to the same cache entry.
67    pub fn ensure_files(&self, repo_url: &str, rev: &str, paths: &[String]) -> Result<PathBuf> {
68        let target_dir = self.init_repo(repo_url, rev)?;
69        let repo = Repository::open(&target_dir)?;
70
71        if paths.is_empty() {
72            return Ok(target_dir);
73        }
74
75        debug!("Ensuring paths {:?} are present in {:?}", paths, target_dir);
76
77        let mut cb = CheckoutBuilder::new();
78        cb.force(); // Overwrite if exists
79
80        for path in paths {
81            cb.path(path);
82        }
83
84        repo.checkout_head(Some(&mut cb))
85            .with_context(|| format!("Failed to checkout paths {:?} for rev {}", paths, rev))?;
86
87        Ok(target_dir)
88    }
89
90    /// Legacy method updated to use the new implementation
91    #[allow(dead_code)]
92    pub fn get_repo(&self, repo_url: &str, rev: &str) -> Result<PathBuf> {
93        // Full checkout for the legacy get_repo
94        // We achieve this by NOT specifying paths, but by default git2 checkout_head
95        // without pathspecs checkouts everything.
96        let target_dir = self.init_repo(repo_url, rev)?;
97        let repo = Repository::open(&target_dir)?;
98
99        let mut cb = CheckoutBuilder::new();
100        cb.force();
101        repo.checkout_head(Some(&mut cb))?;
102
103        Ok(target_dir)
104    }
105
106    fn hash_url(&self, url: &str) -> String {
107        let mut hasher = Sha256::new();
108        hasher.update(url.as_bytes());
109        hex::encode(hasher.finalize())[..16].to_string()
110    }
111
112    fn hash_rev(&self, rev: &str) -> String {
113        let mut hasher = Sha256::new();
114        hasher.update(rev.as_bytes());
115        hex::encode(hasher.finalize())[..16].to_string()
116    }
117
118    fn checkout_revision_metadata(&self, repo: &Repository, rev: &str) -> Result<()> {
119        let (object, reference) = repo
120            .revparse_ext(rev)
121            .with_context(|| format!("Failed to find revision {}", rev))?;
122
123        // Note: we DON'T call checkout_tree here if we want metadata only.
124        // We just set HEAD.
125
126        match reference {
127            Some(ref r) if r.is_branch() => repo.set_head(r.name().unwrap()),
128            _ => repo.set_head_detached(object.id()),
129        }
130        .with_context(|| format!("Failed to set HEAD to revision {}", rev))?;
131
132        Ok(())
133    }
134}
135
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// The cache path is `<base>/checkouts/<url hash>/<rev hash>`, is stable
    /// across calls, and changes when either the URL or the revision changes.
    #[test]
    fn test_git_cache_path_generation() {
        let tmp = tempdir().unwrap();
        let cache = GitCache::new(tmp.path());

        let url = "https://github.com/example/repo.git";
        let rev = "main";

        let url_hash = cache.hash_url(url);
        let rev_hash = cache.hash_rev(rev);

        assert_eq!(url_hash.len(), 16);
        assert_eq!(rev_hash.len(), 16);
        assert!(url_hash.chars().all(|c| c.is_ascii_hexdigit()));
        assert!(rev_hash.chars().all(|c| c.is_ascii_hexdigit()));

        let path = cache.get_target_dir(url, rev);
        let expected = tmp
            .path()
            .join("checkouts")
            .join(&url_hash)
            .join(&rev_hash);
        assert_eq!(path, expected);

        // Same inputs always map to the same directory.
        assert_eq!(cache.get_target_dir(url, rev), path);

        // A different revision or repository must not share the directory.
        assert_ne!(cache.get_target_dir(url, "dev"), path);
        assert_ne!(
            cache.get_target_dir("https://github.com/example/other.git", rev),
            path
        );
    }
}