polykit_core/remote_cache/
mod.rs

//! Remote caching system for sharing build artifacts across CI/CD runs and team members.
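//!
//! # Example
//!
//! A minimal sketch of the intended fetch-or-build flow. It assumes the crate is
//! exposed as `polykit_core`, that `RemoteCacheConfig` implements `Default` with
//! publicly settable fields, that `crate::error` is re-exported as
//! `polykit_core::error`, and that a `CacheKey` and an output `Artifact` come
//! from the caller; treat it as illustrative, not a verified doctest.
//!
//! ```no_run
//! # use polykit_core::remote_cache::{Artifact, CacheKey, RemoteCache, RemoteCacheConfig};
//! # async fn run(key: CacheKey, outputs: Artifact) -> polykit_core::error::Result<()> {
//! let config = RemoteCacheConfig {
//!     url: "https://cache.example.com".to_string(),
//!     ..Default::default()
//! };
//! let cache = RemoteCache::from_config(config)?;
//!
//! match cache.fetch_artifact(&key).await? {
//!     Some(_artifact) => { /* cache hit: restore the task's outputs */ }
//!     None => {
//!         // Cache miss: run the task, then publish its outputs.
//!         cache.upload_artifact(&key, &outputs).await?;
//!     }
//! }
//! # Ok(())
//! # }
//! ```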

mod artifact;
mod backend;
mod cache_key;
mod config;
mod filesystem;
mod http;
mod integrity;

pub use artifact::Artifact;
pub use backend::{BackendError, RemoteCacheBackend};
pub use cache_key::{detect_toolchain_version, CacheKey, CacheKeyBuilder};
pub use config::RemoteCacheConfig;
pub use filesystem::FilesystemBackend;
pub use http::HttpBackend;
pub use integrity::ArtifactVerifier;

use crate::error::Result;
use crate::graph::DependencyGraph;
use crate::package::Package;

/// Remote cache orchestrator.
///
/// Handles cache operations and integrates with task execution.
pub struct RemoteCache {
    backend: Box<dyn RemoteCacheBackend>,
    config: RemoteCacheConfig,
}

impl RemoteCache {
    /// Creates a new remote cache with the given backend and configuration.
    pub fn new(backend: Box<dyn RemoteCacheBackend>, config: RemoteCacheConfig) -> Self {
        Self { backend, config }
    }

    /// Creates a remote cache from configuration.
    ///
    /// Automatically selects the appropriate backend based on the URL.
    ///
    /// # Errors
    ///
    /// Returns an error if backend creation fails.
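    ///
    /// # Examples
    ///
    /// A minimal sketch of backend selection, assuming `RemoteCacheConfig`
    /// implements `Default` with a publicly settable `url` field (illustrative,
    /// not a verified doctest):
    ///
    /// ```no_run
    /// # use polykit_core::remote_cache::{RemoteCache, RemoteCacheConfig};
    /// // An http(s) URL selects `HttpBackend`; any other value is treated as a
    /// // filesystem path and handled by `FilesystemBackend`.
    /// let cache = RemoteCache::from_config(RemoteCacheConfig {
    ///     url: "/mnt/shared/polykit-cache".to_string(),
    ///     ..Default::default()
    /// })?;
    /// # Ok::<(), polykit_core::error::Error>(())
    /// ```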
    pub fn from_config(config: RemoteCacheConfig) -> Result<Self> {
        let backend: Box<dyn RemoteCacheBackend> = if config.is_http() {
            Box::new(HttpBackend::new(&config)?)
        } else {
            Box::new(FilesystemBackend::new(&config.url)?)
        };

        Ok(Self::new(backend, config))
    }
    /// Creates a disabled remote cache (no-op).
    pub fn disabled() -> Self {
        Self {
            backend: Box::new(DisabledBackend),
            config: RemoteCacheConfig::default(),
        }
    }

    /// Returns `true` if the remote cache is enabled, i.e. a cache URL is configured.
    pub fn is_enabled(&self) -> bool {
        !self.config.url.is_empty()
    }

    /// Fetches an artifact from the remote cache.
    ///
    /// # Arguments
    ///
    /// * `key` - The cache key to fetch
    ///
    /// # Returns
    ///
    /// Returns `Some(artifact)` if found, `None` if not found.
    ///
    /// # Errors
    ///
    /// Returns an error only for unexpected failures; a cache miss returns `Ok(None)`.
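    ///
    /// # Examples
    ///
    /// A sketch of the miss-tolerant pattern; the surrounding `async` context
    /// and the `key` are assumed to exist:
    ///
    /// ```no_run
    /// # use polykit_core::remote_cache::{CacheKey, RemoteCache};
    /// # async fn example(cache: &RemoteCache, key: &CacheKey) -> polykit_core::error::Result<()> {
    /// if let Some(_artifact) = cache.fetch_artifact(key).await? {
    ///     // Cache hit: restore outputs instead of re-running the task.
    /// } else {
    ///     // Cache miss (or cache disabled): fall through to normal execution.
    /// }
    /// # Ok(())
    /// # }
    /// ```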
    pub async fn fetch_artifact(&self, key: &CacheKey) -> Result<Option<Artifact>> {
        if !self.is_enabled() {
            return Ok(None);
        }

        self.backend.fetch_artifact(key).await
    }

    /// Uploads an artifact to the remote cache.
    ///
    /// Does nothing if the cache is disabled or configured as read-only.
    ///
    /// # Arguments
    ///
    /// * `key` - The cache key for this artifact
    /// * `artifact` - The artifact to upload
    ///
    /// # Errors
    ///
    /// Returns an error if the upload fails; callers typically treat upload
    /// failures as non-fatal and continue without caching.
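    ///
    /// # Examples
    ///
    /// A sketch of a best-effort upload after a task run; `key` and `artifact`
    /// are assumed to exist:
    ///
    /// ```no_run
    /// # use polykit_core::remote_cache::{Artifact, CacheKey, RemoteCache};
    /// # async fn example(cache: &RemoteCache, key: &CacheKey, artifact: &Artifact) {
    /// // Log-and-continue: a failed upload should not fail the build.
    /// if let Err(e) = cache.upload_artifact(key, artifact).await {
    ///     eprintln!("warning: cache upload failed: {e}");
    /// }
    /// # }
    /// ```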
    pub async fn upload_artifact(&self, key: &CacheKey, artifact: &Artifact) -> Result<()> {
        if !self.is_enabled() || self.config.read_only {
            return Ok(());
        }

        self.backend.upload_artifact(key, artifact).await
    }

    /// Checks if an artifact exists in the remote cache.
    ///
    /// # Arguments
    ///
    /// * `key` - The cache key to check
    ///
    /// # Returns
    ///
    /// Returns `true` if the artifact exists, `false` otherwise.
    ///
    /// # Errors
    ///
    /// Returns an error only for unexpected failures.
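    ///
    /// # Examples
    ///
    /// A sketch of probing for existence before scheduling work; `key` is
    /// assumed to exist:
    ///
    /// ```no_run
    /// # use polykit_core::remote_cache::{CacheKey, RemoteCache};
    /// # async fn example(cache: &RemoteCache, key: &CacheKey) -> polykit_core::error::Result<()> {
    /// if !cache.has_artifact(key).await? {
    ///     // No cached artifact: schedule the task for execution.
    /// }
    /// # Ok(())
    /// # }
    /// ```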
    pub async fn has_artifact(&self, key: &CacheKey) -> Result<bool> {
        if !self.is_enabled() {
            return Ok(false);
        }

        self.backend.has_artifact(key).await
    }

    /// Builds a cache key for a task execution.
    ///
    /// # Arguments
    ///
    /// * `package` - The package being executed
    /// * `task_name` - The task name
    /// * `command` - The command string
    /// * `graph` - The dependency graph
    /// * `package_path` - Path to the package directory
    ///
    /// # Errors
    ///
    /// Returns an error if cache key construction fails.
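    ///
    /// # Examples
    ///
    /// A sketch showing which inputs feed the key; the `Package` and
    /// `DependencyGraph` values come from the caller, and `graph` and `package`
    /// are assumed to be public modules of `polykit_core`:
    ///
    /// ```no_run
    /// # use polykit_core::remote_cache::RemoteCache;
    /// # use polykit_core::{graph::DependencyGraph, package::Package};
    /// # async fn example(cache: &RemoteCache, pkg: &Package, graph: &DependencyGraph)
    /// #     -> polykit_core::error::Result<()> {
    /// let key = cache
    ///     .build_cache_key(pkg, "build", "cargo build --release", graph,
    ///                      std::path::Path::new("packages/app"))
    ///     .await?;
    /// if cache.fetch_artifact(&key).await?.is_none() {
    ///     // Run the task, then upload its outputs under the same key.
    /// }
    /// # Ok(())
    /// # }
    /// ```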
    pub async fn build_cache_key(
        &self,
        package: &Package,
        task_name: &str,
        command: &str,
        graph: &DependencyGraph,
        package_path: &std::path::Path,
    ) -> Result<CacheKey> {
        use std::collections::BTreeMap;
        use std::env;
        use std::path::PathBuf;

        use rayon::prelude::*;
        use sha2::{Digest, Sha256};
        use walkdir::WalkDir;

        // Build the dependency-graph hash. Sort the dependencies first so the
        // hash is deterministic regardless of graph iteration order.
        let mut deps = graph.dependencies(&package.name).unwrap_or_default();
        deps.sort();
        let mut dep_hash_input = format!("{}:{}", package.name, task_name);
        for dep in &deps {
            dep_hash_input.push_str(&format!(":{}", dep));
        }
        let mut dep_hasher = Sha256::new();
        dep_hasher.update(dep_hash_input.as_bytes());
        let dependency_graph_hash = format!("{:x}", dep_hasher.finalize());

        // Collect environment variables (only from the configured allowlist).
        let mut env_vars = BTreeMap::new();
        for var_name in &self.config.env_vars {
            if let Ok(value) = env::var(var_name) {
                env_vars.insert(var_name.clone(), value);
            }
        }

        // Collect input file hashes in parallel using BLAKE3.
        let mut input_file_hashes = rustc_hash::FxHashMap::default();
        if !self.config.input_files.is_empty() {
            // Expand each configured pattern into concrete files: a file is
            // taken as-is, a directory is walked recursively, anything else
            // (including a missing path) contributes nothing.
            let files_to_hash: Vec<PathBuf> = self
                .config
                .input_files
                .par_iter()
                .flat_map(|pattern| {
                    let pattern_path = package_path.join(pattern);
                    if pattern_path.is_file() {
                        vec![pattern_path]
                    } else if pattern_path.is_dir() {
                        WalkDir::new(&pattern_path)
                            .into_iter()
                            .filter_map(|e| e.ok())
                            .filter(|e| e.file_type().is_file())
                            .map(|e| e.path().to_path_buf())
                            .collect()
                    } else {
                        Vec::new()
                    }
                })
                .collect();

            // Hash files in parallel; keys are paths relative to the package
            // root so the resulting map is stable across machines.
            let hashed_files: Vec<(PathBuf, String)> = files_to_hash
                .into_par_iter()
                .filter_map(|file_path| {
                    let hash = Self::hash_file_fast(&file_path).ok()?;
                    let relative = file_path
                        .strip_prefix(package_path)
                        .unwrap_or(&file_path)
                        .to_path_buf();
                    Some((relative, hash))
                })
                .collect();

            for (path, hash) in hashed_files {
                input_file_hashes.insert(path, hash);
            }
        }

        // Detect the toolchain version for the package's language.
        let toolchain_version = detect_toolchain_version(package.language)?;

        // Build the package ID (name + short hash of the package path) so two
        // packages with the same name in different directories get distinct keys.
        let package_path_str = package_path.to_string_lossy();
        let mut package_hasher = Sha256::new();
        package_hasher.update(package_path_str.as_bytes());
        let package_path_hash = format!("{:x}", package_hasher.finalize())[..8].to_string();
        let package_id = format!("{}-{}", package.name, package_path_hash);

        CacheKey::builder()
            .package_id(package_id)
            .task_name(task_name.to_string())
            .command(command.to_string())
            .env_vars(env_vars)
            .input_files(input_file_hashes)
            .dependency_graph_hash(dependency_graph_hash)
            .toolchain_version(toolchain_version)
            .build()
    }

    /// Returns the configuration.
    pub fn config(&self) -> &RemoteCacheConfig {
        &self.config
    }

    /// Hashes a file with BLAKE3, streaming it through a 64 KiB buffer so
    /// large artifacts never have to be loaded into memory at once.
    fn hash_file_fast(path: &std::path::Path) -> Result<String> {
        use std::fs::File;
        use std::io::{BufReader, Read};

        use blake3::Hasher;

        let file = File::open(path).map_err(|e| crate::error::Error::Adapter {
            package: "remote-cache".to_string(),
            message: format!("Failed to open file for hashing: {}", e),
        })?;
        let mut reader = BufReader::with_capacity(64 * 1024, file);
        let mut hasher = Hasher::new();
        let mut buffer = vec![0u8; 64 * 1024];

        loop {
            let bytes_read = reader.read(&mut buffer).map_err(|e| crate::error::Error::Adapter {
                package: "remote-cache".to_string(),
                message: format!("Failed to read file for hashing: {}", e),
            })?;
            if bytes_read == 0 {
                break;
            }
            hasher.update(&buffer[..bytes_read]);
        }

        Ok(hasher.finalize().to_hex().to_string())
    }
}

/// Disabled backend that does nothing.
struct DisabledBackend;

#[async_trait::async_trait]
impl RemoteCacheBackend for DisabledBackend {
    async fn upload_artifact(&self, _key: &CacheKey, _artifact: &Artifact) -> Result<()> {
        Ok(())
    }

    async fn fetch_artifact(&self, _key: &CacheKey) -> Result<Option<Artifact>> {
        Ok(None)
    }

    async fn has_artifact(&self, _key: &CacheKey) -> Result<bool> {
        Ok(false)
    }
}