polykit_core/remote_cache/
cache_key.rs

1//! Deterministic cache key generation for remote caching.
2
3use std::collections::BTreeMap;
4use std::path::PathBuf;
5
6use rustc_hash::FxHashMap;
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9
10use crate::error::{Error, Result};
11use crate::package::Language;
12
13/// Deterministic cache key for task execution results.
14///
15/// The cache key includes all inputs that affect task output:
16/// - Package identifier
17/// - Task name and command
18/// - Environment variables (explicit allowlist)
19/// - Input file hashes (tracked files only)
20/// - Dependency graph hash
21/// - Toolchain version
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
23pub struct CacheKey {
24    /// Package identifier (name + normalized path hash).
25    pub package_id: String,
26    /// Task name.
27    pub task_name: String,
28    /// Command string.
29    pub command: String,
30    /// Environment variables (sorted by key for determinism).
31    #[serde(serialize_with = "serialize_env_vars")]
32    #[serde(deserialize_with = "deserialize_env_vars")]
33    pub env_vars: BTreeMap<String, String>,
34    /// Input file hashes (relative path -> SHA-256 hash).
35    pub input_file_hashes: FxHashMap<PathBuf, String>,
36    /// Dependency graph hash (transitive dependencies).
37    pub dependency_graph_hash: String,
38    /// Toolchain version (e.g., "node-v20.0.0", "rustc-1.75.0").
39    pub toolchain_version: String,
40}
41
42fn serialize_env_vars<S>(
43    env_vars: &BTreeMap<String, String>,
44    serializer: S,
45) -> std::result::Result<S::Ok, S::Error>
46where
47    S: serde::Serializer,
48{
49    use serde::Serialize;
50    let vec: Vec<(&String, &String)> = env_vars.iter().collect();
51    vec.serialize(serializer)
52}
53
54fn deserialize_env_vars<'de, D>(
55    deserializer: D,
56) -> std::result::Result<BTreeMap<String, String>, D::Error>
57where
58    D: serde::Deserializer<'de>,
59{
60    use serde::Deserialize;
61    let vec: Vec<(String, String)> = Vec::deserialize(deserializer)?;
62    Ok(vec.into_iter().collect())
63}
64
65impl CacheKey {
66    /// Creates a new cache key builder.
67    pub fn builder() -> CacheKeyBuilder {
68        CacheKeyBuilder::new()
69    }
70
71    /// Computes the deterministic hash of this cache key.
72    ///
73    /// Returns a hex-encoded SHA-256 hash.
74    pub fn hash(&self) -> String {
75        let mut hasher = Sha256::new();
76
77        // Serialize key components deterministically
78        let serialized = bincode::serialize(self).unwrap_or_else(|_| {
79            // Fallback: manual serialization if bincode fails
80            format!(
81                "{}\0{}\0{}\0{:?}\0{:?}\0{}\0{}",
82                self.package_id,
83                self.task_name,
84                self.command,
85                self.env_vars,
86                self.input_file_hashes,
87                self.dependency_graph_hash,
88                self.toolchain_version
89            )
90            .into_bytes()
91        });
92
93        hasher.update(&serialized);
94        format!("{:x}", hasher.finalize())
95    }
96
97    /// Returns the cache key as a string identifier.
98    ///
99    /// This is the hash of the cache key, used for storage and retrieval.
100    pub fn as_string(&self) -> String {
101        self.hash()
102    }
103}
104
105/// Builder for constructing cache keys.
106pub struct CacheKeyBuilder {
107    package_id: Option<String>,
108    task_name: Option<String>,
109    command: Option<String>,
110    env_vars: BTreeMap<String, String>,
111    input_file_hashes: FxHashMap<PathBuf, String>,
112    dependency_graph_hash: Option<String>,
113    toolchain_version: Option<String>,
114}
115
116impl CacheKeyBuilder {
117    fn new() -> Self {
118        Self {
119            package_id: None,
120            task_name: None,
121            command: None,
122            env_vars: BTreeMap::new(),
123            input_file_hashes: FxHashMap::default(),
124            dependency_graph_hash: None,
125            toolchain_version: None,
126        }
127    }
128
129    /// Sets the package identifier.
130    pub fn package_id(mut self, package_id: impl Into<String>) -> Self {
131        self.package_id = Some(package_id.into());
132        self
133    }
134
135    /// Sets the task name.
136    pub fn task_name(mut self, task_name: impl Into<String>) -> Self {
137        self.task_name = Some(task_name.into());
138        self
139    }
140
141    /// Sets the command string.
142    pub fn command(mut self, command: impl Into<String>) -> Self {
143        self.command = Some(command.into());
144        self
145    }
146
147    /// Adds an environment variable to the cache key.
148    ///
149    /// Only explicitly allowed environment variables should be added.
150    pub fn env_var(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
151        self.env_vars.insert(key.into(), value.into());
152        self
153    }
154
155    /// Adds multiple environment variables.
156    pub fn env_vars(mut self, vars: BTreeMap<String, String>) -> Self {
157        self.env_vars.extend(vars);
158        self
159    }
160
161    /// Adds an input file hash.
162    pub fn input_file(mut self, path: PathBuf, hash: impl Into<String>) -> Self {
163        self.input_file_hashes.insert(path, hash.into());
164        self
165    }
166
167    /// Adds multiple input file hashes.
168    pub fn input_files(mut self, files: FxHashMap<PathBuf, String>) -> Self {
169        self.input_file_hashes.extend(files);
170        self
171    }
172
173    /// Sets the dependency graph hash.
174    pub fn dependency_graph_hash(mut self, hash: impl Into<String>) -> Self {
175        self.dependency_graph_hash = Some(hash.into());
176        self
177    }
178
179    /// Sets the toolchain version.
180    pub fn toolchain_version(mut self, version: impl Into<String>) -> Self {
181        self.toolchain_version = Some(version.into());
182        self
183    }
184
185    /// Builds the cache key.
186    ///
187    /// # Errors
188    ///
189    /// Returns an error if any required field is missing.
190    pub fn build(self) -> Result<CacheKey> {
191        Ok(CacheKey {
192            package_id: self.package_id.ok_or_else(|| Error::Adapter {
193                package: "cache-key".to_string(),
194                message: "package_id is required".to_string(),
195            })?,
196            task_name: self.task_name.ok_or_else(|| Error::Adapter {
197                package: "cache-key".to_string(),
198                message: "task_name is required".to_string(),
199            })?,
200            command: self.command.ok_or_else(|| Error::Adapter {
201                package: "cache-key".to_string(),
202                message: "command is required".to_string(),
203            })?,
204            env_vars: self.env_vars,
205            input_file_hashes: self.input_file_hashes,
206            dependency_graph_hash: self.dependency_graph_hash.ok_or_else(|| Error::Adapter {
207                package: "cache-key".to_string(),
208                message: "dependency_graph_hash is required".to_string(),
209            })?,
210            toolchain_version: self.toolchain_version.ok_or_else(|| Error::Adapter {
211                package: "cache-key".to_string(),
212                message: "toolchain_version is required".to_string(),
213            })?,
214        })
215    }
216}
217
218/// Detects the toolchain version for a given language.
219///
220/// Returns a version string like "node-v20.0.0" or "rustc-1.75.0".
221pub fn detect_toolchain_version(language: Language) -> Result<String> {
222    use std::process::Command;
223
224    let (command, version_flag) = match language {
225        Language::Js | Language::Ts => ("node", "--version"),
226        Language::Rust => ("rustc", "--version"),
227        Language::Go => ("go", "version"),
228        Language::Python => ("python3", "--version"),
229    };
230
231    let output = Command::new(command)
232        .arg(version_flag)
233        .output()
234        .map_err(|e| Error::Adapter {
235            package: "toolchain-detection".to_string(),
236            message: format!("Failed to detect {} version: {}", command, e),
237        })?;
238
239    if !output.status.success() {
240        return Err(Error::Adapter {
241            package: "toolchain-detection".to_string(),
242            message: format!("Failed to get {} version", command),
243        });
244    }
245
246    let version_str = String::from_utf8_lossy(&output.stdout);
247    let version = version_str
248        .lines()
249        .next()
250        .unwrap_or("unknown")
251        .trim()
252        .to_string();
253
254    Ok(format!("{}-{}", command, version))
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a key for task `"build"` from the given parts; `env` is applied
    /// only when provided.
    fn make_key(
        package_id: &str,
        command: &str,
        graph_hash: &str,
        toolchain: &str,
        env: Option<BTreeMap<String, String>>,
    ) -> CacheKey {
        let mut builder = CacheKey::builder()
            .package_id(package_id)
            .task_name("build")
            .command(command);
        if let Some(vars) = env {
            builder = builder.env_vars(vars);
        }
        builder
            .dependency_graph_hash(graph_hash)
            .toolchain_version(toolchain)
            .build()
            .unwrap()
    }

    /// Turns borrowed pairs into an owned env map, inserting in the given order.
    fn env_of(pairs: &[(&str, &str)]) -> BTreeMap<String, String> {
        let mut env = BTreeMap::new();
        for &(name, value) in pairs {
            env.insert(name.to_string(), value.to_string());
        }
        env
    }

    #[test]
    fn test_cache_key_determinism() {
        // Two keys built from identical inputs must hash identically.
        let first = make_key("test-package", "echo hello", "abc123", "node-v20.0.0", None);
        let second = make_key("test-package", "echo hello", "abc123", "node-v20.0.0", None);

        assert_eq!(first.hash(), second.hash());
    }

    #[test]
    fn test_cache_key_env_vars_order() {
        // Same variables, inserted in opposite orders.
        let forward = env_of(&[("VAR1", "value1"), ("VAR2", "value2")]);
        let reversed = env_of(&[("VAR2", "value2"), ("VAR1", "value1")]);

        let first = make_key("test", "echo", "abc", "node-v20", Some(forward));
        let second = make_key("test", "echo", "abc", "node-v20", Some(reversed));

        // BTreeMap ensures order, so hashes should be equal
        assert_eq!(first.hash(), second.hash());
    }

    #[test]
    fn test_cache_key_different_commands() {
        // Changing only the command must change the hash.
        let hello = make_key("test", "echo hello", "abc", "node-v20", None);
        let world = make_key("test", "echo world", "abc", "node-v20", None);

        assert_ne!(hello.hash(), world.hash());
    }
}
340}