1use super::types::yaml_value_to_string;
8use anyhow::{Context, Result};
9use std::collections::HashMap;
10use std::io::Read;
11use std::path::Path;
12
13pub fn hash_file(path: &Path) -> Result<String> {
15 let mut file = std::fs::File::open(path)
16 .with_context(|| format!("failed to open file for hashing: {}", path.display()))?;
17 let mut hasher = blake3::Hasher::new();
18 let mut buf = [0u8; 65536];
19 loop {
20 let n = file
21 .read(&mut buf)
22 .with_context(|| format!("failed to read file: {}", path.display()))?;
23 if n == 0 {
24 break;
25 }
26 hasher.update(&buf[..n]);
27 }
28 let hash = hasher.finalize();
29 Ok(format!("blake3:{}", hash.to_hex()))
30}
31
/// Outcome of hashing a dependency path (a directory tree or a single file).
#[derive(Debug, Clone)]
pub struct DirHashResult {
    /// Digest of the hashed content, formatted as `blake3:<hex>`.
    pub hash: String,
    /// Number of files that contributed to the digest.
    pub file_count: u64,
    /// Sum of the sizes, in bytes, reported for the contributing files.
    pub total_bytes: u64,
}
39
40fn stream_into_hasher(path: &Path, hasher: &mut blake3::Hasher) -> Result<u64> {
42 let meta =
43 std::fs::metadata(path).with_context(|| format!("failed to stat: {}", path.display()))?;
44 let mut file =
45 std::fs::File::open(path).with_context(|| format!("failed to open: {}", path.display()))?;
46 let mut buf = [0u8; 65536];
47 loop {
48 let n = file.read(&mut buf)?;
49 if n == 0 {
50 break;
51 }
52 hasher.update(&buf[..n]);
53 }
54 Ok(meta.len())
55}
56
57pub fn hash_directory(path: &Path) -> Result<DirHashResult> {
59 if !path.is_dir() {
60 let mut hasher = blake3::Hasher::new();
61 let size = stream_into_hasher(path, &mut hasher)?;
62 let hash = hasher.finalize();
63 return Ok(DirHashResult {
64 hash: format!("blake3:{}", hash.to_hex()),
65 file_count: 1,
66 total_bytes: size,
67 });
68 }
69
70 let mut entries: Vec<std::path::PathBuf> = Vec::new();
71 collect_files_sorted(path, &mut entries)?;
72
73 let mut hasher = blake3::Hasher::new();
74 let mut file_count = 0u64;
75 let mut total_bytes = 0u64;
76
77 for entry in &entries {
78 let rel = entry.strip_prefix(path).unwrap_or(entry);
79 hasher.update(rel.to_string_lossy().as_bytes());
80 total_bytes += stream_into_hasher(entry, &mut hasher)?;
81 file_count += 1;
82 }
83
84 let hash = hasher.finalize();
85 Ok(DirHashResult { hash: format!("blake3:{}", hash.to_hex()), file_count, total_bytes })
86}
87
88fn collect_files_sorted(dir: &Path, out: &mut Vec<std::path::PathBuf>) -> Result<()> {
89 let mut entries: Vec<std::path::PathBuf> = Vec::new();
90
91 for entry in
92 std::fs::read_dir(dir).with_context(|| format!("failed to read dir: {}", dir.display()))?
93 {
94 let entry = entry?;
95 let ft = entry.file_type()?;
96 if ft.is_symlink() {
98 continue;
99 }
100 entries.push(entry.path());
101 }
102
103 entries.sort();
105
106 for entry in entries {
107 if entry.is_dir() {
108 collect_files_sorted(&entry, out)?;
109 } else {
110 out.push(entry);
111 }
112 }
113
114 Ok(())
115}
116
117pub fn hash_dep(path: &Path) -> Result<DirHashResult> {
119 hash_directory(path)
120}
121
122pub fn hash_params(
127 global_params: &HashMap<String, serde_yaml_ng::Value>,
128 referenced_keys: &[String],
129) -> Result<String> {
130 let mut pairs: Vec<(String, String)> = Vec::new();
131
132 for key in referenced_keys {
133 if let Some(val) = global_params.get(key) {
134 pairs.push((key.clone(), yaml_value_to_string(val)));
135 }
136 }
137
138 pairs.sort_by(|a, b| a.0.cmp(&b.0));
139
140 let mut hasher = blake3::Hasher::new();
141 for (k, v) in &pairs {
142 hasher.update(k.as_bytes());
143 hasher.update(b"=");
144 hasher.update(v.as_bytes());
145 hasher.update(b"\n");
146 }
147
148 let hash = hasher.finalize();
149 Ok(format!("blake3:{}", hash.to_hex()))
150}
151
/// Scans a command template for `{{ params.<key> }}` placeholders.
///
/// Each `{{ ... }}` span has surrounding whitespace trimmed; spans whose
/// trimmed text starts with `params.` contribute the remainder as a key, and
/// all other placeholders are skipped. Scanning resumes right after each
/// closing `}}` and stops at the first `{{` that has no closing `}}`.
///
/// # Returns
///
/// The referenced keys in order of first appearance, without duplicates.
pub fn extract_param_refs(cmd: &str) -> Vec<String> {
    const OPEN: &str = "{{";
    const CLOSE: &str = "}}";
    const PREFIX: &str = "params.";

    let mut found: Vec<String> = Vec::new();
    let mut rest = cmd;

    while let Some(open_at) = rest.find(OPEN) {
        let body = &rest[open_at + OPEN.len()..];
        let close_at = match body.find(CLOSE) {
            Some(idx) => idx,
            // No terminator from here on, so no later opener can be closed either.
            None => break,
        };

        if let Some(key) = body[..close_at].trim().strip_prefix(PREFIX) {
            if !found.iter().any(|seen| seen.as_str() == key) {
                found.push(key.to_owned());
            }
        }

        rest = &body[close_at + CLOSE.len()..];
    }

    found
}
179
180pub fn effective_param_keys(declared: &Option<Vec<String>>, cmd: &str) -> Vec<String> {
185 let mut keys = extract_param_refs(cmd);
186 if let Some(declared_keys) = declared {
187 for k in declared_keys {
188 if !keys.contains(k) {
189 keys.push(k.clone());
190 }
191 }
192 }
193 keys
194}
195
196pub fn hash_cmd(resolved_cmd: &str) -> String {
198 let hash = blake3::hash(resolved_cmd.as_bytes());
199 format!("blake3:{}", hash.to_hex())
200}
201
202pub fn compute_cache_key(cmd_hash: &str, deps_hash: &str, params_hash: &str) -> String {
206 let mut hasher = blake3::Hasher::new();
207 hasher.update(cmd_hash.as_bytes());
208 hasher.update(deps_hash.as_bytes());
209 hasher.update(params_hash.as_bytes());
210 let hash = hasher.finalize();
211 format!("blake3:{}", hash.to_hex())
212}
213
214pub fn combine_deps_hashes(hashes: &[String]) -> String {
216 let mut hasher = blake3::Hasher::new();
217 for h in hashes {
218 hasher.update(h.as_bytes());
219 }
220 let hash = hasher.finalize();
221 format!("blake3:{}", hash.to_hex())
222}
223
#[cfg(test)]
#[allow(non_snake_case)] // test names embed the upper-case ticket id `PB003`
mod tests {
    use super::*;

    /// Creates a scratch directory that is removed when the returned guard drops.
    fn scratch_dir() -> tempfile::TempDir {
        tempfile::tempdir().expect("tempdir creation failed")
    }

    /// Writes `bytes` to `path`, panicking if the write fails.
    fn put(path: &Path, bytes: &[u8]) {
        std::fs::write(path, bytes).expect("fs write failed");
    }

    /// Builds a parameter map whose values are all YAML strings.
    fn make_params(pairs: &[(&str, &str)]) -> HashMap<String, serde_yaml_ng::Value> {
        let mut map = HashMap::new();
        for (name, value) in pairs {
            map.insert(name.to_string(), serde_yaml_ng::Value::String(value.to_string()));
        }
        map
    }

    #[test]
    fn test_PB003_hash_file_deterministic() {
        let tmp = scratch_dir();
        let target = tmp.path().join("test.txt");
        put(&target, b"hello world");

        let first = hash_file(&target).expect("unexpected failure");
        let second = hash_file(&target).expect("unexpected failure");

        assert_eq!(first, second);
        assert!(first.starts_with("blake3:"));
    }

    #[test]
    fn test_PB003_hash_file_changes_with_content() {
        let tmp = scratch_dir();
        let target = tmp.path().join("test.txt");

        put(&target, b"hello");
        let before = hash_file(&target).expect("unexpected failure");

        put(&target, b"world");
        let after = hash_file(&target).expect("unexpected failure");

        assert_ne!(before, after);
    }

    #[test]
    fn test_PB003_hash_directory_sorted_walk() {
        let tmp = scratch_dir();
        // Written in reverse alphabetical order on purpose.
        put(&tmp.path().join("b.txt"), b"content-b");
        put(&tmp.path().join("a.txt"), b"content-a");

        let first = hash_directory(tmp.path()).expect("unexpected failure");
        assert!(first.hash.starts_with("blake3:"));
        assert_eq!(first.file_count, 2);
        assert_eq!(first.total_bytes, 18);

        let second = hash_directory(tmp.path()).expect("unexpected failure");
        assert_eq!(first.hash, second.hash);
    }

    #[test]
    fn test_PB003_hash_directory_single_file() {
        let tmp = scratch_dir();
        let target = tmp.path().join("only.txt");
        put(&target, b"data");

        let outcome = hash_directory(&target).expect("unexpected failure");
        assert_eq!(outcome.file_count, 1);
        assert_eq!(outcome.total_bytes, 4);
    }

    #[test]
    fn test_PB003_hash_params_sorted() {
        let global = make_params(&[("b", "2"), ("a", "1")]);

        let forward = vec!["a".to_string(), "b".to_string()];
        let reversed = vec!["b".to_string(), "a".to_string()];

        let from_forward = hash_params(&global, &forward).expect("unexpected failure");
        let from_reversed = hash_params(&global, &reversed).expect("unexpected failure");

        // Reference order must not influence the digest.
        assert_eq!(from_forward, from_reversed);
        assert!(from_forward.starts_with("blake3:"));
    }

    #[test]
    fn test_PB003_hash_cmd() {
        let hello_a = hash_cmd("echo hello");
        let hello_b = hash_cmd("echo hello");
        let world = hash_cmd("echo world");

        assert_eq!(hello_a, hello_b);
        assert_ne!(hello_a, world);
        assert!(hello_a.starts_with("blake3:"));
    }

    #[test]
    fn test_PB003_compute_cache_key() {
        let base = compute_cache_key("blake3:aaa", "blake3:bbb", "blake3:ccc");
        let same = compute_cache_key("blake3:aaa", "blake3:bbb", "blake3:ccc");
        let other_cmd = compute_cache_key("blake3:xxx", "blake3:bbb", "blake3:ccc");

        assert_eq!(base, same);
        assert_ne!(base, other_cmd);
    }

    #[test]
    fn test_PB003_extract_param_refs() {
        let found =
            extract_param_refs("run --model {{params.model}} --lang {{params.lang}} plain");
        assert_eq!(found, vec!["model", "lang"]);
    }

    #[test]
    fn test_PB003_extract_param_refs_no_refs() {
        assert!(extract_param_refs("echo hello world").is_empty());
    }

    #[test]
    fn test_PB003_extract_param_refs_dedup() {
        let found = extract_param_refs("{{params.x}} and {{params.x}} again");
        assert_eq!(found, vec!["x"]);
    }

    #[test]
    fn test_PB003_effective_param_keys() {
        let referenced_only = effective_param_keys(&None, "echo {{params.model}}");
        assert_eq!(referenced_only, vec!["model"]);

        let declared = Some(vec!["chunk_size".to_string()]);
        let with_declared = effective_param_keys(&declared, "echo {{params.model}}");
        assert_eq!(with_declared, vec!["model", "chunk_size"]);
    }

    #[test]
    fn test_PB003_combine_deps_hashes() {
        let ab = ["blake3:aaa".to_string(), "blake3:bbb".to_string()];
        let ba = ["blake3:bbb".to_string(), "blake3:aaa".to_string()];

        let first = combine_deps_hashes(&ab);
        let repeat = combine_deps_hashes(&ab);
        assert_eq!(first, repeat);

        // Order of the inputs is significant.
        let swapped = combine_deps_hashes(&ba);
        assert_ne!(first, swapped);
    }

    #[test]
    fn test_PB003_hash_file_missing() {
        let failure = hash_file(Path::new("/nonexistent/file.txt")).unwrap_err();
        assert!(failure.to_string().contains("failed to open"));
    }

    #[test]
    fn test_PB003_hash_directory_nested() {
        let tmp = scratch_dir();
        let sub = tmp.path().join("sub");
        std::fs::create_dir(&sub).expect("unexpected failure");
        put(&tmp.path().join("top.txt"), b"top");
        put(&sub.join("nested.txt"), b"nested");

        let outcome = hash_directory(tmp.path()).expect("unexpected failure");
        assert_eq!(outcome.file_count, 2);
        assert_eq!(outcome.total_bytes, 9);
    }

    #[test]
    fn test_PB003_extract_param_refs_unicode_safe() {
        let found = extract_param_refs("echo {{params.model}} — résumé {{params.lang}}");
        assert_eq!(found, vec!["model", "lang"]);
    }
}