agpm_cli/lockfile/checksum.rs
1//! Checksum computation and verification for lockfile integrity.
2//!
3//! This module provides SHA-256 checksum operations for verifying file integrity,
4//! detecting corruption, and ensuring reproducible installations.
5
6use anyhow::{Context, Result};
7use std::fs;
8use std::path::Path;
9
10use super::{LockFile, ResourceId};
11
12impl LockFile {
13 /// Compute SHA-256 checksum for file integrity verification.
14 ///
15 /// Detects corruption, tampering, or changes after installation.
16 ///
17 /// # Arguments
18 ///
19 /// * `path` - Path to the file to checksum
20 ///
21 /// # Returns
22 ///
23 /// * `Ok(String)` - Checksum in format "`sha256:hexadecimal_hash`"
24 /// * `Err(anyhow::Error)` - File read error with detailed context
25 ///
26 /// # Checksum Format
27 ///
28 /// The returned checksum follows the format:
29 /// - **Algorithm prefix**: "sha256:"
30 /// - **Hash encoding**: Lowercase hexadecimal
31 /// - **Length**: 71 characters total (7 for prefix + 64 hex digits)
32 ///
33 /// # Examples
34 ///
35 /// ```rust,no_run
36 /// use std::path::Path;
37 /// use agpm_cli::lockfile::LockFile;
38 ///
39 /// # fn example() -> anyhow::Result<()> {
40 /// let checksum = LockFile::compute_checksum(Path::new("example.md"))?;
41 /// println!("File checksum: {}", checksum);
42 /// // Output: "sha256:a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3"
43 /// # Ok(())
44 /// # }
45 /// ```
46 ///
47 /// # Error Handling
48 ///
49 /// Provides detailed error context for common issues:
50 /// - **File not found**: Suggests checking the path
51 /// - **Permission denied**: Suggests checking file permissions
52 /// - **IO errors**: Suggests checking disk health or file locks
53 ///
54 /// # Security Considerations
55 ///
56 /// - Uses SHA-256, a cryptographically secure hash function
57 /// - Suitable for integrity verification and tamper detection
58 /// - Consistent across platforms (Windows, macOS, Linux)
59 /// - Not affected by line ending differences (hashes actual bytes)
60 ///
61 /// # Performance
62 ///
63 /// The method reads the entire file into memory before hashing.
64 /// For very large files (>100MB), consider streaming implementations
65 /// in future versions.
66 pub fn compute_checksum(path: &Path) -> Result<String> {
67 use sha2::{Digest, Sha256};
68
69 let content = fs::read(path).with_context(|| {
70 format!(
71 "Cannot read file for checksum calculation: {}\n\n\
72 This error occurs when verifying file integrity.\n\
73 Check that the file exists and is readable.",
74 path.display()
75 )
76 })?;
77
78 let mut hasher = Sha256::new();
79 hasher.update(&content);
80 let result = hasher.finalize();
81
82 Ok(format!("sha256:{}", hex::encode(result)))
83 }
84
85 /// Compute SHA-256 checksum for a directory (skill resources).
86 ///
87 /// Calculates a combined checksum of all files in a directory by concatenating
88 /// their individual checksums in sorted order. This provides a deterministic
89 /// checksum that changes when any file in the directory changes.
90 ///
91 /// # Arguments
92 ///
93 /// * `path` - Path to the directory to checksum
94 ///
95 /// # Returns
96 ///
97 /// * `Ok(String)` - Combined checksum in format "`sha256:hexadecimal_hash`"
98 /// * `Err(anyhow::Error)` - Directory read or file hash error
99 ///
100 /// # Algorithm
101 ///
102 /// 1. Walk directory recursively (files only, not directories)
103 /// 2. Compute SHA-256 of each file
104 /// 3. Sort file paths for deterministic ordering
105 /// 4. Concatenate all checksums with file paths
106 /// 5. Compute final SHA-256 of the concatenated data
107 ///
108 /// # Examples
109 ///
110 /// ```rust,no_run
111 /// use std::path::Path;
112 /// use agpm_cli::lockfile::LockFile;
113 ///
114 /// # fn example() -> anyhow::Result<()> {
115 /// let checksum = LockFile::compute_directory_checksum(Path::new("my-skill"))?;
116 /// println!("Directory checksum: {}", checksum);
117 /// # Ok(())
118 /// # }
119 /// ```
120 pub fn compute_directory_checksum(path: &Path) -> Result<String> {
121 use sha2::{Digest, Sha256};
122 use walkdir::WalkDir;
123
124 let mut file_hashes: Vec<(String, String)> = Vec::new();
125
126 for entry in WalkDir::new(path).follow_links(false) {
127 let entry = entry.with_context(|| {
128 format!("Failed to read directory entry in: {}", path.display())
129 })?;
130
131 if entry.file_type().is_file() {
132 let file_path = entry.path();
133 // Use normalize_path_for_storage for cross-platform deterministic checksums
134 let relative_path = crate::utils::normalize_path_for_storage(
135 file_path.strip_prefix(path).unwrap_or(file_path),
136 );
137
138 let file_checksum = Self::compute_checksum(file_path)?;
139 file_hashes.push((relative_path, file_checksum));
140 }
141 }
142
143 // Sort by relative path for deterministic ordering
144 file_hashes.sort_by(|a, b| a.0.cmp(&b.0));
145
146 // Concatenate all checksums with their paths
147 let mut hasher = Sha256::new();
148 for (path, checksum) in &file_hashes {
149 hasher.update(format!("{}:{}\n", path, checksum).as_bytes());
150 }
151
152 let result = hasher.finalize();
153 Ok(format!("sha256:{}", hex::encode(result)))
154 }
155
156 /// Verify file matches expected checksum.
157 ///
158 /// Computes current checksum and compares with expected value.
159 ///
160 /// # Arguments
161 ///
162 /// * `path` - Path to the file to verify
163 /// * `expected` - Expected checksum in "sha256:hex" format
164 ///
165 /// # Returns
166 ///
167 /// * `Ok(true)` - File checksum matches expected value
168 /// * `Ok(false)` - File checksum does not match (corruption detected)
169 /// * `Err(anyhow::Error)` - File read error or checksum calculation failed
170 ///
171 /// # Examples
172 ///
173 /// ```rust,no_run
174 /// use std::path::Path;
175 /// use agpm_cli::lockfile::LockFile;
176 ///
177 /// # fn example() -> anyhow::Result<()> {
178 /// let expected = "sha256:a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3";
179 /// let is_valid = LockFile::verify_checksum(Path::new("example.md"), expected)?;
180 ///
181 /// if is_valid {
182 /// println!("File integrity verified");
183 /// } else {
184 /// println!("WARNING: File has been modified or corrupted!");
185 /// }
186 /// # Ok(())
187 /// # }
188 /// ```
189 ///
190 /// # Use Cases
191 ///
192 /// - **Installation verification**: Ensure copied files are intact
193 /// - **Periodic validation**: Detect file corruption over time
194 /// - **Security checks**: Detect unauthorized modifications
195 /// - **Troubleshooting**: Diagnose installation issues
196 ///
197 /// # Performance
198 ///
199 /// This method internally calls [`compute_checksum`](Self::compute_checksum),
200 /// so it has the same performance characteristics. For bulk verification
201 /// operations, consider caching computed checksums.
202 ///
203 /// # Security
204 ///
205 /// The comparison is performed using standard string equality, which is
206 /// not timing-attack resistant. Since checksums are not secrets, this
207 /// is acceptable for integrity verification purposes.
208 pub fn verify_checksum(path: &Path, expected: &str) -> Result<bool> {
209 let actual = Self::compute_checksum(path)?;
210 Ok(actual == expected)
211 }
212
213 /// Update checksum for resource identified by ResourceId.
214 ///
215 /// Used after installation to record actual file checksum. ResourceId ensures unique
216 /// identification via name, source, tool, and template_vars.
217 ///
218 /// # Arguments
219 ///
220 /// * `id` - The unique identifier for the resource
221 /// * `checksum` - The new SHA-256 checksum in "sha256:hex" format
222 ///
223 /// # Returns
224 ///
225 /// Returns `true` if the resource was found and updated, `false` otherwise.
226 ///
227 /// # Examples
228 ///
229 /// ```rust,no_run
230 /// # use agpm_cli::lockfile::{LockFile, LockedResourceBuilder, ResourceId};
231 /// # use agpm_cli::core::ResourceType;
232 /// # use agpm_cli::utils::compute_variant_inputs_hash;
233 /// # let mut lockfile = LockFile::default();
234 /// # // First add a resource to update
235 /// # let resource = LockedResourceBuilder::new(
236 /// # "my-agent".to_string(),
237 /// # "my-agent.md".to_string(),
238 /// # "".to_string(),
239 /// # "agents/my-agent.md".to_string(),
240 /// # ResourceType::Agent,
241 /// # )
242 /// # .tool(Some("claude-code".to_string()))
243 /// # .build();
244 /// # lockfile.add_typed_resource("my-agent".to_string(), resource, ResourceType::Agent);
245 /// let variant_hash = compute_variant_inputs_hash(&serde_json::json!({})).unwrap_or_default();
246 /// let id = ResourceId::new("my-agent", None::<String>, Some("claude-code"), ResourceType::Agent, variant_hash);
247 /// let updated = lockfile.update_resource_checksum(&id, "sha256:abcdef123456...");
248 /// assert!(updated);
249 /// ```
250 pub fn update_resource_checksum(&mut self, id: &ResourceId, checksum: &str) -> bool {
251 // Try each resource type until we find a match by comparing ResourceIds
252 for resource in &mut self.agents {
253 if resource.id() == *id {
254 resource.checksum = checksum.to_string();
255 return true;
256 }
257 }
258
259 for resource in &mut self.snippets {
260 if resource.id() == *id {
261 resource.checksum = checksum.to_string();
262 return true;
263 }
264 }
265
266 for resource in &mut self.commands {
267 if resource.id() == *id {
268 resource.checksum = checksum.to_string();
269 return true;
270 }
271 }
272
273 for resource in &mut self.scripts {
274 if resource.id() == *id {
275 resource.checksum = checksum.to_string();
276 return true;
277 }
278 }
279
280 for resource in &mut self.hooks {
281 if resource.id() == *id {
282 resource.checksum = checksum.to_string();
283 return true;
284 }
285 }
286
287 for resource in &mut self.mcp_servers {
288 if resource.id() == *id {
289 resource.checksum = checksum.to_string();
290 return true;
291 }
292 }
293
294 for resource in &mut self.skills {
295 if resource.id() == *id {
296 resource.checksum = checksum.to_string();
297 return true;
298 }
299 }
300
301 false
302 }
303
304 /// Update context checksum for resource by ResourceId.
305 ///
306 /// Stores the SHA-256 checksum of template rendering inputs (context) in the lockfile.
307 /// This is different from the file checksum which covers the final rendered content.
308 ///
309 /// # Arguments
310 ///
311 /// * `id` - The ResourceId identifying the resource to update
312 /// * `context_checksum` - The SHA-256 checksum of template context, or None for non-templated resources
313 ///
314 /// # Returns
315 ///
316 /// Returns `true` if the resource was found and updated, `false` otherwise.
317 ///
318 /// # Examples
319 ///
320 /// ```rust,ignore
321 /// let mut lockfile = LockFile::new();
322 /// let id = ResourceId::new("my-agent", None::<String>, Some("claude-code"), ResourceType::Agent, serde_json::json!({}));
323 /// let updated = lockfile.update_resource_context_checksum(&id, Some("sha256:context123456..."));
324 /// assert!(updated);
325 /// ```
326 pub fn update_resource_context_checksum(
327 &mut self,
328 id: &ResourceId,
329 context_checksum: &str,
330 ) -> bool {
331 // Try each resource type until we find a match by comparing ResourceIds
332 for resource in &mut self.agents {
333 if resource.id() == *id {
334 resource.context_checksum = Some(context_checksum.to_string());
335 return true;
336 }
337 }
338
339 for resource in &mut self.snippets {
340 if resource.id() == *id {
341 resource.context_checksum = Some(context_checksum.to_string());
342 return true;
343 }
344 }
345
346 for resource in &mut self.commands {
347 if resource.id() == *id {
348 resource.context_checksum = Some(context_checksum.to_string());
349 return true;
350 }
351 }
352
353 for resource in &mut self.scripts {
354 if resource.id() == *id {
355 resource.context_checksum = Some(context_checksum.to_string());
356 return true;
357 }
358 }
359
360 for resource in &mut self.hooks {
361 if resource.id() == *id {
362 resource.context_checksum = Some(context_checksum.to_string());
363 return true;
364 }
365 }
366
367 for resource in &mut self.mcp_servers {
368 if resource.id() == *id {
369 resource.context_checksum = Some(context_checksum.to_string());
370 return true;
371 }
372 }
373
374 for resource in &mut self.skills {
375 if resource.id() == *id {
376 resource.context_checksum = Some(context_checksum.to_string());
377 return true;
378 }
379 }
380
381 false
382 }
383
384 /// Update applied patches for resource by name.
385 ///
386 /// Stores project patches in main lockfile; private patches go to agpm.private.lock.
387 /// Takes `AppliedPatches` from installer.
388 ///
389 /// # Arguments
390 ///
391 /// * `name` - The name of the resource to update
392 /// * `applied_patches` - The patches that were applied (from `AppliedPatches` struct)
393 ///
394 /// # Returns
395 ///
396 /// Returns `true` if the resource was found and updated, `false` otherwise.
397 ///
398 /// # Examples
399 ///
400 /// ```no_run
401 /// # use agpm_cli::lockfile::LockFile;
402 /// # use agpm_cli::manifest::patches::AppliedPatches;
403 /// # use std::collections::HashMap;
404 /// # let mut lockfile = LockFile::new();
405 /// let mut applied = AppliedPatches::new();
406 /// applied.project.insert("model".to_string(), toml::Value::String("haiku".into()));
407 ///
408 /// let updated = lockfile.update_resource_applied_patches("my-agent", &applied);
409 /// assert!(updated);
410 /// ```
411 pub fn update_resource_applied_patches(
412 &mut self,
413 name: &str,
414 applied_patches: &crate::manifest::patches::AppliedPatches,
415 ) -> bool {
416 // Store ONLY project patches in the main lockfile (agpm.lock)
417 // Private patches are stored separately in agpm.private.lock
418 // This ensures the main lockfile is deterministic and safe to commit
419 let project_patches = applied_patches.project.clone();
420
421 // Try each resource type until we find a match
422 for resource in &mut self.agents {
423 if resource.name == name {
424 resource.applied_patches = project_patches;
425 return true;
426 }
427 }
428
429 for resource in &mut self.snippets {
430 if resource.name == name {
431 resource.applied_patches = project_patches;
432 return true;
433 }
434 }
435
436 for resource in &mut self.commands {
437 if resource.name == name {
438 resource.applied_patches = project_patches;
439 return true;
440 }
441 }
442
443 for resource in &mut self.scripts {
444 if resource.name == name {
445 resource.applied_patches = project_patches;
446 return true;
447 }
448 }
449
450 for resource in &mut self.hooks {
451 if resource.name == name {
452 resource.applied_patches = project_patches;
453 return true;
454 }
455 }
456
457 for resource in &mut self.mcp_servers {
458 if resource.name == name {
459 resource.applied_patches = project_patches;
460 return true;
461 }
462 }
463
464 for resource in &mut self.skills {
465 if resource.name == name {
466 resource.applied_patches = project_patches;
467 return true;
468 }
469 }
470
471 false
472 }
473
474 /// Apply installation results to the lockfile in batch.
475 ///
476 /// Updates the lockfile with checksums, context checksums, and applied patches
477 /// from the installation process. This consolidates three separate update operations
478 /// into one batch call, reducing code duplication between install and update commands.
479 ///
480 /// # Batch Processing Pattern
481 ///
482 /// This function processes three parallel vectors of installation results:
483 /// 1. **File checksums** - SHA-256 of rendered content (triggers reinstall if changed)
484 /// 2. **Context checksums** - SHA-256 of template inputs (audit/debug only)
485 /// 3. **Applied patches** - Tracks which project patches were applied to each resource
486 ///
487 /// The batch approach ensures all three updates are applied consistently and
488 /// atomically to the lockfile, avoiding partial state.
489 ///
490 /// # Arguments
491 ///
492 /// * `checksums` - File checksums for each installed resource (by ResourceId)
493 /// * `context_checksums` - Context checksums for template inputs (Optional)
494 /// * `applied_patches_list` - Patches that were applied to each resource
495 ///
496 /// # Implementation Details
497 ///
498 /// - Updates are applied by ResourceId to handle duplicate resource names correctly
499 /// - Context checksums are only applied if present (non-templated resources have None)
500 /// - Only project patches are stored; private patches go to `agpm.private.lock`
501 /// - Called by both `install` and `update` commands after parallel installation
502 ///
503 /// # Examples
504 ///
505 /// ```rust,no_run
506 /// # use agpm_cli::lockfile::{LockFile, ResourceId};
507 /// # use agpm_cli::manifest::patches::AppliedPatches;
508 /// # use agpm_cli::core::ResourceType;
509 /// let mut lockfile = LockFile::default();
510 ///
511 /// // Collect results from parallel installation
512 /// let checksums = vec![/* (ResourceId, checksum) pairs */];
513 /// let context_checksums = vec![/* (ResourceId, Option<checksum>) pairs */];
514 /// let applied_patches = vec![/* (ResourceId, AppliedPatches) pairs */];
515 /// let token_counts = vec![/* (ResourceId, Option<u64>) pairs */];
516 ///
517 /// // Apply all results in batch (replaces 3 separate loops)
518 /// lockfile.apply_installation_results(
519 /// checksums,
520 /// context_checksums,
521 /// applied_patches,
522 /// token_counts,
523 /// );
524 /// ```
525 ///
526 pub fn apply_installation_results(
527 &mut self,
528 checksums: Vec<(ResourceId, String)>,
529 context_checksums: Vec<(ResourceId, Option<String>)>,
530 applied_patches_list: Vec<(ResourceId, crate::manifest::patches::AppliedPatches)>,
531 token_counts: Vec<(ResourceId, Option<u64>)>,
532 ) {
533 // Update lockfile with checksums
534 for (id, checksum) in checksums {
535 self.update_resource_checksum(&id, &checksum);
536 }
537
538 // Update lockfile with context checksums
539 for (id, context_checksum) in context_checksums {
540 if let Some(checksum) = context_checksum {
541 self.update_resource_context_checksum(&id, &checksum);
542 }
543 }
544
545 // Update lockfile with applied patches
546 for (id, applied_patches) in applied_patches_list {
547 self.update_resource_applied_patches(id.name(), &applied_patches);
548 }
549
550 // Update lockfile with token counts
551 for (id, token_count) in token_counts {
552 self.update_resource_token_count(&id, token_count);
553 }
554 }
555
556 /// Update the approximate token count for a resource.
557 ///
558 /// # Arguments
559 ///
560 /// * `id` - The resource identifier
561 /// * `token_count` - The approximate BPE token count, or None for skills/directories
562 fn update_resource_token_count(&mut self, id: &ResourceId, token_count: Option<u64>) {
563 let resources = match id.resource_type() {
564 crate::core::ResourceType::Agent => &mut self.agents,
565 crate::core::ResourceType::Snippet => &mut self.snippets,
566 crate::core::ResourceType::Command => &mut self.commands,
567 crate::core::ResourceType::Script => &mut self.scripts,
568 crate::core::ResourceType::Hook => &mut self.hooks,
569 crate::core::ResourceType::McpServer => &mut self.mcp_servers,
570 crate::core::ResourceType::Skill => &mut self.skills,
571 };
572
573 for resource in resources.iter_mut() {
574 if resource.matches_id(id) {
575 resource.approximate_token_count = token_count;
576 return;
577 }
578 }
579 }
580}