agpm_cli/lockfile/
checksum.rs

1//! Checksum computation and verification for lockfile integrity.
2//!
3//! This module provides SHA-256 checksum operations for verifying file integrity,
4//! detecting corruption, and ensuring reproducible installations.
5
6use anyhow::{Context, Result};
7use std::fs;
8use std::path::Path;
9
10use super::{LockFile, ResourceId};
11
12impl LockFile {
13    /// Compute SHA-256 checksum for file integrity verification.
14    ///
15    /// Detects corruption, tampering, or changes after installation.
16    ///
17    /// # Arguments
18    ///
19    /// * `path` - Path to the file to checksum
20    ///
21    /// # Returns
22    ///
23    /// * `Ok(String)` - Checksum in format "`sha256:hexadecimal_hash`"
24    /// * `Err(anyhow::Error)` - File read error with detailed context
25    ///
26    /// # Checksum Format
27    ///
28    /// The returned checksum follows the format:
29    /// - **Algorithm prefix**: "sha256:"
30    /// - **Hash encoding**: Lowercase hexadecimal
31    /// - **Length**: 71 characters total (7 for prefix + 64 hex digits)
32    ///
33    /// # Examples
34    ///
35    /// ```rust,no_run
36    /// use std::path::Path;
37    /// use agpm_cli::lockfile::LockFile;
38    ///
39    /// # fn example() -> anyhow::Result<()> {
40    /// let checksum = LockFile::compute_checksum(Path::new("example.md"))?;
41    /// println!("File checksum: {}", checksum);
42    /// // Output: "sha256:a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3"
43    /// # Ok(())
44    /// # }
45    /// ```
46    ///
47    /// # Error Handling
48    ///
49    /// Provides detailed error context for common issues:
50    /// - **File not found**: Suggests checking the path
51    /// - **Permission denied**: Suggests checking file permissions
52    /// - **IO errors**: Suggests checking disk health or file locks
53    ///
54    /// # Security Considerations
55    ///
56    /// - Uses SHA-256, a cryptographically secure hash function
57    /// - Suitable for integrity verification and tamper detection
58    /// - Consistent across platforms (Windows, macOS, Linux)
59    /// - Not affected by line ending differences (hashes actual bytes)
60    ///
61    /// # Performance
62    ///
63    /// The method reads the entire file into memory before hashing.
64    /// For very large files (>100MB), consider streaming implementations
65    /// in future versions.
66    pub fn compute_checksum(path: &Path) -> Result<String> {
67        use sha2::{Digest, Sha256};
68
69        let content = fs::read(path).with_context(|| {
70            format!(
71                "Cannot read file for checksum calculation: {}\n\n\
72                    This error occurs when verifying file integrity.\n\
73                    Check that the file exists and is readable.",
74                path.display()
75            )
76        })?;
77
78        let mut hasher = Sha256::new();
79        hasher.update(&content);
80        let result = hasher.finalize();
81
82        Ok(format!("sha256:{}", hex::encode(result)))
83    }
84
85    /// Compute SHA-256 checksum for a directory (skill resources).
86    ///
87    /// Calculates a combined checksum of all files in a directory by concatenating
88    /// their individual checksums in sorted order. This provides a deterministic
89    /// checksum that changes when any file in the directory changes.
90    ///
91    /// # Arguments
92    ///
93    /// * `path` - Path to the directory to checksum
94    ///
95    /// # Returns
96    ///
97    /// * `Ok(String)` - Combined checksum in format "`sha256:hexadecimal_hash`"
98    /// * `Err(anyhow::Error)` - Directory read or file hash error
99    ///
100    /// # Algorithm
101    ///
102    /// 1. Walk directory recursively (files only, not directories)
103    /// 2. Compute SHA-256 of each file
104    /// 3. Sort file paths for deterministic ordering
105    /// 4. Concatenate all checksums with file paths
106    /// 5. Compute final SHA-256 of the concatenated data
107    ///
108    /// # Examples
109    ///
110    /// ```rust,no_run
111    /// use std::path::Path;
112    /// use agpm_cli::lockfile::LockFile;
113    ///
114    /// # fn example() -> anyhow::Result<()> {
115    /// let checksum = LockFile::compute_directory_checksum(Path::new("my-skill"))?;
116    /// println!("Directory checksum: {}", checksum);
117    /// # Ok(())
118    /// # }
119    /// ```
120    pub fn compute_directory_checksum(path: &Path) -> Result<String> {
121        use sha2::{Digest, Sha256};
122        use walkdir::WalkDir;
123
124        let mut file_hashes: Vec<(String, String)> = Vec::new();
125
126        for entry in WalkDir::new(path).follow_links(false) {
127            let entry = entry.with_context(|| {
128                format!("Failed to read directory entry in: {}", path.display())
129            })?;
130
131            if entry.file_type().is_file() {
132                let file_path = entry.path();
133                // Use normalize_path_for_storage for cross-platform deterministic checksums
134                let relative_path = crate::utils::normalize_path_for_storage(
135                    file_path.strip_prefix(path).unwrap_or(file_path),
136                );
137
138                let file_checksum = Self::compute_checksum(file_path)?;
139                file_hashes.push((relative_path, file_checksum));
140            }
141        }
142
143        // Sort by relative path for deterministic ordering
144        file_hashes.sort_by(|a, b| a.0.cmp(&b.0));
145
146        // Concatenate all checksums with their paths
147        let mut hasher = Sha256::new();
148        for (path, checksum) in &file_hashes {
149            hasher.update(format!("{}:{}\n", path, checksum).as_bytes());
150        }
151
152        let result = hasher.finalize();
153        Ok(format!("sha256:{}", hex::encode(result)))
154    }
155
156    /// Verify file matches expected checksum.
157    ///
158    /// Computes current checksum and compares with expected value.
159    ///
160    /// # Arguments
161    ///
162    /// * `path` - Path to the file to verify
163    /// * `expected` - Expected checksum in "sha256:hex" format
164    ///
165    /// # Returns
166    ///
167    /// * `Ok(true)` - File checksum matches expected value
168    /// * `Ok(false)` - File checksum does not match (corruption detected)
169    /// * `Err(anyhow::Error)` - File read error or checksum calculation failed
170    ///
171    /// # Examples
172    ///
173    /// ```rust,no_run
174    /// use std::path::Path;
175    /// use agpm_cli::lockfile::LockFile;
176    ///
177    /// # fn example() -> anyhow::Result<()> {
178    /// let expected = "sha256:a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3";
179    /// let is_valid = LockFile::verify_checksum(Path::new("example.md"), expected)?;
180    ///
181    /// if is_valid {
182    ///     println!("File integrity verified");
183    /// } else {
184    ///     println!("WARNING: File has been modified or corrupted!");
185    /// }
186    /// # Ok(())
187    /// # }
188    /// ```
189    ///
190    /// # Use Cases
191    ///
192    /// - **Installation verification**: Ensure copied files are intact
193    /// - **Periodic validation**: Detect file corruption over time
194    /// - **Security checks**: Detect unauthorized modifications
195    /// - **Troubleshooting**: Diagnose installation issues
196    ///
197    /// # Performance
198    ///
199    /// This method internally calls [`compute_checksum`](Self::compute_checksum),
200    /// so it has the same performance characteristics. For bulk verification
201    /// operations, consider caching computed checksums.
202    ///
203    /// # Security
204    ///
205    /// The comparison is performed using standard string equality, which is
206    /// not timing-attack resistant. Since checksums are not secrets, this
207    /// is acceptable for integrity verification purposes.
208    pub fn verify_checksum(path: &Path, expected: &str) -> Result<bool> {
209        let actual = Self::compute_checksum(path)?;
210        Ok(actual == expected)
211    }
212
213    /// Update checksum for resource identified by ResourceId.
214    ///
215    /// Used after installation to record actual file checksum. ResourceId ensures unique
216    /// identification via name, source, tool, and template_vars.
217    ///
218    /// # Arguments
219    ///
220    /// * `id` - The unique identifier for the resource
221    /// * `checksum` - The new SHA-256 checksum in "sha256:hex" format
222    ///
223    /// # Returns
224    ///
225    /// Returns `true` if the resource was found and updated, `false` otherwise.
226    ///
227    /// # Examples
228    ///
229    /// ```rust,no_run
230    /// # use agpm_cli::lockfile::{LockFile, LockedResourceBuilder, ResourceId};
231    /// # use agpm_cli::core::ResourceType;
232    /// # use agpm_cli::utils::compute_variant_inputs_hash;
233    /// # let mut lockfile = LockFile::default();
234    /// # // First add a resource to update
235    /// # let resource = LockedResourceBuilder::new(
236    /// #     "my-agent".to_string(),
237    /// #     "my-agent.md".to_string(),
238    /// #     "".to_string(),
239    /// #     "agents/my-agent.md".to_string(),
240    /// #     ResourceType::Agent,
241    /// # )
242    /// # .tool(Some("claude-code".to_string()))
243    /// # .build();
244    /// # lockfile.add_typed_resource("my-agent".to_string(), resource, ResourceType::Agent);
245    /// let variant_hash = compute_variant_inputs_hash(&serde_json::json!({})).unwrap_or_default();
246    /// let id = ResourceId::new("my-agent", None::<String>, Some("claude-code"), ResourceType::Agent, variant_hash);
247    /// let updated = lockfile.update_resource_checksum(&id, "sha256:abcdef123456...");
248    /// assert!(updated);
249    /// ```
250    pub fn update_resource_checksum(&mut self, id: &ResourceId, checksum: &str) -> bool {
251        // Try each resource type until we find a match by comparing ResourceIds
252        for resource in &mut self.agents {
253            if resource.id() == *id {
254                resource.checksum = checksum.to_string();
255                return true;
256            }
257        }
258
259        for resource in &mut self.snippets {
260            if resource.id() == *id {
261                resource.checksum = checksum.to_string();
262                return true;
263            }
264        }
265
266        for resource in &mut self.commands {
267            if resource.id() == *id {
268                resource.checksum = checksum.to_string();
269                return true;
270            }
271        }
272
273        for resource in &mut self.scripts {
274            if resource.id() == *id {
275                resource.checksum = checksum.to_string();
276                return true;
277            }
278        }
279
280        for resource in &mut self.hooks {
281            if resource.id() == *id {
282                resource.checksum = checksum.to_string();
283                return true;
284            }
285        }
286
287        for resource in &mut self.mcp_servers {
288            if resource.id() == *id {
289                resource.checksum = checksum.to_string();
290                return true;
291            }
292        }
293
294        for resource in &mut self.skills {
295            if resource.id() == *id {
296                resource.checksum = checksum.to_string();
297                return true;
298            }
299        }
300
301        false
302    }
303
304    /// Update context checksum for resource by ResourceId.
305    ///
306    /// Stores the SHA-256 checksum of template rendering inputs (context) in the lockfile.
307    /// This is different from the file checksum which covers the final rendered content.
308    ///
309    /// # Arguments
310    ///
311    /// * `id` - The ResourceId identifying the resource to update
312    /// * `context_checksum` - The SHA-256 checksum of template context, or None for non-templated resources
313    ///
314    /// # Returns
315    ///
316    /// Returns `true` if the resource was found and updated, `false` otherwise.
317    ///
318    /// # Examples
319    ///
320    /// ```rust,ignore
321    /// let mut lockfile = LockFile::new();
322    /// let id = ResourceId::new("my-agent", None::<String>, Some("claude-code"), ResourceType::Agent, serde_json::json!({}));
323    /// let updated = lockfile.update_resource_context_checksum(&id, Some("sha256:context123456..."));
324    /// assert!(updated);
325    /// ```
326    pub fn update_resource_context_checksum(
327        &mut self,
328        id: &ResourceId,
329        context_checksum: &str,
330    ) -> bool {
331        // Try each resource type until we find a match by comparing ResourceIds
332        for resource in &mut self.agents {
333            if resource.id() == *id {
334                resource.context_checksum = Some(context_checksum.to_string());
335                return true;
336            }
337        }
338
339        for resource in &mut self.snippets {
340            if resource.id() == *id {
341                resource.context_checksum = Some(context_checksum.to_string());
342                return true;
343            }
344        }
345
346        for resource in &mut self.commands {
347            if resource.id() == *id {
348                resource.context_checksum = Some(context_checksum.to_string());
349                return true;
350            }
351        }
352
353        for resource in &mut self.scripts {
354            if resource.id() == *id {
355                resource.context_checksum = Some(context_checksum.to_string());
356                return true;
357            }
358        }
359
360        for resource in &mut self.hooks {
361            if resource.id() == *id {
362                resource.context_checksum = Some(context_checksum.to_string());
363                return true;
364            }
365        }
366
367        for resource in &mut self.mcp_servers {
368            if resource.id() == *id {
369                resource.context_checksum = Some(context_checksum.to_string());
370                return true;
371            }
372        }
373
374        for resource in &mut self.skills {
375            if resource.id() == *id {
376                resource.context_checksum = Some(context_checksum.to_string());
377                return true;
378            }
379        }
380
381        false
382    }
383
384    /// Update applied patches for resource by name.
385    ///
386    /// Stores project patches in main lockfile; private patches go to agpm.private.lock.
387    /// Takes `AppliedPatches` from installer.
388    ///
389    /// # Arguments
390    ///
391    /// * `name` - The name of the resource to update
392    /// * `applied_patches` - The patches that were applied (from `AppliedPatches` struct)
393    ///
394    /// # Returns
395    ///
396    /// Returns `true` if the resource was found and updated, `false` otherwise.
397    ///
398    /// # Examples
399    ///
400    /// ```no_run
401    /// # use agpm_cli::lockfile::LockFile;
402    /// # use agpm_cli::manifest::patches::AppliedPatches;
403    /// # use std::collections::HashMap;
404    /// # let mut lockfile = LockFile::new();
405    /// let mut applied = AppliedPatches::new();
406    /// applied.project.insert("model".to_string(), toml::Value::String("haiku".into()));
407    ///
408    /// let updated = lockfile.update_resource_applied_patches("my-agent", &applied);
409    /// assert!(updated);
410    /// ```
411    pub fn update_resource_applied_patches(
412        &mut self,
413        name: &str,
414        applied_patches: &crate::manifest::patches::AppliedPatches,
415    ) -> bool {
416        // Store ONLY project patches in the main lockfile (agpm.lock)
417        // Private patches are stored separately in agpm.private.lock
418        // This ensures the main lockfile is deterministic and safe to commit
419        let project_patches = applied_patches.project.clone();
420
421        // Try each resource type until we find a match
422        for resource in &mut self.agents {
423            if resource.name == name {
424                resource.applied_patches = project_patches;
425                return true;
426            }
427        }
428
429        for resource in &mut self.snippets {
430            if resource.name == name {
431                resource.applied_patches = project_patches;
432                return true;
433            }
434        }
435
436        for resource in &mut self.commands {
437            if resource.name == name {
438                resource.applied_patches = project_patches;
439                return true;
440            }
441        }
442
443        for resource in &mut self.scripts {
444            if resource.name == name {
445                resource.applied_patches = project_patches;
446                return true;
447            }
448        }
449
450        for resource in &mut self.hooks {
451            if resource.name == name {
452                resource.applied_patches = project_patches;
453                return true;
454            }
455        }
456
457        for resource in &mut self.mcp_servers {
458            if resource.name == name {
459                resource.applied_patches = project_patches;
460                return true;
461            }
462        }
463
464        for resource in &mut self.skills {
465            if resource.name == name {
466                resource.applied_patches = project_patches;
467                return true;
468            }
469        }
470
471        false
472    }
473
474    /// Apply installation results to the lockfile in batch.
475    ///
476    /// Updates the lockfile with checksums, context checksums, and applied patches
477    /// from the installation process. This consolidates three separate update operations
478    /// into one batch call, reducing code duplication between install and update commands.
479    ///
480    /// # Batch Processing Pattern
481    ///
482    /// This function processes three parallel vectors of installation results:
483    /// 1. **File checksums** - SHA-256 of rendered content (triggers reinstall if changed)
484    /// 2. **Context checksums** - SHA-256 of template inputs (audit/debug only)
485    /// 3. **Applied patches** - Tracks which project patches were applied to each resource
486    ///
487    /// The batch approach ensures all three updates are applied consistently and
488    /// atomically to the lockfile, avoiding partial state.
489    ///
490    /// # Arguments
491    ///
492    /// * `checksums` - File checksums for each installed resource (by ResourceId)
493    /// * `context_checksums` - Context checksums for template inputs (Optional)
494    /// * `applied_patches_list` - Patches that were applied to each resource
495    ///
496    /// # Implementation Details
497    ///
498    /// - Updates are applied by ResourceId to handle duplicate resource names correctly
499    /// - Context checksums are only applied if present (non-templated resources have None)
500    /// - Only project patches are stored; private patches go to `agpm.private.lock`
501    /// - Called by both `install` and `update` commands after parallel installation
502    ///
503    /// # Examples
504    ///
505    /// ```rust,no_run
506    /// # use agpm_cli::lockfile::{LockFile, ResourceId};
507    /// # use agpm_cli::manifest::patches::AppliedPatches;
508    /// # use agpm_cli::core::ResourceType;
509    /// let mut lockfile = LockFile::default();
510    ///
511    /// // Collect results from parallel installation
512    /// let checksums = vec![/* (ResourceId, checksum) pairs */];
513    /// let context_checksums = vec![/* (ResourceId, Option<checksum>) pairs */];
514    /// let applied_patches = vec![/* (ResourceId, AppliedPatches) pairs */];
515    /// let token_counts = vec![/* (ResourceId, Option<u64>) pairs */];
516    ///
517    /// // Apply all results in batch (replaces 3 separate loops)
518    /// lockfile.apply_installation_results(
519    ///     checksums,
520    ///     context_checksums,
521    ///     applied_patches,
522    ///     token_counts,
523    /// );
524    /// ```
525    ///
526    pub fn apply_installation_results(
527        &mut self,
528        checksums: Vec<(ResourceId, String)>,
529        context_checksums: Vec<(ResourceId, Option<String>)>,
530        applied_patches_list: Vec<(ResourceId, crate::manifest::patches::AppliedPatches)>,
531        token_counts: Vec<(ResourceId, Option<u64>)>,
532    ) {
533        // Update lockfile with checksums
534        for (id, checksum) in checksums {
535            self.update_resource_checksum(&id, &checksum);
536        }
537
538        // Update lockfile with context checksums
539        for (id, context_checksum) in context_checksums {
540            if let Some(checksum) = context_checksum {
541                self.update_resource_context_checksum(&id, &checksum);
542            }
543        }
544
545        // Update lockfile with applied patches
546        for (id, applied_patches) in applied_patches_list {
547            self.update_resource_applied_patches(id.name(), &applied_patches);
548        }
549
550        // Update lockfile with token counts
551        for (id, token_count) in token_counts {
552            self.update_resource_token_count(&id, token_count);
553        }
554    }
555
556    /// Update the approximate token count for a resource.
557    ///
558    /// # Arguments
559    ///
560    /// * `id` - The resource identifier
561    /// * `token_count` - The approximate BPE token count, or None for skills/directories
562    fn update_resource_token_count(&mut self, id: &ResourceId, token_count: Option<u64>) {
563        let resources = match id.resource_type() {
564            crate::core::ResourceType::Agent => &mut self.agents,
565            crate::core::ResourceType::Snippet => &mut self.snippets,
566            crate::core::ResourceType::Command => &mut self.commands,
567            crate::core::ResourceType::Script => &mut self.scripts,
568            crate::core::ResourceType::Hook => &mut self.hooks,
569            crate::core::ResourceType::McpServer => &mut self.mcp_servers,
570            crate::core::ResourceType::Skill => &mut self.skills,
571        };
572
573        for resource in resources.iter_mut() {
574            if resource.matches_id(id) {
575                resource.approximate_token_count = token_count;
576                return;
577            }
578        }
579    }
580}