amalgam_core/
fingerprint.rs

1//! Content fingerprinting for intelligent change detection
2//!
3//! This module provides universal change detection across all source types
4//! by creating content-based fingerprints that capture everything affecting
5//! code generation.
6
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10use std::collections::BTreeMap;
11use std::time::SystemTime;
12
/// Universal content fingerprint for change detection.
///
/// Instances are normally produced by [`FingerprintBuilder::build`], which
/// fills the three hash fields with lowercase hexadecimal SHA-256 digests.
/// The comparison helpers on this type look only at the hash fields; they
/// never consult `created_at`, `source_info` or `amalgam_version`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentFingerprint {
    /// Hash of all content that affects code generation
    pub content_hash: String,
    /// Source-specific metadata hash (URLs, versions, etc.)
    pub metadata_hash: String,
    /// Combined hash for quick comparison; derived from `content_hash` and
    /// `metadata_hash`, so it changes whenever either of them changes
    pub combined_hash: String,
    /// When this fingerprint was created (UTC)
    pub created_at: DateTime<Utc>,
    /// Source type and location information
    pub source_info: SourceInfo,
    /// Version of amalgam that created this fingerprint
    /// (the crate's `CARGO_PKG_VERSION` when built through `FingerprintBuilder`)
    pub amalgam_version: String,
}
29
/// Source-specific information for different ingest types.
///
/// This is descriptive data stored alongside the hashes of a
/// [`ContentFingerprint`]; `FingerprintBuilder` copies it into the fingerprint
/// as-is and does not feed it into any of the hashes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SourceInfo {
    /// Git repository source
    GitRepo {
        /// Repository URL
        url: String,
        /// Commit identifier the content was read at
        /// (NOTE(review): presumably a full SHA — confirm with callers)
        commit: String,
        /// Paths inside the repository that were ingested
        paths: Vec<String>,
        /// ETags or last-modified headers if available
        http_metadata: Option<BTreeMap<String, String>>,
    },
    /// Kubernetes cluster API
    K8sCluster {
        /// NOTE(review): how this differs from `server_version` is not
        /// visible in this file — confirm against the code that fills it in
        version: String,
        /// Version reported by the API server
        server_version: String,
        /// Hash of all CRD resource versions
        api_resources_hash: String,
    },
    /// Collection of URLs (like GitHub file listings)
    UrlCollection {
        /// Common base the individual URLs were listed from
        base_url: String,
        /// Individual URLs that were fetched
        urls: Vec<String>,
        /// ETag per URL, `None` where the server sent none
        /// (presumably index-aligned with `urls` — TODO confirm)
        etags: Vec<Option<String>>,
        /// Last-Modified timestamp per URL, `None` where unavailable
        /// (presumably index-aligned with `urls` — TODO confirm)
        last_modified: Vec<Option<DateTime<Utc>>>,
    },
    /// Local files
    ///
    /// `FingerprintBuilder::build` also uses this variant as a placeholder
    /// (a single `"unknown"` path) when no source info was supplied.
    LocalFiles {
        /// File paths that were read
        paths: Vec<String>,
        /// Modification time per file
        /// (presumably index-aligned with `paths` — TODO confirm)
        mtimes: Vec<SystemTime>,
        /// Size in bytes per file
        /// (presumably index-aligned with `paths` — TODO confirm)
        file_sizes: Vec<u64>,
    },
    /// Kubernetes core types from OpenAPI
    K8sCore {
        /// Kubernetes version the spec belongs to
        version: String,
        /// Hash of the OpenAPI specification document
        openapi_hash: String,
        /// URL the specification was fetched from
        spec_url: String,
    },
}
68
/// Builder for creating content fingerprints.
///
/// Collect content and metadata with the `add_*` methods, optionally attach
/// a [`SourceInfo`], then call `build` to obtain a [`ContentFingerprint`].
pub struct FingerprintBuilder {
    /// Raw content chunks, one entry per `add_content` / `add_content_str` call
    content_parts: Vec<Vec<u8>>,
    /// Metadata entries, each stored as a `key=value` string by `add_metadata`
    metadata_parts: Vec<String>,
    /// Source description to embed in the fingerprint; `None` until
    /// `with_source_info` is called
    source_info: Option<SourceInfo>,
}
75
76impl FingerprintBuilder {
77    /// Create a new fingerprint builder
78    pub fn new() -> Self {
79        Self {
80            content_parts: Vec::new(),
81            metadata_parts: Vec::new(),
82            source_info: None,
83        }
84    }
85
86    /// Add content that affects code generation (CRD YAML, OpenAPI spec, etc.)
87    pub fn add_content(&mut self, content: &[u8]) -> &mut Self {
88        self.content_parts.push(content.to_vec());
89        self
90    }
91
92    /// Add content from string
93    pub fn add_content_str(&mut self, content: &str) -> &mut Self {
94        self.add_content(content.as_bytes())
95    }
96
97    /// Add metadata that could affect generation (versions, URLs, etc.)
98    pub fn add_metadata(&mut self, key: &str, value: &str) -> &mut Self {
99        self.metadata_parts.push(format!("{}={}", key, value));
100        self
101    }
102
103    /// Set source information
104    pub fn with_source_info(&mut self, source_info: SourceInfo) -> &mut Self {
105        self.source_info = Some(source_info);
106        self
107    }
108
109    /// Build the final fingerprint
110    pub fn build(&self) -> ContentFingerprint {
111        let content_hash = self.hash_content();
112        let metadata_hash = self.hash_metadata();
113        let combined_hash = self.hash_combined(&content_hash, &metadata_hash);
114
115        ContentFingerprint {
116            content_hash,
117            metadata_hash,
118            combined_hash,
119            created_at: Utc::now(),
120            source_info: self
121                .source_info
122                .clone()
123                .unwrap_or_else(|| SourceInfo::LocalFiles {
124                    paths: vec!["unknown".to_string()],
125                    mtimes: vec![SystemTime::now()],
126                    file_sizes: vec![0],
127                }),
128            amalgam_version: env!("CARGO_PKG_VERSION").to_string(),
129        }
130    }
131
132    fn hash_content(&self) -> String {
133        let mut hasher = Sha256::new();
134
135        // Sort content for deterministic hashing
136        let mut sorted_content = self.content_parts.clone();
137        sorted_content.sort();
138
139        for content in &sorted_content {
140            hasher.update(content);
141        }
142
143        format!("{:x}", hasher.finalize())
144    }
145
146    fn hash_metadata(&self) -> String {
147        let mut hasher = Sha256::new();
148
149        // Sort metadata for deterministic hashing
150        let mut sorted_metadata = self.metadata_parts.clone();
151        sorted_metadata.sort();
152
153        for metadata in &sorted_metadata {
154            hasher.update(metadata.as_bytes());
155        }
156
157        format!("{:x}", hasher.finalize())
158    }
159
160    fn hash_combined(&self, content_hash: &str, metadata_hash: &str) -> String {
161        let mut hasher = Sha256::new();
162        hasher.update(content_hash.as_bytes());
163        hasher.update(metadata_hash.as_bytes());
164        format!("{:x}", hasher.finalize())
165    }
166}
167
168impl Default for FingerprintBuilder {
169    fn default() -> Self {
170        Self::new()
171    }
172}
173
174impl ContentFingerprint {
175    /// Check if this fingerprint represents the same content as another
176    pub fn content_matches(&self, other: &ContentFingerprint) -> bool {
177        self.combined_hash == other.combined_hash
178    }
179
180    /// Check if only metadata changed (requiring regeneration with new timestamps)
181    pub fn metadata_changed(&self, other: &ContentFingerprint) -> bool {
182        self.content_hash == other.content_hash && self.metadata_hash != other.metadata_hash
183    }
184
185    /// Check if content changed (requiring full regeneration)
186    pub fn content_changed(&self, other: &ContentFingerprint) -> bool {
187        self.content_hash != other.content_hash
188    }
189
190    /// Get a short hash for display purposes
191    pub fn short_hash(&self) -> String {
192        self.combined_hash.chars().take(12).collect()
193    }
194
195    /// Save fingerprint to a file
196    pub fn save_to_file(&self, path: &std::path::Path) -> Result<(), Box<dyn std::error::Error>> {
197        let content = serde_json::to_string_pretty(self)?;
198        std::fs::write(path, content)?;
199        Ok(())
200    }
201
202    /// Load fingerprint from a file
203    pub fn load_from_file(
204        path: &std::path::Path,
205    ) -> Result<ContentFingerprint, Box<dyn std::error::Error>> {
206        if !path.exists() {
207            return Err("Fingerprint file does not exist".into());
208        }
209        let content = std::fs::read_to_string(path)?;
210        let fingerprint = serde_json::from_str(&content)?;
211        Ok(fingerprint)
212    }
213
214    /// Create a fingerprint file path for a package
215    pub fn fingerprint_path(output_dir: &std::path::Path) -> std::path::PathBuf {
216        output_dir.join(".amalgam-fingerprint.json")
217    }
218}
219
220/// Trait for source types to implement fingerprinting
221pub trait Fingerprintable {
222    /// Create a content fingerprint for this source
223    fn create_fingerprint(&self) -> Result<ContentFingerprint, Box<dyn std::error::Error>>;
224
225    /// Check if content has changed since the last fingerprint
226    fn has_changed(
227        &self,
228        last_fingerprint: &ContentFingerprint,
229    ) -> Result<bool, Box<dyn std::error::Error>> {
230        let current = self.create_fingerprint()?;
231        Ok(current.content_changed(last_fingerprint) || current.metadata_changed(last_fingerprint))
232    }
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a fingerprint from one content string and a list of metadata pairs.
    fn fingerprint_of(content: &str, metadata: &[(&str, &str)]) -> ContentFingerprint {
        let mut builder = FingerprintBuilder::new();
        builder.add_content_str(content);
        for (key, value) in metadata {
            builder.add_metadata(key, value);
        }
        builder.build()
    }

    #[test]
    fn test_fingerprint_builder() {
        let fingerprint =
            fingerprint_of("test content", &[("version", "1.0.0"), ("source", "test")]);

        // Every hash must be populated, and the display hash is 12 characters.
        assert!(!fingerprint.content_hash.is_empty());
        assert!(!fingerprint.metadata_hash.is_empty());
        assert!(!fingerprint.combined_hash.is_empty());
        assert_eq!(fingerprint.short_hash().len(), 12);
    }

    #[test]
    fn test_fingerprint_comparison() {
        // Two independent builders fed identical content must agree.
        let first = fingerprint_of("same content", &[]);
        let second = fingerprint_of("same content", &[]);

        assert!(first.content_matches(&second));
    }

    #[test]
    fn test_content_vs_metadata_changes() {
        let baseline = fingerprint_of("content", &[("version", "1.0.0")]);

        // Same content, different metadata
        let bumped_version = fingerprint_of("content", &[("version", "1.0.1")]);
        assert!(baseline.metadata_changed(&bumped_version));
        assert!(!baseline.content_changed(&bumped_version));

        // Different content
        let other_content = fingerprint_of("different content", &[("version", "1.0.0")]);
        assert!(baseline.content_changed(&other_content));
        assert!(!baseline.metadata_changed(&other_content));
    }
}
296}