amalgam_parser/
incremental.rs

1//! Incremental update support for all parser types
2//!
3//! This module implements fingerprinting for different source types
4//! to enable intelligent change detection and incremental updates.
5
6use amalgam_core::fingerprint::{
7    ContentFingerprint, FingerprintBuilder, Fingerprintable, SourceInfo,
8};
9use std::path::Path;
10
/// URL-based source fingerprinting (GitHub, GitLab, etc.).
///
/// Plain data holder. Its [`Fingerprintable`] implementation feeds every entry
/// of `contents` into the content hash and records `base_url` plus the number
/// of `urls` as metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UrlSource {
    /// Base URL of the collection; recorded as the `base_url` metadata entry.
    pub base_url: String,
    /// URLs that make up this source. Only their count is hashed as metadata;
    /// the list itself is stored in the source info.
    pub urls: Vec<String>,
    /// Content bodies fed into the content hash.
    /// NOTE(review): presumably one entry per element of `urls`, in the same
    /// order — nothing here enforces that; confirm with callers.
    pub contents: Vec<String>,
}
17
18impl Fingerprintable for UrlSource {
19    fn create_fingerprint(&self) -> Result<ContentFingerprint, Box<dyn std::error::Error>> {
20        let mut builder = FingerprintBuilder::new();
21
22        // Add all content that affects generation
23        for content in &self.contents {
24            builder.add_content_str(content);
25        }
26
27        // Add metadata that could change
28        builder.add_metadata("base_url", &self.base_url);
29        builder.add_metadata("url_count", &self.urls.len().to_string());
30
31        // TODO: Add ETags and Last-Modified headers when available
32        let source_info = SourceInfo::UrlCollection {
33            base_url: self.base_url.clone(),
34            urls: self.urls.clone(),
35            etags: vec![None; self.urls.len()], // Will be populated from HTTP headers
36            last_modified: vec![None; self.urls.len()],
37        };
38
39        builder.with_source_info(source_info);
40
41        Ok(builder.build())
42    }
43}
44
/// Kubernetes cluster source fingerprinting.
///
/// Plain data holder. Its [`Fingerprintable`] implementation hashes every
/// entry of `crd_specs` and records both version strings plus the CRD count as
/// metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct K8sClusterSource {
    /// Version reported by the cluster's API server; recorded as the
    /// `server_version` metadata entry.
    pub server_version: String,
    /// API version string; recorded as the `api_version` metadata entry and
    /// stored as `version` in the source info.
    pub api_version: String,
    /// CRD specifications as strings; each one is hashed as content.
    /// NOTE(review): the serialization format (YAML/JSON) is not visible here.
    pub crd_specs: Vec<String>,
}
51
52impl Fingerprintable for K8sClusterSource {
53    fn create_fingerprint(&self) -> Result<ContentFingerprint, Box<dyn std::error::Error>> {
54        let mut builder = FingerprintBuilder::new();
55
56        // Hash all CRD specifications
57        for spec in &self.crd_specs {
58            builder.add_content_str(spec);
59        }
60
61        // Add server metadata
62        builder.add_metadata("server_version", &self.server_version);
63        builder.add_metadata("api_version", &self.api_version);
64        builder.add_metadata("crd_count", &self.crd_specs.len().to_string());
65
66        // Create API resources hash
67        let mut api_hasher = sha2::Sha256::new();
68        use sha2::Digest;
69        for spec in &self.crd_specs {
70            api_hasher.update(spec.as_bytes());
71        }
72        let api_resources_hash = format!("{:x}", api_hasher.finalize());
73
74        let source_info = SourceInfo::K8sCluster {
75            version: self.api_version.clone(),
76            server_version: self.server_version.clone(),
77            api_resources_hash,
78        };
79
80        builder.with_source_info(source_info);
81        Ok(builder.build())
82    }
83}
84
/// Kubernetes core types (OpenAPI) fingerprinting.
///
/// Plain data holder. Its [`Fingerprintable`] implementation hashes
/// `openapi_spec` as content and records `version` and `spec_url` as metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct K8sCoreSource {
    /// Kubernetes version; recorded as the `k8s_version` metadata entry.
    pub version: String,
    /// Full text of the OpenAPI specification; the only input to the content hash.
    pub openapi_spec: String,
    /// Location of the spec; recorded as the `spec_url` metadata entry.
    pub spec_url: String,
}
91
92impl Fingerprintable for K8sCoreSource {
93    fn create_fingerprint(&self) -> Result<ContentFingerprint, Box<dyn std::error::Error>> {
94        let mut builder = FingerprintBuilder::new();
95
96        // The OpenAPI spec content is what matters for generation
97        builder.add_content_str(&self.openapi_spec);
98
99        // Version and URL are metadata
100        builder.add_metadata("k8s_version", &self.version);
101        builder.add_metadata("spec_url", &self.spec_url);
102
103        // Hash just the OpenAPI spec for the fingerprint
104        let mut hasher = sha2::Sha256::new();
105        use sha2::Digest;
106        hasher.update(self.openapi_spec.as_bytes());
107        let openapi_hash = format!("{:x}", hasher.finalize());
108
109        let source_info = SourceInfo::K8sCore {
110            version: self.version.clone(),
111            openapi_hash,
112            spec_url: self.spec_url.clone(),
113        };
114
115        builder.with_source_info(source_info);
116        Ok(builder.build())
117    }
118}
119
/// Local files fingerprinting.
///
/// Plain data holder. Its [`Fingerprintable`] implementation hashes every
/// entry of `contents`, records the paths as metadata, and stats each path on
/// disk to capture modification time and size.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocalFilesSource {
    /// Filesystem paths of the source files; each is passed to
    /// `std::fs::metadata` when the fingerprint is created.
    pub paths: Vec<String>,
    /// File contents fed into the content hash.
    /// NOTE(review): presumably one entry per element of `paths`, in the same
    /// order — nothing here enforces that; confirm with callers.
    pub contents: Vec<String>,
}
125
126impl Fingerprintable for LocalFilesSource {
127    fn create_fingerprint(&self) -> Result<ContentFingerprint, Box<dyn std::error::Error>> {
128        let mut builder = FingerprintBuilder::new();
129
130        // Add all file contents
131        for content in &self.contents {
132            builder.add_content_str(content);
133        }
134
135        // Add metadata
136        builder.add_metadata("file_count", &self.paths.len().to_string());
137        for path in &self.paths {
138            builder.add_metadata("file_path", path);
139        }
140
141        // Get file metadata
142        let mut mtimes = Vec::new();
143        let mut file_sizes = Vec::new();
144
145        for path in &self.paths {
146            if let Ok(metadata) = std::fs::metadata(path) {
147                mtimes.push(
148                    metadata
149                        .modified()
150                        .unwrap_or(std::time::SystemTime::UNIX_EPOCH),
151                );
152                file_sizes.push(metadata.len());
153            } else {
154                mtimes.push(std::time::SystemTime::UNIX_EPOCH);
155                file_sizes.push(0);
156            }
157        }
158
159        let source_info = SourceInfo::LocalFiles {
160            paths: self.paths.clone(),
161            mtimes,
162            file_sizes,
163        };
164
165        builder.with_source_info(source_info);
166        Ok(builder.build())
167    }
168}
169
/// Git repository fingerprinting.
///
/// Plain data holder. Its [`Fingerprintable`] implementation hashes every
/// entry of `contents` and records the repository URL, the commit and the
/// number of paths as metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitRepoSource {
    /// Repository URL; recorded as the `git_url` metadata entry.
    pub url: String,
    /// Commit identifier; recorded as the `git_commit` metadata entry.
    pub commit: String,
    /// Paths within the repository. Only their count is hashed as metadata;
    /// the list itself is stored in the source info.
    pub paths: Vec<String>,
    /// File contents fed into the content hash.
    /// NOTE(review): presumably one entry per element of `paths`, in the same
    /// order — nothing here enforces that; confirm with callers.
    pub contents: Vec<String>,
}
177
178impl Fingerprintable for GitRepoSource {
179    fn create_fingerprint(&self) -> Result<ContentFingerprint, Box<dyn std::error::Error>> {
180        let mut builder = FingerprintBuilder::new();
181
182        // Add all file contents from the repo
183        for content in &self.contents {
184            builder.add_content_str(content);
185        }
186
187        // Add Git metadata
188        builder.add_metadata("git_url", &self.url);
189        builder.add_metadata("git_commit", &self.commit);
190        builder.add_metadata("path_count", &self.paths.len().to_string());
191
192        let source_info = SourceInfo::GitRepo {
193            url: self.url.clone(),
194            commit: self.commit.clone(),
195            paths: self.paths.clone(),
196            http_metadata: None, // Could add ETags from GitHub API
197        };
198
199        builder.with_source_info(source_info);
200        Ok(builder.build())
201    }
202}
203
204/// High-level function to check if a package needs regeneration
205pub fn needs_regeneration(
206    output_dir: &Path,
207    source: &dyn Fingerprintable,
208) -> Result<bool, Box<dyn std::error::Error>> {
209    let fingerprint_path = ContentFingerprint::fingerprint_path(output_dir);
210
211    // If no previous fingerprint exists, we need to generate
212    if !fingerprint_path.exists() {
213        return Ok(true);
214    }
215
216    let last_fingerprint = ContentFingerprint::load_from_file(&fingerprint_path)?;
217    source.has_changed(&last_fingerprint)
218}
219
220/// Save fingerprint after successful generation
221pub fn save_fingerprint(
222    output_dir: &Path,
223    source: &dyn Fingerprintable,
224) -> Result<(), Box<dyn std::error::Error>> {
225    let fingerprint = source.create_fingerprint()?;
226    let fingerprint_path = ContentFingerprint::fingerprint_path(output_dir);
227
228    // Ensure directory exists
229    if let Some(parent) = fingerprint_path.parent() {
230        std::fs::create_dir_all(parent)?;
231    }
232
233    fingerprint.save_to_file(&fingerprint_path)?;
234    Ok(())
235}
236
/// Kind of change detected between the stored fingerprint and the current
/// source, so callers can pick a matching update strategy.
///
/// Implements `PartialEq`/`Eq` so results can be compared directly with `==`
/// or `assert_eq!` instead of requiring a `match`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChangeType {
    /// No changes detected
    NoChange,
    /// Only metadata changed (version, timestamps) - might update with same content
    MetadataOnly,
    /// Content changed - full regeneration required
    ContentChanged,
    /// No previous fingerprint - first generation
    FirstGeneration,
}
249
250pub fn detect_change_type(
251    output_dir: &Path,
252    source: &dyn Fingerprintable,
253) -> Result<ChangeType, Box<dyn std::error::Error>> {
254    let fingerprint_path = ContentFingerprint::fingerprint_path(output_dir);
255
256    if !fingerprint_path.exists() {
257        return Ok(ChangeType::FirstGeneration);
258    }
259
260    let last_fingerprint = ContentFingerprint::load_from_file(&fingerprint_path)?;
261    let current_fingerprint = source.create_fingerprint()?;
262
263    if current_fingerprint.content_matches(&last_fingerprint) {
264        Ok(ChangeType::NoChange)
265    } else if current_fingerprint.metadata_changed(&last_fingerprint) {
266        Ok(ChangeType::MetadataOnly)
267    } else {
268        Ok(ChangeType::ContentChanged)
269    }
270}
271
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Converts a slice of string literals into owned strings.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Builds a `UrlSource` rooted at `https://example.com`.
    fn url_source(urls: &[&str], contents: &[&str]) -> UrlSource {
        UrlSource {
            base_url: "https://example.com".to_string(),
            urls: owned(urls),
            contents: owned(contents),
        }
    }

    /// Identical inputs must yield non-empty, matching fingerprints.
    #[test]
    fn test_url_source_fingerprinting() {
        let urls = ["file1.yaml", "file2.yaml"];
        let contents = ["content1", "content2"];

        let source = url_source(&urls, &contents);
        let fingerprint = source.create_fingerprint().unwrap();
        assert!(!fingerprint.content_hash.is_empty());
        assert!(!fingerprint.combined_hash.is_empty());

        // Same content should produce same fingerprint
        let source2 = url_source(&urls, &contents);
        let fingerprint2 = source2.create_fingerprint().unwrap();
        assert!(fingerprint.content_matches(&fingerprint2));
    }

    /// Regeneration is required initially and after a content change, but not
    /// right after the fingerprint was saved.
    #[test]
    fn test_needs_regeneration() {
        let temp_dir = TempDir::new().unwrap();
        let output_dir = temp_dir.path();

        let source = url_source(&["file1.yaml"], &["content1"]);

        // First time should need regeneration
        assert!(needs_regeneration(output_dir, &source).unwrap());

        // Save fingerprint
        save_fingerprint(output_dir, &source).unwrap();

        // Second time should not need regeneration
        assert!(!needs_regeneration(output_dir, &source).unwrap());

        // Changed content should need regeneration
        let changed_source = url_source(&["file1.yaml"], &["different_content"]);
        assert!(needs_regeneration(output_dir, &changed_source).unwrap());
    }

    /// Walks through first generation, no change, and content change.
    #[test]
    fn test_change_type_detection() {
        let temp_dir = TempDir::new().unwrap();
        let output_dir = temp_dir.path();

        let source = url_source(&["file1.yaml"], &["content1"]);

        // First generation
        match detect_change_type(output_dir, &source).unwrap() {
            ChangeType::FirstGeneration => {}
            other => panic!("Expected FirstGeneration, got {:?}", other),
        }

        // Save fingerprint
        save_fingerprint(output_dir, &source).unwrap();

        // No change
        match detect_change_type(output_dir, &source).unwrap() {
            ChangeType::NoChange => {}
            other => panic!("Expected NoChange, got {:?}", other),
        }

        // Content change
        let changed_source = url_source(&["file1.yaml"], &["different_content"]);
        match detect_change_type(output_dir, &changed_source).unwrap() {
            ChangeType::ContentChanged => {}
            other => panic!("Expected ContentChanged, got {:?}", other),
        }
    }
}