ggen_core/codegen/
audit.rs

//! Audit trail generation for determinism verification.
//!
//! Creates comprehensive audit records of a generation pipeline run (input
//! hashes, executed steps, and output hashes), enabling verification that the
//! same inputs produce identical outputs.
5
6use ggen_utils::error::{Error, Result};
7use serde::{Deserialize, Serialize};
8use sha2::{Digest, Sha256};
9use std::collections::BTreeMap;
10use std::path::Path;
11use std::time::Duration;
12
13/// Complete audit trail for generation verification
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct AuditTrail {
16    /// Generation timestamp (ISO 8601)
17    pub generated_at: String,
18
19    /// ggen version
20    pub ggen_version: String,
21
22    /// Input hashes for determinism verification
23    pub inputs: AuditInputs,
24
25    /// Pipeline execution log
26    pub pipeline: Vec<AuditStep>,
27
28    /// Generated file manifest
29    pub outputs: Vec<AuditOutput>,
30
31    /// Overall validation status
32    pub validation_passed: bool,
33
34    /// Total duration (ms)
35    pub total_duration_ms: u64,
36}
37
38/// Hashes of all input files
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct AuditInputs {
41    /// SHA256 of ggen.toml
42    pub manifest_hash: String,
43
44    /// SHA256 of each ontology file (BTreeMap for determinism)
45    pub ontology_hashes: BTreeMap<String, String>,
46
47    /// SHA256 of each template file
48    pub template_hashes: BTreeMap<String, String>,
49
50    /// SHA256 of each SPARQL file
51    pub query_hashes: BTreeMap<String, String>,
52}
53
54/// Record of a pipeline execution step
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct AuditStep {
57    /// Step type ("load_ontology", "inference", "construct", "render")
58    pub step_type: String,
59
60    /// Rule/file name
61    pub name: String,
62
63    /// Duration (ms)
64    pub duration_ms: u64,
65
66    /// Triples added (for graph operations)
67    #[serde(default)]
68    pub triples_added: Option<usize>,
69
70    /// Status ("success", "skipped", "error")
71    pub status: String,
72
73    /// Error message if failed
74    #[serde(default)]
75    pub error: Option<String>,
76}
77
78/// Record of a generated output file
79#[derive(Debug, Clone, Serialize, Deserialize)]
80pub struct AuditOutput {
81    /// Output file path
82    pub path: String,
83
84    /// SHA256 of generated content
85    pub content_hash: String,
86
87    /// File size in bytes
88    pub size_bytes: usize,
89
90    /// Generation rule that produced this
91    pub source_rule: String,
92}
93
94/// Builder for constructing audit trails
95pub struct AuditTrailBuilder {
96    /// Current ggen version
97    ggen_version: String,
98
99    /// Input file hashes
100    inputs: AuditInputs,
101
102    /// Pipeline execution steps
103    pipeline: Vec<AuditStep>,
104
105    /// Generated outputs
106    outputs: Vec<AuditOutput>,
107
108    /// Start time for duration calculation
109    started_at: std::time::Instant,
110}
111
112impl AuditTrailBuilder {
113    /// Create a new audit trail builder
114    pub fn new() -> Self {
115        Self {
116            ggen_version: env!("CARGO_PKG_VERSION").to_string(),
117            inputs: AuditInputs {
118                manifest_hash: String::new(),
119                ontology_hashes: BTreeMap::new(),
120                template_hashes: BTreeMap::new(),
121                query_hashes: BTreeMap::new(),
122            },
123            pipeline: Vec::new(),
124            outputs: Vec::new(),
125            started_at: std::time::Instant::now(),
126        }
127    }
128
129    /// Record input file hashes
130    ///
131    /// # Arguments
132    /// * `manifest` - Path to ggen.toml
133    /// * `ontologies` - Paths to ontology files
134    /// * `templates` - Paths to template files
135    pub fn record_inputs(
136        &mut self, manifest: &Path, ontologies: &[&Path], templates: &[&Path],
137    ) -> Result<&mut Self> {
138        // Hash manifest
139        self.inputs.manifest_hash = Self::hash_file(manifest)?;
140
141        // Hash ontologies
142        for ont in ontologies {
143            let hash = Self::hash_file(ont)?;
144            self.inputs
145                .ontology_hashes
146                .insert(ont.display().to_string(), hash);
147        }
148
149        // Hash templates
150        for tmpl in templates {
151            let hash = Self::hash_file(tmpl)?;
152            self.inputs
153                .template_hashes
154                .insert(tmpl.display().to_string(), hash);
155        }
156
157        Ok(self)
158    }
159
160    /// Record a pipeline execution step
161    ///
162    /// # Arguments
163    /// * `step_type` - Type of step (e.g., "inference", "render")
164    /// * `name` - Name of the rule/file
165    /// * `duration` - Execution duration
166    /// * `triples` - Optional triple count for graph operations
167    /// * `status` - Execution status
168    pub fn record_step(
169        &mut self, step_type: &str, name: &str, duration: Duration, triples: Option<usize>,
170        status: &str,
171    ) -> &mut Self {
172        self.pipeline.push(AuditStep {
173            step_type: step_type.to_string(),
174            name: name.to_string(),
175            duration_ms: duration.as_millis() as u64,
176            triples_added: triples,
177            status: status.to_string(),
178            error: None,
179        });
180        self
181    }
182
183    /// Record a pipeline step that failed
184    pub fn record_step_error(
185        &mut self, step_type: &str, name: &str, duration: Duration, error: &str,
186    ) -> &mut Self {
187        self.pipeline.push(AuditStep {
188            step_type: step_type.to_string(),
189            name: name.to_string(),
190            duration_ms: duration.as_millis() as u64,
191            triples_added: None,
192            status: "error".to_string(),
193            error: Some(error.to_string()),
194        });
195        self
196    }
197
198    /// Record a generated output file
199    ///
200    /// # Arguments
201    /// * `path` - Output file path
202    /// * `content` - Generated content
203    /// * `source_rule` - Rule that generated this file
204    pub fn record_output(&mut self, path: &Path, content: &str, source_rule: &str) -> &mut Self {
205        let hash = Self::hash_string(content);
206        self.outputs.push(AuditOutput {
207            path: path.display().to_string(),
208            content_hash: hash,
209            size_bytes: content.len(),
210            source_rule: source_rule.to_string(),
211        });
212        self
213    }
214
215    /// Build the final audit trail
216    pub fn build(&self, validation_passed: bool) -> AuditTrail {
217        let total_duration = self.started_at.elapsed();
218
219        AuditTrail {
220            generated_at: chrono::Utc::now().to_rfc3339(),
221            ggen_version: self.ggen_version.clone(),
222            inputs: self.inputs.clone(),
223            pipeline: self.pipeline.clone(),
224            outputs: self.outputs.clone(),
225            validation_passed,
226            total_duration_ms: total_duration.as_millis() as u64,
227        }
228    }
229
230    /// Write audit trail to a file
231    pub fn write_to(trail: &AuditTrail, path: &Path) -> Result<()> {
232        let json = serde_json::to_string_pretty(trail)
233            .map_err(|e| Error::new(&format!("Failed to serialize audit trail: {}", e)))?;
234
235        std::fs::write(path, json)
236            .map_err(|e| Error::new(&format!("Failed to write audit trail: {}", e)))?;
237
238        Ok(())
239    }
240
241    /// Calculate SHA256 hash of a file
242    fn hash_file(path: &Path) -> Result<String> {
243        let content = std::fs::read(path)
244            .map_err(|e| Error::new(&format!("Failed to read '{}': {}", path.display(), e)))?;
245        Ok(Self::hash_bytes(&content))
246    }
247
248    /// Calculate SHA256 hash of a string
249    fn hash_string(content: &str) -> String {
250        Self::hash_bytes(content.as_bytes())
251    }
252
253    /// Calculate SHA256 hash of bytes
254    fn hash_bytes(bytes: &[u8]) -> String {
255        let mut hasher = Sha256::new();
256        hasher.update(bytes);
257        format!("{:x}", hasher.finalize())
258    }
259}
260
261impl Default for AuditTrailBuilder {
262    fn default() -> Self {
263        Self::new()
264    }
265}
266
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Recorded steps appear in the trail in call order with their fields intact.
    #[test]
    fn test_audit_builder() {
        let mut builder = AuditTrailBuilder::new();

        builder.record_step(
            "inference",
            "auditable_fields",
            Duration::from_millis(5),
            Some(10),
            "success",
        );
        builder.record_step(
            "render",
            "structs",
            Duration::from_millis(15),
            None,
            "success",
        );

        let trail = builder.build(true);

        assert_eq!(trail.pipeline.len(), 2);
        assert!(trail.validation_passed);

        let first = &trail.pipeline[0];
        assert_eq!(first.step_type, "inference");
        assert_eq!(first.name, "auditable_fields");
        assert_eq!(first.duration_ms, 5);
        assert_eq!(first.triples_added, Some(10));
        assert_eq!(first.status, "success");
        assert!(first.error.is_none());

        let second = &trail.pipeline[1];
        assert_eq!(second.step_type, "render");
        assert_eq!(second.duration_ms, 15);
        assert_eq!(second.triples_added, None);
    }

    /// A failed step is stored with status "error" and carries its message.
    #[test]
    fn test_record_step_error() {
        let mut builder = AuditTrailBuilder::new();
        builder.record_step_error("render", "structs", Duration::from_millis(3), "boom");

        let trail = builder.build(false);

        assert!(!trail.validation_passed);
        assert_eq!(trail.pipeline.len(), 1);
        assert_eq!(trail.pipeline[0].status, "error");
        assert_eq!(trail.pipeline[0].error.as_deref(), Some("boom"));
        assert_eq!(trail.pipeline[0].triples_added, None);
        assert_eq!(trail.pipeline[0].duration_ms, 3);
    }

    /// Hashing is deterministic, input-sensitive, and produces real SHA-256 hex.
    #[test]
    fn test_hash_string() {
        let hash1 = AuditTrailBuilder::hash_string("hello world");
        let hash2 = AuditTrailBuilder::hash_string("hello world");
        let hash3 = AuditTrailBuilder::hash_string("different");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);

        // Known SHA-256 test vector, pinned so the algorithm and the hex
        // encoding cannot drift unnoticed.
        assert_eq!(
            hash1,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    /// An output record keeps the path, rule, byte size and content hash.
    #[test]
    fn test_record_output() {
        let content = "fn main() {}";
        let mut builder = AuditTrailBuilder::new();
        builder.record_output(Path::new("test.rs"), content, "structs");

        let trail = builder.build(true);
        assert_eq!(trail.outputs.len(), 1);
        assert_eq!(trail.outputs[0].path, "test.rs");
        assert_eq!(trail.outputs[0].source_rule, "structs");
        assert_eq!(trail.outputs[0].size_bytes, content.len());
        assert_eq!(
            trail.outputs[0].content_hash,
            AuditTrailBuilder::hash_string(content)
        );
    }
}
317}