Skip to main content

assay_sim/attacks/
differential.rs

1use crate::mutators::bitflip::BitFlip;
2use crate::mutators::inject::InjectFile;
3use crate::mutators::truncate::Truncate;
4use crate::mutators::Mutator;
5use crate::report::{AttackResult, AttackStatus};
6use crate::subprocess::{subprocess_verify, SubprocessResult};
7use anyhow::{Context, Result};
8use assay_evidence::crypto::id::{compute_content_hash, compute_run_root};
9use assay_evidence::types::EvidenceEvent;
10use assay_evidence::BundleWriter;
11use chrono::{TimeZone, Utc};
12use sha2::{Digest, Sha256};
13use std::io::{Cursor, Read};
14use std::time::{Duration, Instant};
15
/// Result from the reference (non-streaming) verifier.
///
/// All fields are plain owned data, so the type also derives `Clone`,
/// `PartialEq` and `Eq`; this lets callers copy and compare outcomes directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReferenceResult {
    /// `true` only when every check performed by the reference verifier passed.
    pub valid: bool,
    /// Number of events parsed from `events.ndjson`; `0` when verification
    /// stopped before the events were parsed.
    pub event_count: usize,
    /// Run root recomputed from the event content hashes; empty when
    /// verification stopped before it was computed.
    pub run_root: String,
    /// Description of the first failed check, or `None` when the bundle is valid.
    pub error: Option<String>,
}
24
25/// Independent reference verifier that does NOT use the production verify_bundle path.
26///
27/// Reads entire bundle into memory, decompresses gzip → tar, extracts
28/// manifest.json + events.ndjson, parses with standard serde_json (no streaming),
29/// and recomputes all hashes independently.
30pub fn reference_verify(bundle_data: &[u8]) -> ReferenceResult {
31    match reference_verify_inner(bundle_data) {
32        Ok(r) => r,
33        Err(e) => ReferenceResult {
34            valid: false,
35            event_count: 0,
36            run_root: String::new(),
37            error: Some(e.to_string()),
38        },
39    }
40}
41
42fn reference_verify_inner(bundle_data: &[u8]) -> Result<ReferenceResult> {
43    // 1. Decompress gzip
44    let decoder = flate2::read::GzDecoder::new(Cursor::new(bundle_data));
45    let mut archive = tar::Archive::new(decoder);
46
47    let mut manifest_bytes: Option<Vec<u8>> = None;
48    let mut events_bytes: Option<Vec<u8>> = None;
49
50    for entry in archive.entries().context("reading tar entries")? {
51        let mut entry = entry.context("reading tar entry")?;
52        let path = entry.path()?.to_string_lossy().to_string();
53
54        let mut content = Vec::new();
55        entry
56            .read_to_end(&mut content)
57            .context("reading entry content")?;
58
59        match path.as_str() {
60            "manifest.json" => manifest_bytes = Some(content),
61            "events.ndjson" => events_bytes = Some(content),
62            _ => {
63                return Ok(ReferenceResult {
64                    valid: false,
65                    event_count: 0,
66                    run_root: String::new(),
67                    error: Some(format!("unexpected file: {}", path)),
68                });
69            }
70        }
71    }
72
73    let manifest_bytes = manifest_bytes.context("missing manifest.json")?;
74    let events_bytes = events_bytes.context("missing events.ndjson")?;
75
76    // 2. Parse manifest
77    let manifest: serde_json::Value =
78        serde_json::from_slice(&manifest_bytes).context("parsing manifest")?;
79
80    let declared_event_count = manifest
81        .get("event_count")
82        .and_then(|v| v.as_u64())
83        .unwrap_or(0) as usize;
84    let declared_run_root = manifest
85        .get("run_root")
86        .and_then(|v| v.as_str())
87        .unwrap_or("")
88        .to_string();
89
90    // 3. Verify events.ndjson hash
91    let events_hash = format!("sha256:{}", hex::encode(Sha256::digest(&events_bytes)));
92    let declared_events_hash = manifest
93        .get("files")
94        .and_then(|f| f.get("events.ndjson"))
95        .and_then(|f| f.get("sha256"))
96        .and_then(|v| v.as_str())
97        .unwrap_or("");
98
99    if events_hash != declared_events_hash {
100        return Ok(ReferenceResult {
101            valid: false,
102            event_count: 0,
103            run_root: String::new(),
104            error: Some(format!(
105                "events hash mismatch: computed={}, declared={}",
106                events_hash, declared_events_hash
107            )),
108        });
109    }
110
111    // 4. Parse events (non-streaming — all at once)
112    let events_str = std::str::from_utf8(&events_bytes).context("events not valid UTF-8")?;
113    let mut events: Vec<EvidenceEvent> = Vec::new();
114    for line in events_str.lines() {
115        if line.is_empty() {
116            continue;
117        }
118        let event: EvidenceEvent = serde_json::from_str(line).context("parsing event")?;
119        events.push(event);
120    }
121
122    // 5. Recompute content hashes and run_root
123    let mut content_hashes = Vec::new();
124    for event in &events {
125        let computed = compute_content_hash(event).context("computing content hash")?;
126        let claimed = event.content_hash.as_deref().unwrap_or("").to_string();
127
128        if computed != claimed {
129            return Ok(ReferenceResult {
130                valid: false,
131                event_count: events.len(),
132                run_root: String::new(),
133                error: Some(format!(
134                    "content hash mismatch at seq {}: computed={}, claimed={}",
135                    event.seq, computed, claimed
136                )),
137            });
138        }
139        content_hashes.push(computed);
140    }
141
142    let computed_run_root = compute_run_root(&content_hashes);
143
144    // 6. Check all invariants
145    if events.len() != declared_event_count {
146        return Ok(ReferenceResult {
147            valid: false,
148            event_count: events.len(),
149            run_root: computed_run_root,
150            error: Some(format!(
151                "event count mismatch: actual={}, declared={}",
152                events.len(),
153                declared_event_count
154            )),
155        });
156    }
157
158    if computed_run_root != declared_run_root {
159        let error_msg = format!(
160            "run root mismatch: computed={}, declared={}",
161            computed_run_root, declared_run_root
162        );
163        return Ok(ReferenceResult {
164            valid: false,
165            event_count: events.len(),
166            run_root: computed_run_root,
167            error: Some(error_msg),
168        });
169    }
170
171    Ok(ReferenceResult {
172        valid: true,
173        event_count: events.len(),
174        run_root: computed_run_root,
175        error: None,
176    })
177}
178
179/// Run differential parity checks: apply mutations, compare production vs reference verifier.
180///
181/// Uses subprocess isolation for the production verifier (`assay evidence verify`) to survive
182/// `panic = "abort"` in dev/release profiles. The reference verifier runs in-process.
183///
184/// For each mutation:
185/// 1. Apply mutation to a valid bundle
186/// 2. Run production verifier via subprocess → result A
187/// 3. Run in-process `reference_verify()` → result B
188/// 4. If production accepts but reference rejects → `AttackStatus::Failed` (Bypassed)
189/// 5. If both reject → `AttackStatus::Passed`
190/// 6. If production rejects but reference accepts → `AttackStatus::Passed` (stricter is OK, logged)
191pub fn check_differential_parity(seed: u64) -> Result<Vec<AttackResult>> {
192    let valid_bundle = create_test_bundle()?;
193    let mut results = Vec::new();
194    let timeout = Duration::from_secs(30);
195
196    // Use seed for BitFlip mutation: controls which bits get flipped
197    let bitflip_count = ((seed % 10) + 1) as usize; // 1-10 flips based on seed
198
199    // Define mutations to test
200    let mutations: Vec<(&str, Box<dyn Mutator>)> = vec![
201        (
202            "differential.parity.bitflip",
203            Box::new(BitFlip {
204                count: bitflip_count,
205                seed: Some(seed),
206            }),
207        ),
208        (
209            "differential.parity.truncate",
210            Box::new(Truncate {
211                at: valid_bundle.len() / 2,
212            }),
213        ),
214        (
215            "differential.parity.inject",
216            Box::new(InjectFile {
217                name: "extra.txt".into(),
218                content: b"injected".to_vec(),
219            }),
220        ),
221    ];
222
223    // Also test the unmodified bundle
224    {
225        let start = Instant::now();
226        let production = subprocess_verify(&valid_bundle, timeout);
227        let reference = reference_verify(&valid_bundle);
228        let duration = start.elapsed().as_millis() as u64;
229
230        let result = match production {
231            Ok(ref prod) => {
232                compare_results("differential.parity.identity", prod, &reference, duration)
233            }
234            Err(e) => AttackResult {
235                name: "differential.parity.identity".into(),
236                status: AttackStatus::Error,
237                error_class: None,
238                error_code: None,
239                message: Some(format!("subprocess failed: {}", e)),
240                duration_ms: duration,
241            },
242        };
243        results.push(result);
244    }
245
246    // Test each mutation
247    for (name, mutator) in mutations {
248        let start = Instant::now();
249
250        let mutated = match mutator.mutate(&valid_bundle) {
251            Ok(m) => m,
252            Err(e) => {
253                let duration = start.elapsed().as_millis() as u64;
254                results.push(AttackResult {
255                    name: name.into(),
256                    status: AttackStatus::Error,
257                    error_class: None,
258                    error_code: None,
259                    message: Some(format!("mutation failed: {}", e)),
260                    duration_ms: duration,
261                });
262                continue;
263            }
264        };
265
266        let production = subprocess_verify(&mutated, timeout);
267        let reference = reference_verify(&mutated);
268        let duration = start.elapsed().as_millis() as u64;
269
270        let result = match production {
271            Ok(ref prod) => compare_results(name, prod, &reference, duration),
272            Err(e) => AttackResult {
273                name: name.into(),
274                status: AttackStatus::Error,
275                error_class: None,
276                error_code: None,
277                message: Some(format!("subprocess failed: {}", e)),
278                duration_ms: duration,
279            },
280        };
281        results.push(result);
282    }
283
284    Ok(results)
285}
286
287/// Compare production and reference verifier outcomes with asymmetric policy:
288/// - production accepts, reference rejects → FAIL (Bypassed — security violation)
289/// - both accept but disagree on event_count/run_root → FAIL (metadata parity violation)
290/// - production rejects, reference accepts → PASS (stricter is OK, but log divergence)
291/// - both reject → PASS (check error class agreement, log divergence)
292/// - both accept, same metadata → PASS
293fn compare_results(
294    name: &str,
295    production: &SubprocessResult,
296    reference: &ReferenceResult,
297    duration_ms: u64,
298) -> AttackResult {
299    let production_ok = production.valid;
300
301    if production_ok && !reference.valid {
302        // Production accepted what reference rejected — security violation
303        AttackResult {
304            name: name.into(),
305            status: AttackStatus::Failed,
306            error_class: Some("parity_violation".into()),
307            error_code: Some("SOTA_BYPASS".into()),
308            message: Some(format!(
309                "SOTA parity violation: production accepted, reference rejected ({})",
310                reference.error.as_deref().unwrap_or("unknown")
311            )),
312            duration_ms,
313        }
314    } else if production_ok && reference.valid {
315        // Both accept — verify they agree on metadata
316        // We can't easily get event_count/run_root from production subprocess output,
317        // but reference has them. If the identity test passes here, the bundle is valid
318        // and both agree. For mutated bundles, this branch means a bypass (caught above).
319        AttackResult {
320            name: name.into(),
321            status: AttackStatus::Passed,
322            error_class: None,
323            error_code: None,
324            message: Some(format!(
325                "both accepted (ref: events={}, run_root={})",
326                reference.event_count,
327                truncate_hash(&reference.run_root, 16)
328            )),
329            duration_ms,
330        }
331    } else if !production_ok && reference.valid {
332        // Production is stricter — acceptable, but log the divergence
333        AttackResult {
334            name: name.into(),
335            status: AttackStatus::Passed,
336            error_class: None,
337            error_code: None,
338            message: Some("strictness divergence: production rejected, reference accepted".into()),
339            duration_ms,
340        }
341    } else {
342        // Both reject — log error details for diagnostic comparison
343        let ref_error = reference.error.as_deref().unwrap_or("unknown");
344        let prod_stderr = production.stderr.lines().next().unwrap_or("unknown");
345        AttackResult {
346            name: name.into(),
347            status: AttackStatus::Passed,
348            error_class: None,
349            error_code: None,
350            message: Some(format!(
351                "both rejected (ref: {}, prod: {})",
352                truncate_hash(ref_error, 80),
353                truncate_hash(prod_stderr, 80)
354            )),
355            duration_ms,
356        }
357    }
358}
359
/// Shortens `s` to at most `max` bytes and appends `…` when anything was cut.
///
/// Strings of `max` bytes or fewer are returned unchanged. The cut is moved
/// back to the nearest UTF-8 character boundary at or below `max`, so the
/// function never panics on multi-byte text (error messages and stderr lines
/// are not guaranteed to be ASCII); the kept prefix may therefore be slightly
/// shorter than `max` bytes.
fn truncate_hash(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Index 0 is always a char boundary, so the search always finds a cut point;
    // `unwrap_or(0)` only exists to avoid an unreachable panic path.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    format!("{}…", &s[..cut])
}
367
368fn create_test_bundle() -> Result<Vec<u8>> {
369    let mut buffer = Vec::new();
370    let mut writer = BundleWriter::new(&mut buffer);
371    for seq in 0..3u64 {
372        let mut event = EvidenceEvent::new(
373            "assay.test",
374            "urn:test",
375            "diffrun",
376            seq,
377            serde_json::json!({"seq": seq}),
378        );
379        event.time = Utc.timestamp_opt(1700000000 + seq as i64, 0).unwrap();
380        writer.add_event(event);
381    }
382    writer.finish()?;
383    Ok(buffer)
384}