#![allow(dead_code)]
use std::collections::BTreeMap;
use std::io::{Cursor, Write};
use std::path::{Path, PathBuf};
use serde_json::{Map, Value};
use sha2::{Digest, Sha256};
use crate::ir_nodes::IRProgram;
use super::super::attestation::{
generate_dossier, generate_in_toto_statement, generate_sbom,
};
use super::control_statements::{generate_control_statements, statements_to_value};
use super::frameworks::{FrameworkId, all_frameworks};
use super::gap_analyzer::analyze_all;
use super::risk_register::{generate_risk_register, risk_register_to_value};
/// Builder identifier handed to `generate_in_toto_statement` when the
/// evidence package is assembled.
const IN_TOTO_BUILDER_ID: &str = "https://axon-lang.io/builders/compiler@v1";
/// Subject name handed to `generate_in_toto_statement` alongside the builder id.
const IN_TOTO_SUBJECT_NAME: &str = "axon-program";
/// In-memory evidence bundle: a program hash plus the files that make up the
/// package, keyed by their name inside the bundle.
#[derive(Debug, Clone)]
pub struct EvidencePackage {
/// Hash string supplied to `EvidencePackage::new`; `build_evidence_package`
/// passes the SBOM's `program_hash` here.
pub program_hash: String,
/// File contents keyed by entry name. Being a `BTreeMap`, iteration (and
/// therefore ZIP entry order) follows sorted key order.
pub files: BTreeMap<String, Vec<u8>>,
}
impl EvidencePackage {
    /// Creates an empty package tagged with `program_hash`.
    pub fn new(program_hash: impl Into<String>) -> Self {
        Self {
            program_hash: program_hash.into(),
            files: BTreeMap::new(),
        }
    }

    /// Returns the name of every file in the package, in sorted key order.
    pub fn filenames(&self) -> Vec<String> {
        self.files.keys().map(|name| name.to_owned()).collect()
    }

    /// Returns the lowercase hex SHA-256 of the named file's contents, or
    /// `None` when the package holds no file with that name.
    pub fn sha256_of(&self, filename: &str) -> Option<String> {
        let contents = self.files.get(filename)?;
        let digest = Sha256::digest(contents);
        Some(digest.iter().map(|byte| format!("{:02x}", byte)).collect())
    }

    /// Serialises the package into an in-memory ZIP archive.
    ///
    /// Entries are written in the map's sorted key order, Deflate-compressed,
    /// and all stamped with the same fixed modification time, so the archive
    /// does not depend on the wall clock.
    ///
    /// # Panics
    ///
    /// Panics if the ZIP writer rejects an entry or fails to finish.
    pub fn to_zip_bytes(&self) -> Vec<u8> {
        // One constant timestamp for every entry.
        let stamp = zip::DateTime::from_date_and_time(2026, 1, 1, 0, 0, 0)
            .expect("valid DateTime constants");
        let entry_options = zip::write::SimpleFileOptions::default()
            .compression_method(zip::CompressionMethod::Deflated)
            .last_modified_time(stamp);

        let mut archive = zip::ZipWriter::new(Cursor::new(Vec::new()));
        for (entry_name, contents) in self.files.iter() {
            archive
                .start_file(entry_name.as_str(), entry_options)
                .expect("zip start_file");
            archive.write_all(contents).expect("zip write_all");
        }
        // `finish` hands back the cursor; unwrap it to get the raw bytes.
        archive.finish().expect("zip finish").into_inner()
    }

    /// Writes the ZIP archive to `path` and returns that path as a `PathBuf`.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be written.
    pub fn write_zip<P: AsRef<Path>>(&self, path: P) -> PathBuf {
        let destination = path.as_ref().to_path_buf();
        let archive_bytes = self.to_zip_bytes();
        std::fs::write(&destination, archive_bytes).expect("write evidence zip");
        destination
    }
}
/// Encodes `payload` as the package's canonical JSON bytes: object keys
/// sorted, pretty-printed, and every non-ASCII character written as a
/// `\uXXXX` escape.
///
/// # Panics
///
/// Panics if `serde_json` fails to serialise the value.
fn j(payload: &Value) -> Vec<u8> {
    let pretty = serde_json::to_string_pretty(&sort_value(payload))
        .expect("serialise canonical JSON");
    let ascii_only = escape_non_ascii(&pretty);
    ascii_only.into_bytes()
}
/// Returns `s` with every non-ASCII character replaced by `\uXXXX` escapes
/// (lowercase hex, four digits per UTF-16 code unit).
///
/// Characters outside the Basic Multilingual Plane become a surrogate pair,
/// i.e. two consecutive `\uXXXX` escapes. ASCII characters are copied through
/// unchanged.
fn escape_non_ascii(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    // Scratch space for the one or two UTF-16 units of a single char.
    let mut units = [0u16; 2];
    for ch in s.chars() {
        if ch.is_ascii() {
            escaped.push(ch);
            continue;
        }
        for unit in ch.encode_utf16(&mut units).iter() {
            escaped.push_str(&format!("\\u{:04x}", unit));
        }
    }
    escaped
}
fn sort_value(v: &Value) -> Value {
match v {
Value::Object(m) => {
let mut keys: Vec<&String> = m.keys().collect();
keys.sort();
let mut out = Map::new();
for k in keys {
out.insert(k.clone(), sort_value(&m[k]));
}
Value::Object(out)
}
Value::Array(a) => Value::Array(a.iter().map(sort_value).collect()),
_ => v.clone(),
}
}
pub fn build_evidence_package(
program: &IRProgram,
axon_version: &str,
provenance_entries: Option<Vec<Value>>,
provenance_payloads: Option<Vec<Value>>,
source_files: Option<BTreeMap<String, String>>,
auditor_note: &str,
) -> EvidencePackage {
let sbom = generate_sbom(program, axon_version);
let dossier = generate_dossier(program, axon_version);
let in_toto = generate_in_toto_statement(
program,
axon_version,
IN_TOTO_BUILDER_ID,
IN_TOTO_SUBJECT_NAME,
);
let risks = generate_risk_register(program);
let gap = analyze_all(program);
let mut pkg = EvidencePackage::new(sbom.program_hash.clone());
pkg.files.insert("program_sbom.json".into(), j(&sbom.to_value()));
pkg.files
.insert("program_dossier.json".into(), j(&dossier.to_value()));
pkg.files
.insert("in_toto_statement.json".into(), j(&in_toto.to_value()));
pkg.files.insert(
"risk_register.json".into(),
j(&risk_register_to_value(&risks)),
);
for (framework_name, analysis) in &gap {
pkg.files.insert(
format!("gap_analysis/{}.json", framework_name),
j(&analysis.to_value()),
);
}
for f in all_frameworks() {
let statements = generate_control_statements(program, f);
pkg.files.insert(
format!("control_statements/{}.json", f.as_str()),
j(&statements_to_value(&statements, f)),
);
}
if let Some(entries) = provenance_entries {
let mut chain_blob = Map::new();
chain_blob.insert("schema".into(), "axon.esk.provenance_chain.v1".into());
chain_blob.insert("genesis".into(), "0".repeat(64).into());
chain_blob.insert("count".into(), (entries.len() as i64).into());
chain_blob.insert("entries".into(), Value::Array(entries));
if let Some(payloads) = provenance_payloads {
chain_blob.insert("payloads".into(), Value::Array(payloads));
}
pkg.files
.insert("provenance_chain.json".into(), j(&Value::Object(chain_blob)));
}
if let Some(files) = source_files {
for (fname, source_text) in files {
let safe_name = fname.replace('\\', "/").trim_start_matches('/').to_string();
pkg.files
.insert(format!("source/{}", safe_name), source_text.into_bytes());
}
}
let readme = build_readme(&sbom.program_hash, auditor_note);
pkg.files.insert("README.md".into(), readme.into_bytes());
let file_count = pkg.files.len();
let file_entries: Vec<Value> = pkg
.files
.keys()
.cloned()
.collect::<Vec<_>>()
.into_iter()
.map(|name| {
let mut m = Map::new();
let sha = pkg.sha256_of(&name).unwrap_or_default();
let size = pkg.files.get(&name).map(|b| b.len()).unwrap_or(0);
m.insert("name".into(), name.into());
m.insert("sha256".into(), sha.into());
m.insert("size".into(), (size as i64).into());
Value::Object(m)
})
.collect();
let mut manifest = Map::new();
manifest.insert("schema".into(), "axon.esk.evidence_manifest.v1".into());
manifest.insert("axon_version".into(), axon_version.into());
manifest.insert("program_hash".into(), sbom.program_hash.clone().into());
manifest.insert("file_count".into(), (file_count as i64).into());
manifest.insert("files".into(), Value::Array(file_entries));
pkg.files
.insert("MANIFEST.json".into(), j(&Value::Object(manifest)));
pkg
}
/// Renders the package's `README.md` as Markdown text.
///
/// `program_hash` is shown in backticks near the top. When `auditor_note` is
/// non-empty it is appended verbatim under an `## Auditor note` heading;
/// otherwise that section is omitted. The returned text ends with a newline.
fn build_readme(program_hash: &str, auditor_note: &str) -> String {
    // Fixed text that follows the program-hash line.
    const BODY: &str = concat!(
        "\n",
        "## What is in this package\n",
        "\n",
        "This ZIP bundles the deterministic audit artifacts produced by the\n",
        "Axon compiler + ESK runtime. Every JSON file is canonical-encoded\n",
        "and carries its own SHA-256 in `MANIFEST.json`.\n",
        "\n",
        "| File | Purpose |\n",
        "|---|---|\n",
        "| `program_sbom.json` | Software Bill of Materials (deterministic) |\n",
        "| `program_dossier.json` | Regulatory compliance dossier |\n",
        "| `in_toto_statement.json` | SLSA Provenance v1 attestation |\n",
        "| `risk_register.json` | ISO 27005-shaped risk register |\n",
        "| `gap_analysis/*.json` | Per-framework gap analysis |\n",
        "| `control_statements/*.json` | Pre-populated implementation statements |\n",
        "| `provenance_chain.json` | Runtime Merkle chain (if provided) |\n",
        "| `source/*.axon` | Source snapshot at package time |\n",
        "\n",
        "## Verifying the package\n",
        "\n",
        "Every entry in `MANIFEST.json` carries a SHA-256 hash of the file\n",
        "contents. An auditor re-running `sha256sum` on each file MUST see\n",
        "the same digest — the package is deterministic. The program_hash\n",
        "inside `MANIFEST.json` equals the `program_hash` inside\n",
        "`program_sbom.json` — any divergence signals tampering.\n",
        "\n",
        "## Frameworks covered\n",
        "\n",
        "- SOC 2 Type II (AICPA Trust Services Criteria)\n",
        "- ISO/IEC 27001:2022 (Annex A subset)\n",
        "- FIPS 140-3 (scaffold + readiness)\n",
        "- Common Criteria EAL 4+ (SFR / SAR readiness)\n",
    );

    let mut readme = String::from("# AXON Audit Evidence Package\n\n");
    readme.push_str("**Program hash:** `");
    readme.push_str(program_hash);
    readme.push_str("`\n");
    readme.push_str(BODY);

    if !auditor_note.is_empty() {
        readme.push_str("\n## Auditor note\n\n");
        readme.push_str(auditor_note);
        readme.push('\n');
    }
    readme
}
// Unit tests for evidence-package assembly: file inventory, manifest hashes,
// ZIP serialisation, and the optional source / provenance inputs.
#[cfg(test)]
mod tests {
use super::super::frameworks::all_frameworks;
use super::*;
use crate::ir_generator::IRGenerator;
use crate::lexer::Lexer;
use crate::parser::Parser;
use std::io::Read;
// Runs `source` through the lexer, parser and IR generator; panics if
// tokenising or parsing fails.
fn compile(source: &str) -> IRProgram {
let tokens = Lexer::new(source, "t").tokenize().unwrap();
let program = Parser::new(tokens).parse().unwrap();
IRGenerator::new().generate(&program)
}
// Shared fixture program used by every test below.
fn full_program() -> IRProgram {
compile(r#"
type R compliance [HIPAA] { x: String }
flow F(r: R) -> R { step S { ask: "x" output: R } }
shield G {
scan: [prompt_injection]
on_breach: halt
severity: high
compliance: [HIPAA]
}
axonendpoint E {
method: POST path: "/p" body: R execute: F output: R
shield: G
compliance: [HIPAA]
}
resource Db { kind: postgres lifetime: linear }
fabric Vpc { provider: aws }
manifest M { resources: [Db] fabric: Vpc compliance: [HIPAA] }
observe O from M { sources: [prom] quorum: 1 }
reconcile Rec { observe: O }
lease L { resource: Db duration: 30m }
immune I { watch: [O] scope: tenant }
reflex Rf { trigger: I on_level: doubt action: quarantine scope: tenant }
heal H { source: I scope: tenant }
"#)
}
// The fixed top-level artifacts must all be present in the package.
#[test]
fn package_contains_expected_top_level_files() {
let ir = full_program();
let mut sources: BTreeMap<String, String> = BTreeMap::new();
sources.insert("prog.axon".into(), "// src".into());
let pkg =
build_evidence_package(&ir, "1.0.0", None, None, Some(sources), "");
let names: std::collections::HashSet<String> =
pkg.filenames().into_iter().collect();
for required in [
"MANIFEST.json",
"README.md",
"program_sbom.json",
"program_dossier.json",
"in_toto_statement.json",
"risk_register.json",
] {
assert!(names.contains(required), "missing {}", required);
}
}
// Every framework from `all_frameworks` must have both a gap-analysis file
// and a control-statements file named after `as_str()`.
#[test]
fn per_framework_files_exist() {
let ir = full_program();
let pkg = build_evidence_package(&ir, "1.0.0", None, None, None, "");
let names: std::collections::HashSet<String> =
pkg.filenames().into_iter().collect();
for f in all_frameworks() {
assert!(
names.contains(&format!("gap_analysis/{}.json", f.as_str())),
"missing gap_analysis for {:?}",
f
);
assert!(
names.contains(&format!("control_statements/{}.json", f.as_str())),
"missing control_statements for {:?}",
f
);
}
}
// Each SHA-256 recorded in MANIFEST.json must equal the hash recomputed
// from the package contents.
#[test]
fn manifest_sha256_matches_content_for_every_file() {
let ir = full_program();
let pkg = build_evidence_package(&ir, "1.0.0", None, None, None, "");
let manifest_bytes = pkg
.files
.get("MANIFEST.json")
.expect("MANIFEST present");
let manifest: Value = serde_json::from_slice(manifest_bytes).unwrap();
let files = manifest["files"].as_array().unwrap();
for entry in files {
let name = entry["name"].as_str().unwrap();
// Skip a self-entry if one is ever listed.
if name == "MANIFEST.json" {
continue;
}
let expected = entry["sha256"].as_str().unwrap();
let actual = pkg.sha256_of(name).unwrap();
assert_eq!(actual, expected, "sha mismatch for {}", name);
}
}
// The ZIP bytes must parse as an archive whose entry names equal the
// package's file names exactly.
#[test]
fn zip_opens_and_contains_all_files() {
let ir = full_program();
let pkg = build_evidence_package(&ir, "1.0.0", None, None, None, "");
let bytes = pkg.to_zip_bytes();
assert!(!bytes.is_empty(), "zip should not be empty");
let mut archive =
zip::ZipArchive::new(Cursor::new(bytes)).expect("zip parses");
let mut in_zip: std::collections::HashSet<String> =
std::collections::HashSet::new();
for i in 0..archive.len() {
let f = archive.by_index(i).unwrap();
in_zip.insert(f.name().to_string());
}
let in_pkg: std::collections::HashSet<String> =
pkg.filenames().into_iter().collect();
assert_eq!(in_zip, in_pkg);
}
// Building twice from the same inputs must yield byte-identical ZIPs.
#[test]
fn zip_is_byte_identical_on_equal_input() {
let ir = full_program();
let a =
build_evidence_package(&ir, "1.0.0", None, None, None, "").to_zip_bytes();
let b =
build_evidence_package(&ir, "1.0.0", None, None, None, "").to_zip_bytes();
assert_eq!(a, b, "evidence ZIP must be deterministic");
}
// Supplied sources land under `source/`, and a non-empty auditor note
// shows up in the README.
#[test]
fn source_snapshot_and_auditor_note_surface() {
let ir = full_program();
let mut sources: BTreeMap<String, String> = BTreeMap::new();
sources.insert("prog.axon".into(), "// content\n".into());
let pkg = build_evidence_package(
&ir,
"1.0.0",
None,
None,
Some(sources),
"Engaged with FirmX on 2026-04",
);
let src = pkg
.files
.get("source/prog.axon")
.expect("source file present");
assert!(std::str::from_utf8(src).unwrap().contains("// content"));
let readme = pkg.files.get("README.md").expect("README present");
let readme_text = std::str::from_utf8(readme).unwrap();
assert!(readme_text.contains("Engaged with FirmX"));
}
// Passing provenance entries produces `provenance_chain.json` with the
// expected schema, count, 64-character genesis value and payload array.
#[test]
fn provenance_chain_emitted_when_entries_supplied() {
let ir = full_program();
let entries = vec![serde_json::json!({"seq": 0, "data_hash": "abc"})];
let payloads = vec![serde_json::json!({"event": "start"})];
let pkg = build_evidence_package(
&ir,
"1.0.0",
Some(entries),
Some(payloads),
None,
"",
);
let chain = pkg
.files
.get("provenance_chain.json")
.expect("chain emitted");
let parsed: Value = serde_json::from_slice(chain).unwrap();
assert_eq!(parsed["schema"], "axon.esk.provenance_chain.v1");
assert_eq!(parsed["count"], 1);
assert_eq!(
parsed["genesis"].as_str().unwrap().len(),
64,
"genesis hash should be 64 zeros"
);
assert!(parsed["payloads"].is_array());
}
// README bytes read back out of the ZIP must equal the bytes stored in
// the package.
#[test]
fn zip_entries_round_trip_through_archive() {
let ir = full_program();
let pkg = build_evidence_package(&ir, "1.0.0", None, None, None, "");
let bytes = pkg.to_zip_bytes();
let mut archive = zip::ZipArchive::new(Cursor::new(bytes)).unwrap();
let mut buf = Vec::new();
archive
.by_name("README.md")
.unwrap()
.read_to_end(&mut buf)
.unwrap();
assert_eq!(
buf,
*pkg.files.get("README.md").unwrap(),
"README content must round-trip"
);
}
}