1use std::collections::BTreeMap;
66use std::fs;
67use std::path::{Path, PathBuf};
68
69use anyhow::{Context, Result, anyhow, bail};
70use base64::Engine;
71use base64::engine::general_purpose::STANDARD_NO_PAD;
72use ed25519_dalek::Signer;
73use rusqlite::{Connection, params};
74use serde::{Deserialize, Serialize};
75use sha2::{Digest, Sha256};
76
77use crate::cli::CliOutput;
78use crate::identity::keypair as kp_mod;
79use crate::identity::sign::SignableLink;
80
/// Archive path of the bundle manifest. `build_files` stores the serialised
/// [`Manifest`] under this name, and `verify` looks it up by this name and
/// skips it when comparing entry hashes.
const MANIFEST_FILE_NAME: &str = "manifest.json";
83
// Command-line arguments for exporting a forensic bundle.
//
// NOTE(review): plain `//` comments are used on purpose — clap's derive turns
// `///` doc comments into help text, so adding them would change user-visible
// output.
#[derive(clap::Args, Debug)]
pub struct ExportForensicBundleArgs {
    // Id of the memory the bundle is built around; the reflection chain walk,
    // the chain report and the transcript replay all start from it.
    #[arg(long, value_name = "ID")]
    pub memory_id: String,

    // When set, `build_files` emits every memory of the collected chain
    // instead of only the root (plus atomisation relatives, if enabled).
    #[arg(long, default_value_t = false)]
    pub include_reflections: bool,

    // When set, transcript metadata and content linked to the root memory are
    // added under `transcripts/`.
    #[arg(long, default_value_t = false)]
    pub include_transcripts: bool,

    // Destination tarball. When absent, `run_export` generates a
    // `forensic-bundle-<id prefix>-<timestamp>.tar` name.
    #[arg(long, value_name = "PATH")]
    pub output: Option<PathBuf>,

    // Whether atoms / atomisation parents, their `derives_from` edges and the
    // related signed events are included. Defaults to true.
    #[arg(long, default_value_t = true)]
    pub include_atomisation_chain: bool,
}
120
// Command-line arguments for verifying a forensic bundle.
//
// NOTE(review): plain `//` comments are used on purpose — clap's derive turns
// `///` doc comments into help text.
#[derive(clap::Args, Debug)]
pub struct VerifyForensicBundleArgs {
    // Path of the bundle tarball handed to `verify`.
    pub bundle_path: PathBuf,
}
127
/// One manifest entry describing a single file stored in the bundle.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ManifestFile {
    /// Path of the file inside the archive.
    pub path: String,
    /// Length of the file body in bytes.
    pub size: u64,
    /// Lowercase hex SHA-256 digest of the file body.
    pub sha256: String,
}
142
/// Bundle manifest, serialised as `manifest.json`.
///
/// It lists every other file in the bundle with its size and SHA-256 digest
/// and, when a local signing key was available at export time, carries a
/// signature over [`canonical_signed_bytes`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
    /// Bundle layout version; `build_files` writes [`BUNDLE_SCHEMA_VERSION`].
    pub schema_version: u32,
    /// Id of the memory the bundle was exported for.
    pub memory_id: String,
    /// Generation timestamp: the caller-supplied value, or the current UTC
    /// time in RFC 3339 form when none was supplied.
    pub generated_at: String,
    /// Copy of the `include_reflections` export flag.
    pub include_reflections: bool,
    /// Copy of the `include_transcripts` export flag.
    pub include_transcripts: bool,
    /// One entry per bundle file, excluding the manifest itself.
    pub files: Vec<ManifestFile>,
    /// Agent id of the key that signed the manifest; omitted when unsigned.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signer_agent_id: Option<String>,
    /// Unpadded standard-alphabet base64 of the signature; omitted when unsigned.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
}
179
/// Schema version that `build_files` records in [`Manifest::schema_version`].
pub const BUNDLE_SCHEMA_VERSION: u32 = 1;
184
/// JSON shape of a memory as stored under `memories/<id>.json`.
///
/// `build_files` fills every field from the `Memory` returned by `db::get`;
/// only `tier`, `memory_kind` and `atomisation` are derived rather than copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryEnvelope {
    pub id: String,
    pub namespace: String,
    pub title: String,
    pub content: String,
    /// String form of the memory's tier (`tier.as_str()`).
    pub tier: String,
    /// ASCII-lowercased `Debug` rendering of the memory kind.
    pub memory_kind: String,
    pub reflection_depth: i32,
    pub created_at: String,
    pub updated_at: String,
    pub metadata: serde_json::Value,
    /// Atomisation provenance; present only when the export included the
    /// atomisation chain and the memory is an archived source or an atom.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub atomisation: Option<AtomisationEnvelope>,
    /// Defaults to an empty list when absent from the JSON.
    #[serde(default)]
    pub citations: Vec<crate::models::Citation>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_uri: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source_span: Option<crate::models::SourceSpan>,
    /// Falls back to the type's `Default` when absent from the JSON.
    #[serde(default)]
    pub confidence_source: crate::models::ConfidenceSource,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub confidence_signals: Option<crate::models::ConfidenceSignals>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub confidence_decayed_at: Option<String>,
}
247
/// Atomisation provenance attached to a [`MemoryEnvelope`], as assembled by
/// `build_atomisation_envelope`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AtomisationEnvelope {
    /// Value of the `atomised_into` column, kept only when it is positive.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub atomised_into: Option<i64>,
    /// String stored under the `ATOMISATION_ARCHIVED_AT` metadata key, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub archived_at: Option<String>,
    /// Ids of the memories whose `atom_of` points at this memory, ordered by
    /// creation time then id; filled only for archived sources.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub atom_ids: Vec<String>,
    /// Value of the `atom_of` column, i.e. the memory this atom was split from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub atom_of: Option<String>,
}
274
/// JSON shape of one `memory_links` row, stored under
/// `edges/<source>__<relation>__<target>.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeEnvelope {
    pub source_id: String,
    pub target_id: String,
    pub relation: String,
    pub created_at: String,
    /// Agent recorded on the link; `verify_edge_envelope` resolves the public
    /// key for signed edges from this value.
    pub observed_by: Option<String>,
    pub valid_from: Option<String>,
    pub valid_until: Option<String>,
    /// Attestation level; `fetch_edges_for` substitutes the `Unsigned` level
    /// when the column is NULL.
    pub attest_level: String,
    /// Lowercase hex of the link's `signature` column, if any.
    pub signature_hex: Option<String>,
}
292
/// JSON shape of one `signed_events` row, stored under
/// `signed_events/<id>.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignedEventEnvelope {
    pub id: String,
    pub agent_id: String,
    pub event_type: String,
    /// Lowercase hex of the `payload_hash` column.
    pub payload_hash_hex: String,
    /// Lowercase hex of the `signature` column, if any.
    pub signature_hex: Option<String>,
    pub attest_level: String,
    pub timestamp: String,
}
307
/// Transcript metadata stored under `transcripts/<id>.json`; the transcript
/// text itself, when it can be fetched, goes to `transcripts/<id>.content`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptEnvelope {
    pub id: String,
    pub namespace: String,
    pub created_at: String,
    pub expires_at: Option<String>,
    pub compressed_size: i64,
    pub original_size: i64,
    /// Sorted, de-duplicated ids of the memories whose replay entries
    /// reference this transcript.
    pub linked_memory_ids: Vec<String>,
}
322
/// In-memory bundle: archive path → file body. A `BTreeMap` iterates in sorted
/// key order, so the manifest and the tarball list files sorted by path.
type BundleFiles = BTreeMap<String, Vec<u8>>;
331
332pub fn build(
345 conn: &Connection,
346 args: &ExportForensicBundleArgs,
347 output_path: &Path,
348 generated_at: Option<&str>,
349) -> Result<()> {
350 let files = build_files(conn, args, generated_at)?;
351 write_ustar(output_path, &files).context("write forensic bundle tar")
352}
353
/// Collects every file of the forensic bundle in memory, keyed by archive path.
///
/// The result contains, in this order of construction: memory envelopes
/// (`memories/`), link envelopes (`edges/`), signed events (`signed_events/`),
/// optional transcripts (`transcripts/`), the chain report
/// (`verification.json`) and finally `manifest.json`, which lists all the
/// other files with their size and SHA-256 digest.
///
/// `generated_at` overrides the timestamp recorded in the manifest and passed
/// to the chain report; when `None`, the current UTC time (RFC 3339) is used.
///
/// # Errors
/// Propagates database, serialisation and chain-report errors.
pub fn build_files(
    conn: &Connection,
    args: &ExportForensicBundleArgs,
    generated_at: Option<&str>,
) -> Result<BundleFiles> {
    let generated_at: String = generated_at
        .map(ToString::to_string)
        .unwrap_or_else(|| chrono::Utc::now().to_rfc3339());

    // Root memory plus everything reachable over `reflects_on` links (sorted).
    let mut chain_ids = walk_reflection_chain(conn, &args.memory_id)?;

    if args.include_atomisation_chain {
        // Widen the chain with atomisation relatives of every member: its own
        // atoms, its parent (`atom_of`) and — only when the parent is newly
        // added — the parent's other atoms.
        let mut expanded = chain_ids.clone();
        for mid in &chain_ids {
            for atom_id in atom_ids_of_source(conn, mid)? {
                if !expanded.contains(&atom_id) {
                    expanded.push(atom_id);
                }
            }
            if let Some(parent_id) = atom_of_for(conn, mid)? {
                if !expanded.contains(&parent_id) {
                    expanded.push(parent_id.clone());
                    for atom_id in atom_ids_of_source(conn, &parent_id)? {
                        if !expanded.contains(&atom_id) {
                            expanded.push(atom_id);
                        }
                    }
                }
            }
        }
        // Keep the id list sorted so later queries see a stable order.
        expanded.sort();
        chain_ids = expanded;
    }

    let mut files: BundleFiles = BTreeMap::new();

    // Decide which memories get a `memories/<id>.json` file:
    // - with reflections: the whole (possibly expanded) chain;
    // - without reflections but with atomisation: the root, its atoms, its
    //   parent and the parent's atoms;
    // - otherwise: the root only.
    let memory_ids_to_emit: Vec<String> = if args.include_reflections {
        chain_ids.clone()
    } else if args.include_atomisation_chain {
        let mut ids = vec![args.memory_id.clone()];
        for atom_id in atom_ids_of_source(conn, &args.memory_id)? {
            if !ids.contains(&atom_id) {
                ids.push(atom_id);
            }
        }
        if let Some(parent) = atom_of_for(conn, &args.memory_id)? {
            if !ids.contains(&parent) {
                ids.push(parent.clone());
            }
            for atom_id in atom_ids_of_source(conn, &parent)? {
                if !ids.contains(&atom_id) {
                    ids.push(atom_id);
                }
            }
        }
        ids.sort();
        ids
    } else {
        vec![args.memory_id.clone()]
    };
    for mid in &memory_ids_to_emit {
        // Ids that `db::get` cannot resolve are skipped without an error.
        if let Some(mem) = crate::db::get(conn, mid).context("db::get for bundle")? {
            let atomisation = if args.include_atomisation_chain {
                build_atomisation_envelope(conn, &mem)?
            } else {
                None
            };
            let env = MemoryEnvelope {
                id: mem.id.clone(),
                namespace: mem.namespace.clone(),
                title: mem.title.clone(),
                content: mem.content.clone(),
                tier: mem.tier.as_str().to_string(),
                memory_kind: format!("{:?}", mem.memory_kind).to_ascii_lowercase(),
                reflection_depth: mem.reflection_depth,
                created_at: mem.created_at.clone(),
                updated_at: mem.updated_at.clone(),
                metadata: mem.metadata.clone(),
                atomisation,
                citations: mem.citations.clone(),
                source_uri: mem.source_uri.clone(),
                source_span: mem.source_span,
                confidence_source: mem.confidence_source,
                confidence_signals: mem.confidence_signals.clone(),
                confidence_decayed_at: mem.confidence_decayed_at.clone(),
            };
            let bytes = serde_json::to_vec_pretty(&env).context("serialise MemoryEnvelope")?;
            files.insert(format!("memories/{}.json", mem.id), bytes);
        }
    }

    // Links are always fetched for the full chain, even when only the root
    // memory is emitted above.
    let edges_raw = fetch_edges_for(conn, &chain_ids)?;
    let edges: Vec<_> = if args.include_atomisation_chain {
        edges_raw
    } else {
        // Without the atomisation chain, links with the `DerivesFrom`
        // relation are left out.
        edges_raw
            .into_iter()
            .filter(|e| e.relation != crate::models::MemoryLinkRelation::DerivesFrom.as_str())
            .collect()
    };
    for edge in &edges {
        let bytes = serde_json::to_vec_pretty(edge).context("serialise EdgeEnvelope")?;
        let path = format!(
            "edges/{}__{}__{}.json",
            edge.source_id, edge.relation, edge.target_id
        );
        files.insert(path, bytes);
    }

    // Tracks emitted event ids so the atomisation pass below does not
    // re-serialise an event that is already in the bundle.
    let mut event_ids_emitted: std::collections::HashSet<String> = std::collections::HashSet::new();
    // NOTE(review): `fetch_signed_events_for` matches the ids against the
    // `agent_id` column of `signed_events` — confirm this is the intended key.
    let events = fetch_signed_events_for(conn, &chain_ids)?;
    for ev in &events {
        let bytes = serde_json::to_vec_pretty(ev).context("serialise SignedEventEnvelope")?;
        files.insert(format!("signed_events/{}.json", ev.id), bytes);
        event_ids_emitted.insert(ev.id.clone());
    }

    if args.include_atomisation_chain {
        let extra = fetch_atomisation_signed_events_for(conn, &chain_ids)?;
        for ev in &extra {
            if event_ids_emitted.contains(&ev.id) {
                continue;
            }
            let bytes = serde_json::to_vec_pretty(ev).context("serialise SignedEventEnvelope")?;
            files.insert(format!("signed_events/{}.json", ev.id), bytes);
            event_ids_emitted.insert(ev.id.clone());
        }
    }

    if args.include_transcripts {
        let entries =
            crate::transcripts::replay::replay_transcript_union(conn, &args.memory_id, None)
                .context("replay_transcript_union for bundle")?;

        // Several entries can share a transcript id; emit each transcript once.
        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
        for entry in &entries {
            if !seen.insert(entry.meta.id.clone()) {
                continue;
            }
            // Every memory whose entry references this transcript.
            let mut linked: Vec<String> = entries
                .iter()
                .filter(|e| e.meta.id == entry.meta.id)
                .map(|e| e.memory_id.clone())
                .collect();
            linked.sort();
            linked.dedup();

            let env = TranscriptEnvelope {
                id: entry.meta.id.clone(),
                namespace: entry.meta.namespace.clone(),
                created_at: entry.meta.created_at.clone(),
                expires_at: entry.meta.expires_at.clone(),
                compressed_size: entry.meta.compressed_size,
                original_size: entry.meta.original_size,
                linked_memory_ids: linked,
            };
            let meta_bytes =
                serde_json::to_vec_pretty(&env).context("serialise TranscriptEnvelope")?;
            files.insert(format!("transcripts/{}.json", entry.meta.id), meta_bytes);

            // The content file is written only when the store returns content.
            if let Some(content) = crate::transcripts::storage::fetch(conn, &entry.meta.id)
                .context("fetch transcript content for bundle")?
            {
                files.insert(
                    format!("transcripts/{}.content", entry.meta.id),
                    content.into_bytes(),
                );
            }
        }
    }

    // Chain report for the root memory, built with the same `generated_at`.
    let report =
        crate::cli::verify::build_chain_report_at(conn, &args.memory_id, true, Some(&generated_at))
            .context("build_chain_report for bundle")?;
    let verification_bytes =
        serde_json::to_vec_pretty(&report).context("serialise chain report")?;
    files.insert("verification.json".to_string(), verification_bytes);

    // The manifest describes every file gathered so far; it is inserted into
    // `files` afterwards, so it never lists itself.
    let mut manifest = Manifest {
        schema_version: BUNDLE_SCHEMA_VERSION,
        memory_id: args.memory_id.clone(),
        generated_at,
        include_reflections: args.include_reflections,
        include_transcripts: args.include_transcripts,
        files: files
            .iter()
            .map(|(p, body)| ManifestFile {
                path: p.clone(),
                size: body.len() as u64,
                sha256: hex_sha256(body),
            })
            .collect(),
        signer_agent_id: None,
        signature: None,
    };

    // Signing is best-effort: without a usable local key the manifest stays
    // unsigned.
    if let Some((agent_id, sig_b64)) = sign_manifest_if_keyed(&manifest)? {
        manifest.signer_agent_id = Some(agent_id);
        manifest.signature = Some(sig_b64);
    }

    let manifest_bytes = serde_json::to_vec_pretty(&manifest).context("serialise Manifest")?;
    files.insert(MANIFEST_FILE_NAME.to_string(), manifest_bytes);

    Ok(files)
}
644
645pub fn canonical_signed_bytes(m: &Manifest) -> Vec<u8> {
651 let mut out = String::new();
652 for f in &m.files {
653 out.push_str(&f.path);
654 out.push(':');
655 out.push_str(&f.size.to_string());
656 out.push(':');
657 out.push_str(&f.sha256);
658 out.push('\n');
659 }
660 out.push_str("schema_version:");
661 out.push_str(&m.schema_version.to_string());
662 out.push('\n');
663 out.push_str("memory_id:");
664 out.push_str(&m.memory_id);
665 out.push('\n');
666 out.into_bytes()
667}
668
669fn sign_manifest_if_keyed(manifest: &Manifest) -> Result<Option<(String, String)>> {
676 let key_dir = match kp_mod::default_key_dir() {
677 Ok(p) => p,
678 Err(_) => return Ok(None),
679 };
680 if !key_dir.exists() {
681 return Ok(None);
682 }
683 let entries = match kp_mod::list(&key_dir) {
684 Ok(v) => v,
685 Err(_) => return Ok(None),
686 };
687 let mut candidates: Vec<String> = entries.into_iter().map(|kp| kp.agent_id).collect();
690 candidates.sort();
691 for agent_id in candidates {
692 if let Ok(kp) = kp_mod::load(&agent_id, &key_dir) {
693 if let Some(signing) = kp.private.as_ref() {
694 let bytes = canonical_signed_bytes(manifest);
695 let sig = signing.sign(&bytes);
696 let sig_b64 = STANDARD_NO_PAD.encode(sig.to_bytes());
697 return Ok(Some((agent_id, sig_b64)));
698 }
699 }
700 }
701 Ok(None)
702}
703
704fn walk_reflection_chain(conn: &Connection, root: &str) -> Result<Vec<String>> {
713 use std::collections::{HashSet, VecDeque};
714 let mut visited: HashSet<String> = HashSet::new();
715 let mut order: Vec<String> = Vec::new();
716 let mut queue: VecDeque<String> = VecDeque::new();
717 queue.push_back(root.to_string());
718 while let Some(cur) = queue.pop_front() {
719 if !visited.insert(cur.clone()) {
720 continue;
721 }
722 order.push(cur.clone());
723 let mut stmt = conn.prepare(
724 "SELECT target_id FROM memory_links \
725 WHERE source_id = ?1 AND relation = 'reflects_on' \
726 ORDER BY target_id",
727 )?;
728 let rows = stmt.query_map(params![cur], |r| r.get::<_, String>(0))?;
729 for r in rows {
730 let tgt = r?;
731 if !visited.contains(&tgt) {
732 queue.push_back(tgt);
733 }
734 }
735 }
736 order.sort();
740 Ok(order)
741}
742
743fn fetch_edges_for(conn: &Connection, chain_ids: &[String]) -> Result<Vec<EdgeEnvelope>> {
748 let mut out = Vec::new();
749 if chain_ids.is_empty() {
750 return Ok(out);
751 }
752 let placeholders: String = chain_ids
753 .iter()
754 .enumerate()
755 .map(|(i, _)| format!("?{}", i + 1))
756 .collect::<Vec<_>>()
757 .join(", ");
758 let sql = format!(
759 "SELECT source_id, target_id, relation, created_at, observed_by, \
760 valid_from, valid_until, signature, attest_level \
761 FROM memory_links \
762 WHERE source_id IN ({placeholders}) \
763 AND relation IN ('reflects_on', 'supersedes', 'derived_from', 'derives_from') \
764 ORDER BY source_id, relation, target_id"
765 );
766 let mut stmt = conn.prepare(&sql)?;
767 let param_refs: Vec<&dyn rusqlite::ToSql> = chain_ids
768 .iter()
769 .map(|s| s as &dyn rusqlite::ToSql)
770 .collect();
771 let rows = stmt.query_map(param_refs.as_slice(), |r| {
772 Ok(EdgeEnvelope {
773 source_id: r.get::<_, String>(0)?,
774 target_id: r.get::<_, String>(1)?,
775 relation: r.get::<_, String>(2)?,
776 created_at: r.get::<_, String>(3)?,
777 observed_by: r.get::<_, Option<String>>(4)?,
778 valid_from: r.get::<_, Option<String>>(5)?,
779 valid_until: r.get::<_, Option<String>>(6)?,
780 signature_hex: r.get::<_, Option<Vec<u8>>>(7)?.map(|b| bytes_to_hex(&b)),
781 attest_level: r
782 .get::<_, Option<String>>(8)?
783 .unwrap_or_else(|| crate::models::AttestLevel::Unsigned.as_str().to_string()),
784 })
785 })?;
786 for r in rows {
787 out.push(r?);
788 }
789 Ok(out)
790}
791
792fn atom_ids_of_source(conn: &Connection, source_id: &str) -> Result<Vec<String>> {
797 let mut stmt = conn.prepare(
798 "SELECT id FROM memories \
799 WHERE atom_of = ?1 \
800 ORDER BY created_at ASC, id ASC",
801 )?;
802 let rows = stmt.query_map(params![source_id], |r| r.get::<_, String>(0))?;
803 let mut out = Vec::new();
804 for r in rows {
805 out.push(r?);
806 }
807 Ok(out)
808}
809
810fn atom_of_for(conn: &Connection, id: &str) -> Result<Option<String>> {
814 let res: rusqlite::Result<Option<String>> = conn.query_row(
815 "SELECT atom_of FROM memories WHERE id = ?1",
816 params![id],
817 |r| r.get::<_, Option<String>>(0),
818 );
819 match res {
820 Ok(v) => Ok(v),
821 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
822 Err(e) => Err(e.into()),
823 }
824}
825
826fn build_atomisation_envelope(
832 conn: &Connection,
833 mem: &crate::models::Memory,
834) -> Result<Option<AtomisationEnvelope>> {
835 let (atomised_into, atom_of_col): (Option<i64>, Option<String>) = conn
838 .query_row(
839 "SELECT atomised_into, atom_of FROM memories WHERE id = ?1",
840 params![mem.id],
841 |r| Ok((r.get::<_, Option<i64>>(0)?, r.get::<_, Option<String>>(1)?)),
842 )
843 .unwrap_or((None, None));
844
845 let archived_at = mem
846 .metadata
847 .get(crate::models::field_names::ATOMISATION_ARCHIVED_AT)
848 .and_then(|v| v.as_str())
849 .map(ToString::to_string);
850
851 let is_archived_source = atomised_into.unwrap_or(0) > 0 || archived_at.is_some();
852 let is_atom = atom_of_col.is_some();
853 if !is_archived_source && !is_atom {
854 return Ok(None);
855 }
856 let atom_ids = if is_archived_source {
857 atom_ids_of_source(conn, &mem.id)?
858 } else {
859 Vec::new()
860 };
861 Ok(Some(AtomisationEnvelope {
862 atomised_into: atomised_into.filter(|n| *n > 0),
863 archived_at,
864 atom_ids,
865 atom_of: atom_of_col,
866 }))
867}
868
869fn fetch_atomisation_signed_events_for(
892 conn: &Connection,
893 chain_ids: &[String],
894) -> Result<Vec<SignedEventEnvelope>> {
895 if chain_ids.is_empty() {
896 return Ok(Vec::new());
897 }
898 let src_placeholders: String = (1..=chain_ids.len())
909 .map(|i| format!("?{i}"))
910 .collect::<Vec<_>>()
911 .join(", ");
912 let tgt_placeholders: String = (chain_ids.len() + 1..=chain_ids.len() * 2)
913 .map(|i| format!("?{i}"))
914 .collect::<Vec<_>>()
915 .join(", ");
916 let agent_sql = format!(
917 "SELECT DISTINCT observed_by FROM memory_links \
918 WHERE relation = 'derives_from' \
919 AND (source_id IN ({src_placeholders}) OR target_id IN ({tgt_placeholders})) \
920 AND observed_by IS NOT NULL"
921 );
922 let mut agent_stmt = conn.prepare(&agent_sql)?;
923 let bind_pairs: Vec<&dyn rusqlite::ToSql> = chain_ids
924 .iter()
925 .chain(chain_ids.iter())
926 .map(|s| s as &dyn rusqlite::ToSql)
927 .collect();
928 let agent_rows = agent_stmt.query_map(bind_pairs.as_slice(), |r| r.get::<_, String>(0))?;
929 let mut writer_agents: Vec<String> = Vec::new();
930 for r in agent_rows {
931 let id = r?;
932 if !writer_agents.contains(&id) {
933 writer_agents.push(id);
934 }
935 }
936
937 let mut out: Vec<SignedEventEnvelope> = Vec::new();
942 let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
943
944 if writer_agents.is_empty() {
945 let sql = "SELECT id, agent_id, event_type, payload_hash, signature, \
951 attest_level, timestamp \
952 FROM signed_events \
953 WHERE event_type IN ('atomisation_complete', 'memory_link.created') \
954 ORDER BY timestamp ASC, id ASC";
955 let mut stmt = conn.prepare(sql)?;
956 let rows = stmt.query_map([], row_to_signed_event_envelope)?;
957 for r in rows {
958 let ev = r?;
959 if seen.insert(ev.id.clone()) {
960 out.push(ev);
961 }
962 }
963 return Ok(out);
964 }
965
966 let agent_placeholders: String = writer_agents
967 .iter()
968 .enumerate()
969 .map(|(i, _)| format!("?{}", i + 1))
970 .collect::<Vec<_>>()
971 .join(", ");
972 let sql = format!(
973 "SELECT id, agent_id, event_type, payload_hash, signature, \
974 attest_level, timestamp \
975 FROM signed_events \
976 WHERE event_type IN ('atomisation_complete', 'memory_link.created') \
977 AND agent_id IN ({agent_placeholders}) \
978 ORDER BY timestamp ASC, id ASC"
979 );
980 let mut stmt = conn.prepare(&sql)?;
981 let param_refs: Vec<&dyn rusqlite::ToSql> = writer_agents
982 .iter()
983 .map(|s| s as &dyn rusqlite::ToSql)
984 .collect();
985 let rows = stmt.query_map(param_refs.as_slice(), row_to_signed_event_envelope)?;
986 for r in rows {
987 let ev = r?;
988 if seen.insert(ev.id.clone()) {
989 out.push(ev);
990 }
991 }
992 Ok(out)
993}
994
995fn row_to_signed_event_envelope(r: &rusqlite::Row<'_>) -> rusqlite::Result<SignedEventEnvelope> {
1001 Ok(SignedEventEnvelope {
1002 id: r.get::<_, String>(0)?,
1003 agent_id: r.get::<_, String>(1)?,
1004 event_type: r.get::<_, String>(2)?,
1005 payload_hash_hex: bytes_to_hex(&r.get::<_, Vec<u8>>(3)?),
1006 signature_hex: r.get::<_, Option<Vec<u8>>>(4)?.map(|b| bytes_to_hex(&b)),
1007 attest_level: r.get::<_, String>(5)?,
1008 timestamp: r.get::<_, String>(6)?,
1009 })
1010}
1011
1012fn fetch_signed_events_for(
1018 conn: &Connection,
1019 chain_ids: &[String],
1020) -> Result<Vec<SignedEventEnvelope>> {
1021 if chain_ids.is_empty() {
1022 return Ok(Vec::new());
1023 }
1024 let placeholders: String = chain_ids
1025 .iter()
1026 .enumerate()
1027 .map(|(i, _)| format!("?{}", i + 1))
1028 .collect::<Vec<_>>()
1029 .join(", ");
1030 let sql = format!(
1031 "SELECT id, agent_id, event_type, payload_hash, signature, \
1032 attest_level, timestamp \
1033 FROM signed_events \
1034 WHERE agent_id IN ({placeholders}) \
1035 ORDER BY timestamp ASC, id ASC"
1036 );
1037 let mut stmt = conn.prepare(&sql)?;
1038 let param_refs: Vec<&dyn rusqlite::ToSql> = chain_ids
1039 .iter()
1040 .map(|s| s as &dyn rusqlite::ToSql)
1041 .collect();
1042 let rows = stmt.query_map(param_refs.as_slice(), |r| {
1043 Ok(SignedEventEnvelope {
1044 id: r.get::<_, String>(0)?,
1045 agent_id: r.get::<_, String>(1)?,
1046 event_type: r.get::<_, String>(2)?,
1047 payload_hash_hex: bytes_to_hex(&r.get::<_, Vec<u8>>(3)?),
1048 signature_hex: r.get::<_, Option<Vec<u8>>>(4)?.map(|b| bytes_to_hex(&b)),
1049 attest_level: r.get::<_, String>(5)?,
1050 timestamp: r.get::<_, String>(6)?,
1051 })
1052 })?;
1053 let mut out = Vec::new();
1054 for r in rows {
1055 out.push(r?);
1056 }
1057 Ok(out)
1058}
1059
/// Outcome of [`verify`], printed as JSON by `run_verify`.
#[derive(Debug, Clone, Serialize)]
pub struct VerificationReport {
    /// Overall verdict: true when no file is tampered or missing, no edge
    /// failed its check and the manifest signature did not fail. Extra files
    /// and an absent or unknown signer do not affect it.
    pub ok: bool,
    /// Display form of the verified bundle's path.
    pub bundle_path: String,
    /// Always true in a returned report; a bundle without a manifest makes
    /// `verify` return an error instead.
    pub manifest_present: bool,
    /// `schema_version` read from the manifest.
    pub schema_version: u32,
    /// `memory_id` read from the manifest.
    pub memory_id: String,
    /// `signer_agent_id` read from the manifest.
    pub signer_agent_id: Option<String>,
    /// Result of checking the manifest signature.
    pub signature_status: SignatureStatus,
    /// Files whose SHA-256 digest or size differs from the manifest entry.
    pub tampered_files: Vec<String>,
    /// Files listed in the manifest but absent from the archive.
    pub missing_files: Vec<String>,
    /// Files present in the archive but not listed in the manifest.
    pub extra_files: Vec<String>,
    /// `edges/*.json` files that could not be parsed or whose link signature
    /// did not verify.
    pub chain_edges_failed: Vec<String>,
}
1085
/// Result of the manifest signature check performed by [`verify`];
/// serialised in snake_case.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SignatureStatus {
    /// The signature verified against the signer's public key.
    Verified,
    /// A public key was found but the signature did not verify.
    Failed,
    /// The manifest lacks a signer id, a signature, or both.
    Absent,
    /// No public key could be looked up for the signer id.
    UnknownSigner,
}
1101
1102pub fn verify(bundle_path: &Path) -> Result<VerificationReport> {
1115 let bytes = fs::read(bundle_path)
1116 .with_context(|| format!("read bundle from {}", bundle_path.display()))?;
1117 let files = read_ustar(&bytes).context("parse forensic bundle tar")?;
1118
1119 let manifest_bytes = files
1120 .get(MANIFEST_FILE_NAME)
1121 .ok_or_else(|| anyhow!("bundle is missing manifest.json"))?
1122 .clone();
1123 let manifest: Manifest =
1124 serde_json::from_slice(&manifest_bytes).context("parse manifest.json")?;
1125
1126 let mut report = VerificationReport {
1127 ok: true,
1128 bundle_path: bundle_path.display().to_string(),
1129 manifest_present: true,
1130 schema_version: manifest.schema_version,
1131 memory_id: manifest.memory_id.clone(),
1132 signer_agent_id: manifest.signer_agent_id.clone(),
1133 signature_status: SignatureStatus::Absent,
1134 tampered_files: Vec::new(),
1135 missing_files: Vec::new(),
1136 extra_files: Vec::new(),
1137 chain_edges_failed: Vec::new(),
1138 };
1139
1140 let manifest_index: BTreeMap<&str, &ManifestFile> = manifest
1142 .files
1143 .iter()
1144 .map(|m| (m.path.as_str(), m))
1145 .collect();
1146 for (path, body) in &files {
1147 if path == MANIFEST_FILE_NAME {
1148 continue;
1149 }
1150 match manifest_index.get(path.as_str()) {
1151 Some(mf) => {
1152 let actual = hex_sha256(body);
1153 if actual != mf.sha256 || u64::try_from(body.len()).unwrap_or(0) != mf.size {
1154 report.tampered_files.push(path.clone());
1155 }
1156 }
1157 None => report.extra_files.push(path.clone()),
1158 }
1159 }
1160 for (path, _) in manifest_index.iter() {
1161 if !files.contains_key(*path) {
1162 report.missing_files.push((*path).to_string());
1163 }
1164 }
1165
1166 if let (Some(signer), Some(sig_b64)) = (
1168 manifest.signer_agent_id.as_ref(),
1169 manifest.signature.as_ref(),
1170 ) {
1171 let pubkey_opt = crate::identity::verify::lookup_peer_public_key(signer);
1172 match pubkey_opt {
1173 Some(pubkey) => {
1174 let signed_bytes = canonical_signed_bytes(&Manifest {
1175 signer_agent_id: None,
1176 signature: None,
1177 ..manifest.clone()
1178 });
1179 let sig_bytes = STANDARD_NO_PAD
1180 .decode(sig_b64)
1181 .context("decode manifest signature")?;
1182 let sig_arr: [u8; ed25519_dalek::SIGNATURE_LENGTH] = sig_bytes
1183 .as_slice()
1184 .try_into()
1185 .map_err(|_| anyhow!("manifest signature has wrong length"))?;
1186 let sig = ed25519_dalek::Signature::from_bytes(&sig_arr);
1187 report.signature_status = match pubkey.verify_strict(&signed_bytes, &sig) {
1188 Ok(()) => SignatureStatus::Verified,
1189 Err(_) => SignatureStatus::Failed,
1190 };
1191 }
1192 None => {
1193 report.signature_status = SignatureStatus::UnknownSigner;
1194 }
1195 }
1196 }
1197
1198 for (path, body) in &files {
1200 if !path.starts_with("edges/") || !path.ends_with(".json") {
1201 continue;
1202 }
1203 let edge: EdgeEnvelope = match serde_json::from_slice(body) {
1204 Ok(e) => e,
1205 Err(_) => {
1206 report.chain_edges_failed.push(path.clone());
1207 continue;
1208 }
1209 };
1210 if !verify_edge_envelope(&edge) {
1211 report.chain_edges_failed.push(path.clone());
1212 }
1213 }
1214
1215 report.ok = report.tampered_files.is_empty()
1217 && report.missing_files.is_empty()
1218 && report.chain_edges_failed.is_empty()
1219 && !matches!(report.signature_status, SignatureStatus::Failed);
1220
1221 Ok(report)
1222}
1223
1224fn verify_edge_envelope(edge: &EdgeEnvelope) -> bool {
1230 let Some(sig_hex) = edge.signature_hex.as_ref() else {
1231 return true; };
1233 let Some(observed_by) = edge.observed_by.as_ref() else {
1234 return false; };
1236 let Some(pubkey) = crate::identity::verify::lookup_peer_public_key(observed_by) else {
1237 return false;
1242 };
1243 let Ok(sig_bytes) = hex_to_bytes(sig_hex) else {
1244 return false;
1245 };
1246 let link = SignableLink {
1247 src_id: &edge.source_id,
1248 dst_id: &edge.target_id,
1249 relation: &edge.relation,
1250 observed_by: Some(observed_by),
1251 valid_from: edge.valid_from.as_deref(),
1252 valid_until: edge.valid_until.as_deref(),
1253 };
1254 crate::identity::verify::verify(&pubkey, &link, &sig_bytes).is_ok()
1255}
1256
1257pub fn run_export(
1267 db_path: &Path,
1268 args: &ExportForensicBundleArgs,
1269 out: &mut CliOutput<'_>,
1270) -> Result<i32> {
1271 let conn = crate::db::open(db_path).context("open db")?;
1272 let output = match args.output.as_ref() {
1273 Some(p) => p.clone(),
1274 None => {
1275 let short = args.memory_id.chars().take(8).collect::<String>();
1276 let ts = chrono::Utc::now().format("%Y%m%dT%H%M%SZ");
1277 PathBuf::from(format!("forensic-bundle-{short}-{ts}.tar"))
1278 }
1279 };
1280 build(&conn, args, &output, None)?;
1281 writeln!(out.stdout, "forensic bundle written: {}", output.display())?;
1282 Ok(0)
1283}
1284
1285pub fn run_verify(args: &VerifyForensicBundleArgs, out: &mut CliOutput<'_>) -> Result<i32> {
1299 let report = verify(&args.bundle_path)?;
1300 let payload = serde_json::to_string_pretty(&report).context("serialise VerificationReport")?;
1301 writeln!(out.stdout, "{payload}")?;
1302 if report.ok {
1303 writeln!(out.stdout, "verification OK")?;
1304 Ok(0)
1305 } else {
1306 writeln!(out.stdout, "verification FAILED")?;
1307 Ok(2)
1308 }
1309}
1310
/// Encodes `b` as lowercase hexadecimal, two digits per byte.
fn bytes_to_hex(b: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(b.len() * 2);
    for &byte in b {
        // High nibble first, then low nibble.
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
1318
1319fn hex_to_bytes(s: &str) -> Result<Vec<u8>> {
1320 if s.len() % 2 != 0 {
1321 bail!("hex string has odd length");
1322 }
1323 let mut out = Vec::with_capacity(s.len() / 2);
1324 for i in (0..s.len()).step_by(2) {
1325 let pair = &s[i..i + 2];
1326 let byte =
1327 u8::from_str_radix(pair, 16).with_context(|| format!("invalid hex pair '{pair}'"))?;
1328 out.push(byte);
1329 }
1330 Ok(out)
1331}
1332
1333fn hex_sha256(bytes: &[u8]) -> String {
1334 let mut hasher = Sha256::new();
1335 hasher.update(bytes);
1336 bytes_to_hex(&hasher.finalize())
1337}
1338
/// Size in bytes of a tar block: each entry header occupies one block, entry
/// bodies are zero-padded to a multiple of it, and an archive ends with two
/// all-zero blocks.
const USTAR_BLOCK_SIZE: usize = 512;
1362
1363fn write_ustar(path: &Path, files: &BundleFiles) -> Result<()> {
1367 let mut out: Vec<u8> = Vec::new();
1368 for (name, body) in files {
1369 write_ustar_entry(&mut out, name, body)?;
1370 }
1371 out.extend(std::iter::repeat(0u8).take(USTAR_BLOCK_SIZE * 2));
1373 fs::write(path, &out).with_context(|| format!("write tarball to {}", path.display()))?;
1374 Ok(())
1375}
1376
1377pub fn pack_to_vec(files: &BundleFiles) -> Result<Vec<u8>> {
1381 let mut out: Vec<u8> = Vec::new();
1382 for (name, body) in files {
1383 write_ustar_entry(&mut out, name, body)?;
1384 }
1385 out.extend(std::iter::repeat(0u8).take(USTAR_BLOCK_SIZE * 2));
1386 Ok(out)
1387}
1388
1389fn write_ustar_entry(out: &mut Vec<u8>, name: &str, body: &[u8]) -> Result<()> {
1390 if name.len() > 100 {
1391 bail!(
1392 "bundle path '{name}' exceeds 100-byte ustar name limit; the bundle layout is \
1393 documented to keep every path under 100 bytes"
1394 );
1395 }
1396 let mut header = [0u8; USTAR_BLOCK_SIZE];
1397
1398 header[..name.len()].copy_from_slice(name.as_bytes());
1400 write_octal(&mut header[100..108], 0o644, 7);
1402 write_octal(&mut header[108..116], 0, 7);
1404 write_octal(&mut header[116..124], 0, 7);
1406 write_octal(&mut header[124..136], body.len() as u64, 11);
1408 write_octal(&mut header[136..148], 0, 11);
1410 for b in &mut header[148..156] {
1412 *b = b' ';
1413 }
1414 header[156] = b'0';
1416 header[257..263].copy_from_slice(b"ustar\0");
1419 header[263..265].copy_from_slice(b"00");
1421 write_octal(&mut header[329..337], 0, 7);
1424 write_octal(&mut header[337..345], 0, 7);
1425 let chksum: u32 = header.iter().map(|b| u32::from(*b)).sum();
1432 let s = format!("{chksum:06o}\0 ");
1433 header[148..156].copy_from_slice(s.as_bytes());
1434
1435 out.extend_from_slice(&header);
1436 out.extend_from_slice(body);
1437 let pad = (USTAR_BLOCK_SIZE - (body.len() % USTAR_BLOCK_SIZE)) % USTAR_BLOCK_SIZE;
1438 out.extend(std::iter::repeat(0u8).take(pad));
1439 Ok(())
1440}
1441
/// Writes `value` into `field` as `width` zero-padded octal digits followed
/// by a NUL byte at index `width`.
///
/// `field` must hold at least `width + 1` bytes, and `value` is expected to
/// fit in `width` digits.
fn write_octal(field: &mut [u8], value: u64, width: usize) {
    let digits = format!("{value:0width$o}");
    field[..digits.len()].copy_from_slice(digits.as_bytes());
    field[width] = 0;
}
1450
1451pub fn read_ustar(bytes: &[u8]) -> Result<BundleFiles> {
1457 let mut files: BundleFiles = BTreeMap::new();
1458 let mut pos = 0;
1459 while pos + USTAR_BLOCK_SIZE <= bytes.len() {
1460 let header = &bytes[pos..pos + USTAR_BLOCK_SIZE];
1461 if header[0] == 0 {
1464 break;
1465 }
1466 let name = read_cstr(&header[..100]);
1467 let size = read_octal_size(&header[124..136])?;
1468 if size > MAX_TAR_ENTRY_BYTES {
1479 bail!(
1480 "tar entry '{name}' size {size} exceeds the {MAX_TAR_ENTRY_BYTES}-byte \
1481 hard cap (likely a malformed or crafted bundle)"
1482 );
1483 }
1484 pos = pos
1485 .checked_add(USTAR_BLOCK_SIZE)
1486 .ok_or_else(|| anyhow!("tar parser: pos overflow advancing past header"))?;
1487 let body_end = pos
1488 .checked_add(size)
1489 .ok_or_else(|| anyhow!("tar entry '{name}' size {size} overflows usize"))?;
1490 if body_end > bytes.len() {
1491 bail!("tar entry '{name}' size {size} extends beyond archive bytes");
1492 }
1493 let body = bytes[pos..body_end].to_vec();
1494 files.insert(name, body);
1495 let pad = (USTAR_BLOCK_SIZE - (size % USTAR_BLOCK_SIZE)) % USTAR_BLOCK_SIZE;
1496 pos = body_end
1497 .checked_add(pad)
1498 .ok_or_else(|| anyhow!("tar parser: pos overflow advancing past padding"))?;
1499 }
1500 Ok(files)
1501}
1502
/// Upper bound (1 GiB) on the declared size of a single tar entry;
/// `read_ustar` rejects any entry whose size field exceeds it.
pub const MAX_TAR_ENTRY_BYTES: usize = 1024 * 1024 * 1024;
1511
/// Reads a NUL-terminated string from a fixed-width field.
///
/// Everything before the first NUL byte is taken, or the whole field when it
/// contains none. Invalid UTF-8 is replaced lossily.
fn read_cstr(bytes: &[u8]) -> String {
    // `split` always yields at least one (possibly empty) piece.
    let text = bytes.split(|b| *b == 0).next().unwrap_or(&[]);
    String::from_utf8_lossy(text).into_owned()
}
1516
1517fn read_octal_size(bytes: &[u8]) -> Result<usize> {
1518 let s = read_cstr(bytes);
1519 let trimmed = s.trim().trim_matches(|c: char| !c.is_ascii_digit());
1520 if trimmed.is_empty() {
1521 return Ok(0);
1522 }
1523 usize::from_str_radix(trimmed, 8).with_context(|| format!("invalid octal size field '{s}'"))
1524}
1525
#[cfg(test)]
mod tests {
    use super::*;
    use crate::db;
    use crate::models::{Memory, MemoryKind, Tier};
    use chrono::Utc;
    use rusqlite::params;
    use tempfile::TempDir;

    /// Pinned `generated_at` value handed to `build` / `build_files` so that
    /// repeated exports of the same data can be compared byte-for-byte.
    const FIXED_TS: &str = "2026-01-01T00:00:00Z";

    // ---------------------------------------------------------------------
    // Fixtures
    // ---------------------------------------------------------------------

    /// Opens a fresh database file inside `tmp`; returns the connection and
    /// the path of the database file.
    fn open_tmp_db(tmp: &TempDir) -> (rusqlite::Connection, PathBuf) {
        let p = tmp.path().join("ai-memory.db");
        let conn = db::open(&p).expect("db::open");
        (conn, p)
    }

    /// Inserts a memory with a random id into namespace `ns` and returns the id.
    /// Title and content are derived from `depth` (`t-<depth>` / `c-<depth>`).
    fn insert_mem(conn: &rusqlite::Connection, ns: &str, depth: i32, kind: MemoryKind) -> String {
        let id = uuid::Uuid::new_v4().to_string();
        let now = Utc::now().to_rfc3339();
        let mem = Memory {
            id: id.clone(),
            tier: Tier::Mid,
            namespace: ns.to_string(),
            title: format!("t-{depth}"),
            content: format!("c-{depth}"),
            reflection_depth: depth,
            created_at: now.clone(),
            updated_at: now,
            memory_kind: kind,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            ..Default::default()
        };
        db::insert(conn, &mem).expect("insert");
        id
    }

    /// Records an unsigned `reflects_on` link from `src` to `tgt`.
    fn link_unsigned(conn: &rusqlite::Connection, src: &str, tgt: &str) {
        conn.execute(
            "INSERT OR IGNORE INTO memory_links \
             (source_id, target_id, relation, created_at, attest_level) \
             VALUES (?1, ?2, 'reflects_on', ?3, 'unsigned')",
            params![src, tgt, Utc::now().to_rfc3339()],
        )
        .expect("link_unsigned");
    }

    /// Inserts a depth-0 observation and a depth-1 reflection linked to it.
    /// Returns `(observation_id, reflection_id)`.
    fn seed_reflection_pair(conn: &rusqlite::Connection) -> (String, String) {
        let d0 = insert_mem(conn, "ns", 0, MemoryKind::Observation);
        let d1 = insert_mem(conn, "ns", 1, MemoryKind::Reflection);
        link_unsigned(conn, &d1, &d0);
        (d0, d1)
    }

    /// Export arguments shared by most tests: no transcripts, atomisation
    /// chain included, no explicit output path. Tests that need different
    /// values override individual fields with struct-update syntax.
    fn export_args(memory_id: &str, include_reflections: bool) -> ExportForensicBundleArgs {
        ExportForensicBundleArgs {
            memory_id: memory_id.to_string(),
            include_reflections,
            include_transcripts: false,
            include_atomisation_chain: true,
            output: None,
        }
    }

    /// Builds a bundle named `file_name` inside `tmp` using the pinned
    /// timestamp and returns the path of the written archive.
    fn build_bundle(
        conn: &rusqlite::Connection,
        args: &ExportForensicBundleArgs,
        tmp: &TempDir,
        file_name: &str,
    ) -> PathBuf {
        let bundle_path = tmp.path().join(file_name);
        build(conn, args, &bundle_path, Some(FIXED_TS)).expect("build");
        bundle_path
    }

    /// Unpacks the archive at `bundle_path`, lets `mutate` edit the entry map,
    /// then re-packs and overwrites the archive in place. Whatever `mutate`
    /// returns is handed back to the caller.
    fn rewrite_bundle<T>(bundle_path: &Path, mutate: impl FnOnce(&mut BundleFiles) -> T) -> T {
        let bytes = fs::read(bundle_path).expect("read");
        let mut files = read_ustar(&bytes).expect("parse");
        let outcome = mutate(&mut files);
        let repacked = pack_to_vec(&files).expect("repack");
        fs::write(bundle_path, repacked).expect("write");
        outcome
    }

    /// Replaces the body of the first `memories/…` entry with junk without
    /// touching the manifest, and returns the name of the altered entry.
    fn tamper_first_memory_entry(bundle_path: &Path) -> String {
        rewrite_bundle(bundle_path, |files| {
            let key = files
                .keys()
                .find(|k| k.starts_with("memories/"))
                .expect("at least one memory entry")
                .clone();
            files.insert(key.clone(), b"tampered".to_vec());
            key
        })
    }

    /// Unsigned manifest for memory `abc` listing exactly `files`.
    fn sample_manifest(files: Vec<ManifestFile>) -> Manifest {
        Manifest {
            schema_version: 1,
            memory_id: "abc".into(),
            generated_at: FIXED_TS.into(),
            include_reflections: true,
            include_transcripts: false,
            files,
            signer_agent_id: None,
            signature: None,
        }
    }

    /// Shorthand constructor for a manifest file record.
    fn manifest_file(path: &str, size: u64, sha256: &str) -> ManifestFile {
        ManifestFile {
            path: path.to_string(),
            size,
            sha256: sha256.to_string(),
        }
    }

    /// Minimal unsigned `reflects_on` edge from `a` to `b`.
    fn unsigned_edge() -> EdgeEnvelope {
        EdgeEnvelope {
            source_id: "a".into(),
            target_id: "b".into(),
            relation: "reflects_on".into(),
            created_at: "2026-01-01T00:00:00Z".into(),
            observed_by: None,
            valid_from: None,
            valid_until: None,
            attest_level: "unsigned".into(),
            signature_hex: None,
        }
    }

    // ---------------------------------------------------------------------
    // ustar packing / parsing
    // ---------------------------------------------------------------------

    /// Packing and then parsing must give back the exact same entry map.
    #[test]
    fn write_and_read_ustar_round_trips() {
        let files: BundleFiles = BTreeMap::from([
            ("a.json".to_string(), b"{\"a\":1}".to_vec()),
            ("nested/b.txt".to_string(), b"hello world".to_vec()),
        ]);
        let bytes = pack_to_vec(&files).expect("pack");
        let parsed = read_ustar(&bytes).expect("parse");
        assert_eq!(parsed, files);
    }

    /// Packing the same map twice must give identical bytes.
    #[test]
    fn ustar_is_byte_deterministic() {
        let files: BundleFiles = BTreeMap::from([
            ("z.txt".to_string(), b"last".to_vec()),
            ("a.txt".to_string(), b"first".to_vec()),
        ]);
        let a = pack_to_vec(&files).expect("pack a");
        let b = pack_to_vec(&files).expect("pack b");
        assert_eq!(a, b, "same input must produce byte-identical output");
    }

    /// The manifest emitted by `build_files` carries the pinned schema
    /// version, the requested memory id and the injected timestamp.
    #[test]
    fn build_files_emits_manifest_with_pinned_schema_version() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let id = insert_mem(&conn, "fb-ns", 0, MemoryKind::Observation);
        let args = export_args(&id, true);
        let files = build_files(&conn, &args, Some(FIXED_TS)).expect("build");
        let manifest_bytes = files.get("manifest.json").expect("manifest present");
        let manifest: Manifest = serde_json::from_slice(manifest_bytes).expect("parse manifest");
        assert_eq!(manifest.schema_version, BUNDLE_SCHEMA_VERSION);
        assert_eq!(manifest.memory_id, id);
        assert_eq!(manifest.generated_at, "2026-01-01T00:00:00Z");
    }

    /// Two exports of the same data with the same timestamp pack to the same bytes.
    #[test]
    fn build_files_reproducible_modulo_timestamp() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let (_, d1) = seed_reflection_pair(&conn);
        let args = export_args(&d1, true);
        let files_a = build_files(&conn, &args, Some(FIXED_TS)).expect("build a");
        let files_b = build_files(&conn, &args, Some(FIXED_TS)).expect("build b");
        let bytes_a = pack_to_vec(&files_a).expect("pack a");
        let bytes_b = pack_to_vec(&files_b).expect("pack b");
        assert_eq!(
            bytes_a, bytes_b,
            "byte-identical mod timestamp is the L2-5 acceptance criterion"
        );
    }

    // ---------------------------------------------------------------------
    // verify()
    // ---------------------------------------------------------------------

    /// An untouched bundle verifies with no tampered or missing files.
    #[test]
    fn verify_clean_bundle_reports_ok() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let (_, d1) = seed_reflection_pair(&conn);
        let bundle_path = build_bundle(&conn, &export_args(&d1, true), &tmp, "bundle.tar");
        let report = verify(&bundle_path).expect("verify");
        assert!(report.ok, "clean bundle must verify: {report:#?}");
        assert!(report.tampered_files.is_empty());
        assert!(report.missing_files.is_empty());
    }

    /// Changing an entry body without updating the manifest is reported, and
    /// the report names the altered entry.
    #[test]
    fn verify_detects_tampered_file_in_bundle() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let (_, d1) = seed_reflection_pair(&conn);
        let bundle_path = build_bundle(&conn, &export_args(&d1, true), &tmp, "bundle.tar");

        let target_key = tamper_first_memory_entry(&bundle_path);

        let report = verify(&bundle_path).expect("verify");
        assert!(!report.ok, "tampered bundle must fail verification");
        assert!(
            report.tampered_files.contains(&target_key),
            "verifier must name the tampered file; got {:?}",
            report.tampered_files
        );
    }

    /// The canonical byte form is repeatable and contains the expected
    /// `path:size:sha256` and `memory_id:` fragments.
    #[test]
    fn canonical_signed_bytes_is_stable() {
        let m = sample_manifest(vec![
            manifest_file("a.json", 5, "ff"),
            manifest_file("b.json", 10, "ee"),
        ]);
        let a = canonical_signed_bytes(&m);
        let b = canonical_signed_bytes(&m);
        assert_eq!(a, b);
        let s = String::from_utf8(a).unwrap();
        assert!(s.contains("a.json:5:ff"));
        assert!(s.contains("b.json:10:ee"));
        assert!(s.contains("memory_id:abc"));
    }

    /// With reflections requested, a depth-2 export contains every ancestor
    /// memory and both linking edges.
    #[test]
    fn build_chain_includes_ancestors_when_reflections_requested() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let d0 = insert_mem(&conn, "ns", 0, MemoryKind::Observation);
        let d1 = insert_mem(&conn, "ns", 1, MemoryKind::Reflection);
        let d2 = insert_mem(&conn, "ns", 2, MemoryKind::Reflection);
        link_unsigned(&conn, &d2, &d1);
        link_unsigned(&conn, &d1, &d0);
        let args = export_args(&d2, true);
        let files = build_files(&conn, &args, Some(FIXED_TS)).expect("build");
        for id in [&d0, &d1, &d2] {
            let key = format!("memories/{id}.json");
            assert!(
                files.contains_key(&key),
                "depth-2 chain must include all ancestors; missing {key}"
            );
        }
        let edge_count = files.keys().filter(|k| k.starts_with("edges/")).count();
        assert_eq!(edge_count, 2, "expected 2 reflects_on edges");
    }

    /// Without the reflections flag only the requested memory is exported.
    #[test]
    fn build_chain_excludes_ancestors_without_reflections_flag() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let (d0, d1) = seed_reflection_pair(&conn);
        let args = export_args(&d1, false);
        let files = build_files(&conn, &args, Some(FIXED_TS)).expect("build");
        assert!(files.contains_key(&format!("memories/{d1}.json")));
        assert!(
            !files.contains_key(&format!("memories/{d0}.json")),
            "ancestor must be excluded when --include-reflections is unset"
        );
    }

    /// Dropping an entry that the manifest still lists is reported as missing.
    #[test]
    fn verify_detects_missing_file_from_bundle() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let (d0, d1) = seed_reflection_pair(&conn);
        let bundle_path = build_bundle(&conn, &export_args(&d1, true), &tmp, "bundle.tar");

        let memory_key = rewrite_bundle(&bundle_path, |files| {
            let key = files
                .keys()
                .find(|k| k.starts_with("memories/") && k.contains(&d0))
                .expect("ancestor entry present")
                .clone();
            files.remove(&key);
            key
        });

        let report = verify(&bundle_path).expect("verify");
        assert!(!report.ok, "missing file must fail verification");
        assert!(report.missing_files.contains(&memory_key));
    }

    // ---------------------------------------------------------------------
    // hex / hashing / octal helpers
    // ---------------------------------------------------------------------

    #[test]
    fn hex_round_trip() {
        let bytes = vec![0u8, 0x0f, 0xa1, 0xff];
        let hex = bytes_to_hex(&bytes);
        assert_eq!(hex, "000fa1ff");
        assert_eq!(hex_to_bytes(&hex).unwrap(), bytes);
    }

    #[test]
    fn hex_to_bytes_rejects_odd_length() {
        assert!(hex_to_bytes("abc").is_err());
    }

    /// A 101-character entry name cannot be packed.
    #[test]
    fn ustar_rejects_long_paths() {
        let mut files = BTreeMap::new();
        files.insert("a".repeat(101), b"x".to_vec());
        assert!(pack_to_vec(&files).is_err());
    }

    #[test]
    fn hex_to_bytes_rejects_invalid_pair() {
        let err = hex_to_bytes("zz").unwrap_err();
        assert!(format!("{err:#}").contains("invalid hex pair"));
    }

    /// Hashing is repeatable and yields 64 hexadecimal characters.
    #[test]
    fn hex_sha256_stable_for_same_input() {
        let a = hex_sha256(b"hello world");
        let b = hex_sha256(b"hello world");
        assert_eq!(a, b);
        assert_eq!(a.len(), 64);
        assert!(a.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// A field produced by `write_octal` is read back to the same number.
    #[test]
    fn read_octal_size_parses_padded_field() {
        let mut field = [0u8; 12];
        write_octal(&mut field, 256, 11);
        assert_eq!(read_octal_size(&field).unwrap(), 256);
    }

    #[test]
    fn read_octal_size_empty_returns_zero() {
        let field = [0u8; 12];
        assert_eq!(read_octal_size(&field).unwrap(), 0);
    }

    /// A field holding only whitespace before the NUL contains no digits and
    /// therefore decodes to zero rather than failing.
    #[test]
    fn read_octal_size_garbage_returns_error_or_zero() {
        let field = b" \0\0\0\0\0\0\0\0\0\0";
        assert_eq!(read_octal_size(field).unwrap(), 0);
    }

    #[test]
    fn ustar_pack_unpack_empty_files_map() {
        let files: BundleFiles = BTreeMap::new();
        let bytes = pack_to_vec(&files).unwrap();
        let parsed = read_ustar(&bytes).unwrap();
        assert!(parsed.is_empty());
    }

    /// A 512-byte body survives the round trip at its full length.
    #[test]
    fn ustar_pack_unpack_handles_block_aligned_body() {
        let mut files = BundleFiles::new();
        files.insert("aligned.bin".to_string(), vec![b'A'; 512]);
        let bytes = pack_to_vec(&files).unwrap();
        let parsed = read_ustar(&bytes).unwrap();
        assert_eq!(parsed.get("aligned.bin").unwrap().len(), 512);
    }

    /// An all-zero input is treated as an empty archive.
    #[test]
    fn read_ustar_stops_on_zero_block() {
        let bytes = vec![0u8; 1024];
        let parsed = read_ustar(&bytes).unwrap();
        assert!(parsed.is_empty());
    }

    /// Populating the signer fields must not change the canonical bytes.
    #[test]
    fn canonical_signed_bytes_excludes_signature_fields() {
        let mut m1 = sample_manifest(vec![manifest_file("a.json", 5, "ff")]);
        let bytes_unsigned = canonical_signed_bytes(&m1);
        m1.signer_agent_id = Some("alice".into());
        m1.signature = Some("0xdead".into());
        let bytes_signed = canonical_signed_bytes(&m1);
        assert_eq!(
            bytes_unsigned, bytes_signed,
            "signer fields must not affect canonical signed bytes"
        );
    }

    #[test]
    fn bytes_to_hex_empty_returns_empty_string() {
        assert_eq!(bytes_to_hex(&[]), "");
    }

    #[test]
    fn hex_to_bytes_empty_returns_empty_vec() {
        let v = hex_to_bytes("").unwrap();
        assert!(v.is_empty());
    }

    #[test]
    fn write_octal_zero_value_is_padded() {
        let mut field = [0u8; 8];
        write_octal(&mut field, 0, 7);
        assert_eq!(&field[..7], b"0000000");
        assert_eq!(field[7], 0);
    }

    /// Cutting the archive off part-way through an entry body is an error.
    #[test]
    fn read_ustar_truncated_body_rejected() {
        let mut files = BundleFiles::new();
        files.insert("x.txt".to_string(), b"hello".to_vec());
        let bytes = pack_to_vec(&files).unwrap();
        // Keeps the header plus only part of the 5-byte body (this assumes a
        // 512-byte header block).
        let truncated = &bytes[..516];
        let err = read_ustar(truncated).unwrap_err();
        let s = format!("{err}");
        assert!(s.contains("extends beyond"));
    }

    #[test]
    fn verify_returns_error_for_missing_bundle_path() {
        let p = std::path::Path::new("/this/does/not/exist/bundle.tar");
        assert!(verify(p).is_err());
    }

    /// A header declaring eleven octal sevens (about 8 GiB) is refused
    /// because it is above `MAX_TAR_ENTRY_BYTES`.
    #[test]
    fn read_ustar_rejects_oversize_entry_1250() {
        let mut header = [0u8; USTAR_BLOCK_SIZE];
        // A non-NUL first byte stops the parser treating this as end-of-archive.
        header[0] = b'x';
        header[124..135].fill(b'7');
        header[135] = b' ';
        let err = read_ustar(&header).expect_err("oversize entry must be refused");
        let s = format!("{err}");
        assert!(
            s.contains("exceeds the") || s.contains("hard cap"),
            "expected MAX_TAR_ENTRY_BYTES rejection message, got: {s}"
        );
    }

    /// Sanity bounds on the cap itself.
    #[test]
    fn read_ustar_oversize_cap_invariants_1250() {
        assert!(
            MAX_TAR_ENTRY_BYTES < usize::MAX / 4,
            "MAX_TAR_ENTRY_BYTES must be << usize::MAX so checked_add can never panic"
        );
        assert!(
            MAX_TAR_ENTRY_BYTES >= 100 * 1024 * 1024,
            "MAX_TAR_ENTRY_BYTES must accommodate the largest realistic forensic bundle"
        );
    }

    // ---------------------------------------------------------------------
    // CLI entry points
    // ---------------------------------------------------------------------

    /// `run_export` with an explicit `--output` writes that file, reports it
    /// on stdout and returns 0.
    #[test]
    fn run_export_explicit_output_writes_bundle() {
        let tmp = TempDir::new().unwrap();
        let (conn, db_path) = open_tmp_db(&tmp);
        let id = insert_mem(&conn, "ns", 0, MemoryKind::Observation);
        drop(conn);
        let output = tmp.path().join("explicit.tar");
        let args = ExportForensicBundleArgs {
            output: Some(output.clone()),
            ..export_args(&id, true)
        };
        let mut stdout = Vec::<u8>::new();
        let mut stderr = Vec::<u8>::new();
        let code = {
            let mut out = CliOutput::from_std(&mut stdout, &mut stderr);
            run_export(&db_path, &args, &mut out).expect("run_export")
        };
        assert_eq!(code, 0);
        assert!(output.exists(), "bundle file must be written");
        let printed = String::from_utf8(stdout).unwrap();
        assert!(printed.contains("forensic bundle written"));
        assert!(printed.contains("explicit.tar"));
    }

    /// Without `--output` the generated file name embeds the first eight
    /// characters of the memory id.
    #[test]
    fn run_export_default_output_name_derived() {
        let tmp = TempDir::new().unwrap();
        let (conn, db_path) = open_tmp_db(&tmp);
        let id = insert_mem(&conn, "ns", 0, MemoryKind::Observation);
        drop(conn);
        let args = ExportForensicBundleArgs {
            include_atomisation_chain: false,
            ..export_args(&id, false)
        };
        let mut stdout = Vec::<u8>::new();
        let mut stderr = Vec::<u8>::new();
        let code = {
            let mut out = CliOutput::from_std(&mut stdout, &mut stderr);
            run_export(&db_path, &args, &mut out).expect("run_export default name")
        };
        let printed = String::from_utf8(stdout).unwrap();

        // The default path is chosen by `run_export`, not by this test, so the
        // artefact is deleted (best effort) BEFORE any assertion runs; a
        // failing assertion must not leave a stray bundle behind.
        if let Some(name) = printed
            .lines()
            .find_map(|l| l.trim().strip_prefix("forensic bundle written: "))
        {
            let _ = fs::remove_file(name);
        }

        assert_eq!(code, 0);
        let short: String = id.chars().take(8).collect();
        let prefix = format!("forensic-bundle-{short}-");
        assert!(
            printed.contains(&prefix),
            "default name must embed short id: {printed}"
        );
    }

    /// `run_verify` on an untouched bundle prints success and returns 0.
    #[test]
    fn run_verify_clean_bundle_exit_zero() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let id = insert_mem(&conn, "ns", 0, MemoryKind::Observation);
        let bundle_path = build_bundle(&conn, &export_args(&id, true), &tmp, "ok.tar");
        let vargs = VerifyForensicBundleArgs { bundle_path };
        let mut stdout = Vec::<u8>::new();
        let mut stderr = Vec::<u8>::new();
        let code = {
            let mut out = CliOutput::from_std(&mut stdout, &mut stderr);
            run_verify(&vargs, &mut out).expect("run_verify")
        };
        assert_eq!(code, 0);
        let printed = String::from_utf8(stdout).unwrap();
        assert!(printed.contains("verification OK"));
    }

    /// `run_verify` on a tampered bundle prints a failure and returns 2.
    #[test]
    fn run_verify_tampered_bundle_exit_two() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let id = insert_mem(&conn, "ns", 0, MemoryKind::Observation);
        let bundle_path = build_bundle(&conn, &export_args(&id, true), &tmp, "bad.tar");
        tamper_first_memory_entry(&bundle_path);

        let vargs = VerifyForensicBundleArgs { bundle_path };
        let mut stdout = Vec::<u8>::new();
        let mut stderr = Vec::<u8>::new();
        let code = {
            let mut out = CliOutput::from_std(&mut stdout, &mut stderr);
            run_verify(&vargs, &mut out).expect("run_verify")
        };
        assert_eq!(code, 2, "verification failure must exit 2 (#709)");
        let printed = String::from_utf8(stdout).unwrap();
        assert!(printed.contains("verification FAILED"));
    }

    /// An entry that the manifest does not list shows up in `extra_files`.
    #[test]
    fn verify_detects_extra_file_in_bundle() {
        let tmp = TempDir::new().unwrap();
        let (conn, _) = open_tmp_db(&tmp);
        let id = insert_mem(&conn, "ns", 0, MemoryKind::Observation);
        let bundle_path = build_bundle(&conn, &export_args(&id, true), &tmp, "extra.tar");
        rewrite_bundle(&bundle_path, |files| {
            files.insert("memories/intruder.json".to_string(), b"{}".to_vec());
        });

        let report = verify(&bundle_path).expect("verify");
        assert!(
            report
                .extra_files
                .contains(&"memories/intruder.json".to_string()),
            "extra file must be reported: {:?}",
            report.extra_files
        );
    }

    /// An archive without a manifest entry cannot be verified at all.
    #[test]
    fn verify_missing_manifest_errors() {
        let tmp = TempDir::new().unwrap();
        let mut files = BundleFiles::new();
        files.insert("memories/x.json".to_string(), b"{}".to_vec());
        let bundle_path = tmp.path().join("no-manifest.tar");
        fs::write(&bundle_path, pack_to_vec(&files).unwrap()).unwrap();
        let err = verify(&bundle_path).unwrap_err();
        assert!(format!("{err:#}").contains("missing manifest"));
    }

    // ---------------------------------------------------------------------
    // Edge envelopes and report serialisation
    // ---------------------------------------------------------------------

    #[test]
    fn verify_edge_envelope_unsigned_is_ok() {
        assert!(verify_edge_envelope(&unsigned_edge()));
    }

    /// A signed edge with no `observed_by` agent does not verify.
    #[test]
    fn verify_edge_envelope_signed_without_agent_is_false() {
        let edge = EdgeEnvelope {
            attest_level: "signed".into(),
            signature_hex: Some("deadbeef".into()),
            ..unsigned_edge()
        };
        assert!(!verify_edge_envelope(&edge));
    }

    /// A signed edge attributed to the agent id `nobody:unenrolled` does not verify.
    #[test]
    fn verify_edge_envelope_unknown_signer_is_false() {
        let edge = EdgeEnvelope {
            observed_by: Some("nobody:unenrolled".into()),
            attest_level: "signed".into(),
            signature_hex: Some("deadbeef".into()),
            ..unsigned_edge()
        };
        assert!(!verify_edge_envelope(&edge));
    }

    /// The report serialises to JSON containing the `ok` flag and memory id.
    #[test]
    fn verification_report_serializes() {
        let report = VerificationReport {
            ok: true,
            bundle_path: "/x.tar".into(),
            manifest_present: true,
            schema_version: BUNDLE_SCHEMA_VERSION,
            memory_id: "abc".into(),
            signer_agent_id: None,
            signature_status: SignatureStatus::Absent,
            tampered_files: Vec::new(),
            missing_files: Vec::new(),
            extra_files: Vec::new(),
            chain_edges_failed: Vec::new(),
        };
        let json = serde_json::to_string(&report).expect("serialize");
        assert!(json.contains("\"ok\":true"));
        assert!(json.contains("abc"));
    }
}