// vela_protocol/export.rs

//! Export frontier findings in proof-first formats: CSV, JSON-LD, BibTeX, Markdown, Frontier JSON, and Packet.

use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::path::{Path, PathBuf};

use chrono::Utc;
use serde::Serialize;
use sha2::{Digest, Sha256};

use crate::bundle::{Artifact, FindingBundle};
use crate::project::Project;
use crate::{events, packet, repo, signals, sources, state};
13
/// Supported export formats.
///
/// `PartialEq`/`Eq` are derived so callers can compare formats directly
/// instead of reaching for `matches!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExportFormat {
    Csv,
    JsonLd,
    BibTex,
    Markdown,
    /// Export as monolithic frontier JSON (useful for converting VelaRepo back to JSON).
    Project,
    /// Export a bounded proof packet as a directory.
    Packet,
}

impl ExportFormat {
    /// Parse a user-supplied format name, case-insensitively.
    ///
    /// Accepted aliases: `jsonld`/`json-ld`, `bibtex`/`bib`, `markdown`/`md`,
    /// and `frontier`/`json` (both map to [`ExportFormat::Project`]).
    ///
    /// # Errors
    ///
    /// Returns a human-readable message listing the supported formats when
    /// `s` is not recognized.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Result<Self, String> {
        match s.to_lowercase().as_str() {
            "csv" => Ok(Self::Csv),
            "jsonld" | "json-ld" => Ok(Self::JsonLd),
            "bibtex" | "bib" => Ok(Self::BibTex),
            "markdown" | "md" => Ok(Self::Markdown),
            "frontier" | "json" => Ok(Self::Project),
            "packet" => Ok(Self::Packet),
            _ => Err(format!(
                "Unknown format '{}'. Supported: csv, jsonld, bibtex, markdown, frontier, packet",
                s
            )),
        }
    }

    /// Returns true if this format produces multiple files (a directory).
    pub fn is_multi_file(&self) -> bool {
        matches!(self, Self::Packet)
    }
}
49
50pub fn export(frontier: &Project, format: ExportFormat) -> String {
51    match format {
52        ExportFormat::Csv => export_csv(frontier),
53        ExportFormat::JsonLd => export_jsonld(frontier),
54        ExportFormat::BibTex => export_bibtex(frontier),
55        ExportFormat::Markdown => export_markdown(frontier),
56        ExportFormat::Project => {
57            serde_json::to_string_pretty(frontier).expect("Failed to serialize frontier")
58        }
59        ExportFormat::Packet => {
60            panic!("Packet format is multi-file. Use export_packet() instead of export().");
61        }
62    }
63}
64
65pub fn run(frontier_path: &Path, format_str: &str, output: Option<&Path>) {
66    let frontier = repo::load_from_path(frontier_path).expect("Failed to load frontier");
67
68    let format = match ExportFormat::from_str(format_str) {
69        Ok(f) => f,
70        Err(e) => {
71            eprintln!("{} {e}", crate::cli_style::err_prefix());
72            std::process::exit(1);
73        }
74    };
75
76    if format.is_multi_file() {
77        let out_dir = output.unwrap_or_else(|| {
78            eprintln!(
79                "{} {} format requires --output <directory>",
80                crate::cli_style::err_prefix(),
81                format_str
82            );
83            std::process::exit(1);
84        });
85        let result = match format {
86            ExportFormat::Packet => {
87                export_packet_with_source(&frontier, Some(frontier_path), out_dir).map(|_| ())
88            }
89            _ => unreachable!("single-file format reached multi-file branch"),
90        };
91        match result {
92            Ok(()) => {
93                eprintln!(
94                    "sealed · {} findings as {} · {}",
95                    frontier.findings.len(),
96                    format_str,
97                    out_dir.display()
98                );
99            }
100            Err(e) => {
101                eprintln!("{} {e}", crate::cli_style::err_prefix());
102                std::process::exit(1);
103            }
104        }
105        return;
106    }
107
108    let result = export(&frontier, format);
109
110    if let Some(out_path) = output {
111        std::fs::write(out_path, &result).expect("Failed to write output file");
112        eprintln!(
113            "sealed · {} findings · {}",
114            frontier.findings.len(),
115            out_path.display()
116        );
117    } else {
118        print!("{result}");
119    }
120}
121
// ── CSV ──────────────────────────────────────────────────────────────────────

/// Quote a CSV field per RFC 4180: if the field contains a delimiter, a
/// double quote, or a line break, wrap it in double quotes and double any
/// embedded quotes. Plain fields are returned unchanged.
fn csv_escape(s: &str) -> String {
    // '\r' is included alongside '\n' so a bare carriage return cannot
    // silently break a row (RFC 4180 requires quoting fields with CR/LF).
    if s.contains([',', '"', '\n', '\r']) {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
131
132fn export_csv(frontier: &Project) -> String {
133    let mut out = String::new();
134    out.push_str("id,assertion_type,assertion_text,confidence,replicated,entities,year,doi,source_title,gap,contested\n");
135
136    for f in &frontier.findings {
137        let entities: Vec<&str> = f
138            .assertion
139            .entities
140            .iter()
141            .map(|e| e.name.as_str())
142            .collect();
143        let row = format!(
144            "{},{},{},{},{},{},{},{},{},{},{}\n",
145            csv_escape(&f.id),
146            csv_escape(&f.assertion.assertion_type),
147            csv_escape(&f.assertion.text),
148            f.confidence.score,
149            f.evidence.replicated,
150            csv_escape(&entities.join(";")),
151            f.provenance.year.map(|y| y.to_string()).unwrap_or_default(),
152            csv_escape(f.provenance.doi.as_deref().unwrap_or("")),
153            csv_escape(&f.provenance.title),
154            f.flags.gap,
155            f.flags.contested,
156        );
157        out.push_str(&row);
158    }
159    out
160}
161
// ── JSON-LD ──────────────────────────────────────────────────────────────────

/// Render the frontier as a JSON-LD document: one `vela:FindingBundle` node
/// per finding under `@graph`, with entities (plus resolved canonical-id
/// URLs), typed links to other findings, and a PROV activity recording the
/// generating tool version and source DOI when present.
fn export_jsonld(frontier: &Project) -> String {
    let items: Vec<serde_json::Value> = frontier
        .findings
        .iter()
        .map(|f| {
            // Build entity array with identifiers
            let entities: Vec<serde_json::Value> = f
                .assertion
                .entities
                .iter()
                .map(|e| {
                    let mut entity = serde_json::json!({
                        "vela:entityName": e.name,
                        "vela:entityType": e.entity_type,
                    });
                    // Add canonical identifier if resolved
                    if let Some(canonical) = &e.canonical_id {
                        // Map known registries to resolvable URLs; anything
                        // unrecognized falls back to a urn:<source>:<id>.
                        let url = match canonical.source.as_str() {
                            "uniprot" => {
                                format!("https://www.uniprot.org/uniprot/{}", canonical.id)
                            }
                            "pubchem" => format!(
                                "https://pubchem.ncbi.nlm.nih.gov/compound/{}",
                                canonical.id
                            ),
                            "mesh" => format!("https://id.nlm.nih.gov/mesh/{}", canonical.id),
                            "ncbi_gene" => {
                                format!("https://www.ncbi.nlm.nih.gov/gene/{}", canonical.id)
                            }
                            "chebi" => format!(
                                "https://www.ebi.ac.uk/chebi/searchId.do?chebiId={}",
                                canonical.id
                            ),
                            "go" => {
                                format!("http://amigo.geneontology.org/amigo/term/{}", canonical.id)
                            }
                            _ => format!("urn:{}:{}", canonical.source, canonical.id),
                        };
                        entity["schema:identifier"] = serde_json::json!({"@id": url});
                    }
                    entity
                })
                .collect();

            // Build link array
            let links: Vec<serde_json::Value> = f
                .links
                .iter()
                .map(|l| {
                    serde_json::json!({
                        "vela:linkTarget": {"@id": format!("vela:{}", l.target)},
                        "vela:linkType": l.link_type,
                    })
                })
                .collect();

            // Build provenance activity
            let mut activity = serde_json::json!({
                "@type": "prov:Activity",
                "prov:wasAssociatedWith": format!("vela/{}", env!("CARGO_PKG_VERSION")),
            });
            if let Some(doi) = &f.provenance.doi {
                activity["prov:used"] = serde_json::json!({"@id": format!("doi:{doi}")});
            }

            let mut node = serde_json::json!({
                "@id": format!("vela:{}", f.id),
                "@type": "vela:FindingBundle",
                "vela:assertionText": f.assertion.text,
                "vela:assertionType": f.assertion.assertion_type,
                "vela:confidence": f.confidence.score,
                "vela:evidenceType": f.evidence.evidence_type,
                "schema:dateCreated": f.created,
                "prov:wasGeneratedBy": activity,
            });

            // Omit empty arrays rather than emitting empty keys.
            if !entities.is_empty() {
                node["vela:hasEntity"] = serde_json::Value::Array(entities);
            }
            if !links.is_empty() {
                node["vela:hasLink"] = serde_json::Value::Array(links);
            }

            node
        })
        .collect();

    let doc = serde_json::json!({
        "@context": {
            "@vocab": "https://vela.science/schema/",
            "schema": "https://schema.org/",
            "prov": "http://www.w3.org/ns/prov#",
            "np": "http://www.nanopub.org/nschema#",
            "doi": "https://doi.org/",
            "orcid": "https://orcid.org/"
        },
        "@graph": items,
    });

    serde_json::to_string_pretty(&doc).unwrap_or_default()
}
265
266// ── BibTeX ───────────────────────────────────────────────────────────────────
267
268fn export_bibtex(frontier: &Project) -> String {
269    // Deduplicate by DOI (or title if no DOI).
270    let mut seen: BTreeMap<String, &FindingBundle> = BTreeMap::new();
271    for f in &frontier.findings {
272        let key = f
273            .provenance
274            .doi
275            .clone()
276            .unwrap_or_else(|| f.provenance.title.clone());
277        seen.entry(key).or_insert(f);
278    }
279
280    let mut out = String::new();
281    for f in seen.values() {
282        let cite_key = f
283            .provenance
284            .doi
285            .as_deref()
286            .map(|d| d.replace(['/', '.'], "_"))
287            .unwrap_or_else(|| f.id.clone());
288
289        let authors_str: String = f
290            .provenance
291            .authors
292            .iter()
293            .map(|a| a.name.as_str())
294            .collect::<Vec<_>>()
295            .join(" and ");
296
297        out.push_str(&format!("@article{{{},\n", cite_key));
298        out.push_str(&format!("  title = {{{}}},\n", f.provenance.title));
299        if !authors_str.is_empty() {
300            out.push_str(&format!("  author = {{{}}},\n", authors_str));
301        }
302        if let Some(year) = f.provenance.year {
303            out.push_str(&format!("  year = {{{}}},\n", year));
304        }
305        if let Some(journal) = &f.provenance.journal {
306            out.push_str(&format!("  journal = {{{}}},\n", journal));
307        }
308        if let Some(doi) = &f.provenance.doi {
309            out.push_str(&format!("  doi = {{{}}},\n", doi));
310        }
311        out.push_str("}\n\n");
312    }
313    out
314}
315
/// Top-level summary of the exported frontier: project metadata, headline
/// stats copied from `frontier.stats`, and the most frequently mentioned
/// entities (capped at 25, sorted by finding count then name).
#[derive(Debug, Clone, Serialize)]
struct PacketOverview {
    project_name: String,
    description: String,
    // When the source frontier was compiled (copied from the project).
    compiled_at: String,
    // When this packet export was generated (RFC 3339).
    generated_at: String,
    findings: usize,
    papers_processed: usize,
    avg_confidence: f64,
    // Category name -> finding count, copied from frontier.stats.
    categories: BTreeMap<String, usize>,
    // Link type -> count, copied from frontier.stats.
    link_types: BTreeMap<String, usize>,
    top_entities: Vec<PacketEntitySummary>,
}
329
/// Per-entity rollup: how many findings mention the entity and which
/// assertion-type categories those findings belong to.
#[derive(Debug, Clone, Serialize)]
struct PacketEntitySummary {
    name: String,
    // Entity type of the first occurrence; "unknown" if never recorded.
    entity_type: String,
    finding_count: usize,
    categories: Vec<String>,
}
337
/// Flattened, serialization-friendly view of a single finding used in the
/// packet's finding lists (all findings, high-signal, gaps, contested).
#[derive(Debug, Clone, Serialize)]
struct PacketFindingSummary {
    id: String,
    assertion_type: String,
    assertion_text: String,
    confidence: f64,
    evidence_type: String,
    method: String,
    // Entity names only (identifiers are kept in other exports).
    entities: Vec<String>,
    doi: Option<String>,
    source_title: String,
    flags: PacketFlags,
    // Number of outgoing links of any type.
    link_count: usize,
}
352
/// Reviewer-facing flags: `gap` and `contested` come from the finding's
/// flags, `replicated` from its evidence record.
#[derive(Debug, Clone, Serialize)]
struct PacketFlags {
    gap: bool,
    contested: bool,
    replicated: bool,
}
359
/// An entity that appears in findings spanning two or more assertion-type
/// categories (a "bridge"), with per-category counts and the finding ids
/// that mention it.
#[derive(Debug, Clone, Serialize)]
struct PacketBridgeSummary {
    entity: String,
    entity_type: String,
    // assertion_type -> number of findings in that category mentioning the entity.
    categories: BTreeMap<String, usize>,
    finding_ids: Vec<String>,
}
367
/// One deduplicated `contradicts`/`disputes` edge between two findings,
/// with both assertion texts inlined for review without extra lookups.
#[derive(Debug, Clone, Serialize)]
struct PacketContradictionSummary {
    source_id: String,
    target_id: String,
    link_type: String,
    source_assertion: String,
    target_assertion: String,
}
376
/// Scope statement for the packet: what it covers, how it is intended to be
/// used, what is explicitly out of scope, and standing caveats.
#[derive(Debug, Clone, Serialize)]
struct PacketScope {
    frontier_name: String,
    description: String,
    generated_at: String,
    // Schema identifier of the source frontier.
    source_schema: String,
    finding_count: usize,
    papers_processed: usize,
    review_event_count: usize,
    intended_use: Vec<String>,
    out_of_scope: Vec<String>,
    caveats: Vec<String>,
}
390
/// One row of the packet's source table, derived from the source projection.
#[derive(Debug, Clone, Serialize)]
struct PacketSourceRow {
    // Currently populated with the same value as `source_id` (both copied
    // from the projected source's id).
    source_key: String,
    source_id: String,
    locator: String,
    content_hash: Option<String>,
    title: String,
    doi: Option<String>,
    pmid: Option<String>,
    year: Option<i32>,
    source_type: String,
    extraction_mode: String,
    source_quality: String,
    caveats: Vec<String>,
    // Findings extracted from this source.
    finding_ids: Vec<String>,
}
407
/// One row per finding in the evidence matrix: evidence metadata, its
/// evidence atoms, and counts of outgoing links by relationship class.
#[derive(Debug, Clone, Serialize)]
struct PacketEvidenceMatrixRow {
    finding_id: String,
    assertion_type: String,
    evidence_type: String,
    method: String,
    confidence: f64,
    replicated: bool,
    human_data: bool,
    clinical_trial: bool,
    // Produced by the source_key() helper — semantics not visible in this
    // chunk; presumably a stable key into the source table. TODO confirm.
    source_key: String,
    // Source id of the finding's first evidence atom, if any.
    source_id: Option<String>,
    evidence_atom_ids: Vec<String>,
    // Number of evidence atoms that lack a locator.
    missing_locator_count: usize,
    // Outgoing links typed supports/extends/replicates.
    supports: usize,
    // Outgoing links typed contradicts/disputes.
    contradicts: usize,
    // Outgoing links typed depends.
    depends: usize,
    flags: PacketFlags,
}
427
/// A gap-flagged finding surfaced as a candidate research gap, including
/// its conditions text and review status ("reviewed"/"unreviewed").
#[derive(Debug, Clone, Serialize)]
struct PacketCandidateGap {
    finding_id: String,
    assertion: String,
    confidence: f64,
    conditions: String,
    entities: Vec<String>,
    review_status: String,
}
437
/// MCP session hints bundled into the packet: the recommended tool-call
/// loop, the full tool catalog, and standing caveats as notes.
#[derive(Debug, Clone, Serialize)]
struct PacketMcpSession {
    protocol: String,
    recommended_loop: Vec<String>,
    // JSON tool catalog from crate::tool_registry::mcp_tools_json().
    tool_catalog: serde_json::Value,
    notes: Vec<String>,
}
445
/// Summary of packet self-checks: artifact audit results, record counts,
/// and proposal/proof state. (Populated past the end of this chunk —
/// NOTE(review): confirm field semantics against the writer.)
#[derive(Debug, Clone, Serialize)]
struct PacketCheckSummary {
    status: String,
    generated_at: String,
    checked_artifacts: Vec<String>,
    artifact_audit: crate::artifact_audit::ArtifactAudit,
    counts: PacketManifestStats,
    proposal_summary: crate::proposals::ProposalSummary,
    proof_state: crate::proposals::ProofState,
    caveats: Vec<String>,
}
457
/// Verification trace for the packet: content hashes of the source,
/// snapshot, event log, and proposal state, plus replay status. (Populated
/// past the end of this chunk — NOTE(review): confirm hash provenance.)
#[derive(Debug, Clone, Serialize)]
struct PacketProofTrace {
    trace_version: String,
    generated_at: String,
    source: String,
    source_hash: String,
    snapshot_hash: String,
    event_log_hash: String,
    proposal_state_hash: String,
    replay_status: String,
    packet_manifest_hash: Option<String>,
    schema_version: String,
    checked_artifacts: Vec<String>,
    caveats: Vec<String>,
    status: String,
}
474
/// Lock file body: every file in the packet with its SHA-256 and byte size.
#[derive(Debug, Clone, Serialize)]
struct PacketLock {
    lock_format: String,
    generated_at: String,
    files: Vec<PacketManifestFile>,
}
481
/// Top-level packet manifest: format/version markers, source-project
/// provenance, record counts, and the list of included files.
#[derive(Debug, Clone, Serialize)]
struct PacketManifest {
    packet_format: String,
    packet_version: String,
    generated_at: String,
    source: PacketSource,
    stats: PacketManifestStats,
    included_files: Vec<PacketManifestFile>,
}
491
/// Provenance of the frontier the packet was built from: project identity,
/// the compiler and vela version that produced it, and its schema.
#[derive(Debug, Clone, Serialize)]
struct PacketSource {
    project_name: String,
    description: String,
    compiled_at: String,
    compiler: String,
    vela_version: String,
    schema: String,
}
501
/// Record counts included in the packet manifest and check summary.
#[derive(Debug, Clone, Serialize)]
struct PacketManifestStats {
    findings: usize,
    sources: usize,
    evidence_atoms: usize,
    condition_records: usize,
    review_events: usize,
    proposals: usize,
    // Findings flagged as gaps.
    gaps: usize,
    // Findings flagged as contested.
    contested: usize,
    bridge_entities: usize,
    contradiction_edges: usize,
}
515
/// One file entry in the manifest/lock: packet-relative path, SHA-256 hex
/// digest, and size in bytes.
#[derive(Debug, Clone, Serialize)]
struct PacketManifestFile {
    path: String,
    sha256: String,
    bytes: usize,
}
522
/// Metadata for an artifact copied into the packet: its id, content hash,
/// original locator, destination path inside the packet, and size.
#[derive(Debug, Clone, Serialize)]
struct PacketArtifactBlob {
    artifact_id: String,
    content_hash: String,
    source_locator: String,
    packet_path: String,
    size_bytes: usize,
}
531
/// Result of a packet export returned by `export_packet*`: when it was
/// generated and the key hashes recorded for the export.
#[derive(Debug, Clone)]
pub struct PacketExportRecord {
    pub generated_at: String,
    pub snapshot_hash: String,
    pub event_log_hash: String,
    pub packet_manifest_hash: String,
}
539
/// An in-memory packet file: packet-relative path plus raw content bytes,
/// staged before being written into the packet directory.
#[derive(Debug, Clone)]
struct PacketFile {
    path: String,
    content: Vec<u8>,
}
545
546impl PacketFile {
547    fn text(path: impl Into<String>, content: String) -> Self {
548        Self {
549            path: path.into(),
550            content: content.into_bytes(),
551        }
552    }
553
554    fn json<T: Serialize>(path: impl Into<String>, value: &T) -> Result<Self, String> {
555        let content = serde_json::to_vec_pretty(value)
556            .map_err(|e| format!("Failed to serialize packet file: {e}"))?;
557        Ok(Self {
558            path: path.into(),
559            content,
560        })
561    }
562
563    fn bytes(path: impl Into<String>, content: Vec<u8>) -> Self {
564        Self {
565            path: path.into(),
566            content,
567        }
568    }
569}
570
/// Export `frontier` as a bounded proof packet into `output_dir`, without
/// recording a source path (delegates to `export_packet_with_source`).
pub fn export_packet(frontier: &Project, output_dir: &Path) -> Result<PacketExportRecord, String> {
    export_packet_with_source(frontier, None, output_dir)
}
574
575pub fn export_packet_with_source(
576    frontier: &Project,
577    source_path: Option<&Path>,
578    output_dir: &Path,
579) -> Result<PacketExportRecord, String> {
580    use std::fs;
581
582    fs::create_dir_all(output_dir.join("findings"))
583        .map_err(|e| format!("Failed to create findings dir: {e}"))?;
584    fs::create_dir_all(output_dir.join("reviews"))
585        .map_err(|e| format!("Failed to create reviews dir: {e}"))?;
586    fs::create_dir_all(output_dir.join("sources"))
587        .map_err(|e| format!("Failed to create sources dir: {e}"))?;
588    fs::create_dir_all(output_dir.join("evidence"))
589        .map_err(|e| format!("Failed to create evidence dir: {e}"))?;
590    fs::create_dir_all(output_dir.join("conditions"))
591        .map_err(|e| format!("Failed to create conditions dir: {e}"))?;
592    fs::create_dir_all(output_dir.join("proposals"))
593        .map_err(|e| format!("Failed to create proposals dir: {e}"))?;
594
595    let generated_at = Utc::now().to_rfc3339();
596    let source_evidence = sources::derive_projection(frontier);
597    let source_records = source_evidence.sources;
598    let evidence_atoms = source_evidence.evidence_atoms;
599    let condition_records = source_evidence.condition_records;
600    let mut atoms_by_finding: BTreeMap<String, Vec<&sources::EvidenceAtom>> = BTreeMap::new();
601    for atom in &evidence_atoms {
602        atoms_by_finding
603            .entry(atom.finding_id.clone())
604            .or_default()
605            .push(atom);
606    }
607
608    let mut entity_counts: BTreeMap<String, usize> = BTreeMap::new();
609    let mut entity_types: BTreeMap<String, String> = BTreeMap::new();
610    let mut entity_categories: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
611    let mut entity_finding_ids: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
612
613    for finding in &frontier.findings {
614        for entity in &finding.assertion.entities {
615            *entity_counts.entry(entity.name.clone()).or_default() += 1;
616            entity_types
617                .entry(entity.name.clone())
618                .or_insert_with(|| entity.entity_type.clone());
619            *entity_categories
620                .entry(entity.name.clone())
621                .or_default()
622                .entry(finding.assertion.assertion_type.clone())
623                .or_default() += 1;
624            entity_finding_ids
625                .entry(entity.name.clone())
626                .or_default()
627                .insert(finding.id.clone());
628        }
629    }
630
631    let mut top_entities: Vec<PacketEntitySummary> = entity_counts
632        .iter()
633        .map(|(name, finding_count)| PacketEntitySummary {
634            name: name.clone(),
635            entity_type: entity_types
636                .get(name)
637                .cloned()
638                .unwrap_or_else(|| "unknown".to_string()),
639            finding_count: *finding_count,
640            categories: entity_categories
641                .get(name)
642                .map(|cats| cats.keys().cloned().collect())
643                .unwrap_or_default(),
644        })
645        .collect();
646    top_entities.sort_by(|a, b| {
647        b.finding_count
648            .cmp(&a.finding_count)
649            .then_with(|| a.name.cmp(&b.name))
650    });
651    top_entities.truncate(25);
652
653    let overview = PacketOverview {
654        project_name: frontier.project.name.clone(),
655        description: frontier.project.description.clone(),
656        compiled_at: frontier.project.compiled_at.clone(),
657        generated_at: generated_at.clone(),
658        findings: frontier.stats.findings,
659        papers_processed: frontier.project.papers_processed,
660        avg_confidence: frontier.stats.avg_confidence,
661        categories: frontier
662            .stats
663            .categories
664            .iter()
665            .map(|(k, v)| (k.clone(), *v))
666            .collect(),
667        link_types: frontier
668            .stats
669            .link_types
670            .iter()
671            .map(|(k, v)| (k.clone(), *v))
672            .collect(),
673        top_entities,
674    };
675
676    let mut packet_findings: Vec<PacketFindingSummary> = frontier
677        .findings
678        .iter()
679        .map(|finding| PacketFindingSummary {
680            id: finding.id.clone(),
681            assertion_type: finding.assertion.assertion_type.clone(),
682            assertion_text: finding.assertion.text.clone(),
683            confidence: finding.confidence.score,
684            evidence_type: finding.evidence.evidence_type.clone(),
685            method: finding.evidence.method.clone(),
686            entities: finding
687                .assertion
688                .entities
689                .iter()
690                .map(|entity| entity.name.clone())
691                .collect(),
692            doi: finding.provenance.doi.clone(),
693            source_title: finding.provenance.title.clone(),
694            flags: PacketFlags {
695                gap: finding.flags.gap,
696                contested: finding.flags.contested,
697                replicated: finding.evidence.replicated,
698            },
699            link_count: finding.links.len(),
700        })
701        .collect();
702    packet_findings.sort_by(|a, b| {
703        b.confidence
704            .partial_cmp(&a.confidence)
705            .unwrap_or(std::cmp::Ordering::Equal)
706            .then_with(|| b.link_count.cmp(&a.link_count))
707            .then_with(|| a.id.cmp(&b.id))
708    });
709
710    let high_signal_findings: Vec<PacketFindingSummary> = packet_findings
711        .iter()
712        .filter(|finding| {
713            finding.flags.gap
714                || finding.flags.contested
715                || finding.flags.replicated
716                || finding.confidence >= 0.85
717                || finding.link_count > 0
718        })
719        .take(50)
720        .cloned()
721        .collect();
722
723    let gap_findings: Vec<PacketFindingSummary> = packet_findings
724        .iter()
725        .filter(|finding| finding.flags.gap)
726        .cloned()
727        .collect();
728
729    let contested_findings: Vec<PacketFindingSummary> = packet_findings
730        .iter()
731        .filter(|finding| finding.flags.contested)
732        .cloned()
733        .collect();
734
735    let mut bridge_entities: Vec<PacketBridgeSummary> = entity_categories
736        .iter()
737        .filter(|(_, categories)| categories.len() >= 2)
738        .map(|(entity, categories)| PacketBridgeSummary {
739            entity: entity.clone(),
740            entity_type: entity_types
741                .get(entity)
742                .cloned()
743                .unwrap_or_else(|| "unknown".to_string()),
744            categories: categories.clone(),
745            finding_ids: entity_finding_ids
746                .get(entity)
747                .map(|ids| ids.iter().cloned().collect())
748                .unwrap_or_default(),
749        })
750        .collect();
751    bridge_entities.sort_by(|a, b| {
752        b.categories
753            .len()
754            .cmp(&a.categories.len())
755            .then_with(|| b.finding_ids.len().cmp(&a.finding_ids.len()))
756            .then_with(|| a.entity.cmp(&b.entity))
757    });
758
759    let finding_lookup: HashMap<&str, &FindingBundle> = frontier
760        .findings
761        .iter()
762        .map(|finding| (finding.id.as_str(), finding))
763        .collect();
764    let mut contradictions = Vec::new();
765    let mut seen_pairs = BTreeSet::new();
766    for finding in &frontier.findings {
767        for link in &finding.links {
768            if !(link.link_type == "contradicts" || link.link_type == "disputes") {
769                continue;
770            }
771            let pair_key = if finding.id <= link.target {
772                format!("{}::{}::{}", finding.id, link.target, link.link_type)
773            } else {
774                format!("{}::{}::{}", link.target, finding.id, link.link_type)
775            };
776            if !seen_pairs.insert(pair_key) {
777                continue;
778            }
779            if let Some(target) = finding_lookup.get(link.target.as_str()) {
780                contradictions.push(PacketContradictionSummary {
781                    source_id: finding.id.clone(),
782                    target_id: target.id.clone(),
783                    link_type: link.link_type.clone(),
784                    source_assertion: finding.assertion.text.clone(),
785                    target_assertion: target.assertion.text.clone(),
786                });
787            }
788        }
789    }
790
791    let caveats = packet_caveats();
792    let scope = PacketScope {
793        frontier_name: frontier.project.name.clone(),
794        description: frontier.project.description.clone(),
795        generated_at: generated_at.clone(),
796        source_schema: frontier.schema.clone(),
797        finding_count: frontier.findings.len(),
798        papers_processed: frontier.project.papers_processed,
799        review_event_count: frontier.review_events.len(),
800        intended_use: vec![
801            "Review a bounded compiled frontier".to_string(),
802            "Inspect findings, evidence, confidence, provenance, and links".to_string(),
803            "Compare candidate tensions, gaps, and bridges".to_string(),
804            "Serve reviewable context to MCP/HTTP clients".to_string(),
805        ],
806        out_of_scope: vec![
807            "Autonomous experiment planning".to_string(),
808            "Definitive novelty claims".to_string(),
809            "Institutional federation or broad exchange-network claims".to_string(),
810        ],
811        caveats: caveats.clone(),
812    };
813
814    let source_table: Vec<PacketSourceRow> = source_records
815        .iter()
816        .map(|source| PacketSourceRow {
817            source_key: source.id.clone(),
818            source_id: source.id.clone(),
819            locator: source.locator.clone(),
820            content_hash: source.content_hash.clone(),
821            title: source.title.clone(),
822            doi: source.doi.clone(),
823            pmid: source.pmid.clone(),
824            year: source.year,
825            source_type: source.source_type.clone(),
826            extraction_mode: source.extraction_mode.clone(),
827            source_quality: source.source_quality.clone(),
828            caveats: source.caveats.clone(),
829            finding_ids: source.finding_ids.clone(),
830        })
831        .collect();
832
833    let evidence_matrix: Vec<PacketEvidenceMatrixRow> = frontier
834        .findings
835        .iter()
836        .map(|finding| {
837            let atoms = atoms_by_finding
838                .get(&finding.id)
839                .map(Vec::as_slice)
840                .unwrap_or(&[]);
841            let evidence_atom_ids = atoms.iter().map(|atom| atom.id.clone()).collect::<Vec<_>>();
842            let source_id = atoms.first().map(|atom| atom.source_id.clone());
843            let missing_locator_count = atoms.iter().filter(|atom| atom.locator.is_none()).count();
844            let supports = finding
845                .links
846                .iter()
847                .filter(|link| {
848                    matches!(
849                        link.link_type.as_str(),
850                        "supports" | "extends" | "replicates"
851                    )
852                })
853                .count();
854            let contradicts = finding
855                .links
856                .iter()
857                .filter(|link| matches!(link.link_type.as_str(), "contradicts" | "disputes"))
858                .count();
859            let depends = finding
860                .links
861                .iter()
862                .filter(|link| link.link_type == "depends")
863                .count();
864            PacketEvidenceMatrixRow {
865                finding_id: finding.id.clone(),
866                assertion_type: finding.assertion.assertion_type.clone(),
867                evidence_type: finding.evidence.evidence_type.clone(),
868                method: finding.evidence.method.clone(),
869                confidence: finding.confidence.score,
870                replicated: finding.evidence.replicated,
871                human_data: finding.conditions.human_data,
872                clinical_trial: finding.conditions.clinical_trial,
873                source_key: source_key(finding),
874                source_id,
875                evidence_atom_ids,
876                missing_locator_count,
877                supports,
878                contradicts,
879                depends,
880                flags: PacketFlags {
881                    gap: finding.flags.gap,
882                    contested: finding.flags.contested,
883                    replicated: finding.evidence.replicated,
884                },
885            }
886        })
887        .collect();
888
889    let candidate_gaps: Vec<PacketCandidateGap> = frontier
890        .findings
891        .iter()
892        .filter(|finding| finding.flags.gap)
893        .map(|finding| PacketCandidateGap {
894            finding_id: finding.id.clone(),
895            assertion: finding.assertion.text.clone(),
896            confidence: finding.confidence.score,
897            conditions: finding.conditions.text.clone(),
898            entities: finding
899                .assertion
900                .entities
901                .iter()
902                .map(|entity| entity.name.clone())
903                .collect(),
904            review_status: finding
905                .provenance
906                .review
907                .as_ref()
908                .map(|review| {
909                    if review.reviewed {
910                        "reviewed".to_string()
911                    } else {
912                        "unreviewed".to_string()
913                    }
914                })
915                .unwrap_or_else(|| "unreviewed".to_string()),
916        })
917        .collect();
918
919    let mcp_session = PacketMcpSession {
920        protocol: "model-context-protocol".to_string(),
921        recommended_loop: vec![
922            "frontier_stats".to_string(),
923            "search_findings".to_string(),
924            "get_finding".to_string(),
925            "list_gaps".to_string(),
926            "find_bridges".to_string(),
927            "check_pubmed".to_string(),
928            "list_contradictions".to_string(),
929            "propagate_retraction".to_string(),
930            "apply_observer".to_string(),
931        ],
932        tool_catalog: crate::tool_registry::mcp_tools_json(),
933        notes: caveats.clone(),
934    };
935
936    let stats = PacketManifestStats {
937        findings: frontier.findings.len(),
938        sources: source_records.len(),
939        evidence_atoms: evidence_atoms.len(),
940        condition_records: condition_records.len(),
941        review_events: frontier.review_events.len(),
942        proposals: frontier.proposals.len(),
943        gaps: gap_findings.len(),
944        contested: contested_findings.len(),
945        bridge_entities: bridge_entities.len(),
946        contradiction_edges: contradictions.len(),
947    };
948
949    // Phase K: `checked_artifacts` carries the proof-bearing surface —
950    // canonical artifacts only. Derived projections (signals, queues,
951    // tables, candidate-*) ship in the packet but are regenerable from
952    // canonical inputs and are not proof-load-bearing.
953    let checked_artifacts = packet::canonical_packet_files()
954        .iter()
955        .map(|path| (*path).to_string())
956        .collect::<Vec<_>>();
957
958    let artifact_audit_root = source_path.unwrap_or_else(|| Path::new("."));
959    let artifact_audit = crate::artifact_audit::audit_artifacts(artifact_audit_root, frontier);
960    if !artifact_audit.ok {
961        return Err(format!(
962            "Artifact audit failed for proof packet export: {} issue(s)",
963            artifact_audit.issue_count
964        ));
965    }
966    let (artifact_blob_files, artifact_blob_map) =
967        packet_artifact_blob_files(frontier, source_path)?;
968
969    let check_summary = PacketCheckSummary {
970        status: "ok".to_string(),
971        generated_at: generated_at.clone(),
972        checked_artifacts: checked_artifacts.clone(),
973        artifact_audit: artifact_audit.clone(),
974        counts: stats.clone(),
975        proposal_summary: crate::proposals::summary(frontier),
976        proof_state: frontier.proof_state.clone(),
977        caveats: caveats.clone(),
978    };
979    let signal_report = signals::analyze(frontier, &[]);
980    let quality_table = signals::quality_table(frontier, &signal_report);
981    let state_transitions = state::state_transitions(frontier);
982    let replay_report = events::replay_report(frontier);
983    let ro_crate = signals::ro_crate_metadata(frontier, &checked_artifacts);
984
985    let frontier_bytes = crate::canonical::to_canonical_bytes(frontier)
986        .map_err(|e| format!("Failed to serialize frontier for source hash: {e}"))?;
987    let proof_trace = PacketProofTrace {
988        trace_version: "0.2.0".to_string(),
989        generated_at: generated_at.clone(),
990        source: frontier.project.name.clone(),
991        source_hash: hex::encode(Sha256::digest(&frontier_bytes)),
992        snapshot_hash: replay_report.current_hash.clone(),
993        event_log_hash: replay_report.event_log_hash.clone(),
994        proposal_state_hash: crate::proposals::proposal_state_hash(&frontier.proposals),
995        replay_status: replay_report.status.clone(),
996        packet_manifest_hash: None,
997        schema_version: frontier.vela_version.clone(),
998        checked_artifacts: checked_artifacts.clone(),
999        caveats: caveats.clone(),
1000        status: "ok".to_string(),
1001    };
1002
1003    let readme = export_packet_readme(
1004        frontier,
1005        &generated_at,
1006        high_signal_findings.len(),
1007        gap_findings.len(),
1008        contested_findings.len(),
1009        bridge_entities.len(),
1010        contradictions.len(),
1011    );
1012
1013    let mut files = vec![
1014        PacketFile::text("README.md", readme),
1015        PacketFile::text("reviewer-guide.md", export_reviewer_guide(frontier)),
1016        PacketFile::json("overview.json", &overview)?,
1017        PacketFile::json("scope.json", &scope)?,
1018        PacketFile::json("source-table.json", &source_table)?,
1019        PacketFile::json("sources/source-registry.json", &source_records)?,
1020        PacketFile::json("evidence-matrix.json", &evidence_matrix)?,
1021        PacketFile::json("evidence/evidence-atoms.json", &evidence_atoms)?,
1022        PacketFile::json(
1023            "evidence/source-evidence-map.json",
1024            &sources::source_evidence_map_from_atoms(&evidence_atoms),
1025        )?,
1026        PacketFile::json("conditions/condition-records.json", &condition_records)?,
1027        PacketFile::json(
1028            "conditions/condition-matrix.json",
1029            &sources::condition_matrix(&condition_records),
1030        )?,
1031        PacketFile::json("signals.json", &signal_report.signals)?,
1032        PacketFile::json("review-queue.json", &signal_report.review_queue)?,
1033        PacketFile::json("quality-table.json", &quality_table)?,
1034        PacketFile::json("state-transitions.json", &state_transitions)?,
1035        PacketFile::json("events/events.json", &frontier.events)?,
1036        PacketFile::json("events/replay-report.json", &replay_report)?,
1037        PacketFile::json("proposals/proposals.json", &frontier.proposals)?,
1038        PacketFile::json("ro-crate-metadata.jsonld", &ro_crate)?,
1039        PacketFile::json("candidate-tensions.json", &contradictions)?,
1040        PacketFile::json("candidate-gaps.json", &candidate_gaps)?,
1041        PacketFile::json("candidate-bridges.json", &bridge_entities)?,
1042        PacketFile::json("mcp-session.json", &mcp_session)?,
1043        PacketFile::json("check-summary.json", &check_summary)?,
1044        PacketFile::json("proof-trace.json", &proof_trace)?,
1045        PacketFile::json("findings/high-signal.json", &high_signal_findings)?,
1046        PacketFile::json("findings/full.json", &frontier.findings)?,
1047        PacketFile::json("artifacts/artifacts.json", &frontier.artifacts)?,
1048        PacketFile::json("artifacts/artifact-audit.json", &artifact_audit)?,
1049        PacketFile::json("artifacts/blob-map.json", &artifact_blob_map)?,
1050        PacketFile::json("findings/gaps.json", &gap_findings)?,
1051        PacketFile::json("findings/contested.json", &contested_findings)?,
1052        PacketFile::json("findings/bridges.json", &bridge_entities)?,
1053        PacketFile::json("findings/contradictions.json", &contradictions)?,
1054        PacketFile::json("reviews/review-events.json", &frontier.review_events)?,
1055        PacketFile::json(
1056            "reviews/confidence-updates.json",
1057            &frontier.confidence_updates,
1058        )?,
1059    ];
1060    files.extend(artifact_blob_files);
1061
1062    let lock = PacketLock {
1063        lock_format: "vela.packet-lock.v1".to_string(),
1064        generated_at: generated_at.clone(),
1065        files: files.iter().map(manifest_entry_for_file).collect(),
1066    };
1067    files.push(PacketFile::json("packet.lock.json", &lock)?);
1068
1069    for file in &files {
1070        let full_path = output_dir.join(&file.path);
1071        if let Some(parent) = full_path.parent() {
1072            fs::create_dir_all(parent).map_err(|e| {
1073                format!(
1074                    "Failed to create packet parent dir {}: {e}",
1075                    parent.display()
1076                )
1077            })?;
1078        }
1079        fs::write(&full_path, &file.content)
1080            .map_err(|e| format!("Failed to write packet file {}: {e}", file.path))?;
1081    }
1082
1083    let manifest = PacketManifest {
1084        packet_format: "vela.frontier-packet".to_string(),
1085        packet_version: "v1".to_string(),
1086        generated_at: generated_at.clone(),
1087        source: PacketSource {
1088            project_name: frontier.project.name.clone(),
1089            description: frontier.project.description.clone(),
1090            compiled_at: frontier.project.compiled_at.clone(),
1091            compiler: frontier.project.compiler.clone(),
1092            vela_version: frontier.vela_version.clone(),
1093            schema: frontier.schema.clone(),
1094        },
1095        stats,
1096        included_files: files
1097            .drain(..)
1098            .map(|file| manifest_entry_for_file(&file))
1099            .collect(),
1100    };
1101
1102    let manifest_bytes = serde_json::to_vec_pretty(&manifest)
1103        .map_err(|e| format!("Failed to serialize packet manifest: {e}"))?;
1104    let manifest_path = output_dir.join("manifest.json");
1105    fs::write(&manifest_path, &manifest_bytes)
1106        .map_err(|e| format!("Failed to write manifest.json: {e}"))?;
1107
1108    let packet_manifest_hash = hex::encode(Sha256::digest(&manifest_bytes));
1109    Ok(PacketExportRecord {
1110        generated_at,
1111        snapshot_hash: replay_report.current_hash,
1112        event_log_hash: replay_report.event_log_hash,
1113        packet_manifest_hash,
1114    })
1115}
1116
1117fn packet_artifact_blob_files(
1118    frontier: &Project,
1119    source_path: Option<&Path>,
1120) -> Result<(Vec<PacketFile>, Vec<PacketArtifactBlob>), String> {
1121    let Some(root) = artifact_source_root(source_path) else {
1122        if frontier.artifacts.iter().any(is_local_artifact) {
1123            return Err(
1124                "Proof packet export needs a frontier directory to copy local artifact blobs"
1125                    .to_string(),
1126            );
1127        }
1128        return Ok((Vec::new(), Vec::new()));
1129    };
1130
1131    let mut files = Vec::new();
1132    let mut blob_map = Vec::new();
1133    let mut seen_hashes = BTreeSet::new();
1134
1135    for artifact in frontier
1136        .artifacts
1137        .iter()
1138        .filter(|artifact| is_local_artifact(artifact))
1139    {
1140        let Some(hex) = artifact.content_hash.strip_prefix("sha256:") else {
1141            return Err(format!(
1142                "Artifact {} has unsupported content hash '{}'",
1143                artifact.id, artifact.content_hash
1144            ));
1145        };
1146        let locator = artifact
1147            .locator
1148            .as_deref()
1149            .ok_or_else(|| format!("Artifact {} is local but has no locator", artifact.id))?;
1150        let source = resolve_artifact_locator(&root, locator);
1151        let bytes = std::fs::read(&source).map_err(|e| {
1152            format!(
1153                "Failed to read local artifact blob for {} at {}: {e}",
1154                artifact.id,
1155                source.display()
1156            )
1157        })?;
1158        let actual = hex::encode(Sha256::digest(&bytes));
1159        if actual != hex {
1160            return Err(format!(
1161                "Artifact {} blob hash mismatch: expected {}, found {}",
1162                artifact.id, hex, actual
1163            ));
1164        }
1165        let packet_path = format!("artifacts/blobs/sha256/{hex}");
1166        if seen_hashes.insert(hex.to_string()) {
1167            files.push(PacketFile::bytes(packet_path.clone(), bytes.clone()));
1168        }
1169        blob_map.push(PacketArtifactBlob {
1170            artifact_id: artifact.id.clone(),
1171            content_hash: artifact.content_hash.clone(),
1172            source_locator: locator.to_string(),
1173            packet_path,
1174            size_bytes: bytes.len(),
1175        });
1176    }
1177
1178    Ok((files, blob_map))
1179}
1180
/// Resolve the directory that relative artifact locators are joined
/// against: a directory path is used as-is, a file path contributes its
/// parent, and `None` stays `None`.
fn artifact_source_root(source_path: Option<&Path>) -> Option<PathBuf> {
    match source_path {
        Some(dir) if dir.is_dir() => Some(dir.to_path_buf()),
        Some(file) => file.parent().map(Path::to_path_buf),
        None => None,
    }
}
1189
1190fn is_local_artifact(artifact: &Artifact) -> bool {
1191    matches!(artifact.storage_mode.as_str(), "local_blob" | "local_file")
1192}
1193
/// Turn an artifact locator into a concrete filesystem path: absolute
/// locators are taken verbatim, relative ones are joined onto `root`.
fn resolve_artifact_locator(root: &Path, locator: &str) -> PathBuf {
    match Path::new(locator) {
        absolute if absolute.is_absolute() => absolute.to_path_buf(),
        relative => root.join(relative),
    }
}
1202
1203fn export_packet_readme(
1204    frontier: &Project,
1205    generated_at: &str,
1206    high_signal_count: usize,
1207    gap_count: usize,
1208    contested_count: usize,
1209    bridge_count: usize,
1210    contradiction_count: usize,
1211) -> String {
1212    let mut out = String::new();
1213    out.push_str(&format!("# {} packet\n\n", frontier.project.name));
1214    out.push_str(&format!("{}\n\n", frontier.project.description));
1215    out.push_str("This export is a bounded network packet: a compact, publishable subset of the frontier optimized for review, contradiction inspection, and grounded agent context. It intentionally does not dump the full raw frontier by default.\n\n");
1216    out.push_str("## Source\n\n");
1217    out.push_str(&format!("- Project: {}\n", frontier.project.name));
1218    out.push_str(&format!(
1219        "- Compiled at: {}\n",
1220        frontier.project.compiled_at
1221    ));
1222    out.push_str(&format!("- Generated at: {}\n", generated_at));
1223    out.push_str(&format!("- Compiler: {}\n", frontier.project.compiler));
1224    out.push_str(&format!("- Vela version: {}\n", frontier.vela_version));
1225    out.push_str(&format!("- Schema: {}\n\n", frontier.schema));
1226    out.push_str("## Included artifacts\n\n");
1227    out.push_str("- `manifest.json` — provenance, version stamp, checksums\n");
1228    out.push_str("- `overview.json` — project-level stats, categories, top entities\n");
1229    out.push_str("- `findings/high-signal.json` — compact high-signal finding subset\n");
1230    out.push_str(
1231        "- `findings/full.json` — canonical finding bundles for packet import and merge\n",
1232    );
1233    out.push_str("- `artifacts/artifacts.json`: content-addressed protocols, files, records, and dataset manifests\n");
1234    out.push_str(
1235        "- `artifacts/artifact-audit.json` — artifact integrity report used during export\n",
1236    );
1237    out.push_str(
1238        "- `artifacts/blob-map.json` — packet-local hash map for checked local artifact bytes\n",
1239    );
1240    out.push_str("- `findings/gaps.json` — gap-tagged findings\n");
1241    out.push_str("- `findings/contested.json` — contested findings\n");
1242    out.push_str("- `findings/bridges.json` — entities spanning multiple assertion categories\n");
1243    out.push_str("- `findings/contradictions.json` — explicit contradiction/dispute edges\n");
1244    out.push_str("- `reviews/review-events.json` — attached review events\n");
1245    out.push_str("- `reviews/confidence-updates.json` — interpretation confidence revisions\n");
1246    out.push_str("- `state-transitions.json` — combined review and confidence transition log\n\n");
1247    out.push_str("## Packet stats\n\n");
1248    out.push_str(&format!(
1249        "- Findings in source frontier: {}\n",
1250        frontier.findings.len()
1251    ));
1252    out.push_str(&format!(
1253        "- High-signal findings exported: {}\n",
1254        high_signal_count
1255    ));
1256    out.push_str(&format!("- Gap findings exported: {}\n", gap_count));
1257    out.push_str(&format!(
1258        "- Contested findings exported: {}\n",
1259        contested_count
1260    ));
1261    out.push_str(&format!("- Bridge entities exported: {}\n", bridge_count));
1262    out.push_str(&format!(
1263        "- Contradiction edges exported: {}\n",
1264        contradiction_count
1265    ));
1266    out.push_str(&format!(
1267        "- Review events exported: {}\n",
1268        frontier.review_events.len()
1269    ));
1270    out
1271}
1272
1273fn export_reviewer_guide(frontier: &Project) -> String {
1274    let mut out = String::new();
1275    out.push_str(&format!("# Reviewer guide: {}\n\n", frontier.project.name));
1276    out.push_str("Use this packet as a reviewable frontier snapshot. Start with `scope.json`, then inspect `evidence-matrix.json`, `candidate-tensions.json`, `candidate-gaps.json`, and `candidate-bridges.json` before reading individual finding bundles.\n\n");
1277    out.push_str("## Suggested review loop\n\n");
1278    out.push_str(
1279        "1. Confirm the bounded scope and source corpus in `scope.json`, `source-table.json`, and `sources/source-registry.json`.\n",
1280    );
1281    out.push_str("2. Check high-confidence or high-link findings in `evidence-matrix.json`, then inspect exact source-grounded atoms in `evidence/evidence-atoms.json`.\n");
1282    out.push_str(
1283        "3. Inspect candidate tensions against the full finding bundles in `findings/full.json`.\n",
1284    );
1285    out.push_str(
1286        "4. Treat candidate gaps and bridges as leads requiring review, not as settled claims.\n",
1287    );
1288    out.push_str("5. Use `mcp-session.json` to replay the conservative MCP investigation loop.\n");
1289    out.push_str("6. Verify checksums with `manifest.json` and `packet.lock.json` before comparing packet diffs.\n\n");
1290    out.push_str("## Caveats\n\n");
1291    for caveat in packet_caveats() {
1292        out.push_str(&format!("- {caveat}\n"));
1293    }
1294    out
1295}
1296
/// Standing caveats shipped with every packet export. These qualify the
/// heuristic outputs (candidate lists, evidence ranking, simulations) as
/// leads requiring review rather than settled conclusions.
fn packet_caveats() -> Vec<String> {
    [
        "Candidate contradictions, gaps, and bridges require human review.",
        "Evidence ranking is heuristic: meta-analysis > RCT > cohort > case-control > case-report > in-vitro.",
        "PubMed prior-art checks are rough signals, not proof of novelty.",
        "Observer policy output is weighted reranking, not definitive disagreement.",
        "Retraction impact is simulated over declared dependency links.",
    ]
    .into_iter()
    .map(str::to_string)
    .collect()
}
1306
1307fn source_key(finding: &FindingBundle) -> String {
1308    if let Some(doi) = &finding.provenance.doi {
1309        return format!("doi:{doi}");
1310    }
1311    if let Some(pmid) = &finding.provenance.pmid {
1312        return format!("pmid:{pmid}");
1313    }
1314    format!("title:{}", finding.provenance.title)
1315}
1316
1317fn manifest_entry_for_file(file: &PacketFile) -> PacketManifestFile {
1318    PacketManifestFile {
1319        path: file.path.clone(),
1320        sha256: hex::encode(Sha256::digest(&file.content)),
1321        bytes: file.content.len(),
1322    }
1323}
1324
1325// ── Markdown ─────────────────────────────────────────────────────────────────
1326
1327fn export_markdown(frontier: &Project) -> String {
1328    let mut out = String::new();
1329
1330    out.push_str(&format!("# {}\n\n", frontier.project.name));
1331    out.push_str(&format!("{}\n\n", frontier.project.description));
1332    out.push_str(&format!(
1333        "**Findings:** {} | **Papers:** {} | **Avg confidence:** {:.2}\n\n",
1334        frontier.stats.findings, frontier.project.papers_processed, frontier.stats.avg_confidence
1335    ));
1336
1337    // Group by assertion type.
1338    let mut by_type: BTreeMap<String, Vec<&FindingBundle>> = BTreeMap::new();
1339    for f in &frontier.findings {
1340        by_type
1341            .entry(f.assertion.assertion_type.clone())
1342            .or_default()
1343            .push(f);
1344    }
1345
1346    for (atype, findings) in &by_type {
1347        out.push_str(&format!("## {} ({})\n\n", atype, findings.len()));
1348
1349        for f in findings {
1350            let entities: Vec<&str> = f
1351                .assertion
1352                .entities
1353                .iter()
1354                .map(|e| e.name.as_str())
1355                .collect();
1356            let repl = if f.evidence.replicated {
1357                " [replicated]"
1358            } else {
1359                ""
1360            };
1361            let gap = if f.flags.gap { " [GAP]" } else { "" };
1362            let contested = if f.flags.contested {
1363                " [CONTESTED]"
1364            } else {
1365                ""
1366            };
1367
1368            out.push_str(&format!(
1369                "- **[{:.2}]** {}{}{}{}\n",
1370                f.confidence.score, f.assertion.text, repl, gap, contested
1371            ));
1372            if !entities.is_empty() {
1373                out.push_str(&format!("  - Entities: {}\n", entities.join(", ")));
1374            }
1375            if let Some(doi) = &f.provenance.doi {
1376                let year = f.provenance.year.map(|y| y.to_string()).unwrap_or_default();
1377                out.push_str(&format!(
1378                    "  - Source: {} ({}) [doi:{}](https://doi.org/{})\n",
1379                    f.provenance.title, year, doi, doi
1380                ));
1381            }
1382            out.push('\n');
1383        }
1384    }
1385
1386    out
1387}
1388
1389// ── Nanopub validation ─────────────────────────────────────────────────────
1390
1391/// Validate a JSON-LD export against nanopub structural expectations.
1392///
1393/// Returns a list of validation warnings. An empty list means the export
1394/// passes all checks. This is not a full nanopub spec validator, but it
1395/// catches the most common structural issues for interoperability.
1396pub fn validate_nanopub(jsonld: &str) -> Vec<String> {
1397    let mut warnings = Vec::new();
1398
1399    let doc: serde_json::Value = match serde_json::from_str(jsonld) {
1400        Ok(v) => v,
1401        Err(e) => {
1402            warnings.push(format!("Invalid JSON: {e}"));
1403            return warnings;
1404        }
1405    };
1406
1407    // Check top-level @context exists
1408    if doc.get("@context").is_none() {
1409        warnings.push("Missing top-level @context".into());
1410    }
1411
1412    let graph = match doc["@graph"].as_array() {
1413        Some(g) => g,
1414        None => {
1415            warnings.push("Missing or invalid @graph array".into());
1416            return warnings;
1417        }
1418    };
1419
1420    for (i, node) in graph.iter().enumerate() {
1421        let label = node["@id"]
1422            .as_str()
1423            .map(|s| s.to_string())
1424            .unwrap_or_else(|| format!("graph[{}]", i));
1425
1426        // Every finding must have @type
1427        if node.get("@type").is_none() {
1428            warnings.push(format!("{}: missing @type", label));
1429        }
1430
1431        // Provenance must include source information
1432        let activity = &node["prov:wasGeneratedBy"];
1433        if activity.is_null() {
1434            warnings.push(format!(
1435                "{}: missing prov:wasGeneratedBy (no provenance activity)",
1436                label
1437            ));
1438        } else if activity["prov:used"].is_null() {
1439            warnings.push(format!(
1440                "{}: provenance activity has no prov:used (no source DOI)",
1441                label
1442            ));
1443        }
1444
1445        // Assertions should have entities with identifiers
1446        if let Some(entities) = node["vela:hasEntity"].as_array() {
1447            for (j, entity) in entities.iter().enumerate() {
1448                let ename = entity["vela:entityName"].as_str().unwrap_or("unknown");
1449                if entity.get("schema:identifier").is_none() {
1450                    warnings.push(format!(
1451                        "{}: entity {} ('{}') has no schema:identifier",
1452                        label, j, ename
1453                    ));
1454                }
1455            }
1456        }
1457    }
1458
1459    warnings
1460}
1461
1462#[cfg(test)]
1463mod tests {
1464    use super::*;
1465    use crate::bundle::*;
1466    use crate::project;
1467
    /// Build a minimal single-finding `Project` fixture for the export tests.
    ///
    /// The one finding ("NLRP3 activates caspase-1") carries two protein
    /// entities, replicated experimental evidence, in-vitro-only conditions,
    /// and DOI-bearing published-paper provenance, so each exporter has
    /// something concrete to render.
    fn make_frontier() -> Project {
        let f1 = FindingBundle {
            id: "vf_abc123".into(),
            version: 1,
            previous_version: None,
            // A positive "activates" mechanism linking the two proteins below.
            assertion: Assertion {
                text: "NLRP3 activates caspase-1".into(),
                assertion_type: "mechanism".into(),
                entities: vec![
                    Entity {
                        name: "NLRP3".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                    Entity {
                        name: "caspase-1".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                ],
                relation: Some("activates".into()),
                direction: Some("positive".into()),
                causal_claim: None,
                causal_evidence_grade: None,
            },
            // Replicated experimental evidence in a mouse model system.
            evidence: Evidence {
                evidence_type: "experimental".into(),
                model_system: "mouse".into(),
                species: Some("Mus musculus".into()),
                method: "Western blot".into(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: true,
                replication_count: None,
                evidence_spans: vec![],
            },
            // In-vitro only: no in-vivo, human, or clinical-trial data.
            conditions: Conditions {
                text: "In vitro".into(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: true,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            confidence: Confidence::raw(0.9, "grounded", 0.85),
            // DOI-bearing provenance; the JSON-LD and BibTeX tests assert
            // against this DOI and title.
            provenance: Provenance {
                source_type: "published_paper".into(),
                doi: Some("10.1234/test".into()),
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: "NLRP3 inflammasome paper".into(),
                authors: vec![Author {
                    name: "Smith J".into(),
                    orcid: None,
                }],
                year: Some(2023),
                journal: Some("Nature".into()),
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: Some(50),
            },
            // All flags cleared: not a gap, not contested, not retracted.
            flags: Flags {
                gap: false,
                negative_space: false,
                contested: false,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        };

        // NOTE(review): the numeric args (1, 0) are forwarded as given —
        // confirm their meaning against project::assemble's signature.
        project::assemble("Test frontier", vec![f1], 1, 0, "Test description")
    }
1579
1580    #[test]
1581    fn csv_has_header_and_row() {
1582        let c = make_frontier();
1583        let csv = export_csv(&c);
1584        let lines: Vec<&str> = csv.lines().collect();
1585        assert!(lines[0].starts_with("id,"));
1586        assert_eq!(lines.len(), 2); // header + 1 finding
1587        assert!(lines[1].contains("NLRP3"));
1588    }
1589
1590    #[test]
1591    fn jsonld_valid_json() {
1592        let c = make_frontier();
1593        let jsonld = export_jsonld(&c);
1594        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
1595        // Verify context has nanopub-inspired vocabulary
1596        let ctx = &parsed["@context"];
1597        assert_eq!(ctx["@vocab"], "https://vela.science/schema/");
1598        assert_eq!(ctx["schema"], "https://schema.org/");
1599        assert_eq!(ctx["prov"], "http://www.w3.org/ns/prov#");
1600        assert_eq!(ctx["np"], "http://www.nanopub.org/nschema#");
1601        let graph = parsed["@graph"].as_array().unwrap();
1602        assert_eq!(graph.len(), 1);
1603        assert_eq!(graph[0]["@type"], "vela:FindingBundle");
1604    }
1605
1606    #[test]
1607    fn jsonld_finding_fields() {
1608        let c = make_frontier();
1609        let jsonld = export_jsonld(&c);
1610        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
1611        let node = &parsed["@graph"][0];
1612        assert_eq!(node["@id"], "vela:vf_abc123");
1613        assert_eq!(node["vela:assertionType"], "mechanism");
1614        assert_eq!(node["vela:confidence"], 0.9);
1615        assert_eq!(node["vela:evidenceType"], "experimental");
1616        // Provenance should reference DOI
1617        let activity = &node["prov:wasGeneratedBy"];
1618        assert_eq!(activity["prov:used"]["@id"], "doi:10.1234/test");
1619    }
1620
1621    #[test]
1622    fn jsonld_entities_present() {
1623        let c = make_frontier();
1624        let jsonld = export_jsonld(&c);
1625        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
1626        let entities = parsed["@graph"][0]["vela:hasEntity"].as_array().unwrap();
1627        assert_eq!(entities.len(), 2);
1628        assert_eq!(entities[0]["vela:entityName"], "NLRP3");
1629        assert_eq!(entities[0]["vela:entityType"], "protein");
1630    }
1631
1632    #[test]
1633    fn jsonld_roundtrip_valid() {
1634        let c = make_frontier();
1635        let jsonld = export_jsonld(&c);
1636        // Verify it parses back to valid JSON
1637        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
1638        // Re-serialize and parse again to confirm stability
1639        let re_serialized = serde_json::to_string_pretty(&parsed).unwrap();
1640        let re_parsed: serde_json::Value = serde_json::from_str(&re_serialized).unwrap();
1641        assert_eq!(parsed, re_parsed);
1642    }
1643
1644    #[test]
1645    fn bibtex_has_entry() {
1646        let c = make_frontier();
1647        let bib = export_bibtex(&c);
1648        assert!(bib.contains("@article{"));
1649        assert!(bib.contains("NLRP3 inflammasome paper"));
1650    }
1651
1652    #[test]
1653    fn markdown_has_heading() {
1654        let c = make_frontier();
1655        let md = export_markdown(&c);
1656        assert!(md.starts_with("# Test frontier"));
1657        assert!(md.contains("## mechanism"));
1658    }
1659
1660    #[test]
1661    fn csv_escape_handles_commas() {
1662        assert_eq!(csv_escape("hello,world"), "\"hello,world\"");
1663        assert_eq!(csv_escape("plain"), "plain");
1664    }
1665
1666    #[test]
1667    fn format_parsing() {
1668        assert!(ExportFormat::from_str("csv").is_ok());
1669        assert!(ExportFormat::from_str("jsonld").is_ok());
1670        assert!(ExportFormat::from_str("json-ld").is_ok());
1671        assert!(ExportFormat::from_str("bibtex").is_ok());
1672        assert!(ExportFormat::from_str("bib").is_ok());
1673        assert!(ExportFormat::from_str("markdown").is_ok());
1674        assert!(ExportFormat::from_str("md").is_ok());
1675        assert!(ExportFormat::from_str("packet").is_ok());
1676        assert!(ExportFormat::from_str("wiki").is_err());
1677        assert!(ExportFormat::from_str("obsidian").is_err());
1678        assert!(ExportFormat::from_str("xml").is_err());
1679    }
1680
1681    #[test]
1682    fn multi_file_formats_are_flagged() {
1683        let packet = ExportFormat::from_str("packet").unwrap();
1684        assert!(packet.is_multi_file());
1685        let csv = ExportFormat::from_str("csv").unwrap();
1686        assert!(!csv.is_multi_file());
1687    }
1688
1689    #[test]
1690    fn packet_export_creates_manifest_and_payload_files() {
1691        let c = make_frontier();
1692        let dir = std::env::temp_dir().join(format!("vela_packet_test_{}", std::process::id()));
1693        let _ = std::fs::remove_dir_all(&dir);
1694
1695        export_packet(&c, &dir).unwrap();
1696
1697        assert!(dir.join("README.md").exists());
1698        assert!(dir.join("reviewer-guide.md").exists());
1699        assert!(dir.join("manifest.json").exists());
1700        assert!(dir.join("overview.json").exists());
1701        assert!(dir.join("scope.json").exists());
1702        assert!(dir.join("source-table.json").exists());
1703        assert!(dir.join("sources/source-registry.json").exists());
1704        assert!(dir.join("evidence-matrix.json").exists());
1705        assert!(dir.join("evidence/evidence-atoms.json").exists());
1706        assert!(dir.join("evidence/source-evidence-map.json").exists());
1707        assert!(dir.join("conditions/condition-records.json").exists());
1708        assert!(dir.join("conditions/condition-matrix.json").exists());
1709        assert!(dir.join("candidate-tensions.json").exists());
1710        assert!(dir.join("candidate-gaps.json").exists());
1711        assert!(dir.join("candidate-bridges.json").exists());
1712        assert!(dir.join("mcp-session.json").exists());
1713        assert!(dir.join("check-summary.json").exists());
1714        assert!(dir.join("signals.json").exists());
1715        assert!(dir.join("review-queue.json").exists());
1716        assert!(dir.join("quality-table.json").exists());
1717        assert!(dir.join("state-transitions.json").exists());
1718        assert!(dir.join("events/events.json").exists());
1719        assert!(dir.join("events/replay-report.json").exists());
1720        assert!(dir.join("ro-crate-metadata.jsonld").exists());
1721        assert!(dir.join("proof-trace.json").exists());
1722        assert!(dir.join("packet.lock.json").exists());
1723        assert!(dir.join("findings/high-signal.json").exists());
1724        assert!(dir.join("findings/full.json").exists());
1725        assert!(dir.join("artifacts/artifacts.json").exists());
1726        assert!(dir.join("artifacts/artifact-audit.json").exists());
1727        assert!(dir.join("artifacts/blob-map.json").exists());
1728        assert!(dir.join("findings/gaps.json").exists());
1729        assert!(dir.join("findings/contested.json").exists());
1730        assert!(dir.join("findings/bridges.json").exists());
1731        assert!(dir.join("findings/contradictions.json").exists());
1732        assert!(dir.join("reviews/review-events.json").exists());
1733        assert!(dir.join("reviews/confidence-updates.json").exists());
1734
1735        let readme = std::fs::read_to_string(dir.join("README.md")).unwrap();
1736        assert!(readme.contains("bounded network packet"));
1737        assert!(readme.contains("manifest.json"));
1738
1739        let manifest: serde_json::Value =
1740            serde_json::from_str(&std::fs::read_to_string(dir.join("manifest.json")).unwrap())
1741                .unwrap();
1742        assert_eq!(manifest["packet_format"], "vela.frontier-packet");
1743        assert_eq!(manifest["packet_version"], "v1");
1744        assert_eq!(manifest["stats"]["findings"], 1);
1745        assert_eq!(manifest["stats"]["sources"], 1);
1746        assert_eq!(manifest["stats"]["evidence_atoms"], 1);
1747        assert_eq!(manifest["stats"]["condition_records"], 1);
1748        assert_eq!(manifest["included_files"].as_array().unwrap().len(), 37);
1749
1750        let high_signal: serde_json::Value = serde_json::from_str(
1751            &std::fs::read_to_string(dir.join("findings/high-signal.json")).unwrap(),
1752        )
1753        .unwrap();
1754        assert_eq!(high_signal.as_array().unwrap().len(), 1);
1755        assert_eq!(high_signal[0]["id"], "vf_abc123");
1756
1757        let _ = std::fs::remove_dir_all(&dir);
1758    }
1759
1760    #[test]
1761    fn nanopub_validates_well_formed_jsonld() {
1762        let c = make_frontier();
1763        let jsonld = export_jsonld(&c);
1764        let warnings = validate_nanopub(&jsonld);
1765        // Only entity-identifier warnings expected (test entities are unresolved)
1766        for w in &warnings {
1767            assert!(w.contains("schema:identifier"), "Unexpected warning: {w}");
1768        }
1769    }
1770
1771    #[test]
1772    fn nanopub_catches_invalid_json() {
1773        let warnings = validate_nanopub("not valid json {{{");
1774        assert_eq!(warnings.len(), 1);
1775        assert!(warnings[0].contains("Invalid JSON"));
1776    }
1777
1778    #[test]
1779    fn nanopub_catches_missing_graph() {
1780        let warnings = validate_nanopub(r#"{"@context": {}}"#);
1781        assert_eq!(warnings.len(), 1);
1782        assert!(warnings[0].contains("@graph"));
1783    }
1784
1785    #[test]
1786    fn nanopub_catches_missing_type() {
1787        let doc = serde_json::json!({
1788            "@context": {},
1789            "@graph": [{"@id": "vela:test"}]
1790        });
1791        let warnings = validate_nanopub(&doc.to_string());
1792        assert!(warnings.iter().any(|w| w.contains("missing @type")));
1793    }
1794}