1use std::collections::{BTreeMap, BTreeSet, HashMap};
4use std::path::{Path, PathBuf};
5
6use chrono::Utc;
7use serde::Serialize;
8use sha2::{Digest, Sha256};
9
10use crate::bundle::{Artifact, FindingBundle};
11use crate::project::Project;
12use crate::{events, packet, repo, signals, sources, state};
13
/// The output formats `vela export` understands.
///
/// All variants except [`ExportFormat::Packet`] render to a single string;
/// `Packet` produces a directory tree and must go through `export_packet()`.
#[derive(Debug, Clone, Copy)]
pub enum ExportFormat {
    Csv,
    JsonLd,
    BibTex,
    Markdown,
    Project,
    Packet,
}

impl ExportFormat {
    /// Parse a user-supplied format name, case-insensitively and with the
    /// common aliases (`json-ld`, `bib`, `md`, `json`).
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Result<Self, String> {
        let normalized = s.to_lowercase();
        match normalized.as_str() {
            "packet" => Ok(Self::Packet),
            "frontier" | "json" => Ok(Self::Project),
            "markdown" | "md" => Ok(Self::Markdown),
            "bibtex" | "bib" => Ok(Self::BibTex),
            "jsonld" | "json-ld" => Ok(Self::JsonLd),
            "csv" => Ok(Self::Csv),
            _ => Err(format!(
                "Unknown format '{}'. Supported: csv, jsonld, bibtex, markdown, frontier, packet",
                s
            )),
        }
    }

    /// True for formats that emit a directory of files rather than one stream.
    pub fn is_multi_file(&self) -> bool {
        matches!(*self, Self::Packet)
    }
}
49
50pub fn export(frontier: &Project, format: ExportFormat) -> String {
51 match format {
52 ExportFormat::Csv => export_csv(frontier),
53 ExportFormat::JsonLd => export_jsonld(frontier),
54 ExportFormat::BibTex => export_bibtex(frontier),
55 ExportFormat::Markdown => export_markdown(frontier),
56 ExportFormat::Project => {
57 serde_json::to_string_pretty(frontier).expect("Failed to serialize frontier")
58 }
59 ExportFormat::Packet => {
60 panic!("Packet format is multi-file. Use export_packet() instead of export().");
61 }
62 }
63}
64
65pub fn run(frontier_path: &Path, format_str: &str, output: Option<&Path>) {
66 let frontier = repo::load_from_path(frontier_path).expect("Failed to load frontier");
67
68 let format = match ExportFormat::from_str(format_str) {
69 Ok(f) => f,
70 Err(e) => {
71 eprintln!("{} {e}", crate::cli_style::err_prefix());
72 std::process::exit(1);
73 }
74 };
75
76 if format.is_multi_file() {
77 let out_dir = output.unwrap_or_else(|| {
78 eprintln!(
79 "{} {} format requires --output <directory>",
80 crate::cli_style::err_prefix(),
81 format_str
82 );
83 std::process::exit(1);
84 });
85 let result = match format {
86 ExportFormat::Packet => {
87 export_packet_with_source(&frontier, Some(frontier_path), out_dir).map(|_| ())
88 }
89 _ => unreachable!("single-file format reached multi-file branch"),
90 };
91 match result {
92 Ok(()) => {
93 eprintln!(
94 "sealed · {} findings as {} · {}",
95 frontier.findings.len(),
96 format_str,
97 out_dir.display()
98 );
99 }
100 Err(e) => {
101 eprintln!("{} {e}", crate::cli_style::err_prefix());
102 std::process::exit(1);
103 }
104 }
105 return;
106 }
107
108 let result = export(&frontier, format);
109
110 if let Some(out_path) = output {
111 std::fs::write(out_path, &result).expect("Failed to write output file");
112 eprintln!(
113 "sealed · {} findings · {}",
114 frontier.findings.len(),
115 out_path.display()
116 );
117 } else {
118 print!("{result}");
119 }
120}
121
/// Quote a CSV field per RFC 4180 when needed.
///
/// A field is wrapped in double quotes if it contains a comma, a double
/// quote, or a line break; embedded quotes are doubled. Fields that need no
/// quoting are returned unchanged.
fn csv_escape(s: &str) -> String {
    // Fix: '\r' must also trigger quoting — RFC 4180 treats CR as a record
    // separator character, so an unquoted carriage return corrupts the row.
    if s.contains(',') || s.contains('"') || s.contains('\n') || s.contains('\r') {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
131
132fn export_csv(frontier: &Project) -> String {
133 let mut out = String::new();
134 out.push_str("id,assertion_type,assertion_text,confidence,replicated,entities,year,doi,source_title,gap,contested\n");
135
136 for f in &frontier.findings {
137 let entities: Vec<&str> = f
138 .assertion
139 .entities
140 .iter()
141 .map(|e| e.name.as_str())
142 .collect();
143 let row = format!(
144 "{},{},{},{},{},{},{},{},{},{},{}\n",
145 csv_escape(&f.id),
146 csv_escape(&f.assertion.assertion_type),
147 csv_escape(&f.assertion.text),
148 f.confidence.score,
149 f.evidence.replicated,
150 csv_escape(&entities.join(";")),
151 f.provenance.year.map(|y| y.to_string()).unwrap_or_default(),
152 csv_escape(f.provenance.doi.as_deref().unwrap_or("")),
153 csv_escape(&f.provenance.title),
154 f.flags.gap,
155 f.flags.contested,
156 );
157 out.push_str(&row);
158 }
159 out
160}
161
/// Render the frontier as a JSON-LD document: one `vela:FindingBundle` node
/// per finding under `@graph`, with schema.org / PROV-O / nanopub prefixes
/// declared in `@context`.
fn export_jsonld(frontier: &Project) -> String {
    let items: Vec<serde_json::Value> = frontier
        .findings
        .iter()
        .map(|f| {
            // One node per entity; canonical IDs are expanded into resolver
            // URLs for the databases we recognise.
            let entities: Vec<serde_json::Value> = f
                .assertion
                .entities
                .iter()
                .map(|e| {
                    let mut entity = serde_json::json!({
                        "vela:entityName": e.name,
                        "vela:entityType": e.entity_type,
                    });
                    if let Some(canonical) = &e.canonical_id {
                        // Map known registry names to their public URL schemes;
                        // anything unrecognised falls back to a `urn:` form.
                        let url = match canonical.source.as_str() {
                            "uniprot" => {
                                format!("https://www.uniprot.org/uniprot/{}", canonical.id)
                            }
                            "pubchem" => format!(
                                "https://pubchem.ncbi.nlm.nih.gov/compound/{}",
                                canonical.id
                            ),
                            "mesh" => format!("https://id.nlm.nih.gov/mesh/{}", canonical.id),
                            "ncbi_gene" => {
                                format!("https://www.ncbi.nlm.nih.gov/gene/{}", canonical.id)
                            }
                            "chebi" => format!(
                                "https://www.ebi.ac.uk/chebi/searchId.do?chebiId={}",
                                canonical.id
                            ),
                            "go" => {
                                format!("http://amigo.geneontology.org/amigo/term/{}", canonical.id)
                            }
                            _ => format!("urn:{}:{}", canonical.source, canonical.id),
                        };
                        entity["schema:identifier"] = serde_json::json!({"@id": url});
                    }
                    entity
                })
                .collect();

            // Inter-finding links become typed references to other vela: ids.
            let links: Vec<serde_json::Value> = f
                .links
                .iter()
                .map(|l| {
                    serde_json::json!({
                        "vela:linkTarget": {"@id": format!("vela:{}", l.target)},
                        "vela:linkType": l.link_type,
                    })
                })
                .collect();

            // PROV activity: which vela version generated the bundle, and
            // (when a DOI is present) which publication it used.
            let mut activity = serde_json::json!({
                "@type": "prov:Activity",
                "prov:wasAssociatedWith": format!("vela/{}", env!("CARGO_PKG_VERSION")),
            });
            if let Some(doi) = &f.provenance.doi {
                activity["prov:used"] = serde_json::json!({"@id": format!("doi:{doi}")});
            }

            let mut node = serde_json::json!({
                "@id": format!("vela:{}", f.id),
                "@type": "vela:FindingBundle",
                "vela:assertionText": f.assertion.text,
                "vela:assertionType": f.assertion.assertion_type,
                "vela:confidence": f.confidence.score,
                "vela:evidenceType": f.evidence.evidence_type,
                "schema:dateCreated": f.created,
                "prov:wasGeneratedBy": activity,
            });

            // Only attach the optional arrays when non-empty, to keep the
            // emitted document minimal.
            if !entities.is_empty() {
                node["vela:hasEntity"] = serde_json::Value::Array(entities);
            }
            if !links.is_empty() {
                node["vela:hasLink"] = serde_json::Value::Array(links);
            }

            node
        })
        .collect();

    let doc = serde_json::json!({
        "@context": {
            "@vocab": "https://vela.science/schema/",
            "schema": "https://schema.org/",
            "prov": "http://www.w3.org/ns/prov#",
            "np": "http://www.nanopub.org/nschema#",
            "doi": "https://doi.org/",
            "orcid": "https://orcid.org/"
        },
        "@graph": items,
    });

    // Serialization of a value built from json!() should not fail; an empty
    // string is the defensive fallback.
    serde_json::to_string_pretty(&doc).unwrap_or_default()
}
265
266fn export_bibtex(frontier: &Project) -> String {
269 let mut seen: BTreeMap<String, &FindingBundle> = BTreeMap::new();
271 for f in &frontier.findings {
272 let key = f
273 .provenance
274 .doi
275 .clone()
276 .unwrap_or_else(|| f.provenance.title.clone());
277 seen.entry(key).or_insert(f);
278 }
279
280 let mut out = String::new();
281 for f in seen.values() {
282 let cite_key = f
283 .provenance
284 .doi
285 .as_deref()
286 .map(|d| d.replace(['/', '.'], "_"))
287 .unwrap_or_else(|| f.id.clone());
288
289 let authors_str: String = f
290 .provenance
291 .authors
292 .iter()
293 .map(|a| a.name.as_str())
294 .collect::<Vec<_>>()
295 .join(" and ");
296
297 out.push_str(&format!("@article{{{},\n", cite_key));
298 out.push_str(&format!(" title = {{{}}},\n", f.provenance.title));
299 if !authors_str.is_empty() {
300 out.push_str(&format!(" author = {{{}}},\n", authors_str));
301 }
302 if let Some(year) = f.provenance.year {
303 out.push_str(&format!(" year = {{{}}},\n", year));
304 }
305 if let Some(journal) = &f.provenance.journal {
306 out.push_str(&format!(" journal = {{{}}},\n", journal));
307 }
308 if let Some(doi) = &f.provenance.doi {
309 out.push_str(&format!(" doi = {{{}}},\n", doi));
310 }
311 out.push_str("}\n\n");
312 }
313 out
314}
315
/// Top-level packet summary, serialized to `overview.json`.
#[derive(Debug, Clone, Serialize)]
struct PacketOverview {
    project_name: String,
    description: String,
    // When the frontier was compiled vs. when this packet was generated.
    compiled_at: String,
    generated_at: String,
    findings: usize,
    papers_processed: usize,
    avg_confidence: f64,
    // Counts keyed by assertion category / link type (BTreeMap for stable
    // key order in the serialized JSON).
    categories: BTreeMap<String, usize>,
    link_types: BTreeMap<String, usize>,
    // Truncated to the 25 most-cited entities by the exporter.
    top_entities: Vec<PacketEntitySummary>,
}

/// One entity row inside [`PacketOverview::top_entities`].
#[derive(Debug, Clone, Serialize)]
struct PacketEntitySummary {
    name: String,
    entity_type: String,
    finding_count: usize,
    // Assertion categories this entity appears under.
    categories: Vec<String>,
}

/// Flattened view of a finding used across the `findings/*.json` artifacts.
#[derive(Debug, Clone, Serialize)]
struct PacketFindingSummary {
    id: String,
    assertion_type: String,
    assertion_text: String,
    confidence: f64,
    evidence_type: String,
    method: String,
    entities: Vec<String>,
    doi: Option<String>,
    source_title: String,
    flags: PacketFlags,
    link_count: usize,
}

/// Review-relevant boolean flags carried alongside a finding.
#[derive(Debug, Clone, Serialize)]
struct PacketFlags {
    gap: bool,
    contested: bool,
    replicated: bool,
}
359
/// An entity that appears in two or more assertion categories — a candidate
/// "bridge" between topic areas. Serialized to `candidate-bridges.json` and
/// `findings/bridges.json`.
#[derive(Debug, Clone, Serialize)]
struct PacketBridgeSummary {
    entity: String,
    entity_type: String,
    // Category name -> number of findings for this entity in that category.
    categories: BTreeMap<String, usize>,
    finding_ids: Vec<String>,
}

/// One contradicts/disputes edge between two findings, serialized to
/// `candidate-tensions.json` (deduplicated per unordered pair + link type).
#[derive(Debug, Clone, Serialize)]
struct PacketContradictionSummary {
    source_id: String,
    target_id: String,
    link_type: String,
    source_assertion: String,
    target_assertion: String,
}

/// Scope statement for the packet (`scope.json`): what it is for, what it is
/// explicitly not for, and standing caveats.
#[derive(Debug, Clone, Serialize)]
struct PacketScope {
    frontier_name: String,
    description: String,
    generated_at: String,
    source_schema: String,
    finding_count: usize,
    papers_processed: usize,
    review_event_count: usize,
    intended_use: Vec<String>,
    out_of_scope: Vec<String>,
    caveats: Vec<String>,
}
390
/// One row of `source-table.json`, describing a source document and the
/// findings extracted from it.
#[derive(Debug, Clone, Serialize)]
struct PacketSourceRow {
    // NOTE(review): source_key and source_id are both populated from the same
    // source id by the exporter — presumably kept separate for forward
    // compatibility; confirm before collapsing.
    source_key: String,
    source_id: String,
    locator: String,
    content_hash: Option<String>,
    title: String,
    doi: Option<String>,
    pmid: Option<String>,
    year: Option<i32>,
    source_type: String,
    extraction_mode: String,
    source_quality: String,
    caveats: Vec<String>,
    finding_ids: Vec<String>,
}

/// One row of `evidence-matrix.json`: a finding joined with its evidence
/// atoms and link-degree counts (supports/contradicts/depends).
#[derive(Debug, Clone, Serialize)]
struct PacketEvidenceMatrixRow {
    finding_id: String,
    assertion_type: String,
    evidence_type: String,
    method: String,
    confidence: f64,
    replicated: bool,
    human_data: bool,
    clinical_trial: bool,
    source_key: String,
    // Source id of the first evidence atom, if any.
    source_id: Option<String>,
    evidence_atom_ids: Vec<String>,
    // How many atoms lack a locator back into the source document.
    missing_locator_count: usize,
    supports: usize,
    contradicts: usize,
    depends: usize,
    flags: PacketFlags,
}

/// A gap-flagged finding, serialized to `candidate-gaps.json`.
#[derive(Debug, Clone, Serialize)]
struct PacketCandidateGap {
    finding_id: String,
    assertion: String,
    confidence: f64,
    conditions: String,
    entities: Vec<String>,
    // "reviewed" or "unreviewed", derived from provenance review state.
    review_status: String,
}
437
/// MCP session bootstrap info (`mcp-session.json`): the recommended tool
/// loop and the full tool catalog for clients consuming the packet.
#[derive(Debug, Clone, Serialize)]
struct PacketMcpSession {
    protocol: String,
    recommended_loop: Vec<String>,
    // Opaque JSON catalog produced by crate::tool_registry.
    tool_catalog: serde_json::Value,
    notes: Vec<String>,
}

/// Self-check report for the packet (`check-summary.json`), combining the
/// artifact audit, counts, and proposal/proof state.
#[derive(Debug, Clone, Serialize)]
struct PacketCheckSummary {
    status: String,
    generated_at: String,
    checked_artifacts: Vec<String>,
    artifact_audit: crate::artifact_audit::ArtifactAudit,
    counts: PacketManifestStats,
    proposal_summary: crate::proposals::ProposalSummary,
    proof_state: crate::proposals::ProofState,
    caveats: Vec<String>,
}

/// Reproducibility trace (`proof-trace.json`): hashes binding the packet to
/// the frontier snapshot, event log, and proposal state it was built from.
#[derive(Debug, Clone, Serialize)]
struct PacketProofTrace {
    trace_version: String,
    generated_at: String,
    source: String,
    source_hash: String,
    snapshot_hash: String,
    event_log_hash: String,
    proposal_state_hash: String,
    replay_status: String,
    // None at export time: the manifest hash can only be computed after the
    // manifest (which includes this file) is written.
    packet_manifest_hash: Option<String>,
    schema_version: String,
    checked_artifacts: Vec<String>,
    caveats: Vec<String>,
    status: String,
}
474
/// Lock file (`packet.lock.json`): sha256 + size for every packet file
/// written before the lock itself.
#[derive(Debug, Clone, Serialize)]
struct PacketLock {
    lock_format: String,
    generated_at: String,
    files: Vec<PacketManifestFile>,
}

/// Top-level `manifest.json`: format/version identifiers, source frontier
/// metadata, aggregate stats, and the hash of every included file.
#[derive(Debug, Clone, Serialize)]
struct PacketManifest {
    packet_format: String,
    packet_version: String,
    generated_at: String,
    source: PacketSource,
    stats: PacketManifestStats,
    included_files: Vec<PacketManifestFile>,
}

/// Provenance of the frontier this packet was exported from.
#[derive(Debug, Clone, Serialize)]
struct PacketSource {
    project_name: String,
    description: String,
    compiled_at: String,
    compiler: String,
    vela_version: String,
    schema: String,
}

/// Aggregate counts reused by both the manifest and the check summary.
#[derive(Debug, Clone, Serialize)]
struct PacketManifestStats {
    findings: usize,
    sources: usize,
    evidence_atoms: usize,
    condition_records: usize,
    review_events: usize,
    proposals: usize,
    gaps: usize,
    contested: usize,
    bridge_entities: usize,
    contradiction_edges: usize,
}

/// One file entry in the manifest/lock: relative path, content hash, size.
#[derive(Debug, Clone, Serialize)]
struct PacketManifestFile {
    path: String,
    sha256: String,
    bytes: usize,
}
522
/// Mapping entry for a copied artifact blob (`artifacts/blob-map.json`):
/// where the blob came from and where it lives inside the packet.
#[derive(Debug, Clone, Serialize)]
struct PacketArtifactBlob {
    artifact_id: String,
    content_hash: String,
    source_locator: String,
    packet_path: String,
    size_bytes: usize,
}

/// Public result of a packet export: the key hashes a caller needs to record
/// or verify the export (returned by `export_packet*`).
#[derive(Debug, Clone)]
pub struct PacketExportRecord {
    pub generated_at: String,
    pub snapshot_hash: String,
    pub event_log_hash: String,
    pub packet_manifest_hash: String,
}

/// An in-memory packet file: packet-relative path plus raw content bytes,
/// staged before being written to disk.
#[derive(Debug, Clone)]
struct PacketFile {
    path: String,
    content: Vec<u8>,
}
545
546impl PacketFile {
547 fn text(path: impl Into<String>, content: String) -> Self {
548 Self {
549 path: path.into(),
550 content: content.into_bytes(),
551 }
552 }
553
554 fn json<T: Serialize>(path: impl Into<String>, value: &T) -> Result<Self, String> {
555 let content = serde_json::to_vec_pretty(value)
556 .map_err(|e| format!("Failed to serialize packet file: {e}"))?;
557 Ok(Self {
558 path: path.into(),
559 content,
560 })
561 }
562
563 fn bytes(path: impl Into<String>, content: Vec<u8>) -> Self {
564 Self {
565 path: path.into(),
566 content,
567 }
568 }
569}
570
/// Export a frontier packet into `output_dir` without a source frontier
/// directory. Delegates to [`export_packet_with_source`] with `None`; note
/// this means local artifact blobs cannot be located on disk.
pub fn export_packet(frontier: &Project, output_dir: &Path) -> Result<PacketExportRecord, String> {
    export_packet_with_source(frontier, None, output_dir)
}
574
/// Build and write a complete multi-file "frontier packet" under `output_dir`.
///
/// Pipeline: derive source/evidence/condition projections; aggregate entity,
/// gap, contested, bridge and contradiction views; run the artifact audit;
/// stage every packet file in memory; write a lock file covering them; write
/// the files; finally write `manifest.json` (which hashes all other files)
/// and return the key hashes as a [`PacketExportRecord`].
///
/// `source_path`, when given, is the frontier directory used to audit and
/// copy local artifact blobs. Fails (returns `Err`) on any I/O or
/// serialization error, or when the artifact audit reports issues.
pub fn export_packet_with_source(
    frontier: &Project,
    source_path: Option<&Path>,
    output_dir: &Path,
) -> Result<PacketExportRecord, String> {
    use std::fs;

    // Pre-create the fixed subdirectories of the packet layout.
    fs::create_dir_all(output_dir.join("findings"))
        .map_err(|e| format!("Failed to create findings dir: {e}"))?;
    fs::create_dir_all(output_dir.join("reviews"))
        .map_err(|e| format!("Failed to create reviews dir: {e}"))?;
    fs::create_dir_all(output_dir.join("sources"))
        .map_err(|e| format!("Failed to create sources dir: {e}"))?;
    fs::create_dir_all(output_dir.join("evidence"))
        .map_err(|e| format!("Failed to create evidence dir: {e}"))?;
    fs::create_dir_all(output_dir.join("conditions"))
        .map_err(|e| format!("Failed to create conditions dir: {e}"))?;
    fs::create_dir_all(output_dir.join("proposals"))
        .map_err(|e| format!("Failed to create proposals dir: {e}"))?;

    let generated_at = Utc::now().to_rfc3339();

    // Project sources, evidence atoms and condition records out of the
    // frontier, and index atoms by the finding they support.
    let source_evidence = sources::derive_projection(frontier);
    let source_records = source_evidence.sources;
    let evidence_atoms = source_evidence.evidence_atoms;
    let condition_records = source_evidence.condition_records;
    let mut atoms_by_finding: BTreeMap<String, Vec<&sources::EvidenceAtom>> = BTreeMap::new();
    for atom in &evidence_atoms {
        atoms_by_finding
            .entry(atom.finding_id.clone())
            .or_default()
            .push(atom);
    }

    // Aggregate per-entity statistics: occurrence counts, first-seen type,
    // category histogram, and the set of findings mentioning the entity.
    let mut entity_counts: BTreeMap<String, usize> = BTreeMap::new();
    let mut entity_types: BTreeMap<String, String> = BTreeMap::new();
    let mut entity_categories: BTreeMap<String, BTreeMap<String, usize>> = BTreeMap::new();
    let mut entity_finding_ids: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();

    for finding in &frontier.findings {
        for entity in &finding.assertion.entities {
            *entity_counts.entry(entity.name.clone()).or_default() += 1;
            entity_types
                .entry(entity.name.clone())
                .or_insert_with(|| entity.entity_type.clone());
            *entity_categories
                .entry(entity.name.clone())
                .or_default()
                .entry(finding.assertion.assertion_type.clone())
                .or_default() += 1;
            entity_finding_ids
                .entry(entity.name.clone())
                .or_default()
                .insert(finding.id.clone());
        }
    }

    // Top entities: most findings first, name as deterministic tie-break,
    // capped at 25 for the overview.
    let mut top_entities: Vec<PacketEntitySummary> = entity_counts
        .iter()
        .map(|(name, finding_count)| PacketEntitySummary {
            name: name.clone(),
            entity_type: entity_types
                .get(name)
                .cloned()
                .unwrap_or_else(|| "unknown".to_string()),
            finding_count: *finding_count,
            categories: entity_categories
                .get(name)
                .map(|cats| cats.keys().cloned().collect())
                .unwrap_or_default(),
        })
        .collect();
    top_entities.sort_by(|a, b| {
        b.finding_count
            .cmp(&a.finding_count)
            .then_with(|| a.name.cmp(&b.name))
    });
    top_entities.truncate(25);

    let overview = PacketOverview {
        project_name: frontier.project.name.clone(),
        description: frontier.project.description.clone(),
        compiled_at: frontier.project.compiled_at.clone(),
        generated_at: generated_at.clone(),
        findings: frontier.stats.findings,
        papers_processed: frontier.project.papers_processed,
        avg_confidence: frontier.stats.avg_confidence,
        categories: frontier
            .stats
            .categories
            .iter()
            .map(|(k, v)| (k.clone(), *v))
            .collect(),
        link_types: frontier
            .stats
            .link_types
            .iter()
            .map(|(k, v)| (k.clone(), *v))
            .collect(),
        top_entities,
    };

    // Flatten findings and order them: highest confidence first, then most
    // linked, then id for a stable total order.
    let mut packet_findings: Vec<PacketFindingSummary> = frontier
        .findings
        .iter()
        .map(|finding| PacketFindingSummary {
            id: finding.id.clone(),
            assertion_type: finding.assertion.assertion_type.clone(),
            assertion_text: finding.assertion.text.clone(),
            confidence: finding.confidence.score,
            evidence_type: finding.evidence.evidence_type.clone(),
            method: finding.evidence.method.clone(),
            entities: finding
                .assertion
                .entities
                .iter()
                .map(|entity| entity.name.clone())
                .collect(),
            doi: finding.provenance.doi.clone(),
            source_title: finding.provenance.title.clone(),
            flags: PacketFlags {
                gap: finding.flags.gap,
                contested: finding.flags.contested,
                replicated: finding.evidence.replicated,
            },
            link_count: finding.links.len(),
        })
        .collect();
    packet_findings.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| b.link_count.cmp(&a.link_count))
            .then_with(|| a.id.cmp(&b.id))
    });

    // "High signal": flagged, replicated, highly confident, or linked —
    // capped at the first 50 in the sorted order above.
    let high_signal_findings: Vec<PacketFindingSummary> = packet_findings
        .iter()
        .filter(|finding| {
            finding.flags.gap
                || finding.flags.contested
                || finding.flags.replicated
                || finding.confidence >= 0.85
                || finding.link_count > 0
        })
        .take(50)
        .cloned()
        .collect();

    let gap_findings: Vec<PacketFindingSummary> = packet_findings
        .iter()
        .filter(|finding| finding.flags.gap)
        .cloned()
        .collect();

    let contested_findings: Vec<PacketFindingSummary> = packet_findings
        .iter()
        .filter(|finding| finding.flags.contested)
        .cloned()
        .collect();

    // Bridges: entities spanning at least two assertion categories, ordered
    // by breadth, then reach, then name.
    let mut bridge_entities: Vec<PacketBridgeSummary> = entity_categories
        .iter()
        .filter(|(_, categories)| categories.len() >= 2)
        .map(|(entity, categories)| PacketBridgeSummary {
            entity: entity.clone(),
            entity_type: entity_types
                .get(entity)
                .cloned()
                .unwrap_or_else(|| "unknown".to_string()),
            categories: categories.clone(),
            finding_ids: entity_finding_ids
                .get(entity)
                .map(|ids| ids.iter().cloned().collect())
                .unwrap_or_default(),
        })
        .collect();
    bridge_entities.sort_by(|a, b| {
        b.categories
            .len()
            .cmp(&a.categories.len())
            .then_with(|| b.finding_ids.len().cmp(&a.finding_ids.len()))
            .then_with(|| a.entity.cmp(&b.entity))
    });

    // Contradiction edges: contradicts/disputes links, deduplicated per
    // unordered (source, target, link_type) triple; links whose target id is
    // unknown are silently skipped.
    let finding_lookup: HashMap<&str, &FindingBundle> = frontier
        .findings
        .iter()
        .map(|finding| (finding.id.as_str(), finding))
        .collect();
    let mut contradictions = Vec::new();
    let mut seen_pairs = BTreeSet::new();
    for finding in &frontier.findings {
        for link in &finding.links {
            if !(link.link_type == "contradicts" || link.link_type == "disputes") {
                continue;
            }
            let pair_key = if finding.id <= link.target {
                format!("{}::{}::{}", finding.id, link.target, link.link_type)
            } else {
                format!("{}::{}::{}", link.target, finding.id, link.link_type)
            };
            if !seen_pairs.insert(pair_key) {
                continue;
            }
            if let Some(target) = finding_lookup.get(link.target.as_str()) {
                contradictions.push(PacketContradictionSummary {
                    source_id: finding.id.clone(),
                    target_id: target.id.clone(),
                    link_type: link.link_type.clone(),
                    source_assertion: finding.assertion.text.clone(),
                    target_assertion: target.assertion.text.clone(),
                });
            }
        }
    }

    let caveats = packet_caveats();
    let scope = PacketScope {
        frontier_name: frontier.project.name.clone(),
        description: frontier.project.description.clone(),
        generated_at: generated_at.clone(),
        source_schema: frontier.schema.clone(),
        finding_count: frontier.findings.len(),
        papers_processed: frontier.project.papers_processed,
        review_event_count: frontier.review_events.len(),
        intended_use: vec![
            "Review a bounded compiled frontier".to_string(),
            "Inspect findings, evidence, confidence, provenance, and links".to_string(),
            "Compare candidate tensions, gaps, and bridges".to_string(),
            "Serve reviewable context to MCP/HTTP clients".to_string(),
        ],
        out_of_scope: vec![
            "Autonomous experiment planning".to_string(),
            "Definitive novelty claims".to_string(),
            "Institutional federation or broad exchange-network claims".to_string(),
        ],
        caveats: caveats.clone(),
    };

    let source_table: Vec<PacketSourceRow> = source_records
        .iter()
        .map(|source| PacketSourceRow {
            source_key: source.id.clone(),
            source_id: source.id.clone(),
            locator: source.locator.clone(),
            content_hash: source.content_hash.clone(),
            title: source.title.clone(),
            doi: source.doi.clone(),
            pmid: source.pmid.clone(),
            year: source.year,
            source_type: source.source_type.clone(),
            extraction_mode: source.extraction_mode.clone(),
            source_quality: source.source_quality.clone(),
            caveats: source.caveats.clone(),
            finding_ids: source.finding_ids.clone(),
        })
        .collect();

    // Evidence matrix: one row per finding, joining its evidence atoms with
    // link-degree counts by link category.
    let evidence_matrix: Vec<PacketEvidenceMatrixRow> = frontier
        .findings
        .iter()
        .map(|finding| {
            let atoms = atoms_by_finding
                .get(&finding.id)
                .map(Vec::as_slice)
                .unwrap_or(&[]);
            let evidence_atom_ids = atoms.iter().map(|atom| atom.id.clone()).collect::<Vec<_>>();
            let source_id = atoms.first().map(|atom| atom.source_id.clone());
            let missing_locator_count = atoms.iter().filter(|atom| atom.locator.is_none()).count();
            let supports = finding
                .links
                .iter()
                .filter(|link| {
                    matches!(
                        link.link_type.as_str(),
                        "supports" | "extends" | "replicates"
                    )
                })
                .count();
            let contradicts = finding
                .links
                .iter()
                .filter(|link| matches!(link.link_type.as_str(), "contradicts" | "disputes"))
                .count();
            let depends = finding
                .links
                .iter()
                .filter(|link| link.link_type == "depends")
                .count();
            PacketEvidenceMatrixRow {
                finding_id: finding.id.clone(),
                assertion_type: finding.assertion.assertion_type.clone(),
                evidence_type: finding.evidence.evidence_type.clone(),
                method: finding.evidence.method.clone(),
                confidence: finding.confidence.score,
                replicated: finding.evidence.replicated,
                human_data: finding.conditions.human_data,
                clinical_trial: finding.conditions.clinical_trial,
                source_key: source_key(finding),
                source_id,
                evidence_atom_ids,
                missing_locator_count,
                supports,
                contradicts,
                depends,
                flags: PacketFlags {
                    gap: finding.flags.gap,
                    contested: finding.flags.contested,
                    replicated: finding.evidence.replicated,
                },
            }
        })
        .collect();

    let candidate_gaps: Vec<PacketCandidateGap> = frontier
        .findings
        .iter()
        .filter(|finding| finding.flags.gap)
        .map(|finding| PacketCandidateGap {
            finding_id: finding.id.clone(),
            assertion: finding.assertion.text.clone(),
            confidence: finding.confidence.score,
            conditions: finding.conditions.text.clone(),
            entities: finding
                .assertion
                .entities
                .iter()
                .map(|entity| entity.name.clone())
                .collect(),
            review_status: finding
                .provenance
                .review
                .as_ref()
                .map(|review| {
                    if review.reviewed {
                        "reviewed".to_string()
                    } else {
                        "unreviewed".to_string()
                    }
                })
                .unwrap_or_else(|| "unreviewed".to_string()),
        })
        .collect();

    let mcp_session = PacketMcpSession {
        protocol: "model-context-protocol".to_string(),
        recommended_loop: vec![
            "frontier_stats".to_string(),
            "search_findings".to_string(),
            "get_finding".to_string(),
            "list_gaps".to_string(),
            "find_bridges".to_string(),
            "check_pubmed".to_string(),
            "list_contradictions".to_string(),
            "propagate_retraction".to_string(),
            "apply_observer".to_string(),
        ],
        tool_catalog: crate::tool_registry::mcp_tools_json(),
        notes: caveats.clone(),
    };

    let stats = PacketManifestStats {
        findings: frontier.findings.len(),
        sources: source_records.len(),
        evidence_atoms: evidence_atoms.len(),
        condition_records: condition_records.len(),
        review_events: frontier.review_events.len(),
        proposals: frontier.proposals.len(),
        gaps: gap_findings.len(),
        contested: contested_findings.len(),
        bridge_entities: bridge_entities.len(),
        contradiction_edges: contradictions.len(),
    };

    let checked_artifacts = packet::canonical_packet_files()
        .iter()
        .map(|path| (*path).to_string())
        .collect::<Vec<_>>();

    // Audit local artifacts before copying any blobs; a failed audit aborts
    // the whole export.
    let artifact_audit_root = source_path.unwrap_or_else(|| Path::new("."));
    let artifact_audit = crate::artifact_audit::audit_artifacts(artifact_audit_root, frontier);
    if !artifact_audit.ok {
        return Err(format!(
            "Artifact audit failed for proof packet export: {} issue(s)",
            artifact_audit.issue_count
        ));
    }
    let (artifact_blob_files, artifact_blob_map) =
        packet_artifact_blob_files(frontier, source_path)?;

    let check_summary = PacketCheckSummary {
        status: "ok".to_string(),
        generated_at: generated_at.clone(),
        checked_artifacts: checked_artifacts.clone(),
        artifact_audit: artifact_audit.clone(),
        counts: stats.clone(),
        proposal_summary: crate::proposals::summary(frontier),
        proof_state: frontier.proof_state.clone(),
        caveats: caveats.clone(),
    };

    // Derived reports: signals/review queue, quality table, state
    // transitions, event replay, and RO-Crate metadata.
    let signal_report = signals::analyze(frontier, &[]);
    let quality_table = signals::quality_table(frontier, &signal_report);
    let state_transitions = state::state_transitions(frontier);
    let replay_report = events::replay_report(frontier);
    let ro_crate = signals::ro_crate_metadata(frontier, &checked_artifacts);

    // Proof trace: hash of the canonical frontier bytes plus replay /
    // proposal hashes. packet_manifest_hash stays None here — the manifest
    // is only written (and hashable) after every other file.
    let frontier_bytes = crate::canonical::to_canonical_bytes(frontier)
        .map_err(|e| format!("Failed to serialize frontier for source hash: {e}"))?;
    let proof_trace = PacketProofTrace {
        trace_version: "0.2.0".to_string(),
        generated_at: generated_at.clone(),
        source: frontier.project.name.clone(),
        source_hash: hex::encode(Sha256::digest(&frontier_bytes)),
        snapshot_hash: replay_report.current_hash.clone(),
        event_log_hash: replay_report.event_log_hash.clone(),
        proposal_state_hash: crate::proposals::proposal_state_hash(&frontier.proposals),
        replay_status: replay_report.status.clone(),
        packet_manifest_hash: None,
        schema_version: frontier.vela_version.clone(),
        checked_artifacts: checked_artifacts.clone(),
        caveats: caveats.clone(),
        status: "ok".to_string(),
    };

    let readme = export_packet_readme(
        frontier,
        &generated_at,
        high_signal_findings.len(),
        gap_findings.len(),
        contested_findings.len(),
        bridge_entities.len(),
        contradictions.len(),
    );

    // Stage the full packet in memory before touching disk.
    let mut files = vec![
        PacketFile::text("README.md", readme),
        PacketFile::text("reviewer-guide.md", export_reviewer_guide(frontier)),
        PacketFile::json("overview.json", &overview)?,
        PacketFile::json("scope.json", &scope)?,
        PacketFile::json("source-table.json", &source_table)?,
        PacketFile::json("sources/source-registry.json", &source_records)?,
        PacketFile::json("evidence-matrix.json", &evidence_matrix)?,
        PacketFile::json("evidence/evidence-atoms.json", &evidence_atoms)?,
        PacketFile::json(
            "evidence/source-evidence-map.json",
            &sources::source_evidence_map_from_atoms(&evidence_atoms),
        )?,
        PacketFile::json("conditions/condition-records.json", &condition_records)?,
        PacketFile::json(
            "conditions/condition-matrix.json",
            &sources::condition_matrix(&condition_records),
        )?,
        PacketFile::json("signals.json", &signal_report.signals)?,
        PacketFile::json("review-queue.json", &signal_report.review_queue)?,
        PacketFile::json("quality-table.json", &quality_table)?,
        PacketFile::json("state-transitions.json", &state_transitions)?,
        PacketFile::json("events/events.json", &frontier.events)?,
        PacketFile::json("events/replay-report.json", &replay_report)?,
        PacketFile::json("proposals/proposals.json", &frontier.proposals)?,
        PacketFile::json("ro-crate-metadata.jsonld", &ro_crate)?,
        PacketFile::json("candidate-tensions.json", &contradictions)?,
        PacketFile::json("candidate-gaps.json", &candidate_gaps)?,
        PacketFile::json("candidate-bridges.json", &bridge_entities)?,
        PacketFile::json("mcp-session.json", &mcp_session)?,
        PacketFile::json("check-summary.json", &check_summary)?,
        PacketFile::json("proof-trace.json", &proof_trace)?,
        PacketFile::json("findings/high-signal.json", &high_signal_findings)?,
        PacketFile::json("findings/full.json", &frontier.findings)?,
        PacketFile::json("artifacts/artifacts.json", &frontier.artifacts)?,
        PacketFile::json("artifacts/artifact-audit.json", &artifact_audit)?,
        PacketFile::json("artifacts/blob-map.json", &artifact_blob_map)?,
        PacketFile::json("findings/gaps.json", &gap_findings)?,
        PacketFile::json("findings/contested.json", &contested_findings)?,
        PacketFile::json("findings/bridges.json", &bridge_entities)?,
        PacketFile::json("findings/contradictions.json", &contradictions)?,
        PacketFile::json("reviews/review-events.json", &frontier.review_events)?,
        PacketFile::json(
            "reviews/confidence-updates.json",
            &frontier.confidence_updates,
        )?,
    ];
    files.extend(artifact_blob_files);

    // The lock covers every staged file, then joins them as the last file.
    let lock = PacketLock {
        lock_format: "vela.packet-lock.v1".to_string(),
        generated_at: generated_at.clone(),
        files: files.iter().map(manifest_entry_for_file).collect(),
    };
    files.push(PacketFile::json("packet.lock.json", &lock)?);

    // Write everything, creating any blob subdirectories on demand.
    for file in &files {
        let full_path = output_dir.join(&file.path);
        if let Some(parent) = full_path.parent() {
            fs::create_dir_all(parent).map_err(|e| {
                format!(
                    "Failed to create packet parent dir {}: {e}",
                    parent.display()
                )
            })?;
        }
        fs::write(&full_path, &file.content)
            .map_err(|e| format!("Failed to write packet file {}: {e}", file.path))?;
    }

    // Manifest last: it hashes every file written above (including the lock)
    // and is therefore not listed in the lock itself.
    let manifest = PacketManifest {
        packet_format: "vela.frontier-packet".to_string(),
        packet_version: "v1".to_string(),
        generated_at: generated_at.clone(),
        source: PacketSource {
            project_name: frontier.project.name.clone(),
            description: frontier.project.description.clone(),
            compiled_at: frontier.project.compiled_at.clone(),
            compiler: frontier.project.compiler.clone(),
            vela_version: frontier.vela_version.clone(),
            schema: frontier.schema.clone(),
        },
        stats,
        included_files: files
            .drain(..)
            .map(|file| manifest_entry_for_file(&file))
            .collect(),
    };

    let manifest_bytes = serde_json::to_vec_pretty(&manifest)
        .map_err(|e| format!("Failed to serialize packet manifest: {e}"))?;
    let manifest_path = output_dir.join("manifest.json");
    fs::write(&manifest_path, &manifest_bytes)
        .map_err(|e| format!("Failed to write manifest.json: {e}"))?;

    let packet_manifest_hash = hex::encode(Sha256::digest(&manifest_bytes));
    Ok(PacketExportRecord {
        generated_at,
        snapshot_hash: replay_report.current_hash,
        event_log_hash: replay_report.event_log_hash,
        packet_manifest_hash,
    })
}
1116
1117fn packet_artifact_blob_files(
1118 frontier: &Project,
1119 source_path: Option<&Path>,
1120) -> Result<(Vec<PacketFile>, Vec<PacketArtifactBlob>), String> {
1121 let Some(root) = artifact_source_root(source_path) else {
1122 if frontier.artifacts.iter().any(is_local_artifact) {
1123 return Err(
1124 "Proof packet export needs a frontier directory to copy local artifact blobs"
1125 .to_string(),
1126 );
1127 }
1128 return Ok((Vec::new(), Vec::new()));
1129 };
1130
1131 let mut files = Vec::new();
1132 let mut blob_map = Vec::new();
1133 let mut seen_hashes = BTreeSet::new();
1134
1135 for artifact in frontier
1136 .artifacts
1137 .iter()
1138 .filter(|artifact| is_local_artifact(artifact))
1139 {
1140 let Some(hex) = artifact.content_hash.strip_prefix("sha256:") else {
1141 return Err(format!(
1142 "Artifact {} has unsupported content hash '{}'",
1143 artifact.id, artifact.content_hash
1144 ));
1145 };
1146 let locator = artifact
1147 .locator
1148 .as_deref()
1149 .ok_or_else(|| format!("Artifact {} is local but has no locator", artifact.id))?;
1150 let source = resolve_artifact_locator(&root, locator);
1151 let bytes = std::fs::read(&source).map_err(|e| {
1152 format!(
1153 "Failed to read local artifact blob for {} at {}: {e}",
1154 artifact.id,
1155 source.display()
1156 )
1157 })?;
1158 let actual = hex::encode(Sha256::digest(&bytes));
1159 if actual != hex {
1160 return Err(format!(
1161 "Artifact {} blob hash mismatch: expected {}, found {}",
1162 artifact.id, hex, actual
1163 ));
1164 }
1165 let packet_path = format!("artifacts/blobs/sha256/{hex}");
1166 if seen_hashes.insert(hex.to_string()) {
1167 files.push(PacketFile::bytes(packet_path.clone(), bytes.clone()));
1168 }
1169 blob_map.push(PacketArtifactBlob {
1170 artifact_id: artifact.id.clone(),
1171 content_hash: artifact.content_hash.clone(),
1172 source_locator: locator.to_string(),
1173 packet_path,
1174 size_bytes: bytes.len(),
1175 });
1176 }
1177
1178 Ok((files, blob_map))
1179}
1180
/// Resolve the directory local artifact locators are relative to.
///
/// A directory path is used as-is; a file path falls back to its parent
/// directory; `None` propagates as `None`.
fn artifact_source_root(source_path: Option<&Path>) -> Option<PathBuf> {
    source_path.and_then(|source| {
        if source.is_dir() {
            Some(source.to_path_buf())
        } else {
            source.parent().map(Path::to_path_buf)
        }
    })
}
1189
1190fn is_local_artifact(artifact: &Artifact) -> bool {
1191 matches!(artifact.storage_mode.as_str(), "local_blob" | "local_file")
1192}
1193
/// Turn an artifact locator into a concrete path: absolute locators are
/// honored verbatim, relative ones are anchored at `root`.
fn resolve_artifact_locator(root: &Path, locator: &str) -> PathBuf {
    let candidate = Path::new(locator);
    match candidate.is_absolute() {
        true => candidate.to_path_buf(),
        false => root.join(candidate),
    }
}
1202
1203fn export_packet_readme(
1204 frontier: &Project,
1205 generated_at: &str,
1206 high_signal_count: usize,
1207 gap_count: usize,
1208 contested_count: usize,
1209 bridge_count: usize,
1210 contradiction_count: usize,
1211) -> String {
1212 let mut out = String::new();
1213 out.push_str(&format!("# {} packet\n\n", frontier.project.name));
1214 out.push_str(&format!("{}\n\n", frontier.project.description));
1215 out.push_str("This export is a bounded network packet: a compact, publishable subset of the frontier optimized for review, contradiction inspection, and grounded agent context. It intentionally does not dump the full raw frontier by default.\n\n");
1216 out.push_str("## Source\n\n");
1217 out.push_str(&format!("- Project: {}\n", frontier.project.name));
1218 out.push_str(&format!(
1219 "- Compiled at: {}\n",
1220 frontier.project.compiled_at
1221 ));
1222 out.push_str(&format!("- Generated at: {}\n", generated_at));
1223 out.push_str(&format!("- Compiler: {}\n", frontier.project.compiler));
1224 out.push_str(&format!("- Vela version: {}\n", frontier.vela_version));
1225 out.push_str(&format!("- Schema: {}\n\n", frontier.schema));
1226 out.push_str("## Included artifacts\n\n");
1227 out.push_str("- `manifest.json` — provenance, version stamp, checksums\n");
1228 out.push_str("- `overview.json` — project-level stats, categories, top entities\n");
1229 out.push_str("- `findings/high-signal.json` — compact high-signal finding subset\n");
1230 out.push_str(
1231 "- `findings/full.json` — canonical finding bundles for packet import and merge\n",
1232 );
1233 out.push_str("- `artifacts/artifacts.json`: content-addressed protocols, files, records, and dataset manifests\n");
1234 out.push_str(
1235 "- `artifacts/artifact-audit.json` — artifact integrity report used during export\n",
1236 );
1237 out.push_str(
1238 "- `artifacts/blob-map.json` — packet-local hash map for checked local artifact bytes\n",
1239 );
1240 out.push_str("- `findings/gaps.json` — gap-tagged findings\n");
1241 out.push_str("- `findings/contested.json` — contested findings\n");
1242 out.push_str("- `findings/bridges.json` — entities spanning multiple assertion categories\n");
1243 out.push_str("- `findings/contradictions.json` — explicit contradiction/dispute edges\n");
1244 out.push_str("- `reviews/review-events.json` — attached review events\n");
1245 out.push_str("- `reviews/confidence-updates.json` — interpretation confidence revisions\n");
1246 out.push_str("- `state-transitions.json` — combined review and confidence transition log\n\n");
1247 out.push_str("## Packet stats\n\n");
1248 out.push_str(&format!(
1249 "- Findings in source frontier: {}\n",
1250 frontier.findings.len()
1251 ));
1252 out.push_str(&format!(
1253 "- High-signal findings exported: {}\n",
1254 high_signal_count
1255 ));
1256 out.push_str(&format!("- Gap findings exported: {}\n", gap_count));
1257 out.push_str(&format!(
1258 "- Contested findings exported: {}\n",
1259 contested_count
1260 ));
1261 out.push_str(&format!("- Bridge entities exported: {}\n", bridge_count));
1262 out.push_str(&format!(
1263 "- Contradiction edges exported: {}\n",
1264 contradiction_count
1265 ));
1266 out.push_str(&format!(
1267 "- Review events exported: {}\n",
1268 frontier.review_events.len()
1269 ));
1270 out
1271}
1272
1273fn export_reviewer_guide(frontier: &Project) -> String {
1274 let mut out = String::new();
1275 out.push_str(&format!("# Reviewer guide: {}\n\n", frontier.project.name));
1276 out.push_str("Use this packet as a reviewable frontier snapshot. Start with `scope.json`, then inspect `evidence-matrix.json`, `candidate-tensions.json`, `candidate-gaps.json`, and `candidate-bridges.json` before reading individual finding bundles.\n\n");
1277 out.push_str("## Suggested review loop\n\n");
1278 out.push_str(
1279 "1. Confirm the bounded scope and source corpus in `scope.json`, `source-table.json`, and `sources/source-registry.json`.\n",
1280 );
1281 out.push_str("2. Check high-confidence or high-link findings in `evidence-matrix.json`, then inspect exact source-grounded atoms in `evidence/evidence-atoms.json`.\n");
1282 out.push_str(
1283 "3. Inspect candidate tensions against the full finding bundles in `findings/full.json`.\n",
1284 );
1285 out.push_str(
1286 "4. Treat candidate gaps and bridges as leads requiring review, not as settled claims.\n",
1287 );
1288 out.push_str("5. Use `mcp-session.json` to replay the conservative MCP investigation loop.\n");
1289 out.push_str("6. Verify checksums with `manifest.json` and `packet.lock.json` before comparing packet diffs.\n\n");
1290 out.push_str("## Caveats\n\n");
1291 for caveat in packet_caveats() {
1292 out.push_str(&format!("- {caveat}\n"));
1293 }
1294 out
1295}
1296
/// Standing caveats surfaced in the reviewer guide: heuristics and simulated
/// signals in the packet that must not be read as settled conclusions.
fn packet_caveats() -> Vec<String> {
    [
        "Candidate contradictions, gaps, and bridges require human review.",
        "Evidence ranking is heuristic: meta-analysis > RCT > cohort > case-control > case-report > in-vitro.",
        "PubMed prior-art checks are rough signals, not proof of novelty.",
        "Observer policy output is weighted reranking, not definitive disagreement.",
        "Retraction impact is simulated over declared dependency links.",
    ]
    .iter()
    .map(|caveat| caveat.to_string())
    .collect()
}
1306
1307fn source_key(finding: &FindingBundle) -> String {
1308 if let Some(doi) = &finding.provenance.doi {
1309 return format!("doi:{doi}");
1310 }
1311 if let Some(pmid) = &finding.provenance.pmid {
1312 return format!("pmid:{pmid}");
1313 }
1314 format!("title:{}", finding.provenance.title)
1315}
1316
1317fn manifest_entry_for_file(file: &PacketFile) -> PacketManifestFile {
1318 PacketManifestFile {
1319 path: file.path.clone(),
1320 sha256: hex::encode(Sha256::digest(&file.content)),
1321 bytes: file.content.len(),
1322 }
1323}
1324
1325fn export_markdown(frontier: &Project) -> String {
1328 let mut out = String::new();
1329
1330 out.push_str(&format!("# {}\n\n", frontier.project.name));
1331 out.push_str(&format!("{}\n\n", frontier.project.description));
1332 out.push_str(&format!(
1333 "**Findings:** {} | **Papers:** {} | **Avg confidence:** {:.2}\n\n",
1334 frontier.stats.findings, frontier.project.papers_processed, frontier.stats.avg_confidence
1335 ));
1336
1337 let mut by_type: BTreeMap<String, Vec<&FindingBundle>> = BTreeMap::new();
1339 for f in &frontier.findings {
1340 by_type
1341 .entry(f.assertion.assertion_type.clone())
1342 .or_default()
1343 .push(f);
1344 }
1345
1346 for (atype, findings) in &by_type {
1347 out.push_str(&format!("## {} ({})\n\n", atype, findings.len()));
1348
1349 for f in findings {
1350 let entities: Vec<&str> = f
1351 .assertion
1352 .entities
1353 .iter()
1354 .map(|e| e.name.as_str())
1355 .collect();
1356 let repl = if f.evidence.replicated {
1357 " [replicated]"
1358 } else {
1359 ""
1360 };
1361 let gap = if f.flags.gap { " [GAP]" } else { "" };
1362 let contested = if f.flags.contested {
1363 " [CONTESTED]"
1364 } else {
1365 ""
1366 };
1367
1368 out.push_str(&format!(
1369 "- **[{:.2}]** {}{}{}{}\n",
1370 f.confidence.score, f.assertion.text, repl, gap, contested
1371 ));
1372 if !entities.is_empty() {
1373 out.push_str(&format!(" - Entities: {}\n", entities.join(", ")));
1374 }
1375 if let Some(doi) = &f.provenance.doi {
1376 let year = f.provenance.year.map(|y| y.to_string()).unwrap_or_default();
1377 out.push_str(&format!(
1378 " - Source: {} ({}) [doi:{}](https://doi.org/{})\n",
1379 f.provenance.title, year, doi, doi
1380 ));
1381 }
1382 out.push('\n');
1383 }
1384 }
1385
1386 out
1387}
1388
1389pub fn validate_nanopub(jsonld: &str) -> Vec<String> {
1397 let mut warnings = Vec::new();
1398
1399 let doc: serde_json::Value = match serde_json::from_str(jsonld) {
1400 Ok(v) => v,
1401 Err(e) => {
1402 warnings.push(format!("Invalid JSON: {e}"));
1403 return warnings;
1404 }
1405 };
1406
1407 if doc.get("@context").is_none() {
1409 warnings.push("Missing top-level @context".into());
1410 }
1411
1412 let graph = match doc["@graph"].as_array() {
1413 Some(g) => g,
1414 None => {
1415 warnings.push("Missing or invalid @graph array".into());
1416 return warnings;
1417 }
1418 };
1419
1420 for (i, node) in graph.iter().enumerate() {
1421 let label = node["@id"]
1422 .as_str()
1423 .map(|s| s.to_string())
1424 .unwrap_or_else(|| format!("graph[{}]", i));
1425
1426 if node.get("@type").is_none() {
1428 warnings.push(format!("{}: missing @type", label));
1429 }
1430
1431 let activity = &node["prov:wasGeneratedBy"];
1433 if activity.is_null() {
1434 warnings.push(format!(
1435 "{}: missing prov:wasGeneratedBy (no provenance activity)",
1436 label
1437 ));
1438 } else if activity["prov:used"].is_null() {
1439 warnings.push(format!(
1440 "{}: provenance activity has no prov:used (no source DOI)",
1441 label
1442 ));
1443 }
1444
1445 if let Some(entities) = node["vela:hasEntity"].as_array() {
1447 for (j, entity) in entities.iter().enumerate() {
1448 let ename = entity["vela:entityName"].as_str().unwrap_or("unknown");
1449 if entity.get("schema:identifier").is_none() {
1450 warnings.push(format!(
1451 "{}: entity {} ('{}') has no schema:identifier",
1452 label, j, ename
1453 ));
1454 }
1455 }
1456 }
1457 }
1458
1459 warnings
1460}
1461
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;
    use crate::project;

    /// Builds a minimal single-finding `Project` fixture: one "mechanism"
    /// assertion ("NLRP3 activates caspase-1") with two protein entities,
    /// experimental mouse evidence marked replicated, a DOI-backed
    /// provenance record, and all flags cleared.
    fn make_frontier() -> Project {
        let f1 = FindingBundle {
            id: "vf_abc123".into(),
            version: 1,
            previous_version: None,
            assertion: Assertion {
                text: "NLRP3 activates caspase-1".into(),
                assertion_type: "mechanism".into(),
                entities: vec![
                    Entity {
                        name: "NLRP3".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                    Entity {
                        name: "caspase-1".into(),
                        entity_type: "protein".into(),
                        identifiers: serde_json::Map::new(),
                        canonical_id: None,
                        candidates: vec![],
                        aliases: vec![],
                        resolution_provenance: None,
                        resolution_confidence: 1.0,
                        resolution_method: None,
                        species_context: None,
                        needs_review: false,
                    },
                ],
                relation: Some("activates".into()),
                direction: Some("positive".into()),
                causal_claim: None,
                causal_evidence_grade: None,
            },
            evidence: Evidence {
                evidence_type: "experimental".into(),
                model_system: "mouse".into(),
                species: Some("Mus musculus".into()),
                method: "Western blot".into(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: true,
                replication_count: None,
                evidence_spans: vec![],
            },
            conditions: Conditions {
                text: "In vitro".into(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: true,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            confidence: Confidence::raw(0.9, "grounded", 0.85),
            provenance: Provenance {
                source_type: "published_paper".into(),
                doi: Some("10.1234/test".into()),
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: "NLRP3 inflammasome paper".into(),
                authors: vec![Author {
                    name: "Smith J".into(),
                    orcid: None,
                }],
                year: Some(2023),
                journal: Some("Nature".into()),
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: Some(50),
            },
            flags: Flags {
                gap: false,
                negative_space: false,
                contested: false,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        };

        project::assemble("Test frontier", vec![f1], 1, 0, "Test description")
    }

    /// CSV export has a header row plus exactly one data row for the fixture.
    #[test]
    fn csv_has_header_and_row() {
        let c = make_frontier();
        let csv = export_csv(&c);
        let lines: Vec<&str> = csv.lines().collect();
        assert!(lines[0].starts_with("id,"));
        assert_eq!(lines.len(), 2); assert!(lines[1].contains("NLRP3"));
    }

    /// JSON-LD export parses, declares the expected @context prefixes,
    /// and wraps the single finding in a one-node @graph.
    #[test]
    fn jsonld_valid_json() {
        let c = make_frontier();
        let jsonld = export_jsonld(&c);
        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
        let ctx = &parsed["@context"];
        assert_eq!(ctx["@vocab"], "https://vela.science/schema/");
        assert_eq!(ctx["schema"], "https://schema.org/");
        assert_eq!(ctx["prov"], "http://www.w3.org/ns/prov#");
        assert_eq!(ctx["np"], "http://www.nanopub.org/nschema#");
        let graph = parsed["@graph"].as_array().unwrap();
        assert_eq!(graph.len(), 1);
        assert_eq!(graph[0]["@type"], "vela:FindingBundle");
    }

    /// Per-finding JSON-LD fields (id, type, confidence, provenance DOI)
    /// survive the export.
    #[test]
    fn jsonld_finding_fields() {
        let c = make_frontier();
        let jsonld = export_jsonld(&c);
        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
        let node = &parsed["@graph"][0];
        assert_eq!(node["@id"], "vela:vf_abc123");
        assert_eq!(node["vela:assertionType"], "mechanism");
        assert_eq!(node["vela:confidence"], 0.9);
        assert_eq!(node["vela:evidenceType"], "experimental");
        let activity = &node["prov:wasGeneratedBy"];
        assert_eq!(activity["prov:used"]["@id"], "doi:10.1234/test");
    }

    /// Both fixture entities appear under vela:hasEntity with name and type.
    #[test]
    fn jsonld_entities_present() {
        let c = make_frontier();
        let jsonld = export_jsonld(&c);
        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
        let entities = parsed["@graph"][0]["vela:hasEntity"].as_array().unwrap();
        assert_eq!(entities.len(), 2);
        assert_eq!(entities[0]["vela:entityName"], "NLRP3");
        assert_eq!(entities[0]["vela:entityType"], "protein");
    }

    /// parse -> re-serialize -> re-parse is a fixed point (export is stable JSON).
    #[test]
    fn jsonld_roundtrip_valid() {
        let c = make_frontier();
        let jsonld = export_jsonld(&c);
        let parsed: serde_json::Value = serde_json::from_str(&jsonld).unwrap();
        let re_serialized = serde_json::to_string_pretty(&parsed).unwrap();
        let re_parsed: serde_json::Value = serde_json::from_str(&re_serialized).unwrap();
        assert_eq!(parsed, re_parsed);
    }

    /// BibTeX export contains an @article entry carrying the paper title.
    #[test]
    fn bibtex_has_entry() {
        let c = make_frontier();
        let bib = export_bibtex(&c);
        assert!(bib.contains("@article{"));
        assert!(bib.contains("NLRP3 inflammasome paper"));
    }

    /// Markdown export starts with the project heading and groups by
    /// assertion type.
    #[test]
    fn markdown_has_heading() {
        let c = make_frontier();
        let md = export_markdown(&c);
        assert!(md.starts_with("# Test frontier"));
        assert!(md.contains("## mechanism"));
    }

    /// Fields containing commas are quoted; plain fields pass through.
    #[test]
    fn csv_escape_handles_commas() {
        assert_eq!(csv_escape("hello,world"), "\"hello,world\"");
        assert_eq!(csv_escape("plain"), "plain");
    }

    /// All advertised format aliases parse; unknown names are rejected.
    #[test]
    fn format_parsing() {
        assert!(ExportFormat::from_str("csv").is_ok());
        assert!(ExportFormat::from_str("jsonld").is_ok());
        assert!(ExportFormat::from_str("json-ld").is_ok());
        assert!(ExportFormat::from_str("bibtex").is_ok());
        assert!(ExportFormat::from_str("bib").is_ok());
        assert!(ExportFormat::from_str("markdown").is_ok());
        assert!(ExportFormat::from_str("md").is_ok());
        assert!(ExportFormat::from_str("packet").is_ok());
        assert!(ExportFormat::from_str("wiki").is_err());
        assert!(ExportFormat::from_str("obsidian").is_err());
        assert!(ExportFormat::from_str("xml").is_err());
    }

    /// Only the packet format reports itself as multi-file.
    #[test]
    fn multi_file_formats_are_flagged() {
        let packet = ExportFormat::from_str("packet").unwrap();
        assert!(packet.is_multi_file());
        let csv = ExportFormat::from_str("csv").unwrap();
        assert!(!csv.is_multi_file());
    }

    /// End-to-end packet export: every expected payload file exists on disk,
    /// the manifest carries the fixture stats, and high-signal findings round-trip.
    /// Uses a process-id-suffixed temp dir and cleans it up afterwards.
    #[test]
    fn packet_export_creates_manifest_and_payload_files() {
        let c = make_frontier();
        let dir = std::env::temp_dir().join(format!("vela_packet_test_{}", std::process::id()));
        let _ = std::fs::remove_dir_all(&dir);

        export_packet(&c, &dir).unwrap();

        assert!(dir.join("README.md").exists());
        assert!(dir.join("reviewer-guide.md").exists());
        assert!(dir.join("manifest.json").exists());
        assert!(dir.join("overview.json").exists());
        assert!(dir.join("scope.json").exists());
        assert!(dir.join("source-table.json").exists());
        assert!(dir.join("sources/source-registry.json").exists());
        assert!(dir.join("evidence-matrix.json").exists());
        assert!(dir.join("evidence/evidence-atoms.json").exists());
        assert!(dir.join("evidence/source-evidence-map.json").exists());
        assert!(dir.join("conditions/condition-records.json").exists());
        assert!(dir.join("conditions/condition-matrix.json").exists());
        assert!(dir.join("candidate-tensions.json").exists());
        assert!(dir.join("candidate-gaps.json").exists());
        assert!(dir.join("candidate-bridges.json").exists());
        assert!(dir.join("mcp-session.json").exists());
        assert!(dir.join("check-summary.json").exists());
        assert!(dir.join("signals.json").exists());
        assert!(dir.join("review-queue.json").exists());
        assert!(dir.join("quality-table.json").exists());
        assert!(dir.join("state-transitions.json").exists());
        assert!(dir.join("events/events.json").exists());
        assert!(dir.join("events/replay-report.json").exists());
        assert!(dir.join("ro-crate-metadata.jsonld").exists());
        assert!(dir.join("proof-trace.json").exists());
        assert!(dir.join("packet.lock.json").exists());
        assert!(dir.join("findings/high-signal.json").exists());
        assert!(dir.join("findings/full.json").exists());
        assert!(dir.join("artifacts/artifacts.json").exists());
        assert!(dir.join("artifacts/artifact-audit.json").exists());
        assert!(dir.join("artifacts/blob-map.json").exists());
        assert!(dir.join("findings/gaps.json").exists());
        assert!(dir.join("findings/contested.json").exists());
        assert!(dir.join("findings/bridges.json").exists());
        assert!(dir.join("findings/contradictions.json").exists());
        assert!(dir.join("reviews/review-events.json").exists());
        assert!(dir.join("reviews/confidence-updates.json").exists());

        let readme = std::fs::read_to_string(dir.join("README.md")).unwrap();
        assert!(readme.contains("bounded network packet"));
        assert!(readme.contains("manifest.json"));

        let manifest: serde_json::Value =
            serde_json::from_str(&std::fs::read_to_string(dir.join("manifest.json")).unwrap())
                .unwrap();
        assert_eq!(manifest["packet_format"], "vela.frontier-packet");
        assert_eq!(manifest["packet_version"], "v1");
        assert_eq!(manifest["stats"]["findings"], 1);
        assert_eq!(manifest["stats"]["sources"], 1);
        assert_eq!(manifest["stats"]["evidence_atoms"], 1);
        assert_eq!(manifest["stats"]["condition_records"], 1);
        // NOTE(review): 37 is the expected payload-file count; update in
        // lockstep when packet contents change.
        assert_eq!(manifest["included_files"].as_array().unwrap().len(), 37);

        let high_signal: serde_json::Value = serde_json::from_str(
            &std::fs::read_to_string(dir.join("findings/high-signal.json")).unwrap(),
        )
        .unwrap();
        assert_eq!(high_signal.as_array().unwrap().len(), 1);
        assert_eq!(high_signal[0]["id"], "vf_abc123");

        let _ = std::fs::remove_dir_all(&dir);
    }

    /// Our own JSON-LD export validates cleanly except for the (expected)
    /// missing schema:identifier warnings on unresolved fixture entities.
    #[test]
    fn nanopub_validates_well_formed_jsonld() {
        let c = make_frontier();
        let jsonld = export_jsonld(&c);
        let warnings = validate_nanopub(&jsonld);
        for w in &warnings {
            assert!(w.contains("schema:identifier"), "Unexpected warning: {w}");
        }
    }

    /// Malformed input yields exactly one "Invalid JSON" warning.
    #[test]
    fn nanopub_catches_invalid_json() {
        let warnings = validate_nanopub("not valid json {{{");
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].contains("Invalid JSON"));
    }

    /// A document without @graph yields exactly one warning about it.
    #[test]
    fn nanopub_catches_missing_graph() {
        let warnings = validate_nanopub(r#"{"@context": {}}"#);
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].contains("@graph"));
    }

    /// A graph node lacking @type is flagged.
    #[test]
    fn nanopub_catches_missing_type() {
        let doc = serde_json::json!({
            "@context": {},
            "@graph": [{"@id": "vela:test"}]
        });
        let warnings = validate_nanopub(&doc.to_string());
        assert!(warnings.iter().any(|w| w.contains("missing @type")));
    }
}