1use crate::bundle::FindingBundle;
4use crate::project::Project;
5use crate::repo::{self, VelaSource};
6use crate::sources;
7
8use serde::{Deserialize, Serialize};
9use serde_json::{Value, json};
10use std::collections::HashSet;
11use std::path::Path;
12
/// Options controlling how a normalization run behaves.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct NormalizeOptions {
    /// When true, changes are planned and reported but nothing is written back.
    pub dry_run: bool,
}
18
/// The category of a single planned normalization change.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NormalizeChangeKind {
    /// Entity type mapped onto the finding-bundle schema vocabulary.
    EntityType,
    /// Entity name mapped from a common alias to its canonical display name.
    EntityName,
    /// Entity removed because it duplicates another after canonicalization.
    DuplicateEntity,
    /// Finding ID rewritten to match its normalized content address.
    FindingId,
    /// Internal link target rewritten to follow a rewritten finding ID.
    LinkTarget,
    /// Derived source record materialized from finding provenance.
    SourceRecord,
    /// Derived evidence atom materialized from source/evidence/finding links.
    EvidenceAtom,
    /// Derived condition record materialized for review and proof checks.
    ConditionRecord,
}
31
/// One planned (or applied) normalization change, expressed as a before/after
/// diff at a JSON-path-like location inside the project.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NormalizeChange {
    /// Category of the change.
    pub kind: NormalizeChangeKind,
    /// ID of the finding this change belongs to (empty for orphan records).
    pub finding_id: String,
    /// Human-readable path to the changed value, e.g. `findings[0].id`.
    pub path: String,
    /// Value before the change (`Value::Null` for insertions).
    pub before: Value,
    /// Value after the change (`Value::Null` for removals).
    pub after: Value,
    /// Whether the change can be applied automatically without review.
    pub safe: bool,
    /// One-line explanation of why the change is planned.
    pub description: String,
}
42
/// Aggregate counts over a normalization run's change list.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct NormalizeSummary {
    /// Total number of planned changes.
    pub planned: usize,
    /// How many planned changes are marked safe.
    pub safe: usize,
    /// How many planned changes are not safe (`planned - safe`).
    pub unsafe_count: usize,
    /// How many changes were actually written (0 for dry runs).
    pub applied: usize,
}
50
/// Full result of a normalization run against one source.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NormalizeReport {
    /// Display form of the source path (or `<loaded>` for in-memory runs).
    pub source: String,
    /// Detected source kind: `project_file`, `vela_repo`, or `packet_dir`.
    pub source_kind: String,
    /// Whether this run was a plan-only dry run.
    pub dry_run: bool,
    /// True when the run refused to act (e.g. in-place packet normalization).
    pub refused: bool,
    /// Why the run refused, when `refused` is true.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub refusal_reason: Option<String>,
    /// Aggregate counts over `changes`.
    pub summary: NormalizeSummary,
    /// The individual planned changes.
    pub changes: Vec<NormalizeChange>,
}
62
63impl NormalizeReport {
64 fn refused(source: &Path, source_kind: &str, dry_run: bool, reason: String) -> Self {
65 Self {
66 source: source.display().to_string(),
67 source_kind: source_kind.to_string(),
68 dry_run,
69 refused: true,
70 refusal_reason: Some(reason),
71 summary: NormalizeSummary::default(),
72 changes: Vec::new(),
73 }
74 }
75}
76
/// Map a raw entity-type label onto the finding-bundle schema vocabulary.
///
/// Matching is case-insensitive and ignores surrounding whitespace. Labels
/// already in the schema vocabulary pass through unchanged; known aliases
/// (e.g. "drug" -> "compound", "receptor" -> "protein") fold onto their
/// canonical type; anything unrecognized maps to "other".
pub fn entity_type(raw: &str) -> String {
    // The ten canonical types accepted by the finding-bundle schema.
    const SCHEMA_TYPES: [&str; 10] = [
        "gene",
        "protein",
        "compound",
        "disease",
        "cell_type",
        "organism",
        "pathway",
        "assay",
        "anatomical_structure",
        "other",
    ];

    // Trim before lowercasing so labels with stray whitespace still match;
    // this also keeps key construction consistent with `entity_name`.
    let t = raw.trim().to_lowercase();

    // Canonical labels pass straight through (no clone needed).
    if SCHEMA_TYPES.contains(&t.as_str()) {
        return t;
    }

    let canonical = match t.as_str() {
        "chemical" | "chemical_class" | "chemical_family" | "chemical_compound"
        | "chemical_group" | "drug" | "drug_class" | "metabolite" | "lipid" | "hormone"
        | "nucleic_acid" | "nucleic acid" | "amino_acid_residue" => "compound",
        "protein_complex" | "protein complex" | "protein family" | "receptor" | "antibody"
        | "antibody_marker" | "modified_protein" | "biomarker" => "protein",
        "gene_variant" | "genetic_variant" => "gene",
        "cell" | "cell type" => "cell_type",
        "disease_state" | "pathological state" | "pathological_process" | "pathology"
        | "condition" => "disease",
        "structure" | "tissue" | "organ system" | "organ_system" | "subcellular structure"
        | "organelle" | "cellular_structure" | "biological_barrier" | "fluid" => {
            "anatomical_structure"
        }
        "biological_process" | "biological process" | "process" | "molecular process"
        | "metabolic pathway" | "physiological_process" | "physiological process" => "pathway",
        "method" | "technology" | "imaging_modality" | "diagnostic tool" | "device" => "assay",
        "bacterium" | "virus" | "pathogen" | "microbiome" => "organism",
        _ => "other",
    };
    canonical.to_string()
}
138
/// Map a common biomedical alias onto its canonical display name.
///
/// Lookup is case-insensitive and ignores surrounding whitespace; names that
/// match no known alias are returned unchanged, preserving original casing.
pub fn entity_name(name: &str) -> String {
    // Trim first, then lowercase: one allocation instead of two, with
    // identical matching behavior (lowercasing never introduces whitespace).
    let key = name.trim().to_lowercase();
    let canonical = match key.as_str() {
        "bbb" | "blood brain barrier" | "blood–brain barrier" => "blood-brain barrier",
        "ad" | "alzheimer disease" | "alzheimer's" => "Alzheimer's disease",
        "abeta" | "aβ" | "amyloid beta" | "a-beta" | "amyloid-β" => "amyloid-beta",
        "apoe4" | "apoe-4" | "apolipoprotein e4" => "APOE4",
        "pd" | "parkinson disease" => "Parkinson's disease",
        "ros" => "reactive oxygen species",
        "nps" | "np" => "nanoparticles",
        "evs" => "extracellular vesicles",
        "fus" => "focused ultrasound",
        "tjs" | "tight junction" => "tight junctions",
        // Unknown alias: keep the caller's original spelling.
        _ => return name.to_string(),
    };
    canonical.to_string()
}
157
158pub fn plan_project(frontier: &Project) -> NormalizeReport {
160 let changes = plan_project_changes(frontier);
161 report_from_changes("<loaded>", "loaded", true, false, None, changes, 0)
162}
163
164pub fn plan_project_changes(frontier: &Project) -> Vec<NormalizeChange> {
167 let mut changes = plan_findings(&frontier.findings);
168 let id_map = normalized_id_map(&frontier.findings);
169 let projection = sources::derive_projection(frontier);
170
171 for (finding_index, bundle) in frontier.findings.iter().enumerate() {
172 if let Some(new_id) = id_map.get(&bundle.id) {
173 changes.push(NormalizeChange {
174 kind: NormalizeChangeKind::FindingId,
175 finding_id: bundle.id.clone(),
176 path: format!("findings[{finding_index}].id"),
177 before: json!(bundle.id),
178 after: json!(new_id),
179 safe: true,
180 description: "Rewrite finding ID to match normalized content address".to_string(),
181 });
182 }
183
184 for (link_index, link) in bundle.links.iter().enumerate() {
185 if let Some(new_target) = id_map.get(&link.target) {
186 changes.push(NormalizeChange {
187 kind: NormalizeChangeKind::LinkTarget,
188 finding_id: bundle.id.clone(),
189 path: format!("findings[{finding_index}].links[{link_index}].target"),
190 before: json!(link.target),
191 after: json!(new_target),
192 safe: true,
193 description:
194 "Rewrite internal link target after normalized content-address update"
195 .to_string(),
196 });
197 }
198 }
199 }
200
201 let current_source_ids = frontier
202 .sources
203 .iter()
204 .map(|source| source.id.as_str())
205 .collect::<HashSet<_>>();
206 for source in &projection.sources {
207 if !current_source_ids.contains(source.id.as_str()) {
208 changes.push(NormalizeChange {
209 kind: NormalizeChangeKind::SourceRecord,
210 finding_id: source.finding_ids.first().cloned().unwrap_or_default(),
211 path: format!("sources[{}]", source.id),
212 before: Value::Null,
213 after: json!(source),
214 safe: true,
215 description: "Materialize derived source record from finding provenance"
216 .to_string(),
217 });
218 }
219 }
220
221 let current_atom_ids = frontier
222 .evidence_atoms
223 .iter()
224 .map(|atom| atom.id.as_str())
225 .collect::<HashSet<_>>();
226 for atom in &projection.evidence_atoms {
227 if !current_atom_ids.contains(atom.id.as_str()) {
228 changes.push(NormalizeChange {
229 kind: NormalizeChangeKind::EvidenceAtom,
230 finding_id: atom.finding_id.clone(),
231 path: format!("evidence_atoms[{}]", atom.id),
232 before: Value::Null,
233 after: json!(atom),
234 safe: true,
235 description:
236 "Materialize derived evidence atom linking source, evidence, and finding"
237 .to_string(),
238 });
239 }
240 }
241
242 let current_condition_ids = frontier
243 .condition_records
244 .iter()
245 .map(|record| record.id.as_str())
246 .collect::<HashSet<_>>();
247 for record in &projection.condition_records {
248 if !current_condition_ids.contains(record.id.as_str()) {
249 changes.push(NormalizeChange {
250 kind: NormalizeChangeKind::ConditionRecord,
251 finding_id: record.finding_id.clone(),
252 path: format!("condition_records[{}]", record.id),
253 before: Value::Null,
254 after: json!(record),
255 safe: true,
256 description:
257 "Materialize derived condition boundary used for review and proof checks"
258 .to_string(),
259 });
260 }
261 }
262
263 changes
264}
265
266pub fn plan_findings(bundles: &[FindingBundle]) -> Vec<NormalizeChange> {
268 let mut changes = Vec::new();
269
270 for (finding_index, bundle) in bundles.iter().enumerate() {
271 let mut seen = HashSet::new();
272
273 for (entity_index, entity) in bundle.assertion.entities.iter().enumerate() {
274 let normalized_name = entity_name(&entity.name);
275 let normalized_type = entity_type(&entity.entity_type);
276 let dedupe_key = (normalized_name.to_lowercase(), normalized_type.clone());
277 let entity_path =
278 format!("findings[{finding_index}].assertion.entities[{entity_index}]");
279
280 if !seen.insert(dedupe_key) {
281 changes.push(NormalizeChange {
282 kind: NormalizeChangeKind::DuplicateEntity,
283 finding_id: bundle.id.clone(),
284 path: entity_path,
285 before: json!({
286 "name": entity.name,
287 "type": entity.entity_type,
288 }),
289 after: Value::Null,
290 safe: true,
291 description: "Remove duplicate entity after canonical name/type normalization"
292 .to_string(),
293 });
294 continue;
295 }
296
297 if normalized_type != entity.entity_type {
298 changes.push(NormalizeChange {
299 kind: NormalizeChangeKind::EntityType,
300 finding_id: bundle.id.clone(),
301 path: format!("{entity_path}.type"),
302 before: json!(entity.entity_type),
303 after: json!(normalized_type),
304 safe: true,
305 description: "Map entity type to the finding-bundle schema vocabulary"
306 .to_string(),
307 });
308 }
309
310 if normalized_name != entity.name {
311 changes.push(NormalizeChange {
312 kind: NormalizeChangeKind::EntityName,
313 finding_id: bundle.id.clone(),
314 path: format!("{entity_path}.name"),
315 before: json!(entity.name),
316 after: json!(normalized_name),
317 safe: true,
318 description: "Map common biomedical alias to canonical display name"
319 .to_string(),
320 });
321 }
322 }
323 }
324
325 changes
326}
327
328pub fn plan_source(source_path: &Path) -> Result<NormalizeReport, String> {
330 normalize_source(source_path, NormalizeOptions { dry_run: true })
331}
332
333pub fn apply_source(source_path: &Path) -> Result<NormalizeReport, String> {
338 normalize_source(source_path, NormalizeOptions { dry_run: false })
339}
340
341pub fn normalize_source(
343 source_path: &Path,
344 options: NormalizeOptions,
345) -> Result<NormalizeReport, String> {
346 let source = repo::detect(source_path)?;
347 let source_kind = source_kind(&source);
348
349 if matches!(source, VelaSource::PacketDir(_)) && !options.dry_run {
350 return Ok(NormalizeReport::refused(
351 source_path,
352 source_kind,
353 options.dry_run,
354 "Refusing to normalize a frontier packet directory in place; export a new packet from a normalized frontier instead.".to_string(),
355 ));
356 }
357
358 let mut frontier = repo::load(&source)?;
359 let changes = plan_project_changes(&frontier);
360 let applied = if options.dry_run {
361 0
362 } else {
363 apply_project_safe_normalizations(&mut frontier)?;
364 repo::save(&source, &frontier)?;
365 changes.iter().filter(|c| c.safe).count()
366 };
367
368 Ok(report_from_changes(
369 &source_path.display().to_string(),
370 source_kind,
371 options.dry_run,
372 false,
373 None,
374 changes,
375 applied,
376 ))
377}
378
379fn report_from_changes(
380 source: &str,
381 source_kind: &str,
382 dry_run: bool,
383 refused: bool,
384 refusal_reason: Option<String>,
385 changes: Vec<NormalizeChange>,
386 applied: usize,
387) -> NormalizeReport {
388 let safe = changes.iter().filter(|c| c.safe).count();
389 let unsafe_count = changes.len().saturating_sub(safe);
390 NormalizeReport {
391 source: source.to_string(),
392 source_kind: source_kind.to_string(),
393 dry_run,
394 refused,
395 refusal_reason,
396 summary: NormalizeSummary {
397 planned: changes.len(),
398 safe,
399 unsafe_count,
400 applied,
401 },
402 changes,
403 }
404}
405
406fn source_kind(source: &VelaSource) -> &'static str {
407 match source {
408 VelaSource::ProjectFile(_) => "project_file",
409 VelaSource::VelaRepo(_) => "vela_repo",
410 VelaSource::PacketDir(_) => "packet_dir",
411 }
412}
413
414fn apply_project_safe_normalizations(frontier: &mut Project) -> Result<usize, String> {
415 let planned = plan_project_changes(frontier)
416 .into_iter()
417 .filter(|change| change.safe)
418 .count();
419
420 normalize_bundle_entities(&mut frontier.findings);
421 rewrite_content_ids(&mut frontier.findings)?;
422 sources::materialize_project(frontier);
423
424 Ok(planned)
425}
426
427fn normalize_bundle_entities(bundles: &mut [FindingBundle]) {
428 for bundle in bundles.iter_mut() {
429 for entity in bundle.assertion.entities.iter_mut() {
430 entity.entity_type = entity_type(&entity.entity_type);
431 entity.name = entity_name(&entity.name);
432 }
433
434 let mut seen = HashSet::new();
435 bundle.assertion.entities.retain(|entity| {
436 let key = (entity.name.to_lowercase(), entity.entity_type.clone());
437 seen.insert(key)
438 });
439 }
440}
441
442fn normalized_id_map(bundles: &[FindingBundle]) -> std::collections::BTreeMap<String, String> {
443 let mut id_map = std::collections::BTreeMap::new();
444 for bundle in bundles {
445 let mut normalized = bundle.clone();
446 normalize_bundle_entities(std::slice::from_mut(&mut normalized));
447 let expected =
448 FindingBundle::content_address(&normalized.assertion, &normalized.provenance);
449 if expected != bundle.id {
450 id_map.insert(bundle.id.clone(), expected);
451 }
452 }
453 id_map
454}
455
456fn rewrite_content_ids(bundles: &mut [FindingBundle]) -> Result<(), String> {
457 let mut id_map = std::collections::BTreeMap::new();
458 let mut final_ids = HashSet::new();
459
460 for bundle in bundles.iter() {
461 let expected = FindingBundle::content_address(&bundle.assertion, &bundle.provenance);
462 if !final_ids.insert(expected.clone()) {
463 return Err(format!(
464 "Refusing to rewrite IDs because normalized content address '{}' is duplicated",
465 expected
466 ));
467 }
468 if expected != bundle.id {
469 id_map.insert(bundle.id.clone(), expected);
470 }
471 }
472
473 for bundle in bundles.iter_mut() {
474 if let Some(new_id) = id_map.get(&bundle.id) {
475 bundle.id = new_id.clone();
476 }
477 for link in &mut bundle.links {
478 if let Some(new_target) = id_map.get(&link.target) {
479 link.target = new_target.clone();
480 }
481 }
482 }
483
484 Ok(())
485}
486
487pub fn normalize_findings(bundles: &mut [FindingBundle]) -> (usize, usize) {
489 let mut type_fixes = 0usize;
490 let mut name_fixes = 0usize;
491
492 for b in bundles.iter_mut() {
493 for e in b.assertion.entities.iter_mut() {
494 let new_type = entity_type(&e.entity_type);
495 if new_type != e.entity_type {
496 e.entity_type = new_type;
497 type_fixes += 1;
498 }
499
500 let new_name = entity_name(&e.name);
501 if new_name != e.name {
502 e.name = new_name;
503 name_fixes += 1;
504 }
505 }
506
507 let mut seen = std::collections::HashSet::new();
509 b.assertion.entities.retain(|e| {
510 let key = (e.name.to_lowercase(), e.entity_type.clone());
511 seen.insert(key)
512 });
513 }
514
515 (type_fixes, name_fixes)
516}
517
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;

    /// Minimal entity fixture; only name and type vary across tests.
    fn make_entity(name: &str, etype: &str) -> Entity {
        Entity {
            name: name.into(),
            entity_type: etype.into(),
            identifiers: serde_json::Map::new(),
            canonical_id: None,
            candidates: vec![],
            aliases: vec![],
            resolution_provenance: None,
            resolution_confidence: 1.0,
            resolution_method: None,
            species_context: None,
            needs_review: false,
        }
    }

    /// Minimal finding-bundle fixture wrapping the given entities.
    fn make_finding_with_entities(entities: Vec<Entity>) -> FindingBundle {
        FindingBundle {
            id: "test".into(),
            version: 1,
            previous_version: None,
            assertion: Assertion {
                text: "Test".into(),
                assertion_type: "mechanism".into(),
                entities,
                relation: None,
                direction: None,
                causal_claim: None,
                causal_evidence_grade: None,
            },
            evidence: Evidence {
                evidence_type: "experimental".into(),
                model_system: String::new(),
                species: None,
                method: String::new(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                evidence_spans: vec![],
            },
            conditions: Conditions {
                text: String::new(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            confidence: Confidence::raw(0.8, "seeded prior", 0.85),
            provenance: Provenance {
                source_type: "published_paper".into(),
                doi: None,
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: "Test".into(),
                authors: vec![],
                year: Some(2024),
                journal: None,
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: None,
            },
            flags: Flags {
                gap: false,
                negative_space: false,
                contested: false,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,
            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    // --- entity_type ---

    #[test]
    fn valid_types_pass_through() {
        for t in &[
            "gene",
            "protein",
            "compound",
            "disease",
            "cell_type",
            "organism",
            "pathway",
            "assay",
            "anatomical_structure",
            "other",
        ] {
            assert_eq!(entity_type(t), *t);
        }
    }

    #[test]
    fn gene_variants_map_to_gene() {
        assert_eq!(entity_type("gene_variant"), "gene");
        assert_eq!(entity_type("genetic_variant"), "gene");
    }

    #[test]
    fn drug_maps_to_compound() {
        for t in &[
            "drug",
            "chemical",
            "metabolite",
            "lipid",
            "hormone",
            "drug_class",
        ] {
            assert_eq!(entity_type(t), "compound", "expected compound for {t}");
        }
    }

    #[test]
    fn protein_complex_maps_to_protein() {
        for t in &["protein_complex", "receptor", "antibody", "biomarker"] {
            assert_eq!(entity_type(t), "protein", "expected protein for {t}");
        }
    }

    #[test]
    fn cell_variants_map_to_cell_type() {
        assert_eq!(entity_type("cell"), "cell_type");
        assert_eq!(entity_type("cell type"), "cell_type");
    }

    #[test]
    fn disease_variants_map_to_disease() {
        for t in &["condition", "pathology", "disease_state"] {
            assert_eq!(entity_type(t), "disease", "expected disease for {t}");
        }
    }

    #[test]
    fn anatomical_variants() {
        for t in &[
            "tissue",
            "organ system",
            "organelle",
            "biological_barrier",
            "fluid",
        ] {
            assert_eq!(
                entity_type(t),
                "anatomical_structure",
                "expected anatomical_structure for {t}"
            );
        }
    }

    #[test]
    fn pathway_variants() {
        for t in &["biological_process", "process", "metabolic pathway"] {
            assert_eq!(entity_type(t), "pathway", "expected pathway for {t}");
        }
    }

    #[test]
    fn assay_variants() {
        for t in &["method", "technology", "device"] {
            assert_eq!(entity_type(t), "assay", "expected assay for {t}");
        }
    }

    #[test]
    fn organism_variants() {
        for t in &["bacterium", "virus", "pathogen", "microbiome"] {
            assert_eq!(entity_type(t), "organism", "expected organism for {t}");
        }
    }

    #[test]
    fn unknown_type_maps_to_other() {
        assert_eq!(entity_type("banana"), "other");
        assert_eq!(entity_type("foobar"), "other");
        assert_eq!(entity_type(""), "other");
    }

    #[test]
    fn case_insensitive_type_mapping() {
        assert_eq!(entity_type("Gene"), "gene");
        assert_eq!(entity_type("DRUG"), "compound");
        assert_eq!(entity_type("Protein_Complex"), "protein");
        assert_eq!(entity_type("CELL"), "cell_type");
    }

    // --- entity_name ---

    #[test]
    fn bbb_normalizes() {
        assert_eq!(entity_name("BBB"), "blood-brain barrier");
        assert_eq!(entity_name("blood brain barrier"), "blood-brain barrier");
        assert_eq!(entity_name("blood–brain barrier"), "blood-brain barrier");
    }

    #[test]
    fn ad_normalizes() {
        assert_eq!(entity_name("AD"), "Alzheimer's disease");
        assert_eq!(entity_name("alzheimer's"), "Alzheimer's disease");
        assert_eq!(entity_name("alzheimer disease"), "Alzheimer's disease");
    }

    #[test]
    fn amyloid_beta_normalizes() {
        assert_eq!(entity_name("Abeta"), "amyloid-beta");
        assert_eq!(entity_name("a-beta"), "amyloid-beta");
        assert_eq!(entity_name("amyloid beta"), "amyloid-beta");
    }

    #[test]
    fn apoe4_normalizes() {
        assert_eq!(entity_name("APOE4"), "APOE4");
        assert_eq!(entity_name("apoe-4"), "APOE4");
        assert_eq!(entity_name("apolipoprotein e4"), "APOE4");
    }

    #[test]
    fn unknown_name_unchanged() {
        assert_eq!(entity_name("NLRP3"), "NLRP3");
        assert_eq!(entity_name("some random entity"), "some random entity");
    }

    #[test]
    fn pd_normalizes() {
        assert_eq!(entity_name("PD"), "Parkinson's disease");
        assert_eq!(entity_name("parkinson disease"), "Parkinson's disease");
    }

    #[test]
    fn ros_and_others() {
        assert_eq!(entity_name("ROS"), "reactive oxygen species");
        assert_eq!(entity_name("NPs"), "nanoparticles");
        assert_eq!(entity_name("EVs"), "extracellular vesicles");
        assert_eq!(entity_name("FUS"), "focused ultrasound");
        assert_eq!(entity_name("TJs"), "tight junctions");
    }

    // --- normalize_findings / plan_findings ---

    #[test]
    fn normalize_fixes_types_and_names() {
        let mut bundles = vec![make_finding_with_entities(vec![
            make_entity("BBB", "biological_barrier"),
            make_entity("AD", "condition"),
        ])];
        let (type_fixes, name_fixes) = normalize_findings(&mut bundles);
        assert_eq!(type_fixes, 2);
        assert_eq!(name_fixes, 2);
        assert_eq!(bundles[0].assertion.entities[0].name, "blood-brain barrier");
        assert_eq!(
            bundles[0].assertion.entities[0].entity_type,
            "anatomical_structure"
        );
        assert_eq!(bundles[0].assertion.entities[1].name, "Alzheimer's disease");
        assert_eq!(bundles[0].assertion.entities[1].entity_type, "disease");
    }

    #[test]
    fn deduplication_removes_duplicate_entities() {
        let mut bundles = vec![make_finding_with_entities(vec![
            make_entity("NLRP3", "protein"),
            // Same canonical (name, type) as above — should be dropped.
            make_entity("nlrp3", "protein"),
            // Same name, different type — should be kept.
            make_entity("NLRP3", "gene"),
        ])];
        let (_tf, _nf) = normalize_findings(&mut bundles);
        assert_eq!(bundles[0].assertion.entities.len(), 2);
    }

    #[test]
    fn normalize_no_changes_returns_zero() {
        let mut bundles = vec![make_finding_with_entities(vec![make_entity(
            "NLRP3", "protein",
        )])];
        let (type_fixes, name_fixes) = normalize_findings(&mut bundles);
        assert_eq!(type_fixes, 0);
        assert_eq!(name_fixes, 0);
    }

    #[test]
    fn normalize_empty_bundles() {
        let mut bundles: Vec<FindingBundle> = vec![];
        let (tf, nf) = normalize_findings(&mut bundles);
        assert_eq!(tf, 0);
        assert_eq!(nf, 0);
    }

    #[test]
    fn plan_findings_reports_safe_entity_repairs() {
        let bundles = vec![make_finding_with_entities(vec![
            make_entity("BBB", "biological_barrier"),
            make_entity("blood-brain barrier", "anatomical_structure"),
        ])];

        let plan = plan_findings(&bundles);

        assert!(
            plan.iter()
                .any(|change| change.kind == NormalizeChangeKind::EntityType)
        );
        assert!(
            plan.iter()
                .any(|change| change.kind == NormalizeChangeKind::EntityName)
        );
        assert!(
            plan.iter()
                .any(|change| change.kind == NormalizeChangeKind::DuplicateEntity)
        );
        assert!(plan.iter().all(|change| change.safe));
    }

    // --- normalize_source plumbing (plan/apply/refuse) ---

    #[test]
    fn source_dry_run_does_not_write() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("frontier.json");
        let frontier = crate::project::assemble(
            "test",
            vec![make_finding_with_entities(vec![make_entity(
                "BBB",
                "biological_barrier",
            )])],
            1,
            0,
            "test",
        );
        std::fs::write(&path, serde_json::to_string_pretty(&frontier).unwrap()).unwrap();

        let report = plan_source(&path).unwrap();
        let saved: crate::project::Project =
            serde_json::from_str(&std::fs::read_to_string(&path).unwrap()).unwrap();

        assert!(report.dry_run);
        assert_eq!(report.summary.applied, 0);
        // The file on disk must still hold the original, un-normalized type.
        assert_eq!(
            saved.findings[0].assertion.entities[0].entity_type,
            "biological_barrier"
        );
    }

    #[test]
    fn source_apply_writes_safe_repairs() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("frontier.json");
        let frontier = crate::project::assemble(
            "test",
            vec![make_finding_with_entities(vec![make_entity(
                "BBB",
                "biological_barrier",
            )])],
            1,
            0,
            "test",
        );
        std::fs::write(&path, serde_json::to_string_pretty(&frontier).unwrap()).unwrap();

        let report = apply_source(&path).unwrap();
        let saved: crate::project::Project =
            serde_json::from_str(&std::fs::read_to_string(&path).unwrap()).unwrap();

        assert!(!report.dry_run);
        assert_eq!(report.summary.applied, report.summary.safe);
        assert_eq!(
            saved.findings[0].assertion.entities[0].entity_type,
            "anatomical_structure"
        );
        assert_eq!(
            saved.findings[0].assertion.entities[0].name,
            "blood-brain barrier"
        );
        // After apply, the stored ID must match the recomputed content address.
        assert_eq!(
            saved.findings[0].id,
            FindingBundle::content_address(
                &saved.findings[0].assertion,
                &saved.findings[0].provenance,
            )
        );
    }

    #[test]
    fn source_apply_refuses_packet_directory() {
        let tmp = tempfile::TempDir::new().unwrap();
        std::fs::write(
            tmp.path().join("manifest.json"),
            r#"{"packet_format":"vela.frontier-packet"}"#,
        )
        .unwrap();

        let report = apply_source(tmp.path()).unwrap();

        assert!(report.refused);
        assert_eq!(report.source_kind, "packet_dir");
        assert!(report.refusal_reason.unwrap().contains("Refusing"));
    }
}