// vela_protocol/normalize.rs
1//! Stage 3: NORMALIZE — deduplicate entities, constrain types to schema.
2
3use crate::bundle::FindingBundle;
4use crate::project::Project;
5use crate::repo::{self, VelaSource};
6use crate::sources;
7
8use serde::{Deserialize, Serialize};
9use serde_json::{Value, json};
10use std::collections::HashSet;
11use std::path::Path;
12
/// Options controlling how [`normalize_source`] executes.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct NormalizeOptions {
    /// When true, compute the same deterministic plan without writing changes.
    pub dry_run: bool,
}
18
/// The category of a single planned normalization repair.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NormalizeChangeKind {
    /// Rewrite an entity's type to the schema vocabulary.
    EntityType,
    /// Rewrite an entity's name to its canonical display form.
    EntityName,
    /// Remove an entity that duplicates another after normalization.
    DuplicateEntity,
    /// Rewrite a finding's ID to its normalized content address.
    FindingId,
    /// Rewrite an internal link target after a finding ID changed.
    LinkTarget,
    /// Materialize a derived source record.
    SourceRecord,
    /// Materialize a derived evidence atom.
    EvidenceAtom,
    /// Materialize a derived condition record.
    ConditionRecord,
}
31
/// One planned repair: a path-addressed before/after value pair.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NormalizeChange {
    /// What kind of repair this is.
    pub kind: NormalizeChangeKind,
    /// ID of the finding this change belongs to (may be empty when a derived
    /// record has no associated finding IDs).
    pub finding_id: String,
    /// Location of the change, e.g. `findings[0].assertion.entities[1].name`.
    pub path: String,
    /// Value before the change (`Null` when the change inserts a record).
    pub before: Value,
    /// Value after the change (`Null` when the change removes a record).
    pub after: Value,
    /// Whether the change can be applied automatically.
    pub safe: bool,
    /// Human-readable explanation of the repair.
    pub description: String,
}
42
/// Aggregate counts summarizing a normalization plan or run.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct NormalizeSummary {
    /// Total number of planned changes.
    pub planned: usize,
    /// Number of changes marked safe to auto-apply.
    pub safe: usize,
    /// Number of changes not marked safe (planned minus safe).
    pub unsafe_count: usize,
    /// Number of changes actually applied (0 for dry runs).
    pub applied: usize,
}
50
/// Full result of planning or applying normalization against a source.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NormalizeReport {
    /// Display form of the source path (`<loaded>` for in-memory plans).
    pub source: String,
    /// Kind of source: `project_file`, `vela_repo`, or `packet_dir`.
    pub source_kind: String,
    /// True when no changes were written.
    pub dry_run: bool,
    /// True when normalization was refused (e.g. applying to a packet dir).
    pub refused: bool,
    /// Why normalization was refused, present only when `refused` is true.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub refusal_reason: Option<String>,
    /// Aggregate counts over `changes`.
    pub summary: NormalizeSummary,
    /// The individual planned (and possibly applied) changes.
    pub changes: Vec<NormalizeChange>,
}
62
63impl NormalizeReport {
64    fn refused(source: &Path, source_kind: &str, dry_run: bool, reason: String) -> Self {
65        Self {
66            source: source.display().to_string(),
67            source_kind: source_kind.to_string(),
68            dry_run,
69            refused: true,
70            refusal_reason: Some(reason),
71            summary: NormalizeSummary::default(),
72            changes: Vec::new(),
73        }
74    }
75}
76
/// Map LLM-invented entity types to the 10 valid schema types.
///
/// Matching is case-insensitive. A type that is already one of the ten
/// schema types passes through as its lowercased form; known aliases fold
/// onto their schema type; anything unrecognized becomes `"other"`.
pub fn entity_type(raw: &str) -> String {
    // The closed vocabulary allowed by the finding-bundle schema.
    const VALID: [&str; 10] = [
        "gene",
        "protein",
        "compound",
        "disease",
        "cell_type",
        "organism",
        "pathway",
        "assay",
        "anatomical_structure",
        "other",
    ];

    let t = raw.to_lowercase();

    // Already valid: move the lowercased string out instead of cloning it.
    if VALID.contains(&t.as_str()) {
        return t;
    }

    let mapped = match t.as_str() {
        // Compounds
        "chemical" | "chemical_class" | "chemical_family" | "chemical_compound"
        | "chemical_group" | "drug" | "drug_class" | "metabolite" | "lipid" | "hormone"
        | "nucleic_acid" | "nucleic acid" | "amino_acid_residue" => "compound",
        // Proteins
        "protein_complex" | "protein complex" | "protein family" | "receptor" | "antibody"
        | "antibody_marker" | "modified_protein" | "biomarker" => "protein",
        // Genes
        "gene_variant" | "genetic_variant" => "gene",
        // Cell types
        "cell" | "cell type" => "cell_type",
        // Diseases
        "disease_state" | "pathological state" | "pathological_process" | "pathology"
        | "condition" => "disease",
        // Anatomical
        "structure" | "tissue" | "organ system" | "organ_system" | "subcellular structure"
        | "organelle" | "cellular_structure" | "biological_barrier" | "fluid" => {
            "anatomical_structure"
        }
        // Pathways
        "biological_process" | "biological process" | "process" | "molecular process"
        | "metabolic pathway" | "physiological_process" | "physiological process" => "pathway",
        // Assays
        "method" | "technology" | "imaging_modality" | "diagnostic tool" | "device" => "assay",
        // Organisms
        "bacterium" | "virus" | "pathogen" | "microbiome" => "organism",
        // Everything else
        _ => "other",
    };
    mapped.to_string()
}
138
/// Normalize entity name to canonical form.
///
/// Lookup is case- and surrounding-whitespace-insensitive. Unknown names are
/// returned unchanged (not trimmed), preserving the caller's spelling.
pub fn entity_name(name: &str) -> String {
    // Trim the borrowed &str first so lowercasing is the only allocation
    // (the original lowercased, trimmed, then re-allocated a second String).
    let key = name.trim().to_lowercase();
    let canonical = match key.as_str() {
        "bbb" | "blood brain barrier" | "blood–brain barrier" => "blood-brain barrier",
        "ad" | "alzheimer disease" | "alzheimer's" => "Alzheimer's disease",
        "abeta" | "aβ" | "amyloid beta" | "a-beta" | "amyloid-β" => "amyloid-beta",
        "apoe4" | "apoe-4" | "apolipoprotein e4" => "APOE4",
        "pd" | "parkinson disease" => "Parkinson's disease",
        "ros" => "reactive oxygen species",
        "nps" | "np" => "nanoparticles",
        "evs" => "extracellular vesicles",
        "fus" => "focused ultrasound",
        "tjs" | "tight junction" => "tight junctions",
        _ => return name.to_string(),
    };
    canonical.to_string()
}
157
158/// Build a deterministic, safe repair plan for a loaded frontier.
159pub fn plan_project(frontier: &Project) -> NormalizeReport {
160    let changes = plan_project_changes(frontier);
161    report_from_changes("<loaded>", "loaded", true, false, None, changes, 0)
162}
163
164/// Build a deterministic repair plan for a loaded frontier, including content
165/// address and internal link updates implied by entity normalization.
166pub fn plan_project_changes(frontier: &Project) -> Vec<NormalizeChange> {
167    let mut changes = plan_findings(&frontier.findings);
168    let id_map = normalized_id_map(&frontier.findings);
169    let projection = sources::derive_projection(frontier);
170
171    for (finding_index, bundle) in frontier.findings.iter().enumerate() {
172        if let Some(new_id) = id_map.get(&bundle.id) {
173            changes.push(NormalizeChange {
174                kind: NormalizeChangeKind::FindingId,
175                finding_id: bundle.id.clone(),
176                path: format!("findings[{finding_index}].id"),
177                before: json!(bundle.id),
178                after: json!(new_id),
179                safe: true,
180                description: "Rewrite finding ID to match normalized content address".to_string(),
181            });
182        }
183
184        for (link_index, link) in bundle.links.iter().enumerate() {
185            if let Some(new_target) = id_map.get(&link.target) {
186                changes.push(NormalizeChange {
187                    kind: NormalizeChangeKind::LinkTarget,
188                    finding_id: bundle.id.clone(),
189                    path: format!("findings[{finding_index}].links[{link_index}].target"),
190                    before: json!(link.target),
191                    after: json!(new_target),
192                    safe: true,
193                    description:
194                        "Rewrite internal link target after normalized content-address update"
195                            .to_string(),
196                });
197            }
198        }
199    }
200
201    let current_source_ids = frontier
202        .sources
203        .iter()
204        .map(|source| source.id.as_str())
205        .collect::<HashSet<_>>();
206    for source in &projection.sources {
207        if !current_source_ids.contains(source.id.as_str()) {
208            changes.push(NormalizeChange {
209                kind: NormalizeChangeKind::SourceRecord,
210                finding_id: source.finding_ids.first().cloned().unwrap_or_default(),
211                path: format!("sources[{}]", source.id),
212                before: Value::Null,
213                after: json!(source),
214                safe: true,
215                description: "Materialize derived source record from finding provenance"
216                    .to_string(),
217            });
218        }
219    }
220
221    let current_atom_ids = frontier
222        .evidence_atoms
223        .iter()
224        .map(|atom| atom.id.as_str())
225        .collect::<HashSet<_>>();
226    for atom in &projection.evidence_atoms {
227        if !current_atom_ids.contains(atom.id.as_str()) {
228            changes.push(NormalizeChange {
229                kind: NormalizeChangeKind::EvidenceAtom,
230                finding_id: atom.finding_id.clone(),
231                path: format!("evidence_atoms[{}]", atom.id),
232                before: Value::Null,
233                after: json!(atom),
234                safe: true,
235                description:
236                    "Materialize derived evidence atom linking source, evidence, and finding"
237                        .to_string(),
238            });
239        }
240    }
241
242    let current_condition_ids = frontier
243        .condition_records
244        .iter()
245        .map(|record| record.id.as_str())
246        .collect::<HashSet<_>>();
247    for record in &projection.condition_records {
248        if !current_condition_ids.contains(record.id.as_str()) {
249            changes.push(NormalizeChange {
250                kind: NormalizeChangeKind::ConditionRecord,
251                finding_id: record.finding_id.clone(),
252                path: format!("condition_records[{}]", record.id),
253                before: Value::Null,
254                after: json!(record),
255                safe: true,
256                description:
257                    "Materialize derived condition boundary used for review and proof checks"
258                        .to_string(),
259            });
260        }
261    }
262
263    changes
264}
265
266/// Build a deterministic, safe repair plan for findings.
267pub fn plan_findings(bundles: &[FindingBundle]) -> Vec<NormalizeChange> {
268    let mut changes = Vec::new();
269
270    for (finding_index, bundle) in bundles.iter().enumerate() {
271        let mut seen = HashSet::new();
272
273        for (entity_index, entity) in bundle.assertion.entities.iter().enumerate() {
274            let normalized_name = entity_name(&entity.name);
275            let normalized_type = entity_type(&entity.entity_type);
276            let dedupe_key = (normalized_name.to_lowercase(), normalized_type.clone());
277            let entity_path =
278                format!("findings[{finding_index}].assertion.entities[{entity_index}]");
279
280            if !seen.insert(dedupe_key) {
281                changes.push(NormalizeChange {
282                    kind: NormalizeChangeKind::DuplicateEntity,
283                    finding_id: bundle.id.clone(),
284                    path: entity_path,
285                    before: json!({
286                        "name": entity.name,
287                        "type": entity.entity_type,
288                    }),
289                    after: Value::Null,
290                    safe: true,
291                    description: "Remove duplicate entity after canonical name/type normalization"
292                        .to_string(),
293                });
294                continue;
295            }
296
297            if normalized_type != entity.entity_type {
298                changes.push(NormalizeChange {
299                    kind: NormalizeChangeKind::EntityType,
300                    finding_id: bundle.id.clone(),
301                    path: format!("{entity_path}.type"),
302                    before: json!(entity.entity_type),
303                    after: json!(normalized_type),
304                    safe: true,
305                    description: "Map entity type to the finding-bundle schema vocabulary"
306                        .to_string(),
307                });
308            }
309
310            if normalized_name != entity.name {
311                changes.push(NormalizeChange {
312                    kind: NormalizeChangeKind::EntityName,
313                    finding_id: bundle.id.clone(),
314                    path: format!("{entity_path}.name"),
315                    before: json!(entity.name),
316                    after: json!(normalized_name),
317                    safe: true,
318                    description: "Map common biomedical alias to canonical display name"
319                        .to_string(),
320                });
321            }
322        }
323    }
324
325    changes
326}
327
328/// Plan normalization for a source path without writing changes.
329pub fn plan_source(source_path: &Path) -> Result<NormalizeReport, String> {
330    normalize_source(source_path, NormalizeOptions { dry_run: true })
331}
332
333/// Apply safe normalization repairs to a source path.
334///
335/// Packet directories are immutable review artifacts; applying directly to one
336/// returns a refused report instead of writing into the packet.
337pub fn apply_source(source_path: &Path) -> Result<NormalizeReport, String> {
338    normalize_source(source_path, NormalizeOptions { dry_run: false })
339}
340
341/// Plan or apply normalization for a source path.
342pub fn normalize_source(
343    source_path: &Path,
344    options: NormalizeOptions,
345) -> Result<NormalizeReport, String> {
346    let source = repo::detect(source_path)?;
347    let source_kind = source_kind(&source);
348
349    if matches!(source, VelaSource::PacketDir(_)) && !options.dry_run {
350        return Ok(NormalizeReport::refused(
351            source_path,
352            source_kind,
353            options.dry_run,
354            "Refusing to normalize a frontier packet directory in place; export a new packet from a normalized frontier instead.".to_string(),
355        ));
356    }
357
358    let mut frontier = repo::load(&source)?;
359    let changes = plan_project_changes(&frontier);
360    let applied = if options.dry_run {
361        0
362    } else {
363        apply_project_safe_normalizations(&mut frontier)?;
364        repo::save(&source, &frontier)?;
365        changes.iter().filter(|c| c.safe).count()
366    };
367
368    Ok(report_from_changes(
369        &source_path.display().to_string(),
370        source_kind,
371        options.dry_run,
372        false,
373        None,
374        changes,
375        applied,
376    ))
377}
378
379fn report_from_changes(
380    source: &str,
381    source_kind: &str,
382    dry_run: bool,
383    refused: bool,
384    refusal_reason: Option<String>,
385    changes: Vec<NormalizeChange>,
386    applied: usize,
387) -> NormalizeReport {
388    let safe = changes.iter().filter(|c| c.safe).count();
389    let unsafe_count = changes.len().saturating_sub(safe);
390    NormalizeReport {
391        source: source.to_string(),
392        source_kind: source_kind.to_string(),
393        dry_run,
394        refused,
395        refusal_reason,
396        summary: NormalizeSummary {
397            planned: changes.len(),
398            safe,
399            unsafe_count,
400            applied,
401        },
402        changes,
403    }
404}
405
/// Short machine-readable label for a detected source kind, used in reports.
fn source_kind(source: &VelaSource) -> &'static str {
    match source {
        VelaSource::ProjectFile(_) => "project_file",
        VelaSource::VelaRepo(_) => "vela_repo",
        VelaSource::PacketDir(_) => "packet_dir",
    }
}
413
414fn apply_project_safe_normalizations(frontier: &mut Project) -> Result<usize, String> {
415    let planned = plan_project_changes(frontier)
416        .into_iter()
417        .filter(|change| change.safe)
418        .count();
419
420    normalize_bundle_entities(&mut frontier.findings);
421    rewrite_content_ids(&mut frontier.findings)?;
422    sources::materialize_project(frontier);
423
424    Ok(planned)
425}
426
427fn normalize_bundle_entities(bundles: &mut [FindingBundle]) {
428    for bundle in bundles.iter_mut() {
429        for entity in bundle.assertion.entities.iter_mut() {
430            entity.entity_type = entity_type(&entity.entity_type);
431            entity.name = entity_name(&entity.name);
432        }
433
434        let mut seen = HashSet::new();
435        bundle.assertion.entities.retain(|entity| {
436            let key = (entity.name.to_lowercase(), entity.entity_type.clone());
437            seen.insert(key)
438        });
439    }
440}
441
442fn normalized_id_map(bundles: &[FindingBundle]) -> std::collections::BTreeMap<String, String> {
443    let mut id_map = std::collections::BTreeMap::new();
444    for bundle in bundles {
445        let mut normalized = bundle.clone();
446        normalize_bundle_entities(std::slice::from_mut(&mut normalized));
447        let expected =
448            FindingBundle::content_address(&normalized.assertion, &normalized.provenance);
449        if expected != bundle.id {
450            id_map.insert(bundle.id.clone(), expected);
451        }
452    }
453    id_map
454}
455
456fn rewrite_content_ids(bundles: &mut [FindingBundle]) -> Result<(), String> {
457    let mut id_map = std::collections::BTreeMap::new();
458    let mut final_ids = HashSet::new();
459
460    for bundle in bundles.iter() {
461        let expected = FindingBundle::content_address(&bundle.assertion, &bundle.provenance);
462        if !final_ids.insert(expected.clone()) {
463            return Err(format!(
464                "Refusing to rewrite IDs because normalized content address '{}' is duplicated",
465                expected
466            ));
467        }
468        if expected != bundle.id {
469            id_map.insert(bundle.id.clone(), expected);
470        }
471    }
472
473    for bundle in bundles.iter_mut() {
474        if let Some(new_id) = id_map.get(&bundle.id) {
475            bundle.id = new_id.clone();
476        }
477        for link in &mut bundle.links {
478            if let Some(new_target) = id_map.get(&link.target) {
479                link.target = new_target.clone();
480            }
481        }
482    }
483
484    Ok(())
485}
486
487/// Normalize all findings: fix entity types and names, deduplicate entities within findings.
488pub fn normalize_findings(bundles: &mut [FindingBundle]) -> (usize, usize) {
489    let mut type_fixes = 0usize;
490    let mut name_fixes = 0usize;
491
492    for b in bundles.iter_mut() {
493        for e in b.assertion.entities.iter_mut() {
494            let new_type = entity_type(&e.entity_type);
495            if new_type != e.entity_type {
496                e.entity_type = new_type;
497                type_fixes += 1;
498            }
499
500            let new_name = entity_name(&e.name);
501            if new_name != e.name {
502                e.name = new_name;
503                name_fixes += 1;
504            }
505        }
506
507        // Deduplicate entities
508        let mut seen = std::collections::HashSet::new();
509        b.assertion.entities.retain(|e| {
510            let key = (e.name.to_lowercase(), e.entity_type.clone());
511            seen.insert(key)
512        });
513    }
514
515    (type_fixes, name_fixes)
516}
517
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;

    // Minimal entity fixture: only name/type vary; resolution fields are inert.
    fn make_entity(name: &str, etype: &str) -> Entity {
        Entity {
            name: name.into(),
            entity_type: etype.into(),
            identifiers: serde_json::Map::new(),
            canonical_id: None,
            candidates: vec![],
            aliases: vec![],
            resolution_provenance: None,
            resolution_confidence: 1.0,
            resolution_method: None,
            species_context: None,
            needs_review: false,
        }
    }

    // Minimal finding fixture wrapping the given entities; everything else is
    // empty/default so content-address expectations stay deterministic.
    fn make_finding_with_entities(entities: Vec<Entity>) -> FindingBundle {
        FindingBundle {
            id: "test".into(),
            version: 1,
            previous_version: None,
            assertion: Assertion {
                text: "Test".into(),
                assertion_type: "mechanism".into(),
                entities,
                relation: None,
                direction: None,
                causal_claim: None,
                causal_evidence_grade: None,
            },
            evidence: Evidence {
                evidence_type: "experimental".into(),
                model_system: String::new(),
                species: None,
                method: String::new(),
                sample_size: None,
                effect_size: None,
                p_value: None,
                replicated: false,
                replication_count: None,
                evidence_spans: vec![],
            },
            conditions: Conditions {
                text: String::new(),
                species_verified: vec![],
                species_unverified: vec![],
                in_vitro: false,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            confidence: Confidence::raw(0.8, "seeded prior", 0.85),
            provenance: Provenance {
                source_type: "published_paper".into(),
                doi: None,
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: "Test".into(),
                authors: vec![],
                year: Some(2024),
                journal: None,
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: None,
            },
            flags: Flags {
                gap: false,
                negative_space: false,
                contested: false,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    // ── entity_type tests ────────────────────────────────────────────

    #[test]
    fn valid_types_pass_through() {
        for t in &[
            "gene",
            "protein",
            "compound",
            "disease",
            "cell_type",
            "organism",
            "pathway",
            "assay",
            "anatomical_structure",
            "other",
        ] {
            assert_eq!(entity_type(t), *t);
        }
    }

    #[test]
    fn gene_variants_map_to_gene() {
        assert_eq!(entity_type("gene_variant"), "gene");
        assert_eq!(entity_type("genetic_variant"), "gene");
    }

    #[test]
    fn drug_maps_to_compound() {
        for t in &[
            "drug",
            "chemical",
            "metabolite",
            "lipid",
            "hormone",
            "drug_class",
        ] {
            assert_eq!(entity_type(t), "compound", "expected compound for {t}");
        }
    }

    #[test]
    fn protein_complex_maps_to_protein() {
        for t in &["protein_complex", "receptor", "antibody", "biomarker"] {
            assert_eq!(entity_type(t), "protein", "expected protein for {t}");
        }
    }

    #[test]
    fn cell_variants_map_to_cell_type() {
        assert_eq!(entity_type("cell"), "cell_type");
        assert_eq!(entity_type("cell type"), "cell_type");
    }

    #[test]
    fn disease_variants_map_to_disease() {
        for t in &["condition", "pathology", "disease_state"] {
            assert_eq!(entity_type(t), "disease", "expected disease for {t}");
        }
    }

    #[test]
    fn anatomical_variants() {
        for t in &[
            "tissue",
            "organ system",
            "organelle",
            "biological_barrier",
            "fluid",
        ] {
            assert_eq!(
                entity_type(t),
                "anatomical_structure",
                "expected anatomical_structure for {t}"
            );
        }
    }

    #[test]
    fn pathway_variants() {
        for t in &["biological_process", "process", "metabolic pathway"] {
            assert_eq!(entity_type(t), "pathway", "expected pathway for {t}");
        }
    }

    #[test]
    fn assay_variants() {
        for t in &["method", "technology", "device"] {
            assert_eq!(entity_type(t), "assay", "expected assay for {t}");
        }
    }

    #[test]
    fn organism_variants() {
        for t in &["bacterium", "virus", "pathogen", "microbiome"] {
            assert_eq!(entity_type(t), "organism", "expected organism for {t}");
        }
    }

    #[test]
    fn unknown_type_maps_to_other() {
        assert_eq!(entity_type("banana"), "other");
        assert_eq!(entity_type("foobar"), "other");
        assert_eq!(entity_type(""), "other");
    }

    #[test]
    fn case_insensitive_type_mapping() {
        assert_eq!(entity_type("Gene"), "gene");
        assert_eq!(entity_type("DRUG"), "compound");
        assert_eq!(entity_type("Protein_Complex"), "protein");
        assert_eq!(entity_type("CELL"), "cell_type");
    }

    // ── entity_name tests ────────────────────────────────────────────

    #[test]
    fn bbb_normalizes() {
        assert_eq!(entity_name("BBB"), "blood-brain barrier");
        assert_eq!(entity_name("blood brain barrier"), "blood-brain barrier");
        assert_eq!(entity_name("blood–brain barrier"), "blood-brain barrier");
    }

    #[test]
    fn ad_normalizes() {
        assert_eq!(entity_name("AD"), "Alzheimer's disease");
        assert_eq!(entity_name("alzheimer's"), "Alzheimer's disease");
        assert_eq!(entity_name("alzheimer disease"), "Alzheimer's disease");
    }

    #[test]
    fn amyloid_beta_normalizes() {
        assert_eq!(entity_name("Abeta"), "amyloid-beta");
        assert_eq!(entity_name("a-beta"), "amyloid-beta");
        assert_eq!(entity_name("amyloid beta"), "amyloid-beta");
    }

    #[test]
    fn apoe4_normalizes() {
        assert_eq!(entity_name("APOE4"), "APOE4");
        assert_eq!(entity_name("apoe-4"), "APOE4");
        assert_eq!(entity_name("apolipoprotein e4"), "APOE4");
    }

    #[test]
    fn unknown_name_unchanged() {
        assert_eq!(entity_name("NLRP3"), "NLRP3");
        assert_eq!(entity_name("some random entity"), "some random entity");
    }

    #[test]
    fn pd_normalizes() {
        assert_eq!(entity_name("PD"), "Parkinson's disease");
        assert_eq!(entity_name("parkinson disease"), "Parkinson's disease");
    }

    #[test]
    fn ros_and_others() {
        assert_eq!(entity_name("ROS"), "reactive oxygen species");
        assert_eq!(entity_name("NPs"), "nanoparticles");
        assert_eq!(entity_name("EVs"), "extracellular vesicles");
        assert_eq!(entity_name("FUS"), "focused ultrasound");
        assert_eq!(entity_name("TJs"), "tight junctions");
    }

    // ── normalize_findings tests ─────────────────────────────────────

    #[test]
    fn normalize_fixes_types_and_names() {
        let mut bundles = vec![make_finding_with_entities(vec![
            make_entity("BBB", "biological_barrier"),
            make_entity("AD", "condition"),
        ])];
        let (type_fixes, name_fixes) = normalize_findings(&mut bundles);
        assert_eq!(type_fixes, 2);
        assert_eq!(name_fixes, 2);
        assert_eq!(bundles[0].assertion.entities[0].name, "blood-brain barrier");
        assert_eq!(
            bundles[0].assertion.entities[0].entity_type,
            "anatomical_structure"
        );
        assert_eq!(bundles[0].assertion.entities[1].name, "Alzheimer's disease");
        assert_eq!(bundles[0].assertion.entities[1].entity_type, "disease");
    }

    #[test]
    fn deduplication_removes_duplicate_entities() {
        let mut bundles = vec![make_finding_with_entities(vec![
            make_entity("NLRP3", "protein"),
            make_entity("nlrp3", "protein"), // same name different case
            make_entity("NLRP3", "gene"),    // same name different type = kept
        ])];
        let (_tf, _nf) = normalize_findings(&mut bundles);
        assert_eq!(bundles[0].assertion.entities.len(), 2); // protein + gene
    }

    #[test]
    fn normalize_no_changes_returns_zero() {
        let mut bundles = vec![make_finding_with_entities(vec![make_entity(
            "NLRP3", "protein",
        )])];
        let (type_fixes, name_fixes) = normalize_findings(&mut bundles);
        assert_eq!(type_fixes, 0);
        assert_eq!(name_fixes, 0);
    }

    #[test]
    fn normalize_empty_bundles() {
        let mut bundles: Vec<FindingBundle> = vec![];
        let (tf, nf) = normalize_findings(&mut bundles);
        assert_eq!(tf, 0);
        assert_eq!(nf, 0);
    }

    #[test]
    fn plan_findings_reports_safe_entity_repairs() {
        let bundles = vec![make_finding_with_entities(vec![
            make_entity("BBB", "biological_barrier"),
            make_entity("blood-brain barrier", "anatomical_structure"),
        ])];

        let plan = plan_findings(&bundles);

        assert!(
            plan.iter()
                .any(|change| change.kind == NormalizeChangeKind::EntityType)
        );
        assert!(
            plan.iter()
                .any(|change| change.kind == NormalizeChangeKind::EntityName)
        );
        assert!(
            plan.iter()
                .any(|change| change.kind == NormalizeChangeKind::DuplicateEntity)
        );
        assert!(plan.iter().all(|change| change.safe));
    }

    #[test]
    fn source_dry_run_does_not_write() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("frontier.json");
        let frontier = crate::project::assemble(
            "test",
            vec![make_finding_with_entities(vec![make_entity(
                "BBB",
                "biological_barrier",
            )])],
            1,
            0,
            "test",
        );
        std::fs::write(&path, serde_json::to_string_pretty(&frontier).unwrap()).unwrap();

        let report = plan_source(&path).unwrap();
        let saved: crate::project::Project =
            serde_json::from_str(&std::fs::read_to_string(&path).unwrap()).unwrap();

        // Dry run: the on-disk frontier must still hold the raw entity type.
        assert!(report.dry_run);
        assert_eq!(report.summary.applied, 0);
        assert_eq!(
            saved.findings[0].assertion.entities[0].entity_type,
            "biological_barrier"
        );
    }

    #[test]
    fn source_apply_writes_safe_repairs() {
        let tmp = tempfile::TempDir::new().unwrap();
        let path = tmp.path().join("frontier.json");
        let frontier = crate::project::assemble(
            "test",
            vec![make_finding_with_entities(vec![make_entity(
                "BBB",
                "biological_barrier",
            )])],
            1,
            0,
            "test",
        );
        std::fs::write(&path, serde_json::to_string_pretty(&frontier).unwrap()).unwrap();

        let report = apply_source(&path).unwrap();
        let saved: crate::project::Project =
            serde_json::from_str(&std::fs::read_to_string(&path).unwrap()).unwrap();

        // Applied run: entity repairs are written and the finding ID matches
        // its recomputed content address.
        assert!(!report.dry_run);
        assert_eq!(report.summary.applied, report.summary.safe);
        assert_eq!(
            saved.findings[0].assertion.entities[0].entity_type,
            "anatomical_structure"
        );
        assert_eq!(
            saved.findings[0].assertion.entities[0].name,
            "blood-brain barrier"
        );
        assert_eq!(
            saved.findings[0].id,
            FindingBundle::content_address(
                &saved.findings[0].assertion,
                &saved.findings[0].provenance,
            )
        );
    }

    #[test]
    fn source_apply_refuses_packet_directory() {
        let tmp = tempfile::TempDir::new().unwrap();
        std::fs::write(
            tmp.path().join("manifest.json"),
            r#"{"packet_format":"vela.frontier-packet"}"#,
        )
        .unwrap();

        let report = apply_source(tmp.path()).unwrap();

        assert!(report.refused);
        assert_eq!(report.source_kind, "packet_dir");
        assert!(report.refusal_reason.unwrap().contains("Refusing"));
    }
}