Skip to main content

vela_protocol/
bridge.rs

1//! Bridge detection — find cross-domain hypotheses from multiple frontiers.
2//!
3//! The core value proposition of Vela: compile findings from separate fields,
4//! link them by shared entities, and surface testable hypotheses at the intersection.
5//!
6//! ## v0.46 — bridges as first-class kernel objects
7//!
8//! v0.45 and earlier treated bridges as a derived report — a function from
9//! two `Project`s to a `Vec<BridgeEntity>` that ran on demand and was
10//! never persisted. v0.46 promotes bridges to first-class kernel objects:
11//! content-addressed `vbr_<hash>` records that live in
12//! `.vela/bridges/<vbr_id>.json` alongside findings, replications,
13//! datasets, and code artifacts.
14//!
15//! The shape: a `Bridge` is a *compositional hypothesis* that two
16//! frontiers' shared entity admits a cross-frontier claim. The kernel
17//! never asserts the hypothesis is true — that's a reviewer call,
18//! recorded by transitioning the bridge's `status` from `Derived` to
19//! `Confirmed` or `Refuted`. Bridges are written by `vela bridges
20//! derive`, read by `vela bridges list / show`, and updated by `vela
21//! bridges confirm / refute`.
22//!
23//! Doctrine: bridges are derived, not invented. The derivation runs the
24//! same `detect_bridges` algorithm and records its output verbatim.
25//! Confirmation / refutation is a reviewer act, not an automated one.
26
27use std::collections::HashMap;
28
29use serde::{Deserialize, Serialize};
30use sha2::{Digest, Sha256};
31
32use crate::project::Project;
33
/// A bridge entity — an entity that appears in findings from 2+ different
/// source frontiers.
#[derive(Debug, Clone)]
pub struct BridgeEntity {
    /// Entity name (or alias) shared by the frontiers; `detect_bridges`
    /// stores it lowercased.
    pub entity_name: String,
    /// Labels of the frontiers in which the entity appears.
    pub frontiers: Vec<String>,
    /// Findings that mention the entity, keyed by frontier label.
    pub findings_per_frontier: HashMap<String, Vec<BridgeFinding>>,
    /// Sum of the per-frontier finding counts.
    pub total_findings: usize,
    /// Number of frontiers the entity appears in.
    pub breadth: usize,
    /// PubMed hit count for the bridge's novelty query. `detect_bridges`
    /// leaves this `None`; it is filled in by whoever runs the check.
    pub pubmed_count: Option<u64>,
    /// Human-readable description of opposing directions, when both a
    /// "positive" and a "negative" finding were seen for the entity.
    pub tension: Option<String>,
}

/// Flattened snapshot of one finding that mentions a bridge entity.
// `year`, `doi` and `title` are populated by `detect_bridges` but are not read
// by the code in this file, hence the lint exemption.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct BridgeFinding {
    /// Identifier of the source finding.
    pub id: String,
    /// Assertion text of the finding.
    pub assertion: String,
    /// Confidence score copied from the finding.
    pub confidence: f64,
    /// Direction label of the assertion (e.g. "positive" / "negative").
    pub direction: Option<String>,
    /// Publication year from the finding's provenance.
    pub year: Option<i32>,
    /// DOI from the finding's provenance.
    pub doi: Option<String>,
    /// Title from the finding's provenance.
    pub title: String,
}
55
56/// Detect bridges across multiple named frontiers.
57pub fn detect_bridges(named_frontiers: &[(&str, &Project)]) -> Vec<BridgeEntity> {
58    let mut entity_map: HashMap<String, HashMap<String, Vec<BridgeFinding>>> = HashMap::new();
59
60    for (frontier_name, frontier) in named_frontiers {
61        for f in &frontier.findings {
62            let mut entity_names: Vec<String> = f
63                .assertion
64                .entities
65                .iter()
66                .map(|e| e.name.to_lowercase())
67                .collect();
68
69            // Include aliases
70            for e in &f.assertion.entities {
71                for alias in &e.aliases {
72                    let a = alias.to_lowercase();
73                    if !entity_names.contains(&a) {
74                        entity_names.push(a);
75                    }
76                }
77            }
78
79            for name in entity_names {
80                let corr_map = entity_map.entry(name).or_default();
81                let findings = corr_map.entry(frontier_name.to_string()).or_default();
82                // Avoid duplicates within same frontier
83                if !findings.iter().any(|bf| bf.id == f.id) {
84                    findings.push(BridgeFinding {
85                        id: f.id.clone(),
86                        assertion: f.assertion.text.clone(),
87                        confidence: f.confidence.score,
88                        direction: f.assertion.direction.clone(),
89                        year: f.provenance.year,
90                        doi: f.provenance.doi.clone(),
91                        title: f.provenance.title.clone(),
92                    });
93                }
94            }
95        }
96    }
97
98    let mut bridges: Vec<BridgeEntity> = entity_map
99        .into_iter()
100        .filter(|(name, corr_map)| corr_map.len() >= 2 && !is_obvious(name))
101        .map(|(name, corr_map)| {
102            let total = corr_map.values().map(|v| v.len()).sum();
103            let frontiers: Vec<String> = corr_map.keys().cloned().collect();
104            let breadth = frontiers.len();
105
106            // Detect tension (opposite directions across frontiers)
107            let tension = detect_tension(&corr_map);
108
109            BridgeEntity {
110                entity_name: name,
111                frontiers,
112                findings_per_frontier: corr_map,
113                total_findings: total,
114                breadth,
115                pubmed_count: None,
116                tension,
117            }
118        })
119        .collect();
120
121    bridges.sort_by(|a, b| {
122        b.breadth
123            .cmp(&a.breadth)
124            .then(b.tension.is_some().cmp(&a.tension.is_some()))
125            .then(b.total_findings.cmp(&a.total_findings))
126    });
127    bridges
128}
129
130fn detect_tension(corr_map: &HashMap<String, Vec<BridgeFinding>>) -> Option<String> {
131    let mut pos = Vec::new();
132    let mut neg = Vec::new();
133    for (frontier, findings) in corr_map {
134        for f in findings {
135            match f.direction.as_deref() {
136                Some("positive") if !pos.contains(frontier) => pos.push(frontier.clone()),
137                Some("negative") if !neg.contains(frontier) => neg.push(frontier.clone()),
138                _ => {}
139            }
140        }
141    }
142    if !pos.is_empty() && !neg.is_empty() {
143        Some(format!(
144            "positive in [{}], negative in [{}]",
145            pos.join(", "),
146            neg.join(", ")
147        ))
148    } else {
149        None
150    }
151}
152
/// Returns `true` when `name`, compared case-insensitively, is one of the
/// generic terms that are excluded from bridge detection.
pub fn is_obvious(name: &str) -> bool {
    const GENERIC_TERMS: &[&str] = &[
        "alzheimer's disease", "blood-brain barrier", "brain", "neuron", "neurons",
        "neurodegeneration", "neuroinflammation", "cns", "inflammation", "dementia",
        "parkinson's disease", "microglia", "astrocyte", "astrocytes", "hippocampus",
        "cortex", "cognitive decline", "cognitive function", "neurodegenerative diseases",
        "oxidative stress", "cytokines", "cerebrospinal fluid",
        "amyloid", "amyloid-beta", "β-amyloid", "amyloid β", "tau",
        "mouse", "mice", "rat", "human", "patient", "patients",
        "disease", "treatment", "therapy", "drug", "receptor", "cell", "cells",
        "protein", "gene", "pathway", "mechanism", "model", "study", "expression",
        "level", "levels", "activity", "function", "role", "effect", "effects",
    ];

    let needle = name.to_lowercase();
    GENERIC_TERMS.iter().any(|term| *term == needle)
}
212
213/// Run a rough PubMed prior-art check for a cross-domain query.
214/// Retries up to 2 times with exponential backoff on transient failures.
215pub async fn check_novelty(client: &reqwest::Client, query: &str) -> Result<u64, String> {
216    let url = format!(
217        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term={}&rettype=json&retmode=json&tool=vela&email=vela@borrowedlight.org",
218        urlencoding::encode(query)
219    );
220    let json: serde_json::Value =
221        crate::retry::retry_with_backoff("PubMed prior-art check", 2, || {
222            let client = client.clone();
223            let url = url.clone();
224            async move {
225                let resp = client
226                    .get(&url)
227                    .timeout(std::time::Duration::from_secs(10))
228                    .send()
229                    .await
230                    .map_err(|e| format!("PubMed: {e}"))?;
231                if !resp.status().is_success() {
232                    return Err(format!("PubMed {}", resp.status()));
233                }
234                resp.json::<serde_json::Value>()
235                    .await
236                    .map_err(|e| format!("PubMed parse: {e}"))
237            }
238        })
239        .await?;
240    Ok(json["esearchresult"]["count"]
241        .as_str()
242        .and_then(|s| s.parse().ok())
243        .unwrap_or(0))
244}
245
246/// Build a specific PubMed query for a bridge entity.
247/// Uses the most distinctive co-occurring entity from each frontier, not just field names.
248pub fn novelty_query(entity: &str, bridge: &BridgeEntity) -> String {
249    // Get the most specific co-occurring entity from each frontier
250    let mut frontier_specifics: Vec<String> = Vec::new();
251    for findings in bridge.findings_per_frontier.values() {
252        // Find the most specific entity that co-occurs with the bridge entity
253        // (not the bridge entity itself, and not an obvious term)
254        let mut cooccur: HashMap<String, usize> = HashMap::new();
255        for f in findings {
256            // We don't have access to other entities here directly,
257            // so extract keywords from the assertion text
258            let words: Vec<&str> = f.assertion.split_whitespace().collect();
259            for w in words {
260                let clean = w
261                    .trim_matches(|c: char| !c.is_alphanumeric())
262                    .to_lowercase();
263                if clean.len() > 3 && !is_obvious(&clean) && clean != entity.to_lowercase() {
264                    *cooccur.entry(clean).or_default() += 1;
265                }
266            }
267        }
268        // Pick the most frequent non-obvious co-occurring word
269        if let Some((word, _)) = cooccur.into_iter().max_by_key(|(_, count)| *count) {
270            frontier_specifics.push(word);
271        }
272    }
273
274    // Build query: entity + top 2 specific terms from different frontiers
275    let mut parts = vec![entity.to_string()];
276    for term in frontier_specifics.iter().take(2) {
277        parts.push(term.clone());
278    }
279    parts.join(" AND ")
280}
281
282/// Format the bridge report.
283pub fn format_report(bridges: &[BridgeEntity], total_findings: usize) -> String {
284    let mut r = String::new();
285
286    let prior_art_clear: Vec<_> = bridges
287        .iter()
288        .filter(|b| b.pubmed_count == Some(0))
289        .collect();
290    let emerging: Vec<_> = bridges
291        .iter()
292        .filter(|b| matches!(b.pubmed_count, Some(1..=5)))
293        .collect();
294    let with_tension: Vec<_> = bridges.iter().filter(|b| b.tension.is_some()).collect();
295
296    r.push_str(&format!("\n{}\n", "═".repeat(70)));
297    r.push_str("VELA BRIDGE REPORT\n");
298    r.push_str(&format!("{}\n\n", "═".repeat(70)));
299    r.push_str(&format!("  Total findings:    {total_findings}\n"));
300    r.push_str(&format!(
301        "  Bridge entities:   {} (non-obvious)\n",
302        bridges.len()
303    ));
304    r.push_str(&format!(
305        "  Zero-result prior-art checks: {}\n",
306        prior_art_clear.len()
307    ));
308    r.push_str(&format!("  Emerging (1-5):    {}\n", emerging.len()));
309    r.push_str(&format!("  With tension:      {}\n", with_tension.len()));
310
311    if !prior_art_clear.is_empty() {
312        r.push_str(&format!("\n{}\n", "─".repeat(70)));
313        r.push_str("CANDIDATE BRIDGES — zero PubMed results for query\n");
314        r.push_str(&format!("{}\n\n", "─".repeat(70)));
315
316        for (i, b) in prior_art_clear.iter().enumerate().take(20) {
317            r.push_str(&format!("  {}. {}", i + 1, b.entity_name.to_uppercase()));
318            if let Some(t) = &b.tension {
319                r.push_str(&format!("  ⚡ {t}"));
320            }
321            r.push('\n');
322            r.push_str(&format!("     Bridges: {}\n", b.frontiers.join(" ↔ ")));
323            for (corr, findings) in &b.findings_per_frontier {
324                let top = &findings[0];
325                let trunc: String = top.assertion.chars().take(90).collect();
326                r.push_str(&format!(
327                    "     [{corr}] conf:{:.2} | {trunc}...\n",
328                    top.confidence
329                ));
330            }
331            r.push('\n');
332        }
333    }
334
335    if !with_tension.is_empty() {
336        r.push_str(&format!("{}\n", "─".repeat(70)));
337        r.push_str("CROSS-DOMAIN TENSION — opposite directions across fields\n");
338        r.push_str(&format!("{}\n\n", "─".repeat(70)));
339
340        for (i, b) in with_tension.iter().enumerate().take(15) {
341            if b.pubmed_count == Some(0) {
342                continue;
343            } // already shown above
344            r.push_str(&format!(
345                "  {}. {} — {}\n",
346                i + 1,
347                b.entity_name,
348                b.tension.as_deref().unwrap_or("")
349            ));
350            r.push_str(&format!(
351                "     PubMed: {} results\n\n",
352                b.pubmed_count.unwrap_or(0)
353            ));
354        }
355    }
356
357    r.push_str(&format!("{}\n", "═".repeat(70)));
358    r.push_str("Generated by Vela — the stars have always been there\n\n");
359    r
360}
361
362// ── v0.46: first-class Bridge objects ────────────────────────────────
363
364/// A first-class, content-addressed bridge object. Persisted in
365/// `.vela/bridges/<vbr_id>.json` next to findings.
366#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
367pub struct Bridge {
368    /// `vbr_<16hex>`, content-addressed.
369    pub id: String,
370    /// Schema URL pin so old readers refuse mismatched data.
371    #[serde(default = "default_bridge_schema")]
372    pub schema: String,
373    /// Canonical (lowercased) entity name that bridges the frontiers.
374    pub entity_name: String,
375    /// Human labels of the bridged frontiers ("bbb", "landscape", …),
376    /// in canonical sort order (used in the content address).
377    pub frontiers: Vec<String>,
378    /// `vfr_<id>`s of the bridged frontiers, when known. Empty vec
379    /// when the source was a flat JSON file lacking frontier metadata.
380    #[serde(default)]
381    pub frontier_ids: Vec<String>,
382    /// Per-finding evidence backing the bridge. Each entry pins one
383    /// finding from one frontier; the bridge is "the entity links
384    /// these findings."
385    pub finding_refs: Vec<BridgeRef>,
386    /// `Some` when at least one frontier asserts a positive direction
387    /// for the entity and another asserts negative — the most
388    /// interesting case (cross-domain tension).
389    pub tension: Option<String>,
390    /// RFC 3339 timestamp when `vela bridges derive` produced this.
391    pub derived_at: String,
392    /// Reviewer state. Newly derived bridges start `Derived`.
393    pub status: BridgeStatus,
394}
395
/// Schema URL stamped on bridge records; also used as the serde default for
/// `Bridge::schema`.
fn default_bridge_schema() -> String {
    String::from("https://vela.science/schema/bridge/v0.46.0")
}
399
400#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
401pub struct BridgeRef {
402    /// Human label of the frontier this finding belongs to.
403    pub frontier: String,
404    /// `vf_<id>`.
405    pub finding_id: String,
406    /// Snapshot of the assertion text at derivation time. Lets the
407    /// bridge survive a frontier rewrite (the bridge captures *what
408    /// was true when it was derived*).
409    pub assertion_text: String,
410    pub confidence: f64,
411    pub direction: Option<String>,
412}
413
414#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
415#[serde(rename_all = "lowercase")]
416pub enum BridgeStatus {
417    /// Just derived; awaiting reviewer attention.
418    Derived,
419    /// A reviewer has examined the bridge and confirmed it is a
420    /// meaningful cross-frontier composition (not a coincidence of
421    /// shared vocabulary).
422    Confirmed,
423    /// A reviewer has examined the bridge and dismissed it.
424    Refuted,
425}
426
427impl Bridge {
428    /// `vbr_<16hex>` content address. Preimage is
429    /// `bridge|<sorted_frontiers>|<entity_name_lowercased>`. Two
430    /// derivations against the same frontiers and entity always yield
431    /// the same id; re-derivation is idempotent.
432    #[must_use]
433    pub fn content_address(frontiers: &[String], entity_name: &str) -> String {
434        let mut sorted = frontiers.to_vec();
435        sorted.sort();
436        let preimage = format!("bridge|{}|{}", sorted.join(","), entity_name.to_lowercase());
437        let hash = Sha256::digest(preimage.as_bytes());
438        format!("vbr_{}", &hex::encode(hash)[..16])
439    }
440
441    /// Lift a `BridgeEntity` (the v0.45 detection output) into a
442    /// content-addressed `Bridge`. The reviewer status defaults to
443    /// `Derived`; transitions are explicit acts.
444    pub fn from_detection(
445        b: &BridgeEntity,
446        frontier_ids: &HashMap<String, String>,
447        derived_at: &str,
448    ) -> Self {
449        let mut refs: Vec<BridgeRef> = b
450            .findings_per_frontier
451            .iter()
452            .flat_map(|(frontier, findings)| {
453                findings.iter().map(move |f| BridgeRef {
454                    frontier: frontier.clone(),
455                    finding_id: f.id.clone(),
456                    assertion_text: f.assertion.clone(),
457                    confidence: f.confidence,
458                    direction: f.direction.clone(),
459                })
460            })
461            .collect();
462        refs.sort_by(|a, b| {
463            a.frontier
464                .cmp(&b.frontier)
465                .then(a.finding_id.cmp(&b.finding_id))
466        });
467
468        let mut frontiers = b.frontiers.clone();
469        frontiers.sort();
470        let id = Self::content_address(&frontiers, &b.entity_name);
471        let frontier_ids_vec: Vec<String> = frontiers
472            .iter()
473            .filter_map(|f| frontier_ids.get(f).cloned())
474            .collect();
475
476        Self {
477            id,
478            schema: default_bridge_schema(),
479            entity_name: b.entity_name.clone(),
480            frontiers,
481            frontier_ids: frontier_ids_vec,
482            finding_refs: refs,
483            tension: b.tension.clone(),
484            derived_at: derived_at.to_string(),
485            status: BridgeStatus::Derived,
486        }
487    }
488}
489
490/// Top-level v0.46 entry point: derive bridges between two named
491/// frontiers and return content-addressed `Bridge` records ready to be
492/// written to `.vela/bridges/`.
493///
494/// Identity-stable: re-running against the same frontiers produces the
495/// same `vbr_<id>` per (entity_name, frontier-pair). Callers are
496/// expected to merge by id rather than blindly re-write.
497#[must_use]
498pub fn derive_bridges(named_frontiers: &[(&str, &Project)], derived_at: &str) -> Vec<Bridge> {
499    let entities = detect_bridges(named_frontiers);
500    let frontier_ids: HashMap<String, String> = named_frontiers
501        .iter()
502        .filter_map(|(label, p)| p.frontier_id.clone().map(|fid| (label.to_string(), fid)))
503        .collect();
504    entities
505        .iter()
506        .map(|b| Bridge::from_detection(b, &frontier_ids, derived_at))
507        .collect()
508}
509
#[cfg(test)]
mod v046_tests {
    use super::*;
    use std::fs;
    use std::path::PathBuf;

    /// Real-frontier integration: load both the BBB-Alzheimer and the
    /// will-alzheimer-landscape JSON fixtures, derive bridges, and confirm the
    /// output is non-empty, content-addressed, and yields the same set of ids
    /// on a second run. Skipped when either fixture is absent.
    #[test]
    fn derive_real_frontier_pair_is_stable() {
        // Fixtures are looked up relative to the workspace root. The crate is
        // run from `crates/vela-protocol`; the fixtures live two levels up at
        // `frontiers/*.json`.
        let workspace_root = std::env::var("CARGO_MANIFEST_DIR")
            .map(|s| format!("{s}/../.."))
            .unwrap_or_else(|_| ".".into());
        let frontiers_dir = PathBuf::from(workspace_root).join("frontiers");
        let bbb_path = frontiers_dir.join("bbb-alzheimer.json");
        let landscape_path = frontiers_dir.join("will-alzheimer-landscape.json");
        // Both files are read below, so both must exist for the test to run.
        if !bbb_path.exists() || !landscape_path.exists() {
            eprintln!("skipping derive_real_frontier_pair_is_stable: fixture missing");
            return;
        }
        let bbb_text = fs::read_to_string(&bbb_path).expect("read bbb");
        let landscape_text = fs::read_to_string(&landscape_path).expect("read landscape");
        let bbb: crate::project::Project = serde_json::from_str(&bbb_text).expect("parse bbb");
        let landscape: crate::project::Project =
            serde_json::from_str(&landscape_text).expect("parse landscape");

        let now = "2026-04-28T00:00:00Z";
        let first = derive_bridges(&[("bbb", &bbb), ("landscape", &landscape)], now);
        let second = derive_bridges(&[("bbb", &bbb), ("landscape", &landscape)], now);
        assert!(
            !first.is_empty(),
            "expected real cross-frontier bridges; found 0"
        );
        // The guarantee under test is about the *set* of ids, so compare the
        // ids in sorted order rather than in output order.
        let mut ids_first: Vec<&str> = first.iter().map(|b| b.id.as_str()).collect();
        let mut ids_second: Vec<&str> = second.iter().map(|b| b.id.as_str()).collect();
        ids_first.sort_unstable();
        ids_second.sort_unstable();
        assert_eq!(
            ids_first, ids_second,
            "vbr_<id> set must be stable across runs"
        );
        for b in &first {
            assert!(b.id.starts_with("vbr_"));
            assert_eq!(b.id.len(), "vbr_".len() + 16);
            assert!(!b.finding_refs.is_empty());
            assert_eq!(b.status, BridgeStatus::Derived);
            // Content address recomputes correctly from declared parts.
            let recomputed = Bridge::content_address(&b.frontiers, &b.entity_name);
            assert_eq!(b.id, recomputed);
        }
    }

    /// The content address must not depend on the order of the frontiers.
    #[test]
    fn content_address_independent_of_frontier_order() {
        let a = Bridge::content_address(&["bbb".to_string(), "landscape".to_string()], "trem2");
        let b = Bridge::content_address(&["landscape".to_string(), "bbb".to_string()], "trem2");
        assert_eq!(a, b);
    }

    /// The content address must not depend on the case of the entity name.
    #[test]
    fn content_address_lowercases_entity() {
        let a = Bridge::content_address(&["a".into(), "b".into()], "TREM2");
        let b = Bridge::content_address(&["a".into(), "b".into()], "trem2");
        assert_eq!(a, b);
    }
}
577
578#[cfg(test)]
579mod tests {
580    use super::*;
581    use crate::bundle::*;
582
583    fn make_finding(
584        id: &str,
585        entities: Vec<(&str, &str)>,
586        direction: Option<&str>,
587        doi: Option<&str>,
588    ) -> FindingBundle {
589        FindingBundle {
590            id: id.into(),
591            version: 1,
592            previous_version: None,
593            assertion: Assertion {
594                text: format!("Finding {id}"),
595                assertion_type: "mechanism".into(),
596                entities: entities
597                    .into_iter()
598                    .map(|(name, etype)| Entity {
599                        name: name.into(),
600                        entity_type: etype.into(),
601                        identifiers: serde_json::Map::new(),
602                        canonical_id: None,
603                        candidates: vec![],
604                        aliases: vec![],
605                        resolution_provenance: None,
606                        resolution_confidence: 1.0,
607                        resolution_method: None,
608                        species_context: None,
609                        needs_review: false,
610                    })
611                    .collect(),
612                relation: None,
613                direction: direction.map(|s| s.to_string()),
614                causal_claim: None,
615                causal_evidence_grade: None,
616            },
617            evidence: Evidence {
618                evidence_type: "experimental".into(),
619                model_system: String::new(),
620                species: None,
621                method: String::new(),
622                sample_size: None,
623                effect_size: None,
624                p_value: None,
625                replicated: false,
626                replication_count: None,
627                evidence_spans: vec![],
628            },
629            conditions: Conditions {
630                text: String::new(),
631                species_verified: vec![],
632                species_unverified: vec![],
633                in_vitro: false,
634                in_vivo: false,
635                human_data: false,
636                clinical_trial: false,
637                concentration_range: None,
638                duration: None,
639                age_group: None,
640                cell_type: None,
641            },
642            confidence: Confidence::raw(0.8, "seeded prior", 0.85),
643            provenance: Provenance {
644                source_type: "published_paper".into(),
645                doi: doi.map(|s| s.to_string()),
646                pmid: None,
647                pmc: None,
648                openalex_id: None,
649                url: None,
650                title: "Test".into(),
651                authors: vec![],
652                year: Some(2024),
653                journal: None,
654                license: None,
655                publisher: None,
656                funders: vec![],
657                extraction: Extraction::default(),
658                review: None,
659                citation_count: None,
660            },
661            flags: Flags {
662                gap: false,
663                negative_space: false,
664                contested: false,
665                retracted: false,
666                declining: false,
667                gravity_well: false,
668                review_state: None,
669                superseded: false,
670                signature_threshold: None,
671                jointly_accepted: false,
672            },
673            links: vec![],
674            annotations: vec![],
675            attachments: vec![],
676            created: String::new(),
677            updated: None,
678
679            access_tier: crate::access_tier::AccessTier::Public,
680        }
681    }
682
683    fn make_frontier(findings: Vec<FindingBundle>) -> Project {
684        crate::project::assemble("test", findings, 1, 0, "test frontier")
685    }
686
687    #[test]
688    fn entity_in_two_frontiers_is_bridge() {
689        let c1 = make_frontier(vec![make_finding(
690            "f1",
691            vec![("NLRP3", "protein"), ("IL-1B", "protein")],
692            None,
693            None,
694        )]);
695        let c2 = make_frontier(vec![make_finding(
696            "f2",
697            vec![("NLRP3", "protein"), ("caspase-1", "protein")],
698            None,
699            None,
700        )]);
701        let named = vec![("neuro", &c1), ("immune", &c2)];
702        let bridges = detect_bridges(&named);
703        let nlrp3 = bridges.iter().find(|b| b.entity_name == "nlrp3");
704        assert!(nlrp3.is_some());
705        let nlrp3 = nlrp3.unwrap();
706        assert_eq!(nlrp3.breadth, 2);
707        assert_eq!(nlrp3.frontiers.len(), 2);
708    }
709
710    #[test]
711    fn entity_in_one_frontier_not_bridge() {
712        let c1 = make_frontier(vec![make_finding(
713            "f1",
714            vec![("NLRP3", "protein")],
715            None,
716            None,
717        )]);
718        let c2 = make_frontier(vec![make_finding(
719            "f2",
720            vec![("APOE4", "gene")],
721            None,
722            None,
723        )]);
724        let named = vec![("neuro", &c1), ("genetics", &c2)];
725        let bridges = detect_bridges(&named);
726        assert!(bridges.iter().all(|b| b.entity_name != "nlrp3"));
727        assert!(bridges.iter().all(|b| b.entity_name != "apoe4"));
728    }
729
730    #[test]
731    fn obvious_entities_filtered() {
732        assert!(is_obvious("brain"));
733        assert!(is_obvious("neuron"));
734        assert!(is_obvious("Alzheimer's disease"));
735        assert!(is_obvious("mouse"));
736        assert!(is_obvious("protein"));
737        assert!(!is_obvious("NLRP3"));
738        assert!(!is_obvious("cryopyrin"));
739        assert!(!is_obvious("rapamycin"));
740    }
741
742    #[test]
743    fn obvious_entities_not_bridges() {
744        let c1 = make_frontier(vec![make_finding(
745            "f1",
746            vec![("brain", "anatomical_structure")],
747            None,
748            None,
749        )]);
750        let c2 = make_frontier(vec![make_finding(
751            "f2",
752            vec![("brain", "anatomical_structure")],
753            None,
754            None,
755        )]);
756        let named = vec![("neuro", &c1), ("imaging", &c2)];
757        let bridges = detect_bridges(&named);
758        assert!(bridges.iter().all(|b| b.entity_name != "brain"));
759    }
760
761    #[test]
762    fn tension_detected_opposite_directions() {
763        let c1 = make_frontier(vec![make_finding(
764            "f1",
765            vec![("NLRP3", "protein")],
766            Some("positive"),
767            None,
768        )]);
769        let c2 = make_frontier(vec![make_finding(
770            "f2",
771            vec![("NLRP3", "protein")],
772            Some("negative"),
773            None,
774        )]);
775        let named = vec![("neuro", &c1), ("immune", &c2)];
776        let bridges = detect_bridges(&named);
777        let nlrp3 = bridges.iter().find(|b| b.entity_name == "nlrp3").unwrap();
778        assert!(nlrp3.tension.is_some());
779        let tension = nlrp3.tension.as_ref().unwrap();
780        assert!(tension.contains("positive"));
781        assert!(tension.contains("negative"));
782    }
783
784    #[test]
785    fn no_tension_same_direction() {
786        let c1 = make_frontier(vec![make_finding(
787            "f1",
788            vec![("NLRP3", "protein")],
789            Some("positive"),
790            None,
791        )]);
792        let c2 = make_frontier(vec![make_finding(
793            "f2",
794            vec![("NLRP3", "protein")],
795            Some("positive"),
796            None,
797        )]);
798        let named = vec![("neuro", &c1), ("immune", &c2)];
799        let bridges = detect_bridges(&named);
800        let nlrp3 = bridges.iter().find(|b| b.entity_name == "nlrp3").unwrap();
801        assert!(nlrp3.tension.is_none());
802    }
803
804    #[test]
805    fn sorted_by_breadth_then_tension() {
806        let c1 = make_frontier(vec![make_finding(
807            "f1",
808            vec![("entityA", "protein"), ("entityB", "gene")],
809            Some("positive"),
810            None,
811        )]);
812        let c2 = make_frontier(vec![make_finding(
813            "f2",
814            vec![("entityA", "protein"), ("entityB", "gene")],
815            Some("negative"),
816            None,
817        )]);
818        let c3 = make_frontier(vec![make_finding(
819            "f3",
820            vec![("entityA", "protein")],
821            None,
822            None,
823        )]);
824        let named = vec![("c1", &c1), ("c2", &c2), ("c3", &c3)];
825        let bridges = detect_bridges(&named);
826        assert!(bridges.len() >= 2);
827        assert!(bridges[0].breadth >= bridges[1].breadth);
828    }
829
830    #[test]
831    fn empty_input() {
832        let bridges = detect_bridges(&[]);
833        assert!(bridges.is_empty());
834    }
835
836    #[test]
837    fn alias_creates_bridge() {
838        let mut f1 = make_finding("f1", vec![], None, None);
839        f1.assertion.entities.push(Entity {
840            name: "NLRP3".into(),
841            entity_type: "protein".into(),
842            identifiers: serde_json::Map::new(),
843            canonical_id: None,
844            candidates: vec![],
845            aliases: vec!["cryopyrin".into()],
846            resolution_provenance: None,
847            resolution_confidence: 1.0,
848            resolution_method: None,
849            species_context: None,
850            needs_review: false,
851        });
852        let c1 = make_frontier(vec![f1]);
853        let c2 = make_frontier(vec![make_finding(
854            "f2",
855            vec![("cryopyrin", "protein")],
856            None,
857            None,
858        )]);
859        let named = vec![("neuro", &c1), ("immune", &c2)];
860        let bridges = detect_bridges(&named);
861        let cryo = bridges.iter().find(|b| b.entity_name == "cryopyrin");
862        assert!(cryo.is_some());
863    }
864
865    #[test]
866    fn detect_tension_helper() {
867        let mut map: HashMap<String, Vec<BridgeFinding>> = HashMap::new();
868        map.insert(
869            "c1".into(),
870            vec![BridgeFinding {
871                id: "f1".into(),
872                assertion: "test".into(),
873                confidence: 0.8,
874                direction: Some("positive".into()),
875                year: Some(2024),
876                doi: None,
877                title: "T".into(),
878            }],
879        );
880        map.insert(
881            "c2".into(),
882            vec![BridgeFinding {
883                id: "f2".into(),
884                assertion: "test".into(),
885                confidence: 0.8,
886                direction: Some("negative".into()),
887                year: Some(2024),
888                doi: None,
889                title: "T".into(),
890            }],
891        );
892        assert!(detect_tension(&map).is_some());
893
894        let mut map2: HashMap<String, Vec<BridgeFinding>> = HashMap::new();
895        map2.insert(
896            "c1".into(),
897            vec![BridgeFinding {
898                id: "f1".into(),
899                assertion: "test".into(),
900                confidence: 0.8,
901                direction: Some("positive".into()),
902                year: Some(2024),
903                doi: None,
904                title: "T".into(),
905            }],
906        );
907        map2.insert(
908            "c2".into(),
909            vec![BridgeFinding {
910                id: "f2".into(),
911                assertion: "test".into(),
912                confidence: 0.8,
913                direction: Some("positive".into()),
914                year: Some(2024),
915                doi: None,
916                title: "T".into(),
917            }],
918        );
919        assert!(detect_tension(&map2).is_none());
920    }
921
922    #[test]
923    fn is_obvious_case_insensitive() {
924        assert!(is_obvious("Brain"));
925        assert!(is_obvious("BRAIN"));
926        assert!(is_obvious("Cell"));
927        assert!(is_obvious("PROTEIN"));
928        assert!(is_obvious("Gene"));
929        assert!(is_obvious("Pathway"));
930        assert!(is_obvious("Mouse"));
931    }
932
933    #[test]
934    fn is_obvious_rejects_specific_entities() {
935        assert!(!is_obvious("rapamycin"));
936        assert!(!is_obvious("metformin"));
937        assert!(!is_obvious("TREM2"));
938        assert!(!is_obvious("GLP-1"));
939        assert!(!is_obvious("synuclein"));
940        assert!(!is_obvious("berberine"));
941    }
942
943    #[test]
944    fn is_obvious_all_listed_terms() {
945        // Verify every term in the OBVIOUS list is actually caught
946        let terms = vec![
947            "alzheimer's disease",
948            "blood-brain barrier",
949            "brain",
950            "neuron",
951            "neurons",
952            "neurodegeneration",
953            "neuroinflammation",
954            "cns",
955            "inflammation",
956            "dementia",
957            "cell",
958            "cells",
959            "protein",
960            "gene",
961            "pathway",
962            "mechanism",
963            "model",
964            "study",
965            "expression",
966            "level",
967            "levels",
968            "activity",
969            "function",
970            "role",
971            "effect",
972            "effects",
973        ];
974        for t in terms {
975            assert!(is_obvious(t), "Expected '{t}' to be obvious");
976        }
977    }
978
979    #[test]
980    fn bridge_entity_three_frontiers() {
981        let c1 = make_frontier(vec![make_finding(
982            "f1",
983            vec![("TREM2", "protein")],
984            None,
985            None,
986        )]);
987        let c2 = make_frontier(vec![make_finding(
988            "f2",
989            vec![("TREM2", "protein")],
990            None,
991            None,
992        )]);
993        let c3 = make_frontier(vec![make_finding(
994            "f3",
995            vec![("TREM2", "protein")],
996            None,
997            None,
998        )]);
999        let named = vec![("neuro", &c1), ("immune", &c2), ("genetics", &c3)];
1000        let bridges = detect_bridges(&named);
1001        let trem2 = bridges.iter().find(|b| b.entity_name == "trem2").unwrap();
1002        assert_eq!(trem2.breadth, 3);
1003        assert_eq!(trem2.total_findings, 3);
1004    }
1005
1006    #[test]
1007    fn duplicate_finding_in_same_frontier_not_counted_twice() {
1008        let c1 = make_frontier(vec![
1009            make_finding("f1", vec![("NLRP3", "protein")], None, None),
1010            make_finding("f1", vec![("NLRP3", "protein")], None, None), // same ID
1011        ]);
1012        let c2 = make_frontier(vec![make_finding(
1013            "f2",
1014            vec![("NLRP3", "protein")],
1015            None,
1016            None,
1017        )]);
1018        let named = vec![("neuro", &c1), ("immune", &c2)];
1019        let bridges = detect_bridges(&named);
1020        let nlrp3 = bridges.iter().find(|b| b.entity_name == "nlrp3").unwrap();
1021        // f1 should only appear once in neuro frontier
1022        let neuro_findings = nlrp3.findings_per_frontier.get("neuro").unwrap();
1023        assert_eq!(neuro_findings.len(), 1);
1024    }
1025
1026    #[test]
1027    fn novelty_query_includes_entity() {
1028        let bridge = BridgeEntity {
1029            entity_name: "trem2".into(),
1030            frontiers: vec!["neuro".into(), "immune".into()],
1031            findings_per_frontier: {
1032                let mut m = HashMap::new();
1033                m.insert(
1034                    "neuro".into(),
1035                    vec![BridgeFinding {
1036                        id: "f1".into(),
1037                        assertion: "TREM2 modulates microglial phagocytosis".into(),
1038                        confidence: 0.8,
1039                        direction: None,
1040                        year: Some(2024),
1041                        doi: None,
1042                        title: "T".into(),
1043                    }],
1044                );
1045                m.insert(
1046                    "immune".into(),
1047                    vec![BridgeFinding {
1048                        id: "f2".into(),
1049                        assertion: "TREM2 regulates complement activation".into(),
1050                        confidence: 0.7,
1051                        direction: None,
1052                        year: Some(2024),
1053                        doi: None,
1054                        title: "T".into(),
1055                    }],
1056                );
1057                m
1058            },
1059            total_findings: 2,
1060            breadth: 2,
1061            pubmed_count: None,
1062            tension: None,
1063        };
1064        let query = novelty_query("trem2", &bridge);
1065        assert!(query.contains("trem2"));
1066        // Should have AND separators
1067        assert!(query.contains(" AND "));
1068    }
1069
1070    #[test]
1071    fn detect_tension_no_direction() {
1072        let mut map: HashMap<String, Vec<BridgeFinding>> = HashMap::new();
1073        map.insert(
1074            "c1".into(),
1075            vec![BridgeFinding {
1076                id: "f1".into(),
1077                assertion: "test".into(),
1078                confidence: 0.8,
1079                direction: None,
1080                year: Some(2024),
1081                doi: None,
1082                title: "T".into(),
1083            }],
1084        );
1085        map.insert(
1086            "c2".into(),
1087            vec![BridgeFinding {
1088                id: "f2".into(),
1089                assertion: "test".into(),
1090                confidence: 0.8,
1091                direction: None,
1092                year: Some(2024),
1093                doi: None,
1094                title: "T".into(),
1095            }],
1096        );
1097        assert!(detect_tension(&map).is_none());
1098    }
1099
1100    #[test]
1101    fn format_report_empty_bridges() {
1102        let report = format_report(&[], 0);
1103        assert!(report.contains("VELA BRIDGE REPORT"));
1104        assert!(report.contains("Bridge entities:   0"));
1105        assert!(report.contains("Total findings:    0"));
1106    }
1107
1108    #[test]
1109    fn format_report_with_novel_bridge() {
1110        let bridge = BridgeEntity {
1111            entity_name: "trem2".into(),
1112            frontiers: vec!["neuro".into(), "immune".into()],
1113            findings_per_frontier: {
1114                let mut m = HashMap::new();
1115                m.insert(
1116                    "neuro".into(),
1117                    vec![BridgeFinding {
1118                        id: "f1".into(),
1119                        assertion: "TREM2 finding".into(),
1120                        confidence: 0.85,
1121                        direction: None,
1122                        year: Some(2024),
1123                        doi: None,
1124                        title: "T".into(),
1125                    }],
1126                );
1127                m.insert(
1128                    "immune".into(),
1129                    vec![BridgeFinding {
1130                        id: "f2".into(),
1131                        assertion: "TREM2 immune".into(),
1132                        confidence: 0.7,
1133                        direction: None,
1134                        year: Some(2024),
1135                        doi: None,
1136                        title: "T".into(),
1137                    }],
1138                );
1139                m
1140            },
1141            total_findings: 2,
1142            breadth: 2,
1143            pubmed_count: Some(0),
1144            tension: None,
1145        };
1146        let report = format_report(&[bridge], 5);
1147        assert!(report.contains("CANDIDATE BRIDGES"));
1148        assert!(report.contains("TREM2"));
1149    }
1150}