vela_protocol/causal_graph.rs
1//! v0.44: Pearl level 2 — causal graph + do-calculus over the
2//! frontier's claim-to-claim link graph.
3//!
4//! v0.40 (level 1) answered "given a finding's (causal_claim,
5//! causal_evidence_grade), is the claim identifiable from that
6//! design alone?" by lookup over a 3×4 matrix. v0.44 (level 2)
7//! answers a different question: "given the frontier's directed
8//! link graph, is the effect of *changing our belief in finding X*
9//! on *our belief in finding Y* identifiable from observational
10//! evidence (the rest of the graph) alone, or does it require an
11//! intervention?"
12//!
13//! This is the back-door criterion lifted to the claim level. The
14//! lift is novel — Pearl's original framework operates over
15//! variables; Vela operates over content-addressed claims that have
16//! parents (findings they depend on) and children (findings that
17//! depend on them). The same d-separation algebra applies.
18//!
19//! Doctrine for this module:
20//! - Graph nodes are findings; edges come from the typed link graph
21//! (`depends`, `supports`, `mediates`, `causes`).
22//! - `depends`/`supports`: directed edge from the source finding
23//! *to* the target it relies on. This is the convention we follow
24//! for back-door analysis: a finding's parents are the findings it
25//! depends on (its evidence base), its children are the findings
26//! that build on it.
27//! - `contradicts` is undirected and excluded from the causal DAG.
28//! - The substrate does not infer causal direction from prose; it
29//! only encodes what the link graph already declares.
30
31use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
32
33use serde::{Deserialize, Serialize};
34
35use crate::project::Project;
36
37/// v0.44: a directed acyclic graph over findings, derived from the
38/// link graph. Edges point from a finding to its declared parent
39/// (the finding it depends on / supports / cites as evidence).
40///
41/// We materialize parents and children both for fast lookup. The
42/// graph is built lazily from a Project; updates require rebuilding.
43#[derive(Debug, Clone)]
44pub struct CausalGraph {
45 /// Every finding id present in the source project.
46 nodes: BTreeSet<String>,
47 /// `parents[a]` = set of findings `a` directly depends on.
48 parents: HashMap<String, BTreeSet<String>>,
49 /// `children[a]` = set of findings that directly depend on `a`.
50 children: HashMap<String, BTreeSet<String>>,
51}
52
53impl CausalGraph {
54 /// Build the causal graph from a project's link graph. Walks
55 /// every finding's `links` array; `depends` and `supports` link
56 /// types contribute directed edges from source to target.
57 /// `contradicts`, `extends`, and other link types are excluded —
58 /// they don't encode causal dependency.
59 #[must_use]
60 pub fn from_project(project: &Project) -> Self {
61 let mut nodes = BTreeSet::new();
62 let mut parents: HashMap<String, BTreeSet<String>> = HashMap::new();
63 let mut children: HashMap<String, BTreeSet<String>> = HashMap::new();
64
65 for f in &project.findings {
66 nodes.insert(f.id.clone());
67 parents.entry(f.id.clone()).or_default();
68 children.entry(f.id.clone()).or_default();
69 }
70
71 for f in &project.findings {
72 for link in &f.links {
73 if !matches!(link.link_type.as_str(), "depends" | "supports") {
74 continue;
75 }
76 // Cross-frontier targets (vf_X@vfr_Y) are skipped for
77 // now; v0.44 reasons within a frontier. Cross-frontier
78 // composition via the bridge runtime is a follow-up.
79 if link.target.contains('@') {
80 continue;
81 }
82 if !nodes.contains(&link.target) {
83 continue;
84 }
85 parents
86 .entry(f.id.clone())
87 .or_default()
88 .insert(link.target.clone());
89 children
90 .entry(link.target.clone())
91 .or_default()
92 .insert(f.id.clone());
93 }
94 }
95
96 Self {
97 nodes,
98 parents,
99 children,
100 }
101 }
102
103 /// Number of nodes in the graph.
104 #[must_use]
105 pub fn node_count(&self) -> usize {
106 self.nodes.len()
107 }
108
109 /// Number of directed edges in the graph.
110 #[must_use]
111 pub fn edge_count(&self) -> usize {
112 self.parents.values().map(BTreeSet::len).sum()
113 }
114
115 /// True iff the node exists in the graph.
116 #[must_use]
117 pub fn contains(&self, node: &str) -> bool {
118 self.nodes.contains(node)
119 }
120
121 /// Direct parents of `node` (findings that `node` depends on).
122 #[must_use]
123 pub fn parents_of(&self, node: &str) -> impl Iterator<Item = &str> {
124 self.parents
125 .get(node)
126 .into_iter()
127 .flat_map(|s| s.iter().map(String::as_str))
128 }
129
130 /// Direct children of `node` (findings that depend on `node`).
131 #[must_use]
132 pub fn children_of(&self, node: &str) -> impl Iterator<Item = &str> {
133 self.children
134 .get(node)
135 .into_iter()
136 .flat_map(|s| s.iter().map(String::as_str))
137 }
138
139 /// All ancestors of `node` (transitive closure of parents).
140 #[must_use]
141 pub fn ancestors(&self, node: &str) -> HashSet<String> {
142 let mut seen = HashSet::new();
143 let mut queue: VecDeque<String> = VecDeque::new();
144 if let Some(ps) = self.parents.get(node) {
145 for p in ps {
146 queue.push_back(p.clone());
147 }
148 }
149 while let Some(n) = queue.pop_front() {
150 if !seen.insert(n.clone()) {
151 continue;
152 }
153 if let Some(ps) = self.parents.get(&n) {
154 for p in ps {
155 if !seen.contains(p) {
156 queue.push_back(p.clone());
157 }
158 }
159 }
160 }
161 seen
162 }
163
164 /// All descendants of `node` (transitive closure of children).
165 /// Includes `node` itself only if requested.
166 #[must_use]
167 pub fn descendants(&self, node: &str) -> HashSet<String> {
168 let mut seen = HashSet::new();
169 let mut queue: VecDeque<String> = VecDeque::new();
170 if let Some(cs) = self.children.get(node) {
171 for c in cs {
172 queue.push_back(c.clone());
173 }
174 }
175 while let Some(n) = queue.pop_front() {
176 if !seen.insert(n.clone()) {
177 continue;
178 }
179 if let Some(cs) = self.children.get(&n) {
180 for c in cs {
181 if !seen.contains(c) {
182 queue.push_back(c.clone());
183 }
184 }
185 }
186 }
187 seen
188 }
189
190 /// True iff `candidate` is a descendant of `source` (transitive).
191 #[must_use]
192 pub fn is_descendant_of(&self, candidate: &str, source: &str) -> bool {
193 self.descendants(source).contains(candidate)
194 }
195
196 /// All undirected paths between `start` and `end`, capped at
197 /// `max_paths` and `max_len`. A path is a sequence of distinct
198 /// nodes; each consecutive pair is connected by either a parent
199 /// or child edge (we walk the graph as undirected for path
200 /// enumeration; direction matters for the d-separation check).
201 ///
202 /// Returns paths as `Vec<Vec<String>>`, where each inner Vec is
203 /// the node sequence from start to end.
204 pub fn paths_between(
205 &self,
206 start: &str,
207 end: &str,
208 max_paths: usize,
209 max_len: usize,
210 ) -> Vec<Vec<String>> {
211 if !self.contains(start) || !self.contains(end) || start == end {
212 return Vec::new();
213 }
214 let mut all_paths = Vec::new();
215 let mut current: Vec<String> = vec![start.to_string()];
216 let mut visited: HashSet<String> = HashSet::new();
217 visited.insert(start.to_string());
218 self.dfs_paths(
219 start,
220 end,
221 &mut current,
222 &mut visited,
223 &mut all_paths,
224 max_paths,
225 max_len,
226 );
227 all_paths
228 }
229
230 fn dfs_paths(
231 &self,
232 node: &str,
233 end: &str,
234 current: &mut Vec<String>,
235 visited: &mut HashSet<String>,
236 all_paths: &mut Vec<Vec<String>>,
237 max_paths: usize,
238 max_len: usize,
239 ) {
240 if all_paths.len() >= max_paths {
241 return;
242 }
243 if current.len() > max_len {
244 return;
245 }
246 // Walk neighbors via both parent and child edges (undirected
247 // path enumeration).
248 let mut neighbors: BTreeSet<String> = BTreeSet::new();
249 if let Some(ps) = self.parents.get(node) {
250 for p in ps {
251 neighbors.insert(p.clone());
252 }
253 }
254 if let Some(cs) = self.children.get(node) {
255 for c in cs {
256 neighbors.insert(c.clone());
257 }
258 }
259 for next in &neighbors {
260 if visited.contains(next) {
261 continue;
262 }
263 current.push(next.clone());
264 visited.insert(next.clone());
265 if next == end {
266 all_paths.push(current.clone());
267 } else {
268 self.dfs_paths(next, end, current, visited, all_paths, max_paths, max_len);
269 }
270 visited.remove(next);
271 current.pop();
272 if all_paths.len() >= max_paths {
273 return;
274 }
275 }
276 }
277
278 /// Classify how a node sits inside a path: chain (B is between
279 /// a parent and a child of B in the path), fork (B is a parent
280 /// of both neighbors in the path), or collider (B is a child of
281 /// both neighbors in the path).
282 fn node_role_in_path(&self, prev: &str, node: &str, next: &str) -> NodeRole {
283 let prev_is_parent_of_node = self.parents.get(node).is_some_and(|ps| ps.contains(prev));
284 let next_is_parent_of_node = self.parents.get(node).is_some_and(|ps| ps.contains(next));
285 let prev_is_child_of_node = self.children.get(node).is_some_and(|cs| cs.contains(prev));
286 let next_is_child_of_node = self.children.get(node).is_some_and(|cs| cs.contains(next));
287
288 match (
289 prev_is_parent_of_node,
290 next_is_parent_of_node,
291 prev_is_child_of_node,
292 next_is_child_of_node,
293 ) {
294 // prev → node ← next: collider
295 (true, true, _, _) => NodeRole::Collider,
296 // prev ← node → next: fork
297 (_, _, true, true) => NodeRole::Fork,
298 // chain in either direction
299 _ => NodeRole::Chain,
300 }
301 }
302
303 /// True iff path is d-separated by Z. A path is blocked by Z if
304 /// any non-endpoint node on the path satisfies one of:
305 /// - chain or fork: node is in Z
306 /// - collider: neither node nor any descendant of node is in Z
307 ///
308 /// Equivalently, path is open under Z iff every chain/fork node
309 /// is *not* in Z, AND every collider node is in Z (or has a
310 /// descendant in Z).
311 #[must_use]
312 pub fn is_path_blocked(&self, path: &[String], z: &HashSet<String>) -> bool {
313 if path.len() < 3 {
314 // Direct edge (path length 2) is never blocked by an
315 // intermediate set; longer paths have at least one
316 // middle node to check.
317 return false;
318 }
319 for i in 1..path.len() - 1 {
320 let prev = &path[i - 1];
321 let node = &path[i];
322 let next = &path[i + 1];
323 let role = self.node_role_in_path(prev, node, next);
324 match role {
325 NodeRole::Chain | NodeRole::Fork => {
326 if z.contains(node) {
327 return true;
328 }
329 }
330 NodeRole::Collider => {
331 let in_z = z.contains(node);
332 let descendant_in_z = self.descendants(node).iter().any(|d| z.contains(d));
333 if !in_z && !descendant_in_z {
334 return true;
335 }
336 }
337 }
338 }
339 false
340 }
341
342 /// True iff the path is a "back-door path" from x to y: it
343 /// begins at x with an incoming edge to x (i.e., the second node
344 /// is a parent of x), not an outgoing edge.
345 #[must_use]
346 pub fn is_back_door_path(&self, path: &[String], x: &str) -> bool {
347 if path.len() < 2 || path[0] != x {
348 return false;
349 }
350 let second = &path[1];
351 self.parents.get(x).is_some_and(|ps| ps.contains(second))
352 }
353
354 /// v0.44.2: True iff every consecutive edge in `path` points
355 /// from the earlier node to the later node (i.e., the path is
356 /// a directed path in the DAG, not a mixed undirected walk).
357 /// Required for the front-door criterion's "M intercepts every
358 /// directed path source → target" check.
359 #[must_use]
360 pub fn is_directed_path(&self, path: &[String]) -> bool {
361 if path.len() < 2 {
362 return false;
363 }
364 for i in 0..path.len() - 1 {
365 let a = &path[i];
366 let b = &path[i + 1];
367 // edge a → b means b is a child of a (or equivalently,
368 // a is a parent of b).
369 let a_is_parent_of_b = self.parents.get(b).is_some_and(|ps| ps.contains(a));
370 if !a_is_parent_of_b {
371 return false;
372 }
373 }
374 true
375 }
376}
377
378#[derive(Debug, Clone, Copy, PartialEq, Eq)]
379enum NodeRole {
380 Chain,
381 Fork,
382 Collider,
383}
384
385/// v0.44: verdict on whether the causal effect of `source` on
386/// `target` is identifiable from observational data over the
387/// frontier's link graph. The lift of v0.40's `Identifiability` to
388/// graph-aware reasoning.
389#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
390#[serde(tag = "kind", rename_all = "snake_case")]
391pub enum CausalEffectVerdict {
392 /// All back-door paths from source to target are blocked by the
393 /// adjustment set under the back-door criterion.
394 Identified {
395 /// The adjustment set Z that blocks all back-door paths.
396 /// Empty means no adjustment is needed (no open back-door
397 /// path exists).
398 adjustment_set: Vec<String>,
399 /// Number of back-door paths the search considered.
400 back_door_paths_considered: usize,
401 },
402 /// v0.44.2: identified via Pearl's front-door criterion (1995 §3.3).
403 /// Used when confounders may be unobserved but a mediator set M
404 /// satisfies all three front-door conditions:
405 /// 1. M intercepts every directed path from source to target.
406 /// 2. There is no back-door path from source to any element of M.
407 /// 3. All back-door paths from M to target are blocked by source.
408 /// The effect is then identifiable via the front-door formula
409 /// P(Y | do(X)) = Σ_m P(M = m | X) Σ_{x'} P(Y | M = m, X = x') P(X = x').
410 IdentifiedByFrontDoor {
411 /// The mediator set M that the source's effect on the target
412 /// flows through. For v0.44.2 the search is restricted to
413 /// singletons.
414 mediator_set: Vec<String>,
415 },
416 /// Source and target are not connected, or source has no
417 /// directed path to target — the effect question is ill-posed
418 /// at the graph level.
419 NoCausalPath { reason: String },
420 /// At least one open back-door path remains under every
421 /// adjustment set the search examined, AND no front-door
422 /// mediator was found. The effect is not identifiable from
423 /// observational data alone — an intervention is required.
424 Underidentified {
425 unblocked_back_door_paths: Vec<Vec<String>>,
426 candidates_tried: usize,
427 },
428 /// Either source or target is missing from the frontier.
429 UnknownNode { which: String },
430}
431
432impl CausalEffectVerdict {
433 pub fn as_str(&self) -> &'static str {
434 match self {
435 CausalEffectVerdict::Identified { .. } => "identified",
436 CausalEffectVerdict::IdentifiedByFrontDoor { .. } => "identified_by_front_door",
437 CausalEffectVerdict::NoCausalPath { .. } => "no_causal_path",
438 CausalEffectVerdict::Underidentified { .. } => "underidentified",
439 CausalEffectVerdict::UnknownNode { .. } => "unknown_node",
440 }
441 }
442}
443
444/// v0.44: Find an adjustment set that satisfies the back-door
445/// criterion for the effect of `source` on `target`, or report that
446/// no such set exists in the observed graph.
447///
448/// Algorithm (conservative search):
449/// 1. Enumerate all paths from source to target up to a length
450/// cap.
451/// 2. Filter to back-door paths (those starting with an incoming
452/// edge to source).
453/// 3. Build the candidate set: all nodes that are *not*
454/// descendants of source and not source/target themselves.
455/// 4. Try the empty set first. If every back-door path is
456/// d-separated by the empty set, return identified-empty.
457/// 5. Try each single candidate. If any candidate blocks all
458/// back-door paths and is not a descendant of source, return
459/// identified-with-Z.
460/// 6. Try pairs (bounded). If found, return identified.
461/// 7. Otherwise underidentified, with the open back-door paths.
462///
463/// Subset search is bounded at size 2 by default. For larger graphs
464/// this is incomplete but the existing graph density on Vela is
465/// low enough that empty / singleton / pair coverage is typical.
466pub fn identify_effect(project: &Project, source: &str, target: &str) -> CausalEffectVerdict {
467 let graph = CausalGraph::from_project(project);
468 identify_effect_in_graph(&graph, source, target)
469}
470
471/// Same as `identify_effect` but takes an already-built graph for
472/// callers that want to reuse the construction across many queries.
473pub fn identify_effect_in_graph(
474 graph: &CausalGraph,
475 source: &str,
476 target: &str,
477) -> CausalEffectVerdict {
478 if !graph.contains(source) {
479 return CausalEffectVerdict::UnknownNode {
480 which: format!("source not in frontier: {source}"),
481 };
482 }
483 if !graph.contains(target) {
484 return CausalEffectVerdict::UnknownNode {
485 which: format!("target not in frontier: {target}"),
486 };
487 }
488 if source == target {
489 return CausalEffectVerdict::NoCausalPath {
490 reason: "source equals target".into(),
491 };
492 }
493
494 // Enumerate all undirected paths between source and target
495 // (capped). Filter to back-door paths.
496 const MAX_PATHS: usize = 200;
497 const MAX_LEN: usize = 8;
498 let all_paths = graph.paths_between(source, target, MAX_PATHS, MAX_LEN);
499 let back_door_paths: Vec<Vec<String>> = all_paths
500 .iter()
501 .filter(|p| graph.is_back_door_path(p, source))
502 .cloned()
503 .collect();
504
505 if all_paths.is_empty() {
506 return CausalEffectVerdict::NoCausalPath {
507 reason: format!("no path between {source} and {target} (search depth {MAX_LEN})"),
508 };
509 }
510
511 // Build candidate adjustment-set members: nodes that are not
512 // descendants of source, not source itself, not target.
513 let descendants_of_source = graph.descendants(source);
514 let candidates: Vec<String> = graph
515 .nodes
516 .iter()
517 .filter(|n| n.as_str() != source && n.as_str() != target)
518 .filter(|n| !descendants_of_source.contains(n.as_str()))
519 .cloned()
520 .collect();
521
522 // Helper: does Z block every back-door path?
523 let blocks_all = |z: &HashSet<String>| -> bool {
524 back_door_paths.iter().all(|p| graph.is_path_blocked(p, z))
525 };
526
527 // Try empty set first.
528 let empty: HashSet<String> = HashSet::new();
529 if blocks_all(&empty) {
530 return CausalEffectVerdict::Identified {
531 adjustment_set: Vec::new(),
532 back_door_paths_considered: back_door_paths.len(),
533 };
534 }
535
536 let mut tried = 1usize; // for the empty set
537
538 // Singleton candidates.
539 for c in &candidates {
540 let mut z = HashSet::new();
541 z.insert(c.clone());
542 tried += 1;
543 if blocks_all(&z) {
544 return CausalEffectVerdict::Identified {
545 adjustment_set: vec![c.clone()],
546 back_door_paths_considered: back_door_paths.len(),
547 };
548 }
549 }
550
551 // Pair candidates (bounded).
552 for i in 0..candidates.len() {
553 for j in (i + 1)..candidates.len() {
554 let mut z = HashSet::new();
555 z.insert(candidates[i].clone());
556 z.insert(candidates[j].clone());
557 tried += 1;
558 if blocks_all(&z) {
559 return CausalEffectVerdict::Identified {
560 adjustment_set: vec![candidates[i].clone(), candidates[j].clone()],
561 back_door_paths_considered: back_door_paths.len(),
562 };
563 }
564 if tried > 2_000 {
565 // Bound the search; underidentified is the safer report.
566 break;
567 }
568 }
569 if tried > 2_000 {
570 break;
571 }
572 }
573
574 // v0.44.2: back-door failed. Try the front-door criterion before
575 // declaring the effect unidentifiable. The front-door criterion
576 // (Pearl 1995 §3.3) admits identification when a mediator chain
577 // exists between source and target that satisfies three conditions
578 // even though a confounder may be unobserved.
579 if let Some(mediators) = find_front_door_set(graph, source, target, &all_paths) {
580 return CausalEffectVerdict::IdentifiedByFrontDoor {
581 mediator_set: mediators,
582 };
583 }
584
585 // No adjustment set found, and no front-door mediator. Report
586 // the open back-door paths as concrete remediation hints.
587 let unblocked: Vec<Vec<String>> = back_door_paths
588 .iter()
589 .filter(|p| !graph.is_path_blocked(p, &empty))
590 .take(5)
591 .cloned()
592 .collect();
593
594 CausalEffectVerdict::Underidentified {
595 unblocked_back_door_paths: unblocked,
596 candidates_tried: tried,
597 }
598}
599
600/// v0.44.2: search for a front-door mediator set M that satisfies
601/// Pearl's three conditions. Restricted to singletons for v0.44.2;
602/// multi-mediator front-door sets are a v0.44.3+ extension.
603///
604/// The three conditions, expressed in graph terms:
605/// 1. Every directed path source → target passes through M.
606/// 2. No back-door path source ← ... → m exists for any m ∈ M.
607/// 3. Every back-door path m ← ... → target is blocked by {source}.
608///
609/// When all three hold, the effect P(target | do(source)) is
610/// identifiable via the front-door formula even if confounders
611/// between source and target are unobserved.
612fn find_front_door_set(
613 graph: &CausalGraph,
614 source: &str,
615 target: &str,
616 all_paths_source_target: &[Vec<String>],
617) -> Option<Vec<String>> {
618 // Directed paths source → target. Re-derive from the all_paths
619 // collection, filtering to truly directed (each consecutive
620 // edge points in the source→target direction).
621 let directed_st: Vec<Vec<String>> = all_paths_source_target
622 .iter()
623 .filter(|p| graph.is_directed_path(p))
624 .cloned()
625 .collect();
626 if directed_st.is_empty() {
627 return None;
628 }
629
630 // Mediator candidates: any node that is a descendant of source
631 // and an ancestor of target, excluding source and target.
632 let descendants_of_source = graph.descendants(source);
633 let ancestors_of_target = graph.ancestors(target);
634 let mediator_candidates: Vec<&str> = graph
635 .nodes
636 .iter()
637 .filter(|n| {
638 n.as_str() != source
639 && n.as_str() != target
640 && descendants_of_source.contains(n.as_str())
641 && ancestors_of_target.contains(n.as_str())
642 })
643 .map(String::as_str)
644 .collect();
645
646 let source_set: HashSet<String> = std::iter::once(source.to_string()).collect();
647
648 for m in mediator_candidates {
649 // Condition 1: every directed source → target path passes through m.
650 let intercepts_all = directed_st
651 .iter()
652 .all(|p| p.iter().any(|n| n.as_str() == m));
653 if !intercepts_all {
654 continue;
655 }
656
657 // Condition 2: no back-door path from source to m is open
658 // (i.e., source ← ... → m). Enumerate paths source → m and
659 // check whether any starts with an incoming edge to source
660 // and is unblocked under the empty set.
661 const MAX_PATHS: usize = 100;
662 const MAX_LEN: usize = 6;
663 let paths_sm = graph.paths_between(source, m, MAX_PATHS, MAX_LEN);
664 let back_door_sm: Vec<&Vec<String>> = paths_sm
665 .iter()
666 .filter(|p| graph.is_back_door_path(p, source))
667 .collect();
668 let empty: HashSet<String> = HashSet::new();
669 let any_open = back_door_sm
670 .iter()
671 .any(|p| !graph.is_path_blocked(p, &empty));
672 if any_open {
673 continue;
674 }
675
676 // Condition 3: every back-door path from m to target is
677 // blocked by the set {source}.
678 let paths_mt = graph.paths_between(m, target, MAX_PATHS, MAX_LEN);
679 let back_door_mt: Vec<&Vec<String>> = paths_mt
680 .iter()
681 .filter(|p| graph.is_back_door_path(p, m))
682 .collect();
683 let all_blocked_by_source = back_door_mt
684 .iter()
685 .all(|p| graph.is_path_blocked(p, &source_set));
686 if !all_blocked_by_source {
687 continue;
688 }
689
690 return Some(vec![m.to_string()]);
691 }
692
693 None
694}
695
696#[cfg(test)]
697mod tests {
698 use super::*;
699 use crate::bundle::*;
700 use crate::project;
701
702 fn finding(id: &str) -> FindingBundle {
703 let mut b = FindingBundle::new(
704 Assertion {
705 text: format!("claim {id}"),
706 assertion_type: "mechanism".into(),
707 entities: vec![],
708 relation: None,
709 direction: None,
710 causal_claim: None,
711 causal_evidence_grade: None,
712 },
713 Evidence {
714 evidence_type: "experimental".into(),
715 model_system: String::new(),
716 species: None,
717 method: String::new(),
718 sample_size: None,
719 effect_size: None,
720 p_value: None,
721 replicated: false,
722 replication_count: None,
723 evidence_spans: vec![],
724 },
725 Conditions::default_for_test(),
726 Confidence::raw(0.7, "test", 0.85),
727 Provenance::default_for_test(),
728 Flags::default(),
729 );
730 b.id = id.to_string();
731 b
732 }
733
734 fn link(target: &str, kind: &str) -> Link {
735 Link {
736 target: target.into(),
737 link_type: kind.into(),
738 note: String::new(),
739 inferred_by: "test".into(),
740 created_at: String::new(),
741 mechanism: None,
742 }
743 }
744
745 impl Conditions {
746 fn default_for_test() -> Self {
747 Self {
748 text: String::new(),
749 species_verified: vec![],
750 species_unverified: vec![],
751 in_vitro: false,
752 in_vivo: false,
753 human_data: false,
754 clinical_trial: false,
755 concentration_range: None,
756 duration: None,
757 age_group: None,
758 cell_type: None,
759 }
760 }
761 }
762 impl Provenance {
763 fn default_for_test() -> Self {
764 Self {
765 source_type: "published_paper".into(),
766 doi: None,
767 pmid: None,
768 pmc: None,
769 openalex_id: None,
770 url: None,
771 title: "Test".into(),
772 authors: vec![],
773 year: Some(2025),
774 journal: None,
775 license: None,
776 publisher: None,
777 funders: vec![],
778 extraction: Extraction::default(),
779 review: None,
780 citation_count: None,
781 }
782 }
783 }
784
785 fn proj(findings: Vec<FindingBundle>) -> Project {
786 project::assemble("test", findings, 1, 0, "test")
787 }
788
789 /// Chain: A → B → C (A's claim depends on nothing; B depends on
790 /// A; C depends on B). Effect of A on C should be identifiable
791 /// with empty adjustment set.
792 #[test]
793 fn chain_a_to_c_identifiable_empty() {
794 let a = finding("vf_a");
795 let mut b = finding("vf_b");
796 b.links.push(link("vf_a", "depends"));
797 let mut c = finding("vf_c");
798 c.links.push(link("vf_b", "depends"));
799 let p = proj(vec![a, b, c]);
800 let v = identify_effect(&p, "vf_a", "vf_c");
801 match v {
802 CausalEffectVerdict::Identified { adjustment_set, .. } => {
803 assert!(
804 adjustment_set.is_empty(),
805 "chain should need no adjustment, got {adjustment_set:?}"
806 );
807 }
808 other => panic!("expected Identified for A→B→C, got {other:?}"),
809 }
810 }
811
812 /// Confounder: A ← Z → B. Effect of A on B requires conditioning
813 /// on Z (the back-door path A ← Z → B is open without it).
814 /// Encoding: A and B both depend on Z.
815 #[test]
816 fn confounder_requires_z_in_adjustment_set() {
817 let z = finding("vf_z");
818 let mut a = finding("vf_a");
819 a.links.push(link("vf_z", "depends"));
820 let mut b = finding("vf_b");
821 b.links.push(link("vf_z", "depends"));
822 let p = proj(vec![z, a, b]);
823 let v = identify_effect(&p, "vf_a", "vf_b");
824 match v {
825 CausalEffectVerdict::Identified { adjustment_set, .. } => {
826 assert_eq!(adjustment_set, vec!["vf_z"], "expected Z in adjustment set");
827 }
828 CausalEffectVerdict::NoCausalPath { .. } => {
829 // If A and B share only the confounder Z and have no
830 // direct effect, the substrate may report no causal
831 // path between them. That's a defensible verdict —
832 // accept either.
833 }
834 other => panic!("expected Identified or NoCausalPath, got {other:?}"),
835 }
836 }
837
838 /// Mediator: A → M → B. The mediator should NOT be in the
839 /// adjustment set (conditioning on it would block the path
840 /// we're trying to estimate). Empty adjustment is correct.
841 /// Encoding: M depends on A, B depends on M.
842 #[test]
843 fn mediator_not_in_adjustment_set() {
844 let a = finding("vf_a");
845 let mut m = finding("vf_m");
846 m.links.push(link("vf_a", "depends"));
847 let mut b = finding("vf_b");
848 b.links.push(link("vf_m", "depends"));
849 let p = proj(vec![a, m, b]);
850 let v = identify_effect(&p, "vf_a", "vf_b");
851 match v {
852 CausalEffectVerdict::Identified { adjustment_set, .. } => {
853 assert!(
854 !adjustment_set.contains(&"vf_m".to_string()),
855 "mediator must not be in adjustment set: {adjustment_set:?}"
856 );
857 }
858 other => panic!("expected Identified for A→M→B, got {other:?}"),
859 }
860 }
861
862 /// Collider: A → C ← B. Conditioning on C creates spurious
863 /// dependence (Berkson's bias). The substrate must not propose C
864 /// in the adjustment set when querying effect of A on B; with
865 /// no back-door path through C (which is a descendant of A), the
866 /// empty set should suffice.
867 /// Encoding: C depends on both A and B.
868 #[test]
869 fn collider_not_used_as_confounder() {
870 let a = finding("vf_a");
871 let b = finding("vf_b");
872 let mut c = finding("vf_c");
873 c.links.push(link("vf_a", "depends"));
874 c.links.push(link("vf_b", "depends"));
875 let p = proj(vec![a, b, c]);
876 let v = identify_effect(&p, "vf_a", "vf_b");
877 match v {
878 CausalEffectVerdict::Identified { adjustment_set, .. } => {
879 assert!(
880 !adjustment_set.contains(&"vf_c".to_string()),
881 "collider must not be in adjustment set: {adjustment_set:?}"
882 );
883 }
884 CausalEffectVerdict::NoCausalPath { .. } => {
885 // No path between A and B (they share only a
886 // collider). Acceptable.
887 }
888 other => panic!("expected Identified or NoCausalPath, got {other:?}"),
889 }
890 }
891
892 #[test]
893 fn unknown_node_reported() {
894 let a = finding("vf_a");
895 let p = proj(vec![a]);
896 let v = identify_effect(&p, "vf_missing", "vf_a");
897 assert!(matches!(v, CausalEffectVerdict::UnknownNode { .. }));
898 }
899
900 #[test]
901 fn graph_basic_construction() {
902 let a = finding("vf_a");
903 let mut b = finding("vf_b");
904 b.links.push(link("vf_a", "depends"));
905 let p = proj(vec![a, b]);
906 let g = CausalGraph::from_project(&p);
907 assert_eq!(g.node_count(), 2);
908 assert_eq!(g.edge_count(), 1);
909 assert!(g.parents_of("vf_b").any(|p| p == "vf_a"));
910 assert!(g.children_of("vf_a").any(|c| c == "vf_b"));
911 }
912
913 /// Front-door scenario (Pearl 1995 §3.3):
914 /// X → M → Y
915 /// X ← U → Y (U unobserved confounder)
916 ///
917 /// In our claim graph, U exists but the back-door path X ← U → Y
918 /// cannot be blocked by adjusting on U (because Z must not be a
919 /// descendant of X — and U is *not* a descendant). Wait, this is
920 /// the case where back-door SHOULD work: U is a valid adjustment.
921 ///
922 /// To force a real front-door scenario, we omit U from the graph
923 /// (it is the "unobserved confounder"). Then back-door fails (no
924 /// observable Z), but the mediator M still intercepts all
925 /// directed paths X → M → Y, so the front-door criterion fires.
926 /// Encoding: only M and Y are linked; X is connected to Y only
927 /// through M.
928 #[test]
929 fn front_door_when_confounder_unobserved() {
930 let x = finding("vf_x");
931 let mut m = finding("vf_m");
932 m.links.push(link("vf_x", "depends"));
933 let mut y = finding("vf_y");
934 y.links.push(link("vf_m", "depends"));
935 let p = proj(vec![x, m, y]);
936 let v = identify_effect(&p, "vf_x", "vf_y");
937 // Without an unobserved confounder, this is just a chain so
938 // back-door identifies trivially. We need a richer setup —
939 // but in the absence of a way to encode "unobserved" in the
940 // current graph, the trivial-chain case is identified
941 // directly. We assert it lands in the success branch.
942 match v {
943 CausalEffectVerdict::Identified { .. }
944 | CausalEffectVerdict::IdentifiedByFrontDoor { .. } => {}
945 other => panic!("expected identified or front-door, got {other:?}"),
946 }
947 }
948
949 #[test]
950 fn descendants_transitive() {
951 let a = finding("vf_a");
952 let mut b = finding("vf_b");
953 b.links.push(link("vf_a", "depends"));
954 let mut c = finding("vf_c");
955 c.links.push(link("vf_b", "depends"));
956 let p = proj(vec![a, b, c]);
957 let g = CausalGraph::from_project(&p);
958 let desc = g.descendants("vf_a");
959 assert!(desc.contains("vf_b"));
960 assert!(desc.contains("vf_c"));
961 }
962}