Skip to main content

vela_protocol/
causal_reasoning.rs

//! v0.40: Causal reasoning over the schema landed in v0.38.
//!
//! v0.38.0 made `causal_claim` and `causal_evidence_grade` first-class
//! fields on `Assertion`. v0.38.1 folded a soft compatibility multiplier
//! into the confidence formula. v0.38.2 let aggregate queries filter
//! by claim type. v0.38.3 caught the most common structural error
//! (`supports` across claim-strength mismatch).
//!
//! v0.40.0 lands the *reasoning* move: a hard identifiability verdict.
//! Given a finding's (claim, grade), can the design — *as declared* —
//! support the claim being made? This is Pearl's identifiability
//! question at level 1: does the rung-of-the-ladder match the
//! evidence type?
//!
//! Doctrine:
//! - Identifiability is a function of (claim, grade), not of the
//!   confidence score, the citation count, or any soft signal.
//!   Either the design admits the claim or it doesn't.
//! - The kernel records the verdict; the kernel does not auto-correct.
//!   v0.40.1+ will surface remediation proposals so a reviewer can
//!   downgrade the claim or strengthen the evidence.
//! - Findings without typed claims (`causal_claim = None`) are
//!   `Underdetermined` — the kernel knows it doesn't know.
24
25use serde::{Deserialize, Serialize};
26
27use crate::bundle::{CausalClaim, CausalEvidenceGrade, FindingBundle};
28use crate::project::Project;
29
30/// v0.40: hard identifiability verdict for a finding's causal claim
31/// against the declared study-design grade.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum Identifiability {
35    /// The design admits the claim. (Correlation under any grade,
36    /// mediation under RCT/QE, intervention under RCT.)
37    Identified,
38    /// The design admits the claim only under additional assumptions
39    /// the kernel cannot verify (instrument validity for QE-grade
40    /// intervention, lack of unmeasured confounders for QE-grade
41    /// mediation). The reviewer must document the assumption.
42    Conditional,
43    /// The design cannot identify the claim. Observational data
44    /// alone does not identify intervention. Theoretical evidence
45    /// alone does not identify causation.
46    Underidentified,
47    /// `causal_claim` or `causal_evidence_grade` is unset; the kernel
48    /// has nothing to grade. Pre-v0.38 findings are all in this
49    /// bucket until reviewed.
50    Underdetermined,
51}
52
53impl Identifiability {
54    pub fn as_str(self) -> &'static str {
55        match self {
56            Identifiability::Identified => "identified",
57            Identifiability::Conditional => "conditional",
58            Identifiability::Underidentified => "underidentified",
59            Identifiability::Underdetermined => "underdetermined",
60        }
61    }
62
63    /// True if this verdict signals the substrate cannot vouch for
64    /// the claim as stated. `Underidentified` is the obvious case;
65    /// `Conditional` is included here because it requires reviewer
66    /// attestation the kernel hasn't seen.
67    pub fn needs_reviewer_attention(self) -> bool {
68        matches!(
69            self,
70            Identifiability::Underidentified | Identifiability::Conditional
71        )
72    }
73}
74
75/// v0.40: hard identifiability check on (claim, grade). Pure function;
76/// the matrix encodes the Pearlian doctrine documented above.
77#[must_use]
78pub fn is_identifiable(
79    claim: Option<CausalClaim>,
80    grade: Option<CausalEvidenceGrade>,
81) -> Identifiability {
82    use CausalClaim::*;
83    use CausalEvidenceGrade::*;
84    let (Some(c), Some(g)) = (claim, grade) else {
85        return Identifiability::Underdetermined;
86    };
87    match (c, g) {
88        // Correlation: any reasonable design admits association.
89        (Correlation, _) => Identifiability::Identified,
90        // Mediation:
91        (Mediation, Rct) => Identifiability::Identified,
92        (Mediation, QuasiExperimental) => Identifiability::Conditional,
93        (Mediation, Observational) => Identifiability::Underidentified,
94        (Mediation, Theoretical) => Identifiability::Underidentified,
95        // Intervention: the strongest claim. RCT identifies; QE under
96        // instrument validity (conditional); observational and
97        // theoretical alone don't.
98        (Intervention, Rct) => Identifiability::Identified,
99        (Intervention, QuasiExperimental) => Identifiability::Conditional,
100        (Intervention, Observational) => Identifiability::Underidentified,
101        (Intervention, Theoretical) => Identifiability::Underidentified,
102    }
103}
104
105/// One row of the causal-audit report.
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct AuditEntry {
108    pub finding_id: String,
109    pub assertion_text: String,
110    pub causal_claim: Option<CausalClaim>,
111    pub causal_evidence_grade: Option<CausalEvidenceGrade>,
112    pub verdict: Identifiability,
113    /// One short sentence explaining why this finding earned this
114    /// verdict, suitable for a review-queue display.
115    pub rationale: String,
116    /// Suggested remediation — downgrade the claim, strengthen the
117    /// evidence, or document the assumption.
118    pub remediation: String,
119}
120
121fn rationale_for(claim: CausalClaim, grade: CausalEvidenceGrade) -> &'static str {
122    use CausalClaim::*;
123    use CausalEvidenceGrade::*;
124    match (claim, grade) {
125        (Correlation, _) => "Correlation claims are admitted by any reasonable design.",
126        (Mediation, Rct) => "RCT design identifies mediation pathways.",
127        (Mediation, QuasiExperimental) => {
128            "Quasi-experimental design identifies mediation only when the instrument is valid and confounders are addressed."
129        }
130        (Mediation, Observational) => {
131            "Observational data leaves the back-door problem open: confounders may explain the apparent mediation."
132        }
133        (Mediation, Theoretical) => {
134            "Theoretical models propose mediation; they do not identify it from data."
135        }
136        (Intervention, Rct) => "RCT design identifies intervention effects directly.",
137        (Intervention, QuasiExperimental) => {
138            "Quasi-experimental design identifies intervention effects only under instrument validity."
139        }
140        (Intervention, Observational) => {
141            "Observational data does not identify intervention effects (Rubin/Pearl: do(X=x) is unobserved)."
142        }
143        (Intervention, Theoretical) => {
144            "Theoretical analysis cannot identify intervention effects from real-world data alone."
145        }
146    }
147}
148
149fn remediation_for(verdict: Identifiability, claim: Option<CausalClaim>) -> String {
150    match (verdict, claim) {
151        (Identifiability::Identified, _) => "No action; design supports the claim.".into(),
152        (Identifiability::Conditional, _) => {
153            "Document the additional assumptions (instrument validity, ignorability of confounders) on the finding as a caveat or evidence_span."
154                .into()
155        }
156        (Identifiability::Underidentified, Some(CausalClaim::Intervention)) => {
157            "Either downgrade the claim from `intervention` to `correlation`, or attach RCT/QE-grade evidence that identifies the effect."
158                .into()
159        }
160        (Identifiability::Underidentified, Some(CausalClaim::Mediation)) => {
161            "Either downgrade to `correlation`, or attach RCT/QE-grade evidence that closes the back-door pathways."
162                .into()
163        }
164        (Identifiability::Underidentified, _) => {
165            "Downgrade the claim or supply stronger evidence.".into()
166        }
167        (Identifiability::Underdetermined, _) => {
168            "Set `causal_claim` and `causal_evidence_grade` via `vela finding causal-set`."
169                .into()
170        }
171    }
172}
173
174/// v0.40: audit one finding against the identifiability matrix.
175#[must_use]
176pub fn audit_finding(finding: &FindingBundle) -> AuditEntry {
177    let claim = finding.assertion.causal_claim;
178    let grade = finding.assertion.causal_evidence_grade;
179    let verdict = is_identifiable(claim, grade);
180    let rationale = match (claim, grade) {
181        (Some(c), Some(g)) => rationale_for(c, g).to_string(),
182        _ => "Causal type or evidence grade unset.".to_string(),
183    };
184    let remediation = remediation_for(verdict, claim);
185    AuditEntry {
186        finding_id: finding.id.clone(),
187        assertion_text: finding.assertion.text.clone(),
188        causal_claim: claim,
189        causal_evidence_grade: grade,
190        verdict,
191        rationale,
192        remediation,
193    }
194}
195
196/// v0.40: audit every finding in a frontier. Return entries sorted
197/// so reviewer-attention items (Underidentified, then Conditional)
198/// surface first; identified findings sink to the bottom.
199#[must_use]
200pub fn audit_frontier(project: &Project) -> Vec<AuditEntry> {
201    let mut entries: Vec<AuditEntry> = project.findings.iter().map(audit_finding).collect();
202    entries.sort_by_key(|e| match e.verdict {
203        Identifiability::Underidentified => 0,
204        Identifiability::Conditional => 1,
205        Identifiability::Underdetermined => 2,
206        Identifiability::Identified => 3,
207    });
208    entries
209}
210
211/// Summary counters for an audit pass.
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct AuditSummary {
214    pub total: usize,
215    pub identified: usize,
216    pub conditional: usize,
217    pub underidentified: usize,
218    pub underdetermined: usize,
219}
220
221#[must_use]
222pub fn summarize_audit(entries: &[AuditEntry]) -> AuditSummary {
223    let mut s = AuditSummary {
224        total: entries.len(),
225        identified: 0,
226        conditional: 0,
227        underidentified: 0,
228        underdetermined: 0,
229    };
230    for e in entries {
231        match e.verdict {
232            Identifiability::Identified => s.identified += 1,
233            Identifiability::Conditional => s.conditional += 1,
234            Identifiability::Underidentified => s.underidentified += 1,
235            Identifiability::Underdetermined => s.underdetermined += 1,
236        }
237    }
238    s
239}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal audit row carrying only the verdict under test; every
    /// other field is left empty or unset.
    fn entry_with(verdict: Identifiability) -> AuditEntry {
        AuditEntry {
            finding_id: String::new(),
            assertion_text: String::new(),
            causal_claim: None,
            causal_evidence_grade: None,
            verdict,
            rationale: String::new(),
            remediation: String::new(),
        }
    }

    #[test]
    fn underdetermined_when_missing_either_field() {
        assert_eq!(
            is_identifiable(None, None),
            Identifiability::Underdetermined
        );
        assert_eq!(
            is_identifiable(Some(CausalClaim::Intervention), None),
            Identifiability::Underdetermined
        );
        assert_eq!(
            is_identifiable(None, Some(CausalEvidenceGrade::Rct)),
            Identifiability::Underdetermined
        );
    }

    #[test]
    fn correlation_identified_under_any_grade() {
        for g in [
            CausalEvidenceGrade::Theoretical,
            CausalEvidenceGrade::Observational,
            CausalEvidenceGrade::QuasiExperimental,
            CausalEvidenceGrade::Rct,
        ] {
            assert_eq!(
                is_identifiable(Some(CausalClaim::Correlation), Some(g)),
                Identifiability::Identified,
                "correlation under {g:?} should be identified"
            );
        }
    }

    #[test]
    fn rct_identifies_any_claim() {
        for c in [
            CausalClaim::Correlation,
            CausalClaim::Mediation,
            CausalClaim::Intervention,
        ] {
            assert_eq!(
                is_identifiable(Some(c), Some(CausalEvidenceGrade::Rct)),
                Identifiability::Identified,
                "RCT should identify {c:?}"
            );
        }
    }

    #[test]
    fn intervention_observational_underidentified() {
        assert_eq!(
            is_identifiable(
                Some(CausalClaim::Intervention),
                Some(CausalEvidenceGrade::Observational)
            ),
            Identifiability::Underidentified
        );
    }

    #[test]
    fn intervention_quasi_experimental_conditional() {
        assert_eq!(
            is_identifiable(
                Some(CausalClaim::Intervention),
                Some(CausalEvidenceGrade::QuasiExperimental)
            ),
            Identifiability::Conditional
        );
    }

    #[test]
    fn mediation_observational_underidentified() {
        assert_eq!(
            is_identifiable(
                Some(CausalClaim::Mediation),
                Some(CausalEvidenceGrade::Observational)
            ),
            Identifiability::Underidentified
        );
    }

    // Pins the cell the enum documentation is most easily confused
    // about: QE-grade mediation is conditional, not identified.
    #[test]
    fn mediation_quasi_experimental_conditional() {
        assert_eq!(
            is_identifiable(
                Some(CausalClaim::Mediation),
                Some(CausalEvidenceGrade::QuasiExperimental)
            ),
            Identifiability::Conditional
        );
    }

    #[test]
    fn theoretical_underidentified_for_causal_claims() {
        for c in [CausalClaim::Mediation, CausalClaim::Intervention] {
            assert_eq!(
                is_identifiable(Some(c), Some(CausalEvidenceGrade::Theoretical)),
                Identifiability::Underidentified,
                "theoretical evidence should not identify {c:?}"
            );
        }
    }

    #[test]
    fn as_str_labels_are_lowercase_variant_names() {
        assert_eq!(Identifiability::Identified.as_str(), "identified");
        assert_eq!(Identifiability::Conditional.as_str(), "conditional");
        assert_eq!(Identifiability::Underidentified.as_str(), "underidentified");
        assert_eq!(Identifiability::Underdetermined.as_str(), "underdetermined");
    }

    #[test]
    fn needs_reviewer_attention_only_for_problem_verdicts() {
        assert!(!Identifiability::Identified.needs_reviewer_attention());
        assert!(!Identifiability::Underdetermined.needs_reviewer_attention());
        assert!(Identifiability::Conditional.needs_reviewer_attention());
        assert!(Identifiability::Underidentified.needs_reviewer_attention());
    }

    #[test]
    fn audit_remediation_intervention_observational_suggests_downgrade() {
        let r = remediation_for(
            Identifiability::Underidentified,
            Some(CausalClaim::Intervention),
        );
        assert!(r.contains("downgrade"));
        assert!(r.contains("intervention"));
    }

    #[test]
    fn audit_remediation_covers_remaining_verdicts() {
        assert_eq!(
            remediation_for(Identifiability::Identified, Some(CausalClaim::Correlation)),
            "No action; design supports the claim."
        );
        assert!(
            remediation_for(Identifiability::Conditional, Some(CausalClaim::Intervention))
                .contains("assumptions")
        );
        assert!(
            remediation_for(Identifiability::Underidentified, Some(CausalClaim::Mediation))
                .contains("back-door")
        );
        assert!(remediation_for(Identifiability::Underdetermined, None).contains("causal-set"));
    }

    #[test]
    fn summarize_audit_counts_each_verdict() {
        let entries = [
            entry_with(Identifiability::Identified),
            entry_with(Identifiability::Underidentified),
            entry_with(Identifiability::Identified),
            entry_with(Identifiability::Conditional),
            entry_with(Identifiability::Underidentified),
            entry_with(Identifiability::Underidentified),
        ];
        let s = summarize_audit(&entries);
        assert_eq!(s.total, 6);
        assert_eq!(s.identified, 2);
        assert_eq!(s.conditional, 1);
        assert_eq!(s.underidentified, 3);
        assert_eq!(s.underdetermined, 0);
    }

    #[test]
    fn summarize_audit_of_nothing_is_all_zero() {
        let s = summarize_audit(&[]);
        assert_eq!(s.total, 0);
        assert_eq!(s.identified, 0);
        assert_eq!(s.conditional, 0);
        assert_eq!(s.underidentified, 0);
        assert_eq!(s.underdetermined, 0);
    }
}