1use crate::bundle::FindingBundle;
4use crate::project::Project;
5
6use serde::{Deserialize, Serialize};
7
/// Severity level attached to lint rules and to the diagnostics they emit.
///
/// Rendered in lowercase by its `Display` impl and parsed case-insensitively
/// by its `FromStr` impl.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Severity {
    /// Counted in `LintReport::errors`.
    Error,
    /// Counted in `LintReport::warnings`.
    Warning,
    /// Counted in `LintReport::infos`.
    Info,
}
14
15impl std::fmt::Display for Severity {
16 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
17 match self {
18 Severity::Error => write!(f, "error"),
19 Severity::Warning => write!(f, "warning"),
20 Severity::Info => write!(f, "info"),
21 }
22 }
23}
24
25impl std::str::FromStr for Severity {
26 type Err = String;
27 fn from_str(s: &str) -> Result<Self, Self::Err> {
28 match s.to_lowercase().as_str() {
29 "error" => Ok(Severity::Error),
30 "warning" => Ok(Severity::Warning),
31 "info" => Ok(Severity::Info),
32 _ => Err(format!(
33 "Unknown severity: {s}. Use error, warning, or info."
34 )),
35 }
36 }
37}
38
/// Catalogue entry describing one lint rule (see `all_rules`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LintRule {
    /// Stable rule identifier, e.g. `"L001"`.
    pub id: String,
    /// Short snake_case rule name, e.g. `"small_sample"`; `lint` accepts it as
    /// an alternative to `id` in its rule filter.
    pub name: String,
    /// Severity listed for the rule in the catalogue.
    pub severity: Severity,
    /// One-line human-readable description of what the rule flags.
    pub description: String,
}
46
/// A single issue reported by a lint check against one finding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Diagnostic {
    /// Identifier of the rule that fired: an `L0xx` id for statistical rules,
    /// or a name such as `"orphan"` for the frontier-level checks.
    pub rule_id: String,
    /// Id of the finding the diagnostic is attached to.
    pub finding_id: String,
    /// What was detected.
    pub message: String,
    /// Suggested remediation.
    pub suggestion: String,
    /// Severity of this particular diagnostic.
    pub severity: Severity,
}
55
/// Result of a lint run: the diagnostics plus summary counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LintReport {
    /// All diagnostics that survived filtering.
    pub diagnostics: Vec<Diagnostic>,
    /// Number of findings in the linted project.
    pub findings_checked: usize,
    /// Number of diagnostics with `Severity::Error`.
    pub errors: usize,
    /// Number of diagnostics with `Severity::Warning`.
    pub warnings: usize,
    /// Number of diagnostics with `Severity::Info`.
    pub infos: usize,
}
64
65pub fn all_rules() -> Vec<LintRule> {
67 vec![
68 LintRule {
69 id: "L001".into(),
70 name: "small_sample".into(),
71 severity: Severity::Warning,
72 description: "Experimental finding with sample size < 10".into(),
73 },
74 LintRule {
75 id: "L002".into(),
76 name: "no_replication".into(),
77 severity: Severity::Warning,
78 description: "High-confidence finding without replication".into(),
79 },
80 LintRule {
81 id: "L003".into(),
82 name: "missing_species".into(),
83 severity: Severity::Warning,
84 description: "Experimental finding without species information".into(),
85 },
86 LintRule {
87 id: "L004".into(),
88 name: "confidence_mismatch".into(),
89 severity: Severity::Warning,
90 description: "Theoretical finding with unusually high confidence".into(),
91 },
92 LintRule {
93 id: "L005".into(),
94 name: "unreported_effect".into(),
95 severity: Severity::Warning,
96 description: "P-value reported but no effect size".into(),
97 },
98 LintRule {
99 id: "L006".into(),
100 name: "p_boundary".into(),
101 severity: Severity::Info,
102 description: "P-value near significance boundary (0.04-0.06)".into(),
103 },
104 LintRule {
105 id: "L007".into(),
106 name: "missing_controls".into(),
107 severity: Severity::Warning,
108 description: "Experimental finding with no mention of controls".into(),
109 },
110 LintRule {
111 id: "L008".into(),
112 name: "multiple_comparisons".into(),
113 severity: Severity::Warning,
114 description: "Multiple evidence spans without correction for multiple comparisons"
115 .into(),
116 },
117 LintRule {
118 id: "L009".into(),
119 name: "cherry_picking".into(),
120 severity: Severity::Warning,
121 description:
122 "Same DOI has findings with mixed significance, potential selective reporting"
123 .into(),
124 },
125 LintRule {
126 id: "L010".into(),
127 name: "wrong_test".into(),
128 severity: Severity::Warning,
129 description: "T-test used when multiple groups are mentioned".into(),
130 },
131 LintRule {
132 id: "L011".into(),
133 name: "causal_mismatch_supports".into(),
134 severity: Severity::Warning,
135 description:
136 "A `supports` link from a weaker causal claim (correlation) to a stronger one (intervention) is a category error: correlation alone cannot support a causal claim."
137 .into(),
138 },
139 ]
140}
141
/// Extracts the leading integer from a free-text sample-size field.
///
/// Accepts an optional `n=` label (any case, whitespace allowed around the
/// `=`) and thousands separators, so `"n=24"`, `"N = 24"`, `"24 patients"` and
/// `"n=1,200"` all parse. Returns `None` when the text does not start with a
/// number (after the optional label) or the number does not fit in a `u32`.
fn parse_sample_size(s: &str) -> Option<u32> {
    let cleaned = s.trim().to_lowercase();

    // Drop an optional `n=` label, tolerating whitespace around the `=`.
    let rest = cleaned
        .strip_prefix('n')
        .map(str::trim_start)
        .and_then(|tail| tail.strip_prefix('='))
        .map(str::trim_start)
        .unwrap_or(cleaned.as_str());

    let lead_len = rest.bytes().take_while(u8::is_ascii_digit).count();
    if lead_len == 0 {
        return None;
    }
    let (lead, mut tail) = rest.split_at(lead_len);
    let mut digits = String::from(lead);

    // Fold `,ddd` groups into the number so "1,200" is 1200 rather than 1.
    // Only a 1-3 digit lead can start a thousands-separated number.
    if lead_len <= 3 {
        while let Some(group) = thousands_group(tail) {
            digits.push_str(group);
            tail = &tail[group.len() + 1..];
        }
    }

    digits.parse().ok()
}

/// Returns the digits of a leading `,ddd` thousands group if `s` starts with
/// a comma followed by exactly three digits.
fn thousands_group(s: &str) -> Option<&str> {
    let after_comma = s.strip_prefix(',')?;
    let run = after_comma.bytes().take_while(u8::is_ascii_digit).count();
    (run == 3).then(|| &after_comma[..3])
}
152
/// Extracts the numeric value from a free-text p-value field.
///
/// Accepts an optional `p` label followed by a comparison operator (`=`, `<`,
/// `>`, `<=`, `>=`, `≤`, `≥`) with optional whitespace around it, then a
/// decimal number with an optional exponent: `"p=0.05"`, `"p = 0.05"`,
/// `"P < 0.001"`, `"0.03"`, `"p=1e-5"`. The comparison direction is discarded;
/// only the number is returned. Returns `None` when no number follows.
fn parse_p_value(s: &str) -> Option<f64> {
    fn is_comparator(c: char) -> bool {
        matches!(c, '=' | '<' | '>' | '≤' | '≥')
    }

    let cleaned = s.trim().to_lowercase();

    // Strip `p <op>` only when an operator really follows the `p`; otherwise
    // treat the whole text as the candidate number.
    let rest = cleaned
        .strip_prefix('p')
        .map(str::trim_start)
        .filter(|tail| tail.starts_with(is_comparator))
        .map(|tail| tail.trim_start_matches(is_comparator).trim_start())
        .unwrap_or(cleaned.as_str());

    let mantissa_len = rest
        .bytes()
        .take_while(|b: &u8| b.is_ascii_digit() || *b == b'.')
        .count();
    if mantissa_len == 0 {
        return None;
    }

    // Keep a trailing exponent so "1e-5" is not truncated to 1.0.
    let token_len = mantissa_len + exponent_len(&rest[mantissa_len..]);
    rest[..token_len].parse().ok()
}

/// Byte length of a leading `e[+-]digits` exponent in `s`, or 0 if `s` does
/// not start with a complete exponent.
fn exponent_len(s: &str) -> usize {
    let Some(after_e) = s.strip_prefix('e') else {
        return 0;
    };
    let sign_len = usize::from(after_e.starts_with(|c: char| c == '+' || c == '-'));
    let digit_len = after_e[sign_len..]
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    if digit_len == 0 {
        0
    } else {
        1 + sign_len + digit_len
    }
}
167
168fn has_abstract_only_caveat(finding: &FindingBundle) -> bool {
169 finding.annotations.iter().any(|annotation| {
170 let text = annotation.text.to_lowercase();
171 text.contains("abstract-only") || text.contains("title and abstract only")
172 })
173}
174
175pub fn check_sample_size(finding: &FindingBundle) -> Vec<Diagnostic> {
176 let mut diags = Vec::new();
177 if (finding.evidence.evidence_type == "experimental"
178 || finding.evidence.evidence_type == "observational")
179 && let Some(ref ss) = finding.evidence.sample_size
180 && let Some(n) = parse_sample_size(ss)
181 && n < 10
182 {
183 diags.push(Diagnostic {
184 rule_id: "L001".into(),
185 finding_id: finding.id.clone(),
186 message: format!("Sample size {} is below minimum threshold of 10", n),
187 suggestion: "Consider whether this finding has adequate statistical power".into(),
188 severity: Severity::Warning,
189 });
190 }
191 diags
192}
193
194pub fn check_no_replication(finding: &FindingBundle) -> Vec<Diagnostic> {
195 let mut diags = Vec::new();
196 if finding.confidence.score > 0.8 && !finding.evidence.replicated {
197 diags.push(Diagnostic {
198 rule_id: "L002".into(),
199 finding_id: finding.id.clone(),
200 message: format!(
201 "Confidence {:.2} but no replication reported",
202 finding.confidence.score
203 ),
204 suggestion: "High-confidence claims should have independent replication".into(),
205 severity: Severity::Warning,
206 });
207 }
208 diags
209}
210
211pub fn check_missing_species(finding: &FindingBundle) -> Vec<Diagnostic> {
212 let mut diags = Vec::new();
213 if finding.evidence.evidence_type == "experimental"
214 && finding.evidence.species.is_none()
215 && !has_abstract_only_caveat(finding)
216 {
217 diags.push(Diagnostic {
218 rule_id: "L003".into(),
219 finding_id: finding.id.clone(),
220 message: "Experimental finding without species information".into(),
221 suggestion: "Specify the species or model organism used".into(),
222 severity: Severity::Warning,
223 });
224 }
225 diags
226}
227
228pub fn check_confidence_mismatch(finding: &FindingBundle) -> Vec<Diagnostic> {
229 let mut diags = Vec::new();
230 if finding.assertion.assertion_type == "theoretical" && finding.confidence.score > 0.9 {
231 diags.push(Diagnostic {
232 rule_id: "L004".into(),
233 finding_id: finding.id.clone(),
234 message: format!(
235 "Theoretical assertion with confidence {:.2} — unusually high for unvalidated theory",
236 finding.confidence.score
237 ),
238 suggestion: "Theoretical findings typically warrant lower confidence until experimentally validated".into(),
239 severity: Severity::Warning,
240 });
241 }
242 diags
243}
244
245pub fn check_unreported_effect(finding: &FindingBundle) -> Vec<Diagnostic> {
246 let mut diags = Vec::new();
247 if finding.evidence.p_value.is_some() && finding.evidence.effect_size.is_none() {
248 diags.push(Diagnostic {
249 rule_id: "L005".into(),
250 finding_id: finding.id.clone(),
251 message: "P-value reported but effect size is missing".into(),
252 suggestion: "Report effect size (Cohen's d, odds ratio, etc.) alongside p-value".into(),
253 severity: Severity::Warning,
254 });
255 }
256 diags
257}
258
259pub fn check_p_boundary(finding: &FindingBundle) -> Vec<Diagnostic> {
260 let mut diags = Vec::new();
261 if let Some(ref pv) = finding.evidence.p_value
262 && let Some(p) = parse_p_value(pv)
263 && (0.04..=0.06).contains(&p)
264 {
265 diags.push(Diagnostic {
266 rule_id: "L006".into(),
267 finding_id: finding.id.clone(),
268 message: format!("P-value {:.4} is near the significance boundary", p),
269 suggestion: "Consider this finding borderline; report exact p-value and effect size"
270 .into(),
271 severity: Severity::Info,
272 });
273 }
274 diags
275}
276
277pub fn check_missing_controls(finding: &FindingBundle) -> Vec<Diagnostic> {
278 let mut diags = Vec::new();
279 if finding.evidence.evidence_type == "experimental" && !has_abstract_only_caveat(finding) {
280 let method_lower = finding.evidence.method.to_lowercase();
281 if !method_lower.contains("control")
282 && !method_lower.contains("sham")
283 && !method_lower.contains("vehicle")
284 && !method_lower.contains("placebo")
285 {
286 diags.push(Diagnostic {
287 rule_id: "L007".into(),
288 finding_id: finding.id.clone(),
289 message: "Experimental finding with no mention of controls in method".into(),
290 suggestion: "Document the control condition used (vehicle, sham, placebo, etc.)"
291 .into(),
292 severity: Severity::Warning,
293 });
294 }
295 }
296 diags
297}
298
299pub fn check_multiple_comparisons(finding: &FindingBundle) -> Vec<Diagnostic> {
300 let mut diags = Vec::new();
301 if finding.evidence.evidence_spans.len() > 3 {
302 let method_lower = finding.evidence.method.to_lowercase();
303 if !method_lower.contains("correction")
304 && !method_lower.contains("bonferroni")
305 && !method_lower.contains("holm")
306 && !method_lower.contains("fdr")
307 && !method_lower.contains("benjamini")
308 && !method_lower.contains("tukey")
309 {
310 diags.push(Diagnostic {
311 rule_id: "L008".into(),
312 finding_id: finding.id.clone(),
313 message: format!(
314 "{} evidence spans without mention of multiple comparison correction",
315 finding.evidence.evidence_spans.len()
316 ),
317 suggestion: "Apply Bonferroni, FDR, or other appropriate correction for multiple comparisons".into(),
318 severity: Severity::Warning,
319 });
320 }
321 }
322 diags
323}
324
325pub fn check_cherry_picking(frontier: &Project) -> Vec<Diagnostic> {
326 use std::collections::HashMap;
327
328 let mut doi_findings: HashMap<String, Vec<&FindingBundle>> = HashMap::new();
329 for f in &frontier.findings {
330 if let Some(ref doi) = f.provenance.doi {
331 doi_findings.entry(doi.clone()).or_default().push(f);
332 }
333 }
334
335 let mut diags = Vec::new();
336 for (doi, findings) in &doi_findings {
337 if findings.len() < 2 {
338 continue;
339 }
340 let has_significant = findings.iter().any(|f| {
341 f.evidence
342 .p_value
343 .as_ref()
344 .and_then(|pv| parse_p_value(pv))
345 .is_some_and(|p| p < 0.05)
346 });
347 let has_nonsignificant = findings.iter().any(|f| {
348 f.evidence
349 .p_value
350 .as_ref()
351 .and_then(|pv| parse_p_value(pv))
352 .is_some_and(|p| p >= 0.05)
353 });
354 if has_significant && has_nonsignificant {
355 for f in findings {
356 diags.push(Diagnostic {
357 rule_id: "L009".into(),
358 finding_id: f.id.clone(),
359 message: format!("DOI {} has findings with mixed significance", doi),
360 suggestion: "Verify all findings from this paper are reported, not just significant ones".into(),
361 severity: Severity::Warning,
362 });
363 }
364 }
365 }
366 diags
367}
368
369pub fn check_wrong_test(finding: &FindingBundle) -> Vec<Diagnostic> {
370 let mut diags = Vec::new();
371 let method_lower = finding.evidence.method.to_lowercase();
372 if method_lower.contains("t-test") || method_lower.contains("t test") {
373 let assertion_lower = finding.assertion.text.to_lowercase();
374 let multi_group = assertion_lower.contains("groups")
375 || assertion_lower.contains("three")
376 || assertion_lower.contains("four")
377 || assertion_lower.contains("multiple")
378 || assertion_lower.contains("several")
379 || finding.assertion.entities.len() > 3;
380 if multi_group {
381 diags.push(Diagnostic {
382 rule_id: "L010".into(),
383 finding_id: finding.id.clone(),
384 message: "T-test used but multiple groups appear to be compared".into(),
385 suggestion: "Use ANOVA or Kruskal-Wallis for comparisons across >2 groups".into(),
386 severity: Severity::Warning,
387 });
388 }
389 }
390 diags
391}
392
393pub fn check_causal_mismatch_on_supports(frontier: &Project) -> Vec<Diagnostic> {
403 use crate::bundle::CausalClaim;
404 use std::collections::HashMap;
405
406 let claim_rank = |c: CausalClaim| -> u32 {
407 match c {
408 CausalClaim::Correlation => 1,
409 CausalClaim::Mediation => 2,
410 CausalClaim::Intervention => 3,
411 }
412 };
413 let claim_name = |c: CausalClaim| -> &'static str {
414 match c {
415 CausalClaim::Correlation => "correlation",
416 CausalClaim::Mediation => "mediation",
417 CausalClaim::Intervention => "intervention",
418 }
419 };
420
421 let by_id: HashMap<&str, &FindingBundle> = frontier
422 .findings
423 .iter()
424 .map(|f| (f.id.as_str(), f))
425 .collect();
426
427 let mut diags = Vec::new();
428 for source in &frontier.findings {
429 let Some(source_claim) = source.assertion.causal_claim else {
430 continue;
431 };
432 for link in &source.links {
433 if link.link_type != "supports" {
434 continue;
435 }
436 let Some(target) = by_id.get(link.target.as_str()) else {
437 continue; };
439 let Some(target_claim) = target.assertion.causal_claim else {
440 continue;
441 };
442 if claim_rank(source_claim) < claim_rank(target_claim) {
443 diags.push(Diagnostic {
444 rule_id: "L011".into(),
445 finding_id: source.id.clone(),
446 message: format!(
447 "{src} (claim: {sc}) supports→ {tgt} (claim: {tc}); the source's design cannot bear the target's causal weight",
448 src = source.id,
449 sc = claim_name(source_claim),
450 tgt = target.id,
451 tc = claim_name(target_claim),
452 ),
453 suggestion: format!(
454 "Either re-grade {src} to {tc} (with appropriate evidence) or re-type the link from `supports` to `correlates_with` / `extends` / a weaker relationship.",
455 src = source.id,
456 tc = claim_name(target_claim),
457 ),
458 severity: Severity::Warning,
459 });
460 }
461 }
462 }
463 diags
464}
465
466pub fn lint_frontier(frontier: &Project) -> LintReport {
469 use std::collections::{HashMap, HashSet};
470
471 let mut diagnostics = Vec::new();
472 let finding_count = frontier.findings.len();
473
474 let finding_ids: HashSet<&str> = frontier.findings.iter().map(|f| f.id.as_str()).collect();
476 let finding_map: HashMap<&str, &FindingBundle> = frontier
478 .findings
479 .iter()
480 .map(|f| (f.id.as_str(), f))
481 .collect();
482
483 let mut inbound_count: HashMap<&str, usize> = HashMap::new();
485 let mut contradiction_pairs: Vec<(&str, &str)> = Vec::new();
487 let mut superseded_targets: HashSet<&str> = HashSet::new();
488 let mut dependent_count: HashMap<&str, usize> = HashMap::new();
490
491 for f in &frontier.findings {
492 for link in &f.links {
493 if finding_ids.contains(link.target.as_str()) {
494 *inbound_count.entry(link.target.as_str()).or_insert(0) += 1;
495 *dependent_count.entry(link.target.as_str()).or_insert(0) += 1;
496
497 if link.link_type == "contradicts" {
498 contradiction_pairs.push((f.id.as_str(), link.target.as_str()));
499 }
500 if link.link_type == "supersedes" {
501 superseded_targets.insert(link.target.as_str());
502 }
503 }
504 }
505 }
506
507 for f in &frontier.findings {
509 if inbound_count.get(f.id.as_str()).copied().unwrap_or(0) == 0 {
510 diagnostics.push(Diagnostic {
511 rule_id: "orphan".into(),
512 finding_id: f.id.clone(),
513 message: "Finding has no inbound links — may be disconnected from the graph".into(),
514 suggestion:
515 "Consider linking this finding to related findings, or mark as a seed finding"
516 .into(),
517 severity: Severity::Info,
518 });
519 }
520 }
521
522 for (id_a, id_b) in &contradiction_pairs {
524 let a = finding_map.get(id_a);
525 let b = finding_map.get(id_b);
526 if let (Some(a), Some(b)) = (a, b)
527 && !a.flags.retracted
528 && !b.flags.retracted
529 {
530 let reviewed_tension = a.flags.contested
531 || b.flags.contested
532 || a.flags.review_state.is_some()
533 || b.flags.review_state.is_some();
534 diagnostics.push(Diagnostic {
535 rule_id: "unresolved_contradiction".into(),
536 finding_id: id_a.to_string(),
537 message: format!("Contradiction between {} and {} has no resolution", id_a, id_b),
538 suggestion: "Review both findings and either retract one, adjust confidence, or add a resolution note".into(),
539 severity: if reviewed_tension { Severity::Info } else { Severity::Warning },
540 });
541 }
542 }
543
544 for f in &frontier.findings {
546 if f.flags.gap {
547 let deps = dependent_count.get(f.id.as_str()).copied().unwrap_or(0);
548 if deps > 0 {
549 diagnostics.push(Diagnostic {
550 rule_id: "critical_gap".into(),
551 finding_id: f.id.clone(),
552 message: format!(
553 "Gap finding has {} dependents — high-value experiment target",
554 deps
555 ),
556 suggestion: format!("Prioritize investigating this gap: {}", f.assertion.text),
557 severity: Severity::Warning,
558 });
559 }
560 }
561 }
562
563 for f in &frontier.findings {
565 if f.confidence.score < 0.6 {
566 let deps = dependent_count.get(f.id.as_str()).copied().unwrap_or(0);
567 if deps > 5 {
568 let severity = if has_abstract_only_caveat(f) {
569 Severity::Info
570 } else {
571 Severity::Warning
572 };
573 diagnostics.push(Diagnostic {
574 rule_id: "fragile_anchor".into(),
575 finding_id: f.id.clone(),
576 message: format!("Low-confidence finding ({:.2}) supports {} other findings", f.confidence.score, deps),
577 suggestion: "This finding is a fragile anchor — seek replication or higher-quality evidence".into(),
578 severity,
579 });
580 }
581 }
582 }
583
584 let mut entity_findings: HashMap<String, Vec<&str>> = HashMap::new();
586 for f in &frontier.findings {
587 for entity in &f.assertion.entities {
588 let key = entity.name.to_lowercase();
589 entity_findings.entry(key).or_default().push(f.id.as_str());
590 }
591 }
592
593 for (entity_name, fids) in &entity_findings {
594 if fids.len() >= 3 {
595 let fid_set: HashSet<&str> = fids.iter().copied().collect();
597 let mut has_any_link = false;
598 'outer: for &fid in fids {
599 if let Some(f) = finding_map.get(fid) {
600 for link in &f.links {
601 if fid_set.contains(link.target.as_str()) {
602 has_any_link = true;
603 break 'outer;
604 }
605 }
606 }
607 }
608 if !has_any_link {
609 let display_name = frontier
611 .findings
612 .iter()
613 .flat_map(|f| f.assertion.entities.iter())
614 .find(|e| e.name.to_lowercase() == *entity_name)
615 .map(|e| e.name.clone())
616 .unwrap_or_else(|| entity_name.clone());
617
618 diagnostics.push(Diagnostic {
619 rule_id: "missing_crossref".into(),
620 finding_id: fids[0].to_string(),
621 message: format!(
622 "Entity '{}' appears in {} findings with no links between them",
623 display_name,
624 fids.len()
625 ),
626 suggestion: format!(
627 "Consider adding typed links between findings that share entity '{}'",
628 display_name
629 ),
630 severity: Severity::Info,
631 });
632 }
633 }
634 }
635
636 for &target_id in &superseded_targets {
638 if let Some(f) = finding_map.get(target_id)
639 && !f.flags.retracted
640 && f.confidence.score >= 0.6
641 {
642 diagnostics.push(Diagnostic {
643 rule_id: "stale_superseded".into(),
644 finding_id: target_id.to_string(),
645 message: "Finding has been superseded but confidence hasn't been adjusted".into(),
646 suggestion: "Lower confidence or mark as superseded".into(),
647 severity: Severity::Warning,
648 });
649 }
650 }
651
652 let errors = diagnostics
653 .iter()
654 .filter(|d| d.severity == Severity::Error)
655 .count();
656 let warnings = diagnostics
657 .iter()
658 .filter(|d| d.severity == Severity::Warning)
659 .count();
660 let infos = diagnostics
661 .iter()
662 .filter(|d| d.severity == Severity::Info)
663 .count();
664
665 LintReport {
666 diagnostics,
667 findings_checked: finding_count,
668 errors,
669 warnings,
670 infos,
671 }
672}
673
674pub fn lint(
676 frontier: &Project,
677 rule_filter: Option<&str>,
678 severity_filter: Option<&str>,
679) -> LintReport {
680 let rules = all_rules();
681 let severity_threshold: Option<Severity> = severity_filter.and_then(|s| s.parse().ok());
682
683 let mut diagnostics = Vec::new();
684
685 for finding in &frontier.findings {
687 let mut finding_diags = Vec::new();
688 finding_diags.extend(check_sample_size(finding));
689 finding_diags.extend(check_no_replication(finding));
690 finding_diags.extend(check_missing_species(finding));
691 finding_diags.extend(check_confidence_mismatch(finding));
692 finding_diags.extend(check_unreported_effect(finding));
693 finding_diags.extend(check_p_boundary(finding));
694 finding_diags.extend(check_missing_controls(finding));
695 finding_diags.extend(check_multiple_comparisons(finding));
696 finding_diags.extend(check_wrong_test(finding));
697 diagnostics.extend(finding_diags);
698 }
699
700 diagnostics.extend(check_cherry_picking(frontier));
702 diagnostics.extend(check_causal_mismatch_on_supports(frontier));
703
704 if let Some(rule_id) = rule_filter {
706 let matching_ids: Vec<&str> = rules
708 .iter()
709 .filter(|r| r.id == rule_id || r.name == rule_id)
710 .map(|r| r.id.as_str())
711 .collect();
712 diagnostics.retain(|d| matching_ids.contains(&d.rule_id.as_str()));
713 }
714
715 if let Some(ref sev) = severity_threshold {
716 diagnostics.retain(|d| d.severity == *sev);
717 }
718
719 let errors = diagnostics
720 .iter()
721 .filter(|d| d.severity == Severity::Error)
722 .count();
723 let warnings = diagnostics
724 .iter()
725 .filter(|d| d.severity == Severity::Warning)
726 .count();
727 let infos = diagnostics
728 .iter()
729 .filter(|d| d.severity == Severity::Info)
730 .count();
731
732 LintReport {
733 diagnostics,
734 findings_checked: frontier.findings.len(),
735 errors,
736 warnings,
737 infos,
738 }
739}
740
741pub fn print_report(report: &LintReport) {
743 if report.diagnostics.is_empty() {
744 println!(
745 "No issues found across {} findings.",
746 report.findings_checked
747 );
748 return;
749 }
750
751 for d in &report.diagnostics {
752 let severity_str = match d.severity {
753 Severity::Error => "ERROR",
754 Severity::Warning => "WARN ",
755 Severity::Info => "INFO ",
756 };
757 println!(
758 "[{}] {} ({}): {}",
759 severity_str, d.finding_id, d.rule_id, d.message
760 );
761 println!(" suggestion: {}", d.suggestion);
762 }
763
764 println!(
765 "\n{} findings checked: {} errors, {} warnings, {} info",
766 report.findings_checked, report.errors, report.warnings, report.infos,
767 );
768}
769
770#[cfg(test)]
771mod tests {
772 use super::*;
773 use crate::bundle::*;
774
    /// Builds a baseline finding with the given id for use as a test fixture.
    ///
    /// The fixture is experimental evidence with a species, a method that
    /// mentions a control group, `n=30`, an effect size, `p=0.01`, replication,
    /// no evidence spans, no links and no annotations. Tests mutate individual
    /// fields to trigger a specific rule.
    fn make_finding(id: &str) -> FindingBundle {
        FindingBundle {
            id: id.to_string(),
            version: 1,
            previous_version: None,
            assertion: Assertion {
                text: "Test assertion".into(),
                assertion_type: "mechanism".into(),
                entities: vec![],
                relation: None,
                direction: Some("positive".into()),
                causal_claim: None,
                causal_evidence_grade: None,
            },
            evidence: Evidence {
                evidence_type: "experimental".into(),
                model_system: "cell_culture".into(),
                species: Some("Homo sapiens".into()),
                method: "Western blot with control group".into(),
                sample_size: Some("n=30".into()),
                effect_size: Some("d=0.8".into()),
                p_value: Some("p=0.01".into()),
                replicated: true,
                replication_count: Some(2),
                evidence_spans: vec![],
            },
            conditions: Conditions {
                text: "Standard conditions".into(),
                species_verified: vec!["Homo sapiens".into()],
                species_unverified: vec![],
                in_vitro: true,
                in_vivo: false,
                human_data: false,
                clinical_trial: false,
                concentration_range: None,
                duration: None,
                age_group: None,
                cell_type: None,
            },
            // NOTE(review): the meaning of the three `Confidence::raw` arguments
            // is defined in `crate::bundle`; tests overwrite `confidence.score`
            // directly when they need a specific value.
            confidence: Confidence::raw(0.75, "experimental evidence", 0.9),
            provenance: Provenance {
                source_type: "published_paper".into(),
                doi: Some("10.1234/test".into()),
                pmid: None,
                pmc: None,
                openalex_id: None,
                url: None,
                title: "Test paper".into(),
                authors: vec![],
                year: Some(2024),
                journal: Some("Test Journal".into()),
                license: None,
                publisher: None,
                funders: vec![],
                extraction: Extraction::default(),
                review: None,
                citation_count: None,
            },
            flags: Flags {
                gap: false,
                negative_space: false,
                contested: false,
                retracted: false,
                declining: false,
                gravity_well: false,
                review_state: None,
                superseded: false,
                signature_threshold: None,
                jointly_accepted: false,
            },
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: "2024-01-01T00:00:00Z".into(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }
854
    /// Wraps `findings` in an otherwise empty `Project` test fixture.
    ///
    /// Only `stats.findings` is derived from the input; the other statistics
    /// are fixed placeholder values and every other collection is empty.
    fn make_frontier(findings: Vec<FindingBundle>) -> Project {
        use crate::project::*;
        use std::collections::HashMap;
        Project {
            vela_version: "0.1.0".into(),
            schema: "vela/finding-bundle/0.1.0".into(),
            frontier_id: None,
            project: ProjectMeta {
                name: "test".into(),
                description: "test frontier".into(),
                compiled_at: "2024-01-01T00:00:00Z".into(),
                compiler: "vela/0.2.0".into(),
                papers_processed: 1,
                errors: 0,
                dependencies: vec![],
            },
            stats: ProjectStats {
                findings: findings.len(),
                links: 0,
                replicated: 0,
                unreplicated: 0,
                avg_confidence: 0.7,
                gaps: 0,
                negative_space: 0,
                contested: 0,
                categories: HashMap::new(),
                link_types: HashMap::new(),
                human_reviewed: 0,
                review_event_count: 0,
                confidence_update_count: 0,
                event_count: 0,
                source_count: 0,
                evidence_atom_count: 0,
                condition_record_count: 0,
                proposal_count: 0,
                confidence_distribution: ConfidenceDistribution {
                    high_gt_80: 0,
                    medium_60_80: 0,
                    low_lt_60: 0,
                },
            },
            findings,
            sources: vec![],
            evidence_atoms: vec![],
            condition_records: vec![],
            review_events: vec![],
            confidence_updates: vec![],
            events: vec![],
            proposals: vec![],
            proof_state: Default::default(),
            signatures: vec![],
            actors: Vec::new(),
            replications: Vec::new(),
            datasets: Vec::new(),
            code_artifacts: Vec::new(),
            artifacts: Vec::new(),
            predictions: Vec::new(),
            resolutions: Vec::new(),
            peers: vec![],
            negative_results: vec![],
            trajectories: vec![],
        }
    }
918
919 #[test]
920 fn all_rules_count() {
921 assert_eq!(all_rules().len(), 11);
922 }
923
924 #[test]
925 fn check_sample_size_small() {
926 let mut f = make_finding("vf_001");
927 f.evidence.sample_size = Some("n=5".into());
928 let diags = check_sample_size(&f);
929 assert_eq!(diags.len(), 1);
930 assert_eq!(diags[0].rule_id, "L001");
931 }
932
933 #[test]
934 fn check_sample_size_adequate() {
935 let f = make_finding("vf_002"); let diags = check_sample_size(&f);
937 assert!(diags.is_empty());
938 }
939
940 #[test]
941 fn check_no_replication_high_confidence() {
942 let mut f = make_finding("vf_003");
943 f.confidence.score = 0.9;
944 f.evidence.replicated = false;
945 let diags = check_no_replication(&f);
946 assert_eq!(diags.len(), 1);
947 assert_eq!(diags[0].rule_id, "L002");
948 }
949
950 #[test]
951 fn check_no_replication_ok() {
952 let mut f = make_finding("vf_004");
953 f.confidence.score = 0.9;
954 f.evidence.replicated = true;
955 let diags = check_no_replication(&f);
956 assert!(diags.is_empty());
957 }
958
959 #[test]
960 fn check_missing_species_experimental() {
961 let mut f = make_finding("vf_005");
962 f.evidence.species = None;
963 let diags = check_missing_species(&f);
964 assert_eq!(diags.len(), 1);
965 assert_eq!(diags[0].rule_id, "L003");
966 }
967
968 #[test]
969 fn check_confidence_mismatch_theoretical() {
970 let mut f = make_finding("vf_006");
971 f.assertion.assertion_type = "theoretical".into();
972 f.confidence.score = 0.95;
973 let diags = check_confidence_mismatch(&f);
974 assert_eq!(diags.len(), 1);
975 assert_eq!(diags[0].rule_id, "L004");
976 }
977
978 #[test]
979 fn check_unreported_effect_size() {
980 let mut f = make_finding("vf_007");
981 f.evidence.p_value = Some("p=0.01".into());
982 f.evidence.effect_size = None;
983 let diags = check_unreported_effect(&f);
984 assert_eq!(diags.len(), 1);
985 assert_eq!(diags[0].rule_id, "L005");
986 }
987
988 #[test]
989 fn check_p_boundary_near() {
990 let mut f = make_finding("vf_008");
991 f.evidence.p_value = Some("p=0.049".into());
992 let diags = check_p_boundary(&f);
993 assert_eq!(diags.len(), 1);
994 assert_eq!(diags[0].rule_id, "L006");
995 }
996
997 #[test]
998 fn check_p_boundary_clear() {
999 let mut f = make_finding("vf_009");
1000 f.evidence.p_value = Some("p=0.001".into());
1001 let diags = check_p_boundary(&f);
1002 assert!(diags.is_empty());
1003 }
1004
1005 #[test]
1006 fn check_missing_controls_no_mention() {
1007 let mut f = make_finding("vf_010");
1008 f.evidence.method = "Western blot".into();
1009 let diags = check_missing_controls(&f);
1010 assert_eq!(diags.len(), 1);
1011 assert_eq!(diags[0].rule_id, "L007");
1012 }
1013
1014 #[test]
1015 fn check_multiple_comparisons_many_spans() {
1016 let mut f = make_finding("vf_011");
1017 f.evidence.evidence_spans = vec![
1018 serde_json::json!("span1"),
1019 serde_json::json!("span2"),
1020 serde_json::json!("span3"),
1021 serde_json::json!("span4"),
1022 ];
1023 f.evidence.method = "ANOVA".into();
1024 let diags = check_multiple_comparisons(&f);
1025 assert_eq!(diags.len(), 1);
1026 assert_eq!(diags[0].rule_id, "L008");
1027 }
1028
1029 #[test]
1030 fn check_multiple_comparisons_with_correction() {
1031 let mut f = make_finding("vf_012");
1032 f.evidence.evidence_spans = vec![
1033 serde_json::json!("span1"),
1034 serde_json::json!("span2"),
1035 serde_json::json!("span3"),
1036 serde_json::json!("span4"),
1037 ];
1038 f.evidence.method = "ANOVA with Bonferroni correction".into();
1039 let diags = check_multiple_comparisons(&f);
1040 assert!(diags.is_empty());
1041 }
1042
1043 #[test]
1044 fn check_wrong_test_multiple_groups() {
1045 let mut f = make_finding("vf_013");
1046 f.evidence.method = "Student's t-test".into();
1047 f.assertion.text = "Comparison across three groups shows difference".into();
1048 let diags = check_wrong_test(&f);
1049 assert_eq!(diags.len(), 1);
1050 assert_eq!(diags[0].rule_id, "L010");
1051 }
1052
1053 #[test]
1054 fn check_cherry_picking_mixed_significance() {
1055 let mut f1 = make_finding("vf_014a");
1056 f1.provenance.doi = Some("10.1234/mixed".into());
1057 f1.evidence.p_value = Some("p=0.01".into());
1058
1059 let mut f2 = make_finding("vf_014b");
1060 f2.provenance.doi = Some("10.1234/mixed".into());
1061 f2.evidence.p_value = Some("p=0.15".into());
1062
1063 let frontier = make_frontier(vec![f1, f2]);
1064 let diags = check_cherry_picking(&frontier);
1065 assert_eq!(diags.len(), 2);
1066 assert!(diags.iter().all(|d| d.rule_id == "L009"));
1067 }
1068
1069 #[test]
1070 fn lint_with_rule_filter() {
1071 let mut f = make_finding("vf_015");
1072 f.evidence.sample_size = Some("n=3".into());
1073 f.evidence.species = None;
1074 let frontier = make_frontier(vec![f]);
1075 let report = lint(&frontier, Some("L001"), None);
1076 assert!(report.diagnostics.iter().all(|d| d.rule_id == "L001"));
1077 }
1078
1079 #[test]
1080 fn lint_with_severity_filter() {
1081 let mut f = make_finding("vf_016");
1082 f.evidence.p_value = Some("p=0.05".into());
1083 f.evidence.effect_size = None;
1084 let frontier = make_frontier(vec![f]);
1085 let report = lint(&frontier, None, Some("info"));
1086 assert!(
1087 report
1088 .diagnostics
1089 .iter()
1090 .all(|d| d.severity == Severity::Info)
1091 );
1092 }
1093
1094 #[test]
1095 fn lint_clean_finding() {
1096 let f = make_finding("vf_clean");
1097 let frontier = make_frontier(vec![f]);
1098 let report = lint(&frontier, None, None);
1099 assert_eq!(report.errors, 0);
1100 assert_eq!(report.warnings, 0);
1101 }
1102
1103 #[test]
1104 fn parse_sample_size_variants() {
1105 assert_eq!(parse_sample_size("n=24"), Some(24));
1106 assert_eq!(parse_sample_size("24 patients"), Some(24));
1107 assert_eq!(parse_sample_size("5"), Some(5));
1108 assert_eq!(parse_sample_size("n=100"), Some(100));
1109 }
1110
1111 #[test]
1112 fn parse_p_value_variants() {
1113 assert!((parse_p_value("p=0.05").unwrap() - 0.05).abs() < 0.001);
1114 assert!((parse_p_value("p<0.001").unwrap() - 0.001).abs() < 0.0001);
1115 assert!((parse_p_value("0.03").unwrap() - 0.03).abs() < 0.001);
1116 }
1117
1118 #[test]
1121 fn lint_frontier_orphan_findings() {
1122 let f1 = make_finding("vf_a");
1123 let mut f2 = make_finding("vf_b");
1124 f2.links.push(crate::bundle::Link {
1126 target: "vf_a".into(),
1127 link_type: "supports".into(),
1128 note: "".into(),
1129 inferred_by: "compiler".into(),
1130 created_at: "".into(),
1131 mechanism: None,
1132 });
1133 let frontier = make_frontier(vec![f1, f2]);
1134 let report = lint_frontier(&frontier);
1135 let orphans: Vec<_> = report
1136 .diagnostics
1137 .iter()
1138 .filter(|d| d.rule_id == "orphan")
1139 .collect();
1140 assert!(orphans.iter().any(|d| d.finding_id == "vf_b"));
1142 assert!(!orphans.iter().any(|d| d.finding_id == "vf_a"));
1144 }
1145
1146 #[test]
1147 fn lint_frontier_unresolved_contradiction() {
1148 let mut f1 = make_finding("vf_c");
1149 let f2 = make_finding("vf_d");
1150 f1.links.push(crate::bundle::Link {
1151 target: "vf_d".into(),
1152 link_type: "contradicts".into(),
1153 note: "".into(),
1154 inferred_by: "compiler".into(),
1155 created_at: "".into(),
1156 mechanism: None,
1157 });
1158 let frontier = make_frontier(vec![f1, f2]);
1159 let report = lint_frontier(&frontier);
1160 let contras: Vec<_> = report
1161 .diagnostics
1162 .iter()
1163 .filter(|d| d.rule_id == "unresolved_contradiction")
1164 .collect();
1165 assert_eq!(contras.len(), 1);
1166 assert!(contras[0].message.contains("vf_c"));
1167 assert!(contras[0].message.contains("vf_d"));
1168 }
1169
1170 #[test]
1171 fn lint_frontier_resolved_contradiction_no_warning() {
1172 let mut f1 = make_finding("vf_e");
1173 let mut f2 = make_finding("vf_f");
1174 f1.links.push(crate::bundle::Link {
1175 target: "vf_f".into(),
1176 link_type: "contradicts".into(),
1177 note: "".into(),
1178 inferred_by: "compiler".into(),
1179 created_at: "".into(),
1180 mechanism: None,
1181 });
1182 f2.flags.retracted = true;
1183 let frontier = make_frontier(vec![f1, f2]);
1184 let report = lint_frontier(&frontier);
1185 let contras: Vec<_> = report
1186 .diagnostics
1187 .iter()
1188 .filter(|d| d.rule_id == "unresolved_contradiction")
1189 .collect();
1190 assert_eq!(contras.len(), 0);
1191 }
1192
1193 #[test]
1194 fn lint_frontier_critical_gap() {
1195 let mut gap = make_finding("vf_gap");
1196 gap.flags.gap = true;
1197 let mut f1 = make_finding("vf_dep1");
1198 f1.links.push(crate::bundle::Link {
1199 target: "vf_gap".into(),
1200 link_type: "supports".into(),
1201 note: "".into(),
1202 inferred_by: "compiler".into(),
1203 created_at: "".into(),
1204 mechanism: None,
1205 });
1206 let frontier = make_frontier(vec![gap, f1]);
1207 let report = lint_frontier(&frontier);
1208 let gaps: Vec<_> = report
1209 .diagnostics
1210 .iter()
1211 .filter(|d| d.rule_id == "critical_gap")
1212 .collect();
1213 assert_eq!(gaps.len(), 1);
1214 assert!(gaps[0].message.contains("1 dependents"));
1215 }
1216
1217 #[test]
1218 fn lint_frontier_fragile_anchor() {
1219 let mut anchor = make_finding("vf_anchor");
1220 anchor.confidence.score = 0.4;
1221 let mut findings = vec![anchor];
1223 for i in 0..6 {
1224 let mut f = make_finding(&format!("vf_dep_{}", i));
1225 f.links.push(crate::bundle::Link {
1226 target: "vf_anchor".into(),
1227 link_type: "supports".into(),
1228 note: "".into(),
1229 inferred_by: "compiler".into(),
1230 created_at: "".into(),
1231 mechanism: None,
1232 });
1233 findings.push(f);
1234 }
1235 let frontier = make_frontier(findings);
1236 let report = lint_frontier(&frontier);
1237 let fragile: Vec<_> = report
1238 .diagnostics
1239 .iter()
1240 .filter(|d| d.rule_id == "fragile_anchor")
1241 .collect();
1242 assert_eq!(fragile.len(), 1);
1243 assert!(fragile[0].message.contains("0.40"));
1244 assert!(fragile[0].message.contains("6 other findings"));
1245 }
1246
1247 #[test]
1248 fn lint_frontier_missing_crossref() {
1249 let entity = Entity {
1251 name: "NLRP3".into(),
1252 entity_type: "protein".into(),
1253 identifiers: serde_json::Map::new(),
1254 canonical_id: None,
1255 candidates: vec![],
1256 aliases: vec![],
1257 resolution_provenance: None,
1258 resolution_confidence: 1.0,
1259 resolution_method: None,
1260 species_context: None,
1261 needs_review: false,
1262 };
1263 let mut f1 = make_finding("vf_x1");
1264 f1.assertion.entities = vec![entity.clone()];
1265 let mut f2 = make_finding("vf_x2");
1266 f2.assertion.entities = vec![entity.clone()];
1267 let mut f3 = make_finding("vf_x3");
1268 f3.assertion.entities = vec![entity.clone()];
1269 let frontier = make_frontier(vec![f1, f2, f3]);
1270 let report = lint_frontier(&frontier);
1271 let missing: Vec<_> = report
1272 .diagnostics
1273 .iter()
1274 .filter(|d| d.rule_id == "missing_crossref")
1275 .collect();
1276 assert_eq!(missing.len(), 1);
1277 assert!(missing[0].message.contains("NLRP3"));
1278 assert!(missing[0].message.contains("3 findings"));
1279 }
1280
1281 #[test]
1282 fn lint_frontier_stale_superseded() {
1283 let mut f1 = make_finding("vf_new");
1284 let f2 = make_finding("vf_old");
1285 f1.links.push(crate::bundle::Link {
1287 target: "vf_old".into(),
1288 link_type: "supersedes".into(),
1289 note: "".into(),
1290 inferred_by: "compiler".into(),
1291 created_at: "".into(),
1292 mechanism: None,
1293 });
1294 let frontier = make_frontier(vec![f1, f2]);
1296 let report = lint_frontier(&frontier);
1297 let stale: Vec<_> = report
1298 .diagnostics
1299 .iter()
1300 .filter(|d| d.rule_id == "stale_superseded")
1301 .collect();
1302 assert_eq!(stale.len(), 1);
1303 assert_eq!(stale[0].finding_id, "vf_old");
1304 }
1305
1306 #[test]
1307 fn lint_frontier_clean_frontier() {
1308 let f = make_finding("vf_clean2");
1310 let frontier = make_frontier(vec![f]);
1311 let report = lint_frontier(&frontier);
1312 assert!(report.diagnostics.iter().all(|d| d.rule_id == "orphan"));
1314 assert_eq!(report.errors, 0);
1315 assert_eq!(report.warnings, 0);
1316 }
1317
1318 fn link_supports(target: &str) -> Link {
1321 Link {
1322 target: target.into(),
1323 link_type: "supports".into(),
1324 note: String::new(),
1325 inferred_by: "test".into(),
1326 created_at: String::new(),
1327 mechanism: None,
1328 }
1329 }
1330
1331 #[test]
1332 fn correlation_supports_intervention_flagged() {
1333 let mut weak = make_finding("vf_weak");
1334 weak.assertion.causal_claim = Some(CausalClaim::Correlation);
1335 let mut strong = make_finding("vf_strong");
1336 strong.assertion.causal_claim = Some(CausalClaim::Intervention);
1337 weak.links.push(link_supports("vf_strong"));
1338 let frontier = make_frontier(vec![weak, strong]);
1339 let diags = check_causal_mismatch_on_supports(&frontier);
1340 assert_eq!(diags.len(), 1);
1341 assert_eq!(diags[0].rule_id, "L011");
1342 assert_eq!(diags[0].finding_id, "vf_weak");
1343 }
1344
1345 #[test]
1346 fn correlation_supports_correlation_clean() {
1347 let mut a = make_finding("vf_a");
1348 a.assertion.causal_claim = Some(CausalClaim::Correlation);
1349 let mut b = make_finding("vf_b");
1350 b.assertion.causal_claim = Some(CausalClaim::Correlation);
1351 a.links.push(link_supports("vf_b"));
1352 let frontier = make_frontier(vec![a, b]);
1353 let diags = check_causal_mismatch_on_supports(&frontier);
1354 assert_eq!(diags.len(), 0);
1355 }
1356
1357 #[test]
1358 fn intervention_supports_correlation_clean() {
1359 let mut a = make_finding("vf_a");
1361 a.assertion.causal_claim = Some(CausalClaim::Intervention);
1362 let mut b = make_finding("vf_b");
1363 b.assertion.causal_claim = Some(CausalClaim::Correlation);
1364 a.links.push(link_supports("vf_b"));
1365 let frontier = make_frontier(vec![a, b]);
1366 let diags = check_causal_mismatch_on_supports(&frontier);
1367 assert_eq!(diags.len(), 0);
1368 }
1369
1370 #[test]
1371 fn ungraded_findings_skipped() {
1372 let mut a = make_finding("vf_a");
1374 a.assertion.causal_claim = None;
1375 let mut b = make_finding("vf_b");
1376 b.assertion.causal_claim = Some(CausalClaim::Intervention);
1377 a.links.push(link_supports("vf_b"));
1378 let frontier = make_frontier(vec![a, b]);
1379 let diags = check_causal_mismatch_on_supports(&frontier);
1380 assert_eq!(diags.len(), 0);
1381 }
1382
1383 #[test]
1384 fn non_supports_link_types_ignored() {
1385 let mut a = make_finding("vf_a");
1387 a.assertion.causal_claim = Some(CausalClaim::Correlation);
1388 let mut b = make_finding("vf_b");
1389 b.assertion.causal_claim = Some(CausalClaim::Intervention);
1390 a.links.push(Link {
1391 target: "vf_b".into(),
1392 link_type: "contradicts".into(),
1393 note: String::new(),
1394 inferred_by: "test".into(),
1395 created_at: String::new(),
1396 mechanism: None,
1397 });
1398 let frontier = make_frontier(vec![a, b]);
1399 let diags = check_causal_mismatch_on_supports(&frontier);
1400 assert_eq!(diags.len(), 0);
1401 }
1402
1403 #[test]
1404 fn mediation_supports_intervention_flagged() {
1405 let mut med = make_finding("vf_med");
1406 med.assertion.causal_claim = Some(CausalClaim::Mediation);
1407 let mut iv = make_finding("vf_iv");
1408 iv.assertion.causal_claim = Some(CausalClaim::Intervention);
1409 med.links.push(link_supports("vf_iv"));
1410 let frontier = make_frontier(vec![med, iv]);
1411 let diags = check_causal_mismatch_on_supports(&frontier);
1412 assert_eq!(diags.len(), 1);
1413 assert_eq!(diags[0].rule_id, "L011");
1414 }
1415}