batuta/falsification/hypothesis_driven/
hdd_scientific.rs

//! HDD Scientific Method Checks (HDD-07 through HDD-10)
//!
//! These checks focus on scientific rigor:
//! - Statistical significance
//! - Ablation studies
//! - Negative result documentation
//! - Metric pre-registration
8
9use super::helpers::check_for_pattern;
10use crate::falsification::helpers::{apply_check_outcome, CheckOutcome};
11use crate::falsification::types::{CheckItem, Evidence, EvidenceType, Severity};
12use std::path::Path;
13use std::time::Instant;
14
15/// HDD-07: Statistical Significance Requirement
16///
17/// **Claim:** Performance claims include statistical significance tests.
18///
19/// **Rejection Criteria (Major):**
20/// - p >= 0.05, no confidence interval
21pub fn check_statistical_significance(project_path: &Path) -> CheckItem {
22    let start = Instant::now();
23    let mut item = CheckItem::new(
24        "HDD-07",
25        "Statistical Significance Requirement",
26        "Performance claims include significance tests",
27    )
28    .with_severity(Severity::Major)
29    .with_tps("Scientific rigor");
30
31    // Check for statistical testing
32    let has_stats = check_for_pattern(
33        project_path,
34        &["p_value", "p-value", "confidence_interval", "t_test", "significance"],
35    );
36
37    // Check for effect size
38    let has_effect_size =
39        check_for_pattern(project_path, &["effect_size", "cohen_d", "glass_delta", "hedges_g"]);
40
41    // Check for statistical library
42    let has_stats_lib =
43        check_for_pattern(project_path, &["statrs", "statistical", "hypothesis_test"]);
44
45    item = item.with_evidence(Evidence {
46        evidence_type: EvidenceType::StaticAnalysis,
47        description: format!(
48            "Statistics: testing={}, effect_size={}, lib={}",
49            has_stats, has_effect_size, has_stats_lib
50        ),
51        data: None,
52        files: Vec::new(),
53    });
54
55    let has_perf_claims =
56        check_for_pattern(project_path, &["accuracy", "F1", "precision", "recall"]);
57    item = apply_check_outcome(
58        item,
59        &[
60            (!has_perf_claims, CheckOutcome::Pass),
61            (has_stats && has_effect_size, CheckOutcome::Pass),
62            (has_stats, CheckOutcome::Partial("Statistical testing (missing effect size)")),
63            (true, CheckOutcome::Partial("Performance metrics without significance testing")),
64        ],
65    );
66
67    item.finish_timed(start)
68}
69
70/// HDD-08: Ablation Study Requirement
71///
72/// **Claim:** Multi-component changes include ablation studies.
73///
74/// **Rejection Criteria (Major):**
75/// - >2 model changes without per-component analysis
76pub fn check_ablation_study(project_path: &Path) -> CheckItem {
77    let start = Instant::now();
78    let mut item = CheckItem::new(
79        "HDD-08",
80        "Ablation Study Requirement",
81        "Multi-component changes include ablation studies",
82    )
83    .with_severity(Severity::Major)
84    .with_tps("Scientific Method - isolation of variables");
85
86    // Check for ablation documentation
87    let has_ablation = check_for_pattern(
88        project_path,
89        &["ablation", "Ablation", "component_analysis", "feature_importance"],
90    );
91
92    // Check for sensitivity analysis
93    let has_sensitivity =
94        check_for_pattern(project_path, &["sensitivity", "hyperparameter_sweep", "grid_search"]);
95
96    item = item.with_evidence(Evidence {
97        evidence_type: EvidenceType::StaticAnalysis,
98        description: format!("Ablation: studies={}, sensitivity={}", has_ablation, has_sensitivity),
99        data: None,
100        files: Vec::new(),
101    });
102
103    let has_complex_models =
104        check_for_pattern(project_path, &["neural", "transformer", "ensemble", "multi_layer"]);
105    item = apply_check_outcome(
106        item,
107        &[
108            (!has_complex_models, CheckOutcome::Pass),
109            (has_ablation, CheckOutcome::Pass),
110            (has_sensitivity, CheckOutcome::Partial("Sensitivity analysis (no formal ablation)")),
111            (true, CheckOutcome::Partial("Complex models without ablation studies")),
112        ],
113    );
114
115    item.finish_timed(start)
116}
117
118/// HDD-09: Negative Result Documentation
119///
120/// **Claim:** Failed experiments are documented, not just successes.
121///
122/// **Rejection Criteria (Minor):**
123/// - Experiment log shows only successful attempts
124pub fn check_negative_result_documentation(project_path: &Path) -> CheckItem {
125    let start = Instant::now();
126    let mut item =
127        CheckItem::new("HDD-09", "Negative Result Documentation", "Failed experiments documented")
128            .with_severity(Severity::Minor)
129            .with_tps("Kaizen - learning from failures");
130
131    // Check for experiment logs
132    let has_experiment_log = project_path.join("experiments/").exists()
133        || project_path.join("logs/experiments/").exists()
134        || check_for_pattern(project_path, &["experiment_log", "run_history"]);
135
136    // Check for negative result documentation
137    let has_negative_docs = check_for_pattern(
138        project_path,
139        &["failed_experiment", "negative_result", "did_not_work", "unsuccessful"],
140    );
141
142    // Check for ADR (Architecture Decision Records)
143    let has_adr =
144        project_path.join("docs/adr/").exists() || project_path.join("docs/decisions/").exists();
145
146    item = item.with_evidence(Evidence {
147        evidence_type: EvidenceType::StaticAnalysis,
148        description: format!(
149            "Negative results: log={}, docs={}, adr={}",
150            has_experiment_log, has_negative_docs, has_adr
151        ),
152        data: None,
153        files: Vec::new(),
154    });
155
156    item = apply_check_outcome(
157        item,
158        &[
159            (has_negative_docs || has_adr, CheckOutcome::Pass),
160            (
161                has_experiment_log,
162                CheckOutcome::Partial("Experiment logging (check for negative results)"),
163            ),
164            (true, CheckOutcome::Partial("No negative result documentation")),
165        ],
166    );
167
168    item.finish_timed(start)
169}
170
171/// HDD-10: Pre-registration of Metrics
172///
173/// **Claim:** Evaluation metrics defined before experimentation.
174///
175/// **Rejection Criteria (Minor):**
176/// - Metric definition after experiment commit
177pub fn check_metric_preregistration(project_path: &Path) -> CheckItem {
178    let start = Instant::now();
179    let mut item = CheckItem::new(
180        "HDD-10",
181        "Pre-registration of Metrics",
182        "Metrics defined before experimentation",
183    )
184    .with_severity(Severity::Minor)
185    .with_tps("Scientific pre-registration");
186
187    // Check for metrics definition in config
188    let has_metric_config =
189        check_for_pattern(project_path, &["metrics:", "evaluation_metrics", "target_metric"]);
190
191    // Check for pre-registration documentation
192    let has_prereg = check_for_pattern(
193        project_path,
194        &["pre_registration", "preregistration", "planned_metrics"],
195    );
196
197    item = item.with_evidence(Evidence {
198        evidence_type: EvidenceType::StaticAnalysis,
199        description: format!("Pre-registration: config={}, docs={}", has_metric_config, has_prereg),
200        data: None,
201        files: Vec::new(),
202    });
203
204    let is_ml = check_for_pattern(project_path, &["accuracy", "loss", "evaluate"]);
205    item = apply_check_outcome(
206        item,
207        &[
208            (has_prereg, CheckOutcome::Pass),
209            (
210                has_metric_config,
211                CheckOutcome::Partial("Metrics in config (verify pre-registration)"),
212            ),
213            (!is_ml, CheckOutcome::Pass),
214            (true, CheckOutcome::Partial("No metric pre-registration")),
215        ],
216    );
217
218    item.finish_timed(start)
219}
batuta/falsification/hypothesis_driven/hdd_scientific.rs

batuta/falsification/hypothesis_driven/
hdd_scientific.rs