Skip to main content

batuta/falsification/hypothesis_driven/
hdd_reproducibility.rs

//! HDD Reproducibility Checks (HDD-01 through HDD-06)
//!
//! These checks focus on reproducibility and environment requirements:
//! - Hypothesis statements
//! - Baseline comparisons
//! - Gold standard reproducibility
//! - Random seed documentation
//! - Environment containerization
//! - Data version control
10
11use super::helpers::{check_ci_for_pattern, check_for_pattern};
12use crate::falsification::helpers::{apply_check_outcome, CheckOutcome};
13use crate::falsification::types::{CheckItem, Evidence, EvidenceType, Severity};
14use std::path::Path;
15use std::time::Instant;
16
17/// HDD-01: Hypothesis Statement Requirement
18///
19/// **Claim:** Every model change PR includes falsifiable hypothesis.
20///
21/// **Rejection Criteria (Major):**
22/// - Model PR without "Hypothesis:" section
23pub fn check_hypothesis_statement(project_path: &Path) -> CheckItem {
24    let start = Instant::now();
25    let mut item = CheckItem::new(
26        "HDD-01",
27        "Hypothesis Statement Requirement",
28        "Model change PRs include falsifiable hypothesis",
29    )
30    .with_severity(Severity::Major)
31    .with_tps("Scientific Method integration");
32
33    // Check for PR template with hypothesis section
34    let pr_templates = [
35        project_path.join(".github/PULL_REQUEST_TEMPLATE.md"),
36        project_path.join(".github/pull_request_template.md"),
37        project_path.join("docs/PR_TEMPLATE.md"),
38    ];
39
40    let has_pr_template = pr_templates.iter().any(|p| p.exists());
41    let has_hypothesis_section = pr_templates.iter().filter(|p| p.exists()).any(|p| {
42        std::fs::read_to_string(p)
43            .ok()
44            .map(|c| {
45                c.to_lowercase().contains("hypothesis")
46                    || c.contains("## Hypothesis")
47                    || c.contains("### Hypothesis")
48            })
49            .unwrap_or(false)
50    });
51
52    // Check for hypothesis documentation in codebase
53    let has_hypothesis_docs = check_for_pattern(
54        project_path,
55        &["hypothesis:", "Hypothesis:", "H0:", "H1:", "null_hypothesis"],
56    );
57
58    item = item.with_evidence(Evidence {
59        evidence_type: EvidenceType::StaticAnalysis,
60        description: format!(
61            "Hypothesis: pr_template={}, hypothesis_section={}, docs={}",
62            has_pr_template, has_hypothesis_section, has_hypothesis_docs
63        ),
64        data: None,
65        files: Vec::new(),
66    });
67
68    let is_ml_project = check_for_pattern(project_path, &["model", "train", "predict"]);
69    item = apply_check_outcome(
70        item,
71        &[
72            (has_hypothesis_section, CheckOutcome::Pass),
73            (
74                has_pr_template || has_hypothesis_docs,
75                CheckOutcome::Partial("PR template exists but missing hypothesis section"),
76            ),
77            (!is_ml_project, CheckOutcome::Pass),
78            (true, CheckOutcome::Partial("No hypothesis requirement in PR workflow")),
79        ],
80    );
81
82    item.finish_timed(start)
83}
84
85/// HDD-02: Baseline Comparison Requirement
86///
87/// **Claim:** Complex models must beat simple baselines to be merged.
88///
89/// **Rejection Criteria (Major):**
90/// - Transformer without RF baseline, <5% improvement without justification
91pub fn check_baseline_comparison(project_path: &Path) -> CheckItem {
92    let start = Instant::now();
93    let mut item = CheckItem::new(
94        "HDD-02",
95        "Baseline Comparison Requirement",
96        "Complex models beat simple baselines",
97    )
98    .with_severity(Severity::Major)
99    .with_tps("Muda (Overprocessing) prevention");
100
101    // Check for baseline comparison in tests or benchmarks
102    let has_baseline_tests = check_for_pattern(
103        project_path,
104        &["baseline", "Baseline", "simple_model", "compare_to_baseline"],
105    );
106
107    // Check for benchmark infrastructure
108    let has_benchmarks = project_path.join("benches").exists()
109        || check_for_pattern(project_path, &["#[bench]", "criterion", "benchmark"]);
110
111    // Check for model comparison documentation
112    let has_comparison_docs =
113        check_for_pattern(project_path, &["vs_baseline", "improvement_over", "comparison"]);
114
115    item = item.with_evidence(Evidence {
116        evidence_type: EvidenceType::StaticAnalysis,
117        description: format!(
118            "Baseline: tests={}, benchmarks={}, comparison_docs={}",
119            has_baseline_tests, has_benchmarks, has_comparison_docs
120        ),
121        data: None,
122        files: Vec::new(),
123    });
124
125    let is_ml = check_for_pattern(project_path, &["neural", "transformer", "deep_learning"]);
126    item = apply_check_outcome(
127        item,
128        &[
129            (has_baseline_tests && has_benchmarks, CheckOutcome::Pass),
130            (
131                has_baseline_tests || has_comparison_docs,
132                CheckOutcome::Partial("Some baseline comparison infrastructure"),
133            ),
134            (!is_ml, CheckOutcome::Pass),
135            (true, CheckOutcome::Partial("Complex models without baseline comparison")),
136        ],
137    );
138
139    item.finish_timed(start)
140}
141
142/// HDD-03: Gold Standard Reproducibility
143///
144/// **Claim:** `make reproduce` recreates training results from scratch.
145///
146/// **Rejection Criteria (Critical):**
147/// - Build fails from clean state, metrics differ >1%
148pub fn check_gold_reproducibility(project_path: &Path) -> CheckItem {
149    let start = Instant::now();
150    let mut item = CheckItem::new(
151        "HDD-03",
152        "Gold Standard Reproducibility",
153        "make reproduce recreates training results",
154    )
155    .with_severity(Severity::Critical)
156    .with_tps("Scientific reproducibility");
157
158    // Check for reproduce target in Makefile
159    let makefile = project_path.join("Makefile");
160    let has_reproduce_target = makefile
161        .exists()
162        .then(|| std::fs::read_to_string(&makefile).ok())
163        .flatten()
164        .map(|c| c.contains("reproduce:") || c.contains("reproduce :"))
165        .unwrap_or(false);
166
167    // Check for reproduction documentation
168    let has_repro_docs =
169        check_for_pattern(project_path, &["REPRODUCIBILITY", "reproduce", "replication"]);
170
171    // Check for CI reproduction
172    let has_ci_repro = check_ci_for_pattern(project_path, &["reproduce", "replication"]);
173
174    item = item.with_evidence(Evidence {
175        evidence_type: EvidenceType::StaticAnalysis,
176        description: format!(
177            "Reproducibility: make_target={}, docs={}, ci={}",
178            has_reproduce_target, has_repro_docs, has_ci_repro
179        ),
180        data: None,
181        files: Vec::new(),
182    });
183
184    let is_ml = check_for_pattern(project_path, &["train", "model", "weights"]);
185    item = apply_check_outcome(
186        item,
187        &[
188            (has_reproduce_target && has_ci_repro, CheckOutcome::Pass),
189            (
190                has_reproduce_target || has_repro_docs,
191                CheckOutcome::Partial("Reproduction target exists (not in CI)"),
192            ),
193            (!is_ml, CheckOutcome::Pass),
194            (true, CheckOutcome::Partial("No reproduction infrastructure")),
195        ],
196    );
197
198    item.finish_timed(start)
199}
200
201/// HDD-04: Random Seed Documentation
202///
203/// **Claim:** All stochastic operations have documented, pinned seeds.
204///
205/// **Rejection Criteria (Major):**
206/// - Any stochastic operation without explicit seed
207pub fn check_random_seed_documentation(project_path: &Path) -> CheckItem {
208    let start = Instant::now();
209    let mut item = CheckItem::new(
210        "HDD-04",
211        "Random Seed Documentation",
212        "Stochastic operations have pinned seeds",
213    )
214    .with_severity(Severity::Major)
215    .with_tps("Deterministic reproducibility");
216
217    // Check for seed pinning in code
218    let has_seed_pinning =
219        check_for_pattern(project_path, &["seed", "Seed", "RANDOM_SEED", "rng_seed", "set_seed"]);
220
221    // Check for StdRng usage (seeded RNG)
222    let has_seeded_rng =
223        check_for_pattern(project_path, &["StdRng", "SeedableRng", "from_seed", "seed_from_u64"]);
224
225    // Check for seed documentation
226    let has_seed_docs =
227        check_for_pattern(project_path, &["seed=", "SEED:", "random seed", "deterministic"]);
228
229    item = item.with_evidence(Evidence {
230        evidence_type: EvidenceType::StaticAnalysis,
231        description: format!(
232            "Seeds: pinning={}, seeded_rng={}, docs={}",
233            has_seed_pinning, has_seeded_rng, has_seed_docs
234        ),
235        data: None,
236        files: Vec::new(),
237    });
238
239    let uses_random = check_for_pattern(project_path, &["rand::", "random", "Rng", "thread_rng"]);
240    item = apply_check_outcome(
241        item,
242        &[
243            (!uses_random, CheckOutcome::Pass),
244            (has_seeded_rng && has_seed_pinning, CheckOutcome::Pass),
245            (has_seed_pinning, CheckOutcome::Partial("Seed usage found (verify documentation)")),
246            (true, CheckOutcome::Partial("Random usage without explicit seed pinning")),
247        ],
248    );
249
250    item.finish_timed(start)
251}
252
253/// HDD-05: Environment Containerization
254///
255/// **Claim:** Training environment is fully containerized and versioned.
256///
257/// **Rejection Criteria (Major):**
258/// - Dockerfile missing, unpinned dependencies
259pub fn check_environment_containerization(project_path: &Path) -> CheckItem {
260    let start = Instant::now();
261    let mut item = CheckItem::new(
262        "HDD-05",
263        "Environment Containerization",
264        "Environment fully containerized and versioned",
265    )
266    .with_severity(Severity::Major)
267    .with_tps("Silver -> Gold reproducibility");
268
269    // Check for Dockerfile
270    let has_dockerfile =
271        project_path.join("Dockerfile").exists() || project_path.join("docker/Dockerfile").exists();
272
273    // Check for docker-compose
274    let has_compose = project_path.join("docker-compose.yml").exists()
275        || project_path.join("docker-compose.yaml").exists();
276
277    // Check for Cargo.lock (Rust dependency pinning)
278    let has_lock_file = project_path.join("Cargo.lock").exists();
279
280    // Check for nix/devenv
281    let has_nix = project_path.join("flake.nix").exists()
282        || project_path.join("shell.nix").exists()
283        || project_path.join("devenv.nix").exists();
284
285    item = item.with_evidence(Evidence {
286        evidence_type: EvidenceType::StaticAnalysis,
287        description: format!(
288            "Container: dockerfile={}, compose={}, lock={}, nix={}",
289            has_dockerfile, has_compose, has_lock_file, has_nix
290        ),
291        data: None,
292        files: Vec::new(),
293    });
294
295    item = apply_check_outcome(
296        item,
297        &[
298            ((has_dockerfile || has_nix) && has_lock_file, CheckOutcome::Pass),
299            (has_lock_file, CheckOutcome::Partial("Dependencies locked but no containerization")),
300            (true, CheckOutcome::Partial("No environment containerization")),
301        ],
302    );
303
304    item.finish_timed(start)
305}
306
307/// HDD-06: Data Version Control
308///
309/// **Claim:** Training data is versioned with content-addressable storage.
310///
311/// **Rejection Criteria (Major):**
312/// - Any data modification not captured in version control
313pub fn check_data_version_control(project_path: &Path) -> CheckItem {
314    let start = Instant::now();
315    let mut item = CheckItem::new(
316        "HDD-06",
317        "Data Version Control",
318        "Training data versioned with content-addressable storage",
319    )
320    .with_severity(Severity::Major)
321    .with_tps("Reproducibility requirement");
322
323    // Check for DVC
324    let has_dvc = project_path.join(".dvc").exists() || project_path.join("dvc.yaml").exists();
325
326    // Check for data versioning patterns
327    let has_data_versioning =
328        check_for_pattern(project_path, &["data_version", "dataset_hash", "content_hash"]);
329
330    // Check for data directory with hashes
331    let data_dir = project_path.join("data");
332    let has_data_dir = data_dir.exists();
333
334    item = item.with_evidence(Evidence {
335        evidence_type: EvidenceType::StaticAnalysis,
336        description: format!(
337            "Data versioning: dvc={}, versioning_code={}, data_dir={}",
338            has_dvc, has_data_versioning, has_data_dir
339        ),
340        data: None,
341        files: Vec::new(),
342    });
343
344    let uses_data = check_for_pattern(project_path, &["load_data", "Dataset", "DataLoader"]);
345    item = apply_check_outcome(
346        item,
347        &[
348            (!uses_data, CheckOutcome::Pass),
349            (has_dvc, CheckOutcome::Pass),
350            (has_data_versioning, CheckOutcome::Partial("Data versioning patterns found (no DVC)")),
351            (true, CheckOutcome::Partial("Data handling without version control")),
352        ],
353    );
354
355    item.finish_timed(start)
356}