Skip to main content

batuta/bug_hunter/
model_parity.rs

1//! Model Parity Gap Analysis (BH-27)
2//!
3//! Analyzes tiny-model-ground-truth directory for parity gaps:
4//! missing oracle files, failed claims, and incomplete oracle-ops coverage.
5
6use super::{DefectCategory, Finding, FindingEvidence, FindingSeverity, HuntMode};
7use std::path::Path;
8
9// ============================================================================
10// Constants
11// ============================================================================
12
/// Model names; each is looked up as a sub-directory of `oracle/`.
const EXPECTED_MODELS: &[&str] = &["smollm-135m", "qwen2-0.5b", "gpt2-124m"];
/// Prompt names; each is looked up as `<prompt>.json` inside every model directory.
const EXPECTED_PROMPTS: &[&str] = &["arithmetic", "code", "completion", "greeting"];
/// Operation names; each is looked up as a non-empty sub-directory of `oracle-ops/`.
const EXPECTED_OPS: &[&str] = &["convert", "quantize", "finetune", "merge", "prune"];
16
17// ============================================================================
18// Public API
19// ============================================================================
20
/// Locate the tiny-model-ground-truth directory.
///
/// Candidates are tried in this order:
/// 1. `explicit_path`, when it is supplied and names an existing directory.
/// 2. A directory called `tiny-model-ground-truth` that sits next to
///    `project_path` (i.e. `<project_path>/../tiny-model-ground-truth`).
///
/// An explicit path that is missing, or that exists but is not a directory
/// (for example a regular file), is never returned; discovery falls through
/// to the sibling lookup instead.
///
/// Returns `None` when neither candidate is a directory, when `project_path`
/// cannot be canonicalized, or when the canonical path has no parent.
pub fn discover_model_parity_dir(
    project_path: &Path,
    explicit_path: Option<&Path>,
) -> Option<std::path::PathBuf> {
    // Require a directory (not merely an existing path) so the explicit and
    // the auto-discovered candidates are validated the same way.
    if let Some(explicit) = explicit_path.filter(|p| p.is_dir()) {
        return Some(explicit.to_path_buf());
    }

    // Canonicalize first so that relative inputs such as "." have a real parent.
    let sibling = project_path
        .canonicalize()
        .ok()?
        .parent()?
        .join("tiny-model-ground-truth");
    sibling.is_dir().then_some(sibling)
}
43
44/// Analyze model parity gaps.
45///
46/// Produces `BH-PARITY-NNNN` findings for:
47/// 1. Missing oracle files (model/prompt combinations)
48/// 2. CLAIMS.md FAIL/Deferred claims
49/// 3. Incomplete oracle-ops directories
50pub fn analyze_model_parity_gaps(tmgt_dir: &Path, _project_path: &Path) -> Vec<Finding> {
51    contract_pre_analyze!(tmgt_dir);
52    let mut findings = Vec::new();
53    let mut finding_id = 0u32;
54
55    // Check 1: Oracle completeness
56    check_oracle_completeness(tmgt_dir, &mut findings, &mut finding_id);
57
58    // Check 2: CLAIMS.md status
59    check_claims_status(tmgt_dir, &mut findings, &mut finding_id);
60
61    // Check 3: Oracle-ops completeness
62    check_oracle_ops(tmgt_dir, &mut findings, &mut finding_id);
63
64    findings
65}
66
67// ============================================================================
68// Internal helpers
69// ============================================================================
70
71fn check_oracle_completeness(tmgt_dir: &Path, findings: &mut Vec<Finding>, finding_id: &mut u32) {
72    let oracle_dir = tmgt_dir.join("oracle");
73    if !oracle_dir.is_dir() {
74        *finding_id += 1;
75        findings.push(
76            Finding::new(
77                format!("BH-PARITY-{:04}", finding_id),
78                tmgt_dir,
79                1,
80                "Missing oracle directory",
81            )
82            .with_description("No oracle/ directory found in tiny-model-ground-truth")
83            .with_severity(FindingSeverity::High)
84            .with_category(DefectCategory::ModelParityGap)
85            .with_suspiciousness(0.8)
86            .with_discovered_by(HuntMode::Analyze)
87            .with_evidence(FindingEvidence::model_parity(
88                "all",
89                "oracle_dir",
90                "missing",
91            )),
92        );
93        return;
94    }
95
96    for model in EXPECTED_MODELS {
97        for prompt in EXPECTED_PROMPTS {
98            let oracle_file = oracle_dir.join(model).join(format!("{}.json", prompt));
99            if !oracle_file.exists() {
100                *finding_id += 1;
101                findings.push(
102                    Finding::new(
103                        format!("BH-PARITY-{:04}", finding_id),
104                        &oracle_dir,
105                        1,
106                        format!("Missing oracle: {}/{}.json", model, prompt),
107                    )
108                    .with_description(format!(
109                        "Oracle output for model `{}` prompt `{}` not generated",
110                        model, prompt
111                    ))
112                    .with_severity(FindingSeverity::Medium)
113                    .with_category(DefectCategory::ModelParityGap)
114                    .with_suspiciousness(0.6)
115                    .with_discovered_by(HuntMode::Analyze)
116                    .with_evidence(FindingEvidence::model_parity(*model, *prompt, "missing")),
117                );
118            }
119        }
120    }
121}
122
123fn check_claims_status(tmgt_dir: &Path, findings: &mut Vec<Finding>, finding_id: &mut u32) {
124    let claims_path = tmgt_dir.join("CLAIMS.md");
125    let Ok(content) = std::fs::read_to_string(&claims_path) else {
126        return;
127    };
128
129    for line in content.lines() {
130        // Match "### Claim N: Title" headers
131        let claim_header = line.strip_prefix("### Claim ");
132        if claim_header.is_none() {
133            continue;
134        }
135        let header = claim_header.expect("unexpected failure");
136        let claim_title = header.to_string();
137
138        // Check for (Deferred) in the header
139        if header.contains("(Deferred)") || header.contains("Deferred") {
140            *finding_id += 1;
141            findings.push(
142                Finding::new(
143                    format!("BH-PARITY-{:04}", finding_id),
144                    &claims_path,
145                    1,
146                    format!("Deferred claim: {}", claim_title.trim()),
147                )
148                .with_description("Claim is deferred — not yet testable or blocked")
149                .with_severity(FindingSeverity::Low)
150                .with_category(DefectCategory::ModelParityGap)
151                .with_suspiciousness(0.4)
152                .with_discovered_by(HuntMode::Analyze)
153                .with_evidence(FindingEvidence::model_parity(
154                    "claims",
155                    &claim_title,
156                    "deferred",
157                )),
158            );
159        }
160    }
161
162    // Check for FAIL status in the content — match "Status" field lines only
163    for line in content.lines() {
164        let trimmed = line.trim();
165        let is_status_line = trimmed.starts_with("- **Status**:")
166            || trimmed.starts_with("**Status**:")
167            || trimmed.starts_with("- Status:");
168        if is_status_line && trimmed.contains("FAIL") {
169            *finding_id += 1;
170            findings.push(
171                Finding::new(
172                    format!("BH-PARITY-{:04}", finding_id),
173                    &claims_path,
174                    1,
175                    "Failed claim detected in CLAIMS.md",
176                )
177                .with_description(line.trim().to_string())
178                .with_severity(FindingSeverity::High)
179                .with_category(DefectCategory::ModelParityGap)
180                .with_suspiciousness(0.8)
181                .with_discovered_by(HuntMode::Analyze)
182                .with_evidence(FindingEvidence::model_parity("claims", "status", "FAIL")),
183            );
184        }
185    }
186}
187
188fn check_oracle_ops(tmgt_dir: &Path, findings: &mut Vec<Finding>, finding_id: &mut u32) {
189    let ops_dir = tmgt_dir.join("oracle-ops");
190    if !ops_dir.is_dir() {
191        *finding_id += 1;
192        findings.push(
193            Finding::new(
194                format!("BH-PARITY-{:04}", finding_id),
195                tmgt_dir,
196                1,
197                "Missing oracle-ops directory",
198            )
199            .with_description("No oracle-ops/ directory found in tiny-model-ground-truth")
200            .with_severity(FindingSeverity::Medium)
201            .with_category(DefectCategory::ModelParityGap)
202            .with_suspiciousness(0.5)
203            .with_discovered_by(HuntMode::Analyze)
204            .with_evidence(FindingEvidence::model_parity(
205                "ops",
206                "oracle-ops",
207                "missing",
208            )),
209        );
210        return;
211    }
212
213    for op in EXPECTED_OPS {
214        let op_dir = ops_dir.join(op);
215        let is_empty = if op_dir.is_dir() {
216            std::fs::read_dir(&op_dir).map(|mut d| d.next().is_none()).unwrap_or(true)
217        } else {
218            true
219        };
220
221        if is_empty {
222            *finding_id += 1;
223            findings.push(
224                Finding::new(
225                    format!("BH-PARITY-{:04}", finding_id),
226                    &ops_dir,
227                    1,
228                    format!("Missing oracle-ops: {}/", op),
229                )
230                .with_description(format!("Oracle-ops `{}` directory is missing or empty", op))
231                .with_severity(FindingSeverity::Low)
232                .with_category(DefectCategory::ModelParityGap)
233                .with_suspiciousness(0.4)
234                .with_discovered_by(HuntMode::Analyze)
235                .with_evidence(FindingEvidence::model_parity("ops", *op, "missing")),
236            );
237        }
238    }
239}
240
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Signature shared by the three internal check functions.
    type Check = fn(&Path, &mut Vec<Finding>, &mut u32);

    /// Create a fresh temp dir containing an empty `tmgt/` directory.
    ///
    /// The `TempDir` guard is returned alongside the path so the tree is kept
    /// alive until the end of the calling test.
    fn tmgt_fixture() -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().expect("tempdir creation failed");
        let tmgt = dir.path().join("tmgt");
        std::fs::create_dir_all(&tmgt).expect("mkdir failed");
        (dir, tmgt)
    }

    /// Write `body` verbatim to `<tmgt>/CLAIMS.md`.
    fn write_claims(tmgt: &Path, body: &str) {
        std::fs::write(tmgt.join("CLAIMS.md"), body).expect("fs write failed");
    }

    /// Run one check in isolation, starting from an id counter of zero.
    fn run_check(check: Check, tmgt: &Path) -> Vec<Finding> {
        let mut findings = Vec::new();
        let mut id = 0;
        check(tmgt, &mut findings, &mut id);
        findings
    }

    /// Findings whose title contains `needle`.
    fn titled<'a>(findings: &'a [Finding], needle: &str) -> Vec<&'a Finding> {
        findings.iter().filter(|f| f.title.contains(needle)).collect()
    }

    #[test]
    fn test_discover_explicit_path() {
        let (dir, tmgt) = tmgt_fixture();
        let result = discover_model_parity_dir(dir.path(), Some(tmgt.as_path()));
        assert_eq!(result, Some(tmgt));
    }

    #[test]
    fn test_discover_explicit_path_missing() {
        // The project lives one level below the temp dir so that the sibling
        // lookup stays inside the fixture; using the temp dir itself would
        // probe `<system tmp>/tiny-model-ground-truth`, which may exist.
        let dir = tempfile::tempdir().expect("tempdir creation failed");
        let project = dir.path().join("project");
        std::fs::create_dir_all(&project).expect("mkdir failed");
        let missing = dir.path().join("nonexistent");

        let result = discover_model_parity_dir(&project, Some(missing.as_path()));
        assert!(result.is_none());
    }

    #[test]
    fn test_discover_auto_sibling_dir() {
        // No explicit path: `<project>/../tiny-model-ground-truth` is found.
        let dir = tempfile::tempdir().expect("tempdir creation failed");
        let project = dir.path().join("project");
        std::fs::create_dir_all(&project).expect("mkdir failed");
        std::fs::create_dir_all(dir.path().join("tiny-model-ground-truth")).expect("mkdir failed");

        let expected = dir
            .path()
            .canonicalize()
            .expect("canonicalize failed")
            .join("tiny-model-ground-truth");
        assert_eq!(discover_model_parity_dir(&project, None), Some(expected));
    }

    #[test]
    fn test_oracle_completeness_all_missing() {
        let (dir, tmgt) = tmgt_fixture();
        std::fs::create_dir_all(tmgt.join("oracle")).expect("mkdir failed");
        // No model dirs → all 12 (3×4) combos missing
        let findings = analyze_model_parity_gaps(&tmgt, dir.path());
        assert_eq!(titled(&findings, "Missing oracle:").len(), 12);
    }

    #[test]
    fn test_oracle_completeness_partial() {
        let (dir, tmgt) = tmgt_fixture();
        let model_dir = tmgt.join("oracle").join("smollm-135m");
        std::fs::create_dir_all(&model_dir).expect("mkdir failed");
        // Create 2 of 4 prompts
        for prompt in ["arithmetic", "code"] {
            std::fs::write(model_dir.join(format!("{}.json", prompt)), "{}")
                .expect("fs write failed");
        }

        let findings = analyze_model_parity_gaps(&tmgt, dir.path());
        // 2 missing for smollm (completion, greeting)
        assert_eq!(titled(&findings, "smollm-135m").len(), 2);
    }

    #[test]
    fn test_parse_claims_status_deferred() {
        let (_dir, tmgt) = tmgt_fixture();
        write_claims(
            &tmgt,
            "# Claims\n\n### Claim 6: Cross-Runtime Parity (Deferred)\n- **Status**: Deferred.\n",
        );

        let findings = run_check(check_claims_status, &tmgt);

        let deferred = titled(&findings, "Deferred");
        assert_eq!(deferred.len(), 1);
        assert_eq!(deferred[0].severity, FindingSeverity::Low);
    }

    #[test]
    fn test_parse_claims_status_fail() {
        let (_dir, tmgt) = tmgt_fixture();
        write_claims(&tmgt, "# Claims\n\n### Claim 19: Throughput\n- **Status**: FAIL\n");

        let findings = run_check(check_claims_status, &tmgt);

        let fails = titled(&findings, "Failed claim");
        assert_eq!(fails.len(), 1);
        assert_eq!(fails[0].severity, FindingSeverity::High);
    }

    #[test]
    fn test_oracle_ops_completeness() {
        let (_dir, tmgt) = tmgt_fixture();
        let ops_dir = tmgt.join("oracle-ops");
        // Create only convert and quantize with content
        for op in ["convert", "quantize"] {
            std::fs::create_dir_all(ops_dir.join(op)).expect("mkdir failed");
            std::fs::write(ops_dir.join(op).join("smollm.json"), "{}").expect("fs write failed");
        }
        // finetune, merge, prune missing

        let findings = run_check(check_oracle_ops, &tmgt);

        assert_eq!(titled(&findings, "Missing oracle-ops:").len(), 3); // finetune, merge, prune
    }

    #[test]
    fn test_missing_oracle_directory() {
        // No oracle/ dir at all
        let (_dir, tmgt) = tmgt_fixture();

        let findings = run_check(check_oracle_completeness, &tmgt);

        assert_eq!(findings.len(), 1);
        assert!(findings[0].title.contains("Missing oracle directory"));
    }

    // ===== Falsification tests =====

    #[test]
    fn test_falsify_fail_detection_rejects_description_lines() {
        // Falsifies: FAIL check must NOT match falsification criterion descriptions
        // Real bug: line 146 of CLAIMS.md has `status == "FAIL"` as criteria, not status
        let (_dir, tmgt) = tmgt_fixture();
        write_claims(
            &tmgt,
            "# Claims\n\n\
             ### Claim 20: QA Gate\n\
             - **Falsification**: any gate with `status == \"FAIL\"`.\n",
        );

        let findings = run_check(check_claims_status, &tmgt);

        assert_eq!(
            titled(&findings, "Failed claim").len(),
            0,
            "Should NOT match falsification criterion line"
        );
    }

    #[test]
    fn test_falsify_fail_detection_matches_status_field() {
        // Must match actual Status field lines with FAIL
        let (_dir, tmgt) = tmgt_fixture();
        write_claims(
            &tmgt,
            "# Claims\n\n\
             ### Claim 19: Throughput\n\
             - **Status**: FAIL (0 tok/s bug)\n\
             ### Claim 20: QA Gate\n\
             **Status**: FAIL — critical gate failed\n\
             ### Claim 21: Other\n\
             - Status: FAIL\n",
        );

        let findings = run_check(check_claims_status, &tmgt);

        assert_eq!(
            titled(&findings, "Failed claim").len(),
            3,
            "All three Status formats should match"
        );
    }

    #[test]
    fn test_falsify_missing_claims_file() {
        // No CLAIMS.md → 0 findings (not an error)
        let (_dir, tmgt) = tmgt_fixture();

        let findings = run_check(check_claims_status, &tmgt);
        assert_eq!(findings.len(), 0);
    }

    #[test]
    fn test_falsify_empty_claims_file() {
        let (_dir, tmgt) = tmgt_fixture();
        write_claims(&tmgt, "");

        let findings = run_check(check_claims_status, &tmgt);
        assert_eq!(findings.len(), 0);
    }

    #[test]
    fn test_falsify_oracle_all_present() {
        // Complete oracle → 0 findings
        let (_dir, tmgt) = tmgt_fixture();
        for model in EXPECTED_MODELS {
            let model_dir = tmgt.join("oracle").join(model);
            std::fs::create_dir_all(&model_dir).expect("mkdir failed");
            for prompt in EXPECTED_PROMPTS {
                std::fs::write(model_dir.join(format!("{}.json", prompt)), "{}")
                    .expect("fs write failed");
            }
        }

        let findings = run_check(check_oracle_completeness, &tmgt);
        assert_eq!(findings.len(), 0, "All oracles present → 0 findings");
    }

    #[test]
    fn test_falsify_oracle_ops_all_present() {
        // All ops dirs populated → 0 findings
        let (_dir, tmgt) = tmgt_fixture();
        for op in EXPECTED_OPS {
            let op_dir = tmgt.join("oracle-ops").join(op);
            std::fs::create_dir_all(&op_dir).expect("mkdir failed");
            std::fs::write(op_dir.join("result.json"), "{}").expect("fs write failed");
        }

        let findings = run_check(check_oracle_ops, &tmgt);
        assert_eq!(findings.len(), 0, "All ops present → 0 findings");
    }

    #[test]
    fn test_falsify_ops_dir_exists_but_empty() {
        // Dir exists but has no files → should still flag
        let (_dir, tmgt) = tmgt_fixture();
        for op in EXPECTED_OPS {
            std::fs::create_dir_all(tmgt.join("oracle-ops").join(op)).expect("mkdir failed");
        }

        let findings = run_check(check_oracle_ops, &tmgt);
        assert_eq!(findings.len(), 5, "Empty dirs should be flagged");
    }

    #[test]
    fn test_falsify_discover_nonexistent_parent() {
        // Non-existent project path → None (canonicalize fails)
        let result = discover_model_parity_dir(Path::new("/nonexistent/path/xyz"), None);
        assert!(result.is_none());
    }

    #[test]
    fn test_falsify_full_pipeline_empty_tmgt() {
        // Empty tmgt dir → oracle missing + ops missing findings
        let (dir, tmgt) = tmgt_fixture();

        let findings = analyze_model_parity_gaps(&tmgt, dir.path());
        // No oracle dir → 1 finding; no oracle-ops dir → 1 finding; no CLAIMS.md → 0
        assert_eq!(findings.len(), 2);
        assert!(findings.iter().any(|f| f.title.contains("Missing oracle directory")));
        assert!(findings.iter().any(|f| f.title.contains("Missing oracle-ops directory")));
    }
}