1use super::{DefectCategory, Finding, FindingEvidence, FindingSeverity, HuntMode};
7use std::path::Path;
8
/// Model names that must each have a subdirectory under `oracle/`
/// (see `check_oracle_completeness`).
const EXPECTED_MODELS: &[&str] = &["smollm-135m", "qwen2-0.5b", "gpt2-124m"];
/// Prompt names; each expected model directory must contain `<prompt>.json`
/// for every entry here.
const EXPECTED_PROMPTS: &[&str] = &["arithmetic", "code", "completion", "greeting"];
/// Operation names that must each have a non-empty subdirectory under
/// `oracle-ops/` (see `check_oracle_ops`).
const EXPECTED_OPS: &[&str] = &["convert", "quantize", "finetune", "merge", "prune"];
16
/// Locates the `tiny-model-ground-truth` directory used for parity analysis.
///
/// Resolution order:
/// 1. `explicit_path`, when it is provided and exists on disk.
/// 2. A directory named `tiny-model-ground-truth` that sits next to the
///    canonicalized `project_path` (i.e. inside its parent directory).
///
/// Returns `None` when neither location is usable, which includes the cases
/// where `project_path` cannot be canonicalized or has no parent. Note that a
/// non-existent `explicit_path` does not short-circuit: the sibling lookup is
/// still attempted.
pub fn discover_model_parity_dir(
    project_path: &Path,
    explicit_path: Option<&Path>,
) -> Option<std::path::PathBuf> {
    // An explicit override wins, but only if it actually exists.
    if let Some(explicit) = explicit_path.filter(|candidate| candidate.exists()) {
        return Some(explicit.to_path_buf());
    }

    // Fall back to the conventional sibling checkout.
    let canonical = project_path.canonicalize().ok()?;
    let sibling = canonical.parent()?.join("tiny-model-ground-truth");
    sibling.is_dir().then_some(sibling)
}
43
44pub fn analyze_model_parity_gaps(tmgt_dir: &Path, _project_path: &Path) -> Vec<Finding> {
51 contract_pre_analyze!(tmgt_dir);
52 let mut findings = Vec::new();
53 let mut finding_id = 0u32;
54
55 check_oracle_completeness(tmgt_dir, &mut findings, &mut finding_id);
57
58 check_claims_status(tmgt_dir, &mut findings, &mut finding_id);
60
61 check_oracle_ops(tmgt_dir, &mut findings, &mut finding_id);
63
64 findings
65}
66
67fn check_oracle_completeness(tmgt_dir: &Path, findings: &mut Vec<Finding>, finding_id: &mut u32) {
72 let oracle_dir = tmgt_dir.join("oracle");
73 if !oracle_dir.is_dir() {
74 *finding_id += 1;
75 findings.push(
76 Finding::new(
77 format!("BH-PARITY-{:04}", finding_id),
78 tmgt_dir,
79 1,
80 "Missing oracle directory",
81 )
82 .with_description("No oracle/ directory found in tiny-model-ground-truth")
83 .with_severity(FindingSeverity::High)
84 .with_category(DefectCategory::ModelParityGap)
85 .with_suspiciousness(0.8)
86 .with_discovered_by(HuntMode::Analyze)
87 .with_evidence(FindingEvidence::model_parity(
88 "all",
89 "oracle_dir",
90 "missing",
91 )),
92 );
93 return;
94 }
95
96 for model in EXPECTED_MODELS {
97 for prompt in EXPECTED_PROMPTS {
98 let oracle_file = oracle_dir.join(model).join(format!("{}.json", prompt));
99 if !oracle_file.exists() {
100 *finding_id += 1;
101 findings.push(
102 Finding::new(
103 format!("BH-PARITY-{:04}", finding_id),
104 &oracle_dir,
105 1,
106 format!("Missing oracle: {}/{}.json", model, prompt),
107 )
108 .with_description(format!(
109 "Oracle output for model `{}` prompt `{}` not generated",
110 model, prompt
111 ))
112 .with_severity(FindingSeverity::Medium)
113 .with_category(DefectCategory::ModelParityGap)
114 .with_suspiciousness(0.6)
115 .with_discovered_by(HuntMode::Analyze)
116 .with_evidence(FindingEvidence::model_parity(*model, *prompt, "missing")),
117 );
118 }
119 }
120 }
121}
122
123fn check_claims_status(tmgt_dir: &Path, findings: &mut Vec<Finding>, finding_id: &mut u32) {
124 let claims_path = tmgt_dir.join("CLAIMS.md");
125 let Ok(content) = std::fs::read_to_string(&claims_path) else {
126 return;
127 };
128
129 for line in content.lines() {
130 let claim_header = line.strip_prefix("### Claim ");
132 if claim_header.is_none() {
133 continue;
134 }
135 let header = claim_header.expect("unexpected failure");
136 let claim_title = header.to_string();
137
138 if header.contains("(Deferred)") || header.contains("Deferred") {
140 *finding_id += 1;
141 findings.push(
142 Finding::new(
143 format!("BH-PARITY-{:04}", finding_id),
144 &claims_path,
145 1,
146 format!("Deferred claim: {}", claim_title.trim()),
147 )
148 .with_description("Claim is deferred — not yet testable or blocked")
149 .with_severity(FindingSeverity::Low)
150 .with_category(DefectCategory::ModelParityGap)
151 .with_suspiciousness(0.4)
152 .with_discovered_by(HuntMode::Analyze)
153 .with_evidence(FindingEvidence::model_parity(
154 "claims",
155 &claim_title,
156 "deferred",
157 )),
158 );
159 }
160 }
161
162 for line in content.lines() {
164 let trimmed = line.trim();
165 let is_status_line = trimmed.starts_with("- **Status**:")
166 || trimmed.starts_with("**Status**:")
167 || trimmed.starts_with("- Status:");
168 if is_status_line && trimmed.contains("FAIL") {
169 *finding_id += 1;
170 findings.push(
171 Finding::new(
172 format!("BH-PARITY-{:04}", finding_id),
173 &claims_path,
174 1,
175 "Failed claim detected in CLAIMS.md",
176 )
177 .with_description(line.trim().to_string())
178 .with_severity(FindingSeverity::High)
179 .with_category(DefectCategory::ModelParityGap)
180 .with_suspiciousness(0.8)
181 .with_discovered_by(HuntMode::Analyze)
182 .with_evidence(FindingEvidence::model_parity("claims", "status", "FAIL")),
183 );
184 }
185 }
186}
187
188fn check_oracle_ops(tmgt_dir: &Path, findings: &mut Vec<Finding>, finding_id: &mut u32) {
189 let ops_dir = tmgt_dir.join("oracle-ops");
190 if !ops_dir.is_dir() {
191 *finding_id += 1;
192 findings.push(
193 Finding::new(
194 format!("BH-PARITY-{:04}", finding_id),
195 tmgt_dir,
196 1,
197 "Missing oracle-ops directory",
198 )
199 .with_description("No oracle-ops/ directory found in tiny-model-ground-truth")
200 .with_severity(FindingSeverity::Medium)
201 .with_category(DefectCategory::ModelParityGap)
202 .with_suspiciousness(0.5)
203 .with_discovered_by(HuntMode::Analyze)
204 .with_evidence(FindingEvidence::model_parity(
205 "ops",
206 "oracle-ops",
207 "missing",
208 )),
209 );
210 return;
211 }
212
213 for op in EXPECTED_OPS {
214 let op_dir = ops_dir.join(op);
215 let is_empty = if op_dir.is_dir() {
216 std::fs::read_dir(&op_dir).map(|mut d| d.next().is_none()).unwrap_or(true)
217 } else {
218 true
219 };
220
221 if is_empty {
222 *finding_id += 1;
223 findings.push(
224 Finding::new(
225 format!("BH-PARITY-{:04}", finding_id),
226 &ops_dir,
227 1,
228 format!("Missing oracle-ops: {}/", op),
229 )
230 .with_description(format!("Oracle-ops `{}` directory is missing or empty", op))
231 .with_severity(FindingSeverity::Low)
232 .with_category(DefectCategory::ModelParityGap)
233 .with_suspiciousness(0.4)
234 .with_discovered_by(HuntMode::Analyze)
235 .with_evidence(FindingEvidence::model_parity("ops", *op, "missing")),
236 );
237 }
238 }
239}
240
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Signature shared by the private `check_*` functions under test.
    type Check = fn(&Path, &mut Vec<Finding>, &mut u32);

    /// Creates a temp directory and returns its guard together with the
    /// (not yet created) `tmgt` path inside it. Keep the guard alive for the
    /// whole test: dropping it removes the directory.
    fn temp_layout() -> (tempfile::TempDir, std::path::PathBuf) {
        let root = tempfile::tempdir().expect("tempdir creation failed");
        let tmgt = root.path().join("tmgt");
        (root, tmgt)
    }

    /// Like `temp_layout`, but the `tmgt` directory already exists on return.
    fn temp_tmgt() -> (tempfile::TempDir, std::path::PathBuf) {
        let (root, tmgt) = temp_layout();
        std::fs::create_dir_all(&tmgt).expect("mkdir failed");
        (root, tmgt)
    }

    /// Writes `body` to `<tmgt>/CLAIMS.md`, replacing any previous content.
    fn write_claims(tmgt: &Path, body: &str) {
        let mut file = std::fs::File::create(tmgt.join("CLAIMS.md")).expect("file open failed");
        write!(file, "{}", body).expect("unexpected failure");
    }

    /// Runs a single check with a fresh id counter and returns its findings.
    fn run(check: Check, tmgt: &Path) -> Vec<Finding> {
        let mut findings = Vec::new();
        let mut id = 0;
        check(tmgt, &mut findings, &mut id);
        findings
    }

    /// Returns the findings whose title contains `needle`.
    fn titled<'a>(findings: &'a [Finding], needle: &str) -> Vec<&'a Finding> {
        findings.iter().filter(|f| f.title.contains(needle)).collect()
    }

    #[test]
    fn test_discover_explicit_path() {
        let (root, tmgt) = temp_tmgt();

        let found = discover_model_parity_dir(root.path(), Some(&tmgt));
        assert!(found.is_some());
        assert_eq!(found.expect("operation failed"), tmgt);
    }

    #[test]
    fn test_discover_explicit_path_missing() {
        let (root, _) = temp_layout();
        let absent = root.path().join("nonexistent");

        let found = discover_model_parity_dir(root.path(), Some(&absent));
        assert!(found.is_none());
    }

    #[test]
    fn test_oracle_completeness_all_missing() {
        let (root, tmgt) = temp_layout();
        std::fs::create_dir_all(tmgt.join("oracle")).expect("mkdir failed");

        let findings = analyze_model_parity_gaps(&tmgt, root.path());
        // 3 models x 4 prompts, none generated.
        assert_eq!(titled(&findings, "Missing oracle:").len(), 12);
    }

    #[test]
    fn test_oracle_completeness_partial() {
        let (root, tmgt) = temp_layout();
        let smollm_dir = tmgt.join("oracle").join("smollm-135m");
        std::fs::create_dir_all(&smollm_dir).expect("mkdir failed");
        for prompt in ["arithmetic", "code"] {
            std::fs::write(smollm_dir.join(format!("{}.json", prompt)), "{}")
                .expect("fs write failed");
        }

        let findings = analyze_model_parity_gaps(&tmgt, root.path());
        // Two of the four smollm prompts are still missing.
        assert_eq!(titled(&findings, "smollm-135m").len(), 2);
    }

    #[test]
    fn test_parse_claims_status_deferred() {
        let (_root, tmgt) = temp_tmgt();
        write_claims(
            &tmgt,
            "# Claims\n\n### Claim 6: Cross-Runtime Parity (Deferred)\n- **Status**: Deferred.\n",
        );

        let findings = run(check_claims_status, &tmgt);

        let deferred = titled(&findings, "Deferred");
        assert_eq!(deferred.len(), 1);
        assert_eq!(deferred[0].severity, FindingSeverity::Low);
    }

    #[test]
    fn test_parse_claims_status_fail() {
        let (_root, tmgt) = temp_tmgt();
        write_claims(&tmgt, "# Claims\n\n### Claim 19: Throughput\n- **Status**: FAIL\n");

        let findings = run(check_claims_status, &tmgt);

        let fails = titled(&findings, "Failed claim");
        assert_eq!(fails.len(), 1);
        assert_eq!(fails[0].severity, FindingSeverity::High);
    }

    #[test]
    fn test_oracle_ops_completeness() {
        let (_root, tmgt) = temp_layout();
        let ops_root = tmgt.join("oracle-ops");
        for op in ["convert", "quantize"] {
            let op_dir = ops_root.join(op);
            std::fs::create_dir_all(&op_dir).expect("mkdir failed");
            std::fs::write(op_dir.join("smollm.json"), "{}").expect("fs write failed");
        }

        let findings = run(check_oracle_ops, &tmgt);

        // Five expected ops, two populated.
        assert_eq!(titled(&findings, "Missing oracle-ops:").len(), 3);
    }

    #[test]
    fn test_missing_oracle_directory() {
        let (_root, tmgt) = temp_tmgt();

        let findings = run(check_oracle_completeness, &tmgt);

        assert_eq!(findings.len(), 1);
        assert!(findings[0].title.contains("Missing oracle directory"));
    }

    #[test]
    fn test_falsify_fail_detection_rejects_description_lines() {
        let (_root, tmgt) = temp_tmgt();
        write_claims(
            &tmgt,
            "# Claims\n\n\
             ### Claim 20: QA Gate\n\
             - **Falsification**: any gate with `status == \"FAIL\"`.\n",
        );

        let findings = run(check_claims_status, &tmgt);

        let fails = titled(&findings, "Failed claim");
        assert_eq!(fails.len(), 0, "Should NOT match falsification criterion line");
    }

    #[test]
    fn test_falsify_fail_detection_matches_status_field() {
        let (_root, tmgt) = temp_tmgt();
        write_claims(
            &tmgt,
            "# Claims\n\n\
             ### Claim 19: Throughput\n\
             - **Status**: FAIL (0 tok/s bug)\n\
             ### Claim 20: QA Gate\n\
             **Status**: FAIL — critical gate failed\n\
             ### Claim 21: Other\n\
             - Status: FAIL\n",
        );

        let findings = run(check_claims_status, &tmgt);

        let fails = titled(&findings, "Failed claim");
        assert_eq!(fails.len(), 3, "All three Status formats should match");
    }

    #[test]
    fn test_falsify_missing_claims_file() {
        let (_root, tmgt) = temp_tmgt();

        let findings = run(check_claims_status, &tmgt);
        assert_eq!(findings.len(), 0);
    }

    #[test]
    fn test_falsify_empty_claims_file() {
        let (_root, tmgt) = temp_tmgt();
        std::fs::write(tmgt.join("CLAIMS.md"), "").expect("fs write failed");

        let findings = run(check_claims_status, &tmgt);
        assert_eq!(findings.len(), 0);
    }

    #[test]
    fn test_falsify_oracle_all_present() {
        let (_root, tmgt) = temp_layout();
        for model in EXPECTED_MODELS {
            let model_dir = tmgt.join("oracle").join(model);
            std::fs::create_dir_all(&model_dir).expect("mkdir failed");
            for prompt in EXPECTED_PROMPTS {
                std::fs::write(model_dir.join(format!("{}.json", prompt)), "{}")
                    .expect("fs write failed");
            }
        }

        let findings = run(check_oracle_completeness, &tmgt);
        assert_eq!(findings.len(), 0, "All oracles present → 0 findings");
    }

    #[test]
    fn test_falsify_oracle_ops_all_present() {
        let (_root, tmgt) = temp_layout();
        for op in EXPECTED_OPS {
            let op_dir = tmgt.join("oracle-ops").join(op);
            std::fs::create_dir_all(&op_dir).expect("mkdir failed");
            std::fs::write(op_dir.join("result.json"), "{}").expect("fs write failed");
        }

        let findings = run(check_oracle_ops, &tmgt);
        assert_eq!(findings.len(), 0, "All ops present → 0 findings");
    }

    #[test]
    fn test_falsify_ops_dir_exists_but_empty() {
        let (_root, tmgt) = temp_layout();
        for op in EXPECTED_OPS {
            std::fs::create_dir_all(tmgt.join("oracle-ops").join(op)).expect("mkdir failed");
        }

        let findings = run(check_oracle_ops, &tmgt);
        assert_eq!(findings.len(), 5, "Empty dirs should be flagged");
    }

    #[test]
    fn test_falsify_discover_nonexistent_parent() {
        let found = discover_model_parity_dir(Path::new("/nonexistent/path/xyz"), None);
        assert!(found.is_none());
    }

    #[test]
    fn test_falsify_full_pipeline_empty_tmgt() {
        let (root, tmgt) = temp_tmgt();

        let findings = analyze_model_parity_gaps(&tmgt, root.path());

        // Only the two "directory missing" findings; no CLAIMS.md means no claim findings.
        assert_eq!(findings.len(), 2);
        assert!(findings.iter().any(|f| f.title.contains("Missing oracle directory")));
        assert!(findings.iter().any(|f| f.title.contains("Missing oracle-ops directory")));
    }
}