1use std::collections::HashMap;
4
5#[derive(Debug, Clone)]
8pub struct Hypothesis {
9 pub id: String,
10 pub claim: String,
11 pub conditions: Vec<String>,
12 pub threshold: f32, pub submitted_by: String,
14 pub status: HypothesisStatus,
15 pub gate_violations: Vec<String>,
16}
17
/// Lifecycle state of a [`Hypothesis`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HypothesisStatus {
    /// Initial state of a new hypothesis; also the state recorded for a
    /// submission that failed at least one gate.
    Pending,
    /// Passed every gate and is waiting for an experiment result.
    Gated,
    /// Evaluation has started but no final status has been assigned yet.
    Testing,
    /// The achievement loss was at or below the hypothesis threshold.
    Confirmed,
    /// The achievement loss exceeded both the hypothesis threshold and the
    /// guard's falsification threshold.
    Falsified,
    /// The achievement loss exceeded the hypothesis threshold but not the
    /// guard's falsification threshold.
    Inconclusive,
}
27
28impl Hypothesis {
29 pub fn new(id: &str, claim: &str, threshold: f32) -> Self {
30 Self {
31 id: id.to_string(),
32 claim: claim.to_string(),
33 conditions: Vec::new(),
34 threshold,
35 submitted_by: String::new(),
36 status: HypothesisStatus::Pending,
37 gate_violations: Vec::new(),
38 }
39 }
40
41 pub fn with_conditions(mut self, conditions: Vec<String>) -> Self {
42 self.conditions = conditions;
43 self
44 }
45
46 pub fn with_submitter(mut self, name: &str) -> Self {
47 self.submitted_by = name.to_string();
48 self
49 }
50}
51
/// Measurements reported by an experiment for one hypothesis.
#[derive(Debug, Clone)]
pub struct ExperimentResult {
    /// Identifier of the hypothesis the measurements belong to.
    pub hypothesis_id: String,
    /// Comprehension score; one of the three factors of the achievement product.
    pub comprehension: f32,
    /// Generalization score; one of the three factors of the achievement product.
    pub generalization: f32,
    /// Retention score; one of the three factors of the achievement product.
    pub retention: f32,
    /// Headline accuracy; compared against the achievement loss to flag
    /// results that look cherry-picked.
    pub raw_accuracy: f32,
    /// Free-form notes supplied with the result.
    pub details: String,
}
63
64impl ExperimentResult {
65 pub fn achievement_loss(&self) -> f32 {
67 let product = self.comprehension * self.generalization * self.retention;
68 1.0 - product
69 }
70}
71
72#[derive(Debug, Clone)]
75pub struct Verdict {
76 pub hypothesis_id: String,
77 pub status: HypothesisStatus,
78 pub achievement_loss: f32,
79 pub threshold: f32,
80 pub passed: bool,
81 pub raw_accuracy: f32,
82 pub warning: String,
83 pub details: String,
84}
85
/// Result of a gate check, for a single gate or for a whole submission.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GateResult {
    /// The check succeeded.
    Pass,
    /// The check failed; the payload is the human-readable reason.
    Fail(String),
}
93
94pub struct LabGuard {
97 hypotheses: HashMap<String, Hypothesis>,
98 results: HashMap<String, ExperimentResult>,
99 verdicts: Vec<Verdict>,
100 loss_threshold: f32,
101}
102
103impl LabGuard {
104 pub fn new() -> Self {
105 Self {
106 hypotheses: HashMap::new(),
107 results: HashMap::new(),
108 verdicts: Vec::new(),
109 loss_threshold: 0.4,
110 }
111 }
112
113 pub fn with_loss_threshold(mut self, threshold: f32) -> Self {
114 self.loss_threshold = threshold;
115 self
116 }
117
118 fn check_gates(&self, hyp: &Hypothesis) -> Vec<GateResult> {
121 let mut gates = Vec::new();
122
123 if hyp.claim.is_empty() {
125 gates.push(GateResult::Fail("Missing claim".to_string()));
126 } else if hyp.conditions.is_empty() {
127 gates.push(GateResult::Fail("Missing conditions".to_string()));
128 } else if hyp.threshold <= 0.0 || hyp.threshold >= 1.0 {
129 gates.push(GateResult::Fail("Threshold must be 0.0 < t < 1.0".to_string()));
130 } else {
131 gates.push(GateResult::Pass);
132 }
133
134 let absolutes = [
138 "always", "never", "all", "none", "every single",
139 "impossible to fail", "guaranteed", "proven", "unquestionably",
140 "without exception", "invariably", "universally",
141 ];
142 let claim_lower = hyp.claim.to_lowercase();
143 let has_absolute = absolutes.iter().any(|a| {
145 let pat = format!(" {} ", a); claim_lower.contains(&pat) ||
147 claim_lower.starts_with(&format!("{} ", a)) ||
148 claim_lower.ends_with(&format!(" {}", a)) ||
149 claim_lower.as_str() == *a
150 });
151 if has_absolute {
152 gates.push(GateResult::Fail(format!(
153 "Absolute quantifier detected — claims must be falsifiable under specific conditions. Check for: {:?}",
154 absolutes
155 )));
156 } else {
157 gates.push(GateResult::Pass);
158 }
159
160 let vague_patterns = [
164 "inversely proportional", "directly proportional",
165 "proportional to", "correlated with", "depends on",
166 ];
167 let has_vague = vague_patterns.iter().any(|p| claim_lower.contains(p));
168 let has_specifics = hyp.conditions.iter().any(|c| {
170 c.contains(|ch: char| ch.is_ascii_digit()) ||
171 c.contains("because") || c.contains("due to") || c.contains("via") ||
172 c.contains("mechanism") || c.contains("threshold")
173 });
174 if has_vague && !has_specifics {
175 gates.push(GateResult::Fail(
176 "Vague causation — claims about proportionality must specify the mechanism. \n
177 Add conditions with numbers or causal explanation (because, due to, via, mechanism).".to_string()
178 ));
179 } else {
180 gates.push(GateResult::Pass);
181 }
182
183 if self.hypotheses.contains_key(&hyp.id) || self.verdicts.iter().any(|v| v.hypothesis_id == hyp.id) {
185 gates.push(GateResult::Fail("Already tested".to_string()));
186 } else {
187 gates.push(GateResult::Pass);
188 }
189
190 if hyp.threshold <= 0.0 {
192 gates.push(GateResult::Fail("Threshold must be positive".to_string()));
193 } else {
194 gates.push(GateResult::Pass);
195 }
196
197 gates
198 }
199
200 pub fn submit(&mut self, mut hypothesis: Hypothesis) -> GateResult {
203 let gates = self.check_gates(&hypothesis);
204 let failures: Vec<String> = gates.iter()
205 .filter_map(|g| if let GateResult::Fail(r) = g { Some(r.clone()) } else { None })
206 .collect();
207
208 if failures.is_empty() {
209 hypothesis.status = HypothesisStatus::Gated;
210 hypothesis.gate_violations = Vec::new();
211 self.hypotheses.insert(hypothesis.id.clone(), hypothesis);
212 GateResult::Pass
213 } else {
214 hypothesis.gate_violations = failures.clone();
215 hypothesis.status = HypothesisStatus::Pending;
216 self.hypotheses.insert(hypothesis.id.clone(), hypothesis);
217 GateResult::Fail(failures.join("; "))
218 }
219 }
220
221 pub fn evaluate(&mut self, result: &ExperimentResult) -> Option<Verdict> {
222 let hyp = self.hypotheses.get_mut(&result.hypothesis_id)?;
223 hyp.status = HypothesisStatus::Testing;
224
225 let loss = result.achievement_loss();
226 let passed = loss <= hyp.threshold;
227
228 let cherry_pick_warning = if result.raw_accuracy > 0.95 && loss > 0.3 {
230 format!("CHERRY-PICK WARNING: raw_accuracy {:.2} but loss {:.2} — results may be cherry-picked",
231 result.raw_accuracy, loss)
232 } else {
233 String::new()
234 };
235
236 let status = if passed {
237 HypothesisStatus::Confirmed
238 } else if loss > self.loss_threshold {
239 HypothesisStatus::Falsified
240 } else {
241 HypothesisStatus::Inconclusive
242 };
243
244 hyp.status = status;
245 self.results.insert(result.hypothesis_id.clone(), result.clone());
246
247 let verdict = Verdict {
248 hypothesis_id: result.hypothesis_id.clone(),
249 status,
250 achievement_loss: loss,
251 threshold: hyp.threshold,
252 passed,
253 raw_accuracy: result.raw_accuracy,
254 warning: cherry_pick_warning,
255 details: format!("loss={:.4} threshold={:.4} comp={:.2} gen={:.2} ret={:.2}",
256 loss, hyp.threshold, result.comprehension, result.generalization, result.retention),
257 };
258
259 self.verdicts.push(verdict.clone());
260 Some(verdict)
261 }
262
263 pub fn hypothesis(&self, id: &str) -> Option<&Hypothesis> {
266 self.hypotheses.get(id)
267 }
268
269 pub fn result(&self, id: &str) -> Option<&ExperimentResult> {
270 self.results.get(id)
271 }
272
273 pub fn verdict(&self, id: &str) -> Option<&Verdict> {
274 self.verdicts.iter().find(|v| v.hypothesis_id == id)
275 }
276
277 pub fn confirmed_count(&self) -> usize {
278 self.verdicts.iter().filter(|v| v.status == HypothesisStatus::Confirmed).count()
279 }
280
281 pub fn falsified_count(&self) -> usize {
282 self.verdicts.iter().filter(|v| v.status == HypothesisStatus::Falsified).count()
283 }
284
285 pub fn total_evaluated(&self) -> usize {
286 self.verdicts.len()
287 }
288
289 pub fn average_loss(&self) -> f32 {
290 if self.verdicts.is_empty() { return 0.0; }
291 let sum: f32 = self.verdicts.iter().map(|v| v.achievement_loss).sum();
292 sum / self.verdicts.len() as f32
293 }
294
295 pub fn by_status(&self, status: HypothesisStatus) -> Vec<&Hypothesis> {
297 self.hypotheses.values().filter(|h| h.status == status).collect()
298 }
299}
300
301impl Default for LabGuard {
302 fn default() -> Self {
303 Self::new()
304 }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// A hypothesis that satisfies every gate; shared by most tests.
    fn valid_hypothesis() -> Hypothesis {
        Hypothesis::new(
            "hyp-1",
            "Snapping to Pythagorean coordinates reduces drift below 0.001",
            0.3,
        )
        .with_conditions(vec![
            "CUDA environment".to_string(),
            "10K iterations".to_string(),
        ])
        .with_submitter("Forgemaster")
    }

    /// Builds a hypothesis that carries exactly one condition.
    fn hypothesis_with(id: &str, claim: &str, threshold: f32, condition: &str) -> Hypothesis {
        Hypothesis::new(id, claim, threshold).with_conditions(vec![condition.to_string()])
    }

    /// A guard that already holds `valid_hypothesis()` in the `Gated` state.
    fn guard_with_valid_hypothesis() -> LabGuard {
        let mut guard = LabGuard::new();
        assert_eq!(guard.submit(valid_hypothesis()), GateResult::Pass);
        guard
    }

    /// Builds an experiment result for the hypothesis id "hyp-1".
    fn outcome(
        comprehension: f32,
        generalization: f32,
        retention: f32,
        raw_accuracy: f32,
        details: &str,
    ) -> ExperimentResult {
        ExperimentResult {
            hypothesis_id: "hyp-1".to_string(),
            comprehension,
            generalization,
            retention,
            raw_accuracy,
            details: details.to_string(),
        }
    }

    /// Asserts that a fresh guard rejects `hypothesis`.
    fn assert_rejected(hypothesis: Hypothesis) {
        let mut guard = LabGuard::new();
        assert!(matches!(guard.submit(hypothesis), GateResult::Fail(_)));
    }

    /// Asserts that a fresh guard accepts `hypothesis`.
    fn assert_accepted(hypothesis: Hypothesis) {
        let mut guard = LabGuard::new();
        assert!(matches!(guard.submit(hypothesis), GateResult::Pass));
    }

    #[test]
    fn test_submit_valid() {
        let mut guard = LabGuard::new();
        let result = guard.submit(valid_hypothesis());
        assert!(matches!(result, GateResult::Pass));
        assert_eq!(
            guard.hypothesis("hyp-1").unwrap().status,
            HypothesisStatus::Gated
        );
    }

    #[test]
    fn test_gate_missing_claim() {
        assert_rejected(hypothesis_with("hyp-x", "", 0.3, "test"));
    }

    #[test]
    fn test_gate_missing_conditions() {
        assert_rejected(Hypothesis::new("hyp-x", "Some claim", 0.3));
    }

    #[test]
    fn test_gate_absolute_words() {
        assert_rejected(hypothesis_with(
            "hyp-x",
            "This always produces zero drift",
            0.3,
            "test",
        ));
    }

    #[test]
    fn test_gate_invalid_threshold() {
        assert_rejected(hypothesis_with("hyp-x", "Some claim", 0.0, "test"));
    }

    #[test]
    fn test_gate_novelty() {
        let mut guard = guard_with_valid_hypothesis();
        // Same id as the stored hypothesis, so the novelty gate must fire.
        let duplicate = hypothesis_with("hyp-1", "Different claim about drift", 0.3, "test");
        assert!(matches!(guard.submit(duplicate), GateResult::Fail(_)));
    }

    #[test]
    fn test_evaluate_confirmed() {
        let mut guard = guard_with_valid_hypothesis();
        let verdict = guard
            .evaluate(&outcome(0.95, 0.90, 0.88, 0.99, "Strong results"))
            .unwrap();
        assert_eq!(verdict.status, HypothesisStatus::Confirmed);
        assert!(verdict.achievement_loss < 0.3);
        assert!(verdict.passed);
    }

    #[test]
    fn test_evaluate_falsified() {
        let mut guard = guard_with_valid_hypothesis();
        let verdict = guard
            .evaluate(&outcome(0.3, 0.2, 0.1, 0.95, "Poor learning"))
            .unwrap();
        assert_eq!(verdict.status, HypothesisStatus::Falsified);
        assert!(!verdict.passed);
    }

    #[test]
    fn test_evaluate_falsified_above_custom_loss_threshold() {
        let mut guard = LabGuard::new().with_loss_threshold(0.2);
        assert_eq!(
            guard.submit(hypothesis_with("hyp-1", "Some claim", 0.5, "test")),
            GateResult::Pass
        );
        // loss = 1 - 0.7 * 0.6 * 0.5 = 0.79: above the hypothesis threshold
        // (0.5) and above the guard's loss threshold (0.2).
        let verdict = guard
            .evaluate(&outcome(0.7, 0.6, 0.5, 0.8, "Mixed"))
            .unwrap();
        assert_eq!(verdict.status, HypothesisStatus::Falsified);
    }

    #[test]
    fn test_evaluate_inconclusive() {
        let mut guard = guard_with_valid_hypothesis();
        // loss = 1 - 0.9 * 0.88 * 0.85 = 0.3268: above the hypothesis
        // threshold (0.3) but not above the default loss threshold (0.4).
        let verdict = guard
            .evaluate(&outcome(0.9, 0.88, 0.85, 0.92, "Borderline"))
            .unwrap();
        assert_eq!(verdict.status, HypothesisStatus::Inconclusive);
        assert!(!verdict.passed);
    }

    #[test]
    fn test_cherry_pick_warning() {
        let mut guard = guard_with_valid_hypothesis();
        let verdict = guard
            .evaluate(&outcome(0.4, 0.3, 0.3, 0.99, "Looks great on paper"))
            .unwrap();
        assert!(!verdict.warning.is_empty());
        assert!(verdict.warning.contains("CHERRY-PICK"));
    }

    #[test]
    fn test_no_cherry_pick_warning() {
        let mut guard = guard_with_valid_hypothesis();
        let verdict = guard
            .evaluate(&outcome(0.9, 0.88, 0.85, 0.92, "Consistent"))
            .unwrap();
        assert!(verdict.warning.is_empty());
    }

    #[test]
    fn test_achievement_loss_formula() {
        let result = ExperimentResult {
            hypothesis_id: "test".to_string(),
            comprehension: 0.5,
            generalization: 0.5,
            retention: 0.5,
            raw_accuracy: 0.9,
            details: String::new(),
        };
        // 1 - 0.5^3 = 0.875
        assert!((result.achievement_loss() - 0.875).abs() < 0.001);
    }

    #[test]
    fn test_stats() {
        let mut guard = guard_with_valid_hypothesis();

        assert_eq!(guard.confirmed_count(), 0);
        assert_eq!(guard.falsified_count(), 0);
        assert_eq!(guard.total_evaluated(), 0);

        assert!(guard.evaluate(&outcome(0.95, 0.90, 0.88, 0.99, "")).is_some());

        assert_eq!(guard.confirmed_count(), 1);
        assert_eq!(guard.total_evaluated(), 1);
        assert!(guard.average_loss() > 0.0);
    }

    #[test]
    fn test_by_status() {
        let guard = guard_with_valid_hypothesis();
        let gated = guard.by_status(HypothesisStatus::Gated);
        assert_eq!(gated.len(), 1);
    }

    #[test]
    fn test_verdict_details() {
        let mut guard = guard_with_valid_hypothesis();
        let verdict = guard
            .evaluate(&outcome(0.95, 0.90, 0.88, 0.99, ""))
            .unwrap();
        assert!(verdict.details.contains("loss="));
        assert!(verdict.details.contains("comp="));
    }

    #[test]
    fn test_multiple_hypotheses() {
        let mut guard = guard_with_valid_hypothesis();
        assert_eq!(
            guard.submit(hypothesis_with(
                "hyp-2",
                "Constraint tightening improves precision",
                0.35,
                "test",
            )),
            GateResult::Pass
        );

        assert_eq!(guard.by_status(HypothesisStatus::Gated).len(), 2);
    }

    #[test]
    fn test_gate_rejects_always() {
        assert_rejected(hypothesis_with(
            "always-bad",
            "DCS always improves fitness",
            0.3,
            "agents=256",
        ));
    }

    #[test]
    fn test_gate_rejects_never() {
        assert_rejected(hypothesis_with(
            "never-bad",
            "Trust decay never causes cascading failure",
            0.2,
            "test",
        ));
    }

    #[test]
    fn test_gate_rejects_guaranteed() {
        assert_rejected(hypothesis_with(
            "guaranteed-bad",
            "Ghost tiles are guaranteed to improve recall",
            0.1,
            "test",
        ));
    }

    #[test]
    fn test_gate_allows_falsifiable() {
        assert_accepted(hypothesis_with(
            "falsifiable",
            "DCS improves fitness when specialist ratio exceeds 5x",
            0.3,
            "agents=256, specialist_ratio=5.0",
        ));
    }

    #[test]
    fn test_gate_allows_overall_not_absolute() {
        // "Overall" contains "all" but is not the absolute quantifier "all".
        assert_accepted(hypothesis_with(
            "overall-ok",
            "Overall system performance improves with tiling",
            0.3,
            "test",
        ));
    }

    #[test]
    fn test_gate_rejects_vague_causation() {
        // Proportionality claim whose only condition has neither a number
        // nor a causal marker.
        assert_rejected(hypothesis_with(
            "vague",
            "DCS benefit is inversely proportional to perception range",
            0.3,
            "tested with simulation",
        ));
    }

    #[test]
    fn test_gate_allows_specific_causation() {
        // Same claim, but the condition states a mechanism ("because ...").
        assert_accepted(hypothesis_with(
            "specific",
            "DCS benefit is inversely proportional to perception range",
            0.3,
            "because specialists exploit local gradients that generalists miss",
        ));
    }

    #[test]
    fn test_gate_allows_causation_with_numbers() {
        // A numeric condition counts as a specific for a proportionality claim.
        assert_accepted(hypothesis_with(
            "with-nums",
            "Fitness is proportional to food density",
            0.4,
            "food_density > 0.5",
        ));
    }
}