1use crate::conjunct::ConjunctStatus;
7use crate::evidence::{Evidence, SourceValue};
8use crate::sources;
9use crate::threshold;
10
11pub fn evaluate_generality(evidence: &[Evidence]) -> ConjunctStatus {
21 let mut arc_agi_2 = None;
22 let mut arc_agi_3 = None;
23 let mut hle = None;
24 let mut gpqa_diamond = None;
25
26 for e in evidence {
27 match e.source.as_str() {
28 sources::generality::ARC_AGI_2 => {
29 if let SourceValue::Fraction(f) = e.value {
30 arc_agi_2 = Some(f);
31 }
32 }
33 sources::generality::ARC_AGI_3 => {
34 if let SourceValue::Fraction(f) = e.value {
35 arc_agi_3 = Some(f);
36 }
37 }
38 sources::generality::HLE => {
39 if let SourceValue::Fraction(f) = e.value {
40 hle = Some(f);
41 }
42 }
43 sources::generality::GPQA_DIAMOND => {
44 if let SourceValue::Fraction(f) = e.value {
45 gpqa_diamond = Some(f);
46 }
47 }
48 _ => {}
49 }
50 }
51
52 let arc_agi_3 = match arc_agi_3 {
54 Some(f) => f,
55 None => return ConjunctStatus::InsufficientData,
56 };
57
58 if arc_agi_3.value() < threshold::generality::ARC_AGI_3_FLOOR {
60 return ConjunctStatus::Fail;
61 }
62
63 let available_sources = [arc_agi_2.is_some(), hle.is_some(), gpqa_diamond.is_some()]
65 .iter()
66 .filter(|&&x| x)
67 .count()
68 + 1; if available_sources < 3 {
72 return ConjunctStatus::InsufficientData;
73 }
74
75 let arc_agi_2_pass = arc_agi_2
77 .map(|f| f.value() >= threshold::generality::ARC_AGI_2_PASS)
78 .unwrap_or(false);
79 let arc_agi_3_pass = arc_agi_3.value() >= threshold::generality::ARC_AGI_3_PASS;
80 let hle_pass = hle
81 .map(|f| f.value() >= threshold::generality::HLE_PASS)
82 .unwrap_or(false);
83 let gpqa_pass = gpqa_diamond
84 .map(|f| f.value() >= threshold::generality::GPQA_DIAMOND_PASS)
85 .unwrap_or(false);
86
87 let sources = [
88 (arc_agi_2_pass, arc_agi_2.is_some()),
89 (arc_agi_3_pass, true),
90 (hle_pass, hle.is_some()),
91 (gpqa_pass, gpqa_diamond.is_some()),
92 ];
93
94 let passing = sources
95 .iter()
96 .filter(|(pass, present)| *present && *pass)
97 .count();
98 let present = sources.iter().filter(|(_, present)| *present).count();
99
100 if passing == present && present >= 3 {
101 ConjunctStatus::Pass
102 } else if passing > 0 && passing < present {
103 ConjunctStatus::Partial
104 } else {
105 ConjunctStatus::Fail
106 }
107}
108
109pub fn evaluate_economic_substitutability(evidence: &[Evidence]) -> ConjunctStatus {
118 let mut gdpval = None;
119 let mut rli = None;
120
121 for e in evidence {
122 match e.source.as_str() {
123 sources::economic_substitutability::GDPVAL
124 | sources::economic_substitutability::GDPVAL_AA => {
125 if let SourceValue::Fraction(f) = e.value {
126 gdpval = Some(f);
127 }
128 }
129 sources::economic_substitutability::RLI => {
130 if let SourceValue::Fraction(f) = e.value {
131 rli = Some(f);
132 }
133 }
134 sources::economic_substitutability::APEX_AGENTS => {
135 }
137 _ => {}
138 }
139 }
140
141 let gdpval = match gdpval {
143 Some(f) => f,
144 None => return ConjunctStatus::InsufficientData,
145 };
146 let rli = match rli {
147 Some(f) => f,
148 None => return ConjunctStatus::InsufficientData,
149 };
150
151 if rli.value() < threshold::economic_substitutability::RLI_FLOOR {
153 return ConjunctStatus::Fail;
154 }
155
156 let gdpval_pass = gdpval.value() >= threshold::economic_substitutability::GDPVAL_PASS;
157 let rli_pass = rli.value() >= threshold::economic_substitutability::RLI_PASS;
158
159 if gdpval_pass && rli_pass {
160 ConjunctStatus::Pass
161 } else if gdpval_pass || rli_pass {
162 ConjunctStatus::Partial
163 } else {
164 ConjunctStatus::Fail
165 }
166}
167
168pub fn evaluate_environmental_transfer(evidence: &[Evidence]) -> ConjunctStatus {
177 let mut arc_agi_3 = None;
178 let mut osworld = None;
179 let mut _nes = None;
180
181 for e in evidence {
182 match e.source.as_str() {
183 sources::environmental_transfer::ARC_AGI_3 => {
184 if let SourceValue::Fraction(f) = e.value {
185 arc_agi_3 = Some(f);
186 }
187 }
188 sources::environmental_transfer::OSWORLD => {
189 if let SourceValue::Fraction(f) = e.value {
190 osworld = Some(f);
191 }
192 }
193 sources::environmental_transfer::NES => {
194 if let SourceValue::Fraction(f) = e.value {
195 _nes = Some(f);
196 }
197 }
198 _ => {}
199 }
200 }
201
202 let arc_agi_3 = match arc_agi_3 {
204 Some(f) => f,
205 None => return ConjunctStatus::InsufficientData,
206 };
207
208 if arc_agi_3.value() < threshold::environmental_transfer::ARC_AGI_3_FLOOR {
210 return ConjunctStatus::Fail;
211 }
212
213 if osworld.is_none() {
217 return ConjunctStatus::InsufficientData;
218 }
219
220 let arc_agi_3_pass = arc_agi_3.value() >= threshold::environmental_transfer::ARC_AGI_3_PASS;
221 let osworld_pass = osworld
222 .map(|f| f.value() >= threshold::environmental_transfer::OSWORLD_PASS)
223 .unwrap_or(false);
224
225 if arc_agi_3_pass && osworld_pass {
226 ConjunctStatus::Pass
227 } else {
228 ConjunctStatus::Partial
232 }
233}
234
235pub fn evaluate_autonomous_agency(evidence: &[Evidence]) -> ConjunctStatus {
244 let mut metr = None;
245 let mut rebench = None;
246 let mut swebench = None;
247
248 for e in evidence {
249 match e.source.as_str() {
250 sources::autonomous_agency::METR_80PCT_TIME_HORIZON => {
251 if let SourceValue::Hours(h) = e.value {
252 metr = Some(h);
253 }
254 }
255 sources::autonomous_agency::RE_BENCH => {
256 if let SourceValue::Fraction(f) = e.value {
257 rebench = Some(f);
258 }
259 }
260 sources::autonomous_agency::SWE_BENCH_VERIFIED => {
261 if let SourceValue::Fraction(f) = e.value {
262 swebench = Some(f);
263 }
264 }
265 _ => {}
266 }
267 }
268
269 let metr = match metr {
271 Some(h) => h,
272 None => return ConjunctStatus::InsufficientData,
273 };
274
275 if metr.value() < threshold::autonomous_agency::METR_80PCT_FLOOR_HOURS {
277 return ConjunctStatus::Fail;
278 }
279
280 if rebench.is_none() && swebench.is_none() {
282 return ConjunctStatus::InsufficientData;
283 }
284
285 let metr_pass = metr.value() >= threshold::autonomous_agency::METR_80PCT_PASS_HOURS;
286 let rebench_pass = rebench
287 .map(|f| f.value() >= threshold::autonomous_agency::REBENCH_PASS)
288 .unwrap_or(false);
289 let swebench_pass = swebench
290 .map(|f| f.value() >= threshold::autonomous_agency::SWEBENCH_VERIFIED_PASS_AT_5)
291 .unwrap_or(false);
292
293 if metr_pass && (rebench_pass || swebench_pass) {
294 ConjunctStatus::Pass
295 } else if metr_pass || rebench_pass || swebench_pass {
296 ConjunctStatus::Partial
297 } else {
298 ConjunctStatus::Fail
299 }
300}
301
#[cfg(test)]
mod tests {
    use super::*;
    use crate::evidence::{BoundedFraction, MeasurementId, NonNegativeHours, Provenance, SourceId};
    use chrono::Utc;
    use url::Url;

    /// Builds an `Evidence` fixture for `source`/`measurement` carrying `value`,
    /// with fixed placeholder provenance and a reliability percentile of 95.
    fn make_evidence(source: &str, measurement: &str, value: SourceValue) -> Evidence {
        let provenance = Provenance {
            source_url: Url::parse("https://example.com").unwrap(),
            fetch_timestamp: Utc::now(),
            source_version: Some(String::from("1.0")),
            raw_value: String::from("test"),
        };

        Evidence {
            source: SourceId::new(source),
            measurement: MeasurementId::new(measurement),
            value,
            reliability_percentile: 95,
            provenance,
        }
    }

    /// All four generality sources present and expected to pass.
    #[test]
    fn generality_pass_all_sources() {
        let inputs = [
            make_evidence(
                "arc-agi-2",
                "pass-rate",
                SourceValue::Fraction(BoundedFraction::new(0.85).unwrap()),
            ),
            make_evidence(
                "arc-agi-3",
                "pass-rate",
                SourceValue::Fraction(BoundedFraction::new(0.50).unwrap()),
            ),
            make_evidence(
                "hle",
                "accuracy",
                SourceValue::Fraction(BoundedFraction::new(0.80).unwrap()),
            ),
            make_evidence(
                "gpqa-diamond",
                "accuracy",
                SourceValue::Fraction(BoundedFraction::new(0.90).unwrap()),
            ),
        ];

        let status = evaluate_generality(&inputs);

        assert_eq!(status, ConjunctStatus::Pass);
    }

    /// ARC-AGI-3 on its own is not enough sources to decide generality.
    #[test]
    fn generality_insufficient_data() {
        let inputs = [make_evidence(
            "arc-agi-3",
            "pass-rate",
            SourceValue::Fraction(BoundedFraction::new(0.50).unwrap()),
        )];

        let status = evaluate_generality(&inputs);

        assert_eq!(status, ConjunctStatus::InsufficientData);
    }

    /// A very low ARC-AGI-3 reading is expected to fail despite strong
    /// HLE and GPQA Diamond readings.
    #[test]
    fn generality_fail_below_floor() {
        let inputs = [
            make_evidence(
                "arc-agi-3",
                "pass-rate",
                SourceValue::Fraction(BoundedFraction::new(0.03).unwrap()),
            ),
            make_evidence(
                "hle",
                "accuracy",
                SourceValue::Fraction(BoundedFraction::new(0.80).unwrap()),
            ),
            make_evidence(
                "gpqa-diamond",
                "accuracy",
                SourceValue::Fraction(BoundedFraction::new(0.90).unwrap()),
            ),
        ];

        let status = evaluate_generality(&inputs);

        assert_eq!(status, ConjunctStatus::Fail);
    }

    /// GDPval and RLI both present and expected to pass.
    #[test]
    fn economic_substitutability_pass() {
        let inputs = [
            make_evidence(
                "gdpval",
                "win-rate",
                SourceValue::Fraction(BoundedFraction::new(0.85).unwrap()),
            ),
            make_evidence(
                "rli",
                "completion-rate",
                SourceValue::Fraction(BoundedFraction::new(0.60).unwrap()),
            ),
        ];

        let status = evaluate_economic_substitutability(&inputs);

        assert_eq!(status, ConjunctStatus::Pass);
    }

    /// GDPval without RLI cannot be decided.
    #[test]
    fn economic_substitutability_insufficient_data() {
        let inputs = [make_evidence(
            "gdpval",
            "win-rate",
            SourceValue::Fraction(BoundedFraction::new(0.85).unwrap()),
        )];

        let status = evaluate_economic_substitutability(&inputs);

        assert_eq!(status, ConjunctStatus::InsufficientData);
    }

    /// ARC-AGI-3 and OSWorld both present and expected to pass.
    #[test]
    fn environmental_transfer_pass() {
        let inputs = [
            make_evidence(
                "arc-agi-3",
                "pass-rate",
                SourceValue::Fraction(BoundedFraction::new(0.50).unwrap()),
            ),
            make_evidence(
                "osworld",
                "completion-rate",
                SourceValue::Fraction(BoundedFraction::new(0.85).unwrap()),
            ),
        ];

        let status = evaluate_environmental_transfer(&inputs);

        assert_eq!(status, ConjunctStatus::Pass);
    }

    /// ARC-AGI-3 alone, with no secondary source, cannot be decided.
    #[test]
    fn environmental_transfer_insufficient_without_secondary() {
        let inputs = [make_evidence(
            sources::environmental_transfer::ARC_AGI_3,
            "pass-rate",
            SourceValue::Fraction(BoundedFraction::new(0.50).unwrap()),
        )];

        let status = evaluate_environmental_transfer(&inputs);

        assert_eq!(status, ConjunctStatus::InsufficientData);
    }

    /// A NES reading does not count as the required secondary source.
    #[test]
    fn environmental_transfer_insufficient_with_nes_only() {
        let inputs = [
            make_evidence(
                sources::environmental_transfer::ARC_AGI_3,
                "pass-rate",
                SourceValue::Fraction(BoundedFraction::new(0.50).unwrap()),
            ),
            make_evidence(
                sources::environmental_transfer::NES,
                "completion-rate",
                SourceValue::Fraction(BoundedFraction::new(0.90).unwrap()),
            ),
        ];

        let status = evaluate_environmental_transfer(&inputs);

        assert_eq!(
            status,
            ConjunctStatus::InsufficientData,
            "NES alone cannot satisfy secondary source requirement in v0.1.x"
        );
    }

    /// A long METR horizon plus a strong RE-Bench reading is expected to pass.
    #[test]
    fn autonomous_agency_pass() {
        let inputs = [
            make_evidence(
                sources::autonomous_agency::METR_80PCT_TIME_HORIZON,
                "hours",
                SourceValue::Hours(NonNegativeHours::new(168.0).unwrap()),
            ),
            make_evidence(
                sources::autonomous_agency::RE_BENCH,
                "success-rate",
                SourceValue::Fraction(BoundedFraction::new(0.60).unwrap()),
            ),
        ];

        let status = evaluate_autonomous_agency(&inputs);

        assert_eq!(status, ConjunctStatus::Pass);
    }

    /// METR alone, with no supporting benchmark, cannot be decided.
    #[test]
    fn autonomous_agency_insufficient_without_supporting() {
        let inputs = [make_evidence(
            sources::autonomous_agency::METR_80PCT_TIME_HORIZON,
            "hours",
            SourceValue::Hours(NonNegativeHours::new(168.0).unwrap()),
        )];

        let status = evaluate_autonomous_agency(&inputs);

        assert_eq!(status, ConjunctStatus::InsufficientData);
    }

    /// A short METR horizon is expected to fail even with a strong
    /// RE-Bench reading.
    #[test]
    fn autonomous_agency_fail_below_floor() {
        let inputs = [
            make_evidence(
                sources::autonomous_agency::METR_80PCT_TIME_HORIZON,
                "hours",
                SourceValue::Hours(NonNegativeHours::new(4.0).unwrap()),
            ),
            make_evidence(
                sources::autonomous_agency::RE_BENCH,
                "success-rate",
                SourceValue::Fraction(BoundedFraction::new(0.60).unwrap()),
            ),
        ];

        let status = evaluate_autonomous_agency(&inputs);

        assert_eq!(status, ConjunctStatus::Fail);
    }
}