1use std::collections::{BTreeMap, BTreeSet};
14
15use chrono::{DateTime, NaiveDate, Utc};
16use rust_decimal::Decimal;
17use serde::{Deserialize, Serialize};
18
19use crate::{
20 decimal_to_u64, CoreError, CostLane, EngineSnapshot, PricingCatalog, ProviderStatus,
21 TokenTotals,
22};
23
/// The only `schema_version` value that `BenchmarkTable::validate` accepts.
const BENCH_SCHEMA_VERSION: &str = "1";

/// Hedge text that `bench_view` places in `BenchDisclaimer::note`.
const DISCLAIMER_NOTE: &str = "~ cost-only comparison at equal token volume; not a quality claim.";

/// Meter names walked by `repriced_total`; each one is mapped to a token
/// count by `meter_volume` and to a rate by `PricingCatalog::meter_price`.
const METERS: [&str; 4] = ["input", "output", "cache_read", "cache_write"];
31
32fn bundled_benchmarks_json() -> &'static str {
33 include_str!("../bench/benchmarks.v1.json")
36}
37
/// Deserialized form of the benchmark JSON document.
#[derive(Debug, Deserialize)]
struct BenchmarkTable {
    /// Must equal `BENCH_SCHEMA_VERSION`; checked by `validate`.
    schema_version: String,
    /// Benchmarks in document order. A missing key deserializes to an empty
    /// list, which `validate` then rejects.
    #[serde(default)]
    benchmarks: Vec<Benchmark>,
}
48
/// One benchmark entry of the JSON table, before frontier standings are
/// computed.
#[derive(Debug, Deserialize)]
struct Benchmark {
    /// Benchmark name; used in validation messages and copied to
    /// `BenchFrontier::name`.
    name: String,
    /// Copied verbatim to `BenchFrontier::role`; not interpreted in this file.
    role: String,
    /// Must not be blank after trimming (enforced by `validate`).
    source: String,
    /// Must parse as a `YYYY-MM-DD` date after trimming (enforced by
    /// `validate`); the untrimmed string is what gets stored and copied.
    as_of: String,
    /// Optional; copied verbatim to `BenchFrontier::harness`.
    #[serde(default)]
    harness: Option<String>,
    /// Copied verbatim to `BenchFrontier::cost_note`.
    cost_note: String,
    /// Scored models. `validate` requires at least one point and unique
    /// `model_id`s within the benchmark.
    #[serde(default)]
    points: Vec<BenchmarkPoint>,
}
61
/// One model's result on a benchmark, as stored in the JSON table.
#[derive(Debug, Deserialize)]
struct BenchmarkPoint {
    /// Model identifier; looked up in the pricing catalog by
    /// `build_frontiers` and compared with overlay keys.
    model_id: String,
    /// Display label copied to `FrontierPoint::label`.
    label: String,
    /// Score; `validate` requires it to lie within `0..=100`.
    score_pct: Decimal,
    /// Cost per task, if known; `validate` rejects negative values. A missing
    /// cost makes `standing_for` report `FrontierStanding::CostUnknown`.
    #[serde(default)]
    cost_per_task_usd: Option<Decimal>,
    /// Optional free-form note copied to `FrontierPoint::note`.
    #[serde(default)]
    note: Option<String>,
}
72
73impl BenchmarkTable {
74 fn bundled() -> Result<Self, CoreError> {
75 Self::from_json(bundled_benchmarks_json())
76 }
77
78 fn from_json(value: &str) -> Result<Self, CoreError> {
79 let table: BenchmarkTable = serde_json::from_str(value).map_err(|err| {
82 CoreError::BenchValidation(format!("benchmark JSON parse error: {err}"))
83 })?;
84 table.validate()?;
85 Ok(table)
86 }
87
88 fn validate(&self) -> Result<(), CoreError> {
91 if self.schema_version != BENCH_SCHEMA_VERSION {
92 return Err(CoreError::BenchValidation(format!(
93 "unsupported schema_version {}; expected {}",
94 self.schema_version, BENCH_SCHEMA_VERSION
95 )));
96 }
97 if self.benchmarks.is_empty() {
98 return Err(CoreError::BenchValidation(
99 "bundled benchmark table has no benchmarks".to_string(),
100 ));
101 }
102 for benchmark in &self.benchmarks {
103 if benchmark.source.trim().is_empty() {
104 return Err(CoreError::BenchValidation(format!(
105 "benchmark {} has an empty source",
106 benchmark.name
107 )));
108 }
109 if NaiveDate::parse_from_str(benchmark.as_of.trim(), "%Y-%m-%d").is_err() {
110 return Err(CoreError::BenchValidation(format!(
111 "benchmark {} has an invalid as_of {:?}; expected YYYY-MM-DD",
112 benchmark.name, benchmark.as_of
113 )));
114 }
115 if benchmark.points.is_empty() {
116 return Err(CoreError::BenchValidation(format!(
117 "benchmark {} has no points",
118 benchmark.name
119 )));
120 }
121 let mut seen = BTreeSet::new();
122 for point in &benchmark.points {
123 if !seen.insert(point.model_id.as_str()) {
124 return Err(CoreError::BenchValidation(format!(
125 "benchmark {} has duplicate model_id {}",
126 benchmark.name, point.model_id
127 )));
128 }
129 if point.score_pct < Decimal::ZERO || point.score_pct > Decimal::from(100) {
130 return Err(CoreError::BenchValidation(format!(
131 "benchmark {} model {} score_pct {} is outside 0..=100",
132 benchmark.name, point.model_id, point.score_pct
133 )));
134 }
135 if matches!(point.cost_per_task_usd, Some(cost) if cost < Decimal::ZERO) {
136 return Err(CoreError::BenchValidation(format!(
137 "benchmark {} model {} has a negative cost",
138 benchmark.name, point.model_id
139 )));
140 }
141 }
142 }
143 Ok(())
144 }
145}
146
/// One benchmark with a frontier standing attached to every point.
///
/// Built by `build_frontiers`; all metadata fields are copied unchanged from
/// the corresponding `Benchmark` entry.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchFrontier {
    /// Benchmark name.
    pub name: String,
    /// Role string from the benchmark table (not interpreted here).
    pub role: String,
    /// Source string from the benchmark table.
    pub source: String,
    /// `YYYY-MM-DD` date string from the benchmark table.
    pub as_of: String,
    /// Optional harness description from the benchmark table.
    pub harness: Option<String>,
    /// Cost note from the benchmark table.
    pub cost_note: String,
    /// Points in the same order as in the benchmark table.
    pub points: Vec<FrontierPoint>,
}
163
/// A benchmark point annotated with its frontier standing and catalog status.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct FrontierPoint {
    /// Model identifier from the benchmark table.
    pub model_id: String,
    /// Display label from the benchmark table.
    pub label: String,
    /// Score from the benchmark table (validated to lie within `0..=100`).
    pub score_pct: Decimal,
    /// Cost per task from the benchmark table, when one was given.
    pub cost_per_task_usd: Option<Decimal>,
    /// Standing among the other points of the same benchmark, as computed by
    /// `standing_for`.
    pub standing: FrontierStanding,
    /// `true` when `PricingCatalog::model` returns an entry for `model_id`.
    pub priced_in_catalog: bool,
    /// Optional note from the benchmark table.
    pub note: Option<String>,
}
179
/// Where a point sits relative to the other costed points of its benchmark.
///
/// Variant names are serialized in `snake_case`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FrontierStanding {
    /// No other costed point is at least as cheap and at least as high
    /// scoring while being strictly better on one of the two.
    OnFrontier,
    /// Some other costed point dominates this one.
    Dominated {
        /// `model_id` of the first dominating point in table order.
        by: String,
    },
    /// The point has no `cost_per_task_usd`, so it is not compared.
    CostUnknown,
}
193
/// Aggregated API-lane usage for one model, overlaid on the benchmarks.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct OverlayModel {
    /// Key returned by `PricingCatalog::resolve_key` for the row's model, or
    /// the raw model name when the catalog does not resolve it.
    pub model_id: String,
    /// Raw `x_model` value of the first API-lane row grouped under this key.
    pub raw_model: String,
    /// Sum of `billed_cost` over the grouped API-lane rows.
    pub billed_cost: Decimal,
    /// Token totals accumulated from the grouped API-lane rows.
    pub tokens: TokenTotals,
    /// Every benchmark point whose `model_id` equals this model's key.
    pub appearances: Vec<OverlayAppearance>,
    /// One entry per re-pricing target (see `repricing_targets`).
    pub repricing: Vec<RepricingDelta>,
}
209
/// A benchmark in which an overlay model appears, with its score and
/// standing there.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct OverlayAppearance {
    /// Name of the benchmark containing the point.
    pub benchmark_name: String,
    /// The model's score on that benchmark.
    pub score_pct: Decimal,
    /// The model's frontier standing on that benchmark.
    pub standing: FrontierStanding,
}
216
/// Result of re-pricing one overlay model's token volume at a target model's
/// catalog rates.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct RepricingDelta {
    /// `model_id` of the target model.
    pub target_model_id: String,
    /// Display label of the target model.
    pub target_label: String,
    /// Re-priced total minus the billed cost when `status` is `Computed`;
    /// zero for every other status.
    pub delta_usd: Decimal,
    /// Whether `delta_usd` was computed and, if not, why.
    pub status: RepricingStatus,
    /// Names of the benchmarks in which the target is on the frontier.
    pub on_frontier_in: Vec<String>,
}
229
/// Outcome of a re-pricing attempt; serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum RepricingStatus {
    /// Every meter with non-zero volume had a target rate; the delta is real.
    Computed,
    /// `PricingCatalog::meter_price` had no rate for at least one meter with
    /// non-zero volume, so no delta was computed.
    TargetRateGap,
    /// The target is the overlay model itself; the delta is reported as zero.
    SameModel,
}
239
/// Hedge text and pricing date attached to every `BenchView`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct BenchDisclaimer {
    /// Static disclaimer text (`DISCLAIMER_NOTE`).
    pub note: &'static str,
    /// Copy of the bundled pricing catalog's `as_of` value.
    pub pricing_as_of: String,
}
247
/// Complete benchmark view returned by `bench_view`.
#[derive(Debug, Clone, PartialEq, Serialize)]
pub struct BenchView {
    /// Copied from `EngineSnapshot::generated_at`.
    pub generated_at: DateTime<Utc>,
    /// One frontier per bundled benchmark, in table order.
    pub frontiers: Vec<BenchFrontier>,
    /// One entry per distinct model key among API-lane rows, in ascending key
    /// order; empty when there are no such rows.
    pub overlay: Vec<OverlayModel>,
    /// `true` when the snapshot contains no API-lane rows.
    pub no_api_usage: bool,
    /// Disclaimer text plus the pricing catalog date.
    pub disclaimer: BenchDisclaimer,
    /// Clone of `EngineSnapshot::providers`.
    pub providers: Vec<ProviderStatus>,
}
263
264pub fn bench_view(snapshot: &EngineSnapshot) -> Result<BenchView, CoreError> {
271 let table = BenchmarkTable::bundled()?;
272 let pricing = PricingCatalog::bundled()?;
273 let frontiers = build_frontiers(&table, &pricing);
274 let disclaimer = BenchDisclaimer {
275 note: DISCLAIMER_NOTE,
276 pricing_as_of: pricing.as_of.clone(),
277 };
278
279 let mut accum: BTreeMap<String, OverlayAccum> = BTreeMap::new();
282 for row in &snapshot.focus_rows {
283 if CostLane::from_access_path(&row.x_access_path) != CostLane::Api {
284 continue;
285 }
286 let key = pricing
287 .resolve_key(&row.x_model)
288 .map(str::to_string)
289 .unwrap_or_else(|| row.x_model.clone());
290 let entry = accum.entry(key).or_insert_with(|| OverlayAccum {
291 raw_model: row.x_model.clone(),
292 billed_cost: Decimal::ZERO,
293 tokens: TokenTotals::default(),
294 });
295 entry.billed_cost += row.billed_cost;
296 entry
297 .tokens
298 .add(&row.x_token_type, decimal_to_u64(row.x_consumed_tokens));
299 }
300
301 if accum.is_empty() {
302 return Ok(BenchView {
303 generated_at: snapshot.generated_at,
304 frontiers,
305 overlay: Vec::new(),
306 no_api_usage: true,
307 disclaimer,
308 providers: snapshot.providers.clone(),
309 });
310 }
311
312 let targets = repricing_targets(&frontiers);
313 let overlay = accum
314 .into_iter()
315 .map(|(model_id, acc)| {
316 let appearances = frontier_appearances(&frontiers, &model_id);
317 let repricing =
318 repricing_for(&model_id, &acc.tokens, acc.billed_cost, &targets, &pricing);
319 OverlayModel {
320 model_id,
321 raw_model: acc.raw_model,
322 billed_cost: acc.billed_cost,
323 tokens: acc.tokens,
324 appearances,
325 repricing,
326 }
327 })
328 .collect();
329
330 Ok(BenchView {
331 generated_at: snapshot.generated_at,
332 frontiers,
333 overlay,
334 no_api_usage: false,
335 disclaimer,
336 providers: snapshot.providers.clone(),
337 })
338}
339
/// Running totals for one model key while `bench_view` scans API-lane rows.
struct OverlayAccum {
    /// Raw `x_model` of the first row that created this accumulator.
    raw_model: String,
    /// Sum of `billed_cost` over the rows seen so far.
    billed_cost: Decimal,
    /// Token totals over the rows seen so far.
    tokens: TokenTotals,
}
345
346fn build_frontiers(table: &BenchmarkTable, pricing: &PricingCatalog) -> Vec<BenchFrontier> {
347 table
348 .benchmarks
349 .iter()
350 .map(|benchmark| {
351 let points = benchmark
352 .points
353 .iter()
354 .enumerate()
355 .map(|(idx, point)| FrontierPoint {
356 model_id: point.model_id.clone(),
357 label: point.label.clone(),
358 score_pct: point.score_pct,
359 cost_per_task_usd: point.cost_per_task_usd,
360 standing: standing_for(point, &benchmark.points, idx),
361 priced_in_catalog: pricing.model(&point.model_id).is_some(),
362 note: point.note.clone(),
363 })
364 .collect();
365 BenchFrontier {
366 name: benchmark.name.clone(),
367 role: benchmark.role.clone(),
368 source: benchmark.source.clone(),
369 as_of: benchmark.as_of.clone(),
370 harness: benchmark.harness.clone(),
371 cost_note: benchmark.cost_note.clone(),
372 points,
373 }
374 })
375 .collect()
376}
377
378fn standing_for(point: &BenchmarkPoint, points: &[BenchmarkPoint], idx: usize) -> FrontierStanding {
383 let Some(cost) = point.cost_per_task_usd else {
384 return FrontierStanding::CostUnknown;
385 };
386 for (other_idx, other) in points.iter().enumerate() {
387 if other_idx == idx {
388 continue;
389 }
390 let Some(other_cost) = other.cost_per_task_usd else {
391 continue;
392 };
393 let cheaper_or_equal = other_cost <= cost;
394 let higher_or_equal = other.score_pct >= point.score_pct;
395 let strictly_better = other_cost < cost || other.score_pct > point.score_pct;
396 if cheaper_or_equal && higher_or_equal && strictly_better {
397 return FrontierStanding::Dominated {
398 by: other.model_id.clone(),
399 };
400 }
401 }
402 FrontierStanding::OnFrontier
403}
404
/// A model that overlay usage is re-priced against.
struct RepricingTarget {
    /// `model_id` of the target.
    model_id: String,
    /// Label taken from the first frontier point that introduced the target.
    label: String,
    /// Names of the benchmarks in which the target is priced and on the
    /// frontier, in frontier order.
    on_frontier_in: Vec<String>,
}
410
411fn repricing_targets(frontiers: &[BenchFrontier]) -> Vec<RepricingTarget> {
415 let mut by_model: BTreeMap<String, RepricingTarget> = BTreeMap::new();
416 for frontier in frontiers {
417 for point in &frontier.points {
418 if point.priced_in_catalog && point.standing == FrontierStanding::OnFrontier {
419 by_model
420 .entry(point.model_id.clone())
421 .or_insert_with(|| RepricingTarget {
422 model_id: point.model_id.clone(),
423 label: point.label.clone(),
424 on_frontier_in: Vec::new(),
425 })
426 .on_frontier_in
427 .push(frontier.name.clone());
428 }
429 }
430 }
431 by_model.into_values().collect()
432}
433
434fn frontier_appearances(frontiers: &[BenchFrontier], model_id: &str) -> Vec<OverlayAppearance> {
435 frontiers
436 .iter()
437 .flat_map(|frontier| {
438 frontier
439 .points
440 .iter()
441 .filter(move |point| point.model_id == model_id)
442 .map(move |point| OverlayAppearance {
443 benchmark_name: frontier.name.clone(),
444 score_pct: point.score_pct,
445 standing: point.standing.clone(),
446 })
447 })
448 .collect()
449}
450
451fn repricing_for(
452 model_id: &str,
453 tokens: &TokenTotals,
454 billed_cost: Decimal,
455 targets: &[RepricingTarget],
456 pricing: &PricingCatalog,
457) -> Vec<RepricingDelta> {
458 targets
459 .iter()
460 .map(|target| {
461 if target.model_id == model_id {
462 return RepricingDelta {
463 target_model_id: target.model_id.clone(),
464 target_label: target.label.clone(),
465 delta_usd: Decimal::ZERO,
466 status: RepricingStatus::SameModel,
467 on_frontier_in: target.on_frontier_in.clone(),
468 };
469 }
470 match repriced_total(tokens, &target.model_id, pricing) {
471 Some(repriced) => RepricingDelta {
472 target_model_id: target.model_id.clone(),
473 target_label: target.label.clone(),
474 delta_usd: repriced - billed_cost,
475 status: RepricingStatus::Computed,
476 on_frontier_in: target.on_frontier_in.clone(),
477 },
478 None => RepricingDelta {
479 target_model_id: target.model_id.clone(),
480 target_label: target.label.clone(),
481 delta_usd: Decimal::ZERO,
482 status: RepricingStatus::TargetRateGap,
483 on_frontier_in: target.on_frontier_in.clone(),
484 },
485 }
486 })
487 .collect()
488}
489
490fn repriced_total(tokens: &TokenTotals, target: &str, pricing: &PricingCatalog) -> Option<Decimal> {
494 let million = Decimal::from(1_000_000_u64);
495 let mut total = Decimal::ZERO;
496 for meter in METERS {
497 let volume = meter_volume(tokens, meter);
498 if volume == 0 {
499 continue;
500 }
501 let price = pricing.meter_price(target, meter)?;
502 total += Decimal::from(volume) * price / million;
503 }
504 Some(total)
505}
506
507fn meter_volume(tokens: &TokenTotals, meter: &str) -> u64 {
508 match meter {
509 "input" => tokens.input,
510 "output" => tokens.output,
511 "cache_read" => tokens.cache_read,
512 "cache_write" => tokens.cache_write,
513 _ => 0,
514 }
515}
516
#[cfg(test)]
mod tests {
    use super::*;
    use crate::focus_records_from_usage;
    use chrono::TimeZone;
    use costroid_providers::{AccessPath, ProviderId, UsageEvent};

    /// Fixed timestamp (2026-01-07 12:00:00 UTC) shared by every fixture.
    fn ts() -> DateTime<Utc> {
        match Utc.with_ymd_and_hms(2026, 1, 7, 12, 0, 0) {
            chrono::LocalResult::Single(value) => value,
            _ => panic!("fixed test timestamp should be valid"),
        }
    }

    /// Builds a usage event for `model` on the given access path with the
    /// given input/output token counts and no cache tokens.
    fn event(model: &str, access: AccessPath, input: u64, output: u64) -> UsageEvent {
        UsageEvent {
            tool: ProviderId::Codex,
            model: model.to_string(),
            timestamp: ts(),
            input_tokens: input,
            output_tokens: output,
            cache_read_tokens: 0,
            cache_write_tokens: 0,
            project: Some("/work/proj".to_string()),
            access_path: access,
        }
    }

    /// Builds a snapshot whose focus rows are derived from `events`; all
    /// other collections are left empty.
    fn snapshot(events: &[UsageEvent]) -> EngineSnapshot {
        let focus_rows = match focus_records_from_usage(events) {
            Ok(rows) => rows,
            Err(err) => panic!("events should price: {err}"),
        };
        EngineSnapshot {
            generated_at: ts(),
            usage_events: Vec::new(),
            focus_rows,
            limit_windows: Vec::new(),
            providers: Vec::new(),
        }
    }

    /// Looks up a frontier by benchmark name, panicking when it is absent.
    fn frontier<'a>(view: &'a BenchView, name: &str) -> &'a BenchFrontier {
        match view.frontiers.iter().find(|f| f.name == name) {
            Some(f) => f,
            None => panic!("benchmark {name} should be present"),
        }
    }

    /// Looks up a point by model id, panicking when it is absent.
    fn point<'a>(frontier: &'a BenchFrontier, model_id: &str) -> &'a FrontierPoint {
        match frontier.points.iter().find(|p| p.model_id == model_id) {
            Some(p) => p,
            None => panic!("point {model_id} should be present on {}", frontier.name),
        }
    }

    /// Builds a raw benchmark point with whole-number score and optional
    /// whole-number cost; the label mirrors the model id.
    fn benchmark_point(model_id: &str, score_pct: i64, cost: Option<i64>) -> BenchmarkPoint {
        BenchmarkPoint {
            model_id: model_id.to_string(),
            label: model_id.to_string(),
            score_pct: Decimal::from(score_pct),
            cost_per_task_usd: cost.map(Decimal::from),
            note: None,
        }
    }

    /// The bundled table validates and contains the two expected benchmarks
    /// with their expected `as_of` dates.
    #[test]
    fn bundled_benchmarks_parse_and_validate() {
        let table = match BenchmarkTable::bundled() {
            Ok(table) => table,
            Err(err) => panic!("bundled benchmarks should validate: {err}"),
        };
        assert_eq!(table.benchmarks.len(), 2);
        assert_eq!(table.benchmarks[0].name, "DeepSWE");
        assert_eq!(table.benchmarks[0].as_of, "2026-05-30");
        assert_eq!(table.benchmarks[1].name, "CursorBench v3.1");
        assert_eq!(table.benchmarks[1].as_of, "2026-05-18");
    }

    /// Placeholder, empty, out-of-range and wrongly formatted `as_of` values
    /// are all rejected; a well-formed date is accepted.
    #[test]
    fn as_of_guard_is_fail_closed() {
        let body = |as_of: &str| {
            format!(
                r#"{{"schema_version":"1","benchmarks":[{{"name":"X","role":"primary","source":"https://x","as_of":"{as_of}","cost_note":"n","points":[{{"model_id":"gpt-5.5","label":"g","score_pct":"70.0","cost_per_task_usd":"1.0"}}]}}]}}"#
            )
        };
        for bad in ["FILL_ME", "", "2026-13-99", "May 30 2026"] {
            match BenchmarkTable::from_json(&body(bad)) {
                Err(CoreError::BenchValidation(_)) => {}
                other => panic!("as_of {bad:?} should be rejected, got {other:?}"),
            }
        }
        assert!(BenchmarkTable::from_json(&body("2026-05-30")).is_ok());
    }

    /// Pins the standings of three DeepSWE points in the bundled data; the
    /// expectations depend on the bundled benchmark file's scores and costs.
    #[test]
    fn deepswe_opus47_is_dominated() {
        let view = match bench_view(&snapshot(&[])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        let deepswe = frontier(&view, "DeepSWE");
        assert_eq!(
            point(deepswe, "claude-opus-4-7").standing,
            FrontierStanding::Dominated {
                by: "gpt-5.5".to_string()
            }
        );
        assert_eq!(
            point(deepswe, "gpt-5.5").standing,
            FrontierStanding::OnFrontier
        );
        assert_eq!(
            point(deepswe, "claude-sonnet-4-6").standing,
            FrontierStanding::OnFrontier
        );
        assert!(point(deepswe, "claude-sonnet-4-6")
            .cost_per_task_usd
            .is_some());
    }

    /// A point without a cost is `CostUnknown` and does not dominate a
    /// costed point.
    #[test]
    fn cost_unknown_point_is_score_only() {
        let points = vec![
            benchmark_point("gpt-5.5", 70, Some(6)),
            benchmark_point("mystery", 40, None),
        ];
        assert_eq!(
            standing_for(&points[1], &points, 1),
            FrontierStanding::CostUnknown
        );
        assert_eq!(
            standing_for(&points[0], &points, 0),
            FrontierStanding::OnFrontier
        );
    }

    /// Exact ties dominate in neither direction; a tie on one axis with a
    /// strict win on the other does dominate.
    #[test]
    fn dominance_tie_handling() {
        let tied = vec![
            benchmark_point("a", 50, Some(5)),
            benchmark_point("b", 50, Some(5)),
        ];
        assert_eq!(
            standing_for(&tied[0], &tied, 0),
            FrontierStanding::OnFrontier
        );
        assert_eq!(
            standing_for(&tied[1], &tied, 1),
            FrontierStanding::OnFrontier
        );

        // Same cost, higher score wins.
        let same_cost = vec![
            benchmark_point("hi", 60, Some(5)),
            benchmark_point("lo", 50, Some(5)),
        ];
        assert_eq!(
            standing_for(&same_cost[1], &same_cost, 1),
            FrontierStanding::Dominated {
                by: "hi".to_string()
            }
        );

        // Same score, lower cost wins.
        let same_score = vec![
            benchmark_point("cheap", 50, Some(3)),
            benchmark_point("dear", 50, Some(8)),
        ];
        assert_eq!(
            standing_for(&same_score[1], &same_score, 1),
            FrontierStanding::Dominated {
                by: "cheap".to_string()
            }
        );
    }

    /// Only the API-lane event contributes to the overlay; the subscription
    /// event with identical volume is ignored. The expected 5.00 billed cost
    /// depends on the bundled pricing for gpt-5.5 input tokens.
    #[test]
    fn api_rows_only_excludes_subscription() {
        let view = match bench_view(&snapshot(&[
            event("gpt-5.5", AccessPath::Api, 1_000_000, 0),
            event("gpt-5.5", AccessPath::Subscription, 1_000_000, 0),
        ])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        assert!(!view.no_api_usage);
        assert_eq!(view.overlay.len(), 1);
        assert_eq!(view.overlay[0].tokens.input, 1_000_000);
        assert_eq!(view.overlay[0].billed_cost, Decimal::new(500, 2));
    }

    /// The overlay's summed billed cost equals the API-lane total reported
    /// by `now_summary` for the same snapshot.
    #[test]
    fn overlay_api_total_reconciles_with_now_summary() {
        let snap = snapshot(&[
            event("gpt-5.5", AccessPath::Api, 1_000_000, 500_000),
            event("claude-opus-4-7", AccessPath::Api, 200_000, 0),
            event("gpt-5.5", AccessPath::Subscription, 999_999, 0),
        ]);
        let view = match bench_view(&snap) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        let overlay_total: Decimal = view.overlay.iter().map(|m| m.billed_cost).sum();

        let now = crate::now_summary(&snap, crate::NowOptions::default());
        let now_api_total: Decimal = now
            .current_costs
            .iter()
            .filter(|c| c.lane == CostLane::Api)
            .map(|c| c.totals.billed_cost)
            .sum();

        assert_eq!(overlay_total, now_api_total);
    }

    /// With subscription-only usage the view flags `no_api_usage`, has an
    /// empty overlay, and still carries both frontiers.
    #[test]
    fn no_api_usage_zero_delta_reference() {
        let view = match bench_view(&snapshot(&[event(
            "gpt-5.5",
            AccessPath::Subscription,
            1_000_000,
            0,
        )])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        assert!(view.no_api_usage);
        assert!(view.overlay.is_empty());
        assert_eq!(view.frontiers.len(), 2);
    }

    /// Re-pricing opus usage at gpt-5.5 rates yields a computed delta whose
    /// sum with the billed cost is the re-priced total (20.00 under the
    /// bundled pricing); re-pricing against itself is `SameModel`.
    #[test]
    fn repricing_delta_on_known_volume() {
        let view = match bench_view(&snapshot(&[event(
            "claude-opus-4-7",
            AccessPath::Api,
            1_000_000,
            500_000,
        )])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        let opus = &view.overlay[0];
        let gpt = match opus
            .repricing
            .iter()
            .find(|d| d.target_model_id == "gpt-5.5")
        {
            Some(delta) => delta,
            None => panic!("gpt-5.5 should be a re-pricing target"),
        };
        assert_eq!(gpt.status, RepricingStatus::Computed);
        assert_eq!(gpt.delta_usd + opus.billed_cost, Decimal::new(2000, 2));
        let self_delta = opus
            .repricing
            .iter()
            .find(|d| d.target_model_id == "claude-opus-4-7");
        assert_eq!(
            self_delta.map(|d| d.status),
            Some(RepricingStatus::SameModel)
        );
    }

    /// composer-2.5 is shown on CursorBench with its note but, not being
    /// priced in the catalog, never becomes a re-pricing target.
    #[test]
    fn composer_is_a_gap_not_a_target() {
        let view = match bench_view(&snapshot(&[event(
            "claude-opus-4-7",
            AccessPath::Api,
            10,
            0,
        )])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        let cursorbench = frontier(&view, "CursorBench v3.1");
        let composer = point(cursorbench, "composer-2.5");
        assert!(!composer.priced_in_catalog);
        assert_eq!(
            composer.note.as_deref(),
            Some("Cursor subscription only - no API access")
        );
        for overlay in &view.overlay {
            assert!(
                overlay
                    .repricing
                    .iter()
                    .all(|d| d.target_model_id != "composer-2.5"),
                "composer-2.5 must never be a re-pricing target"
            );
        }
    }

    /// A used model that no benchmark lists still appears in the overlay,
    /// with no benchmark appearances.
    #[test]
    fn missing_model_is_a_gap() {
        let view = match bench_view(&snapshot(&[event(
            "claude-haiku-4-5",
            AccessPath::Api,
            10,
            0,
        )])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        let haiku = match view
            .overlay
            .iter()
            .find(|m| m.model_id == "claude-haiku-4-5")
        {
            Some(model) => model,
            None => panic!("haiku should be in the overlay"),
        };
        assert!(haiku.appearances.is_empty());
    }

    /// Cache-write-only usage cannot be re-priced at gpt-5.5 rates and is
    /// reported as `TargetRateGap` (presumably because the bundled catalog
    /// has no cache_write rate for gpt-5.5 — confirm against the pricing file).
    #[test]
    fn repricing_skips_target_rate_gap() {
        let mut cache_write_event = event("claude-opus-4-7", AccessPath::Api, 0, 0);
        cache_write_event.cache_write_tokens = 1_000_000;
        let view = match bench_view(&snapshot(&[cache_write_event])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        let opus = &view.overlay[0];
        let gpt = match opus
            .repricing
            .iter()
            .find(|d| d.target_model_id == "gpt-5.5")
        {
            Some(delta) => delta,
            None => panic!("gpt-5.5 should appear as a target"),
        };
        assert_eq!(gpt.status, RepricingStatus::TargetRateGap);
    }

    /// The disclaimer starts with the `~` hedge marker, disclaims quality,
    /// and carries a non-empty pricing date.
    #[test]
    fn disclaimer_carries_hedge_and_pricing_date() {
        let view = match bench_view(&snapshot(&[])) {
            Ok(view) => view,
            Err(err) => panic!("bench_view should build: {err}"),
        };
        assert!(view.disclaimer.note.starts_with('~'));
        assert!(view.disclaimer.note.contains("not a quality claim"));
        assert!(!view.disclaimer.pricing_as_of.is_empty());
    }
}