tt_shared/batch_advisor.rs
1//! Batch-eligibility advisor (Batch/Flex phase 1 — ADVISORY only).
2//!
3//! The OpenAI / Anthropic / Gemini Batch APIs price asynchronous (≤24h) traffic
4//! at ~50% of standard — the single biggest no-quality-loss cost lever. Building
5//! the durable batch-submission queue is deferred (P3); phase 1 is purely
6//! advisory: detect request-log traffic that is **batch-eligible** (tagged
7//! background / offline / nightly / bulk, i.e. latency-insensitive) and PROJECT
8//! the savings of moving it to the Batch API.
9//!
10//! This module is the pure, tool-groundable core: given request-log aggregates
11//! (which the advisor already reasons over) and the embedded pricing catalog, it
12//! produces a [`BatchFinding`] per eligible tag segment with the eligible spend
13//! and the projected Batch-API cost/savings. The savings are computed from the
14//! **real per-model batch rates in the catalog** (`pricing.toml` carries
15//! `batch_{input,output}_per_million`), NOT a hardcoded 50% — a model with no
16//! catalog batch tier contributes no projected savings (conservative).
17//!
18//! Nothing here submits anything to a batch API; it only surfaces the projection.
19
20use serde::{Deserialize, Serialize};
21
22use crate::pricing::PricingCatalog;
23
/// Tags that, by default, mark traffic as **non-interactive** and therefore
/// batch-eligible.
///
/// Each entry names latency-insensitive bulk / offline work — traffic that can
/// absorb the Batch API's asynchronous (≤24h) turnaround. A deployment with its
/// own "background" vocabulary can supply a different set per call through
/// [`project_batch_savings_with_tags`]; `tag=background` is the canonical
/// example used across the codebase (routing, request-log attribution).
pub const DEFAULT_BATCH_ELIGIBLE_TAGS: &[&str] = &[
    "background",
    "offline",
    "nightly",
    "batch",
    "bulk",
    "async",
];
33
34/// One tag-grouped request-log aggregate — the condensed view the advisor /
35/// inspect path computes over `request_logs` (e.g. `SELECT provider, model, tag,
36/// SUM(input_tokens), SUM(output_tokens), SUM(cost_usd), COUNT(*) ... GROUP BY
37/// provider, model, tag`). One row per `(provider, model, tag)` segment.
38#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
39pub struct RequestAggregate {
40 /// Registry provider id the requests were served by (e.g. `"openai"`).
41 pub provider: String,
42 /// Provider-side model id (e.g. `"gpt-5.5"`).
43 pub model: String,
44 /// The request tag for this segment. `None` = untagged traffic (never
45 /// batch-eligible — an untagged request is assumed interactive).
46 pub tag: Option<String>,
47 /// Summed input tokens across the segment.
48 pub input_tokens: u64,
49 /// Summed output tokens across the segment.
50 pub output_tokens: u64,
51 /// Summed cost (USD) the org actually paid for the segment — the
52 /// denominator for "% of spend".
53 pub cost_usd: f64,
54 /// Number of requests in the segment (for the human-readable summary).
55 pub request_count: u64,
56}
57
58/// A projected-savings finding for one batch-eligible tag segment.
59///
60/// Produced only for segments whose tag is in the configured eligible set AND
61/// whose `(provider, model)` carries a batch rate in the catalog. The projection
62/// is `eligible_spend − projected_batch_cost`, both computed from the catalog's
63/// real per-model batch rates.
64#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
65pub struct BatchFinding {
66 /// The eligible tag this finding is for (e.g. `"nightly-evals"`).
67 pub tag: String,
68 /// Current spend (USD) attributable to this eligible segment — summed
69 /// across every `(provider, model)` under the tag that has a catalog batch
70 /// rate (segments with no batch tier are excluded, since they can't be
71 /// projected and aren't batch-eligible at the provider).
72 pub eligible_spend_usd: f64,
73 /// Projected cost (USD) of the same traffic priced at the catalog's Batch
74 /// API rates.
75 pub projected_batch_cost_usd: f64,
76 /// `eligible_spend_usd − projected_batch_cost_usd`, floored at 0. The
77 /// advisory headline ("≈ $X/mo saved at the Batch rate").
78 pub projected_savings_usd: f64,
79 /// `eligible_spend_usd / total_spend_usd * 100` — what fraction of *all*
80 /// spend (across every segment considered) this eligible tag represents.
81 /// Drives the "tag=X is N% of spend and batch-eligible" phrasing.
82 pub share_of_spend_pct: f64,
83 /// Number of requests folded into this finding.
84 pub request_count: u64,
85}
86
87impl BatchFinding {
88 /// The effective batch discount this finding realizes, as a percentage of
89 /// the eligible spend (`savings / eligible_spend * 100`). ~50 for providers
90 /// at the documented Batch-API rate; lower if the segment mixes models with
91 /// and without a batch tier. `0.0` when there is no eligible spend.
92 #[must_use]
93 pub fn discount_pct(&self) -> f64 {
94 if self.eligible_spend_usd <= 0.0 {
95 0.0
96 } else {
97 self.projected_savings_usd / self.eligible_spend_usd * 100.0
98 }
99 }
100
101 /// A one-line, tool-grounded advisory sentence, e.g.
102 /// `"tag=nightly-evals is 31.0% of spend and batch-eligible → ~$12.40/mo
103 /// saved at the Batch API rate (−50%)"`.
104 #[must_use]
105 pub fn summary(&self) -> String {
106 format!(
107 "tag={} is {:.1}% of spend and batch-eligible → ~${:.2} saved at the Batch API rate (−{:.0}%) on {} request(s)",
108 self.tag,
109 self.share_of_spend_pct,
110 self.projected_savings_usd,
111 self.discount_pct(),
112 self.request_count,
113 )
114 }
115}
116
117/// Project Batch-API savings over request-log aggregates using the default
118/// non-interactive tag set ([`DEFAULT_BATCH_ELIGIBLE_TAGS`]).
119///
120/// See [`project_batch_savings_with_tags`] for the full contract.
121#[must_use]
122pub fn project_batch_savings(
123 aggregates: &[RequestAggregate],
124 catalog: &PricingCatalog,
125) -> Vec<BatchFinding> {
126 project_batch_savings_with_tags(aggregates, catalog, DEFAULT_BATCH_ELIGIBLE_TAGS)
127}
128
129/// Project Batch-API savings over request-log aggregates, treating any segment
130/// whose tag (case-insensitive) is in `eligible_tags` as batch-eligible.
131///
132/// For each eligible segment, the catalog's per-model batch rates price the
133/// segment's input + output tokens; the projected savings is
134/// `current_spend − projected_batch_cost`. Segments are folded into one
135/// [`BatchFinding`] per tag (a tag may span several models). Findings are
136/// returned **descending by projected savings** so the biggest lever is first;
137/// ties break by tag name for determinism.
138///
139/// A segment is **excluded** (contributes nothing, not flagged) when:
140/// - its tag is `None` or not in `eligible_tags` (interactive / unknown
141/// traffic is never batch-eligible), or
142/// - its `(provider, model)` has no batch rate in the catalog (the provider has
143/// no batch tier for that model, so there is nothing real to project — we do
144/// NOT fabricate a 50% discount where catalog data is absent).
145///
146/// `share_of_spend_pct` is measured against the **total** spend of every
147/// aggregate passed in (eligible or not), so it answers "what fraction of all
148/// spend is this batch-eligible tag".
149///
150/// Tags that are eligible but whose entire spend is unpriceable produce no
151/// finding (zero eligible spend → nothing to advise).
152#[must_use]
153pub fn project_batch_savings_with_tags(
154 aggregates: &[RequestAggregate],
155 catalog: &PricingCatalog,
156 eligible_tags: &[&str],
157) -> Vec<BatchFinding> {
158 let total_spend: f64 = aggregates.iter().map(|a| a.cost_usd).sum();
159
160 // Accumulate per-tag totals across all of the tag's batch-priceable models.
161 // Keyed by the original tag string (case preserved for the summary).
162 let mut by_tag: std::collections::BTreeMap<String, TagAccumulator> =
163 std::collections::BTreeMap::new();
164
165 for agg in aggregates {
166 let Some(tag) = agg.tag.as_deref() else {
167 continue; // untagged → interactive, never batch-eligible
168 };
169 if !eligible_tags.iter().any(|t| t.eq_ignore_ascii_case(tag)) {
170 continue; // tag not in the non-interactive set
171 }
172 // Resolve the catalog's batch rate for this model; skip if absent.
173 let Some(pricing) = catalog.latest(&agg.provider, &agg.model) else {
174 continue;
175 };
176 let (Some(batch_in), Some(batch_out)) = (
177 pricing.batch_input_per_million,
178 pricing.batch_output_per_million,
179 ) else {
180 continue; // no batch tier for this model → nothing to project
181 };
182
183 let projected = (agg.input_tokens as f64) * batch_in / 1_000_000.0
184 + (agg.output_tokens as f64) * batch_out / 1_000_000.0;
185
186 let entry = by_tag.entry(tag.to_string()).or_default();
187 entry.eligible_spend += agg.cost_usd;
188 entry.projected_batch += projected;
189 entry.request_count += agg.request_count;
190 }
191
192 let mut findings: Vec<BatchFinding> = by_tag
193 .into_iter()
194 .filter(|(_, acc)| acc.eligible_spend > 0.0)
195 .map(|(tag, acc)| {
196 let savings = (acc.eligible_spend - acc.projected_batch).max(0.0);
197 let share = if total_spend > 0.0 {
198 acc.eligible_spend / total_spend * 100.0
199 } else {
200 0.0
201 };
202 BatchFinding {
203 tag,
204 eligible_spend_usd: acc.eligible_spend,
205 projected_batch_cost_usd: acc.projected_batch,
206 projected_savings_usd: savings,
207 share_of_spend_pct: share,
208 request_count: acc.request_count,
209 }
210 })
211 .collect();
212
213 // Biggest lever first; deterministic tie-break by tag.
214 findings.sort_by(|a, b| {
215 b.projected_savings_usd
216 .partial_cmp(&a.projected_savings_usd)
217 .unwrap_or(std::cmp::Ordering::Equal)
218 .then_with(|| a.tag.cmp(&b.tag))
219 });
220 findings
221}
222
/// Per-tag running sums built up while folding aggregates; starts at all
/// zeros via `Default`.
#[derive(Default)]
struct TagAccumulator {
    /// Recorded spend (USD) of the tag's batch-priceable segments so far.
    eligible_spend: f64,
    /// Cost (USD) of those same segments re-priced at batch rates.
    projected_batch: f64,
    /// Requests contained in those segments.
    request_count: u64,
}
230
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pricing::catalog;

    /// Absolute tolerance used when comparing dollar / percentage values.
    const EPS: f64 = 1e-9;

    /// `true` when `actual` lies within [`EPS`] of `expected`.
    fn approx(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < EPS
    }

    /// Build one request-log segment for a test fixture.
    fn segment(
        provider: &str,
        model: &str,
        tag: Option<&str>,
        input_tokens: u64,
        output_tokens: u64,
        cost_usd: f64,
        request_count: u64,
    ) -> RequestAggregate {
        RequestAggregate {
            provider: provider.to_owned(),
            model: model.to_owned(),
            tag: tag.map(str::to_owned),
            input_tokens,
            output_tokens,
            cost_usd,
            request_count,
        }
    }

    /// Core behavior: an eligible tagged segment yields a finding carrying the
    /// catalog-rate (−50%) savings and the eligible spend, while non-eligible
    /// traffic in the same input is NOT flagged.
    #[test]
    fn flags_eligible_segment_with_catalog_rate_savings() {
        // gpt-5.5: standard $5/$30 per 1M, batch $2.50/$15 per 1M (50% off).
        // 1M input + 1M output @ standard = $5 + $30 = $35 actual spend.
        // The second, untagged gpt-5.5 segment is interactive and must be ignored.
        let aggs = [
            segment(
                "openai",
                "gpt-5.5",
                Some("nightly"),
                1_000_000,
                1_000_000,
                35.0,
                10,
            ),
            segment("openai", "gpt-5.5", None, 1_000_000, 1_000_000, 35.0, 10),
        ];

        let findings = project_batch_savings(&aggs, catalog());

        assert_eq!(findings.len(), 1, "only the tagged segment is flagged");
        let finding = &findings[0];
        assert_eq!(finding.tag, "nightly");
        assert!(
            approx(finding.eligible_spend_usd, 35.0),
            "eligible spend = actual spend of the tagged segment"
        );
        // batch cost: 1M*$2.50 + 1M*$15 = $17.50 → savings = $35 - $17.50.
        assert!(
            approx(finding.projected_batch_cost_usd, 17.50),
            "batch cost from catalog rates, got {}",
            finding.projected_batch_cost_usd
        );
        assert!(
            approx(finding.projected_savings_usd, 17.50),
            "−50% of $35 = $17.50, got {}",
            finding.projected_savings_usd
        );
        // share of spend: $35 eligible / $70 total = 50%.
        assert!(approx(finding.share_of_spend_pct, 50.0));
        // discount is the real catalog 50%, not a hardcoded constant.
        assert!(approx(finding.discount_pct(), 50.0));
        assert_eq!(finding.request_count, 10);
    }

    /// Tags outside the eligible set (interactive ones such as "chat") never
    /// produce a finding, even when they carry traffic.
    #[test]
    fn ignores_non_eligible_tags() {
        let aggs = [
            segment(
                "openai",
                "gpt-5.5",
                Some("chat"),
                1_000_000,
                1_000_000,
                35.0,
                5,
            ),
            segment("openai", "gpt-5.5", Some("interactive"), 500_000, 0, 2.5, 3),
        ];

        let findings = project_batch_savings(&aggs, catalog());

        assert!(
            findings.is_empty(),
            "no eligible tags present → no findings: {findings:?}"
        );
    }

    /// A model with NO catalog batch tier contributes no projected savings —
    /// a 50% discount is never invented where the catalog has no data.
    #[test]
    fn model_without_batch_tier_is_not_projected() {
        // groq llama has no batch_{input,output}_per_million in the catalog.
        let aggs = [segment(
            "groq",
            "llama-3.1-8b-instant",
            Some("background"),
            1_000_000,
            1_000_000,
            1.0,
            4,
        )];

        let findings = project_batch_savings(&aggs, catalog());

        assert!(
            findings.is_empty(),
            "no batch tier → nothing to project, got {findings:?}"
        );
    }

    /// An eligible tag that spans several models collapses into one finding
    /// whose spend and batch projection are the per-model sums.
    #[test]
    fn folds_multiple_models_under_one_tag() {
        // gpt-5.5 batch $2.50/$15 and gpt-5.4 batch $1.25/$7.50.
        let aggs = [
            segment("openai", "gpt-5.5", Some("bulk"), 1_000_000, 0, 5.0, 2),
            segment("openai", "gpt-5.4", Some("bulk"), 1_000_000, 0, 2.5, 3),
        ];

        let findings = project_batch_savings(&aggs, catalog());

        assert_eq!(findings.len(), 1);
        let finding = &findings[0];
        assert_eq!(finding.tag, "bulk");
        assert!(approx(finding.eligible_spend_usd, 7.5), "5.0 + 2.5");
        // batch input: 1M*$2.50 + 1M*$1.25 = $3.75 → savings $7.5 - $3.75.
        assert!(approx(finding.projected_batch_cost_usd, 3.75));
        assert!(approx(finding.projected_savings_usd, 3.75));
        assert_eq!(finding.request_count, 5);
    }

    /// Findings come back ordered by projected savings, biggest lever first.
    #[test]
    fn findings_sorted_by_savings_desc() {
        let small = segment("openai", "gpt-5.4", Some("offline"), 1_000_000, 0, 2.5, 1);
        let large = segment("openai", "gpt-5.5", Some("nightly"), 10_000_000, 0, 50.0, 1);
        // Deliberately feed the smaller segment first.
        let aggs = [small, large];

        let findings = project_batch_savings(&aggs, catalog());

        assert_eq!(findings.len(), 2);
        assert_eq!(findings[0].tag, "nightly", "bigger savings first");
        assert!(findings[0].projected_savings_usd > findings[1].projected_savings_usd);
    }

    /// Tag matching ignores case ("Background" matches "background").
    #[test]
    fn tag_match_is_case_insensitive() {
        let aggs = [segment(
            "openai",
            "gpt-5.5",
            Some("Background"),
            1_000_000,
            0,
            5.0,
            1,
        )];

        let findings = project_batch_savings(&aggs, catalog());

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].tag, "Background", "original case preserved");
    }

    /// An explicit eligible-tag set replaces the default vocabulary entirely.
    #[test]
    fn honors_configurable_tag_set() {
        let custom_tags = ["nightly-evals"];
        let aggs = [
            segment(
                "openai",
                "gpt-5.5",
                Some("nightly-evals"),
                1_000_000,
                0,
                5.0,
                1,
            ),
            // "background" is in the DEFAULT set but NOT in our custom set:
            segment(
                "openai",
                "gpt-5.5",
                Some("background"),
                1_000_000,
                0,
                5.0,
                1,
            ),
        ];

        let findings = project_batch_savings_with_tags(&aggs, catalog(), &custom_tags);

        assert_eq!(findings.len(), 1, "only the custom tag matches");
        assert_eq!(findings[0].tag, "nightly-evals");
    }

    /// The summary sentence is tool-grounded: it carries the tag, the share of
    /// spend, the dollar savings, the realized discount and the request count.
    #[test]
    fn summary_is_grounded_and_human_readable() {
        let finding = BatchFinding {
            tag: "nightly-evals".into(),
            eligible_spend_usd: 40.0,
            projected_batch_cost_usd: 20.0,
            projected_savings_usd: 20.0,
            share_of_spend_pct: 31.0,
            request_count: 128,
        };

        let s = finding.summary();

        assert!(s.contains("tag=nightly-evals"), "{s}");
        assert!(s.contains("31.0% of spend"), "{s}");
        assert!(s.contains("$20.00"), "{s}");
        assert!(s.contains("−50%"), "{s}");
        assert!(s.contains("128 request"), "{s}");
    }

    /// Empty input → no findings and no panic (e.g. no division by zero when
    /// computing the share of spend).
    #[test]
    fn empty_aggregates_produce_no_findings() {
        let findings = project_batch_savings(&[], catalog());
        assert!(findings.is_empty());
    }

    /// Anthropic batch rates flow through as well (50% per the catalog).
    #[test]
    fn anthropic_eligible_segment_uses_catalog_batch_rate() {
        // claude-opus-4-8: standard $5/$25, batch $2.50/$12.50.
        // 1M in + 1M out standard = $30 actual.
        let aggs = [segment(
            "anthropic",
            "claude-opus-4-8",
            Some("offline"),
            1_000_000,
            1_000_000,
            30.0,
            7,
        )];

        let findings = project_batch_savings(&aggs, catalog());

        assert_eq!(findings.len(), 1);
        let finding = &findings[0];
        // batch: 1M*$2.50 + 1M*$12.50 = $15 → savings $30 - $15 = $15.
        assert!(approx(finding.projected_batch_cost_usd, 15.0));
        assert!(approx(finding.projected_savings_usd, 15.0));
    }
}