Skip to main content

tt_shared/
batch_advisor.rs

//! Batch-eligibility advisor (Batch/Flex phase 1 — ADVISORY only).
//!
//! The OpenAI / Anthropic / Gemini Batch APIs price asynchronous (≤24h) traffic
//! at ~50% of standard — the single biggest no-quality-loss cost lever. Building
//! the durable batch-submission queue is deferred (P3); phase 1 is purely
//! advisory: detect request-log traffic that is **batch-eligible** (tagged
//! background / offline / nightly / bulk, i.e. latency-insensitive) and PROJECT
//! the savings of moving it to the Batch API.
//!
//! This module is the pure, tool-groundable core: given request-log aggregates
//! (which the advisor already reasons over) and the embedded pricing catalog, it
//! produces a [`BatchFinding`] per eligible tag segment with the eligible spend
//! and the projected Batch-API cost/savings. The savings are computed from the
//! **real per-model batch rates in the catalog** (`pricing.toml` carries
//! `batch_{input,output}_per_million`), NOT a hardcoded 50% — a model with no
//! catalog batch tier contributes no projected savings (conservative).
//!
//! Nothing here submits anything to a batch API; it only surfaces the projection.
19
20use serde::{Deserialize, Serialize};
21
22use crate::pricing::PricingCatalog;
23
/// Tags that mark traffic as **non-interactive**, and therefore batch-eligible,
/// when the caller does not supply its own vocabulary.
///
/// Each entry names latency-insensitive bulk / offline work that can wait for
/// the Batch API's asynchronous (≤24h) turnaround. Deployments with a different
/// "background" vocabulary can pass their own list to
/// [`project_batch_savings_with_tags`]; `tag=background` is the canonical
/// example used across the codebase (routing, request-log attribution).
pub const DEFAULT_BATCH_ELIGIBLE_TAGS: &[&str] = &[
    "background",
    "offline",
    "nightly",
    "batch",
    "bulk",
    "async",
];
33
34/// One tag-grouped request-log aggregate — the condensed view the advisor /
35/// inspect path computes over `request_logs` (e.g. `SELECT provider, model, tag,
36/// SUM(input_tokens), SUM(output_tokens), SUM(cost_usd), COUNT(*) ... GROUP BY
37/// provider, model, tag`). One row per `(provider, model, tag)` segment.
38#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
39pub struct RequestAggregate {
40    /// Registry provider id the requests were served by (e.g. `"openai"`).
41    pub provider: String,
42    /// Provider-side model id (e.g. `"gpt-5.5"`).
43    pub model: String,
44    /// The request tag for this segment. `None` = untagged traffic (never
45    /// batch-eligible — an untagged request is assumed interactive).
46    pub tag: Option<String>,
47    /// Summed input tokens across the segment.
48    pub input_tokens: u64,
49    /// Summed output tokens across the segment.
50    pub output_tokens: u64,
51    /// Summed cost (USD) the org actually paid for the segment — the
52    /// denominator for "% of spend".
53    pub cost_usd: f64,
54    /// Number of requests in the segment (for the human-readable summary).
55    pub request_count: u64,
56}
57
58/// A projected-savings finding for one batch-eligible tag segment.
59///
60/// Produced only for segments whose tag is in the configured eligible set AND
61/// whose `(provider, model)` carries a batch rate in the catalog. The projection
62/// is `eligible_spend − projected_batch_cost`, both computed from the catalog's
63/// real per-model batch rates.
64#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
65pub struct BatchFinding {
66    /// The eligible tag this finding is for (e.g. `"nightly-evals"`).
67    pub tag: String,
68    /// Current spend (USD) attributable to this eligible segment — summed
69    /// across every `(provider, model)` under the tag that has a catalog batch
70    /// rate (segments with no batch tier are excluded, since they can't be
71    /// projected and aren't batch-eligible at the provider).
72    pub eligible_spend_usd: f64,
73    /// Projected cost (USD) of the same traffic priced at the catalog's Batch
74    /// API rates.
75    pub projected_batch_cost_usd: f64,
76    /// `eligible_spend_usd − projected_batch_cost_usd`, floored at 0. The
77    /// advisory headline ("≈ $X/mo saved at the Batch rate").
78    pub projected_savings_usd: f64,
79    /// `eligible_spend_usd / total_spend_usd * 100` — what fraction of *all*
80    /// spend (across every segment considered) this eligible tag represents.
81    /// Drives the "tag=X is N% of spend and batch-eligible" phrasing.
82    pub share_of_spend_pct: f64,
83    /// Number of requests folded into this finding.
84    pub request_count: u64,
85}
86
87impl BatchFinding {
88    /// The effective batch discount this finding realizes, as a percentage of
89    /// the eligible spend (`savings / eligible_spend * 100`). ~50 for providers
90    /// at the documented Batch-API rate; lower if the segment mixes models with
91    /// and without a batch tier. `0.0` when there is no eligible spend.
92    #[must_use]
93    pub fn discount_pct(&self) -> f64 {
94        if self.eligible_spend_usd <= 0.0 {
95            0.0
96        } else {
97            self.projected_savings_usd / self.eligible_spend_usd * 100.0
98        }
99    }
100
101    /// A one-line, tool-grounded advisory sentence, e.g.
102    /// `"tag=nightly-evals is 31.0% of spend and batch-eligible → ~$12.40/mo
103    /// saved at the Batch API rate (−50%)"`.
104    #[must_use]
105    pub fn summary(&self) -> String {
106        format!(
107            "tag={} is {:.1}% of spend and batch-eligible → ~${:.2} saved at the Batch API rate (−{:.0}%) on {} request(s)",
108            self.tag,
109            self.share_of_spend_pct,
110            self.projected_savings_usd,
111            self.discount_pct(),
112            self.request_count,
113        )
114    }
115}
116
117/// Project Batch-API savings over request-log aggregates using the default
118/// non-interactive tag set ([`DEFAULT_BATCH_ELIGIBLE_TAGS`]).
119///
120/// See [`project_batch_savings_with_tags`] for the full contract.
121#[must_use]
122pub fn project_batch_savings(
123    aggregates: &[RequestAggregate],
124    catalog: &PricingCatalog,
125) -> Vec<BatchFinding> {
126    project_batch_savings_with_tags(aggregates, catalog, DEFAULT_BATCH_ELIGIBLE_TAGS)
127}
128
129/// Project Batch-API savings over request-log aggregates, treating any segment
130/// whose tag (case-insensitive) is in `eligible_tags` as batch-eligible.
131///
132/// For each eligible segment, the catalog's per-model batch rates price the
133/// segment's input + output tokens; the projected savings is
134/// `current_spend − projected_batch_cost`. Segments are folded into one
135/// [`BatchFinding`] per tag (a tag may span several models). Findings are
136/// returned **descending by projected savings** so the biggest lever is first;
137/// ties break by tag name for determinism.
138///
139/// A segment is **excluded** (contributes nothing, not flagged) when:
140/// - its tag is `None` or not in `eligible_tags` (interactive / unknown
141///   traffic is never batch-eligible), or
142/// - its `(provider, model)` has no batch rate in the catalog (the provider has
143///   no batch tier for that model, so there is nothing real to project — we do
144///   NOT fabricate a 50% discount where catalog data is absent).
145///
146/// `share_of_spend_pct` is measured against the **total** spend of every
147/// aggregate passed in (eligible or not), so it answers "what fraction of all
148/// spend is this batch-eligible tag".
149///
150/// Tags that are eligible but whose entire spend is unpriceable produce no
151/// finding (zero eligible spend → nothing to advise).
152#[must_use]
153pub fn project_batch_savings_with_tags(
154    aggregates: &[RequestAggregate],
155    catalog: &PricingCatalog,
156    eligible_tags: &[&str],
157) -> Vec<BatchFinding> {
158    let total_spend: f64 = aggregates.iter().map(|a| a.cost_usd).sum();
159
160    // Accumulate per-tag totals across all of the tag's batch-priceable models.
161    // Keyed by the original tag string (case preserved for the summary).
162    let mut by_tag: std::collections::BTreeMap<String, TagAccumulator> =
163        std::collections::BTreeMap::new();
164
165    for agg in aggregates {
166        let Some(tag) = agg.tag.as_deref() else {
167            continue; // untagged → interactive, never batch-eligible
168        };
169        if !eligible_tags.iter().any(|t| t.eq_ignore_ascii_case(tag)) {
170            continue; // tag not in the non-interactive set
171        }
172        // Resolve the catalog's batch rate for this model; skip if absent.
173        let Some(pricing) = catalog.latest(&agg.provider, &agg.model) else {
174            continue;
175        };
176        let (Some(batch_in), Some(batch_out)) = (
177            pricing.batch_input_per_million,
178            pricing.batch_output_per_million,
179        ) else {
180            continue; // no batch tier for this model → nothing to project
181        };
182
183        let projected = (agg.input_tokens as f64) * batch_in / 1_000_000.0
184            + (agg.output_tokens as f64) * batch_out / 1_000_000.0;
185
186        let entry = by_tag.entry(tag.to_string()).or_default();
187        entry.eligible_spend += agg.cost_usd;
188        entry.projected_batch += projected;
189        entry.request_count += agg.request_count;
190    }
191
192    let mut findings: Vec<BatchFinding> = by_tag
193        .into_iter()
194        .filter(|(_, acc)| acc.eligible_spend > 0.0)
195        .map(|(tag, acc)| {
196            let savings = (acc.eligible_spend - acc.projected_batch).max(0.0);
197            let share = if total_spend > 0.0 {
198                acc.eligible_spend / total_spend * 100.0
199            } else {
200                0.0
201            };
202            BatchFinding {
203                tag,
204                eligible_spend_usd: acc.eligible_spend,
205                projected_batch_cost_usd: acc.projected_batch,
206                projected_savings_usd: savings,
207                share_of_spend_pct: share,
208                request_count: acc.request_count,
209            }
210        })
211        .collect();
212
213    // Biggest lever first; deterministic tie-break by tag.
214    findings.sort_by(|a, b| {
215        b.projected_savings_usd
216            .partial_cmp(&a.projected_savings_usd)
217            .unwrap_or(std::cmp::Ordering::Equal)
218            .then_with(|| a.tag.cmp(&b.tag))
219    });
220    findings
221}
222
/// Per-tag running sums built up while folding eligible aggregates.
#[derive(Default)]
struct TagAccumulator {
    /// Sum of `cost_usd` over the tag's batch-priceable segments.
    eligible_spend: f64,
    /// Sum of those segments' cost repriced at the catalog batch rates.
    projected_batch: f64,
    /// Sum of `request_count` over the same segments.
    request_count: u64,
}
230
#[cfg(test)]
mod tests {
    use super::*;
    use crate::pricing::catalog;

    /// Build a [`RequestAggregate`] fixture from positional values.
    fn agg(
        provider: &str,
        model: &str,
        tag: Option<&str>,
        input: u64,
        output: u64,
        cost: f64,
        count: u64,
    ) -> RequestAggregate {
        RequestAggregate {
            provider: provider.into(),
            model: model.into(),
            tag: tag.map(str::to_string),
            input_tokens: input,
            output_tokens: output,
            cost_usd: cost,
            request_count: count,
        }
    }

    /// Core TDD behavior: a batch-eligible tagged segment produces a finding
    /// with the correct projected −50% (catalog-rate) savings + the eligible
    /// spend; non-eligible traffic in the same set is NOT flagged.
    #[test]
    fn flags_eligible_segment_with_catalog_rate_savings() {
        let c = catalog();
        // gpt-5.5: standard $5/$30 per 1M, batch $2.50/$15 per 1M (50% off).
        // 1M input + 1M output @ standard = $5 + $30 = $35 actual spend.
        // Plus an interactive (untagged) gpt-5.5 segment that must be ignored.
        let aggs = vec![
            agg(
                "openai",
                "gpt-5.5",
                Some("nightly"),
                1_000_000,
                1_000_000,
                35.0,
                10,
            ),
            agg("openai", "gpt-5.5", None, 1_000_000, 1_000_000, 35.0, 10),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1, "only the tagged segment is flagged");
        let f = &findings[0];
        assert_eq!(f.tag, "nightly");
        assert!(
            (f.eligible_spend_usd - 35.0).abs() < 1e-9,
            "eligible spend = actual spend of the tagged segment"
        );
        // batch cost: 1M*$2.50 + 1M*$15 = $17.50 → savings = $35 - $17.50.
        assert!(
            (f.projected_batch_cost_usd - 17.50).abs() < 1e-9,
            "batch cost from catalog rates, got {}",
            f.projected_batch_cost_usd
        );
        assert!(
            (f.projected_savings_usd - 17.50).abs() < 1e-9,
            "−50% of $35 = $17.50, got {}",
            f.projected_savings_usd
        );
        // share of spend: $35 eligible / $70 total = 50%.
        assert!((f.share_of_spend_pct - 50.0).abs() < 1e-9);
        // discount is the real catalog 50%, not a hardcoded constant.
        assert!((f.discount_pct() - 50.0).abs() < 1e-9);
        assert_eq!(f.request_count, 10);
    }

    /// Non-eligible tags (an interactive tag like "chat") are never flagged,
    /// even when traffic exists for them.
    #[test]
    fn ignores_non_eligible_tags() {
        let c = catalog();
        let aggs = vec![
            agg(
                "openai",
                "gpt-5.5",
                Some("chat"),
                1_000_000,
                1_000_000,
                35.0,
                5,
            ),
            agg("openai", "gpt-5.5", Some("interactive"), 500_000, 0, 2.5, 3),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert!(
            findings.is_empty(),
            "no eligible tags present → no findings: {findings:?}"
        );
    }

    /// A model with NO catalog batch tier contributes no projected savings —
    /// we never fabricate a 50% discount where the catalog has no data.
    #[test]
    fn model_without_batch_tier_is_not_projected() {
        let c = catalog();
        // groq llama has no batch_{input,output}_per_million in the catalog.
        let aggs = vec![agg(
            "groq",
            "llama-3.1-8b-instant",
            Some("background"),
            1_000_000,
            1_000_000,
            1.0,
            4,
        )];
        let findings = project_batch_savings(&aggs, c);
        assert!(
            findings.is_empty(),
            "no batch tier → nothing to project, got {findings:?}"
        );
    }

    /// One eligible tag spanning several models folds into a single finding,
    /// summing eligible spend and per-model batch projections.
    #[test]
    fn folds_multiple_models_under_one_tag() {
        let c = catalog();
        // gpt-5.5 batch $2.50/$15 and gpt-5.4 batch $1.25/$7.50.
        let aggs = vec![
            agg("openai", "gpt-5.5", Some("bulk"), 1_000_000, 0, 5.0, 2),
            agg("openai", "gpt-5.4", Some("bulk"), 1_000_000, 0, 2.5, 3),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1);
        let f = &findings[0];
        assert_eq!(f.tag, "bulk");
        assert!((f.eligible_spend_usd - 7.5).abs() < 1e-9, "5.0 + 2.5");
        // batch input: 1M*$2.50 + 1M*$1.25 = $3.75 → savings $7.5 - $3.75.
        assert!((f.projected_batch_cost_usd - 3.75).abs() < 1e-9);
        assert!((f.projected_savings_usd - 3.75).abs() < 1e-9);
        assert_eq!(f.request_count, 5);
    }

    /// Findings sort by projected savings (biggest lever first).
    #[test]
    fn findings_sorted_by_savings_desc() {
        let c = catalog();
        let aggs = vec![
            // small eligible segment
            agg("openai", "gpt-5.4", Some("offline"), 1_000_000, 0, 2.5, 1),
            // large eligible segment
            agg("openai", "gpt-5.5", Some("nightly"), 10_000_000, 0, 50.0, 1),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 2);
        assert_eq!(findings[0].tag, "nightly", "bigger savings first");
        assert!(findings[0].projected_savings_usd > findings[1].projected_savings_usd);
    }

    /// Equal projected savings fall back to ascending tag order, so the output
    /// is deterministic regardless of the order the aggregates arrive in.
    #[test]
    fn equal_savings_tie_break_by_tag() {
        let c = catalog();
        // Same model, tokens and spend under both tags → identical savings.
        // Input order is deliberately the reverse of the expected output.
        let aggs = vec![
            agg("openai", "gpt-5.5", Some("offline"), 1_000_000, 0, 5.0, 1),
            agg("openai", "gpt-5.5", Some("bulk"), 1_000_000, 0, 5.0, 1),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 2);
        assert!(
            (findings[0].projected_savings_usd - findings[1].projected_savings_usd).abs() < 1e-9,
            "fixture must produce a tie"
        );
        assert_eq!(findings[0].tag, "bulk", "ties break by ascending tag");
        assert_eq!(findings[1].tag, "offline");
    }

    /// Tag matching is case-insensitive ("Background" matches "background").
    #[test]
    fn tag_match_is_case_insensitive() {
        let c = catalog();
        let aggs = vec![agg(
            "openai",
            "gpt-5.5",
            Some("Background"),
            1_000_000,
            0,
            5.0,
            1,
        )];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].tag, "Background", "original case preserved");
    }

    /// A custom (configurable) eligible-tag set is honored; the default
    /// vocabulary is ignored when an explicit set is given.
    #[test]
    fn honors_configurable_tag_set() {
        let c = catalog();
        let aggs = vec![
            agg(
                "openai",
                "gpt-5.5",
                Some("nightly-evals"),
                1_000_000,
                0,
                5.0,
                1,
            ),
            // "background" is in the DEFAULT set but NOT in our custom set:
            agg(
                "openai",
                "gpt-5.5",
                Some("background"),
                1_000_000,
                0,
                5.0,
                1,
            ),
        ];
        let findings = project_batch_savings_with_tags(&aggs, c, &["nightly-evals"]);
        assert_eq!(findings.len(), 1, "only the custom tag matches");
        assert_eq!(findings[0].tag, "nightly-evals");
    }

    /// The summary sentence is tool-grounded: carries the tag, the share of
    /// spend, the dollar savings, and the realized discount.
    #[test]
    fn summary_is_grounded_and_human_readable() {
        let f = BatchFinding {
            tag: "nightly-evals".into(),
            eligible_spend_usd: 40.0,
            projected_batch_cost_usd: 20.0,
            projected_savings_usd: 20.0,
            share_of_spend_pct: 31.0,
            request_count: 128,
        };
        let s = f.summary();
        assert!(s.contains("tag=nightly-evals"), "{s}");
        assert!(s.contains("31.0% of spend"), "{s}");
        assert!(s.contains("$20.00"), "{s}");
        assert!(s.contains("−50%"), "{s}");
        assert!(s.contains("128 request"), "{s}");
    }

    /// Empty input → no findings, no panic (e.g. division by zero on share).
    #[test]
    fn empty_aggregates_produce_no_findings() {
        let c = catalog();
        assert!(project_batch_savings(&[], c).is_empty());
    }

    /// Anthropic batch rates also flow through (50% per the catalog).
    #[test]
    fn anthropic_eligible_segment_uses_catalog_batch_rate() {
        let c = catalog();
        // claude-opus-4-8: standard $5/$25, batch $2.50/$12.50.
        // 1M in + 1M out standard = $30 actual.
        let aggs = vec![agg(
            "anthropic",
            "claude-opus-4-8",
            Some("offline"),
            1_000_000,
            1_000_000,
            30.0,
            7,
        )];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1);
        let f = &findings[0];
        // batch: 1M*$2.50 + 1M*$12.50 = $15 → savings $30 - $15 = $15.
        assert!((f.projected_batch_cost_usd - 15.0).abs() < 1e-9);
        assert!((f.projected_savings_usd - 15.0).abs() < 1e-9);
    }
}
484}