tt-shared 0.1.2

Shared types, errors, and Provider trait for TokenTrimmer.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
//! Batch-eligibility advisor (Batch/Flex phase 1 — ADVISORY only).
//!
//! The OpenAI / Anthropic / Gemini Batch APIs price asynchronous (≤24h) traffic
//! at ~50% of standard — the single biggest no-quality-loss cost lever. Building
//! the durable batch-submission queue is deferred (P3); phase 1 is purely
//! advisory: detect request-log traffic that is **batch-eligible** (tagged
//! background / offline / nightly / bulk, i.e. latency-insensitive) and PROJECT
//! the savings of moving it to the Batch API.
//!
//! This module is the pure, tool-groundable core: given request-log aggregates
//! (which the advisor already reasons over) and the embedded pricing catalog, it
//! produces a [`BatchFinding`] per eligible tag segment with the eligible spend
//! and the projected Batch-API cost/savings. The savings are computed from the
//! **real per-model batch rates in the catalog** (`pricing.toml` carries
//! `batch_{input,output}_per_million`), NOT a hardcoded 50% — a model with no
//! catalog batch tier contributes no projected savings (conservative).
//!
//! Nothing here submits anything to a batch API; it only surfaces the projection.

use serde::{Deserialize, Serialize};

use crate::pricing::PricingCatalog;

/// Default set of tags treated as **non-interactive** (batch-eligible) traffic.
///
/// These mark latency-insensitive bulk / offline work that can tolerate the
/// Batch API's async (≤24h) turnaround. [`project_batch_savings`] uses this
/// set as-is; [`project_batch_savings_with_tags`] takes a caller-supplied set
/// instead, so a deployment can configure its own "background" tag
/// vocabulary. Request tags are compared against the set ASCII
/// case-insensitively, so the lowercase spellings here also match e.g.
/// `"Background"`. `tag=background` is the canonical example used across the
/// codebase (routing, request-log attribution).
pub const DEFAULT_BATCH_ELIGIBLE_TAGS: &[&str] =
    &["background", "offline", "nightly", "batch", "bulk", "async"];

/// One tag-grouped request-log aggregate — the condensed view the advisor /
/// inspect path computes over `request_logs` (e.g. `SELECT provider, model, tag,
/// SUM(input_tokens), SUM(output_tokens), SUM(cost_usd), COUNT(*) ... GROUP BY
/// provider, model, tag`). One row per `(provider, model, tag)` segment.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RequestAggregate {
    /// Registry provider id the requests were served by (e.g. `"openai"`).
    /// Together with `model`, this is the key used to look up catalog pricing.
    pub provider: String,
    /// Provider-side model id (e.g. `"gpt-5.5"`).
    pub model: String,
    /// The request tag for this segment. `None` = untagged traffic (never
    /// batch-eligible — an untagged request is assumed interactive).
    pub tag: Option<String>,
    /// Summed input tokens across the segment; priced at the catalog's batch
    /// input rate (per million tokens) when projecting.
    pub input_tokens: u64,
    /// Summed output tokens across the segment; priced at the catalog's batch
    /// output rate (per million tokens) when projecting.
    pub output_tokens: u64,
    /// Summed cost (USD) the org actually paid for the segment. Every
    /// aggregate's cost feeds the total used as the denominator for
    /// "% of spend"; eligible segments' cost is also the baseline the batch
    /// projection is subtracted from.
    pub cost_usd: f64,
    /// Number of requests in the segment (for the human-readable summary).
    pub request_count: u64,
}

/// A projected-savings finding for one batch-eligible tag segment.
///
/// Produced only for segments whose tag is in the configured eligible set AND
/// whose `(provider, model)` carries a batch rate in the catalog. The projection
/// is `eligible_spend − projected_batch_cost`, both computed from the catalog's
/// real per-model batch rates.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BatchFinding {
    /// The eligible tag this finding is for (e.g. `"nightly-evals"`), in the
    /// spelling it had in the request-log aggregate (case is not normalized).
    pub tag: String,
    /// Current spend (USD) attributable to this eligible segment — summed
    /// across every `(provider, model)` under the tag that has a catalog batch
    /// rate (segments with no batch tier are excluded, since they can't be
    /// projected and aren't batch-eligible at the provider).
    pub eligible_spend_usd: f64,
    /// Projected cost (USD) of the same traffic priced at the catalog's Batch
    /// API rates.
    pub projected_batch_cost_usd: f64,
    /// `eligible_spend_usd − projected_batch_cost_usd`, floored at 0. The
    /// advisory headline ("≈ $X saved at the Batch rate"); the figure covers
    /// whatever time window the input aggregates cover.
    pub projected_savings_usd: f64,
    /// `eligible_spend_usd / total_spend_usd * 100` — what fraction of *all*
    /// spend (across every segment considered) this eligible tag represents.
    /// Drives the "tag=X is N% of spend and batch-eligible" phrasing. `0.0`
    /// when the total spend is not positive.
    pub share_of_spend_pct: f64,
    /// Number of requests folded into this finding.
    pub request_count: u64,
}

impl BatchFinding {
    /// Realized batch discount, expressed as a percentage of the eligible
    /// spend (`projected_savings_usd / eligible_spend_usd * 100`).
    ///
    /// Comes out at ~50 when the batch projection is half of what was
    /// actually paid. Returns `0.0` when `eligible_spend_usd` is zero or
    /// negative, so the ratio is never taken over a non-positive denominator.
    #[must_use]
    pub fn discount_pct(&self) -> f64 {
        // Guard clause: no eligible spend means there is nothing to discount.
        if self.eligible_spend_usd <= 0.0 {
            return 0.0;
        }
        self.projected_savings_usd / self.eligible_spend_usd * 100.0
    }

    /// Renders the finding as a single advisory sentence built only from the
    /// finding's own numbers, e.g.
    /// `"tag=nightly-evals is 31.0% of spend and batch-eligible → ~$20.00
    /// saved at the Batch API rate (−50%) on 128 request(s)"`.
    ///
    /// The share is shown to one decimal place, the savings to two, and the
    /// discount (from [`BatchFinding::discount_pct`]) rounded to a whole
    /// percent.
    #[must_use]
    pub fn summary(&self) -> String {
        let Self {
            tag,
            share_of_spend_pct,
            projected_savings_usd,
            request_count,
            ..
        } = self;
        let discount = self.discount_pct();
        format!(
            "tag={} is {:.1}% of spend and batch-eligible → ~${:.2} saved at the Batch API rate (−{:.0}%) on {} request(s)",
            tag, share_of_spend_pct, projected_savings_usd, discount, request_count,
        )
    }
}

/// Convenience entry point: projects Batch-API savings over request-log
/// aggregates with the built-in non-interactive vocabulary,
/// [`DEFAULT_BATCH_ELIGIBLE_TAGS`].
///
/// This only delegates to [`project_batch_savings_with_tags`]; see that
/// function for the eligibility rules, exclusions, and result ordering.
#[must_use]
pub fn project_batch_savings(
    aggregates: &[RequestAggregate],
    catalog: &PricingCatalog,
) -> Vec<BatchFinding> {
    let eligible_tags = DEFAULT_BATCH_ELIGIBLE_TAGS;
    project_batch_savings_with_tags(aggregates, catalog, eligible_tags)
}

/// Project Batch-API savings over request-log aggregates, treating any segment
/// whose tag (case-insensitive) is in `eligible_tags` as batch-eligible.
///
/// For each eligible segment, the catalog's per-model batch rates price the
/// segment's input + output tokens; the projected savings is
/// `current_spend − projected_batch_cost`. Segments are folded into one
/// [`BatchFinding`] per tag (a tag may span several models). Findings are
/// returned **descending by projected savings** so the biggest lever is first;
/// ties break by tag name for determinism.
///
/// A segment is **excluded** (contributes nothing, not flagged) when:
/// - its tag is `None` or not in `eligible_tags` (interactive / unknown
///   traffic is never batch-eligible), or
/// - its `(provider, model)` has no batch rate in the catalog (the provider has
///   no batch tier for that model, so there is nothing real to project — we do
///   NOT fabricate a 50% discount where catalog data is absent).
///
/// `share_of_spend_pct` is measured against the **total** spend of every
/// aggregate passed in (eligible or not), so it answers "what fraction of all
/// spend is this batch-eligible tag".
///
/// Tags that are eligible but whose entire spend is unpriceable produce no
/// finding (zero eligible spend → nothing to advise).
#[must_use]
pub fn project_batch_savings_with_tags(
    aggregates: &[RequestAggregate],
    catalog: &PricingCatalog,
    eligible_tags: &[&str],
) -> Vec<BatchFinding> {
    let total_spend: f64 = aggregates.iter().map(|a| a.cost_usd).sum();

    // Accumulate per-tag totals across all of the tag's batch-priceable models.
    // Keyed by the original tag string (case preserved for the summary).
    let mut by_tag: std::collections::BTreeMap<String, TagAccumulator> =
        std::collections::BTreeMap::new();

    for agg in aggregates {
        let Some(tag) = agg.tag.as_deref() else {
            continue; // untagged → interactive, never batch-eligible
        };
        if !eligible_tags.iter().any(|t| t.eq_ignore_ascii_case(tag)) {
            continue; // tag not in the non-interactive set
        }
        // Resolve the catalog's batch rate for this model; skip if absent.
        let Some(pricing) = catalog.latest(&agg.provider, &agg.model) else {
            continue;
        };
        let (Some(batch_in), Some(batch_out)) = (
            pricing.batch_input_per_million,
            pricing.batch_output_per_million,
        ) else {
            continue; // no batch tier for this model → nothing to project
        };

        let projected = (agg.input_tokens as f64) * batch_in / 1_000_000.0
            + (agg.output_tokens as f64) * batch_out / 1_000_000.0;

        let entry = by_tag.entry(tag.to_string()).or_default();
        entry.eligible_spend += agg.cost_usd;
        entry.projected_batch += projected;
        entry.request_count += agg.request_count;
    }

    let mut findings: Vec<BatchFinding> = by_tag
        .into_iter()
        .filter(|(_, acc)| acc.eligible_spend > 0.0)
        .map(|(tag, acc)| {
            let savings = (acc.eligible_spend - acc.projected_batch).max(0.0);
            let share = if total_spend > 0.0 {
                acc.eligible_spend / total_spend * 100.0
            } else {
                0.0
            };
            BatchFinding {
                tag,
                eligible_spend_usd: acc.eligible_spend,
                projected_batch_cost_usd: acc.projected_batch,
                projected_savings_usd: savings,
                share_of_spend_pct: share,
                request_count: acc.request_count,
            }
        })
        .collect();

    // Biggest lever first; deterministic tie-break by tag.
    findings.sort_by(|a, b| {
        b.projected_savings_usd
            .partial_cmp(&a.projected_savings_usd)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| a.tag.cmp(&b.tag))
    });
    findings
}

/// Running per-tag totals while folding aggregates.
///
/// Starts at all zeros (`Default`) and is added to once per eligible,
/// batch-priceable segment of the tag.
#[derive(Default)]
struct TagAccumulator {
    // Sum of the actual `cost_usd` paid for the tag's priceable segments.
    eligible_spend: f64,
    // Sum of the same segments' cost re-priced at the catalog batch rates.
    projected_batch: f64,
    // Sum of the segments' request counts.
    request_count: u64,
}

#[cfg(test)]
mod tests {
    // These tests price against the catalog returned by `catalog()`; the
    // dollar figures quoted in each test's comments are the catalog rates the
    // assertions assume, so a catalog price change requires updating them.
    use super::*;
    use crate::pricing::catalog;

    /// Builds a [`RequestAggregate`] from positional arguments so the fixtures
    /// below stay terse.
    fn agg(
        provider: &str,
        model: &str,
        tag: Option<&str>,
        input: u64,
        output: u64,
        cost: f64,
        count: u64,
    ) -> RequestAggregate {
        RequestAggregate {
            provider: provider.into(),
            model: model.into(),
            tag: tag.map(str::to_string),
            input_tokens: input,
            output_tokens: output,
            cost_usd: cost,
            request_count: count,
        }
    }

    /// Core behavior: a batch-eligible tagged segment produces a finding
    /// with the correct projected −50% (catalog-rate) savings + the eligible
    /// spend; non-eligible traffic in the same set is NOT flagged.
    #[test]
    fn flags_eligible_segment_with_catalog_rate_savings() {
        let c = catalog();
        // gpt-5.5: standard $5/$30 per 1M, batch $2.50/$15 per 1M (50% off).
        // 1M input + 1M output @ standard = $5 + $30 = $35 actual spend.
        // Plus an interactive (untagged) gpt-5.5 segment that must be ignored.
        let aggs = vec![
            agg(
                "openai",
                "gpt-5.5",
                Some("nightly"),
                1_000_000,
                1_000_000,
                35.0,
                10,
            ),
            agg("openai", "gpt-5.5", None, 1_000_000, 1_000_000, 35.0, 10),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1, "only the tagged segment is flagged");
        let f = &findings[0];
        assert_eq!(f.tag, "nightly");
        assert!(
            (f.eligible_spend_usd - 35.0).abs() < 1e-9,
            "eligible spend = actual spend of the tagged segment"
        );
        // batch cost: 1M*$2.50 + 1M*$15 = $17.50 → savings = $35 - $17.50.
        assert!(
            (f.projected_batch_cost_usd - 17.50).abs() < 1e-9,
            "batch cost from catalog rates, got {}",
            f.projected_batch_cost_usd
        );
        assert!(
            (f.projected_savings_usd - 17.50).abs() < 1e-9,
            "−50% of $35 = $17.50, got {}",
            f.projected_savings_usd
        );
        // share of spend: $35 eligible / $70 total = 50% (the untagged
        // segment still counts toward the total).
        assert!((f.share_of_spend_pct - 50.0).abs() < 1e-9);
        // discount is the real catalog 50%, not a hardcoded constant.
        assert!((f.discount_pct() - 50.0).abs() < 1e-9);
        assert_eq!(f.request_count, 10);
    }

    /// Non-eligible tags (an interactive tag like "chat") are never flagged,
    /// even when traffic exists for them.
    #[test]
    fn ignores_non_eligible_tags() {
        let c = catalog();
        let aggs = vec![
            agg(
                "openai",
                "gpt-5.5",
                Some("chat"),
                1_000_000,
                1_000_000,
                35.0,
                5,
            ),
            agg("openai", "gpt-5.5", Some("interactive"), 500_000, 0, 2.5, 3),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert!(
            findings.is_empty(),
            "no eligible tags present → no findings: {findings:?}"
        );
    }

    /// A model with NO catalog batch tier contributes no projected savings —
    /// we never fabricate a 50% discount where the catalog has no data.
    #[test]
    fn model_without_batch_tier_is_not_projected() {
        let c = catalog();
        // groq llama has no batch_{input,output}_per_million in the catalog.
        let aggs = vec![agg(
            "groq",
            "llama-3.1-8b-instant",
            Some("background"),
            1_000_000,
            1_000_000,
            1.0,
            4,
        )];
        let findings = project_batch_savings(&aggs, c);
        assert!(
            findings.is_empty(),
            "no batch tier → nothing to project, got {findings:?}"
        );
    }

    /// One eligible tag spanning several models folds into a single finding,
    /// summing eligible spend and per-model batch projections.
    #[test]
    fn folds_multiple_models_under_one_tag() {
        let c = catalog();
        // gpt-5.5 batch $2.50/$15 and gpt-5.4 batch $1.25/$7.50.
        let aggs = vec![
            agg("openai", "gpt-5.5", Some("bulk"), 1_000_000, 0, 5.0, 2),
            agg("openai", "gpt-5.4", Some("bulk"), 1_000_000, 0, 2.5, 3),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1);
        let f = &findings[0];
        assert_eq!(f.tag, "bulk");
        assert!((f.eligible_spend_usd - 7.5).abs() < 1e-9, "5.0 + 2.5");
        // batch input: 1M*$2.50 + 1M*$1.25 = $3.75 → savings $7.5 - $3.75.
        assert!((f.projected_batch_cost_usd - 3.75).abs() < 1e-9);
        assert!((f.projected_savings_usd - 3.75).abs() < 1e-9);
        assert_eq!(f.request_count, 5);
    }

    /// Findings sort by projected savings (biggest lever first), tie-break tag.
    /// Only the savings ordering is asserted here; the tie-break is not
    /// exercised by this fixture.
    #[test]
    fn findings_sorted_by_savings_desc() {
        let c = catalog();
        let aggs = vec![
            // small eligible segment
            agg("openai", "gpt-5.4", Some("offline"), 1_000_000, 0, 2.5, 1),
            // large eligible segment
            agg("openai", "gpt-5.5", Some("nightly"), 10_000_000, 0, 50.0, 1),
        ];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 2);
        assert_eq!(findings[0].tag, "nightly", "bigger savings first");
        assert!(findings[0].projected_savings_usd > findings[1].projected_savings_usd);
    }

    /// Tag matching is case-insensitive ("Background" matches "background").
    #[test]
    fn tag_match_is_case_insensitive() {
        let c = catalog();
        let aggs = vec![agg(
            "openai",
            "gpt-5.5",
            Some("Background"),
            1_000_000,
            0,
            5.0,
            1,
        )];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].tag, "Background", "original case preserved");
    }

    /// A custom (configurable) eligible-tag set is honored; the default
    /// vocabulary is ignored when an explicit set is given.
    #[test]
    fn honors_configurable_tag_set() {
        let c = catalog();
        let aggs = vec![
            agg(
                "openai",
                "gpt-5.5",
                Some("nightly-evals"),
                1_000_000,
                0,
                5.0,
                1,
            ),
            // "background" is in the DEFAULT set but NOT in our custom set:
            agg(
                "openai",
                "gpt-5.5",
                Some("background"),
                1_000_000,
                0,
                5.0,
                1,
            ),
        ];
        let findings = project_batch_savings_with_tags(&aggs, c, &["nightly-evals"]);
        assert_eq!(findings.len(), 1, "only the custom tag matches");
        assert_eq!(findings[0].tag, "nightly-evals");
    }

    /// The summary sentence is tool-grounded: carries the tag, the share of
    /// spend, the dollar savings, the realized discount, and the request
    /// count. Built from a hand-made finding, so no catalog is involved.
    #[test]
    fn summary_is_grounded_and_human_readable() {
        let f = BatchFinding {
            tag: "nightly-evals".into(),
            eligible_spend_usd: 40.0,
            projected_batch_cost_usd: 20.0,
            projected_savings_usd: 20.0,
            share_of_spend_pct: 31.0,
            request_count: 128,
        };
        let s = f.summary();
        assert!(s.contains("tag=nightly-evals"), "{s}");
        assert!(s.contains("31.0% of spend"), "{s}");
        assert!(s.contains("$20.00"), "{s}");
        assert!(s.contains("−50%"), "{s}");
        assert!(s.contains("128 request"), "{s}");
    }

    /// Empty input → no findings, no panic (e.g. division by zero on share).
    #[test]
    fn empty_aggregates_produce_no_findings() {
        let c = catalog();
        assert!(project_batch_savings(&[], c).is_empty());
    }

    /// Anthropic batch rates also flow through (50% per the catalog).
    #[test]
    fn anthropic_eligible_segment_uses_catalog_batch_rate() {
        let c = catalog();
        // claude-opus-4-8: standard $5/$25, batch $2.50/$12.50.
        // 1M in + 1M out standard = $30 actual.
        let aggs = vec![agg(
            "anthropic",
            "claude-opus-4-8",
            Some("offline"),
            1_000_000,
            1_000_000,
            30.0,
            7,
        )];
        let findings = project_batch_savings(&aggs, c);
        assert_eq!(findings.len(), 1);
        let f = &findings[0];
        // batch: 1M*$2.50 + 1M*$12.50 = $15 → savings $30 - $15 = $15.
        assert!((f.projected_batch_cost_usd - 15.0).abs() < 1e-9);
        assert!((f.projected_savings_usd - 15.0).abs() < 1e-9);
    }
}