Skip to main content

shift_preflight/
cost.rs

//! Token cost estimation for AI vision providers.
//!
//! Both OpenAI and Anthropic charge tokens for image inputs based on
//! image dimensions. This module implements the public token-counting
//! formulas so SHIFT can report estimated savings.
6
use serde::{Deserialize, Serialize};
8
9/// Estimated token counts for a single image across providers.
10#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11pub struct TokenEstimate {
12    pub openai_tokens: u64,
13    pub anthropic_tokens: u64,
14}
15
16/// Per-image before/after metrics for the report.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct ImageMetrics {
19    /// Index in the original payload
20    pub image_index: usize,
21    /// Original dimensions
22    pub original_width: u32,
23    pub original_height: u32,
24    /// Transformed dimensions (same as original if unchanged)
25    pub transformed_width: u32,
26    pub transformed_height: u32,
27    /// Original byte size of the raw image
28    pub original_bytes: usize,
29    /// Transformed byte size
30    pub transformed_bytes: usize,
31    /// Format before transformation (e.g. "png", "jpeg", "svg")
32    pub format_before: String,
33    /// Format after transformation
34    pub format_after: String,
35    /// Estimated tokens before transformation
36    pub tokens_before: TokenEstimate,
37    /// Estimated tokens after transformation
38    pub tokens_after: TokenEstimate,
39}
40
41/// Aggregate token savings across all images.
42#[derive(Debug, Clone, Default, Serialize, Deserialize)]
43pub struct TokenSavings {
44    pub openai_before: u64,
45    pub openai_after: u64,
46    pub anthropic_before: u64,
47    pub anthropic_after: u64,
48}
49
50impl TokenSavings {
51    pub fn openai_saved(&self) -> u64 {
52        self.openai_before.saturating_sub(self.openai_after)
53    }
54
55    pub fn anthropic_saved(&self) -> u64 {
56        self.anthropic_before.saturating_sub(self.anthropic_after)
57    }
58
59    pub fn openai_pct(&self) -> f64 {
60        if self.openai_before == 0 {
61            return 0.0;
62        }
63        (self.openai_saved() as f64 / self.openai_before as f64) * 100.0
64    }
65
66    pub fn anthropic_pct(&self) -> f64 {
67        if self.anthropic_before == 0 {
68            return 0.0;
69        }
70        (self.anthropic_saved() as f64 / self.anthropic_before as f64) * 100.0
71    }
72
73    /// Aggregate from per-image metrics.
74    pub fn from_metrics(metrics: &[ImageMetrics]) -> Self {
75        let mut s = TokenSavings::default();
76        for m in metrics {
77            s.openai_before += m.tokens_before.openai_tokens;
78            s.openai_after += m.tokens_after.openai_tokens;
79            s.anthropic_before += m.tokens_before.anthropic_tokens;
80            s.anthropic_after += m.tokens_after.anthropic_tokens;
81        }
82        s
83    }
84}
85
// ── OpenAI token estimation (tile-based, GPT-4o / GPT-4.1 family) ───
87
88/// OpenAI vision token count for `detail: high`.
89///
90/// Algorithm (from OpenAI docs, tile-based family):
91/// 1. Scale image so shortest side = 768px (only if larger)
92/// 2. Split into 512×512 tiles (ceiling)
93/// 3. Each tile = 170 tokens + 85 base tokens
94///
95/// For `detail: low`: fixed 85 tokens.
96///
97/// **Note:** This implements the tile-based formula for GPT-4o, GPT-4.1,
98/// GPT-4o-mini, and o-series models (except o4-mini). Newer models
99/// (GPT-4.1 2025-04-14+, o4-mini) use patch-based tokenization with
100/// different budgets. Pass-through accuracy for those models is not
101/// guaranteed.
102pub fn openai_tokens(width: u32, height: u32) -> u64 {
103    if width == 0 || height == 0 {
104        return 0;
105    }
106
107    // detail: high calculation
108    let (w, h) = openai_scale_to_fit(width, height);
109    let tiles_w = (w as f64 / 512.0).ceil() as u64;
110    let tiles_h = (h as f64 / 512.0).ceil() as u64;
111    let tiles = tiles_w * tiles_h;
112
113    170 * tiles + 85
114}
115
/// Downscale to OpenAI's limits, preserving aspect ratio.
///
/// Two constraints are applied, in this order:
/// 1. the longest side is capped at 2048px;
/// 2. the shortest side is then capped at 768px.
///
/// Images already within both limits are returned unchanged (no upscaling).
/// Scaled sides are rounded up to whole pixels.
///
/// The computation uses exact integer arithmetic. When both constraints
/// apply, the two scale factors multiply out to `768 / short`, so a single
/// rational factor is chosen up front and applied once. This avoids chaining
/// floating-point multiplications, whose rounding error can sit just above an
/// exact integer and make `ceil` add a spurious pixel.
fn openai_scale_to_fit(width: u32, height: u32) -> (u32, u32) {
    const MAX_LONG_SIDE: u64 = 2048;
    const MAX_SHORT_SIDE: u64 = 768;

    let (w, h) = (u64::from(width), u64::from(height));
    let long = w.max(h);
    let short = w.min(h);

    let long_capped = long > MAX_LONG_SIDE;

    // Is the short side still above 768 once the long-side cap is applied?
    // After capping, short' = short * 2048 / long; compare cross-multiplied
    // to stay in integers.
    let short_exceeds = if long_capped {
        short * MAX_LONG_SIDE > MAX_SHORT_SIDE * long
    } else {
        short > MAX_SHORT_SIDE
    };

    // Net scale factor as the fraction `num / den`.
    let (num, den) = if short_exceeds {
        (MAX_SHORT_SIDE, short)
    } else if long_capped {
        (MAX_LONG_SIDE, long)
    } else {
        return (width, height);
    };

    // The factor is < 1 and the result never exceeds 2048, so the narrowing
    // cast back to u32 cannot truncate.
    let scale = |side: u64| (side * num).div_ceil(den) as u32;
    (scale(w), scale(h))
}
140
/// Fixed token count for OpenAI `detail: low`.
///
/// The cost does not depend on image dimensions, so this takes no arguments
/// and always returns 85.
pub const fn openai_tokens_low() -> u64 {
    85
}
145
// ── Anthropic token estimation ───────────────────────────────────────
147
148/// Anthropic vision token count for standard-resolution models.
149///
150/// Formula (from Anthropic docs):
151///   tokens ≈ (width × height) / 750
152///
153/// Images are first downscaled so the long edge ≤ 1568px (standard models)
154/// and then padded to a multiple of 28px.
155/// Max tokens per image: 1568 (standard) or 4784 (Opus 4.7).
156///
157/// **Note:** This implements the standard-resolution formula (1568px max
158/// long edge, 1568 token cap). Claude Opus 4.7 supports high-resolution
159/// images (2576px long edge, 4784 token cap). Estimates for Opus 4.7
160/// payloads with large images will be under-counted.
161pub fn anthropic_tokens(width: u32, height: u32) -> u64 {
162    if width == 0 || height == 0 {
163        return 0;
164    }
165
166    let (w, h) = anthropic_scale_to_fit(width, height);
167
168    // Pad to next multiple of 28
169    let pw = next_multiple_of_28(w);
170    let ph = next_multiple_of_28(h);
171
172    let tokens = (pw as u64 * ph as u64) / 750;
173    // Cap at 1568 tokens (standard models)
174    tokens.min(1568)
175}
176
/// Scale so the long edge is at most 1568px, preserving aspect ratio.
///
/// Images whose long edge is already within the limit are returned
/// unchanged. Otherwise both sides are multiplied by `1568 / long_edge` and
/// rounded up to whole pixels.
///
/// The scaling uses exact integer ceiling division rather than
/// floating-point. With `f64`, `long_edge * (1568.0 / long_edge)` can round to
/// a value slightly above 1568, and `ceil` then yields 1569. For a 2800×100
/// input the float version produces (1569, 57) instead of (1568, 56), which
/// pushes both sides into the next 28px padding bucket downstream.
fn anthropic_scale_to_fit(width: u32, height: u32) -> (u32, u32) {
    const MAX_LONG_EDGE: u64 = 1568;

    let long_edge = u64::from(width.max(height));
    if long_edge <= MAX_LONG_EDGE {
        return (width, height);
    }

    // ceil(side * 1568 / long_edge). The result is at most 1568, so the
    // narrowing cast back to u32 cannot truncate.
    let shrink = |side: u32| (u64::from(side) * MAX_LONG_EDGE).div_ceil(long_edge) as u32;
    (shrink(width), shrink(height))
}
191
/// Round `val` up to the nearest multiple of 28.
///
/// Values that are already a multiple of 28 (including 0) are returned
/// unchanged.
fn next_multiple_of_28(val: u32) -> u32 {
    val.next_multiple_of(28)
}
195
196/// Estimate tokens for an image at given dimensions.
197pub fn estimate_tokens(width: u32, height: u32) -> TokenEstimate {
198    TokenEstimate {
199        openai_tokens: openai_tokens(width, height),
200        anthropic_tokens: anthropic_tokens(width, height),
201    }
202}
203
// ── Tests ────────────────────────────────────────────────────────────
205
#[cfg(test)]
mod tests {
    use super::*;

    // ── OpenAI ───────────────────────────────────────────────────

    #[test]
    fn test_openai_small_image() {
        // 512x512: fits in one tile → 170 + 85 = 255
        assert_eq!(openai_tokens(512, 512), 255);
    }

    #[test]
    fn test_openai_768_image() {
        // 768x768: shortest side = 768, so no scaling needed.
        // Tiles: ceil(768/512) = 2 per side → 4 tiles → 4*170 + 85 = 765
        assert_eq!(openai_tokens(768, 768), 765);
    }

    #[test]
    fn test_openai_large_landscape() {
        // 4000x3000:
        // Step 1: cap longest at 2048 → scale = 2048/4000 = 0.512
        //   → 2048 x 1536
        // Step 2: shortest side 1536 > 768 → scale = 768/1536 = 0.5
        //   → 1024 x 768
        // Tiles: ceil(1024/512) * ceil(768/512) = 2 * 2 = 4
        // Tokens: 4 * 170 + 85 = 765
        assert_eq!(openai_tokens(4000, 3000), 765);
    }

    #[test]
    fn test_openai_tall_portrait() {
        // 1000x4000:
        // Step 1: cap longest at 2048 → scale = 2048/4000 = 0.512
        //   → 512 x 2048
        // Step 2: shortest side 512 ≤ 768 → no change
        // Tiles: ceil(512/512) * ceil(2048/512) = 1 * 4 = 4
        // Tokens: 4 * 170 + 85 = 765
        assert_eq!(openai_tokens(1000, 4000), 765);
    }

    #[test]
    fn test_openai_zero() {
        assert_eq!(openai_tokens(0, 0), 0);
    }

    #[test]
    fn test_openai_low_detail() {
        assert_eq!(openai_tokens_low(), 85);
    }

    #[test]
    fn test_openai_very_small() {
        // 100x100: fits in one tile
        assert_eq!(openai_tokens(100, 100), 255);
    }

    // ── Anthropic ────────────────────────────────────────────────

    #[test]
    fn test_anthropic_small_image() {
        // 200x200: no scaling, pad to 224x224
        // tokens = 224*224/750 = 66.9 → 66
        assert_eq!(anthropic_tokens(200, 200), 66);
    }

    #[test]
    fn test_anthropic_1000x1000() {
        // 1000x1000: long edge ≤ 1568, no scaling
        // pad to 1008x1008 (1000 → next mult of 28 = 1008)
        // tokens = 1008*1008/750 = 1354
        assert_eq!(anthropic_tokens(1000, 1000), 1354);
    }

    #[test]
    fn test_anthropic_large_downscaled() {
        // 3000x2000: long edge 3000 > 1568
        // scale = 1568/3000 = 0.5227 → 1568 x 1046 (ceil)
        // pad: 1568 (already mult of 28), 1046 → 1064
        // tokens = 1568*1064/750 = 2224, capped at 1568
        assert_eq!(anthropic_tokens(3000, 2000), 1568);
    }

    #[test]
    fn test_anthropic_zero() {
        assert_eq!(anthropic_tokens(0, 0), 0);
    }

    #[test]
    fn test_anthropic_exact_max() {
        // 1568x1568: at limit
        // pad: both already multiple of 28 (1568 = 56*28)
        // tokens = 1568*1568/750 = 3278, capped at 1568
        assert_eq!(anthropic_tokens(1568, 1568), 1568);
    }

    // ── TokenSavings ─────────────────────────────────────────────

    #[test]
    fn test_savings_calculation() {
        let s = TokenSavings {
            openai_before: 1000,
            openai_after: 300,
            anthropic_before: 2000,
            anthropic_after: 500,
        };
        assert_eq!(s.openai_saved(), 700);
        assert_eq!(s.anthropic_saved(), 1500);
        assert!((s.openai_pct() - 70.0).abs() < 0.1);
        assert!((s.anthropic_pct() - 75.0).abs() < 0.1);
    }

    #[test]
    fn test_savings_zero_before() {
        let s = TokenSavings::default();
        assert_eq!(s.openai_pct(), 0.0);
        assert_eq!(s.anthropic_pct(), 0.0);
    }

    // ── estimate_tokens ──────────────────────────────────────────

    #[test]
    fn test_estimate_tokens_both() {
        let est = estimate_tokens(1000, 1000);
        assert!(est.openai_tokens > 0);
        assert!(est.anthropic_tokens > 0);
    }

    // ── Scaling helpers ──────────────────────────────────────────

    #[test]
    fn test_next_multiple_of_28() {
        assert_eq!(next_multiple_of_28(28), 28);
        assert_eq!(next_multiple_of_28(29), 56);
        assert_eq!(next_multiple_of_28(1), 28);
        assert_eq!(next_multiple_of_28(200), 224);
        assert_eq!(next_multiple_of_28(1568), 1568);
    }
}