Skip to main content

shift_preflight/
cost.rs

1//! Token cost estimation for AI vision providers.
2//!
3//! Both OpenAI and Anthropic charge tokens for image inputs based on
4//! image dimensions. This module implements the public token-counting
5//! formulas so SHIFT can report estimated savings.
6
7use serde::{Deserialize, Serialize};
8
9/// Estimated token counts for a single image across providers.
10#[derive(Debug, Clone, Default, Serialize, Deserialize)]
11pub struct TokenEstimate {
12    pub openai_tokens: u64,
13    pub anthropic_tokens: u64,
14}
15
/// Per-image before/after metrics for the report.
///
/// Each record pairs the original and transformed dimensions, byte sizes,
/// formats and token estimates of one image. `TokenSavings::from_metrics`
/// treats a record whose transformed dimensions are 0×0 while an original
/// dimension is non-zero as a dropped image.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageMetrics {
    /// Index in the original payload
    pub image_index: usize,
    /// Original width in pixels
    pub original_width: u32,
    /// Original height in pixels
    pub original_height: u32,
    /// Transformed width in pixels (same as original if unchanged)
    pub transformed_width: u32,
    /// Transformed height in pixels (same as original if unchanged)
    pub transformed_height: u32,
    /// Original byte size of the raw image
    pub original_bytes: usize,
    /// Transformed byte size
    pub transformed_bytes: usize,
    /// Format before transformation (e.g. "png", "jpeg", "svg")
    pub format_before: String,
    /// Format after transformation
    pub format_after: String,
    /// Estimated tokens before transformation
    pub tokens_before: TokenEstimate,
    /// Estimated tokens after transformation
    pub tokens_after: TokenEstimate,
}
40
41/// Aggregate token savings across all images.
42#[derive(Debug, Clone, Default, Serialize, Deserialize)]
43pub struct TokenSavings {
44    pub openai_before: u64,
45    pub openai_after: u64,
46    pub anthropic_before: u64,
47    pub anthropic_after: u64,
48}
49
50impl TokenSavings {
51    pub fn openai_saved(&self) -> u64 {
52        self.openai_before.saturating_sub(self.openai_after)
53    }
54
55    pub fn anthropic_saved(&self) -> u64 {
56        self.anthropic_before.saturating_sub(self.anthropic_after)
57    }
58
59    pub fn openai_pct(&self) -> f64 {
60        if self.openai_before == 0 {
61            return 0.0;
62        }
63        (self.openai_saved() as f64 / self.openai_before as f64) * 100.0
64    }
65
66    pub fn anthropic_pct(&self) -> f64 {
67        if self.anthropic_before == 0 {
68            return 0.0;
69        }
70        (self.anthropic_saved() as f64 / self.anthropic_before as f64) * 100.0
71    }
72
73    /// Aggregate from per-image metrics.
74    ///
75    /// Excludes dropped images (transformed dimensions 0×0 with non-zero
76    /// original dimensions) so that information removal is not counted as
77    /// token "savings". Use [`from_metrics_all`] if you want raw totals.
78    pub fn from_metrics(metrics: &[ImageMetrics]) -> Self {
79        let mut s = TokenSavings::default();
80        for m in metrics {
81            // Skip dropped images: original had tokens but transformed is 0×0
82            let was_dropped = (m.original_width > 0 || m.original_height > 0)
83                && m.transformed_width == 0
84                && m.transformed_height == 0;
85            if was_dropped {
86                continue;
87            }
88            s.openai_before += m.tokens_before.openai_tokens;
89            s.openai_after += m.tokens_after.openai_tokens;
90            s.anthropic_before += m.tokens_before.anthropic_tokens;
91            s.anthropic_after += m.tokens_after.anthropic_tokens;
92        }
93        s
94    }
95
96    /// Aggregate from all per-image metrics including dropped images.
97    pub fn from_metrics_all(metrics: &[ImageMetrics]) -> Self {
98        let mut s = TokenSavings::default();
99        for m in metrics {
100            s.openai_before += m.tokens_before.openai_tokens;
101            s.openai_after += m.tokens_after.openai_tokens;
102            s.anthropic_before += m.tokens_before.anthropic_tokens;
103            s.anthropic_after += m.tokens_after.anthropic_tokens;
104        }
105        s
106    }
107}
108
109// ── OpenAI token estimation (tile-based, GPT-4o / GPT-4.1 family) ───
110
111/// OpenAI vision token count for `detail: high`.
112///
113/// Algorithm (from OpenAI docs, tile-based family):
114/// 1. Scale image so shortest side = 768px (only if larger)
115/// 2. Split into 512×512 tiles (ceiling)
116/// 3. Each tile = 170 tokens + 85 base tokens
117///
118/// For `detail: low`: fixed 85 tokens.
119///
120/// **Accuracy:** This implements the tile-based formula that is correct for
121/// **GPT-4o, GPT-4.1, and GPT-4.5** (base=85, tile=170). Other model
122/// families use different constants:
123///
124/// | Model family          | Base | Tile   | This function |
125/// |-----------------------|------|--------|---------------|
126/// | GPT-4o / 4.1 / 4.5   |   85 |   170  | Correct       |
127/// | GPT-4o-mini           | 2833 | 5,667  | ~33× under    |
128/// | o1 / o1-pro / o3      |   75 |   150  | ~13% over     |
129///
130/// Newer models (GPT-4.1 2025-04-14+, o4-mini) use patch-based tokenization
131/// with different budgets and are not covered by this formula.
132pub fn openai_tokens(width: u32, height: u32) -> u64 {
133    if width == 0 || height == 0 {
134        return 0;
135    }
136
137    // detail: high calculation
138    let (w, h) = openai_scale_to_fit(width, height);
139    let tiles_w = (w as f64 / 512.0).ceil() as u64;
140    let tiles_h = (h as f64 / 512.0).ceil() as u64;
141    let tiles = tiles_w * tiles_h;
142
143    170 * tiles + 85
144}
145
/// Shrink dimensions to OpenAI's `detail: high` working size, preserving
/// aspect ratio.
///
/// Two successive downscales, each applied only when needed:
/// 1. the longest side is capped at 2048px (OpenAI constraint);
/// 2. the shortest side is capped at 768px.
///
/// Images are never enlarged. Fractional results are rounded up.
fn openai_scale_to_fit(width: u32, height: u32) -> (u32, u32) {
    const MAX_LONG_SIDE: f64 = 2048.0;
    const MAX_SHORT_SIDE: f64 = 768.0;

    let dims = (f64::from(width), f64::from(height));

    // Pass 1: cap the longest side.
    let longest = dims.0.max(dims.1);
    let dims = if longest > MAX_LONG_SIDE {
        let factor = MAX_LONG_SIDE / longest;
        (dims.0 * factor, dims.1 * factor)
    } else {
        dims
    };

    // Pass 2: cap the shortest side of the (possibly already reduced) image.
    let shortest = dims.0.min(dims.1);
    let dims = if shortest > MAX_SHORT_SIDE {
        let factor = MAX_SHORT_SIDE / shortest;
        (dims.0 * factor, dims.1 * factor)
    } else {
        dims
    };

    (dims.0.ceil() as u32, dims.1.ceil() as u32)
}
170
/// Token cost of an OpenAI image sent with `detail: low`.
///
/// The cost is a flat 85 tokens and does not depend on image dimensions.
pub fn openai_tokens_low() -> u64 {
    const LOW_DETAIL_TOKENS: u64 = 85;
    LOW_DETAIL_TOKENS
}
175
176// ── Anthropic token estimation ───────────────────────────────────────
177
178/// Anthropic vision token count for standard-resolution models.
179///
180/// Formula (from Anthropic docs):
181///   tokens ≈ (width × height) / 750
182///
183/// Images are first downscaled so the long edge ≤ 1568px (standard models)
184/// and then padded to a multiple of 28px.
185/// Max tokens per image: 1568 (standard) or 4784 (Opus 4.7).
186///
187/// **Note:** This implements the standard-resolution formula (1568px max
188/// long edge, 1568 token cap). Claude Opus 4.7 supports high-resolution
189/// images (2576px long edge, 4784 token cap). Estimates for Opus 4.7
190/// payloads with large images will be under-counted.
191pub fn anthropic_tokens(width: u32, height: u32) -> u64 {
192    if width == 0 || height == 0 {
193        return 0;
194    }
195
196    let (w, h) = anthropic_scale_to_fit(width, height);
197
198    // Pad to next multiple of 28
199    let pw = next_multiple_of_28(w);
200    let ph = next_multiple_of_28(h);
201
202    let tokens = (pw as u64 * ph as u64) / 750;
203    // Cap at 1568 tokens (standard models)
204    tokens.min(1568)
205}
206
/// Scale so the long edge is at most 1568px, preserving aspect ratio.
///
/// Images whose long edge already fits are returned unchanged. Otherwise the
/// long edge becomes exactly 1568 and the short edge is
/// `ceil(short * 1568 / long)`.
///
/// The scaling uses exact integer arithmetic. A floating-point
/// `long * (1568.0 / long)` can round to just above 1568 (for example when
/// the long edge is 3100), and rounding that up produces 1569, which breaks
/// the limit this function exists to enforce and inflates the padded size
/// computed by the caller.
fn anthropic_scale_to_fit(width: u32, height: u32) -> (u32, u32) {
    const MAX_EDGE: u32 = 1568;

    let long_edge = width.max(height);
    if long_edge <= MAX_EDGE {
        return (width, height);
    }

    // Ceiling division in u64: edge * MAX_EDGE cannot overflow, and because
    // edge <= long_edge the quotient never exceeds MAX_EDGE, so narrowing
    // back to u32 is lossless.
    let scale_edge = |edge: u32| -> u32 {
        (u64::from(edge) * u64::from(MAX_EDGE)).div_ceil(u64::from(long_edge)) as u32
    };

    (scale_edge(width), scale_edge(height))
}
221
/// Round `val` up to the nearest multiple of 28; exact multiples are
/// returned unchanged.
fn next_multiple_of_28(val: u32) -> u32 {
    const PAD_UNIT: u32 = 28;
    val.next_multiple_of(PAD_UNIT)
}
225
226/// Estimate tokens for an image at given dimensions.
227pub fn estimate_tokens(width: u32, height: u32) -> TokenEstimate {
228    TokenEstimate {
229        openai_tokens: openai_tokens(width, height),
230        anthropic_tokens: anthropic_tokens(width, height),
231    }
232}
233
234// ── Tests ────────────────────────────────────────────────────────────
235
/// Unit tests for the token-estimation formulas and the savings aggregation.
#[cfg(test)]
mod tests {
    use super::*;

    // ── OpenAI ───────────────────────────────────────────────────

    #[test]
    fn test_openai_small_image() {
        // 512x512: fits in one tile → 170 + 85 = 255
        assert_eq!(openai_tokens(512, 512), 255);
    }

    #[test]
    fn test_openai_768_image() {
        // 768x768: shortest side = 768, so no scaling needed.
        // Tiles: ceil(768/512) = 2 per side → 4 tiles → 4*170 + 85 = 765
        assert_eq!(openai_tokens(768, 768), 765);
    }

    #[test]
    fn test_openai_large_landscape() {
        // 4000x3000:
        // Step 1: cap longest at 2048 → scale = 2048/4000 = 0.512
        //   → 2048 x 1536
        // Step 2: shortest side 1536 > 768 → scale = 768/1536 = 0.5
        //   → 1024 x 768
        // Tiles: ceil(1024/512) * ceil(768/512) = 2 * 2 = 4
        // Tokens: 4 * 170 + 85 = 765
        assert_eq!(openai_tokens(4000, 3000), 765);
    }

    #[test]
    fn test_openai_tall_portrait() {
        // 1000x4000:
        // Step 1: cap longest at 2048 → scale = 2048/4000 = 0.512
        //   → 512 x 2048
        // Step 2: shortest side 512 ≤ 768 → no change
        // Tiles: ceil(512/512) * ceil(2048/512) = 1 * 4 = 4
        // Tokens: 4 * 170 + 85 = 765
        assert_eq!(openai_tokens(1000, 4000), 765);
    }

    #[test]
    fn test_openai_zero() {
        assert_eq!(openai_tokens(0, 0), 0);
    }

    #[test]
    fn test_openai_low_detail() {
        assert_eq!(openai_tokens_low(), 85);
    }

    #[test]
    fn test_openai_very_small() {
        // 100x100: fits in one tile
        assert_eq!(openai_tokens(100, 100), 255);
    }

    // ── Anthropic ────────────────────────────────────────────────

    #[test]
    fn test_anthropic_small_image() {
        // 200x200: no scaling, pad to 224x224
        // tokens = 224*224/750 = 66.9 → 66
        assert_eq!(anthropic_tokens(200, 200), 66);
    }

    #[test]
    fn test_anthropic_1000x1000() {
        // 1000x1000: long edge ≤ 1568, no scaling
        // pad to 1008x1008 (1000 → next mult of 28 = 1008)
        // tokens = 1008*1008/750 = 1354
        assert_eq!(anthropic_tokens(1000, 1000), 1354);
    }

    #[test]
    fn test_anthropic_large_downscaled() {
        // 3000x2000: long edge 3000 > 1568
        // scale = 1568/3000 = 0.5227 → 1568 x 1046 (ceil)
        // pad: 1568 (already mult of 28), 1046 → 1064
        // tokens = 1568*1064/750 = 2224, capped at 1568
        assert_eq!(anthropic_tokens(3000, 2000), 1568);
    }

    #[test]
    fn test_anthropic_zero() {
        assert_eq!(anthropic_tokens(0, 0), 0);
    }

    #[test]
    fn test_anthropic_exact_max() {
        // 1568x1568: at limit
        // pad: both already multiple of 28 (1568 = 56*28)
        // tokens = 1568*1568/750 = 3277, capped at 1568
        assert_eq!(anthropic_tokens(1568, 1568), 1568);
    }

    // ── TokenSavings ─────────────────────────────────────────────

    #[test]
    fn test_savings_calculation() {
        let s = TokenSavings {
            openai_before: 1000,
            openai_after: 300,
            anthropic_before: 2000,
            anthropic_after: 500,
        };
        assert_eq!(s.openai_saved(), 700);
        assert_eq!(s.anthropic_saved(), 1500);
        assert!((s.openai_pct() - 70.0).abs() < 0.1);
        assert!((s.anthropic_pct() - 75.0).abs() < 0.1);
    }

    #[test]
    fn test_savings_zero_before() {
        let s = TokenSavings::default();
        assert_eq!(s.openai_pct(), 0.0);
        assert_eq!(s.anthropic_pct(), 0.0);
    }

    #[test]
    fn test_savings_never_negative() {
        // A transformation that costs more than the original must report
        // zero savings rather than underflowing.
        let s = TokenSavings {
            openai_before: 100,
            openai_after: 250,
            anthropic_before: 10,
            anthropic_after: 20,
        };
        assert_eq!(s.openai_saved(), 0);
        assert_eq!(s.anthropic_saved(), 0);
        assert_eq!(s.openai_pct(), 0.0);
        assert_eq!(s.anthropic_pct(), 0.0);
    }

    // ── estimate_tokens ──────────────────────────────────────────

    #[test]
    fn test_estimate_tokens_both() {
        let est = estimate_tokens(1000, 1000);
        assert!(est.openai_tokens > 0);
        assert!(est.anthropic_tokens > 0);
    }

    // ── estimate_tokens: different dimensions produce different results ─

    #[test]
    fn test_estimate_tokens_varies_by_size() {
        let small = estimate_tokens(100, 100);
        let large = estimate_tokens(4000, 3000);
        // A 100x100 and 4000x3000 should not produce identical estimates
        // (OpenAI: 255 vs 765; Anthropic: different too)
        assert_ne!(
            small.openai_tokens, large.openai_tokens,
            "different dimensions should produce different OpenAI estimates"
        );
    }

    // ── Extreme aspect ratios ────────────────────────────────────

    #[test]
    fn test_openai_extreme_tall() {
        // 1x10000:
        // Cap longest at 2048 → scale=2048/10000=0.2048 → ceil(1*0.2048)=1, ceil(10000*0.2048)=2048
        // Shortest side 1 ≤ 768 → no further scaling
        // Tiles: ceil(1/512)=1 * ceil(2048/512)=4 → 4 tiles → 4*170+85 = 765
        assert_eq!(openai_tokens(1, 10000), 765);
    }

    #[test]
    fn test_openai_extreme_wide() {
        // 10000x1: same logic, just rotated
        assert_eq!(openai_tokens(10000, 1), 765);
    }

    #[test]
    fn test_anthropic_extreme_tall() {
        // 1x10000: long edge 10000 > 1568 → scale=1568/10000=0.1568
        //   → ceil(1*0.1568)=1, ceil(10000*0.1568)=1568
        // pad: 1→28, 1568→1568
        // tokens = 28*1568/750 = 58
        assert_eq!(anthropic_tokens(1, 10000), 58);
    }

    #[test]
    fn test_anthropic_extreme_wide() {
        // 10000x1: same logic, just rotated
        assert_eq!(anthropic_tokens(10000, 1), 58);
    }

    #[test]
    fn test_openai_1x1() {
        // 1x1: fits in one tile → 170 + 85 = 255
        assert_eq!(openai_tokens(1, 1), 255);
    }

    #[test]
    fn test_anthropic_1x1() {
        // 1x1: pad to 28x28, tokens = 28*28/750 = 1
        assert_eq!(anthropic_tokens(1, 1), 1);
    }

    // ── Scaling helpers ──────────────────────────────────────────

    #[test]
    fn test_next_multiple_of_28() {
        assert_eq!(next_multiple_of_28(28), 28);
        assert_eq!(next_multiple_of_28(29), 56);
        assert_eq!(next_multiple_of_28(1), 28);
        assert_eq!(next_multiple_of_28(200), 224);
        assert_eq!(next_multiple_of_28(1568), 1568);
    }

    // ── TokenSavings: dropped images excluded ────────────────────

    #[test]
    fn test_savings_excludes_dropped() {
        let resized_before = estimate_tokens(4000, 3000);
        let resized_after = estimate_tokens(2048, 1536);
        let dropped_before = estimate_tokens(1000, 1000);

        let metrics = vec![
            // Normal resize: 4000x3000 -> 2048x1536
            ImageMetrics {
                image_index: 0,
                original_width: 4000,
                original_height: 3000,
                transformed_width: 2048,
                transformed_height: 1536,
                original_bytes: 5_000_000,
                transformed_bytes: 500_000,
                format_before: "png".to_string(),
                format_after: "png".to_string(),
                tokens_before: estimate_tokens(4000, 3000),
                tokens_after: estimate_tokens(2048, 1536),
            },
            // Dropped image: 1000x1000 -> 0x0
            ImageMetrics {
                image_index: 1,
                original_width: 1000,
                original_height: 1000,
                transformed_width: 0,
                transformed_height: 0,
                original_bytes: 100_000,
                transformed_bytes: 0,
                format_before: "png".to_string(),
                format_after: "png".to_string(),
                tokens_before: estimate_tokens(1000, 1000),
                tokens_after: estimate_tokens(0, 0),
            },
        ];

        let savings = TokenSavings::from_metrics(&metrics);
        let savings_all = TokenSavings::from_metrics_all(&metrics);

        // from_metrics should only include the resize, not the drop
        assert_eq!(savings.openai_before, resized_before.openai_tokens);
        assert_eq!(savings.openai_after, resized_after.openai_tokens);
        assert_eq!(savings.anthropic_before, resized_before.anthropic_tokens);
        assert_eq!(savings.anthropic_after, resized_after.anthropic_tokens);

        // from_metrics_all should include both
        assert_eq!(
            savings_all.openai_before,
            resized_before.openai_tokens + dropped_before.openai_tokens
        );
        assert_eq!(
            savings_all.anthropic_before,
            resized_before.anthropic_tokens + dropped_before.anthropic_tokens
        );
        // The dropped image contributes zero tokens on the "after" side.
        assert_eq!(savings_all.openai_after, resized_after.openai_tokens);
        assert_eq!(savings_all.anthropic_after, resized_after.anthropic_tokens);
    }
}