Skip to main content

tt_preview/
lib.rs

1//! `tt-preview` — pure cost preview engine.
2//!
3//! See `docs/superpowers/specs/2026-05-28-trackC-cost-preview-api-design.md`.
4
5pub mod cache_projection;
6pub mod classifier;
7pub mod error;
8pub mod pricing;
9pub mod route_suggestions;
10pub mod token_estimator;
11pub mod types;
12
13pub use error::PreviewError;
14pub use types::{
15    CacheProjections, CurrentEstimate, EstimationConfidence, PreviewRequest, PreviewResponse,
16    QualityRiskBand, RouteSuggestion, Suggestion,
17};
18
19use uuid::Uuid;
20
21/// Top-level entry point. Returns a complete `PreviewResponse`. The only
22/// way this returns `Err` is if the model is unknown AND the optional
23/// fallback heuristic also fails — in practice the handler converts that
24/// into a 400 with a clear message.
25pub fn preview(req: &PreviewRequest) -> Result<PreviewResponse, PreviewError> {
26    let mut warnings = Vec::new();
27
28    let hit = pricing::lookup(&req.model)?;
29    // Clamp the projected output to the model's real catalog max-output when the
30    // model is catalogued (keeps an over-large or absent `max_tokens` honest);
31    // unknown models pass through uncapped.
32    let model_max_output = tt_shared::model_catalog()
33        .model_info(hit.provider, &req.model)
34        .map(|mi| u32::try_from(mi.max_output_tokens).unwrap_or(u32::MAX));
35    let est = token_estimator::estimate(
36        hit.provider,
37        &req.messages,
38        req.max_tokens,
39        model_max_output,
40    );
41    let cost = pricing::cost_usd(est.input_tokens, est.output_tokens, &hit);
42
43    let task_class = classifier::classify(&req.messages);
44
45    let cache = cache_projection::project(
46        cost,
47        cache_projection::DEFAULT_L1_HIT_PROBABILITY,
48        cache_projection::DEFAULT_L2_HIT_PROBABILITY,
49    );
50
51    let suggestions = route_suggestions::suggest(
52        &req.model,
53        cost,
54        est.input_tokens,
55        est.output_tokens,
56        task_class,
57    );
58    if suggestions.is_empty() && !matches!(task_class, classifier::TaskClass::Agent) {
59        warnings.push(format!(
60            "no cheaper-equivalent candidates for {} on this task class — \
61             current model may already be the cheapest in family",
62            req.model,
63        ));
64    }
65
66    Ok(PreviewResponse {
67        current: CurrentEstimate {
68            model: req.model.clone(),
69            provider: hit.provider.to_string(),
70            input_tokens_estimated: est.input_tokens,
71            output_tokens_estimated: est.output_tokens,
72            cost_usd: cost,
73            estimation_confidence: est.confidence,
74        },
75        cache_projections: cache,
76        route_suggestions: suggestions,
77        warnings,
78        trace_id: Uuid::new_v4().to_string(),
79    })
80}
81
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Message;
    use serde_json::json;

    /// How far above the catalog max-output the "over-large" request asks for.
    const MAX_TOKENS_OVERSHOOT: u32 = 50_000;

    /// Builds a minimal request: one user message ("hello") for `model`, with
    /// the given `max_tokens` and no tools or streaming flag.
    fn req_with_max(model: &str, max_tokens: Option<u32>) -> PreviewRequest {
        PreviewRequest {
            model: model.to_string(),
            messages: vec![Message {
                role: "user".into(),
                content: json!("hello"),
            }],
            max_tokens,
            tools: None,
            stream: None,
        }
    }

    /// End-to-end: a catalogued model with an over-large `max_tokens` has its
    /// projected output clamped to the model's real catalog max-output, rather
    /// than the caller's inflated ceiling or the old hardcoded 4096 cap.
    /// `gpt-4o` is catalogued (openai, max_output_tokens = 16000).
    #[test]
    fn over_large_max_tokens_clamped_to_catalog_max_output() {
        let model = "gpt-4o";
        let catalog_max = tt_shared::model_catalog()
            .model_info("openai", model)
            .expect("gpt-4o must be catalogued")
            .max_output_tokens;

        // Checked narrowing and addition: a truncating `as u32` cast or a
        // wrapping add would silently send a *smaller* `max_tokens` and turn a
        // broken precondition into a misleading assertion failure below.
        let inflated_max_tokens = u32::try_from(catalog_max)
            .ok()
            .and_then(|max| max.checked_add(MAX_TOKENS_OVERSHOOT))
            .expect("catalog max-output plus overshoot must fit in u32");

        let resp = preview(&req_with_max(model, Some(inflated_max_tokens))).unwrap();
        // Widening conversion only, so this `as` cannot lose information.
        assert_eq!(resp.current.output_tokens_estimated as u64, catalog_max);
    }

    /// A modest explicit `max_tokens` below the model max is honored as-is.
    #[test]
    fn modest_max_tokens_is_honored() {
        let resp = preview(&req_with_max("gpt-4o", Some(1000))).unwrap();
        assert_eq!(resp.current.output_tokens_estimated, 1000);
    }
}
122}