1pub mod cache_projection;
6pub mod classifier;
7pub mod error;
8pub mod pricing;
9pub mod route_suggestions;
10pub mod token_estimator;
11pub mod types;
12
13pub use error::PreviewError;
14pub use types::{
15 CacheProjections, CurrentEstimate, EstimationConfidence, PreviewRequest, PreviewResponse,
16 QualityRiskBand, RouteSuggestion, Suggestion,
17};
18
19use uuid::Uuid;
20
21pub fn preview(req: &PreviewRequest) -> Result<PreviewResponse, PreviewError> {
26 let mut warnings = Vec::new();
27
28 let hit = pricing::lookup(&req.model)?;
29 let model_max_output = tt_shared::model_catalog()
33 .model_info(hit.provider, &req.model)
34 .map(|mi| u32::try_from(mi.max_output_tokens).unwrap_or(u32::MAX));
35 let est = token_estimator::estimate(
36 hit.provider,
37 &req.messages,
38 req.max_tokens,
39 model_max_output,
40 );
41 let cost = pricing::cost_usd(est.input_tokens, est.output_tokens, &hit);
42
43 let task_class = classifier::classify(&req.messages);
44
45 let cache = cache_projection::project(
46 cost,
47 cache_projection::DEFAULT_L1_HIT_PROBABILITY,
48 cache_projection::DEFAULT_L2_HIT_PROBABILITY,
49 );
50
51 let suggestions = route_suggestions::suggest(
52 &req.model,
53 cost,
54 est.input_tokens,
55 est.output_tokens,
56 task_class,
57 );
58 if suggestions.is_empty() && !matches!(task_class, classifier::TaskClass::Agent) {
59 warnings.push(format!(
60 "no cheaper-equivalent candidates for {} on this task class — \
61 current model may already be the cheapest in family",
62 req.model,
63 ));
64 }
65
66 Ok(PreviewResponse {
67 current: CurrentEstimate {
68 model: req.model.clone(),
69 provider: hit.provider.to_string(),
70 input_tokens_estimated: est.input_tokens,
71 output_tokens_estimated: est.output_tokens,
72 cost_usd: cost,
73 estimation_confidence: est.confidence,
74 },
75 cache_projections: cache,
76 route_suggestions: suggestions,
77 warnings,
78 trace_id: Uuid::new_v4().to_string(),
79 })
80}
81
#[cfg(test)]
mod tests {
    // Unit tests for `preview`, focused on how the caller-supplied
    // `max_tokens` shows up in the output-token estimate.

    use super::*;
    use crate::types::Message;
    use serde_json::json;

    /// Builds a request for `model` holding a single user message ("hello"),
    /// the given `max_tokens`, and no tools or streaming preference.
    fn req_with_max(model: &str, max_tokens: Option<u32>) -> PreviewRequest {
        PreviewRequest {
            model: model.to_string(),
            messages: vec![Message {
                role: "user".into(),
                content: json!("hello"),
            }],
            max_tokens,
            tools: None,
            stream: None,
        }
    }

    /// Requesting 50_000 tokens more than the catalogued output ceiling must
    /// yield an output estimate equal to that ceiling.
    #[test]
    fn over_large_max_tokens_clamped_to_catalog_max_output() {
        let model = "gpt-4o";
        let catalog_max = tt_shared::model_catalog()
            .model_info("openai", model)
            .expect("gpt-4o must be catalogued")
            .max_output_tokens;
        // Checked narrowing and addition: a truncating `as` cast or a wrapped
        // sum would silently turn this into a different (smaller) request and
        // the test would no longer exercise the over-large case.
        let requested = u32::try_from(catalog_max)
            .ok()
            .and_then(|max| max.checked_add(50_000))
            .expect("catalog max output + 50_000 must fit in u32 for this test");
        let resp = preview(&req_with_max(model, Some(requested))).unwrap();
        assert_eq!(resp.current.output_tokens_estimated as u64, catalog_max);
    }

    /// A `max_tokens` of 1000 must be reported unchanged as the output estimate.
    #[test]
    fn modest_max_tokens_is_honored() {
        let resp = preview(&req_with_max("gpt-4o", Some(1000))).unwrap();
        assert_eq!(resp.current.output_tokens_estimated, 1000);
    }
}