tt_shared/provider.rs
1//! The `Provider` trait every adapter implements. See
2//! `docs/02-provider-adapter-guide.md` for the contract and the worked Anthropic example.
3
4use async_trait::async_trait;
5use futures::stream::BoxStream;
6
7use crate::{
8 ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, EmbeddingsRequest,
9 EmbeddingsResponse, ModelInfo, ModelPricing, ProviderError, RequestContext,
10};
11
12/// Adapters are stateless beyond their HTTP client and pricing table.
13/// All authentication, telemetry, and routing concerns live in the core layer.
14#[async_trait]
15pub trait Provider: Send + Sync {
16 /// Unique provider ID (e.g. "openai", "anthropic", "gemini").
17 fn id(&self) -> &'static str;
18
19 /// All models supported by this adapter, with capabilities.
20 fn models(&self) -> Vec<ModelInfo>;
21
22 /// Pricing for a model. Drawn from the manually-curated `data/pricing.toml`
23 /// snapshot embedded at build time; rates are updated by hand, not
24 /// automatically. Returns `None` only when the model is absent from the
25 /// catalog — local providers should return `Some` with zero rates instead.
26 fn pricing(&self, model: &str) -> Option<ModelPricing>;
27
28 /// Multiplier applied to computed cost/baseline to account for a provider
29 /// surcharge on top of the underlying model cost (e.g. OpenRouter's 5% BYOK
30 /// fee). Default `1.0` (no surcharge).
31 fn fee_multiplier(&self) -> f64 {
32 1.0
33 }
34
35 /// Names of request params this adapter **silently drops** for `req`
36 /// during translation because the upstream provider rejects them. The
37 /// gateway emits each as `X-TokenTrimmer-Warnings: param_dropped:<name>`.
38 /// Default: nothing dropped.
39 fn dropped_params(&self, _req: &ChatCompletionRequest) -> Vec<String> {
40 Vec::new()
41 }
42
43 /// Whether this provider honors `response_format: json_schema` (structured
44 /// outputs). Default `true`: most adapters forward `response_format`
45 /// verbatim, so the gateway must NOT strip a schema it isn't sure is
46 /// unsupported (doing so would silently lose structured-output capability).
47 /// Override `false` only for a provider known to be `json_object`-only —
48 /// the gateway then downgrades to `json_object` with a
49 /// `response_format_downgrade` warning.
50 fn supports_response_schema(&self) -> bool {
51 true
52 }
53
54 /// The provider's accepted `temperature` range `(min, max)`. The gateway
55 /// clamps an out-of-range request value to this and emits
56 /// `temperature_clamped`. Default `(0.0, 2.0)` — the widest common range
57 /// (OpenAI/Gemini). Override only with a narrower range you are confident is
58 /// correct, so the gateway never wrongly tightens a provider whose true max
59 /// is uncertain.
60 fn temperature_range(&self) -> (f32, f32) {
61 (0.0, 2.0)
62 }
63
64 /// Non-streaming chat completion.
65 async fn chat_completion(
66 &self,
67 req: ChatCompletionRequest,
68 ctx: &RequestContext,
69 ) -> Result<ChatCompletionResponse, ProviderError>;
70
71 /// Streaming chat completion.
72 async fn chat_completion_stream(
73 &self,
74 req: ChatCompletionRequest,
75 ctx: &RequestContext,
76 ) -> Result<BoxStream<'static, Result<ChatCompletionChunk, ProviderError>>, ProviderError>;
77
78 /// Embeddings. Returns Unsupported if the provider doesn't offer them.
79 async fn embeddings(
80 &self,
81 _req: EmbeddingsRequest,
82 _ctx: &RequestContext,
83 ) -> Result<EmbeddingsResponse, ProviderError> {
84 Err(ProviderError::Unsupported(format!(
85 "{} does not support embeddings",
86 self.id()
87 )))
88 }
89
90 /// Liveness check. Should not call the provider's pricey endpoints.
91 async fn health_check(&self) -> Result<(), ProviderError> {
92 Ok(())
93 }
94}