1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
//! The `Provider` trait every adapter implements. See
//! `docs/02-provider-adapter-guide.md` for the contract and the worked Anthropic example.
use async_trait::async_trait;
use futures::stream::BoxStream;
use crate::{
ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, EmbeddingsRequest,
EmbeddingsResponse, ModelInfo, ModelPricing, ProviderError, RequestContext,
};
/// Contract implemented by every upstream-provider adapter.
///
/// An adapter carries no state other than its HTTP client and pricing table;
/// authentication, telemetry, and routing are handled by the core layer.
#[async_trait]
pub trait Provider: Send + Sync {
    /// Returns the unique identifier of this provider, such as `"openai"`,
    /// `"anthropic"`, or `"gemini"`.
    fn id(&self) -> &'static str;

    /// Lists every model this adapter supports, together with its capabilities.
    fn models(&self) -> Vec<ModelInfo>;

    /// Looks up the pricing for `model`.
    ///
    /// Rates come from the hand-maintained `data/pricing.toml` snapshot that is
    /// embedded at build time; they are never refreshed automatically. `None`
    /// means the model is missing from the catalog. Local providers should
    /// answer with `Some` carrying zero rates rather than `None`.
    fn pricing(&self, model: &str) -> Option<ModelPricing>;

    /// Factor by which the computed cost/baseline is scaled to reflect a
    /// surcharge the provider adds on top of the underlying model cost (for
    /// example OpenRouter's 5% BYOK fee).
    ///
    /// The default is `1.0`, i.e. no surcharge.
    fn fee_multiplier(&self) -> f64 {
        const NO_SURCHARGE: f64 = 1.0;
        NO_SURCHARGE
    }

    /// Reports which request params of `req` this adapter **silently drops**
    /// while translating, because the upstream provider would reject them.
    ///
    /// The gateway surfaces each entry as
    /// `X-TokenTrimmer-Warnings: param_dropped:<name>`. The default
    /// implementation drops nothing and returns an empty list.
    fn dropped_params(&self, _req: &ChatCompletionRequest) -> Vec<String> {
        let nothing_dropped: Vec<String> = Vec::new();
        nothing_dropped
    }

    /// Tells the gateway whether `response_format: json_schema` (structured
    /// outputs) is honored by this provider.
    ///
    /// Defaults to `true`, since most adapters pass `response_format` through
    /// verbatim and the gateway must NOT strip a schema unless it is certain
    /// the provider cannot handle it — stripping would silently lose
    /// structured-output capability. Return `false` only for a provider known
    /// to be `json_object`-only; in that case the gateway downgrades the
    /// request to `json_object` and emits a `response_format_downgrade` warning.
    fn supports_response_schema(&self) -> bool {
        true
    }

    /// The `(min, max)` bounds of `temperature` values the provider accepts.
    ///
    /// A request value outside these bounds is clamped by the gateway, which
    /// then emits `temperature_clamped`. The default `(0.0, 2.0)` is the widest
    /// common range (OpenAI/Gemini). Narrow it only when you are confident the
    /// tighter range is correct, so that the gateway never wrongly tightens a
    /// provider whose true maximum is uncertain.
    fn temperature_range(&self) -> (f32, f32) {
        const MIN_TEMPERATURE: f32 = 0.0;
        const MAX_TEMPERATURE: f32 = 2.0;
        (MIN_TEMPERATURE, MAX_TEMPERATURE)
    }

    /// Performs a chat completion and returns the whole response at once
    /// (non-streaming).
    async fn chat_completion(
        &self,
        req: ChatCompletionRequest,
        ctx: &RequestContext,
    ) -> Result<ChatCompletionResponse, ProviderError>;

    /// Performs a chat completion and returns the result as a stream of chunks.
    async fn chat_completion_stream(
        &self,
        req: ChatCompletionRequest,
        ctx: &RequestContext,
    ) -> Result<BoxStream<'static, Result<ChatCompletionChunk, ProviderError>>, ProviderError>;

    /// Computes embeddings.
    ///
    /// The default implementation is for providers that do not offer
    /// embeddings: it always fails with `ProviderError::Unsupported`, naming
    /// the provider via [`Provider::id`].
    async fn embeddings(
        &self,
        _req: EmbeddingsRequest,
        _ctx: &RequestContext,
    ) -> Result<EmbeddingsResponse, ProviderError> {
        let provider_id = self.id();
        let reason = format!("{} does not support embeddings", provider_id);
        Err(ProviderError::Unsupported(reason))
    }

    /// Liveness probe. Implementations should avoid calling the provider's
    /// pricey endpoints.
    ///
    /// The default implementation reports healthy without doing any work.
    async fn health_check(&self) -> Result<(), ProviderError> {
        Ok(())
    }
}