use std::sync::Arc;
use std::task::{Context, Poll};
use futures::StreamExt;
use futures::future::BoxFuture;
use tower::util::BoxCloneService;
use tower::{Layer, Service, ServiceExt};
use crate::codecs::{BoxDeltaStream, Codec};
use crate::context::ExecutionContext;
use crate::error::{Error, Result};
use crate::ir::{
ContentPart, JsonSchemaSpec, Message, ModelRequest, ModelResponse, ReasoningEffort,
ResponseFormat, Role, SystemPrompt, ToolChoice, ToolSpec,
};
use crate::overrides::{RequestOverrides, RunOverrides};
use crate::service::{
BoxedModelService, BoxedStreamingService, ModelInvocation, ModelStream, NamedLayer,
StreamingModelInvocation,
};
use crate::stream::{StreamDelta, tap_aggregator};
use crate::transports::Transport;
/// Layers context-scoped overrides on top of an already-built request.
///
/// Two optional extensions are looked up on `ctx`:
///
/// * [`RunOverrides`] may replace the model name, the system prompt and the
///   tool specs.
/// * [`RequestOverrides`] may replace the sampling knobs, stop sequences,
///   reasoning effort, tool choice, response format, the parallel-tool-call
///   flag, the end-user id and the seed.
///
/// A field on `request` is written only when the matching override accessor
/// returns `Some`; otherwise it keeps the value it already had.
fn apply_overrides(request: &mut ModelRequest, ctx: &ExecutionContext) {
    if let Some(run_overrides) = ctx.extension::<RunOverrides>() {
        if let Some(model_name) = run_overrides.model() {
            model_name.clone_into(&mut request.model);
        }
        if let Some(prompt) = run_overrides.system_prompt() {
            request.system = prompt.clone();
        }
        if let Some(tool_specs) = run_overrides.tool_specs() {
            request.tools = Arc::clone(tool_specs);
        }
    }

    if let Some(per_request) = ctx.extension::<RequestOverrides>() {
        // Plain `Copy` scalars: the override wins, otherwise keep what is there.
        request.temperature = per_request.temperature().or(request.temperature);
        request.top_p = per_request.top_p().or(request.top_p);
        request.top_k = per_request.top_k().or(request.top_k);
        request.max_tokens = per_request.max_tokens().or(request.max_tokens);

        if let Some(stops) = per_request.stop_sequences() {
            request.stop_sequences = stops.to_vec();
        }
        if let Some(reasoning) = per_request.reasoning_effort() {
            request.reasoning_effort = Some(reasoning.clone());
        }
        if let Some(tool_choice) = per_request.tool_choice() {
            request.tool_choice = tool_choice.clone();
        }
        if let Some(response_format) = per_request.response_format() {
            request.response_format = Some(response_format.clone());
        }
        if let Some(allow_parallel) = per_request.parallel_tool_calls() {
            request.parallel_tool_calls = Some(allow_parallel);
        }
        if let Some(end_user) = per_request.end_user_id() {
            request.end_user_id = Some(end_user.to_owned());
        }
        if let Some(seed_value) = per_request.seed() {
            request.seed = Some(seed_value);
        }
    }
}
/// Per-model defaults from which requests are built.
///
/// `build_request` copies the request-shaping fields (`model` through
/// `reasoning_effort`) into a fresh `ModelRequest`; `validation_retries` and
/// `token_counter` are not part of the request.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct ChatModelConfig {
/// Model identifier copied into the request's `model` field.
model: String,
/// Optional cap on generated tokens; `None` leaves it unset on the request.
max_tokens: Option<u32>,
/// System prompt copied into every request.
system: SystemPrompt,
/// Optional sampling temperature.
temperature: Option<f32>,
/// Optional `top_p` sampling parameter.
top_p: Option<f32>,
/// Optional `top_k` sampling parameter.
top_k: Option<u32>,
/// Stop sequences copied into every request.
stop_sequences: Vec<String>,
/// Tool specs shared with each request via `Arc::clone`.
tools: Arc<[ToolSpec]>,
/// Tool-choice policy copied into every request.
tool_choice: ToolChoice,
/// Optional reasoning-effort setting.
reasoning_effort: Option<ReasoningEffort>,
/// Maximum number of corrective retries `ChatModel::complete_typed_validated`
/// performs; `ChatModel::stream_typed` only logs when this is non-zero.
validation_retries: u32,
/// Optional token counter; stored here and exposed via `token_counter()`.
token_counter: Option<std::sync::Arc<dyn crate::tokens::TokenCounter>>,
}
impl ChatModelConfig {
    /// Creates a configuration for `model` with every optional knob unset:
    /// no token cap, default system prompt and tool choice, no sampling
    /// parameters, no stop sequences, no tools, zero validation retries and
    /// no token counter.
    #[must_use]
    pub fn new(model: impl Into<String>) -> Self {
        Self {
            model: model.into(),
            max_tokens: None,
            system: SystemPrompt::default(),
            temperature: None,
            top_p: None,
            top_k: None,
            stop_sequences: Vec::new(),
            tools: Arc::from([]),
            tool_choice: ToolChoice::default(),
            reasoning_effort: None,
            validation_retries: 0,
            token_counter: None,
        }
    }

    /// Returns the configured token counter, if one was set.
    #[must_use]
    pub fn token_counter(&self) -> Option<&Arc<dyn crate::tokens::TokenCounter>> {
        self.token_counter.as_ref()
    }

    /// Returns the configured number of validation retries.
    #[must_use]
    pub const fn validation_retries(&self) -> u32 {
        self.validation_retries
    }

    /// Returns the model identifier.
    #[must_use]
    pub fn model(&self) -> &str {
        &self.model
    }

    /// Returns the configured token cap, if any.
    #[must_use]
    pub const fn max_tokens(&self) -> Option<u32> {
        self.max_tokens
    }

    /// Returns the configured system prompt.
    #[must_use]
    pub const fn system(&self) -> &SystemPrompt {
        &self.system
    }

    /// Returns the configured sampling temperature, if any.
    #[must_use]
    pub const fn temperature(&self) -> Option<f32> {
        self.temperature
    }

    /// Returns the configured `top_p`, if any.
    #[must_use]
    pub const fn top_p(&self) -> Option<f32> {
        self.top_p
    }

    /// Returns the configured `top_k`, if any.
    #[must_use]
    pub const fn top_k(&self) -> Option<u32> {
        self.top_k
    }

    /// Returns the configured stop sequences.
    #[must_use]
    pub fn stop_sequences(&self) -> &[String] {
        &self.stop_sequences
    }

    /// Returns the configured tool specs.
    #[must_use]
    pub fn tools(&self) -> &[ToolSpec] {
        &self.tools
    }

    /// Returns the configured tool-choice policy.
    #[must_use]
    pub const fn tool_choice(&self) -> &ToolChoice {
        &self.tool_choice
    }

    /// Returns the configured reasoning effort, if any.
    #[must_use]
    pub const fn reasoning_effort(&self) -> Option<&ReasoningEffort> {
        self.reasoning_effort.as_ref()
    }

    /// Builds a [`ModelRequest`] for `messages` seeded with this
    /// configuration.
    ///
    /// Fields this configuration does not carry are taken from
    /// `ModelRequest::default()`.
    #[must_use]
    pub fn build_request(&self, messages: Vec<Message>) -> ModelRequest {
        ModelRequest {
            model: self.model.clone(),
            messages,
            system: self.system.clone(),
            max_tokens: self.max_tokens,
            temperature: self.temperature,
            top_p: self.top_p,
            top_k: self.top_k,
            stop_sequences: self.stop_sequences.clone(),
            // Tool specs are shared, not copied.
            tools: Arc::clone(&self.tools),
            tool_choice: self.tool_choice.clone(),
            reasoning_effort: self.reasoning_effort.clone(),
            ..ModelRequest::default()
        }
    }
}
/// Innermost service: a codec paired with a transport, both behind `Arc` so
/// clones share the same instances.
pub(crate) struct InnerChatModel<C: Codec, T: Transport> {
/// Encodes requests and decodes responses.
codec: Arc<C>,
/// Sends encoded requests to the provider.
transport: Arc<T>,
}
/// Manual `Clone`: only the `Arc` handles are duplicated, so neither `C` nor
/// `T` has to be `Clone`.
impl<C: Codec, T: Transport> Clone for InnerChatModel<C, T> {
    fn clone(&self) -> Self {
        let Self { codec, transport } = self;
        Self {
            codec: Arc::clone(codec),
            transport: Arc::clone(transport),
        }
    }
}
impl<C: Codec, T: Transport> InnerChatModel<C, T> {
    /// Wraps already-shared codec and transport handles.
    pub(crate) const fn from_arc(codec: Arc<C>, transport: Arc<T>) -> Self {
        Self { codec, transport }
    }

    /// Borrows the codec.
    pub(crate) fn codec(&self) -> &C {
        self.codec.as_ref()
    }

    /// Borrows the transport.
    pub(crate) fn transport(&self) -> &T {
        self.transport.as_ref()
    }
}
impl<C: Codec + 'static, T: Transport + 'static> Service<ModelInvocation> for InnerChatModel<C, T> {
type Response = ModelResponse;
type Error = Error;
type Future = BoxFuture<'static, Result<ModelResponse>>;
#[inline]
fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll<Result<()>> {
Poll::Ready(Ok(()))
}
fn call(&mut self, invocation: ModelInvocation) -> Self::Future {
let codec = Arc::clone(&self.codec);
let transport = Arc::clone(&self.transport);
Box::pin(async move {
let ModelInvocation { request, ctx } = invocation;
let encoded = codec.encode(&request)?;
let warnings = encoded.warnings.clone();
let response = transport.send(encoded, &ctx).await?;
if !(200..300).contains(&response.status) {
let body_text = String::from_utf8_lossy(&response.body).into_owned();
let mut err = Error::provider_http(response.status, body_text);
if let Some(after) =
crate::transports::parse_retry_after(response.headers.get("retry-after"))
{
err = err.with_retry_after(after);
}
return Err(err);
}
let rate_limit = codec.extract_rate_limit(&response.headers);
let mut decoded = codec.decode(&response.body, warnings)?;
decoded.rate_limit = rate_limit;
Ok(decoded)
})
}
}
/// Streaming path: encode the request, open a streaming response through the
/// transport, and expose the decoded deltas as a `ModelStream`.
impl<C: Codec + 'static, T: Transport + 'static> Service<StreamingModelInvocation>
for InnerChatModel<C, T>
{
type Response = ModelStream;
type Error = Error;
type Future = BoxFuture<'static, Result<ModelStream>>;
/// Always reports ready.
#[inline]
fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll<Result<()>> {
Poll::Ready(Ok(()))
}
/// Sends the streaming request and returns the decoded delta stream.
///
/// A status outside `200..300` is turned into `Error::provider_http` after
/// draining the body; otherwise the deltas produced by the codec are
/// returned, preceded by a `StreamDelta::RateLimit` item when the codec
/// extracts a rate-limit snapshot from the headers.
fn call(&mut self, invocation: StreamingModelInvocation) -> Self::Future {
let codec = Arc::clone(&self.codec);
let transport = Arc::clone(&self.transport);
Box::pin(async move {
let StreamingModelInvocation {
inner: ModelInvocation { request, ctx },
} = invocation;
let encoded = codec.encode_streaming(&request)?;
// Keep the encode-time warnings; `encoded` is moved into the transport.
let warnings = encoded.warnings.clone();
let stream = transport.send_streaming(encoded, &ctx).await?;
if !(200..300).contains(&stream.status) {
// Error path: collect the body for the error message. Chunks that
// arrive as `Err` are skipped rather than aborting the drain.
let mut buf = Vec::new();
let mut body = stream.body;
while let Some(chunk) = body.next().await {
if let Ok(b) = chunk {
buf.extend_from_slice(&b);
}
}
let body_text = String::from_utf8_lossy(&buf).into_owned();
let mut err = Error::provider_http(stream.status, body_text);
// Attach the retry hint only when the `retry-after` header parses.
if let Some(after) =
crate::transports::parse_retry_after(stream.headers.get("retry-after"))
{
err = err.with_retry_after(after);
}
return Err(err);
}
let rate_limit = codec.extract_rate_limit(&stream.headers);
// Separate handle that is moved into the generated stream below.
let codec_for_stream = Arc::clone(&codec);
// Re-yields every item of the codec's decoded stream from a generator
// that owns the codec handle it uses.
#[allow(tail_expr_drop_order)]
let codec_stream: BoxDeltaStream<'static> = Box::pin(async_stream::stream! {
let inner = codec_for_stream.decode_stream(stream.body, warnings);
futures::pin_mut!(inner);
while let Some(delta) = inner.next().await {
yield delta;
}
});
// When a rate-limit snapshot is available, emit it as the first delta.
let prefixed: BoxDeltaStream<'static> = match rate_limit {
Some(snapshot) => {
let prepend = futures::stream::iter(vec![Ok(StreamDelta::RateLimit(snapshot))]);
Box::pin(prepend.chain(codec_stream))
}
None => codec_stream,
};
// `tap_aggregator` turns the delta stream into the returned `ModelStream`.
Ok(tap_aggregator(prefixed))
})
}
}
/// Shared closure that wraps an `InnerChatModel` into the boxed one-shot
/// service, applying whatever layers were registered.
type LayerFactory<C, T> = Arc<dyn Fn(InnerChatModel<C, T>) -> BoxedModelService + Send + Sync>;
/// Streaming counterpart of `LayerFactory`: produces the boxed streaming
/// service.
type StreamingLayerFactory<C, T> =
Arc<dyn Fn(InnerChatModel<C, T>) -> BoxedStreamingService + Send + Sync>;
/// A chat model: a codec/transport pair, its request defaults, and an
/// optional stack of layers applied to both the one-shot and streaming
/// services.
pub struct ChatModel<C: Codec + 'static, T: Transport + 'static> {
/// Innermost service (codec + transport).
inner: InnerChatModel<C, T>,
/// Defaults used to build each request.
config: ChatModelConfig,
/// Builds the layered one-shot service; `None` until a layer is added.
factory: Option<LayerFactory<C, T>>,
/// Builds the layered streaming service; `None` until a layer is added.
streaming_factory: Option<StreamingLayerFactory<C, T>>,
/// Names of the added layers, in insertion order.
layer_names: Vec<&'static str>,
}
/// Manual `Clone` so that `C` and `T` themselves need not be `Clone`; every
/// field is cloned individually.
impl<C: Codec + 'static, T: Transport + 'static> Clone for ChatModel<C, T> {
    fn clone(&self) -> Self {
        let Self {
            inner,
            config,
            factory,
            streaming_factory,
            layer_names,
        } = self;
        Self {
            inner: inner.clone(),
            config: config.clone(),
            factory: factory.clone(),
            streaming_factory: streaming_factory.clone(),
            layer_names: layer_names.clone(),
        }
    }
}
impl<C: Codec + 'static, T: Transport + 'static> ChatModel<C, T> {
/// Creates a chat model that owns `codec` and `transport`.
///
/// Both are placed behind fresh `Arc`s and handed to [`Self::from_arc`].
pub fn new(codec: C, transport: T, model: impl Into<String>) -> Self {
    let shared_codec = Arc::new(codec);
    let shared_transport = Arc::new(transport);
    Self::from_arc(shared_codec, shared_transport, model)
}
/// Creates a chat model from already-shared codec and transport handles.
///
/// The configuration starts from `ChatModelConfig::new(model)` and no layers
/// are installed.
pub fn from_arc(codec: Arc<C>, transport: Arc<T>, model: impl Into<String>) -> Self {
    let inner = InnerChatModel::from_arc(codec, transport);
    let config = ChatModelConfig::new(model);
    Self {
        inner,
        config,
        factory: None,
        streaming_factory: None,
        layer_names: Vec::new(),
    }
}
/// Sets the token cap used for built requests.
#[must_use]
pub const fn with_max_tokens(mut self, n: u32) -> Self {
self.config.max_tokens = Some(n);
self
}
/// Sets the system prompt from `s` via `SystemPrompt::text`.
#[must_use]
pub fn with_system(mut self, s: impl Into<String>) -> Self {
self.config.system = SystemPrompt::text(s);
self
}
/// Sets the system prompt to an already-built `SystemPrompt`.
#[must_use]
pub fn with_system_prompt(mut self, prompt: SystemPrompt) -> Self {
self.config.system = prompt;
self
}
/// Sets the sampling temperature.
#[must_use]
pub const fn with_temperature(mut self, t: f32) -> Self {
self.config.temperature = Some(t);
self
}
/// Sets how many corrective retries `complete_typed_validated` may perform.
#[must_use]
pub const fn with_validation_retries(mut self, n: u32) -> Self {
self.config.validation_retries = n;
self
}
/// Stores a token counter in the configuration.
#[must_use]
pub fn with_token_counter(
mut self,
counter: std::sync::Arc<dyn crate::tokens::TokenCounter>,
) -> Self {
self.config.token_counter = Some(counter);
self
}
/// Sets the `top_p` sampling parameter.
#[must_use]
pub const fn with_top_p(mut self, p: f32) -> Self {
self.config.top_p = Some(p);
self
}
/// Sets the `top_k` sampling parameter.
#[must_use]
pub const fn with_top_k(mut self, k: u32) -> Self {
self.config.top_k = Some(k);
self
}
/// Appends one stop sequence to the existing list.
#[must_use]
pub fn with_stop_sequence(mut self, s: impl Into<String>) -> Self {
self.config.stop_sequences.push(s.into());
self
}
/// Replaces the whole stop-sequence list with `seqs`.
#[must_use]
pub fn with_stop_sequences(mut self, seqs: Vec<String>) -> Self {
self.config.stop_sequences = seqs;
self
}
/// Replaces the tool specs.
#[must_use]
pub fn with_tools(mut self, tools: impl Into<Arc<[ToolSpec]>>) -> Self {
self.config.tools = tools.into();
self
}
/// Sets the tool-choice policy.
#[must_use]
pub fn with_tool_choice(mut self, c: ToolChoice) -> Self {
self.config.tool_choice = c;
self
}
/// Sets the reasoning effort.
#[must_use]
pub fn with_reasoning_effort(mut self, effort: ReasoningEffort) -> Self {
self.config.reasoning_effort = Some(effort);
self
}
/// Adds `layer` to both the one-shot and the streaming service stacks.
///
/// The layer's name is appended to `layer_names`. Each new factory first
/// builds the stack from the previously installed factory (or boxes the bare
/// inner service when there is none) and then wraps it with `layer`, so the
/// most recently added layer is the outermost one.
#[must_use]
pub fn layer<L>(mut self, layer: L) -> Self
where
L: Layer<BoxedModelService>
+ Layer<BoxedStreamingService>
+ NamedLayer
+ Clone
+ Send
+ Sync
+ 'static,
<L as Layer<BoxedModelService>>::Service: Service<ModelInvocation, Response = ModelResponse, Error = Error>
+ Clone
+ Send
+ 'static,
<<L as Layer<BoxedModelService>>::Service as Service<ModelInvocation>>::Future:
Send + 'static,
<L as Layer<BoxedStreamingService>>::Service: Service<StreamingModelInvocation, Response = ModelStream, Error = Error>
+ Clone
+ Send
+ 'static,
<<L as Layer<BoxedStreamingService>>::Service as Service<StreamingModelInvocation>>::Future:
Send + 'static,
{
self.layer_names.push(layer.layer_name());
// Take the existing factories so the new closures can own and call them.
let prev = self.factory.take();
let prev_streaming = self.streaming_factory.take();
// One copy of the layer per factory closure.
let layer_one_shot = layer.clone();
let layer_streaming = layer;
let new_factory: LayerFactory<C, T> = Arc::new(move |inner: InnerChatModel<C, T>| {
let stacked: BoxedModelService = match &prev {
Some(prev_factory) => prev_factory(inner),
None => BoxCloneService::new(inner),
};
BoxCloneService::new(<L as Layer<BoxedModelService>>::layer(
&layer_one_shot,
stacked,
))
});
let new_streaming: StreamingLayerFactory<C, T> =
Arc::new(move |inner: InnerChatModel<C, T>| {
let stacked: BoxedStreamingService = match &prev_streaming {
Some(prev_factory) => prev_factory(inner),
None => BoxCloneService::new(inner),
};
BoxCloneService::new(<L as Layer<BoxedStreamingService>>::layer(
&layer_streaming,
stacked,
))
});
self.factory = Some(new_factory);
self.streaming_factory = Some(new_streaming);
self
}
/// Adds a layer under an explicit `name`.
///
/// The layer is wrapped in `crate::service::WithName::new(name, layer)` and
/// passed to [`Self::layer`]. Unlike `layer`, `L` itself is not required to
/// implement `NamedLayer` here; presumably the wrapper supplies the name
/// (NOTE(review): confirm against `WithName`).
#[must_use]
pub fn layer_named<L>(self, name: &'static str, layer: L) -> Self
where
L: Layer<BoxedModelService> + Layer<BoxedStreamingService> + Clone + Send + Sync + 'static,
<L as Layer<BoxedModelService>>::Service: Service<ModelInvocation, Response = ModelResponse, Error = Error>
+ Clone
+ Send
+ 'static,
<<L as Layer<BoxedModelService>>::Service as Service<ModelInvocation>>::Future:
Send + 'static,
<L as Layer<BoxedStreamingService>>::Service: Service<StreamingModelInvocation, Response = ModelStream, Error = Error>
+ Clone
+ Send
+ 'static,
<<L as Layer<BoxedStreamingService>>::Service as Service<StreamingModelInvocation>>::Future:
Send + 'static,
{
self.layer(crate::service::WithName::new(name, layer))
}
/// Names of the layers added so far, in the order they were added.
#[must_use]
pub fn layer_names(&self) -> &[&'static str] {
    &self.layer_names
}

/// Borrows the codec.
#[must_use]
pub fn codec(&self) -> &C {
    self.inner.codec()
}

/// Borrows the transport.
#[must_use]
pub fn transport(&self) -> &T {
    self.inner.transport()
}

/// Borrows the request defaults.
#[must_use]
pub const fn config(&self) -> &ChatModelConfig {
    &self.config
}
/// Builds the one-shot service.
///
/// With layers installed the layer factory is applied to a clone of the inner
/// service; otherwise the bare inner service is boxed.
#[must_use]
pub fn service(&self) -> BoxedModelService {
    if let Some(build_layered) = self.factory.as_ref() {
        return build_layered(self.inner.clone());
    }
    BoxCloneService::new(self.inner.clone())
}
/// Builds the streaming service.
///
/// With layers installed the streaming layer factory is applied to a clone of
/// the inner service; otherwise the bare inner service is boxed.
#[must_use]
pub fn streaming_service(&self) -> BoxedStreamingService {
    if let Some(build_layered) = self.streaming_factory.as_ref() {
        return build_layered(self.inner.clone());
    }
    BoxCloneService::new(self.inner.clone())
}
/// Runs a one-shot completion and returns only the assistant message.
///
/// Delegates to [`Self::complete_full`] and wraps the response content in a
/// `Role::Assistant` message.
///
/// # Errors
///
/// Propagates any error from `complete_full`.
pub async fn complete(
    &self,
    messages: Vec<Message>,
    ctx: &ExecutionContext,
) -> Result<Message> {
    self.complete_full(messages, ctx)
        .await
        .map(|response| Message::new(Role::Assistant, response.content))
}
/// Runs a one-shot completion and returns the full model response.
///
/// The request is built from the configuration, then context overrides are
/// applied. When the context carries a run budget it is checked before the
/// request is sent and fed the response usage afterwards.
///
/// # Errors
///
/// Returns an error when a budget check fails or the service call fails.
pub async fn complete_full(
    &self,
    messages: Vec<Message>,
    ctx: &ExecutionContext,
) -> Result<ModelResponse> {
    let run_budget = ctx.run_budget();
    if let Some(limits) = run_budget.as_ref() {
        limits.check_pre_request()?;
    }

    let invocation = {
        let mut request = self.config.build_request(messages);
        apply_overrides(&mut request, ctx);
        ModelInvocation::new(request, ctx.clone())
    };
    let response = self.service().oneshot(invocation).await?;

    if let Some(limits) = run_budget.as_ref() {
        limits.observe_usage(&response.usage)?;
    }
    Ok(response)
}
/// Runs a completion constrained to the JSON schema of `O` and parses the
/// reply into `O`.
///
/// Equivalent to [`Self::complete_typed_validated`] with a validator that
/// accepts every parsed value.
///
/// # Errors
///
/// Propagates any error from `complete_typed_validated`.
pub async fn complete_typed<O>(
    &self,
    messages: Vec<Message>,
    ctx: &ExecutionContext,
) -> Result<O>
where
    O: schemars::JsonSchema + serde::de::DeserializeOwned + Send + 'static,
{
    let parsed = self
        .complete_typed_validated(messages, |_: &O| Ok(()), ctx)
        .await?;
    Ok(parsed)
}
pub async fn complete_typed_validated<O, V>(
&self,
messages: Vec<Message>,
validator: V,
ctx: &ExecutionContext,
) -> Result<O>
where
O: schemars::JsonSchema + serde::de::DeserializeOwned + Send + 'static,
V: crate::output_validator::OutputValidator<O>,
{
use crate::llm_facing::RenderedForLlm;
let schema_value = serde_json::to_value(schemars::schema_for!(O)).map_err(Error::Serde)?;
let type_name = std::any::type_name::<O>();
let short_name = type_name.rsplit("::").next().unwrap_or(type_name);
let spec = JsonSchemaSpec::new(short_name, schema_value)?;
let format = ResponseFormat::strict(spec);
let mut conversation = messages;
let max_retries = self.config.validation_retries;
let mut attempt: u32 = 0;
loop {
let budget = ctx.run_budget();
if let Some(budget) = &budget {
budget.check_pre_request()?;
}
let mut request = self.config.build_request(conversation.clone());
apply_overrides(&mut request, ctx);
request.response_format = Some(format.clone());
let invocation = ModelInvocation::new(request, ctx.clone());
let response = self.service().oneshot(invocation).await?;
if let Some(budget) = &budget {
budget.observe_usage(&response.usage)?;
}
let assistant_text = response_text_for_retry(&response);
let retry_hint: RenderedForLlm<String> =
match parse_typed_response::<O>(short_name, response) {
Ok(value) => match validator.validate(&value) {
Ok(()) => return Ok(value),
Err(Error::ModelRetry { hint, .. }) => hint,
Err(err) => return Err(err),
},
Err(Error::ModelRetry { hint, .. }) => hint,
Err(err) => return Err(err),
};
if attempt >= max_retries {
return Err(Error::model_retry(retry_hint, attempt));
}
attempt += 1;
conversation.push(Message::new(
crate::ir::Role::Assistant,
vec![ContentPart::Text {
text: assistant_text.unwrap_or_default(),
cache_control: None,
provider_echoes: Vec::new(),
}],
));
conversation.push(Message::new(
crate::ir::Role::User,
vec![ContentPart::Text {
text: retry_hint.into_inner(),
cache_control: None,
provider_echoes: Vec::new(),
}],
));
}
}
/// Starts a streaming completion and returns the delta stream together with
/// a completion future.
///
/// When the context carries a run budget it is checked before the request is
/// sent. The returned completion future wraps the service's own completion:
/// on a successful result it reports the response usage to the budget and
/// then yields that result unchanged.
///
/// # Errors
///
/// Returns an error when the pre-request budget check or the streaming
/// service call fails. The completion future fails when the underlying
/// completion fails or when the budget rejects the observed usage.
pub async fn stream_deltas(
    &self,
    messages: Vec<Message>,
    ctx: &ExecutionContext,
) -> Result<ModelStream> {
    let budget = ctx.run_budget();
    if let Some(budget) = &budget {
        budget.check_pre_request()?;
    }
    let mut request = self.config.build_request(messages);
    apply_overrides(&mut request, ctx);
    let invocation = StreamingModelInvocation::new(ModelInvocation::new(request, ctx.clone()));
    let ModelStream { stream, completion } =
        self.streaming_service().oneshot(invocation).await?;
    // `budget` is not used again in this function, so it is moved into the
    // completion future instead of being cloned.
    let user_facing = async move {
        let result = completion.await;
        if let (Ok(response), Some(budget)) = (&result, budget.as_ref()) {
            budget.observe_usage(&response.usage)?;
        }
        result
    };
    Ok(ModelStream {
        stream,
        completion: Box::pin(user_facing),
    })
}
/// Starts a schema-constrained streaming completion whose completion future
/// resolves to a parsed `O`.
///
/// The request is built like in `stream_deltas`, with a strict
/// `response_format` derived from `O`'s JSON schema set after the context
/// overrides. `validation_retries` is not honoured here; a debug event is
/// logged when it is non-zero.
///
/// The schema name is the last path segment of `O`'s type name with generic
/// arguments removed, so `a::Wrapper<b::Inner>` is named `Wrapper` rather
/// than `Inner>`.
///
/// # Errors
///
/// Returns an error on schema serialization or `JsonSchemaSpec::new` failure,
/// on a failed pre-request budget check, or when the streaming service call
/// fails. The completion future fails when the underlying completion fails,
/// the budget rejects the observed usage, or the response cannot be parsed
/// into `O`.
pub async fn stream_typed<O>(
    &self,
    messages: Vec<Message>,
    ctx: &ExecutionContext,
) -> Result<TypedModelStream<O>>
where
    O: schemars::JsonSchema + serde::de::DeserializeOwned + Send + 'static,
{
    if self.config.validation_retries > 0 {
        tracing::debug!(
            validation_retries = self.config.validation_retries,
            "ChatModel::stream_typed ignores validation_retries — \
             streaming + retry would emit a divergent second stream \
             over already-surfaced deltas. Use complete_typed for \
             the unified retry budget."
        );
    }
    let schema_value = serde_json::to_value(schemars::schema_for!(O)).map_err(Error::Serde)?;
    let type_name = std::any::type_name::<O>();
    // Strip generic arguments before taking the last path segment; otherwise
    // the segment is cut inside the argument list (`Wrapper<m::Ty>` -> `Ty>`).
    let base_name = type_name
        .split('<')
        .next()
        .filter(|base| !base.is_empty())
        .unwrap_or(type_name);
    // Still a `&'static str` slice of `type_name`, so the completion future
    // can capture it without an owned copy.
    let short_name = base_name.rsplit("::").next().unwrap_or(base_name);
    let spec = JsonSchemaSpec::new(short_name, schema_value)?;
    let format = ResponseFormat::strict(spec);
    let budget = ctx.run_budget();
    if let Some(budget) = &budget {
        budget.check_pre_request()?;
    }
    let mut request = self.config.build_request(messages);
    apply_overrides(&mut request, ctx);
    request.response_format = Some(format);
    let invocation = StreamingModelInvocation::new(ModelInvocation::new(request, ctx.clone()));
    let ModelStream { stream, completion } =
        self.streaming_service().oneshot(invocation).await?;
    // `budget` is not used again in this function, so it is moved into the
    // completion future instead of being cloned.
    let typed_completion = async move {
        let response = completion.await?;
        if let Some(budget) = &budget {
            budget.observe_usage(&response.usage)?;
        }
        parse_typed_response::<O>(short_name, response)
    };
    Ok(TypedModelStream {
        stream,
        completion: Box::pin(typed_completion),
    })
}
}
/// Result of `ChatModel::stream_typed`: the raw delta stream plus a future
/// that resolves to the parsed `O`.
pub struct TypedModelStream<O> {
/// Stream of deltas as produced by the streaming service.
pub stream: BoxDeltaStream<'static>,
/// Resolves to the typed value parsed from the completed response, or to
/// the error that prevented it.
pub completion: BoxFuture<'static, Result<O>>,
}
/// Placeholder `Debug`: neither field is printable, so fixed markers are
/// emitted, with the completion marker naming the output type `O`.
impl<O> std::fmt::Debug for TypedModelStream<O> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("TypedModelStream");
        out.field("stream", &"<BoxDeltaStream>");
        out.field(
            "completion",
            &format_args!("<BoxFuture<Result<{}>>>", std::any::type_name::<O>()),
        );
        out.finish()
    }
}
/// Compact `Debug`: prints the model name, the codec and transport names, and
/// the list of layer names.
impl<C: Codec + 'static, T: Transport + 'static> std::fmt::Debug for ChatModel<C, T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("ChatModel");
        out.field("model", &self.config.model);
        out.field("codec", &self.codec().name());
        out.field("transport", &self.transport().name());
        out.field("layers", &self.layer_names);
        out.finish()
    }
}
/// Concatenates the text of every `ContentPart::Text` part in `response`, in
/// order and without separators.
///
/// Returns `None` when the concatenation is empty (no text parts, or only
/// empty ones).
fn response_text_for_retry(response: &ModelResponse) -> Option<String> {
    let joined: String = response
        .content
        .iter()
        .filter_map(|part| match part {
            ContentPart::Text { text, .. } => Some(text.as_str()),
            _ => None,
        })
        .collect();
    (!joined.is_empty()).then_some(joined)
}
/// Parses a model response into `O`.
///
/// The first `ContentPart::ToolUse` part wins: its `input` is deserialized
/// into `O`. Without a tool-use part, the first `ContentPart::Text` part is
/// parsed as JSON instead.
///
/// # Errors
///
/// * A deserialization failure becomes `Error::model_retry` carrying the
///   diagnostic built by `schema_mismatch_diagnostic` (attempt count `0`).
/// * A response with neither a tool-use nor a text part yields
///   `Error::invalid_request`.
// The response is taken by value to keep the existing call sites unchanged,
// even though it is only read here.
#[allow(clippy::needless_pass_by_value)]
fn parse_typed_response<O>(short_name: &str, response: ModelResponse) -> Result<O>
where
    O: serde::de::DeserializeOwned,
{
    use crate::llm_facing::LlmRenderable;
    let wrap = |e: serde_json::Error| -> Error {
        Error::model_retry(schema_mismatch_diagnostic(short_name, &e).for_llm(), 0)
    };
    for part in &response.content {
        if let ContentPart::ToolUse { input, .. } = part {
            // `&serde_json::Value` is itself a `Deserializer`, so the tool
            // input can be read in place instead of cloning the whole value.
            return <O as serde::Deserialize<'_>>::deserialize(input).map_err(wrap);
        }
    }
    for part in &response.content {
        if let ContentPart::Text { text, .. } = part {
            return serde_json::from_str(text).map_err(wrap);
        }
    }
    Err(Error::invalid_request(
        "complete_typed: model response carried neither a `tool_use` block nor a text \
         block — the configured `OutputStrategy` did not produce typed output",
    ))
}
/// Builds the corrective message sent back to the model after its output
/// failed to deserialize.
///
/// The parser error is reduced to the text before the first `" at line "`
/// marker and stripped of trailing dots before being embedded in the message.
fn schema_mismatch_diagnostic(short_name: &str, err: &serde_json::Error) -> String {
    let full_message = err.to_string();
    let without_position = full_message
        .split_once(" at line ")
        .map_or(full_message.as_str(), |(head, _position)| head);
    let trimmed = without_position.trim_end_matches('.');
    format!(
        "Your previous response did not match the required JSON schema for `{short_name}`. \
         Parser diagnostic: {trimmed}.\n\
         Re-emit the response as a single valid JSON object that conforms to the schema."
    )
}
use crate::auth::{ApiKeyProvider, BearerProvider};
use crate::codecs::{AnthropicMessagesCodec, GeminiCodec, OpenAiChatCodec};
use crate::transports::DirectTransport;
use secrecy::SecretString;
impl ChatModel<AnthropicMessagesCodec, DirectTransport> {
    /// Creates a chat model using the Anthropic messages codec over a direct
    /// transport, with credentials from `ApiKeyProvider::anthropic(api_key)`.
    ///
    /// # Errors
    ///
    /// Returns the error from `DirectTransport::anthropic` when the transport
    /// cannot be built.
    pub fn anthropic(api_key: impl Into<SecretString>, model: impl Into<String>) -> Result<Self> {
        let key_provider = Arc::new(ApiKeyProvider::anthropic(api_key));
        let direct = DirectTransport::anthropic(key_provider)?;
        let codec = AnthropicMessagesCodec::new();
        Ok(Self::new(codec, direct, model))
    }
}
impl ChatModel<OpenAiChatCodec, DirectTransport> {
    /// Creates a chat model using the OpenAI chat codec over a direct
    /// transport, with credentials from `BearerProvider::new(api_key)`.
    ///
    /// # Errors
    ///
    /// Returns the error from `DirectTransport::openai` when the transport
    /// cannot be built.
    pub fn openai(api_key: impl Into<SecretString>, model: impl Into<String>) -> Result<Self> {
        let bearer = Arc::new(BearerProvider::new(api_key));
        let direct = DirectTransport::openai(bearer)?;
        let codec = OpenAiChatCodec::new();
        Ok(Self::new(codec, direct, model))
    }
}
impl ChatModel<GeminiCodec, DirectTransport> {
    /// Creates a chat model using the Gemini codec over a direct transport,
    /// with credentials from `BearerProvider::new(api_key)`.
    ///
    /// # Errors
    ///
    /// Returns the error from `DirectTransport::gemini` when the transport
    /// cannot be built.
    pub fn gemini(api_key: impl Into<SecretString>, model: impl Into<String>) -> Result<Self> {
        let bearer = Arc::new(BearerProvider::new(api_key));
        let direct = DirectTransport::gemini(bearer)?;
        let codec = GeminiCodec::new();
        Ok(Self::new(codec, direct, model))
    }
}