crabllm_core/
provider.rs

use crate::{
    AnthropicRequest, AnthropicResponse, AnthropicStreamEvent, AudioSpeechRequest,
    ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse, EmbeddingRequest,
    EmbeddingResponse, Error, GeminiRequest, GeminiResponse, ImageRequest, MultipartField, ir,
};
use bytes::Bytes;
use futures_core::Stream;
use std::{future::Future, pin::Pin};

10/// A boxed, `Send`, dynamically-typed stream — used by `Provider` so the
11/// trait can return a uniform stream type without each implementor leaking
12/// its concrete combinator chain through an associated type.
13pub type BoxStream<'a, T> = Pin<Box<dyn Stream<Item = T> + Send + 'a>>;
14
15/// Raw byte stream for SSE passthrough.
16pub type ByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, std::io::Error>> + Send>>;
17
18/// The dispatch surface every provider implementation satisfies.
19///
20/// Returns futures via RPITIT (return-position `impl Trait` in trait) so the
21/// type system can monomorphize through the trait without dyn dispatch or
22/// per-call boxing. The proxy crate is generic over `P: Provider`; the binary
23/// crate picks the concrete type by defining a workspace-level union enum
24/// that delegates each method.
25///
26/// Streaming responses use `BoxStream` because returning an opaque stream
27/// from an async-returning trait method requires a fixed type at the trait
28/// boundary; the boxing is one allocation per stream creation, not per item.
29/// **Implementors must clone any borrowed data from the request before
30/// returning the stream** — the returned `BoxStream` is `'static` and cannot
31/// borrow from the request reference.
32///
33/// The optional methods (`embedding`, `image_generation`, `audio_speech`,
34/// `audio_transcription`) default to returning `Error::not_implemented`, so
35/// concrete providers only override the methods they actually support.
36/// Overrides are free to capture `self` or the request reference — only the
37/// default impl bodies happen to capture nothing, and that's an
38/// implementation detail of the defaults, not a constraint on the trait.
39pub trait Provider: Send + Sync {
40    fn chat_completion(
41        &self,
42        request: &ChatCompletionRequest,
43    ) -> impl Future<Output = Result<ChatCompletionResponse, Error>> + Send;
44
45    fn chat_completion_stream(
46        &self,
47        request: &ChatCompletionRequest,
48    ) -> impl Future<Output = Result<BoxStream<'static, Result<ChatCompletionChunk, Error>>, Error>> + Send;
49
50    fn anthropic_messages(
51        &self,
52        request: &AnthropicRequest,
53    ) -> impl Future<Output = Result<AnthropicResponse, Error>> + Send;
54
55    fn anthropic_messages_stream(
56        &self,
57        request: &AnthropicRequest,
58    ) -> impl Future<Output = Result<BoxStream<'static, Result<AnthropicStreamEvent, Error>>, Error>>
59    + Send;
60
61    /// Gemini Generative Language API: `:generateContent`.
62    ///
63    /// Default impl converts `GeminiRequest` to canonical Anthropic IR
64    /// (filling `model` from the path), dispatches to `anthropic_messages`,
65    /// and converts the response back. Providers that natively speak Gemini
66    /// (e.g. `GoogleProvider`) override for direct dispatch.
67    fn gemini_generate_content(
68        &self,
69        model: &str,
70        request: &GeminiRequest,
71    ) -> impl Future<Output = Result<GeminiResponse, Error>> + Send {
72        async move {
73            let mut ir_req = ir::Request::from(request);
74            ir_req.model = model.to_string();
75            let ir_resp = self.complete(&ir_req).await?;
76            Ok(GeminiResponse::from(&ir_resp))
77        }
78    }
79
80    /// Gemini Generative Language API: `:streamGenerateContent`.
81    ///
82    /// Returns native `GeminiResponse` items — each SSE chunk from Google
83    /// is a full response object with a single candidate.
84    fn gemini_generate_content_stream(
85        &self,
86        model: &str,
87        request: &GeminiRequest,
88    ) -> impl Future<Output = Result<BoxStream<'static, Result<GeminiResponse, Error>>, Error>> + Send;
89
90    fn complete(
91        &self,
92        _request: &ir::Request,
93    ) -> impl Future<Output = Result<ir::Response, Error>> + Send {
94        async { Err(Error::not_implemented("complete")) }
95    }
96
97    fn complete_stream(
98        &self,
99        _request: &ir::Request,
100    ) -> impl Future<Output = Result<BoxStream<'static, Result<ir::StreamEvent, Error>>, Error>> + Send
101    {
102        async { Err(Error::not_implemented("complete_stream")) }
103    }
104
105    fn embedding(
106        &self,
107        _request: &EmbeddingRequest,
108    ) -> impl Future<Output = Result<EmbeddingResponse, Error>> + Send {
109        async { Err(Error::not_implemented("embedding")) }
110    }
111
112    fn image_generation(
113        &self,
114        _request: &ImageRequest,
115    ) -> impl Future<Output = Result<(Bytes, String), Error>> + Send {
116        async { Err(Error::not_implemented("image_generation")) }
117    }
118
119    fn audio_speech(
120        &self,
121        _request: &AudioSpeechRequest,
122    ) -> impl Future<Output = Result<(Bytes, String), Error>> + Send {
123        async { Err(Error::not_implemented("audio_speech")) }
124    }
125
126    fn audio_transcription(
127        &self,
128        _model: &str,
129        _fields: &[MultipartField],
130    ) -> impl Future<Output = Result<(Bytes, String), Error>> + Send {
131        async { Err(Error::not_implemented("audio_transcription")) }
132    }
133
134    /// Whether this provider speaks the OpenAI wire format and can forward
135    /// raw JSON bytes without deserialization.
136    fn is_openai_compat(&self) -> bool {
137        false
138    }
139
140    /// Whether this provider speaks the Anthropic wire format and can
141    /// forward raw `/v1/messages` bytes without translation.
142    fn is_anthropic_compat(&self) -> bool {
143        false
144    }
145
146    /// Whether this provider speaks the Gemini wire format and can
147    /// forward raw `:generateContent` bytes without translation.
148    fn is_gemini_compat(&self) -> bool {
149        false
150    }
151
152    /// Stream a request body directly to an OpenAI-compatible endpoint and
153    /// return the raw SSE response stream. The body is consumed — no retries.
154    fn chat_completion_stream_passthrough(
155        &self,
156        _model: &str,
157        _body_stream: crate::ByteStream,
158    ) -> impl Future<Output = Result<crate::ByteStream, Error>> + Send {
159        async { Err(Error::not_implemented("chat_completion_stream_passthrough")) }
160    }
161
162    /// Stream raw OpenAI SSE bytes from an OpenAI-compatible endpoint.
163    /// The default returns `not_implemented`. OpenAI-compatible providers
164    /// override to forward bytes without deserialization.
165    fn chat_completion_stream_raw(
166        &self,
167        _model: &str,
168        _raw_body: Bytes,
169    ) -> impl Future<Output = Result<crate::ByteStream, Error>> + Send {
170        async { Err(Error::not_implemented("chat_completion_stream_raw")) }
171    }
172
173    /// Forward raw OpenAI-format JSON body and return raw response bytes.
174    /// The default deserializes, calls [`chat_completion`](Self::chat_completion),
175    /// and re-serializes. OpenAI-compatible providers override to skip serde.
176    fn chat_completion_raw(
177        &self,
178        _model: &str,
179        raw_body: Bytes,
180    ) -> impl Future<Output = Result<Bytes, Error>> + Send {
181        async move {
182            let request: ChatCompletionRequest =
183                crate::json::from_slice(&raw_body).map_err(|e| Error::Internal(e.to_string()))?;
184            let resp = self.chat_completion(&request).await?;
185            Ok(Bytes::from(
186                crate::json::to_vec(&resp).map_err(|e| Error::Internal(e.to_string()))?,
187            ))
188        }
189    }
190
191    /// Forward raw Anthropic-format JSON body and return raw response bytes.
192    /// The default translates Anthropic → OpenAI, calls [`chat_completion`],
193    /// and translates back. The Anthropic provider overrides to skip serde.
194    fn anthropic_messages_raw(
195        &self,
196        _raw_body: Bytes,
197    ) -> impl Future<Output = Result<Bytes, Error>> + Send {
198        async { Err(Error::not_implemented("anthropic_messages_raw")) }
199    }
200
201    /// Stream raw Anthropic SSE bytes from an Anthropic-compatible endpoint.
202    fn anthropic_messages_stream_raw(
203        &self,
204        _raw_body: Bytes,
205    ) -> impl Future<Output = Result<crate::ByteStream, Error>> + Send {
206        async { Err(Error::not_implemented("anthropic_messages_stream_raw")) }
207    }
208
209    /// Forward raw Gemini-format JSON body and return raw response bytes.
210    /// Defaults to deserialize → [`gemini_generate_content`](Self::gemini_generate_content) → re-serialize.
211    /// Gemini-compatible providers override to skip serde.
212    fn gemini_generate_content_raw(
213        &self,
214        model: &str,
215        raw_body: Bytes,
216    ) -> impl Future<Output = Result<Bytes, Error>> + Send {
217        async move {
218            let request: GeminiRequest =
219                crate::json::from_slice(&raw_body).map_err(|e| Error::Internal(e.to_string()))?;
220            let resp = self.gemini_generate_content(model, &request).await?;
221            Ok(Bytes::from(
222                crate::json::to_vec(&resp).map_err(|e| Error::Internal(e.to_string()))?,
223            ))
224        }
225    }
226
227    /// Stream raw Gemini SSE bytes from a Gemini-compatible endpoint.
228    fn gemini_generate_content_stream_raw(
229        &self,
230        _model: &str,
231        _raw_body: Bytes,
232    ) -> impl Future<Output = Result<crate::ByteStream, Error>> + Send {
233        async { Err(Error::not_implemented("gemini_generate_content_stream_raw")) }
234    }
235}
crabllm_core/provider.rs

crabllm_core/
provider.rs