Skip to main content

sparrow_providers/
lib.rs

1// Inherit the same lint policy as the main crate (this code was extracted from
2// it). Keeps `clippy --all-targets -- -D warnings` green across the workspace.
3#![allow(
4    clippy::collapsible_if,
5    clippy::collapsible_match,
6    clippy::derivable_impls,
7    clippy::format_in_format_args,
8    clippy::if_same_then_else,
9    clippy::iter_cloned_collect,
10    clippy::manual_clamp,
11    clippy::manual_div_ceil,
12    clippy::manual_is_multiple_of,
13    clippy::manual_pattern_char_comparison,
14    clippy::needless_borrow,
15    clippy::needless_range_loop,
16    clippy::new_without_default,
17    clippy::ptr_arg,
18    clippy::should_implement_trait,
19    clippy::single_match,
20    clippy::type_complexity,
21    clippy::unnecessary_cast,
22    clippy::let_and_return,
23    clippy::useless_conversion,
24    clippy::useless_format,
25    clippy::while_let_loop
26)]
27
28use async_trait::async_trait;
29use futures::stream::Stream;
30use serde::{Deserialize, Serialize};
31use std::pin::Pin;
32
33use sparrow_core::event::{StopReason, TokenUsage};
34
35pub mod anthropic;
36pub mod discovery;
37pub mod ollama;
38pub mod openai_compat;
39pub mod responses;
40pub mod sse_buffer;
41pub mod tool_markup;
42
43// ─── Model capabilities ─────────────────────────────────────────────────────────
44
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct ModelCaps {
47    /// Context window size in tokens
48    pub context_window: u64,
49    /// Maximum output tokens
50    pub max_output: u64,
51    /// Whether the model supports tool calling
52    pub tools: bool,
53    /// Whether the model supports vision/image inputs
54    pub vision: bool,
55    /// Cost per million input tokens (USD)
56    pub cost_input_per_mtok: f64,
57    /// Cost per million output tokens (USD)
58    pub cost_output_per_mtok: f64,
59    /// Latency class
60    pub latency: LatencyClass,
61}
62
63impl Default for ModelCaps {
64    fn default() -> Self {
65        Self {
66            context_window: 128_000,
67            max_output: 16_000,
68            tools: true,
69            vision: false,
70            cost_input_per_mtok: 0.0,
71            cost_output_per_mtok: 0.0,
72            latency: LatencyClass::Medium,
73        }
74    }
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
78pub enum LatencyClass {
79    Fast,
80    Medium,
81    Slow,
82}
83
84// ─── Message types ──────────────────────────────────────────────────────────────
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct Msg {
88    pub role: String,
89    pub content: Vec<ContentBlock>,
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize)]
93#[serde(tag = "type")]
94pub enum ContentBlock {
95    #[serde(rename = "text")]
96    Text { text: String },
97    #[serde(rename = "image")]
98    Image { source: ImageSource },
99    #[serde(rename = "tool_use")]
100    ToolUse {
101        id: String,
102        name: String,
103        input: serde_json::Value,
104    },
105    #[serde(rename = "tool_result")]
106    ToolResult {
107        tool_use_id: String,
108        content: Vec<ContentBlock>,
109        is_error: Option<bool>,
110    },
111    /// Chain-of-thought / reasoning content produced by reasoning-mode models
112    /// (DeepSeek v4 Pro / R1, OpenAI o-series via Responses API, etc.).
113    /// MUST be echoed back to the API on the next turn for some providers —
114    /// DeepSeek rejects the request with 400 "The `reasoning_content` in the
115    /// thinking mode must be passed back to the API" otherwise.
116    #[serde(rename = "reasoning")]
117    Reasoning { text: String },
118}
119
120#[derive(Debug, Clone, Serialize, Deserialize)]
121#[serde(tag = "type")]
122pub enum ImageSource {
123    #[serde(rename = "base64")]
124    Base64 { media_type: String, data: String },
125    #[serde(rename = "url")]
126    Url { url: String },
127}
128
129// ─── Tool specification (for Brain request) ─────────────────────────────────────
130
131#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct ToolSpec {
133    pub name: String,
134    pub description: String,
135    pub input_schema: serde_json::Value,
136}
137
138// ─── Prompt cache policy ───────────────────────────────────────────────────────
139
140#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
141pub enum PromptCacheTtl {
142    FiveMinutes,
143    OneHour,
144}
145
146impl PromptCacheTtl {
147    pub fn anthropic_ttl(&self) -> &'static str {
148        match self {
149            Self::FiveMinutes => "5m",
150            Self::OneHour => "1h",
151        }
152    }
153
154    pub fn openai_retention(&self) -> &'static str {
155        // OpenAI exposes `in_memory` (typically 5-10 minutes, up to one hour)
156        // and `24h`; there is no exact 1h request parameter.
157        "in_memory"
158    }
159}
160
161#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
162pub struct PromptCacheConfig {
163    pub enabled: bool,
164    pub ttl: PromptCacheTtl,
165    pub key: Option<String>,
166}
167
168impl PromptCacheConfig {
169    pub fn enabled(key: Option<String>) -> Self {
170        Self {
171            enabled: true,
172            ttl: PromptCacheTtl::OneHour,
173            key: key.into(),
174        }
175    }
176
177    pub fn disabled() -> Self {
178        Self {
179            enabled: false,
180            ttl: PromptCacheTtl::FiveMinutes,
181            key: None,
182        }
183    }
184}
185
186impl Default for PromptCacheConfig {
187    fn default() -> Self {
188        Self::disabled()
189    }
190}
191
192// ─── Brain request ──────────────────────────────────────────────────────────────
193
194#[derive(Debug, Clone)]
195pub struct BrainRequest {
196    pub system: Option<String>,
197    pub messages: Vec<Msg>,
198    pub tools: Vec<ToolSpec>,
199    pub max_tokens: u32,
200    pub temperature: f32,
201    pub stop: Vec<String>,
202    pub cache: PromptCacheConfig,
203}
204
205impl Default for BrainRequest {
206    fn default() -> Self {
207        Self {
208            system: None,
209            messages: vec![],
210            tools: vec![],
211            max_tokens: 4096,
212            temperature: 0.0,
213            stop: vec![],
214            cache: PromptCacheConfig::default(),
215        }
216    }
217}
218
219// ─── Brain events (unified stream) ──────────────────────────────────────────────
220
221#[derive(Debug, Clone)]
222pub enum BrainEvent {
223    TextDelta(String),
224    /// Reasoning / chain-of-thought delta (DeepSeek `reasoning_content`,
225    /// OpenAI Responses reasoning summaries, …). Must be re-sent on the
226    /// next turn for providers that require it.
227    ReasoningDelta(String),
228    ToolUseStart {
229        id: String,
230        name: String,
231    },
232    ToolUseDelta {
233        id: String,
234        json: String,
235    },
236    ToolUseEnd {
237        id: String,
238    },
239    Usage(TokenUsage),
240    Done(StopReason),
241    Error(String),
242}
243
244// ─── Brain stream ───────────────────────────────────────────────────────────────
245
246pub type BrainStream = Pin<Box<dyn Stream<Item = BrainEvent> + Send>>;
247
248// ─── THE BRAIN TRAIT ────────────────────────────────────────────────────────────
249
250/// Uniform interface over every model vendor.
251/// Normalizes messages, streaming, and tool-calling so the rest of the system is vendor-agnostic.
252#[async_trait]
253pub trait Brain: Send + Sync {
254    /// Full provider:model identifier, e.g. "anthropic:claude-sonnet-4-6"
255    fn id(&self) -> &str;
256    /// Model capabilities
257    fn caps(&self) -> ModelCaps;
258    /// Stream a completion
259    async fn complete(&self, req: BrainRequest) -> anyhow::Result<BrainStream>;
260}
261
262// ─── Brain error ────────────────────────────────────────────────────────────────
263
/// Error categories a provider call can end in.
#[derive(Debug, Clone)]
pub enum BrainError {
    /// The provider rate-limited the request; `retry_after` is the optional
    /// delay (rendered as seconds by `Display`).
    RateLimit { retry_after: Option<u64> },
    /// Server-side error, with the status code and response body.
    ServerError { status: u16, body: String },
    /// The request timed out.
    Timeout,
    /// A refusal, with its message.
    Refusal(String),
    /// Any other failure, with its message.
    Unknown(String),
}

impl std::fmt::Display for BrainError {
    /// Writes a short human-readable description of the error.
    ///
    /// A rate limit with a known delay renders as
    /// `rate limited (retry after 30s)`; without one it is just
    /// `rate limited`. The optional delay is unwrapped here rather than
    /// printed with `{:?}`, which would leak `Some(30)` / `None` into the
    /// message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            BrainError::RateLimit {
                retry_after: Some(secs),
            } => write!(f, "rate limited (retry after {}s)", secs),
            BrainError::RateLimit { retry_after: None } => f.write_str("rate limited"),
            BrainError::ServerError { status, body } => {
                write!(f, "server error {}: {}", status, body)
            }
            BrainError::Timeout => f.write_str("timeout"),
            BrainError::Refusal(msg) => write!(f, "refusal: {}", msg),
            BrainError::Unknown(msg) => write!(f, "unknown: {}", msg),
        }
    }
}