Skip to main content

sparrow/provider/
mod.rs

1use async_trait::async_trait;
2use futures::stream::Stream;
3use serde::{Deserialize, Serialize};
4use std::pin::Pin;
5
6use crate::event::{StopReason, TokenUsage};
7
8pub mod anthropic;
9pub mod detect;
10pub mod discovery;
11pub mod ollama;
12pub mod openai_compat;
13pub mod responses;
14pub mod sse_buffer;
15
16// ─── Model capabilities ─────────────────────────────────────────────────────────
17
/// Capability and pricing metadata describing a single model.
///
/// This is the value returned by [`Brain::caps`]. It derives `Serialize` and
/// `Deserialize`, so it can round-trip through any serde format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelCaps {
    /// Context window size in tokens
    pub context_window: u64,
    /// Maximum output tokens
    pub max_output: u64,
    /// Whether the model supports tool calling
    pub tools: bool,
    /// Whether the model supports vision/image inputs
    pub vision: bool,
    /// Cost per million input tokens (USD)
    pub cost_input_per_mtok: f64,
    /// Cost per million output tokens (USD)
    pub cost_output_per_mtok: f64,
    /// Latency class (one of the [`LatencyClass`] buckets)
    pub latency: LatencyClass,
}
35
36impl Default for ModelCaps {
37    fn default() -> Self {
38        Self {
39            context_window: 128_000,
40            max_output: 16_000,
41            tools: true,
42            vision: false,
43            cost_input_per_mtok: 0.0,
44            cost_output_per_mtok: 0.0,
45            latency: LatencyClass::Medium,
46        }
47    }
48}
49
/// Coarse latency bucket stored in [`ModelCaps::latency`].
///
/// `ModelCaps::default()` uses [`LatencyClass::Medium`].
// NOTE(review): this module defines no thresholds for the buckets; what
// separates them is presumably decided by each provider — confirm.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum LatencyClass {
    Fast,
    Medium,
    Slow,
}
56
57// ─── Message types ──────────────────────────────────────────────────────────────
58
/// One conversation message: a role plus an ordered list of content blocks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Msg {
    /// Role of the message author, as a free-form string (not validated here).
    // NOTE(review): presumably values such as "user" / "assistant" — confirm
    // against the provider modules.
    pub role: String,
    /// Content blocks that make up the message body, in order.
    pub content: Vec<ContentBlock>,
}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
66#[serde(tag = "type")]
67pub enum ContentBlock {
68    #[serde(rename = "text")]
69    Text { text: String },
70    #[serde(rename = "image")]
71    Image { source: ImageSource },
72    #[serde(rename = "tool_use")]
73    ToolUse {
74        id: String,
75        name: String,
76        input: serde_json::Value,
77    },
78    #[serde(rename = "tool_result")]
79    ToolResult {
80        tool_use_id: String,
81        content: Vec<ContentBlock>,
82        is_error: Option<bool>,
83    },
84    /// Chain-of-thought / reasoning content produced by reasoning-mode models
85    /// (DeepSeek v4 Pro / R1, OpenAI o-series via Responses API, etc.).
86    /// MUST be echoed back to the API on the next turn for some providers —
87    /// DeepSeek rejects the request with 400 "The `reasoning_content` in the
88    /// thinking mode must be passed back to the API" otherwise.
89    #[serde(rename = "reasoning")]
90    Reasoning { text: String },
91}
92
93#[derive(Debug, Clone, Serialize, Deserialize)]
94#[serde(tag = "type")]
95pub enum ImageSource {
96    #[serde(rename = "base64")]
97    Base64 { media_type: String, data: String },
98    #[serde(rename = "url")]
99    Url { url: String },
100}
101
102// ─── Tool specification (for Brain request) ─────────────────────────────────────
103
/// Description of one tool offered to the model in [`BrainRequest::tools`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolSpec {
    /// Name of the tool.
    pub name: String,
    /// Free-text description of the tool.
    pub description: String,
    /// Schema of the tool's input, held as arbitrary JSON.
    // NOTE(review): presumably a JSON Schema object — confirm against callers.
    pub input_schema: serde_json::Value,
}
110
111// ─── Prompt cache policy ───────────────────────────────────────────────────────
112
/// Requested lifetime for prompt-cache entries.
///
/// Translated into provider-specific strings by
/// [`PromptCacheTtl::anthropic_ttl`] and [`PromptCacheTtl::openai_retention`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum PromptCacheTtl {
    /// Five-minute lifetime (`"5m"` for Anthropic).
    FiveMinutes,
    /// One-hour lifetime (`"1h"` for Anthropic).
    OneHour,
}
118
119impl PromptCacheTtl {
120    pub fn anthropic_ttl(&self) -> &'static str {
121        match self {
122            Self::FiveMinutes => "5m",
123            Self::OneHour => "1h",
124        }
125    }
126
127    pub fn openai_retention(&self) -> &'static str {
128        // OpenAI exposes `in_memory` (typically 5-10 minutes, up to one hour)
129        // and `24h`; there is no exact 1h request parameter.
130        "in_memory"
131    }
132}
133
/// Prompt-caching settings carried by a [`BrainRequest`].
///
/// Build one with [`PromptCacheConfig::enabled`] or
/// [`PromptCacheConfig::disabled`]; `Default` is `enabled(None)`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PromptCacheConfig {
    /// Whether prompt caching is requested.
    pub enabled: bool,
    /// Requested cache lifetime.
    pub ttl: PromptCacheTtl,
    /// Optional cache key.
    // NOTE(review): how providers consume this key is not visible in this
    // module — confirm against the provider implementations.
    pub key: Option<String>,
}
140
141impl PromptCacheConfig {
142    pub fn enabled(key: Option<String>) -> Self {
143        Self {
144            enabled: true,
145            ttl: PromptCacheTtl::OneHour,
146            key: key.into(),
147        }
148    }
149
150    pub fn disabled() -> Self {
151        Self {
152            enabled: false,
153            ttl: PromptCacheTtl::FiveMinutes,
154            key: None,
155        }
156    }
157}
158
159impl Default for PromptCacheConfig {
160    fn default() -> Self {
161        Self::enabled(None)
162    }
163}
164
165// ─── Brain request ──────────────────────────────────────────────────────────────
166
/// Vendor-neutral completion request, passed by value to [`Brain::complete`].
///
/// `Default` yields an empty conversation with no tools or stop sequences,
/// `max_tokens = 4096`, `temperature = 0.0`, and the default
/// [`PromptCacheConfig`].
#[derive(Debug, Clone)]
pub struct BrainRequest {
    /// Optional system prompt.
    pub system: Option<String>,
    /// Conversation messages.
    pub messages: Vec<Msg>,
    /// Tools offered to the model.
    pub tools: Vec<ToolSpec>,
    /// Maximum number of tokens for the completion.
    pub max_tokens: u32,
    /// Sampling temperature.
    pub temperature: f32,
    /// Stop sequences.
    pub stop: Vec<String>,
    /// Prompt-cache settings for this request.
    pub cache: PromptCacheConfig,
}
177
178impl Default for BrainRequest {
179    fn default() -> Self {
180        Self {
181            system: None,
182            messages: vec![],
183            tools: vec![],
184            max_tokens: 4096,
185            temperature: 0.0,
186            stop: vec![],
187            cache: PromptCacheConfig::default(),
188        }
189    }
190}
191
192// ─── Brain events (unified stream) ──────────────────────────────────────────────
193
/// One item of a [`BrainStream`]: the unified event type that a completion
/// stream yields.
#[derive(Debug, Clone)]
pub enum BrainEvent {
    /// A chunk of response text.
    TextDelta(String),
    /// Reasoning / chain-of-thought delta (DeepSeek `reasoning_content`,
    /// OpenAI Responses reasoning summaries, …). Must be re-sent on the
    /// next turn for providers that require it.
    ReasoningDelta(String),
    /// A tool call with the given `id` and tool `name` has started.
    ToolUseStart {
        id: String,
        name: String,
    },
    /// A piece of JSON text belonging to tool call `id`.
    // NOTE(review): presumably fragments of the call's arguments, to be
    // concatenated until `ToolUseEnd` — confirm against the consumers.
    ToolUseDelta {
        id: String,
        json: String,
    },
    /// Tool call `id` is complete.
    ToolUseEnd {
        id: String,
    },
    /// Token usage report.
    Usage(TokenUsage),
    /// The completion finished with the given stop reason.
    Done(StopReason),
    /// An error, carried as a message string.
    Error(String),
}
216
217// ─── Brain stream ───────────────────────────────────────────────────────────────
218
/// Pinned, boxed, `Send` stream of [`BrainEvent`]s; the success type of
/// [`Brain::complete`].
pub type BrainStream = Pin<Box<dyn Stream<Item = BrainEvent> + Send>>;
220
221// ─── THE BRAIN TRAIT ────────────────────────────────────────────────────────────
222
/// Uniform interface over every model vendor.
/// Normalizes messages, streaming, and tool-calling so the rest of the system is vendor-agnostic.
///
/// Implementors must be `Send + Sync`. The `#[async_trait]` attribute is what
/// allows `complete` to be declared as an `async fn` here.
#[async_trait]
pub trait Brain: Send + Sync {
    /// Full provider:model identifier, e.g. "anthropic:claude-sonnet-4-6"
    fn id(&self) -> &str;
    /// Model capabilities, returned by value as a [`ModelCaps`].
    fn caps(&self) -> ModelCaps;
    /// Stream a completion.
    ///
    /// Takes the request by value and, on success, returns a [`BrainStream`]
    /// of [`BrainEvent`]s.
    ///
    /// # Errors
    ///
    /// Returns an `anyhow` error when the implementation cannot produce a
    /// stream.
    async fn complete(&self, req: BrainRequest) -> anyhow::Result<BrainStream>;
}
234
235// ─── Brain error ────────────────────────────────────────────────────────────────
236
/// Failure categories a provider call can be classified into.
///
/// `Display` renders a short lowercase message for each variant.
// NOTE(review): no `std::error::Error` impl is visible in this section. If
// none exists elsewhere, adding one would let `?` convert a `BrainError`
// into `anyhow::Error` — confirm before adding to avoid a duplicate impl.
#[derive(Debug, Clone)]
pub enum BrainError {
    /// The provider rate-limited the request; `retry_after` is the optional
    /// wait in seconds.
    RateLimit { retry_after: Option<u64> },
    /// Server-side failure, with its numeric status and the response body.
    ServerError { status: u16, body: String },
    /// The request timed out.
    Timeout,
    /// The model refused; the payload is the accompanying message.
    Refusal(String),
    /// Any failure not covered by the other variants.
    Unknown(String),
}

impl std::fmt::Display for BrainError {
    /// Writes a one-line description of the error.
    ///
    /// A rate limit with a known delay is shown as
    /// `rate limited (retry after 30s)`; without one, as `rate limited`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Match on the option so the message never shows `Some(..)` or
            // `None`, which `{:?}` on the whole `Option` would print.
            BrainError::RateLimit {
                retry_after: Some(secs),
            } => {
                write!(f, "rate limited (retry after {}s)", secs)
            }
            BrainError::RateLimit { retry_after: None } => write!(f, "rate limited"),
            BrainError::ServerError { status, body } => {
                write!(f, "server error {}: {}", status, body)
            }
            BrainError::Timeout => write!(f, "timeout"),
            BrainError::Refusal(msg) => write!(f, "refusal: {}", msg),
            BrainError::Unknown(msg) => write!(f, "unknown: {}", msg),
        }
    }
}