// sparrow/provider/mod.rs

use async_trait::async_trait;
use futures::stream::Stream;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::pin::Pin;

use crate::event::{StopReason, TokenUsage};

8pub mod anthropic;
9pub mod detect;
10pub mod discovery;
11pub mod ollama;
12pub mod openai_compat;
13pub mod responses;
14pub mod sse_buffer;
15pub mod tool_markup;

// ─── Model capabilities ─────────────────────────────────────────────────────────

19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct ModelCaps {
21    /// Context window size in tokens
22    pub context_window: u64,
23    /// Maximum output tokens
24    pub max_output: u64,
25    /// Whether the model supports tool calling
26    pub tools: bool,
27    /// Whether the model supports vision/image inputs
28    pub vision: bool,
29    /// Cost per million input tokens (USD)
30    pub cost_input_per_mtok: f64,
31    /// Cost per million output tokens (USD)
32    pub cost_output_per_mtok: f64,
33    /// Latency class
34    pub latency: LatencyClass,
35}
36
37impl Default for ModelCaps {
38    fn default() -> Self {
39        Self {
40            context_window: 128_000,
41            max_output: 16_000,
42            tools: true,
43            vision: false,
44            cost_input_per_mtok: 0.0,
45            cost_output_per_mtok: 0.0,
46            latency: LatencyClass::Medium,
47        }
48    }
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
52pub enum LatencyClass {
53    Fast,
54    Medium,
55    Slow,
56}

// ─── Message types ──────────────────────────────────────────────────────────────

60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct Msg {
62    pub role: String,
63    pub content: Vec<ContentBlock>,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
67#[serde(tag = "type")]
68pub enum ContentBlock {
69    #[serde(rename = "text")]
70    Text { text: String },
71    #[serde(rename = "image")]
72    Image { source: ImageSource },
73    #[serde(rename = "tool_use")]
74    ToolUse {
75        id: String,
76        name: String,
77        input: serde_json::Value,
78    },
79    #[serde(rename = "tool_result")]
80    ToolResult {
81        tool_use_id: String,
82        content: Vec<ContentBlock>,
83        is_error: Option<bool>,
84    },
85    /// Chain-of-thought / reasoning content produced by reasoning-mode models
86    /// (DeepSeek v4 Pro / R1, OpenAI o-series via Responses API, etc.).
87    /// MUST be echoed back to the API on the next turn for some providers —
88    /// DeepSeek rejects the request with 400 "The `reasoning_content` in the
89    /// thinking mode must be passed back to the API" otherwise.
90    #[serde(rename = "reasoning")]
91    Reasoning { text: String },
92}
93
94#[derive(Debug, Clone, Serialize, Deserialize)]
95#[serde(tag = "type")]
96pub enum ImageSource {
97    #[serde(rename = "base64")]
98    Base64 { media_type: String, data: String },
99    #[serde(rename = "url")]
100    Url { url: String },
101}

// ─── Tool specification (for Brain request) ─────────────────────────────────────

105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct ToolSpec {
107    pub name: String,
108    pub description: String,
109    pub input_schema: serde_json::Value,
110}

// ─── Prompt cache policy ───────────────────────────────────────────────────────

114#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
115pub enum PromptCacheTtl {
116    FiveMinutes,
117    OneHour,
118}
119
120impl PromptCacheTtl {
121    pub fn anthropic_ttl(&self) -> &'static str {
122        match self {
123            Self::FiveMinutes => "5m",
124            Self::OneHour => "1h",
125        }
126    }
127
128    pub fn openai_retention(&self) -> &'static str {
129        // OpenAI exposes `in_memory` (typically 5-10 minutes, up to one hour)
130        // and `24h`; there is no exact 1h request parameter.
131        "in_memory"
132    }
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
136pub struct PromptCacheConfig {
137    pub enabled: bool,
138    pub ttl: PromptCacheTtl,
139    pub key: Option<String>,
140}
141
142impl PromptCacheConfig {
143    pub fn enabled(key: Option<String>) -> Self {
144        Self {
145            enabled: true,
146            ttl: PromptCacheTtl::OneHour,
147            key: key.into(),
148        }
149    }
150
151    pub fn disabled() -> Self {
152        Self {
153            enabled: false,
154            ttl: PromptCacheTtl::FiveMinutes,
155            key: None,
156        }
157    }
158}
159
160impl Default for PromptCacheConfig {
161    fn default() -> Self {
162        Self::enabled(None)
163    }
164}

// ─── Brain request ──────────────────────────────────────────────────────────────

168#[derive(Debug, Clone)]
169pub struct BrainRequest {
170    pub system: Option<String>,
171    pub messages: Vec<Msg>,
172    pub tools: Vec<ToolSpec>,
173    pub max_tokens: u32,
174    pub temperature: f32,
175    pub stop: Vec<String>,
176    pub cache: PromptCacheConfig,
177}
178
179impl Default for BrainRequest {
180    fn default() -> Self {
181        Self {
182            system: None,
183            messages: vec![],
184            tools: vec![],
185            max_tokens: 4096,
186            temperature: 0.0,
187            stop: vec![],
188            cache: PromptCacheConfig::default(),
189        }
190    }
191}

// ─── Brain events (unified stream) ──────────────────────────────────────────────

195#[derive(Debug, Clone)]
196pub enum BrainEvent {
197    TextDelta(String),
198    /// Reasoning / chain-of-thought delta (DeepSeek `reasoning_content`,
199    /// OpenAI Responses reasoning summaries, …). Must be re-sent on the
200    /// next turn for providers that require it.
201    ReasoningDelta(String),
202    ToolUseStart {
203        id: String,
204        name: String,
205    },
206    ToolUseDelta {
207        id: String,
208        json: String,
209    },
210    ToolUseEnd {
211        id: String,
212    },
213    Usage(TokenUsage),
214    Done(StopReason),
215    Error(String),
216}

// ─── Brain stream ───────────────────────────────────────────────────────────────

220pub type BrainStream = Pin<Box<dyn Stream<Item = BrainEvent> + Send>>;

// ─── THE BRAIN TRAIT ────────────────────────────────────────────────────────────

224/// Uniform interface over every model vendor.
225/// Normalizes messages, streaming, and tool-calling so the rest of the system is vendor-agnostic.
226#[async_trait]
227pub trait Brain: Send + Sync {
228    /// Full provider:model identifier, e.g. "anthropic:claude-sonnet-4-6"
229    fn id(&self) -> &str;
230    /// Model capabilities
231    fn caps(&self) -> ModelCaps;
232    /// Stream a completion
233    async fn complete(&self, req: BrainRequest) -> anyhow::Result<BrainStream>;
234}

// ─── Brain error ────────────────────────────────────────────────────────────────

/// Failure categories a provider call can be classified into.
#[derive(Debug, Clone)]
pub enum BrainError {
    /// The provider rate-limited the request. `retry_after` is the
    /// optional wait before retrying (rendered with an `s` suffix by the
    /// `Display` impl, so presumably seconds).
    RateLimit { retry_after: Option<u64> },

    /// The provider answered with an error status and response body.
    ServerError { status: u16, body: String },

    /// The request timed out.
    Timeout,

    /// The model refused; carries the refusal message.
    Refusal(String),

    /// Any other failure, described by a message.
    Unknown(String),
}

247impl std::fmt::Display for BrainError {
248    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
249        match self {
250            BrainError::RateLimit { retry_after } => {
251                write!(f, "rate limited (retry after {:?}s)", retry_after)
252            }
253            BrainError::ServerError { status, body } => {
254                write!(f, "server error {}: {}", status, body)
255            }
256            BrainError::Timeout => write!(f, "timeout"),
257            BrainError::Refusal(msg) => write!(f, "refusal: {}", msg),
258            BrainError::Unknown(msg) => write!(f, "unknown: {}", msg),
259        }
260    }
261}