Skip to main content

ferro_ai/client/
mod.rs

//! Provider-agnostic LLM client trait and request/response types.
//!
//! The [`LlmClient`] trait is the central abstraction — implemented by
//! [`anthropic::AnthropicClient`], [`openai::OpenAiClient`], and [`ollama::OllamaClient`].
//! All three are instantiable as `Box<dyn LlmClient>`.
//!
//! Use [`crate::config::AiConfig::from_env`] to construct the configured client from
//! environment variables at startup.
9
10pub mod anthropic;
11pub mod ollama;
12pub mod openai;
13
14pub use anthropic::AnthropicClient;
15pub use ollama::OllamaClient;
16pub use openai::OpenAiClient;
17
18use crate::error::Error;
19use async_trait::async_trait;
20use futures::stream::BoxStream;
21
22/// Opaque stream of text tokens from a streaming LLM completion.
23///
24/// Each item is either a text token chunk (`Ok(String)`) or a provider error
25/// (`Err(Error)`). Callers consume via [`futures::StreamExt::next`].
26///
27/// `reqwest-eventsource` is NOT re-exported — this type alias hides the
28/// underlying stream implementation (D-09).
29pub type TokenStream = BoxStream<'static, Result<String, Error>>;
30
/// Role of a message participant in a completion request.
///
/// Derives `PartialEq`/`Eq` so roles can be compared directly
/// (e.g. `message.role == Role::Tool`) instead of requiring a pattern match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Role {
    /// A message from the end user or calling code.
    User,
    /// A message from the assistant (used for multi-turn conversations).
    Assistant,
    /// A tool result message.
    ///
    /// Anthropic: sent as `role: "user"` with `type: "tool_result"` content.
    /// OpenAI: sent as `role: "tool"` with `tool_call_id`.
    Tool,
}
44
45/// A single message in a completion conversation.
46#[derive(Debug, Clone)]
47pub struct Message {
48    /// The role of the message sender.
49    pub role: Role,
50    /// The text content of the message.
51    pub content: String,
52    /// Provider call identifier for tool results.
53    ///
54    /// Set by `ToolRegistry::result_to_message` when `role == Role::Tool`.
55    /// Anthropic places this as `tool_use_id` inside a `tool_result` content block.
56    /// OpenAI places this as the top-level `tool_call_id` field.
57    /// `None` for all non-tool messages.
58    pub tool_call_id: Option<String>,
59}
60
61/// A tool definition included in a completion request.
62///
63/// `parameters_schema` must already be normalized via
64/// `schema::for_structured_output` before being placed here (D-14).
65#[derive(Debug, Clone)]
66pub struct ToolRequest {
67    /// The tool name. Must match the name in [`crate::tools::ToolDef`].
68    pub name: String,
69    /// Human-readable description of what the tool does.
70    pub description: String,
71    /// JSON Schema for the tool's input parameters (normalized).
72    pub parameters_schema: serde_json::Value,
73}
74
/// Controls how the LLM selects a tool when tools are available.
///
/// [`ToolChoice::Auto`] is the [`Default`] value, matching its documented role
/// as the default selection mode. `PartialEq`/`Eq` allow direct comparison of
/// choices.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum ToolChoice {
    /// The LLM decides whether to call a tool (default).
    #[default]
    Auto,
    /// The LLM must not call any tool.
    None,
}
83
84/// A single tool-use block returned by the LLM.
85#[derive(Debug, Clone)]
86pub struct ToolUseBlock {
87    /// Provider-assigned call identifier (used when sending back tool results).
88    pub id: String,
89    /// The tool name the LLM chose to call.
90    pub name: String,
91    /// The arguments the LLM generated for the tool call.
92    pub input: serde_json::Value,
93}
94
95/// The result of a `complete_with_tools` call.
96///
97/// Either the LLM produced a final text answer or it wants to call one or more tools.
98#[derive(Debug)]
99pub enum CompletionResponse {
100    /// The LLM produced a final text response (stop_reason "end_turn" / finish_reason "stop").
101    Text(String),
102    /// The LLM wants to invoke tools (stop_reason "tool_use" / finish_reason "tool_calls").
103    ///
104    /// Carries both the parsed tool-use blocks and the raw assistant content string.
105    /// The dispatch loop must push an `Assistant` message with `assistant_content` into
106    /// history BEFORE appending tool result messages — both Anthropic and OpenAI require
107    /// the assistant's tool-call turn to precede the corresponding tool_result messages.
108    ToolUse {
109        /// Parsed tool-use blocks to dispatch.
110        blocks: Vec<ToolUseBlock>,
111        /// Raw assistant content (JSON array string for Anthropic, or tool_calls JSON for
112        /// OpenAI). Stored verbatim so the dispatch loop can reconstruct the assistant message.
113        assistant_content: String,
114    },
115}
116
117/// Request for a text completion from an LLM provider.
118#[derive(Debug, Clone)]
119pub struct CompletionRequest {
120    /// Optional system prompt. Sent before the conversation messages.
121    pub system: Option<String>,
122    /// Conversation messages in chronological order.
123    pub messages: Vec<Message>,
124    /// Maximum number of tokens in the response.
125    pub max_tokens: u32,
126    /// Optional per-request model override.
127    ///
128    /// `None` resolves to the client's [`LlmClient::default_model`] at call time.
129    pub model_override: Option<String>,
130    /// Optional JSON schema for structured output.
131    ///
132    /// Passed through to the provider as-is. Phase 166 adds a typed
133    /// `complete::<T>()` wrapper with schemars normalization on top of this field.
134    /// With streaming + schema, tokens arrive as raw JSON fragments — callers
135    /// must accumulate before parsing (Pitfall 1).
136    pub schema: Option<serde_json::Value>,
137    /// Optional tool definitions for the tool-calling dispatch loop.
138    ///
139    /// Each entry's `parameters_schema` must be pre-normalized via
140    /// `schema::for_structured_output`. Set by `ToolRegistry::dispatch` (D-14).
141    pub tools: Option<Vec<ToolRequest>>,
142    /// Controls how the LLM selects a tool when `tools` is `Some`.
143    pub tool_choice: Option<ToolChoice>,
144}
145
146/// Provider-agnostic LLM client.
147///
148/// Implement this trait to add a new provider. All methods use `&self` so the
149/// client can be shared via `Arc<dyn LlmClient>` or `Box<dyn LlmClient>`.
150///
151/// Providers that lack a capability (e.g. Anthropic has no embeddings endpoint)
152/// return `Err(Error::Unsupported)` — they never panic.
153#[async_trait]
154pub trait LlmClient: Send + Sync {
155    /// The provider's default model identifier.
156    ///
157    /// Used when [`CompletionRequest::model_override`] is `None`. Overridable
158    /// at startup via `FERRO_AI_MODEL`.
159    fn default_model(&self) -> &str;
160
161    /// Run a non-streaming completion, returning the full response text.
162    async fn complete(&self, request: CompletionRequest) -> Result<String, Error>;
163
164    /// Run a streaming completion, returning a token stream.
165    ///
166    /// Each yielded item is a text token chunk. When `request.schema` is set,
167    /// tokens are raw JSON fragments; accumulate them before parsing.
168    async fn complete_stream(&self, request: CompletionRequest) -> Result<TokenStream, Error>;
169
170    /// Generate a text embedding vector.
171    ///
172    /// Returns `Err(Error::Unsupported)` for providers without an embeddings
173    /// endpoint (e.g. [`anthropic::AnthropicClient`]).
174    async fn embed(&self, text: &str) -> Result<Vec<f32>, Error>;
175
176    /// Run a completion that may invoke tools.
177    ///
178    /// Returns [`CompletionResponse::Text`] when the LLM produces a final answer,
179    /// or [`CompletionResponse::ToolUse`] when the LLM requests tool execution.
180    ///
181    /// The default implementation returns `Err(Error::Unsupported)` — providers
182    /// that do not support tool calling (e.g. [`ollama::OllamaClient`]) inherit this
183    /// and existing callers of `complete()` are unaffected (D-14).
184    async fn complete_with_tools(
185        &self,
186        request: CompletionRequest,
187    ) -> Result<CompletionResponse, Error> {
188        let _ = request;
189        Err(Error::Unsupported)
190    }
191}