ferro_ai/client/mod.rs
1//! Provider-agnostic LLM client trait and request/response types.
2//!
3//! The [`LlmClient`] trait is the central abstraction — implemented by
4//! [`anthropic::AnthropicClient`], [`openai::OpenAiClient`], and [`ollama::OllamaClient`].
5//! All three are instantiable as `Box<dyn LlmClient>`.
6//!
7//! Use [`crate::config::AiConfig::from_env`] to construct the configured client from
8//! environment variables at startup.
9
10pub mod anthropic;
11pub mod ollama;
12pub mod openai;
13
14pub use anthropic::AnthropicClient;
15pub use ollama::OllamaClient;
16pub use openai::OpenAiClient;
17
18use crate::error::Error;
19use async_trait::async_trait;
20use futures::stream::BoxStream;
21
22/// Opaque stream of text tokens from a streaming LLM completion.
23///
24/// Each item is either a text token chunk (`Ok(String)`) or a provider error
25/// (`Err(Error)`). Callers consume via [`futures::StreamExt::next`].
26///
27/// `reqwest-eventsource` is NOT re-exported — this type alias hides the
28/// underlying stream implementation (D-09).
29pub type TokenStream = BoxStream<'static, Result<String, Error>>;
30
/// Role of a message participant in a completion request.
///
/// Implements `PartialEq`/`Eq` so that callers can compare roles directly,
/// e.g. `message.role == Role::Tool`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Role {
    /// A message from the end user or calling code.
    User,
    /// A message from the assistant (used for multi-turn conversations).
    Assistant,
    /// A tool result message.
    ///
    /// Anthropic: sent as `role: "user"` with `type: "tool_result"` content.
    /// OpenAI: sent as `role: "tool"` with `tool_call_id`.
    Tool,
}
44
45/// A single message in a completion conversation.
46#[derive(Debug, Clone)]
47pub struct Message {
48 /// The role of the message sender.
49 pub role: Role,
50 /// The text content of the message.
51 pub content: String,
52 /// Provider call identifier for tool results.
53 ///
54 /// Set by `ToolRegistry::result_to_message` when `role == Role::Tool`.
55 /// Anthropic places this as `tool_use_id` inside a `tool_result` content block.
56 /// OpenAI places this as the top-level `tool_call_id` field.
57 /// `None` for all non-tool messages.
58 pub tool_call_id: Option<String>,
59}
60
61/// A tool definition included in a completion request.
62///
63/// `parameters_schema` must already be normalized via
64/// `schema::for_structured_output` before being placed here (D-14).
65#[derive(Debug, Clone)]
66pub struct ToolRequest {
67 /// The tool name. Must match the name in [`crate::tools::ToolDef`].
68 pub name: String,
69 /// Human-readable description of what the tool does.
70 pub description: String,
71 /// JSON Schema for the tool's input parameters (normalized).
72 pub parameters_schema: serde_json::Value,
73}
74
/// Controls how the LLM selects a tool when tools are available.
///
/// [`ToolChoice::Auto`] is the default and is what `ToolChoice::default()` returns.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum ToolChoice {
    /// The LLM decides whether to call a tool (default).
    #[default]
    Auto,
    /// The LLM must not call any tool.
    None,
}
83
84/// A single tool-use block returned by the LLM.
85#[derive(Debug, Clone)]
86pub struct ToolUseBlock {
87 /// Provider-assigned call identifier (used when sending back tool results).
88 pub id: String,
89 /// The tool name the LLM chose to call.
90 pub name: String,
91 /// The arguments the LLM generated for the tool call.
92 pub input: serde_json::Value,
93}
94
95/// The result of a `complete_with_tools` call.
96///
97/// Either the LLM produced a final text answer or it wants to call one or more tools.
98#[derive(Debug)]
99pub enum CompletionResponse {
100 /// The LLM produced a final text response (stop_reason "end_turn" / finish_reason "stop").
101 Text(String),
102 /// The LLM wants to invoke tools (stop_reason "tool_use" / finish_reason "tool_calls").
103 ///
104 /// Carries both the parsed tool-use blocks and the raw assistant content string.
105 /// The dispatch loop must push an `Assistant` message with `assistant_content` into
106 /// history BEFORE appending tool result messages — both Anthropic and OpenAI require
107 /// the assistant's tool-call turn to precede the corresponding tool_result messages.
108 ToolUse {
109 /// Parsed tool-use blocks to dispatch.
110 blocks: Vec<ToolUseBlock>,
111 /// Raw assistant content (JSON array string for Anthropic, or tool_calls JSON for
112 /// OpenAI). Stored verbatim so the dispatch loop can reconstruct the assistant message.
113 assistant_content: String,
114 },
115}
116
117/// Request for a text completion from an LLM provider.
118#[derive(Debug, Clone)]
119pub struct CompletionRequest {
120 /// Optional system prompt. Sent before the conversation messages.
121 pub system: Option<String>,
122 /// Conversation messages in chronological order.
123 pub messages: Vec<Message>,
124 /// Maximum number of tokens in the response.
125 pub max_tokens: u32,
126 /// Optional per-request model override.
127 ///
128 /// `None` resolves to the client's [`LlmClient::default_model`] at call time.
129 pub model_override: Option<String>,
130 /// Optional JSON schema for structured output.
131 ///
132 /// Passed through to the provider as-is. Phase 166 adds a typed
133 /// `complete::<T>()` wrapper with schemars normalization on top of this field.
134 /// With streaming + schema, tokens arrive as raw JSON fragments — callers
135 /// must accumulate before parsing (Pitfall 1).
136 pub schema: Option<serde_json::Value>,
137 /// Optional tool definitions for the tool-calling dispatch loop.
138 ///
139 /// Each entry's `parameters_schema` must be pre-normalized via
140 /// `schema::for_structured_output`. Set by `ToolRegistry::dispatch` (D-14).
141 pub tools: Option<Vec<ToolRequest>>,
142 /// Controls how the LLM selects a tool when `tools` is `Some`.
143 pub tool_choice: Option<ToolChoice>,
144}
145
146/// Provider-agnostic LLM client.
147///
148/// Implement this trait to add a new provider. All methods use `&self` so the
149/// client can be shared via `Arc<dyn LlmClient>` or `Box<dyn LlmClient>`.
150///
151/// Providers that lack a capability (e.g. Anthropic has no embeddings endpoint)
152/// return `Err(Error::Unsupported)` — they never panic.
153#[async_trait]
154pub trait LlmClient: Send + Sync {
155 /// The provider's default model identifier.
156 ///
157 /// Used when [`CompletionRequest::model_override`] is `None`. Overridable
158 /// at startup via `FERRO_AI_MODEL`.
159 fn default_model(&self) -> &str;
160
161 /// Run a non-streaming completion, returning the full response text.
162 async fn complete(&self, request: CompletionRequest) -> Result<String, Error>;
163
164 /// Run a streaming completion, returning a token stream.
165 ///
166 /// Each yielded item is a text token chunk. When `request.schema` is set,
167 /// tokens are raw JSON fragments; accumulate them before parsing.
168 async fn complete_stream(&self, request: CompletionRequest) -> Result<TokenStream, Error>;
169
170 /// Generate a text embedding vector.
171 ///
172 /// Returns `Err(Error::Unsupported)` for providers without an embeddings
173 /// endpoint (e.g. [`anthropic::AnthropicClient`]).
174 async fn embed(&self, text: &str) -> Result<Vec<f32>, Error>;
175
176 /// Run a completion that may invoke tools.
177 ///
178 /// Returns [`CompletionResponse::Text`] when the LLM produces a final answer,
179 /// or [`CompletionResponse::ToolUse`] when the LLM requests tool execution.
180 ///
181 /// The default implementation returns `Err(Error::Unsupported)` — providers
182 /// that do not support tool calling (e.g. [`ollama::OllamaClient`]) inherit this
183 /// and existing callers of `complete()` are unaffected (D-14).
184 async fn complete_with_tools(
185 &self,
186 request: CompletionRequest,
187 ) -> Result<CompletionResponse, Error> {
188 let _ = request;
189 Err(Error::Unsupported)
190 }
191}