// multi_llm/provider.rs
1//! Provider trait and types for LLM abstraction.
2//!
3//! This module defines the [`LlmProvider`] trait that all LLM providers implement,
4//! along with request/response types, tool definitions, and configuration.
5//!
6//! # Overview
7//!
8//! The provider abstraction allows you to:
9//! - Switch between LLM providers without changing your application code
10//! - Use a consistent interface for all LLM operations
11//! - Access provider-specific features (like caching) through unified APIs
12//!
13//! # Provider Trait
14//!
15//! All providers implement [`LlmProvider`], which provides:
16//! - [`execute_llm()`](LlmProvider::execute_llm): Execute a standard LLM request
17//! - [`execute_structured_llm()`](LlmProvider::execute_structured_llm): Execute with JSON schema output
18//! - [`provider_name()`](LlmProvider::provider_name): Get the provider identifier
19//!
20//! # Tool Calling
21//!
22//! Define tools with [`Tool`] and handle the calling flow:
23//!
24//! ```rust
25//! use multi_llm::{Tool, ToolChoice, ToolCall, ToolResult};
26//!
27//! // Define a tool
28//! let weather_tool = Tool {
29//! name: "get_weather".to_string(),
30//! description: "Get current weather for a city".to_string(),
31//! parameters: serde_json::json!({
32//! "type": "object",
33//! "properties": {
34//! "city": {"type": "string", "description": "City name"}
35//! },
36//! "required": ["city"]
37//! }),
38//! };
39//!
40//! // Handle a tool call from the LLM
41//! let tool_call = ToolCall {
42//! id: "call_123".to_string(),
43//! name: "get_weather".to_string(),
44//! arguments: serde_json::json!({"city": "London"}),
45//! };
46//!
47//! // Return the result
48//! let result = ToolResult {
49//! tool_call_id: "call_123".to_string(),
50//! content: "Sunny, 22°C".to_string(),
51//! is_error: false,
52//! error_category: None,
53//! };
54//! ```
55//!
56//! # Response Structure
57//!
58//! All providers return a [`Response`] containing:
59//! - Text content (for standard requests)
60//! - Structured JSON (when using `execute_structured_llm`)
61//! - Tool calls (when the model wants to call functions)
62//! - Token usage statistics
63
64use crate::error::UserErrorCategory;
65#[cfg(feature = "events")]
66use crate::internals::events::{BusinessEvent, EventScope};
67use crate::messages::{UnifiedLLMRequest, UnifiedMessage};
68use serde::{Deserialize, Serialize};
69
/// Result type alias for provider operations.
///
/// Uses [`LlmError`](crate::LlmError) for structured error handling with
/// rich metadata (categories, retry info, user messages).
///
/// NOTE: importing this alias shadows `std::result::Result` in the importing
/// module; spell out `std::result::Result` there when the standard two-generic
/// form is needed.
///
/// See [`LlmError`](crate::LlmError) for available error variants and helper methods:
/// - [`is_retryable()`](crate::LlmError::is_retryable): Check if retry makes sense
/// - [`category()`](crate::LlmError::category): Get error category for routing
/// - [`user_message()`](crate::LlmError::user_message): Get safe user-facing message
pub type Result<T> = std::result::Result<T, crate::LlmError>;
80
/// Definition of a tool/function that the LLM can call.
///
/// Tools allow LLMs to perform actions by generating structured calls that your
/// application executes. The LLM sees the tool's name, description, and parameter
/// schema to understand when and how to use it.
///
/// Derives `Serialize`/`Deserialize`, so tool definitions can be loaded from or
/// persisted as JSON; `PartialEq` compares all fields, including the `parameters`
/// JSON value structurally.
///
/// # Example
///
/// ```rust
/// use multi_llm::Tool;
///
/// let search_tool = Tool {
///     name: "web_search".to_string(),
///     description: "Search the web for information".to_string(),
///     parameters: serde_json::json!({
///         "type": "object",
///         "properties": {
///             "query": {
///                 "type": "string",
///                 "description": "The search query"
///             },
///             "max_results": {
///                 "type": "integer",
///                 "description": "Maximum results to return",
///                 "default": 10
///             }
///         },
///         "required": ["query"]
///     }),
/// };
/// ```
///
/// # Parameter Schema
///
/// The `parameters` field should be a valid JSON Schema object describing the
/// tool's input. Use `type`, `properties`, `required`, and `description` fields
/// to help the LLM understand how to call your tool correctly.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Tool {
    /// Tool name - must be unique within a request.
    ///
    /// Use descriptive names like "get_weather" or "search_documents".
    /// This is how the LLM identifies which tool to call.
    pub name: String,

    /// Human-readable description of what the tool does.
    ///
    /// Be specific about capabilities and limitations. This helps the LLM
    /// decide when to use this tool vs. others.
    pub description: String,

    /// JSON Schema defining the tool's input parameters.
    ///
    /// Should be a JSON Schema object with `type: "object"` and `properties`
    /// describing each parameter. Include `description` for each property.
    /// NOTE(review): this crate does not appear to validate the schema itself;
    /// providers presumably forward it verbatim — confirm per provider.
    pub parameters: serde_json::Value,
}
138
/// A tool call generated by the LLM.
///
/// When the LLM decides to use a tool, it generates a `ToolCall` with:
/// - A unique ID to match with the response
/// - The tool name to invoke
/// - Arguments parsed from the conversation
///
/// Your application should:
/// 1. Execute the tool with the provided arguments
/// 2. Return a [`ToolResult`] with the matching `tool_call_id`
/// 3. Continue the conversation so the LLM can use the result
///
/// # Example
///
/// ```rust
/// use multi_llm::ToolCall;
///
/// // Received from LLM response
/// let call = ToolCall {
///     id: "call_abc123".to_string(),
///     name: "get_weather".to_string(),
///     arguments: serde_json::json!({"city": "Paris", "units": "celsius"}),
/// };
///
/// // Parse and execute
/// let city = call.arguments["city"].as_str().unwrap();
/// // ... execute weather lookup ...
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ToolCall {
    /// Unique identifier for this tool call (generated by the LLM).
    ///
    /// Use this ID when returning the [`ToolResult`] to match the response
    /// with the original call.
    pub id: String,

    /// Name of the tool to call.
    ///
    /// Must match a [`Tool::name`] from the request's tool list.
    /// NOTE(review): models can hallucinate tool names — callers should
    /// handle an unknown name gracefully rather than panic.
    pub name: String,

    /// Arguments to pass to the tool as JSON.
    ///
    /// Structure matches the `parameters` schema defined in the [`Tool`].
    /// May be an empty object `{}` if the tool has no required parameters.
    /// Treat as untrusted input: validate before executing side effects.
    pub arguments: serde_json::Value,
}
186
/// Result from executing a tool, sent back to the LLM.
///
/// After executing a [`ToolCall`], create a `ToolResult` to send back.
/// The LLM will use this information to continue the conversation.
///
/// # Example
///
/// ```rust
/// use multi_llm::ToolResult;
///
/// // Successful result
/// let success = ToolResult {
///     tool_call_id: "call_abc123".to_string(),
///     content: "Weather in Paris: Sunny, 18°C".to_string(),
///     is_error: false,
///     error_category: None,
/// };
///
/// // Error result
/// use multi_llm::error::UserErrorCategory;
/// let error = ToolResult {
///     tool_call_id: "call_xyz789".to_string(),
///     content: "City not found".to_string(),
///     is_error: true,
///     error_category: Some(UserErrorCategory::NotFound),
/// };
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ToolResult {
    /// ID of the tool call this result responds to.
    ///
    /// Must match the `id` from the corresponding [`ToolCall`].
    pub tool_call_id: String,

    /// Result content from the tool execution.
    ///
    /// For successful calls, this is the tool's output (often a string or JSON).
    /// For errors, this should describe what went wrong.
    pub content: String,

    /// Whether the tool execution failed.
    ///
    /// When `true`, the LLM knows the tool didn't work and may try alternatives.
    pub is_error: bool,

    /// Error category for structured error handling.
    ///
    /// Only meaningful when `is_error` is `true`; set to `None` on success.
    /// Helps applications handle different error types appropriately.
    pub error_category: Option<UserErrorCategory>,
}
238
/// Strategy for how the LLM should handle tool selection.
///
/// Controls whether the LLM must use tools, can choose to use them, or is
/// restricted from using them. The `Default` implementation is [`ToolChoice::Auto`].
///
/// # Example
///
/// ```rust
/// use multi_llm::{RequestConfig, ToolChoice};
///
/// // Let the model decide
/// let config = RequestConfig {
///     tool_choice: Some(ToolChoice::Auto),
///     ..Default::default()
/// };
///
/// // Force a specific tool
/// let config = RequestConfig {
///     tool_choice: Some(ToolChoice::Specific("get_weather".to_string())),
///     ..Default::default()
/// };
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
pub enum ToolChoice {
    /// Let the model decide whether and which tools to use.
    ///
    /// The model may call zero, one, or multiple tools based on the conversation.
    #[default]
    Auto,

    /// Disable tool use entirely for this request.
    ///
    /// Even if tools are defined, the model will not call them.
    /// (This is the enum variant `ToolChoice::None`, not `Option::None`.)
    None,

    /// Force the model to use at least one tool.
    ///
    /// Useful when you know a tool call is needed but don't care which one.
    Required,

    /// Force the model to use a specific tool by name.
    ///
    /// The model will call exactly this tool (if arguments can be determined).
    /// The string should match a [`Tool::name`] supplied in [`RequestConfig::tools`].
    Specific(String),
}
284
/// Configuration for a single LLM request.
///
/// Override default provider settings on a per-request basis. All fields are
/// optional - unset fields use the provider's defaults. The derived `Default`
/// yields every `Option` as `None` and `tools` as an empty `Vec`.
///
/// # Basic Usage
///
/// ```rust
/// use multi_llm::RequestConfig;
///
/// let config = RequestConfig {
///     temperature: Some(0.7),
///     max_tokens: Some(1000),
///     ..Default::default()
/// };
/// ```
///
/// # With Tools
///
/// ```rust
/// use multi_llm::{RequestConfig, Tool, ToolChoice};
///
/// let weather_tool = Tool {
///     name: "get_weather".to_string(),
///     description: "Get weather for a city".to_string(),
///     parameters: serde_json::json!({"type": "object", "properties": {}}),
/// };
///
/// let config = RequestConfig {
///     tools: vec![weather_tool],
///     tool_choice: Some(ToolChoice::Auto),
///     ..Default::default()
/// };
/// ```
///
/// # Sampling Parameters
///
/// | Parameter | Range | Effect |
/// |-----------|-------|--------|
/// | `temperature` | 0.0-2.0 | Randomness (0=deterministic, 2=very random) |
/// | `top_p` | 0.0-1.0 | Nucleus sampling threshold |
/// | `top_k` | 1+ | Limit vocab to top K tokens |
/// | `presence_penalty` | -2.0-2.0 | Discourage repetition |
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
pub struct RequestConfig {
    // Standard LLM parameters
    /// Temperature for response randomness.
    ///
    /// - `0.0`: Deterministic (always pick most likely token)
    /// - `0.7`: Balanced (good default for most tasks)
    /// - `1.0+`: More creative/random
    ///
    /// Range: 0.0 to 2.0 (provider-dependent)
    pub temperature: Option<f64>,

    /// Maximum tokens to generate in the response.
    ///
    /// Limits response length. The actual response may be shorter if the
    /// model completes its thought naturally.
    pub max_tokens: Option<u32>,

    /// Top-p (nucleus) sampling parameter.
    ///
    /// Only consider tokens whose cumulative probability exceeds this threshold.
    /// Lower values = more focused, higher values = more diverse.
    /// Range: 0.0 to 1.0 (typically 0.9-0.95)
    pub top_p: Option<f64>,

    /// Top-k sampling parameter.
    ///
    /// Only consider the top K most likely tokens at each step.
    /// Lower values = more focused. Not all providers support this.
    pub top_k: Option<u32>,

    /// Min-p sampling parameter.
    ///
    /// Filter tokens below this probability relative to the top token.
    /// Range: 0.0 to 1.0. Not all providers support this.
    pub min_p: Option<f64>,

    /// Presence penalty to discourage repetition.
    ///
    /// Positive values reduce likelihood of repeating tokens that have appeared.
    /// Range: -2.0 to 2.0 (typically 0.0 to 1.0)
    pub presence_penalty: Option<f64>,

    /// Response format for structured output.
    ///
    /// When set, the model attempts to return JSON matching the schema.
    /// Use with [`LlmProvider::execute_structured_llm()`] for best results.
    pub response_format: Option<ResponseFormat>,

    // Tool-specific configuration
    /// Tools available for this request.
    ///
    /// Define functions the LLM can call. See [`Tool`] for details.
    /// An empty vector means no tools are offered.
    pub tools: Vec<Tool>,

    /// Strategy for tool selection.
    ///
    /// Controls whether tools are optional, required, or disabled.
    /// See [`ToolChoice`] for options. `None` here means "use the
    /// provider's default behavior", which differs from `Some(ToolChoice::None)`
    /// (explicitly disable tools).
    pub tool_choice: Option<ToolChoice>,

    // Context metadata for logging and analytics
    /// User ID for analytics and cache analysis.
    ///
    /// Helps track cache hit rates per user and debug user-specific issues.
    pub user_id: Option<String>,

    /// Session ID for session-level analytics.
    ///
    /// Track cache performance and behavior within a conversation session.
    pub session_id: Option<String>,

    /// LLM path context for distinguishing call types.
    ///
    /// Useful when your application has multiple LLM call paths
    /// (e.g., "chat", "analysis", "summarization").
    pub llm_path: Option<String>,
}
406
/// Schema specification for structured JSON output.
///
/// When you need the LLM to return data in a specific JSON format, define
/// a `ResponseFormat` with a JSON Schema. The model will attempt to conform
/// its output to this schema.
///
/// NOTE(review): how strictly the schema is enforced (hard constraint vs.
/// best-effort prompting) is provider-dependent — validate the parsed output
/// if your application requires guaranteed conformance.
///
/// # Example
///
/// ```rust
/// use multi_llm::ResponseFormat;
///
/// let format = ResponseFormat {
///     name: "person_info".to_string(),
///     schema: serde_json::json!({
///         "type": "object",
///         "properties": {
///             "name": {"type": "string"},
///             "age": {"type": "integer"},
///             "email": {"type": "string", "format": "email"}
///         },
///         "required": ["name", "age"]
///     }),
/// };
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ResponseFormat {
    /// Name identifier for this schema.
    ///
    /// Used for logging and debugging. Should be descriptive of the expected output.
    pub name: String,

    /// JSON Schema specification defining the expected output structure.
    ///
    /// The model will attempt to return JSON that validates against this schema.
    /// Include `type`, `properties`, `required`, and `description` fields.
    pub schema: serde_json::Value,
}
444
/// Token usage statistics for an LLM request.
///
/// Tracks how many tokens were consumed by the prompt and completion,
/// useful for cost estimation and monitoring context window usage.
///
/// # Cost Estimation
///
/// Most providers charge per token. Multiply token counts by the provider's
/// per-token rate to estimate costs:
///
/// ```rust
/// use multi_llm::TokenUsage;
///
/// let usage = TokenUsage {
///     prompt_tokens: 1000,
///     completion_tokens: 500,
///     total_tokens: 1500,
/// };
///
/// // Example: OpenAI GPT-4 pricing (illustrative)
/// let prompt_cost = usage.prompt_tokens as f64 * 0.00003;
/// let completion_cost = usage.completion_tokens as f64 * 0.00006;
/// let total_cost = prompt_cost + completion_cost;
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TokenUsage {
    /// Number of tokens in the prompt/input.
    ///
    /// Includes system messages, conversation history, and the current query.
    pub prompt_tokens: u32,

    /// Number of tokens in the completion/output.
    ///
    /// The generated response from the model.
    pub completion_tokens: u32,

    /// Total tokens used (prompt + completion).
    ///
    /// Should equal `prompt_tokens + completion_tokens`, but the value is
    /// reported by the provider and the invariant is not enforced here —
    /// TODO confirm whether any provider reports totals differently.
    pub total_tokens: u32,
}
486
/// Response from an LLM operation.
///
/// Contains the model's output along with metadata about the request.
/// Check `tool_calls` first - if non-empty, the model wants to call functions
/// rather than provide a final response.
///
/// # Basic Response
///
/// ```rust,no_run
/// use multi_llm::Response;
///
/// # fn example(response: Response) {
/// // Standard text response
/// println!("Response: {}", response.content);
///
/// // Check token usage
/// if let Some(usage) = &response.usage {
///     println!("Used {} tokens", usage.total_tokens);
/// }
/// # }
/// ```
///
/// # Tool Calling Response
///
/// ```rust,no_run
/// use multi_llm::Response;
///
/// # fn example(response: Response) {
/// // Check if model wants to call tools
/// if !response.tool_calls.is_empty() {
///     for call in &response.tool_calls {
///         println!("Tool: {} with args: {}", call.name, call.arguments);
///         // Execute tool and return result...
///     }
/// }
/// # }
/// ```
///
/// # Structured Response
///
/// ```rust,no_run
/// use multi_llm::Response;
///
/// # fn example(response: Response) {
/// // When using execute_structured_llm
/// if let Some(json) = &response.structured_response {
///     let name = json["name"].as_str().unwrap_or("unknown");
///     println!("Extracted name: {}", name);
/// }
/// # }
/// ```
///
/// # Note on Trait Implementations
///
/// This type intentionally omits `Serialize`, `Deserialize`, and `PartialEq`:
/// - `structured_response` contains arbitrary `serde_json::Value` that may not round-trip cleanly
/// - `raw_body` is provider-specific debug data not meant for serialization
/// - Equality comparison on JSON values can be surprising (object key ordering, number precision)
///
/// If you need to serialize responses, extract the specific fields you need.
#[derive(Debug, Clone)]
pub struct Response {
    /// Primary text content of the response.
    ///
    /// For standard requests, this is the model's natural language output.
    /// May be empty if the model only returned tool calls.
    pub content: String,

    /// Parsed JSON when using structured output.
    ///
    /// Populated when using [`LlmProvider::execute_structured_llm()`] with a schema.
    /// Contains the parsed JSON that (should) match the requested schema;
    /// `None` for standard (non-structured) requests.
    pub structured_response: Option<serde_json::Value>,

    /// Tool calls the model wants to execute.
    ///
    /// If non-empty, the model is requesting function calls rather than
    /// providing a final answer. Execute the tools and continue the conversation.
    pub tool_calls: Vec<ToolCall>,

    /// Token usage statistics for this request.
    ///
    /// May be `None` if the provider doesn't report usage or if the request failed.
    pub usage: Option<TokenUsage>,

    /// The model that generated this response.
    ///
    /// Useful when the provider might use different models than requested
    /// (e.g., fallback models or model aliases).
    pub model: Option<String>,

    /// Raw response body for debugging.
    ///
    /// Contains the unprocessed JSON response from the provider API.
    /// Useful for debugging parsing issues or accessing provider-specific fields.
    /// May be large; avoid logging it wholesale in production paths.
    pub raw_body: Option<String>,
}
584
/// Business event generated during LLM operations.
///
/// Wraps a [`BusinessEvent`] with its scope for routing to the appropriate
/// storage backend. Only available with the `events` feature enabled;
/// when the feature is on, provider methods return these alongside the
/// [`Response`] (see [`LlmProvider::execute_llm`]).
///
/// # Feature Flag
///
/// This type requires the `events` feature:
/// ```toml
/// [dependencies]
/// multi-llm = { version = "...", features = ["events"] }
/// ```
#[cfg(feature = "events")]
#[derive(Debug, Clone)]
pub struct LLMBusinessEvent {
    /// The business event containing type and metadata.
    pub event: BusinessEvent,

    /// Scope determining where the event should be stored.
    ///
    /// - [`EventScope::User`]: Written to user-specific storage
    /// - [`EventScope::System`]: Written to system-wide storage
    pub scope: EventScope,
}
609
/// State for a tool calling round in multi-turn conversations.
///
/// When using tool calling, conversations often have multiple rounds:
/// 1. User asks a question
/// 2. Assistant requests tool calls
/// 3. Tools execute and return results
/// 4. Assistant uses results to form final response
///
/// `ToolCallingRound` captures the assistant's tool requests and the corresponding
/// results, allowing providers to properly format multi-turn tool conversations.
///
/// # Example Flow
///
/// ```rust,no_run
/// use multi_llm::{ToolCallingRound, ToolResult, UnifiedMessage};
///
/// // After receiving tool calls from the LLM
/// # fn example(assistant_response: UnifiedMessage, tool_results: Vec<ToolResult>) {
/// let round = ToolCallingRound {
///     assistant_message: assistant_response, // The message with tool calls
///     tool_results,                          // Results from executing those calls
/// };
///
/// // Pass to execute_llm for the next turn
/// // provider.execute_llm(request, Some(round), config).await?;
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct ToolCallingRound {
    /// The assistant message that initiated tool calls.
    ///
    /// This message contains the tool call content variants
    /// that the assistant generated.
    pub assistant_message: UnifiedMessage,

    /// Results from executing the requested tools.
    ///
    /// Each result's `tool_call_id` should match an ID from the assistant message;
    /// unmatched results are presumably provider-rejected — TODO confirm.
    pub tool_results: Vec<ToolResult>,
}
650
/// Trait implemented by all LLM providers.
///
/// This is the core abstraction that makes multi-llm work. All providers
/// (OpenAI, Anthropic, Ollama, LM Studio) implement this trait, allowing
/// you to switch providers without changing your application code.
///
/// The trait is `Send + Sync` and its methods are made `async`-compatible
/// via `#[async_trait]`, so implementations can be used behind
/// `dyn LlmProvider` trait objects shared across tasks.
///
/// # Usage
///
/// You typically don't implement this trait yourself. Instead, use
/// [`UnifiedLLMClient`](crate::UnifiedLLMClient) which wraps all providers:
///
/// ```rust,no_run
/// use multi_llm::{unwrap_response, UnifiedLLMClient, LLMConfig, UnifiedMessage, UnifiedLLMRequest, LlmProvider};
///
/// # async fn example() -> anyhow::Result<()> {
/// let config = LLMConfig::from_env()?;
/// let client = UnifiedLLMClient::from_config(config)?;
///
/// let request = UnifiedLLMRequest::new(vec![
///     UnifiedMessage::user("Hello!")
/// ]);
///
/// let response = unwrap_response!(client.execute_llm(request, None, None).await?);
/// println!("Response: {}", response.content);
/// # Ok(())
/// # }
/// ```
///
/// # Return Types
///
/// Return types depend on the `events` feature:
/// - **Without `events`**: Returns `Result<Response>`
/// - **With `events`**: Returns `Result<(Response, Vec<LLMBusinessEvent>)>`
///
/// Each method is declared twice below with opposite `#[cfg(feature = "events")]`
/// gates, so exactly one signature exists per build configuration.
///
/// # Implementing Custom Providers
///
/// If you need to implement a custom provider:
///
/// ```rust,ignore
/// use multi_llm::{LlmProvider, UnifiedLLMRequest, RequestConfig, Response, ToolCallingRound};
/// use async_trait::async_trait;
///
/// struct MyProvider { /* ... */ }
///
/// #[async_trait]
/// impl LlmProvider for MyProvider {
///     async fn execute_llm(
///         &self,
///         request: UnifiedLLMRequest,
///         current_tool_round: Option<ToolCallingRound>,
///         config: Option<RequestConfig>,
///     ) -> multi_llm::provider::Result<Response> {
///         // Convert request to your API format
///         // Make API call
///         // Convert response to Response
///         todo!()
///     }
///
///     async fn execute_structured_llm(
///         &self,
///         request: UnifiedLLMRequest,
///         current_tool_round: Option<ToolCallingRound>,
///         schema: serde_json::Value,
///         config: Option<RequestConfig>,
///     ) -> multi_llm::provider::Result<Response> {
///         // Similar to execute_llm but with JSON schema enforcement
///         todo!()
///     }
///
///     fn provider_name(&self) -> &'static str {
///         "my_provider"
///     }
/// }
/// ```
#[async_trait::async_trait]
pub trait LlmProvider: Send + Sync {
    /// Execute an LLM request and return the response.
    ///
    /// This is the primary method for interacting with LLMs. It handles:
    /// - Message conversion to provider-specific formats
    /// - Tool calling (if tools are defined in the request)
    /// - Caching hints (for providers that support it)
    /// - Retry logic (based on provider configuration)
    ///
    /// # Arguments
    ///
    /// * `request` - The unified request containing messages and optional config
    /// * `current_tool_round` - Previous tool calling state for multi-turn tool use
    /// * `config` - Optional per-request configuration overrides
    ///
    /// # Returns
    ///
    /// - Without `events` feature: `Result<Response>`
    /// - With `events` feature: `Result<(Response, Vec<LLMBusinessEvent>)>`
    ///
    /// # Errors
    ///
    /// Returns [`LlmError`](crate::LlmError) on transport, provider, or
    /// parsing failures; see its helpers for retryability and categorization.
    #[cfg(feature = "events")]
    async fn execute_llm(
        &self,
        request: UnifiedLLMRequest,
        current_tool_round: Option<ToolCallingRound>,
        config: Option<RequestConfig>,
    ) -> Result<(Response, Vec<LLMBusinessEvent>)>;

    /// Execute an LLM request and return the response.
    ///
    /// This is the primary method for interacting with LLMs. It handles:
    /// - Message conversion to provider-specific formats
    /// - Tool calling (if tools are defined in the request)
    /// - Caching hints (for providers that support it)
    /// - Retry logic (based on provider configuration)
    ///
    /// # Arguments
    ///
    /// * `request` - The unified request containing messages and optional config
    /// * `current_tool_round` - Previous tool calling state for multi-turn tool use
    /// * `config` - Optional per-request configuration overrides
    ///
    /// # Errors
    ///
    /// Returns [`LlmError`](crate::LlmError) on transport, provider, or
    /// parsing failures; see its helpers for retryability and categorization.
    #[cfg(not(feature = "events"))]
    async fn execute_llm(
        &self,
        request: UnifiedLLMRequest,
        current_tool_round: Option<ToolCallingRound>,
        config: Option<RequestConfig>,
    ) -> Result<Response>;

    /// Execute an LLM request with structured JSON output.
    ///
    /// Like [`execute_llm()`](Self::execute_llm), but instructs the model to return
    /// JSON conforming to the provided schema. The parsed JSON is available in
    /// [`Response::structured_response`].
    ///
    /// # Arguments
    ///
    /// * `request` - The unified request containing messages
    /// * `current_tool_round` - Previous tool calling state
    /// * `schema` - JSON Schema the response should conform to
    /// * `config` - Optional per-request configuration overrides
    ///
    /// # Returns
    ///
    /// - Without `events` feature: `Result<Response>`
    /// - With `events` feature: `Result<(Response, Vec<LLMBusinessEvent>)>`
    #[cfg(feature = "events")]
    async fn execute_structured_llm(
        &self,
        request: UnifiedLLMRequest,
        current_tool_round: Option<ToolCallingRound>,
        schema: serde_json::Value,
        config: Option<RequestConfig>,
    ) -> Result<(Response, Vec<LLMBusinessEvent>)>;

    /// Execute an LLM request with structured JSON output.
    ///
    /// Like [`execute_llm()`](Self::execute_llm), but instructs the model to return
    /// JSON conforming to the provided schema. The parsed JSON is available in
    /// [`Response::structured_response`].
    ///
    /// # Arguments
    ///
    /// * `request` - The unified request containing messages
    /// * `current_tool_round` - Previous tool calling state
    /// * `schema` - JSON Schema the response should conform to
    /// * `config` - Optional per-request configuration overrides
    #[cfg(not(feature = "events"))]
    async fn execute_structured_llm(
        &self,
        request: UnifiedLLMRequest,
        current_tool_round: Option<ToolCallingRound>,
        schema: serde_json::Value,
        config: Option<RequestConfig>,
    ) -> Result<Response>;

    /// Get the provider's identifier.
    ///
    /// Returns a static string like "anthropic", "openai", "ollama", or "lmstudio".
    /// Used for logging, debugging, and provider-specific behavior.
    fn provider_name(&self) -> &'static str;
}
827
/// Backward-compatible alias for [`RequestConfig`].
///
/// Prefer [`RequestConfig`] in new code; this alias exists only so older
/// call sites keep compiling.
pub type LLMRequestConfig = RequestConfig;

/// Backward-compatible alias for [`ResponseFormat`]; prefer [`ResponseFormat`] in new code.
pub type LLMResponseFormat = ResponseFormat;

/// Backward-compatible alias for [`TokenUsage`]; prefer [`TokenUsage`] in new code.
pub type LLMTokenUsage = TokenUsage;