// multi_llm/provider.rs

//! Provider trait and types for LLM abstraction.
//!
//! This module defines the [`LlmProvider`] trait that all LLM providers implement,
//! along with request/response types, tool definitions, and configuration.
//!
//! # Overview
//!
//! The provider abstraction allows you to:
//! - Switch between LLM providers without changing your application code
//! - Use a consistent interface for all LLM operations
//! - Access provider-specific features (like caching) through unified APIs
//!
//! # Provider Trait
//!
//! All providers implement [`LlmProvider`], which provides:
//! - [`execute_llm()`](LlmProvider::execute_llm): Execute a standard LLM request
//! - [`execute_structured_llm()`](LlmProvider::execute_structured_llm): Execute with JSON schema output
//! - [`provider_name()`](LlmProvider::provider_name): Get the provider identifier
//!
//! # Tool Calling
//!
//! Define tools with [`Tool`] and handle the calling flow:
//!
//! ```rust
//! use multi_llm::{Tool, ToolChoice, ToolCall, ToolResult};
//!
//! // Define a tool
//! let weather_tool = Tool {
//!     name: "get_weather".to_string(),
//!     description: "Get current weather for a city".to_string(),
//!     parameters: serde_json::json!({
//!         "type": "object",
//!         "properties": {
//!             "city": {"type": "string", "description": "City name"}
//!         },
//!         "required": ["city"]
//!     }),
//! };
//!
//! // Handle a tool call from the LLM
//! let tool_call = ToolCall {
//!     id: "call_123".to_string(),
//!     name: "get_weather".to_string(),
//!     arguments: serde_json::json!({"city": "London"}),
//! };
//!
//! // Return the result
//! let result = ToolResult {
//!     tool_call_id: "call_123".to_string(),
//!     content: "Sunny, 22°C".to_string(),
//!     is_error: false,
//!     error_category: None,
//! };
//! ```
//!
//! # Response Structure
//!
//! All providers return a [`Response`] containing:
//! - Text content (for standard requests)
//! - Structured JSON (when using `execute_structured_llm`)
//! - Tool calls (when the model wants to call functions)
//! - Token usage statistics

use crate::error::UserErrorCategory;
#[cfg(feature = "events")]
use crate::internals::events::{BusinessEvent, EventScope};
use crate::messages::{UnifiedLLMRequest, UnifiedMessage};
use serde::{Deserialize, Serialize};

70/// Result type alias for provider operations.
71///
72/// Uses [`LlmError`](crate::LlmError) for structured error handling with
73/// rich metadata (categories, retry info, user messages).
74///
75/// See [`LlmError`](crate::LlmError) for available error variants and helper methods:
76/// - [`is_retryable()`](crate::LlmError::is_retryable): Check if retry makes sense
77/// - [`category()`](crate::LlmError::category): Get error category for routing
78/// - [`user_message()`](crate::LlmError::user_message): Get safe user-facing message
79pub type Result<T> = std::result::Result<T, crate::LlmError>;
80
81/// Definition of a tool/function that the LLM can call.
82///
83/// Tools allow LLMs to perform actions by generating structured calls that your
84/// application executes. The LLM sees the tool's name, description, and parameter
85/// schema to understand when and how to use it.
86///
87/// # Example
88///
89/// ```rust
90/// use multi_llm::Tool;
91///
92/// let search_tool = Tool {
93///     name: "web_search".to_string(),
94///     description: "Search the web for information".to_string(),
95///     parameters: serde_json::json!({
96///         "type": "object",
97///         "properties": {
98///             "query": {
99///                 "type": "string",
100///                 "description": "The search query"
101///             },
102///             "max_results": {
103///                 "type": "integer",
104///                 "description": "Maximum results to return",
105///                 "default": 10
106///             }
107///         },
108///         "required": ["query"]
109///     }),
110/// };
111/// ```
112///
113/// # Parameter Schema
114///
115/// The `parameters` field should be a valid JSON Schema object describing the
116/// tool's input. Use `type`, `properties`, `required`, and `description` fields
117/// to help the LLM understand how to call your tool correctly.
118#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
119pub struct Tool {
120    /// Tool name - must be unique within a request.
121    ///
122    /// Use descriptive names like "get_weather" or "search_documents".
123    /// This is how the LLM identifies which tool to call.
124    pub name: String,
125
126    /// Human-readable description of what the tool does.
127    ///
128    /// Be specific about capabilities and limitations. This helps the LLM
129    /// decide when to use this tool vs. others.
130    pub description: String,
131
132    /// JSON Schema defining the tool's input parameters.
133    ///
134    /// Should be a JSON Schema object with `type: "object"` and `properties`
135    /// describing each parameter. Include `description` for each property.
136    pub parameters: serde_json::Value,
137}
138
139/// A tool call generated by the LLM.
140///
141/// When the LLM decides to use a tool, it generates a `ToolCall` with:
142/// - A unique ID to match with the response
143/// - The tool name to invoke
144/// - Arguments parsed from the conversation
145///
146/// Your application should:
147/// 1. Execute the tool with the provided arguments
148/// 2. Return a [`ToolResult`] with the matching `tool_call_id`
149/// 3. Continue the conversation so the LLM can use the result
150///
151/// # Example
152///
153/// ```rust
154/// use multi_llm::ToolCall;
155///
156/// // Received from LLM response
157/// let call = ToolCall {
158///     id: "call_abc123".to_string(),
159///     name: "get_weather".to_string(),
160///     arguments: serde_json::json!({"city": "Paris", "units": "celsius"}),
161/// };
162///
163/// // Parse and execute
164/// let city = call.arguments["city"].as_str().unwrap();
165/// // ... execute weather lookup ...
166/// ```
167#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
168pub struct ToolCall {
169    /// Unique identifier for this tool call (generated by the LLM).
170    ///
171    /// Use this ID when returning the [`ToolResult`] to match the response
172    /// with the original call.
173    pub id: String,
174
175    /// Name of the tool to call.
176    ///
177    /// Must match a [`Tool::name`] from the request's tool list.
178    pub name: String,
179
180    /// Arguments to pass to the tool as JSON.
181    ///
182    /// Structure matches the `parameters` schema defined in the [`Tool`].
183    /// May be an empty object `{}` if the tool has no required parameters.
184    pub arguments: serde_json::Value,
185}
186
187/// Result from executing a tool, sent back to the LLM.
188///
189/// After executing a [`ToolCall`], create a `ToolResult` to send back.
190/// The LLM will use this information to continue the conversation.
191///
192/// # Example
193///
194/// ```rust
195/// use multi_llm::ToolResult;
196///
197/// // Successful result
198/// let success = ToolResult {
199///     tool_call_id: "call_abc123".to_string(),
200///     content: "Weather in Paris: Sunny, 18°C".to_string(),
201///     is_error: false,
202///     error_category: None,
203/// };
204///
205/// // Error result
206/// use multi_llm::error::UserErrorCategory;
207/// let error = ToolResult {
208///     tool_call_id: "call_xyz789".to_string(),
209///     content: "City not found".to_string(),
210///     is_error: true,
211///     error_category: Some(UserErrorCategory::NotFound),
212/// };
213/// ```
214#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
215pub struct ToolResult {
216    /// ID of the tool call this result responds to.
217    ///
218    /// Must match the `id` from the corresponding [`ToolCall`].
219    pub tool_call_id: String,
220
221    /// Result content from the tool execution.
222    ///
223    /// For successful calls, this is the tool's output (often a string or JSON).
224    /// For errors, this should describe what went wrong.
225    pub content: String,
226
227    /// Whether the tool execution failed.
228    ///
229    /// When `true`, the LLM knows the tool didn't work and may try alternatives.
230    pub is_error: bool,
231
232    /// Error category for structured error handling.
233    ///
234    /// Only meaningful when `is_error` is `true`. Helps applications handle
235    /// different error types appropriately.
236    pub error_category: Option<UserErrorCategory>,
237}
238
239/// Strategy for how the LLM should handle tool selection.
240///
241/// Controls whether the LLM must use tools, can choose to use them, or is
242/// restricted from using them.
243///
244/// # Example
245///
246/// ```rust
247/// use multi_llm::{RequestConfig, ToolChoice};
248///
249/// // Let the model decide
250/// let config = RequestConfig {
251///     tool_choice: Some(ToolChoice::Auto),
252///     ..Default::default()
253/// };
254///
255/// // Force a specific tool
256/// let config = RequestConfig {
257///     tool_choice: Some(ToolChoice::Specific("get_weather".to_string())),
258///     ..Default::default()
259/// };
260/// ```
261#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
262pub enum ToolChoice {
263    /// Let the model decide whether and which tools to use.
264    ///
265    /// The model may call zero, one, or multiple tools based on the conversation.
266    #[default]
267    Auto,
268
269    /// Disable tool use entirely for this request.
270    ///
271    /// Even if tools are defined, the model will not call them.
272    None,
273
274    /// Force the model to use at least one tool.
275    ///
276    /// Useful when you know a tool call is needed but don't care which one.
277    Required,
278
279    /// Force the model to use a specific tool by name.
280    ///
281    /// The model will call exactly this tool (if arguments can be determined).
282    Specific(String),
283}
284
285/// Configuration for a single LLM request.
286///
287/// Override default provider settings on a per-request basis. All fields are
288/// optional - unset fields use the provider's defaults.
289///
290/// # Basic Usage
291///
292/// ```rust
293/// use multi_llm::RequestConfig;
294///
295/// let config = RequestConfig {
296///     temperature: Some(0.7),
297///     max_tokens: Some(1000),
298///     ..Default::default()
299/// };
300/// ```
301///
302/// # With Tools
303///
304/// ```rust
305/// use multi_llm::{RequestConfig, Tool, ToolChoice};
306///
307/// let weather_tool = Tool {
308///     name: "get_weather".to_string(),
309///     description: "Get weather for a city".to_string(),
310///     parameters: serde_json::json!({"type": "object", "properties": {}}),
311/// };
312///
313/// let config = RequestConfig {
314///     tools: vec![weather_tool],
315///     tool_choice: Some(ToolChoice::Auto),
316///     ..Default::default()
317/// };
318/// ```
319///
320/// # Sampling Parameters
321///
322/// | Parameter | Range | Effect |
323/// |-----------|-------|--------|
324/// | `temperature` | 0.0-2.0 | Randomness (0=deterministic, 2=very random) |
325/// | `top_p` | 0.0-1.0 | Nucleus sampling threshold |
326/// | `top_k` | 1+ | Limit vocab to top K tokens |
327/// | `presence_penalty` | -2.0-2.0 | Discourage repetition |
328#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
329pub struct RequestConfig {
330    // Standard LLM parameters
331    /// Temperature for response randomness.
332    ///
333    /// - `0.0`: Deterministic (always pick most likely token)
334    /// - `0.7`: Balanced (good default for most tasks)
335    /// - `1.0+`: More creative/random
336    ///
337    /// Range: 0.0 to 2.0 (provider-dependent)
338    pub temperature: Option<f64>,
339
340    /// Maximum tokens to generate in the response.
341    ///
342    /// Limits response length. The actual response may be shorter if the
343    /// model completes its thought naturally.
344    pub max_tokens: Option<u32>,
345
346    /// Top-p (nucleus) sampling parameter.
347    ///
348    /// Only consider tokens whose cumulative probability exceeds this threshold.
349    /// Lower values = more focused, higher values = more diverse.
350    /// Range: 0.0 to 1.0 (typically 0.9-0.95)
351    pub top_p: Option<f64>,
352
353    /// Top-k sampling parameter.
354    ///
355    /// Only consider the top K most likely tokens at each step.
356    /// Lower values = more focused. Not all providers support this.
357    pub top_k: Option<u32>,
358
359    /// Min-p sampling parameter.
360    ///
361    /// Filter tokens below this probability relative to the top token.
362    /// Range: 0.0 to 1.0. Not all providers support this.
363    pub min_p: Option<f64>,
364
365    /// Presence penalty to discourage repetition.
366    ///
367    /// Positive values reduce likelihood of repeating tokens that have appeared.
368    /// Range: -2.0 to 2.0 (typically 0.0 to 1.0)
369    pub presence_penalty: Option<f64>,
370
371    /// Response format for structured output.
372    ///
373    /// When set, the model attempts to return JSON matching the schema.
374    /// Use with [`LlmProvider::execute_structured_llm()`] for best results.
375    pub response_format: Option<ResponseFormat>,
376
377    // Tool-specific configuration
378    /// Tools available for this request.
379    ///
380    /// Define functions the LLM can call. See [`Tool`] for details.
381    pub tools: Vec<Tool>,
382
383    /// Strategy for tool selection.
384    ///
385    /// Controls whether tools are optional, required, or disabled.
386    /// See [`ToolChoice`] for options.
387    pub tool_choice: Option<ToolChoice>,
388
389    // Context metadata for logging and analytics
390    /// User ID for analytics and cache analysis.
391    ///
392    /// Helps track cache hit rates per user and debug user-specific issues.
393    pub user_id: Option<String>,
394
395    /// Session ID for session-level analytics.
396    ///
397    /// Track cache performance and behavior within a conversation session.
398    pub session_id: Option<String>,
399
400    /// LLM path context for distinguishing call types.
401    ///
402    /// Useful when your application has multiple LLM call paths
403    /// (e.g., "chat", "analysis", "summarization").
404    pub llm_path: Option<String>,
405}
406
407/// Schema specification for structured JSON output.
408///
409/// When you need the LLM to return data in a specific JSON format, define
410/// a `ResponseFormat` with a JSON Schema. The model will attempt to conform
411/// its output to this schema.
412///
413/// # Example
414///
415/// ```rust
416/// use multi_llm::ResponseFormat;
417///
418/// let format = ResponseFormat {
419///     name: "person_info".to_string(),
420///     schema: serde_json::json!({
421///         "type": "object",
422///         "properties": {
423///             "name": {"type": "string"},
424///             "age": {"type": "integer"},
425///             "email": {"type": "string", "format": "email"}
426///         },
427///         "required": ["name", "age"]
428///     }),
429/// };
430/// ```
431#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
432pub struct ResponseFormat {
433    /// Name identifier for this schema.
434    ///
435    /// Used for logging and debugging. Should be descriptive of the expected output.
436    pub name: String,
437
438    /// JSON Schema specification defining the expected output structure.
439    ///
440    /// The model will attempt to return JSON that validates against this schema.
441    /// Include `type`, `properties`, `required`, and `description` fields.
442    pub schema: serde_json::Value,
443}
444
445/// Token usage statistics for an LLM request.
446///
447/// Tracks how many tokens were consumed by the prompt and completion,
448/// useful for cost estimation and monitoring context window usage.
449///
450/// # Cost Estimation
451///
452/// Most providers charge per token. Multiply token counts by the provider's
453/// per-token rate to estimate costs:
454///
455/// ```rust
456/// use multi_llm::TokenUsage;
457///
458/// let usage = TokenUsage {
459///     prompt_tokens: 1000,
460///     completion_tokens: 500,
461///     total_tokens: 1500,
462/// };
463///
464/// // Example: OpenAI GPT-4 pricing (illustrative)
465/// let prompt_cost = usage.prompt_tokens as f64 * 0.00003;
466/// let completion_cost = usage.completion_tokens as f64 * 0.00006;
467/// let total_cost = prompt_cost + completion_cost;
468/// ```
469#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
470pub struct TokenUsage {
471    /// Number of tokens in the prompt/input.
472    ///
473    /// Includes system messages, conversation history, and the current query.
474    pub prompt_tokens: u32,
475
476    /// Number of tokens in the completion/output.
477    ///
478    /// The generated response from the model.
479    pub completion_tokens: u32,
480
481    /// Total tokens used (prompt + completion).
482    ///
483    /// Should equal `prompt_tokens + completion_tokens`.
484    pub total_tokens: u32,
485}
486
487/// Response from an LLM operation.
488///
489/// Contains the model's output along with metadata about the request.
490/// Check `tool_calls` first - if non-empty, the model wants to call functions
491/// rather than provide a final response.
492///
493/// # Basic Response
494///
495/// ```rust,no_run
496/// use multi_llm::Response;
497///
498/// # fn example(response: Response) {
499/// // Standard text response
500/// println!("Response: {}", response.content);
501///
502/// // Check token usage
503/// if let Some(usage) = &response.usage {
504///     println!("Used {} tokens", usage.total_tokens);
505/// }
506/// # }
507/// ```
508///
509/// # Tool Calling Response
510///
511/// ```rust,no_run
512/// use multi_llm::Response;
513///
514/// # fn example(response: Response) {
515/// // Check if model wants to call tools
516/// if !response.tool_calls.is_empty() {
517///     for call in &response.tool_calls {
518///         println!("Tool: {} with args: {}", call.name, call.arguments);
519///         // Execute tool and return result...
520///     }
521/// }
522/// # }
523/// ```
524///
525/// # Structured Response
526///
527/// ```rust,no_run
528/// use multi_llm::Response;
529///
530/// # fn example(response: Response) {
531/// // When using execute_structured_llm
532/// if let Some(json) = &response.structured_response {
533///     let name = json["name"].as_str().unwrap_or("unknown");
534///     println!("Extracted name: {}", name);
535/// }
536/// # }
537/// ```
538///
539/// # Note on Trait Implementations
540///
541/// This type intentionally omits `Serialize`, `Deserialize`, and `PartialEq`:
542/// - `structured_response` contains arbitrary `serde_json::Value` that may not round-trip cleanly
543/// - `raw_body` is provider-specific debug data not meant for serialization
544/// - Equality comparison on JSON values can be surprising (object key ordering, number precision)
545///
546/// If you need to serialize responses, extract the specific fields you need.
547#[derive(Debug, Clone)]
548pub struct Response {
549    /// Primary text content of the response.
550    ///
551    /// For standard requests, this is the model's natural language output.
552    /// May be empty if the model only returned tool calls.
553    pub content: String,
554
555    /// Parsed JSON when using structured output.
556    ///
557    /// Populated when using [`LlmProvider::execute_structured_llm()`] with a schema.
558    /// Contains the parsed JSON that (should) match the requested schema.
559    pub structured_response: Option<serde_json::Value>,
560
561    /// Tool calls the model wants to execute.
562    ///
563    /// If non-empty, the model is requesting function calls rather than
564    /// providing a final answer. Execute the tools and continue the conversation.
565    pub tool_calls: Vec<ToolCall>,
566
567    /// Token usage statistics for this request.
568    ///
569    /// May be `None` if the provider doesn't report usage or if the request failed.
570    pub usage: Option<TokenUsage>,
571
572    /// The model that generated this response.
573    ///
574    /// Useful when the provider might use different models than requested
575    /// (e.g., fallback models or model aliases).
576    pub model: Option<String>,
577
578    /// Raw response body for debugging.
579    ///
580    /// Contains the unprocessed JSON response from the provider API.
581    /// Useful for debugging parsing issues or accessing provider-specific fields.
582    pub raw_body: Option<String>,
583}
584
/// Business event generated during LLM operations.
///
/// Wraps a [`BusinessEvent`] with its scope for routing to the appropriate
/// storage backend. Only available with the `events` feature enabled.
///
/// # Feature Flag
///
/// This type requires the `events` feature:
/// ```toml
/// [dependencies]
/// multi-llm = { version = "...", features = ["events"] }
/// ```
#[cfg(feature = "events")]
#[derive(Debug, Clone)]
pub struct LLMBusinessEvent {
    /// The business event containing type and metadata.
    pub event: BusinessEvent,

    /// Scope determining where the event should be stored.
    ///
    /// - [`EventScope::User`]: Written to user-specific storage
    /// - [`EventScope::System`]: Written to system-wide storage
    pub scope: EventScope,
}

610/// State for a tool calling round in multi-turn conversations.
611///
612/// When using tool calling, conversations often have multiple rounds:
613/// 1. User asks a question
614/// 2. Assistant requests tool calls
615/// 3. Tools execute and return results
616/// 4. Assistant uses results to form final response
617///
618/// `ToolCallingRound` captures the assistant's tool requests and the corresponding
619/// results, allowing providers to properly format multi-turn tool conversations.
620///
621/// # Example Flow
622///
623/// ```rust,no_run
624/// use multi_llm::{ToolCallingRound, ToolResult, UnifiedMessage};
625///
626/// // After receiving tool calls from the LLM
627/// # fn example(assistant_response: UnifiedMessage, tool_results: Vec<ToolResult>) {
628/// let round = ToolCallingRound {
629///     assistant_message: assistant_response,  // The message with tool calls
630///     tool_results,  // Results from executing those calls
631/// };
632///
633/// // Pass to execute_llm for the next turn
634/// // provider.execute_llm(request, Some(round), config).await?;
635/// # }
636/// ```
637#[derive(Debug, Clone)]
638pub struct ToolCallingRound {
639    /// The assistant message that initiated tool calls.
640    ///
641    /// This message contains the tool call content variants
642    /// that the assistant generated.
643    pub assistant_message: UnifiedMessage,
644
645    /// Results from executing the requested tools.
646    ///
647    /// Each result's `tool_call_id` should match an ID from the assistant message.
648    pub tool_results: Vec<ToolResult>,
649}
650
651/// Trait implemented by all LLM providers.
652///
653/// This is the core abstraction that makes multi-llm work. All providers
654/// (OpenAI, Anthropic, Ollama, LM Studio) implement this trait, allowing
655/// you to switch providers without changing your application code.
656///
657/// # Usage
658///
659/// You typically don't implement this trait yourself. Instead, use
660/// [`UnifiedLLMClient`](crate::UnifiedLLMClient) which wraps all providers:
661///
662/// ```rust,no_run
663/// use multi_llm::{unwrap_response, UnifiedLLMClient, LLMConfig, UnifiedMessage, UnifiedLLMRequest, LlmProvider};
664///
665/// # async fn example() -> anyhow::Result<()> {
666/// let config = LLMConfig::from_env()?;
667/// let client = UnifiedLLMClient::from_config(config)?;
668///
669/// let request = UnifiedLLMRequest::new(vec![
670///     UnifiedMessage::user("Hello!")
671/// ]);
672///
673/// let response = unwrap_response!(client.execute_llm(request, None, None).await?);
674/// println!("Response: {}", response.content);
675/// # Ok(())
676/// # }
677/// ```
678///
679/// # Return Types
680///
681/// Return types depend on the `events` feature:
682/// - **Without `events`**: Returns `Result<Response>`
683/// - **With `events`**: Returns `Result<(Response, Vec<LLMBusinessEvent>)>`
684///
685/// # Implementing Custom Providers
686///
687/// If you need to implement a custom provider:
688///
689/// ```rust,ignore
690/// use multi_llm::{LlmProvider, UnifiedLLMRequest, RequestConfig, Response, ToolCallingRound};
691/// use async_trait::async_trait;
692///
693/// struct MyProvider { /* ... */ }
694///
695/// #[async_trait]
696/// impl LlmProvider for MyProvider {
697///     async fn execute_llm(
698///         &self,
699///         request: UnifiedLLMRequest,
700///         current_tool_round: Option<ToolCallingRound>,
701///         config: Option<RequestConfig>,
702///     ) -> multi_llm::provider::Result<Response> {
703///         // Convert request to your API format
704///         // Make API call
705///         // Convert response to Response
706///         todo!()
707///     }
708///
709///     async fn execute_structured_llm(
710///         &self,
711///         request: UnifiedLLMRequest,
712///         current_tool_round: Option<ToolCallingRound>,
713///         schema: serde_json::Value,
714///         config: Option<RequestConfig>,
715///     ) -> multi_llm::provider::Result<Response> {
716///         // Similar to execute_llm but with JSON schema enforcement
717///         todo!()
718///     }
719///
720///     fn provider_name(&self) -> &'static str {
721///         "my_provider"
722///     }
723/// }
724/// ```
725#[async_trait::async_trait]
726pub trait LlmProvider: Send + Sync {
727    /// Execute an LLM request and return the response.
728    ///
729    /// This is the primary method for interacting with LLMs. It handles:
730    /// - Message conversion to provider-specific formats
731    /// - Tool calling (if tools are defined in the request)
732    /// - Caching hints (for providers that support it)
733    /// - Retry logic (based on provider configuration)
734    ///
735    /// # Arguments
736    ///
737    /// * `request` - The unified request containing messages and optional config
738    /// * `current_tool_round` - Previous tool calling state for multi-turn tool use
739    /// * `config` - Optional per-request configuration overrides
740    ///
741    /// # Returns
742    ///
743    /// - Without `events` feature: `Result<Response>`
744    /// - With `events` feature: `Result<(Response, Vec<LLMBusinessEvent>)>`
745    #[cfg(feature = "events")]
746    async fn execute_llm(
747        &self,
748        request: UnifiedLLMRequest,
749        current_tool_round: Option<ToolCallingRound>,
750        config: Option<RequestConfig>,
751    ) -> Result<(Response, Vec<LLMBusinessEvent>)>;
752
753    /// Execute an LLM request and return the response.
754    ///
755    /// This is the primary method for interacting with LLMs. It handles:
756    /// - Message conversion to provider-specific formats
757    /// - Tool calling (if tools are defined in the request)
758    /// - Caching hints (for providers that support it)
759    /// - Retry logic (based on provider configuration)
760    ///
761    /// # Arguments
762    ///
763    /// * `request` - The unified request containing messages and optional config
764    /// * `current_tool_round` - Previous tool calling state for multi-turn tool use
765    /// * `config` - Optional per-request configuration overrides
766    #[cfg(not(feature = "events"))]
767    async fn execute_llm(
768        &self,
769        request: UnifiedLLMRequest,
770        current_tool_round: Option<ToolCallingRound>,
771        config: Option<RequestConfig>,
772    ) -> Result<Response>;
773
774    /// Execute an LLM request with structured JSON output.
775    ///
776    /// Like [`execute_llm()`](Self::execute_llm), but instructs the model to return
777    /// JSON conforming to the provided schema. The parsed JSON is available in
778    /// [`Response::structured_response`].
779    ///
780    /// # Arguments
781    ///
782    /// * `request` - The unified request containing messages
783    /// * `current_tool_round` - Previous tool calling state
784    /// * `schema` - JSON Schema the response should conform to
785    /// * `config` - Optional per-request configuration overrides
786    ///
787    /// # Returns
788    ///
789    /// - Without `events` feature: `Result<Response>`
790    /// - With `events` feature: `Result<(Response, Vec<LLMBusinessEvent>)>`
791    #[cfg(feature = "events")]
792    async fn execute_structured_llm(
793        &self,
794        request: UnifiedLLMRequest,
795        current_tool_round: Option<ToolCallingRound>,
796        schema: serde_json::Value,
797        config: Option<RequestConfig>,
798    ) -> Result<(Response, Vec<LLMBusinessEvent>)>;
799
800    /// Execute an LLM request with structured JSON output.
801    ///
802    /// Like [`execute_llm()`](Self::execute_llm), but instructs the model to return
803    /// JSON conforming to the provided schema. The parsed JSON is available in
804    /// [`Response::structured_response`].
805    ///
806    /// # Arguments
807    ///
808    /// * `request` - The unified request containing messages
809    /// * `current_tool_round` - Previous tool calling state
810    /// * `schema` - JSON Schema the response should conform to
811    /// * `config` - Optional per-request configuration overrides
812    #[cfg(not(feature = "events"))]
813    async fn execute_structured_llm(
814        &self,
815        request: UnifiedLLMRequest,
816        current_tool_round: Option<ToolCallingRound>,
817        schema: serde_json::Value,
818        config: Option<RequestConfig>,
819    ) -> Result<Response>;
820
821    /// Get the provider's identifier.
822    ///
823    /// Returns a static string like "anthropic", "openai", "ollama", or "lmstudio".
824    /// Used for logging, debugging, and provider-specific behavior.
825    fn provider_name(&self) -> &'static str;
826}
827
828/// Type aliases for backward compatibility
829pub type LLMRequestConfig = RequestConfig;
830pub type LLMResponseFormat = ResponseFormat;
831pub type LLMTokenUsage = TokenUsage;