Skip to main content

everruns_core/
llm_driver_registry.rs

1// LLM Driver Abstractions
2//
3// This module encapsulates all abstractions needed to interact with LLM Providers:
4// - LlmDriver trait and types for provider-agnostic LLM interactions
5// - DriverRegistry for dynamic driver registration at startup
6// - Message types for LLM calls
7//
8// Supports both simple text content and multipart content (text, images, audio).
9//
10// IMPORTANT: API keys must be provided from the database. The registry does NOT read
11// from environment variables. Keys should be decrypted and passed via ProviderConfig.
12//
13// Design: Dependency inversion - provider crates (everruns-anthropic, everruns-openai)
14// depend on core and register their drivers at startup. Core has no knowledge of
15// specific provider implementations.
16
17use crate::error::{AgentLoopError, Result};
18use crate::openresponses_protocol::{CompactRequest, CompactResponse};
19use crate::runtime_agent::RuntimeAgent;
20use crate::tool_types::{ToolCall, ToolDefinition};
21use async_trait::async_trait;
22use chrono::{DateTime, Utc};
23use futures::Stream;
24use std::collections::HashMap;
25use std::pin::Pin;
26use std::sync::Arc;
27
28// ============================================================================
29// LlmDriver Trait
30// ============================================================================
31
32/// Type alias for the LLM response stream
33pub type LlmResponseStream = Pin<Box<dyn Stream<Item = Result<LlmStreamEvent>> + Send>>;
34
35/// Events emitted during LLM streaming
36#[derive(Debug, Clone)]
37pub enum LlmStreamEvent {
38    /// Text delta (incremental content)
39    TextDelta(String),
40    /// Thinking delta (incremental reasoning content from extended thinking models)
41    ThinkingDelta(String),
42    /// Cryptographic signature for thinking content (Anthropic Claude)
43    /// Emitted when a thinking block completes, before the Done event
44    ThinkingSignature(String),
45    /// Opaque assistant reasoning response item (OpenAI Responses).
46    /// Carries provider-supplied opaque/encrypted reasoning artifacts plus safe
47    /// summary text and per-item metadata. Plaintext hidden reasoning content is
48    /// intentionally excluded so callers can persist this without exposing
49    /// chain-of-thought.
50    ReasonItem {
51        /// Provider name (e.g., "openai").
52        provider: String,
53        /// Model identifier reported by the provider, if known.
54        model: Option<String>,
55        /// Provider-assigned identifier for the reasoning item.
56        item_id: String,
57        /// Provider-encrypted reasoning context, if supplied.
58        encrypted_content: Option<String>,
59        /// Safe summary text segments curated by the provider.
60        summary: Vec<String>,
61        /// Per-item reasoning token count, when the provider reports one.
62        token_count: Option<u32>,
63    },
64    /// Tool calls from the LLM
65    ToolCalls(Vec<ToolCall>),
66    /// Streaming completed
67    Done(Box<LlmCompletionMetadata>),
68    /// Error during streaming
69    Error(String),
70}
71
72/// Model information discovered from a provider's list_models API
73///
74/// Represents a model available from a provider. Used for dynamic model discovery
75/// to sync available models from provider APIs into the database.
76///
77/// The `discovered_profile` field carries structured capability/limit metadata
78/// parsed from the provider's API response (e.g., Anthropic's capabilities object).
79/// During model sync, this profile is merged with hardcoded profiles: hardcoded
80/// values take precedence (they include cost data not available from APIs),
81/// but discovered data fills gaps for models without hardcoded profiles.
82#[derive(Debug, Clone)]
83pub struct DiscoveredModel {
84    /// Model identifier (e.g., "gpt-5.2", "claude-opus-4-5-20251101")
85    pub model_id: String,
86    /// Human-readable display name (if provided by API)
87    pub display_name: Option<String>,
88    /// When the model was created/released
89    pub created_at: Option<DateTime<Utc>>,
90    /// Owner or organization (e.g., "openai", "system")
91    pub owned_by: Option<String>,
92    /// Structured profile built from provider API metadata (capabilities, limits).
93    /// Populated by drivers that return rich model metadata (e.g., Anthropic /v1/models).
94    pub discovered_profile: Option<crate::llm_models::LlmModelProfile>,
95}
96
97/// Metadata about LLM completion
98///
99/// Contains token usage and completion information from the LLM response.
100/// Cache token fields are provider-specific:
101/// - OpenAI: `cache_read_tokens` from prompt_tokens_details.cached_tokens
102/// - Anthropic: `cache_read_tokens` from cache_read_input_tokens,
103///   `cache_creation_tokens` from cache_creation_input_tokens
104#[derive(Debug, Clone, Default)]
105pub struct LlmCompletionMetadata {
106    /// Total tokens used
107    pub total_tokens: Option<u32>,
108    /// Prompt tokens
109    pub prompt_tokens: Option<u32>,
110    /// Completion tokens
111    pub completion_tokens: Option<u32>,
112    /// Tokens read from cache (reduces cost)
113    pub cache_read_tokens: Option<u32>,
114    /// Tokens written to cache (Anthropic-specific)
115    pub cache_creation_tokens: Option<u32>,
116    /// Authoritative cost of this generation in USD, when the provider reports
117    /// it inline (e.g. OpenRouter's `usage.cost`). `None` for providers that do
118    /// not return a cost.
119    pub provider_cost_usd: Option<f64>,
120    /// Model used
121    pub model: Option<String>,
122    /// Finish reason
123    pub finish_reason: Option<String>,
124    /// Retry metadata (present if rate limit retries occurred)
125    pub retry_metadata: Option<crate::llm_retry::RetryMetadata>,
126    /// Provider's response ID (e.g., OpenAI response ID from response.completed).
127    /// Used for `previous_response_id` chaining and OTel tracing.
128    pub response_id: Option<String>,
129    /// Execution phase from the provider's response (e.g., "commentary", "final_answer").
130    /// When present, this value should be preserved on the assistant message and sent
131    /// back as-is in subsequent requests. Only set by providers with native phase support.
132    pub phase: Option<String>,
133}
134
135/// Trait for LLM drivers
136///
137/// Implementations handle provider-specific API calls and response parsing.
138#[async_trait]
139pub trait LlmDriver: Send + Sync {
140    /// Call the LLM with streaming response
141    async fn chat_completion_stream(
142        &self,
143        messages: Vec<LlmMessage>,
144        config: &LlmCallConfig,
145    ) -> Result<LlmResponseStream>;
146
147    /// Call the LLM without streaming (convenience method)
148    async fn chat_completion(
149        &self,
150        messages: Vec<LlmMessage>,
151        config: &LlmCallConfig,
152    ) -> Result<LlmResponse> {
153        use futures::StreamExt;
154
155        let mut stream = self.chat_completion_stream(messages, config).await?;
156        let mut text = String::new();
157        let mut thinking = String::new();
158        let mut thinking_signature: Option<String> = None;
159        let mut tool_calls = Vec::new();
160        let mut metadata = LlmCompletionMetadata::default();
161
162        while let Some(event) = stream.next().await {
163            match event? {
164                LlmStreamEvent::TextDelta(delta) => text.push_str(&delta),
165                LlmStreamEvent::ThinkingDelta(delta) => thinking.push_str(&delta),
166                LlmStreamEvent::ThinkingSignature(sig) => thinking_signature = Some(sig),
167                LlmStreamEvent::ReasonItem {
168                    encrypted_content, ..
169                } => {
170                    if let Some(sig) = encrypted_content {
171                        thinking_signature = Some(sig);
172                    }
173                }
174                LlmStreamEvent::ToolCalls(calls) => tool_calls = calls,
175                LlmStreamEvent::Done(meta) => metadata = *meta,
176                LlmStreamEvent::Error(err) => return Err(crate::error::AgentLoopError::llm(err)),
177            }
178        }
179
180        Ok(LlmResponse {
181            text,
182            thinking: if thinking.is_empty() {
183                None
184            } else {
185                Some(thinking)
186            },
187            thinking_signature,
188            tool_calls: if tool_calls.is_empty() {
189                None
190            } else {
191                Some(tool_calls)
192            },
193            metadata,
194        })
195    }
196
197    /// List available models from the provider
198    ///
199    /// Returns `Ok(Some(models))` if the provider supports model listing,
200    /// or `Ok(None)` if not supported (e.g., custom endpoints, proxies).
201    ///
202    /// Implementations should filter to chat/completion models only,
203    /// excluding embedding models, TTS, whisper, etc.
204    async fn list_models(&self) -> Result<Option<Vec<DiscoveredModel>>> {
205        // Default: not supported. Providers override if they support listing.
206        Ok(None)
207    }
208
209    /// Check if this driver supports the compact endpoint
210    ///
211    /// The compact endpoint compresses conversation history by replacing
212    /// assistant messages, tool calls, and tool results with an encrypted
213    /// compaction item. User messages are kept verbatim.
214    ///
215    /// Returns `true` if the driver supports compaction, `false` otherwise.
216    /// Currently only supported by OpenAI's Responses API.
217    fn supports_compact(&self) -> bool {
218        // Default: not supported
219        false
220    }
221
222    /// Compact a conversation to reduce context size
223    ///
224    /// This method compresses conversation history by calling the provider's
225    /// compact endpoint. User messages are kept verbatim, while assistant
226    /// messages, tool calls, and tool results are replaced by an encrypted
227    /// compaction item that preserves latent context but is opaque.
228    ///
229    /// # Arguments
230    ///
231    /// * `request` - The compact request containing the model and input items
232    ///
233    /// # Returns
234    ///
235    /// Returns `Ok(Some(response))` if compaction succeeded,
236    /// `Ok(None)` if compaction is not supported by this driver,
237    /// or `Err` if an error occurred.
238    ///
239    /// The response contains the compacted output items which can be used
240    /// directly as input for the next chat completion call.
241    async fn compact(&self, _request: CompactRequest) -> Result<Option<CompactResponse>> {
242        // Default: not supported
243        Ok(None)
244    }
245}
246
247/// Implement LlmDriver for `Box<dyn LlmDriver>` to allow dynamic dispatch
248#[async_trait]
249impl LlmDriver for Box<dyn LlmDriver> {
250    async fn chat_completion_stream(
251        &self,
252        messages: Vec<LlmMessage>,
253        config: &LlmCallConfig,
254    ) -> Result<LlmResponseStream> {
255        (**self).chat_completion_stream(messages, config).await
256    }
257
258    async fn chat_completion(
259        &self,
260        messages: Vec<LlmMessage>,
261        config: &LlmCallConfig,
262    ) -> Result<LlmResponse> {
263        (**self).chat_completion(messages, config).await
264    }
265
266    async fn list_models(&self) -> Result<Option<Vec<DiscoveredModel>>> {
267        (**self).list_models().await
268    }
269
270    fn supports_compact(&self) -> bool {
271        (**self).supports_compact()
272    }
273
274    async fn compact(&self, request: CompactRequest) -> Result<Option<CompactResponse>> {
275        (**self).compact(request).await
276    }
277}
278
279// ============================================================================
280// Message Types
281// ============================================================================
282
283/// Message format for LLM calls (provider-agnostic)
284#[derive(Debug, Clone)]
285pub struct LlmMessage {
286    pub role: LlmMessageRole,
287    pub content: LlmMessageContent,
288    pub tool_calls: Option<Vec<ToolCall>>,
289    pub tool_call_id: Option<String>,
290    /// Execution phase for assistant messages.
291    /// Helps models distinguish between intermediate working commentary (`Commentary`)
292    /// and completed answers (`FinalAnswer`) in multi-step tool-calling flows.
293    /// Only set on assistant messages. Must be preserved when replaying conversation history.
294    pub phase: Option<crate::message::ExecutionPhase>,
295    /// Thinking content from extended thinking models (Anthropic Claude)
296    /// Must be included in subsequent API calls when thinking is enabled
297    pub thinking: Option<String>,
298    /// Cryptographic signature for thinking content (Anthropic Claude)
299    /// Required when sending thinking back in subsequent API calls
300    pub thinking_signature: Option<String>,
301}
302
303impl LlmMessage {
304    /// Create a message with text content
305    pub fn text(role: LlmMessageRole, content: impl Into<String>) -> Self {
306        Self {
307            role,
308            content: LlmMessageContent::Text(content.into()),
309            tool_calls: None,
310            tool_call_id: None,
311            phase: None,
312            thinking: None,
313            thinking_signature: None,
314        }
315    }
316
317    /// Create a message with content parts (text, images, audio)
318    pub fn parts(role: LlmMessageRole, parts: Vec<LlmContentPart>) -> Self {
319        Self {
320            role,
321            content: LlmMessageContent::Parts(parts),
322            tool_calls: None,
323            tool_call_id: None,
324            phase: None,
325            thinking: None,
326            thinking_signature: None,
327        }
328    }
329
330    /// Get content as plain text string (for simple cases)
331    pub fn content_as_text(&self) -> String {
332        self.content.to_text()
333    }
334
335    /// Prepend a prefix to the first text content.
336    ///
337    /// Used by ReasonAtom to inject external actor identity (e.g. `"[Alice] "`)
338    /// into user messages from external channels.
339    pub fn prepend_text_prefix(&mut self, prefix: &str) {
340        match &mut self.content {
341            LlmMessageContent::Text(text) => {
342                *text = format!("{}{}", prefix, text);
343            }
344            LlmMessageContent::Parts(parts) => {
345                for part in parts.iter_mut() {
346                    if let LlmContentPart::Text { text } = part {
347                        *text = format!("{}{}", prefix, text);
348                        return;
349                    }
350                }
351                // No text part found — prepend one
352                parts.insert(
353                    0,
354                    LlmContentPart::Text {
355                        text: prefix.to_string(),
356                    },
357                );
358            }
359        }
360    }
361}
362
363/// Message content - either a simple string or array of content parts
364#[derive(Debug, Clone)]
365pub enum LlmMessageContent {
366    /// Simple text content
367    Text(String),
368    /// Array of content parts (text, images, audio)
369    Parts(Vec<LlmContentPart>),
370}
371
372impl LlmMessageContent {
373    /// Convert to plain text (concatenates text parts, ignores media)
374    pub fn to_text(&self) -> String {
375        match self {
376            LlmMessageContent::Text(s) => s.clone(),
377            LlmMessageContent::Parts(parts) => parts
378                .iter()
379                .filter_map(|p| match p {
380                    LlmContentPart::Text { text } => Some(text.clone()),
381                    _ => None,
382                })
383                .collect::<Vec<_>>()
384                .join(""),
385        }
386    }
387
388    /// Check if content is simple text
389    pub fn is_text(&self) -> bool {
390        matches!(self, LlmMessageContent::Text(_))
391    }
392
393    /// Check if content has multiple parts
394    pub fn is_parts(&self) -> bool {
395        matches!(self, LlmMessageContent::Parts(_))
396    }
397}
398
399impl From<String> for LlmMessageContent {
400    fn from(s: String) -> Self {
401        LlmMessageContent::Text(s)
402    }
403}
404
405impl From<&str> for LlmMessageContent {
406    fn from(s: &str) -> Self {
407        LlmMessageContent::Text(s.to_string())
408    }
409}
410
/// One piece of a multipart message.
#[derive(Debug, Clone)]
pub enum LlmContentPart {
    /// A run of text.
    Text { text: String },
    /// An image, given as a base64 data URL or an HTTP URL.
    Image { url: String },
    /// Audio, given as a base64 data URL.
    Audio { url: String },
}
421
422impl LlmContentPart {
423    /// Create a text content part
424    pub fn text(text: impl Into<String>) -> Self {
425        LlmContentPart::Text { text: text.into() }
426    }
427
428    /// Create an image content part from URL (can be data URL or HTTP URL)
429    pub fn image(url: impl Into<String>) -> Self {
430        LlmContentPart::Image { url: url.into() }
431    }
432
433    /// Create an audio content part from URL (typically a data URL)
434    pub fn audio(url: impl Into<String>) -> Self {
435        LlmContentPart::Audio { url: url.into() }
436    }
437}
438
/// Role attached to a message in an LLM call.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LlmMessageRole {
    System,
    User,
    Assistant,
    Tool,
}
447
448// ============================================================================
449// Configuration and Response Types
450// ============================================================================
451
452/// Configuration for tool_search (deferred tool loading).
453///
454/// When enabled, the driver groups tools into namespaces and marks them with
455/// `defer_loading: true` so the model only loads full schemas on-demand.
456/// This reduces token usage for agents with many tools.
457#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
458pub struct ToolSearchConfig {
459    /// Enable tool_search for this request (requires model support)
460    pub enabled: bool,
461    /// Minimum number of tools before activating tool_search.
462    /// Below this threshold, full schemas are sent even when enabled.
463    pub threshold: usize,
464}
465
466/// Strategy for prompt caching.
467#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
468#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
469#[serde(rename_all = "snake_case")]
470pub enum PromptCacheStrategy {
471    /// Let each driver choose the safest provider-specific behavior.
472    #[default]
473    Auto,
474}
475
476/// Configuration for prompt caching.
477///
478/// Drivers translate this into provider-specific request options when possible.
479/// Unsupported providers or models should ignore it without failing the call.
480#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
481#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
482pub struct PromptCacheConfig {
483    /// Enable prompt caching for this request.
484    pub enabled: bool,
485    /// Strategy the driver should use when enabling prompt caching.
486    #[serde(default)]
487    pub strategy: PromptCacheStrategy,
488    /// Existing Gemini cached content resource name (`cachedContents/{id}`).
489    ///
490    /// When set, the Gemini driver uses explicit caching via the
491    /// `cachedContent` request field. When absent, Gemini falls back to its
492    /// default provider behavior (for example implicit caching on supported
493    /// models).
494    #[serde(default, skip_serializing_if = "Option::is_none")]
495    pub gemini_cached_content: Option<String>,
496}
497
498/// OpenRouter model fallback and provider routing controls.
499///
500/// These fields mirror OpenRouter's request-level routing extensions. Drivers
501/// must only forward this config to OpenRouter-compatible endpoints.
502#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
503#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
504pub struct OpenRouterRoutingConfig {
505    /// Candidate models to try in OpenRouter's fallback order.
506    #[serde(default, skip_serializing_if = "Vec::is_empty")]
507    pub models: Vec<String>,
508    /// OpenRouter route strategy. Currently `fallback` is the stable route
509    /// value used with `models`.
510    #[serde(default, skip_serializing_if = "Option::is_none")]
511    pub route: Option<OpenRouterRoute>,
512    /// Provider ordering, policy, and sorting preferences.
513    #[serde(default, skip_serializing_if = "Option::is_none")]
514    pub provider: Option<OpenRouterProviderRouting>,
515}
516
517impl OpenRouterRoutingConfig {
518    pub fn is_empty(&self) -> bool {
519        self.models.is_empty() && self.route.is_none() && self.provider.is_none()
520    }
521
522    /// Build an ordered model-fallback routing config.
523    pub fn fallback_models(models: impl IntoIterator<Item = impl Into<String>>) -> Self {
524        let models = models.into_iter().map(Into::into).collect::<Vec<_>>();
525        let route = (!models.is_empty()).then_some(OpenRouterRoute::Fallback);
526        Self {
527            models,
528            route,
529            provider: None,
530        }
531    }
532
533    pub fn validate_for_primary_model(
534        &self,
535        primary_model: &str,
536    ) -> std::result::Result<(), String> {
537        if self.route == Some(OpenRouterRoute::Fallback) && self.models.is_empty() {
538            return Err(
539                "OpenRouter fallback routing requires at least one model in `models`".to_string(),
540            );
541        }
542
543        if let Some(first_model) = self.models.first()
544            && first_model != primary_model
545        {
546            return Err(format!(
547                "OpenRouter routing models[0] ('{first_model}') must match primary model ('{primary_model}')"
548            ));
549        }
550
551        Ok(())
552    }
553}
554
555/// OpenRouter route strategy.
556#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
557#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
558#[serde(rename_all = "snake_case")]
559pub enum OpenRouterRoute {
560    Fallback,
561}
562
563/// OpenRouter provider routing preferences.
564#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
565#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
566pub struct OpenRouterProviderRouting {
567    /// Provider slugs to try first, in order.
568    #[serde(default, skip_serializing_if = "Vec::is_empty")]
569    pub order: Vec<String>,
570    /// Restrict routing to these provider slugs.
571    #[serde(default, skip_serializing_if = "Vec::is_empty")]
572    pub only: Vec<String>,
573    /// Provider slugs to skip.
574    #[serde(default, skip_serializing_if = "Vec::is_empty")]
575    pub ignore: Vec<String>,
576    /// Whether OpenRouter may fall back outside the ordered/allowed providers.
577    #[serde(default, skip_serializing_if = "Option::is_none")]
578    pub allow_fallbacks: Option<bool>,
579    /// Require routed providers to support all request parameters.
580    #[serde(default, skip_serializing_if = "Option::is_none")]
581    pub require_parameters: Option<bool>,
582    /// Restrict routing by provider data-retention policy.
583    #[serde(default, skip_serializing_if = "Option::is_none")]
584    pub data_collection: Option<OpenRouterDataCollection>,
585    /// Restrict routing to zero-data-retention endpoints.
586    #[serde(default, skip_serializing_if = "Option::is_none")]
587    pub zdr: Option<bool>,
588    /// Restrict routing to distillable-text endpoints.
589    #[serde(default, skip_serializing_if = "Option::is_none")]
590    pub enforce_distillable_text: Option<bool>,
591    /// Restrict routing to provider quantization levels.
592    #[serde(default, skip_serializing_if = "Vec::is_empty")]
593    pub quantizations: Vec<String>,
594    /// Sort provider endpoints by price, throughput, or latency.
595    #[serde(default, skip_serializing_if = "Option::is_none")]
596    pub sort: Option<OpenRouterProviderSort>,
597    /// Maximum accepted per-unit provider price.
598    #[serde(default, skip_serializing_if = "Option::is_none")]
599    pub max_price: Option<OpenRouterMaxPrice>,
600}
601
602impl OpenRouterProviderRouting {
603    pub fn is_empty(&self) -> bool {
604        self.order.is_empty()
605            && self.only.is_empty()
606            && self.ignore.is_empty()
607            && self.allow_fallbacks.is_none()
608            && self.require_parameters.is_none()
609            && self.data_collection.is_none()
610            && self.zdr.is_none()
611            && self.enforce_distillable_text.is_none()
612            && self.quantizations.is_empty()
613            && self.sort.is_none()
614            && self.max_price.is_none()
615    }
616}
617
618/// OpenRouter provider data-retention preference.
619#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
620#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
621#[serde(rename_all = "snake_case")]
622pub enum OpenRouterDataCollection {
623    Allow,
624    Deny,
625}
626
627/// OpenRouter provider sort preference.
628#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
629#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
630#[serde(untagged)]
631pub enum OpenRouterProviderSort {
632    Simple(OpenRouterProviderSortBy),
633    Advanced(OpenRouterProviderSortOptions),
634}
635
636/// OpenRouter provider sorting dimension.
637#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
638#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
639#[serde(rename_all = "snake_case")]
640pub enum OpenRouterProviderSortBy {
641    Price,
642    Throughput,
643    Latency,
644}
645
646/// OpenRouter advanced provider sort options.
647#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
648#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
649pub struct OpenRouterProviderSortOptions {
650    pub by: OpenRouterProviderSortBy,
651    #[serde(default, skip_serializing_if = "Option::is_none")]
652    pub partition: Option<OpenRouterSortPartition>,
653}
654
655/// How OpenRouter sorts endpoints when multiple fallback models are present.
656#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
657#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
658#[serde(rename_all = "snake_case")]
659pub enum OpenRouterSortPartition {
660    Model,
661    None,
662}
663
664/// Maximum accepted OpenRouter provider pricing, expressed in dollars per
665/// million prompt/completion tokens or per request/image where supported.
666#[derive(Debug, Clone, Default, PartialEq, serde::Serialize, serde::Deserialize)]
667#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
668pub struct OpenRouterMaxPrice {
669    #[serde(default, skip_serializing_if = "Option::is_none")]
670    pub prompt: Option<f64>,
671    #[serde(default, skip_serializing_if = "Option::is_none")]
672    pub completion: Option<f64>,
673    #[serde(default, skip_serializing_if = "Option::is_none")]
674    pub request: Option<f64>,
675    #[serde(default, skip_serializing_if = "Option::is_none")]
676    pub image: Option<f64>,
677}
678
679/// Configuration for an LLM call
680#[derive(Debug, Clone)]
681pub struct LlmCallConfig {
682    pub model: String,
683    pub temperature: Option<f32>,
684    pub max_tokens: Option<u32>,
685    pub tools: Vec<ToolDefinition>,
686    /// Reasoning effort level (for models that support it: low, medium, high)
687    pub reasoning_effort: Option<String>,
688    /// Metadata to send with the API request for tracking and debugging.
689    /// Keys and values are strings. Both OpenAI and Anthropic support metadata fields.
690    /// Typically includes: session_id, agent_id, org_id, turn_id, exec_id.
691    pub metadata: HashMap<String, String>,
692    /// Previous response ID for stateful continuation (OpenAI Responses API).
693    /// When set, the provider can skip re-encoding cached context.
694    pub previous_response_id: Option<String>,
695    /// Tool search configuration for deferred tool loading
696    pub tool_search: Option<ToolSearchConfig>,
697    /// Prompt caching configuration for provider-specific cache controls.
698    pub prompt_cache: Option<PromptCacheConfig>,
699    /// OpenRouter-only model fallback and provider routing controls.
700    pub openrouter_routing: Option<OpenRouterRoutingConfig>,
701}
702
703impl From<&RuntimeAgent> for LlmCallConfig {
704    fn from(runtime_agent: &RuntimeAgent) -> Self {
705        Self {
706            model: runtime_agent.model.clone(),
707            temperature: runtime_agent.temperature,
708            max_tokens: runtime_agent.max_tokens,
709            tools: runtime_agent.tools.clone(),
710            reasoning_effort: None, // Set by ReasonAtom from user message controls
711            metadata: HashMap::new(), // Set by ReasonAtom with session/agent context
712            previous_response_id: None,
713            tool_search: runtime_agent.tool_search.clone(),
714            prompt_cache: runtime_agent.prompt_cache.clone(),
715            openrouter_routing: None,
716        }
717    }
718}
719
720/// Response from an LLM call (non-streaming)
721#[derive(Debug, Clone)]
722pub struct LlmResponse {
723    pub text: String,
724    /// Thinking content from extended thinking models (e.g., Claude with thinking enabled)
725    pub thinking: Option<String>,
726    /// Cryptographic signature for thinking content (Anthropic Claude)
727    pub thinking_signature: Option<String>,
728    pub tool_calls: Option<Vec<ToolCall>>,
729    pub metadata: LlmCompletionMetadata,
730}
731
732/// Builder for LlmCallConfig with fluent API
733///
734/// Use `from(&runtime_agent)` to start building from a RuntimeAgent, then chain
735/// methods like `reasoning_effort()`, `temperature()`, etc. Call `build()`
736/// to get the final config.
737///
738/// # Example
739///
740/// ```ignore
741/// use everruns_core::llm::LlmCallConfigBuilder;
742/// use everruns_core::runtime_agent::RuntimeAgent;
743///
744/// let runtime_agent = RuntimeAgent::new("You are helpful", "gpt-4o");
745/// let llm_config = LlmCallConfigBuilder::from(&runtime_agent)
746///     .reasoning_effort("high")
747///     .temperature(0.7)
748///     .build();
749/// ```
750pub struct LlmCallConfigBuilder {
751    config: LlmCallConfig,
752}
753
754impl LlmCallConfigBuilder {
755    /// Start building from a RuntimeAgent
756    pub fn from(runtime_agent: &RuntimeAgent) -> Self {
757        Self {
758            config: LlmCallConfig::from(runtime_agent),
759        }
760    }
761
762    /// Set reasoning effort level (for models that support it: low, medium, high)
763    pub fn reasoning_effort(mut self, effort: impl Into<String>) -> Self {
764        self.config.reasoning_effort = Some(effort.into());
765        self
766    }
767
768    /// Set the model
769    pub fn model(mut self, model: impl Into<String>) -> Self {
770        self.config.model = model.into();
771        self
772    }
773
774    /// Set temperature
775    pub fn temperature(mut self, temp: f32) -> Self {
776        self.config.temperature = Some(temp);
777        self
778    }
779
780    /// Set max tokens
781    pub fn max_tokens(mut self, tokens: u32) -> Self {
782        self.config.max_tokens = Some(tokens);
783        self
784    }
785
786    /// Set tools
787    pub fn tools(mut self, tools: Vec<ToolDefinition>) -> Self {
788        self.config.tools = tools;
789        self
790    }
791
792    /// Set metadata for API tracking
793    ///
794    /// This metadata is sent to the LLM provider for tracking and debugging.
795    /// Typically includes session_id, agent_id, org_id, turn_id, exec_id.
796    pub fn metadata(mut self, metadata: HashMap<String, String>) -> Self {
797        self.config.metadata = metadata;
798        self
799    }
800
801    /// Add a single metadata key-value pair
802    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
803        self.config.metadata.insert(key.into(), value.into());
804        self
805    }
806
807    /// Set previous response ID for stateful continuation
808    pub fn previous_response_id(mut self, id: Option<String>) -> Self {
809        self.config.previous_response_id = id;
810        self
811    }
812
813    /// Set tool_search configuration
814    pub fn tool_search(mut self, config: ToolSearchConfig) -> Self {
815        self.config.tool_search = Some(config);
816        self
817    }
818
819    /// Set prompt caching configuration
820    pub fn prompt_cache(mut self, config: PromptCacheConfig) -> Self {
821        self.config.prompt_cache = Some(config);
822        self
823    }
824
825    /// Set OpenRouter model fallback and provider routing controls.
826    pub fn openrouter_routing(mut self, config: OpenRouterRoutingConfig) -> Self {
827        self.config.openrouter_routing = (!config.is_empty()).then_some(config);
828        self
829    }
830
831    /// Build the configuration
832    pub fn build(self) -> LlmCallConfig {
833        self.config
834    }
835}
836
837// ============================================================================
838// Conversion from Message
839// ============================================================================
840
841impl From<&crate::message::Message> for LlmMessage {
842    /// Convert a Message to LlmMessage (text-only, images become placeholders)
843    ///
844    /// This conversion is suitable for messages without images or when image
845    /// resolution is not available. For multimodal messages, use
846    /// `LlmMessage::from_message_with_images()` instead.
847    fn from(msg: &crate::message::Message) -> Self {
848        let role = match msg.role {
849            crate::message::MessageRole::System => LlmMessageRole::System,
850            crate::message::MessageRole::User => LlmMessageRole::User,
851            crate::message::MessageRole::Agent => LlmMessageRole::Assistant,
852            crate::message::MessageRole::ToolResult => LlmMessageRole::Tool,
853        };
854
855        // Convert tool calls from ContentPart format to ToolCall format
856        let tool_calls: Vec<ToolCall> = msg
857            .tool_calls()
858            .into_iter()
859            .map(|tc| ToolCall {
860                id: tc.id.clone(),
861                name: tc.name.clone(),
862                arguments: tc.arguments.clone(),
863            })
864            .collect();
865
866        LlmMessage {
867            role,
868            content: LlmMessageContent::Text(msg.content_to_llm_string()),
869            tool_calls: if tool_calls.is_empty() {
870                None
871            } else {
872                Some(tool_calls)
873            },
874            tool_call_id: msg.tool_call_id().map(|s| s.to_string()),
875            phase: msg.phase,
876            thinking: msg.thinking.clone(),
877            thinking_signature: msg.thinking_signature.clone(),
878        }
879    }
880}
881
882// ============================================================================
883// Message Conversion with Images
884// ============================================================================
885
886use crate::traits::ResolvedImage;
887use uuid::Uuid;
888
889impl LlmMessage {
890    /// Convert a Message to LlmMessage with resolved images
891    ///
892    /// This method handles multimodal messages by converting:
893    /// - `text` content parts → `LlmContentPart::Text`
894    /// - `image` content parts → `LlmContentPart::Image` (data URL)
895    /// - `image_file` content parts → `LlmContentPart::Image` (resolved to data URL)
896    /// - `tool_call` content parts → extracted to `tool_calls` field
897    /// - `tool_result` content parts → text representation
898    ///
899    /// # Provider-specific formatting
900    ///
901    /// The `LlmContentPart::Image` uses data URLs which are converted by each provider:
902    /// - **OpenAI**: `{ "type": "image_url", "image_url": { "url": "data:..." } }`
903    /// - **Anthropic**: `{ "type": "image", "source": { "type": "base64", ... } }`
904    ///
905    /// # Arguments
906    ///
907    /// * `msg` - The message to convert
908    /// * `resolved_images` - Pre-resolved images keyed by image_id
909    pub fn from_message_with_images(
910        msg: &crate::message::Message,
911        resolved_images: &HashMap<Uuid, ResolvedImage>,
912    ) -> Self {
913        use crate::message::{ContentPart, MessageRole};
914
915        let role = match msg.role {
916            MessageRole::System => LlmMessageRole::System,
917            MessageRole::User => LlmMessageRole::User,
918            MessageRole::Agent => LlmMessageRole::Assistant,
919            MessageRole::ToolResult => LlmMessageRole::Tool,
920        };
921
922        // Convert content parts to LlmContentParts
923        let mut parts: Vec<LlmContentPart> = Vec::new();
924        let mut tool_calls: Vec<ToolCall> = Vec::new();
925
926        for part in &msg.content {
927            match part {
928                ContentPart::Text(t) => {
929                    parts.push(LlmContentPart::Text {
930                        text: t.text.clone(),
931                    });
932                }
933                ContentPart::Image(img) => {
934                    // Convert inline image to data URL
935                    if let Some(url) = &img.url {
936                        parts.push(LlmContentPart::Image { url: url.clone() });
937                    } else if let (Some(base64), Some(media_type)) = (&img.base64, &img.media_type)
938                    {
939                        let data_url = format!("data:{};base64,{}", media_type, base64);
940                        parts.push(LlmContentPart::Image { url: data_url });
941                    }
942                }
943                ContentPart::ImageFile(img_file) => {
944                    // Resolve image_file to actual image data
945                    if let Some(resolved) = resolved_images.get(&img_file.image_id.uuid()) {
946                        parts.push(LlmContentPart::Image {
947                            url: resolved.to_data_url(),
948                        });
949                    } else {
950                        // Image not found - add placeholder text
951                        parts.push(LlmContentPart::Text {
952                            text: format!("[Image not found: {}]", img_file.image_id),
953                        });
954                    }
955                }
956                ContentPart::ToolCall(tc) => {
957                    // Extract tool calls to separate field (don't include in content)
958                    tool_calls.push(ToolCall {
959                        id: tc.id.clone(),
960                        name: tc.name.clone(),
961                        arguments: tc.arguments.clone(),
962                    });
963                }
964                ContentPart::ToolResult(tr) => {
965                    // Convert tool result to text representation
966                    let text = if let Some(err) = &tr.error {
967                        format!("Tool error: {}", err)
968                    } else if let Some(res) = &tr.result {
969                        serde_json::to_string(res).unwrap_or_else(|_| "{}".to_string())
970                    } else {
971                        "{}".to_string()
972                    };
973                    // Primary hard limit enforced by OutputHardLimitHook (EVE-225)
974                    // at tool execution time. This backstop catches tool results
975                    // that bypass ActAtom hooks (client-submitted, stored events).
976                    let text = truncate_tool_result(text);
977                    parts.push(LlmContentPart::Text { text });
978                }
979            }
980        }
981
982        // Determine content format
983        let content = if parts.len() == 1 && matches!(&parts[0], LlmContentPart::Text { .. }) {
984            // Single text part - use simple Text format
985            if let LlmContentPart::Text { text } = &parts[0] {
986                LlmMessageContent::Text(text.clone())
987            } else {
988                LlmMessageContent::Parts(parts)
989            }
990        } else if parts.is_empty() {
991            // No content parts - use empty text
992            LlmMessageContent::Text(String::new())
993        } else {
994            // Multiple parts or non-text - use Parts format
995            LlmMessageContent::Parts(parts)
996        };
997
998        LlmMessage {
999            role,
1000            content,
1001            tool_calls: if tool_calls.is_empty() {
1002                None
1003            } else {
1004                Some(tool_calls)
1005            },
1006            tool_call_id: msg.tool_call_id().map(|s| s.to_string()),
1007            phase: msg.phase,
1008            thinking: msg.thinking.clone(),
1009            thinking_signature: msg.thinking_signature.clone(),
1010        }
1011    }
1012
1013    /// Check if a message contains image_file references that need resolution
1014    pub fn message_has_image_files(msg: &crate::message::Message) -> bool {
1015        msg.content.iter().any(|p| p.is_image_file())
1016    }
1017
1018    /// Extract all image_file IDs from a message
1019    pub fn extract_image_file_ids(msg: &crate::message::Message) -> Vec<Uuid> {
1020        msg.content
1021            .iter()
1022            .filter_map(|p| match p {
1023                crate::message::ContentPart::ImageFile(f) => Some(f.image_id.uuid()),
1024                _ => None,
1025            })
1026            .collect()
1027    }
1028}
1029
1030// ============================================================================
1031// Driver Factory Types
1032// ============================================================================
1033
/// Provider type enumeration matching the database/contracts.
///
/// The textual form of each variant (see the `FromStr` and `Display` impls
/// below) is the lower-case, snake_case name, e.g. `azure_openai`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ProviderType {
    /// OpenAI using Open Responses API (<https://www.openresponses.org/>)
    /// This is the recommended API for new projects.
    OpenAI,
    /// OpenRouter using the OpenAI-compatible Responses API.
    OpenRouter,
    /// Azure OpenAI using the Azure-hosted OpenAI v1 API.
    AzureOpenAI,
    /// OpenAI using Chat Completions API (for backward compatibility)
    /// Use this if you need the legacy /v1/chat/completions endpoint.
    OpenAICompletions,
    /// Anthropic
    Anthropic,
    /// Google Gemini API
    Gemini,
    /// LLM simulator for testing (uses llmsim crate)
    LlmSim,
    /// AWS Bedrock Runtime (ConverseStream API)
    Bedrock,
}

impl std::str::FromStr for ProviderType {
    type Err = String;

    /// Parse a provider name, ignoring case.
    ///
    /// Unknown names yield `Err("Unknown provider type: <input>")`, echoing
    /// the input exactly as it was given.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let parsed = match normalized.as_str() {
            "openai" => Self::OpenAI,
            "openrouter" => Self::OpenRouter,
            "azure_openai" => Self::AzureOpenAI,
            "openai_completions" => Self::OpenAICompletions,
            "anthropic" => Self::Anthropic,
            "gemini" => Self::Gemini,
            "llmsim" => Self::LlmSim,
            "bedrock" => Self::Bedrock,
            _ => return Err(format!("Unknown provider type: {}", s)),
        };
        Ok(parsed)
    }
}

impl std::fmt::Display for ProviderType {
    /// Write the canonical lower-case name, the same spelling `from_str` accepts.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::OpenAI => "openai",
            Self::OpenRouter => "openrouter",
            Self::AzureOpenAI => "azure_openai",
            Self::OpenAICompletions => "openai_completions",
            Self::Anthropic => "anthropic",
            Self::Gemini => "gemini",
            Self::LlmSim => "llmsim",
            Self::Bedrock => "bedrock",
        };
        f.write_str(name)
    }
}
1088
1089/// Configuration for creating an LLM provider
1090#[derive(Debug, Clone)]
1091pub struct ProviderConfig {
1092    /// Type of provider
1093    pub provider_type: ProviderType,
1094    /// API key for authentication
1095    pub api_key: Option<String>,
1096    /// Base URL override (optional)
1097    pub base_url: Option<String>,
1098}
1099
1100impl ProviderConfig {
1101    /// Create a new provider config
1102    pub fn new(provider_type: ProviderType) -> Self {
1103        Self {
1104            provider_type,
1105            api_key: None,
1106            base_url: None,
1107        }
1108    }
1109
1110    /// Set the API key
1111    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
1112        self.api_key = Some(api_key.into());
1113        self
1114    }
1115
1116    /// Set the base URL
1117    pub fn with_base_url(mut self, base_url: impl Into<String>) -> Self {
1118        self.base_url = Some(base_url.into());
1119        self
1120    }
1121}
1122
1123impl From<crate::llm_models::LlmProviderType> for ProviderType {
1124    fn from(provider_type: crate::llm_models::LlmProviderType) -> Self {
1125        use crate::llm_models::LlmProviderType;
1126        match provider_type {
1127            LlmProviderType::Openai => ProviderType::OpenAI,
1128            LlmProviderType::Openrouter => ProviderType::OpenRouter,
1129            LlmProviderType::AzureOpenai => ProviderType::AzureOpenAI,
1130            LlmProviderType::OpenaiCompletions => ProviderType::OpenAICompletions,
1131            LlmProviderType::Anthropic => ProviderType::Anthropic,
1132            LlmProviderType::Gemini => ProviderType::Gemini,
1133            LlmProviderType::LlmSim => ProviderType::LlmSim,
1134            LlmProviderType::Bedrock => ProviderType::Bedrock,
1135        }
1136    }
1137}
1138
1139impl From<&crate::traits::ModelWithProvider> for ProviderConfig {
1140    fn from(model: &crate::traits::ModelWithProvider) -> Self {
1141        Self {
1142            provider_type: model.provider_type.clone().into(),
1143            api_key: model.api_key.clone(),
1144            base_url: model.base_url.clone(),
1145        }
1146    }
1147}
1148
/// Boxed LLM driver for dynamic dispatch.
///
/// This is the value a [`DriverFactory`] produces and the success type of
/// [`DriverRegistry::create_driver`].
pub type BoxedLlmDriver = Box<dyn LlmDriver>;
1151
1152// ============================================================================
1153// Driver Registry
1154// ============================================================================
1155
/// Factory function type for creating LLM drivers
///
/// Takes api_key and optional base_url, returns a boxed driver.
///
/// The API key argument is an empty string when an `LlmSim` config carries no
/// key (see [`DriverRegistry::create_driver`]). Factories are stored behind an
/// `Arc` so the registry holding them can be cloned without copying closures.
pub type DriverFactory = Arc<dyn Fn(&str, Option<&str>) -> BoxedLlmDriver + Send + Sync>;
1160
/// Registry for LLM drivers
///
/// Enables dependency inversion: provider crates (everruns-anthropic, everruns-openai)
/// register their drivers at startup. The core has no direct knowledge of implementations.
///
/// The registry stores one [`DriverFactory`] per [`ProviderType`]; registering
/// again for the same provider type replaces the earlier factory.
///
/// # Example
///
/// ```ignore
/// use everruns_core::llm_drivers::DriverRegistry;
///
/// let mut registry = DriverRegistry::new();
/// everruns_anthropic::register_driver(&mut registry);
/// everruns_openai::register_driver(&mut registry);
///
/// // Later, create a driver from config
/// let driver = registry.create_driver(&config)?;
/// ```
#[derive(Clone, Default)]
pub struct DriverRegistry {
    /// Driver factories keyed by the provider type they build drivers for.
    factories: HashMap<ProviderType, DriverFactory>,
}
1184
1185impl DriverRegistry {
1186    /// Create a new empty registry
1187    pub fn new() -> Self {
1188        Self {
1189            factories: HashMap::new(),
1190        }
1191    }
1192
1193    /// Register a driver factory for a provider type
1194    pub fn register<F>(&mut self, provider_type: ProviderType, factory: F)
1195    where
1196        F: Fn(&str, Option<&str>) -> BoxedLlmDriver + Send + Sync + 'static,
1197    {
1198        self.factories.insert(provider_type, Arc::new(factory));
1199    }
1200
1201    /// Create an LLM driver based on configuration
1202    ///
1203    /// API keys must be provided in the config for real providers. This function does NOT fall back to
1204    /// environment variables. Keys should be decrypted from the database and passed here.
1205    /// Exception: LlmSim provider does not require an API key.
1206    ///
1207    /// Returns `DriverNotRegistered` error if no driver is registered for the provider type.
1208    pub fn create_driver(&self, config: &ProviderConfig) -> Result<BoxedLlmDriver> {
1209        // API key is required for real providers, but not for LlmSim (testing)
1210        let api_key = if config.provider_type == ProviderType::LlmSim {
1211            // LlmSim doesn't need a real API key
1212            config.api_key.as_deref().unwrap_or("")
1213        } else {
1214            config.api_key.as_ref().ok_or_else(|| {
1215                AgentLoopError::llm(
1216                    "API key is required. Configure the API key in provider settings.",
1217                )
1218            })?
1219        };
1220
1221        // Look up the factory for this provider type
1222        let factory = self.factories.get(&config.provider_type).ok_or_else(|| {
1223            AgentLoopError::driver_not_registered(config.provider_type.to_string())
1224        })?;
1225
1226        // Create the driver using the factory
1227        Ok(factory(api_key, config.base_url.as_deref()))
1228    }
1229
1230    /// Check if a driver is registered for a provider type
1231    pub fn has_driver(&self, provider_type: &ProviderType) -> bool {
1232        self.factories.contains_key(provider_type)
1233    }
1234
1235    /// Get the list of registered provider types
1236    pub fn registered_providers(&self) -> Vec<ProviderType> {
1237        self.factories.keys().cloned().collect()
1238    }
1239}
1240
/// Maximum tool result size in bytes before truncation (64 KiB).
/// Defense-in-depth backstop for tool results that bypass ActAtom hooks
/// (e.g. client-submitted or stored events). The primary hard limit is
/// enforced by `OutputHardLimitHook` (EVE-225) at tool execution time.
const MAX_TOOL_RESULT_BYTES: usize = 64 * 1024;

/// Notice appended to a truncated tool result. Its bytes count toward
/// `MAX_TOOL_RESULT_BYTES`.
const TRUNCATION_SUFFIX: &str =
    "\n\n[Output truncated — exceeded 64 KiB limit. Try quiet flags, pipes, or redirect to file.]";

/// Cap `text` at `MAX_TOOL_RESULT_BYTES`.
///
/// Text within the limit is returned unchanged. Longer text is cut so that
/// the kept prefix plus `TRUNCATION_SUFFIX` fits the limit; the cut never
/// lands inside a multi-byte UTF-8 character.
fn truncate_tool_result(text: String) -> String {
    if text.len() <= MAX_TOOL_RESULT_BYTES {
        return text;
    }

    // Bytes left for original content once the notice is accounted for.
    let budget = MAX_TOOL_RESULT_BYTES.saturating_sub(TRUNCATION_SUFFIX.len());

    // Walk back from the budget to the nearest character boundary. Index 0 is
    // always a boundary, so the search cannot come up empty.
    let cut = (0..=budget)
        .rev()
        .find(|&index| text.is_char_boundary(index))
        .unwrap_or(0);

    let mut shortened = String::with_capacity(cut + TRUNCATION_SUFFIX.len());
    shortened.push_str(&text[..cut]);
    shortened.push_str(TRUNCATION_SUFFIX);
    shortened
}
1263
1264// ============================================================================
1265// Tests
1266// ============================================================================
1267
1268#[cfg(test)]
1269mod tests {
1270    use super::*;
1271
1272    #[test]
1273    fn test_llm_call_config_builder_from_runtime_agent() {
1274        let runtime_agent = RuntimeAgent::new("You are helpful", "gpt-4o");
1275        let llm_config = LlmCallConfigBuilder::from(&runtime_agent).build();
1276
1277        assert_eq!(llm_config.model, "gpt-4o");
1278        assert!(llm_config.reasoning_effort.is_none());
1279        assert!(llm_config.temperature.is_none());
1280        assert!(llm_config.max_tokens.is_none());
1281        assert!(llm_config.tools.is_empty());
1282        assert!(llm_config.metadata.is_empty());
1283    }
1284
1285    #[test]
1286    fn test_llm_call_config_builder_with_metadata() {
1287        let runtime_agent = RuntimeAgent::new("You are helpful", "gpt-4o");
1288        let llm_config = LlmCallConfigBuilder::from(&runtime_agent)
1289            .with_metadata("session_id", "session_abc123")
1290            .with_metadata("agent_id", "agent_xyz789")
1291            .build();
1292
1293        assert_eq!(
1294            llm_config.metadata.get("session_id"),
1295            Some(&"session_abc123".to_string())
1296        );
1297        assert_eq!(
1298            llm_config.metadata.get("agent_id"),
1299            Some(&"agent_xyz789".to_string())
1300        );
1301    }
1302
1303    #[test]
1304    fn test_llm_call_config_builder_with_metadata_hashmap() {
1305        let runtime_agent = RuntimeAgent::new("You are helpful", "gpt-4o");
1306        let mut metadata = HashMap::new();
1307        metadata.insert("key1".to_string(), "value1".to_string());
1308        metadata.insert("key2".to_string(), "value2".to_string());
1309
1310        let llm_config = LlmCallConfigBuilder::from(&runtime_agent)
1311            .metadata(metadata)
1312            .build();
1313
1314        assert_eq!(llm_config.metadata.get("key1"), Some(&"value1".to_string()));
1315        assert_eq!(llm_config.metadata.get("key2"), Some(&"value2".to_string()));
1316    }
1317
1318    #[test]
1319    fn test_llm_call_config_builder_with_reasoning_effort() {
1320        let runtime_agent = RuntimeAgent::new("You are helpful", "gpt-4o");
1321        let llm_config = LlmCallConfigBuilder::from(&runtime_agent)
1322            .reasoning_effort("high")
1323            .build();
1324
1325        assert_eq!(llm_config.reasoning_effort, Some("high".to_string()));
1326    }
1327
1328    #[test]
1329    fn test_llm_call_config_builder_with_all_options() {
1330        let runtime_agent = RuntimeAgent::new("You are helpful", "gpt-4o");
1331        let llm_config = LlmCallConfigBuilder::from(&runtime_agent)
1332            .model("claude-3-opus")
1333            .reasoning_effort("medium")
1334            .temperature(0.7)
1335            .max_tokens(1000)
1336            .build();
1337
1338        assert_eq!(llm_config.model, "claude-3-opus");
1339        assert_eq!(llm_config.reasoning_effort, Some("medium".to_string()));
1340        assert_eq!(llm_config.temperature, Some(0.7));
1341        assert_eq!(llm_config.max_tokens, Some(1000));
1342    }
1343
1344    #[test]
1345    fn test_llm_call_config_builder_with_openrouter_routing() {
1346        let runtime_agent = RuntimeAgent::new("You are helpful", "openai/gpt-5-mini");
1347        let routing = OpenRouterRoutingConfig::fallback_models([
1348            "openai/gpt-5-mini",
1349            "anthropic/claude-sonnet-4.5",
1350        ]);
1351
1352        let llm_config = LlmCallConfigBuilder::from(&runtime_agent)
1353            .openrouter_routing(routing.clone())
1354            .build();
1355
1356        assert_eq!(llm_config.openrouter_routing, Some(routing));
1357    }
1358
1359    #[test]
1360    fn test_openrouter_fallback_models_empty_is_empty() {
1361        let routing = OpenRouterRoutingConfig::fallback_models(std::iter::empty::<String>());
1362
1363        assert!(routing.is_empty());
1364        assert_eq!(routing.route, None);
1365    }
1366
1367    #[test]
1368    fn test_openrouter_routing_validates_primary_model() {
1369        let routing = OpenRouterRoutingConfig::fallback_models([
1370            "openai/gpt-5-mini",
1371            "anthropic/claude-sonnet-4.5",
1372        ]);
1373
1374        assert!(
1375            routing
1376                .validate_for_primary_model("openai/gpt-5-mini")
1377                .is_ok()
1378        );
1379        let err = routing
1380            .validate_for_primary_model("anthropic/claude-sonnet-4.5")
1381            .unwrap_err();
1382        assert!(err.contains("models[0]"));
1383    }
1384
1385    #[test]
1386    fn test_openrouter_routing_rejects_fallback_without_models() {
1387        let routing = OpenRouterRoutingConfig {
1388            route: Some(OpenRouterRoute::Fallback),
1389            ..Default::default()
1390        };
1391
1392        let err = routing
1393            .validate_for_primary_model("openai/gpt-5-mini")
1394            .unwrap_err();
1395        assert!(err.contains("requires at least one model"));
1396    }
1397
1398    #[test]
1399    fn test_openrouter_routing_serializes_request_fields() {
1400        let routing = OpenRouterRoutingConfig {
1401            models: vec![
1402                "openai/gpt-5-mini".to_string(),
1403                "anthropic/claude-sonnet-4.5".to_string(),
1404            ],
1405            route: Some(OpenRouterRoute::Fallback),
1406            provider: Some(OpenRouterProviderRouting {
1407                order: vec!["anthropic".to_string(), "openai".to_string()],
1408                allow_fallbacks: Some(false),
1409                require_parameters: Some(true),
1410                data_collection: Some(OpenRouterDataCollection::Deny),
1411                zdr: Some(true),
1412                sort: Some(OpenRouterProviderSort::Advanced(
1413                    OpenRouterProviderSortOptions {
1414                        by: OpenRouterProviderSortBy::Throughput,
1415                        partition: Some(OpenRouterSortPartition::None),
1416                    },
1417                )),
1418                max_price: Some(OpenRouterMaxPrice {
1419                    prompt: Some(1.0),
1420                    completion: Some(2.0),
1421                    ..Default::default()
1422                }),
1423                ..Default::default()
1424            }),
1425        };
1426
1427        let json = serde_json::to_value(routing).unwrap();
1428
1429        assert_eq!(
1430            json,
1431            serde_json::json!({
1432                "models": [
1433                    "openai/gpt-5-mini",
1434                    "anthropic/claude-sonnet-4.5"
1435                ],
1436                "route": "fallback",
1437                "provider": {
1438                    "order": ["anthropic", "openai"],
1439                    "allow_fallbacks": false,
1440                    "require_parameters": true,
1441                    "data_collection": "deny",
1442                    "zdr": true,
1443                    "sort": {
1444                        "by": "throughput",
1445                        "partition": "none"
1446                    },
1447                    "max_price": {
1448                        "prompt": 1.0,
1449                        "completion": 2.0
1450                    }
1451                }
1452            })
1453        );
1454    }
1455
1456    #[test]
1457    fn test_provider_type_parsing() {
1458        assert_eq!(
1459            "openai".parse::<ProviderType>().unwrap(),
1460            ProviderType::OpenAI
1461        );
1462        assert_eq!(
1463            "openrouter".parse::<ProviderType>().unwrap(),
1464            ProviderType::OpenRouter
1465        );
1466        assert_eq!(
1467            "openai_completions".parse::<ProviderType>().unwrap(),
1468            ProviderType::OpenAICompletions
1469        );
1470        assert_eq!(
1471            "azure_openai".parse::<ProviderType>().unwrap(),
1472            ProviderType::AzureOpenAI
1473        );
1474        assert_eq!(
1475            "anthropic".parse::<ProviderType>().unwrap(),
1476            ProviderType::Anthropic
1477        );
1478        assert_eq!(
1479            "gemini".parse::<ProviderType>().unwrap(),
1480            ProviderType::Gemini
1481        );
1482        // Ollama and Custom are no longer supported
1483        assert!("ollama".parse::<ProviderType>().is_err());
1484        assert!("custom".parse::<ProviderType>().is_err());
1485    }
1486
1487    #[test]
1488    fn test_provider_type_display() {
1489        assert_eq!(ProviderType::OpenAI.to_string(), "openai");
1490        assert_eq!(ProviderType::OpenRouter.to_string(), "openrouter");
1491        assert_eq!(ProviderType::AzureOpenAI.to_string(), "azure_openai");
1492        assert_eq!(
1493            ProviderType::OpenAICompletions.to_string(),
1494            "openai_completions"
1495        );
1496        assert_eq!(ProviderType::Anthropic.to_string(), "anthropic");
1497        assert_eq!(ProviderType::Gemini.to_string(), "gemini");
1498    }
1499
1500    #[test]
1501    fn test_provider_config_builder() {
1502        let config = ProviderConfig::new(ProviderType::Anthropic)
1503            .with_api_key("test-key")
1504            .with_base_url("https://custom.api.com");
1505
1506        assert_eq!(config.provider_type, ProviderType::Anthropic);
1507        assert_eq!(config.api_key, Some("test-key".to_string()));
1508        assert_eq!(config.base_url, Some("https://custom.api.com".to_string()));
1509    }
1510
1511    #[test]
1512    fn test_driver_registry_requires_api_key() {
1513        // Register a mock factory
1514        let mut registry = DriverRegistry::new();
1515        registry.register(ProviderType::OpenAI, |_api_key, _base_url| {
1516            // Return a mock driver - just need something that compiles
1517            struct MockDriver;
1518            #[async_trait]
1519            impl LlmDriver for MockDriver {
1520                async fn chat_completion_stream(
1521                    &self,
1522                    _messages: Vec<LlmMessage>,
1523                    _config: &LlmCallConfig,
1524                ) -> Result<LlmResponseStream> {
1525                    unimplemented!()
1526                }
1527            }
1528            Box::new(MockDriver)
1529        });
1530
1531        // Driver without API key should fail
1532        let config = ProviderConfig::new(ProviderType::OpenAI);
1533        let result = registry.create_driver(&config);
1534        assert!(result.is_err());
1535
1536        // Driver with API key should succeed
1537        let config_with_key = ProviderConfig::new(ProviderType::OpenAI).with_api_key("test-key");
1538        let result = registry.create_driver(&config_with_key);
1539        assert!(result.is_ok());
1540    }
1541
1542    #[test]
1543    fn test_driver_registry_returns_error_for_unregistered_provider() {
1544        let registry = DriverRegistry::new();
1545        let config = ProviderConfig::new(ProviderType::Anthropic).with_api_key("test-key");
1546
1547        let result = registry.create_driver(&config);
1548
1549        // Should fail with DriverNotRegistered error
1550        if let Err(AgentLoopError::DriverNotRegistered(provider)) = result {
1551            assert_eq!(provider, "anthropic");
1552        } else {
1553            panic!("Expected DriverNotRegistered error");
1554        }
1555    }
1556
1557    #[test]
1558    fn test_driver_registry_registration() {
1559        let mut registry = DriverRegistry::new();
1560
1561        assert!(!registry.has_driver(&ProviderType::OpenAI));
1562        assert!(!registry.has_driver(&ProviderType::Anthropic));
1563
1564        registry.register(ProviderType::OpenAI, |_, _| {
1565            struct MockDriver;
1566            #[async_trait]
1567            impl LlmDriver for MockDriver {
1568                async fn chat_completion_stream(
1569                    &self,
1570                    _messages: Vec<LlmMessage>,
1571                    _config: &LlmCallConfig,
1572                ) -> Result<LlmResponseStream> {
1573                    unimplemented!()
1574                }
1575            }
1576            Box::new(MockDriver)
1577        });
1578
1579        assert!(registry.has_driver(&ProviderType::OpenAI));
1580        assert!(!registry.has_driver(&ProviderType::Anthropic));
1581    }
1582
1583    // ========================================================================
1584    // Image resolution tests
1585    // ========================================================================
1586
1587    use crate::{ContentPart, ImageFileContentPart, Message, MessageRole, TextContentPart};
1588
1589    #[test]
1590    fn test_message_has_image_files_with_image_file() {
1591        let message = Message {
1592            id: uuid::Uuid::new_v4().into(),
1593            role: MessageRole::User,
1594            content: vec![
1595                ContentPart::Text(TextContentPart {
1596                    text: "Look at this image".to_string(),
1597                }),
1598                ContentPart::ImageFile(ImageFileContentPart {
1599                    image_id: uuid::Uuid::new_v4().into(),
1600                    filename: Some("test.png".to_string()),
1601                }),
1602            ],
1603            phase: None,
1604            thinking: None,
1605            thinking_signature: None,
1606            controls: None,
1607            metadata: None,
1608            external_actor: None,
1609            created_at: chrono::Utc::now(),
1610        };
1611
1612        assert!(LlmMessage::message_has_image_files(&message));
1613    }
1614
1615    #[test]
1616    fn test_message_has_image_files_without_image_file() {
1617        let message = Message {
1618            id: uuid::Uuid::new_v4().into(),
1619            role: MessageRole::User,
1620            content: vec![ContentPart::Text(TextContentPart {
1621                text: "Just text".to_string(),
1622            })],
1623            phase: None,
1624            thinking: None,
1625            thinking_signature: None,
1626            controls: None,
1627            metadata: None,
1628            external_actor: None,
1629            created_at: chrono::Utc::now(),
1630        };
1631
1632        assert!(!LlmMessage::message_has_image_files(&message));
1633    }
1634
1635    #[test]
1636    fn test_extract_image_file_ids() {
1637        let id1 = uuid::Uuid::new_v4();
1638        let id2 = uuid::Uuid::new_v4();
1639
1640        let message = Message {
1641            id: uuid::Uuid::new_v4().into(),
1642            role: MessageRole::User,
1643            content: vec![
1644                ContentPart::Text(TextContentPart {
1645                    text: "Look at these images".to_string(),
1646                }),
1647                ContentPart::ImageFile(ImageFileContentPart {
1648                    image_id: id1.into(),
1649                    filename: Some("test1.png".to_string()),
1650                }),
1651                ContentPart::ImageFile(ImageFileContentPart {
1652                    image_id: id2.into(),
1653                    filename: Some("test2.png".to_string()),
1654                }),
1655            ],
1656            phase: None,
1657            thinking: None,
1658            thinking_signature: None,
1659            controls: None,
1660            metadata: None,
1661            external_actor: None,
1662            created_at: chrono::Utc::now(),
1663        };
1664
1665        let ids = LlmMessage::extract_image_file_ids(&message);
1666        assert_eq!(ids.len(), 2);
1667        assert!(ids.contains(&id1));
1668        assert!(ids.contains(&id2));
1669    }
1670
1671    #[test]
1672    fn test_from_message_with_images_text_only() {
1673        let message = Message {
1674            id: uuid::Uuid::new_v4().into(),
1675            role: MessageRole::User,
1676            content: vec![ContentPart::Text(TextContentPart {
1677                text: "Hello".to_string(),
1678            })],
1679            phase: None,
1680            thinking: None,
1681            thinking_signature: None,
1682            controls: None,
1683            metadata: None,
1684            external_actor: None,
1685            created_at: chrono::Utc::now(),
1686        };
1687
1688        let resolved = std::collections::HashMap::new();
1689        let llm_message = LlmMessage::from_message_with_images(&message, &resolved);
1690
1691        assert_eq!(llm_message.role, LlmMessageRole::User);
1692        match llm_message.content {
1693            LlmMessageContent::Text(text) => assert_eq!(text, "Hello"),
1694            _ => panic!("Expected text content"),
1695        }
1696    }
1697
1698    #[test]
1699    fn test_from_message_with_images_resolved_image() {
1700        let image_id = uuid::Uuid::new_v4();
1701        let message = Message {
1702            id: uuid::Uuid::new_v4().into(),
1703            role: MessageRole::User,
1704            content: vec![
1705                ContentPart::Text(TextContentPart {
1706                    text: "Look at this".to_string(),
1707                }),
1708                ContentPart::ImageFile(ImageFileContentPart {
1709                    image_id: image_id.into(),
1710                    filename: Some("test.png".to_string()),
1711                }),
1712            ],
1713            phase: None,
1714            thinking: None,
1715            thinking_signature: None,
1716            controls: None,
1717            metadata: None,
1718            external_actor: None,
1719            created_at: chrono::Utc::now(),
1720        };
1721
1722        let mut resolved = std::collections::HashMap::new();
1723        resolved.insert(
1724            image_id,
1725            crate::ResolvedImage::new("base64data", "image/png"),
1726        );
1727
1728        let llm_message = LlmMessage::from_message_with_images(&message, &resolved);
1729
1730        match &llm_message.content {
1731            LlmMessageContent::Parts(parts) => {
1732                assert_eq!(parts.len(), 2);
1733                // First part should be text
1734                assert!(matches!(&parts[0], LlmContentPart::Text { .. }));
1735                // Second part should be resolved image
1736                if let LlmContentPart::Image { url } = &parts[1] {
1737                    assert!(url.starts_with("data:image/png;base64,"));
1738                } else {
1739                    panic!("Expected image content part");
1740                }
1741            }
1742            _ => panic!("Expected parts content"),
1743        }
1744    }
1745
1746    #[test]
1747    fn test_from_message_with_images_unresolved_image() {
1748        let image_id = uuid::Uuid::new_v4();
1749        let message = Message {
1750            id: uuid::Uuid::new_v4().into(),
1751            role: MessageRole::User,
1752            content: vec![ContentPart::ImageFile(ImageFileContentPart {
1753                image_id: image_id.into(),
1754                filename: Some("missing.png".to_string()),
1755            })],
1756            phase: None,
1757            thinking: None,
1758            thinking_signature: None,
1759            controls: None,
1760            metadata: None,
1761            external_actor: None,
1762            created_at: chrono::Utc::now(),
1763        };
1764
1765        // Empty resolved map - image not found
1766        let resolved = std::collections::HashMap::new();
1767        let llm_message = LlmMessage::from_message_with_images(&message, &resolved);
1768
1769        // Should have placeholder text for missing image
1770        // When there's only one part, it may return Text directly instead of Parts
1771        match &llm_message.content {
1772            LlmMessageContent::Text(text) => {
1773                assert!(text.contains("Image not found"));
1774            }
1775            LlmMessageContent::Parts(parts) => {
1776                assert_eq!(parts.len(), 1);
1777                if let LlmContentPart::Text { text } = &parts[0] {
1778                    assert!(text.contains("Image not found"));
1779                } else {
1780                    panic!("Expected text placeholder for missing image");
1781                }
1782            }
1783        }
1784    }
1785
1786    #[test]
1787    fn test_prepend_text_prefix_simple_text() {
1788        let mut msg = LlmMessage::text(LlmMessageRole::User, "Hello bot");
1789        msg.prepend_text_prefix("[Alice] ");
1790        assert_eq!(msg.content_as_text(), "[Alice] Hello bot");
1791    }
1792
1793    #[test]
1794    fn test_prepend_text_prefix_parts() {
1795        let mut msg = LlmMessage::parts(
1796            LlmMessageRole::User,
1797            vec![
1798                LlmContentPart::Text {
1799                    text: "Hello".to_string(),
1800                },
1801                LlmContentPart::Image {
1802                    url: "data:image/png;base64,abc".to_string(),
1803                },
1804            ],
1805        );
1806        msg.prepend_text_prefix("[Bob] ");
1807        match &msg.content {
1808            LlmMessageContent::Parts(parts) => {
1809                if let LlmContentPart::Text { text } = &parts[0] {
1810                    assert_eq!(text, "[Bob] Hello");
1811                } else {
1812                    panic!("Expected text part");
1813                }
1814            }
1815            _ => panic!("Expected parts content"),
1816        }
1817    }
1818
1819    #[test]
1820    fn test_prepend_text_prefix_parts_no_text() {
1821        let mut msg = LlmMessage::parts(
1822            LlmMessageRole::User,
1823            vec![LlmContentPart::Image {
1824                url: "data:image/png;base64,abc".to_string(),
1825            }],
1826        );
1827        msg.prepend_text_prefix("[Eve] ");
1828        match &msg.content {
1829            LlmMessageContent::Parts(parts) => {
1830                assert_eq!(parts.len(), 2);
1831                if let LlmContentPart::Text { text } = &parts[0] {
1832                    assert_eq!(text, "[Eve] ");
1833                } else {
1834                    panic!("Expected prepended text part");
1835                }
1836            }
1837            _ => panic!("Expected parts content"),
1838        }
1839    }
1840}