Skip to main content

everruns_core/
openresponses_protocol.rs

1// Open Responses Protocol Driver
2//
3// Implementation of the Open Responses specification (https://www.openresponses.org/)
4// an open-source, vendor-neutral API standard for multi-provider LLM interfaces.
5//
6// Rate limit handling: On 429 errors, the driver automatically retries with
7// exponential backoff, respecting x-ratelimit-reset-* and retry-after headers.
8// Retry metadata is included in the response for observability.
9//
10// The spec is inspired by and interoperable with the OpenAI Responses API, offering:
11// - One spec, many providers (OpenAI, Anthropic, Gemini, local models)
12// - Agentic loop support with tool calls and state machines
13// - Semantic streaming events (not raw text deltas)
14// - 40-80% better cache utilization vs Chat Completions API
15// - Native stateful conversation support
16//
17// Specification: https://www.openresponses.org/specification
18// GitHub: https://github.com/openresponses/openresponses
19//
20// The Chat Completions API remains supported for backward compatibility.
21
22use async_trait::async_trait;
23use eventsource_stream::Eventsource;
24use futures::StreamExt;
25use reqwest::Client;
26use serde::{Deserialize, Serialize};
27use serde_json::{Value, json};
28use sha2::{Digest, Sha256};
29use std::collections::HashSet;
30use std::sync::{Arc, Mutex};
31
32use crate::error::{AgentLoopError, Result};
33use crate::llm_driver_registry::{
34    LlmCallConfig, LlmCompletionMetadata, LlmContentPart, LlmDriver, LlmMessage, LlmMessageContent,
35    LlmMessageRole, LlmResponseStream, LlmStreamEvent,
36};
37use crate::llm_models::LlmProviderType;
38use crate::llm_retry::{
39    LlmRetryConfig, RateLimitInfo, RetryMetadata, is_rate_limit_status, is_transient_error,
40};
41use crate::openai_protocol::{
42    apply_openai_api_auth, is_openai_model_not_found, is_openai_request_too_large,
43};
44use crate::openresponses_types::{self as types, StreamingEvent};
45use crate::tool_types::{ToolCall, ToolDefinition};
46
47const DEFAULT_API_URL: &str = "https://api.openai.com/v1/responses";
48const OPENAI_PROMPT_CACHE_KEY_MAX_LEN: usize = 64;
49const PROMPT_CACHE_KEY_PREFIX: &str = "everruns:";
50
/// Open Responses Protocol Driver (OpenAI implementation)
///
/// Implements `LlmDriver` using the Open Responses specification
/// (<https://www.openresponses.org/>). This driver targets OpenAI's API
/// but follows the vendor-neutral Open Responses standard.
///
/// Rate limit handling: On 429 errors, automatically retries with exponential
/// backoff, respecting `x-ratelimit-reset-*` and `retry-after` headers.
///
/// The Open Responses spec is recommended for new projects, offering:
/// - Better performance with reasoning models (o1, o3, GPT-5)
/// - Provider-agnostic streaming events
/// - Native agentic loop support
///
/// # Example
///
/// ```ignore
/// use everruns_core::OpenResponsesProtocolLlmDriver;
///
/// let driver = OpenResponsesProtocolLlmDriver::from_env()?;
/// // or
/// let driver = OpenResponsesProtocolLlmDriver::new("your-api-key");
/// // or with custom endpoint
/// let driver = OpenResponsesProtocolLlmDriver::with_base_url("your-api-key", "https://api.example.com/v1/responses");
/// // or with custom retry config
/// let driver = OpenResponsesProtocolLlmDriver::new("your-api-key")
///     .with_retry_config(LlmRetryConfig::aggressive());
/// ```
#[derive(Clone)]
pub struct OpenResponsesProtocolLlmDriver {
    /// HTTP client used to send requests to the API.
    client: Client,
    /// API key passed to `apply_openai_api_auth` when building requests.
    api_key: String,
    /// Full URL of the Responses endpoint (`DEFAULT_API_URL` unless overridden).
    api_url: String,
    /// Provider used for model profile lookup and provider-specific request features.
    provider_type: LlmProviderType,
    /// Retry configuration for rate limit errors
    retry_config: LlmRetryConfig,
}
88
89impl OpenResponsesProtocolLlmDriver {
90    /// Create a new driver with the given API key
91    pub fn new(api_key: impl Into<String>) -> Self {
92        Self {
93            client: Client::new(),
94            api_key: api_key.into(),
95            api_url: DEFAULT_API_URL.to_string(),
96            provider_type: LlmProviderType::Openai,
97            retry_config: LlmRetryConfig::default(),
98        }
99    }
100
101    /// Create a new driver from the OPENAI_API_KEY environment variable
102    pub fn from_env() -> Result<Self> {
103        let api_key = std::env::var("OPENAI_API_KEY")
104            .map_err(|_| AgentLoopError::llm("OPENAI_API_KEY environment variable not set"))?;
105        Ok(Self::new(api_key))
106    }
107
108    /// Create a new driver with a custom API URL
109    pub fn with_base_url(api_key: impl Into<String>, api_url: impl Into<String>) -> Self {
110        Self {
111            client: Client::new(),
112            api_key: api_key.into(),
113            api_url: api_url.into(),
114            provider_type: LlmProviderType::Openai,
115            retry_config: LlmRetryConfig::default(),
116        }
117    }
118
119    /// Set the model provider used for provider-specific request features.
120    pub fn with_provider_type(mut self, provider_type: LlmProviderType) -> Self {
121        self.provider_type = provider_type;
122        self
123    }
124
125    /// Configure retry behavior for rate limit errors
126    pub fn with_retry_config(mut self, config: LlmRetryConfig) -> Self {
127        self.retry_config = config;
128        self
129    }
130
131    /// Get the API URL
132    pub fn api_url(&self) -> &str {
133        &self.api_url
134    }
135
136    /// Get the API key (for subclass access)
137    pub fn api_key(&self) -> &str {
138        &self.api_key
139    }
140
    /// Borrow the HTTP client this driver uses, so code that wraps or composes
    /// the driver can issue requests through the same client.
    pub fn client(&self) -> &Client {
        &self.client
    }
145
    /// Get the provider type used for model profile lookup.
    ///
    /// Defaults to `LlmProviderType::Openai` unless changed with
    /// `with_provider_type`.
    pub fn provider_type(&self) -> &LlmProviderType {
        &self.provider_type
    }
150
151    fn convert_role(role: &LlmMessageRole) -> &'static str {
152        match role {
153            LlmMessageRole::System => "developer", // Responses API uses "developer" for system
154            LlmMessageRole::User => "user",
155            LlmMessageRole::Assistant => "assistant",
156            LlmMessageRole::Tool => "tool",
157        }
158    }
159
160    fn convert_message(msg: &LlmMessage, supports_phases: bool) -> ResponsesInputItem {
161        // Handle tool result messages differently
162        // Note: OpenAI Responses API function_call_output only supports text output.
163        // Images in tool results are dropped with a warning.
164        if msg.role == LlmMessageRole::Tool
165            && let Some(tool_call_id) = &msg.tool_call_id
166        {
167            let mut has_images = false;
168            let output = match &msg.content {
169                LlmMessageContent::Text(text) => text.clone(),
170                LlmMessageContent::Parts(parts) => {
171                    has_images = parts
172                        .iter()
173                        .any(|p| matches!(p, LlmContentPart::Image { .. }));
174                    parts
175                        .iter()
176                        .filter_map(|p| match p {
177                            LlmContentPart::Text { text } => Some(text.clone()),
178                            _ => None,
179                        })
180                        .collect::<Vec<_>>()
181                        .join("")
182                }
183            };
184            if has_images {
185                tracing::warn!(
186                    tool_call_id = %tool_call_id,
187                    "OpenResponses API does not support images in tool results; images dropped"
188                );
189            }
190            return ResponsesInputItem::FunctionCallOutput {
191                r#type: "function_call_output".to_string(),
192                call_id: tool_call_id.clone(),
193                output,
194            };
195        }
196
197        let content = match &msg.content {
198            LlmMessageContent::Text(text) => ResponsesContent::Text(text.clone()),
199            LlmMessageContent::Parts(parts) => {
200                let responses_parts: Vec<ResponsesContentPart> = parts
201                    .iter()
202                    .map(|part| match part {
203                        LlmContentPart::Text { text } => ResponsesContentPart::InputText {
204                            r#type: "input_text".to_string(),
205                            text: text.clone(),
206                        },
207                        LlmContentPart::Image { url } => ResponsesContentPart::InputImage {
208                            r#type: "input_image".to_string(),
209                            image_url: url.clone(),
210                        },
211                        LlmContentPart::Audio { url } => ResponsesContentPart::InputAudio {
212                            r#type: "input_audio".to_string(),
213                            input_audio: ResponsesInputAudio {
214                                data: url.clone(),
215                                format: "wav".to_string(),
216                            },
217                        },
218                    })
219                    .collect();
220                ResponsesContent::Parts(responses_parts)
221            }
222        };
223
224        // Only include phase on assistant messages when the model supports it.
225        // Map ExecutionPhase enum to the provider's wire format string.
226        let phase = if supports_phases && msg.role == LlmMessageRole::Assistant {
227            msg.phase.map(|p| p.as_provider_str().to_string())
228        } else {
229            None
230        };
231
232        ResponsesInputItem::Message {
233            r#type: "message".to_string(),
234            role: Self::convert_role(&msg.role).to_string(),
235            content,
236            phase,
237        }
238    }
239
240    /// Ensure an object-typed JSON Schema has a `properties` key.
241    /// OpenAI rejects function schemas where `type: "object"` lacks `properties`.
242    fn sanitize_parameters(params: &Value) -> Value {
243        let mut p = params.clone();
244        if let Some(obj) = p.as_object_mut()
245            && obj.get("type").and_then(|v| v.as_str()) == Some("object")
246            && !obj.contains_key("properties")
247        {
248            obj.insert(
249                "properties".to_string(),
250                serde_json::Value::Object(serde_json::Map::new()),
251            );
252        }
253        p
254    }
255
256    fn convert_tools(tools: &[ToolDefinition]) -> Vec<ResponsesTool> {
257        tools
258            .iter()
259            .map(|tool| ResponsesTool::Function {
260                r#type: "function".to_string(),
261                name: tool.name().to_string(),
262                description: tool.description().to_string(),
263                parameters: Self::sanitize_parameters(tool.parameters()),
264                defer_loading: None,
265            })
266            .collect()
267    }
268
269    /// Convert tools with tool_search support: groups tools into namespaces,
270    /// marks them as deferred, and appends a `tool_search` entry.
271    fn convert_tools_with_search(tools: &[ToolDefinition], threshold: usize) -> Vec<ResponsesTool> {
272        use crate::tool_types::DeferrablePolicy;
273        use std::collections::HashMap;
274
275        // Below threshold: fall back to standard conversion
276        if tools.len() < threshold {
277            return Self::convert_tools(tools);
278        }
279
280        let mut namespaces: HashMap<String, Vec<ResponsesTool>> = HashMap::new();
281        let mut ungrouped = vec![];
282        let mut never_defer = vec![];
283
284        for tool in tools {
285            let should_defer = match tool.deferrable() {
286                DeferrablePolicy::Never => false,
287                DeferrablePolicy::Automatic | DeferrablePolicy::Always => true,
288            };
289
290            let func = ResponsesTool::Function {
291                r#type: "function".to_string(),
292                name: tool.name().to_string(),
293                description: tool.description().to_string(),
294                parameters: Self::sanitize_parameters(tool.parameters()),
295                defer_loading: if should_defer { Some(true) } else { None },
296            };
297
298            if !should_defer {
299                never_defer.push(func);
300            } else {
301                match tool.category() {
302                    Some(cat) => {
303                        namespaces.entry(cat.to_string()).or_default().push(func);
304                    }
305                    None => ungrouped.push(func),
306                }
307            }
308        }
309
310        let mut result: Vec<ResponsesTool> = Vec::new();
311
312        // Non-deferred tools first (always visible to model)
313        result.extend(never_defer);
314
315        // Namespaced tools
316        for (name, tools) in namespaces {
317            let description = format!("Tools for {name}");
318            result.push(ResponsesTool::Namespace {
319                r#type: "namespace".to_string(),
320                name,
321                description,
322                tools,
323            });
324        }
325
326        // Ungrouped deferred tools
327        result.extend(ungrouped);
328
329        // Add tool_search activator
330        result.push(ResponsesTool::ToolSearch {
331            r#type: "tool_search".to_string(),
332        });
333
334        result
335    }
336
337    fn build_prompt_cache_key(
338        config: &LlmCallConfig,
339        _input_items: &[ResponsesInputItem],
340        instructions: &Option<String>,
341        tools: &Option<Vec<ResponsesTool>>,
342    ) -> Option<String> {
343        let prompt_cache = config.prompt_cache.as_ref().filter(|cfg| cfg.enabled)?;
344        let cache_family = config
345            .metadata
346            .get("session_id")
347            .or_else(|| config.metadata.get("agent_id"))
348            .or_else(|| config.metadata.get("harness_id"))
349            .or_else(|| config.metadata.get("org_id"));
350        let fingerprint = json!({
351            "strategy": prompt_cache.strategy,
352            "model": config.model,
353            "cache_family": cache_family,
354            "instructions": instructions,
355            "tools": tools,
356        });
357        let payload = serde_json::to_vec(&fingerprint).ok()?;
358        let digest = format!("{:x}", Sha256::digest(payload));
359        let digest_len = OPENAI_PROMPT_CACHE_KEY_MAX_LEN - PROMPT_CACHE_KEY_PREFIX.len();
360        Some(format!(
361            "{PROMPT_CACHE_KEY_PREFIX}{}",
362            &digest[..digest_len]
363        ))
364    }
365
366    /// Compact a conversation to reduce context size
367    ///
368    /// This method calls the /v1/responses/compact endpoint to compress the conversation
369    /// history. User messages are kept verbatim, while assistant messages, tool calls,
370    /// and tool results are replaced by an encrypted compaction item.
371    ///
372    /// # Arguments
373    ///
374    /// * `request` - The compact request containing the model and input items
375    ///
376    /// # Returns
377    ///
378    /// Returns a `CompactResponse` containing the compacted output items.
379    /// The output can be used directly as input for the next /v1/responses call.
380    ///
381    /// # Example
382    ///
383    /// ```ignore
384    /// use everruns_core::{OpenResponsesProtocolLlmDriver, CompactRequest, CompactInputItem, CompactContent};
385    ///
386    /// let driver = OpenResponsesProtocolLlmDriver::new("your-api-key");
387    ///
388    /// let request = CompactRequest {
389    ///     model: "gpt-4o".to_string(),
390    ///     input: vec![
391    ///         CompactInputItem::Message {
392    ///             role: "user".to_string(),
393    ///             content: CompactContent::Text("Hello!".to_string()),
394    ///         },
395    ///     ],
396    ///     previous_response_id: None,
397    ///     instructions: None,
398    /// };
399    ///
400    /// let response = driver.compact(request).await?;
401    /// // Use response.output as input for the next /v1/responses call
402    /// ```
403    pub async fn compact(&self, request: CompactRequest) -> Result<CompactResponse> {
404        // Build the compact endpoint URL
405        // Replace /v1/responses with /v1/responses/compact
406        let compact_url = if self.api_url.ends_with("/responses") {
407            format!("{}/compact", self.api_url)
408        } else if self.api_url.ends_with("/responses/") {
409            format!("{}compact", self.api_url)
410        } else {
411            // Custom URL - just append /compact
412            format!("{}/compact", self.api_url.trim_end_matches('/'))
413        };
414
415        // Retry loop for rate limit (429) and transient errors
416        let mut retry_metadata = RetryMetadata::default();
417        let mut last_error: Option<String> = None;
418
419        let response = loop {
420            let response =
421                apply_openai_api_auth(self.client.post(&compact_url), &compact_url, &self.api_key)
422                    .header("Content-Type", "application/json")
423                    .json(&request)
424                    .send()
425                    .await
426                    .map_err(|e| {
427                        AgentLoopError::llm(format!("Failed to send compact request: {}", e))
428                    })?;
429
430            let status = response.status();
431
432            if status.is_success() {
433                break response;
434            }
435
436            // Check if this is a retryable error
437            if is_transient_error(status) && retry_metadata.attempts < self.retry_config.max_retries
438            {
439                let rate_limit_info = if is_rate_limit_status(status) {
440                    Some(RateLimitInfo::from_openai_headers(response.headers()))
441                } else {
442                    None
443                };
444
445                let error_text = response.text().await.unwrap_or_default();
446
447                let wait_duration = rate_limit_info
448                    .as_ref()
449                    .map(|info| info.recommended_wait(&self.retry_config, retry_metadata.attempts))
450                    .unwrap_or_else(|| {
451                        self.retry_config.calculate_backoff(retry_metadata.attempts)
452                    });
453
454                tracing::warn!(
455                    status = %status,
456                    attempt = retry_metadata.attempts + 1,
457                    max_retries = self.retry_config.max_retries,
458                    wait_secs = wait_duration.as_secs_f64(),
459                    "OpenResponsesDriver: compact rate limit or transient error, retrying"
460                );
461
462                retry_metadata.record_retry(wait_duration, rate_limit_info);
463                last_error = Some(error_text);
464
465                tokio::time::sleep(wait_duration).await;
466                continue;
467            }
468
469            // Non-retryable error or max retries exceeded
470            let error_text = response.text().await.unwrap_or_default();
471
472            // Check if this is a model-not-found error
473            if is_openai_model_not_found(status, &error_text) {
474                return Err(AgentLoopError::model_not_available(request.model.clone()));
475            }
476
477            // Check if this is a request-too-large error (context length exceeded)
478            if is_openai_request_too_large(status, &error_text) {
479                return Err(AgentLoopError::request_too_large(format!(
480                    "OpenAI Responses compact API ({}): {}",
481                    status, error_text
482                )));
483            }
484
485            let error_msg = format!(
486                "OpenAI Responses compact API error ({}): {}",
487                status, error_text
488            );
489
490            if retry_metadata.attempts > 0 {
491                return Err(AgentLoopError::llm(format!(
492                    "{} (after {} retries, last error: {})",
493                    error_msg,
494                    retry_metadata.attempts,
495                    last_error.unwrap_or_default()
496                )));
497            }
498
499            return Err(AgentLoopError::llm(error_msg));
500        };
501
502        if retry_metadata.had_retries() {
503            tracing::info!(
504                attempts = retry_metadata.attempts,
505                total_wait_secs = retry_metadata.total_retry_wait.as_secs_f64(),
506                "OpenResponsesDriver: compact request succeeded after retries"
507            );
508        }
509
510        // Parse the response
511        let compact_response: CompactResponse = response
512            .json()
513            .await
514            .map_err(|e| AgentLoopError::llm(format!("Failed to parse compact response: {}", e)))?;
515
516        Ok(compact_response)
517    }
518
519    /// Check if this driver supports the compact endpoint
520    ///
521    /// Returns true for OpenAI's Responses API. Custom endpoints may or may not
522    /// support compaction.
523    pub fn supports_compact(&self) -> bool {
524        // We assume compact is supported for the default OpenAI endpoint
525        // For custom endpoints, callers should try and handle errors gracefully
526        self.api_url.starts_with("https://api.openai.com/")
527    }
528
529    /// Build input items from messages, extracting system/developer instructions
530    ///
531    /// Handles the conversion of:
532    /// - Assistant messages with tool_calls into separate FunctionCall items
533    /// - Assistant messages with thinking into Reasoning items (for o-series/GPT-5 models)
534    ///
535    /// Note: this function always reconstructs the FULL transcript from the supplied
536    /// messages. The caller is responsible for trimming to a delta window when a
537    /// `previous_response_id` is in play — see [`compute_delta_input_items`]. The
538    /// stateful Responses invariant is: a request must not mix `previous_response_id`
539    /// with prior transcript input the provider already holds server-side.
540    fn build_input(
541        messages: &[LlmMessage],
542        supports_phases: bool,
543    ) -> (Option<String>, Vec<ResponsesInputItem>) {
544        let mut instructions: Option<String> = None;
545        let mut input_items = Vec::new();
546        // Counter for generating reasoning item IDs
547        let mut reasoning_counter = 0u32;
548
549        for msg in messages {
550            if msg.role == LlmMessageRole::System {
551                // Extract system message as instructions
552                instructions = Some(match &msg.content {
553                    LlmMessageContent::Text(text) => text.clone(),
554                    LlmMessageContent::Parts(parts) => parts
555                        .iter()
556                        .filter_map(|p| match p {
557                            LlmContentPart::Text { text } => Some(text.clone()),
558                            _ => None,
559                        })
560                        .collect::<Vec<_>>()
561                        .join(""),
562                });
563            } else if msg.role == LlmMessageRole::Assistant {
564                // For assistant messages, emit Reasoning item BEFORE message content if present
565                // This is required for o-series and GPT-5 models with extended thinking
566                if let Some(encrypted_content) = &msg.thinking_signature {
567                    reasoning_counter += 1;
568                    input_items.push(ResponsesInputItem::Reasoning {
569                        r#type: "reasoning".to_string(),
570                        id: format!("rs_{:08x}", reasoning_counter),
571                        encrypted_content: encrypted_content.clone(),
572                    });
573                    tracing::debug!(
574                        encrypted_len = encrypted_content.len(),
575                        "OpenResponses: including reasoning item in request"
576                    );
577                }
578
579                // Handle tool calls
580                if msg.tool_calls.as_ref().is_some_and(|tc| !tc.is_empty()) {
581                    // First emit the message content if non-empty
582                    let has_content = match &msg.content {
583                        LlmMessageContent::Text(text) => !text.is_empty(),
584                        LlmMessageContent::Parts(parts) => !parts.is_empty(),
585                    };
586                    if has_content {
587                        input_items.push(Self::convert_message(msg, supports_phases));
588                    }
589
590                    // Then emit FunctionCall items for each tool call
591                    if let Some(tool_calls) = &msg.tool_calls {
592                        for tc in tool_calls {
593                            input_items.push(ResponsesInputItem::FunctionCall {
594                                r#type: "function_call".to_string(),
595                                call_id: tc.id.clone(),
596                                name: tc.name.clone(),
597                                arguments: tc.arguments.to_string(),
598                            });
599                        }
600                    }
601                } else {
602                    input_items.push(Self::convert_message(msg, supports_phases));
603                }
604            } else {
605                input_items.push(Self::convert_message(msg, supports_phases));
606            }
607        }
608
609        (instructions, input_items)
610    }
611}
612
613/// Trim input items to the "delta" window for a stateful Responses continuation.
614///
615/// When a request carries `previous_response_id`, OpenAI already holds the prior
616/// transcript server-side. Re-sending it in `input` double-counts context (charges
617/// the user twice and inflates prompt-cache keys). The invariant is:
618///
619///   **A request must not mix `previous_response_id` with prior transcript input.**
620///
621/// "Delta" is everything strictly after the last item that belonged to a prior
622/// assistant turn. Items that belong to a prior assistant turn are: assistant
623/// `Message`, `Reasoning`, and `FunctionCall` (the assistant's own tool calls).
624/// What remains as delta is typically `FunctionCallOutput` items (tool results
625/// the client produced) plus any fresh user `Message`s.
626///
627/// Defensive behavior: if no prior-assistant item is found (e.g., the caller
628/// passed only fresh user input), all items are treated as delta and kept. An
629/// empty input is also valid — the provider can resume purely from
630/// `previous_response_id`.
631fn compute_delta_input_items(items: Vec<ResponsesInputItem>) -> Vec<ResponsesInputItem> {
632    // Find the index of the last item that is part of a prior assistant turn.
633    let last_assistant_turn_idx = items
634        .iter()
635        .enumerate()
636        .rev()
637        .find_map(|(i, item)| match item {
638            ResponsesInputItem::Message { role, .. } if role == "assistant" => Some(i),
639            ResponsesInputItem::Reasoning { .. } => Some(i),
640            ResponsesInputItem::FunctionCall { .. } => Some(i),
641            _ => None,
642        });
643
644    match last_assistant_turn_idx {
645        Some(idx) => items.into_iter().skip(idx + 1).collect(),
646        // No prior-assistant items in input — defensive: keep all items as delta.
647        None => items,
648    }
649}
650
651/// The single decision point for whether a Responses request `input` should be
652/// trimmed to the delta window. Extracted so the call path can be regression-tested
653/// without spinning up an HTTP mock — protects against accidentally removing the
654/// `previous_response_id.is_some()` guard that enforces the stateful invariant.
655fn finalize_input_for_request(
656    input_items: Vec<ResponsesInputItem>,
657    previous_response_id: &Option<String>,
658) -> Vec<ResponsesInputItem> {
659    if previous_response_id.is_some() {
660        compute_delta_input_items(input_items)
661    } else {
662        drop_locally_orphaned_function_call_outputs(input_items)
663    }
664}
665
666fn drop_locally_orphaned_function_call_outputs(
667    input_items: Vec<ResponsesInputItem>,
668) -> Vec<ResponsesInputItem> {
669    let visible_call_ids: HashSet<String> = input_items
670        .iter()
671        .filter_map(|item| match item {
672            ResponsesInputItem::FunctionCall { call_id, .. } => Some(call_id.clone()),
673            _ => None,
674        })
675        .collect();
676
677    if visible_call_ids.is_empty() {
678        return input_items
679            .into_iter()
680            .filter(|item| !matches!(item, ResponsesInputItem::FunctionCallOutput { .. }))
681            .collect();
682    }
683
684    input_items
685        .into_iter()
686        .filter(|item| match item {
687            ResponsesInputItem::FunctionCallOutput { call_id, .. } => {
688                visible_call_ids.contains(call_id.as_str())
689            }
690            _ => true,
691        })
692        .collect()
693}
694
695/// Whether the endpoint at `api_url` persists responses server-side and honors
696/// `previous_response_id` for stateful continuation.
697///
698/// Only OpenAI's hosted API and Azure OpenAI are known to store responses.
699/// OpenAI-compatible gateways that expose a stateless `/responses` shim — e.g.
700/// OpenRouter and Google Gemini's compat endpoint — *accept* `previous_response_id`
701/// but silently ignore it (`store: false`). Chaining against those drops the
702/// conversation from turn 2 onward, so they must get the full transcript replayed
703/// in `input` each turn instead (EVE-523).
704fn endpoint_persists_responses(api_url: &str) -> bool {
705    crate::openai_protocol::is_openai_api_url(api_url)
706        || crate::openai_protocol::is_azure_openai_api_url(api_url)
707}
708
709#[async_trait]
710impl LlmDriver for OpenResponsesProtocolLlmDriver {
711    async fn chat_completion_stream(
712        &self,
713        messages: Vec<LlmMessage>,
714        config: &LlmCallConfig,
715    ) -> Result<LlmResponseStream> {
716        // Check the provider-specific model profile before sending native
717        // Responses features. OpenAI-compatible gateways may share base model
718        // metadata without supporting OpenAI-only extensions such as phases or
719        // hosted tool_search.
720        let model_profile =
721            crate::llm_model_profiles::get_model_profile(&self.provider_type, &config.model);
722        let supports_phases = model_profile
723            .as_ref()
724            .is_some_and(|profile| profile.supports_phases);
725        let supports_tool_search = model_profile
726            .as_ref()
727            .is_some_and(|profile| profile.tool_search);
728
729        let (instructions, input_items) = Self::build_input(&messages, supports_phases);
730
731        // Only chain via `previous_response_id` when the endpoint actually persists
732        // responses server-side. Stateless OpenAI-compatible gateways (OpenRouter,
733        // Gemini compat, …) accept the field but ignore it, so chaining there drops
734        // the conversation from turn 2 onward (EVE-523). For those we send no
735        // continuation handle and replay the full transcript in `input` below.
736        let previous_response_id = if endpoint_persists_responses(&self.api_url) {
737            config.previous_response_id.clone()
738        } else {
739            None
740        };
741
742        // Stateful Responses continuations must not mix `previous_response_id` with
743        // the prior transcript input the provider already holds server-side. When a
744        // continuation handle is present, trim `input_items` to the delta window so
745        // the request only carries new tool results / user messages. With no handle
746        // (incl. stateless gateways), the full transcript is kept.
747        let input_items = finalize_input_for_request(input_items, &previous_response_id);
748
749        let tools = if config.tools.is_empty() {
750            None
751        } else if let Some(ref ts_config) = config.tool_search {
752            if ts_config.enabled && supports_tool_search {
753                Some(Self::convert_tools_with_search(
754                    &config.tools,
755                    ts_config.threshold,
756                ))
757            } else {
758                Some(Self::convert_tools(&config.tools))
759            }
760        } else {
761            Some(Self::convert_tools(&config.tools))
762        };
763
764        // Build reasoning config if specified.
765        // Skip when effort is "none" — sending reasoning params to models that
766        // don't support them (or with effort=none) causes OpenAI API errors.
767        let reasoning = config
768            .reasoning_effort
769            .as_ref()
770            .filter(|e| !e.eq_ignore_ascii_case("none"))
771            .map(|effort| ResponsesReasoning {
772                effort: effort.clone(),
773                summary: "detailed".to_string(),
774            });
775
776        // Build metadata for request tracking
777        let metadata = if config.metadata.is_empty() {
778            None
779        } else {
780            Some(config.metadata.clone())
781        };
782        let prompt_cache_key =
783            Self::build_prompt_cache_key(config, &input_items, &instructions, &tools);
784
785        let request = ResponsesRequest {
786            model: config.model.clone(),
787            input: input_items,
788            instructions,
789            previous_response_id,
790            temperature: config.temperature,
791            max_output_tokens: config.max_tokens,
792            stream: true,
793            tools,
794            reasoning,
795            metadata,
796            prompt_cache_key,
797        };
798
799        // Log request details for debugging LLM errors.
800        // Only log request shape to avoid leaking prompt or metadata contents.
801        {
802            let tool_count = request.tools.as_ref().map_or(0, |t| t.len());
803            let input_count = request.input.len();
804            let has_instructions = request.instructions.is_some();
805            let has_reasoning = request.reasoning.is_some();
806            let has_previous_response = request.previous_response_id.is_some();
807            tracing::debug!(
808                model = %request.model,
809                input_items = input_count,
810                tool_count = tool_count,
811                has_instructions = has_instructions,
812                has_reasoning = has_reasoning,
813                has_previous_response = has_previous_response,
814                api_url = %self.api_url,
815                "OpenResponsesDriver: sending request"
816            );
817        }
818
819        // Retry loop for rate limit (429) and transient errors
820        let mut retry_metadata = RetryMetadata::default();
821        let mut last_error: Option<String> = None;
822
823        let response = loop {
824            let response = apply_openai_api_auth(
825                self.client.post(&self.api_url),
826                &self.api_url,
827                &self.api_key,
828            )
829            .header("Content-Type", "application/json")
830            .json(&request)
831            .send()
832            .await
833            .map_err(|e| AgentLoopError::llm(format!("Failed to send request: {}", e)))?;
834
835            let status = response.status();
836
837            if status.is_success() {
838                // Success - exit retry loop
839                break response;
840            }
841
842            // Check if this is a retryable error
843            if is_transient_error(status) && retry_metadata.attempts < self.retry_config.max_retries
844            {
845                // Parse rate limit info from headers before consuming response body
846                let rate_limit_info = if is_rate_limit_status(status) {
847                    Some(RateLimitInfo::from_openai_headers(response.headers()))
848                } else {
849                    None
850                };
851
852                let error_text = response.text().await.unwrap_or_default();
853
854                // Calculate wait duration
855                let wait_duration = rate_limit_info
856                    .as_ref()
857                    .map(|info| info.recommended_wait(&self.retry_config, retry_metadata.attempts))
858                    .unwrap_or_else(|| {
859                        self.retry_config.calculate_backoff(retry_metadata.attempts)
860                    });
861
862                tracing::warn!(
863                    status = %status,
864                    attempt = retry_metadata.attempts + 1,
865                    max_retries = self.retry_config.max_retries,
866                    wait_secs = wait_duration.as_secs_f64(),
867                    retry_after = ?rate_limit_info.as_ref().and_then(|i| i.retry_after_secs),
868                    "OpenResponsesDriver: rate limit or transient error, retrying"
869                );
870
871                // Record retry attempt
872                retry_metadata.record_retry(wait_duration, rate_limit_info);
873                last_error = Some(error_text);
874
875                // Wait before retry
876                tokio::time::sleep(wait_duration).await;
877                continue;
878            }
879
880            // Non-retryable error or max retries exceeded
881            let error_text = response.text().await.unwrap_or_default();
882
883            // Check if this is a model-not-found error
884            if is_openai_model_not_found(status, &error_text) {
885                return Err(AgentLoopError::model_not_available(config.model.clone()));
886            }
887
888            // Check if this is a request-too-large error (context length exceeded)
889            if is_openai_request_too_large(status, &error_text) {
890                return Err(AgentLoopError::request_too_large(format!(
891                    "OpenAI Responses API ({}): {}",
892                    status, error_text
893                )));
894            }
895
896            let error_msg = format!("OpenAI Responses API error ({}): {}", status, error_text);
897
898            // If we exhausted retries, include that in the error message
899            if retry_metadata.attempts > 0 {
900                return Err(AgentLoopError::llm(format!(
901                    "{} (after {} retries, last error: {})",
902                    error_msg,
903                    retry_metadata.attempts,
904                    last_error.unwrap_or_default()
905                )));
906            }
907
908            return Err(AgentLoopError::llm(error_msg));
909        };
910
911        // Log successful retry recovery
912        if retry_metadata.had_retries() {
913            tracing::info!(
914                attempts = retry_metadata.attempts,
915                total_wait_secs = retry_metadata.total_retry_wait.as_secs_f64(),
916                "OpenResponsesDriver: request succeeded after retries"
917            );
918        }
919
920        let byte_stream = response.bytes_stream();
921        let event_stream = byte_stream.eventsource();
922
923        let model = config.model.clone();
924        let input_tokens = Arc::new(Mutex::new(0u32));
925        let output_tokens = Arc::new(Mutex::new(0u32));
926        let cache_read_tokens = Arc::new(Mutex::new(Option::<u32>::None));
927        let accumulated_tool_calls = Arc::new(Mutex::new(Vec::<ToolCallAccumulator>::new()));
928        let finish_reason = Arc::new(Mutex::new(Option::<String>::None));
929        // Share retry metadata with stream closure (only set if retries occurred)
930        let shared_retry_metadata = if retry_metadata.had_retries() {
931            Some(Arc::new(retry_metadata))
932        } else {
933            None
934        };
935
936        let converted_stream: LlmResponseStream = Box::pin(event_stream.then(move |result| {
937            let model = model.clone();
938            let input_tokens = Arc::clone(&input_tokens);
939            let output_tokens = Arc::clone(&output_tokens);
940            let cache_read_tokens = Arc::clone(&cache_read_tokens);
941            let accumulated_tool_calls = Arc::clone(&accumulated_tool_calls);
942            let finish_reason = Arc::clone(&finish_reason);
943            let retry_metadata_for_done = shared_retry_metadata.clone();
944
945            async move {
946                match result {
947                    Ok(event) => {
948                        let event_data = &event.data;
949
950                        // Try to parse as typed StreamingEvent first for type safety
951                        if let Ok(streaming_event) =
952                            serde_json::from_str::<StreamingEvent>(event_data)
953                        {
954                            return Ok(handle_streaming_event(
955                                streaming_event,
956                                &input_tokens,
957                                &output_tokens,
958                                &cache_read_tokens,
959                                &accumulated_tool_calls,
960                                &finish_reason,
961                                model,
962                                retry_metadata_for_done,
963                            ));
964                        }
965
966                        // Fallback: parse as generic JSON for backwards compatibility
967                        let parsed: std::result::Result<Value, _> =
968                            serde_json::from_str(event_data);
969
970                        match parsed {
971                            Ok(json) => {
972                                let event_type = json.get("type").and_then(|t| t.as_str());
973
974                                match event_type {
975                                    Some("response.output_text.delta") => {
976                                        // Text delta
977                                        if let Some(delta) =
978                                            json.get("delta").and_then(|d| d.as_str())
979                                        {
980                                            Ok(LlmStreamEvent::TextDelta(delta.to_string()))
981                                        } else {
982                                            Ok(LlmStreamEvent::TextDelta(String::new()))
983                                        }
984                                    }
985
986                                    Some("response.function_call_arguments.delta") => {
987                                        // Function call arguments delta
988                                        if let (Some(item_id), Some(delta)) = (
989                                            json.get("item_id").and_then(|c| c.as_str()),
990                                            json.get("delta").and_then(|d| d.as_str()),
991                                        ) {
992                                            let mut acc = accumulated_tool_calls.lock().unwrap();
993                                            // Find or create accumulator for this item_id
994                                            if let Some(tc) =
995                                                acc.iter_mut().find(|t| t.id == item_id)
996                                            {
997                                                tc.arguments.push_str(delta);
998                                            } else {
999                                                acc.push(ToolCallAccumulator {
1000                                                    id: item_id.to_string(),
1001                                                    call_id: String::new(),
1002                                                    name: String::new(),
1003                                                    arguments: delta.to_string(),
1004                                                });
1005                                            }
1006                                        }
1007                                        Ok(LlmStreamEvent::TextDelta(String::new()))
1008                                    }
1009
1010                                    Some("response.output_item.added") => {
1011                                        // New output item added - may be function call
1012                                        if let Some(item) = json.get("item")
1013                                            && item.get("type").and_then(|t| t.as_str())
1014                                                == Some("function_call")
1015                                        {
1016                                            let id = item
1017                                                .get("id")
1018                                                .and_then(|c| c.as_str())
1019                                                .unwrap_or("")
1020                                                .to_string();
1021                                            let call_id = item
1022                                                .get("call_id")
1023                                                .and_then(|c| c.as_str())
1024                                                .unwrap_or("")
1025                                                .to_string();
1026                                            let name = item
1027                                                .get("name")
1028                                                .and_then(|n| n.as_str())
1029                                                .unwrap_or("")
1030                                                .to_string();
1031
1032                                            let mut acc = accumulated_tool_calls.lock().unwrap();
1033                                            if let Some(tc) = acc.iter_mut().find(|t| t.id == id) {
1034                                                tc.name = name;
1035                                                tc.call_id = call_id;
1036                                            } else {
1037                                                acc.push(ToolCallAccumulator {
1038                                                    id,
1039                                                    call_id,
1040                                                    name,
1041                                                    arguments: String::new(),
1042                                                });
1043                                            }
1044                                        }
1045                                        Ok(LlmStreamEvent::TextDelta(String::new()))
1046                                    }
1047
1048                                    Some("response.output_item.done") => {
1049                                        // Output item completed - check if it's a function call
1050                                        if let Some(item) = json.get("item")
1051                                            && item.get("type").and_then(|t| t.as_str())
1052                                                == Some("function_call")
1053                                        {
1054                                            // Function call completed, emit ToolCalls event
1055                                            let acc = accumulated_tool_calls.lock().unwrap();
1056                                            if !acc.is_empty() {
1057                                                let tool_calls: Vec<ToolCall> = acc
1058                                                    .iter()
1059                                                    .filter(|tc| !tc.name.is_empty())
1060                                                    .map(|tc| {
1061                                                        let arguments: Value =
1062                                                            serde_json::from_str(&tc.arguments)
1063                                                                .unwrap_or(json!({}));
1064                                                        ToolCall {
1065                                                            id: tc.call_id.clone(),
1066                                                            name: tc.name.clone(),
1067                                                            arguments,
1068                                                        }
1069                                                    })
1070                                                    .collect();
1071
1072                                                if !tool_calls.is_empty() {
1073                                                    *finish_reason.lock().unwrap() =
1074                                                        Some("tool_calls".to_string());
1075                                                    return Ok(LlmStreamEvent::ToolCalls(
1076                                                        tool_calls,
1077                                                    ));
1078                                                }
1079                                            }
1080                                        }
1081                                        Ok(LlmStreamEvent::TextDelta(String::new()))
1082                                    }
1083
1084                                    Some("response.completed") | Some("response.done") => {
1085                                        // Response completed - extract usage
1086                                        let response_obj = json.get("response").unwrap_or(&json);
1087
1088                                        // Authoritative per-request cost from OpenAI-compatible
1089                                        // gateways (e.g. OpenRouter `usage.cost`, in USD credits).
1090                                        let mut provider_cost_usd: Option<f64> = None;
1091                                        if let Some(usage) = response_obj.get("usage") {
1092                                            if let Some(input) =
1093                                                usage.get("input_tokens").and_then(|t| t.as_u64())
1094                                            {
1095                                                *input_tokens.lock().unwrap() = input as u32;
1096                                            }
1097                                            if let Some(output) =
1098                                                usage.get("output_tokens").and_then(|t| t.as_u64())
1099                                            {
1100                                                *output_tokens.lock().unwrap() = output as u32;
1101                                            }
1102                                            // Check for cached tokens
1103                                            if let Some(details) = usage.get("input_tokens_details")
1104                                                && let Some(cached) = details
1105                                                    .get("cached_tokens")
1106                                                    .and_then(|t| t.as_u64())
1107                                            {
1108                                                *cache_read_tokens.lock().unwrap() =
1109                                                    Some(cached as u32);
1110                                            }
1111                                            provider_cost_usd =
1112                                                usage.get("cost").and_then(|c| c.as_f64());
1113                                        }
1114
1115                                        // Determine finish reason from status
1116                                        let status = response_obj
1117                                            .get("status")
1118                                            .and_then(|s| s.as_str())
1119                                            .unwrap_or("completed");
1120
1121                                        let reason = match status {
1122                                            "completed" => {
1123                                                // Check if there were tool calls
1124                                                let existing_reason =
1125                                                    finish_reason.lock().unwrap().clone();
1126                                                existing_reason
1127                                                    .unwrap_or_else(|| "stop".to_string())
1128                                            }
1129                                            "failed" => {
1130                                                let error_detail = response_obj
1131                                                    .get("error")
1132                                                    .map(|e| e.to_string())
1133                                                    .unwrap_or_else(|| "no error detail".into());
1134                                                tracing::warn!(
1135                                                    response_error = %error_detail,
1136                                                    "OpenResponsesDriver: response completed with 'failed' status (fallback parser)"
1137                                                );
1138                                                "error".to_string()
1139                                            }
1140                                            "cancelled" => "stop".to_string(),
1141                                            _ => "stop".to_string(),
1142                                        };
1143
1144                                        // Extract phase from the last assistant message in output items
1145                                        let phase = response_obj
1146                                            .get("output")
1147                                            .and_then(|o| o.as_array())
1148                                            .and_then(|items| {
1149                                                items.iter().rev().find_map(|item| {
1150                                                    if item.get("type")?.as_str()? == "message"
1151                                                        && item.get("role")?.as_str()?
1152                                                            == "assistant"
1153                                                    {
1154                                                        item.get("phase")?
1155                                                            .as_str()
1156                                                            .map(String::from)
1157                                                    } else {
1158                                                        None
1159                                                    }
1160                                                })
1161                                            });
1162
1163                                        let input = *input_tokens.lock().unwrap();
1164                                        let output = *output_tokens.lock().unwrap();
1165                                        let cached = *cache_read_tokens.lock().unwrap();
1166
1167                                        Ok(LlmStreamEvent::Done(Box::new(LlmCompletionMetadata {
1168                                            total_tokens: Some(input + output),
1169                                            prompt_tokens: Some(input),
1170                                            completion_tokens: Some(output),
1171                                            cache_read_tokens: cached,
1172                                            cache_creation_tokens: None,
1173                                            provider_cost_usd,
1174                                            model: Some(model),
1175                                            finish_reason: Some(reason),
1176                                            retry_metadata: retry_metadata_for_done
1177                                                .map(|arc| (*arc).clone()),
1178                                            response_id: None,
1179                                            phase,
1180                                        })))
1181                                    }
1182
1183                                    Some("error") => {
1184                                        // Error event (fallback JSON path)
1185                                        let error_code = json
1186                                            .get("error")
1187                                            .and_then(|e| e.get("code"))
1188                                            .and_then(|c| c.as_str())
1189                                            .unwrap_or("unknown");
1190                                        let error_msg = json
1191                                            .get("error")
1192                                            .and_then(|e| e.get("message"))
1193                                            .and_then(|m| m.as_str())
1194                                            .unwrap_or("Unknown error");
1195                                        tracing::warn!(
1196                                            error_code = error_code,
1197                                            error_message = error_msg,
1198                                            raw_error = %json.get("error").unwrap_or(&json),
1199                                            "OpenResponsesDriver: received streaming error event (fallback parser)"
1200                                        );
1201                                        let formatted = if error_code != "unknown" {
1202                                            format!("{}: {}", error_code, error_msg)
1203                                        } else {
1204                                            error_msg.to_string()
1205                                        };
1206                                        Ok(LlmStreamEvent::Error(formatted))
1207                                    }
1208
1209                                    _ => {
1210                                        // Other event types - ignore
1211                                        Ok(LlmStreamEvent::TextDelta(String::new()))
1212                                    }
1213                                }
1214                            }
1215                            Err(e) => Ok(LlmStreamEvent::Error(format!(
1216                                "Failed to parse event: {}",
1217                                e
1218                            ))),
1219                        }
1220                    }
1221                    Err(e) => Ok(LlmStreamEvent::Error(format!("Stream error: {}", e))),
1222                }
1223            }
1224        }));
1225
1226        Ok(converted_stream)
1227    }
1228
1229    fn supports_compact(&self) -> bool {
1230        // Delegate to the inherent method
1231        OpenResponsesProtocolLlmDriver::supports_compact(self)
1232    }
1233
1234    async fn compact(
1235        &self,
1236        request: crate::openresponses_protocol::CompactRequest,
1237    ) -> Result<Option<crate::openresponses_protocol::CompactResponse>> {
1238        // Delegate to the inherent method and wrap in Some
1239        Ok(Some(
1240            OpenResponsesProtocolLlmDriver::compact(self, request).await?,
1241        ))
1242    }
1243}
1244
1245impl std::fmt::Debug for OpenResponsesProtocolLlmDriver {
1246    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1247        f.debug_struct("OpenResponsesProtocolLlmDriver")
1248            .field("api_url", &self.api_url)
1249            .field("provider_type", &self.provider_type)
1250            .field("api_key", &"[REDACTED]")
1251            .finish()
1252    }
1253}
1254
1255// ============================================================================
1256// Helper Types
1257// ============================================================================
1258
/// Per-item state for a function call that is being assembled from
/// streaming events.
///
/// An entry is keyed by the stream item `id`. The `call_id` and `name` are
/// filled in when the output item is announced, and `arguments` grows as
/// argument deltas arrive. Either kind of event may create the entry, so
/// fields not yet known are left as empty strings.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct ToolCallAccumulator {
    /// Item ID in the stream; used to match deltas to their accumulator.
    id: String,
    /// Unique call ID for the function call; becomes the emitted tool call's id.
    call_id: String,
    /// Function name; entries with an empty name are skipped when emitting.
    name: String,
    /// Accumulated JSON arguments text, concatenated from the deltas.
    arguments: String,
}
1271
1272/// Handle typed streaming events from the OpenResponses API
1273#[allow(clippy::too_many_arguments)]
1274fn handle_streaming_event(
1275    event: StreamingEvent,
1276    input_tokens: &Mutex<u32>,
1277    output_tokens: &Mutex<u32>,
1278    cache_read_tokens: &Mutex<Option<u32>>,
1279    accumulated_tool_calls: &Mutex<Vec<ToolCallAccumulator>>,
1280    finish_reason: &Mutex<Option<String>>,
1281    model: String,
1282    retry_metadata: Option<Arc<RetryMetadata>>,
1283) -> LlmStreamEvent {
1284    match event {
1285        StreamingEvent::OutputTextDelta { delta, .. } => LlmStreamEvent::TextDelta(delta),
1286
1287        StreamingEvent::ReasoningDelta { delta, .. } => LlmStreamEvent::ThinkingDelta(delta),
1288
1289        StreamingEvent::ReasoningTextDelta { delta, .. } => LlmStreamEvent::ThinkingDelta(delta),
1290
1291        StreamingEvent::ReasoningSummaryDelta { delta, .. } => LlmStreamEvent::ThinkingDelta(delta),
1292
1293        StreamingEvent::FunctionCallArgumentsDelta { item_id, delta, .. } => {
1294            let mut acc = accumulated_tool_calls.lock().unwrap();
1295            if let Some(tc) = acc.iter_mut().find(|t| t.id == item_id) {
1296                tc.arguments.push_str(&delta);
1297            } else {
1298                acc.push(ToolCallAccumulator {
1299                    id: item_id,
1300                    call_id: String::new(),
1301                    name: String::new(),
1302                    arguments: delta,
1303                });
1304            }
1305            LlmStreamEvent::TextDelta(String::new())
1306        }
1307
1308        StreamingEvent::OutputItemAdded { item, .. } => {
1309            if let Some(types::OutputItem::FunctionCall {
1310                id, call_id, name, ..
1311            }) = item
1312            {
1313                let mut acc = accumulated_tool_calls.lock().unwrap();
1314                if let Some(tc) = acc.iter_mut().find(|t| t.id == id) {
1315                    tc.name = name;
1316                    tc.call_id = call_id;
1317                } else {
1318                    acc.push(ToolCallAccumulator {
1319                        id,
1320                        call_id,
1321                        name,
1322                        arguments: String::new(),
1323                    });
1324                }
1325            }
1326            LlmStreamEvent::TextDelta(String::new())
1327        }
1328
1329        StreamingEvent::OutputItemDone { item, .. } => {
1330            match item {
1331                Some(types::OutputItem::FunctionCall { .. }) => {
1332                    let acc = accumulated_tool_calls.lock().unwrap();
1333                    if !acc.is_empty() {
1334                        let tool_calls: Vec<ToolCall> = acc
1335                            .iter()
1336                            .filter(|tc| !tc.name.is_empty())
1337                            .map(|tc| {
1338                                let arguments: Value =
1339                                    serde_json::from_str(&tc.arguments).unwrap_or(json!({}));
1340                                ToolCall {
1341                                    id: tc.call_id.clone(),
1342                                    name: tc.name.clone(),
1343                                    arguments,
1344                                }
1345                            })
1346                            .collect();
1347
1348                        if !tool_calls.is_empty() {
1349                            *finish_reason.lock().unwrap() = Some("tool_calls".to_string());
1350                            return LlmStreamEvent::ToolCalls(tool_calls);
1351                        }
1352                    }
1353                    LlmStreamEvent::TextDelta(String::new())
1354                }
1355                Some(types::OutputItem::Reasoning {
1356                    id,
1357                    summary,
1358                    content: _, // plaintext reasoning content is intentionally not propagated
1359                    encrypted_content,
1360                }) => {
1361                    // Plaintext reasoning content from the provider is intentionally
1362                    // dropped here so it never reaches persisted events. Only the
1363                    // provider's opaque encrypted artifact and curated summary text
1364                    // travel forward.
1365                    let safe_summary: Vec<String> = summary
1366                        .into_iter()
1367                        .filter_map(|part| match part {
1368                            types::ContentPart::SummaryText { text } => Some(text),
1369                            _ => None,
1370                        })
1371                        .collect();
1372                    tracing::debug!(
1373                        encrypted_len = encrypted_content.as_ref().map(|s| s.len()).unwrap_or(0),
1374                        summary_segments = safe_summary.len(),
1375                        "OpenResponses: received reasoning item"
1376                    );
1377                    LlmStreamEvent::ReasonItem {
1378                        provider: "openai".to_string(),
1379                        model: Some(model.clone()),
1380                        item_id: id,
1381                        encrypted_content,
1382                        summary: safe_summary,
1383                        token_count: None,
1384                    }
1385                }
1386                _ => LlmStreamEvent::TextDelta(String::new()),
1387            }
1388        }
1389
1390        StreamingEvent::ResponseCompleted { response, .. } => {
1391            // Extract usage
1392            if let Some(usage) = &response.usage {
1393                *input_tokens.lock().unwrap() = usage.input_tokens;
1394                *output_tokens.lock().unwrap() = usage.output_tokens;
1395                if let Some(details) = &usage.input_tokens_details {
1396                    *cache_read_tokens.lock().unwrap() = Some(details.cached_tokens);
1397                }
1398            }
1399
1400            let reason = match response.status {
1401                types::ResponseStatus::Completed => {
1402                    let existing = finish_reason.lock().unwrap().clone();
1403                    existing.unwrap_or_else(|| "stop".to_string())
1404                }
1405                types::ResponseStatus::Failed => {
1406                    tracing::warn!(
1407                        response_id = %response.id,
1408                        error = ?response.error,
1409                        "OpenResponsesDriver: response completed with 'failed' status"
1410                    );
1411                    "error".to_string()
1412                }
1413                types::ResponseStatus::Cancelled => "stop".to_string(),
1414                _ => "stop".to_string(),
1415            };
1416
1417            // Extract phase from the last assistant message in output items.
1418            // The API assigns the phase; we preserve it as-is for subsequent requests.
1419            let phase = response.output.iter().rev().find_map(|item| {
1420                if let types::OutputItem::Message { phase, .. } = item {
1421                    phase.clone()
1422                } else {
1423                    None
1424                }
1425            });
1426
1427            let input = *input_tokens.lock().unwrap();
1428            let output = *output_tokens.lock().unwrap();
1429            let cached = *cache_read_tokens.lock().unwrap();
1430            let provider_cost_usd = response.usage.as_ref().and_then(|u| u.cost);
1431
1432            LlmStreamEvent::Done(Box::new(LlmCompletionMetadata {
1433                total_tokens: Some(input + output),
1434                prompt_tokens: Some(input),
1435                completion_tokens: Some(output),
1436                cache_read_tokens: cached,
1437                cache_creation_tokens: None,
1438                provider_cost_usd,
1439                model: Some(model),
1440                finish_reason: Some(reason),
1441                retry_metadata: retry_metadata.map(|arc| (*arc).clone()),
1442                response_id: Some(response.id),
1443                phase,
1444            }))
1445        }
1446
1447        StreamingEvent::Error { error, .. } => {
1448            let msg = if let Some(code) = &error.code {
1449                format!("{}: {}", code, error.message)
1450            } else {
1451                error.message.clone()
1452            };
1453            tracing::warn!(
1454                error_code = error.code.as_deref().unwrap_or("none"),
1455                error_message = %error.message,
1456                "OpenResponsesDriver: received streaming error event from provider"
1457            );
1458            LlmStreamEvent::Error(msg)
1459        }
1460
1461        StreamingEvent::RefusalDelta { delta, .. } => {
1462            // Treat refusal as an error message
1463            LlmStreamEvent::Error(format!("Model refused: {}", delta))
1464        }
1465
1466        // All other events: emit empty delta to maintain stream continuity
1467        _ => LlmStreamEvent::TextDelta(String::new()),
1468    }
1469}
1470
1471// ============================================================================
1472// Compact Endpoint Types (Public API)
1473// ============================================================================
1474
/// Request for the /v1/responses/compact endpoint
///
/// This endpoint compacts a conversation by replacing prior assistant messages,
/// tool calls, and tool results with an encrypted compaction item that preserves
/// latent context but is opaque. User messages are kept verbatim.
///
/// Serialization: `input` is left out of the JSON body when it is empty, and
/// `previous_response_id` / `instructions` are left out when they are `None`.
/// `model` is always written.
#[derive(Debug, Clone, Serialize)]
pub struct CompactRequest {
    /// Model to use for compaction (required)
    pub model: String,
    /// Input items to compact (the current conversation window)
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub input: Vec<CompactInputItem>,
    /// Previous response ID (optional, alternative to input)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub previous_response_id: Option<String>,
    /// System instructions (optional, applies only to the compaction request)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub instructions: Option<String>,
}
1494
1495/// Input item for compact request
1496///
1497/// These are the same types as ResponsesInputItem but exposed publicly
1498/// for callers to construct compact requests.
1499#[derive(Debug, Clone, Serialize, Deserialize)]
1500#[serde(tag = "type")]
1501pub enum CompactInputItem {
1502    /// A message (user, assistant, or developer)
1503    #[serde(rename = "message")]
1504    Message {
1505        role: String,
1506        content: CompactContent,
1507    },
1508    /// A function call from the assistant
1509    #[serde(rename = "function_call")]
1510    FunctionCall {
1511        call_id: String,
1512        name: String,
1513        arguments: String,
1514    },
1515    /// Output from a function call
1516    #[serde(rename = "function_call_output")]
1517    FunctionCallOutput { call_id: String, output: String },
1518    /// A compaction item (from a previous compact call)
1519    #[serde(rename = "compaction")]
1520    Compaction { encrypted_content: String },
1521}
1522
/// Content for compact input items
///
/// Untagged: serialized either as a bare JSON string (`Text`) or as a JSON
/// array of content parts (`Parts`), with no wrapper object.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum CompactContent {
    /// Simple text content
    Text(String),
    /// Array of content parts
    Parts(Vec<CompactContentPart>),
}
1532
1533/// Content part for compact input
1534#[derive(Debug, Clone, Serialize, Deserialize)]
1535#[serde(tag = "type")]
1536pub enum CompactContentPart {
1537    /// Text content
1538    #[serde(rename = "input_text")]
1539    InputText { text: String },
1540    /// Image content
1541    #[serde(rename = "input_image")]
1542    InputImage { image_url: String },
1543}
1544
/// Response from the /v1/responses/compact endpoint
///
/// Only the fields declared here are read from the response body.
#[derive(Debug, Clone, Deserialize)]
pub struct CompactResponse {
    /// The compacted output items
    pub output: Vec<CompactOutputItem>,
    /// Token usage information (`None` when the response carries no usage)
    pub usage: Option<CompactUsage>,
}
1553
1554/// Output item from compact response
1555#[derive(Debug, Clone, Serialize, Deserialize)]
1556#[serde(tag = "type")]
1557pub enum CompactOutputItem {
1558    /// A user message (kept verbatim)
1559    #[serde(rename = "message")]
1560    Message {
1561        role: String,
1562        content: CompactContent,
1563    },
1564    /// An encrypted compaction item
1565    #[serde(rename = "compaction")]
1566    Compaction {
1567        /// Encrypted content that preserves latent context
1568        encrypted_content: String,
1569    },
1570}
1571
/// Token usage from compact response
///
/// Every counter is optional, so a response that omits any of them still
/// deserializes.
#[derive(Debug, Clone, Deserialize)]
pub struct CompactUsage {
    /// Input tokens processed
    pub input_tokens: Option<u32>,
    /// Output tokens generated
    pub output_tokens: Option<u32>,
    /// Total tokens used
    pub total_tokens: Option<u32>,
}
1582
1583// ============================================================================
1584// Compaction Conversion Utilities
1585// ============================================================================
1586
1587impl CompactInputItem {
1588    /// Convert an LlmMessage to CompactInputItem(s)
1589    ///
1590    /// An assistant message with tool_calls is expanded into multiple items:
1591    /// one Message for the text content and one FunctionCall for each tool call.
1592    pub fn from_llm_message(msg: &LlmMessage) -> Vec<Self> {
1593        let mut items = Vec::new();
1594
1595        let role = match msg.role {
1596            LlmMessageRole::System => "developer",
1597            LlmMessageRole::User => "user",
1598            LlmMessageRole::Assistant => "assistant",
1599            LlmMessageRole::Tool => "tool",
1600        };
1601
1602        // Handle tool result messages differently
1603        if msg.role == LlmMessageRole::Tool
1604            && let Some(tool_call_id) = &msg.tool_call_id
1605        {
1606            let output = match &msg.content {
1607                LlmMessageContent::Text(text) => text.clone(),
1608                LlmMessageContent::Parts(parts) => parts
1609                    .iter()
1610                    .filter_map(|p| match p {
1611                        LlmContentPart::Text { text } => Some(text.clone()),
1612                        _ => None,
1613                    })
1614                    .collect::<Vec<_>>()
1615                    .join(""),
1616            };
1617            items.push(CompactInputItem::FunctionCallOutput {
1618                call_id: tool_call_id.clone(),
1619                output,
1620            });
1621            return items;
1622        }
1623
1624        // Add message content (if non-empty)
1625        let content = Self::content_from_llm_message(msg);
1626        let has_content = match &content {
1627            CompactContent::Text(t) => !t.is_empty(),
1628            CompactContent::Parts(p) => !p.is_empty(),
1629        };
1630
1631        if has_content || msg.tool_calls.is_none() {
1632            items.push(CompactInputItem::Message {
1633                role: role.to_string(),
1634                content,
1635            });
1636        }
1637
1638        // Add function calls for assistant messages
1639        if msg.role == LlmMessageRole::Assistant
1640            && let Some(tool_calls) = &msg.tool_calls
1641        {
1642            for tc in tool_calls {
1643                items.push(CompactInputItem::FunctionCall {
1644                    call_id: tc.id.clone(),
1645                    name: tc.name.clone(),
1646                    arguments: tc.arguments.to_string(),
1647                });
1648            }
1649        }
1650
1651        items
1652    }
1653
1654    /// Convert LlmMessageContent to CompactContent
1655    fn content_from_llm_message(msg: &LlmMessage) -> CompactContent {
1656        match &msg.content {
1657            LlmMessageContent::Text(text) => CompactContent::Text(text.clone()),
1658            LlmMessageContent::Parts(parts) => {
1659                let compact_parts: Vec<CompactContentPart> = parts
1660                    .iter()
1661                    .filter_map(|part| match part {
1662                        LlmContentPart::Text { text } => {
1663                            Some(CompactContentPart::InputText { text: text.clone() })
1664                        }
1665                        LlmContentPart::Image { url } => {
1666                            // URL is already in data URL format (data:image/png;base64,...)
1667                            Some(CompactContentPart::InputImage {
1668                                image_url: url.clone(),
1669                            })
1670                        }
1671                        LlmContentPart::Audio { .. } => None, // Audio not supported in compact
1672                    })
1673                    .collect();
1674                if compact_parts.len() == 1
1675                    && let CompactContentPart::InputText { text } = &compact_parts[0]
1676                {
1677                    return CompactContent::Text(text.clone());
1678                }
1679                CompactContent::Parts(compact_parts)
1680            }
1681        }
1682    }
1683}
1684
1685impl CompactOutputItem {
1686    /// Convert a CompactOutputItem to LlmMessage
1687    ///
1688    /// Compaction items are converted to a special system message containing
1689    /// the encrypted context that will be included in subsequent requests.
1690    pub fn to_llm_message(&self) -> Option<LlmMessage> {
1691        match self {
1692            CompactOutputItem::Message { role, content } => {
1693                let llm_role = match role.as_str() {
1694                    "user" => LlmMessageRole::User,
1695                    "assistant" => LlmMessageRole::Assistant,
1696                    "developer" | "system" => LlmMessageRole::System,
1697                    "tool" => LlmMessageRole::Tool,
1698                    _ => LlmMessageRole::User, // Default to user
1699                };
1700
1701                let llm_content = match content {
1702                    CompactContent::Text(text) => LlmMessageContent::Text(text.clone()),
1703                    CompactContent::Parts(parts) => {
1704                        let llm_parts: Vec<LlmContentPart> = parts
1705                            .iter()
1706                            .map(|p| match p {
1707                                CompactContentPart::InputText { text } => {
1708                                    LlmContentPart::Text { text: text.clone() }
1709                                }
1710                                CompactContentPart::InputImage { image_url } => {
1711                                    // Pass the URL directly - it's already in data URL format
1712                                    LlmContentPart::Image {
1713                                        url: image_url.clone(),
1714                                    }
1715                                }
1716                            })
1717                            .collect();
1718                        LlmMessageContent::Parts(llm_parts)
1719                    }
1720                };
1721
1722                Some(LlmMessage {
1723                    role: llm_role,
1724                    content: llm_content,
1725                    tool_calls: None,
1726                    tool_call_id: None,
1727                    phase: None,
1728                    thinking: None,
1729                    thinking_signature: None,
1730                })
1731            }
1732            CompactOutputItem::Compaction { .. } => {
1733                // Compaction items are handled separately - they're passed as-is
1734                // to the next request, not converted to messages
1735                None
1736            }
1737        }
1738    }
1739}
1740
1741/// Convert a slice of LlmMessages to CompactInputItems
1742pub fn messages_to_compact_input(messages: &[LlmMessage]) -> Vec<CompactInputItem> {
1743    messages
1744        .iter()
1745        .flat_map(CompactInputItem::from_llm_message)
1746        .collect()
1747}
1748
1749/// Convert CompactResponse output to LlmMessages plus any compaction items
1750///
1751/// Returns a tuple of (regular messages, compaction items).
1752/// The compaction items should be preserved and included in subsequent compact requests.
1753pub fn compact_output_to_messages(
1754    output: &[CompactOutputItem],
1755) -> (Vec<LlmMessage>, Vec<CompactInputItem>) {
1756    let mut messages = Vec::new();
1757    let mut compaction_items = Vec::new();
1758
1759    for item in output {
1760        match item {
1761            CompactOutputItem::Message { role, content } => {
1762                if let Some(msg) = item.to_llm_message() {
1763                    messages.push(msg);
1764                } else {
1765                    // Re-add as compact input for next request
1766                    compaction_items.push(CompactInputItem::Message {
1767                        role: role.clone(),
1768                        content: content.clone(),
1769                    });
1770                }
1771            }
1772            CompactOutputItem::Compaction { encrypted_content } => {
1773                compaction_items.push(CompactInputItem::Compaction {
1774                    encrypted_content: encrypted_content.clone(),
1775                });
1776            }
1777        }
1778    }
1779
1780    (messages, compaction_items)
1781}
1782
1783// ============================================================================
1784// OpenAI Responses API Types
1785// ============================================================================
1786
/// JSON request body sent by this driver.
///
/// `model`, `input`, and `stream` are always serialized. Every `Option` field
/// is left out of the JSON entirely when it is `None`.
#[derive(Debug, Serialize)]
struct ResponsesRequest {
    model: String,
    /// Conversation items for this request.
    input: Vec<ResponsesInputItem>,
    #[serde(skip_serializing_if = "Option::is_none")]
    instructions: Option<String>,
    /// ID of a prior response to continue from (stateful continuation).
    #[serde(skip_serializing_if = "Option::is_none")]
    previous_response_id: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_output_tokens: Option<u32>,
    stream: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<ResponsesTool>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning: Option<ResponsesReasoning>,
    /// Metadata for tracking API usage (up to 16 key-value pairs).
    /// Useful for correlating requests with session_id, agent_id, org_id, etc.
    #[serde(skip_serializing_if = "Option::is_none")]
    metadata: Option<std::collections::HashMap<String, String>>,
    /// Prompt cache key; the tests in this file obtain it from
    /// `build_prompt_cache_key`.
    #[serde(skip_serializing_if = "Option::is_none")]
    prompt_cache_key: Option<String>,
}
1811
/// Reasoning settings, serialized under the `reasoning` key of
/// `ResponsesRequest`.
#[derive(Debug, Serialize)]
struct ResponsesReasoning {
    /// Reasoning effort level, passed through as a string (the tests use "high").
    effort: String,
    /// Request reasoning summary to get thinking tokens streamed back.
    /// Without this, reasoning happens internally but tokens are not exposed.
    summary: String,
}
1819
/// One element of the `input` array of `ResponsesRequest`.
///
/// Untagged: serde adds no discriminator of its own. Each variant instead
/// carries an explicit `r#type` field, which is written out as the JSON `type`
/// key (for example `"message"` in the tests in this file).
#[derive(Debug, Serialize)]
#[serde(untagged)]
enum ResponsesInputItem {
    /// A conversation message.
    Message {
        r#type: String,
        role: String,
        content: ResponsesContent,
        /// Execution phase for assistant messages (e.g., "in_progress", "completed").
        /// Helps GPT-5.x distinguish intermediate working commentary from final answers.
        /// Only set on assistant messages; must be preserved when replaying history.
        #[serde(skip_serializing_if = "Option::is_none")]
        phase: Option<String>,
    },
    /// A function call; `arguments` is carried as a string.
    FunctionCall {
        r#type: String,
        call_id: String,
        name: String,
        arguments: String,
    },
    /// The output for the function call identified by `call_id`.
    FunctionCallOutput {
        r#type: String,
        call_id: String,
        output: String,
    },
    /// Reasoning item for o-series and GPT-5 models
    /// Contains encrypted reasoning content that preserves reasoning context across turns
    /// (similar to Anthropic's thinking signature).
    ///
    /// Stateless requests must re-send prior `Reasoning` items in `input` so the model can
    /// continue from them. Stateful continuations (those carrying `previous_response_id`)
    /// rely on OpenAI to hold the prior reasoning chain server-side, so [`compute_delta_input_items`]
    /// intentionally drops `Reasoning` items that belong to a prior assistant turn — re-sending
    /// them alongside `previous_response_id` would violate the no-mixing invariant.
    Reasoning {
        r#type: String,
        /// Unique ID for this reasoning item
        id: String,
        /// Encrypted reasoning content (required for multi-turn conversations)
        encrypted_content: String,
    },
}
1861
/// Content of a `ResponsesInputItem::Message`.
///
/// Untagged: serialized either as a bare JSON string (`Text`) or as a JSON
/// array of content parts (`Parts`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
enum ResponsesContent {
    /// Plain text content
    Text(String),
    /// A list of typed content parts
    Parts(Vec<ResponsesContentPart>),
}
1868
// The "Input" prefix matches OpenAI's Responses API naming convention
//
// Untagged: each variant carries its own `r#type` field, written out as the
// JSON `type` key. NOTE(review): when deserializing an untagged enum, serde
// picks the first variant whose fields match and does not check the value of
// `type` - confirm that is acceptable wherever this type is deserialized.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
#[allow(clippy::enum_variant_names)]
enum ResponsesContentPart {
    /// Text part
    InputText {
        r#type: String,
        text: String,
    },
    /// Image part, referenced by URL
    InputImage {
        r#type: String,
        image_url: String,
    },
    /// Audio part with an inline payload
    InputAudio {
        r#type: String,
        input_audio: ResponsesInputAudio,
    },
}
1887
/// Payload of a `ResponsesContentPart::InputAudio` part.
#[derive(Debug, Serialize, Deserialize)]
struct ResponsesInputAudio {
    /// Audio payload as a string (presumably base64-encoded - TODO confirm against caller)
    data: String,
    /// Audio format identifier, passed through as a string
    format: String,
}
1893
/// One element of the `tools` array of `ResponsesRequest`.
///
/// Untagged: each variant carries its own `r#type` field, written out as the
/// JSON `type` key.
#[derive(Debug, Serialize)]
#[serde(untagged)]
enum ResponsesTool {
    /// Standard function tool (or deferred function with defer_loading)
    Function {
        r#type: String,
        name: String,
        description: String,
        /// Parameter schema, carried as an arbitrary JSON value.
        parameters: Value,
        /// Omitted from the JSON when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        defer_loading: Option<bool>,
    },
    /// Namespace grouping for tool_search (groups related deferred tools)
    Namespace {
        r#type: String,
        name: String,
        description: String,
        /// Tools grouped under this namespace.
        tools: Vec<ResponsesTool>,
    },
    /// Activates tool_search on the request
    ToolSearch { r#type: String },
}
1916
1917// ============================================================================
1918// Tests
1919// ============================================================================
1920
1921#[cfg(test)]
1922mod tests {
1923    use super::*;
1924
1925    #[test]
1926    fn test_driver_with_api_key() {
1927        let driver = OpenResponsesProtocolLlmDriver::new("test-key");
1928        assert!(format!("{:?}", driver).contains("OpenResponsesProtocolLlmDriver"));
1929    }
1930
1931    #[test]
1932    fn test_driver_with_base_url() {
1933        let driver = OpenResponsesProtocolLlmDriver::with_base_url(
1934            "test-key",
1935            "https://custom.api.com/v1/responses",
1936        );
1937        assert!(format!("{:?}", driver).contains("OpenResponsesProtocolLlmDriver"));
1938        assert_eq!(driver.api_url(), "https://custom.api.com/v1/responses");
1939    }
1940
1941    #[test]
1942    fn test_request_serialization() {
1943        let request = ResponsesRequest {
1944            model: "gpt-4o".to_string(),
1945            input: vec![ResponsesInputItem::Message {
1946                r#type: "message".to_string(),
1947                role: "user".to_string(),
1948                content: ResponsesContent::Text("Hello".to_string()),
1949                phase: None,
1950            }],
1951            instructions: Some("You are helpful".to_string()),
1952            previous_response_id: None,
1953            temperature: None,
1954            max_output_tokens: None,
1955            stream: true,
1956            tools: None,
1957            reasoning: None,
1958            metadata: None,
1959            prompt_cache_key: None,
1960        };
1961
1962        let json = serde_json::to_value(&request).unwrap();
1963        assert_eq!(json["model"], "gpt-4o");
1964        assert_eq!(json["stream"], true);
1965        assert_eq!(json["instructions"], "You are helpful");
1966        assert!(json["input"].is_array());
1967    }
1968
1969    #[test]
1970    fn test_request_with_reasoning() {
1971        let request = ResponsesRequest {
1972            model: "o3".to_string(),
1973            input: vec![ResponsesInputItem::Message {
1974                r#type: "message".to_string(),
1975                role: "user".to_string(),
1976                content: ResponsesContent::Text("Think about this".to_string()),
1977                phase: None,
1978            }],
1979            instructions: None,
1980            previous_response_id: None,
1981            temperature: None,
1982            max_output_tokens: None,
1983            stream: true,
1984            tools: None,
1985            reasoning: Some(ResponsesReasoning {
1986                effort: "high".to_string(),
1987                summary: "detailed".to_string(),
1988            }),
1989            metadata: None,
1990            prompt_cache_key: None,
1991        };
1992
1993        let json = serde_json::to_value(&request).unwrap();
1994        assert_eq!(json["reasoning"]["effort"], "high");
1995        assert_eq!(json["reasoning"]["summary"], "detailed");
1996    }
1997
1998    #[test]
1999    fn test_request_with_metadata() {
2000        let mut metadata = std::collections::HashMap::new();
2001        metadata.insert("session_id".to_string(), "session_abc123".to_string());
2002        metadata.insert("agent_id".to_string(), "agent_xyz789".to_string());
2003
2004        let request = ResponsesRequest {
2005            model: "gpt-4o".to_string(),
2006            input: vec![ResponsesInputItem::Message {
2007                r#type: "message".to_string(),
2008                role: "user".to_string(),
2009                content: ResponsesContent::Text("Hello".to_string()),
2010                phase: None,
2011            }],
2012            instructions: None,
2013            previous_response_id: None,
2014            temperature: None,
2015            max_output_tokens: None,
2016            stream: true,
2017            tools: None,
2018            reasoning: None,
2019            metadata: Some(metadata),
2020            prompt_cache_key: None,
2021        };
2022
2023        let json = serde_json::to_value(&request).unwrap();
2024        assert_eq!(json["metadata"]["session_id"], "session_abc123");
2025        assert_eq!(json["metadata"]["agent_id"], "agent_xyz789");
2026    }
2027
2028    #[test]
2029    fn test_build_prompt_cache_key_when_enabled() {
2030        let mut metadata = std::collections::HashMap::new();
2031        metadata.insert("session_id".to_string(), "session_abc123".to_string());
2032        let config = LlmCallConfig {
2033            model: "gpt-5.4".to_string(),
2034            temperature: None,
2035            max_tokens: None,
2036            tools: vec![],
2037            reasoning_effort: None,
2038            metadata,
2039            previous_response_id: None,
2040            tool_search: None,
2041            prompt_cache: Some(crate::llm_driver_registry::PromptCacheConfig {
2042                enabled: true,
2043                strategy: crate::llm_driver_registry::PromptCacheStrategy::Auto,
2044                gemini_cached_content: None,
2045            }),
2046        };
2047        let input = vec![ResponsesInputItem::Message {
2048            r#type: "message".to_string(),
2049            role: "user".to_string(),
2050            content: ResponsesContent::Text("Hello".to_string()),
2051            phase: None,
2052        }];
2053
2054        let key = OpenResponsesProtocolLlmDriver::build_prompt_cache_key(
2055            &config,
2056            &input,
2057            &Some("You are helpful".to_string()),
2058            &None,
2059        );
2060
2061        assert!(key.is_some());
2062        assert!(key.unwrap().starts_with("everruns:"));
2063    }
2064
2065    #[test]
2066    fn test_build_prompt_cache_key_ignores_changing_input() {
2067        let mut metadata = std::collections::HashMap::new();
2068        metadata.insert("session_id".to_string(), "session_abc123".to_string());
2069        let config = LlmCallConfig {
2070            model: "gpt-5.4".to_string(),
2071            temperature: None,
2072            max_tokens: None,
2073            tools: vec![],
2074            reasoning_effort: None,
2075            metadata,
2076            previous_response_id: None,
2077            tool_search: None,
2078            prompt_cache: Some(crate::llm_driver_registry::PromptCacheConfig {
2079                enabled: true,
2080                strategy: crate::llm_driver_registry::PromptCacheStrategy::Auto,
2081                gemini_cached_content: None,
2082            }),
2083        };
2084        let first_input = vec![ResponsesInputItem::Message {
2085            r#type: "message".to_string(),
2086            role: "user".to_string(),
2087            content: ResponsesContent::Text("first turn".to_string()),
2088            phase: None,
2089        }];
2090        let second_input = vec![ResponsesInputItem::Message {
2091            r#type: "message".to_string(),
2092            role: "user".to_string(),
2093            content: ResponsesContent::Text("second turn with different text".to_string()),
2094            phase: None,
2095        }];
2096
2097        let first = OpenResponsesProtocolLlmDriver::build_prompt_cache_key(
2098            &config,
2099            &first_input,
2100            &Some("You are helpful".to_string()),
2101            &None,
2102        );
2103        let second = OpenResponsesProtocolLlmDriver::build_prompt_cache_key(
2104            &config,
2105            &second_input,
2106            &Some("You are helpful".to_string()),
2107            &None,
2108        );
2109
2110        assert_eq!(first, second);
2111    }
2112
2113    #[test]
2114    fn test_build_prompt_cache_key_changes_with_cache_family() {
2115        let mut first_metadata = std::collections::HashMap::new();
2116        first_metadata.insert("session_id".to_string(), "session_abc123".to_string());
2117        let mut second_metadata = std::collections::HashMap::new();
2118        second_metadata.insert("session_id".to_string(), "session_xyz789".to_string());
2119        let make_config = |metadata| LlmCallConfig {
2120            model: "gpt-5.4".to_string(),
2121            temperature: None,
2122            max_tokens: None,
2123            tools: vec![],
2124            reasoning_effort: None,
2125            metadata,
2126            previous_response_id: None,
2127            tool_search: None,
2128            prompt_cache: Some(crate::llm_driver_registry::PromptCacheConfig {
2129                enabled: true,
2130                strategy: crate::llm_driver_registry::PromptCacheStrategy::Auto,
2131                gemini_cached_content: None,
2132            }),
2133        };
2134        let input = vec![ResponsesInputItem::Message {
2135            r#type: "message".to_string(),
2136            role: "user".to_string(),
2137            content: ResponsesContent::Text("same turn".to_string()),
2138            phase: None,
2139        }];
2140
2141        let first = OpenResponsesProtocolLlmDriver::build_prompt_cache_key(
2142            &make_config(first_metadata),
2143            &input,
2144            &Some("You are helpful".to_string()),
2145            &None,
2146        );
2147        let second = OpenResponsesProtocolLlmDriver::build_prompt_cache_key(
2148            &make_config(second_metadata),
2149            &input,
2150            &Some("You are helpful".to_string()),
2151            &None,
2152        );
2153
2154        assert_ne!(first, second);
2155    }
2156
2157    #[test]
2158    fn test_build_prompt_cache_key_stays_within_openai_limit() {
2159        let config = LlmCallConfig {
2160            model: "gpt-5.5".to_string(),
2161            temperature: None,
2162            max_tokens: None,
2163            tools: vec![],
2164            reasoning_effort: None,
2165            metadata: std::collections::HashMap::new(),
2166            previous_response_id: None,
2167            tool_search: None,
2168            prompt_cache: Some(crate::llm_driver_registry::PromptCacheConfig {
2169                enabled: true,
2170                strategy: crate::llm_driver_registry::PromptCacheStrategy::Auto,
2171                gemini_cached_content: None,
2172            }),
2173        };
2174        let input = vec![ResponsesInputItem::Message {
2175            r#type: "message".to_string(),
2176            role: "user".to_string(),
2177            content: ResponsesContent::Text("fetch chalyi.name for me".to_string()),
2178            phase: None,
2179        }];
2180
2181        let key = OpenResponsesProtocolLlmDriver::build_prompt_cache_key(
2182            &config,
2183            &input,
2184            &Some("You are helpful".to_string()),
2185            &None,
2186        )
2187        .unwrap();
2188
2189        assert!(
2190            key.len() <= 64,
2191            "OpenAI prompt_cache_key limit is 64 characters, got {}",
2192            key.len()
2193        );
2194    }
2195
2196    #[test]
2197    fn test_function_call_output_serialization() {
2198        let item = ResponsesInputItem::FunctionCallOutput {
2199            r#type: "function_call_output".to_string(),
2200            call_id: "call_123".to_string(),
2201            output: r#"{"result": 42}"#.to_string(),
2202        };
2203
2204        let json = serde_json::to_value(&item).unwrap();
2205        assert_eq!(json["type"], "function_call_output");
2206        assert_eq!(json["call_id"], "call_123");
2207        assert_eq!(json["output"], r#"{"result": 42}"#);
2208    }
2209
2210    #[test]
2211    fn test_multipart_content_serialization() {
2212        let content = ResponsesContent::Parts(vec![
2213            ResponsesContentPart::InputText {
2214                r#type: "input_text".to_string(),
2215                text: "Look at this image".to_string(),
2216            },
2217            ResponsesContentPart::InputImage {
2218                r#type: "input_image".to_string(),
2219                image_url: "data:image/png;base64,abc123".to_string(),
2220            },
2221        ]);
2222
2223        let json = serde_json::to_value(&content).unwrap();
2224        assert!(json.is_array());
2225        assert_eq!(json[0]["type"], "input_text");
2226        assert_eq!(json[1]["type"], "input_image");
2227    }
2228
2229    #[test]
2230    fn test_tool_serialization() {
2231        let tool = ResponsesTool::Function {
2232            r#type: "function".to_string(),
2233            name: "get_weather".to_string(),
2234            description: "Get weather for a location".to_string(),
2235            parameters: json!({
2236                "type": "object",
2237                "properties": {
2238                    "location": {"type": "string"}
2239                },
2240                "required": ["location"]
2241            }),
2242            defer_loading: None,
2243        };
2244
2245        let json = serde_json::to_value(&tool).unwrap();
2246        assert_eq!(json["type"], "function");
2247        assert_eq!(json["name"], "get_weather");
2248        assert!(json["parameters"]["properties"]["location"].is_object());
2249    }
2250
2251    #[test]
2252    fn test_build_input_extracts_system_as_instructions() {
2253        let messages = vec![
2254            LlmMessage::text(LlmMessageRole::System, "You are a helpful assistant"),
2255            LlmMessage::text(LlmMessageRole::User, "Hello"),
2256        ];
2257
2258        let (instructions, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
2259
2260        assert_eq!(
2261            instructions,
2262            Some("You are a helpful assistant".to_string())
2263        );
2264        assert_eq!(input.len(), 1); // Only user message, system converted to instructions
2265    }
2266
2267    #[test]
2268    fn test_convert_role() {
2269        assert_eq!(
2270            OpenResponsesProtocolLlmDriver::convert_role(&LlmMessageRole::System),
2271            "developer"
2272        );
2273        assert_eq!(
2274            OpenResponsesProtocolLlmDriver::convert_role(&LlmMessageRole::User),
2275            "user"
2276        );
2277        assert_eq!(
2278            OpenResponsesProtocolLlmDriver::convert_role(&LlmMessageRole::Assistant),
2279            "assistant"
2280        );
2281        assert_eq!(
2282            OpenResponsesProtocolLlmDriver::convert_role(&LlmMessageRole::Tool),
2283            "tool"
2284        );
2285    }
2286
2287    #[test]
2288    fn test_function_call_serialization() {
2289        let item = ResponsesInputItem::FunctionCall {
2290            r#type: "function_call".to_string(),
2291            call_id: "call_abc123".to_string(),
2292            name: "get_current_time".to_string(),
2293            arguments: r#"{"timezone":"UTC"}"#.to_string(),
2294        };
2295
2296        let json = serde_json::to_value(&item).unwrap();
2297        assert_eq!(json["type"], "function_call");
2298        assert_eq!(json["call_id"], "call_abc123");
2299        assert_eq!(json["name"], "get_current_time");
2300        assert_eq!(json["arguments"], r#"{"timezone":"UTC"}"#);
2301    }
2302
2303    #[test]
2304    fn test_build_input_with_tool_calls() {
2305        use crate::tool_types::ToolCall;
2306
2307        // Simulate a conversation with tool calls:
2308        // 1. User asks a question
2309        // 2. Assistant calls a tool
2310        // 3. Tool returns result
2311        let messages = vec![
2312            LlmMessage::text(LlmMessageRole::System, "You are helpful"),
2313            LlmMessage::text(LlmMessageRole::User, "What time is it?"),
2314            LlmMessage {
2315                role: LlmMessageRole::Assistant,
2316                content: LlmMessageContent::Text(String::new()),
2317                tool_calls: Some(vec![ToolCall {
2318                    id: "call_xyz789".to_string(),
2319                    name: "get_current_time".to_string(),
2320                    arguments: json!({"timezone": "UTC"}),
2321                }]),
2322                tool_call_id: None,
2323                phase: None,
2324                thinking: None,
2325                thinking_signature: None,
2326            },
2327            LlmMessage {
2328                role: LlmMessageRole::Tool,
2329                content: LlmMessageContent::Text("2025-01-19T10:30:00Z".to_string()),
2330                tool_calls: None,
2331                tool_call_id: Some("call_xyz789".to_string()),
2332                phase: None,
2333                thinking: None,
2334                thinking_signature: None,
2335            },
2336        ];
2337
2338        let (instructions, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
2339
2340        // System message becomes instructions
2341        assert_eq!(instructions, Some("You are helpful".to_string()));
2342
2343        // Should have: user message, function_call, function_call_output
2344        assert_eq!(input.len(), 3);
2345
2346        // Verify the function_call is present (second item, since assistant had empty content)
2347        let json = serde_json::to_value(&input[1]).unwrap();
2348        assert_eq!(json["type"], "function_call");
2349        assert_eq!(json["call_id"], "call_xyz789");
2350        assert_eq!(json["name"], "get_current_time");
2351
2352        // Verify the function_call_output is present
2353        let json = serde_json::to_value(&input[2]).unwrap();
2354        assert_eq!(json["type"], "function_call_output");
2355        assert_eq!(json["call_id"], "call_xyz789");
2356        assert_eq!(json["output"], "2025-01-19T10:30:00Z");
2357    }
2358
2359    #[test]
2360    fn test_build_input_with_tool_calls_and_text() {
2361        use crate::tool_types::ToolCall;
2362
2363        // Assistant message with both text content and tool calls
2364        let messages = vec![
2365            LlmMessage::text(LlmMessageRole::User, "What time is it?"),
2366            LlmMessage {
2367                role: LlmMessageRole::Assistant,
2368                content: LlmMessageContent::Text("Let me check the time for you.".to_string()),
2369                tool_calls: Some(vec![ToolCall {
2370                    id: "call_abc".to_string(),
2371                    name: "get_time".to_string(),
2372                    arguments: json!({}),
2373                }]),
2374                tool_call_id: None,
2375                phase: None,
2376                thinking: None,
2377                thinking_signature: None,
2378            },
2379        ];
2380
2381        let (_, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
2382
2383        // Should have: user message, assistant message, function_call
2384        assert_eq!(input.len(), 3);
2385
2386        // First is user message
2387        let json = serde_json::to_value(&input[0]).unwrap();
2388        assert_eq!(json["role"], "user");
2389
2390        // Second is assistant message with text
2391        let json = serde_json::to_value(&input[1]).unwrap();
2392        assert_eq!(json["role"], "assistant");
2393
2394        // Third is function_call
2395        let json = serde_json::to_value(&input[2]).unwrap();
2396        assert_eq!(json["type"], "function_call");
2397        assert_eq!(json["call_id"], "call_abc");
2398    }
2399
2400    // ========================================================================
2401    // EVE-488: Stateful Responses continuations must not double-send context.
2402    //
2403    // When `previous_response_id` is set, the OpenAI Responses provider already
2404    // holds the prior transcript server-side. Re-sending it in `input` causes
2405    // double-counting. These tests pin the invariant that the delta-trim helper
2406    // only keeps items strictly after the most recent assistant turn, and
2407    // that the request-building path applies the trim when (and only when) a
2408    // continuation handle is present.
2409    // ========================================================================
2410
2411    /// Issue reproducer: a stateful continuation must not carry the full prior
2412    /// transcript in `input` alongside `previous_response_id`. After trimming,
2413    /// only the new tool result and any fresh user input should remain.
2414    #[test]
2415    fn openresponses_requests_should_not_mix_previous_response_id_with_full_transcript() {
2416        use crate::tool_types::ToolCall;
2417
2418        // Simulate a multi-turn transcript: system + user + assistant(tool_call) + tool result.
2419        // This is the exact shape that gets reconstructed on a follow-up turn when
2420        // the runtime has a `previous_response_id` from the prior assistant turn.
2421        let messages = vec![
2422            LlmMessage::text(LlmMessageRole::System, "You are helpful"),
2423            LlmMessage::text(LlmMessageRole::User, "What time is it?"),
2424            LlmMessage {
2425                role: LlmMessageRole::Assistant,
2426                content: LlmMessageContent::Text("Let me check.".to_string()),
2427                tool_calls: Some(vec![ToolCall {
2428                    id: "call_xyz789".to_string(),
2429                    name: "get_current_time".to_string(),
2430                    arguments: json!({"timezone": "UTC"}),
2431                }]),
2432                tool_call_id: None,
2433                phase: None,
2434                thinking: None,
2435                thinking_signature: None,
2436            },
2437            LlmMessage {
2438                role: LlmMessageRole::Tool,
2439                content: LlmMessageContent::Text("2025-01-19T10:30:00Z".to_string()),
2440                tool_calls: None,
2441                tool_call_id: Some("call_xyz789".to_string()),
2442                phase: None,
2443                thinking: None,
2444                thinking_signature: None,
2445            },
2446        ];
2447
2448        // Build the full transcript the same way the driver does.
2449        let (instructions, full_input) =
2450            OpenResponsesProtocolLlmDriver::build_input(&messages, false);
2451
2452        // Without trimming the full transcript leaks user + assistant + function_call
2453        // + function_call_output — exactly the bug.
2454        assert!(
2455            full_input.len() > 1,
2456            "sanity: full transcript has multi items"
2457        );
2458
2459        // The trim performed when `previous_response_id` is present in the request
2460        // path must drop everything up to and including the last prior-assistant item.
2461        let delta = compute_delta_input_items(full_input);
2462
2463        // Only the tool result (function_call_output) should remain.
2464        assert_eq!(
2465            delta.len(),
2466            1,
2467            "stateful continuation must only send delta items; got {} items",
2468            delta.len()
2469        );
2470        let json = serde_json::to_value(&delta[0]).unwrap();
2471        assert_eq!(json["type"], "function_call_output");
2472        assert_eq!(json["call_id"], "call_xyz789");
2473        assert_eq!(json["output"], "2025-01-19T10:30:00Z");
2474
2475        // Instructions (system message) are NOT part of `input`; they're still sent
2476        // separately and that is correct — they don't count toward the invariant.
2477        assert_eq!(instructions, Some("You are helpful".to_string()));
2478    }
2479
2480    /// Stateless mode (no previous_response_id): all input items are kept.
2481    /// The trim helper is only invoked by the call path when previous_response_id
2482    /// is set; this test pins that the helper produces correct delta output
2483    /// regardless, leaving the fresh user message that follows the assistant turn.
2484    #[test]
2485    fn compute_delta_keeps_tail_after_assistant_message() {
2486        let items = vec![
2487            ResponsesInputItem::Message {
2488                r#type: "message".to_string(),
2489                role: "user".to_string(),
2490                content: ResponsesContent::Text("hi".to_string()),
2491                phase: None,
2492            },
2493            ResponsesInputItem::Message {
2494                r#type: "message".to_string(),
2495                role: "assistant".to_string(),
2496                content: ResponsesContent::Text("hello".to_string()),
2497                phase: None,
2498            },
2499            ResponsesInputItem::Message {
2500                r#type: "message".to_string(),
2501                role: "user".to_string(),
2502                content: ResponsesContent::Text("follow up".to_string()),
2503                phase: None,
2504            },
2505        ];
2506        let trimmed = compute_delta_input_items(items);
2507        assert_eq!(trimmed.len(), 1);
2508        let json = serde_json::to_value(&trimmed[0]).unwrap();
2509        assert_eq!(json["role"], "user");
2510        assert_eq!(
2511            json["content"], "follow up",
2512            "trim keeps the fresh user message that arrived after the assistant turn"
2513        );
2514    }
2515
2516    /// Stateful continuation with parallel tool calls: every tool output that
2517    /// follows the prior assistant's function_call items is kept. The function_call
2518    /// items themselves belong to server-side state and are dropped.
2519    #[test]
2520    fn compute_delta_keeps_tool_results_after_last_assistant_turn() {
2521        let items = vec![
2522            ResponsesInputItem::Message {
2523                r#type: "message".to_string(),
2524                role: "user".to_string(),
2525                content: ResponsesContent::Text("do two things".to_string()),
2526                phase: None,
2527            },
2528            ResponsesInputItem::Message {
2529                r#type: "message".to_string(),
2530                role: "assistant".to_string(),
2531                content: ResponsesContent::Text("ok".to_string()),
2532                phase: None,
2533            },
2534            ResponsesInputItem::FunctionCall {
2535                r#type: "function_call".to_string(),
2536                call_id: "call_a".to_string(),
2537                name: "tool_a".to_string(),
2538                arguments: "{}".to_string(),
2539            },
2540            ResponsesInputItem::FunctionCall {
2541                r#type: "function_call".to_string(),
2542                call_id: "call_b".to_string(),
2543                name: "tool_b".to_string(),
2544                arguments: "{}".to_string(),
2545            },
2546            ResponsesInputItem::FunctionCallOutput {
2547                r#type: "function_call_output".to_string(),
2548                call_id: "call_a".to_string(),
2549                output: "a result".to_string(),
2550            },
2551            ResponsesInputItem::FunctionCallOutput {
2552                r#type: "function_call_output".to_string(),
2553                call_id: "call_b".to_string(),
2554                output: "b result".to_string(),
2555            },
2556        ];
2557
2558        let trimmed = compute_delta_input_items(items);
2559
2560        // The function_call items live in server-side state. The delta carries
2561        // only the tool outputs the client produced for them.
2562        assert_eq!(trimmed.len(), 2);
2563        for item in &trimmed {
2564            let json = serde_json::to_value(item).unwrap();
2565            assert_eq!(json["type"], "function_call_output");
2566        }
2567    }
2568
2569    /// Empty input with previous_response_id is valid: the provider can resume
2570    /// purely from the continuation handle, no input needed.
2571    #[test]
2572    fn compute_delta_allows_empty_input_for_stateful_continuation() {
2573        let trimmed = compute_delta_input_items(vec![]);
2574        assert!(trimmed.is_empty());
2575    }
2576
2577    /// Defensive: if no prior-assistant item is present (caller passed only fresh
2578    /// user input), all items are kept as delta.
2579    #[test]
2580    fn compute_delta_keeps_all_items_when_no_assistant_turn_present() {
2581        let items = vec![
2582            ResponsesInputItem::Message {
2583                r#type: "message".to_string(),
2584                role: "user".to_string(),
2585                content: ResponsesContent::Text("one".to_string()),
2586                phase: None,
2587            },
2588            ResponsesInputItem::Message {
2589                r#type: "message".to_string(),
2590                role: "user".to_string(),
2591                content: ResponsesContent::Text("two".to_string()),
2592                phase: None,
2593            },
2594        ];
2595        let trimmed = compute_delta_input_items(items);
2596        assert_eq!(trimmed.len(), 2);
2597    }
2598
2599    /// Reasoning items from a prior assistant turn are also dropped by the trim.
2600    #[test]
2601    fn compute_delta_drops_prior_reasoning_items() {
2602        let items = vec![
2603            ResponsesInputItem::Reasoning {
2604                r#type: "reasoning".to_string(),
2605                id: "rs_00000001".to_string(),
2606                encrypted_content: "encrypted-blob".to_string(),
2607            },
2608            ResponsesInputItem::Message {
2609                r#type: "message".to_string(),
2610                role: "assistant".to_string(),
2611                content: ResponsesContent::Text("prior".to_string()),
2612                phase: None,
2613            },
2614            ResponsesInputItem::FunctionCallOutput {
2615                r#type: "function_call_output".to_string(),
2616                call_id: "call_z".to_string(),
2617                output: "result".to_string(),
2618            },
2619        ];
2620        let trimmed = compute_delta_input_items(items);
2621        assert_eq!(trimmed.len(), 1);
2622        let json = serde_json::to_value(&trimmed[0]).unwrap();
2623        assert_eq!(json["type"], "function_call_output");
2624    }
2625
2626    // ------------------------------------------------------------------------
2627    // Request-builder integration: `finalize_input_for_request` is the single
2628    // gate that chooses whether the request `input` is trimmed. These tests
2629    // pin the exact decision the call path makes — they catch regressions
2630    // where the `previous_response_id`-presence check is accidentally dropped
2631    // or inverted, which is what would re-introduce the bug even if the trim
2632    // helper itself stays correct.
2633    // ------------------------------------------------------------------------
2634
2635    fn sample_full_transcript_items() -> Vec<ResponsesInputItem> {
2636        vec![
2637            ResponsesInputItem::Message {
2638                r#type: "message".to_string(),
2639                role: "user".to_string(),
2640                content: ResponsesContent::Text("first request".to_string()),
2641                phase: None,
2642            },
2643            ResponsesInputItem::Message {
2644                r#type: "message".to_string(),
2645                role: "assistant".to_string(),
2646                content: ResponsesContent::Text("first reply".to_string()),
2647                phase: None,
2648            },
2649            ResponsesInputItem::Message {
2650                r#type: "message".to_string(),
2651                role: "user".to_string(),
2652                content: ResponsesContent::Text("follow-up".to_string()),
2653                phase: None,
2654            },
2655        ]
2656    }
2657
2658    #[test]
2659    fn finalize_input_skips_trim_when_previous_response_id_is_none() {
2660        let items = sample_full_transcript_items();
2661        let original_len = items.len();
2662        let out = finalize_input_for_request(items, &None);
2663        assert_eq!(
2664            out.len(),
2665            original_len,
2666            "stateless mode keeps the full transcript so the model has context"
2667        );
2668    }
2669
2670    #[test]
2671    fn finalize_input_drops_locally_orphaned_tool_output_without_previous_response_id() {
2672        let items = vec![
2673            ResponsesInputItem::Message {
2674                r#type: "message".to_string(),
2675                role: "user".to_string(),
2676                content: ResponsesContent::Text("fresh".to_string()),
2677                phase: None,
2678            },
2679            ResponsesInputItem::FunctionCallOutput {
2680                r#type: "function_call_output".to_string(),
2681                call_id: "call_trimmed".to_string(),
2682                output: "result".to_string(),
2683            },
2684        ];
2685
2686        let out = finalize_input_for_request(items, &None);
2687
2688        assert_eq!(out.len(), 1);
2689        let json = serde_json::to_value(&out[0]).unwrap();
2690        assert_eq!(json["type"], "message");
2691    }
2692
2693    #[test]
2694    fn finalize_input_keeps_tool_output_with_previous_response_id_even_without_local_call() {
2695        let items = vec![
2696            ResponsesInputItem::FunctionCallOutput {
2697                r#type: "function_call_output".to_string(),
2698                call_id: "call_server_side".to_string(),
2699                output: "stateful result".to_string(),
2700            },
2701            ResponsesInputItem::Message {
2702                r#type: "message".to_string(),
2703                role: "user".to_string(),
2704                content: ResponsesContent::Text("follow-up".to_string()),
2705                phase: None,
2706            },
2707        ];
2708
2709        let out = finalize_input_for_request(items, &Some("resp_prev_42".to_string()));
2710
2711        assert_eq!(out.len(), 2);
2712        let json = serde_json::to_value(&out[0]).unwrap();
2713        assert_eq!(json["type"], "function_call_output");
2714        assert_eq!(json["call_id"], "call_server_side");
2715    }
2716
2717    #[test]
2718    fn finalize_input_trims_when_previous_response_id_is_set() {
2719        let items = sample_full_transcript_items();
2720        let out = finalize_input_for_request(items, &Some("resp_prev_42".to_string()));
2721        assert_eq!(
2722            out.len(),
2723            1,
2724            "stateful continuation must drop everything up to and including the prior assistant message"
2725        );
2726        let json = serde_json::to_value(&out[0]).unwrap();
2727        assert_eq!(json["type"], "message");
2728        assert_eq!(json["role"], "user");
2729        // Only the post-assistant follow-up survives.
2730        let txt = json["content"].as_str().unwrap_or("");
2731        assert_eq!(txt, "follow-up");
2732    }
2733
2734    #[test]
2735    fn finalize_input_allows_empty_input_with_previous_response_id() {
2736        let out = finalize_input_for_request(vec![], &Some("resp_anything".to_string()));
2737        assert!(
2738            out.is_empty(),
2739            "empty delta is valid — the provider can resume purely from the response id"
2740        );
2741    }
2742
2743    // ========================================================================
2744    // Stateless-gateway detection (EVE-523)
2745    // ========================================================================
2746
2747    #[test]
2748    fn endpoint_persists_responses_for_openai_and_azure() {
2749        // OpenAI hosted API — stateful.
2750        assert!(endpoint_persists_responses(
2751            "https://api.openai.com/v1/responses"
2752        ));
2753        assert!(endpoint_persists_responses(
2754            "https://api.openai.com:443/v1/responses"
2755        ));
2756        // Azure OpenAI — stateful.
2757        assert!(endpoint_persists_responses(
2758            "https://my-resource.openai.azure.com/openai/v1/responses"
2759        ));
2760        assert!(endpoint_persists_responses(
2761            "https://my-resource.services.ai.azure.com/openai/v1/responses"
2762        ));
2763    }
2764
2765    #[test]
2766    fn endpoint_does_not_persist_for_stateless_gateways() {
2767        // OpenRouter and Gemini's compat shim accept `previous_response_id` but
2768        // ignore it — they must be treated as stateless so we replay the full
2769        // transcript each turn (EVE-523).
2770        assert!(!endpoint_persists_responses(
2771            "https://openrouter.ai/api/v1/responses"
2772        ));
2773        assert!(!endpoint_persists_responses(
2774            "https://generativelanguage.googleapis.com/v1beta/openai/responses"
2775        ));
2776        // A host that merely contains "openai" in its name must not be trusted.
2777        assert!(!endpoint_persists_responses(
2778            "https://api.openai.example.com/v1/responses"
2779        ));
2780    }
2781
2782    /// End-to-end shape of the call path: against a stateless gateway, a request
2783    /// that carries a `previous_response_id` in config must still send the FULL
2784    /// transcript in `input` (no trim) because the gateway will not have stored
2785    /// the prior response. This is the core EVE-523 regression guard.
2786    #[test]
2787    fn stateless_gateway_replays_full_transcript_despite_previous_response_id() {
2788        let api_url = "https://openrouter.ai/api/v1/responses";
2789        let prev_id: Option<String> = Some("gen-turn-1".to_string());
2790
2791        // Mirror the gating the call path performs.
2792        let effective_prev_id = if endpoint_persists_responses(api_url) {
2793            prev_id.clone()
2794        } else {
2795            None
2796        };
2797        assert!(
2798            effective_prev_id.is_none(),
2799            "stateless gateway must not chain via previous_response_id"
2800        );
2801
2802        let items = sample_full_transcript_items();
2803        let original_len = items.len();
2804        let out = finalize_input_for_request(items, &effective_prev_id);
2805        assert_eq!(
2806            out.len(),
2807            original_len,
2808            "stateless gateway must replay the full transcript so the model keeps context"
2809        );
2810    }
2811
2812    /// The same transcript against OpenAI's hosted API trims to the delta window
2813    /// and keeps the continuation handle — confirming the optimization is intact
2814    /// for genuinely stateful endpoints.
2815    #[test]
2816    fn stateful_endpoint_still_trims_and_chains() {
2817        let api_url = "https://api.openai.com/v1/responses";
2818        let prev_id: Option<String> = Some("resp_turn_1".to_string());
2819
2820        let effective_prev_id = if endpoint_persists_responses(api_url) {
2821            prev_id.clone()
2822        } else {
2823            None
2824        };
2825        assert_eq!(
2826            effective_prev_id, prev_id,
2827            "stateful endpoint keeps the continuation handle"
2828        );
2829
2830        let out = finalize_input_for_request(sample_full_transcript_items(), &effective_prev_id);
2831        assert_eq!(out.len(), 1, "stateful endpoint trims to the delta window");
2832    }
2833
2834    /// Wire-level EVE-523 reproducer: drive the real `chat_completion_stream`
2835    /// against a mock endpoint on a non-OpenAI host. Even with a
2836    /// `previous_response_id` in config, the request on the wire must omit it and
2837    /// carry the FULL transcript (user task + assistant turn + tool result), so a
2838    /// stateless gateway that ignores `previous_response_id` still sees the task.
2839    #[tokio::test]
2840    async fn stateless_gateway_request_replays_full_transcript_on_the_wire() {
2841        use crate::tool_types::ToolCall;
2842        use serde_json::json;
2843        use wiremock::matchers::method;
2844        use wiremock::{Mock, MockServer, ResponseTemplate};
2845
2846        let server = MockServer::start().await;
2847        // Any 200 lets the request through; we inspect the captured request, not
2848        // the (empty) streamed body.
2849        Mock::given(method("POST"))
2850            .respond_with(ResponseTemplate::new(200).set_body_string(""))
2851            .mount(&server)
2852            .await;
2853
2854        // server.uri() is a 127.0.0.1 host — not OpenAI/Azure — so it is treated
2855        // as a stateless gateway.
2856        let api_url = format!("{}/v1/responses", server.uri());
2857        let driver = OpenResponsesProtocolLlmDriver::with_base_url("test-key", api_url);
2858
2859        let messages = vec![
2860            LlmMessage::text(LlmMessageRole::System, "You are helpful"),
2861            LlmMessage::text(LlmMessageRole::User, "upgrade dependencies"),
2862            LlmMessage {
2863                role: LlmMessageRole::Assistant,
2864                content: LlmMessageContent::Text("Let me look.".to_string()),
2865                tool_calls: Some(vec![ToolCall {
2866                    id: "call_1".to_string(),
2867                    name: "read_file".to_string(),
2868                    arguments: json!({"path": "Cargo.toml"}),
2869                }]),
2870                tool_call_id: None,
2871                phase: None,
2872                thinking: None,
2873                thinking_signature: None,
2874            },
2875            LlmMessage {
2876                role: LlmMessageRole::Tool,
2877                content: LlmMessageContent::Text("[package]…".to_string()),
2878                tool_calls: None,
2879                tool_call_id: Some("call_1".to_string()),
2880                phase: None,
2881                thinking: None,
2882                thinking_signature: None,
2883            },
2884        ];
2885
2886        let config = LlmCallConfig {
2887            model: "some/model".to_string(),
2888            temperature: None,
2889            max_tokens: None,
2890            tools: vec![],
2891            reasoning_effort: None,
2892            metadata: std::collections::HashMap::new(),
2893            // Continuation handle from a prior turn — must be ignored on a
2894            // stateless gateway.
2895            previous_response_id: Some("gen-turn-1".to_string()),
2896            tool_search: None,
2897            prompt_cache: None,
2898        };
2899
2900        // Fire the request. The stream body is irrelevant for this assertion.
2901        let _ = driver.chat_completion_stream(messages, &config).await;
2902
2903        let requests = server
2904            .received_requests()
2905            .await
2906            .expect("mock server recorded requests");
2907        assert_eq!(requests.len(), 1, "exactly one request should be sent");
2908        let body: serde_json::Value = requests[0].body_json().expect("request body is JSON");
2909
2910        // previous_response_id must be absent (skipped) — the gateway would ignore it.
2911        assert!(
2912            body.get("previous_response_id").is_none(),
2913            "stateless gateway request must omit previous_response_id; body: {body}"
2914        );
2915
2916        // The full transcript must be replayed: user message, assistant message,
2917        // function_call, and function_call_output (instructions carry the system msg).
2918        let input = body["input"].as_array().expect("input is an array");
2919        assert_eq!(
2920            input.len(),
2921            4,
2922            "full transcript must be replayed on a stateless gateway; got {input:?}"
2923        );
2924        assert_eq!(body["instructions"], "You are helpful");
2925        let has_user_task = input
2926            .iter()
2927            .any(|item| item["type"] == "message" && item["role"] == "user");
2928        assert!(
2929            has_user_task,
2930            "the original user task must be replayed; got {input:?}"
2931        );
2932        let has_tool_output = input
2933            .iter()
2934            .any(|item| item["type"] == "function_call_output");
2935        assert!(
2936            has_tool_output,
2937            "the latest tool result must still be present; got {input:?}"
2938        );
2939    }
2940
2941    #[tokio::test]
2942    async fn openrouter_provider_does_not_send_hosted_tool_search() {
2943        use crate::tool_types::DeferrablePolicy;
2944        use serde_json::json;
2945        use wiremock::matchers::method;
2946        use wiremock::{Mock, MockServer, ResponseTemplate};
2947
2948        let server = MockServer::start().await;
2949        Mock::given(method("POST"))
2950            .respond_with(ResponseTemplate::new(200).set_body_string(""))
2951            .mount(&server)
2952            .await;
2953
2954        let api_url = format!("{}/v1/responses", server.uri());
2955        let driver = OpenResponsesProtocolLlmDriver::with_base_url("test-key", api_url)
2956            .with_provider_type(LlmProviderType::Openrouter);
2957
2958        let tools: Vec<ToolDefinition> = (0..16)
2959            .map(|i| {
2960                make_tool(
2961                    &format!("tool_{i}"),
2962                    Some("General"),
2963                    DeferrablePolicy::Automatic,
2964                )
2965            })
2966            .collect();
2967
2968        let config = LlmCallConfig {
2969            model: "gpt-5.4".to_string(),
2970            temperature: None,
2971            max_tokens: None,
2972            tools,
2973            reasoning_effort: None,
2974            metadata: std::collections::HashMap::new(),
2975            previous_response_id: None,
2976            tool_search: Some(crate::llm_driver_registry::ToolSearchConfig {
2977                enabled: true,
2978                threshold: 15,
2979            }),
2980            prompt_cache: None,
2981        };
2982
2983        let messages = vec![LlmMessage::text(LlmMessageRole::User, "hello")];
2984        let _ = driver.chat_completion_stream(messages, &config).await;
2985
2986        let requests = server
2987            .received_requests()
2988            .await
2989            .expect("mock server recorded requests");
2990        assert_eq!(requests.len(), 1, "exactly one request should be sent");
2991        let body: serde_json::Value = requests[0].body_json().expect("request body is JSON");
2992        let tools = body["tools"].as_array().expect("tools is an array");
2993
2994        assert!(
2995            tools.iter().all(|tool| tool["type"] == "function"),
2996            "OpenRouter should receive regular function tools, not hosted tool_search payloads: {tools:?}"
2997        );
2998        assert!(
2999            tools.iter().all(|tool| tool.get("defer_loading").is_none()),
3000            "OpenRouter tool schemas should not be deferred by hosted tool_search: {tools:?}"
3001        );
3002        assert_eq!(
3003            body["input"],
3004            json!([{"type": "message", "role": "user", "content": "hello"}])
3005        );
3006    }
3007
3008    // ========================================================================
3009    // Compact endpoint tests
3010    // ========================================================================
3011
3012    #[test]
3013    fn test_compact_request_serialization() {
3014        let request = CompactRequest {
3015            model: "gpt-4o".to_string(),
3016            input: vec![
3017                CompactInputItem::Message {
3018                    role: "user".to_string(),
3019                    content: CompactContent::Text("Hello!".to_string()),
3020                },
3021                CompactInputItem::Message {
3022                    role: "assistant".to_string(),
3023                    content: CompactContent::Text("Hi there!".to_string()),
3024                },
3025            ],
3026            previous_response_id: None,
3027            instructions: Some("Be helpful".to_string()),
3028        };
3029
3030        let json = serde_json::to_value(&request).unwrap();
3031        assert_eq!(json["model"], "gpt-4o");
3032        assert_eq!(json["instructions"], "Be helpful");
3033        assert!(json["input"].is_array());
3034        assert_eq!(json["input"].as_array().unwrap().len(), 2);
3035    }
3036
3037    #[test]
3038    fn test_compact_input_item_message_serialization() {
3039        let item = CompactInputItem::Message {
3040            role: "user".to_string(),
3041            content: CompactContent::Text("Test message".to_string()),
3042        };
3043
3044        let json = serde_json::to_value(&item).unwrap();
3045        assert_eq!(json["type"], "message");
3046        assert_eq!(json["role"], "user");
3047        assert_eq!(json["content"], "Test message");
3048    }
3049
3050    #[test]
3051    fn test_compact_input_item_function_call_serialization() {
3052        let item = CompactInputItem::FunctionCall {
3053            call_id: "call_123".to_string(),
3054            name: "get_weather".to_string(),
3055            arguments: r#"{"city":"NYC"}"#.to_string(),
3056        };
3057
3058        let json = serde_json::to_value(&item).unwrap();
3059        assert_eq!(json["type"], "function_call");
3060        assert_eq!(json["call_id"], "call_123");
3061        assert_eq!(json["name"], "get_weather");
3062        assert_eq!(json["arguments"], r#"{"city":"NYC"}"#);
3063    }
3064
3065    #[test]
3066    fn test_compact_input_item_compaction_serialization() {
3067        let item = CompactInputItem::Compaction {
3068            encrypted_content: "encrypted_data_here".to_string(),
3069        };
3070
3071        let json = serde_json::to_value(&item).unwrap();
3072        assert_eq!(json["type"], "compaction");
3073        assert_eq!(json["encrypted_content"], "encrypted_data_here");
3074    }
3075
3076    #[test]
3077    fn test_compact_output_item_deserialization() {
3078        let json = r#"{
3079            "type": "message",
3080            "role": "user",
3081            "content": "Hello"
3082        }"#;
3083
3084        let item: CompactOutputItem = serde_json::from_str(json).unwrap();
3085        match item {
3086            CompactOutputItem::Message { role, content } => {
3087                assert_eq!(role, "user");
3088                match content {
3089                    CompactContent::Text(text) => assert_eq!(text, "Hello"),
3090                    _ => panic!("Expected text content"),
3091                }
3092            }
3093            _ => panic!("Expected Message item"),
3094        }
3095    }
3096
3097    #[test]
3098    fn test_compact_output_compaction_deserialization() {
3099        let json = r#"{
3100            "type": "compaction",
3101            "encrypted_content": "abc123encrypted"
3102        }"#;
3103
3104        let item: CompactOutputItem = serde_json::from_str(json).unwrap();
3105        match item {
3106            CompactOutputItem::Compaction { encrypted_content } => {
3107                assert_eq!(encrypted_content, "abc123encrypted");
3108            }
3109            _ => panic!("Expected Compaction item"),
3110        }
3111    }
3112
3113    #[test]
3114    fn test_compact_response_deserialization() {
3115        let json = r#"{
3116            "output": [
3117                {"type": "message", "role": "user", "content": "Hello"},
3118                {"type": "compaction", "encrypted_content": "xyz789"}
3119            ],
3120            "usage": {
3121                "input_tokens": 100,
3122                "output_tokens": 50,
3123                "total_tokens": 150
3124            }
3125        }"#;
3126
3127        let response: CompactResponse = serde_json::from_str(json).unwrap();
3128        assert_eq!(response.output.len(), 2);
3129        assert!(response.usage.is_some());
3130        let usage = response.usage.unwrap();
3131        assert_eq!(usage.input_tokens, Some(100));
3132        assert_eq!(usage.output_tokens, Some(50));
3133        assert_eq!(usage.total_tokens, Some(150));
3134    }
3135
3136    #[test]
3137    fn test_compact_content_parts_serialization() {
3138        let content = CompactContent::Parts(vec![
3139            CompactContentPart::InputText {
3140                text: "Check this image".to_string(),
3141            },
3142            CompactContentPart::InputImage {
3143                image_url: "data:image/png;base64,abc".to_string(),
3144            },
3145        ]);
3146
3147        let json = serde_json::to_value(&content).unwrap();
3148        assert!(json.is_array());
3149        assert_eq!(json[0]["type"], "input_text");
3150        assert_eq!(json[0]["text"], "Check this image");
3151        assert_eq!(json[1]["type"], "input_image");
3152    }
3153
3154    #[test]
3155    fn test_supports_compact_default_url() {
3156        let driver = OpenResponsesProtocolLlmDriver::new("test-key");
3157        // Default URL is OpenAI, should support compact
3158        assert!(driver.supports_compact());
3159    }
3160
3161    #[test]
3162    fn test_supports_compact_custom_url() {
3163        let driver = OpenResponsesProtocolLlmDriver::with_base_url(
3164            "test-key",
3165            "https://custom.api.com/v1/responses",
3166        );
3167        // Custom URL, compact support unknown
3168        assert!(!driver.supports_compact());
3169    }
3170
3171    // ========================================================================
3172    // OpenAI Thinking/Reasoning Support Tests
3173    // ========================================================================
3174
3175    #[test]
3176    fn test_reasoning_input_item_serialization() {
3177        let item = ResponsesInputItem::Reasoning {
3178            r#type: "reasoning".to_string(),
3179            id: "rs_00000001".to_string(),
3180            encrypted_content: "encrypted_reasoning_context_here".to_string(),
3181        };
3182
3183        let json = serde_json::to_value(&item).unwrap();
3184        assert_eq!(json["type"], "reasoning");
3185        assert_eq!(json["id"], "rs_00000001");
3186        assert_eq!(
3187            json["encrypted_content"],
3188            "encrypted_reasoning_context_here"
3189        );
3190    }
3191
3192    #[test]
3193    fn test_build_input_with_thinking_signature() {
3194        // Assistant message with thinking and thinking_signature (encrypted_content)
3195        let messages = vec![
3196            LlmMessage::text(LlmMessageRole::User, "Think about this deeply"),
3197            LlmMessage {
3198                role: LlmMessageRole::Assistant,
3199                content: LlmMessageContent::Text("I have thought about this.".to_string()),
3200                tool_calls: None,
3201                tool_call_id: None,
3202                phase: None,
3203                thinking: Some("This is my chain of thought reasoning...".to_string()),
3204                thinking_signature: Some("encrypted_reasoning_token_123".to_string()),
3205            },
3206            LlmMessage::text(LlmMessageRole::User, "What else?"),
3207        ];
3208
3209        let (_, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
3210
3211        // Should have: user message, reasoning item, assistant message, user message
3212        assert_eq!(input.len(), 4);
3213
3214        // First is user message
3215        let json = serde_json::to_value(&input[0]).unwrap();
3216        assert_eq!(json["role"], "user");
3217        assert_eq!(json["content"], "Think about this deeply");
3218
3219        // Second is reasoning item (before assistant message)
3220        let json = serde_json::to_value(&input[1]).unwrap();
3221        assert_eq!(json["type"], "reasoning");
3222        assert_eq!(json["encrypted_content"], "encrypted_reasoning_token_123");
3223
3224        // Third is assistant message
3225        let json = serde_json::to_value(&input[2]).unwrap();
3226        assert_eq!(json["role"], "assistant");
3227        assert_eq!(json["content"], "I have thought about this.");
3228
3229        // Fourth is second user message
3230        let json = serde_json::to_value(&input[3]).unwrap();
3231        assert_eq!(json["role"], "user");
3232    }
3233
3234    #[test]
3235    fn test_build_input_with_thinking_signature_and_tool_calls() {
3236        use crate::tool_types::ToolCall;
3237
3238        // Assistant message with thinking, tool calls, and thinking_signature
3239        let messages = vec![
3240            LlmMessage::text(LlmMessageRole::User, "What time is it? Think carefully."),
3241            LlmMessage {
3242                role: LlmMessageRole::Assistant,
3243                content: LlmMessageContent::Text("Let me check.".to_string()),
3244                tool_calls: Some(vec![ToolCall {
3245                    id: "call_123".to_string(),
3246                    name: "get_time".to_string(),
3247                    arguments: json!({}),
3248                }]),
3249                tool_call_id: None,
3250                phase: None,
3251                thinking: Some("I need to call the get_time tool...".to_string()),
3252                thinking_signature: Some("encrypted_token_xyz".to_string()),
3253            },
3254            LlmMessage {
3255                role: LlmMessageRole::Tool,
3256                content: LlmMessageContent::Text("10:30 AM".to_string()),
3257                tool_calls: None,
3258                tool_call_id: Some("call_123".to_string()),
3259                phase: None,
3260                thinking: None,
3261                thinking_signature: None,
3262            },
3263        ];
3264
3265        let (_, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
3266
3267        // Should have: user, reasoning, assistant, function_call, function_call_output
3268        assert_eq!(input.len(), 5);
3269
3270        // Reasoning item comes before assistant message
3271        let json = serde_json::to_value(&input[1]).unwrap();
3272        assert_eq!(json["type"], "reasoning");
3273        assert_eq!(json["encrypted_content"], "encrypted_token_xyz");
3274
3275        // Assistant message
3276        let json = serde_json::to_value(&input[2]).unwrap();
3277        assert_eq!(json["role"], "assistant");
3278
3279        // Function call
3280        let json = serde_json::to_value(&input[3]).unwrap();
3281        assert_eq!(json["type"], "function_call");
3282        assert_eq!(json["call_id"], "call_123");
3283
3284        // Function call output
3285        let json = serde_json::to_value(&input[4]).unwrap();
3286        assert_eq!(json["type"], "function_call_output");
3287    }
3288
3289    #[test]
3290    fn test_build_input_without_thinking_signature() {
3291        // Assistant message with thinking but NO thinking_signature should not emit reasoning item
3292        let messages = vec![
3293            LlmMessage::text(LlmMessageRole::User, "Hello"),
3294            LlmMessage {
3295                role: LlmMessageRole::Assistant,
3296                content: LlmMessageContent::Text("Hi there!".to_string()),
3297                tool_calls: None,
3298                tool_call_id: None,
3299                phase: None,
3300                thinking: Some("Some thinking...".to_string()),
3301                thinking_signature: None, // No signature!
3302            },
3303        ];
3304
3305        let (_, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
3306
3307        // Should have: user message, assistant message (no reasoning item)
3308        assert_eq!(input.len(), 2);
3309
3310        // Verify no reasoning item
3311        let json = serde_json::to_value(&input[0]).unwrap();
3312        assert_eq!(json["role"], "user");
3313
3314        let json = serde_json::to_value(&input[1]).unwrap();
3315        assert_eq!(json["role"], "assistant");
3316    }
3317
3318    #[test]
3319    fn test_handle_streaming_event_reasoning_encrypted_content() {
3320        use std::sync::Mutex;
3321
3322        let input_tokens = Mutex::new(0u32);
3323        let output_tokens = Mutex::new(0u32);
3324        let cache_read_tokens = Mutex::new(None);
3325        let accumulated_tool_calls = Mutex::new(Vec::new());
3326        let finish_reason = Mutex::new(None);
3327
3328        // Create an OutputItemDone event with Reasoning item containing encrypted_content
3329        let event = StreamingEvent::OutputItemDone {
3330            sequence_number: 5,
3331            output_index: 0,
3332            item: Some(types::OutputItem::Reasoning {
3333                id: "rs_001".to_string(),
3334                summary: vec![],
3335                content: None,
3336                encrypted_content: Some("encrypted_reasoning_data".to_string()),
3337            }),
3338        };
3339
3340        let result = handle_streaming_event(
3341            event,
3342            &input_tokens,
3343            &output_tokens,
3344            &cache_read_tokens,
3345            &accumulated_tool_calls,
3346            &finish_reason,
3347            "gpt-5".to_string(),
3348            None,
3349        );
3350
3351        // Should emit ReasonItem with the encrypted content and metadata
3352        match result {
3353            LlmStreamEvent::ReasonItem {
3354                provider,
3355                model,
3356                item_id,
3357                encrypted_content,
3358                summary,
3359                token_count,
3360            } => {
3361                assert_eq!(provider, "openai");
3362                assert_eq!(model.as_deref(), Some("gpt-5"));
3363                assert_eq!(item_id, "rs_001");
3364                assert_eq!(
3365                    encrypted_content.as_deref(),
3366                    Some("encrypted_reasoning_data")
3367                );
3368                assert!(summary.is_empty());
3369                assert!(token_count.is_none());
3370            }
3371            other => panic!("Expected ReasonItem event, got {:?}", other),
3372        }
3373    }
3374
3375    #[test]
3376    fn test_handle_streaming_event_reasoning_without_encrypted_content() {
3377        use std::sync::Mutex;
3378
3379        let input_tokens = Mutex::new(0u32);
3380        let output_tokens = Mutex::new(0u32);
3381        let cache_read_tokens = Mutex::new(None);
3382        let accumulated_tool_calls = Mutex::new(Vec::new());
3383        let finish_reason = Mutex::new(None);
3384
3385        // Create an OutputItemDone event with Reasoning item but NO encrypted_content
3386        let event = StreamingEvent::OutputItemDone {
3387            sequence_number: 5,
3388            output_index: 0,
3389            item: Some(types::OutputItem::Reasoning {
3390                id: "rs_001".to_string(),
3391                summary: vec![types::ContentPart::SummaryText {
3392                    text: "Some summary".to_string(),
3393                }],
3394                content: None,
3395                encrypted_content: None, // No encrypted content
3396            }),
3397        };
3398
3399        let result = handle_streaming_event(
3400            event,
3401            &input_tokens,
3402            &output_tokens,
3403            &cache_read_tokens,
3404            &accumulated_tool_calls,
3405            &finish_reason,
3406            "gpt-5".to_string(),
3407            None,
3408        );
3409
3410        // Should still emit ReasonItem carrying the safe summary even when no
3411        // encrypted content is present so the durable reasoning record survives.
3412        match result {
3413            LlmStreamEvent::ReasonItem {
3414                provider,
3415                item_id,
3416                encrypted_content,
3417                summary,
3418                ..
3419            } => {
3420                assert_eq!(provider, "openai");
3421                assert_eq!(item_id, "rs_001");
3422                assert!(encrypted_content.is_none());
3423                assert_eq!(summary, vec!["Some summary".to_string()]);
3424            }
3425            other => panic!("Expected ReasonItem event, got {:?}", other),
3426        }
3427    }
3428
3429    #[test]
3430    fn test_handle_streaming_event_reasoning_drops_plaintext_content() {
3431        use std::sync::Mutex;
3432
3433        let input_tokens = Mutex::new(0u32);
3434        let output_tokens = Mutex::new(0u32);
3435        let cache_read_tokens = Mutex::new(None);
3436        let accumulated_tool_calls = Mutex::new(Vec::new());
3437        let finish_reason = Mutex::new(None);
3438
3439        // Reasoning item with plaintext content and a non-summary content part in `summary`.
3440        // Both must be excluded from the emitted ReasonItem.
3441        let event = StreamingEvent::OutputItemDone {
3442            sequence_number: 5,
3443            output_index: 0,
3444            item: Some(types::OutputItem::Reasoning {
3445                id: "rs_002".to_string(),
3446                summary: vec![
3447                    types::ContentPart::SummaryText {
3448                        text: "safe summary".to_string(),
3449                    },
3450                    types::ContentPart::ReasoningText {
3451                        text: "SECRET hidden reasoning".to_string(),
3452                    },
3453                ],
3454                content: Some(vec![types::ContentPart::ReasoningText {
3455                    text: "SECRET hidden reasoning".to_string(),
3456                }]),
3457                encrypted_content: Some("opaque".to_string()),
3458            }),
3459        };
3460
3461        let result = handle_streaming_event(
3462            event,
3463            &input_tokens,
3464            &output_tokens,
3465            &cache_read_tokens,
3466            &accumulated_tool_calls,
3467            &finish_reason,
3468            "gpt-5".to_string(),
3469            None,
3470        );
3471
3472        match result {
3473            LlmStreamEvent::ReasonItem {
3474                summary,
3475                encrypted_content,
3476                ..
3477            } => {
3478                assert_eq!(summary, vec!["safe summary".to_string()]);
3479                assert_eq!(encrypted_content.as_deref(), Some("opaque"));
3480            }
3481            other => panic!("Expected ReasonItem event, got {:?}", other),
3482        }
3483    }
3484
3485    #[test]
3486    fn test_handle_streaming_event_reasoning_delta() {
3487        use std::sync::Mutex;
3488
3489        let input_tokens = Mutex::new(0u32);
3490        let output_tokens = Mutex::new(0u32);
3491        let cache_read_tokens = Mutex::new(None);
3492        let accumulated_tool_calls = Mutex::new(Vec::new());
3493        let finish_reason = Mutex::new(None);
3494
3495        // ReasoningDelta (opaque reasoning from o-series) maps to ThinkingDelta
3496        let event = StreamingEvent::ReasoningDelta {
3497            sequence_number: 3,
3498            item_id: "rs_001".to_string(),
3499            output_index: 0,
3500            content_index: 0,
3501            delta: "Let me reason about this...".to_string(),
3502            obfuscation: None,
3503        };
3504
3505        let result = handle_streaming_event(
3506            event,
3507            &input_tokens,
3508            &output_tokens,
3509            &cache_read_tokens,
3510            &accumulated_tool_calls,
3511            &finish_reason,
3512            "o3".to_string(),
3513            None,
3514        );
3515
3516        match result {
3517            LlmStreamEvent::ThinkingDelta(text) => {
3518                assert_eq!(text, "Let me reason about this...");
3519            }
3520            _ => panic!("Expected ThinkingDelta, got {:?}", result),
3521        }
3522    }
3523
3524    #[test]
3525    fn test_handle_streaming_event_reasoning_summary_delta() {
3526        use std::sync::Mutex;
3527
3528        let input_tokens = Mutex::new(0u32);
3529        let output_tokens = Mutex::new(0u32);
3530        let cache_read_tokens = Mutex::new(None);
3531        let accumulated_tool_calls = Mutex::new(Vec::new());
3532        let finish_reason = Mutex::new(None);
3533
3534        // ReasoningSummaryDelta (readable summary from GPT-5.x) maps to ThinkingDelta
3535        let event = StreamingEvent::ReasoningSummaryDelta {
3536            sequence_number: 4,
3537            item_id: "rs_002".to_string(),
3538            output_index: 0,
3539            summary_index: 0,
3540            delta: "Breaking down the problem...".to_string(),
3541            obfuscation: None,
3542        };
3543
3544        let result = handle_streaming_event(
3545            event,
3546            &input_tokens,
3547            &output_tokens,
3548            &cache_read_tokens,
3549            &accumulated_tool_calls,
3550            &finish_reason,
3551            "gpt-5.2".to_string(),
3552            None,
3553        );
3554
3555        match result {
3556            LlmStreamEvent::ThinkingDelta(text) => {
3557                assert_eq!(text, "Breaking down the problem...");
3558            }
3559            _ => panic!("Expected ThinkingDelta, got {:?}", result),
3560        }
3561    }
3562
3563    #[test]
3564    fn test_request_reasoning_none_is_omitted() {
3565        // When reasoning effort is "none", the reasoning field should be omitted
3566        // to avoid API errors on models that don't support reasoning params
3567        let config = LlmCallConfig {
3568            model: "gpt-5.2".to_string(),
3569            temperature: None,
3570            max_tokens: None,
3571            tools: vec![],
3572            reasoning_effort: Some("none".to_string()),
3573            metadata: std::collections::HashMap::new(),
3574            previous_response_id: None,
3575            tool_search: None,
3576            prompt_cache: None,
3577        };
3578
3579        // Simulate the driver's filter logic
3580        let reasoning = config
3581            .reasoning_effort
3582            .as_ref()
3583            .filter(|e| !e.eq_ignore_ascii_case("none"))
3584            .map(|effort| ResponsesReasoning {
3585                effort: effort.clone(),
3586                summary: "detailed".to_string(),
3587            });
3588
3589        assert!(
3590            reasoning.is_none(),
3591            "reasoning should be None for effort=none"
3592        );
3593    }
3594
3595    #[test]
3596    fn test_request_reasoning_high_is_included() {
3597        // When reasoning effort is "high", the reasoning field should be present
3598        let config = LlmCallConfig {
3599            model: "gpt-5.2".to_string(),
3600            temperature: None,
3601            max_tokens: None,
3602            tools: vec![],
3603            reasoning_effort: Some("high".to_string()),
3604            metadata: std::collections::HashMap::new(),
3605            previous_response_id: None,
3606            tool_search: None,
3607            prompt_cache: None,
3608        };
3609
3610        let reasoning = config
3611            .reasoning_effort
3612            .as_ref()
3613            .filter(|e| !e.eq_ignore_ascii_case("none"))
3614            .map(|effort| ResponsesReasoning {
3615                effort: effort.clone(),
3616                summary: "detailed".to_string(),
3617            });
3618
3619        assert!(
3620            reasoning.is_some(),
3621            "reasoning should be present for effort=high"
3622        );
3623        let r = reasoning.unwrap();
3624        assert_eq!(r.effort, "high");
3625        assert_eq!(r.summary, "detailed");
3626    }
3627
3628    #[test]
3629    fn test_request_reasoning_none_case_insensitive() {
3630        // "None", "NONE", "none" should all be filtered out
3631        for effort in &["none", "None", "NONE"] {
3632            let reasoning = Some(effort.to_string())
3633                .as_ref()
3634                .filter(|e| !e.eq_ignore_ascii_case("none"))
3635                .cloned();
3636
3637            assert!(
3638                reasoning.is_none(),
3639                "effort={effort:?} should be filtered out"
3640            );
3641        }
3642    }
3643
3644    #[test]
3645    fn test_build_input_assistant_without_thinking_or_tools() {
3646        // Plain assistant message (no thinking, no tool calls) should just be a message
3647        let messages = vec![
3648            LlmMessage::text(LlmMessageRole::User, "Hello"),
3649            LlmMessage {
3650                role: LlmMessageRole::Assistant,
3651                content: LlmMessageContent::Text("Hi there!".to_string()),
3652                tool_calls: None,
3653                tool_call_id: None,
3654                phase: None,
3655                thinking: None,
3656                thinking_signature: None,
3657            },
3658        ];
3659
3660        let (_, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
3661
3662        assert_eq!(input.len(), 2);
3663        let json = serde_json::to_value(&input[1]).unwrap();
3664        assert_eq!(json["role"], "assistant");
3665        assert!(json.get("type").is_none() || json["type"] == "message");
3666    }
3667
3668    #[test]
3669    fn test_build_input_multiple_reasoning_items_get_unique_ids() {
3670        // Multiple assistant messages with thinking_signature should get unique reasoning IDs
3671        let messages = vec![
3672            LlmMessage::text(LlmMessageRole::User, "First question"),
3673            LlmMessage {
3674                role: LlmMessageRole::Assistant,
3675                content: LlmMessageContent::Text("First answer.".to_string()),
3676                tool_calls: None,
3677                tool_call_id: None,
3678                phase: None,
3679                thinking: Some("thinking 1".to_string()),
3680                thinking_signature: Some("encrypted_1".to_string()),
3681            },
3682            LlmMessage::text(LlmMessageRole::User, "Second question"),
3683            LlmMessage {
3684                role: LlmMessageRole::Assistant,
3685                content: LlmMessageContent::Text("Second answer.".to_string()),
3686                tool_calls: None,
3687                tool_call_id: None,
3688                phase: None,
3689                thinking: Some("thinking 2".to_string()),
3690                thinking_signature: Some("encrypted_2".to_string()),
3691            },
3692        ];
3693
3694        let (_, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
3695
3696        // Should have: user, reasoning_1, assistant, user, reasoning_2, assistant
3697        assert_eq!(input.len(), 6);
3698
3699        let r1 = serde_json::to_value(&input[1]).unwrap();
3700        let r2 = serde_json::to_value(&input[4]).unwrap();
3701
3702        assert_eq!(r1["type"], "reasoning");
3703        assert_eq!(r2["type"], "reasoning");
3704        assert_ne!(r1["id"], r2["id"], "Reasoning items should have unique IDs");
3705        assert_eq!(r1["encrypted_content"], "encrypted_1");
3706        assert_eq!(r2["encrypted_content"], "encrypted_2");
3707    }
3708
3709    #[test]
3710    fn test_build_input_with_phases_enabled() {
3711        use crate::message::ExecutionPhase;
3712
3713        let messages = vec![
3714            LlmMessage::text(LlmMessageRole::System, "You are helpful"),
3715            LlmMessage::text(LlmMessageRole::User, "Hello"),
3716            LlmMessage {
3717                role: LlmMessageRole::Assistant,
3718                content: LlmMessageContent::Text("Working on it...".to_string()),
3719                tool_calls: Some(vec![crate::tool_types::ToolCall {
3720                    id: "call_1".to_string(),
3721                    name: "search".to_string(),
3722                    arguments: json!({}),
3723                }]),
3724                tool_call_id: None,
3725                phase: Some(ExecutionPhase::Commentary),
3726                thinking: None,
3727                thinking_signature: None,
3728            },
3729            LlmMessage {
3730                role: LlmMessageRole::Tool,
3731                content: LlmMessageContent::Text("result".to_string()),
3732                tool_calls: None,
3733                tool_call_id: Some("call_1".to_string()),
3734                phase: None,
3735                thinking: None,
3736                thinking_signature: None,
3737            },
3738        ];
3739
3740        // With supports_phases=true, assistant message should include phase
3741        let (_, input) = OpenResponsesProtocolLlmDriver::build_input(&messages, true);
3742        let assistant_json = serde_json::to_value(&input[1]).unwrap();
3743        assert_eq!(assistant_json["phase"], "commentary");
3744
3745        // With supports_phases=false, phase should be absent
3746        let (_, input_no_phases) = OpenResponsesProtocolLlmDriver::build_input(&messages, false);
3747        let assistant_json_no = serde_json::to_value(&input_no_phases[1]).unwrap();
3748        assert!(assistant_json_no.get("phase").is_none() || assistant_json_no["phase"].is_null());
3749    }
3750
3751    // ========================================================================
3752    // tool_search / convert_tools_with_search tests
3753    // ========================================================================
3754
3755    /// Helper: create a ToolDefinition with optional category and deferrable policy
3756    fn make_tool(
3757        name: &str,
3758        category: Option<&str>,
3759        deferrable: crate::tool_types::DeferrablePolicy,
3760    ) -> ToolDefinition {
3761        ToolDefinition::Builtin(crate::tool_types::BuiltinTool {
3762            name: name.to_string(),
3763            display_name: None,
3764            description: format!("{} description", name),
3765            parameters: json!({"type": "object", "properties": {}}),
3766            policy: crate::tool_types::ToolPolicy::Auto,
3767            category: category.map(|s| s.to_string()),
3768            deferrable,
3769            hints: crate::tool_types::ToolHints::default(),
3770            full_parameters: None,
3771        })
3772    }
3773
3774    #[test]
3775    fn test_convert_tools_with_search_below_threshold_falls_back() {
3776        use crate::tool_types::DeferrablePolicy;
3777
3778        let tools: Vec<ToolDefinition> = (0..5)
3779            .map(|i| {
3780                make_tool(
3781                    &format!("tool_{i}"),
3782                    Some("cat"),
3783                    DeferrablePolicy::Automatic,
3784                )
3785            })
3786            .collect();
3787
3788        // threshold=15, only 5 tools → should fall back to standard convert_tools
3789        let result = OpenResponsesProtocolLlmDriver::convert_tools_with_search(&tools, 15);
3790        assert_eq!(result.len(), 5);
3791        // No ToolSearch entry, no namespaces
3792        let json = serde_json::to_value(&result).unwrap();
3793        for item in json.as_array().unwrap() {
3794            assert_eq!(item["type"], "function");
3795            assert!(item.get("defer_loading").is_none() || item["defer_loading"].is_null());
3796        }
3797    }
3798
3799    #[test]
3800    fn test_convert_tools_with_search_groups_by_category() {
3801        use crate::tool_types::DeferrablePolicy;
3802
3803        let mut tools = vec![];
3804        // 10 "FileSystem" tools + 6 "Weather" tools = 16, threshold=15
3805        for i in 0..10 {
3806            tools.push(make_tool(
3807                &format!("fs_tool_{i}"),
3808                Some("FileSystem"),
3809                DeferrablePolicy::Automatic,
3810            ));
3811        }
3812        for i in 0..6 {
3813            tools.push(make_tool(
3814                &format!("weather_tool_{i}"),
3815                Some("Weather"),
3816                DeferrablePolicy::Automatic,
3817            ));
3818        }
3819
3820        let result = OpenResponsesProtocolLlmDriver::convert_tools_with_search(&tools, 15);
3821        let json = serde_json::to_value(&result).unwrap();
3822        let arr = json.as_array().unwrap();
3823
3824        // Should have: 2 namespace entries + 1 tool_search entry = 3
3825        assert_eq!(arr.len(), 3);
3826
3827        // Last entry should be tool_search
3828        assert_eq!(arr.last().unwrap()["type"], "tool_search");
3829
3830        // The two namespace entries
3831        let ns: Vec<&Value> = arr.iter().filter(|v| v["type"] == "namespace").collect();
3832        assert_eq!(ns.len(), 2);
3833
3834        let ns_names: Vec<&str> = ns.iter().map(|v| v["name"].as_str().unwrap()).collect();
3835        assert!(ns_names.contains(&"FileSystem"));
3836        assert!(ns_names.contains(&"Weather"));
3837
3838        // Check tool counts inside namespaces
3839        for n in &ns {
3840            let inner_tools = n["tools"].as_array().unwrap();
3841            match n["name"].as_str().unwrap() {
3842                "FileSystem" => assert_eq!(inner_tools.len(), 10),
3843                "Weather" => assert_eq!(inner_tools.len(), 6),
3844                other => panic!("Unexpected namespace: {other}"),
3845            }
3846            // All inner tools should have defer_loading: true
3847            for t in inner_tools {
3848                assert_eq!(t["defer_loading"], true);
3849            }
3850        }
3851    }
3852
3853    #[test]
3854    fn test_convert_tools_with_search_never_defer_stays_top_level() {
3855        use crate::tool_types::DeferrablePolicy;
3856
3857        let mut tools = vec![];
3858        // 2 Never-defer tools
3859        tools.push(make_tool(
3860            "write_todos",
3861            Some("Productivity"),
3862            DeferrablePolicy::Never,
3863        ));
3864        tools.push(make_tool(
3865            "get_session_info",
3866            Some("Session"),
3867            DeferrablePolicy::Never,
3868        ));
3869        // 14 Automatic tools in "FileSystem" category
3870        for i in 0..14 {
3871            tools.push(make_tool(
3872                &format!("fs_tool_{i}"),
3873                Some("FileSystem"),
3874                DeferrablePolicy::Automatic,
3875            ));
3876        }
3877
3878        let result = OpenResponsesProtocolLlmDriver::convert_tools_with_search(&tools, 15);
3879        let json = serde_json::to_value(&result).unwrap();
3880        let arr = json.as_array().unwrap();
3881
3882        // 2 never-defer functions + 1 FileSystem namespace + 1 tool_search = 4
3883        assert_eq!(arr.len(), 4);
3884
3885        // First two should be non-deferred functions
3886        let funcs: Vec<&Value> = arr.iter().filter(|v| v["type"] == "function").collect();
3887        assert_eq!(funcs.len(), 2);
3888        for f in &funcs {
3889            // No defer_loading on never-defer tools
3890            assert!(f.get("defer_loading").is_none() || f["defer_loading"].is_null());
3891        }
3892
3893        // Namespace
3894        let ns: Vec<&Value> = arr.iter().filter(|v| v["type"] == "namespace").collect();
3895        assert_eq!(ns.len(), 1);
3896        assert_eq!(ns[0]["name"], "FileSystem");
3897        assert_eq!(ns[0]["tools"].as_array().unwrap().len(), 14);
3898    }
3899
3900    #[test]
3901    fn test_convert_tools_with_search_ungrouped_tools() {
3902        use crate::tool_types::DeferrablePolicy;
3903
3904        let mut tools = vec![];
3905        // 10 categorized tools
3906        for i in 0..10 {
3907            tools.push(make_tool(
3908                &format!("cat_tool_{i}"),
3909                Some("Cat"),
3910                DeferrablePolicy::Automatic,
3911            ));
3912        }
3913        // 6 uncategorized tools (no category → ungrouped)
3914        for i in 0..6 {
3915            tools.push(make_tool(
3916                &format!("misc_tool_{i}"),
3917                None,
3918                DeferrablePolicy::Automatic,
3919            ));
3920        }
3921
3922        let result = OpenResponsesProtocolLlmDriver::convert_tools_with_search(&tools, 15);
3923        let json = serde_json::to_value(&result).unwrap();
3924        let arr = json.as_array().unwrap();
3925
3926        // 1 namespace + 6 ungrouped functions + 1 tool_search = 8
3927        assert_eq!(arr.len(), 8);
3928
3929        let ns: Vec<&Value> = arr.iter().filter(|v| v["type"] == "namespace").collect();
3930        assert_eq!(ns.len(), 1);
3931        assert_eq!(ns[0]["tools"].as_array().unwrap().len(), 10);
3932
3933        let funcs: Vec<&Value> = arr.iter().filter(|v| v["type"] == "function").collect();
3934        assert_eq!(funcs.len(), 6);
3935        // These ungrouped tools should still have defer_loading: true
3936        for f in &funcs {
3937            assert_eq!(f["defer_loading"], true);
3938        }
3939
3940        assert_eq!(arr.last().unwrap()["type"], "tool_search");
3941    }
3942
3943    #[test]
3944    fn test_convert_tools_with_search_always_policy() {
3945        use crate::tool_types::DeferrablePolicy;
3946
3947        let mut tools = vec![];
3948        // 14 Automatic tools
3949        for i in 0..14 {
3950            tools.push(make_tool(
3951                &format!("tool_{i}"),
3952                Some("General"),
3953                DeferrablePolicy::Automatic,
3954            ));
3955        }
3956        // 1 Always tool (should be deferred even if only at threshold)
3957        tools.push(make_tool(
3958            "always_tool",
3959            Some("General"),
3960            DeferrablePolicy::Always,
3961        ));
3962
3963        // Exactly at threshold (15 tools, threshold=15)
3964        let result = OpenResponsesProtocolLlmDriver::convert_tools_with_search(&tools, 15);
3965        let json = serde_json::to_value(&result).unwrap();
3966        let arr = json.as_array().unwrap();
3967
3968        // 1 namespace (General) + 1 tool_search = 2
3969        assert_eq!(arr.len(), 2);
3970
3971        let ns = &arr[0];
3972        assert_eq!(ns["type"], "namespace");
3973        let inner = ns["tools"].as_array().unwrap();
3974        assert_eq!(inner.len(), 15);
3975        // All should have defer_loading: true
3976        for t in inner {
3977            assert_eq!(t["defer_loading"], true);
3978        }
3979    }
3980
3981    #[test]
3982    fn test_tool_search_serialization_format() {
3983        // Verify the ToolSearch entry serializes correctly
3984        let ts = ResponsesTool::ToolSearch {
3985            r#type: "tool_search".to_string(),
3986        };
3987        let json = serde_json::to_value(&ts).unwrap();
3988        assert_eq!(json, json!({"type": "tool_search"}));
3989    }
3990
3991    #[test]
3992    fn test_namespace_serialization_format() {
3993        let ns = ResponsesTool::Namespace {
3994            r#type: "namespace".to_string(),
3995            name: "FileSystem".to_string(),
3996            description: "Tools for FileSystem".to_string(),
3997            tools: vec![ResponsesTool::Function {
3998                r#type: "function".to_string(),
3999                name: "read_file".to_string(),
4000                description: "Read a file".to_string(),
4001                parameters: json!({}),
4002                defer_loading: Some(true),
4003            }],
4004        };
4005        let json = serde_json::to_value(&ns).unwrap();
4006        assert_eq!(json["type"], "namespace");
4007        assert_eq!(json["name"], "FileSystem");
4008        assert_eq!(json["tools"][0]["name"], "read_file");
4009        assert_eq!(json["tools"][0]["defer_loading"], true);
4010    }
4011
4012    #[test]
4013    fn test_hosted_tool_search_completed_event_preserves_response_id() {
4014        let event_json = r#"{
4015            "type": "response.completed",
4016            "sequence_number": 8,
4017            "response": {
4018                "id": "resp_tool_search",
4019                "object": "response",
4020                "created_at": 1780000000,
4021                "status": "completed",
4022                "model": "gpt-5.5",
4023                "output": [
4024                    {
4025                        "type": "tool_search_call",
4026                        "execution": "server",
4027                        "call_id": null,
4028                        "status": "completed",
4029                        "arguments": { "paths": ["Math"] }
4030                    },
4031                    {
4032                        "type": "tool_search_output",
4033                        "execution": "server",
4034                        "call_id": null,
4035                        "status": "completed",
4036                        "tools": [
4037                            {
4038                                "type": "namespace",
4039                                "name": "Math",
4040                                "description": "Tools for Math",
4041                                "tools": [
4042                                    {
4043                                        "type": "function",
4044                                        "name": "add",
4045                                        "description": "Add numbers.",
4046                                        "defer_loading": true,
4047                                        "parameters": {
4048                                            "type": "object",
4049                                            "properties": {
4050                                                "a": { "type": "number" },
4051                                                "b": { "type": "number" }
4052                                            },
4053                                            "required": ["a", "b"],
4054                                            "additionalProperties": false
4055                                        }
4056                                    }
4057                                ]
4058                            }
4059                        ]
4060                    },
4061                    {
4062                        "type": "function_call",
4063                        "id": "fc_123",
4064                        "call_id": "call_123",
4065                        "name": "add",
4066                        "namespace": "Math",
4067                        "arguments": "{\"a\":7,\"b\":3}",
4068                        "status": "completed"
4069                    }
4070                ],
4071                "usage": {
4072                    "input_tokens": 10,
4073                    "output_tokens": 5,
4074                    "total_tokens": 15
4075                }
4076            }
4077        }"#;
4078
4079        let event: StreamingEvent = serde_json::from_str(event_json).unwrap();
4080        let stream_event = handle_streaming_event(
4081            event,
4082            &Mutex::new(0),
4083            &Mutex::new(0),
4084            &Mutex::new(None),
4085            &Mutex::new(Vec::new()),
4086            &Mutex::new(Some("tool_calls".to_string())),
4087            "gpt-5.5".to_string(),
4088            None,
4089        );
4090
4091        match stream_event {
4092            LlmStreamEvent::Done(metadata) => {
4093                assert_eq!(metadata.response_id.as_deref(), Some("resp_tool_search"));
4094                assert_eq!(metadata.finish_reason.as_deref(), Some("tool_calls"));
4095            }
4096            other => panic!("expected Done event, got {other:?}"),
4097        }
4098    }
4099
4100    #[test]
4101    fn test_sanitize_parameters_adds_missing_properties() {
4102        let params = json!({"type": "object", "additionalProperties": false});
4103        let sanitized = OpenResponsesProtocolLlmDriver::sanitize_parameters(&params);
4104        assert_eq!(
4105            sanitized,
4106            json!({"type": "object", "properties": {}, "additionalProperties": false})
4107        );
4108    }
4109
4110    #[test]
4111    fn test_sanitize_parameters_preserves_existing_properties() {
4112        let params = json!({"type": "object", "properties": {"x": {"type": "string"}}, "additionalProperties": false});
4113        let sanitized = OpenResponsesProtocolLlmDriver::sanitize_parameters(&params);
4114        assert_eq!(sanitized, params);
4115    }
4116
4117    #[test]
4118    fn test_sanitize_parameters_ignores_non_object_types() {
4119        let params = json!({"type": "string"});
4120        let sanitized = OpenResponsesProtocolLlmDriver::sanitize_parameters(&params);
4121        assert_eq!(sanitized, params);
4122    }
4123}