Skip to main content

heartbit_core/llm/
openrouter.rs

1//! OpenRouter LLM provider — access 200+ models via a single OpenAI-compatible API.
2
3use bytes::Bytes;
4use futures::StreamExt;
5use reqwest::Client;
6use reqwest::redirect::Policy;
7use serde::Deserialize;
8use std::time::Duration;
9use tracing::warn;
10
11use crate::error::Error;
12use crate::llm::LlmProvider;
13use crate::llm::anthropic::SseParser;
14use crate::llm::types::{
15    CompletionRequest, CompletionResponse, ContentBlock, ReasoningEffort, Role, StopReason,
16    TokenUsage, ToolChoice, ToolDefinition,
17};
18
19const API_URL: &str = "https://openrouter.ai/api/v1/chat/completions";
20
21/// Build a hardened reqwest Client for the OpenRouter API.
22///
23/// SECURITY (F-LLM-1, F-LLM-2): although `Authorization: Bearer ...` *is*
24/// stripped by reqwest on cross-host redirects, disable redirects entirely
25/// for consistency with the other providers and to neutralise any future
26/// addition of custom auth headers (e.g. `HTTP-Referer`).
27fn build_secure_client() -> Result<Client, Error> {
28    Client::builder()
29        .redirect(Policy::none())
30        .https_only(true)
31        .connect_timeout(Duration::from_secs(10))
32        .timeout(Duration::from_secs(120))
33        .build()
34        .map_err(Error::from)
35}
36
37/// OpenRouter LLM provider (OpenAI-compatible API).
38///
39/// Supports both non-streaming (`complete`) and streaming (`stream_complete`)
40/// modes. Streaming uses OpenAI's SSE format with `choices[].delta` chunks.
41pub struct OpenRouterProvider {
42    client: Client,
43    api_key: String,
44    model: String,
45}
46
47impl OpenRouterProvider {
48    /// Create a new OpenRouter provider with the given API key and model identifier.
49    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
50        Self {
51            client: build_secure_client()
52                .expect("failed to build hardened HTTPS client for OpenRouterProvider"),
53            api_key: api_key.into(),
54            model: model.into(),
55        }
56    }
57}
58
59impl LlmProvider for OpenRouterProvider {
60    fn model_name(&self) -> Option<&str> {
61        Some(&self.model)
62    }
63
64    async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse, Error> {
65        let body = build_openai_request(&self.model, &request)?;
66
67        let response = self
68            .client
69            .post(API_URL)
70            .header("Authorization", format!("Bearer {}", self.api_key))
71            .header("Content-Type", "application/json")
72            .json(&body)
73            .send()
74            .await?;
75
76        if !response.status().is_success() {
77            return Err(super::api_error_from_response(response).await);
78        }
79
80        let api_response: OpenAiResponse = response.json().await?;
81        into_completion_response(api_response)
82    }
83
84    async fn stream_complete(
85        &self,
86        request: CompletionRequest,
87        on_text: &crate::llm::OnText,
88    ) -> Result<CompletionResponse, Error> {
89        let mut body = build_openai_request(&self.model, &request)?;
90        body["stream"] = serde_json::json!(true);
91        body["stream_options"] = serde_json::json!({"include_usage": true});
92
93        let response = self
94            .client
95            .post(API_URL)
96            .header("Authorization", format!("Bearer {}", self.api_key))
97            .header("Content-Type", "application/json")
98            .json(&body)
99            .send()
100            .await?;
101
102        if !response.status().is_success() {
103            return Err(super::api_error_from_response(response).await);
104        }
105
106        parse_openai_stream(response.bytes_stream(), on_text).await
107    }
108}
109
110// --- Request building: our types → OpenAI format ---
111
112pub(crate) fn build_openai_request(
113    model: &str,
114    request: &CompletionRequest,
115) -> Result<serde_json::Value, Error> {
116    let mut messages = Vec::new();
117
118    // System message
119    if !request.system.is_empty() {
120        messages.push(serde_json::json!({
121            "role": "system",
122            "content": request.system,
123        }));
124    }
125
126    // Convert our messages to OpenAI format
127    for msg in &request.messages {
128        match msg.role {
129            Role::User => {
130                let has_media = msg
131                    .content
132                    .iter()
133                    .any(|b| matches!(b, ContentBlock::Image { .. } | ContentBlock::Audio { .. }));
134
135                let mut text_parts = Vec::new();
136                let mut media_parts = Vec::new();
137                for block in &msg.content {
138                    match block {
139                        ContentBlock::Text { text } => {
140                            text_parts.push(text.as_str());
141                        }
142                        ContentBlock::Image { media_type, data } => {
143                            media_parts.push(serde_json::json!({
144                                "type": "image_url",
145                                "image_url": {
146                                    "url": format!("data:{media_type};base64,{data}")
147                                }
148                            }));
149                        }
150                        ContentBlock::Audio { format, data } => {
151                            media_parts.push(serde_json::json!({
152                                "type": "input_audio",
153                                "input_audio": {
154                                    "data": data,
155                                    "format": format,
156                                }
157                            }));
158                        }
159                        ContentBlock::ToolResult {
160                            tool_use_id,
161                            content,
162                            is_error,
163                        } => {
164                            // OpenAI format has no is_error field; prefix content
165                            // so the LLM sees the error context.
166                            let content = if *is_error {
167                                format!("[ERROR] {content}")
168                            } else {
169                                content.clone()
170                            };
171                            messages.push(serde_json::json!({
172                                "role": "tool",
173                                "tool_call_id": tool_use_id,
174                                "content": content,
175                            }));
176                        }
177                        _ => {}
178                    }
179                }
180
181                if has_media {
182                    // Use array content format when images/audio are present
183                    let mut content_parts: Vec<serde_json::Value> = Vec::new();
184                    if !text_parts.is_empty() {
185                        content_parts.push(serde_json::json!({
186                            "type": "text",
187                            "text": text_parts.join("\n\n"),
188                        }));
189                    }
190                    content_parts.extend(media_parts);
191                    if !content_parts.is_empty() {
192                        messages.push(serde_json::json!({
193                            "role": "user",
194                            "content": content_parts,
195                        }));
196                    }
197                } else if !text_parts.is_empty() {
198                    messages.push(serde_json::json!({
199                        "role": "user",
200                        "content": text_parts.join("\n\n"),
201                    }));
202                } else if !msg
203                    .content
204                    .iter()
205                    .any(|b| matches!(b, ContentBlock::ToolResult { .. }))
206                {
207                    // No text blocks and no tool results — add empty user message
208                    messages.push(serde_json::json!({
209                        "role": "user",
210                        "content": "",
211                    }));
212                }
213            }
214            Role::Assistant => {
215                let text: String = msg
216                    .content
217                    .iter()
218                    .filter_map(|b| match b {
219                        ContentBlock::Text { text } => Some(text.as_str()),
220                        _ => None,
221                    })
222                    .collect::<Vec<_>>()
223                    .join("");
224
225                let tool_calls: Vec<serde_json::Value> = msg
226                    .content
227                    .iter()
228                    .filter_map(|b| match b {
229                        ContentBlock::ToolUse { id, name, input } => Some(serde_json::json!({
230                            "id": id,
231                            "type": "function",
232                            "function": {
233                                "name": name,
234                                "arguments": serde_json::to_string(input)
235                                    .expect("serde_json::Value serialization is infallible"),
236                            }
237                        })),
238                        _ => None,
239                    })
240                    .collect();
241
242                let mut msg_json = serde_json::json!({
243                    "role": "assistant",
244                });
245
246                if !text.is_empty() {
247                    msg_json["content"] = serde_json::Value::String(text);
248                } else {
249                    msg_json["content"] = serde_json::Value::Null;
250                }
251
252                if !tool_calls.is_empty() {
253                    msg_json["tool_calls"] = serde_json::Value::Array(tool_calls);
254                }
255
256                messages.push(msg_json);
257            }
258        }
259    }
260
261    let mut body = serde_json::json!({
262        "model": model,
263        "messages": messages,
264        "max_tokens": request.max_tokens,
265    });
266
267    // Convert tools to OpenAI function format
268    if !request.tools.is_empty() {
269        let tools: Vec<serde_json::Value> = request.tools.iter().map(tool_to_openai).collect();
270        body["tools"] = serde_json::Value::Array(tools);
271    }
272
273    // Convert tool_choice to OpenAI format
274    if let Some(ref tc) = request.tool_choice {
275        body["tool_choice"] = tool_choice_to_openai(tc);
276    }
277
278    // Add reasoning/thinking parameter for models that support it (e.g., Qwen3)
279    if let Some(effort) = request.reasoning_effort {
280        let effort_str = match effort {
281            ReasoningEffort::High => "high",
282            ReasoningEffort::Medium => "medium",
283            ReasoningEffort::Low => "low",
284            ReasoningEffort::None => "none",
285        };
286        body["reasoning"] = serde_json::json!({"effort": effort_str});
287    }
288
289    Ok(body)
290}
291
292/// Convert our `ToolChoice` to OpenAI's format.
293///
294/// - `Auto` → `"auto"` (string)
295/// - `Any` → `"required"` (OpenAI's equivalent of "must call a tool")
296/// - `Tool { name }` → `{"type": "function", "function": {"name": "..."}}`
297fn tool_choice_to_openai(tc: &ToolChoice) -> serde_json::Value {
298    match tc {
299        ToolChoice::Auto => serde_json::json!("auto"),
300        ToolChoice::Any => serde_json::json!("required"),
301        ToolChoice::Tool { name } => serde_json::json!({
302            "type": "function",
303            "function": {"name": name}
304        }),
305    }
306}
307
308fn tool_to_openai(tool: &ToolDefinition) -> serde_json::Value {
309    serde_json::json!({
310        "type": "function",
311        "function": {
312            "name": tool.name,
313            "description": tool.description,
314            "parameters": tool.input_schema,
315        }
316    })
317}
318
319// --- Response parsing: OpenAI format → our types ---
320
/// Body of a non-streaming chat-completions response.
///
/// Converted into a `CompletionResponse` by `into_completion_response`,
/// which reads only the first entry of `choices`.
#[derive(Deserialize)]
pub(crate) struct OpenAiResponse {
    /// Candidate completions; an empty list is reported as an API error.
    choices: Vec<OpenAiChoice>,
    /// Token accounting; a missing object maps to default (zero) usage.
    #[serde(default)]
    usage: Option<OpenAiUsage>,
}
327
/// One candidate completion inside an [`OpenAiResponse`].
#[derive(Deserialize)]
struct OpenAiChoice {
    /// The assistant message produced for this choice.
    message: OpenAiMessage,
    /// Raw finish reason string (e.g. `"stop"`, `"tool_calls"`, `"length"`);
    /// normalized into a `StopReason` during conversion.
    finish_reason: Option<String>,
}
333
/// Assistant message payload of a non-streaming choice.
#[derive(Deserialize)]
struct OpenAiMessage {
    /// Text content; absent, null or empty text produces no text block.
    #[serde(default)]
    content: Option<String>,
    /// Tool invocations requested by the model, if any.
    #[serde(default)]
    tool_calls: Option<Vec<OpenAiToolCall>>,
}
341
/// A single tool invocation in a non-streaming response.
#[derive(Deserialize)]
struct OpenAiToolCall {
    /// Call identifier, carried over as the `ToolUse` block id.
    id: String,
    /// Name and JSON-encoded arguments of the function to call.
    function: OpenAiFunction,
}
347
/// Function reference inside an [`OpenAiToolCall`].
#[derive(Deserialize)]
struct OpenAiFunction {
    /// Name of the tool being called.
    name: String,
    /// Arguments as a JSON-encoded string; parsed later, with an empty or
    /// malformed string falling back to `{}`.
    arguments: String,
}
353
/// Token usage object, shared by the streaming and non-streaming paths.
///
/// `prompt_tokens` and `completion_tokens` are required whenever the usage
/// object is present; the remaining counters default to 0 when missing.
///
/// NOTE(review): OpenAI-style responses commonly report reasoning and cached
/// token counts inside nested `completion_tokens_details` /
/// `prompt_tokens_details` objects rather than as top-level fields — confirm
/// whether the three defaulted counters below are ever populated upstream.
#[derive(Deserialize, Default)]
struct OpenAiUsage {
    /// Mapped to `TokenUsage::input_tokens`.
    prompt_tokens: u32,
    /// Mapped to `TokenUsage::output_tokens`.
    completion_tokens: u32,
    /// Mapped to `TokenUsage::cache_creation_input_tokens`.
    #[serde(default)]
    cache_creation_input_tokens: u32,
    /// Mapped to `TokenUsage::cache_read_input_tokens`.
    #[serde(default)]
    cache_read_input_tokens: u32,
    /// Mapped to `TokenUsage::reasoning_tokens`.
    #[serde(default)]
    reasoning_tokens: u32,
}
365
366pub(crate) fn into_completion_response(api: OpenAiResponse) -> Result<CompletionResponse, Error> {
367    let choice = api.choices.into_iter().next().ok_or_else(|| Error::Api {
368        status: 502,
369        message: "empty choices array in response".into(),
370    })?;
371
372    let mut content = Vec::new();
373
374    // Text content
375    if let Some(text) = choice.message.content
376        && !text.is_empty()
377    {
378        content.push(ContentBlock::Text { text });
379    }
380
381    // Tool calls
382    if let Some(tool_calls) = choice.message.tool_calls {
383        for tc in tool_calls {
384            let input: serde_json::Value = if tc.function.arguments.is_empty() {
385                serde_json::json!({})
386            } else {
387                serde_json::from_str(&tc.function.arguments).unwrap_or_else(|e| {
388                    tracing::warn!(
389                        tool = %tc.function.name,
390                        error = %e,
391                        "malformed tool arguments JSON, defaulting to empty object"
392                    );
393                    serde_json::json!({})
394                })
395            };
396            content.push(ContentBlock::ToolUse {
397                id: tc.id,
398                name: tc.function.name,
399                input,
400            });
401        }
402    }
403
404    let has_tool_calls = content
405        .iter()
406        .any(|c| matches!(c, ContentBlock::ToolUse { .. }));
407
408    // Normalize: some providers send "stop" even when tool calls are present.
409    let stop_reason = match choice.finish_reason.as_deref() {
410        Some("tool_calls") => StopReason::ToolUse,
411        Some("stop") if has_tool_calls => StopReason::ToolUse,
412        Some("stop") => StopReason::EndTurn,
413        Some("length") => StopReason::MaxTokens,
414        Some(other) => {
415            warn!(
416                finish_reason = other,
417                "unknown finish_reason, treating as EndTurn"
418            );
419            StopReason::EndTurn
420        }
421        None => StopReason::EndTurn,
422    };
423
424    let usage = api.usage.map_or(TokenUsage::default(), |u| TokenUsage {
425        input_tokens: u.prompt_tokens,
426        output_tokens: u.completion_tokens,
427        cache_creation_input_tokens: u.cache_creation_input_tokens,
428        cache_read_input_tokens: u.cache_read_input_tokens,
429        reasoning_tokens: u.reasoning_tokens,
430    });
431
432    Ok(CompletionResponse {
433        content,
434        stop_reason,
435        usage,
436        model: None,
437    })
438}
439
440// --- Streaming: OpenAI SSE delta format ---
441
/// One JSON payload from the SSE stream.
///
/// Both fields are optional on the wire: `choices` defaults to an empty
/// list and `usage` to `None`.
#[derive(Deserialize, Default)]
struct StreamingChunk {
    /// Incremental choices; only the first entry is processed.
    #[serde(default)]
    choices: Vec<StreamingChoice>,
    /// Usage totals; when present they replace the accumulated usage.
    #[serde(default)]
    usage: Option<OpenAiUsage>,
}
449
/// A choice entry within a [`StreamingChunk`].
#[derive(Deserialize)]
struct StreamingChoice {
    /// Incremental content for this chunk; defaults to an empty delta.
    #[serde(default)]
    delta: StreamingDelta,
    /// Set on the chunk that ends the choice; the last value seen wins.
    #[serde(default)]
    finish_reason: Option<String>,
}
457
/// Incremental payload of a streaming choice.
#[derive(Deserialize, Default)]
struct StreamingDelta {
    /// Text fragment to append to the accumulated response text.
    #[serde(default)]
    content: Option<String>,
    /// Tool-call fragments, each addressed by its `index`.
    #[serde(default)]
    tool_calls: Option<Vec<StreamingToolCallDelta>>,
}
465
/// Fragment of a tool call delivered in a streaming delta.
#[derive(Deserialize)]
struct StreamingToolCallDelta {
    /// Position of the tool call this fragment belongs to; 0 when absent.
    #[serde(default)]
    index: usize,
    /// Call identifier; overwrites the accumulated id when present.
    #[serde(default)]
    id: Option<String>,
    /// Function name and/or argument fragment.
    #[serde(default)]
    function: Option<StreamingFunctionDelta>,
}
475
/// Function part of a [`StreamingToolCallDelta`].
#[derive(Deserialize)]
struct StreamingFunctionDelta {
    /// Tool name; overwrites the accumulated name when present.
    #[serde(default)]
    name: Option<String>,
    /// Piece of the JSON-encoded arguments string, appended in arrival order.
    #[serde(default)]
    arguments: Option<String>,
}
483
/// A tool call being reassembled from streaming fragments.
///
/// Starts out empty and is filled in as deltas for its index arrive.
#[derive(Default)]
struct AccumulatedToolCall {
    /// Most recent call id received.
    id: String,
    /// Most recent tool name received.
    name: String,
    /// Concatenated argument fragments; parsed as JSON once the stream ends.
    arguments: String,
}
490
491/// Parse an OpenAI-format SSE stream, emitting text deltas via `on_text`.
492///
493/// Reuses the `SseParser` from the Anthropic module for SSE framing.
494/// The JSON payload format differs: OpenAI uses `choices[].delta` with
495/// incremental content and tool call fragments.
496pub(crate) async fn parse_openai_stream<S>(
497    stream: S,
498    on_text: &super::OnText,
499) -> Result<CompletionResponse, Error>
500where
501    S: futures::Stream<Item = Result<Bytes, reqwest::Error>> + Unpin,
502{
503    let mut parser = SseParser::new();
504    let mut utf8_buf: Vec<u8> = Vec::new();
505    let mut text = String::new();
506    let mut tool_calls: Vec<AccumulatedToolCall> = Vec::new();
507    let mut finish_reason: Option<String> = None;
508    let mut usage = TokenUsage::default();
509
510    tokio::pin!(stream);
511
512    while let Some(chunk) = stream.next().await {
513        let chunk = chunk.map_err(Error::Http)?;
514        utf8_buf.extend_from_slice(&chunk);
515
516        let valid_len = match std::str::from_utf8(&utf8_buf) {
517            Ok(_) => utf8_buf.len(),
518            Err(e) => e.valid_up_to(),
519        };
520
521        if valid_len > 0 {
522            let s = std::str::from_utf8(&utf8_buf[..valid_len])
523                .expect("valid_up_to guarantees valid UTF-8");
524            for event in parser.feed(s) {
525                process_openai_event(
526                    &event.data,
527                    on_text,
528                    &mut text,
529                    &mut tool_calls,
530                    &mut finish_reason,
531                    &mut usage,
532                );
533            }
534        }
535        utf8_buf.drain(..valid_len);
536    }
537
538    // Remaining buffer
539    if !utf8_buf.is_empty()
540        && let Ok(s) = std::str::from_utf8(&utf8_buf)
541    {
542        for event in parser.feed(s) {
543            process_openai_event(
544                &event.data,
545                on_text,
546                &mut text,
547                &mut tool_calls,
548                &mut finish_reason,
549                &mut usage,
550            );
551        }
552    }
553
554    for event in parser.flush() {
555        process_openai_event(
556            &event.data,
557            on_text,
558            &mut text,
559            &mut tool_calls,
560            &mut finish_reason,
561            &mut usage,
562        );
563    }
564
565    // Build content blocks
566    let mut content = Vec::new();
567    if !text.is_empty() {
568        content.push(ContentBlock::Text { text });
569    }
570    for tc in tool_calls {
571        let input = if tc.arguments.is_empty() {
572            serde_json::json!({})
573        } else {
574            serde_json::from_str(&tc.arguments).unwrap_or_else(|e| {
575                warn!(tool = %tc.name, error = %e, "malformed streaming tool arguments");
576                serde_json::json!({})
577            })
578        };
579        content.push(ContentBlock::ToolUse {
580            id: tc.id,
581            name: tc.name,
582            input,
583        });
584    }
585
586    // Guard: if the stream completed without any content and no finish_reason
587    // was ever received, the upstream likely returned empty choices in all chunks.
588    // Return a retryable error (matching the non-streaming path).
589    if content.is_empty() && finish_reason.is_none() {
590        return Err(Error::Api {
591            status: 502,
592            message: "empty choices in all streaming chunks".into(),
593        });
594    }
595
596    let has_tool_calls = content
597        .iter()
598        .any(|c| matches!(c, ContentBlock::ToolUse { .. }));
599
600    // Normalize: some providers send "stop" even when tool calls are present.
601    // The agent loop checks content (not stop_reason) for tool detection, but
602    // we normalize here so the CompletionResponse is semantically correct.
603    let stop_reason = match finish_reason.as_deref() {
604        Some("tool_calls") => StopReason::ToolUse,
605        Some("stop") if has_tool_calls => StopReason::ToolUse,
606        Some("stop") => StopReason::EndTurn,
607        Some("length") => StopReason::MaxTokens,
608        Some(other) => {
609            warn!(
610                finish_reason = other,
611                "unknown finish_reason in stream, treating as EndTurn"
612            );
613            StopReason::EndTurn
614        }
615        None => StopReason::EndTurn,
616    };
617
618    Ok(CompletionResponse {
619        content,
620        stop_reason,
621        usage,
622        model: None,
623    })
624}
625
626fn process_openai_event(
627    data: &str,
628    on_text: &super::OnText,
629    text: &mut String,
630    tool_calls: &mut Vec<AccumulatedToolCall>,
631    finish_reason: &mut Option<String>,
632    usage: &mut TokenUsage,
633) {
634    if data == "[DONE]" {
635        return;
636    }
637
638    let chunk: StreamingChunk = match serde_json::from_str(data) {
639        Ok(c) => c,
640        Err(e) => {
641            warn!(error = %e, "failed to parse streaming chunk, skipping");
642            return;
643        }
644    };
645
646    if let Some(choice) = chunk.choices.first() {
647        if let Some(ref content) = choice.delta.content {
648            // SECURITY (F-LLM-4): cap text accumulation per response.
649            if text.len().saturating_add(content.len()) <= super::STREAM_MAX_TEXT_BYTES {
650                text.push_str(content);
651                on_text(content);
652            } else if text.len() < super::STREAM_MAX_TEXT_BYTES {
653                let remaining = super::STREAM_MAX_TEXT_BYTES - text.len();
654                let take = std::cmp::min(remaining, content.len());
655                let boundary = crate::tool::builtins::floor_char_boundary(content, take);
656                let safe = &content[..boundary];
657                text.push_str(safe);
658                on_text(safe);
659                tracing::warn!(
660                    text_len = text.len(),
661                    limit = super::STREAM_MAX_TEXT_BYTES,
662                    "OpenAI-format streaming text exceeded cap; truncated"
663                );
664            }
665        }
666
667        if let Some(ref tcs) = choice.delta.tool_calls {
668            for tc_delta in tcs {
669                // SECURITY (F-LLM-4): refuse outsized tool_call indices.
670                if tc_delta.index >= super::STREAM_MAX_TOOL_CALLS {
671                    tracing::warn!(
672                        index = tc_delta.index,
673                        limit = super::STREAM_MAX_TOOL_CALLS,
674                        "OpenAI-format tool_call index exceeds cap; dropping delta"
675                    );
676                    continue;
677                }
678                while tool_calls.len() <= tc_delta.index {
679                    tool_calls.push(AccumulatedToolCall::default());
680                }
681                let tc = &mut tool_calls[tc_delta.index];
682                if let Some(ref id) = tc_delta.id {
683                    tc.id.clone_from(id);
684                }
685                if let Some(ref func) = tc_delta.function {
686                    if let Some(ref name) = func.name {
687                        tc.name.clone_from(name);
688                    }
689                    if let Some(ref args) = func.arguments
690                        && tc.arguments.len().saturating_add(args.len())
691                            <= super::STREAM_MAX_TOOL_ARGS_BYTES
692                    {
693                        tc.arguments.push_str(args);
694                    }
695                }
696            }
697        }
698
699        if choice.finish_reason.is_some() {
700            *finish_reason = choice.finish_reason.clone();
701        }
702    }
703
704    if let Some(ref u) = chunk.usage {
705        *usage = TokenUsage {
706            input_tokens: u.prompt_tokens,
707            output_tokens: u.completion_tokens,
708            cache_creation_input_tokens: u.cache_creation_input_tokens,
709            cache_read_input_tokens: u.cache_read_input_tokens,
710            reasoning_tokens: u.reasoning_tokens,
711        };
712    }
713}
714
715#[cfg(test)]
716mod tests {
717    use super::*;
718    use crate::llm::types::Message;
719    use serde_json::json;
720
721    // --- Request building tests ---
722
723    #[test]
724    fn build_request_minimal() {
725        let request = CompletionRequest {
726            system: String::new(),
727            messages: vec![Message::user("hello")],
728            tools: vec![],
729            max_tokens: 1024,
730            tool_choice: None,
731            reasoning_effort: None,
732        };
733
734        let body = build_openai_request("anthropic/claude-sonnet-4", &request).unwrap();
735        assert_eq!(body["model"], "anthropic/claude-sonnet-4");
736        assert_eq!(body["max_tokens"], 1024);
737
738        let messages = body["messages"].as_array().unwrap();
739        assert_eq!(messages.len(), 1);
740        assert_eq!(messages[0]["role"], "user");
741        assert_eq!(messages[0]["content"], "hello");
742    }
743
744    #[test]
745    fn build_request_with_system() {
746        let request = CompletionRequest {
747            system: "You are helpful.".into(),
748            messages: vec![Message::user("hi")],
749            tools: vec![],
750            max_tokens: 1024,
751            tool_choice: None,
752            reasoning_effort: None,
753        };
754
755        let body = build_openai_request("model", &request).unwrap();
756        let messages = body["messages"].as_array().unwrap();
757        assert_eq!(messages[0]["role"], "system");
758        assert_eq!(messages[0]["content"], "You are helpful.");
759        assert_eq!(messages[1]["role"], "user");
760    }
761
762    #[test]
763    fn build_request_with_tools() {
764        let request = CompletionRequest {
765            system: String::new(),
766            messages: vec![Message::user("search")],
767            tools: vec![ToolDefinition {
768                name: "search".into(),
769                description: "Search the web".into(),
770                input_schema: json!({"type": "object", "properties": {"q": {"type": "string"}}}),
771            }],
772            max_tokens: 1024,
773            tool_choice: None,
774            reasoning_effort: None,
775        };
776
777        let body = build_openai_request("model", &request).unwrap();
778        let tools = body["tools"].as_array().unwrap();
779        assert_eq!(tools.len(), 1);
780        assert_eq!(tools[0]["type"], "function");
781        assert_eq!(tools[0]["function"]["name"], "search");
782    }
783
784    #[test]
785    fn build_request_assistant_with_tool_calls() {
786        let request = CompletionRequest {
787            system: String::new(),
788            messages: vec![
789                Message::user("search for rust"),
790                Message {
791                    role: Role::Assistant,
792                    content: vec![
793                        ContentBlock::Text {
794                            text: "Let me search.".into(),
795                        },
796                        ContentBlock::ToolUse {
797                            id: "call-1".into(),
798                            name: "search".into(),
799                            input: json!({"q": "rust"}),
800                        },
801                    ],
802                },
803            ],
804            tools: vec![],
805            max_tokens: 1024,
806            tool_choice: None,
807            reasoning_effort: None,
808        };
809
810        let body = build_openai_request("model", &request).unwrap();
811        let messages = body["messages"].as_array().unwrap();
812        let assistant_msg = &messages[1];
813        assert_eq!(assistant_msg["role"], "assistant");
814        assert_eq!(assistant_msg["content"], "Let me search.");
815        assert_eq!(assistant_msg["tool_calls"][0]["id"], "call-1");
816        assert_eq!(assistant_msg["tool_calls"][0]["function"]["name"], "search");
817    }
818
819    #[test]
820    fn build_request_tool_results() {
821        use crate::llm::types::ToolResult;
822
823        let request = CompletionRequest {
824            system: String::new(),
825            messages: vec![Message::tool_results(vec![
826                ToolResult::success("call-1", "found it"),
827                ToolResult::error("call-2", "not found"),
828            ])],
829            tools: vec![],
830            max_tokens: 1024,
831            tool_choice: None,
832            reasoning_effort: None,
833        };
834
835        let body = build_openai_request("model", &request).unwrap();
836        let messages = body["messages"].as_array().unwrap();
837        assert_eq!(messages.len(), 2); // Two separate tool messages
838        assert_eq!(messages[0]["role"], "tool");
839        assert_eq!(messages[0]["tool_call_id"], "call-1");
840        assert_eq!(messages[0]["content"], "found it");
841        assert_eq!(messages[1]["role"], "tool");
842        assert_eq!(messages[1]["tool_call_id"], "call-2");
843        // Error tool results get [ERROR] prefix since OpenAI format has no is_error field
844        assert_eq!(messages[1]["content"], "[ERROR] not found");
845    }
846
847    // --- Response parsing tests ---
848
849    #[test]
850    fn parse_text_response() {
851        let api = OpenAiResponse {
852            choices: vec![OpenAiChoice {
853                message: OpenAiMessage {
854                    content: Some("Hello!".into()),
855                    tool_calls: None,
856                },
857                finish_reason: Some("stop".into()),
858            }],
859            usage: Some(OpenAiUsage {
860                prompt_tokens: 10,
861                completion_tokens: 5,
862                ..Default::default()
863            }),
864        };
865
866        let response = into_completion_response(api).unwrap();
867        assert_eq!(response.text(), "Hello!");
868        assert_eq!(response.stop_reason, StopReason::EndTurn);
869        assert_eq!(response.usage.input_tokens, 10);
870        assert_eq!(response.usage.output_tokens, 5);
871    }
872
873    #[test]
874    fn parse_tool_call_response() {
875        let api = OpenAiResponse {
876            choices: vec![OpenAiChoice {
877                message: OpenAiMessage {
878                    content: Some("Let me search.".into()),
879                    tool_calls: Some(vec![OpenAiToolCall {
880                        id: "call_abc".into(),
881                        function: OpenAiFunction {
882                            name: "search".into(),
883                            arguments: r#"{"q":"rust"}"#.into(),
884                        },
885                    }]),
886                },
887                finish_reason: Some("tool_calls".into()),
888            }],
889            usage: Some(OpenAiUsage {
890                prompt_tokens: 20,
891                completion_tokens: 10,
892                ..Default::default()
893            }),
894        };
895
896        let response = into_completion_response(api).unwrap();
897        assert_eq!(response.stop_reason, StopReason::ToolUse);
898        assert_eq!(response.text(), "Let me search.");
899
900        let calls = response.tool_calls();
901        assert_eq!(calls.len(), 1);
902        assert_eq!(calls[0].id, "call_abc");
903        assert_eq!(calls[0].name, "search");
904        assert_eq!(calls[0].input["q"], "rust");
905    }
906
907    #[test]
908    fn parse_max_tokens_response() {
909        let api = OpenAiResponse {
910            choices: vec![OpenAiChoice {
911                message: OpenAiMessage {
912                    content: Some("truncated...".into()),
913                    tool_calls: None,
914                },
915                finish_reason: Some("length".into()),
916            }],
917            usage: None,
918        };
919
920        let response = into_completion_response(api).unwrap();
921        assert_eq!(response.stop_reason, StopReason::MaxTokens);
922    }
923
924    #[test]
925    fn parse_empty_choices_errors() {
926        let api = OpenAiResponse {
927            choices: vec![],
928            usage: None,
929        };
930
931        let err = into_completion_response(api).unwrap_err();
932        assert!(err.to_string().contains("empty choices"));
933        // Must be retryable (status 502 = bad gateway from upstream model)
934        match &err {
935            Error::Api { status, .. } => assert_eq!(*status, 502),
936            other => panic!("expected Error::Api, got: {other:?}"),
937        }
938    }
939
940    #[test]
941    fn parse_parallel_tool_calls() {
942        let api = OpenAiResponse {
943            choices: vec![OpenAiChoice {
944                message: OpenAiMessage {
945                    content: None,
946                    tool_calls: Some(vec![
947                        OpenAiToolCall {
948                            id: "call_1".into(),
949                            function: OpenAiFunction {
950                                name: "search".into(),
951                                arguments: r#"{"q":"a"}"#.into(),
952                            },
953                        },
954                        OpenAiToolCall {
955                            id: "call_2".into(),
956                            function: OpenAiFunction {
957                                name: "read".into(),
958                                arguments: r#"{"path":"/tmp"}"#.into(),
959                            },
960                        },
961                    ]),
962                },
963                finish_reason: Some("tool_calls".into()),
964            }],
965            usage: None,
966        };
967
968        let response = into_completion_response(api).unwrap();
969        let calls = response.tool_calls();
970        assert_eq!(calls.len(), 2);
971        assert_eq!(calls[0].name, "search");
972        assert_eq!(calls[1].name, "read");
973    }
974
975    #[test]
976    fn parse_stop_with_tool_calls_normalizes_to_tool_use() {
977        // Some providers send finish_reason "stop" even with tool_calls present.
978        // We normalize to ToolUse for semantic correctness.
979        let api = OpenAiResponse {
980            choices: vec![OpenAiChoice {
981                message: OpenAiMessage {
982                    content: None,
983                    tool_calls: Some(vec![OpenAiToolCall {
984                        id: "call_1".into(),
985                        function: OpenAiFunction {
986                            name: "search".into(),
987                            arguments: "{}".into(),
988                        },
989                    }]),
990                },
991                finish_reason: Some("stop".into()), // wrong but real
992            }],
993            usage: None,
994        };
995
996        let response = into_completion_response(api).unwrap();
997        assert_eq!(response.stop_reason, StopReason::ToolUse); // normalized
998        assert_eq!(response.tool_calls().len(), 1);
999    }
1000
1001    #[test]
1002    fn build_request_multi_text_blocks_concatenated() {
1003        // Multiple text blocks in a user message should be concatenated into
1004        // a single message to avoid consecutive user messages (OpenAI constraint).
1005        let request = CompletionRequest {
1006            system: String::new(),
1007            messages: vec![Message {
1008                role: Role::User,
1009                content: vec![
1010                    ContentBlock::Text {
1011                        text: "First paragraph.".into(),
1012                    },
1013                    ContentBlock::Text {
1014                        text: "Second paragraph.".into(),
1015                    },
1016                ],
1017            }],
1018            tools: vec![],
1019            max_tokens: 1024,
1020            tool_choice: None,
1021            reasoning_effort: None,
1022        };
1023
1024        let body = build_openai_request("model", &request).unwrap();
1025        let messages = body["messages"].as_array().unwrap();
1026        // Should produce a single user message, not two
1027        assert_eq!(messages.len(), 1);
1028        assert_eq!(messages[0]["role"], "user");
1029        assert_eq!(
1030            messages[0]["content"],
1031            "First paragraph.\n\nSecond paragraph."
1032        );
1033    }
1034
1035    #[test]
1036    fn build_request_mixed_user_message_tool_results_before_text() {
1037        // When a User message has both Text and ToolResult blocks, OpenAI format
1038        // requires tool messages immediately after the assistant's tool_calls.
1039        // The current implementation correctly emits tool messages first, then
1040        // the user text message, regardless of block order in the source message.
1041        let request = CompletionRequest {
1042            system: String::new(),
1043            messages: vec![
1044                Message::user("search for rust"),
1045                Message {
1046                    role: Role::Assistant,
1047                    content: vec![ContentBlock::ToolUse {
1048                        id: "call-1".into(),
1049                        name: "search".into(),
1050                        input: json!({"q": "rust"}),
1051                    }],
1052                },
1053                // Mixed message: text + tool result
1054                Message {
1055                    role: Role::User,
1056                    content: vec![
1057                        ContentBlock::Text {
1058                            text: "Here are the results:".into(),
1059                        },
1060                        ContentBlock::ToolResult {
1061                            tool_use_id: "call-1".into(),
1062                            content: "found it".into(),
1063                            is_error: false,
1064                        },
1065                    ],
1066                },
1067            ],
1068            tools: vec![],
1069            max_tokens: 1024,
1070            tool_choice: None,
1071            reasoning_effort: None,
1072        };
1073
1074        let body = build_openai_request("model", &request).unwrap();
1075        let messages = body["messages"].as_array().unwrap();
1076        // user + assistant + tool + user(text)
1077        assert_eq!(messages.len(), 4);
1078        assert_eq!(messages[0]["role"], "user");
1079        assert_eq!(messages[1]["role"], "assistant");
1080        // Tool result comes before user text (correct for OpenAI format)
1081        assert_eq!(messages[2]["role"], "tool");
1082        assert_eq!(messages[2]["tool_call_id"], "call-1");
1083        assert_eq!(messages[3]["role"], "user");
1084        assert_eq!(messages[3]["content"], "Here are the results:");
1085    }
1086
1087    #[test]
1088    fn build_request_user_with_image_uses_array_content() {
1089        let request = CompletionRequest {
1090            system: String::new(),
1091            messages: vec![Message {
1092                role: Role::User,
1093                content: vec![
1094                    ContentBlock::Text {
1095                        text: "What is this?".into(),
1096                    },
1097                    ContentBlock::Image {
1098                        media_type: "image/jpeg".into(),
1099                        data: "base64data".into(),
1100                    },
1101                ],
1102            }],
1103            tools: vec![],
1104            max_tokens: 1024,
1105            tool_choice: None,
1106            reasoning_effort: None,
1107        };
1108
1109        let body = build_openai_request("model", &request).unwrap();
1110        let messages = body["messages"].as_array().unwrap();
1111        assert_eq!(messages.len(), 1);
1112        // Content should be an array (not a string) when images are present
1113        let content = messages[0]["content"].as_array().unwrap();
1114        assert_eq!(content.len(), 2);
1115        assert_eq!(content[0]["type"], "text");
1116        assert_eq!(content[0]["text"], "What is this?");
1117        assert_eq!(content[1]["type"], "image_url");
1118        assert_eq!(
1119            content[1]["image_url"]["url"],
1120            "data:image/jpeg;base64,base64data"
1121        );
1122    }
1123
1124    #[test]
1125    fn build_request_text_only_still_uses_string_content() {
1126        // Text-only messages should still use string content (backward-compatible)
1127        let request = CompletionRequest {
1128            system: String::new(),
1129            messages: vec![Message::user("hello")],
1130            tools: vec![],
1131            max_tokens: 1024,
1132            tool_choice: None,
1133            reasoning_effort: None,
1134        };
1135
1136        let body = build_openai_request("model", &request).unwrap();
1137        let messages = body["messages"].as_array().unwrap();
1138        // String content, not array
1139        assert!(messages[0]["content"].is_string());
1140        assert_eq!(messages[0]["content"], "hello");
1141    }
1142
1143    #[test]
1144    fn build_request_user_with_audio_uses_input_audio() {
1145        let request = CompletionRequest {
1146            system: String::new(),
1147            messages: vec![Message {
1148                role: Role::User,
1149                content: vec![
1150                    ContentBlock::Text {
1151                        text: "What does this say?".into(),
1152                    },
1153                    ContentBlock::Audio {
1154                        format: "ogg".into(),
1155                        data: "base64audio".into(),
1156                    },
1157                ],
1158            }],
1159            tools: vec![],
1160            max_tokens: 1024,
1161            tool_choice: None,
1162            reasoning_effort: None,
1163        };
1164
1165        let body = build_openai_request("model", &request).unwrap();
1166        let messages = body["messages"].as_array().unwrap();
1167        assert_eq!(messages.len(), 1);
1168        let content = messages[0]["content"].as_array().unwrap();
1169        assert_eq!(content.len(), 2);
1170        assert_eq!(content[0]["type"], "text");
1171        assert_eq!(content[0]["text"], "What does this say?");
1172        assert_eq!(content[1]["type"], "input_audio");
1173        assert_eq!(content[1]["input_audio"]["data"], "base64audio");
1174        assert_eq!(content[1]["input_audio"]["format"], "ogg");
1175    }
1176
1177    #[test]
1178    fn build_request_audio_only_no_text() {
1179        let request = CompletionRequest {
1180            system: String::new(),
1181            messages: vec![Message {
1182                role: Role::User,
1183                content: vec![ContentBlock::Audio {
1184                    format: "mp3".into(),
1185                    data: "audiodata".into(),
1186                }],
1187            }],
1188            tools: vec![],
1189            max_tokens: 1024,
1190            tool_choice: None,
1191            reasoning_effort: None,
1192        };
1193
1194        let body = build_openai_request("model", &request).unwrap();
1195        let messages = body["messages"].as_array().unwrap();
1196        let content = messages[0]["content"].as_array().unwrap();
1197        assert_eq!(content.len(), 1);
1198        assert_eq!(content[0]["type"], "input_audio");
1199    }
1200
1201    #[test]
1202    fn build_request_image_only_no_text() {
1203        let request = CompletionRequest {
1204            system: String::new(),
1205            messages: vec![Message {
1206                role: Role::User,
1207                content: vec![ContentBlock::Image {
1208                    media_type: "image/png".into(),
1209                    data: "abc123".into(),
1210                }],
1211            }],
1212            tools: vec![],
1213            max_tokens: 1024,
1214            tool_choice: None,
1215            reasoning_effort: None,
1216        };
1217
1218        let body = build_openai_request("model", &request).unwrap();
1219        let messages = body["messages"].as_array().unwrap();
1220        let content = messages[0]["content"].as_array().unwrap();
1221        // Only image, no text part
1222        assert_eq!(content.len(), 1);
1223        assert_eq!(content[0]["type"], "image_url");
1224    }
1225
1226    // --- tool_choice tests ---
1227
1228    #[test]
1229    fn build_request_no_tool_choice_omits_field() {
1230        let request = CompletionRequest {
1231            system: String::new(),
1232            messages: vec![Message::user("hi")],
1233            tools: vec![],
1234            max_tokens: 1024,
1235            tool_choice: None,
1236            reasoning_effort: None,
1237        };
1238        let body = build_openai_request("model", &request).unwrap();
1239        assert!(body.get("tool_choice").is_none());
1240    }
1241
1242    #[test]
1243    fn build_request_tool_choice_auto() {
1244        let request = CompletionRequest {
1245            system: String::new(),
1246            messages: vec![Message::user("hi")],
1247            tools: vec![],
1248            max_tokens: 1024,
1249            tool_choice: Some(ToolChoice::Auto),
1250            reasoning_effort: None,
1251        };
1252        let body = build_openai_request("model", &request).unwrap();
1253        assert_eq!(body["tool_choice"], "auto");
1254    }
1255
1256    #[test]
1257    fn build_request_tool_choice_any() {
1258        let request = CompletionRequest {
1259            system: String::new(),
1260            messages: vec![Message::user("hi")],
1261            tools: vec![],
1262            max_tokens: 1024,
1263            tool_choice: Some(ToolChoice::Any),
1264            reasoning_effort: None,
1265        };
1266        let body = build_openai_request("model", &request).unwrap();
1267        // OpenAI uses "required" for "must call a tool"
1268        assert_eq!(body["tool_choice"], "required");
1269    }
1270
1271    #[test]
1272    fn build_request_tool_choice_specific_tool() {
1273        let request = CompletionRequest {
1274            system: String::new(),
1275            messages: vec![Message::user("hi")],
1276            tools: vec![],
1277            max_tokens: 1024,
1278            tool_choice: Some(ToolChoice::Tool {
1279                name: "search".into(),
1280            }),
1281            reasoning_effort: None,
1282        };
1283        let body = build_openai_request("model", &request).unwrap();
1284        assert_eq!(body["tool_choice"]["type"], "function");
1285        assert_eq!(body["tool_choice"]["function"]["name"], "search");
1286    }
1287
1288    #[test]
1289    fn build_request_reasoning_effort_included() {
1290        let request = CompletionRequest {
1291            system: String::new(),
1292            messages: vec![Message::user("hi")],
1293            tools: vec![],
1294            max_tokens: 1024,
1295            tool_choice: None,
1296            reasoning_effort: Some(ReasoningEffort::Medium),
1297        };
1298        let body = build_openai_request("model", &request).unwrap();
1299        assert_eq!(body["reasoning"]["effort"], "medium");
1300    }
1301
1302    #[test]
1303    fn build_request_reasoning_effort_high() {
1304        let request = CompletionRequest {
1305            system: String::new(),
1306            messages: vec![Message::user("hi")],
1307            tools: vec![],
1308            max_tokens: 1024,
1309            tool_choice: None,
1310            reasoning_effort: Some(ReasoningEffort::High),
1311        };
1312        let body = build_openai_request("model", &request).unwrap();
1313        assert_eq!(body["reasoning"]["effort"], "high");
1314    }
1315
1316    #[test]
1317    fn build_request_no_reasoning_effort_omits_field() {
1318        let request = CompletionRequest {
1319            system: String::new(),
1320            messages: vec![Message::user("hi")],
1321            tools: vec![],
1322            max_tokens: 1024,
1323            tool_choice: None,
1324            reasoning_effort: None,
1325        };
1326        let body = build_openai_request("model", &request).unwrap();
1327        assert!(body.get("reasoning").is_none());
1328    }
1329
1330    #[test]
1331    fn parse_response_reasoning_tokens() {
1332        let api = OpenAiResponse {
1333            choices: vec![OpenAiChoice {
1334                message: OpenAiMessage {
1335                    content: Some("Hello!".into()),
1336                    tool_calls: None,
1337                },
1338                finish_reason: Some("stop".into()),
1339            }],
1340            usage: Some(OpenAiUsage {
1341                prompt_tokens: 50,
1342                completion_tokens: 10,
1343                cache_creation_input_tokens: 0,
1344                cache_read_input_tokens: 0,
1345                reasoning_tokens: 25,
1346            }),
1347        };
1348        let response = into_completion_response(api).unwrap();
1349        assert_eq!(response.usage.reasoning_tokens, 25);
1350    }
1351
1352    // --- Roundtrip test: request → response → request ---
1353
1354    #[test]
1355    fn full_conversation_roundtrip() {
1356        use crate::llm::types::ToolResult;
1357
1358        // Build initial request
1359        let request1 = CompletionRequest {
1360            system: "You are helpful.".into(),
1361            messages: vec![Message::user("search for rust")],
1362            tools: vec![ToolDefinition {
1363                name: "search".into(),
1364                description: "Search".into(),
1365                input_schema: json!({"type": "object"}),
1366            }],
1367            max_tokens: 1024,
1368            tool_choice: None,
1369            reasoning_effort: None,
1370        };
1371
1372        let body1 = build_openai_request("model", &request1).unwrap();
1373        assert!(body1["messages"].as_array().unwrap().len() == 2); // system + user
1374
1375        // Simulate tool call response
1376        let response1 = into_completion_response(OpenAiResponse {
1377            choices: vec![OpenAiChoice {
1378                message: OpenAiMessage {
1379                    content: Some("Searching...".into()),
1380                    tool_calls: Some(vec![OpenAiToolCall {
1381                        id: "call_1".into(),
1382                        function: OpenAiFunction {
1383                            name: "search".into(),
1384                            arguments: r#"{"q":"rust"}"#.into(),
1385                        },
1386                    }]),
1387                },
1388                finish_reason: Some("tool_calls".into()),
1389            }],
1390            usage: None,
1391        })
1392        .unwrap();
1393
1394        // Build follow-up with tool results
1395        let request2 = CompletionRequest {
1396            system: "You are helpful.".into(),
1397            messages: vec![
1398                Message::user("search for rust"),
1399                Message {
1400                    role: Role::Assistant,
1401                    content: response1.content,
1402                },
1403                Message::tool_results(vec![ToolResult::success("call_1", "Rust is great")]),
1404            ],
1405            tools: vec![],
1406            max_tokens: 1024,
1407            tool_choice: None,
1408            reasoning_effort: None,
1409        };
1410
1411        let body2 = build_openai_request("model", &request2).unwrap();
1412        let msgs = body2["messages"].as_array().unwrap();
1413        // system + user + assistant (with tool_calls) + tool result
1414        assert_eq!(msgs.len(), 4);
1415        assert_eq!(msgs[0]["role"], "system");
1416        assert_eq!(msgs[1]["role"], "user");
1417        assert_eq!(msgs[2]["role"], "assistant");
1418        assert_eq!(msgs[3]["role"], "tool");
1419    }
1420
1421    // --- Streaming tests ---
1422
1423    fn make_sse_data(chunks: &[&str]) -> String {
1424        chunks
1425            .iter()
1426            .map(|c| format!("data: {c}\n\n"))
1427            .collect::<Vec<_>>()
1428            .join("")
1429            + "data: [DONE]\n\n"
1430    }
1431
1432    #[tokio::test]
1433    async fn stream_text_response() {
1434        let sse = make_sse_data(&[
1435            r#"{"choices":[{"delta":{"content":"Hello"},"finish_reason":null}]}"#,
1436            r#"{"choices":[{"delta":{"content":" world"},"finish_reason":null}]}"#,
1437            r#"{"choices":[{"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":10,"completion_tokens":5}}"#,
1438        ]);
1439
1440        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1441        let received = std::sync::Arc::new(std::sync::Mutex::new(Vec::<String>::new()));
1442        let r = received.clone();
1443        let on_text: &crate::llm::OnText = &move |t: &str| {
1444            r.lock().expect("lock").push(t.to_string());
1445        };
1446
1447        let response = parse_openai_stream(stream, on_text).await.unwrap();
1448        assert_eq!(response.text(), "Hello world");
1449        assert_eq!(response.stop_reason, StopReason::EndTurn);
1450        assert_eq!(response.usage.input_tokens, 10);
1451        assert_eq!(response.usage.output_tokens, 5);
1452
1453        let texts = received.lock().expect("lock");
1454        assert_eq!(*texts, vec!["Hello", " world"]);
1455    }
1456
1457    #[tokio::test]
1458    async fn stream_tool_call_response() {
1459        let sse = make_sse_data(&[
1460            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_1","function":{"name":"search","arguments":""}}]},"finish_reason":null}]}"#,
1461            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"q\":"}}]},"finish_reason":null}]}"#,
1462            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"rust\"}"}}]},"finish_reason":null}]}"#,
1463            r#"{"choices":[{"delta":{},"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":20,"completion_tokens":10}}"#,
1464        ]);
1465
1466        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1467        let on_text: &crate::llm::OnText = &|_| {};
1468
1469        let response = parse_openai_stream(stream, on_text).await.unwrap();
1470        assert_eq!(response.stop_reason, StopReason::ToolUse);
1471
1472        let calls = response.tool_calls();
1473        assert_eq!(calls.len(), 1);
1474        assert_eq!(calls[0].id, "call_1");
1475        assert_eq!(calls[0].name, "search");
1476        assert_eq!(calls[0].input["q"], "rust");
1477    }
1478
1479    #[tokio::test]
1480    async fn stream_parallel_tool_calls() {
1481        let sse = make_sse_data(&[
1482            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"c1","function":{"name":"search","arguments":""}},{"index":1,"id":"c2","function":{"name":"read","arguments":""}}]},"finish_reason":null}]}"#,
1483            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{}"}},{"index":1,"function":{"arguments":"{}"}}]},"finish_reason":null}]}"#,
1484            r#"{"choices":[{"delta":{},"finish_reason":"tool_calls"}]}"#,
1485        ]);
1486
1487        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1488        let on_text: &crate::llm::OnText = &|_| {};
1489
1490        let response = parse_openai_stream(stream, on_text).await.unwrap();
1491        let calls = response.tool_calls();
1492        assert_eq!(calls.len(), 2);
1493        assert_eq!(calls[0].name, "search");
1494        assert_eq!(calls[1].name, "read");
1495    }
1496
1497    #[tokio::test]
1498    async fn stream_text_with_tool_calls() {
1499        let sse = make_sse_data(&[
1500            r#"{"choices":[{"delta":{"content":"Let me search."},"finish_reason":null}]}"#,
1501            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"c1","function":{"name":"search","arguments":"{}"}}]},"finish_reason":null}]}"#,
1502            r#"{"choices":[{"delta":{},"finish_reason":"tool_calls"}]}"#,
1503        ]);
1504
1505        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1506        let on_text: &crate::llm::OnText = &|_| {};
1507
1508        let response = parse_openai_stream(stream, on_text).await.unwrap();
1509        assert_eq!(response.text(), "Let me search.");
1510        assert_eq!(response.tool_calls().len(), 1);
1511    }
1512
1513    #[tokio::test]
1514    async fn stream_max_tokens() {
1515        let sse = make_sse_data(&[
1516            r#"{"choices":[{"delta":{"content":"trunc"},"finish_reason":null}]}"#,
1517            r#"{"choices":[{"delta":{},"finish_reason":"length"}]}"#,
1518        ]);
1519
1520        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1521        let on_text: &crate::llm::OnText = &|_| {};
1522
1523        let response = parse_openai_stream(stream, on_text).await.unwrap();
1524        assert_eq!(response.stop_reason, StopReason::MaxTokens);
1525    }
1526
1527    #[tokio::test]
1528    async fn stream_stop_with_tool_calls_normalizes() {
1529        // Provider sends "stop" with tool calls — should normalize to ToolUse
1530        let sse = make_sse_data(&[
1531            r#"{"choices":[{"delta":{"tool_calls":[{"index":0,"id":"c1","function":{"name":"search","arguments":"{}"}}]},"finish_reason":null}]}"#,
1532            r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
1533        ]);
1534
1535        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1536        let on_text: &crate::llm::OnText = &|_| {};
1537
1538        let response = parse_openai_stream(stream, on_text).await.unwrap();
1539        assert_eq!(response.stop_reason, StopReason::ToolUse); // normalized
1540        assert_eq!(response.tool_calls().len(), 1);
1541    }
1542
1543    #[test]
1544    fn parse_response_with_cache_tokens() {
1545        let api = OpenAiResponse {
1546            choices: vec![OpenAiChoice {
1547                message: OpenAiMessage {
1548                    content: Some("Hello!".into()),
1549                    tool_calls: None,
1550                },
1551                finish_reason: Some("stop".into()),
1552            }],
1553            usage: Some(OpenAiUsage {
1554                prompt_tokens: 100,
1555                completion_tokens: 20,
1556                cache_creation_input_tokens: 80,
1557                cache_read_input_tokens: 60,
1558                reasoning_tokens: 0,
1559            }),
1560        };
1561
1562        let response = into_completion_response(api).unwrap();
1563        assert_eq!(response.usage.input_tokens, 100);
1564        assert_eq!(response.usage.output_tokens, 20);
1565        assert_eq!(response.usage.cache_creation_input_tokens, 80);
1566        assert_eq!(response.usage.cache_read_input_tokens, 60);
1567    }
1568
1569    #[test]
1570    fn parse_response_cache_tokens_default_when_missing() {
1571        let api = OpenAiResponse {
1572            choices: vec![OpenAiChoice {
1573                message: OpenAiMessage {
1574                    content: Some("Hello!".into()),
1575                    tool_calls: None,
1576                },
1577                finish_reason: Some("stop".into()),
1578            }],
1579            usage: Some(OpenAiUsage {
1580                prompt_tokens: 50,
1581                completion_tokens: 10,
1582                cache_creation_input_tokens: 0,
1583                cache_read_input_tokens: 0,
1584                reasoning_tokens: 0,
1585            }),
1586        };
1587
1588        let response = into_completion_response(api).unwrap();
1589        assert_eq!(response.usage.cache_creation_input_tokens, 0);
1590        assert_eq!(response.usage.cache_read_input_tokens, 0);
1591    }
1592
1593    #[tokio::test]
1594    async fn stream_cache_tokens_passthrough() {
1595        let sse = make_sse_data(&[
1596            r#"{"choices":[{"delta":{"content":"hi"},"finish_reason":null}]}"#,
1597            r#"{"choices":[{"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":100,"completion_tokens":10,"cache_creation_input_tokens":80,"cache_read_input_tokens":60}}"#,
1598        ]);
1599
1600        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1601        let on_text: &crate::llm::OnText = &|_| {};
1602
1603        let response = parse_openai_stream(stream, on_text).await.unwrap();
1604        assert_eq!(response.usage.cache_creation_input_tokens, 80);
1605        assert_eq!(response.usage.cache_read_input_tokens, 60);
1606    }
1607
1608    #[tokio::test]
1609    async fn stream_chunked_delivery() {
1610        // SSE data split across multiple byte chunks
1611        let sse = make_sse_data(&[
1612            r#"{"choices":[{"delta":{"content":"he"},"finish_reason":null}]}"#,
1613            r#"{"choices":[{"delta":{"content":"llo"},"finish_reason":null}]}"#,
1614            r#"{"choices":[{"delta":{},"finish_reason":"stop"}]}"#,
1615        ]);
1616        let mid = sse.len() / 2;
1617        let chunk1 = Bytes::from(sse[..mid].to_string());
1618        let chunk2 = Bytes::from(sse[mid..].to_string());
1619
1620        let stream = futures::stream::iter(vec![Ok(chunk1), Ok(chunk2)]);
1621        let on_text: &crate::llm::OnText = &|_| {};
1622
1623        let response = parse_openai_stream(stream, on_text).await.unwrap();
1624        assert_eq!(response.text(), "hello");
1625    }
1626
1627    #[tokio::test]
1628    async fn stream_empty_choices_returns_retryable_error() {
1629        // All chunks have empty choices — should produce a 502 error, not a silent empty response
1630        let sse = make_sse_data(&[
1631            r#"{"choices":[]}"#,
1632            r#"{"choices":[],"usage":{"prompt_tokens":5,"completion_tokens":0}}"#,
1633        ]);
1634
1635        let stream = futures::stream::iter(vec![Ok(Bytes::from(sse))]);
1636        let on_text: &crate::llm::OnText = &|_| {};
1637
1638        let err = parse_openai_stream(stream, on_text).await.unwrap_err();
1639        assert!(
1640            err.to_string().contains("empty choices"),
1641            "expected empty choices error, got: {err}"
1642        );
1643        match &err {
1644            Error::Api { status, .. } => assert_eq!(*status, 502),
1645            other => panic!("expected Error::Api, got: {other:?}"),
1646        }
1647    }
1648
1649    #[test]
1650    fn model_name_returns_configured_model() {
1651        let provider = OpenRouterProvider::new("key", "anthropic/claude-3-opus");
1652        assert_eq!(provider.model_name(), Some("anthropic/claude-3-opus"));
1653    }
1654}