sgr_agent/oxide_client.rs

//! OxideClient — LlmClient adapter for the `openai-oxide` crate.
//!
//! Uses the **Responses API** (`POST /responses`) instead of Chat Completions.
//! With the `oxide-ws` feature: a persistent WebSocket connection for 20-25% lower latency.
//! Supports structured output (json_schema), function calling, and multi-turn chaining (previous_response_id).

use crate::client::LlmClient;
use crate::multimodal;
use crate::tool::ToolDef;
use crate::types::{LlmConfig, Message, Role, SgrError, ToolCall};
use openai_oxide::OpenAI;
use openai_oxide::config::ClientConfig;
use openai_oxide::types::responses::*;
use serde_json::Value;

/// Record OTEL span for Responses API call via shared telemetry helper.
#[cfg(feature = "telemetry")]
fn record_otel_usage(response: &Response, model: &str, messages: &[Message]) {
    let pt = response
        .usage
        .as_ref()
        .and_then(|u| u.input_tokens)
        .unwrap_or(0);
    let ct = response
        .usage
        .as_ref()
        .and_then(|u| u.output_tokens)
        .unwrap_or(0);
    let cached = response
        .usage
        .as_ref()
        .and_then(|u| u.input_tokens_details.as_ref())
        .and_then(|d| d.cached_tokens)
        .unwrap_or(0);

    let input = last_user_content(messages, 500);
    let output_text = response.output_text();
    let output = truncate_str(&output_text, 500);
    let tool_calls: Vec<(String, String)> = response
        .function_calls()
        .iter()
        .map(|fc| (fc.name.clone(), fc.arguments.to_string()))
        .collect();

    crate::telemetry::record_llm_span(
        "oxide.responses.api",
        model,
        &input,
        &output,
        &tool_calls,
        &crate::telemetry::LlmUsage {
            prompt_tokens: pt,
            completion_tokens: ct,
            cached_tokens: cached,
            response_model: response.model.clone(),
        },
    );
}

#[cfg(not(feature = "telemetry"))]
fn record_otel_usage(_response: &Response, _model: &str, _messages: &[Message]) {}

#[cfg(feature = "telemetry")]
fn last_user_content(messages: &[Message], max_len: usize) -> String {
    messages
        .iter()
        .rev()
        .find(|m| matches!(m.role, Role::User | Role::Tool))
        .map(|m| truncate_str(&m.content, max_len))
        .unwrap_or_default()
}

#[cfg(feature = "telemetry")]
fn truncate_str(s: &str, max_len: usize) -> String {
    use crate::str_ext::StrExt;
    let t = s.trunc(max_len);
    if t.len() < s.len() {
        format!("{t}...")
    } else {
        s.to_string()
    }
}

/// LlmClient backed by openai-oxide (Responses API).
///
/// With the `oxide-ws` feature: call `connect_ws()` to upgrade to WebSocket mode.
/// All subsequent calls go over a persistent wss:// connection (20-25% lower latency).
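///
/// # Example
///
/// A minimal usage sketch (assumes an async context and a populated
/// `LlmConfig` named `cfg`; `cfg` is illustrative, not a real binding):
///
/// ```ignore
/// let client = OxideClient::from_config(&cfg)?;
/// #[cfg(feature = "oxide-ws")]
/// client.connect_ws().await?; // lazy: the socket opens on the first request
/// let answer = client.complete(&[Message::user("ping")]).await?;
/// ```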
pub struct OxideClient {
    client: OpenAI,
    pub(crate) model: String,
    pub(crate) temperature: Option<f64>,
    pub(crate) max_tokens: Option<u32>,
    /// WebSocket session (when oxide-ws feature is enabled and connected).
    #[cfg(feature = "oxide-ws")]
    ws: tokio::sync::Mutex<Option<openai_oxide::websocket::WsSession>>,
    /// Lazy WS: true = connect on first request, false = HTTP only.
    #[cfg(feature = "oxide-ws")]
    ws_enabled: std::sync::atomic::AtomicBool,
}

impl OxideClient {
    /// Create from LlmConfig.
    pub fn from_config(config: &LlmConfig) -> Result<Self, SgrError> {
        let api_key = config
            .api_key
            .clone()
            .or_else(|| std::env::var("OPENAI_API_KEY").ok())
            .unwrap_or_else(|| {
                if config.base_url.is_some() {
                    "dummy_key".into()
                } else {
                    "".into()
                }
            });

        if api_key.is_empty() {
            return Err(SgrError::Schema("No API key for oxide client".into()));
        }

        let mut client_config = ClientConfig::new(&api_key);
        if let Some(ref url) = config.base_url {
            client_config = client_config.base_url(url.clone());
        }
        config.apply_headers(&mut client_config);

        Ok(Self {
            client: OpenAI::with_config(client_config),
            model: config.model.clone(),
            temperature: Some(config.temp),
            max_tokens: config.max_tokens,
            #[cfg(feature = "oxide-ws")]
            ws: tokio::sync::Mutex::new(None),
            #[cfg(feature = "oxide-ws")]
            ws_enabled: std::sync::atomic::AtomicBool::new(false),
        })
    }

    /// Enable WebSocket mode — lazy connect on first request.
    ///
    /// Does NOT open a connection immediately. The WS connection is established
    /// on the first `send_request_auto()` call, eliminating idle-timeout issues.
    /// Falls back to HTTP automatically if WS fails.
    ///
    /// Requires the `oxide-ws` feature.
    #[cfg(feature = "oxide-ws")]
    pub async fn connect_ws(&self) -> Result<(), SgrError> {
        self.ws_enabled
            .store(true, std::sync::atomic::Ordering::Relaxed);
        tracing::info!(model = %self.model, "oxide WebSocket enabled (lazy connect)");
        Ok(())
    }

    /// Send request — lazy WS connect + send, falls back to HTTP on any WS error.
    async fn send_request_auto(
        &self,
        request: ResponseCreateRequest,
    ) -> Result<Response, SgrError> {
        #[cfg(feature = "oxide-ws")]
        if self.ws_enabled.load(std::sync::atomic::Ordering::Relaxed) {
            let mut ws_guard = self.ws.lock().await;

            // Lazy connect
            if ws_guard.is_none() {
                match self.client.ws_session().await {
                    Ok(session) => {
                        tracing::info!(model = %self.model, "oxide WS connected (lazy)");
                        *ws_guard = Some(session);
                    }
                    Err(e) => {
                        tracing::warn!("oxide WS connect failed, using HTTP: {e}");
                        self.ws_enabled
                            .store(false, std::sync::atomic::Ordering::Relaxed);
                    }
                }
            }

            if let Some(ref mut session) = *ws_guard {
                match session.send(request.clone()).await {
                    Ok(response) => return Ok(response),
                    Err(e) => {
                        tracing::warn!("oxide WS send failed, falling back to HTTP: {e}");
                        *ws_guard = None;
                    }
                }
            }
        }

        // HTTP fallback
        self.client
            .responses()
            .create(request)
            .await
            .map_err(|e| SgrError::Api {
                status: 0,
                body: e.to_string(),
            })
    }

    /// Build a ResponseCreateRequest from messages + optional schema + optional chaining.
    ///
    /// - `previous_response_id` is None: full history as Messages format
    /// - `previous_response_id` is Some: Items format with function_call_output
    ///   (required for chaining after tool calls via Responses API)
    /// - `schema`: optional structured output json_schema config
    pub(crate) fn build_request(
        &self,
        messages: &[Message],
        schema: Option<&Value>,
        previous_response_id: Option<&str>,
    ) -> ResponseCreateRequest {
        if previous_response_id.is_some() {
            // Items format: messages + function_call_output items.
            // The HTTP API accepts the Messages format (no `type` field), but the
            // WS API requires the `type` field, so use Items consistently to keep
            // both HTTP and WS working.
            return self.build_request_items(messages, previous_response_id);
        }

        // Messages format: standard request with optional structured output
        let mut input_items = Vec::new();

        for msg in messages {
            match msg.role {
                Role::System => {
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::System,
                        content: Value::String(msg.content.clone()),
                    });
                }
                Role::User => {
                    let content = if msg.images.is_empty() {
                        Value::String(msg.content.clone())
                    } else {
                        serde_json::to_value(multimodal::responses_parts(&msg.content, &msg.images))
                            .unwrap_or_else(|_| Value::String(msg.content.clone()))
                    };
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::User,
                        content,
                    });
                }
                Role::Assistant => {
                    // Include tool call info so structured_call context shows
                    // what action was taken.
                    let mut content = msg.content.clone();
                    if !msg.tool_calls.is_empty() {
                        for tc in &msg.tool_calls {
                            let args = tc.arguments.to_string();
                            let preview = if args.len() > 200 {
                                use crate::str_ext::StrExt;
                                args.trunc(200)
                            } else {
                                &args
                            };
                            content.push_str(&format!("\n→ {}({})", tc.name, preview));
                        }
                    }
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::Assistant,
                        content: Value::String(content),
                    });
                }
                Role::Tool => {
                    // Clean format — no "[Tool result for ...]" prefix.
                    // The assistant message above already has the action name.
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::User,
                        content: Value::String(msg.content.clone()),
                    });
                }
            }
        }

        let mut req = ResponseCreateRequest::new(&self.model);

        // Set input — prefer simple text when single user message (fewer tokens)
        if input_items.len() == 1 && input_items[0].role == openai_oxide::types::common::Role::User
        {
            if let Some(text) = input_items[0].content.as_str() {
                req = req.input(text);
            } else {
                req.input = Some(ResponseInput::Messages(input_items));
            }
        } else if !input_items.is_empty() {
            req.input = Some(ResponseInput::Messages(input_items));
        }

        // Temperature — skip default to reduce payload
        if let Some(temp) = self.temperature
            && (temp - 1.0).abs() > f64::EPSILON
        {
            req = req.temperature(temp);
        }

        // Max tokens
        if let Some(max) = self.max_tokens {
            req = req.max_output_tokens(max as i64);
        }

        // Structured output via json_schema
        if let Some(schema_val) = schema {
            req = req.text(ResponseTextConfig {
                format: Some(ResponseTextFormat::JsonSchema {
                    name: "sgr_response".into(),
                    description: None,
                    schema: Some(schema_val.clone()),
                    strict: Some(true),
                }),
                verbosity: None,
            });
        }

        req
    }

    /// Build Items-format request for stateful chaining with previous_response_id.
    fn build_request_items(
        &self,
        messages: &[Message],
        previous_response_id: Option<&str>,
    ) -> ResponseCreateRequest {
        use openai_oxide::types::responses::ResponseInput;

        let mut items: Vec<Value> = Vec::new();

        for msg in messages {
            match msg.role {
                Role::Tool => {
                    if let Some(ref call_id) = msg.tool_call_id {
                        items.push(serde_json::json!({
                            "type": "function_call_output",
                            "call_id": call_id,
                            "output": msg.content
                        }));
                    }
                }
                Role::System => {
                    items.push(serde_json::json!({
                        "type": "message",
                        "role": "system",
                        "content": msg.content
                    }));
                }
                Role::User => {
                    let content = if msg.images.is_empty() {
                        serde_json::json!(msg.content)
                    } else {
                        serde_json::to_value(multimodal::responses_parts(&msg.content, &msg.images))
                            .unwrap_or_else(|_| serde_json::json!(msg.content))
                    };
                    items.push(serde_json::json!({
                        "type": "message",
                        "role": "user",
                        "content": content,
                    }));
                }
                Role::Assistant => {
                    items.push(serde_json::json!({
                        "type": "message",
                        "role": "assistant",
                        "content": msg.content
                    }));
                }
            }
        }

        let mut req = ResponseCreateRequest::new(&self.model);
        if !items.is_empty() {
            req.input = Some(ResponseInput::Items(items));
        }

        // Temperature
        if let Some(temp) = self.temperature
            && (temp - 1.0).abs() > f64::EPSILON
        {
            req = req.temperature(temp);
        }
        if let Some(max) = self.max_tokens {
            req = req.max_output_tokens(max as i64);
        }

        if let Some(prev_id) = previous_response_id {
            req = req.previous_response_id(prev_id);
        }

        req
    }

    /// Function calling with explicit previous_response_id.
    /// Returns tool calls + new response_id for chaining.
    ///
    /// Always sets `store(true)` so responses can be referenced by subsequent calls.
    /// When `previous_response_id` is provided, only delta messages need to be sent
    /// (server has full history from previous stored response).
    ///
    /// Tool messages (role=Tool with tool_call_id) are converted to Responses API
    /// `function_call_output` items — required for chaining with previous_response_id.
    ///
    /// This method does NOT use the Mutex — all state is explicit via parameters/return.
    async fn tools_call_stateful_impl(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
        previous_response_id: Option<&str>,
    ) -> Result<(Vec<ToolCall>, Option<String>), SgrError> {
        let mut req = self.build_request(messages, None, previous_response_id);
        // Always store so next call can chain via previous_response_id
        req = req.store(true);

        // Convert ToolDefs to ResponseTools with strict mode.
        // strict: true guarantees LLM output matches schema exactly (no parse errors).
        // oxide ensure_strict() handles: additionalProperties, all-required,
        // nullable→anyOf, allOf inlining, oneOf→anyOf.
        let response_tools: Vec<ResponseTool> = tools
            .iter()
            .map(|t| {
                let mut params = t.parameters.clone();
                openai_oxide::parsing::ensure_strict(&mut params);
                ResponseTool::Function {
                    name: t.name.clone(),
                    description: if t.description.is_empty() {
                        None
                    } else {
                        Some(t.description.clone())
                    },
                    parameters: Some(params),
                    strict: Some(true),
                }
            })
            .collect();
        req = req.tools(response_tools);

        let response = self.send_request_auto(req).await?;

        let response_id = response.id.clone();
        // No Mutex save — caller owns the response_id
        record_otel_usage(&response, &self.model, messages);

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);

        let chained = previous_response_id.is_some();
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };

        tracing::info!(
            model = %response.model,
            response_id = %response_id,
            input_tokens,
            cached_tokens,
            cache_pct,
            chained,
            "oxide.tools_call_stateful"
        );

        if cached_tokens > 0 {
            eprintln!(
                "    💰 {}in/{}out (cached: {}, {}%)",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0),
                cached_tokens,
                cache_pct
            );
        } else {
            eprintln!(
                "    💰 {}in/{}out",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0)
            );
        }

        Self::check_truncation(&response)?;
        Ok((Self::extract_tool_calls(&response), Some(response_id)))
    }

    /// Check if response was truncated due to max_output_tokens.
    /// Returns Err(MaxOutputTokens) if truncated, Ok(()) otherwise.
    fn check_truncation(response: &Response) -> Result<(), SgrError> {
        let is_incomplete = response
            .status
            .as_deref()
            .is_some_and(|s| s == "incomplete");
        let is_max_tokens = response
            .incomplete_details
            .as_ref()
            .and_then(|d| d.reason.as_deref())
            .is_some_and(|r| r == "max_output_tokens");

        if is_incomplete && is_max_tokens {
            return Err(SgrError::MaxOutputTokens {
                partial_content: response.output_text(),
            });
        }
        Ok(())
    }

    /// Extract tool calls from Responses API output items.
    fn extract_tool_calls(response: &Response) -> Vec<ToolCall> {
        response
            .function_calls()
            .into_iter()
            .map(|fc| ToolCall {
                id: fc.call_id,
                name: fc.name,
                arguments: fc.arguments,
            })
            .collect()
    }
}

#[async_trait::async_trait]
impl LlmClient for OxideClient {
    async fn structured_call(
        &self,
        messages: &[Message],
        schema: &Value,
    ) -> Result<(Option<Value>, Vec<ToolCall>, String), SgrError> {
        // Make schema OpenAI-strict — UNLESS it's already strict
        // (build_action_schema produces pre-strict schemas that ensure_strict would break)
        let strict_schema =
            if schema.get("additionalProperties").and_then(|v| v.as_bool()) == Some(false) {
                // Already strict-compatible (e.g., from build_action_schema)
                schema.clone()
            } else {
                let mut s = schema.clone();
                openai_oxide::parsing::ensure_strict(&mut s);
                s
            };

        // Stateless — build request with full message history, no chaining.
        // store(true) enables server-side prompt caching for stable prefix.
        let mut req = self.build_request(messages, Some(&strict_schema), None);
        req = req.store(true);

        let span = tracing::info_span!(
            "oxide.responses.create",
            model = %self.model,
            method = "structured_call",
        );
        // Attach the span via Instrument rather than holding span.enter():
        // the Entered guard is !Send and must not live across .await points.

        // Debug: dump schema on first call
        if std::env::var("SGR_DEBUG_SCHEMA").is_ok()
            && let Some(ref text_cfg) = req.text
        {
            eprintln!(
                "[sgr] Schema: {}",
                serde_json::to_string(text_cfg).unwrap_or_default()
            );
        }

        use tracing::Instrument;
        let response = self.send_request_auto(req).instrument(span).await?;

        // No Mutex save — structured_call is stateless
        record_otel_usage(&response, &self.model, messages);

        Self::check_truncation(&response)?;

        let raw_text = response.output_text();
        if std::env::var("SGR_DEBUG").is_ok() {
            eprintln!("[sgr] Raw response: {}", {
                use crate::str_ext::StrExt;
                raw_text.trunc(500)
            });
        }
        let tool_calls = Self::extract_tool_calls(&response);
        let parsed = serde_json::from_str::<Value>(&raw_text).ok();

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };

        {
            let output_tokens = response
                .usage
                .as_ref()
                .and_then(|u| u.output_tokens)
                .unwrap_or(0);
            if cached_tokens > 0 {
                eprintln!(
                    "    💰 {}in/{}out (cached: {}, {}%)",
                    input_tokens, output_tokens, cached_tokens, cache_pct
                );
            } else {
                eprintln!("    💰 {}in/{}out", input_tokens, output_tokens);
            }
        }

        Ok((parsed, tool_calls, raw_text))
    }

    async fn tools_call(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
    ) -> Result<Vec<ToolCall>, SgrError> {
        // Stateless — no previous_response_id, full message history.
        // store(true) enables server-side prompt caching: OpenAI auto-caches
        // the stable prefix (system prompt + tools) for requests >1024 tokens.
        let mut req = self.build_request(messages, None, None);
        req = req.store(true);

        // Convert ToolDefs to ResponseTools — no strict mode (faster server-side)
        let response_tools: Vec<ResponseTool> = tools
            .iter()
            .map(|t| ResponseTool::Function {
                name: t.name.clone(),
                description: if t.description.is_empty() {
                    None
                } else {
                    Some(t.description.clone())
                },
                parameters: Some(t.parameters.clone()),
                strict: None, // AI-NOTE: strict=true breaks tools with optional params
            })
            .collect();
        req = req.tools(response_tools);

        // Force model to always call a tool — prevents text-only responses
        // that lose answer content (tools_call only returns Vec<ToolCall>).
        req = req.tool_choice(openai_oxide::types::responses::ResponseToolChoice::Mode(
            "required".into(),
        ));
        // AI-NOTE: explicit parallel_tool_calls=true for OpenAI. Anthropic models on OpenRouter
        // reject this param (404); they use `disable_parallel_tool_use` natively, not exposed here.
        if !self.model.contains("anthropic/") && !self.model.starts_with("claude") {
            req = req.parallel_tool_calls(true);
        }

        let response = self.send_request_auto(req).await?;

        record_otel_usage(&response, &self.model, messages);
        Self::check_truncation(&response)?;

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };

        if cached_tokens > 0 {
            eprintln!(
                "    💰 {}in/{}out (cached: {}, {}%)",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0),
                cached_tokens,
                cache_pct
            );
        } else {
            eprintln!(
                "    💰 {}in/{}out",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0)
            );
        }

        let calls = Self::extract_tool_calls(&response);
        Ok(calls)
    }

    async fn tools_call_stateful(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
        previous_response_id: Option<&str>,
    ) -> Result<(Vec<ToolCall>, Option<String>), SgrError> {
        self.tools_call_stateful_impl(messages, tools, previous_response_id)
            .await
    }

    /// tool_choice=auto so the model can emit reasoning text ALONGSIDE tool calls in one response.
    /// Returns (tool_calls, reasoning_text). Used by the single-phase agent to get 1 LLM call/step.
    async fn tools_call_with_text(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
    ) -> Result<(Vec<ToolCall>, String), SgrError> {
        let mut req = self.build_request(messages, None, None);
        req = req.store(true);

        let response_tools: Vec<ResponseTool> = tools
            .iter()
            .map(|t| ResponseTool::Function {
                name: t.name.clone(),
                description: if t.description.is_empty() {
                    None
                } else {
                    Some(t.description.clone())
                },
                parameters: Some(t.parameters.clone()),
                strict: None,
            })
            .collect();
        req = req.tools(response_tools);
        // AI-NOTE: tool_choice=auto (not required) — model can return text+tools in same response.
        // This is the key for single-phase: reasoning in text, action in tool calls, 1 LLM call.
        req = req.tool_choice(openai_oxide::types::responses::ResponseToolChoice::Mode(
            "auto".into(),
        ));
        req = req.parallel_tool_calls(true);

        let response = self.send_request_auto(req).await?;

        record_otel_usage(&response, &self.model, messages);
        Self::check_truncation(&response)?;

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };
        let output_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.output_tokens)
            .unwrap_or(0);
        if cached_tokens > 0 {
            eprintln!(
                "    💰 {}in/{}out (cached: {}, {}%)",
                input_tokens, output_tokens, cached_tokens, cache_pct
            );
        } else {
            eprintln!("    💰 {}in/{}out", input_tokens, output_tokens);
        }

        let text = response.output_text();
        let calls = Self::extract_tool_calls(&response);
        Ok((calls, text))
    }

    async fn complete(&self, messages: &[Message]) -> Result<String, SgrError> {
        let mut req = self.build_request(messages, None, None);
        req = req.store(true);

        let response = self.send_request_auto(req).await?;

        record_otel_usage(&response, &self.model, messages);
        Self::check_truncation(&response)?;

        let text = response.output_text();
        if text.is_empty() {
            return Err(SgrError::EmptyResponse);
        }

        tracing::info!(
            model = %response.model,
            response_id = %response.id,
            input_tokens = response.usage.as_ref().and_then(|u| u.input_tokens).unwrap_or(0),
            output_tokens = response.usage.as_ref().and_then(|u| u.output_tokens).unwrap_or(0),
            "oxide.complete"
        );

        Ok(text)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::ImagePart;

    #[test]
    fn oxide_client_from_config() {
        // Just test construction doesn't panic
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        assert_eq!(client.model, "gpt-5.4");
    }

    #[test]
    fn build_request_simple() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4").temperature(0.5);
        let client = OxideClient::from_config(&config).unwrap();
        let messages = vec![Message::system("Be helpful."), Message::user("Hello")];
        let req = client.build_request(&messages, None, None);
        assert_eq!(req.model, "gpt-5.4");
        assert!(req.instructions.is_none());
        assert!(req.input.is_some());
        assert_eq!(req.temperature, Some(0.5));
    }
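
    // Sketch of the payload-reduction rule in build_request: temperature is
    // skipped when it equals the API default of 1.0 (assumes
    // ResponseCreateRequest::new leaves temperature unset).
    #[test]
    fn build_request_skips_default_temperature() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4").temperature(1.0);
        let client = OxideClient::from_config(&config).unwrap();
        let req = client.build_request(&[Message::user("Hi")], None, None);
        assert_eq!(req.temperature, None, "default temp 1.0 should be omitted");
    }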

    #[test]
    fn build_request_with_schema() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let schema = serde_json::json!({
            "type": "object",
            "properties": {"answer": {"type": "string"}},
            "required": ["answer"]
        });
        let req = client.build_request(&[Message::user("Hi")], Some(&schema), None);
        assert!(req.text.is_some());
    }
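
    // Sketch of the single-message fast path: build_request's else branch is
    // the only place the Messages variant is produced, so a lone plain-text
    // user message should not serialize as one (the exact text variant is an
    // openai-oxide detail, left unasserted here).
    #[test]
    fn build_request_single_user_prefers_text_input() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let req = client.build_request(&[Message::user("Hi")], None, None);
        assert!(req.input.is_some());
        assert!(!matches!(req.input, Some(ResponseInput::Messages(_))));
    }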

    #[test]
    fn build_request_stateless_no_previous_response_id() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();

        let req = client.build_request(&[Message::user("Hi")], None, None);
        assert!(
            req.previous_response_id.is_none(),
            "build_request must be stateless when no explicit ID"
        );
    }
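
    // Sketch: multi-message history takes the Messages branch of
    // build_request, one input item per message.
    #[test]
    fn build_request_multi_message_uses_messages_input() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let messages = vec![Message::system("Be helpful."), Message::user("Hello")];
        let req = client.build_request(&messages, None, None);
        assert!(
            matches!(req.input, Some(ResponseInput::Messages(ref items)) if items.len() == 2),
            "system + user history should serialize as two message items"
        );
    }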

    #[test]
    fn build_request_explicit_chaining() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();

        // With previous_response_id — uses Items format for chaining
        let req = client.build_request(&[Message::user("Hi")], None, Some("resp_xyz"));
        assert_eq!(
            req.previous_response_id.as_deref(),
            Some("resp_xyz"),
            "build_request should chain with explicit previous_response_id"
        );
    }

    #[test]
    fn build_request_tool_outputs_chaining() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();

        // With previous_response_id — tool outputs as function_call_output items
        let messages = vec![Message::tool("call_1", "result data")];
        let req = client.build_request(&messages, None, Some("resp_123"));
        assert_eq!(req.previous_response_id.as_deref(), Some("resp_123"));

        // Without previous_response_id
        let req = client.build_request(&messages, None, None);
        assert!(
            req.previous_response_id.is_none(),
            "build_request must be stateless when no explicit ID"
        );
    }
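
    // Sketch of the Items-format shape for tool results when chaining (see
    // build_request_items). ResponseInput's exact serialization is an
    // openai-oxide detail, so this only does substring checks, mirroring the
    // multimodal tests below.
    #[test]
    fn build_request_items_tool_output_shape() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let messages = vec![Message::tool("call_1", "result data")];
        let req = client.build_request(&messages, None, Some("resp_123"));
        let s = serde_json::to_string(req.input.as_ref().expect("input missing")).unwrap();
        assert!(s.contains("function_call_output"), "missing item type: {s}");
        assert!(s.contains("call_1"), "missing call_id: {s}");
    }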

    #[test]
    fn build_request_multimodal_user() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let img = ImagePart {
            data: "AAAA".into(),
            mime_type: "image/jpeg".into(),
        };
        let messages = vec![Message::user_with_images("Describe this", vec![img])];
        let req = client.build_request(&messages, None, None);

        // Single user with images must serialize as content-parts array, not string
        let input = req.input.as_ref().expect("input missing");
        let serialized = serde_json::to_value(input).unwrap();
        let s = serde_json::to_string(&serialized).unwrap();
        assert!(s.contains("input_text"), "missing input_text part: {s}");
        assert!(s.contains("input_image"), "missing input_image part: {s}");
        assert!(
            s.contains("data:image/jpeg;base64,AAAA"),
            "missing data URL: {s}"
        );
    }

    #[test]
    fn build_request_items_multimodal_user() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let img = ImagePart {
            data: "BBBB".into(),
            mime_type: "image/png".into(),
        };
        let messages = vec![Message::user_with_images("What's on screen?", vec![img])];
        // previous_response_id triggers items-format path
        let req = client.build_request(&messages, None, Some("resp_prev"));

        let input = req.input.as_ref().expect("input missing");
        let s = serde_json::to_string(input).unwrap();
        assert!(
            s.contains("input_text"),
            "items path missing input_text: {s}"
        );
        assert!(
            s.contains("input_image"),
            "items path missing input_image: {s}"
        );
        assert!(
            s.contains("data:image/png;base64,BBBB"),
            "items path missing data URL: {s}"
        );
    }
}