use crate::client::LlmClient;
use crate::multimodal;
use crate::tool::ToolDef;
use crate::types::{LlmConfig, Message, Role, SgrError, ToolCall};
use openai_oxide::OpenAI;
use openai_oxide::config::ClientConfig;
use openai_oxide::types::responses::*;
use serde_json::Value;

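/// Record token usage and truncated input/output previews for one Responses
/// API call on the active OpenTelemetry span.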
#[cfg(feature = "telemetry")]
fn record_otel_usage(response: &Response, model: &str, messages: &[Message]) {
    let pt = response
        .usage
        .as_ref()
        .and_then(|u| u.input_tokens)
        .unwrap_or(0);
    let ct = response
        .usage
        .as_ref()
        .and_then(|u| u.output_tokens)
        .unwrap_or(0);
    let cached = response
        .usage
        .as_ref()
        .and_then(|u| u.input_tokens_details.as_ref())
        .and_then(|d| d.cached_tokens)
        .unwrap_or(0);

    let input = last_user_content(messages, 500);
    let output_text = response.output_text();
    let output = truncate_str(&output_text, 500);
    let tool_calls: Vec<(String, String)> = response
        .function_calls()
        .iter()
        .map(|fc| (fc.name.clone(), fc.arguments.to_string()))
        .collect();

    crate::telemetry::record_llm_span(
        "oxide.responses.api",
        model,
        &input,
        &output,
        &tool_calls,
        &crate::telemetry::LlmUsage {
            prompt_tokens: pt,
            completion_tokens: ct,
            cached_tokens: cached,
            response_model: response.model.clone(),
        },
    );
}

#[cfg(not(feature = "telemetry"))]
fn record_otel_usage(_response: &Response, _model: &str, _messages: &[Message]) {}

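/// Content of the most recent user- or tool-role message, truncated to `max_len`.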
#[cfg(feature = "telemetry")]
fn last_user_content(messages: &[Message], max_len: usize) -> String {
    messages
        .iter()
        .rev()
        .find(|m| matches!(m.role, Role::User | Role::Tool))
        .map(|m| truncate_str(&m.content, max_len))
        .unwrap_or_default()
}

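/// Truncate `s` to at most `max_len` via `StrExt::trunc`, appending `...`
/// when anything was cut.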
#[cfg(feature = "telemetry")]
fn truncate_str(s: &str, max_len: usize) -> String {
    use crate::str_ext::StrExt;
    let t = s.trunc(max_len);
    if t.len() < s.len() {
        format!("{t}...")
    } else {
        s.to_string()
    }
}

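/// LLM client backed by the `openai_oxide` Responses API, with an optional
/// WebSocket transport behind the `oxide-ws` feature.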
pub struct OxideClient {
    client: OpenAI,
    pub(crate) model: String,
    pub(crate) temperature: Option<f64>,
    pub(crate) max_tokens: Option<u32>,
    pub(crate) verbosity: Option<String>,
    pub(crate) reasoning_effort: Option<openai_oxide::types::common::ReasoningEffort>,
    #[cfg(feature = "oxide-ws")]
    ws: tokio::sync::Mutex<Option<openai_oxide::websocket::WsSession>>,
    #[cfg(feature = "oxide-ws")]
    ws_enabled: std::sync::atomic::AtomicBool,
}

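/// Whether `model` accepts a `temperature` parameter: GPT-5-family and
/// o-series reasoning models reject it, so it is omitted for those.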
pub(crate) fn model_supports_temperature(model: &str) -> bool {
    let m = model.strip_prefix("openai/").unwrap_or(model);
    !(m.starts_with("gpt-5") || m.starts_with("o1") || m.starts_with("o3") || m.starts_with("o4"))
}

impl OxideClient {
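    /// Build a client from `LlmConfig`. The API key comes from the config or
    /// `OPENAI_API_KEY`; with a custom `base_url` a placeholder key is
    /// substituted (e.g. for local servers that ignore authentication).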
    pub fn from_config(config: &LlmConfig) -> Result<Self, SgrError> {
        let api_key = config
            .api_key
            .clone()
            .or_else(|| std::env::var("OPENAI_API_KEY").ok())
            .unwrap_or_else(|| {
                if config.base_url.is_some() {
                    "dummy_key".into()
                } else {
                    "".into()
                }
            });

        if api_key.is_empty() {
            return Err(SgrError::Schema("No API key for oxide client".into()));
        }

        let mut client_config = ClientConfig::new(&api_key);
        if let Some(ref url) = config.base_url {
            client_config = client_config.base_url(url.clone());
        }
        config.apply_headers(&mut client_config);

        let reasoning_effort = config.reasoning_effort.as_deref().and_then(|s| {
            use openai_oxide::types::common::ReasoningEffort;
            match s {
                "none" => Some(ReasoningEffort::None),
                "minimal" => Some(ReasoningEffort::Minimal),
                "low" => Some(ReasoningEffort::Low),
                "medium" => Some(ReasoningEffort::Medium),
                "high" => Some(ReasoningEffort::High),
                "xhigh" => Some(ReasoningEffort::Xhigh),
                _ => None,
            }
        });

        Ok(Self {
            client: OpenAI::with_config(client_config),
            model: config.model.clone(),
            temperature: Some(config.temp),
            max_tokens: config.max_tokens,
            verbosity: config.verbosity.clone(),
            reasoning_effort,
            #[cfg(feature = "oxide-ws")]
            ws: tokio::sync::Mutex::new(None),
            #[cfg(feature = "oxide-ws")]
            ws_enabled: std::sync::atomic::AtomicBool::new(false),
        })
    }

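    /// Mark the WebSocket transport as enabled. The connection itself is
    /// established lazily by `send_request_auto` on the next request.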
    #[cfg(feature = "oxide-ws")]
    pub async fn connect_ws(&self) -> Result<(), SgrError> {
        self.ws_enabled
            .store(true, std::sync::atomic::Ordering::Relaxed);
        tracing::info!(model = %self.model, "oxide WebSocket enabled (lazy connect)");
        Ok(())
    }

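    /// Send a request over the WebSocket session when enabled, connecting
    /// lazily on first use; any connect or send failure falls back to HTTP
    /// and drops the broken session.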
    async fn send_request_auto(
        &self,
        request: ResponseCreateRequest,
    ) -> Result<Response, SgrError> {
        #[cfg(feature = "oxide-ws")]
        if self.ws_enabled.load(std::sync::atomic::Ordering::Relaxed) {
            let mut ws_guard = self.ws.lock().await;

            if ws_guard.is_none() {
                match self.client.ws_session().await {
                    Ok(session) => {
                        tracing::info!(model = %self.model, "oxide WS connected (lazy)");
                        *ws_guard = Some(session);
                    }
                    Err(e) => {
                        tracing::warn!("oxide WS connect failed, using HTTP: {e}");
                        self.ws_enabled
                            .store(false, std::sync::atomic::Ordering::Relaxed);
                    }
                }
            }

            if let Some(ref mut session) = *ws_guard {
                match session.send(request.clone()).await {
                    Ok(response) => return Ok(response),
                    Err(e) => {
                        tracing::warn!("oxide WS send failed, falling back to HTTP: {e}");
                        *ws_guard = None;
                    }
                }
            }
        }

        self.client
            .responses()
            .create(request)
            .await
            .map_err(|e| SgrError::Api {
                status: 0,
                body: e.to_string(),
            })
    }

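    /// Build a Responses API request from chat-style messages. With a
    /// `previous_response_id` the messages are delegated to
    /// `build_request_items` for chaining; otherwise they are flattened into
    /// role/content pairs. A JSON schema, if given, enables strict
    /// structured output.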
    pub(crate) fn build_request(
        &self,
        messages: &[Message],
        schema: Option<&Value>,
        previous_response_id: Option<&str>,
    ) -> ResponseCreateRequest {
        if previous_response_id.is_some() {
            return self.build_request_items(messages, previous_response_id);
        }

        let mut input_items = Vec::new();

        for msg in messages {
            match msg.role {
                Role::System => {
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::System,
                        content: Value::String(msg.content.clone()),
                    });
                }
                Role::User => {
                    let content = if msg.images.is_empty() {
                        Value::String(msg.content.clone())
                    } else {
                        serde_json::to_value(multimodal::responses_parts(&msg.content, &msg.images))
                            .unwrap_or_else(|_| Value::String(msg.content.clone()))
                    };
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::User,
                        content,
                    });
                }
                Role::Assistant => {
                    let mut content = msg.content.clone();
                    if !msg.tool_calls.is_empty() {
                        for tc in &msg.tool_calls {
                            let args = tc.arguments.to_string();
                            let preview = if args.len() > 200 {
                                use crate::str_ext::StrExt;
                                args.trunc(200)
                            } else {
                                &args
                            };
                            content.push_str(&format!("\n→ {}({})", tc.name, preview));
                        }
                    }
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::Assistant,
                        content: Value::String(content),
                    });
                }
                Role::Tool => {
                    input_items.push(ResponseInputItem {
                        role: openai_oxide::types::common::Role::User,
                        content: Value::String(msg.content.clone()),
                    });
                }
            }
        }

        let mut req = ResponseCreateRequest::new(&self.model);

        if input_items.len() == 1
            && input_items[0].role == openai_oxide::types::common::Role::User
        {
            if let Some(text) = input_items[0].content.as_str() {
                req = req.input(text);
            } else {
                req.input = Some(ResponseInput::Messages(input_items));
            }
        } else if !input_items.is_empty() {
            req.input = Some(ResponseInput::Messages(input_items));
        }

        if let Some(temp) = self.temperature
            && (temp - 1.0).abs() > f64::EPSILON
            && model_supports_temperature(&self.model)
        {
            req = req.temperature(temp);
        }

        if let Some(max) = self.max_tokens {
            req = req.max_output_tokens(max as i64);
        }

        if let Some(ref effort) = self.reasoning_effort {
            req.reasoning = Some(openai_oxide::types::responses::Reasoning {
                effort: Some(effort.clone()),
                summary: None,
            });
        }

        match (schema, self.verbosity.clone()) {
            (Some(schema_val), v) => {
                req = req.text(ResponseTextConfig {
                    format: Some(ResponseTextFormat::JsonSchema {
                        name: "sgr_response".into(),
                        description: None,
                        schema: Some(schema_val.clone()),
                        strict: Some(true),
                    }),
                    verbosity: v,
                });
            }
            (None, Some(v)) => {
                req = req.text(ResponseTextConfig {
                    format: None,
                    verbosity: Some(v),
                });
            }
            (None, None) => {}
        }

        req
    }

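    /// Encode messages as typed Responses API input items (`message` and
    /// `function_call_output`) for use with `previous_response_id` chaining.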
    fn build_request_items(
        &self,
        messages: &[Message],
        previous_response_id: Option<&str>,
    ) -> ResponseCreateRequest {
        use openai_oxide::types::responses::ResponseInput;

        let mut items: Vec<Value> = Vec::new();

        for msg in messages {
            match msg.role {
                Role::Tool => {
                    if let Some(ref call_id) = msg.tool_call_id {
                        items.push(serde_json::json!({
                            "type": "function_call_output",
                            "call_id": call_id,
                            "output": msg.content
                        }));
                    }
                }
                Role::System => {
                    items.push(serde_json::json!({
                        "type": "message",
                        "role": "system",
                        "content": msg.content
                    }));
                }
                Role::User => {
                    let content = if msg.images.is_empty() {
                        serde_json::json!(msg.content)
                    } else {
                        serde_json::to_value(multimodal::responses_parts(&msg.content, &msg.images))
                            .unwrap_or_else(|_| serde_json::json!(msg.content))
                    };
                    items.push(serde_json::json!({
                        "type": "message",
                        "role": "user",
                        "content": content,
                    }));
                }
                Role::Assistant => {
                    items.push(serde_json::json!({
                        "type": "message",
                        "role": "assistant",
                        "content": msg.content
                    }));
                }
            }
        }

        let mut req = ResponseCreateRequest::new(&self.model);
        if !items.is_empty() {
            req.input = Some(ResponseInput::Items(items));
        }

        if let Some(temp) = self.temperature
            && (temp - 1.0).abs() > f64::EPSILON
            && model_supports_temperature(&self.model)
        {
            req = req.temperature(temp);
        }
        if let Some(max) = self.max_tokens {
            req = req.max_output_tokens(max as i64);
        }

        if let Some(v) = self.verbosity.clone() {
            req = req.text(ResponseTextConfig {
                format: None,
                verbosity: Some(v),
            });
        }

        if let Some(prev_id) = previous_response_id {
            req = req.previous_response_id(prev_id);
        }

        req
    }

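    /// Shared stateful tool-calling path: stores the response server-side so
    /// the returned ID can be chained, attaches strict tool schemas, and
    /// logs token usage including prompt-cache hits.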
    async fn tools_call_stateful_impl(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
        previous_response_id: Option<&str>,
    ) -> Result<(Vec<ToolCall>, Option<String>), SgrError> {
        let mut req = self.build_request(messages, None, previous_response_id);
        req = req.store(true);

        let response_tools: Vec<ResponseTool> = tools
            .iter()
            .map(|t| {
                let mut params = t.parameters.clone();
                openai_oxide::parsing::ensure_strict(&mut params);
                ResponseTool::Function {
                    name: t.name.clone(),
                    description: if t.description.is_empty() {
                        None
                    } else {
                        Some(t.description.clone())
                    },
                    parameters: Some(params),
                    strict: Some(true),
                }
            })
            .collect();
        req = req.tools(response_tools);

        let response = self.send_request_auto(req).await?;

        let response_id = response.id.clone();
        record_otel_usage(&response, &self.model, messages);

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);

        let chained = previous_response_id.is_some();
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };

        tracing::info!(
            model = %response.model,
            response_id = %response_id,
            input_tokens,
            cached_tokens,
            cache_pct,
            chained,
            "oxide.tools_call_stateful"
        );

        if cached_tokens > 0 {
            eprintln!(
                " 💰 {}in/{}out (cached: {}, {}%)",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0),
                cached_tokens,
                cache_pct
            );
        } else {
            eprintln!(
                " 💰 {}in/{}out",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0)
            );
        }

        Self::check_truncation(&response)?;
        Ok((Self::extract_tool_calls(&response), Some(response_id)))
    }

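    /// Map a truncated response (status `incomplete` with reason
    /// `max_output_tokens`) to `SgrError::MaxOutputTokens`, preserving the
    /// partial text.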
    fn check_truncation(response: &Response) -> Result<(), SgrError> {
        let is_incomplete = response
            .status
            .as_deref()
            .is_some_and(|s| s == "incomplete");
        let is_max_tokens = response
            .incomplete_details
            .as_ref()
            .and_then(|d| d.reason.as_deref())
            .is_some_and(|r| r == "max_output_tokens");

        if is_incomplete && is_max_tokens {
            return Err(SgrError::MaxOutputTokens {
                partial_content: response.output_text(),
            });
        }
        Ok(())
    }

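    /// Convert the response's function calls into `ToolCall` values.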
    fn extract_tool_calls(response: &Response) -> Vec<ToolCall> {
        response
            .function_calls()
            .into_iter()
            .map(|fc| ToolCall {
                id: fc.call_id,
                name: fc.name,
                arguments: fc.arguments,
            })
            .collect()
    }
}

#[async_trait::async_trait]
impl LlmClient for OxideClient {
    async fn structured_call(
        &self,
        messages: &[Message],
        schema: &Value,
    ) -> Result<(Option<Value>, Vec<ToolCall>, String), SgrError> {
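        // Strict structured outputs require `additionalProperties: false`;
        // pass schemas that already set it through unchanged.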
        let strict_schema =
            if schema.get("additionalProperties").and_then(|v| v.as_bool()) == Some(false) {
                schema.clone()
            } else {
                let mut s = schema.clone();
                openai_oxide::parsing::ensure_strict(&mut s);
                s
            };

        let mut req = self.build_request(messages, Some(&strict_schema), None);
        req = req.store(true);

        let span = tracing::info_span!(
            "oxide.responses.create",
            model = %self.model,
            method = "structured_call",
        );
        let _enter = span.enter();

        if std::env::var("SGR_DEBUG_SCHEMA").is_ok()
            && let Some(ref text_cfg) = req.text
        {
            eprintln!(
                "[sgr] Schema: {}",
                serde_json::to_string(text_cfg).unwrap_or_default()
            );
        }

        let response = self.send_request_auto(req).await?;

        record_otel_usage(&response, &self.model, messages);

        Self::check_truncation(&response)?;

        let raw_text = response.output_text();
        if std::env::var("SGR_DEBUG").is_ok() {
            eprintln!("[sgr] Raw response: {}", {
                use crate::str_ext::StrExt;
                raw_text.trunc(500)
            });
        }
        let tool_calls = Self::extract_tool_calls(&response);
        let parsed = serde_json::from_str::<Value>(&raw_text).ok();

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };

        {
            let output_tokens = response
                .usage
                .as_ref()
                .and_then(|u| u.output_tokens)
                .unwrap_or(0);
            if cached_tokens > 0 {
                eprintln!(
                    " 💰 {}in/{}out (cached: {}, {}%)",
                    input_tokens, output_tokens, cached_tokens, cache_pct
                );
            } else {
                eprintln!(" 💰 {}in/{}out", input_tokens, output_tokens);
            }
        }

        Ok((parsed, tool_calls, raw_text))
    }

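    /// Force at least one tool call (`tool_choice: required`) and return the
    /// parsed calls.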
    async fn tools_call(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
    ) -> Result<Vec<ToolCall>, SgrError> {
        let mut req = self.build_request(messages, None, None);
        req = req.store(true);

        let response_tools: Vec<ResponseTool> = tools
            .iter()
            .map(|t| ResponseTool::Function {
                name: t.name.clone(),
                description: if t.description.is_empty() {
                    None
                } else {
                    Some(t.description.clone())
                },
                parameters: Some(t.parameters.clone()),
                strict: None,
            })
            .collect();
        req = req.tools(response_tools);

        req = req.tool_choice(openai_oxide::types::responses::ResponseToolChoice::Mode(
            "required".into(),
        ));
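        // Claude models don't accept `parallel_tool_calls` here, so only
        // opt in for other providers.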
        if !self.model.contains("anthropic/") && !self.model.starts_with("claude") {
            req = req.parallel_tool_calls(true);
        }

        let response = self.send_request_auto(req).await?;

        record_otel_usage(&response, &self.model, messages);
        Self::check_truncation(&response)?;

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };

        if cached_tokens > 0 {
            eprintln!(
                " 💰 {}in/{}out (cached: {}, {}%)",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0),
                cached_tokens,
                cache_pct
            );
        } else {
            eprintln!(
                " 💰 {}in/{}out",
                input_tokens,
                response
                    .usage
                    .as_ref()
                    .and_then(|u| u.output_tokens)
                    .unwrap_or(0)
            );
        }

        let calls = Self::extract_tool_calls(&response);
        Ok(calls)
    }

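    /// Stateful variant of `tools_call`: also returns the response ID so the
    /// next turn can chain via `previous_response_id`.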
    async fn tools_call_stateful(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
        previous_response_id: Option<&str>,
    ) -> Result<(Vec<ToolCall>, Option<String>), SgrError> {
        self.tools_call_stateful_impl(messages, tools, previous_response_id)
            .await
    }

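    /// Tool calling with `tool_choice: auto`: returns any assistant text
    /// alongside the (possibly empty) tool calls.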
    async fn tools_call_with_text(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
    ) -> Result<(Vec<ToolCall>, String), SgrError> {
        let mut req = self.build_request(messages, None, None);
        req = req.store(true);

        let response_tools: Vec<ResponseTool> = tools
            .iter()
            .map(|t| ResponseTool::Function {
                name: t.name.clone(),
                description: if t.description.is_empty() {
                    None
                } else {
                    Some(t.description.clone())
                },
                parameters: Some(t.parameters.clone()),
                strict: None,
            })
            .collect();
        req = req.tools(response_tools);
        req = req.tool_choice(openai_oxide::types::responses::ResponseToolChoice::Mode(
            "auto".into(),
        ));
        req = req.parallel_tool_calls(true);

        let response = self.send_request_auto(req).await?;

        record_otel_usage(&response, &self.model, messages);
        Self::check_truncation(&response)?;

        let input_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens)
            .unwrap_or(0);
        let cached_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.input_tokens_details.as_ref())
            .and_then(|d| d.cached_tokens)
            .unwrap_or(0);
        let cache_pct = if input_tokens > 0 {
            (cached_tokens * 100) / input_tokens
        } else {
            0
        };
        let output_tokens = response
            .usage
            .as_ref()
            .and_then(|u| u.output_tokens)
            .unwrap_or(0);
        if cached_tokens > 0 {
            eprintln!(
                " 💰 {}in/{}out (cached: {}, {}%)",
                input_tokens, output_tokens, cached_tokens, cache_pct
            );
        } else {
            eprintln!(" 💰 {}in/{}out", input_tokens, output_tokens);
        }

        let text = response.output_text();
        let calls = Self::extract_tool_calls(&response);
        Ok((calls, text))
    }

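    /// Plain text completion; an empty output becomes `SgrError::EmptyResponse`.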
    async fn complete(&self, messages: &[Message]) -> Result<String, SgrError> {
        let mut req = self.build_request(messages, None, None);
        req = req.store(true);

        let response = self.send_request_auto(req).await?;

        record_otel_usage(&response, &self.model, messages);
        Self::check_truncation(&response)?;

        let text = response.output_text();
        if text.is_empty() {
            return Err(SgrError::EmptyResponse);
        }

        tracing::info!(
            model = %response.model,
            response_id = %response.id,
            input_tokens = response.usage.as_ref().and_then(|u| u.input_tokens).unwrap_or(0),
            output_tokens = response.usage.as_ref().and_then(|u| u.output_tokens).unwrap_or(0),
            "oxide.complete"
        );

        Ok(text)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::ImagePart;

    #[test]
    fn oxide_client_from_config() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        assert_eq!(client.model, "gpt-5.4");
    }

    #[test]
    fn build_request_simple() {
        let config = LlmConfig::with_key("sk-test", "gpt-4o-mini").temperature(0.5);
        let client = OxideClient::from_config(&config).unwrap();
        let messages = vec![Message::system("Be helpful."), Message::user("Hello")];
        let req = client.build_request(&messages, None, None);
        assert_eq!(req.model, "gpt-4o-mini");
        assert!(req.instructions.is_none());
        assert!(req.input.is_some());
        assert_eq!(req.temperature, Some(0.5));
    }

    #[test]
    fn temperature_skipped_for_reasoning_models() {
        for model in [
            "gpt-5",
            "gpt-5-mini",
            "gpt-5.4",
            "gpt-5.5",
            "o1-mini",
            "o3",
            "o4-mini",
        ] {
            let config = LlmConfig::with_key("sk-test", model).temperature(0.7);
            let client = OxideClient::from_config(&config).unwrap();
            let req = client.build_request(&[Message::user("Hi")], None, None);
            assert_eq!(
                req.temperature, None,
                "temperature must be omitted for reasoning model `{model}`"
            );
        }
    }

    #[test]
    fn model_supports_temperature_classification() {
        assert!(model_supports_temperature("gpt-4o"));
        assert!(model_supports_temperature("gpt-4o-mini"));
        assert!(model_supports_temperature("gpt-4.1"));
        assert!(model_supports_temperature("anthropic/claude-sonnet-4.6"));
        assert!(!model_supports_temperature("gpt-5"));
        assert!(!model_supports_temperature("gpt-5-mini"));
        assert!(!model_supports_temperature("gpt-5.4"));
        assert!(!model_supports_temperature("gpt-5.5"));
        assert!(!model_supports_temperature("openai/gpt-5.5"));
        assert!(!model_supports_temperature("o1-preview"));
        assert!(!model_supports_temperature("o3-mini"));
        assert!(!model_supports_temperature("o4-mini"));
    }

    #[test]
    fn build_request_with_schema() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let schema = serde_json::json!({
            "type": "object",
            "properties": {"answer": {"type": "string"}},
            "required": ["answer"]
        });
        let req = client.build_request(&[Message::user("Hi")], Some(&schema), None);
        assert!(req.text.is_some());
    }

    #[test]
    fn build_request_stateless_no_previous_response_id() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();

        let req = client.build_request(&[Message::user("Hi")], None, None);
        assert!(
            req.previous_response_id.is_none(),
            "build_request must be stateless when no explicit ID"
        );
    }

    #[test]
    fn build_request_explicit_chaining() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();

        let req = client.build_request(&[Message::user("Hi")], None, Some("resp_xyz"));
        assert_eq!(
            req.previous_response_id.as_deref(),
            Some("resp_xyz"),
            "build_request should chain with explicit previous_response_id"
        );
    }

    #[test]
    fn build_request_tool_outputs_chaining() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();

        let messages = vec![Message::tool("call_1", "result data")];
        let req = client.build_request(&messages, None, Some("resp_123"));
        assert_eq!(req.previous_response_id.as_deref(), Some("resp_123"));

        let req = client.build_request(&messages, None, None);
        assert!(
            req.previous_response_id.is_none(),
            "build_request must be stateless when no explicit ID"
        );
    }

    #[test]
    fn build_request_multimodal_user() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let img = ImagePart {
            data: "AAAA".into(),
            mime_type: "image/jpeg".into(),
        };
        let messages = vec![Message::user_with_images("Describe this", vec![img])];
        let req = client.build_request(&messages, None, None);

        let input = req.input.as_ref().expect("input missing");
        let serialized = serde_json::to_value(input).unwrap();
        let s = serde_json::to_string(&serialized).unwrap();
        assert!(s.contains("input_text"), "missing input_text part: {s}");
        assert!(s.contains("input_image"), "missing input_image part: {s}");
        assert!(
            s.contains("data:image/jpeg;base64,AAAA"),
            "missing data URL: {s}"
        );
    }

    #[test]
    fn build_request_items_multimodal_user() {
        let config = LlmConfig::with_key("sk-test", "gpt-5.4");
        let client = OxideClient::from_config(&config).unwrap();
        let img = ImagePart {
            data: "BBBB".into(),
            mime_type: "image/png".into(),
        };
        let messages = vec![Message::user_with_images("What's on screen?", vec![img])];
        let req = client.build_request(&messages, None, Some("resp_prev"));

        let input = req.input.as_ref().expect("input missing");
        let s = serde_json::to_string(input).unwrap();
        assert!(
            s.contains("input_text"),
            "items path missing input_text: {s}"
        );
        assert!(
            s.contains("input_image"),
            "items path missing input_image: {s}"
        );
        assert!(
            s.contains("data:image/png;base64,BBBB"),
            "items path missing data URL: {s}"
        );
    }
}