1use crate::client::LlmClient;
7use crate::multimodal;
8use crate::tool::ToolDef;
9use crate::types::{LlmConfig, Message, Role, SgrError, ToolCall};
10use openai_oxide::OpenAI;
11use openai_oxide::config::ClientConfig;
12use openai_oxide::types::chat::*;
13use serde_json::Value;
14
#[cfg(feature = "telemetry")]
/// Records an OTel span for one chat-completions round trip: truncated
/// input/output previews, tool calls, and token usage (zeros when the API
/// returned no usage payload).
fn record_chat_otel(
    model: &str,
    messages: &[Message],
    usage: Option<&openai_oxide::types::chat::Usage>,
    tool_calls: &[ToolCall],
    text_output: &str,
) {
    // Pull token counts out of the optional usage payload; default to zero.
    let (prompt_tokens, completion_tokens, cached_tokens) = match usage {
        Some(u) => (
            u.prompt_tokens.unwrap_or(0),
            u.completion_tokens.unwrap_or(0),
            u.prompt_tokens_details
                .as_ref()
                .and_then(|d| d.cached_tokens)
                .unwrap_or(0),
        ),
        None => (0, 0, 0),
    };

    // Previews are capped at 500 chars to keep span payloads small.
    let input_preview = last_user_content(messages, 500);
    let output_preview = truncate_str(text_output, 500);
    let calls: Vec<(String, String)> = tool_calls
        .iter()
        .map(|c| (c.name.clone(), c.arguments.to_string()))
        .collect();

    crate::telemetry::record_llm_span(
        "chat.completions.api",
        model,
        &input_preview,
        &output_preview,
        &calls,
        &crate::telemetry::LlmUsage {
            prompt_tokens,
            completion_tokens,
            cached_tokens,
            response_model: model.to_string(),
        },
    );
}
59
#[cfg(not(feature = "telemetry"))]
/// No-op stand-in for the telemetry span recorder, compiled when the
/// `telemetry` feature is disabled so call sites need no cfg guards.
fn record_chat_otel(
    _model: &str,
    _messages: &[Message],
    _usage: Option<&openai_oxide::types::chat::Usage>,
    _tool_calls: &[ToolCall],
    _text: &str,
) {
}
69
#[cfg(feature = "telemetry")]
/// Returns the content of the most recent User or Tool message, truncated to
/// `max_len`; empty string if no such message exists.
fn last_user_content(messages: &[Message], max_len: usize) -> String {
    for msg in messages.iter().rev() {
        if matches!(msg.role, Role::User | Role::Tool) {
            return truncate_str(&msg.content, max_len);
        }
    }
    String::new()
}
79
#[cfg(feature = "telemetry")]
/// Truncates `s` via the project's `StrExt::trunc`, appending "..." only when
/// something was actually cut off.
fn truncate_str(s: &str, max_len: usize) -> String {
    use crate::str_ext::StrExt;
    let truncated = s.trunc(max_len);
    // Byte-length comparison: any shrinkage means truncation happened.
    match truncated.len() < s.len() {
        true => format!("{truncated}..."),
        false => s.to_string(),
    }
}
90
/// Chat-completions client backed by the `openai_oxide` SDK, carrying the
/// per-request generation settings resolved from `LlmConfig`.
pub struct OxideChatClient {
    // Underlying openai_oxide HTTP client.
    client: OpenAI,
    pub(crate) model: String,
    pub(crate) temperature: Option<f64>,
    pub(crate) max_tokens: Option<u32>,
    // Reasoning effort for reasoning-capable models; None when unset/unrecognized.
    pub(crate) reasoning_effort: Option<openai_oxide::types::chat::ReasoningEffort>,
    // Provider-side prompt cache key, attached to every request when set.
    pub(crate) prompt_cache_key: Option<String>,
    // Session id; only sent when cache_ttl or pin_provider is also configured.
    pub(crate) session_id: Option<String>,
    // Ephemeral cache TTL string forwarded as cache_control (from resolved_cache_ttl).
    cache_ttl: Option<String>,
    // Provider to pin via OpenRouter preferences (from resolved_pin_provider).
    pin_provider: Option<String>,
}
108
109impl OxideChatClient {
110 pub fn from_config(config: &LlmConfig) -> Result<Self, SgrError> {
112 let api_key = config
113 .api_key
114 .clone()
115 .or_else(|| std::env::var("OPENAI_API_KEY").ok())
116 .unwrap_or_else(|| {
117 if config.base_url.is_some() {
118 "dummy_key".into()
119 } else {
120 "".into()
121 }
122 });
123
124 if api_key.is_empty() {
125 return Err(SgrError::Schema("No API key for oxide chat client".into()));
126 }
127
128 let mut client_config = ClientConfig::new(&api_key);
129 if let Some(ref url) = config.base_url {
130 client_config = client_config.base_url(url.clone());
131 }
132 config.apply_headers(&mut client_config);
133
134 let reasoning_effort = config.reasoning_effort.as_deref().and_then(|s| match s {
135 "none" => Some(openai_oxide::types::chat::ReasoningEffort::None),
136 "low" => Some(openai_oxide::types::chat::ReasoningEffort::Low),
137 "medium" => Some(openai_oxide::types::chat::ReasoningEffort::Medium),
138 "high" => Some(openai_oxide::types::chat::ReasoningEffort::High),
139 _ => None,
140 });
141
142 Ok(Self {
143 client: OpenAI::with_config(client_config),
144 model: config.model.clone(),
145 temperature: Some(config.temp),
146 max_tokens: config.max_tokens,
147 reasoning_effort,
148 prompt_cache_key: config.prompt_cache_key.clone(),
149 session_id: config.session_id.clone(),
150 cache_ttl: config.resolved_cache_ttl().map(String::from),
151 pin_provider: config.resolved_pin_provider().map(String::from),
152 })
153 }
154
155 fn build_messages(&self, messages: &[Message]) -> Vec<ChatCompletionMessageParam> {
156 let result: Vec<ChatCompletionMessageParam> = messages
157 .iter()
158 .map(|m| match m.role {
159 Role::System => ChatCompletionMessageParam::System {
160 content: m.content.clone(),
161 name: None,
162 },
163 Role::User => {
164 let content = if m.images.is_empty() {
168 UserContent::Text(m.content.clone())
169 } else {
170 UserContent::Parts(multimodal::chat_parts(&m.content, &m.images))
171 };
172 ChatCompletionMessageParam::User {
173 content,
174 name: None,
175 }
176 }
177 Role::Assistant => {
178 let tc = if m.tool_calls.is_empty() {
179 None
180 } else {
181 Some(
182 m.tool_calls
183 .iter()
184 .map(|tc| openai_oxide::types::chat::ToolCall {
185 id: tc.id.clone(),
186 type_: "function".into(),
187 function: openai_oxide::types::chat::FunctionCall {
188 name: tc.name.clone(),
189 arguments: tc.arguments.to_string(),
190 },
191 })
192 .collect(),
193 )
194 };
195 ChatCompletionMessageParam::Assistant {
196 content: if m.content.is_empty() {
197 None
198 } else {
199 Some(m.content.clone())
200 },
201 name: None,
202 tool_calls: tc,
203 refusal: None,
204 }
205 }
206 Role::Tool => ChatCompletionMessageParam::Tool {
207 content: m.content.clone(),
208 tool_call_id: m.tool_call_id.clone().unwrap_or_default(),
209 },
210 })
211 .collect();
212
213 result
214 }
215
216 fn build_request(&self, messages: &[Message]) -> ChatCompletionRequest {
217 self.build_request_with_reasoning(messages, self.reasoning_effort.as_ref())
218 }
219
220 fn build_request_no_reasoning(&self, messages: &[Message]) -> ChatCompletionRequest {
221 if self.reasoning_effort.is_some() {
223 self.build_request_with_reasoning(
224 messages,
225 Some(&openai_oxide::types::chat::ReasoningEffort::None),
226 )
227 } else {
228 self.build_request_with_reasoning(messages, None)
229 }
230 }
231
232 fn build_request_with_reasoning(
233 &self,
234 messages: &[Message],
235 reasoning: Option<&openai_oxide::types::chat::ReasoningEffort>,
236 ) -> ChatCompletionRequest {
237 let mut req = ChatCompletionRequest::new(&self.model, self.build_messages(messages));
238 if let Some(temp) = self.temperature {
239 req.temperature = Some(temp);
240 }
241 if let Some(max) = self.max_tokens {
242 if self.model.starts_with("gpt-5") || self.model.starts_with("o") {
243 req = req.max_completion_tokens(max as i64);
244 } else {
245 req.max_tokens = Some(max as i64);
246 }
247 }
248 if let Some(effort) = reasoning {
249 req.reasoning_effort = Some(effort.clone());
250 }
251 if let Some(ref key) = self.prompt_cache_key {
252 req.prompt_cache_key = Some(key.clone());
253 }
254 if let Some(ref sid) = self.session_id
258 && (self.cache_ttl.is_some() || self.pin_provider.is_some())
259 {
260 req.session_id = Some(sid.clone());
261 }
262 if let Some(ref ttl) = self.cache_ttl {
264 req.cache_control = Some(serde_json::json!({"type": "ephemeral", "ttl": ttl}));
265 }
266 if let Some(ref provider) = self.pin_provider
267 && let Ok(prefs) =
268 openai_oxide::openrouter::ProviderPreferences::pinned(provider).to_value()
269 {
270 req.provider = Some(prefs);
271 }
272 req
273 }
274
275 fn extract_tool_calls(response: &ChatCompletionResponse) -> Vec<ToolCall> {
276 let Some(choice) = response.choices.first() else {
277 return Vec::new();
278 };
279 let Some(ref calls) = choice.message.tool_calls else {
280 return Vec::new();
281 };
282 calls
283 .iter()
284 .map(|tc| ToolCall {
285 id: tc.id.clone(),
286 name: tc.function.name.clone(),
287 arguments: crate::str_ext::parse_tool_args(&tc.function.arguments),
288 })
289 .collect()
290 }
291}
292
293#[async_trait::async_trait]
294impl LlmClient for OxideChatClient {
295 async fn structured_call(
296 &self,
297 messages: &[Message],
298 schema: &Value,
299 ) -> Result<(Option<Value>, Vec<ToolCall>, String), SgrError> {
300 let strict_schema =
302 if schema.get("additionalProperties").and_then(|v| v.as_bool()) == Some(false) {
303 schema.clone()
304 } else {
305 let mut s = schema.clone();
306 openai_oxide::parsing::ensure_strict(&mut s);
307 s
308 };
309
310 let mut req = self.build_request(messages);
311 req.response_format = Some(ResponseFormat::JsonSchema {
312 json_schema: JsonSchema {
313 name: "response".into(),
314 description: None,
315 schema: Some(strict_schema),
316 strict: Some(true),
317 },
318 });
319
320 let response = self
321 .client
322 .chat()
323 .completions()
324 .create(req)
325 .await
326 .map_err(|e| SgrError::Api {
327 status: 0,
328 body: e.to_string(),
329 })?;
330
331 let raw_text = response
332 .choices
333 .first()
334 .and_then(|c| c.message.content.clone())
335 .unwrap_or_default();
336 let tool_calls = Self::extract_tool_calls(&response);
337 let parsed = serde_json::from_str::<Value>(&raw_text).ok();
338
339 if let Some(ref usage) = response.usage {
340 let input = usage.prompt_tokens.unwrap_or(0);
341 let cached = usage
342 .prompt_tokens_details
343 .as_ref()
344 .and_then(|d| d.cached_tokens)
345 .unwrap_or(0);
346 let output = usage.completion_tokens.unwrap_or(0);
347 if cached > 0 {
348 let pct = if input > 0 { cached * 100 / input } else { 0 };
349 eprintln!(
350 " 💰 {}in/{}out (cached: {}, {}%)",
351 input, output, cached, pct
352 );
353 } else {
354 eprintln!(" 💰 {}in/{}out", input, output);
355 }
356 }
357
358 record_chat_otel(
359 &self.model,
360 messages,
361 response.usage.as_ref(),
362 &tool_calls,
363 &raw_text,
364 );
365 Ok((parsed, tool_calls, raw_text))
366 }
367
368 async fn tools_call(
369 &self,
370 messages: &[Message],
371 tools: &[ToolDef],
372 ) -> Result<Vec<ToolCall>, SgrError> {
373 let mut req = self.build_request_no_reasoning(messages);
375
376 let chat_tools: Vec<Tool> = tools
380 .iter()
381 .map(|t| {
382 let mut params = t.parameters.clone();
383 openai_oxide::parsing::ensure_strict(&mut params);
384 Tool::function(
385 &t.name,
386 if t.description.is_empty() {
387 "No description"
388 } else {
389 &t.description
390 },
391 params,
392 )
393 })
394 .collect();
395 req.tools = Some(chat_tools);
396 req.tool_choice = Some(openai_oxide::types::chat::ToolChoice::Mode(
397 "required".into(),
398 ));
399 if !self.model.contains("anthropic/") {
402 req.parallel_tool_calls = Some(true);
403 }
404
405 let response = self
406 .client
407 .chat()
408 .completions()
409 .create(req)
410 .await
411 .map_err(|e| SgrError::Api {
412 status: 0,
413 body: e.to_string(),
414 })?;
415
416 if let Some(ref usage) = response.usage {
417 let input = usage.prompt_tokens.unwrap_or(0);
418 let cached = usage
419 .prompt_tokens_details
420 .as_ref()
421 .and_then(|d| d.cached_tokens)
422 .unwrap_or(0);
423 let output = usage.completion_tokens.unwrap_or(0);
424 if cached > 0 {
425 let pct = if input > 0 { cached * 100 / input } else { 0 };
426 eprintln!(
427 " 💰 {}in/{}out (cached: {}, {}%)",
428 input, output, cached, pct
429 );
430 } else {
431 eprintln!(" 💰 {}in/{}out", input, output);
432 }
433 }
434
435 let calls = Self::extract_tool_calls(&response);
436 record_chat_otel(&self.model, messages, response.usage.as_ref(), &calls, "");
437 Ok(calls)
439 }
440
441 async fn tools_call_with_text(
444 &self,
445 messages: &[Message],
446 tools: &[ToolDef],
447 ) -> Result<(Vec<ToolCall>, String), SgrError> {
448 let mut req = self.build_request_no_reasoning(messages);
449
450 let chat_tools: Vec<Tool> = tools
451 .iter()
452 .map(|t| {
453 let mut params = t.parameters.clone();
454 openai_oxide::parsing::ensure_strict(&mut params);
455 Tool::function(
456 &t.name,
457 if t.description.is_empty() {
458 "No description"
459 } else {
460 &t.description
461 },
462 params,
463 )
464 })
465 .collect();
466 req.tools = Some(chat_tools);
467 req.tool_choice = Some(openai_oxide::types::chat::ToolChoice::Mode("auto".into()));
469
470 let response = self
471 .client
472 .chat()
473 .completions()
474 .create(req)
475 .await
476 .map_err(|e| SgrError::Api {
477 status: 0,
478 body: e.to_string(),
479 })?;
480
481 if let Some(ref usage) = response.usage {
482 let input = usage.prompt_tokens.unwrap_or(0);
483 let cached = usage
484 .prompt_tokens_details
485 .as_ref()
486 .and_then(|d| d.cached_tokens)
487 .unwrap_or(0);
488 let output = usage.completion_tokens.unwrap_or(0);
489 if cached > 0 {
490 let pct = if input > 0 { cached * 100 / input } else { 0 };
491 eprintln!(
492 " 💰 {}in/{}out (cached: {}, {}%)",
493 input, output, cached, pct
494 );
495 } else {
496 eprintln!(" 💰 {}in/{}out", input, output);
497 }
498 }
499
500 let text = response
501 .choices
502 .first()
503 .and_then(|c| c.message.content.clone())
504 .unwrap_or_default();
505 let calls = Self::extract_tool_calls(&response);
506 record_chat_otel(
507 &self.model,
508 messages,
509 response.usage.as_ref(),
510 &calls,
511 &text,
512 );
513 Ok((calls, text))
514 }
515
516 async fn complete(&self, messages: &[Message]) -> Result<String, SgrError> {
517 let req = self.build_request(messages);
518
519 let response = self
520 .client
521 .chat()
522 .completions()
523 .create(req)
524 .await
525 .map_err(|e| SgrError::Api {
526 status: 0,
527 body: e.to_string(),
528 })?;
529
530 tracing::info!(model = %response.model, "oxide_chat.complete");
531
532 let text = response
533 .choices
534 .first()
535 .and_then(|c| c.message.content.clone())
536 .unwrap_or_default();
537 record_chat_otel(&self.model, messages, response.usage.as_ref(), &[], &text);
538 Ok(text)
539 }
540}