1use crate::client::LlmClient;
7use crate::tool::ToolDef;
8use crate::types::{LlmConfig, Message, Role, SgrError, ToolCall};
9use openai_oxide::OpenAI;
10use openai_oxide::config::ClientConfig;
11use openai_oxide::types::chat::*;
12use serde_json::Value;
13
14#[cfg(feature = "telemetry")]
17fn record_chat_otel(
18 model: &str,
19 messages: &[Message],
20 usage: Option<&openai_oxide::types::chat::Usage>,
21 tool_calls: &[ToolCall],
22 text_output: &str,
23) {
24 let (pt, ct, cached) = usage
25 .map(|u| {
26 let pt = u.prompt_tokens.unwrap_or(0);
27 let ct = u.completion_tokens.unwrap_or(0);
28 let cached = u
29 .prompt_tokens_details
30 .as_ref()
31 .and_then(|d| d.cached_tokens)
32 .unwrap_or(0);
33 (pt, ct, cached)
34 })
35 .unwrap_or((0, 0, 0));
36
37 let input = last_user_content(messages, 500);
38 let output = truncate_str(text_output, 500);
39 let tc: Vec<(String, String)> = tool_calls
40 .iter()
41 .map(|tc| (tc.name.clone(), tc.arguments.to_string()))
42 .collect();
43
44 crate::telemetry::record_llm_span(
45 "chat.completions.api",
46 model,
47 &input,
48 &output,
49 &tc,
50 &crate::telemetry::LlmUsage {
51 prompt_tokens: pt,
52 completion_tokens: ct,
53 cached_tokens: cached,
54 response_model: model.to_string(),
55 },
56 );
57}
58
59#[cfg(not(feature = "telemetry"))]
/// No-op stand-in used when the `telemetry` feature is disabled, so call
/// sites stay identical across feature configurations.
fn record_chat_otel(
    _model: &str,
    _messages: &[Message],
    _usage: Option<&openai_oxide::types::chat::Usage>,
    _tool_calls: &[ToolCall],
    _text: &str,
) {
}
68
69#[cfg(feature = "telemetry")]
70fn last_user_content(messages: &[Message], max_len: usize) -> String {
71 messages
72 .iter()
73 .rev()
74 .find(|m| matches!(m.role, Role::User | Role::Tool))
75 .map(|m| truncate_str(&m.content, max_len))
76 .unwrap_or_default()
77}
78
79#[cfg(feature = "telemetry")]
/// Truncate `s` to at most `max_len` bytes, appending "..." when cut.
///
/// Unlike a plain `&s[..max_len]`, this backs up to the nearest UTF-8
/// character boundary, so truncating multi-byte text (non-ASCII chat
/// content is common here) cannot panic.
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }
    // Largest index <= max_len that lands on a char boundary. Terminates
    // because index 0 is always a boundary.
    let mut cut = max_len;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &s[..cut])
}
87
/// Chat-completions client backed by `openai_oxide`, holding per-request
/// defaults resolved once from `LlmConfig`.
pub struct OxideChatClient {
    client: OpenAI,
    /// Model identifier sent with every request.
    pub(crate) model: String,
    /// Sampling temperature; forwarded verbatim when set.
    pub(crate) temperature: Option<f64>,
    /// Output token cap — mapped to `max_tokens` or `max_completion_tokens`
    /// depending on the model family (see `build_request_with_reasoning`).
    pub(crate) max_tokens: Option<u32>,
    /// Reasoning effort parsed from config; `None` when unset or unrecognized.
    pub(crate) reasoning_effort: Option<openai_oxide::types::chat::ReasoningEffort>,
    /// Optional key forwarded to enable server-side prompt caching.
    pub(crate) prompt_cache_key: Option<String>,
}
99
impl OxideChatClient {
    /// Build a client from `LlmConfig`, resolving the API key from the config
    /// or the `OPENAI_API_KEY` environment variable.
    ///
    /// When a custom `base_url` is configured and no key is available, a
    /// placeholder key is substituted (presumably because local/proxy
    /// endpoints ignore it — TODO confirm); with the default endpoint a
    /// missing key is an error.
    pub fn from_config(config: &LlmConfig) -> Result<Self, SgrError> {
        let api_key = config
            .api_key
            .clone()
            .or_else(|| std::env::var("OPENAI_API_KEY").ok())
            .unwrap_or_else(|| {
                if config.base_url.is_some() {
                    "dummy_key".into()
                } else {
                    "".into()
                }
            });

        if api_key.is_empty() {
            // NOTE(review): reuses the `Schema` error variant for a
            // configuration problem — a dedicated variant may be clearer.
            return Err(SgrError::Schema("No API key for oxide chat client".into()));
        }

        let mut client_config = ClientConfig::new(&api_key);
        if let Some(ref url) = config.base_url {
            client_config = client_config.base_url(url.clone());
        }
        config.apply_headers(&mut client_config);

        // Unrecognized effort strings are silently treated as "unset".
        let reasoning_effort = config.reasoning_effort.as_deref().and_then(|s| match s {
            "none" => Some(openai_oxide::types::chat::ReasoningEffort::None),
            "low" => Some(openai_oxide::types::chat::ReasoningEffort::Low),
            "medium" => Some(openai_oxide::types::chat::ReasoningEffort::Medium),
            "high" => Some(openai_oxide::types::chat::ReasoningEffort::High),
            _ => None,
        });

        Ok(Self {
            client: OpenAI::with_config(client_config),
            model: config.model.clone(),
            temperature: Some(config.temp),
            max_tokens: config.max_tokens,
            reasoning_effort,
            prompt_cache_key: config.prompt_cache_key.clone(),
        })
    }

    /// Convert crate-level `Message`s into wire-format message params.
    fn build_messages(&self, messages: &[Message]) -> Vec<ChatCompletionMessageParam> {
        messages
            .iter()
            .map(|m| match m.role {
                Role::System => ChatCompletionMessageParam::System {
                    content: m.content.clone(),
                    name: None,
                },
                Role::User => ChatCompletionMessageParam::User {
                    content: UserContent::Text(m.content.clone()),
                    name: None,
                },
                Role::Assistant => {
                    // Map recorded tool calls back to the wire shape;
                    // `None` (not an empty vec) when there were none.
                    let tc = if m.tool_calls.is_empty() {
                        None
                    } else {
                        Some(
                            m.tool_calls
                                .iter()
                                .map(|tc| openai_oxide::types::chat::ToolCall {
                                    id: tc.id.clone(),
                                    type_: "function".into(),
                                    function: openai_oxide::types::chat::FunctionCall {
                                        name: tc.name.clone(),
                                        arguments: tc.arguments.to_string(),
                                    },
                                })
                                .collect(),
                        )
                    };
                    ChatCompletionMessageParam::Assistant {
                        // Empty assistant content is sent as absent, which
                        // matters for pure tool-call turns.
                        content: if m.content.is_empty() {
                            None
                        } else {
                            Some(m.content.clone())
                        },
                        name: None,
                        tool_calls: tc,
                        refusal: None,
                    }
                }
                Role::Tool => ChatCompletionMessageParam::Tool {
                    content: m.content.clone(),
                    // A tool message without an id sends "" — presumably the
                    // provider rejects that; upstream should always set it.
                    tool_call_id: m.tool_call_id.clone().unwrap_or_default(),
                },
            })
            .collect()
    }

    /// Request with the client's configured reasoning effort (if any).
    fn build_request(&self, messages: &[Message]) -> ChatCompletionRequest {
        self.build_request_with_reasoning(messages, self.reasoning_effort.as_ref())
    }

    /// Request with reasoning disabled. When an effort was configured, the
    /// field is sent explicitly as `None` effort (rather than omitted) —
    /// presumably to override providers that reason by default; otherwise
    /// the field is omitted entirely.
    fn build_request_no_reasoning(&self, messages: &[Message]) -> ChatCompletionRequest {
        if self.reasoning_effort.is_some() {
            self.build_request_with_reasoning(
                messages,
                Some(&openai_oxide::types::chat::ReasoningEffort::None),
            )
        } else {
            self.build_request_with_reasoning(messages, None)
        }
    }

    /// Assemble the base request: model, messages, and the optional
    /// temperature / token-cap / reasoning / prompt-cache fields.
    fn build_request_with_reasoning(
        &self,
        messages: &[Message],
        reasoning: Option<&openai_oxide::types::chat::ReasoningEffort>,
    ) -> ChatCompletionRequest {
        let mut req = ChatCompletionRequest::new(&self.model, self.build_messages(messages));
        if let Some(temp) = self.temperature {
            req.temperature = Some(temp);
        }
        if let Some(max) = self.max_tokens {
            // Newer model families take `max_completion_tokens` instead of
            // the legacy `max_tokens`.
            // NOTE(review): `starts_with("o")` also matches any model whose
            // name merely begins with 'o' (e.g. "open-..."), not only the
            // o-series — confirm this heuristic is intended.
            if self.model.starts_with("gpt-5") || self.model.starts_with("o") {
                req = req.max_completion_tokens(max as i64);
            } else {
                req.max_tokens = Some(max as i64);
            }
        }
        if let Some(effort) = reasoning {
            req.reasoning_effort = Some(effort.clone());
        }
        if let Some(ref key) = self.prompt_cache_key {
            req.prompt_cache_key = Some(key.clone());
        }
        req
    }

    /// Extract tool calls from the first choice of a response.
    ///
    /// Arguments that fail to parse as JSON degrade to `Value::Null` rather
    /// than erroring; callers see the call with null arguments.
    fn extract_tool_calls(response: &ChatCompletionResponse) -> Vec<ToolCall> {
        let Some(choice) = response.choices.first() else {
            return Vec::new();
        };
        let Some(ref calls) = choice.message.tool_calls else {
            return Vec::new();
        };
        calls
            .iter()
            .map(|tc| ToolCall {
                id: tc.id.clone(),
                name: tc.function.name.clone(),
                arguments: serde_json::from_str(&tc.function.arguments).unwrap_or(Value::Null),
            })
            .collect()
    }
}
250
#[async_trait::async_trait]
impl LlmClient for OxideChatClient {
    /// Structured-output call: forces the response into `schema` via the
    /// JSON-schema response format and returns (parsed JSON if the text
    /// parsed, extracted tool calls, raw response text).
    async fn structured_call(
        &self,
        messages: &[Message],
        schema: &Value,
    ) -> Result<(Option<Value>, Vec<ToolCall>, String), SgrError> {
        // Schemas already marked strict at the top level are passed through;
        // everything else is tightened by `ensure_strict`.
        // NOTE(review): only the top-level `additionalProperties` is
        // inspected here — confirm that top-level-strict schemas are also
        // strict in nested objects, or strict mode may be rejected.
        let strict_schema =
            if schema.get("additionalProperties").and_then(|v| v.as_bool()) == Some(false) {
                schema.clone()
            } else {
                let mut s = schema.clone();
                openai_oxide::parsing::ensure_strict(&mut s);
                s
            };

        let mut req = self.build_request(messages);
        req.response_format = Some(ResponseFormat::JsonSchema {
            json_schema: JsonSchema {
                name: "response".into(),
                description: None,
                schema: Some(strict_schema),
                strict: Some(true),
            },
        });

        let response = self
            .client
            .chat()
            .completions()
            .create(req)
            .await
            // Transport/SDK errors carry no HTTP status here, hence 0.
            .map_err(|e| SgrError::Api {
                status: 0,
                body: e.to_string(),
            })?;

        let raw_text = response
            .choices
            .first()
            .and_then(|c| c.message.content.clone())
            .unwrap_or_default();
        let tool_calls = Self::extract_tool_calls(&response);
        // Best-effort parse: `None` when the model returned non-JSON text.
        let parsed = serde_json::from_str::<Value>(&raw_text).ok();

        // Best-effort token/caching visibility on stderr.
        if let Some(ref usage) = response.usage {
            let input = usage.prompt_tokens.unwrap_or(0);
            let cached = usage
                .prompt_tokens_details
                .as_ref()
                .and_then(|d| d.cached_tokens)
                .unwrap_or(0);
            let output = usage.completion_tokens.unwrap_or(0);
            if cached > 0 {
                let pct = if input > 0 { cached * 100 / input } else { 0 };
                eprintln!(
                    " 💰 {}in/{}out (cached: {}, {}%)",
                    input, output, cached, pct
                );
            } else {
                eprintln!(" 💰 {}in/{}out", input, output);
            }
        }

        record_chat_otel(
            &self.model,
            messages,
            response.usage.as_ref(),
            &tool_calls,
            &raw_text,
        );
        Ok((parsed, tool_calls, raw_text))
    }

    /// Tool-selection call: sends the tool definitions with
    /// `tool_choice: "required"` so the model must pick a tool, and returns
    /// the resulting calls. Reasoning is explicitly disabled for this path
    /// (see `build_request_no_reasoning`).
    async fn tools_call(
        &self,
        messages: &[Message],
        tools: &[ToolDef],
    ) -> Result<Vec<ToolCall>, SgrError> {
        let mut req = self.build_request_no_reasoning(messages);

        let chat_tools: Vec<Tool> = tools
            .iter()
            .map(|t| {
                Tool::function(
                    &t.name,
                    // Providers may reject empty descriptions, so substitute
                    // a placeholder.
                    if t.description.is_empty() {
                        "No description"
                    } else {
                        &t.description
                    },
                    t.parameters.clone(),
                )
            })
            .collect();
        req.tools = Some(chat_tools);
        req.tool_choice = Some(openai_oxide::types::chat::ToolChoice::Mode(
            "required".into(),
        ));

        let response = self
            .client
            .chat()
            .completions()
            .create(req)
            .await
            .map_err(|e| SgrError::Api {
                status: 0,
                body: e.to_string(),
            })?;

        // Best-effort token/caching visibility on stderr (same format as
        // `structured_call`).
        if let Some(ref usage) = response.usage {
            let input = usage.prompt_tokens.unwrap_or(0);
            let cached = usage
                .prompt_tokens_details
                .as_ref()
                .and_then(|d| d.cached_tokens)
                .unwrap_or(0);
            let output = usage.completion_tokens.unwrap_or(0);
            if cached > 0 {
                let pct = if input > 0 { cached * 100 / input } else { 0 };
                eprintln!(
                    " 💰 {}in/{}out (cached: {}, {}%)",
                    input, output, cached, pct
                );
            } else {
                eprintln!(" 💰 {}in/{}out", input, output);
            }
        }

        let calls = Self::extract_tool_calls(&response);
        record_chat_otel(&self.model, messages, response.usage.as_ref(), &calls, "");
        Ok(calls)
    }

    /// Plain completion: returns the first choice's text content (empty
    /// string when the response has no choices or no content).
    async fn complete(&self, messages: &[Message]) -> Result<String, SgrError> {
        let req = self.build_request(messages);

        let response = self
            .client
            .chat()
            .completions()
            .create(req)
            .await
            .map_err(|e| SgrError::Api {
                status: 0,
                body: e.to_string(),
            })?;

        tracing::info!(model = %response.model, "oxide_chat.complete");

        let text = response
            .choices
            .first()
            .and_then(|c| c.message.content.clone())
            .unwrap_or_default();
        record_chat_otel(&self.model, messages, response.usage.as_ref(), &[], &text);
        Ok(text)
    }
}