1use crate::client::LlmClient;
7use crate::tool::ToolDef;
8use crate::types::{LlmConfig, Message, Role, SgrError, ToolCall};
9use openai_oxide::OpenAI;
10use openai_oxide::config::ClientConfig;
11use openai_oxide::types::chat::*;
12use serde_json::Value;
13
#[cfg(feature = "telemetry")]
fn record_chat_otel(model: &str, messages: &[Message], usage: Option<&openai_oxide::types::chat::Usage>, tool_calls: &[ToolCall], text_output: &str) {
    // Token counters fall back to zero whenever the API omitted usage data.
    let (prompt_tokens, completion_tokens, cached_tokens) = match usage {
        Some(u) => (
            u.prompt_tokens.unwrap_or(0) as i64,
            u.completion_tokens.unwrap_or(0) as i64,
            u.prompt_tokens_details
                .as_ref()
                .and_then(|d| d.cached_tokens)
                .unwrap_or(0) as i64,
        ),
        None => (0, 0, 0),
    };

    // Snapshot of tool invocations as (name, serialized-arguments) pairs.
    let mut calls: Vec<(String, String)> = Vec::with_capacity(tool_calls.len());
    for call in tool_calls {
        calls.push((call.name.clone(), call.arguments.to_string()));
    }

    crate::telemetry::record_llm_span(
        "chat.completions.api",
        model,
        // Inputs/outputs are truncated to 500 chars to bound span size.
        &last_user_content(messages, 500),
        &truncate_str(text_output, 500),
        &calls,
        &crate::telemetry::LlmUsage {
            prompt_tokens,
            completion_tokens,
            cached_tokens,
            response_model: model.to_string(),
        },
    );
}
40
/// No-op stand-in compiled when the `telemetry` feature is disabled, so call
/// sites don't need their own `cfg` guards.
#[cfg(not(feature = "telemetry"))]
fn record_chat_otel(_model: &str, _messages: &[Message], _usage: Option<&openai_oxide::types::chat::Usage>, _tool_calls: &[ToolCall], _text: &str) {}
43
/// Walk the conversation backwards and return the most recent User or Tool
/// message content, truncated to `max_len`; empty string if none exists.
#[cfg(feature = "telemetry")]
fn last_user_content(messages: &[Message], max_len: usize) -> String {
    for msg in messages.iter().rev() {
        if matches!(msg.role, Role::User | Role::Tool) {
            return truncate_str(&msg.content, max_len);
        }
    }
    String::new()
}
51
/// Truncate `s` to at most `max_len` bytes, appending "..." when cut.
///
/// BUG FIX: the previous `&s[..max_len]` panics when `max_len` lands inside a
/// multi-byte UTF-8 character (byte slicing must hit a char boundary). Since
/// callers feed arbitrary model/user text here, we back up to the nearest
/// boundary before slicing.
#[cfg(feature = "telemetry")]
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }
    // Move the cut point left until it sits on a char boundary (0 always is).
    let mut end = max_len;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    format!("{}...", &s[..end])
}
56
/// Chat-completions LLM client backed by the `openai_oxide` SDK.
pub struct OxideChatClient {
    // Underlying SDK client holding the API key / base URL configuration.
    client: OpenAI,
    // Model identifier sent with every request.
    pub(crate) model: String,
    // Sampling temperature; `from_config` always populates this with Some.
    pub(crate) temperature: Option<f64>,
    // Output-token cap; mapped to `max_tokens` or `max_completion_tokens`
    // depending on the model-name prefix (see `build_request_with_reasoning`).
    pub(crate) max_tokens: Option<u32>,
    // Reasoning effort parsed from the config string ("none"/"low"/"medium"/"high").
    pub(crate) reasoning_effort: Option<openai_oxide::types::chat::ReasoningEffort>,
    // Optional cache key forwarded verbatim on each request.
    pub(crate) prompt_cache_key: Option<String>,
}
68
69impl OxideChatClient {
70 pub fn from_config(config: &LlmConfig) -> Result<Self, SgrError> {
72 let api_key = config
73 .api_key
74 .clone()
75 .or_else(|| std::env::var("OPENAI_API_KEY").ok())
76 .unwrap_or_else(|| {
77 if config.base_url.is_some() {
78 "dummy_key".into()
79 } else {
80 "".into()
81 }
82 });
83
84 if api_key.is_empty() {
85 return Err(SgrError::Schema("No API key for oxide chat client".into()));
86 }
87
88 let mut client_config = ClientConfig::new(&api_key);
89 if let Some(ref url) = config.base_url {
90 client_config = client_config.base_url(url.clone());
91 }
92 config.apply_headers(&mut client_config);
93
94 let reasoning_effort = config.reasoning_effort.as_deref().and_then(|s| match s {
95 "none" => Some(openai_oxide::types::chat::ReasoningEffort::None),
96 "low" => Some(openai_oxide::types::chat::ReasoningEffort::Low),
97 "medium" => Some(openai_oxide::types::chat::ReasoningEffort::Medium),
98 "high" => Some(openai_oxide::types::chat::ReasoningEffort::High),
99 _ => None,
100 });
101
102 Ok(Self {
103 client: OpenAI::with_config(client_config),
104 model: config.model.clone(),
105 temperature: Some(config.temp),
106 max_tokens: config.max_tokens,
107 reasoning_effort,
108 prompt_cache_key: config.prompt_cache_key.clone(),
109 })
110 }
111
112 fn build_messages(&self, messages: &[Message]) -> Vec<ChatCompletionMessageParam> {
113 messages
114 .iter()
115 .map(|m| match m.role {
116 Role::System => ChatCompletionMessageParam::System {
117 content: m.content.clone(),
118 name: None,
119 },
120 Role::User => ChatCompletionMessageParam::User {
121 content: UserContent::Text(m.content.clone()),
122 name: None,
123 },
124 Role::Assistant => {
125 let tc = if m.tool_calls.is_empty() {
126 None
127 } else {
128 Some(
129 m.tool_calls
130 .iter()
131 .map(|tc| openai_oxide::types::chat::ToolCall {
132 id: tc.id.clone(),
133 type_: "function".into(),
134 function: openai_oxide::types::chat::FunctionCall {
135 name: tc.name.clone(),
136 arguments: tc.arguments.to_string(),
137 },
138 })
139 .collect(),
140 )
141 };
142 ChatCompletionMessageParam::Assistant {
143 content: if m.content.is_empty() {
144 None
145 } else {
146 Some(m.content.clone())
147 },
148 name: None,
149 tool_calls: tc,
150 refusal: None,
151 }
152 }
153 Role::Tool => ChatCompletionMessageParam::Tool {
154 content: m.content.clone(),
155 tool_call_id: m.tool_call_id.clone().unwrap_or_default(),
156 },
157 })
158 .collect()
159 }
160
161 fn build_request(&self, messages: &[Message]) -> ChatCompletionRequest {
162 self.build_request_with_reasoning(messages, self.reasoning_effort.as_ref())
163 }
164
165 fn build_request_no_reasoning(&self, messages: &[Message]) -> ChatCompletionRequest {
166 if self.reasoning_effort.is_some() {
168 self.build_request_with_reasoning(
169 messages,
170 Some(&openai_oxide::types::chat::ReasoningEffort::None),
171 )
172 } else {
173 self.build_request_with_reasoning(messages, None)
174 }
175 }
176
177 fn build_request_with_reasoning(
178 &self,
179 messages: &[Message],
180 reasoning: Option<&openai_oxide::types::chat::ReasoningEffort>,
181 ) -> ChatCompletionRequest {
182 let mut req = ChatCompletionRequest::new(&self.model, self.build_messages(messages));
183 if let Some(temp) = self.temperature {
184 req.temperature = Some(temp);
185 }
186 if let Some(max) = self.max_tokens {
187 if self.model.starts_with("gpt-5") || self.model.starts_with("o") {
188 req = req.max_completion_tokens(max as i64);
189 } else {
190 req.max_tokens = Some(max as i64);
191 }
192 }
193 if let Some(effort) = reasoning {
194 req.reasoning_effort = Some(effort.clone());
195 }
196 if let Some(ref key) = self.prompt_cache_key {
197 req.prompt_cache_key = Some(key.clone());
198 }
199 req
200 }
201
202 fn extract_tool_calls(response: &ChatCompletionResponse) -> Vec<ToolCall> {
203 let Some(choice) = response.choices.first() else {
204 return Vec::new();
205 };
206 let Some(ref calls) = choice.message.tool_calls else {
207 return Vec::new();
208 };
209 calls
210 .iter()
211 .map(|tc| ToolCall {
212 id: tc.id.clone(),
213 name: tc.function.name.clone(),
214 arguments: serde_json::from_str(&tc.function.arguments).unwrap_or(Value::Null),
215 })
216 .collect()
217 }
218}
219
220#[async_trait::async_trait]
221impl LlmClient for OxideChatClient {
222 async fn structured_call(
223 &self,
224 messages: &[Message],
225 schema: &Value,
226 ) -> Result<(Option<Value>, Vec<ToolCall>, String), SgrError> {
227 let strict_schema =
229 if schema.get("additionalProperties").and_then(|v| v.as_bool()) == Some(false) {
230 schema.clone()
231 } else {
232 let mut s = schema.clone();
233 openai_oxide::parsing::ensure_strict(&mut s);
234 s
235 };
236
237 let mut req = self.build_request(messages);
238 req.response_format = Some(ResponseFormat::JsonSchema {
239 json_schema: JsonSchema {
240 name: "response".into(),
241 description: None,
242 schema: Some(strict_schema),
243 strict: Some(true),
244 },
245 });
246
247 let response = self
248 .client
249 .chat()
250 .completions()
251 .create(req)
252 .await
253 .map_err(|e| SgrError::Api {
254 status: 0,
255 body: e.to_string(),
256 })?;
257
258 let raw_text = response
259 .choices
260 .first()
261 .and_then(|c| c.message.content.clone())
262 .unwrap_or_default();
263 let tool_calls = Self::extract_tool_calls(&response);
264 let parsed = serde_json::from_str::<Value>(&raw_text).ok();
265
266 if let Some(ref usage) = response.usage {
267 let input = usage.prompt_tokens.unwrap_or(0);
268 let cached = usage
269 .prompt_tokens_details
270 .as_ref()
271 .and_then(|d| d.cached_tokens)
272 .unwrap_or(0);
273 let output = usage.completion_tokens.unwrap_or(0);
274 if cached > 0 {
275 let pct = if input > 0 { cached * 100 / input } else { 0 };
276 eprintln!(
277 " 💰 {}in/{}out (cached: {}, {}%)",
278 input, output, cached, pct
279 );
280 } else {
281 eprintln!(" 💰 {}in/{}out", input, output);
282 }
283 }
284
285 record_chat_otel(&self.model, messages, response.usage.as_ref(), &tool_calls, &raw_text);
286 Ok((parsed, tool_calls, raw_text))
287 }
288
289 async fn tools_call(
290 &self,
291 messages: &[Message],
292 tools: &[ToolDef],
293 ) -> Result<Vec<ToolCall>, SgrError> {
294 let mut req = self.build_request_no_reasoning(messages);
296
297 let chat_tools: Vec<Tool> = tools
298 .iter()
299 .map(|t| {
300 Tool::function(
301 &t.name,
302 if t.description.is_empty() {
303 "No description"
304 } else {
305 &t.description
306 },
307 t.parameters.clone(),
308 )
309 })
310 .collect();
311 req.tools = Some(chat_tools);
312 req.tool_choice = Some(openai_oxide::types::chat::ToolChoice::Mode(
313 "required".into(),
314 ));
315
316 let response = self
317 .client
318 .chat()
319 .completions()
320 .create(req)
321 .await
322 .map_err(|e| SgrError::Api {
323 status: 0,
324 body: e.to_string(),
325 })?;
326
327 if let Some(ref usage) = response.usage {
328 let input = usage.prompt_tokens.unwrap_or(0);
329 let cached = usage
330 .prompt_tokens_details
331 .as_ref()
332 .and_then(|d| d.cached_tokens)
333 .unwrap_or(0);
334 let output = usage.completion_tokens.unwrap_or(0);
335 if cached > 0 {
336 let pct = if input > 0 { cached * 100 / input } else { 0 };
337 eprintln!(
338 " 💰 {}in/{}out (cached: {}, {}%)",
339 input, output, cached, pct
340 );
341 } else {
342 eprintln!(" 💰 {}in/{}out", input, output);
343 }
344 }
345
346 let calls = Self::extract_tool_calls(&response);
347 record_chat_otel(&self.model, messages, response.usage.as_ref(), &calls, "");
348 Ok(calls)
350 }
351
352 async fn complete(&self, messages: &[Message]) -> Result<String, SgrError> {
353 let req = self.build_request(messages);
354
355 let response = self
356 .client
357 .chat()
358 .completions()
359 .create(req)
360 .await
361 .map_err(|e| SgrError::Api {
362 status: 0,
363 body: e.to_string(),
364 })?;
365
366 tracing::info!(model = %response.model, "oxide_chat.complete");
367
368 let text = response.choices.first().and_then(|c| c.message.content.clone()).unwrap_or_default();
369 record_chat_otel(&self.model, messages, response.usage.as_ref(), &[], &text);
370 Ok(text)
371 }
372}