// sgr_agent/oxide_chat_client.rs

//! OxideChatClient — LlmClient via Chat Completions API (not Responses).
//!
//! For OpenAI-compatible endpoints that don't support /responses:
//! Cloudflare AI Gateway compat, OpenRouter, local models, Workers AI.
5
6use crate::client::LlmClient;
7use crate::tool::ToolDef;
8use crate::types::{LlmConfig, Message, Role, SgrError, ToolCall};
9use openai_oxide::OpenAI;
10use openai_oxide::config::ClientConfig;
11use openai_oxide::types::chat::*;
12use serde_json::Value;
13
/// Record an OTEL span for a Chat Completions API call via the shared
/// telemetry helper.
///
/// Token counts default to 0 when the provider omits `usage`; input/output
/// previews are truncated to 500 bytes before being attached to the span.
/// AI-NOTE: OxideChatClient is the primary client for Nemotron (Cloudflare Workers AI).
#[cfg(feature = "telemetry")]
fn record_chat_otel(
    model: &str,
    messages: &[Message],
    usage: Option<&openai_oxide::types::chat::Usage>,
    tool_calls: &[ToolCall],
    text_output: &str,
) {
    let (prompt_tokens, completion_tokens, cached_tokens) = match usage {
        Some(u) => (
            u.prompt_tokens.unwrap_or(0) as i64,
            u.completion_tokens.unwrap_or(0) as i64,
            u.prompt_tokens_details
                .as_ref()
                .and_then(|d| d.cached_tokens)
                .unwrap_or(0) as i64,
        ),
        None => (0, 0, 0),
    };

    // (name, serialized arguments) pairs for the span's tool-call attribute.
    let call_pairs: Vec<(String, String)> = tool_calls
        .iter()
        .map(|call| (call.name.clone(), call.arguments.to_string()))
        .collect();

    crate::telemetry::record_llm_span(
        "chat.completions.api",
        model,
        &last_user_content(messages, 500),
        &truncate_str(text_output, 500),
        &call_pairs,
        &crate::telemetry::LlmUsage {
            prompt_tokens,
            completion_tokens,
            cached_tokens,
            response_model: model.to_string(),
        },
    );
}
40
/// No-op fallback when the `telemetry` feature is disabled, so call sites
/// can invoke `record_chat_otel` unconditionally.
#[cfg(not(feature = "telemetry"))]
fn record_chat_otel(_model: &str, _messages: &[Message], _usage: Option<&openai_oxide::types::chat::Usage>, _tool_calls: &[ToolCall], _text: &str) {}
43
/// Preview text for a span's input attribute: the content of the most
/// recent User or Tool message, truncated to `max_len`. Empty string when
/// no such message exists.
#[cfg(feature = "telemetry")]
fn last_user_content(messages: &[Message], max_len: usize) -> String {
    for message in messages.iter().rev() {
        if matches!(message.role, Role::User | Role::Tool) {
            return truncate_str(&message.content, max_len);
        }
    }
    String::new()
}
51
/// Truncate `s` to at most `max_len` bytes, appending "..." when truncated.
///
/// Fix: the previous `&s[..max_len]` slice panics when `max_len` lands
/// inside a multi-byte UTF-8 character — likely here, since LLM output is
/// arbitrary Unicode. We walk back to the nearest char boundary instead.
#[cfg(feature = "telemetry")]
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }
    // Largest index <= max_len that is a valid char boundary; 0 is always
    // a boundary, so this loop terminates.
    let mut end = max_len;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    format!("{}...", &s[..end])
}
56
/// LlmClient backed by openai-oxide Chat Completions API.
///
/// Built via [`OxideChatClient::from_config`]; targets OpenAI-compatible
/// endpoints that lack /responses support (see module docs).
pub struct OxideChatClient {
    // Underlying openai-oxide HTTP client (API key, base URL, headers).
    client: OpenAI,
    // Model identifier sent with every request.
    pub(crate) model: String,
    // Sampling temperature; the field is only set on requests when Some.
    pub(crate) temperature: Option<f64>,
    // Output-token cap; sent as max_completion_tokens for gpt-5/o-prefixed
    // models, max_tokens otherwise.
    pub(crate) max_tokens: Option<u32>,
    /// Reasoning effort — None disables reasoning for FC (DeepInfra Nemotron Super).
    pub(crate) reasoning_effort: Option<openai_oxide::types::chat::ReasoningEffort>,
    /// Server-side prompt prefix caching key (DeepInfra, OpenAI).
    pub(crate) prompt_cache_key: Option<String>,
}
68
69impl OxideChatClient {
70    /// Create from LlmConfig.
71    pub fn from_config(config: &LlmConfig) -> Result<Self, SgrError> {
72        let api_key = config
73            .api_key
74            .clone()
75            .or_else(|| std::env::var("OPENAI_API_KEY").ok())
76            .unwrap_or_else(|| {
77                if config.base_url.is_some() {
78                    "dummy_key".into()
79                } else {
80                    "".into()
81                }
82            });
83
84        if api_key.is_empty() {
85            return Err(SgrError::Schema("No API key for oxide chat client".into()));
86        }
87
88        let mut client_config = ClientConfig::new(&api_key);
89        if let Some(ref url) = config.base_url {
90            client_config = client_config.base_url(url.clone());
91        }
92        config.apply_headers(&mut client_config);
93
94        let reasoning_effort = config.reasoning_effort.as_deref().and_then(|s| match s {
95            "none" => Some(openai_oxide::types::chat::ReasoningEffort::None),
96            "low" => Some(openai_oxide::types::chat::ReasoningEffort::Low),
97            "medium" => Some(openai_oxide::types::chat::ReasoningEffort::Medium),
98            "high" => Some(openai_oxide::types::chat::ReasoningEffort::High),
99            _ => None,
100        });
101
102        Ok(Self {
103            client: OpenAI::with_config(client_config),
104            model: config.model.clone(),
105            temperature: Some(config.temp),
106            max_tokens: config.max_tokens,
107            reasoning_effort,
108            prompt_cache_key: config.prompt_cache_key.clone(),
109        })
110    }
111
112    fn build_messages(&self, messages: &[Message]) -> Vec<ChatCompletionMessageParam> {
113        messages
114            .iter()
115            .map(|m| match m.role {
116                Role::System => ChatCompletionMessageParam::System {
117                    content: m.content.clone(),
118                    name: None,
119                },
120                Role::User => ChatCompletionMessageParam::User {
121                    content: UserContent::Text(m.content.clone()),
122                    name: None,
123                },
124                Role::Assistant => {
125                    let tc = if m.tool_calls.is_empty() {
126                        None
127                    } else {
128                        Some(
129                            m.tool_calls
130                                .iter()
131                                .map(|tc| openai_oxide::types::chat::ToolCall {
132                                    id: tc.id.clone(),
133                                    type_: "function".into(),
134                                    function: openai_oxide::types::chat::FunctionCall {
135                                        name: tc.name.clone(),
136                                        arguments: tc.arguments.to_string(),
137                                    },
138                                })
139                                .collect(),
140                        )
141                    };
142                    ChatCompletionMessageParam::Assistant {
143                        content: if m.content.is_empty() {
144                            None
145                        } else {
146                            Some(m.content.clone())
147                        },
148                        name: None,
149                        tool_calls: tc,
150                        refusal: None,
151                    }
152                }
153                Role::Tool => ChatCompletionMessageParam::Tool {
154                    content: m.content.clone(),
155                    tool_call_id: m.tool_call_id.clone().unwrap_or_default(),
156                },
157            })
158            .collect()
159    }
160
161    fn build_request(&self, messages: &[Message]) -> ChatCompletionRequest {
162        self.build_request_with_reasoning(messages, self.reasoning_effort.as_ref())
163    }
164
165    fn build_request_no_reasoning(&self, messages: &[Message]) -> ChatCompletionRequest {
166        // Force reasoning off for action/tool execution calls (faster + cache friendly)
167        if self.reasoning_effort.is_some() {
168            self.build_request_with_reasoning(
169                messages,
170                Some(&openai_oxide::types::chat::ReasoningEffort::None),
171            )
172        } else {
173            self.build_request_with_reasoning(messages, None)
174        }
175    }
176
177    fn build_request_with_reasoning(
178        &self,
179        messages: &[Message],
180        reasoning: Option<&openai_oxide::types::chat::ReasoningEffort>,
181    ) -> ChatCompletionRequest {
182        let mut req = ChatCompletionRequest::new(&self.model, self.build_messages(messages));
183        if let Some(temp) = self.temperature {
184            req.temperature = Some(temp);
185        }
186        if let Some(max) = self.max_tokens {
187            if self.model.starts_with("gpt-5") || self.model.starts_with("o") {
188                req = req.max_completion_tokens(max as i64);
189            } else {
190                req.max_tokens = Some(max as i64);
191            }
192        }
193        if let Some(effort) = reasoning {
194            req.reasoning_effort = Some(effort.clone());
195        }
196        if let Some(ref key) = self.prompt_cache_key {
197            req.prompt_cache_key = Some(key.clone());
198        }
199        req
200    }
201
202    fn extract_tool_calls(response: &ChatCompletionResponse) -> Vec<ToolCall> {
203        let Some(choice) = response.choices.first() else {
204            return Vec::new();
205        };
206        let Some(ref calls) = choice.message.tool_calls else {
207            return Vec::new();
208        };
209        calls
210            .iter()
211            .map(|tc| ToolCall {
212                id: tc.id.clone(),
213                name: tc.function.name.clone(),
214                arguments: serde_json::from_str(&tc.function.arguments).unwrap_or(Value::Null),
215            })
216            .collect()
217    }
218}
219
220#[async_trait::async_trait]
221impl LlmClient for OxideChatClient {
222    async fn structured_call(
223        &self,
224        messages: &[Message],
225        schema: &Value,
226    ) -> Result<(Option<Value>, Vec<ToolCall>, String), SgrError> {
227        // Skip ensure_strict for pre-strict schemas (e.g., from build_action_schema)
228        let strict_schema =
229            if schema.get("additionalProperties").and_then(|v| v.as_bool()) == Some(false) {
230                schema.clone()
231            } else {
232                let mut s = schema.clone();
233                openai_oxide::parsing::ensure_strict(&mut s);
234                s
235            };
236
237        let mut req = self.build_request(messages);
238        req.response_format = Some(ResponseFormat::JsonSchema {
239            json_schema: JsonSchema {
240                name: "response".into(),
241                description: None,
242                schema: Some(strict_schema),
243                strict: Some(true),
244            },
245        });
246
247        let response = self
248            .client
249            .chat()
250            .completions()
251            .create(req)
252            .await
253            .map_err(|e| SgrError::Api {
254                status: 0,
255                body: e.to_string(),
256            })?;
257
258        let raw_text = response
259            .choices
260            .first()
261            .and_then(|c| c.message.content.clone())
262            .unwrap_or_default();
263        let tool_calls = Self::extract_tool_calls(&response);
264        let parsed = serde_json::from_str::<Value>(&raw_text).ok();
265
266        if let Some(ref usage) = response.usage {
267            let input = usage.prompt_tokens.unwrap_or(0);
268            let cached = usage
269                .prompt_tokens_details
270                .as_ref()
271                .and_then(|d| d.cached_tokens)
272                .unwrap_or(0);
273            let output = usage.completion_tokens.unwrap_or(0);
274            if cached > 0 {
275                let pct = if input > 0 { cached * 100 / input } else { 0 };
276                eprintln!(
277                    "    💰 {}in/{}out (cached: {}, {}%)",
278                    input, output, cached, pct
279                );
280            } else {
281                eprintln!("    💰 {}in/{}out", input, output);
282            }
283        }
284
285        record_chat_otel(&self.model, messages, response.usage.as_ref(), &tool_calls, &raw_text);
286        Ok((parsed, tool_calls, raw_text))
287    }
288
289    async fn tools_call(
290        &self,
291        messages: &[Message],
292        tools: &[ToolDef],
293    ) -> Result<Vec<ToolCall>, SgrError> {
294        // No reasoning for tool execution — faster + better cache hit
295        let mut req = self.build_request_no_reasoning(messages);
296
297        let chat_tools: Vec<Tool> = tools
298            .iter()
299            .map(|t| {
300                Tool::function(
301                    &t.name,
302                    if t.description.is_empty() {
303                        "No description"
304                    } else {
305                        &t.description
306                    },
307                    t.parameters.clone(),
308                )
309            })
310            .collect();
311        req.tools = Some(chat_tools);
312        req.tool_choice = Some(openai_oxide::types::chat::ToolChoice::Mode(
313            "required".into(),
314        ));
315
316        let response = self
317            .client
318            .chat()
319            .completions()
320            .create(req)
321            .await
322            .map_err(|e| SgrError::Api {
323                status: 0,
324                body: e.to_string(),
325            })?;
326
327        if let Some(ref usage) = response.usage {
328            let input = usage.prompt_tokens.unwrap_or(0);
329            let cached = usage
330                .prompt_tokens_details
331                .as_ref()
332                .and_then(|d| d.cached_tokens)
333                .unwrap_or(0);
334            let output = usage.completion_tokens.unwrap_or(0);
335            if cached > 0 {
336                let pct = if input > 0 { cached * 100 / input } else { 0 };
337                eprintln!(
338                    "    💰 {}in/{}out (cached: {}, {}%)",
339                    input, output, cached, pct
340                );
341            } else {
342                eprintln!("    💰 {}in/{}out", input, output);
343            }
344        }
345
346        let calls = Self::extract_tool_calls(&response);
347        record_chat_otel(&self.model, messages, response.usage.as_ref(), &calls, "");
348        // Don't synthesize finish — empty tool_calls signals completion to ToolCallingAgent.
349        Ok(calls)
350    }
351
352    async fn complete(&self, messages: &[Message]) -> Result<String, SgrError> {
353        let req = self.build_request(messages);
354
355        let response = self
356            .client
357            .chat()
358            .completions()
359            .create(req)
360            .await
361            .map_err(|e| SgrError::Api {
362                status: 0,
363                body: e.to_string(),
364            })?;
365
366        tracing::info!(model = %response.model, "oxide_chat.complete");
367
368        let text = response.choices.first().and_then(|c| c.message.content.clone()).unwrap_or_default();
369        record_chat_otel(&self.model, messages, response.usage.as_ref(), &[], &text);
370        Ok(text)
371    }
372}