// sgr_agent/oxide_chat_client.rs
//! OxideChatClient — LlmClient via Chat Completions API (not Responses).
//!
//! For OpenAI-compatible endpoints that don't support /responses:
//! Cloudflare AI Gateway compat, OpenRouter, local models, Workers AI.

use crate::client::LlmClient;
use crate::multimodal;
use crate::tool::ToolDef;
use crate::types::{LlmConfig, Message, Role, SgrError, ToolCall};
use openai_oxide::OpenAI;
use openai_oxide::config::ClientConfig;
use openai_oxide::types::chat::*;
use serde_json::Value;
/// Record OTEL span for Chat Completions API call via shared telemetry helper.
/// AI-NOTE: OxideChatClient is the primary client for Nemotron (Cloudflare Workers AI).
#[cfg(feature = "telemetry")]
fn record_chat_otel(
    model: &str,
    messages: &[Message],
    usage: Option<&openai_oxide::types::chat::Usage>,
    tool_calls: &[ToolCall],
    text_output: &str,
) {
    // Token counts fall back to zero when the provider omitted usage data.
    let (prompt_tokens, completion_tokens, cached_tokens) = match usage {
        Some(u) => (
            u.prompt_tokens.unwrap_or(0),
            u.completion_tokens.unwrap_or(0),
            u.prompt_tokens_details
                .as_ref()
                .and_then(|d| d.cached_tokens)
                .unwrap_or(0),
        ),
        None => (0, 0, 0),
    };

    // Truncated copies keep span attribute sizes bounded.
    let input = last_user_content(messages, 500);
    let output = truncate_str(text_output, 500);
    let calls: Vec<(String, String)> = tool_calls
        .iter()
        .map(|call| (call.name.clone(), call.arguments.to_string()))
        .collect();

    crate::telemetry::record_llm_span(
        "chat.completions.api",
        model,
        &input,
        &output,
        &calls,
        &crate::telemetry::LlmUsage {
            prompt_tokens,
            completion_tokens,
            cached_tokens,
            response_model: model.to_string(),
        },
    );
}
/// No-op stand-in for `record_chat_otel` when the `telemetry` feature is disabled,
/// so call sites compile unchanged under either feature configuration.
#[cfg(not(feature = "telemetry"))]
fn record_chat_otel(
    _model: &str,
    _messages: &[Message],
    _usage: Option<&openai_oxide::types::chat::Usage>,
    _tool_calls: &[ToolCall],
    _text: &str,
) {
}
/// Content of the most recent User/Tool message, truncated to `max_len`;
/// empty string when the history has no such message.
#[cfg(feature = "telemetry")]
fn last_user_content(messages: &[Message], max_len: usize) -> String {
    // Walk the history from newest to oldest and stop at the first match.
    for msg in messages.iter().rev() {
        if matches!(msg.role, Role::User | Role::Tool) {
            return truncate_str(&msg.content, max_len);
        }
    }
    String::new()
}
/// Truncate `s` for telemetry attributes, appending "..." when anything was cut.
#[cfg(feature = "telemetry")]
fn truncate_str(s: &str, max_len: usize) -> String {
    use crate::str_ext::StrExt;
    let t = s.trunc(max_len);
    // Byte-length comparison detects truncation; assumes `trunc` never returns
    // something longer than its input — TODO confirm against str_ext::StrExt.
    if t.len() < s.len() {
        format!("{t}...")
    } else {
        s.to_string()
    }
}
/// LlmClient backed by openai-oxide Chat Completions API.
///
/// Construct via [`OxideChatClient::from_config`]; all fields are resolved once
/// from `LlmConfig` and reused for every request.
pub struct OxideChatClient {
    /// Underlying openai-oxide HTTP client (API key, base URL, headers baked in).
    client: OpenAI,
    /// Model identifier sent with every request.
    pub(crate) model: String,
    /// Sampling temperature, forwarded as-is when present.
    pub(crate) temperature: Option<f64>,
    /// Output-token cap; sent as `max_completion_tokens` for gpt-5/o-prefixed models.
    pub(crate) max_tokens: Option<u32>,
    /// Reasoning effort — None disables reasoning for FC (DeepInfra Nemotron Super).
    pub(crate) reasoning_effort: Option<openai_oxide::types::chat::ReasoningEffort>,
    /// Server-side prompt prefix caching key (DeepInfra, OpenAI).
    pub(crate) prompt_cache_key: Option<String>,
    /// Session ID for sticky routing and trace grouping.
    pub(crate) session_id: Option<String>,
    /// Prompt cache TTL (resolved from config).
    cache_ttl: Option<String>,
    /// Provider to pin on OpenRouter (resolved from config).
    pin_provider: Option<String>,
}
109impl OxideChatClient {
110    /// Create from LlmConfig.
111    pub fn from_config(config: &LlmConfig) -> Result<Self, SgrError> {
112        let api_key = config
113            .api_key
114            .clone()
115            .or_else(|| std::env::var("OPENAI_API_KEY").ok())
116            .unwrap_or_else(|| {
117                if config.base_url.is_some() {
118                    "dummy_key".into()
119                } else {
120                    "".into()
121                }
122            });
123
124        if api_key.is_empty() {
125            return Err(SgrError::Schema("No API key for oxide chat client".into()));
126        }
127
128        let mut client_config = ClientConfig::new(&api_key);
129        if let Some(ref url) = config.base_url {
130            client_config = client_config.base_url(url.clone());
131        }
132        config.apply_headers(&mut client_config);
133
134        let reasoning_effort = config.reasoning_effort.as_deref().and_then(|s| match s {
135            "none" => Some(openai_oxide::types::chat::ReasoningEffort::None),
136            "low" => Some(openai_oxide::types::chat::ReasoningEffort::Low),
137            "medium" => Some(openai_oxide::types::chat::ReasoningEffort::Medium),
138            "high" => Some(openai_oxide::types::chat::ReasoningEffort::High),
139            _ => None,
140        });
141
142        Ok(Self {
143            client: OpenAI::with_config(client_config),
144            model: config.model.clone(),
145            temperature: Some(config.temp),
146            max_tokens: config.max_tokens,
147            reasoning_effort,
148            prompt_cache_key: config.prompt_cache_key.clone(),
149            session_id: config.session_id.clone(),
150            cache_ttl: config.resolved_cache_ttl().map(String::from),
151            pin_provider: config.resolved_pin_provider().map(String::from),
152        })
153    }
154
155    fn build_messages(&self, messages: &[Message]) -> Vec<ChatCompletionMessageParam> {
156        let result: Vec<ChatCompletionMessageParam> = messages
157            .iter()
158            .map(|m| match m.role {
159                Role::System => ChatCompletionMessageParam::System {
160                    content: m.content.clone(),
161                    name: None,
162                },
163                Role::User => {
164                    // Multimodal (text + image) if the caller attached images;
165                    // otherwise plain text. `chat_parts` is the same helper the
166                    // legacy `OpenAIClient` uses — identical wire shape.
167                    let content = if m.images.is_empty() {
168                        UserContent::Text(m.content.clone())
169                    } else {
170                        UserContent::Parts(multimodal::chat_parts(&m.content, &m.images))
171                    };
172                    ChatCompletionMessageParam::User {
173                        content,
174                        name: None,
175                    }
176                }
177                Role::Assistant => {
178                    let tc = if m.tool_calls.is_empty() {
179                        None
180                    } else {
181                        Some(
182                            m.tool_calls
183                                .iter()
184                                .map(|tc| openai_oxide::types::chat::ToolCall {
185                                    id: tc.id.clone(),
186                                    type_: "function".into(),
187                                    function: openai_oxide::types::chat::FunctionCall {
188                                        name: tc.name.clone(),
189                                        arguments: tc.arguments.to_string(),
190                                    },
191                                })
192                                .collect(),
193                        )
194                    };
195                    ChatCompletionMessageParam::Assistant {
196                        content: if m.content.is_empty() {
197                            None
198                        } else {
199                            Some(m.content.clone())
200                        },
201                        name: None,
202                        tool_calls: tc,
203                        refusal: None,
204                    }
205                }
206                Role::Tool => ChatCompletionMessageParam::Tool {
207                    content: m.content.clone(),
208                    tool_call_id: m.tool_call_id.clone().unwrap_or_default(),
209                },
210            })
211            .collect();
212
213        result
214    }
215
216    fn build_request(&self, messages: &[Message]) -> ChatCompletionRequest {
217        self.build_request_with_reasoning(messages, self.reasoning_effort.as_ref())
218    }
219
220    fn build_request_no_reasoning(&self, messages: &[Message]) -> ChatCompletionRequest {
221        // Force reasoning off for action/tool execution calls (faster + cache friendly)
222        if self.reasoning_effort.is_some() {
223            self.build_request_with_reasoning(
224                messages,
225                Some(&openai_oxide::types::chat::ReasoningEffort::None),
226            )
227        } else {
228            self.build_request_with_reasoning(messages, None)
229        }
230    }
231
232    fn build_request_with_reasoning(
233        &self,
234        messages: &[Message],
235        reasoning: Option<&openai_oxide::types::chat::ReasoningEffort>,
236    ) -> ChatCompletionRequest {
237        let mut req = ChatCompletionRequest::new(&self.model, self.build_messages(messages));
238        if let Some(temp) = self.temperature {
239            req.temperature = Some(temp);
240        }
241        if let Some(max) = self.max_tokens {
242            if self.model.starts_with("gpt-5") || self.model.starts_with("o") {
243                req = req.max_completion_tokens(max as i64);
244            } else {
245                req.max_tokens = Some(max as i64);
246            }
247        }
248        if let Some(effort) = reasoning {
249            req.reasoning_effort = Some(effort.clone());
250        }
251        if let Some(ref key) = self.prompt_cache_key {
252            req.prompt_cache_key = Some(key.clone());
253        }
254        // Session ID for sticky routing + trace grouping — OpenRouter only.
255        // AI-NOTE: OpenAI Chat API rejects unknown param 'session_id'.
256        // Only set when OpenRouter features are present (cache_ttl or pin_provider).
257        if let Some(ref sid) = self.session_id
258            && (self.cache_ttl.is_some() || self.pin_provider.is_some())
259        {
260            req.session_id = Some(sid.clone());
261        }
262        // Provider capabilities (resolved from LlmConfig, not string checks)
263        if let Some(ref ttl) = self.cache_ttl {
264            req.cache_control = Some(serde_json::json!({"type": "ephemeral", "ttl": ttl}));
265        }
266        if let Some(ref provider) = self.pin_provider
267            && let Ok(prefs) =
268                openai_oxide::openrouter::ProviderPreferences::pinned(provider).to_value()
269        {
270            req.provider = Some(prefs);
271        }
272        req
273    }
274
275    fn extract_tool_calls(response: &ChatCompletionResponse) -> Vec<ToolCall> {
276        let Some(choice) = response.choices.first() else {
277            return Vec::new();
278        };
279        let Some(ref calls) = choice.message.tool_calls else {
280            return Vec::new();
281        };
282        calls
283            .iter()
284            .map(|tc| ToolCall {
285                id: tc.id.clone(),
286                name: tc.function.name.clone(),
287                arguments: crate::str_ext::parse_tool_args(&tc.function.arguments),
288            })
289            .collect()
290    }
291}
293#[async_trait::async_trait]
294impl LlmClient for OxideChatClient {
295    async fn structured_call(
296        &self,
297        messages: &[Message],
298        schema: &Value,
299    ) -> Result<(Option<Value>, Vec<ToolCall>, String), SgrError> {
300        // Skip ensure_strict for pre-strict schemas (e.g., from build_action_schema)
301        let strict_schema =
302            if schema.get("additionalProperties").and_then(|v| v.as_bool()) == Some(false) {
303                schema.clone()
304            } else {
305                let mut s = schema.clone();
306                openai_oxide::parsing::ensure_strict(&mut s);
307                s
308            };
309
310        let mut req = self.build_request(messages);
311        req.response_format = Some(ResponseFormat::JsonSchema {
312            json_schema: JsonSchema {
313                name: "response".into(),
314                description: None,
315                schema: Some(strict_schema),
316                strict: Some(true),
317            },
318        });
319
320        let response = self
321            .client
322            .chat()
323            .completions()
324            .create(req)
325            .await
326            .map_err(|e| SgrError::Api {
327                status: 0,
328                body: e.to_string(),
329            })?;
330
331        let raw_text = response
332            .choices
333            .first()
334            .and_then(|c| c.message.content.clone())
335            .unwrap_or_default();
336        let tool_calls = Self::extract_tool_calls(&response);
337        let parsed = serde_json::from_str::<Value>(&raw_text).ok();
338
339        if let Some(ref usage) = response.usage {
340            let input = usage.prompt_tokens.unwrap_or(0);
341            let cached = usage
342                .prompt_tokens_details
343                .as_ref()
344                .and_then(|d| d.cached_tokens)
345                .unwrap_or(0);
346            let output = usage.completion_tokens.unwrap_or(0);
347            if cached > 0 {
348                let pct = if input > 0 { cached * 100 / input } else { 0 };
349                eprintln!(
350                    "    💰 {}in/{}out (cached: {}, {}%)",
351                    input, output, cached, pct
352                );
353            } else {
354                eprintln!("    💰 {}in/{}out", input, output);
355            }
356        }
357
358        record_chat_otel(
359            &self.model,
360            messages,
361            response.usage.as_ref(),
362            &tool_calls,
363            &raw_text,
364        );
365        Ok((parsed, tool_calls, raw_text))
366    }
367
368    async fn tools_call(
369        &self,
370        messages: &[Message],
371        tools: &[ToolDef],
372    ) -> Result<Vec<ToolCall>, SgrError> {
373        // No reasoning for tool execution — faster + better cache hit
374        let mut req = self.build_request_no_reasoning(messages);
375
376        // AI-NOTE: OpenRouter routes to Azure which enforces OpenAI strict mode on ALL tools.
377        // ensure_strict adds additionalProperties:false + all properties in required.
378        // Safe for non-strict providers (just adds redundant fields).
379        let chat_tools: Vec<Tool> = tools
380            .iter()
381            .map(|t| {
382                let mut params = t.parameters.clone();
383                openai_oxide::parsing::ensure_strict(&mut params);
384                Tool::function(
385                    &t.name,
386                    if t.description.is_empty() {
387                        "No description"
388                    } else {
389                        &t.description
390                    },
391                    params,
392                )
393            })
394            .collect();
395        req.tools = Some(chat_tools);
396        req.tool_choice = Some(openai_oxide::types::chat::ToolChoice::Mode(
397            "required".into(),
398        ));
399        // AI-NOTE: OpenRouter returns 404 for parallel_tool_calls on Anthropic models.
400        // Anthropic uses disable_parallel_tool_use (different API), so skip for anthropic/.
401        if !self.model.contains("anthropic/") {
402            req.parallel_tool_calls = Some(true);
403        }
404
405        let response = self
406            .client
407            .chat()
408            .completions()
409            .create(req)
410            .await
411            .map_err(|e| SgrError::Api {
412                status: 0,
413                body: e.to_string(),
414            })?;
415
416        if let Some(ref usage) = response.usage {
417            let input = usage.prompt_tokens.unwrap_or(0);
418            let cached = usage
419                .prompt_tokens_details
420                .as_ref()
421                .and_then(|d| d.cached_tokens)
422                .unwrap_or(0);
423            let output = usage.completion_tokens.unwrap_or(0);
424            if cached > 0 {
425                let pct = if input > 0 { cached * 100 / input } else { 0 };
426                eprintln!(
427                    "    💰 {}in/{}out (cached: {}, {}%)",
428                    input, output, cached, pct
429                );
430            } else {
431                eprintln!("    💰 {}in/{}out", input, output);
432            }
433        }
434
435        let calls = Self::extract_tool_calls(&response);
436        record_chat_otel(&self.model, messages, response.usage.as_ref(), &calls, "");
437        // Don't synthesize finish — empty tool_calls signals completion to ToolCallingAgent.
438        Ok(calls)
439    }
440
441    // AI-NOTE: tools_call_with_text — single-phase agent needs text+tools in one call.
442    // tool_choice="auto" so model can return text reasoning alongside tool calls.
443    async fn tools_call_with_text(
444        &self,
445        messages: &[Message],
446        tools: &[ToolDef],
447    ) -> Result<(Vec<ToolCall>, String), SgrError> {
448        let mut req = self.build_request_no_reasoning(messages);
449
450        let chat_tools: Vec<Tool> = tools
451            .iter()
452            .map(|t| {
453                let mut params = t.parameters.clone();
454                openai_oxide::parsing::ensure_strict(&mut params);
455                Tool::function(
456                    &t.name,
457                    if t.description.is_empty() {
458                        "No description"
459                    } else {
460                        &t.description
461                    },
462                    params,
463                )
464            })
465            .collect();
466        req.tools = Some(chat_tools);
467        // "auto" not "required" — model can return text + tools or just text
468        req.tool_choice = Some(openai_oxide::types::chat::ToolChoice::Mode("auto".into()));
469
470        let response = self
471            .client
472            .chat()
473            .completions()
474            .create(req)
475            .await
476            .map_err(|e| SgrError::Api {
477                status: 0,
478                body: e.to_string(),
479            })?;
480
481        if let Some(ref usage) = response.usage {
482            let input = usage.prompt_tokens.unwrap_or(0);
483            let cached = usage
484                .prompt_tokens_details
485                .as_ref()
486                .and_then(|d| d.cached_tokens)
487                .unwrap_or(0);
488            let output = usage.completion_tokens.unwrap_or(0);
489            if cached > 0 {
490                let pct = if input > 0 { cached * 100 / input } else { 0 };
491                eprintln!(
492                    "    💰 {}in/{}out (cached: {}, {}%)",
493                    input, output, cached, pct
494                );
495            } else {
496                eprintln!("    💰 {}in/{}out", input, output);
497            }
498        }
499
500        let text = response
501            .choices
502            .first()
503            .and_then(|c| c.message.content.clone())
504            .unwrap_or_default();
505        let calls = Self::extract_tool_calls(&response);
506        record_chat_otel(
507            &self.model,
508            messages,
509            response.usage.as_ref(),
510            &calls,
511            &text,
512        );
513        Ok((calls, text))
514    }
515
516    async fn complete(&self, messages: &[Message]) -> Result<String, SgrError> {
517        let req = self.build_request(messages);
518
519        let response = self
520            .client
521            .chat()
522            .completions()
523            .create(req)
524            .await
525            .map_err(|e| SgrError::Api {
526                status: 0,
527                body: e.to_string(),
528            })?;
529
530        tracing::info!(model = %response.model, "oxide_chat.complete");
531
532        let text = response
533            .choices
534            .first()
535            .and_then(|c| c.message.content.clone())
536            .unwrap_or_default();
537        record_chat_otel(&self.model, messages, response.usage.as_ref(), &[], &text);
538        Ok(text)
539    }
540}