multi_llm/providers/openai_shared/
utils.rs

1//! Utility functions and HTTP client for OpenAI-compatible providers
2//!
3//! Contains configuration utilities, HTTP client functionality,
4//! custom format parsing, and conversion functions.
5
6// Allow unwrap in custom format parser - regex captures are verified before unwrap
7#![allow(clippy::unwrap_used)]
8
9use super::types::*;
10use crate::error::{LlmError, LlmResult};
11use crate::internals::retry::{RetryExecutor, RetryPolicy};
12use crate::logging::{log_debug, log_error, log_warn};
13use crate::provider::{RequestConfig, Tool, ToolCall, ToolChoice};
14use crate::{MessageContent, MessageRole, UnifiedMessage};
15use regex::Regex;
16use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, CONTENT_TYPE};
17use serde_json::Value;
18use thiserror::Error;
19use tokio::sync::Mutex;
20
/// Errors related to custom tool format parsing
///
/// Produced by [`CustomFormatParser::parse`] and the per-format helpers it
/// dispatches to.
#[derive(Debug, Error)]
pub enum CustomFormatError {
    /// A known tool-call pattern matched, but a required piece was missing
    /// (for example a capture group, the `name` field or the `arguments` field).
    /// The payload is a human-readable description of what was missing.
    #[error("Failed to parse custom format: {0}")]
    ParseError(String),
    /// The extracted payload was not valid JSON. `#[from]` lets `?` convert a
    /// `serde_json::Error` into this variant automatically.
    #[error("Invalid JSON in custom format: {0}")]
    InvalidJson(#[from] serde_json::Error),
}
29
/// Represents a detected custom tool call format
///
/// Built by the `parse_*` helpers of [`CustomFormatParser`] once a pattern has
/// matched and its payload has been decoded.
#[derive(Debug)]
pub struct CustomToolCallMatch {
    /// Name of the function/tool the model asked to call.
    pub function_name: String,
    /// Decoded JSON arguments for the call.
    pub arguments: Value,
    /// The input content with the matched tool-call text removed and the
    /// result trimmed (empty for the JSON-only format, where the whole
    /// content is the tool call).
    pub cleaned_content: String,
    /// The exact text that was recognised as the tool call.
    pub raw_match: String,
}
38
/// Parser for custom tool call formats
///
/// Holds an ordered list of named regular expressions; [`CustomFormatParser::parse`]
/// tries them in order and returns the first successful match.
pub struct CustomFormatParser {
    // Ordered (format_name, pattern) pairs; the name selects the decoding
    // routine in `try_parse_pattern`.
    patterns: Vec<(String, Regex)>, // (format_name, pattern)
}
43
44impl Default for CustomFormatParser {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl CustomFormatParser {
51    pub fn new() -> Self {
52        // Create patterns with safe fallback on compilation failure
53        let mut patterns = Vec::new();
54
55        // Pattern 1: GPT-OSS v1 format - only capture function name, let extract_balanced_json handle the JSON
56        if let Ok(regex) =
57            Regex::new(r"commentary to=functions\.(\w+)\s+<\|constrain\|>json<\|message\|>")
58        {
59            patterns.push(("gpt_oss_v1".to_string(), regex));
60        }
61
62        // Pattern 2: XML tool_call format (Qwen models)
63        // Matches: <tool_call>{"name": "func_name", "arguments": {...}}</tool_call>
64        // Also matches: <tool_call>{"name": "func_name", "arguments": {...}} (without closing tag)
65        // Uses (?s) flag to match across multiple lines and improved capture for JSON objects
66        if let Ok(regex) = Regex::new(r#"(?s)<tool_call>\s*(\{.*?\})\s*(?:</tool_call>|$)"#) {
67            patterns.push(("xml_tool_call".to_string(), regex));
68        }
69
70        // Pattern 3: DeepSeek TOOL_REQUEST format
71        // Matches: [TOOL_REQUEST]{"name": "func_name", "arguments": {...}}[END_TOOL_REQUEST]
72        if let Ok(regex) = Regex::new(r#"(?s)\[TOOL_REQUEST\](.*?)\[END_TOOL_REQUEST\]"#) {
73            patterns.push(("deepseek_tool_request".to_string(), regex));
74        }
75
76        // Pattern 4: "Tool call:" format (self-generated format from structured content)
77        // Matches: Tool call: function_name with args: {...}
78        if let Ok(regex) = Regex::new(r#"(?s)Tool call:\s+(\w+)\s+with args:\s+(\{.*\})"#) {
79            patterns.push(("tool_call_with_args".to_string(), regex));
80        }
81
82        // Pattern 5: Bracketed JSON format (some models)
83        // Matches: {"name": "func_name", "arguments": {...}} when isolated in response
84        if let Ok(regex) = Regex::new(r#"(?s)^(\{[^{}]*"name"[^{}]*"arguments"[^{}]*\})$"#) {
85            patterns.push(("json_only".to_string(), regex));
86        }
87
88        Self { patterns }
89    }
90
91    /// Attempts to parse custom tool call formats from content
92    pub fn parse(&self, content: &str) -> Result<Option<CustomToolCallMatch>, CustomFormatError> {
93        for (format_name, pattern) in &self.patterns {
94            if let Some(result) = self.try_parse_pattern(format_name, pattern, content)? {
95                return Ok(Some(result));
96            }
97        }
98
99        Self::log_no_match(content, self.patterns.len());
100        Ok(None)
101    }
102
103    /// Try to parse a single pattern match
104    fn try_parse_pattern(
105        &self,
106        format_name: &str,
107        pattern: &regex::Regex,
108        content: &str,
109    ) -> Result<Option<CustomToolCallMatch>, CustomFormatError> {
110        let Some(captures) = pattern.captures(content) else {
111            return Ok(None);
112        };
113
114        log_debug!(
115            format_name = format_name,
116            capture_count = captures.len(),
117            "FOUND MATCH for custom tool format"
118        );
119
120        match format_name {
121            "gpt_oss_v1" => self.parse_gpt_oss_v1(&captures, content),
122            "xml_tool_call" => self.parse_xml_tool_call(&captures, content, format_name),
123            "deepseek_tool_request" => {
124                self.parse_deepseek_tool_request(&captures, content, format_name)
125            }
126            "tool_call_with_args" => {
127                self.parse_tool_call_with_args(&captures, content, format_name)
128            }
129            "json_only" => self.parse_json_only(&captures, format_name),
130            _ => Ok(None),
131        }
132    }
133
    /// Log when no pattern matches
    ///
    /// Emits a warning carrying a preview of the first 300 characters, the
    /// full content, the number of patterns that were tried and the content
    /// length in bytes.
    ///
    /// NOTE(review): the complete `content` is logged at warn level; confirm
    /// this is acceptable for large or sensitive model output.
    fn log_no_match(content: &str, pattern_count: usize) {
        log_warn!(
            content_preview = content.chars().take(300).collect::<String>(),
            full_content = content,
            pattern_count = pattern_count,
            content_length = content.len(),
            "No custom tool format patterns matched - content may contain unrecognized tool call format"
        );
    }
144
145    fn parse_gpt_oss_v1(
146        &self,
147        captures: &regex::Captures,
148        content: &str,
149    ) -> Result<Option<CustomToolCallMatch>, CustomFormatError> {
150        let function_name = captures
151            .get(1)
152            .ok_or_else(|| CustomFormatError::ParseError("No function name".to_string()))?
153            .as_str()
154            .to_string();
155
156        if let Some(message_start) = content.find("<|constrain|>json<|message|>") {
157            let json_start = message_start + "<|constrain|>json<|message|>".len();
158            let remaining_content = &content[json_start..];
159
160            if let Some((json_str, json_end_pos)) = Self::extract_balanced_json(remaining_content) {
161                let arguments = serde_json::from_str::<Value>(json_str.trim())?;
162
163                let pattern_start = content.find("commentary to=functions.").ok_or_else(|| {
164                    CustomFormatError::ParseError("Pattern start not found".to_string())
165                })?;
166                let pattern_end = json_start + json_end_pos;
167                let full_match = &content[pattern_start..pattern_end];
168                let cleaned_content = content.replace(full_match, "").trim().to_string();
169
170                return Ok(Some(CustomToolCallMatch {
171                    function_name,
172                    arguments,
173                    cleaned_content,
174                    raw_match: full_match.to_string(),
175                }));
176            }
177        }
178        Ok(None)
179    }
180
181    fn parse_xml_tool_call(
182        &self,
183        captures: &regex::Captures,
184        content: &str,
185        format_name: &str,
186    ) -> Result<Option<CustomToolCallMatch>, CustomFormatError> {
187        let captured_content = captures
188            .get(1)
189            .ok_or_else(|| {
190                CustomFormatError::ParseError("No content captured from XML tool call".to_string())
191            })?
192            .as_str()
193            .trim();
194
195        let json_content =
196            if let Some((extracted_json, _)) = Self::extract_balanced_json(captured_content) {
197                extracted_json
198            } else {
199                Self::attempt_json_repair(captured_content)
200            };
201
202        let json_obj = serde_json::from_str::<Value>(&json_content)?;
203        let function_name = json_obj
204            .get("name")
205            .and_then(|n| n.as_str())
206            .ok_or_else(|| {
207                CustomFormatError::ParseError("Missing 'name' field in tool call".to_string())
208            })?
209            .to_string();
210
211        let arguments = json_obj
212            .get("arguments")
213            .ok_or_else(|| {
214                CustomFormatError::ParseError("Missing 'arguments' field in tool call".to_string())
215            })?
216            .clone();
217
218        let full_match = captures.get(0).unwrap().as_str();
219        let cleaned_content = content.replace(full_match, "").trim().to_string();
220
221        log_debug!(
222            format = format_name,
223            function = &function_name,
224            json_length = json_content.len(),
225            "Successfully parsed XML tool call with balanced JSON extraction"
226        );
227
228        Ok(Some(CustomToolCallMatch {
229            function_name,
230            arguments,
231            cleaned_content,
232            raw_match: full_match.to_string(),
233        }))
234    }
235
236    fn parse_deepseek_tool_request(
237        &self,
238        captures: &regex::Captures,
239        content: &str,
240        format_name: &str,
241    ) -> Result<Option<CustomToolCallMatch>, CustomFormatError> {
242        let json_content = captures.get(1).unwrap().as_str().trim();
243
244        if let Some((json_str, _)) = Self::extract_balanced_json(json_content) {
245            let json_obj = serde_json::from_str::<Value>(&json_str)?;
246
247            let function_name = json_obj
248                .get("name")
249                .and_then(|n| n.as_str())
250                .ok_or_else(|| {
251                    CustomFormatError::ParseError(
252                        "Missing 'name' field in DeepSeek tool call".to_string(),
253                    )
254                })?
255                .to_string();
256
257            let arguments = json_obj
258                .get("arguments")
259                .ok_or_else(|| {
260                    CustomFormatError::ParseError(
261                        "Missing 'arguments' field in DeepSeek tool call".to_string(),
262                    )
263                })?
264                .clone();
265
266            let full_match = captures.get(0).unwrap().as_str();
267            let cleaned_content = content.replace(full_match, "").trim().to_string();
268
269            log_debug!(
270                format = format_name,
271                function = &function_name,
272                json_length = json_str.len(),
273                "Successfully parsed DeepSeek TOOL_REQUEST format"
274            );
275
276            return Ok(Some(CustomToolCallMatch {
277                function_name,
278                arguments,
279                cleaned_content,
280                raw_match: full_match.to_string(),
281            }));
282        }
283
284        Err(CustomFormatError::ParseError(
285            "Failed to extract balanced JSON from DeepSeek TOOL_REQUEST".to_string(),
286        ))
287    }
288
289    fn parse_tool_call_with_args(
290        &self,
291        captures: &regex::Captures,
292        content: &str,
293        format_name: &str,
294    ) -> Result<Option<CustomToolCallMatch>, CustomFormatError> {
295        let function_name = captures
296            .get(1)
297            .ok_or_else(|| {
298                CustomFormatError::ParseError(
299                    "No function name captured from tool call format".to_string(),
300                )
301            })?
302            .as_str()
303            .to_string();
304
305        let args_json = captures
306            .get(2)
307            .ok_or_else(|| {
308                CustomFormatError::ParseError(
309                    "No arguments captured from tool call format".to_string(),
310                )
311            })?
312            .as_str();
313
314        let arguments = serde_json::from_str::<Value>(args_json)?;
315        let full_match = captures.get(0).unwrap().as_str();
316        let cleaned_content = content.replace(full_match, "").trim().to_string();
317
318        log_debug!(
319            format = format_name,
320            function = &function_name,
321            "Successfully parsed 'Tool call:' format"
322        );
323
324        Ok(Some(CustomToolCallMatch {
325            function_name,
326            arguments,
327            cleaned_content,
328            raw_match: full_match.to_string(),
329        }))
330    }
331
332    fn parse_json_only(
333        &self,
334        captures: &regex::Captures,
335        format_name: &str,
336    ) -> Result<Option<CustomToolCallMatch>, CustomFormatError> {
337        let json_str = captures.get(1).unwrap().as_str();
338        let json_obj = serde_json::from_str::<Value>(json_str)?;
339
340        let function_name = json_obj
341            .get("name")
342            .and_then(|n| n.as_str())
343            .ok_or_else(|| {
344                CustomFormatError::ParseError(
345                    "Missing 'name' field in JSON-only tool call".to_string(),
346                )
347            })?
348            .to_string();
349
350        let arguments = json_obj
351            .get("arguments")
352            .ok_or_else(|| {
353                CustomFormatError::ParseError(
354                    "Missing 'arguments' field in JSON-only tool call".to_string(),
355                )
356            })?
357            .clone();
358
359        log_debug!(
360            format = format_name,
361            function = &function_name,
362            "Successfully parsed JSON-only tool call format"
363        );
364
365        Ok(Some(CustomToolCallMatch {
366            function_name,
367            arguments,
368            cleaned_content: "".to_string(),
369            raw_match: json_str.to_string(),
370        }))
371    }
372
373    /// Clean tool call patterns from content as a fallback when parsing fails
374    /// This removes obvious tool call patterns to prevent showing raw XML/tags to users
375    pub(crate) fn clean_tool_call_patterns(content: &str) -> String {
376        let mut cleaned = content.to_string();
377
378        // Remove XML tool call patterns (even malformed ones)
379        if let Ok(regex) = Regex::new(r#"(?s)<tool_call>.*?(?:</tool_call>|$)"#) {
380            cleaned = regex.replace_all(&cleaned, "").to_string();
381        }
382
383        // Remove DeepSeek tool request patterns
384        if let Ok(regex) = Regex::new(r#"(?s)\[TOOL_REQUEST\].*?(?:\[END_TOOL_REQUEST\]|$)"#) {
385            cleaned = regex.replace_all(&cleaned, "").to_string();
386        }
387
388        // Remove "Tool call:" format patterns
389        if let Ok(regex) = Regex::new(r#"(?s)Tool call:\s+\w+\s+with args:\s+\{.*?\}"#) {
390            cleaned = regex.replace_all(&cleaned, "").to_string();
391        }
392
393        // Remove standalone JSON objects that look like tool calls
394        if let Ok(regex) = Regex::new(r#"(?s)^\s*\{[^{}]*"name"[^{}]*"arguments"[^{}]*\}\s*$"#) {
395            cleaned = regex.replace_all(&cleaned, "").to_string();
396        }
397
398        cleaned.trim().to_string()
399    }
400
401    /// Attempt to repair common JSON formatting issues
402    pub(crate) fn attempt_json_repair(text: &str) -> String {
403        let trimmed = text.trim();
404
405        // If it doesn't start with {, return as-is
406        if !trimmed.starts_with('{') {
407            return trimmed.to_string();
408        }
409
410        // Count braces to see if we're missing closing braces
411        let (open_braces, close_braces) = Self::count_json_braces(trimmed);
412
413        // If we have more open braces than close braces, add missing closing braces
414        if open_braces > close_braces {
415            Self::add_missing_braces(trimmed, open_braces - close_braces)
416        } else {
417            // Return original if no obvious repair needed
418            trimmed.to_string()
419        }
420    }
421
422    /// Count open and close braces in JSON, respecting string contexts
423    pub(crate) fn count_json_braces(text: &str) -> (usize, usize) {
424        let mut open_braces = 0;
425        let mut close_braces = 0;
426        let mut in_string = false;
427        let mut escaped = false;
428
429        for ch in text.chars() {
430            match ch {
431                '"' if !escaped => in_string = !in_string,
432                '\\' if in_string => escaped = !escaped,
433                '{' if !in_string => open_braces += 1,
434                '}' if !in_string => close_braces += 1,
435                _ => escaped = false,
436            }
437
438            if ch != '\\' {
439                escaped = false;
440            }
441        }
442
443        (open_braces, close_braces)
444    }
445
446    /// Add missing closing braces to JSON text
447    pub(crate) fn add_missing_braces(text: &str, missing_count: usize) -> String {
448        let mut repaired = text.to_string();
449        for _ in 0..missing_count {
450            repaired.push('}');
451        }
452
453        log_debug!(
454            original_length = text.len(),
455            repaired_length = repaired.len(),
456            added_braces = missing_count,
457            "Repaired JSON by adding missing closing braces"
458        );
459
460        repaired
461    }
462
463    /// Extract balanced JSON from text, handling nested braces properly
464    pub(crate) fn extract_balanced_json(text: &str) -> Option<(String, usize)> {
465        let trimmed = text.trim_start();
466        if !trimmed.starts_with('{') {
467            return None;
468        }
469
470        let chars: Vec<char> = trimmed.chars().collect();
471        let json_end = Self::find_balanced_json_end(&chars)?;
472
473        let json_chars: String = chars[0..=json_end].iter().collect();
474        let json_byte_len = json_chars.len();
475        let offset = text.len() - trimmed.len(); // Account for leading whitespace
476        Some((json_chars, offset + json_byte_len))
477    }
478
479    /// Find the index where balanced JSON ends
480    fn find_balanced_json_end(chars: &[char]) -> Option<usize> {
481        let mut brace_count = 0;
482        let mut in_string = false;
483        let mut escaped = false;
484
485        for (char_idx, ch) in chars.iter().enumerate() {
486            match ch {
487                '"' if !escaped => in_string = !in_string,
488                '\\' if in_string => escaped = !escaped,
489                '{' if !in_string => brace_count += 1,
490                '}' if !in_string => {
491                    brace_count -= 1;
492                    if brace_count == 0 {
493                        return Some(char_idx);
494                    }
495                }
496                _ => escaped = false,
497            }
498
499            if *ch != '\\' {
500                escaped = false;
501            }
502        }
503
504        None // Unbalanced braces
505    }
506}
507
508/// HTTP client functionality for OpenAI-compatible providers
509pub mod http {
510    use super::*;
511
    /// Shared HTTP client for OpenAI-compatible providers
    ///
    /// Wraps a `reqwest::Client` together with the retry executor used to run
    /// chat requests.
    #[derive(Debug)]
    pub struct OpenAICompatibleClient {
        // Underlying HTTP client used to send every request.
        client: reqwest::Client,
        // Retry executor behind an async mutex so it can be used and replaced
        // through `&self`.
        retry_executor: Mutex<RetryExecutor>,
    }
518
519    impl Default for OpenAICompatibleClient {
520        fn default() -> Self {
521            Self::new()
522        }
523    }
524
525    impl OpenAICompatibleClient {
526        /// Create a new OpenAI-compatible HTTP client
527        pub fn new() -> Self {
528            Self {
529                client: reqwest::Client::new(),
530                retry_executor: Mutex::new(RetryExecutor::new(RetryPolicy::default())),
531            }
532        }
533
534        /// Create a new OpenAI-compatible HTTP client with custom retry policy
535        pub fn with_retry_policy(retry_policy: RetryPolicy) -> Self {
536            Self {
537                client: reqwest::Client::new(),
538                retry_executor: Mutex::new(RetryExecutor::new(retry_policy)),
539            }
540        }
541
        /// Execute a chat completion request with retry logic
        ///
        /// POSTs `request` as JSON to `url` with `headers`, running each
        /// attempt through the client's retry executor.
        ///
        /// The retry-executor lock is acquired before the first attempt and
        /// held until the executor returns, so concurrent calls on the same
        /// client wait for one another.
        ///
        /// # Errors
        /// Returns whatever error the retry executor yields for the request.
        pub async fn execute_chat_request(
            &self,
            url: &str,
            headers: &HeaderMap,
            request: &OpenAIRequest,
        ) -> LlmResult<OpenAIResponse> {
            // log_debug!(
            //     url = %url,
            //     model = %request.model,
            //     message_count = request.messages.len(),
            //     "Sending OpenAI-compatible request with retry logic"
            // );

            // Guard lives until the end of the function, i.e. across all attempts.
            let mut retry_executor = self.retry_executor.lock().await;
            retry_executor
                .execute(|| self.execute_single_request(url, headers, request))
                .await
        }
561
562        /// Execute authentication header for OpenAI-compatible APIs
563        pub fn build_auth_headers(api_key: &str) -> LlmResult<HeaderMap> {
564            let mut headers = HeaderMap::new();
565
566            headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
567            headers.insert(
568                AUTHORIZATION,
569                HeaderValue::from_str(&format!("Bearer {api_key}")).map_err(|e| {
570                    LlmError::configuration_error(format!("Invalid API key format: {e}"))
571                })?,
572            );
573
574            Ok(headers)
575        }
576
        /// Execute a single HTTP request
        ///
        /// Sends one POST of `request` (serialized as JSON) to `url` with a
        /// copy of `headers`, without any retrying of its own.
        ///
        /// # Errors
        /// * A request-failed error when the request cannot be sent (logged
        ///   with the URL and the underlying error).
        /// * The error built by `handle_error_response` for a non-success
        ///   HTTP status.
        /// * The error from `parse_success_response` when the body cannot be
        ///   read or decoded.
        async fn execute_single_request(
            &self,
            url: &str,
            headers: &HeaderMap,
            request: &OpenAIRequest,
        ) -> LlmResult<OpenAIResponse> {
            let response = self
                .client
                .post(url)
                .headers(headers.clone())
                .json(request)
                .send()
                .await
                .map_err(|e| {
                    log_error!(
                        url = %url,
                        error = %e,
                        "HTTP request failed"
                    );
                    LlmError::request_failed(format!("Request failed: {e}"), Some(Box::new(e)))
                })?;

            // Any non-2xx status is converted into an error value.
            if !response.status().is_success() {
                return Err(handle_error_response(response).await);
            }

            parse_success_response(response).await
        }
606
607        /// Set retry policy for subsequent requests
608        pub async fn set_retry_policy(&self, policy: RetryPolicy) {
609            let mut retry_executor = self.retry_executor.lock().await;
610            *retry_executor = RetryExecutor::new(policy);
611        }
612
613        /// Restore default retry policy
614        pub async fn restore_default_retry_policy(&self, default_policy: &RetryPolicy) {
615            let mut retry_executor = self.retry_executor.lock().await;
616            *retry_executor = RetryExecutor::new(default_policy.clone());
617        }
618    }
619
620    /// Handle non-success HTTP responses
621    async fn handle_error_response(response: reqwest::Response) -> LlmError {
622        let status = response.status();
623        let headers = response.headers().clone();
624        let error_text = response
625            .text()
626            .await
627            .unwrap_or_else(|_| "Unknown error".to_string());
628
629        log_error!(
630            status = %status,
631            error_text = %error_text,
632            "API error response"
633        );
634
635        match status.as_u16() {
636            401 => {
637                // Parse error details for authentication failures
638                if let Ok(error_json) = serde_json::from_str::<serde_json::Value>(&error_text) {
639                    if let Some(error_obj) = error_json.get("error") {
640                        if let Some(code) = error_obj.get("code").and_then(|c| c.as_str()) {
641                            if code.contains("api_key") || code.contains("auth") {
642                                return LlmError::authentication_failed(
643                                    "Invalid API key or authentication failed",
644                                );
645                            }
646                        }
647                    }
648                }
649                LlmError::authentication_failed("Authentication failed")
650            }
651            429 => {
652                let retry_after_seconds = headers
653                    .get("retry-after")
654                    .and_then(|h| h.to_str().ok())
655                    .and_then(|s| s.parse::<u64>().ok())
656                    .unwrap_or(60);
657
658                LlmError::rate_limit_exceeded(retry_after_seconds)
659            }
660            _ => LlmError::request_failed(format!("API error {status}: {error_text}"), None),
661        }
662    }
663
664    /// Parse successful HTTP response into OpenAIResponse
665    async fn parse_success_response(response: reqwest::Response) -> LlmResult<OpenAIResponse> {
666        let raw_body = response.text().await.map_err(|e| {
667            log_error!(
668                error = %e,
669                "Failed to read response body"
670            );
671            LlmError::response_parsing_error(format!("Failed to read response: {e}"))
672        })?;
673
674        serde_json::from_str(&raw_body).map_err(|e| {
675            log_error!(
676                error = %e,
677                raw_body = %raw_body,
678                "Failed to parse response"
679            );
680            LlmError::response_parsing_error(format!("Invalid response: {e}"))
681        })
682    }
683}
684
685/// Convert neutral messages to OpenAI format
686pub fn convert_neutral_messages_to_openai(messages: &[UnifiedMessage]) -> Vec<OpenAIMessage> {
687    messages
688        .iter()
689        .map(|msg| {
690            let role = match msg.role {
691                MessageRole::System => "system",
692                MessageRole::User => "user",
693                MessageRole::Assistant => "assistant",
694                MessageRole::Tool => "tool",
695            };
696
697            match &msg.content {
698                MessageContent::Text(text) => OpenAIMessage {
699                    role: role.to_string(),
700                    content: text.clone(),
701                },
702                MessageContent::Json(json_value) => OpenAIMessage {
703                    role: role.to_string(),
704                    content: serde_json::to_string_pretty(json_value).unwrap_or_default(),
705                },
706                MessageContent::ToolCall {
707                    id: _,
708                    name,
709                    arguments,
710                } => {
711                    // We shouldn't be sending tool calls TO the LLM, tool calls come FROM the LLM
712                    // This is likely an error, but convert to text for compatibility
713                    OpenAIMessage {
714                        role: role.to_string(),
715                        content: format!(
716                            "Tool call: {} with args: {}",
717                            name,
718                            serde_json::to_string(arguments).unwrap_or_default()
719                        ),
720                    }
721                }
722                MessageContent::ToolResult {
723                    tool_call_id: _,
724                    content,
725                    is_error,
726                } => {
727                    let prefix = if *is_error {
728                        "Tool error"
729                    } else {
730                        "Tool result"
731                    };
732                    OpenAIMessage {
733                        role: role.to_string(),
734                        content: format!("{}: {}", prefix, content),
735                    }
736                }
737            }
738        })
739        .collect()
740}
741
742/// Convert neutral tools to OpenAI format
743pub fn convert_neutral_tools_to_openai(tools: &[Tool]) -> Vec<Value> {
744    tools
745        .iter()
746        .map(|tool| {
747            serde_json::json!({
748                "type": "function",
749                "function": {
750                    "name": tool.name,
751                    "description": tool.description,
752                    "parameters": tool.parameters
753                }
754            })
755        })
756        .collect()
757}
758
759/// Apply LLM config to OpenAI request
760pub fn apply_config_to_request(request: &mut OpenAIRequest, config: Option<RequestConfig>) {
761    if let Some(cfg) = config {
762        apply_llm_parameters(request, &cfg);
763        apply_tools_if_user_llm(request, &cfg);
764        apply_tool_choice(request, cfg.tool_choice);
765        apply_response_format(request, cfg.response_format);
766    }
767}
768
769fn apply_llm_parameters(request: &mut OpenAIRequest, cfg: &RequestConfig) {
770    if let Some(temp) = cfg.temperature {
771        request.temperature = Some(temp);
772    }
773    if let Some(max_tokens) = cfg.max_tokens {
774        request.max_tokens = Some(max_tokens);
775    }
776    if let Some(top_p) = cfg.top_p {
777        request.top_p = Some(top_p);
778    }
779    if let Some(presence_penalty) = cfg.presence_penalty {
780        request.presence_penalty = Some(presence_penalty);
781    }
782}
783
784fn apply_tools_if_user_llm(request: &mut OpenAIRequest, cfg: &RequestConfig) {
785    if cfg.tools.is_empty() {
786        return;
787    }
788
789    let is_user_llm = cfg
790        .llm_path
791        .as_ref()
792        .map(|path| path == "user_llm")
793        .unwrap_or(true);
794
795    if is_user_llm {
796        let openai_tools = convert_neutral_tools_to_openai(&cfg.tools);
797        request.tools = Some(openai_tools);
798    }
799}
800
801fn apply_tool_choice(
802    request: &mut OpenAIRequest,
803    tool_choice: Option<crate::provider::ToolChoice>,
804) {
805    if let Some(choice) = tool_choice {
806        request.tool_choice = Some(match choice {
807            ToolChoice::Auto => "auto".to_string(),
808            ToolChoice::None => "none".to_string(),
809            ToolChoice::Required => "required".to_string(),
810            ToolChoice::Specific(tool_name) => tool_name,
811        });
812    }
813}
814
815fn apply_response_format(
816    request: &mut OpenAIRequest,
817    response_format: Option<crate::provider::ResponseFormat>,
818) {
819    if let Some(format) = response_format {
820        request.response_format = Some(OpenAIResponseFormat {
821            format_type: "json_schema".to_string(),
822            json_schema: Some(OpenAIJsonSchema {
823                name: format.name,
824                schema: format.schema,
825                strict: Some(true),
826            }),
827        });
828    }
829}
830
831/// Convert OpenAI tool calls to LLM tool calls
832pub fn convert_tool_calls(openai_calls: &[OpenAIToolCall]) -> Vec<ToolCall> {
833    openai_calls
834        .iter()
835        .map(|call| ToolCall {
836            id: call.id.clone(),
837            name: call.function.name.clone(),
838            arguments: serde_json::from_str(&call.function.arguments)
839                .unwrap_or_else(|_| serde_json::json!({})),
840        })
841        .collect()
842}
843
/// Fast token estimation for logging and diagnostics
///
/// Approximates the token count as the UTF-8 byte length of `text` divided by
/// four (rounded down). For ASCII text this equals characters / 4. The result
/// saturates at `u32::MAX` instead of wrapping for extremely large inputs.
///
/// This is only meant for monitoring; actual token usage is tracked from LLM
/// provider responses.
pub fn estimate_tokens(text: &str) -> u32 {
    // ~4 bytes per token on average.
    const BYTES_PER_TOKEN: usize = 4;

    // Clamp before narrowing so the cast can never truncate.
    (text.len() / BYTES_PER_TOKEN).min(u32::MAX as usize) as u32
}
853
854/// Fast token estimation for message arrays with formatting overhead
855/// Uses simple approximation - sufficient for logging and diagnostics.
856pub fn estimate_message_tokens(messages: &[OpenAIMessage]) -> u32 {
857    // Simple approach: combine all content and add formatting overhead
858    let total_text: String = messages
859        .iter()
860        .map(|m| format!("{}: {}", m.role, m.content))
861        .collect::<Vec<_>>()
862        .join("\n");
863
864    // Add overhead for message formatting (role markers, etc.)
865    estimate_tokens(&total_text) + (messages.len() as u32 * 8)
866}
867
/// Result of processing tool calls and content cleaning
///
/// Returned by the tool-call handling functions in this module.
#[derive(Debug)]
pub struct ToolCallProcessingResult {
    /// Tool calls extracted from the response; empty when none were found or
    /// when custom-format parsing failed.
    pub tool_calls: Vec<ToolCall>,
    /// Replacement message content. Set only on the custom-format paths
    /// (successful parse, or cleaned/fallback text after a failed parse);
    /// `None` for standard tool calls and when no tool call was detected.
    pub cleaned_content: Option<String>,
}
874
875/// Handle tool calls from OpenAI response message, including custom formats
876/// This function integrates standard tool call parsing with custom format detection
877pub fn handle_tool_calls(
878    message: &OpenAIResponseMessage,
879) -> crate::error::LlmResult<Vec<ToolCall>> {
880    let result = handle_tool_calls_with_content_cleaning(message)?;
881    Ok(result.tool_calls)
882}
883
884/// Process standard OpenAI tool calls
885fn process_standard_tool_calls(tool_calls: &[OpenAIToolCall]) -> Option<ToolCallProcessingResult> {
886    if tool_calls.is_empty() {
887        return None;
888    }
889
890    Some(ToolCallProcessingResult {
891        tool_calls: convert_tool_calls(tool_calls),
892        cleaned_content: None,
893    })
894}
895
896/// Create tool call from custom format match result
897fn create_custom_tool_call(match_result: CustomToolCallMatch) -> ToolCallProcessingResult {
898    let tool_call = ToolCall {
899        id: format!("custom_{}", uuid::Uuid::new_v4()),
900        name: match_result.function_name,
901        arguments: match_result.arguments,
902    };
903
904    ToolCallProcessingResult {
905        tool_calls: vec![tool_call],
906        cleaned_content: Some(match_result.cleaned_content),
907    }
908}
909
910/// Handle parsing error by attempting content cleaning
911fn handle_parsing_error(
912    content: &str,
913    error: &CustomFormatError,
914) -> Option<ToolCallProcessingResult> {
915    log_warn!(
916        error = ?error,
917        content_preview = content.chars().take(100).collect::<String>(),
918        "Failed to parse custom tool format - attempting content cleaning"
919    );
920
921    let cleaned_content = CustomFormatParser::clean_tool_call_patterns(content);
922    if cleaned_content == content {
923        return None;
924    }
925
926    log_debug!(
927        original_length = content.len(),
928        cleaned_length = cleaned_content.len(),
929        "Cleaned tool call patterns from failed parse"
930    );
931
932    let final_content = if cleaned_content.trim().is_empty() {
933        "I attempted to process your request, but encountered a formatting issue. Please try rephrasing your request.".to_string()
934    } else {
935        cleaned_content
936    };
937
938    Some(ToolCallProcessingResult {
939        tool_calls: vec![],
940        cleaned_content: Some(final_content),
941    })
942}
943
944/// Try to parse custom format from content
945fn try_parse_custom_format(
946    content: &str,
947) -> Result<Option<ToolCallProcessingResult>, CustomFormatError> {
948    let parser = CustomFormatParser::new();
949
950    match parser.parse(content)? {
951        Some(match_result) => Ok(Some(create_custom_tool_call(match_result))),
952        None => Ok(None),
953    }
954}
955
956/// Handle tool calls and return both tool calls and cleaned content
957/// This function integrates standard tool call parsing with custom format detection
958/// and provides cleaned content when custom formats are detected
959pub fn handle_tool_calls_with_content_cleaning(
960    message: &OpenAIResponseMessage,
961) -> crate::error::LlmResult<ToolCallProcessingResult> {
962    // Check for standard tool calls first
963    if let Some(result) = check_standard_tool_calls(message) {
964        return Ok(result);
965    }
966
967    // Try custom format parsing if content is present
968    if let Some(result) = try_custom_format_parsing(&message.content)? {
969        return Ok(result);
970    }
971
972    Ok(ToolCallProcessingResult {
973        tool_calls: vec![],
974        cleaned_content: None,
975    })
976}
977
978/// Check for standard OpenAI tool calls
979fn check_standard_tool_calls(message: &OpenAIResponseMessage) -> Option<ToolCallProcessingResult> {
980    let tool_calls = message.tool_calls.as_ref()?;
981    process_standard_tool_calls(tool_calls)
982}
983
984/// Try parsing custom tool call formats
985fn try_custom_format_parsing(
986    content: &str,
987) -> crate::error::LlmResult<Option<ToolCallProcessingResult>> {
988    if content.is_empty() {
989        return Ok(None);
990    }
991
992    match try_parse_custom_format(content) {
993        Ok(Some(result)) => Ok(Some(result)),
994        Ok(None) => Ok(None), // No custom format found - normal case
995        Err(e) => Ok(handle_parsing_error(content, &e)),
996    }
997}
multi_llm/providers/openai_shared/utils.rs

multi_llm/providers/openai_shared/
utils.rs