Skip to main content

tool_parser/parsers/
json.rs

1use async_trait::async_trait;
2use openai_protocol::common::Tool;
3use serde_json::Value;
4
5use crate::{
6    errors::{ParserError, ParserResult},
7    parsers::helpers,
8    partial_json::PartialJson,
9    traits::ToolParser,
10    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
11};
12
/// JSON format parser for tool calls
///
/// Handles pure JSON formats for function calling:
/// - Single tool call: {"name": "fn", "arguments": {...}}
/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
///
/// The complete (non-streaming) path is stateless; every field below exists
/// to carry state between successive streaming chunks.
pub struct JsonParser {
    /// Partial-JSON parser handed to the shared streaming helper so that
    /// incomplete JSON can be handled while chunks are still arriving
    partial_json: PartialJson,

    /// Accumulates incoming chunks; text that is not (yet) consumed stays here
    /// across calls, and is drained when it turns out to be plain text
    buffer: String,

    /// Stores complete tool call info (name and arguments) for each tool being parsed
    prev_tool_call_arr: Vec<Value>,

    /// Index of the currently streaming tool call. Starts at -1, meaning no
    /// tool call has been started yet
    current_tool_id: i32,

    /// Flag for whether current tool's name has been sent to client
    current_tool_name_sent: bool,

    /// Tracks raw JSON string content streamed to client for each tool's arguments
    streamed_args_for_tool: Vec<String>,

    /// Separator between multiple tool calls (initialised to `","`)
    tool_call_separator: &'static str,

    /// Whether the stream uses the array format `[...]` rather than a single
    /// object `{...}`; decided from the buffered text before the first tool starts
    is_array_format: bool,

    /// Set once the closing `]` of an array-format stream has been stripped,
    /// so that it is stripped at most once and later `[` / `{` are treated as text
    array_closed: bool,
}
47
48impl JsonParser {
49    /// Create a new JSON parser
50    pub fn new() -> Self {
51        Self {
52            partial_json: PartialJson::default(),
53            buffer: String::new(),
54            prev_tool_call_arr: Vec::new(),
55            current_tool_id: -1,
56            current_tool_name_sent: false,
57            streamed_args_for_tool: Vec::new(),
58            tool_call_separator: ",",
59            is_array_format: false,
60            array_closed: false,
61        }
62    }
63
64    /// Try to extract a first valid JSON object or array from text that may contain other content
65    /// Returns (json_string, normal_text) where normal_text is text before and after the JSON
66    fn extract_json_from_text(text: &str) -> Option<(String, String)> {
67        let mut in_string = false;
68        let mut escape = false;
69        let mut stack: Vec<char> = Vec::with_capacity(8);
70        let mut start: Option<usize> = None;
71
72        for (i, ch) in text.char_indices() {
73            if escape {
74                escape = false;
75                continue;
76            }
77
78            match ch {
79                '\\' if in_string => escape = true,
80                '"' => in_string = !in_string,
81                _ if in_string => {}
82                '{' | '[' => {
83                    if start.is_none() {
84                        start = Some(i);
85                    }
86                    stack.push(ch);
87                }
88                '}' | ']' => {
89                    let Some(open) = stack.pop() else {
90                        // Stray closer - reset and continue looking for next valid JSON
91                        start = None;
92                        continue;
93                    };
94
95                    let valid = (open == '{' && ch == '}') || (open == '[' && ch == ']');
96                    if !valid {
97                        // Mismatch - reset and continue looking
98                        start = None;
99                        stack.clear();
100                        continue;
101                    }
102
103                    if stack.is_empty() {
104                        // Safe: start is always set when stack was pushed to (opening bracket sets it)
105                        let Some(s) = start else {
106                            continue;
107                        };
108                        let e = i + ch.len_utf8();
109                        let potential_json = &text[s..e];
110
111                        // Validate that this is actually valid JSON before returning
112                        if serde_json::from_str::<Value>(potential_json).is_ok() {
113                            let json = potential_json.to_string();
114                            let normal = format!("{}{}", &text[..s], &text[e..]);
115                            return Some((json, normal));
116                        } else {
117                            // Not valid JSON, reset and continue looking
118                            start = None;
119                            continue;
120                        }
121                    }
122                }
123                _ => {}
124            }
125        }
126        None
127    }
128
129    /// Parse a single JSON object into a ToolCall
130    fn parse_single_object(obj: &Value) -> ParserResult<Option<ToolCall>> {
131        // Check if this looks like a tool call
132        let name = obj
133            .get("name")
134            .or_else(|| obj.get("function"))
135            .and_then(|v| v.as_str());
136
137        if let Some(name) = name {
138            // Get arguments - support both "arguments" and "parameters" keys
139            let empty_obj = Value::Object(serde_json::Map::new());
140            let args = obj
141                .get("arguments")
142                .or_else(|| obj.get("parameters"))
143                .unwrap_or(&empty_obj);
144
145            // Convert arguments to JSON string
146            let arguments = serde_json::to_string(args)
147                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;
148
149            Ok(Some(ToolCall {
150                function: FunctionCall {
151                    name: name.to_string(),
152                    arguments,
153                },
154            }))
155        } else {
156            Ok(None)
157        }
158    }
159
160    /// Parse JSON value(s) into tool calls
161    fn parse_json_value(value: &Value) -> ParserResult<Vec<ToolCall>> {
162        let mut tools = Vec::new();
163
164        match value {
165            Value::Array(arr) => {
166                // Parse each element in the array
167                for item in arr {
168                    if let Some(tool) = Self::parse_single_object(item)? {
169                        tools.push(tool);
170                    }
171                }
172            }
173            Value::Object(_) => {
174                // Single tool call
175                if let Some(tool) = Self::parse_single_object(value)? {
176                    tools.push(tool);
177                }
178            }
179            _ => {
180                // Not a valid tool call format
181                return Ok(vec![]);
182            }
183        }
184
185        Ok(tools)
186    }
187}
188
189impl Default for JsonParser {
190    fn default() -> Self {
191        Self::new()
192    }
193}
194
195#[async_trait]
196impl ToolParser for JsonParser {
197    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
198        // Always use extract_json_from_text to handle both pure JSON and mixed content
199        if let Some((extracted_json, normal_text)) = Self::extract_json_from_text(text) {
200            let parsed = serde_json::from_str::<Value>(&extracted_json)
201                .map_err(|e| ParserError::ParsingFailed(e.to_string()))
202                .and_then(|v| Self::parse_json_value(&v));
203
204            match parsed {
205                Ok(tools) => return Ok((normal_text, tools)),
206                Err(e) => tracing::debug!("parse_complete failed: {:?}", e),
207            }
208        }
209
210        // No valid JSON found, return original text as normal text
211        Ok((text.to_string(), vec![]))
212    }
213
214    async fn parse_incremental(
215        &mut self,
216        chunk: &str,
217        tools: &[Tool],
218    ) -> ParserResult<StreamingParseResult> {
219        // Append new text to buffer
220        self.buffer.push_str(chunk);
221        let current_text = &self.buffer.clone();
222
223        // Determine format on first parse (array vs single object)
224        if self.current_tool_id == -1 && self.has_tool_markers(current_text) {
225            self.is_array_format = current_text.trim().starts_with('[');
226        }
227
228        // Check if current_text has tool_call
229        // Once array is closed, don't treat [ or { as tool markers
230        let has_tool_start = (!self.array_closed && self.has_tool_markers(current_text))
231            || (self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator));
232
233        if !has_tool_start {
234            let mut normal_text = self.buffer.clone();
235            self.buffer.clear();
236
237            // Strip ] only once (the closing bracket of JSON array format)
238            // Only for array format and only if we haven't already closed it
239            if self.is_array_format
240                && !self.array_closed
241                && self.current_tool_id > 0
242                && normal_text.starts_with("]")
243            {
244                if let Some(stripped) = normal_text.strip_prefix(']') {
245                    normal_text = stripped.to_string();
246                }
247                self.array_closed = true;
248            }
249
250            return Ok(StreamingParseResult {
251                normal_text,
252                calls: vec![],
253            });
254        }
255
256        // Build tool indices
257        let tool_indices = helpers::get_tool_indices(tools);
258
259        // Determine start index for JSON parsing
260        // JSON can start with [ (array) or { (single object)
261        let start_idx = if let Some(bracket_pos) = current_text.find('[') {
262            let brace_pos = current_text.find('{');
263            match brace_pos {
264                Some(bp) => bp,
265                _ => bracket_pos,
266            }
267        } else if let Some(brace_pos) = current_text.find('{') {
268            brace_pos
269        } else if self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator) {
270            self.tool_call_separator.len()
271        } else {
272            0
273        };
274
275        helpers::handle_json_tool_streaming(
276            current_text,
277            start_idx,
278            &mut self.partial_json,
279            &tool_indices,
280            &mut self.buffer,
281            &mut self.current_tool_id,
282            &mut self.current_tool_name_sent,
283            &mut self.streamed_args_for_tool,
284            &mut self.prev_tool_call_arr,
285        )
286    }
287
288    fn has_tool_markers(&self, text: &str) -> bool {
289        let trimmed = text.trim();
290        trimmed.starts_with('[') || trimmed.starts_with('{')
291    }
292
293    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
294        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
295    }
296
297    fn reset(&mut self) {
298        helpers::reset_parser_state(
299            &mut self.buffer,
300            &mut self.prev_tool_call_arr,
301            &mut self.current_tool_id,
302            &mut self.current_tool_name_sent,
303            &mut self.streamed_args_for_tool,
304        );
305        self.is_array_format = false;
306        self.array_closed = false;
307    }
308}