tool_parser/parsers/
json.rs

1use async_trait::async_trait;
2use openai_protocol::common::Tool;
3use serde_json::Value;
4
5use crate::{
6    errors::{ParserError, ParserResult},
7    parsers::helpers,
8    partial_json::PartialJson,
9    traits::ToolParser,
10    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
11};
12
13/// JSON format parser for tool calls
14///
15/// Handles pure JSON formats for function calling:
16/// - Single tool call: {"name": "fn", "arguments": {...}}
17/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
18/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
19pub struct JsonParser {
20    /// Parser for handling incomplete JSON during streaming
21    partial_json: PartialJson,
22
23    /// Buffer for accumulating incomplete patterns across chunks
24    buffer: String,
25
26    /// Stores complete tool call info (name and arguments) for each tool being parsed
27    prev_tool_call_arr: Vec<Value>,
28
29    /// Index of currently streaming tool call (-1 means no active tool)
30    current_tool_id: i32,
31
32    /// Flag for whether current tool's name has been sent to client
33    current_tool_name_sent: bool,
34
35    /// Tracks raw JSON string content streamed to client for each tool's arguments
36    streamed_args_for_tool: Vec<String>,
37
38    /// Separator between multiple tool calls
39    tool_call_separator: &'static str,
40
41    /// Track whether we're parsing array format `[...]` vs single object `{...}`
42    is_array_format: bool,
43
44    /// Track whether we've already stripped the closing ] bracket (for array format)
45    array_closed: bool,
46}
47
48impl JsonParser {
49    /// Create a new JSON parser
50    pub fn new() -> Self {
51        Self {
52            partial_json: PartialJson::default(),
53            buffer: String::new(),
54            prev_tool_call_arr: Vec::new(),
55            current_tool_id: -1,
56            current_tool_name_sent: false,
57            streamed_args_for_tool: Vec::new(),
58            tool_call_separator: ",",
59            is_array_format: false,
60            array_closed: false,
61        }
62    }
63
64    /// Try to extract a first valid JSON object or array from text that may contain other content
65    /// Returns (json_string, normal_text) where normal_text is text before and after the JSON
66    fn extract_json_from_text(&self, text: &str) -> Option<(String, String)> {
67        let mut in_string = false;
68        let mut escape = false;
69        let mut stack: Vec<char> = Vec::with_capacity(8);
70        let mut start: Option<usize> = None;
71
72        for (i, ch) in text.char_indices() {
73            if escape {
74                escape = false;
75                continue;
76            }
77
78            match ch {
79                '\\' if in_string => escape = true,
80                '"' => in_string = !in_string,
81                _ if in_string => {}
82                '{' | '[' => {
83                    if start.is_none() {
84                        start = Some(i);
85                    }
86                    stack.push(ch);
87                }
88                '}' | ']' => {
89                    let Some(open) = stack.pop() else {
90                        // Stray closer - reset and continue looking for next valid JSON
91                        start = None;
92                        continue;
93                    };
94
95                    let valid = (open == '{' && ch == '}') || (open == '[' && ch == ']');
96                    if !valid {
97                        // Mismatch - reset and continue looking
98                        start = None;
99                        stack.clear();
100                        continue;
101                    }
102
103                    if stack.is_empty() {
104                        let s = start.unwrap();
105                        let e = i + ch.len_utf8();
106                        let potential_json = &text[s..e];
107
108                        // Validate that this is actually valid JSON before returning
109                        if serde_json::from_str::<Value>(potential_json).is_ok() {
110                            let json = potential_json.to_string();
111                            let normal = format!("{}{}", &text[..s], &text[e..]);
112                            return Some((json, normal));
113                        } else {
114                            // Not valid JSON, reset and continue looking
115                            start = None;
116                            continue;
117                        }
118                    }
119                }
120                _ => {}
121            }
122        }
123        None
124    }
125
126    /// Parse a single JSON object into a ToolCall
127    fn parse_single_object(&self, obj: &Value) -> ParserResult<Option<ToolCall>> {
128        // Check if this looks like a tool call
129        let name = obj
130            .get("name")
131            .or_else(|| obj.get("function"))
132            .and_then(|v| v.as_str());
133
134        if let Some(name) = name {
135            // Get arguments - support both "arguments" and "parameters" keys
136            let empty_obj = Value::Object(serde_json::Map::new());
137            let args = obj
138                .get("arguments")
139                .or_else(|| obj.get("parameters"))
140                .unwrap_or(&empty_obj);
141
142            // Convert arguments to JSON string
143            let arguments = serde_json::to_string(args)
144                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;
145
146            Ok(Some(ToolCall {
147                function: FunctionCall {
148                    name: name.to_string(),
149                    arguments,
150                },
151            }))
152        } else {
153            Ok(None)
154        }
155    }
156
157    /// Parse JSON value(s) into tool calls
158    fn parse_json_value(&self, value: &Value) -> ParserResult<Vec<ToolCall>> {
159        let mut tools = Vec::new();
160
161        match value {
162            Value::Array(arr) => {
163                // Parse each element in the array
164                for item in arr {
165                    if let Some(tool) = self.parse_single_object(item)? {
166                        tools.push(tool);
167                    }
168                }
169            }
170            Value::Object(_) => {
171                // Single tool call
172                if let Some(tool) = self.parse_single_object(value)? {
173                    tools.push(tool);
174                }
175            }
176            _ => {
177                // Not a valid tool call format
178                return Ok(vec![]);
179            }
180        }
181
182        Ok(tools)
183    }
184}
185
186impl Default for JsonParser {
187    fn default() -> Self {
188        Self::new()
189    }
190}
191
192#[async_trait]
193impl ToolParser for JsonParser {
194    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
195        // Always use extract_json_from_text to handle both pure JSON and mixed content
196        if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
197            let parsed = serde_json::from_str::<Value>(&extracted_json)
198                .map_err(|e| ParserError::ParsingFailed(e.to_string()))
199                .and_then(|v| self.parse_json_value(&v));
200
201            match parsed {
202                Ok(tools) => return Ok((normal_text, tools)),
203                Err(e) => tracing::debug!("parse_complete failed: {:?}", e),
204            }
205        }
206
207        // No valid JSON found, return original text as normal text
208        Ok((text.to_string(), vec![]))
209    }
210
211    async fn parse_incremental(
212        &mut self,
213        chunk: &str,
214        tools: &[Tool],
215    ) -> ParserResult<StreamingParseResult> {
216        // Append new text to buffer
217        self.buffer.push_str(chunk);
218        let current_text = &self.buffer.clone();
219
220        // Determine format on first parse (array vs single object)
221        if self.current_tool_id == -1 && self.has_tool_markers(current_text) {
222            self.is_array_format = current_text.trim().starts_with('[');
223        }
224
225        // Check if current_text has tool_call
226        // Once array is closed, don't treat [ or { as tool markers
227        let has_tool_start = (!self.array_closed && self.has_tool_markers(current_text))
228            || (self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator));
229
230        if !has_tool_start {
231            let mut normal_text = self.buffer.clone();
232            self.buffer.clear();
233
234            // Strip ] only once (the closing bracket of JSON array format)
235            // Only for array format and only if we haven't already closed it
236            if self.is_array_format
237                && !self.array_closed
238                && self.current_tool_id > 0
239                && normal_text.starts_with("]")
240            {
241                normal_text = normal_text.strip_prefix("]").unwrap().to_string();
242                self.array_closed = true;
243            }
244
245            return Ok(StreamingParseResult {
246                normal_text,
247                calls: vec![],
248            });
249        }
250
251        // Build tool indices
252        let tool_indices = helpers::get_tool_indices(tools);
253
254        // Determine start index for JSON parsing
255        // JSON can start with [ (array) or { (single object)
256        let start_idx = if let Some(bracket_pos) = current_text.find('[') {
257            let brace_pos = current_text.find('{');
258            match brace_pos {
259                Some(bp) => bp,
260                _ => bracket_pos,
261            }
262        } else if let Some(brace_pos) = current_text.find('{') {
263            brace_pos
264        } else if self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator) {
265            self.tool_call_separator.len()
266        } else {
267            0
268        };
269
270        helpers::handle_json_tool_streaming(
271            current_text,
272            start_idx,
273            &mut self.partial_json,
274            &tool_indices,
275            &mut self.buffer,
276            &mut self.current_tool_id,
277            &mut self.current_tool_name_sent,
278            &mut self.streamed_args_for_tool,
279            &mut self.prev_tool_call_arr,
280        )
281    }
282
283    fn has_tool_markers(&self, text: &str) -> bool {
284        let trimmed = text.trim();
285        trimmed.starts_with('[') || trimmed.starts_with('{')
286    }
287
288    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
289        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
290    }
291
292    fn reset(&mut self) {
293        helpers::reset_parser_state(
294            &mut self.buffer,
295            &mut self.prev_tool_call_arr,
296            &mut self.current_tool_id,
297            &mut self.current_tool_name_sent,
298            &mut self.streamed_args_for_tool,
299        );
300        self.is_array_format = false;
301        self.array_closed = false;
302    }
303}