rig_compose/
normalizer.rs

1//! [`ToolCallNormalizer`] — converts raw LLM text output into structured
2//! [`ToolInvocation`]s.
3//!
4//! Models served via OpenAI-compatible APIs (e.g. `mlx_lm.server`) sometimes
5//! emit tool-intent as in-band text markers rather than the structured
6//! `tool_calls` JSON field. Normalizers detect and decode those markers so the
7//! kernel can dispatch them identically to first-class tool calls.
8//!
9//! # Built-in implementations
10//!
11//! | Type | Format |
12//! |------|--------|
13//! | [`LfmNormalizer`] | LiquidAI LFM `<\|tool_call_start\|>[fn(k=v)]<\|tool_call_end\|>` |
14//! | [`StructuredToolCallNormalizer`] | OpenAI Responses `function_call` output and Chat Completions `tool_calls` |
15//!
16//! # Example
17//!
18//! ```no_run
19//! use rig_compose::normalizer::{LfmNormalizer, ToolCallNormalizer};
20//!
21//! let raw = "<|tool_call_start|>[get_weather(city='Berlin')]<|tool_call_end|>";
22//! let normalizer = LfmNormalizer;
23//! let calls = normalizer.normalize(raw).expect("parse ok");
24//! assert_eq!(calls[0].name, "get_weather");
25//! ```
26
27use async_trait::async_trait;
28use serde_json::{Map, Value};
29
30use crate::registry::KernelError;
31use crate::registry::ToolRegistry;
32use crate::tool::{ToolName, ToolResultEnvelope, ToolResultEnvelopeConfig};
33use crate::trace::{DispatchTrace, DispatchTraceEvent, TracedAction, TracedOutcome};
34
35// ── Public types ─────────────────────────────────────────────────────────────
36
/// A structured tool invocation extracted from raw model output.
///
/// Both fields are public, so building a value with a struct literal bypasses
/// the tool-name checks performed by [`ToolInvocation::new`].
#[derive(Debug, Clone, PartialEq)]
pub struct ToolInvocation {
    /// Registry name of the tool to invoke (e.g. `"get_weather"`).
    pub name: ToolName,
    /// Arguments to pass to the tool. Normally a JSON object, but the type
    /// itself accepts any JSON [`Value`].
    pub args: Value,
}
45
46impl ToolInvocation {
47    /// Build a validated [`ToolInvocation`] from a tool name and JSON args.
48    pub fn new(name: impl Into<ToolName>, args: Value) -> Result<Self, KernelError> {
49        let name = name.into();
50        if name.trim().is_empty() {
51            return Err(KernelError::NormalizerFailed(
52                "empty tool name in structured tool call".into(),
53            ));
54        }
55        validate_identifier("tool name", &name)?;
56        Ok(Self { name, args })
57    }
58
59    /// Dispatch this invocation through a [`ToolRegistry`].
60    pub async fn dispatch(&self, tools: &ToolRegistry) -> Result<Value, KernelError> {
61        tools.invoke(&self.name, self.args.clone()).await
62    }
63}
64
/// Result of dispatching one normalized [`ToolInvocation`].
///
/// The dispatch loop records one of these per invocation, whether the tool
/// body actually ran or a hook supplied a synthetic skip output.
#[derive(Debug, Clone, PartialEq)]
pub struct ToolInvocationResult {
    /// The normalized invocation that was dispatched.
    pub invocation: ToolInvocation,
    /// The JSON result returned by the invoked tool, or the synthetic output
    /// provided by a hook when the invocation was skipped.
    pub output: Value,
}
73
/// Bounded result of dispatching one normalized [`ToolInvocation`].
///
/// This is the model-visible companion to [`ToolInvocationResult`]: dispatch
/// still runs through the same registry path, but the returned payload is
/// wrapped in a [`ToolResultEnvelope`] so callers can see truncation metadata
/// and continuation tokens before placing tool output into a prompt.
///
/// Values of this type are produced by the `*_bounded` dispatch helpers, which
/// build the envelope with [`ToolResultEnvelope::bound`].
#[derive(Debug, Clone, PartialEq)]
pub struct BoundedToolInvocationResult {
    /// The normalized invocation that was dispatched.
    pub invocation: ToolInvocation,
    /// Bounded tool output plus deterministic truncation metadata.
    pub envelope: ToolResultEnvelope,
}
87
/// Decision returned by a [`ToolDispatchHook`] before a tool invocation runs.
///
/// The dispatch loop stops consulting further hooks for the current
/// invocation as soon as one of them returns anything other than
/// [`ToolDispatchAction::Continue`].
#[derive(Debug, Clone, PartialEq)]
pub enum ToolDispatchAction {
    /// Invoke the tool normally.
    Continue,
    /// Do not invoke the tool; record `output` as the invocation result.
    Skip {
        /// Synthetic output to record as the invocation result.
        output: Value,
        /// Optional human-readable reason the tool body was not invoked.
        reason: Option<String>,
    },
    /// Stop dispatching and return [`KernelError::ToolDispatchTerminated`].
    /// The `reason` string becomes the payload of that error.
    Terminate { reason: String },
}
103
/// Outcome recorded for one normalized [`ToolInvocation`].
///
/// Passed to [`ToolDispatchHook::after_invocation_with_outcome`] so hooks can
/// tell real tool execution apart from a hook-provided skip.
#[derive(Debug, Clone, PartialEq)]
pub enum ToolInvocationOutcome {
    /// The tool body ran and produced the result.
    Completed,
    /// A hook supplied a synthetic skip result instead of invoking the tool.
    Skipped {
        /// Optional human-readable reason the tool body was not invoked.
        reason: Option<String>,
    },
}
115
/// Hook for policy, accounting, and tracing around normalized tool dispatch.
///
/// Hooks are intentionally provider-neutral: they see only the normalized
/// [`ToolInvocation`] and the resulting [`ToolInvocationResult`]. Concrete
/// policy engines, approval systems, and trace exporters should live in
/// downstream crates and plug into this small kernel surface.
///
/// Every method has a default implementation, so implementors override only
/// the callbacks they care about.
#[async_trait]
pub trait ToolDispatchHook: Send + Sync {
    /// Called before each invocation. Return [`ToolDispatchAction::Continue`]
    /// to invoke the tool, [`ToolDispatchAction::Skip`] to synthesize a result,
    /// or [`ToolDispatchAction::Terminate`] to stop the dispatch loop.
    ///
    /// The default implementation always continues.
    async fn before_invocation(
        &self,
        _invocation: &ToolInvocation,
    ) -> Result<ToolDispatchAction, KernelError> {
        Ok(ToolDispatchAction::Continue)
    }

    /// Called after a tool invocation or hook-provided skip result is recorded.
    ///
    /// The default implementation does nothing and succeeds.
    async fn after_invocation(&self, _result: &ToolInvocationResult) -> Result<(), KernelError> {
        Ok(())
    }

    /// Called after a dispatch result is recorded, including whether it came
    /// from real tool execution or a hook-provided skip.
    ///
    /// The default implementation preserves compatibility for hooks that only
    /// care about the result payload: it ignores the outcome and forwards to
    /// [`Self::after_invocation`].
    async fn after_invocation_with_outcome(
        &self,
        result: &ToolInvocationResult,
        _outcome: &ToolInvocationOutcome,
    ) -> Result<(), KernelError> {
        self.after_invocation(result).await
    }

    /// Called when dispatch stops after this hook may have observed the
    /// invocation in [`Self::before_invocation`]. Hooks that reserve resources
    /// before dispatch should release them here.
    ///
    /// The default implementation does nothing and succeeds.
    async fn on_invocation_error(
        &self,
        _invocation: &ToolInvocation,
        _error: &KernelError,
    ) -> Result<(), KernelError> {
        Ok(())
    }
}
163
164/// Dispatch normalized tool invocations sequentially through a [`ToolRegistry`].
165///
166/// Sequential dispatch preserves model-emitted call order and avoids adding a
167/// runtime-specific concurrency policy to the kernel. Callers that know their
168/// tools are independent can still dispatch invocations concurrently by using
169/// [`ToolInvocation::dispatch`] directly.
170pub async fn dispatch_tool_invocations(
171    tools: &ToolRegistry,
172    invocations: &[ToolInvocation],
173) -> Result<Vec<ToolInvocationResult>, KernelError> {
174    dispatch_tool_invocations_with_hooks(tools, invocations, &[]).await
175}
176
177/// Dispatch normalized tool invocations with policy/accounting hooks.
178///
179/// Hooks run in the order provided. A skip result still triggers every hook's
180/// [`ToolDispatchHook::after_invocation`] callback so audit hooks can record
181/// synthetic outcomes. A terminate action stops dispatch before the tool is
182/// invoked and returns [`KernelError::ToolDispatchTerminated`].
183pub async fn dispatch_tool_invocations_with_hooks(
184    tools: &ToolRegistry,
185    invocations: &[ToolInvocation],
186    hooks: &[&dyn ToolDispatchHook],
187) -> Result<Vec<ToolInvocationResult>, KernelError> {
188    dispatch_inner(tools, invocations, hooks, None).await
189}
190
191/// Dispatch normalized tool invocations and bound every result with `config`.
192///
193/// Existing dispatch helpers intentionally return raw tool output so hosts can
194/// decide where and how to preserve full results. This helper is for the
195/// prompt/model boundary: it applies [`ToolResultEnvelope`] after successful
196/// dispatch and returns the bounded, replayable result records.
197pub async fn dispatch_tool_invocations_bounded(
198    tools: &ToolRegistry,
199    invocations: &[ToolInvocation],
200    config: &ToolResultEnvelopeConfig,
201) -> Result<Vec<BoundedToolInvocationResult>, KernelError> {
202    dispatch_tool_invocations_with_hooks_bounded(tools, invocations, &[], config).await
203}
204
205/// Dispatch normalized tool invocations with hooks and bound every result.
206///
207/// Hooks observe the raw [`ToolInvocationResult`] before bounding so accounting
208/// and audit layers can decide whether they need full output. The returned
209/// value is the bounded, model-visible projection.
210pub async fn dispatch_tool_invocations_with_hooks_bounded(
211    tools: &ToolRegistry,
212    invocations: &[ToolInvocation],
213    hooks: &[&dyn ToolDispatchHook],
214    config: &ToolResultEnvelopeConfig,
215) -> Result<Vec<BoundedToolInvocationResult>, KernelError> {
216    let results = dispatch_tool_invocations_with_hooks(tools, invocations, hooks).await?;
217    Ok(bound_invocation_results(results, config))
218}
219
220/// Dispatch normalized tool invocations and record a [`DispatchTrace`].
221///
222/// Behaves identically to [`dispatch_tool_invocations_with_hooks`], but appends
223/// a [`DispatchTraceEvent`] for every hook decision, hook error, reservation
224/// cleanup, hook-after invocation, and final per-invocation outcome. Use this
225/// when a host needs a deterministic, replayable record of policy decisions
226/// without depending on a concrete tracing backend.
227pub async fn dispatch_tool_invocations_with_trace(
228    tools: &ToolRegistry,
229    invocations: &[ToolInvocation],
230    hooks: &[&dyn ToolDispatchHook],
231    trace: &DispatchTrace,
232) -> Result<Vec<ToolInvocationResult>, KernelError> {
233    dispatch_inner(tools, invocations, hooks, Some(trace)).await
234}
235
236fn bound_invocation_results(
237    results: Vec<ToolInvocationResult>,
238    config: &ToolResultEnvelopeConfig,
239) -> Vec<BoundedToolInvocationResult> {
240    results
241        .into_iter()
242        .map(|result| BoundedToolInvocationResult {
243            invocation: result.invocation,
244            envelope: ToolResultEnvelope::bound(result.output, config),
245        })
246        .collect()
247}
248
/// Shared implementation behind the public dispatch helpers.
///
/// For every invocation, in order:
///
/// 1. run `before_invocation` on the hooks until one errors or returns a
///    non-`Continue` action;
/// 2. act on the resulting action (invoke the tool, use the skip output, or
///    terminate);
/// 3. run `after_invocation_with_outcome` on every hook;
/// 4. append the result.
///
/// When `trace` is `Some`, an event is pushed at each of these steps. The
/// first error of any kind aborts the whole dispatch; results collected for
/// earlier invocations are dropped with the `Err`.
async fn dispatch_inner(
    tools: &ToolRegistry,
    invocations: &[ToolInvocation],
    hooks: &[&dyn ToolDispatchHook],
    trace: Option<&DispatchTrace>,
) -> Result<Vec<ToolInvocationResult>, KernelError> {
    let mut results = Vec::with_capacity(invocations.len());

    for (invocation_index, invocation) in invocations.iter().enumerate() {
        // With no hooks the action stays `Continue`, i.e. the tool is invoked.
        let mut action = ToolDispatchAction::Continue;
        // Track how many hooks observed `before_invocation` so that, on a
        // hook error, we can notify exactly those hooks via
        // `on_invocation_error`. Without this, a hook that reserved a
        // resource in `before_invocation` (e.g. `DispatchBudgetHook`)
        // would leak that reservation when a later hook errors.
        let mut observed: usize = 0;
        let mut before_err: Option<(usize, KernelError)> = None;
        for (hook_index, hook) in hooks.iter().enumerate() {
            match hook.before_invocation(invocation).await {
                Ok(next) => {
                    observed += 1;
                    if let Some(trace) = trace {
                        trace.push(DispatchTraceEvent::HookBefore {
                            invocation_index,
                            hook_index,
                            decision: TracedAction::from(&next),
                        });
                    }
                    action = next;
                    // The first Skip/Terminate decision wins; later hooks are
                    // not consulted for this invocation.
                    if !matches!(action, ToolDispatchAction::Continue) {
                        break;
                    }
                }
                Err(error) => {
                    // The failing hook is not counted in `observed`, so it is
                    // not asked to clean up below.
                    before_err = Some((hook_index, error));
                    break;
                }
            }
        }
        if let Some((hook_index, error)) = before_err {
            if let Some(trace) = trace {
                trace.push(DispatchTraceEvent::HookBeforeError {
                    invocation_index,
                    hook_index,
                    message: error.to_string(),
                });
            }
            // If a cleanup callback itself fails, its error is returned by `?`
            // in place of the original hook error.
            notify_invocation_error_subset(
                hooks,
                observed,
                invocation,
                &error,
                trace,
                invocation_index,
            )
            .await?;
            if let Some(trace) = trace {
                trace.push(DispatchTraceEvent::InvocationOutcome {
                    invocation_index,
                    outcome: TracedOutcome::Failed {
                        message: error.to_string(),
                    },
                });
            }
            return Err(error);
        }

        let (output, outcome) = match action {
            ToolDispatchAction::Continue => match invocation.dispatch(tools).await {
                Ok(output) => (output, ToolInvocationOutcome::Completed),
                Err(error) => {
                    // Tool failure: every hook is notified, then the tool's
                    // error is returned.
                    notify_invocation_error(hooks, invocation, &error, trace, invocation_index)
                        .await?;
                    if let Some(trace) = trace {
                        trace.push(DispatchTraceEvent::InvocationOutcome {
                            invocation_index,
                            outcome: TracedOutcome::Failed {
                                message: error.to_string(),
                            },
                        });
                    }
                    return Err(error);
                }
            },
            ToolDispatchAction::Skip { output, reason } => {
                // The tool body is not invoked; the hook-supplied output is
                // recorded instead.
                (output, ToolInvocationOutcome::Skipped { reason })
            }
            ToolDispatchAction::Terminate { reason } => {
                // Termination is reported to all hooks as an error, including
                // hooks positioned after the one that requested it.
                let error = KernelError::ToolDispatchTerminated(reason.clone());
                notify_invocation_error(hooks, invocation, &error, trace, invocation_index).await?;
                if let Some(trace) = trace {
                    trace.push(DispatchTraceEvent::InvocationOutcome {
                        invocation_index,
                        outcome: TracedOutcome::Terminated { reason },
                    });
                }
                return Err(error);
            }
        };

        let result = ToolInvocationResult {
            invocation: invocation.clone(),
            output,
        };

        // After-hooks run for completed and skipped invocations alike.
        // NOTE(review): an error here returns immediately via `?`; no
        // `on_invocation_error` cleanup runs and no `InvocationOutcome` event
        // is pushed for this invocation.
        for (hook_index, hook) in hooks.iter().enumerate() {
            hook.after_invocation_with_outcome(&result, &outcome)
                .await?;
            if let Some(trace) = trace {
                trace.push(DispatchTraceEvent::HookAfter {
                    invocation_index,
                    hook_index,
                });
            }
        }

        if let Some(trace) = trace {
            // Mirror the dispatch outcome into its traced counterpart.
            let outcome_event = match &outcome {
                ToolInvocationOutcome::Completed => TracedOutcome::Completed,
                ToolInvocationOutcome::Skipped { reason } => TracedOutcome::Skipped {
                    reason: reason.clone(),
                },
            };
            trace.push(DispatchTraceEvent::InvocationOutcome {
                invocation_index,
                outcome: outcome_event,
            });
        }

        results.push(result);
    }

    Ok(results)
}
383
384async fn notify_invocation_error(
385    hooks: &[&dyn ToolDispatchHook],
386    invocation: &ToolInvocation,
387    error: &KernelError,
388    trace: Option<&DispatchTrace>,
389    invocation_index: usize,
390) -> Result<(), KernelError> {
391    for (hook_index, hook) in hooks.iter().enumerate() {
392        hook.on_invocation_error(invocation, error).await?;
393        if let Some(trace) = trace {
394            trace.push(DispatchTraceEvent::HookCleanup {
395                invocation_index,
396                hook_index,
397            });
398        }
399    }
400    Ok(())
401}
402
403/// Notify the first `upto` hooks that observed `before_invocation` so they
404/// can release any resources reserved there. Used when a later hook's
405/// `before_invocation` returns an error and we must unwind partial state.
406async fn notify_invocation_error_subset(
407    hooks: &[&dyn ToolDispatchHook],
408    upto: usize,
409    invocation: &ToolInvocation,
410    error: &KernelError,
411    trace: Option<&DispatchTrace>,
412    invocation_index: usize,
413) -> Result<(), KernelError> {
414    for (hook_index, hook) in hooks.iter().take(upto).enumerate() {
415        hook.on_invocation_error(invocation, error).await?;
416        if let Some(trace) = trace {
417            trace.push(DispatchTraceEvent::HookCleanup {
418                invocation_index,
419                hook_index,
420            });
421        }
422    }
423    Ok(())
424}
425
/// Normalizes raw LLM text output into structured [`ToolInvocation`]s.
///
/// Implement this trait to support additional model families that emit tool
/// intent as in-band text markers. The trait is object-safe so normalizers can
/// be stored as `Arc<dyn ToolCallNormalizer>` alongside other kernel objects.
///
/// # Contract
///
/// - [`normalize`](ToolCallNormalizer::normalize) returns an empty `Vec` when
///   `raw` contains no markers this normalizer recognises. An empty result is
///   never an error.
/// - [`is_applicable`](ToolCallNormalizer::is_applicable) must return `true`
///   whenever `normalize` would return a non-empty `Vec`. It is a cheap guard
///   to short-circuit expensive parsing in pipelines.
pub trait ToolCallNormalizer: Send + Sync {
    /// Parse `raw` text into zero or more tool invocations.
    ///
    /// Text without recognised markers yields `Ok` with an empty `Vec`; an
    /// `Err` is for text whose markers are present but cannot be decoded.
    fn normalize(&self, raw: &str) -> Result<Vec<ToolInvocation>, KernelError>;

    /// Quick scan: does `raw` contain markers this normalizer handles?
    ///
    /// May return `true` for text that `normalize` later rejects; it must not
    /// return `false` for text that `normalize` would decode into calls.
    fn is_applicable(&self, raw: &str) -> bool;
}
447
448// ── Structured standards normalizer ──────────────────────────────────────────
449
/// Normalizer for structured tool-call JSON returned by common provider APIs.
///
/// This type keeps the kernel independent from provider-specific Rust types by
/// operating on `serde_json::Value` shapes. It supports:
///
/// - OpenAI Responses API output items: `{"type":"function_call", ...}`
/// - OpenAI Responses API full responses: `{ "output": [function_call, ...] }`
/// - OpenAI Chat Completions tool calls: `{ "tool_calls": [...] }`
/// - OpenAI Chat Completions full responses: `{ "choices": [{ "message": ... }] }`
///
/// The type is a stateless unit struct; its parsing entry points are
/// associated functions that take the JSON value directly.
#[derive(Debug, Clone, Default)]
pub struct StructuredToolCallNormalizer;
461
462impl StructuredToolCallNormalizer {
463    /// Parse OpenAI Responses API `function_call` output items from either a
464    /// full response object, an `output` array, or a single output item.
465    pub fn normalize_openai_responses(value: &Value) -> Result<Vec<ToolInvocation>, KernelError> {
466        match value {
467            Value::Object(object) => {
468                if let Some(output) = object.get("output") {
469                    return normalize_responses_output(output);
470                }
471                if is_responses_function_call(object) {
472                    return parse_responses_function_call(object).map(|call| vec![call]);
473                }
474                Ok(Vec::new())
475            }
476            Value::Array(items) => items
477                .iter()
478                .map(normalize_responses_output_item)
479                .collect::<Result<Vec<_>, _>>()
480                .map(flatten_invocations),
481            _ => Ok(Vec::new()),
482        }
483    }
484
485    /// Parse OpenAI Chat Completions `tool_calls` from either a full response,
486    /// a message object, a `tool_calls` array, or a single tool call object.
487    pub fn normalize_openai_chat_completions(
488        value: &Value,
489    ) -> Result<Vec<ToolInvocation>, KernelError> {
490        match value {
491            Value::Object(object) => {
492                if let Some(choices) = object.get("choices") {
493                    return normalize_chat_choices(choices);
494                }
495                if let Some(tool_calls) = object.get("tool_calls") {
496                    return normalize_chat_tool_calls(tool_calls);
497                }
498                if is_chat_tool_call(object) {
499                    return parse_chat_tool_call(object).map(|call| vec![call]);
500                }
501                Ok(Vec::new())
502            }
503            Value::Array(items) => normalize_chat_tool_calls_array(items),
504            _ => Ok(Vec::new()),
505        }
506    }
507
508    /// Parse all supported structured standards from `value`.
509    ///
510    /// This is useful when the caller has a provider JSON blob but does not
511    /// want to branch on the provider path first. It preserves the order of
512    /// calls within each standard and tries Responses before Chat Completions.
513    pub fn normalize(value: &Value) -> Result<Vec<ToolInvocation>, KernelError> {
514        let mut invocations = Self::normalize_openai_responses(value)?;
515        invocations.extend(Self::normalize_openai_chat_completions(value)?);
516        Ok(invocations)
517    }
518}
519
520// ── LFM normalizer ────────────────────────────────────────────────────────────
521
/// In-band marker that opens an LFM tool-call block.
const LFM_START: &str = "<|tool_call_start|>";
/// In-band marker that closes an LFM tool-call block.
const LFM_END: &str = "<|tool_call_end|>";
524
/// Normalizer for LiquidAI LFM models (e.g. `LFM2.5-1.2B-Thinking`) served
/// through `mlx_lm.server` or similar OpenAI-compatible shims that emit tool
/// intent as in-band text rather than the structured `tool_calls` field.
///
/// Recognised format:
/// ```text
/// <|tool_call_start|>[get_weather(city='Berlin')]<|tool_call_end|>
/// ```
///
/// Multiple calls per block (`[fn1(a=1), fn2(b=2)]`) and multiple blocks per
/// message are both handled. Text outside the markers is ignored, and a start
/// marker with no matching end marker is reported as an error.
///
/// # Example
///
/// ```no_run
/// use rig_compose::normalizer::{LfmNormalizer, ToolCallNormalizer};
/// use serde_json::json;
///
/// let raw = "<|tool_call_start|>[add(x=3, y=4)]<|tool_call_end|>";
/// let calls = LfmNormalizer.normalize(raw).unwrap();
/// assert_eq!(calls[0].name, "add");
/// assert_eq!(calls[0].args, json!({"x": 3, "y": 4}));
/// ```
#[derive(Debug, Clone, Default)]
pub struct LfmNormalizer;
550
551impl ToolCallNormalizer for LfmNormalizer {
552    fn is_applicable(&self, raw: &str) -> bool {
553        raw.contains(LFM_START)
554    }
555
556    fn normalize(&self, raw: &str) -> Result<Vec<ToolInvocation>, KernelError> {
557        let mut results = Vec::new();
558        let mut remaining = raw;
559
560        while let Some(block_start) = remaining.find(LFM_START) {
561            // Skip past the start marker.
562            let after_start = remaining
563                .get(block_start + LFM_START.len()..)
564                .ok_or_else(|| KernelError::NormalizerFailed("LFM: start marker overrun".into()))?;
565
566            let block_end = after_start.find(LFM_END).ok_or_else(|| {
567                KernelError::NormalizerFailed("LFM: unclosed <|tool_call_start|> marker".into())
568            })?;
569
570            let block = after_start.get(..block_end).ok_or_else(|| {
571                KernelError::NormalizerFailed("LFM: block slice out of bounds".into())
572            })?;
573
574            // Advance past the end marker; if nothing remains, stop.
575            remaining = after_start.get(block_end + LFM_END.len()..).unwrap_or("");
576
577            let calls = parse_lfm_block(block)?;
578            results.extend(calls);
579        }
580
581        Ok(results)
582    }
583}
584
585// ── Structured standards helpers ─────────────────────────────────────────────
586
587fn normalize_responses_output(value: &Value) -> Result<Vec<ToolInvocation>, KernelError> {
588    match value {
589        Value::Array(items) => items
590            .iter()
591            .map(normalize_responses_output_item)
592            .collect::<Result<Vec<_>, _>>()
593            .map(flatten_invocations),
594        Value::Object(object) if is_responses_function_call(object) => {
595            parse_responses_function_call(object).map(|call| vec![call])
596        }
597        _ => Ok(Vec::new()),
598    }
599}
600
601fn normalize_responses_output_item(value: &Value) -> Result<Vec<ToolInvocation>, KernelError> {
602    match value {
603        Value::Object(object) if is_responses_function_call(object) => {
604            parse_responses_function_call(object).map(|call| vec![call])
605        }
606        _ => Ok(Vec::new()),
607    }
608}
609
610fn is_responses_function_call(object: &Map<String, Value>) -> bool {
611    object
612        .get("type")
613        .and_then(Value::as_str)
614        .is_some_and(|kind| kind == "function_call")
615}
616
617fn parse_responses_function_call(
618    object: &Map<String, Value>,
619) -> Result<ToolInvocation, KernelError> {
620    let name = required_string_field(object, "name", "OpenAI Responses function_call")?;
621    let args = object
622        .get("arguments")
623        .map(parse_standard_arguments)
624        .transpose()?
625        .unwrap_or_else(|| Value::Object(Map::new()));
626    ToolInvocation::new(name, args)
627}
628
629fn normalize_chat_choices(value: &Value) -> Result<Vec<ToolInvocation>, KernelError> {
630    let choices = value.as_array().ok_or_else(|| {
631        KernelError::NormalizerFailed("OpenAI Chat Completions choices must be an array".into())
632    })?;
633
634    let mut invocations = Vec::new();
635    for choice in choices {
636        let Some(message) = choice.get("message") else {
637            continue;
638        };
639        invocations
640            .extend(StructuredToolCallNormalizer::normalize_openai_chat_completions(message)?);
641    }
642
643    Ok(invocations)
644}
645
646fn normalize_chat_tool_calls(value: &Value) -> Result<Vec<ToolInvocation>, KernelError> {
647    match value {
648        Value::Array(items) => normalize_chat_tool_calls_array(items),
649        Value::Object(object) if is_chat_tool_call(object) => {
650            parse_chat_tool_call(object).map(|call| vec![call])
651        }
652        _ => Ok(Vec::new()),
653    }
654}
655
656fn normalize_chat_tool_calls_array(items: &[Value]) -> Result<Vec<ToolInvocation>, KernelError> {
657    items
658        .iter()
659        .map(|item| match item {
660            Value::Object(object) if is_chat_tool_call(object) => parse_chat_tool_call(object),
661            Value::Object(_) => Err(KernelError::NormalizerFailed(
662                "OpenAI Chat Completions tool call missing function payload".into(),
663            )),
664            _ => Err(KernelError::NormalizerFailed(
665                "OpenAI Chat Completions tool call must be an object".into(),
666            )),
667        })
668        .collect()
669}
670
671fn is_chat_tool_call(object: &Map<String, Value>) -> bool {
672    object.get("function").is_some()
673}
674
675fn parse_chat_tool_call(object: &Map<String, Value>) -> Result<ToolInvocation, KernelError> {
676    let function = object
677        .get("function")
678        .and_then(Value::as_object)
679        .ok_or_else(|| {
680            KernelError::NormalizerFailed(
681                "OpenAI Chat Completions tool call missing function object".into(),
682            )
683        })?;
684    let name = required_string_field(function, "name", "OpenAI Chat Completions function")?;
685    let args = function
686        .get("arguments")
687        .map(parse_standard_arguments)
688        .transpose()?
689        .unwrap_or_else(|| Value::Object(Map::new()));
690
691    ToolInvocation::new(name, args)
692}
693
694fn parse_standard_arguments(value: &Value) -> Result<Value, KernelError> {
695    match value {
696        Value::String(raw) => {
697            let trimmed = raw.trim();
698            if trimmed.is_empty() {
699                return Ok(Value::Object(Map::new()));
700            }
701            serde_json::from_str(trimmed).map_err(|err| {
702                KernelError::NormalizerFailed(format!(
703                    "failed to parse standard tool-call arguments JSON: {err}"
704                ))
705            })
706        }
707        Value::Null => Ok(Value::Object(Map::new())),
708        other => Ok(other.clone()),
709    }
710}
711
712fn required_string_field(
713    object: &Map<String, Value>,
714    field: &str,
715    context: &str,
716) -> Result<String, KernelError> {
717    object
718        .get(field)
719        .and_then(Value::as_str)
720        .map(ToOwned::to_owned)
721        .ok_or_else(|| KernelError::NormalizerFailed(format!("{context} missing `{field}` string")))
722}
723
724fn flatten_invocations(nested: Vec<Vec<ToolInvocation>>) -> Vec<ToolInvocation> {
725    nested.into_iter().flatten().collect()
726}
727
728// ── Parsing helpers ───────────────────────────────────────────────────────────
729
730/// Parse one `[fn1(a=1), fn2(b=2)]` block from an LFM marker.
731fn parse_lfm_block(block: &str) -> Result<Vec<ToolInvocation>, KernelError> {
732    let block = block.trim();
733    // Strip optional surrounding `[ ]`.
734    let inner = block
735        .strip_prefix('[')
736        .and_then(|s| s.strip_suffix(']'))
737        .unwrap_or(block);
738
739    split_top_level(inner, ',')
740        .into_iter()
741        .filter(|s| !s.trim().is_empty())
742        .map(|s| parse_lfm_call(s.trim()))
743        .collect()
744}
745
746/// Parse one `fn_name(k1=v1, k2=v2)` call expression.
747fn parse_lfm_call(expr: &str) -> Result<ToolInvocation, KernelError> {
748    let (name_raw, rest) = expr.split_once('(').ok_or_else(|| {
749        KernelError::NormalizerFailed(format!("LFM: expected '(' in call: {expr:?}"))
750    })?;
751
752    let name = name_raw.trim().to_string();
753    if name.is_empty() {
754        return Err(KernelError::NormalizerFailed(
755            "LFM: empty tool name in call expression".into(),
756        ));
757    }
758    validate_identifier("tool name", &name)?;
759
760    // Use rsplit_once to handle nested parentheses in argument values.
761    let (kwargs_str, trailing) = rest.rsplit_once(')').ok_or_else(|| {
762        KernelError::NormalizerFailed(format!("LFM: missing closing ')' in: {expr:?}"))
763    })?;
764    if !trailing.trim().is_empty() {
765        return Err(KernelError::NormalizerFailed(format!(
766            "LFM: trailing content after call expression: {trailing:?}"
767        )));
768    }
769
770    let args = parse_kwargs(kwargs_str)?;
771    Ok(ToolInvocation { name, args })
772}
773
774/// Parse a comma-separated `key=value` kwargs string into a JSON object.
775fn parse_kwargs(s: &str) -> Result<Value, KernelError> {
776    let s = s.trim();
777    if s.is_empty() {
778        return Ok(Value::Object(Map::new()));
779    }
780
781    let mut map = Map::new();
782    for pair in split_top_level(s, ',') {
783        let pair = pair.trim();
784        if pair.is_empty() {
785            continue;
786        }
787        let (key_raw, val_raw) = pair.split_once('=').ok_or_else(|| {
788            KernelError::NormalizerFailed(format!("LFM: kwarg without '=': {pair:?}"))
789        })?;
790        let key = key_raw.trim().to_string();
791        if key.is_empty() {
792            return Err(KernelError::NormalizerFailed(
793                "LFM: empty kwarg name".into(),
794            ));
795        }
796        validate_identifier("kwarg name", &key)?;
797        if map.contains_key(&key) {
798            return Err(KernelError::NormalizerFailed(format!(
799                "LFM: duplicate kwarg: {key}"
800            )));
801        }
802        let val = parse_value(val_raw.trim())?;
803        map.insert(key, val);
804    }
805
806    Ok(Value::Object(map))
807}
808
809/// Best-effort conversion of a Python literal token into a JSON [`Value`].
810///
811/// Supported: single/double-quoted strings, `True`/`False`, `None`/`null`,
812/// integers, floats, lists, and dict/object literals. Anything else is
813/// returned as an unquoted string.
814fn parse_value(s: &str) -> Result<Value, KernelError> {
815    let s = s.trim();
816
817    if s.is_empty() {
818        return Ok(Value::String(String::new()));
819    }
820
821    // Single-quoted string.
822    if let Some(inner) = s.strip_prefix('\'').and_then(|t| t.strip_suffix('\'')) {
823        return Ok(Value::String(
824            inner.replace("\\'", "'").replace("\\\"", "\""),
825        ));
826    }
827    if s.starts_with('\'') {
828        return Err(KernelError::NormalizerFailed(
829            "LFM: unterminated single-quoted string".into(),
830        ));
831    }
832    // Double-quoted string.
833    if let Some(inner) = s.strip_prefix('"').and_then(|t| t.strip_suffix('"')) {
834        return Ok(Value::String(
835            inner.replace("\\'", "'").replace("\\\"", "\""),
836        ));
837    }
838    if s.starts_with('"') {
839        return Err(KernelError::NormalizerFailed(
840            "LFM: unterminated double-quoted string".into(),
841        ));
842    }
843    // Python booleans.
844    if s == "True" {
845        return Ok(Value::Bool(true));
846    }
847    if s == "False" {
848        return Ok(Value::Bool(false));
849    }
850    // Null / None.
851    if s == "None" || s == "null" {
852        return Ok(Value::Null);
853    }
854    // List / array literal.
855    if let Some(inner) = s.strip_prefix('[').and_then(|t| t.strip_suffix(']')) {
856        return parse_array(inner);
857    }
858    if s.starts_with('[') {
859        return Err(KernelError::NormalizerFailed(
860            "LFM: unterminated list literal".into(),
861        ));
862    }
863    // Dict / object literal.
864    if let Some(inner) = s.strip_prefix('{').and_then(|t| t.strip_suffix('}')) {
865        return parse_object(inner);
866    }
867    if s.starts_with('{') {
868        return Err(KernelError::NormalizerFailed(
869            "LFM: unterminated object literal".into(),
870        ));
871    }
872    // Integer.
873    if let Ok(n) = s.parse::<i64>() {
874        return Ok(Value::Number(n.into()));
875    }
876    // Float.
877    if let Ok(f) = s.parse::<f64>() {
878        let num = serde_json::Number::from_f64(f).ok_or_else(|| {
879            KernelError::NormalizerFailed(format!("LFM: non-finite float in argument: {s:?}"))
880        })?;
881        return Ok(Value::Number(num));
882    }
883    // Fall back: treat as an unquoted string literal.
884    Ok(Value::String(s.to_string()))
885}
886
887fn parse_array(inner: &str) -> Result<Value, KernelError> {
888    let inner = inner.trim();
889    if inner.is_empty() {
890        return Ok(Value::Array(Vec::new()));
891    }
892
893    let values = split_top_level(inner, ',')
894        .into_iter()
895        .filter(|part| !part.trim().is_empty())
896        .map(|part| parse_value(part.trim()))
897        .collect::<Result<Vec<_>, _>>()?;
898
899    Ok(Value::Array(values))
900}
901
902fn parse_object(inner: &str) -> Result<Value, KernelError> {
903    let inner = inner.trim();
904    if inner.is_empty() {
905        return Ok(Value::Object(Map::new()));
906    }
907
908    let mut map = Map::new();
909    for entry in split_top_level(inner, ',') {
910        let entry = entry.trim();
911        if entry.is_empty() {
912            continue;
913        }
914
915        let (key_raw, value_raw) = split_once_top_level(entry, ':').ok_or_else(|| {
916            KernelError::NormalizerFailed(format!("LFM: object entry without ':': {entry:?}"))
917        })?;
918        let key = parse_object_key(key_raw.trim())?;
919        if map.contains_key(&key) {
920            return Err(KernelError::NormalizerFailed(format!(
921                "LFM: duplicate object key: {key}"
922            )));
923        }
924
925        map.insert(key, parse_value(value_raw.trim())?);
926    }
927
928    Ok(Value::Object(map))
929}
930
931fn parse_object_key(raw: &str) -> Result<String, KernelError> {
932    match parse_value(raw)? {
933        Value::String(key) => Ok(key),
934        _ => Err(KernelError::NormalizerFailed(format!(
935            "LFM: object key must be a string: {raw:?}"
936        ))),
937    }
938}
939
940/// Validate model-emitted identifiers before they reach dispatch. Tool names
941/// allow the same separator characters commonly used in registries, while
942/// keyword argument names stay simple and JSON-object friendly.
943fn validate_identifier(kind: &str, value: &str) -> Result<(), KernelError> {
944    let valid = value
945        .chars()
946        .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | '.'));
947
948    if valid {
949        return Ok(());
950    }
951
952    Err(KernelError::NormalizerFailed(format!(
953        "invalid {kind}: {value:?}"
954    )))
955}
956
/// Cut `s` at every occurrence of `delim` that sits at the top level.
///
/// A delimiter is "top level" when it is outside single- or double-quoted
/// text and outside any `(`, `[`, or `{` nesting. Inside quotes a backslash
/// shields the character that follows it, so an escaped quote does not end
/// the quoted run. Closing brackets without a matching opener are tolerated:
/// the nesting counter saturates at zero.
///
/// The result always holds at least one slice; empty slices at the edges or
/// between adjacent delimiters are kept.
fn split_top_level(s: &str, delim: char) -> Vec<&str> {
    let mut pieces: Vec<&str> = Vec::new();
    let mut nesting: usize = 0;
    // `Some(q)` while scanning inside a run opened by quote character `q`.
    let mut open_quote: Option<char> = None;
    let mut skip_escaped = false;
    let mut piece_start = 0usize;

    for (pos, ch) in s.char_indices() {
        if let Some(quote) = open_quote {
            if skip_escaped {
                // This character was escaped by the preceding backslash.
                skip_escaped = false;
            } else if ch == '\\' {
                skip_escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => open_quote = Some(ch),
            '(' | '[' | '{' => nesting = nesting.saturating_add(1),
            ')' | ']' | '}' => nesting = nesting.saturating_sub(1),
            _ if ch == delim && nesting == 0 => {
                // `pos` comes from char_indices(), so it is a char boundary.
                pieces.push(s.get(piece_start..pos).unwrap_or(""));
                piece_start = pos + ch.len_utf8();
            }
            _ => {}
        }
    }

    pieces.push(s.get(piece_start..).unwrap_or(""));
    pieces
}
1005
1006fn split_once_top_level(s: &str, delim: char) -> Option<(&str, &str)> {
1007    split_index_top_level(s, delim).map(|idx| {
1008        let left = s.get(..idx).unwrap_or("");
1009        let right = s.get(idx + delim.len_utf8()..).unwrap_or("");
1010        (left, right)
1011    })
1012}
1013
/// Find the byte offset of the first top-level occurrence of `delim` in `s`.
///
/// Characters inside single- or double-quoted runs and inside `(`, `[`, or
/// `{` nesting are not candidates. Inside quotes a backslash shields the
/// character that follows it. Unmatched closing brackets are tolerated
/// because the nesting counter saturates at zero.
///
/// Returns `None` when `delim` never appears at the top level.
fn split_index_top_level(s: &str, delim: char) -> Option<usize> {
    let mut nesting: usize = 0;
    // `Some(q)` while scanning inside a run opened by quote character `q`.
    let mut open_quote: Option<char> = None;
    let mut skip_escaped = false;

    for (pos, ch) in s.char_indices() {
        if let Some(quote) = open_quote {
            if skip_escaped {
                // This character was escaped by the preceding backslash.
                skip_escaped = false;
            } else if ch == '\\' {
                skip_escaped = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => open_quote = Some(ch),
            '(' | '[' | '{' => nesting = nesting.saturating_add(1),
            ')' | ']' | '}' => nesting = nesting.saturating_sub(1),
            _ if ch == delim && nesting == 0 => return Some(pos),
            _ => {}
        }
    }

    None
}
1053
1054// ── Unit tests ────────────────────────────────────────────────────────────────
1055
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{LocalTool, ToolRegistry, ToolSchema};
    use serde_json::json;
    use std::sync::Arc;

    // ── is_applicable ──────────────────────────────────────────────────────

    // Text without any LFM marker is not claimed by the LFM normalizer.
    #[test]
    fn not_applicable_for_plain_text() {
        assert!(!LfmNormalizer.is_applicable("hello world"));
    }

    // Text containing a complete marker pair is claimed by the LFM normalizer.
    #[test]
    fn applicable_when_start_marker_present() {
        assert!(
            LfmNormalizer
                .is_applicable("<|tool_call_start|>[get_weather(city='Berlin')]<|tool_call_end|>")
        );
    }

    // ── normalize: clean inputs ────────────────────────────────────────────

    // Normalizing marker-free text succeeds and yields no invocations.
    #[test]
    fn plain_text_returns_empty() {
        let calls = LfmNormalizer
            .normalize("The weather in Berlin is sunny.")
            .unwrap();
        assert!(calls.is_empty());
    }

    // One call with one single-quoted string argument.
    #[test]
    fn single_call_string_arg() {
        let raw = "<|tool_call_start|>[get_weather(city='Berlin')]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "get_weather");
        assert_eq!(calls[0].args, json!({"city": "Berlin"}));
    }

    // One call mixing a string argument (containing a space) and an integer.
    #[test]
    fn single_call_multiple_args() {
        let raw = "<|tool_call_start|>[search(query='rust async', limit=10)]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "search");
        assert_eq!(calls[0].args, json!({"query": "rust async", "limit": 10}));
    }

    // An empty argument list produces an empty JSON object.
    #[test]
    fn single_call_no_args() {
        let raw = "<|tool_call_start|>[list_tools()]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "list_tools");
        assert_eq!(calls[0].args, json!({}));
    }

    // Two comma-separated calls inside one marker pair are returned in order.
    #[test]
    fn multiple_calls_in_one_block() {
        let raw = "<|tool_call_start|>[get_weather(city='Berlin'), get_time(zone='UTC')]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].name, "get_weather");
        assert_eq!(calls[0].args, json!({"city": "Berlin"}));
        assert_eq!(calls[1].name, "get_time");
        assert_eq!(calls[1].args, json!({"zone": "UTC"}));
    }

    // Two marker pairs separated by free text each contribute their call,
    // in the order they appear.
    #[test]
    fn multiple_blocks_in_one_message() {
        let raw = concat!(
            "<|tool_call_start|>[step_one(x=1)]<|tool_call_end|>",
            " some text ",
            "<|tool_call_start|>[step_two(y=2)]<|tool_call_end|>",
        );
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].name, "step_one");
        assert_eq!(calls[1].name, "step_two");
    }

    #[test]
    fn block_without_brackets_is_parsed() {
        // Format without outer [ ] is also handled.
        let raw = "<|tool_call_start|>ping(target='8.8.8.8')<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "ping");
        assert_eq!(calls[0].args, json!({"target": "8.8.8.8"}));
    }

    // ── value type coercion ────────────────────────────────────────────────

    // A bare integer token becomes a JSON integer.
    #[test]
    fn integer_arg() {
        let raw = "<|tool_call_start|>[set_limit(n=42)]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls[0].args, json!({"n": 42}));
    }

    // A bare decimal token becomes a JSON number readable as f64.
    #[test]
    fn float_arg() {
        let raw = "<|tool_call_start|>[set_temp(t=0.7)]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls[0].args["t"].as_f64().unwrap(), 0.7);
    }

    // Python-style `True` / `False` become JSON booleans.
    #[test]
    fn boolean_args() {
        let raw = "<|tool_call_start|>[configure(verbose=True, strict=False)]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls[0].args, json!({"verbose": true, "strict": false}));
    }

    // Python-style `None` becomes JSON null.
    #[test]
    fn null_args() {
        let raw = "<|tool_call_start|>[reset(ctx=None)]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls[0].args, json!({"ctx": null}));
    }

    // Double-quoted strings are unquoted just like single-quoted ones.
    #[test]
    fn double_quoted_string_arg() {
        let raw = r#"<|tool_call_start|>[greet(name="world")]<|tool_call_end|>"#;
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls[0].args, json!({"name": "world"}));
    }

    // List and dict literals are parsed recursively; a comma inside a quoted
    // list element (`'a,b'`) does not split the element.
    #[test]
    fn nested_list_and_object_args() {
        let raw = "<|tool_call_start|>[plan(items=['a,b', 'c'], meta={'city': 'Berlin', 'coords': [52.52, 13.405], 'active': True})]<|tool_call_end|>";
        let calls = LfmNormalizer.normalize(raw).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(
            calls[0].args,
            json!({
                "items": ["a,b", "c"],
                "meta": {
                    "city": "Berlin",
                    "coords": [52.52, 13.405],
                    "active": true
                }
            })
        );
    }

    // A bare Responses-API `function_call` item whose `arguments` field is a
    // JSON-encoded string.
    #[test]
    fn openai_responses_function_call_item() {
        let value = json!({
            "type": "function_call",
            "id": "fc_123",
            "call_id": "call_123",
            "name": "get_weather",
            "arguments": "{\"city\":\"Berlin\"}",
            "status": "completed"
        });

        let calls = StructuredToolCallNormalizer::normalize_openai_responses(&value).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "get_weather");
        assert_eq!(calls[0].args, json!({"city": "Berlin"}));
    }

    // A full Responses-API payload: the non-call `message` output item
    // contributes nothing, and `arguments` given as a JSON object is accepted.
    #[test]
    fn openai_responses_full_response() {
        let value = json!({
            "id": "resp_123",
            "output": [
                { "type": "message", "content": [] },
                {
                    "type": "function_call",
                    "id": "fc_123",
                    "call_id": "call_123",
                    "name": "search.docs",
                    "arguments": {"query": "tool calls"},
                    "status": "completed"
                }
            ]
        });

        let calls = StructuredToolCallNormalizer::normalize_openai_responses(&value).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "search.docs");
        assert_eq!(calls[0].args, json!({"query": "tool calls"}));
    }

    // A Chat Completions payload with `tool_calls` nested under
    // `choices[0].message`.
    #[test]
    fn openai_chat_completions_tool_calls() {
        let value = json!({
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": null,
                    "tool_calls": [{
                        "id": "call_123",
                        "type": "function",
                        "function": {
                            "name": "get_weather",
                            "arguments": "{\"city\":\"Berlin\"}"
                        }
                    }]
                }
            }]
        });

        let calls =
            StructuredToolCallNormalizer::normalize_openai_chat_completions(&value).unwrap();
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "get_weather");
        assert_eq!(calls[0].args, json!({"city": "Berlin"}));
    }

    // The generic `normalize` entry point handles both a Responses-style
    // `output` array and a top-level Chat-style `tool_calls` array.
    #[test]
    fn structured_normalizer_aggregates_supported_shapes() {
        let responses_value = json!({
            "output": [{
                "type": "function_call",
                "name": "first",
                "arguments": "{}"
            }]
        });
        let chat_value = json!({
            "tool_calls": [{
                "function": {
                    "name": "second",
                    "arguments": {"ok": true}
                }
            }]
        });

        let responses_calls = StructuredToolCallNormalizer::normalize(&responses_value).unwrap();
        let chat_calls = StructuredToolCallNormalizer::normalize(&chat_value).unwrap();

        assert_eq!(responses_calls[0].name, "first");
        assert_eq!(chat_calls[0].name, "second");
        assert_eq!(chat_calls[0].args, json!({"ok": true}));
    }

    // ── error paths ────────────────────────────────────────────────────────

    // A start marker with no matching end marker is an error mentioning
    // "unclosed".
    #[test]
    fn unclosed_marker_returns_error() {
        let raw = "<|tool_call_start|>[get_weather(city='Berlin')]";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("unclosed"), "expected 'unclosed' in: {msg}");
    }

    #[test]
    fn missing_paren_returns_error() {
        // Block with no '(' — not a valid call expression.
        let raw = "<|tool_call_start|>[not_a_call]<|tool_call_end|>";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("expected '('"), "got: {msg}");
    }

    // A positional (non `key=value`) argument is rejected.
    #[test]
    fn kwarg_without_equals_returns_error() {
        let raw = "<|tool_call_start|>[fn(badarg)]<|tool_call_end|>";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("kwarg without '='"), "got: {msg}");
    }

    // A tool name containing `/` fails identifier validation.
    #[test]
    fn invalid_tool_name_returns_error() {
        let raw = "<|tool_call_start|>[bad/name(arg=1)]<|tool_call_end|>";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("invalid tool name"), "got: {msg}");
    }

    // A pair with nothing before the `=` is rejected.
    #[test]
    fn empty_kwarg_name_returns_error() {
        let raw = "<|tool_call_start|>[fn(=1)]<|tool_call_end|>";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("empty kwarg name"), "got: {msg}");
    }

    // Repeating a keyword argument is an error rather than last-one-wins.
    #[test]
    fn duplicate_kwarg_returns_error() {
        let raw = "<|tool_call_start|>[fn(city='Berlin', city='Paris')]<|tool_call_end|>";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("duplicate kwarg"), "got: {msg}");
    }

    // A structured call whose `arguments` string is not valid JSON is an
    // error mentioning "arguments JSON".
    #[test]
    fn malformed_standard_arguments_return_error() {
        let value = json!({
            "type": "function_call",
            "name": "bad_args",
            "arguments": "{not json}"
        });

        let err = StructuredToolCallNormalizer::normalize_openai_responses(&value).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("arguments JSON"), "got: {msg}");
    }

    // Text after the closing `)` of a call expression is rejected.
    #[test]
    fn trailing_call_content_returns_error() {
        let raw = "<|tool_call_start|>[fn(arg=1) extra]<|tool_call_end|>";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("trailing content"), "got: {msg}");
    }

    // The only `]` in the payload is consumed as the block's outer bracket,
    // so the list argument is left without its closing `]` and is reported
    // as an unterminated list.
    #[test]
    fn unterminated_nested_literal_returns_error() {
        let raw = "<|tool_call_start|>[fn(items=['a', 'b')]<|tool_call_end|>";
        let err = LfmNormalizer.normalize(raw).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("unterminated list"), "got: {msg}");
    }

    // End-to-end: register an `echo` tool, normalize one LFM call carrying a
    // nested object argument, dispatch it, and check that the tool saw the
    // parsed arguments.
    #[tokio::test]
    async fn dispatch_invocations_runs_tools_in_order() {
        let tools = ToolRegistry::new();
        tools.register(Arc::new(LocalTool::new(
            ToolSchema {
                name: "echo".into(),
                description: "echoes args".into(),
                args_schema: json!({"type": "object"}),
                result_schema: json!({"type": "object"}),
            },
            |args| async move { Ok(json!({"seen": args})) },
        )));

        let invocations = LfmNormalizer
            .normalize("<|tool_call_start|>[echo(value={'nested': [1, 2]})]<|tool_call_end|>")
            .unwrap();
        let results = dispatch_tool_invocations(&tools, &invocations)
            .await
            .unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].invocation.name, "echo");
        assert_eq!(
            results[0].output,
            json!({"seen": {"value": {"nested": [1, 2]}}})
        );
    }

    // ── split_top_level helper ─────────────────────────────────────────────

    #[test]
    fn split_respects_parens() {
        // Comma inside parens must not split.
        let parts = split_top_level("fn(a, b), fn2(c)", ',');
        assert_eq!(parts, vec!["fn(a, b)", " fn2(c)"]);
    }

    // Comma inside a single-quoted string must not split.
    #[test]
    fn split_respects_single_quotes() {
        let parts = split_top_level("a='x,y', b=2", ',');
        assert_eq!(parts, vec!["a='x,y'", " b=2"]);
    }

    // Commas inside `[...]` and `{...}` (including quoted text nested within
    // them) must not split; leading whitespace of later parts is preserved.
    #[test]
    fn split_respects_nested_arrays_and_objects() {
        let parts = split_top_level("a=[1, 2], b={'x': 'y,z'}, c=3", ',');
        assert_eq!(parts, vec!["a=[1, 2]", " b={'x': 'y,z'}", " c=3"]);
    }
}
rig_compose/normalizer.rs

rig_compose/
normalizer.rs