// mlxrs 0.1.0 - Docs.rs

//! Tool-call parsers.
//!
//! Ported from every concrete parser in `mlx-lm`'s `mlx_lm/tool_parsers/`
//! (`json_tools`, `pythonic`, `mistral`, `qwen3_coder`, `glm47`, `kimi_k2`,
//! `longcat`, `minimax_m2`, `function_gemma`, `gemma4`) plus the
//! `_infer_tool_parser` selector in `mlx_lm/tokenizer_utils.py` (~548+).
//! Cross-referenced against `mlx-swift-lm`'s `MLXLMCommon/Tool/*`
//! (`ToolCall`, `ToolCallFormat`, `ToolCallProcessor`, `Value`, `Parsers`)
//! for the abstraction shape: a `ToolParser` trait producing structured
//! `ToolCall`s, selected by chat-template content.
//!
//! Python `ast.literal_eval` is approximated with a JSON-first parse and a
//! conservative Python-literal fallback (`True`/`False`/`None`, single-quoted
//! strings); values that fail both are kept as the original string, matching
//! the Python parsers' "keep as string" behavior.

use serde_json::Value;

use crate::Error;

// Constant marker + ordered selection dispatch table, regenerated by
// `cargo xtask-codegen` from `mlxrs/data/tokenizer/tool_parsers.toml`
// (single source of truth; mlx-lm df1d3f3) into the committed
// `crate::tokenizer::generated` module. Parser *logic* below stays Rust and
// consumes this table instead of inline string literals.
use crate::tokenizer::generated::{TOOL_PARSER_MARKERS, TOOL_PARSER_SELECT};

/// Looks up the `tool_call_start` delimiter registered for the parser called
/// `name` in the generated marker table.
///
/// Returns the empty string when the table has no entry for `name`.
fn marker_start(name: &str) -> &'static str {
  for entry in TOOL_PARSER_MARKERS.iter() {
    if entry.name == name {
      return entry.start;
    }
  }
  ""
}

/// Looks up the `tool_call_end` delimiter registered for the parser called
/// `name` in the generated marker table.
///
/// Returns the empty string when the table has no entry for `name`.
fn marker_end(name: &str) -> &'static str {
  for entry in TOOL_PARSER_MARKERS.iter() {
    if entry.name == name {
      return entry.end;
    }
  }
  ""
}

/// A single parsed tool call (Swift `ToolCall` shape: name + arguments,
/// optional id for kimi-style calls).
///
/// All fields are private; values are built through the constructors and
/// read through the accessor methods on this type.
#[derive(Debug, Clone, PartialEq)]
pub struct ToolCall {
  /// Function name.
  name: String,
  /// Arguments as a JSON object (or other JSON value for raw payloads).
  arguments: Value,
  /// Optional tool-call id (kimi_k2 emits `functions.name:idx`).
  ///
  /// **Intentional exception:** `id` stays `Option<String>` — `None` means
  /// "this parser does not emit call IDs" (e.g. `json_tools`), while
  /// `Some("")` would mean the parser emitted an explicitly-empty id. The
  /// semantic distinction is upstream-meaningful per Kimi-K2 / OpenAI
  /// tool-call conventions; collapsing to `String` (with empty = absent) would
  /// lose that signal.
  id: Option<String>,
}

impl ToolCall {
  /// Internal convenience constructor for parsers that never emit a call id:
  /// identical to [`ToolCall::new`] with `id` set to `None`.
  fn new_nameless_id(name: impl Into<String>, arguments: Value) -> Self {
    Self::new(name, arguments, None)
  }

  /// Builds a tool call from its function name, its arguments and an
  /// optional call id.
  ///
  /// `id` follows the upstream Kimi-K2 / OpenAI convention: `None` means the
  /// parser does not emit ids at all, whereas `Some("")` is an id that was
  /// emitted but is empty. The two are deliberately kept distinct.
  pub fn new(name: impl Into<String>, arguments: Value, id: Option<String>) -> Self {
    let name = name.into();
    Self {
      name,
      arguments,
      id,
    }
  }

  /// Name of the function being called.
  #[inline(always)]
  pub fn name(&self) -> &str {
    self.name.as_str()
  }

  /// Arguments of the call as a JSON value.
  #[inline(always)]
  pub fn arguments(&self) -> &Value {
    let Self { arguments, .. } = self;
    arguments
  }

  /// Call id, or `None` when the originating parser does not emit ids.
  #[inline(always)]
  pub fn id(&self) -> Option<&str> {
    let Self { id, .. } = self;
    id.as_deref()
  }
}

/// A tool-call parser (Python `tool_module.parse_tool_call`). Implementors
/// also expose the `tool_call_start` / `tool_call_end` delimiters.
pub trait ToolParser: Send + Sync {
  /// Parse one assistant tool-call payload into one or more [`ToolCall`]s.
  ///
  /// **Default:** loops on [`try_parse_one_call`](Self::try_parse_one_call)
  /// over `text`, re-feeding the trailing suffix after each section. The loop
  /// stops when the text is exhausted, when the parser returns `Ok(None)` (no
  /// more complete calls), or when it reports a zero-width section; the first
  /// `Err` is propagated to the caller. A parser that implements
  /// `try_parse_one_call` correctly therefore gets `parse` for free. Parsers
  /// with richer batch semantics (e.g. Mistral, whose `parse` consumes a
  /// payload that the streaming layer never sees mid-stream because its end
  /// tag is empty) override this directly.
  fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    default_parse_via_try_parse_one_call(self, text, tools)
  }

  /// The stable parser name (matches the Python module name).
  fn name(&self) -> &'static str;

  /// The `tool_call_start` delimiter, looked up from the marker table
  /// regenerated by `cargo xtask-codegen` from
  /// `mlxrs/data/tokenizer/tool_parsers.toml` into the committed
  /// `mlxrs/src/tokenizer/generated.rs`.
  fn tool_call_start(&self) -> &'static str {
    marker_start(self.name())
  }

  /// The `tool_call_end` delimiter, from the generated marker table.
  fn tool_call_end(&self) -> &'static str {
    marker_end(self.name())
  }

  /// Attempt to parse ONE complete tagged tool-call section starting in
  /// `buffer`.
  ///
  /// # Return contract
  ///
  /// - **`Ok(Some((calls, end_pos)))`** — `buffer[..end_pos]` IS a complete,
  ///   known-bounded section. `end_pos` is the byte position one past the
  ///   last byte the section consumed. `calls` are the [`ToolCall`]s
  ///   extracted from that section (one for singleton parsers; many for
  ///   multi-block parsers like `minimax_m2`, `kimi_k2`, `gemma4`).
  ///
  ///   `calls` MAY be empty when the section is structurally a tagged-call
  ///   shape but its body is rejected on inspection (e.g. `json_tools`
  ///   served a top-level array that fails the `name` lookup). The streaming
  ///   processor TREATS THE EMPTY CASE IDENTICALLY to the non-empty case:
  ///   the section's bytes are dropped, the suffix `buffer[end_pos..]` from
  ///   the SAME chunk is preserved as display / re-examined for back-to-back
  ///   sections. This is what makes `process_chunk("[tc]bad[/tc]visible")`
  ///   surface `visible` even when the parser rejected the body.
  ///
  /// - **`Ok(None)`** — the buffer is incomplete (no recognisable end of
  ///   section yet). The streaming caller appends the next chunk and
  ///   retries; the buffered bytes are preserved.
  ///
  ///   **`Ok(None)` invariant:** return `Ok(None)` ONLY when the
  ///   wrapper end-tag (`tool_call_end()`) is NOT yet locatable in `buffer`
  ///   at any position after the start tag. If the end-tag IS in the buffer,
  ///   the section is bounded — return
  ///   `Ok(Some((Vec::new(), end_pos)))` even when the body is unparseable.
  ///   This preserves any same-chunk suffix bytes after the close (the
  ///   `<tool_call>bad</tool_call>visible` truncation: an early `Ok(None)`
  ///   on the malformed body keeps the entire buffer, silently dropping
  ///   `visible` until cap/EOS). Parsers with an empty `tool_call_end` (e.g.
  ///   `mistral`) are exempt because the streaming processor short-circuits
  ///   them on the empty-end branch in [`ToolCallProcessor`] before
  ///   `try_parse_one_call` is ever invoked.
  ///
  /// - **`Err(_)`** — RESERVED for *truly indeterminate* failures with NO
  ///   recoverable `end_pos` (e.g. an internal error from the parser itself
  ///   or an input shape that defies even section-boundary detection). The
  ///   streaming processor treats this as if the whole buffer is bad: it
  ///   drains its tool-call buffer and resets to the normal (pre-detection)
  ///   state. A confirmed-but-rejected section that knows where it ends
  ///   MUST NOT use this arm — same-chunk suffix bytes would be permanently
  ///   lost.
  ///
  /// # Lock-step with [`parse`](Self::parse)
  ///
  /// This method UNIFIES extraction and end-detection — it must use EXACTLY
  /// the same find/rfind/iteration order as [`parse`](Self::parse) over the
  /// same payload, so streaming and batch results are identical for every
  /// input. The structural pivot: each parser owns ONE method that
  /// performs both jobs in lock-step, instead of a separate end-tag-scanner
  /// that drifts from `parse`.
  ///
  /// # Invariant (single-section parsers)
  ///
  /// Single-section parsers (`json_tools`, `pythonic`, `qwen3_coder`,
  /// `glm47`, `longcat`, `mistral`, `function_gemma`) MUST run a per-parser
  /// **bound-to-first-end-tag** step BEFORE any parser-internal opener
  /// search (e.g. `payload.find("<function=")`,
  /// `balanced_json_object_prefix(payload)`, `payload.find("[")`,
  /// `payload.find("call:")`). Without this prefix-bounding the opener
  /// search scans the WHOLE payload — so a buffer like
  /// `<tool_call>bad</tool_call>{"name":"x"}` makes the body's JSON
  /// balancer lock onto the *suffix* `{...}` AFTER the closed-malformed
  /// section, then the end-tag-after-it search fails, the call returns
  /// `Ok(None)`, and the same-chunk suffix is silently dropped until
  /// cap/EOS. The bound-step is parser-syntax-aware (string-quote-aware
  /// for JSON-body parsers, value-region-aware for XML/escape-body
  /// parsers) so an in-value end-tag literal stays buffered as before
  /// (`Ok(None)` for genuine mid-stream cases is preserved — the
  /// in-value end-tag tests stay green).
  ///
  /// Multi-block parsers (`kimi_k2`, `minimax_m2`, `gemma4`) are
  /// structurally exempt: they ALREADY race section-end-tag vs next-opener
  /// at each cursor step (the per-section opener-vs-end pattern), so
  /// every parser-internal scan is implicitly prefix-bounded.
  ///
  /// # Multi-call parsers
  ///
  /// For multi-call parsers the streaming caller does NOT loop on per-inner-
  /// block extraction here: `try_parse_one_call` peels off ONE TAGGED
  /// SECTION (a single start-tag → end-tag pair) and returns every inner
  /// block within that section. The processor then loops on the trailing
  /// suffix (truncating to `[end_pos..]`) to consume back-to-back sections.
  /// This avoids the awkward "wrapper start shared across calls" splitting
  /// while still keeping the per-section unit identical to `parse`'s output.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error>;
}

/// Shared body of the default [`ToolParser::parse`]: repeatedly asks the
/// parser for the next tagged section via
/// [`ToolParser::try_parse_one_call`], starting each attempt at the byte
/// offset where the previous section ended.
///
/// Collection stops when the text is exhausted, when the parser reports
/// `Ok(None)` (nothing complete remains), or when a section claims zero
/// width. An `Err` from the parser is returned immediately.
fn default_parse_via_try_parse_one_call<P: ToolParser + ?Sized>(
  parser: &P,
  text: &str,
  tools: Option<&Value>,
) -> Result<Vec<ToolCall>, Error> {
  let mut collected = Vec::new();
  let mut offset = 0usize;
  while offset < text.len() {
    let Some((calls, consumed)) = parser.try_parse_one_call(&text[offset..], tools)? else {
      break;
    };
    if consumed == 0 {
      // A zero-width section can never make progress; bail out instead of
      // spinning, just like the streaming caller would keep waiting on
      // `Ok(None)` at end-of-stream.
      break;
    }
    collected.extend(calls);
    offset += consumed;
  }
  Ok(collected)
}

fn err(msg: impl Into<String>) -> Error {
  Error::tokenizer(msg.into())
}

/// Finds the first `start_tag` in `buffer` and returns `(payload_at,
/// payload)`: the byte offset immediately after the tag, and the slice of
/// `buffer` from that offset to the end.
///
/// Returns `None` when `start_tag` is empty or does not occur in `buffer`,
/// which the streaming caller interprets as "buffer still incomplete".
fn locate_tagged_payload<'a>(buffer: &'a str, start_tag: &str) -> Option<(usize, &'a str)> {
  if start_tag.is_empty() {
    return None;
  }
  // `split_once` cuts at the first occurrence; everything after the tag is
  // the payload, and its start offset follows from the two lengths.
  let (_, payload) = buffer.split_once(start_tag)?;
  Some((buffer.len() - payload.len(), payload))
}

/// Reduces a tagged section (`start_tag…end_tag`) to its whitespace-trimmed
/// inner payload, ready to hand to a parser's `parse`.
///
/// Everything up to and including the FIRST `start_tag` is dropped, then
/// everything from the LAST `end_tag` onwards. A tag that is empty or absent
/// leaves that side of the text untouched. The tags are passed in directly
/// so the streaming hot path does not have to re-borrow the parser.
fn strip_section_markers<'a>(section: &'a str, start_tag: &str, end_tag: &str) -> &'a str {
  let after_start = if start_tag.is_empty() {
    section
  } else {
    match section.split_once(start_tag) {
      Some((_, rest)) => rest,
      None => section,
    }
  };
  let before_end = if end_tag.is_empty() {
    after_start
  } else {
    match after_start.rsplit_once(end_tag) {
      Some((head, _)) => head,
      None => after_start,
    }
  };
  before_end.trim()
}

/// Helper for the [`ToolParser::try_parse_one_call`] `Ok(None)` invariant.
///
/// Searches `buffer[payload_at..]` for the first plain-substring occurrence
/// of `end_tag` and returns the absolute byte offset one past it. An
/// early-return path with an unparseable body can then surface the
/// confirmed-bounded section as `Ok(Some((Vec::new(), end_pos)))` instead of
/// the same-chunk-suffix-dropping `Ok(None)`.
///
/// Returns `None` when `end_tag` is empty (e.g. mistral, which the streaming
/// processor short-circuits before this helper would be reached), when
/// `payload_at` is not a valid slice start for `buffer`, or when the tag is
/// genuinely absent (the buffer is incomplete and `Ok(None)` is correct).
///
/// This variant is only appropriate when the body grammar cannot
/// legitimately host an in-value `end_tag` literal (e.g. the body never
/// opened with a structured shape at all — `bad</tool_call>`). Bodies that
/// can carry quoted strings must use
/// [`closed_but_malformed_end_pos_quote_aware`] instead.
fn closed_but_malformed_end_pos(buffer: &str, payload_at: usize, end_tag: &str) -> Option<usize> {
  match (end_tag.is_empty(), buffer.get(payload_at..)) {
    (false, Some(tail)) => tail
      .find(end_tag)
      .map(|offset| payload_at + offset + end_tag.len()),
    _ => None,
  }
}

/// Value-region-aware sibling of [`closed_but_malformed_end_pos`].
///
/// Walks `buffer[payload_at..]` and skips, as a unit, every region delimited
/// by `value_open … value_close` (used for `gemma4`'s `<|"|>STR<|"|>` and
/// `function_gemma`'s `<escape>STR<escape>` values). Returns the absolute
/// offset one past the FIRST `end_tag` found OUTSIDE any such region.
///
/// Returns `None` when any of the three markers is empty, when `payload_at`
/// is not a valid slice start, when no end tag exists outside a value
/// region, or when a `value_open` has no matching `value_close` yet — in
/// that last case the rest of the buffer is in-value text, so an end-tag
/// literal inside it must not close the section and the caller keeps
/// waiting for more bytes.
fn closed_but_malformed_end_pos_value_aware(
  buffer: &str,
  payload_at: usize,
  end_tag: &str,
  value_open: &str,
  value_close: &str,
) -> Option<usize> {
  if end_tag.is_empty() || value_open.is_empty() || value_close.is_empty() {
    return None;
  }
  let body = buffer.get(payload_at..)?;
  let raw = body.as_bytes();
  let open = value_open.as_bytes();
  let end = end_tag.as_bytes();
  let mut pos = 0;
  while pos < raw.len() {
    let rest = &raw[pos..];
    if rest.starts_with(open) {
      // Jump over the whole value; `?` bails out on an unterminated one.
      let value_at = pos + open.len();
      let close_at = body.get(value_at..)?.find(value_close)?;
      pos = value_at + close_at + value_close.len();
    } else if rest.starts_with(end) {
      return Some(payload_at + pos + end.len());
    } else {
      pos += 1;
    }
  }
  None
}

/// String-quote-aware sibling of [`closed_but_malformed_end_pos`], for
/// bodies that may carry an `end_tag` literal inside a quoted string (JSON
/// for `json_tools` / `glm47` / `longcat` / `kimi_k2` args; Python literals
/// for `pythonic`).
///
/// Scans `buffer[payload_at..]` byte by byte while tracking whether a quoted
/// region is open. Inside a region a backslash makes the following byte
/// inert (so `\"`, `\'` and `\\` do not end or extend the escape), and the
/// region ends at the next unescaped byte equal to the one that opened it.
/// The result is the absolute offset one past the FIRST `end_tag` that
/// starts OUTSIDE every quoted region.
///
/// `quotes` lists the bytes that may open a region (`b"\""` for JSON,
/// `b"\"'"` for Python).
///
/// Returns `None` when `end_tag` is empty, when `payload_at` is not a valid
/// slice start, or when no end tag lies outside a quoted region — either it
/// is absent or every candidate sits inside a string (possibly one that is
/// still open because the body is mid-stream), so `Ok(None)` is the right
/// answer for the caller.
///
/// This separates `<tool_call>{</tool_call>visible` (end tag outside any
/// string: closed but malformed) from `<tool_call>{"s":"</tool_call>` (end
/// tag inside a still-open string: genuinely incomplete).
fn closed_but_malformed_end_pos_quote_aware(
  buffer: &str,
  payload_at: usize,
  end_tag: &str,
  quotes: &[u8],
) -> Option<usize> {
  if end_tag.is_empty() {
    return None;
  }
  let haystack = buffer.get(payload_at..)?.as_bytes();
  let needle = end_tag.as_bytes();
  let mut open_quote: Option<u8> = None;
  let mut skip_next = false;
  // Every state transition consumes exactly one byte, so a plain
  // enumerated walk is enough.
  for (idx, &byte) in haystack.iter().enumerate() {
    let state = open_quote;
    match state {
      Some(_) if skip_next => skip_next = false,
      Some(_) if byte == b'\\' => skip_next = true,
      Some(opener) if byte == opener => open_quote = None,
      Some(_) => {}
      None if quotes.contains(&byte) => open_quote = Some(byte),
      None if haystack[idx..].starts_with(needle) => {
        return Some(payload_at + idx + needle.len());
      }
      None => {}
    }
  }
  None
}

/// Context gate used by the per-parser `bound_section` helpers.
///
/// Looks for the first plain-substring `end_tag` in `payload` and asks
/// `context_proven` whether the bytes BEFORE it (`payload[..first_end]`)
/// structurally prove the parser's own grammar opener (`{` as the first
/// non-whitespace byte, `[name(`, `<function=name>`, `call:name{`, …). A
/// structural predicate is required because a bare opener literal inside
/// malformed bytes (`bad{"`, `bad[`, `bad call:`) must not route the section
/// to a syntax-aware scanner, where an orphan marker could hide the real
/// wrapper close.
///
/// Outer `Option`:
/// * `None` — `end_tag` is empty or not yet in `payload`; the streaming
///   caller waits for more bytes.
///
/// Inner `Option<usize>` (when the outer is `Some`):
/// * `Some(end_pos)` — the predicate returned `false`. The section is
///   treated as plain-bounded: `end_pos` (relative to `payload`) is one past
///   the first `end_tag`, so the caller can surface zero calls and keep the
///   same-chunk suffix.
/// * `None` — the predicate returned `true`. The caller may safely run its
///   syntax-aware scanner over the full `payload`.
fn bound_context_or_plain_end(
  payload: &str,
  end_tag: &str,
  context_proven: impl Fn(&str) -> bool,
) -> Option<Option<usize>> {
  if end_tag.is_empty() {
    return None;
  }
  payload.find(end_tag).map(|first_end| {
    let proven = context_proven(&payload[..first_end]);
    // Proven context defers to the caller's scanner; otherwise close at
    // the first end tag.
    (!proven).then(|| first_end + end_tag.len())
  })
}

/// Context predicate for JSON-object bodies (json_tools, and the Object arms
/// of glm47 / longcat): true only when the first non-whitespace character of
/// `prefix` is `{`. A `{` anywhere later (e.g. `bad{`) proves nothing.
fn json_object_context_proven(prefix: &str) -> bool {
  prefix.chars().find(|c| !c.is_whitespace()) == Some('{')
}

/// Context predicate for JSON-array bodies (glm47 Array arm): true only when
/// the first non-whitespace character of `prefix` is `[`. A `[` anywhere
/// later proves nothing.
fn json_array_context_proven(prefix: &str) -> bool {
  prefix.chars().find(|c| !c.is_whitespace()) == Some('[')
}

/// Context predicate for pythonic bodies: true when
/// [`find_first_pythonic_call_start`] locates a call start (`[name(`)
/// anywhere in `prefix`.
///
/// The predicate deliberately delegates to the same recognizer the parser
/// body uses, so the two cannot disagree about what counts as a call start.
fn pythonic_call_context_proven(prefix: &str) -> bool {
  matches!(find_first_pythonic_call_start(prefix), Some(_))
}

/// Context predicate for qwen3_coder bodies: true when
/// [`find_first_qwen_function_open`] locates a `<function=NAME>` open tag
/// anywhere in `prefix`.
///
/// The predicate deliberately delegates to the same recognizer the parser
/// body uses. If the two disagreed (say on dotted or spaced names such as
/// `<function=foo.bar>`), the plain-end gate could close on an
/// in-parameter `</tool_call>` literal and silently drop the call.
fn qwen_function_context_proven(prefix: &str) -> bool {
  matches!(find_first_qwen_function_open(prefix), Some(_))
}

/// Context predicate for function_gemma bodies: true when
/// [`find_first_function_gemma_call_start`] locates a call start
/// (`call:name{`) anywhere in `prefix`.
///
/// The predicate deliberately delegates to the same recognizer the parser
/// body uses, so the two cannot disagree (for instance about whitespace
/// between the name and `{`).
fn function_gemma_call_context_proven(prefix: &str) -> bool {
  matches!(find_first_function_gemma_call_start(prefix), Some(_))
}

/// Builds a plain-literal context predicate: the returned closure is true
/// when `prefix` contains `needle` as a substring, and always false when
/// `needle` is empty.
///
/// Intended for grammars where a single literal marker is itself sufficient
/// context proof (glm47 `<arg_key>`, longcat `<longcat_arg_key>`).
fn literal_context_proven<'a>(needle: &'a str) -> impl Fn(&str) -> bool + 'a {
  move |prefix: &str| match needle {
    "" => false,
    marker => prefix.find(marker).is_some(),
  }
}

/// Approximates Python `ast.literal_eval` enough for tool-arg coercion.
///
/// The trimmed input is tried, in order, as:
/// 1. JSON;
/// 2. the keywords `True` / `False` / `None` (and their JSON spellings);
/// 3. a string wrapped in matching `'…'` or `"…"` (the inner text is kept
///    verbatim, escapes are not interpreted);
/// 4. an `i64`, then an `f64` that JSON can represent;
/// 5. a Python list / tuple / dict literal, translated to JSON by
///    [`python_container_to_json`].
///
/// Anything that matches none of these is returned as a JSON string holding
/// the trimmed input.
fn literal_eval(s: &str) -> Value {
  let t = s.trim();
  if let Ok(v) = serde_json::from_str::<Value>(t) {
    return v;
  }
  match t {
    "True" | "true" => return Value::Bool(true),
    "False" | "false" => return Value::Bool(false),
    "None" | "null" => return Value::Null,
    _ => {}
  }
  if (t.starts_with('\'') && t.ends_with('\'') && t.len() >= 2)
    || (t.starts_with('"') && t.ends_with('"') && t.len() >= 2)
  {
    return Value::String(t[1..t.len() - 1].to_owned());
  }
  if let Ok(i) = t.parse::<i64>() {
    return Value::Number(i.into());
  }
  if let Some(n) = t
    .parse::<f64>()
    .ok()
    .and_then(serde_json::Number::from_f64)
  {
    return Value::Number(n);
  }
  // Python list / tuple / dict: translate token by token. A blanket
  // `.replace()` would also rewrite quotes, parentheses and keywords that
  // sit INSIDE string values (`['None (x)']` must stay `None (x)`).
  if (t.starts_with('[') && t.ends_with(']'))
    || (t.starts_with('{') && t.ends_with('}'))
    || (t.starts_with('(') && t.ends_with(')'))
  {
    let translated = python_container_to_json(t);
    if let Ok(v) = serde_json::from_str::<Value>(&translated) {
      return v;
    }
  }
  Value::String(t.to_owned())
}

/// Rewrites a Python container literal as JSON text without touching the
/// contents of string values.
///
/// Outside strings, `(`/`)` become `[`/`]` and the bare words `True`,
/// `False`, `None` become `true`, `false`, `null`. Strings in either quote
/// style are re-emitted double-quoted: a Python `\'` becomes a plain `'`, an
/// unescaped `"` inside a single-quoted string is escaped, and every other
/// backslash escape is copied through for the JSON parser to accept or
/// reject. The output is not validated here.
fn python_container_to_json(src: &str) -> String {
  let mut json = String::with_capacity(src.len());
  let mut chars = src.chars().peekable();
  while let Some(c) = chars.next() {
    match c {
      '\'' | '"' => {
        json.push('"');
        while let Some(inner) = chars.next() {
          match inner {
            '\\' => match chars.next() {
              // JSON has no `\'` escape; the apostrophe needs none.
              Some('\'') => json.push('\''),
              Some(escaped) => {
                json.push('\\');
                json.push(escaped);
              }
              None => json.push('\\'),
            },
            _ if inner == c => {
              json.push('"');
              break;
            }
            '"' => json.push_str("\\\""),
            _ => json.push(inner),
          }
        }
      }
      '(' => json.push('['),
      ')' => json.push(']'),
      _ if c.is_alphabetic() || c == '_' => {
        // Collect the whole word so only exact keywords are translated.
        let mut word = String::new();
        word.push(c);
        while let Some(&next) = chars.peek() {
          if next.is_alphanumeric() || next == '_' {
            word.push(next);
            chars.next();
          } else {
            break;
          }
        }
        json.push_str(match word.as_str() {
          "True" => "true",
          "False" => "false",
          "None" => "null",
          other => other,
        });
      }
      _ => json.push(c),
    }
  }
  json
}

/// JSON-then-literal deserialize (Python `_deserialize` in
/// glm47/longcat/kimi): `value` is parsed as JSON exactly as given, and only
/// if that fails is it handed to [`literal_eval`].
fn deserialize(value: &str) -> Value {
  serde_json::from_str::<Value>(value).unwrap_or_else(|_| literal_eval(value))
}

/// Wraps a single id-less call (`name` + `args`) in a one-element vector,
/// the shape singleton parsers return.
fn obj(name: &str, args: Value) -> Vec<ToolCall> {
  let call = ToolCall::new_nameless_id(name, args);
  Vec::from([call])
}

/// Looks up the JSON-schema `properties` object of the tool whose
/// `function.name` equals `func_name`.
///
/// `tools` is expected to be a JSON array of entries shaped like
/// `{"function": {"name": …, "parameters": {"properties": {…}}}}`. Entries
/// without a `function` key are skipped rather than aborting the search, so
/// one malformed entry cannot hide a valid tool listed after it.
///
/// Returns `None` when `tools` is absent or not an array, when no entry
/// matches `func_name`, or when the first matching entry has no
/// `parameters.properties` object.
fn tool_properties<'a>(
  tools: Option<&'a Value>,
  func_name: &str,
) -> Option<&'a serde_json::Map<String, Value>> {
  tools?
    .as_array()?
    .iter()
    .filter_map(|tool| tool.get("function"))
    .find(|f| f.get("name").and_then(Value::as_str) == Some(func_name))?
    .get("parameters")
    .and_then(|p| p.get("properties"))
    .and_then(Value::as_object)
}

/// Names of the parameters of tool `func_name` whose schema declares
/// `"type": "string"`, in the iteration order of the properties map.
///
/// Returns an empty vector when [`tool_properties`] finds nothing for
/// `func_name`.
fn string_arg_names(tools: Option<&Value>, func_name: &str) -> Vec<String> {
  let Some(props) = tool_properties(tools, func_name) else {
    return Vec::new();
  };
  let mut names = Vec::new();
  for (key, schema) in props {
    let declared = schema.get("type").and_then(Value::as_str);
    if declared == Some("string") {
      names.push(key.clone());
    }
  }
  names
}

// ----------------------------------------------------------------------------
// json_tools
// ----------------------------------------------------------------------------

/// `json_tools` — `<tool_call>{json}</tool_call>`; the payload is plain JSON.
///
/// Stateless unit type. Its [`ToolParser`] implementation expects the
/// payload to be a JSON value with a string `name` and an optional
/// `arguments` member (which defaults to JSON `null` when absent).
pub struct JsonTools;

impl JsonTools {
  /// **Bound-to-first-end-tag step.** Finds the real wrapper `end_tag` in
  /// `payload` and returns `(bounded_body, end_pos)`:
  /// * `bounded_body` is the slice of `payload` BEFORE the wrapper close
  ///   (the end tag itself is excluded);
  /// * `end_pos` is the absolute buffer offset one past the end tag, i.e.
  ///   `payload_at` plus the payload-relative close position.
  ///
  /// Returns `None` when no wrapper close can be located yet; the streaming
  /// caller then waits for more bytes.
  ///
  /// **How the close is chosen.** [`bound_context_or_plain_end`] runs first
  /// with [`json_object_context_proven`]:
  /// * If the bytes before the first end tag do NOT start (after
  ///   whitespace) with `{`, the body is not a JSON object and the first
  ///   end tag is taken as the close. A stray `{` later in malformed bytes
  ///   (`<tool_call>bad{"</tool_call>{"name":"x"}`) therefore cannot divert
  ///   the scan, and the suffix after the close is preserved.
  /// * If they do, the body is a JSON object, where an end-tag literal
  ///   inside a string is legitimate value text. The close is then the
  ///   first end tag OUTSIDE every `"…"` string, found with
  ///   [`closed_but_malformed_end_pos_quote_aware`]. A literal inside a
  ///   still-open string (`{"s":"</tool_call>` mid-stream) yields `None`.
  fn bound_section<'a>(
    &self,
    payload: &'a str,
    payload_at: usize,
    end_tag: &str,
  ) -> Option<(&'a str, usize)> {
    let gate = bound_context_or_plain_end(payload, end_tag, json_object_context_proven)?;
    // Both branches yield a position relative to `payload` (the scanner is
    // called with an offset of 0).
    let close_rel = if let Some(plain_close) = gate {
      plain_close
    } else {
      closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"")?
    };
    let (body, _) = payload.split_at(close_rel - end_tag.len());
    Some((body, payload_at + close_rel))
  }
}

impl ToolParser for JsonTools {
  /// Parses a bare JSON payload (markers already stripped) into one call.
  ///
  /// The trimmed text must be valid JSON carrying a string `name`; a missing
  /// `arguments` member becomes JSON `null`. Invalid JSON or a missing /
  /// non-string `name` is reported as an error. `_tools` is unused.
  fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let payload = match serde_json::from_str::<Value>(text.trim()) {
      Ok(value) => value,
      Err(e) => return Err(err(format!("json_tools: {e}"))),
    };
    let Some(name) = payload.get("name").and_then(Value::as_str) else {
      return Err(err("json_tools: missing name"));
    };
    let arguments = match payload.get("arguments") {
      Some(found) => found.clone(),
      None => Value::Null,
    };
    Ok(obj(name, arguments))
  }

  fn name(&self) -> &'static str {
    "json_tools"
  }

  /// Lock-step with [`Self::parse`]: peels one `start_tag … end_tag` section
  /// off `buffer` and parses its body as a `{name, arguments}` object.
  ///
  /// The section is bounded FIRST, via `Self::bound_section`, so the JSON
  /// balancer only ever sees bytes before the wrapper close. Without that,
  /// a buffer like `<tool_call>bad</tool_call>{"name":"x"}` would lock onto
  /// the *suffix* object, find no end tag after it, return `Ok(None)`, and
  /// silently drop the suffix until cap/EOS.
  ///
  /// Returns `Ok(None)` while the start tag or the wrapper close is not yet
  /// in the buffer. Once the section is bounded the result is always
  /// `Ok(Some((calls, end_pos)))`; `calls` is empty when the body does not
  /// balance as a JSON object or when `parse` rejects it.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();
    let (payload_at, payload) = match locate_tagged_payload(buffer, start_tag) {
      Some(located) => located,
      None => return Ok(None),
    };
    // Structural order: bound the section, then look inside it.
    let (bounded, end_pos) = match self.bound_section(payload, payload_at, end_tag) {
      Some(section) => section,
      None => return Ok(None),
    };
    let calls = if balanced_json_object_prefix(bounded).is_some() {
      let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
      // A rejected body (e.g. a top-level array failing the `name` lookup)
      // still counts as a consumed section: surface zero calls so the
      // processor advances past it instead of looping.
      self.parse(inner, tools).unwrap_or_default()
    } else {
      // Bounded but not a balanced JSON object: zero calls, known end, so
      // the same-chunk suffix survives.
      Vec::new()
    };
    Ok(Some((calls, end_pos)))
  }
}

// ----------------------------------------------------------------------------
// pythonic: [func(arg="v", n=2)]
// ----------------------------------------------------------------------------

/// `pythonic` — `<|tool_call_start|>[name(a="x", b=2)]<|tool_call_end|>`.
///
/// Stateless unit type; the parsing logic lives in its inherent
/// `bound_section` helper and its [`ToolParser`] implementation.
pub struct Pythonic;

impl Pythonic {
  /// **Bound-to-first-end-tag step.** Locate the first real wrapper
  /// `end_tag` in `payload` using a Python-string-quote-aware scan that
  /// tracks BOTH `'` and `"` (with `\'`/`\"`/`\\` escape handling). An
  /// in-string `<|tool_call_end|>` literal (e.g. `[echo(s='<|tool_call_end|>')]`
  /// mid-stream) returns `None` — the negative tests stay green.
  ///
  /// **Why Python-quote-aware here:** pythonic bodies are `[name(args)]`
  /// where `args` can hold both single- and double-quoted strings. An
  /// in-string end-tag literal is legitimate value text; only an end-tag
  /// OUTSIDE every quoted region is the real wrapper close.
  ///
  /// **Context predicate:** [`bound_context_or_plain_end`] with the
  /// [`pythonic_call_context_proven`] predicate gates Python-quote-aware
  /// scanning behind PROOF of a `[name(` call body (a `[` followed by an
  /// identifier followed by `(`). A stray `[` inside malformed bytes
  /// (`<|tool_call_start|>bad[<|tool_call_end|>[name(x=1)]
  /// tail` — the `[` in `bad[` is a literal "opener")
  /// does NOT prove pythonic context here because the predicate requires
  /// the `[name(` shape, not any `[`. With the predicate failing the
  /// gate returns the plain end_tag position so the suffix is preserved.
  fn bound_section<'a>(
    &self,
    payload: &'a str,
    payload_at: usize,
    end_tag: &str,
  ) -> Option<(&'a str, usize)> {
    let end_pos = match bound_context_or_plain_end(payload, end_tag, pythonic_call_context_proven)?
    {
      Some(end_rel) => end_rel,
      None => closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"'")?,
    };
    let body_end = end_pos - end_tag.len();
    Some((&payload[..body_end], payload_at + end_pos))
  }
}

impl ToolParser for Pythonic {
  /// Parses the first `[name(key=value, ...)]` call found in `text`.
  ///
  /// Counterpart of the upstream `_tool_call_regex = \[(\w+)\((.*?)\)\]`.
  /// Each keyword argument from [`parse_kw_args`] is converted with
  /// `literal_eval` and stored under its key; a repeated key keeps the
  /// last value.
  ///
  /// # Errors
  /// Returns `"pythonic: No function provided."` when no call is found,
  /// and propagates any error from [`parse_kw_args`].
  fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let Some((fname, args_str)) = find_pythonic_call(text) else {
      return Err(err("pythonic: No function provided."));
    };
    let arguments: serde_json::Map<String, Value> = parse_kw_args(&args_str)?
      .into_iter()
      .map(|(key, raw)| (key, literal_eval(&raw)))
      .collect();
    Ok(obj(&fname, Value::Object(arguments)))
  }

  /// Parser identifier.
  fn name(&self) -> &'static str {
    "pythonic"
  }

  /// Tries to extract one complete tagged call from `buffer`.
  ///
  /// * `Ok(None)` — no start tag was located, or the section could not be
  ///   bounded by `Self::bound_section` yet.
  /// * `Ok(Some((calls, end_pos)))` — the section is closed at `end_pos`.
  ///   `calls` is empty when the bounded body fails the
  ///   `pythonic_call_close` check, or when [`Self::parse`] errors or
  ///   returns nothing.
  ///
  /// The section is bounded BEFORE the body is inspected, so the call
  /// check only sees bytes that precede the wrapper close. A buffer such as
  /// `<|tool_call_start|>bad<|tool_call_end|>[echo(x=1)]` therefore cannot
  /// lock onto the `[...]` that follows the section.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();

    let (payload_at, payload) = match locate_tagged_payload(buffer, start_tag) {
      Some(located) => located,
      None => return Ok(None),
    };
    let (bounded, end_pos) = match self.bound_section(payload, payload_at, end_tag) {
      Some(section) => section,
      None => return Ok(None),
    };

    let calls = match pythonic_call_close(bounded) {
      // The body closes as a call: hand the de-tagged section to `parse`.
      // A parse failure degrades to zero calls rather than an error.
      Some(_) => {
        let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
        self.parse(inner, tools).unwrap_or_default()
      }
      // Bounded but malformed: report zero calls at the known `end_pos` so
      // the caller can still advance past the section.
      None => Vec::new(),
    };
    Ok(Some((calls, end_pos)))
  }
}

/// Recognizes a pythonic `[name(` opener starting exactly at byte `at`.
///
/// Mirrors the head of the upstream `_tool_call_regex = \[(\w+)\(`,
/// restricted to ASCII word bytes:
/// * `payload[at]` must be `[`;
/// * it must be followed immediately by one or more ASCII alphanumeric or
///   `_` bytes (no whitespace after `[`);
/// * the name must be followed immediately by `(` (no whitespace before it).
///
/// Returns `Some((name_start, after_open_paren))`, where `name_start` is
/// the index of the first name byte (one past `[`) and `after_open_paren`
/// is one past the `(`. Returns `None` for any other shape, including an
/// out-of-range `at`.
///
/// [`find_pythonic_call`] and [`find_first_pythonic_call_start`] both rely
/// on this function, so they agree on what counts as a call opener.
fn pythonic_call_start_at(payload: &str, at: usize) -> Option<(usize, usize)> {
  let bytes = payload.as_bytes();
  if bytes.get(at) != Some(&b'[') {
    return None;
  }
  let name_start = at + 1;
  let name_len = bytes[name_start..]
    .iter()
    .take_while(|b| b.is_ascii_alphanumeric() || **b == b'_')
    .count();
  let paren_at = name_start + name_len;
  // Accept only a non-empty name that runs straight into `(`.
  (name_len > 0 && bytes.get(paren_at) == Some(&b'(')).then_some((name_start, paren_at + 1))
}

/// Finds the earliest byte offset in `payload` at which
/// [`pythonic_call_start_at`] recognizes a `[name(` opener.
///
/// Returns that opener's `after_open_paren` index (one past its `(`), or
/// `None` when no position in `payload` starts a valid opener.
fn find_first_pythonic_call_start(payload: &str) -> Option<usize> {
  (0..payload.len())
    .find_map(|at| pythonic_call_start_at(payload, at))
    .map(|(_, after_open_paren)| after_open_paren)
}

/// Extracts the first `[name(args)]` call from `text`.
///
/// Equivalent of the first match of `\[(\w+)\((.*?)\)\]` with DOTALL. The
/// opener is the earliest position accepted by [`pythonic_call_start_at`],
/// and `args` runs non-greedily up to the first `)]` after that opener's
/// `(`.
///
/// Returns `Some((name, args))`, or `None` when `text` has no opener or no
/// `)]` follows the first opener.
///
/// Only the first opener needs checking. Every later opener's remainder is
/// a suffix of the first opener's remainder, so if `)]` is missing after
/// the first it is missing after all of them. Stopping there avoids
/// quadratic rescans of unclosed input such as `[a([a([a(…`. The name is
/// allocated only once a match is certain.
fn find_pythonic_call(text: &str) -> Option<(String, String)> {
  let (name_start, after_open) =
    (0..text.len()).find_map(|at| pythonic_call_start_at(text, at))?;
  let rest = &text[after_open..];
  let close = rest.find(")]")?;
  // `after_open` is one past `(`, so the name ends one byte earlier.
  let name = &text[name_start..after_open - 1];
  Some((name.to_owned(), rest[..close].to_owned()))
}

/// Splits a pythonic argument list (`key=value, key="value"`) into
/// `(key, raw_value)` pairs.
///
/// Counterpart of the upstream `_tool_args_regex`
/// `(\w+)=(?:"([^"]*)"|([^,]+))(?:,\s*|$)` applied with `findall`:
/// * the key is the trimmed text before the next `=`;
/// * a value starting with `"` runs to the next `"` (quotes excluded); if
///   the quote is never closed, the remainder becomes the value and
///   parsing stops;
/// * any other value runs to the next `,` (or the end) and is trimmed.
///
/// Parsing stops at the first remainder that has no `=`.
///
/// Every offset is measured against the current `remaining` slice and
/// lands next to an ASCII delimiter, so slicing stays on UTF-8 char
/// boundaries even for non-ASCII values such as `city=  "é"`.
///
/// # Errors
/// Returns `"pythonic: malformed argument encoding"` if the computed
/// resume offset is not a char boundary. This is a defensive check so that
/// a bad offset surfaces as an [`Error`] rather than a panic.
fn parse_kw_args(s: &str) -> Result<Vec<(String, String)>, Error> {
  let mut pairs = Vec::new();
  let mut remaining = s.trim();
  while !remaining.is_empty() {
    let Some(eq_at) = remaining.find('=') else {
      break;
    };
    let key = remaining[..eq_at].trim().to_owned();

    // The value region starts after `=` and any whitespace. Its offset in
    // `remaining` is recovered from the lengths, so later indices never
    // depend on a re-sliced view.
    let region = remaining[eq_at + 1..].trim_start();
    let value_at = remaining.len() - region.len();

    let (value, resume_at) = match region.strip_prefix('"') {
      Some(quoted) => match quoted.find('"') {
        // `+ 2` covers the opening and closing quote bytes.
        Some(close) => (quoted[..close].to_owned(), value_at + close + 2),
        // Unterminated quote: take everything that is left.
        None => (quoted.to_owned(), remaining.len()),
      },
      None => {
        let stop = region.find(',').unwrap_or(region.len());
        (region[..stop].trim().to_owned(), value_at + stop)
      }
    };
    pairs.push((key, value));

    if resume_at >= remaining.len() {
      break;
    }
    if !remaining.is_char_boundary(resume_at) {
      return Err(err("pythonic: malformed argument encoding"));
    }
    // Skip the separator: optional whitespace, commas, optional whitespace.
    remaining = remaining[resume_at..]
      .trim_start()
      .trim_start_matches(',')
      .trim_start();
  }
  Ok(pairs)
}

// ----------------------------------------------------------------------------
// mistral: name[ARGS]{json}
// ----------------------------------------------------------------------------

/// `mistral` — `[TOOL_CALLS]name[ARGS]{json}`.
pub struct Mistral;

impl ToolParser for Mistral {
  /// Parses `name[ARGS]{json}`.
  ///
  /// Counterpart of the upstream regex `\s*(\w+)\[ARGS\]\s*(\{.*\})`. The
  /// name is the trimmed text before the first `[ARGS]`. The arguments are
  /// the JSON value that starts at the first `{` after the marker and runs
  /// to the end of `text`, ignoring trailing whitespace.
  ///
  /// # Errors
  /// Fails when `[ARGS]` is absent, when no `{` follows it, or when the
  /// JSON text does not deserialize.
  fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let Some((head, tail)) = text.split_once("[ARGS]") else {
      return Err(err(format!("mistral: Could not parse tool call from: {text}")));
    };
    let name = head.trim().to_owned();
    // Searching for `{` already skips any whitespace after the marker.
    let Some(brace) = tail.find('{') else {
      return Err(err("mistral: no json args"));
    };
    let json_str = tail[brace..].trim_end();
    let args: Value = serde_json::from_str(json_str).map_err(|e| err(format!("mistral: {e}")))?;
    Ok(obj(&name, args))
  }

  /// Parser identifier.
  fn name(&self) -> &'static str {
    "mistral"
  }

  /// Tries to extract one `[TOOL_CALLS]name[ARGS]{json}` call from `buffer`.
  ///
  /// Follows the same scan as [`Self::parse`]: locate `[ARGS]` in the
  /// payload, then balance the JSON object after it with
  /// `balanced_json_object_prefix`. The reported `end_pos` is the absolute
  /// offset where that object ends (payload offset + offset past `[ARGS]`
  /// + the object's end within the post-`[ARGS]` slice).
  ///
  /// * `Ok(None)` — no start tag was located.
  /// * `[ARGS]` or a balanced object is missing — the result is whatever
  ///   `closed_but_malformed_end_pos` reports for the end tag, paired with
  ///   zero calls. Per the original notes, mistral's end tag is empty and
  ///   that helper then reports nothing; this could not be confirmed from
  ///   this section of the file.
  /// * Otherwise `Ok(Some((calls, end_pos)))`; `calls` is empty when
  ///   [`Self::parse`] errors or returns nothing.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    const ARGS_MARKER: &str = "[ARGS]";
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();

    let (payload_at, payload) = match locate_tagged_payload(buffer, start_tag) {
      Some(located) => located,
      None => return Ok(None),
    };

    // Absolute offset one past the balanced JSON object, if the payload
    // has both the marker and a balanced object after it.
    let object_end = payload.find(ARGS_MARKER).and_then(|marker_rel| {
      let after_marker = marker_rel + ARGS_MARKER.len();
      balanced_json_object_prefix(&payload[after_marker..])
        .map(|(_, object_end_rel)| payload_at + after_marker + object_end_rel)
    });
    let Some(end_pos) = object_end else {
      return Ok(
        closed_but_malformed_end_pos(buffer, payload_at, end_tag).map(|ep| (Vec::new(), ep)),
      );
    };

    // Only the start tag is stripped; the section ends at the object.
    let inner = strip_section_markers(&buffer[..end_pos], start_tag, "");
    let calls = self.parse(inner, tools).unwrap_or_default();
    Ok(Some((calls, end_pos)))
  }
}

// ----------------------------------------------------------------------------
// qwen3_coder: <function=name>...<parameter=p>v</parameter></function>
// ----------------------------------------------------------------------------

/// `qwen3_coder` — XML-style `<function=name><parameter=p>v</parameter></function>`.
pub struct Qwen3Coder;

impl Qwen3Coder {
  /// Bounds a qwen3_coder section to its wrapper close.
  ///
  /// `payload` is the text after the start tag and `payload_at` is its
  /// offset in the caller's buffer. On success returns
  /// `(body, absolute_end)`: `body` is the payload up to (not including)
  /// the end tag, and `absolute_end` is `payload_at` plus the offset one
  /// past the end tag.
  ///
  /// The end tag is located by one of two scanners:
  /// * [`bound_context_or_plain_end`], gated by the
  ///   [`qwen_function_context_proven`] predicate. When it yields a
  ///   position (one past the end tag), that position is used. When it
  ///   yields nothing at the outer level, this function returns `None`.
  /// * Otherwise [`xml_value_aware_end_tag_scan`] over
  ///   `<parameter=` … `</parameter>` regions. Parameter values may contain
  ///   the end-tag literal, so only an end tag outside every parameter
  ///   region counts as the wrapper close.
  ///
  /// Returns `None` when neither scanner reports a close.
  fn bound_section<'a>(
    &self,
    payload: &'a str,
    payload_at: usize,
    end_tag: &str,
  ) -> Option<(&'a str, usize)> {
    let gate = bound_context_or_plain_end(payload, end_tag, qwen_function_context_proven)?;
    // Normalize both scanners to the offset where the end tag STARTS: the
    // gate reports one past the tag, the XML-aware scan reports its start.
    let end_tag_at = gate
      .map(|past_tag| past_tag - end_tag.len())
      .or_else(|| xml_value_aware_end_tag_scan(payload, "<parameter=", "</parameter>", end_tag))?;
    Some((&payload[..end_tag_at], payload_at + end_tag_at + end_tag.len()))
  }
}

impl ToolParser for Qwen3Coder {
  /// Parses `<function=NAME><parameter=P>V</parameter>…</function>`.
  ///
  /// Counterpart of the upstream `_function_regex`
  /// `<function=(.*?)</function>$` (DOTALL, first match).
  ///
  /// * The opener is the FIRST `<function=` literal, validated by
  ///   [`qwen_function_open_at`]. If that first marker is malformed the
  ///   whole input is rejected, and a later valid opener nested inside it
  ///   (e.g. `<function=a<function=real>`) is never used.
  /// * The body ends at the LAST `</function>` after the opener, because
  ///   parameter values may contain that literal themselves.
  /// * Each `<parameter=P>V</parameter>` region found by [`find_all`]
  ///   yields one argument. One leading and one trailing `\n` are removed
  ///   from `V`, which is then converted by [`convert_param_value`] against
  ///   the tool schema from `tool_properties`. Regions without a `>` are
  ///   skipped, and a repeated parameter name keeps the last value.
  ///
  /// # Errors
  /// Returns `"qwen3_coder: No function provided."` when the opener or the
  /// closing `</function>` is missing.
  fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let no_function = || err("qwen3_coder: No function provided.");

    let opener =
      find_first_qwen_function_open(text).and_then(|at| qwen_function_open_at(text, at));
    let Some((name_start, body_start)) = opener else {
      return Err(no_function());
    };
    let body = &text[body_start..];
    let Some(close_at) = body.rfind("</function>") else {
      return Err(no_function());
    };

    // `body_start` is one past the opener's `>`, so the name ends one byte
    // earlier.
    let func_name = text[name_start..body_start - 1].to_owned();
    let props = tool_properties(tools, &func_name);

    let mut args = serde_json::Map::new();
    for region in find_all(&body[..close_at], "<parameter=", "</parameter>") {
      // `region` is `P>V`; anything without a `>` is not a parameter.
      let Some((param_name, raw)) = region.split_once('>') else {
        continue;
      };
      let value = raw.strip_prefix('\n').unwrap_or(raw);
      let value = value.strip_suffix('\n').unwrap_or(value);
      let converted = convert_param_value(value, param_name, props);
      args.insert(param_name.to_owned(), converted);
    }
    Ok(obj(&func_name, Value::Object(args)))
  }

  /// Parser identifier.
  fn name(&self) -> &'static str {
    "qwen3_coder"
  }

  /// Tries to extract one complete tagged call from `buffer`.
  ///
  /// * `Ok(None)` — no start tag was located, or the section could not be
  ///   bounded by `Self::bound_section` yet.
  /// * `Ok(Some((calls, end_pos)))` — the section is closed at `end_pos`.
  ///   `calls` is empty when the bounded body is malformed or when
  ///   [`Self::parse`] errors or returns nothing.
  ///
  /// The section is bounded BEFORE any opener search, so no scan sees bytes
  /// after the wrapper close. A buffer such as
  /// `<tool_call>bad</tool_call><function=f>…</function>` therefore cannot
  /// lock onto the `<function=` that follows the section.
  ///
  /// Within the bounded body, two structural gates must pass before
  /// `parse` runs:
  /// 1. The first `<function=` literal must be a valid opener, checked with
  ///    the same recognizer `parse` uses.
  /// 2. A `</function>` must exist after the opener and outside every
  ///    `<parameter=`…`</parameter>` region. Parameter values are opaque
  ///    text and may contain that literal.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    /// Offset in `body` of the first `</function>` at or after `from` that
    /// lies outside every `<parameter=`…`</parameter>` region. Returns
    /// `None` when no such close exists or a parameter region is left open.
    fn function_close_outside_params(body: &str, from: usize) -> Option<usize> {
      const FUNCTION_CLOSE: &str = "</function>";
      const PARAMETER_OPEN: &str = "<parameter=";
      const PARAMETER_CLOSE: &str = "</parameter>";

      let mut cursor = from;
      loop {
        let tail = &body[cursor..];
        // No candidate close left: the body never closes its function.
        let close_rel = tail.find(FUNCTION_CLOSE)?;
        match tail.find(PARAMETER_OPEN) {
          // A parameter opens first: skip the whole region, since the
          // candidate close may be part of its value.
          Some(open_rel) if open_rel < close_rel => {
            let value_at = cursor + open_rel + PARAMETER_OPEN.len();
            let end_rel = body[value_at..].find(PARAMETER_CLOSE)?;
            let resumed = value_at + end_rel + PARAMETER_CLOSE.len();
            // The skipped region is non-empty, so the scan always advances.
            debug_assert!(resumed > cursor);
            cursor = resumed;
          }
          // The close comes before any further parameter: the real close.
          _ => return Some(cursor + close_rel),
        }
      }
    }

    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();

    let (payload_at, payload) = match locate_tagged_payload(buffer, start_tag) {
      Some(located) => located,
      None => return Ok(None),
    };
    let (bounded, end_pos) = match self.bound_section(payload, payload_at, end_tag) {
      Some(section) => section,
      None => return Ok(None),
    };

    // Gate (1) then gate (2). The scan for the close starts one past the
    // opener's `>`.
    let well_formed = find_first_qwen_function_open(bounded)
      .and_then(|marker_at| qwen_function_open_at(bounded, marker_at))
      .and_then(|(_, scan_from)| function_close_outside_params(bounded, scan_from))
      .is_some();

    // A malformed body or a failed parse still reports `end_pos` with zero
    // calls, so the caller can advance past the section.
    let calls = if well_formed {
      let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
      self.parse(inner, tools).unwrap_or_default()
    } else {
      Vec::new()
    };
    Ok(Some((calls, end_pos)))
  }
}

/// Recognizes a qwen3_coder `<function=NAME>` open-tag starting exactly at
/// byte `at`.
///
/// `NAME` is any non-empty run of bytes containing neither `>` nor `<`:
/// * `>` terminates the name;
/// * `<` is rejected because it would start a sibling tag inside the
///   opener.
///
/// Dots (`foo.bar`), spaces (`foo bar`) and other punctuation are
/// therefore accepted; the name is NOT limited to `[A-Za-z0-9_-]+`.
///
/// Returns `Some((name_start, after_close_bracket))`, where `name_start`
/// is one past `<function=` and `after_close_bracket` is one past the
/// closing `>`. Returns `None` when `at` does not start the literal, the
/// name is empty, the name hits a `<`, or the input ends before `>`.
///
/// [`Qwen3Coder::parse`], [`Qwen3Coder::try_parse_one_call`] and
/// [`find_first_qwen_function_open`] all go through this function, so they
/// agree on what counts as an opener.
fn qwen_function_open_at(payload: &str, at: usize) -> Option<(usize, usize)> {
  const OPENER: &[u8] = b"<function=";
  let bytes = payload.as_bytes();
  if !bytes.get(at..)?.starts_with(OPENER) {
    return None;
  }
  let name_start = at + OPENER.len();
  // First delimiter after the name; running off the end means no opener.
  let name_len = bytes[name_start..]
    .iter()
    .position(|&b| b == b'>' || b == b'<')?;
  let stop_at = name_start + name_len;
  (name_len > 0 && bytes[stop_at] == b'>').then_some((name_start, stop_at + 1))
}

/// Locates the section's `<function=` anchor.
///
/// Returns the index of the FIRST `<function=` literal in `payload`, but
/// only if [`qwen_function_open_at`] accepts it as a valid opener. When
/// that first literal is malformed (empty name, or a `<` before the
/// closing `>`), the result is `None` and later literals are never
/// considered.
///
/// Stopping at the first marker is deliberate. If the scan continued, a
/// malformed outer opener such as `<function=a<function=real>...` would
/// fall through to the nested marker and the parser would emit `real` from
/// a malformed section.
///
/// Examples:
/// * `<function=foo.bar>...` → `Some(0)`.
/// * `<function=a<function=real>...` → `None` (the nested marker is
///   ignored).
/// * `bad<function=foo>` → `Some(3)`; bytes before the marker do not
///   matter.
/// * `<function=>...` → `None` (empty name).
///
/// Callers that need the `(name_start, after_close_bracket)` span call
/// [`qwen_function_open_at`] again at the returned index.
fn find_first_qwen_function_open(payload: &str) -> Option<usize> {
  payload
    .find("<function=")
    .filter(|&marker_at| qwen_function_open_at(payload, marker_at).is_some())
}

/// Converts the raw text `v` of parameter `name` into a JSON value, guided
/// by the tool schema in `props`.
///
/// Rules, checked in this order:
/// 1. `null` in any ASCII case → `Value::Null`.
/// 2. No schema, or no entry for `name` → the raw string.
/// 3. Otherwise the schema's `type` decides (trimmed and lower-cased,
///    defaulting to `"string"`):
///    * `string`/`str`/`text`/`varchar`/`char`/`enum` → the raw string;
///    * prefix `int`/`uint`/`long`/`short`/`unsigned` → an `i64`, or the
///      raw string if it does not parse;
///    * prefix `num`/`float` → always a JSON float, never narrowed to an
///      integer; the raw string if it does not parse or is NaN/±Inf, which
///      JSON cannot represent;
///    * `boolean`/`bool`/`binary` → `true` only when the trimmed text is
///      `true` in any ASCII case;
///    * `object`/`array`/`arr` or prefix `dict`/`list` → parsed as JSON,
///      falling back to `literal_eval`;
///    * anything else → `literal_eval`.
fn convert_param_value(
  v: &str,
  name: &str,
  props: Option<&serde_json::Map<String, Value>>,
) -> Value {
  const INT_PREFIXES: [&str; 5] = ["int", "uint", "long", "short", "unsigned"];
  // Shared fallback: keep the text exactly as received.
  let as_text = || Value::String(v.to_owned());

  if v.eq_ignore_ascii_case("null") {
    return Value::Null;
  }
  let Some(schema) = props.and_then(|known| known.get(name)) else {
    return as_text();
  };
  let declared = schema
    .get("type")
    .and_then(Value::as_str)
    .unwrap_or("string")
    .trim()
    .to_lowercase();

  match declared.as_str() {
    "string" | "str" | "text" | "varchar" | "char" | "enum" => as_text(),
    kind if INT_PREFIXES.iter().any(|prefix| kind.starts_with(prefix)) => v
      .trim()
      .parse::<i64>()
      .map_or_else(|_| as_text(), Value::from),
    kind if kind.starts_with("num") || kind.starts_with("float") => v
      .trim()
      .parse::<f64>()
      .ok()
      // `from_f64` rejects non-finite values, which then fall back to text.
      .and_then(serde_json::Number::from_f64)
      .map_or_else(as_text, Value::Number),
    "boolean" | "bool" | "binary" => Value::Bool(v.trim().eq_ignore_ascii_case("true")),
    kind
      if matches!(kind, "object" | "array" | "arr")
        || kind.starts_with("dict")
        || kind.starts_with("list") =>
    {
      serde_json::from_str(v).unwrap_or_else(|_| literal_eval(v))
    }
    _ => literal_eval(v),
  }
}

/// Collects the inner text of every `open` … `close` region in `text`,
/// scanning left to right.
///
/// Each region ends at the NEAREST `close` after its `open` (non-greedy,
/// and newlines are ordinary bytes). Scanning resumes right after that
/// `close`, so regions never overlap. An `open` with no later `close` ends
/// the scan and contributes nothing.
fn find_all(text: &str, open: &str, close: &str) -> Vec<String> {
  let mut regions = Vec::new();
  let mut tail = text;
  while let Some((_, after_open)) = tail.split_once(open) {
    let Some((inner, after_close)) = after_open.split_once(close) else {
      break;
    };
    regions.push(inner.to_owned());
    tail = after_close;
  }
  regions
}

// ----------------------------------------------------------------------------
// glm47: name<arg_key>k</arg_key><arg_value>v</arg_value>...
// ----------------------------------------------------------------------------

/// `glm47` — `name<arg_key>k</arg_key><arg_value>v</arg_value>` with JSON /
/// plain-text fallbacks.
pub struct Glm47;

impl Glm47 {
  /// **Bound-to-first-end-tag step.** Finds the wrapper close for one glm47
  /// section and returns `(body, end_pos)`: the payload text before the
  /// end-tag, and the offset just past the end-tag measured from
  /// `payload_at`.
  ///
  /// The scanner is picked from the body's leading shape (see
  /// [`classify_json_payload_start`]):
  /// * **Object** (`{`-leading) and **Array** (`[`-leading): a
  ///   JSON-string-quote-aware scan, so an end-tag literal sitting inside a
  ///   JSON string is not taken for the wrapper close. Arrays are covered
  ///   because the `glm_parse_json` fallback accepts a top-level array.
  /// * **None** (no `{`/`[` opener): an XML-value-aware scan that skips every
  ///   `<arg_value>...</arg_value>` region, since those values may carry the
  ///   end-tag literal.
  ///
  /// Each arm first consults [`bound_context_or_plain_end`] with that arm's
  /// structural predicate (`{` / `[` context for the JSON arms, the
  /// `<arg_key>` literal for the XML arm). When the gate yields a position,
  /// that plain end-tag position is used directly; only when it yields no
  /// position does the syntax-aware scanner run.
  ///
  /// Returns `None` when either the gate or the chosen scanner reports no
  /// usable end-tag (presumably because the section is still streaming —
  /// confirm against the helpers).
  fn bound_section<'a>(
    &self,
    payload: &'a str,
    payload_at: usize,
    end_tag: &str,
  ) -> Option<(&'a str, usize)> {
    let tag_len = end_tag.len();
    // Both JSON arms share this fallback: the helper reports the offset just
    // past the end-tag, so step back over the tag to get the body length.
    let json_body_len = || {
      closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"")
        .map(|after_tag| after_tag - tag_len)
    };
    let body_len = match classify_json_payload_start(payload) {
      JsonPayloadStart::Object => {
        let gate = bound_context_or_plain_end(payload, end_tag, json_object_context_proven)?;
        match gate {
          Some(after_tag) => after_tag - tag_len,
          None => json_body_len()?,
        }
      }
      JsonPayloadStart::Array => {
        let gate = bound_context_or_plain_end(payload, end_tag, json_array_context_proven)?;
        match gate {
          Some(after_tag) => after_tag - tag_len,
          None => json_body_len()?,
        }
      }
      JsonPayloadStart::None => {
        let gate =
          bound_context_or_plain_end(payload, end_tag, literal_context_proven("<arg_key>"))?;
        match gate {
          Some(after_tag) => after_tag - tag_len,
          // This scanner already reports the end-tag's start offset.
          None => xml_value_aware_end_tag_scan(payload, "<arg_value>", "</arg_value>", end_tag)?,
        }
      }
    };
    Some((&payload[..body_len], payload_at + body_len + tag_len))
  }
}

impl ToolParser for Glm47 {
  /// Parse one glm47 payload.
  ///
  /// When the text contains `<arg_key>`, everything before it (trimmed) is
  /// the function name and each `<arg_key>`/`<arg_value>` pair becomes one
  /// argument; arguments the tool schema lists as strings stay verbatim,
  /// all others go through `deserialize`. Without `<arg_key>` the JSON
  /// fallback is tried, then the plain-text fallback, and finally an
  /// `"unknown"` call carrying the trimmed text under `"raw"` is returned.
  fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let Some(key_at) = text.find("<arg_key>") else {
      // Fallbacks, in priority order.
      let call = glm_parse_json(text, tools)
        .or_else(|| glm_parse_plain(text, tools))
        .unwrap_or_else(|| {
          ToolCall::new_nameless_id("unknown", serde_json::json!({"raw": text.trim()}))
        });
      return Ok(vec![call]);
    };

    let func_name = text[..key_at].trim().to_owned();
    let string_args = string_arg_names(tools, &func_name);
    let pairs = find_kv_pairs(
      text,
      "<arg_key>",
      "</arg_key>",
      "<arg_value>",
      "</arg_value>",
    );
    let mut args = serde_json::Map::new();
    for (raw_key, raw_val) in pairs {
      let key = raw_key.trim().to_owned();
      let raw_val = raw_val.trim();
      let val = if string_args.contains(&key) {
        Value::String(raw_val.to_owned())
      } else {
        deserialize(raw_val)
      };
      args.insert(key, val);
    }
    Ok(obj(&func_name, Value::Object(args)))
  }

  fn name(&self) -> &'static str {
    "glm47"
  }

  /// Streaming counterpart of [`Self::parse`], which accepts three payload
  /// shapes: `<arg_key>` XML, JSON (top-level object or array, via
  /// `glm_parse_json`), and plain text (via `glm_parse_plain`).
  ///
  /// **Structural:** the wrapper close is located first by
  /// `Self::bound_section`, so the body check below only ever sees bytes
  /// before the end-tag. Once the section is bounded every path returns
  /// `Ok(Some((calls, end_pos)))`; `calls` is empty when a JSON-shaped body
  /// is unbalanced or when `parse` fails or yields nothing, which keeps the
  /// known `end_pos` available to the caller.
  ///
  /// Returns `Ok(None)` when no tagged payload is located or the section
  /// cannot be bounded yet.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();
    let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
      return Ok(None);
    };
    // Structural: bound first, body-shape branch second.
    let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
      return Ok(None);
    };
    let malformed_json = match classify_json_payload_start(bounded) {
      JsonPayloadStart::Object => balanced_json_object_prefix(bounded).is_none(),
      JsonPayloadStart::Array => balanced_json_array_prefix(bounded).is_none(),
      // Non-JSON bodies (XML or plain text) are both accepted by `parse()`,
      // so parseability is left to the delegation below.
      JsonPayloadStart::None => false,
    };
    let calls = if malformed_json {
      // Bounded-but-malformed JSON body: report zero calls but keep the
      // known end position.
      Vec::new()
    } else {
      let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
      // A parse error is deliberately flattened into "no calls".
      self.parse(inner, tools).unwrap_or_default()
    };
    Ok(Some((calls, end_pos)))
  }
}

/// Re-type a parsed argument map against the tool schema for `func_name`.
///
/// * Arguments that `string_arg_names` reports as strings are forced to
///   strings: string values are kept as they are, any other value is replaced
///   by its JSON text.
/// * Every other argument that arrived as a string is passed through
///   `deserialize`; non-string values are copied unchanged.
fn normalize_arguments(
  func_name: &str,
  arguments: &serde_json::Map<String, Value>,
  tools: Option<&Value>,
) -> serde_json::Map<String, Value> {
  let string_args = string_arg_names(tools, func_name);
  let mut normalized = serde_json::Map::new();
  for (key, value) in arguments {
    let wants_string = string_args.contains(key);
    let retyped = match (wants_string, value) {
      (true, Value::String(_)) => value.clone(),
      (true, other) => Value::String(other.to_string()),
      (false, Value::String(raw)) => deserialize(raw),
      (false, other) => other.clone(),
    };
    normalized.insert(key.clone(), retyped);
  }
  normalized
}

/// JSON fallback for glm47 payloads.
///
/// Accepts a top-level object, or an array whose first element is an object
/// (only that first element is used). The call is read from one of three
/// layouts, tried in this order:
/// 1. `{"name": ..., "arguments": ...}`
/// 2. `{"function": ..., "arguments": ...}`
/// 3. `{"tool": {"name": ..., "arguments": ...}}`
///
/// If the name slot itself holds an object, its `name` replaces the name and
/// its `arguments` fill in when none were found yet. String-encoded arguments
/// are run through `deserialize`. Returns `None` when the text is not JSON,
/// the name is not a string, or the arguments are present but not an object.
fn glm_parse_json(text: &str, tools: Option<&Value>) -> Option<ToolCall> {
  let root: Value = serde_json::from_str(text.trim()).ok()?;
  // Unwrap `[{...}, ...]` to its leading object.
  let root = match root {
    Value::Array(mut items) if items.first().is_some_and(Value::is_object) => {
      items.swap_remove(0)
    }
    other => other,
  };
  let fields = root.as_object()?;

  let (name, mut arguments) = match (
    fields.get("name"),
    fields.get("function"),
    fields.get("arguments"),
  ) {
    (Some(n), _, Some(a)) => (Some(n.clone()), Some(a.clone())),
    (None, Some(f), Some(a)) => (Some(f.clone()), Some(a.clone())),
    _ => match fields.get("tool").and_then(Value::as_object) {
      Some(tool) => (tool.get("name").cloned(), tool.get("arguments").cloned()),
      None => (None, None),
    },
  };

  // A nested `{ "name": ..., "arguments": ... }` object in the name slot.
  let name = match name {
    Some(Value::Object(nested)) => {
      if arguments.is_none() {
        arguments = nested.get("arguments").cloned();
      }
      nested.get("name").cloned()
    }
    other => other,
  };

  // Arguments may arrive as an encoded string.
  let arguments = match arguments {
    Some(Value::String(encoded)) => Some(deserialize(&encoded)),
    other => other,
  };

  let name = name.and_then(|n| n.as_str().map(str::to_owned))?;
  match arguments {
    None => Some(ToolCall::new_nameless_id(name, serde_json::json!({}))),
    Some(Value::Object(raw_args)) => {
      let norm = normalize_arguments(&name, &raw_args, tools);
      Some(ToolCall::new_nameless_id(name, Value::Object(norm)))
    }
    _ => None,
  }
}

/// Plain-text fallback for glm47 payloads.
///
/// Shapes are tried in this order:
/// 1. `name\n{object}` — first line is the name, the remainder deserializes
///    to an object.
/// 2. `name` alone — a call with empty arguments.
/// 3. `name {object}` — split on the first space.
/// 4. `name k=v k=v ...` — every whitespace-separated token must be a
///    `key=value` pair with a non-blank key.
/// 5. Anything else — the text after the name is kept under `"raw"`.
///
/// Returns `None` only for blank input (or an empty name).
fn glm_parse_plain(text: &str, tools: Option<&Value>) -> Option<ToolCall> {
  let body = text.trim();
  if body.is_empty() {
    return None;
  }

  // Shape 1: name on its own line followed by an object.
  if let Some((first_line, remainder)) = body.split_once('\n') {
    let name = first_line.trim().to_owned();
    let remainder = remainder.trim();
    if !name.is_empty()
      && !remainder.is_empty()
      && let Value::Object(fields) = deserialize(remainder)
    {
      let norm = normalize_arguments(&name, &fields, tools);
      return Some(ToolCall::new_nameless_id(name, Value::Object(norm)));
    }
  }

  let (name, rest) = body.split_once(' ').map_or_else(
    || (body.to_owned(), String::new()),
    |(head, tail)| (head.to_owned(), tail.trim().to_owned()),
  );
  if name.is_empty() {
    return None;
  }
  // Shape 2: bare name.
  if rest.is_empty() {
    return Some(ToolCall::new_nameless_id(name, serde_json::json!({})));
  }
  // Shape 3: name followed by an object.
  if let Value::Object(fields) = deserialize(&rest) {
    let norm = normalize_arguments(&name, &fields, tools);
    return Some(ToolCall::new_nameless_id(name, Value::Object(norm)));
  }
  // Shape 4: key=value pairs; a single malformed token rejects the shape.
  if rest.contains('=') {
    let string_args = string_arg_names(tools, &name);
    let mut args = serde_json::Map::new();
    let mut well_formed = true;
    for token in rest.split_whitespace() {
      let pair = token
        .split_once('=')
        .filter(|(raw_key, _)| !raw_key.trim().is_empty());
      let Some((raw_key, raw_val)) = pair else {
        well_formed = false;
        break;
      };
      let key = raw_key.trim().to_owned();
      let val = if string_args.contains(&key) {
        Value::String(raw_val.to_owned())
      } else {
        deserialize(raw_val)
      };
      args.insert(key, val);
    }
    if well_formed && !args.is_empty() {
      return Some(ToolCall::new_nameless_id(name, Value::Object(args)));
    }
  }
  // Shape 5: keep the unparsed remainder.
  Some(ToolCall::new_nameless_id(
    name,
    serde_json::json!({"raw": rest}),
  ))
}

/// Extract `(key, value)` pairs where each key sits between `ko`/`kc` and its
/// value between the next `vo`/`vc` that follow.
///
/// Text between the markers (before `ko`, and between `kc` and `vo`) is
/// ignored. Scanning stops at the first pair that cannot be completed, so
/// pairs collected before that point are still returned.
fn find_kv_pairs(text: &str, ko: &str, kc: &str, vo: &str, vc: &str) -> Vec<(String, String)> {
  /// Peel one complete pair off the front of `input`; yields the key, the
  /// value, and the text remaining after the value's closing marker.
  fn take_pair<'t>(
    input: &'t str,
    ko: &str,
    kc: &str,
    vo: &str,
    vc: &str,
  ) -> Option<(&'t str, &'t str, &'t str)> {
    let (_, after_key_open) = input.split_once(ko)?;
    let (key, after_key_close) = after_key_open.split_once(kc)?;
    let (_, after_val_open) = after_key_close.split_once(vo)?;
    let (val, tail) = after_val_open.split_once(vc)?;
    Some((key, val, tail))
  }

  let mut pairs = Vec::new();
  let mut rest = text;
  while let Some((key, val, tail)) = take_pair(rest, ko, kc, vo, vc) {
    pairs.push((key.to_owned(), val.to_owned()));
    rest = tail;
  }
  pairs
}

// ----------------------------------------------------------------------------
// kimi_k2: <|tool_call_begin|>functions.name:0<|tool_call_argument_begin|>{json}<|tool_call_end|>
// ----------------------------------------------------------------------------

/// `kimi_k2` — `<|tool_call_begin|>functions.name:0<|tool_call_argument_begin|>{json}<|tool_call_end|>`.
pub struct KimiK2;

impl KimiK2 {
  /// Parse one `functions.name:N<|tool_call_argument_begin|>{json}` block.
  ///
  /// The trimmed text before the argument marker is kept whole as the call
  /// id. The function name is that id with the trailing `:N` removed and an
  /// optional leading `functions.` stripped. The text after the marker is
  /// trimmed and handed to `deserialize`.
  ///
  /// Errors when the argument marker is missing, when the head has no `:`,
  /// or when the text after the last `:` does not parse as a `u64`.
  fn parse_single(text: &str) -> Result<ToolCall, Error> {
    const ARG_BEGIN: &str = "<|tool_call_argument_begin|>";
    const NOT_FOUND: &str = "kimi_k2: No tool call found.";

    let (head, args_part) = text.split_once(ARG_BEGIN).ok_or_else(|| err(NOT_FOUND))?;
    let head = head.trim();
    // Approximates `^\s*((?:functions\.)?(.+?):\d+)\s*`: the index is whatever
    // follows the LAST colon.
    // NOTE(review): `parse::<u64>` after `trim` is looser than `\d+` (it
    // accepts a leading `+` and padding whitespace) — confirm this is intended.
    let (base, index) = head.rsplit_once(':').ok_or_else(|| err(NOT_FOUND))?;
    if index.trim().parse::<u64>().is_err() {
      return Err(err(NOT_FOUND));
    }
    let func_name = base.strip_prefix("functions.").unwrap_or(base);
    Ok(ToolCall::new(
      func_name.to_owned(),
      deserialize(args_part.trim()),
      Some(head.to_owned()),
    ))
  }
}

impl ToolParser for KimiK2 {
  /// Parse every `<|tool_call_begin|>...<|tool_call_end|>` block in `text`.
  /// When no such block is present the whole text is parsed as a single
  /// call. The first block that fails to parse aborts with that error.
  fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let matches = find_all(text, "<|tool_call_begin|>", "<|tool_call_end|>");
    if matches.is_empty() {
      Ok(vec![Self::parse_single(text)?])
    } else {
      matches.iter().map(|m| Self::parse_single(m)).collect()
    }
  }
  fn name(&self) -> &'static str {
    "kimi_k2"
  }
  /// Lock-step with [`Self::parse`] (above): the section is
  /// `<|tool_calls_section_begin|> ... <|tool_calls_section_end|>` with
  /// zero-or-more inner
  /// `<|tool_call_begin|>name:N<|tool_call_argument_begin|>{json}<|tool_call_end|>`
  /// blocks. `parse` ingests the full section and `find_all`s the inner
  /// blocks (or falls back to single-block parsing). The streaming walker
  /// races the section `end_tag` against the next per-call opener — section
  /// end first wins; opener first consumes one inner block (using balanced
  /// JSON for the args so an in-string `<|tool_call_end|>` cannot truncate).
  ///
  /// Returns `Ok(None)` when no tagged payload is located or when neither a
  /// section end nor another opener is found from the cursor. Otherwise
  /// returns the parsed calls (empty when `parse` fails or yields nothing)
  /// together with the buffer offset just past the section end-tag.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();
    let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
      return Ok(None);
    };
    let call_begin = "<|tool_call_begin|>";
    let arg_begin = "<|tool_call_argument_begin|>";
    let call_end = "<|tool_call_end|>";

    // Helpers for the inner-block early returns. The section end-tag
    // is `<|tool_calls_section_end|>`; we only fall into these branches once
    // a `<|tool_call_begin|>` opener has been seen, so any in-buffer
    // section end after the opener is either the genuine section close OR
    // sits inside the JSON args of an in-flight inner block. The
    // quote-aware variant disambiguates by skipping `"..."` regions.
    // Both helpers yield zero calls paired with whatever end position the
    // underlying scan reports, or `None` when the scan reports none.
    let r13_plain = || -> Option<(Vec<ToolCall>, usize)> {
      closed_but_malformed_end_pos(buffer, payload_at, end_tag).map(|ep| (Vec::new(), ep))
    };
    let r13_json = || -> Option<(Vec<ToolCall>, usize)> {
      closed_but_malformed_end_pos_quote_aware(buffer, payload_at, end_tag, b"\"")
        .map(|ep| (Vec::new(), ep))
    };

    // `cursor` is payload-relative and always sits just past the last fully
    // consumed inner block. The loop breaks with the payload-relative offset
    // of the section end-tag.
    let mut cursor = 0usize;
    let section_end_rel = loop {
      let end_rel = payload[cursor..].find(end_tag).map(|p| cursor + p);
      let open_rel = payload[cursor..].find(call_begin).map(|p| cursor + p);
      // Race: a section end at or before the next opener closes the section;
      // otherwise the opener starts another inner block to consume.
      let open_rel = match (end_rel, open_rel) {
        (Some(e), Some(o)) if e <= o => break e,
        (Some(_), Some(o)) => o,
        (Some(e), None) => break e,
        (None, Some(o)) => o,
        (None, None) => return Ok(None),
      };
      let after_open = open_rel + call_begin.len();
      let arg_open_rel = match payload[after_open..].find(arg_begin) {
        Some(a) => after_open + a,
        // Opener found but no `<|tool_call_argument_begin|>` —
        // the function-name region between them is plain text (no quote
        // structure), so a plain-substring section-end search is sound.
        None => return Ok(r13_plain()),
      };
      let args_at = arg_open_rel + arg_begin.len();
      let args_region = &payload[args_at..];
      // Offset, relative to `args_region`, just past this block's
      // `<|tool_call_end|>`.
      let after_args_rel = match classify_json_payload_start(args_region) {
        JsonPayloadStart::Object => {
          let Some((_, obj_end)) = balanced_json_object_prefix(args_region) else {
            // Args JSON malformed. Use JSON-string-aware section-end
            // search so an in-args-string `<|tool_calls_section_end|>`
            // literal cannot falsely close a streaming-mid-string inner
            // block, but a truly-bounded malformed body still surfaces
            // the same-chunk suffix.
            return Ok(r13_json());
          };
          let Some(end_rel) = args_region[obj_end..].find(call_end) else {
            // Balanced JSON args but no inner `<|tool_call_end|>`
            // — past the args object the bytes are plain (no string
            // structure), so plain section-end search is sound.
            return Ok(r13_plain());
          };
          obj_end + end_rel + call_end.len()
        }
        _ => {
          let Some(end_rel) = args_region.find(call_end) else {
            // Non-JSON args body (no `{` opener); the args region
            // is plain text, so plain section-end search is sound.
            return Ok(r13_plain());
          };
          end_rel + call_end.len()
        }
      };
      cursor = args_at + after_args_rel;
    };
    // Convert to a buffer offset just past the section end-tag.
    let end_pos = payload_at + section_end_rel + end_tag.len();
    let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
    // A parse error or an empty result both surface as zero calls with the
    // known end position.
    match self.parse(inner, tools) {
      Ok(calls) if !calls.is_empty() => Ok(Some((calls, end_pos))),
      _ => Ok(Some((Vec::new(), end_pos))),
    }
  }
}

// ----------------------------------------------------------------------------
// longcat
// ----------------------------------------------------------------------------

/// `longcat` — `name<longcat_arg_key>k</longcat_arg_key><longcat_arg_value>v</longcat_arg_value>`.
pub struct Longcat;

impl Longcat {
  /// **Bound-to-first-end-tag step.** Finds the wrapper close for one longcat
  /// section and returns `(body, end_pos)`: the payload text before the
  /// end-tag, and the offset just past the end-tag measured from
  /// `payload_at`.
  ///
  /// Longcat bodies come in two shapes (there is no array variant):
  /// * **`{`-leading**: a JSON-string-quote-aware scan, so an end-tag literal
  ///   inside a JSON string stays inside that string.
  /// * **anything else**: an XML-value-aware scan that skips every
  ///   `<longcat_arg_value>...</longcat_arg_value>` region, since those
  ///   values may carry the end-tag literal verbatim.
  ///
  /// Each arm first consults [`bound_context_or_plain_end`] with its own
  /// predicate ([`json_object_context_proven`] for `{` bodies, the
  /// `<longcat_arg_key>` literal otherwise). When the gate yields a position,
  /// that plain end-tag position is used directly; only when it yields no
  /// position does the syntax-aware scanner run.
  ///
  /// Returns `None` when either the gate or the chosen scanner reports no
  /// usable end-tag.
  fn bound_section<'a>(
    &self,
    payload: &'a str,
    payload_at: usize,
    end_tag: &str,
  ) -> Option<(&'a str, usize)> {
    let tag_len = end_tag.len();
    let body_len = match classify_json_payload_start(payload) {
      JsonPayloadStart::Object => {
        let gate = bound_context_or_plain_end(payload, end_tag, json_object_context_proven)?;
        match gate {
          Some(after_tag) => after_tag - tag_len,
          // The helper reports the offset just past the end-tag.
          None => closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"")
            .map(|after_tag| after_tag - tag_len)?,
        }
      }
      _ => {
        let gate = bound_context_or_plain_end(
          payload,
          end_tag,
          literal_context_proven("<longcat_arg_key>"),
        )?;
        match gate {
          Some(after_tag) => after_tag - tag_len,
          // This scanner already reports the end-tag's start offset.
          None => xml_value_aware_end_tag_scan(
            payload,
            "<longcat_arg_value>",
            "</longcat_arg_value>",
            end_tag,
          )?,
        }
      }
    };
    Some((&payload[..body_len], payload_at + body_len + tag_len))
  }
}

impl ToolParser for Longcat {
  /// Parse one longcat payload.
  ///
  /// A `{`-leading body that is valid JSON takes the fast path: its `name`
  /// (empty when absent or not a string) and its `arguments` (the whole
  /// document when absent) form the call. Otherwise the body must contain
  /// `<longcat_arg_key>`; the trimmed text before it is the function name
  /// and each key/value pair becomes one argument, kept verbatim for
  /// schema-declared string arguments and passed through `deserialize` for
  /// the rest.
  ///
  /// Errors with `longcat: no function name` when neither shape applies.
  fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let body = text.trim();
    let json_body = if body.starts_with('{') {
      serde_json::from_str::<Value>(body).ok()
    } else {
      None
    };
    if let Some(doc) = json_body {
      // Python `longcat` returns the raw parsed dict here. We surface its
      // `name` / `arguments` (falling back to the whole object) so callers
      // still get a structured ToolCall.
      let name = doc
        .get("name")
        .and_then(Value::as_str)
        .unwrap_or_default()
        .to_owned();
      let args = doc.get("arguments").cloned().unwrap_or(doc);
      return Ok(obj(&name, args));
    }

    let key_at = body
      .find("<longcat_arg_key>")
      .ok_or_else(|| err("longcat: no function name"))?;
    let func_name = body[..key_at].trim().to_owned();
    let string_args = string_arg_names(tools, &func_name);
    let pairs = find_kv_pairs(
      body,
      "<longcat_arg_key>",
      "</longcat_arg_key>",
      "<longcat_arg_value>",
      "</longcat_arg_value>",
    );
    let mut args = serde_json::Map::new();
    for (raw_key, raw_val) in pairs {
      let key = raw_key.trim().to_owned();
      let raw_val = raw_val.trim();
      let val = if string_args.contains(&key) {
        Value::String(raw_val.to_owned())
      } else {
        deserialize(raw_val)
      };
      args.insert(key, val);
    }
    Ok(obj(&func_name, Value::Object(args)))
  }

  fn name(&self) -> &'static str {
    "longcat"
  }

  /// Streaming counterpart of [`Self::parse`], which is either a `{...}`
  /// JSON object fast-path (not array — the other branch requires
  /// `<longcat_arg_key>` data) or the XML-style
  /// `<longcat_arg_key>`/`<longcat_arg_value>` shape.
  ///
  /// **Structural:** the wrapper close is located first by
  /// `Self::bound_section`, so the body check below only ever sees bytes
  /// before the end-tag. Once the section is bounded the result is always
  /// `Ok(Some((calls, end_pos)))`; `calls` is empty when a `{`-leading body
  /// is unbalanced or when `parse` fails or yields nothing.
  ///
  /// Returns `Ok(None)` when no tagged payload is located or the section
  /// cannot be bounded yet.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();
    let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
      return Ok(None);
    };
    // Structural: bound first, body-shape branch second.
    let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
      return Ok(None);
    };
    let malformed_json = match classify_json_payload_start(bounded) {
      JsonPayloadStart::Object => balanced_json_object_prefix(bounded).is_none(),
      _ => false,
    };
    let calls = if malformed_json {
      // Bounded-but-malformed JSON object body: report zero calls but keep
      // the known end position.
      Vec::new()
    } else {
      let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
      // A parse error is deliberately flattened into "no calls".
      self.parse(inner, tools).unwrap_or_default()
    };
    Ok(Some((calls, end_pos)))
  }
}

// ----------------------------------------------------------------------------
// minimax_m2: <invoke name="n"><parameter name="p">v</parameter></invoke>
// ----------------------------------------------------------------------------

/// `minimax_m2` — `<invoke name="n"><parameter name="p">v</parameter></invoke>`.
///
/// Unit struct with no fields; the parsing behaviour lives in its
/// `ToolParser` implementation.
pub struct MinimaxM2;

/// Trim `s` and, if what remains is wrapped in one matching pair of double or
/// single quotes, drop that pair. Anything else (including a lone quote
/// character or mismatched quotes) is returned trimmed but otherwise intact.
fn extract_name(s: &str) -> String {
  let trimmed = s.trim();
  let unquoted = ['"', '\'']
    .iter()
    .find_map(|&quote| trimmed.strip_prefix(quote)?.strip_suffix(quote));
  unquoted.unwrap_or(trimmed).to_owned()
}

/// Gather every type name a JSON-schema fragment admits, deduplicated and in
/// sorted order.
///
/// Sources, all merged together:
/// * `type` — a single string or an array of strings (non-string array
///   entries are ignored);
/// * `enum` — the JSON type name of each listed value;
/// * `anyOf` / `oneOf` / `allOf` — the types of each sub-schema, recursively.
///
/// Falls back to `["string"]` when nothing was found, which also applies to
/// sub-schemas and to a `schema` that is not an object.
fn schema_types(schema: &Value) -> Vec<String> {
  let mut seen = std::collections::BTreeSet::new();
  if let Some(fields) = schema.as_object() {
    match fields.get("type") {
      Some(Value::String(single)) => {
        seen.insert(single.clone());
      }
      Some(Value::Array(many)) => {
        seen.extend(many.iter().filter_map(Value::as_str).map(str::to_owned));
      }
      _ => {}
    }
    if let Some(Value::Array(members)) = fields.get("enum") {
      seen.extend(members.iter().map(|member| {
        let label = match member {
          Value::Null => "null",
          Value::Bool(_) => "boolean",
          Value::Number(n) if n.is_i64() || n.is_u64() => "integer",
          Value::Number(_) => "number",
          Value::String(_) => "string",
          Value::Array(_) => "array",
          Value::Object(_) => "object",
        };
        label.to_owned()
      }));
    }
    for combiner in ["anyOf", "oneOf", "allOf"] {
      if let Some(Value::Array(branches)) = fields.get(combiner) {
        seen.extend(branches.iter().flat_map(schema_types));
      }
    }
  }
  if seen.is_empty() {
    vec!["string".to_owned()]
  } else {
    seen.into_iter().collect()
  }
}

/// Convert a raw parameter string into a JSON value, guided by the set of
/// schema types the parameter may take.
///
/// Null handling comes first: the value `null` (any ASCII case), a value
/// whose lowercase form is `null` / `none` / `nil`, or a type list containing
/// `null` all produce `Value::Null`. Otherwise each declared type is tried in
/// a fixed priority order (integer, number, boolean, object/array, string)
/// and the first successful conversion wins. When no declared type converts,
/// the value is parsed as JSON and, failing that, returned as a string.
fn convert_with_types(value: &str, ptypes: &[String]) -> Value {
  // Most specific first; the string spellings act as the in-list fallback.
  const PRIORITY: [&str; 11] = [
    "integer", "int", "number", "float", "boolean", "bool", "object", "array", "string", "str",
    "text",
  ];

  if value.eq_ignore_ascii_case("null") {
    return Value::Null;
  }
  let declared: Vec<String> = ptypes.iter().map(|t| t.to_lowercase()).collect();
  let is_declared = |kind: &str| declared.iter().any(|t| t == kind);
  let lowered = value.to_lowercase();
  // NOTE(review): a declared `null` type yields `Null` for EVERY value, even
  // a non-null one under e.g. `["string", "null"]` — confirm this mirrors the
  // upstream parser before changing it.
  if is_declared("null") || matches!(lowered.as_str(), "null" | "none" | "nil") {
    return Value::Null;
  }

  for kind in PRIORITY {
    if !is_declared(kind) {
      continue;
    }
    let converted = match kind {
      "string" | "str" | "text" => Some(Value::String(value.to_owned())),
      "integer" | "int" => value.parse::<i64>().ok().map(|i| Value::Number(i.into())),
      // Schema asked for a NUMBER, not an integer: always emit a JSON float
      // and never promote a whole-valued `f64` to `i64`, which would lose the
      // type signal and saturate for magnitudes beyond `i64`. Non-finite
      // values (NaN / ±Inf) have no JSON representation, so they fall
      // through to the next declared type or the final fallback.
      "number" | "float" => value
        .parse::<f64>()
        .ok()
        .and_then(serde_json::Number::from_f64)
        .map(Value::Number),
      "boolean" | "bool" => match value.trim().to_lowercase().as_str() {
        "true" | "1" | "yes" | "on" => Some(Value::Bool(true)),
        "false" | "0" | "no" | "off" => Some(Value::Bool(false)),
        _ => None,
      },
      "object" | "array" => serde_json::from_str::<Value>(value).ok(),
      _ => None,
    };
    if let Some(found) = converted {
      return found;
    }
  }
  serde_json::from_str::<Value>(value).unwrap_or_else(|_| Value::String(value.to_owned()))
}

impl ToolParser for MinimaxM2 {
  /// Parse every `<invoke name=...>...</invoke>` block in `text` into one
  /// call each.
  ///
  /// The invoke name is the text before the first `>` (surrounding quotes
  /// removed). Each `<parameter name=...>value</parameter>` inside the block
  /// becomes an argument: the value is trimmed and converted according to
  /// the parameter's schema types, defaulting to `string` when the tool or
  /// parameter is not described. Parameters without a `>` are skipped.
  ///
  /// Errors with `minimax_m2: No tool call found` when the text holds no
  /// complete invoke block.
  fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let invokes = find_all(text, "<invoke name=", "</invoke>");
    if invokes.is_empty() {
      return Err(err("minimax_m2: No tool call found"));
    }
    let mut calls = Vec::with_capacity(invokes.len());
    for inv in &invokes {
      let name_seg = inv.split('>').next().unwrap_or("");
      let function_name = extract_name(name_seg);
      let props = tool_properties(tools, &function_name);
      let mut args = serde_json::Map::new();
      for pm in find_all(inv, "<parameter name=", "</parameter>") {
        let Some((raw_name, raw_value)) = pm.split_once('>') else {
          continue;
        };
        let pname = extract_name(raw_name);
        // `trim` already removes any leading/trailing newline, so no further
        // newline stripping (and no owned copy of the value) is needed.
        let pval = raw_value.trim();
        let ptypes = props
          .and_then(|p| p.get(&pname))
          .map(schema_types)
          .unwrap_or_else(|| vec!["string".to_owned()]);
        args.insert(pname, convert_with_types(pval, &ptypes));
      }
      calls.push(ToolCall::new_nameless_id(
        &function_name,
        Value::Object(args),
      ));
    }
    Ok(calls)
  }
  fn name(&self) -> &'static str {
    "minimax_m2"
  }
  /// Lock-step with [`Self::parse`] (above): `parse` walks the section by
  /// `find_all("<invoke name=", "</invoke>")`. The streaming walker iterates
  /// the same block sequence, racing the section `end_tag` against the next
  /// `<invoke name=` opener at each cursor — section end first wins; opener
  /// first finds the corresponding `</invoke>` and loops. In-VALUE
  /// `</minimax:tool_call>` literals are safely inside the invoke close.
  ///
  /// Returns `Ok(None)` when no tagged payload is located, or when neither a
  /// section end nor another opener is found from the cursor. Otherwise
  /// returns the parsed calls (empty when `parse` fails or yields nothing)
  /// with the buffer offset just past the section end-tag.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();
    let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
      return Ok(None);
    };
    let open = "<invoke name=";
    let close = "</invoke>";
    // `cursor` is payload-relative and sits just past the last fully consumed
    // `</invoke>`; the loop breaks with the payload-relative offset of the
    // section end-tag.
    let mut cursor = 0usize;
    let section_end_rel = loop {
      let end_rel = payload[cursor..].find(end_tag).map(|p| cursor + p);
      let open_rel = payload[cursor..].find(open).map(|p| cursor + p);
      let open_rel = match (end_rel, open_rel) {
        (Some(e), Some(o)) if e <= o => break e,
        (Some(_), Some(o)) => o,
        (Some(e), None) => break e,
        (None, Some(o)) => o,
        (None, None) => return Ok(None),
      };
      let close_search_from = open_rel + open.len();
      let Some(close_rel) = payload[close_search_from..].find(close) else {
        // `<invoke name=` opened but no matching `</invoke>` found.
        // Use the parameter-value-aware scan (`<parameter name=...
        // </parameter>` regions can legitimately carry the section end-tag
        // literal — see `streaming_minimax_m2_parameter_value_with_end_tag_
        // literal_extracts_intact`-style tests). An end-tag OUTSIDE every
        // `<parameter name=...></parameter>` region surfaces as a closed-
        // but-malformed section so the same-chunk suffix is preserved; an
        // in-value end-tag stays `Ok(None)` so more chunks can complete
        // the parameter close.
        return Ok(
          xml_value_aware_end_tag_scan(payload, "<parameter name=", "</parameter>", end_tag)
            .map(|rel| (Vec::new(), payload_at + rel + end_tag.len())),
        );
      };
      cursor = close_search_from + close_rel + close.len();
    };
    let end_pos = payload_at + section_end_rel + end_tag.len();
    let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
    // A parse error or an empty result both surface as zero calls with the
    // known end position.
    match self.parse(inner, tools) {
      Ok(calls) if !calls.is_empty() => Ok(Some((calls, end_pos))),
      _ => Ok(Some((Vec::new(), end_pos))),
    }
  }
}

// ----------------------------------------------------------------------------
// function_gemma: call:name{k:v,...} with <escape> string markers
// ----------------------------------------------------------------------------

/// `function_gemma` — `call:name{k:v,...}` with `<escape>`-delimited strings.
pub struct FunctionGemma;

impl FunctionGemma {
  /// Bound a tagged section to its first real wrapper `end_tag`.
  ///
  /// `payload` is the text after the start tag and `payload_at` is its
  /// byte offset inside the caller's buffer. On success the result is
  /// `(body, end)`, where `body` is `payload` up to (not including) the
  /// chosen end tag and `end` is the buffer offset one past that end tag.
  /// `None` means no usable end tag has been seen yet.
  ///
  /// The end tag is located in two steps:
  ///
  /// 1. [`bound_context_or_plain_end`] is consulted with the
  ///    [`function_gemma_call_context_proven`] predicate. A `None` from it
  ///    propagates as `None`; a `Some(Some(end))` is used directly. The
  ///    predicate demands the full `call:name{` shape, so a stray `call:`
  ///    in malformed bytes does not switch on escape-aware scanning and the
  ///    plain end-tag position is used instead, preserving the suffix.
  /// 2. Otherwise the scan falls back to
  ///    [`closed_but_malformed_end_pos_value_aware`] over
  ///    `<escape>...<escape>` regions, because a string value may carry the
  ///    end-tag literal verbatim and only an end tag outside every escape
  ///    region closes the wrapper.
  fn bound_section<'a>(
    &self,
    payload: &'a str,
    payload_at: usize,
    end_tag: &str,
  ) -> Option<(&'a str, usize)> {
    let gated = bound_context_or_plain_end(payload, end_tag, function_gemma_call_context_proven)?;
    let end_pos = if let Some(end_rel) = gated {
      end_rel
    } else {
      closed_but_malformed_end_pos_value_aware(payload, 0, end_tag, "<escape>", "<escape>")?
    };
    // `end_pos` points one past the end tag; step back over it for the body.
    let body_len = end_pos - end_tag.len();
    let body = &payload[..body_len];
    Some((body, payload_at + end_pos))
  }
}

impl ToolParser for FunctionGemma {
  /// Parse the first `call:name{...}` in `text` into a single tool call.
  ///
  /// The argument body is the text between `{` and the first `}` (the
  /// Python regex `call:(\w+)\{(.*?)\}`). It is consumed left to right as
  /// `key:value` pairs separated by `,`:
  ///
  /// * a value written as `<escape>STR<escape>` is stored as the string
  ///   `STR`, and one further byte (the separating `,`) is skipped;
  /// * any other value runs to the next `,` and is decoded as JSON, falling
  ///   back to the raw text as a string when it is not valid JSON.
  ///
  /// Parsing stops quietly at the first remainder that has no `:`.
  ///
  /// # Errors
  ///
  /// Returns an error when no `call:name{...}` is present, or when the byte
  /// after a closing `<escape>` is the start of a multi-byte character.
  fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    const ESCAPE: &str = "<escape>";
    let Some((func_name, raw_args)) = gemma_call(text, false) else {
      return Err(err("function_gemma: No function provided."));
    };
    let mut arguments = serde_json::Map::new();
    let mut remaining: &str = raw_args.as_str();
    while !remaining.is_empty() {
      let Some(colon) = remaining.find(':') else {
        break;
      };
      let key = remaining[..colon].to_owned();
      let after_key = &remaining[colon + 1..];

      // Escaped string value: `<escape>STR<escape>` with both markers present.
      let escaped_value = after_key
        .strip_prefix(ESCAPE)
        .and_then(|escaped| escaped.find(ESCAPE).map(|close| (escaped, close)));
      if let Some((escaped, close)) = escaped_value {
        arguments.insert(key, Value::String(escaped[..close].to_owned()));
        // Python skips one extra character after the closing marker (the
        // `,`). That is only a valid slice point here when the skipped byte
        // is a complete 1-byte char; anything else is reported as an error
        // instead of slicing through a code point.
        let resume = close + ESCAPE.len() + 1;
        remaining = if resume >= escaped.len() {
          ""
        } else if escaped.is_char_boundary(resume) {
          &escaped[resume..]
        } else {
          return Err(err("function_gemma: malformed argument encoding"));
        };
        continue;
      }

      // Plain value: everything up to the next `,` (or the end of the body).
      let (value, tail) = match after_key.split_once(',') {
        Some(parts) => parts,
        None => (after_key, ""),
      };
      let decoded = match serde_json::from_str::<Value>(value) {
        Ok(json) => json,
        Err(_) => Value::String(value.to_owned()),
      };
      arguments.insert(key, decoded);
      remaining = tail;
    }
    Ok(obj(&func_name, Value::Object(arguments)))
  }

  /// Parser name, as used for lookups by name.
  fn name(&self) -> &'static str {
    "function_gemma"
  }

  /// Streaming counterpart of [`Self::parse`].
  ///
  /// Returns `Ok(None)` while the section is still incomplete, and
  /// `Ok(Some((calls, end)))` once a wrapper end tag bounds it, where `end`
  /// is the buffer offset one past that end tag. `calls` is empty when the
  /// bounded section is malformed or fails to parse.
  ///
  /// The section is bounded first via `Self::bound_section`, which ignores
  /// end-tag literals inside `<escape>...<escape>` string values. Every
  /// later scan works on that bounded slice only, so bytes after the
  /// wrapper close can never influence how the body is read.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    const ESCAPE: &str = "<escape>";
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();
    let (payload_at, payload) = match locate_tagged_payload(buffer, start_tag) {
      Some(located) => located,
      None => return Ok(None),
    };
    let (bounded, end_pos) = match self.bound_section(payload, payload_at, end_tag) {
      Some(section) => section,
      None => return Ok(None),
    };
    // The opener is found with the same recognizer the context predicate
    // uses, so both agree on what counts as `call:name{`.
    let Some(after_open_brace) = find_first_function_gemma_call_start(bounded) else {
      // Bounded section without any valid opener: closed but malformed.
      return Ok(Some((Vec::new(), end_pos)));
    };

    // Look for the first `}` that is not inside an `<escape>` region. Each
    // `<escape>` marker flips between "inside" and "outside" a string.
    let raw = bounded.as_bytes();
    let mut pos = after_open_brace;
    let mut inside_escape = false;
    let mut closed = false;
    while pos < raw.len() {
      if bounded[pos..].starts_with(ESCAPE) {
        inside_escape = !inside_escape;
        pos += ESCAPE.len();
        continue;
      }
      if !inside_escape && raw[pos] == b'}' {
        closed = true;
        break;
      }
      pos += utf8_char_width(raw[pos]);
    }
    if !closed {
      // `{` was opened but never closed before the wrapper end tag.
      return Ok(Some((Vec::new(), end_pos)));
    }

    let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
    // A parse error and an empty parse both surface as "no calls".
    let calls = self.parse(inner, tools).unwrap_or_default();
    Ok(Some((calls, end_pos)))
  }
}

/// Shared recognizer for a function_gemma `call:name{` opener.
///
/// Succeeds only when the bytes of `payload` starting at offset `at` are,
/// with nothing in between:
///
/// 1. the literal `call:`,
/// 2. a non-empty function name made of ASCII letters, digits, `_` or `-`,
/// 3. an opening `{`.
///
/// No whitespace is tolerated anywhere in that sequence. On success the
/// result is `Some((name_start, after_open_brace))`: the byte index of the
/// first name byte and the byte index one past the `{`. Every other input,
/// including an `at` beyond the end of `payload`, yields `None`.
///
/// [`gemma_call`], [`find_first_function_gemma_call_start`] (and through it
/// [`FunctionGemma::try_parse_one_call`]) all go through this one function,
/// so they agree on the opener grammar.
fn function_gemma_call_start_at(payload: &str, at: usize) -> Option<(usize, usize)> {
  const OPENER: &[u8] = b"call:";
  let tail = payload.as_bytes().get(at..)?.strip_prefix(OPENER)?;
  let name_len = tail
    .iter()
    .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
    .count();
  if name_len == 0 || tail.get(name_len) != Some(&b'{') {
    return None;
  }
  let name_start = at + OPENER.len();
  Some((name_start, name_start + name_len + 1))
}

/// Locate the earliest valid `call:name{` opener in `payload`.
///
/// Every byte offset is tried in ascending order with
/// [`function_gemma_call_start_at`]. The result is the `after_open_brace`
/// index (one past the `{`) of the first offset that matches, or `None`
/// when no offset matches.
fn find_first_function_gemma_call_start(payload: &str) -> Option<usize> {
  (0..payload.len()).find_map(|offset| {
    function_gemma_call_start_at(payload, offset).map(|(_, after_open_brace)| after_open_brace)
  })
}

/// Extract the first `call:name{...}` from `text` as `(name, args)`.
///
/// The opener is located with [`function_gemma_call_start_at`] (the same
/// recognizer the streaming path uses), and `args` is everything between
/// the `{` and the first following `}` — the non-greedy `{.*?}` match.
/// Only the earliest opener is considered: if it has no closing `}` the
/// result is `None`, even when a later opener would match.
///
/// `_balanced` is accepted for signature compatibility and is currently
/// ignored.
fn gemma_call(text: &str, _balanced: bool) -> Option<(String, String)> {
  let (name_start, after_open_brace) =
    (0..text.len()).find_map(|offset| function_gemma_call_start_at(text, offset))?;
  // The name ends right before the `{`, which sits at `after_open_brace - 1`.
  let name = &text[name_start..after_open_brace - 1];
  let (args, _) = text[after_open_brace..].split_once('}')?;
  Some((name.to_owned(), args.to_owned()))
}

// ----------------------------------------------------------------------------
// gemma4: call:name{bare_keys: <|"|>str<|"|>, ...} with balanced braces
// ----------------------------------------------------------------------------

/// `gemma4` — `call:name{key: <|"|>string<|"|>, n: 2}` with bare keys, `<|"|>`
/// string delimiters and balanced nested braces.
///
/// A field-less marker type: all behavior lives in its [`ToolParser`] impl,
/// which relies on `gemma4_calls` to find the call blocks and on
/// `gemma4_args_to_json` to turn each argument body into JSON text.
pub struct Gemma4;

impl ToolParser for Gemma4 {
  /// Parse every `call:name{...}` block in `text` into a tool call.
  ///
  /// Blocks are discovered by [`gemma4_calls`]; each argument body is
  /// rewritten to JSON by [`gemma4_args_to_json`] and decoded with
  /// `serde_json`.
  ///
  /// # Errors
  ///
  /// Returns an error when no block is found, or as soon as one block's
  /// rewritten arguments are not valid JSON.
  fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
    let raw_calls = gemma4_calls(text);
    if raw_calls.is_empty() {
      return Err(err("gemma4: No function provided."));
    }
    raw_calls
      .iter()
      .map(|(name, args_str)| {
        serde_json::from_str::<Value>(&gemma4_args_to_json(args_str))
          .map(|arguments| ToolCall::new_nameless_id(name, arguments))
          .map_err(|e| err(format!("gemma4: {e}")))
      })
      .collect()
  }

  /// Parser name, as used for lookups by name.
  fn name(&self) -> &'static str {
    "gemma4"
  }

  /// Streaming counterpart of [`Self::parse`].
  ///
  /// Walks the payload after the start tag. At each cursor position the
  /// section `end_tag` races the next `call:` marker:
  ///
  /// * end tag first (or tied): the section closes there;
  /// * `call:` first: if it is followed by name bytes and `{`, the cursor
  ///   jumps past the brace-balanced body (string regions delimited by
  ///   `<|"|>` are skipped, so they may contain the end-tag literal);
  ///   otherwise the cursor just moves past the marker;
  /// * neither present: `Ok(None)`, more input is needed.
  ///
  /// Once closed, the section is handed to [`Self::parse`]; a parse failure
  /// yields an empty call list rather than an error, with the end offset
  /// still reported so the caller can continue after the section.
  fn try_parse_one_call(
    &self,
    buffer: &str,
    tools: Option<&Value>,
  ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
    const OPENER: &str = "call:";
    const STR_DELIM: &str = "<|\"|>";
    let start_tag = self.tool_call_start();
    let end_tag = self.tool_call_end();
    let (payload_at, payload) = match locate_tagged_payload(buffer, start_tag) {
      Some(located) => located,
      None => return Ok(None),
    };
    let raw = payload.as_bytes();
    let mut cursor = 0usize;
    let section_end_rel = loop {
      let tail = &payload[cursor..];
      let next_end = tail.find(end_tag).map(|off| cursor + off);
      let next_call = tail.find(OPENER).map(|off| cursor + off);
      let call_at = match (next_end, next_call) {
        (None, None) => return Ok(None),
        (Some(end), None) => break end,
        (Some(end), Some(call)) if end <= call => break end,
        (_, Some(call)) => call,
      };
      let name_at = call_at + OPENER.len();
      let name_len = raw[name_at..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
        .count();
      let brace_at = name_at + name_len;
      if raw.get(brace_at) != Some(&b'{') {
        // Not an opener after all; resume right after the `call:` marker.
        cursor = name_at;
        continue;
      }
      let Some(close_rel) = balanced_brace_end(&payload[brace_at..]) else {
        // `{` without a matching `}`: either the body is incomplete or a
        // `<|"|>` string is unterminated. The value-aware helper decides:
        // an end tag outside every string region closes a malformed
        // section, while one inside an open string keeps us waiting.
        return Ok(
          closed_but_malformed_end_pos_value_aware(
            buffer, payload_at, end_tag, STR_DELIM, STR_DELIM,
          )
          .map(|ep| (Vec::new(), ep)),
        );
      };
      cursor = brace_at + close_rel + 1;
    };
    let end_pos = payload_at + section_end_rel + end_tag.len();
    let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
    // A parse error and an empty parse both surface as "no calls".
    let calls = self.parse(inner, tools).unwrap_or_default();
    Ok(Some((calls, end_pos)))
  }
}

/// Collect every `call:name{...}` block in `text` as `(name, body)` pairs.
///
/// `name` is a non-empty run of ASCII letters, digits, `_` or `-` directly
/// after `call:` and directly before `{`. `body` is the brace-balanced text
/// including its outer `{` and `}`, as delimited by [`balanced_brace_end`]
/// (which ignores braces inside `<|"|>...<|"|>` strings).
///
/// A `call:` that is not a valid opener is skipped. An opener whose braces
/// never balance is dropped and scanning resumes at its `{`.
fn gemma4_calls(text: &str) -> Vec<(String, String)> {
  const OPENER: &str = "call:";
  let mut found = Vec::new();
  let mut cursor = 0;
  while let Some(hit) = text[cursor..].find(OPENER) {
    let name_start = cursor + hit + OPENER.len();
    let tail = &text[name_start..];
    let name_len = tail
      .bytes()
      .take_while(|b| b.is_ascii_alphanumeric() || *b == b'_' || *b == b'-')
      .count();
    if name_len == 0 || tail.as_bytes().get(name_len) != Some(&b'{') {
      cursor = name_start;
      continue;
    }
    let (name, body) = tail.split_at(name_len);
    let brace_at = name_start + name_len;
    cursor = match balanced_brace_end(body) {
      Some(close) => {
        found.push((name.to_owned(), body[..=close].to_owned()));
        brace_at + close + 1
      }
      None => brace_at,
    };
  }
  found
}

/// Byte index of the `}` that balances the `{` at the start of `s`.
///
/// Braces inside `<|"|>...<|"|>` string literals are not counted. Returns
/// `None` when `s` does not begin with `{`, when a string literal is opened
/// but never closed, or when the input ends before the braces balance.
fn balanced_brace_end(s: &str) -> Option<usize> {
  const LIT: &str = "<|\"|>";
  if !s.starts_with('{') {
    return None;
  }
  let bytes = s.as_bytes();
  let mut open = 0usize;
  let mut pos = 0usize;
  while let Some(&byte) = bytes.get(pos) {
    if let Some(after_open) = s[pos..].strip_prefix(LIT) {
      // Jump over the whole literal, closing delimiter included.
      let close = after_open.find(LIT)?;
      pos += LIT.len() + close + LIT.len();
      continue;
    }
    if byte == b'{' {
      open += 1;
    } else if byte == b'}' {
      // Cannot underflow: the first byte is `{`, and we return as soon as
      // the count drops back to zero.
      open -= 1;
      if open == 0 {
        return Some(pos);
      }
    }
    // Step over the whole character so `pos` stays on a char boundary for
    // the next `s[pos..]` slice; the structural bytes are all 1-byte ASCII.
    pos += utf8_char_width(byte);
  }
  None
}

/// Number of bytes (1..=4) in the UTF-8 sequence introduced by lead byte `b`.
///
/// The width is read from the count of leading 1-bits: two, three or four
/// set bits announce a sequence of that length. Everything else — ASCII, a
/// stray continuation byte, or a byte that is never a valid lead — counts as
/// width 1, so a caller stepping through malformed input still advances.
#[inline]
fn utf8_char_width(b: u8) -> usize {
  match b.leading_ones() {
    2 => 2,
    3 => 3,
    4 => 4,
    _ => 1,
  }
}

fn gemma4_args_to_json(text: &str) -> String {
  // Extract <|"|>...<|"|> strings into placeholders.
  let lit = "<|\"|>";
  let mut strings: Vec<String> = Vec::new();
  let mut result = String::new();
  let mut i = 0;
  while i < text.len() {
    if text[i..].starts_with(lit)
      && let Some(close) = text[i + lit.len()..].find(lit)
    {
      let inner = &text[i + lit.len()..i + lit.len() + close];
      result.push('\u{0}');
      result.push_str(&strings.len().to_string());
      result.push('\u{0}');
      strings.push(inner.to_owned());
      i = i + lit.len() + close + lit.len();
      continue;
    }
    let ch = text[i..].chars().next().unwrap();
    result.push(ch);
    i += ch.len_utf8();
  }
  // Quote bare keys: (?<=[{,])(\w+):  -> "\1":
  let mut quoted = String::new();
  let rb = result.as_bytes();
  let mut k = 0;
  while k < result.len() {
    let c = rb[k];
    quoted.push(c as char);
    if c == b'{' || c == b',' {
      // read following \w+ then ':'
      let mut m = k + 1;
      while m < rb.len() && (rb[m].is_ascii_alphanumeric() || rb[m] == b'_') {
        m += 1;
      }
      if m > k + 1 && m < rb.len() && rb[m] == b':' {
        quoted.push('"');
        quoted.push_str(&result[k + 1..m]);
        quoted.push('"');
        quoted.push(':');
        k = m + 1;
        continue;
      }
    }
    k += 1;
  }
  // Restore captured strings as JSON-escaped strings.
  for (idx, sv) in strings.iter().enumerate() {
    let placeholder = format!("\u{0}{idx}\u{0}");
    let json = serde_json::to_string(sv).unwrap_or_else(|_| "\"\"".into());
    quoted = quoted.replace(&placeholder, &json);
  }
  quoted
}

// ----------------------------------------------------------------------------
// Selection
// ----------------------------------------------------------------------------

/// Instantiate the parser registered under a Python module `name`.
///
/// Unknown names yield `None`, mirroring importlib failing silently into
/// "no parser".
pub fn parser_by_name(name: &str) -> Option<Box<dyn ToolParser>> {
  /// Box a concrete parser behind the trait object.
  fn boxed<P: ToolParser + 'static>(parser: P) -> Option<Box<dyn ToolParser>> {
    Some(Box::new(parser))
  }

  match name {
    "json_tools" => boxed(JsonTools),
    "pythonic" => boxed(Pythonic),
    "mistral" => boxed(Mistral),
    "qwen3_coder" => boxed(Qwen3Coder),
    "glm47" => boxed(Glm47),
    "kimi_k2" => boxed(KimiK2),
    "longcat" => boxed(Longcat),
    "minimax_m2" => boxed(MinimaxM2),
    "function_gemma" => boxed(FunctionGemma),
    "gemma4" => boxed(Gemma4),
    _ => None,
  }
}

/// Pick a tool-parser name by inspecting a chat-template string.
///
/// Port of Python `_infer_tool_parser` (`tokenizer_utils.py` ~548). The
/// rules come from the `cargo xtask-codegen`-generated `TOOL_PARSER_SELECT`
/// table (sourced from `mlxrs/data/tokenizer/tool_parsers.toml`, mlx-lm
/// df1d3f3) and are tried in declaration order. A rule matches when every
/// one of its `all` substrings occurs in the template and its `any_of`
/// list is either empty or has at least one substring that occurs. The
/// first matching rule's name is returned, which is equivalent to the
/// Python if/elif chain.
///
/// Returns `None` when there is no template or no rule matches.
pub fn infer_tool_parser(chat_template: Option<&str>) -> Option<&'static str> {
  let template = chat_template?;
  TOOL_PARSER_SELECT
    .iter()
    .find(|rule| {
      let has_all = rule.all.iter().all(|needle| template.contains(needle));
      let has_any =
        rule.any_of.is_empty() || rule.any_of.iter().any(|needle| template.contains(needle));
      has_all && has_any
    })
    .map(|rule| rule.name)
}

// ----------------------------------------------------------------------------
// Streaming processor
// ----------------------------------------------------------------------------

/// Strip the parser's `tool_call_start` / `tool_call_end` delimiters from a
/// buffered tool-call payload, returning the trimmed inner text.
///
/// In `mlx-swift-lm` each `ToolCallParser` strips its own tags inside `parse`
/// (e.g. `JSONToolCallParser` removes `<tool_call>` / `</tool_call>`). mlxrs
/// keeps delimiters in one shared marker table instead, so the streaming
/// processor strips them centrally before delegating — the existing
/// [`ToolParser`]s expect the bare payload (`JsonTools` feeds the body
/// straight to `serde_json`; the tag-scanning parsers are unaffected because
/// stripping a delimiter they would only have searched for is idempotent).
/// Only the first start tag and the last end tag are removed, matching the
/// Swift `range(of:)` strip; literal delimiter text inside argument values is
/// otherwise preserved.
fn strip_markers<'a>(parser: &dyn ToolParser, buffer: &'a str) -> &'a str {
  let mut text = buffer;
  let start = parser.tool_call_start();
  if !start.is_empty()
    && let Some(idx) = text.find(start)
  {
    text = &text[idx + start.len()..];
  }
  let end = parser.tool_call_end();
  if !end.is_empty()
    && let Some(idx) = text.rfind(end)
  {
    text = &text[..idx];
  }
  text.trim()
}

/// End-of-stream parse over a [`ToolParser`], following the default
/// `ToolCallParser.parseEOS` extension in `mlx-swift-lm`
/// (`MLXLMCommon/Tool/ToolCallFormat.swift`).
///
/// * Parser without a start tag: the whole buffer is parsed once, and a
///   failure yields no calls.
/// * Parser with a start tag: the buffer is split on that tag and each
///   non-empty segment is parsed on its own. Segments that fail to parse
///   are dropped (Swift `compactMap`), so a malformed tail cannot abort the
///   stream.
///
/// Every piece goes through [`strip_markers`] first, which removes its end
/// tag before the parser sees it.
fn parse_eos(parser: &dyn ToolParser, buffer: &str, tools: Option<&Value>) -> Vec<ToolCall> {
  let parse_piece = |piece: &str| parser.parse(strip_markers(parser, piece), tools);

  if parser.tool_call_start().is_empty() {
    return parse_piece(buffer).unwrap_or_default();
  }

  let mut calls = Vec::new();
  for segment in buffer.split(parser.tool_call_start()) {
    if segment.is_empty() {
      continue;
    }
    if let Ok(parsed) = parse_piece(segment) {
      calls.extend(parsed);
    }
  }
  calls
}

/// Upper bound, in bytes, on what the streaming processor may buffer.
///
/// From the moment a chunk puts the processor into `PotentialToolCall` or
/// `CollectingToolCall`, every later chunk is appended until an end tag,
/// balanced JSON or EOS shows up. Without a limit, a malformed or
/// adversarial generation — inline JSON whose braces never balance, or a
/// tagged format (Mistral's empty end tag included) whose end tag never
/// arrives — could grow the buffer until the process runs out of memory.
///
/// 256 KiB dwarfs any genuine tool call (function names plus JSON arguments
/// are kilobytes at most) while still being cheap to hold. The check runs
/// after **each** appended chunk and covers the **combined** size
/// `tool_call_buffer.len() + pending_display.len()`. Going over the cap
/// makes `ToolCallProcessor` recover instead of panic (see
/// `recover_at_cap`):
///
/// * start tag not yet confirmed: the buffered bytes, `pending_display`
///   included, are flushed back as ordinary display text;
/// * start tag confirmed but the call is overlong: `tool_call_buffer` is
///   dropped, while any `pending_display` gathered before the start tag was
///   confirmed is still surfaced, since it is unambiguously display text.
///
/// In both cases the buffers are emptied and the state is reset. Memory is
/// therefore `O(1)` per generation rather than `O(total output)`: the
/// combined buffers peak at this cap plus at most one chunk (one
/// detokenized token's worth of text).
const MAX_TOOL_CALL_BUFFER_BYTES: usize = {
  const KIB: usize = 1024;
  256 * KIB
};

/// Streaming state-machine for detecting and extracting tool calls while a
/// model is still generating, fed text chunk-by-chunk.
///
/// Direct port of `mlx-swift-lm`'s `ToolCallProcessor`
/// (`MLXLMCommon/Tool/ToolCallProcessor.swift`): partial content is buffered
/// so a half-finished tool call is never leaked to the UI, and a complete
/// tool call is extracted into [`tool_calls`](Self::tool_calls) the moment its
/// closing delimiter (or balanced JSON) arrives. It reuses the per-format
/// [`ToolParser`]s above rather than re-implementing any parsing.
///
/// Typical use: call [`process_chunk`](Self::process_chunk) for every
/// generated chunk and display whatever text it returns, call
/// [`process_eos`](Self::process_eos) once generation ends, then read the
/// extracted calls from [`tool_calls`](Self::tool_calls).
///
/// Two delimiter regimes, dispatched on whether the parser has a start tag:
///
/// - **Tagged** (`tool_call_start` non-empty, e.g. `json_tools`'
///   `<tool_call>`): the buffer is matched against the start tag character by
///   character; once the full start tag is seen the state advances to
///   collecting, and the call is parsed when the end tag arrives.
/// - **Inline** (`tool_call_start` empty): a JSON-string-aware balanced-object
///   scan drives detection — while the JSON object is still open the content
///   is buffered; a complete object that fails to parse is flushed back out as
///   ordinary text, and any text *after* the object is processed separately so
///   extraction does not depend on chunk boundaries.
///
/// The buffer is bounded by an internal `MAX_TOOL_CALL_BUFFER_BYTES` cap: a
/// malformed stream that never completes a tool call recovers (false starts
/// flush as display text, runaway tool content is dropped) instead of growing
/// without bound.
///
/// # Example
///
/// ```
/// use mlxrs::tokenizer::tools::{JsonTools, ToolCallProcessor};
///
/// let mut proc = ToolCallProcessor::new(Box::new(JsonTools), None);
/// // Regular text passes straight through.
/// assert_eq!(proc.process_chunk("Sure! ").as_deref(), Some("Sure! "));
/// // A tool call split across chunks is buffered (returns `None`) ...
/// assert_eq!(proc.process_chunk("<tool_call>{\"name\": \"now\","), None);
/// // ... and emitted once its end tag arrives.
/// assert_eq!(proc.process_chunk("\"arguments\": {}}</tool_call>"), None);
/// assert_eq!(proc.tool_calls.len(), 1);
/// assert_eq!(proc.tool_calls[0].name(), "now");
/// ```
pub struct ToolCallProcessor {
  /// The per-format parser the state machine delegates structured parsing to.
  /// Its `tool_call_start` also decides the regime: empty means inline,
  /// non-empty means tagged.
  parser: Box<dyn ToolParser>,
  /// Optional tool schemas forwarded to the parser for type-aware coercion.
  tools: Option<Value>,
  /// Current state machine position.
  state: State,
  /// Buffered partial tool-call text not yet emitted or parsed.
  tool_call_buffer: String,
  /// Display text seen *before* a potential / unconfirmed start tag that has
  /// not yet been emitted.
  ///
  /// Keeping it in a field rather than a per-chunk local (it replaces an
  /// earlier `leading_token` local) lets the bytes preceding a start tag
  /// survive every chunk boundary — including a split landing *inside* the
  /// start tag itself (`"Let me <"` then `"tool_call>…"`). They are emitted in
  /// stream order on confirmation, or flushed back as display text on
  /// false-start divergence. This eliminates the whole class of
  /// "split-inside-a-start-tag drops leading text" defects rather than
  /// patching split positions one-by-one.
  ///
  /// Bounded jointly with [`tool_call_buffer`](Self::tool_call_buffer) by
  /// [`MAX_TOOL_CALL_BUFFER_BYTES`].
  pending_display: String,
  /// Tool calls extracted so far, in arrival order. Public so callers can
  /// read (or drain) the results directly.
  pub tool_calls: Vec<ToolCall>,
}

/// State-machine position (Swift `ToolCallProcessor.State`).
///
/// A plain `Copy` + `Eq` tag with no payload: the buffered text itself lives
/// in the processor's `tool_call_buffer` / `pending_display` fields, and the
/// state is compared directly (e.g. by `process_eos`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
  /// Passing generated text straight through; no tool call in progress.
  Normal,
  /// A start-tag prefix has been seen; still confirming it is a real tag.
  PotentialToolCall,
  /// Inside a confirmed tool call, buffering until it completes.
  CollectingToolCall,
}

impl ToolCallProcessor {
  /// Build a processor around `parser`.
  ///
  /// `tools` optionally carries the tool schemas that are handed to the
  /// parser on every parse for type-aware argument coercion. The processor
  /// starts in [`State::Normal`] with both buffers empty and no recorded
  /// tool calls.
  pub fn new(parser: Box<dyn ToolParser>, tools: Option<Value>) -> Self {
    let state = State::Normal;
    Self {
      tool_calls: Vec::new(),
      pending_display: String::new(),
      tool_call_buffer: String::new(),
      state,
      tools,
      parser,
    }
  }

  /// `true` when the parser declares no start tag, i.e. tool calls appear
  /// inline in the text rather than wrapped in tags.
  fn is_inline_format(&self) -> bool {
    let start_tag = self.parser.tool_call_start();
    start_tag.is_empty()
  }

  /// Leading `char` of the parser's start tag (`None` for an empty tag); used
  /// to cheaply pre-screen chunks that cannot possibly begin a tag.
  fn start_tag_first_char(&self) -> Option<char> {
    let mut tag_chars = self.parser.tool_call_start().chars();
    tag_chars.next()
  }

  /// Feed one chunk of generated text through the state machine.
  ///
  /// Dispatches to the inline or the tagged implementation depending on
  /// whether the parser declares a start tag. Returns the ordinary
  /// (non-tool-call) text to display, or `None` when the chunk produced no
  /// display text (for example while a partial tool call is being buffered).
  pub fn process_chunk(&mut self, chunk: &str) -> Option<String> {
    if !self.is_inline_format() {
      return self.process_tagged_chunk(chunk);
    }
    self.process_inline_chunk(chunk)
  }

  /// Finish the current generation, parsing whatever is still buffered as
  /// tool call(s).
  ///
  /// Call this when generation ends (e.g. on an EOS token). It exists for
  /// formats whose end tag never shows up as text (e.g. `mistral`, whose
  /// `</s>` is intercepted at the token-ID level). When the processor is in
  /// [`State::Normal`] this is a no-op.
  ///
  /// Nothing is returned: any pending leading display text is discarded so
  /// it cannot leak into a later generation on a reused processor, and the
  /// state always ends up back at [`State::Normal`].
  pub fn process_eos(&mut self) {
    let mid_call = matches!(
      self.state,
      State::PotentialToolCall | State::CollectingToolCall
    );
    if !mid_call {
      return;
    }

    // Only a non-empty buffer is worth handing to the parser.
    if !self.tool_call_buffer.is_empty() {
      let recovered = parse_eos(
        self.parser.as_ref(),
        &self.tool_call_buffer,
        self.tools.as_ref(),
      );
      self.tool_calls.extend(recovered);
      self.tool_call_buffer.clear();
    }

    // Common tail for both cases: drop parked display text and reset.
    self.pending_display.clear();
    self.state = State::Normal;
  }

  /// Empty both buffers, reset to [`State::Normal`], and hand back whatever
  /// of the buffered bytes counts as display text.
  ///
  /// Used by [`Self::cap_recover_into`] once the **combined** size of
  /// [`tool_call_buffer`](Self::tool_call_buffer) and
  /// [`pending_display`](Self::pending_display) has grown past
  /// [`MAX_TOOL_CALL_BUFFER_BYTES`], and by [`Self::reset_on_malformed`].
  /// Because both buffers are *always* emptied here, neither can keep growing
  /// afterwards. What is surfaced depends on the state at the time of the
  /// call:
  ///
  /// - [`State::PotentialToolCall`] — the start tag was never confirmed, so
  ///   `pending_display` followed by `tool_call_buffer` is returned verbatim,
  ///   in stream order; nothing is lost.
  /// - [`State::CollectingToolCall`] — the partial call in `tool_call_buffer`
  ///   is dropped (it is neither display text nor parseable), but
  ///   `pending_display` is still returned: those bytes are unambiguously
  ///   display text.
  /// - [`State::Normal`] — not expected (the buffers only fill past
  ///   `Normal`); handled like the false-start case by flushing both.
  ///
  /// Returns `None` when there is nothing to display.
  fn recover_at_cap(&mut self) -> Option<String> {
    let previous = std::mem::replace(&mut self.state, State::Normal);
    let leading = std::mem::take(&mut self.pending_display);
    let buffered = std::mem::take(&mut self.tool_call_buffer);

    // `push_display` ignores empty input, so no emptiness checks are needed.
    let mut flushed: Option<String> = None;
    push_display(&mut flushed, &leading);
    match previous {
      // A confirmed-but-unfinished call is not display text: drop it.
      State::CollectingToolCall => {}
      // False start (or defensive `Normal`) — the bytes are display text.
      State::PotentialToolCall | State::Normal => push_display(&mut flushed, &buffered),
    }
    flushed
  }

  /// Apply the memory cap after a chunk has been appended.
  ///
  /// When `tool_call_buffer.len() + pending_display.len()` is strictly
  /// greater than `MAX_TOOL_CALL_BUFFER_BYTES`, `recover_at_cap` is run and
  /// any text it flushes is appended to `display`; at or below the cap this
  /// is a no-op. The bound is on the *sum* because `pending_display` is also
  /// attacker-controllable (leading text can be arbitrarily long). Every
  /// buffering branch calls this so the bound holds after each chunk,
  /// whichever state the processor is in.
  fn cap_recover_into(&mut self, display: &mut Option<String>) {
    let buffered_bytes = self.tool_call_buffer.len() + self.pending_display.len();
    if buffered_bytes > MAX_TOOL_CALL_BUFFER_BYTES {
      if let Some(text) = self.recover_at_cap() {
        push_display(display, &text);
      }
    }
  }

  /// Immediate reset used when [`ToolParser::try_parse_one_call`] returns
  /// `Err`.
  ///
  /// Runs the same flush as [`Self::recover_at_cap`] but without waiting for
  /// the [`MAX_TOOL_CALL_BUFFER_BYTES`] threshold: in the collecting state
  /// `pending_display` is surfaced as display text and `tool_call_buffer` is
  /// dropped (a confirmed call the parser rejected is not display text). The
  /// buffer is structurally malformed for THIS parser, so holding it until
  /// the cap fires would swallow every following token until cap or EOS;
  /// resetting eagerly lets the next chunk start fresh in [`State::Normal`].
  ///
  /// **Contract:** discarding the whole buffer also discards any suffix that
  /// arrived in the SAME chunk after the malformed section. Parsers should
  /// therefore reserve `Err` for *truly indeterminate* failures where no
  /// `end_pos` is known; a confirmed-but-rejected section MUST be reported as
  /// `Ok(Some((Vec::new(), end_pos)))` so the same-chunk suffix survives.
  fn reset_on_malformed(&mut self, display: &mut Option<String>) {
    let flushed = self.recover_at_cap();
    // An empty string is ignored by `push_display`, covering the `None` case.
    push_display(display, flushed.as_deref().unwrap_or(""));
  }

  /// Process a chunk for inline formats (no wrapper tags).
  ///
  /// Uses a JSON-string-aware balanced-object scan to decide when output
  /// looks like a JSON tool call. While the object is still open the content
  /// is buffered (returns `None`) so partial JSON is never leaked; a balanced
  /// buffer that fails to parse is not a tool call and is flushed back out.
  /// Any text *after* the first balanced JSON object in the same buffer is
  /// processed separately, so extraction never depends on where chunk
  /// boundaries fall.
  fn process_inline_chunk(&mut self, chunk: &str) -> Option<String> {
    // Leading display text in front of the first `{` of a *fresh* detection.
    let leading = match self.state {
      State::Normal => {
        let Some(brace) = chunk.find('{') else {
          // No brace seen — pass through as regular text.
          return Some(chunk.to_owned());
        };
        let leading = chunk[..brace].to_owned();
        self.tool_call_buffer.clear();
        self.tool_call_buffer.push_str(&chunk[brace..]);
        self.state = State::CollectingToolCall;
        leading
      }
      State::PotentialToolCall | State::CollectingToolCall => {
        self.tool_call_buffer.push_str(chunk);
        String::new()
      }
    };

    let mut display = self.drain_inline_buffer();

    // Bounded-memory guard: if the buffer is still holding
    // an unterminated JSON object after draining, recover (drop the runaway
    // content) instead of buffering without bound. No-op below the cap.
    if self.state == State::CollectingToolCall {
      self.cap_recover_into(&mut display);
    }

    // Prepend any leading text from this chunk.
    if leading.is_empty() {
      display
    } else {
      Some(leading + display.as_deref().unwrap_or(""))
    }
  }

  /// Iteratively consume balanced JSON-object prefixes from
  /// [`tool_call_buffer`](Self::tool_call_buffer).
  ///
  /// Each complete `{ ... }` object (string/escape aware) is parsed: a
  /// successful, non-empty parse appends [`ToolCall`]s; anything else means
  /// "complete JSON that is not a tool call" and the object's bytes become
  /// display text. Whatever follows the object is examined again, so
  /// `{...} done` and the same bytes split after `}` behave identically.
  ///
  /// Looping stops when:
  ///
  /// - the buffer is empty after an object (state returns to `Normal`);
  /// - the remainder holds an object that is still open — the plain text in
  ///   front of its `{` is emitted right away and only the open object stays
  ///   buffered (state is left unchanged, i.e. still collecting);
  /// - the remainder contains no `{` at all — it is emitted and state returns
  ///   to `Normal`.
  ///
  /// Flushing the text in front of an open `{` immediately matters because
  /// those bytes can never become part of a JSON object; leaving them in the
  /// buffer would withhold them from display and lose them entirely if the
  /// object never closes (`recover_at_cap` drops the buffer while collecting).
  ///
  /// The loop is bounded by the strictly shrinking buffer (each iteration
  /// removes a non-empty prefix), so it is iterative with no recursion.
  ///
  /// Returns the display text produced, or `None` if there was none.
  fn drain_inline_buffer(&mut self) -> Option<String> {
    let mut display: Option<String> = None;
    loop {
      let Some((obj_start, obj_end)) = balanced_json_object_prefix(&self.tool_call_buffer) else {
        match self.tool_call_buffer.find('{') {
          Some(open) => {
            // An object is open but not yet closed — keep buffering it, but
            // release the unambiguous display text that precedes the brace.
            if open > 0 {
              push_display(&mut display, &self.tool_call_buffer[..open]);
              self.tool_call_buffer.drain(..open);
            }
          }
          None => {
            // No (more) JSON object — flush any plain remainder.
            self.state = State::Normal;
            let remainder = std::mem::take(&mut self.tool_call_buffer);
            push_display(&mut display, &remainder);
          }
        }
        return display;
      };

      // A complete JSON object occupies `[obj_start..obj_end]`; anything
      // before `obj_start` is plain leading text, anything after `obj_end`
      // is a suffix to re-examine.
      if obj_start > 0 {
        push_display(&mut display, &self.tool_call_buffer[..obj_start]);
      }
      let object: String = self.tool_call_buffer[obj_start..obj_end].to_owned();
      let suffix: String = self.tool_call_buffer[obj_end..].to_owned();

      let inner = strip_markers(self.parser.as_ref(), &object);
      match self.parser.parse(inner, self.tools.as_ref()) {
        Ok(calls) if !calls.is_empty() => self.tool_calls.extend(calls),
        // Balanced JSON that is not a tool call — surface as display text.
        _ => push_display(&mut display, &object),
      }

      self.tool_call_buffer = suffix;
      if self.tool_call_buffer.is_empty() {
        self.state = State::Normal;
        return display;
      }
      // Otherwise re-examine the suffix on the next iteration.
    }
  }

  /// Process a chunk for tagged formats.
  ///
  /// When the text after an end tag itself contains the start character there
  /// may be further back-to-back tool calls. This is handled by an explicit
  /// **loop** over that trailing suffix — re-feeding it as the next chunk —
  /// rather than a recursive `process_chunk` self-call, so a single batched
  /// chunk packed with many tool calls cannot overflow the stack. Each
  /// iteration's output is concatenated in stream order.
  ///
  /// Leading display text seen *before* a candidate start tag is accumulated
  /// into [`pending_display`](Self::pending_display), not a per-chunk local.
  /// That makes a chunk boundary landing *inside* a start tag (`"Let me <"`
  /// then `"tool_call>…"`) equivalent to feeding the whole stream in one
  /// chunk: the leading prose survives across chunks and is emitted at
  /// start-tag confirmation (or flushed back to display on divergence).
  ///
  /// A parser-reported `end_pos` is validated before it is used to slice the
  /// buffer: zero, out-of-range, or non-char-boundary offsets are treated as
  /// "section not complete yet" (keep collecting under the cap) instead of
  /// looping forever or panicking.
  ///
  /// Returns the display text produced by this chunk, or `None` if there was
  /// none (e.g. while a tool call is being buffered).
  fn process_tagged_chunk(&mut self, chunk: &str) -> Option<String> {
    let start_tag = self.parser.tool_call_start();
    let Some(start_char) = self.start_tag_first_char() else {
      return Some(chunk.to_owned());
    };

    // The chunk currently being processed. After an end tag, the trailing
    // suffix is fed back here for the next loop iteration (no recursion).
    let mut chunk: std::borrow::Cow<'_, str> = std::borrow::Cow::Borrowed(chunk);
    // Display text accumulated across iterations, in stream order.
    let mut display: Option<String> = None;

    loop {
      // In `Normal`, ignore chunks that cannot begin a tag; once past `Normal`
      // every chunk is appended regardless.
      if self.state == State::Normal && !chunk.contains(start_char) {
        push_display(&mut display, &chunk);
        return display;
      }

      if self.state == State::Normal {
        // Split the chunk at the first start char: everything before it is
        // unambiguous leading display text (parked in `pending_display`), and
        // the start char onwards is the candidate tag in `tool_call_buffer`.
        // Splitting here — not after appending to `tool_call_buffer` — means
        // the bytes before the start char NEVER enter `tool_call_buffer`, so
        // a chunk boundary inside a start tag keeps the already-seen leading
        // text in `pending_display` across chunks.
        if let Some(idx) = chunk.find(start_char) {
          if idx > 0 {
            self.pending_display.push_str(&chunk[..idx]);
          }
          self.tool_call_buffer.push_str(&chunk[idx..]);
        } else {
          // No start char in this chunk — unreachable thanks to the guard
          // above, but handled defensively.
          self.pending_display.push_str(&chunk);
        }
        self.state = State::PotentialToolCall;
      } else {
        // Past `Normal`: every chunk is appended to the active buffer
        // (`tool_call_buffer`). `pending_display` is carried as-is.
        self.tool_call_buffer.push_str(&chunk);
      }

      if self.state == State::PotentialToolCall {
        if partial_match(&self.tool_call_buffer, start_tag) {
          if self.tool_call_buffer.starts_with(start_tag) {
            // Confirmed start tag — fall through to collecting. The leading
            // text (`pending_display`) is now unambiguously display text and
            // is flushed in stream order *before* the call.
            self.state = State::CollectingToolCall;
            let leading = std::mem::take(&mut self.pending_display);
            push_display(&mut display, &leading);
          } else {
            // Still an ambiguous start-tag prefix. `partial_match` only holds
            // here while `tool_call_buffer` is a *strict* prefix of
            // `start_tag`, so the tag-prefix portion is bounded by the (tiny)
            // tag length — but `pending_display` is adversary-controlled and
            // can grow across chunks, so apply the combined cap
            // unconditionally.
            self.cap_recover_into(&mut display);
            return display;
          }
        } else {
          // Not a tool call after all — flush `pending_display` (leading
          // text) and `tool_call_buffer` (the false-start prefix) back to
          // display in stream order, then reset.
          self.state = State::Normal;
          let leading = std::mem::take(&mut self.pending_display);
          let buffer = std::mem::take(&mut self.tool_call_buffer);
          push_display(&mut display, &leading);
          push_display(&mut display, &buffer);
          return display;
        }
      }

      // State::CollectingToolCall
      let end_tag = self.parser.tool_call_end();
      if end_tag.is_empty() {
        // No end tag (e.g. `mistral`): the call is closed at EOS, not
        // in-stream. With no closing delimiter the buffer would otherwise
        // grow without bound on a runaway generation.
        self.cap_recover_into(&mut display);
        return display;
      }

      // Extraction and end detection both go through the parser's
      // `try_parse_one_call`, so they cannot drift apart. Outcomes:
      //   Ok(Some((calls, end_pos))) → confirmed-bounded section. Emit any
      //     calls (may be EMPTY for a structurally-tagged but body-rejected
      //     section — its bytes are dropped, but the same-chunk suffix
      //     `[end_pos..]` is kept as display / re-examined for back-to-back
      //     sections);
      //   Ok(None) → buffer is incomplete, keep collecting (apply cap);
      //   Err → truly indeterminate (no end_pos available), recover via
      //     unconditional reset.
      let outcome = self
        .parser
        .try_parse_one_call(&self.tool_call_buffer, self.tools.as_ref());
      match outcome {
        Ok(Some((calls, end_pos))) => {
          // Validate `end_pos` before slicing. A zero-width advance would
          // loop forever; an offset past the buffer or inside a multi-byte
          // char would panic on a plain index (`str::get` returns `None`
          // for both). Either way, treat the section as incomplete and keep
          // collecting under the cap.
          let valid_tail = if end_pos > 0 {
            self.tool_call_buffer.get(end_pos..).map(str::to_owned)
          } else {
            None
          };
          let Some(trailing_token) = valid_tail else {
            self.cap_recover_into(&mut display);
            return display;
          };
          // `extend(empty)` is a no-op for body-rejected sections; the
          // section bytes are dropped, but the trailing suffix from the SAME
          // chunk is preserved verbatim (display or re-examination).
          self.tool_calls.extend(calls);
          self.tool_call_buffer.clear();
          self.state = State::Normal;
          if trailing_token.contains(start_char) {
            chunk = std::borrow::Cow::Owned(trailing_token);
            // Re-enter the loop with the trailing suffix as the next chunk.
          } else {
            push_display(&mut display, &trailing_token);
            return display;
          }
        }
        Ok(None) => {
          // Section not yet complete — confirmed tool call still collecting.
          // Cap the buffer so a never-terminated tagged call cannot OOM;
          // recovery here drops the malformed content.
          self.cap_recover_into(&mut display);
          return display;
        }
        Err(_) => {
          // Truly indeterminate: the parser knows the section is bad but
          // cannot identify its end boundary. Reset state UNCONDITIONALLY —
          // `cap_recover_into` is a no-op below `MAX_TOOL_CALL_BUFFER_BYTES`,
          // which would suppress every later output token until the cap
          // fires or EOS. `reset_on_malformed` drains immediately so the next
          // chunk starts fresh in `State::Normal`. Parsers that DO know the
          // section end MUST use `Ok(Some((Vec::new(), end_pos)))` instead —
          // this arm necessarily drops any same-chunk suffix because no
          // end_pos is available to slice it off.
          self.reset_on_malformed(&mut display);
          return display;
        }
      }
    }
  }
}

/// Append `text` to the optional display accumulator.
///
/// Empty `text` is ignored, so an accumulator that never receives real text
/// stays `None` (the "fully buffered" result); the `String` is only created
/// on the first non-empty append.
fn push_display(display: &mut Option<String>, text: &str) {
  if text.is_empty() {
    return;
  }
  match display {
    Some(acc) => acc.push_str(text),
    None => *display = Some(text.to_owned()),
  }
}

/// Find the first balanced top-level JSON-*object* span of `text`.
///
/// Returns `(obj_start, obj_end)` byte offsets: `text[obj_start]` is the
/// first `{`, and `text[obj_start..obj_end]` is the shortest complete
/// `{ ... }` object (depth returns to zero). Returning the start as well as
/// the end lets the caller slice the object and its trailing suffix without a
/// second brace search, and process any suffix separately.
///
/// Unlike the Swift `jsonBracesBalanced` byte-counter this is JSON-string
/// aware: `{`/`}` inside a `"..."` string literal — including after a `\"`
/// escape — are not counted, so input such as `{"unrelated":"}"}` is
/// balanced correctly.
///
/// Scanning starts at the first `{`; bytes before it are ignored (they are
/// the caller's leading text). `None` is returned when `text` contains no
/// `{`, or while the object is still open (depth never returns to zero),
/// meaning "keep buffering". Because the scan begins on an opening brace and
/// returns as soon as depth reaches zero, depth can never go negative.
/// Brace bytes are ASCII, so every returned index is a UTF-8 char boundary
/// even for multibyte content inside string values.
fn balanced_json_object_prefix(text: &str) -> Option<(usize, usize)> {
  let bytes = text.as_bytes();
  let start = bytes.iter().position(|&b| b == b'{')?;
  let mut depth: usize = 0;
  let mut in_string = false;
  let mut escaped = false;
  for (offset, &b) in bytes[start..].iter().enumerate() {
    if in_string {
      // Inside a string literal only the closing quote and escapes matter.
      match b {
        _ if escaped => escaped = false,
        b'\\' => escaped = true,
        b'"' => in_string = false,
        _ => {}
      }
      continue;
    }
    match b {
      b'"' => in_string = true,
      b'{' => depth += 1,
      b'}' => {
        // `depth >= 1` here: the first scanned byte is `{` and we return the
        // moment depth drops back to zero.
        depth -= 1;
        if depth == 0 {
          return Some((start, start + offset + 1));
        }
      }
      _ => {}
    }
  }
  None
}

/// Find the first balanced top-level JSON-*array* span of `text`.
///
/// Sibling of [`balanced_json_object_prefix`] with the bracket pair
/// `[` / `]`. Returns `(arr_start, arr_end)` byte offsets: `text[arr_start]`
/// is the first `[`, and `text[arr_start..arr_end]` is the shortest complete
/// `[ ... ]` array (depth returns to zero).
///
/// The scanner is JSON-string aware: `[` / `]` inside a `"..."` literal —
/// including after a `\"` escape — are not counted, so an in-element string
/// carrying an end tag (`</tool_call>`) or a bracket stays *inside* the
/// returned span. Object braces `{` / `}` are not tracked at all; only
/// square brackets drive the depth.
///
/// Scanning starts at the first `[`; bytes before it are ignored. `None` is
/// returned when `text` contains no `[`, or while the array is still open
/// (caller keeps buffering). Because the scan begins on an opening bracket
/// and returns as soon as depth reaches zero, depth can never go negative.
/// Bracket bytes are ASCII so every returned index is a UTF-8 char boundary
/// even for multibyte content inside string values.
fn balanced_json_array_prefix(text: &str) -> Option<(usize, usize)> {
  let bytes = text.as_bytes();
  let start = bytes.iter().position(|&b| b == b'[')?;
  let mut depth: usize = 0;
  let mut in_string = false;
  let mut escaped = false;
  for (offset, &b) in bytes[start..].iter().enumerate() {
    if in_string {
      // Inside a string literal only the closing quote and escapes matter.
      match b {
        _ if escaped => escaped = false,
        b'\\' => escaped = true,
        b'"' => in_string = false,
        _ => {}
      }
      continue;
    }
    match b {
      b'"' => in_string = true,
      b'[' => depth += 1,
      b']' => {
        // `depth >= 1` here: the first scanned byte is `[` and we return the
        // moment depth drops back to zero.
        depth -= 1;
        if depth == 0 {
          return Some((start, start + offset + 1));
        }
      }
      _ => {}
    }
  }
  None
}

/// How a tagged tool-call payload begins, judged by its first
/// non-whitespace byte. glm47 / longcat use this to choose a JSON balancer
/// (object vs array) before falling back to a plain substring search for
/// non-JSON payloads.
///
/// JSON whitespace per RFC 8259 §2 is exactly space, tab, LF, CR — all ASCII,
/// so classification is unaffected by multibyte content elsewhere.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonPayloadStart {
  /// Empty, all-whitespace, or starting with anything other than `{` / `[`.
  /// Plain substring end-tag search applies.
  None,
  /// First non-whitespace byte is `{` — use the
  /// [`balanced_json_object_prefix`] scan.
  Object,
  /// First non-whitespace byte is `[` — use the
  /// [`balanced_json_array_prefix`] scan. glm47's `glm_parse_json` fallback
  /// accepts `Value::Array`, so an array payload also needs a balanced
  /// (string-aware) scan rather than a substring search that would cut on an
  /// in-string end tag.
  Array,
}

/// Classify how `payload` (the bytes between `start_tag` and an
/// as-yet-unknown `end_tag`) begins, as a [`JsonPayloadStart`].
///
/// Leading space / tab / LF / CR are skipped and the next byte decides the
/// result. Panic-free and allocation-free.
fn classify_json_payload_start(payload: &str) -> JsonPayloadStart {
  let body = payload.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'));
  match body.bytes().next() {
    Some(b'{') => JsonPayloadStart::Object,
    Some(b'[') => JsonPayloadStart::Array,
    _ => JsonPayloadStart::None,
  }
}

/// Return the byte offset just past the `)]` that closes the *first* Pythonic
/// call body in `payload`, or `None` while the call is still open / there is
/// no `[`. Tracks `[`/`]` nesting depth and respects single- and double-
/// quoted strings (escape-aware: `\'`, `\"`, `\\`) so a `)]` literal *inside*
/// a string value never closes the call.
///
/// Pythonic grammar (Python `_tool_call_regex = \[(\w+)\((.*?)\)\]`): outer
/// `[` opens, `name(` follows, then the args region, then `)]` closes. The
/// scan does not validate the `name(` part; it only requires that the
/// outermost closing `]` is immediately preceded by `)`. A top-level
/// `[ ... ]` group that closes without a preceding `)` is skipped and the
/// scan resumes at the next `[`.
///
/// Repeating call sequences like `[a(),b()]` are supported only insofar as
/// the returned offset is the FIRST `)]` at bracket depth zero; the caller
/// then searches for `end_tag` after it.
fn pythonic_call_close(payload: &str) -> Option<usize> {
  let bytes = payload.as_bytes();
  // Start on the first `[`; without one there is no call yet.
  let mut idx = bytes.iter().position(|&b| b == b'[')?;
  let mut bracket_depth: usize = 0;
  let mut in_str: Option<u8> = None;
  let mut escaped = false;
  while idx < bytes.len() {
    let b = bytes[idx];
    if let Some(quote) = in_str {
      if escaped {
        escaped = false;
      } else if b == b'\\' {
        escaped = true;
      } else if b == quote {
        in_str = None;
      }
      idx += utf8_char_width(b);
      continue;
    }
    match b {
      b'\'' | b'"' => in_str = Some(b),
      b'[' => bracket_depth += 1,
      b']' => {
        // Depth is at least 1 here: every scan segment begins on a `[`, and
        // a segment ends (return or restart below) as soon as depth is zero.
        bracket_depth -= 1;
        if bracket_depth == 0 {
          // A `)` directly before this `]` is structural: a string cannot
          // end in `)`, it ends in its closing quote. `idx >= 1` because an
          // opening `[` precedes this position.
          if bytes[idx - 1] == b')' {
            return Some(idx + 1);
          }
          // Closed without `)` — the Pythonic regex would not match a call
          // here either. Skip ahead to the next `[` and start over; depth is
          // already zero and we are outside any string, so no other state
          // needs resetting.
          idx += 1;
          while idx < bytes.len() && bytes[idx] != b'[' {
            idx += utf8_char_width(bytes[idx]);
          }
          if idx >= bytes.len() {
            return None;
          }
          continue;
        }
      }
      _ => {}
    }
    idx += utf8_char_width(b);
  }
  None
}

/// Value-aware XML end-tag scan: walk `payload` looking for `end_tag`, but
/// SKIP any region delimited by `value_open` ... `value_close` (treat its
/// bytes as opaque value data). Returns the byte offset of the first
/// `end_tag` outside any value region, or `None` if not yet found / still
/// inside an unterminated value.
///
/// Handles glm47 + longcat XML-style fallback payloads: their values are
/// extracted via `<arg_value>...</arg_value>` (resp.
/// `<longcat_arg_value>...</longcat_arg_value>`) and a `</tool_call>` (resp.
/// `</longcat_tool_call>`) wrapper-end literal inside such a value is VALID
/// value text — NOT the wrapper close.
///
/// Algorithm, repeated from a cursor that starts at 0:
///
/// 1. Find the next `end_tag` at or after the cursor. If there is none the
///    answer is `None` — skipping value regions cannot make one appear.
/// 2. If a `value_open` starts strictly before that candidate, jump the
///    cursor past the matching `value_close` (returning `None` if it has not
///    arrived yet — the streaming caller retries on the next chunk) and
///    repeat.
/// 3. Otherwise the candidate lies outside every value region: return it.
///
/// An empty `value_open` cannot delimit anything, so it is treated as "no
/// value regions" (plain `end_tag` search); this also rules out a
/// zero-progress loop when both delimiters are empty.
fn xml_value_aware_end_tag_scan(
  payload: &str,
  value_open: &str,
  value_close: &str,
  end_tag: &str,
) -> Option<usize> {
  if value_open.is_empty() {
    return payload.find(end_tag);
  }
  let mut cursor = 0usize;
  loop {
    let tail = &payload[cursor..];
    let end_at = cursor + tail.find(end_tag)?;
    match tail.find(value_open).map(|rel| cursor + rel) {
      Some(open_at) if open_at < end_at => {
        // A value region opens BEFORE the end-tag candidate. Skip the whole
        // value body so an in-value `end_tag` literal cannot match. The
        // cursor strictly advances because `value_open` is non-empty.
        let body_at = open_at + value_open.len();
        let close_rel = payload[body_at..].find(value_close)?;
        cursor = body_at + close_rel + value_close.len();
      }
      _ => return Some(end_at),
    }
  }
}

/// Whether `buffer` and `tag` agree on every position they share (Swift
/// `partialMatch`), i.e. one of them is a prefix of the other.
///
/// So a `buffer` shorter than `tag` matches while it is still a prefix of
/// the tag, a `buffer` at least as long as `tag` matches iff it starts with
/// the tag, and an empty `buffer` (or empty `tag`) always matches.
fn partial_match(buffer: &str, tag: &str) -> bool {
  tag.starts_with(buffer) || buffer.starts_with(tag)
}

#[cfg(test)]
mod tests;