harn-vm 0.8.76

Async bytecode virtual machine for the Harn programming language
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
use std::sync::OnceLock;

use super::super::ts_value_parser::TsValueParser;

/// Remove leaked `<think>` / `</think>` markers from model output.
///
/// Some models (Qwen, Gemma) leak these markers into the response text when
/// the streaming transport merges the thinking and content channels, and the
/// stray tags then sit between or before valid tool invocations and break
/// tool-call parsing.
///
/// Behavior:
/// - Input without either marker is returned borrowed, with no allocation.
/// - A `<think>` that has a later `</think>` is removed together with
///   everything between the two markers.
/// - A `<think>` with no later `</think>` loses only the opening marker.
/// - Any `</think>` still left afterwards is deleted on its own.
///
/// Every pass searches again from the start of the buffer, so a marker that
/// only comes into existence once a removal joins its two halves is stripped
/// as well.
pub(super) fn strip_thinking_tags(text: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    const OPEN: &str = "<think>";
    const CLOSE: &str = "</think>";

    if !(text.contains(OPEN) || text.contains(CLOSE)) {
        return Cow::Borrowed(text);
    }

    let mut cleaned = String::from(text);

    // Pass 1: drop each opener, plus its whole span when a closer follows it.
    while let Some(open_at) = cleaned.find(OPEN) {
        let cut_len = match cleaned[open_at..].find(CLOSE) {
            Some(rel_close) => rel_close + CLOSE.len(),
            None => OPEN.len(),
        };
        cleaned.replace_range(open_at..open_at + cut_len, "");
    }

    // Pass 2: drop orphaned closers, repeating because deleting one can splice
    // a new closer together out of the surrounding bytes.
    while cleaned.contains(CLOSE) {
        cleaned = cleaned.replace(CLOSE, "");
    }

    Cow::Owned(cleaned)
}

/// Replace every `<tool_call>` / `</tool_call>` wrapper tag (and the compact
/// `<toolcall>` spelling) in a bare-mode body with a single newline.
///
/// Text-format models emit these wrappers unpredictably even when the prompt
/// asks for bare `name({ ... })` calls (OpenRouter `qwen/qwen3-coder` does this
/// on most turns). Left in place they break the bare scanner in two ways:
///   1. `<tool_call>run({...})</tool_call>` on one line hides the call, because
///      `run(` is no longer at a line start and the turn yields zero tool calls.
///   2. A wrapper tag alone on a line is not a call, so it leaks into the
///      visible prose as a `</tool_call>` / `_call>` fragment.
///
/// Substituting `\n` for each tag puts the inner call at a line start and keeps
/// the wrapper bytes out of `prose`.
///
/// Quoted string spans and complete `<<TAG ... TAG` heredoc bodies are copied
/// through verbatim: a wrapper-tag literal inside an argument value is file
/// content, not structure, and stripping it would corrupt the value. An
/// unterminated string or a malformed heredoc opener is not protected; the
/// scan simply continues character by character from that point.
///
/// Returns the input borrowed and unchanged when it contains no wrapper tag.
pub(super) fn strip_tool_call_wrappers(text: &str) -> std::borrow::Cow<'_, str> {
    use super::super::{
        TEXT_TOOL_CALL_CLOSE, TEXT_TOOL_CALL_CLOSE_COMPACT, TEXT_TOOL_CALL_OPEN,
        TEXT_TOOL_CALL_OPEN_COMPACT,
    };
    use std::borrow::Cow;

    const WRAPPERS: [&str; 4] = [
        TEXT_TOOL_CALL_OPEN,
        TEXT_TOOL_CALL_CLOSE,
        TEXT_TOOL_CALL_OPEN_COMPACT,
        TEXT_TOOL_CALL_CLOSE_COMPACT,
    ];

    if WRAPPERS.iter().all(|wrapper| !text.contains(*wrapper)) {
        return Cow::Borrowed(text);
    }

    let raw = text.as_bytes();
    let mut cleaned = String::with_capacity(text.len());
    let mut cursor = 0;
    while cursor < raw.len() {
        // A complete string literal or heredoc body starting here is content:
        // copy it untouched and resume right after it.
        let protected_end = match raw[cursor] {
            b'"' | b'\'' | b'`' => skip_string_span(text, cursor),
            b'<' if raw.get(cursor + 1) == Some(&b'<') => skip_heredoc_body(text, cursor),
            _ => None,
        };
        if let Some(resume) = protected_end {
            cleaned.push_str(&text[cursor..resume]);
            cursor = resume;
            continue;
        }

        let rest = &text[cursor..];
        let wrapper_here = WRAPPERS
            .iter()
            .copied()
            .find(|wrapper| rest.starts_with(*wrapper));
        if let Some(wrapper) = wrapper_here {
            cleaned.push('\n');
            cursor += wrapper.len();
            continue;
        }

        // Ordinary content: copy exactly one character so the cursor stays on
        // a UTF-8 boundary.
        let step = rest.chars().next().map_or(1, char::len_utf8);
        cleaned.push_str(&rest[..step]);
        cursor += step;
    }
    Cow::Owned(cleaned)
}

/// Match a balanced `<tag>...</tag>` block starting at byte offset `start` in
/// `src`.
///
/// Returns `Some((body, end))` where `body` is the text between the opening and
/// the first following closing tag, and `end` is the byte offset just past the
/// closing tag. Returns `None` when `src` does not begin with `<tag>` at
/// `start`, when no `</tag>` follows, or when `start` is past the end of `src`
/// or not on a UTF-8 character boundary (instead of panicking on the slice).
///
/// Nested same-name tags are not supported: the first `</tag>` always closes
/// the block. That is not needed for this grammar, and supporting it would
/// bloat the error surface for no real benefit.
pub(super) fn match_block<'a>(src: &'a str, start: usize, tag: &str) -> Option<(&'a str, usize)> {
    let open = format!("<{tag}>");
    // `get` rejects an out-of-range or mid-character `start` without panicking.
    let after_open = src.get(start..)?.strip_prefix(open.as_str())?;
    let body_start = start + open.len();
    let close = format!("</{tag}>");
    let body_len = after_open.find(close.as_str())?;
    let end = body_start + body_len + close.len();
    Some((&after_open[..body_len], end))
}

/// Render a parsed tool call back to the bare TS syntax used inside
/// `<tool_call>` tags, i.e. `name(<pretty-printed JSON args>)`. Used to build
/// the canonical history entry.
///
/// JSON object literals are accepted by the tool-call grammar, so
/// pretty-printed JSON is sufficient for replay. If serialization fails the
/// arguments are rendered as `{}`.
pub(super) fn render_canonical_call(name: &str, args: &serde_json::Value) -> String {
    let pretty = match serde_json::to_string_pretty(args) {
        Ok(json) => json,
        Err(_) => String::from("{}"),
    };
    let mut call = String::with_capacity(name.len() + pretty.len() + 2);
    call.push_str(name);
    call.push('(');
    call.push_str(&pretty);
    call.push(')');
    call
}

/// Return at most the first `max` characters of `s` as an owned string.
///
/// The limit counts Unicode scalar values (`char`s), not bytes, so the cut
/// never lands inside a multi-byte character. Truncation is silent: no
/// ellipsis or other marker is appended. A string of `max` characters or fewer
/// is returned whole.
pub(super) fn preview_str(s: &str, max: usize) -> String {
    // The byte offset of the character at index `max` (if there is one) is
    // exactly where the first `max` characters end. This avoids collecting the
    // whole string into a `Vec<char>` just to count it.
    match s.char_indices().nth(max) {
        Some((cut, _)) => s[..cut].to_string(),
        None => s.to_string(),
    }
}

/// Report whether the first byte at or after `open_paren_idx` that is not a
/// space or tab is `{`.
///
/// Only ` ` and `\t` are skipped; a newline or any other byte before the brace
/// yields `false`, as does an index at or past the end of `text`. The scan
/// starts at `open_paren_idx` itself.
///
/// NOTE(review): because the byte at `open_paren_idx` is inspected too, a `(`
/// at that index makes the result `false`. Callers presumably pass the offset
/// just past the paren; confirm against the call sites.
pub(super) fn has_object_literal_arg_start(text: &str, open_paren_idx: usize) -> bool {
    let first_significant = text
        .as_bytes()
        .get(open_paren_idx..)
        .and_then(|tail| tail.iter().find(|byte| !matches!(**byte, b' ' | b'\t')));
    first_significant == Some(&b'{')
}

/// Parse a TypeScript-ish object literal located at the beginning of `text`
/// (leading whitespace and comments are skipped first).
///
/// On success returns the parsed object together with the parser position
/// after the value, i.e. the bytes consumed through the closing `}`.
///
/// # Errors
///
/// Returns a model-facing `TOOL CALL PARSE ERROR` message, mentioning `name`,
/// when the value fails to parse or parses to something other than an object.
pub(super) fn parse_object_literal_from(
    text: &str,
    name: &str,
) -> Result<(serde_json::Value, usize), String> {
    let mut parser = TsValueParser::new(text);
    parser.skip_ws_and_comments();

    let parsed = match parser.parse_value() {
        Ok(value) => value,
        Err(error) => {
            return Err(format!(
                "TOOL CALL PARSE ERROR: `{name}{{...}}` — {error}. \
                 Tool arguments must be a TypeScript object literal."
            ));
        }
    };

    // Only an object is a valid argument payload; anything else is reported
    // back verbatim so the model can see what it actually sent.
    match parsed {
        object @ serde_json::Value::Object(_) => Ok((object, parser.position())),
        other => Err(format!(
            "TOOL CALL PARSE ERROR: `{name}{{...}}` — expected an object literal argument, got `{other}`."
        )),
    }
}

/// Unwrap `text` when, after trimming, it is exactly one Markdown code wrapper,
/// returning the trimmed inner content.
///
/// Two shapes are recognized:
/// - A triple-backtick fence: everything up to the first newline (the info
///   string) is discarded, and the remainder must end with a closing fence.
///   A fence opener with no newline, or with no closing fence at the very end,
///   yields `None`; the inline form is not tried as a fallback.
/// - A single-backtick inline span with no further backtick inside it.
///
/// Anything else yields `None`.
pub(super) fn unwrap_exact_code_wrapper(text: &str) -> Option<&str> {
    const FENCE: &str = "```";

    let candidate = text.trim();
    match candidate.strip_prefix(FENCE) {
        Some(fenced) => {
            let (_info_string, body) = fenced.split_once('\n')?;
            body.strip_suffix(FENCE).map(str::trim)
        }
        None => candidate
            .strip_prefix('`')
            .and_then(|tail| tail.strip_suffix('`'))
            .filter(|inline| !inline.contains('`'))
            .map(str::trim),
    }
}

/// Cap every run of consecutive `\n` characters at two, i.e. at most one blank
/// line. Used to tidy the `prose` output after tool-call ranges are excised,
/// so the removed bytes don't leave an ugly vertical gap between the
/// surrounding prose.
///
/// Only directly adjacent newlines form a run: any other character between
/// them, including a space or `\r`, resets the count.
pub(super) fn collapse_blank_lines(text: &str) -> String {
    let mut newline_run = 0usize;
    let kept = text.chars().filter(|&ch| {
        newline_run = if ch == '\n' { newline_run + 1 } else { 0 };
        // Non-newlines always pass (run is 0); newlines pass for the first two.
        newline_run <= 2
    });

    let mut collapsed = String::with_capacity(text.len());
    collapsed.extend(kept);
    collapsed
}

/// Delete empty Markdown fence pairs (```lang\n``` or ```lang\n\n```) from
/// `text`. Models sometimes emit these as failed tool-call attempts; if left
/// in prose they accumulate in conversation history and cause duplication
/// loops.
///
/// A pair is an opening fence line (optionally indented with spaces/tabs and
/// carrying any info string) followed by nothing but whitespace before the
/// closing fence. Trailing spaces/tabs and one newline after the closing fence
/// are removed along with it. The pattern is compiled once and cached.
pub(super) fn strip_empty_fences(text: &str) -> String {
    static EMPTY_FENCE_PATTERN: OnceLock<regex::Regex> = OnceLock::new();

    fn compile_pattern() -> regex::Regex {
        regex::Regex::new(r"(?m)^[ \t]*```[^\n]*\n\s*```[ \t]*\n?")
            .expect("strip_empty_fences regex is statically valid")
    }

    EMPTY_FENCE_PATTERN
        .get_or_init(compile_pattern)
        .replace_all(text, "")
        .into_owned()
}

/// A located `<<TAG ... TAG` heredoc body.
///
/// Derives `Debug`, `Clone`, `PartialEq`, and `Eq` so scan results can be
/// logged, compared in assertions, and used with `Result::unwrap_err` /
/// `Result::expect_err`, which require the `Ok` type to be `Debug`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct HeredocSpan {
    /// Byte range of the body content, with the trailing newline excluded
    /// (matching `Parser::parse_heredoc`'s returned string).
    pub content: std::ops::Range<usize>,
    /// Byte offset immediately after the closing tag on its line.
    pub end: usize,
    /// True when the heredoc body used literal JSON/string escape sequences
    /// (`\n`, `\t`, ...) as line separators instead of real newlines — the
    /// degraded form cheap models emit when they treat the heredoc body as a
    /// one-line JSON string. The caller must unescape `content` before use.
    /// A body that used real newlines (the normal case) is always `false`.
    pub escaped: bool,
}

/// Why a `<<` opener is not a complete heredoc. Carries the tag where one was
/// read so the value parser can reproduce its precise model-facing diagnostics.
///
/// Derives `Debug`, `Clone`, `PartialEq`, and `Eq` so a
/// `Result<_, HeredocError>` can be used with `unwrap`/`expect` (which require
/// the error type to be `Debug`) and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum HeredocError {
    /// `<<` was not followed by an identifier tag (e.g. a bare shift operator).
    MissingTag,
    /// The opening `<<TAG` line was not terminated by a newline.
    MissingNewline { tag: String },
    /// End of input reached before a line opening with the closing tag.
    Unterminated { tag: String },
}

/// The single authority for the `<<TAG\n...\nTAG` heredoc grammar, shared by
/// the TS value parser (`Parser::parse_heredoc`) and the top-level chunker
/// (`skip_heredoc_body`).
///
/// `start` must sit on the opening `<<`. The tag is any run of `[A-Za-z0-9_]`,
/// optionally wrapped in `'`/`"` (a missing closing quote is tolerated). An
/// optional `\r` and then a newline must follow the tag. The body runs up to
/// the first line that, after leading whitespace, begins with the tag at a
/// word boundary. Anything after the tag on the closing line is left to the
/// caller.
///
/// When the tag is followed by the two literal bytes `\` + `n` instead of a
/// real newline, scanning is delegated to the escaped-body scanner and the
/// returned span has `escaped == true`.
///
/// # Errors
///
/// - [`HeredocError::MissingTag`]: `start` is not on `<<`, or no tag follows.
/// - [`HeredocError::MissingNewline`]: the opener line is not newline-terminated.
/// - [`HeredocError::Unterminated`]: input ended before a closing tag line.
pub(crate) fn scan_heredoc(src: &str, start: usize) -> Result<HeredocSpan, HeredocError> {
    let bytes = src.as_bytes();
    let opens_heredoc = bytes.get(start) == Some(&b'<') && bytes.get(start + 1) == Some(&b'<');
    if !opens_heredoc {
        return Err(HeredocError::MissingTag);
    }

    // Optional quote in front of the tag.
    let mut cursor = start + 2;
    let opening_quote = bytes
        .get(cursor)
        .copied()
        .filter(|byte| matches!(byte, b'\'' | b'"'));
    if opening_quote.is_some() {
        cursor += 1;
    }

    // The tag itself: a non-empty run of ASCII word bytes.
    let tag_start = cursor;
    cursor += bytes[tag_start..]
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || **byte == b'_')
        .count();
    if cursor == tag_start {
        return Err(HeredocError::MissingTag);
    }
    let tag = src[tag_start..cursor].to_string();

    // Matching closing quote (only when the tag was opened with one), then an
    // optional carriage return before the line break.
    if opening_quote.is_some() && bytes.get(cursor).copied() == opening_quote {
        cursor += 1;
    }
    if bytes.get(cursor) == Some(&b'\r') {
        cursor += 1;
    }

    match bytes.get(cursor) {
        Some(b'\n') => cursor += 1,
        // Degraded form: cheap models (e.g. qwen3.6) JSON-escape the heredoc
        // body, so the break after the tag is a literal backslash + 'n'.
        // Recover by scanning the escaped body; the caller unescapes `content`.
        Some(b'\\') if bytes.get(cursor + 1) == Some(&b'n') => {
            return scan_escaped_heredoc_body(src, cursor, tag);
        }
        // A genuinely truncated opener (`<<EOF` then end of input, a real
        // shift operator, etc.).
        _ => return Err(HeredocError::MissingNewline { tag }),
    }

    // Walk the body one line at a time, looking for the closing tag line.
    let content_start = cursor;
    let mut line_start = content_start;
    while line_start < bytes.len() {
        let line_end = src[line_start..]
            .find('\n')
            .map_or(src.len(), |rel| line_start + rel);
        let line = &src[line_start..line_end];
        let indent = line.len() - line.trim_start().len();
        let closes_here = line[indent..]
            .strip_prefix(tag.as_str())
            .is_some_and(|rest| {
                rest.chars()
                    .next()
                    .is_none_or(|ch| !(ch.is_ascii_alphanumeric() || ch == '_'))
            });

        if closes_here {
            // Drop the line terminator (`\n` or `\r\n`) that precedes the
            // closing line from the reported content range.
            let raw = &src[content_start..line_start];
            let body = raw.strip_suffix('\n').unwrap_or(raw);
            let body = body.strip_suffix('\r').unwrap_or(body);
            return Ok(HeredocSpan {
                content: content_start..content_start + body.len(),
                end: line_start + indent + tag.len(),
                escaped: false,
            });
        }

        if line_end == src.len() {
            // Last line, and it was not the closing tag.
            return Err(HeredocError::Unterminated { tag });
        }
        line_start = line_end + 1;
    }
    Err(HeredocError::Unterminated { tag })
}

/// Scan a JSON/string-escaped heredoc body whose line breaks are the two
/// literal bytes `\` + `n` rather than real newlines.
///
/// `esc_nl_start` must sit on the `\` of the literal `\n` that immediately
/// follows the opening `<<TAG`. The closing tag is looked for on each
/// literal-`\n`-delimited "line" that, after optional leading whitespace,
/// begins with `tag` at a word boundary, mirroring the real-newline grammar.
/// The final "line" (with no trailing literal `\n`) is checked too.
///
/// On success the returned `content` range is the still-escaped body (callers
/// unescape it via [`unescape_heredoc_body`]) and `escaped` is `true`.
/// Otherwise the result is [`HeredocError::Unterminated`].
fn scan_escaped_heredoc_body(
    src: &str,
    esc_nl_start: usize,
    tag: String,
) -> Result<HeredocSpan, HeredocError> {
    let bytes = src.as_bytes();
    // The body begins right after the leading literal `\n`.
    let content_start = esc_nl_start + 2;
    let mut cursor = content_start;
    // First content byte of the escaped "line" currently being scanned.
    let mut line_start = content_start;

    while cursor < bytes.len() {
        match (bytes[cursor], bytes.get(cursor + 1)) {
            // `\\` decodes to a single backslash: step over both bytes so a
            // following `n` (e.g. Go source `"...\n"`, on the wire `\\n`) is
            // not mistaken for a line separator. This keeps the splitting
            // consistent with `unescape_heredoc_body`.
            (b'\\', Some(b'\\')) => cursor += 2,
            // Literal `\n`: the current escaped line ends here.
            (b'\\', Some(b'n')) => {
                if let Some(span) = escaped_close_at(src, content_start, line_start, cursor, &tag)
                {
                    return Ok(span);
                }
                cursor += 2;
                line_start = cursor;
            }
            _ => cursor += src[cursor..].chars().next().map_or(1, char::len_utf8),
        }
    }

    // The closing tag may sit on the last escaped line with no trailing `\n`
    // (e.g. `...\nEOF` just before the closing quote/paren).
    match escaped_close_at(src, content_start, line_start, bytes.len(), &tag) {
        Some(span) => Ok(span),
        None => Err(HeredocError::Unterminated { tag }),
    }
}

/// Decide whether the escaped "line" `src[line_start..line_end]` is the
/// closing tag line of an escaped heredoc.
///
/// `line_end` is the offset of the separating literal `\n` (or the end of the
/// body). The line closes the heredoc when, after leading whitespace, it
/// starts with `tag` and the tag is not immediately followed by an ASCII
/// alphanumeric or `_`.
///
/// On a match, returns a [`HeredocSpan`] with `escaped == true` whose `content`
/// runs from `content_start` up to (not including) the literal `\n` that
/// introduced the closing line, and whose `end` is just past the tag plus one
/// optional trailing literal `\n`. Returns `None` when the line is body text.
fn escaped_close_at(
    src: &str,
    content_start: usize,
    line_start: usize,
    line_end: usize,
    tag: &str,
) -> Option<HeredocSpan> {
    let line = &src[line_start..line_end];
    let unindented = line.trim_start();
    let indent = line.len() - unindented.len();

    let after_tag = unindented.strip_prefix(tag)?;
    let tag_continues = after_tag
        .chars()
        .next()
        .is_some_and(|ch| ch.is_ascii_alphanumeric() || ch == '_');
    if tag_continues {
        return None;
    }

    // Exclude the literal `\n` (two bytes) that introduced the closing line,
    // matching the real-newline grammar. When the closing line is the very
    // first body line there is nothing to exclude, so clamp to `content_start`
    // to avoid an inverted range.
    let content_end = line_start.saturating_sub(2).max(content_start);

    // With real newlines the trailing newline and tail (`EOF\n})`) are left to
    // the outer parser's `skip_ws_and_comments`. The outer parser does NOT
    // treat a literal `\` + `n` as whitespace, so swallow one optional
    // trailing literal `\n` here and leave `end` on the structural tail.
    let tag_end = line_start + indent + tag.len();
    let end = if src.as_bytes()[tag_end..].starts_with(b"\\n") {
        tag_end + 2
    } else {
        tag_end
    };

    Some(HeredocSpan {
        content: content_start..content_end,
        end,
        escaped: true,
    })
}

/// Decode a JSON/string-escaped heredoc body recovered from the degraded
/// literal-`\n` form.
///
/// Recognized escapes are `\n`, `\t`, `\r`, `\"`, and `\\`. Any other escape
/// is copied through verbatim, backslash and following character alike, so
/// unrecognized sequences in code survive unchanged. A lone backslash at the
/// very end of the input is preserved as-is.
pub(crate) fn unescape_heredoc_body(raw: &str) -> String {
    let mut decoded = String::with_capacity(raw.len());
    let mut rest = raw;

    // Copy the plain text in front of each backslash in one go, then decode
    // the escape that follows it.
    while let Some(slash_at) = rest.find('\\') {
        decoded.push_str(&rest[..slash_at]);
        let mut after_slash = rest[slash_at + 1..].chars();
        match after_slash.next() {
            Some('n') => decoded.push('\n'),
            Some('t') => decoded.push('\t'),
            Some('r') => decoded.push('\r'),
            Some('"') => decoded.push('"'),
            Some('\\') => decoded.push('\\'),
            Some(unknown) => {
                decoded.push('\\');
                decoded.push(unknown);
            }
            None => decoded.push('\\'),
        }
        rest = after_slash.as_str();
    }

    decoded.push_str(rest);
    decoded
}

/// Step over a `<<TAG\n...\nTAG` heredoc starting at `start` in `src`.
///
/// Returns the byte position immediately after the closing tag as reported by
/// [`scan_heredoc`], or `None` when the heredoc is malformed or unterminated.
/// Used by the top-level scanner so a stray-bytes chunker doesn't truncate
/// bare `name({ key: <<EOF\n...\nEOF })` tool calls at the `<<` opener.
pub(super) fn skip_heredoc_body(src: &str, start: usize) -> Option<usize> {
    match scan_heredoc(src, start) {
        Ok(span) => Some(span.end),
        Err(_) => None,
    }
}

/// Outcome of searching for a tag while stepping over heredoc bodies.
///
/// Derives `Debug`, `Clone`, `Copy`, `PartialEq`, and `Eq`: the enum carries at
/// most one `usize`, so it is cheap to copy and convenient to compare and log.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum CloseScan {
    /// The tag begins at this byte offset, outside any heredoc body.
    Found(usize),
    /// A `<<TAG` heredoc opened but its closing tag line hasn't arrived yet —
    /// the streaming caller must wait for more input; a buffered caller treats
    /// this as "no usable close" (the block is truncated mid-heredoc).
    NeedMore,
    /// Scanned to the end without finding the tag outside a heredoc.
    NotFound,
}

/// Step over a `"..."`, `'...'`, or `` `...` `` string span that opens at byte
/// offset `start`.
///
/// Returns the byte offset just past the matching closing quote. A backslash
/// escapes the character after it, so an escaped quote does not close the
/// span. Returns `None` when `start` is not on one of the three quote
/// characters (including when it is out of range) or when the string is
/// unterminated.
///
/// This is what lets the close-tag scan treat a `<<TAG` or a `</tool_call>`
/// *inside a quoted argument* as content, not structure.
fn skip_string_span(src: &str, start: usize) -> Option<usize> {
    let quote = src
        .as_bytes()
        .get(start)
        .copied()
        .filter(|byte| matches!(byte, b'"' | b'\'' | b'`'))
        .map(char::from)?;

    // The opening quote is a single ASCII byte, so the body starts one byte on.
    let body_start = start + 1;
    let mut body = src[body_start..].char_indices();
    while let Some((offset, ch)) = body.next() {
        if ch == '\\' {
            // Whatever follows the backslash is escaped and can never close
            // the span.
            body.next();
        } else if ch == quote {
            return Some(body_start + offset + 1);
        }
    }
    None
}

/// Locate `needle` in `src[from..]` while treating quoted string spans and
/// complete `<<TAG ... TAG` heredoc bodies as opaque content.
///
/// An occurrence inside either (a `</tool_call>` a model wrote as file
/// content, or a bash `<<EOF` inside a `command` string) is therefore not
/// reported as the structural close. Outcomes:
/// - [`CloseScan::Found`] with the byte offset where `needle` begins;
/// - [`CloseScan::NeedMore`] when a string is unterminated, or a heredoc
///   opener has a tag but lacks its newline or closing tag line;
/// - [`CloseScan::NotFound`] when the scan reaches the end of `src`.
///
/// A `<<` that is not followed by a tag is ordinary content. This is the one
/// place that knows "where does a tagged block really end", shared by the
/// buffered matcher, the truncation detector, and the streaming scanner.
pub(super) fn find_close_tag(src: &str, from: usize, needle: &str) -> CloseScan {
    let bytes = src.as_bytes();
    let mut cursor = from;
    while cursor < bytes.len() {
        let byte = bytes[cursor];

        if matches!(byte, b'"' | b'\'' | b'`') {
            // Unterminated string: streaming waits, a buffered caller treats
            // the block as truncated mid-string.
            let Some(after_string) = skip_string_span(src, cursor) else {
                return CloseScan::NeedMore;
            };
            cursor = after_string;
            continue;
        }

        if byte == b'<' && bytes.get(cursor + 1) == Some(&b'<') {
            match scan_heredoc(src, cursor) {
                Ok(span) => {
                    cursor = span.end;
                    continue;
                }
                // Bare `<<` with no tag: not a heredoc, scan it as content.
                Err(HeredocError::MissingTag) => {}
                Err(HeredocError::MissingNewline { .. } | HeredocError::Unterminated { .. }) => {
                    return CloseScan::NeedMore;
                }
            }
        }

        let rest = &src[cursor..];
        if rest.starts_with(needle) {
            return CloseScan::Found(cursor);
        }
        cursor += rest.chars().next().map_or(1, char::len_utf8);
    }
    CloseScan::NotFound
}

/// Heredoc-aware variant of [`match_block`] for the `<tool_call>` tags.
///
/// The closing `</tag>` is located with [`find_close_tag`], which skips quoted
/// strings and `<<TAG ... TAG` bodies, so a literal close tag inside a heredoc
/// argument doesn't shred the call. Scoped to the tool-call tags:
/// `match_block` stays a cheap `find` for the prose/done blocks that never
/// carry heredocs.
///
/// Returns `Some((body, end))` with the text between the tags and the byte
/// offset just past the closing tag. Returns `None` when `src` does not begin
/// with `<tag>` at `start`, when no usable close exists (not found, or the
/// block is truncated inside a string/heredoc), or when `start` is past the
/// end of `src` or not on a UTF-8 character boundary (instead of panicking on
/// the slice).
pub(super) fn match_tool_call_block<'a>(
    src: &'a str,
    start: usize,
    tag: &str,
) -> Option<(&'a str, usize)> {
    let open = format!("<{tag}>");
    // `get` rejects an out-of-range or mid-character `start` without panicking.
    if !src.get(start..)?.starts_with(open.as_str()) {
        return None;
    }
    let body_start = start + open.len();
    let close = format!("</{tag}>");
    match find_close_tag(src, body_start, &close) {
        CloseScan::Found(close_at) => Some((&src[body_start..close_at], close_at + close.len())),
        CloseScan::NeedMore | CloseScan::NotFound => None,
    }
}

/// Measure the JavaScript-ish identifier at the front of `bytes`.
///
/// The first byte must be an ASCII letter, `_`, or `$`; following bytes may
/// additionally be ASCII digits. Returns the identifier's length in bytes, or
/// `None` when `bytes` is empty or does not start with a valid identifier
/// byte. Only ASCII is recognized.
pub(crate) fn ident_length(bytes: &[u8]) -> Option<usize> {
    let (&head, tail) = bytes.split_first()?;
    let valid_start = head.is_ascii_alphabetic() || head == b'_' || head == b'$';
    if !valid_start {
        return None;
    }
    let tail_len = tail
        .iter()
        .copied()
        .take_while(|&byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'$')
        .count();
    Some(1 + tail_len)
}

/// Parse a complete `name(args)` TS call expression located at the beginning
/// of `text`.
///
/// The byte at offset `name.len()` must be `(`, so `text` is expected to begin
/// with the tool name itself. An empty argument list `name()` is legal and
/// yields an empty object.
///
/// On success returns the argument object and the number of bytes consumed,
/// counted from the start of the name through the closing `)`.
///
/// # Errors
///
/// Returns a model-facing `TOOL CALL PARSE ERROR` diagnostic when the `(` is
/// missing, the argument fails to parse, the closing `)` is missing, or the
/// argument is not an object literal.
pub(crate) fn parse_ts_call_from(
    text: &str,
    name: String,
) -> Result<(serde_json::Value, usize), String> {
    let paren_open = name.len();
    if text.as_bytes().get(paren_open) != Some(&b'(') {
        return Err(format!(
            "TOOL CALL PARSE ERROR: `{name}(` expected immediately after the tool name."
        ));
    }

    // Offset of the first byte inside the parentheses.
    let args_offset = paren_open + 1;
    let mut parser = TsValueParser::new(&text[args_offset..]);
    parser.skip_ws_and_comments();

    let args_value = if parser.peek() == Some(b')') {
        // `name()`: no arguments at all is treated as an empty object.
        serde_json::Value::Object(serde_json::Map::new())
    } else {
        match parser.parse_value() {
            Ok(value) => value,
            Err(error) => {
                return Err(format!(
                    "TOOL CALL PARSE ERROR: `{name}(...)` — {error}. \
                     Tool arguments must be a TypeScript object literal: `{{ key: value, key: value }}`."
                ));
            }
        }
    };

    parser.skip_ws_and_comments();
    if parser.peek() != Some(b')') {
        return Err(format!(
            "TOOL CALL PARSE ERROR: `{name}(...)` — missing closing `)`. \
             Every tool call must be a complete TypeScript expression."
        ));
    }
    // Name and `(`, then everything the parser consumed, then the `)` itself.
    let total_consumed = args_offset + parser.position() + 1;

    // Tool contract: every call takes a single object literal. Bare
    // positional scalars error precisely rather than being promoted.
    match args_value {
        object @ serde_json::Value::Object(_) => Ok((object, total_consumed)),
        other => Err(format!(
            "TOOL CALL PARSE ERROR: `{name}(...)` — expected an object literal argument, \
             got `{other}`. Wrap the value in braces: `{name}({{ key: value }})`."
        )),
    }
}