use std::sync::OnceLock;
use super::super::ts_value_parser::TsValueParser;
/// Removes `<think>…</think>` reasoning spans from `text`.
///
/// When neither tag occurs the input is returned borrowed, without
/// allocating. Otherwise an owned copy is edited in three steps:
///
/// 1. every `<think>` is deleted together with everything up to and
///    including the next `</think>` that follows it;
/// 2. a `<think>` with no `</think>` after it loses only the tag itself
///    (the text that followed it is kept);
/// 3. any `</think>` still present afterwards is removed.
pub(super) fn strip_thinking_tags(text: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    const OPEN: &str = "<think>";
    const CLOSE: &str = "</think>";

    if !(text.contains(OPEN) || text.contains(CLOSE)) {
        return Cow::Borrowed(text);
    }

    let mut cleaned = String::from(text);
    while let Some(open_at) = cleaned.find(OPEN) {
        // Length of the span to cut, measured from the opening tag.
        let cut_len = match cleaned[open_at..].find(CLOSE) {
            Some(rel_close) => rel_close + CLOSE.len(),
            None => OPEN.len(),
        };
        cleaned.replace_range(open_at..open_at + cut_len, "");
    }

    // Removing one closing tag can splice a new one together, hence the loop.
    while cleaned.contains(CLOSE) {
        cleaned = cleaned.replace(CLOSE, "");
    }
    Cow::Owned(cleaned)
}
/// Replaces every tool-call wrapper tag in `text` with a single newline.
///
/// The recognised tags are the four `TEXT_TOOL_CALL_*` constants imported
/// from the grandparent module. Text is returned borrowed when none of them
/// occurs.
///
/// Tags that sit inside a quoted string (`"`, `'` or backtick delimited) or
/// inside a heredoc are left alone: such spans are copied through verbatim.
/// A quote or `<<` that does not start a complete string / heredoc is
/// treated as ordinary text.
pub(super) fn strip_tool_call_wrappers(text: &str) -> std::borrow::Cow<'_, str> {
    use super::super::{
        TEXT_TOOL_CALL_CLOSE, TEXT_TOOL_CALL_CLOSE_COMPACT, TEXT_TOOL_CALL_OPEN,
        TEXT_TOOL_CALL_OPEN_COMPACT,
    };
    use std::borrow::Cow;

    const TAGS: [&str; 4] = [
        TEXT_TOOL_CALL_OPEN,
        TEXT_TOOL_CALL_CLOSE,
        TEXT_TOOL_CALL_OPEN_COMPACT,
        TEXT_TOOL_CALL_CLOSE_COMPACT,
    ];

    if TAGS.iter().all(|tag| !text.contains(tag)) {
        return Cow::Borrowed(text);
    }

    let bytes = text.as_bytes();
    let mut stripped = String::with_capacity(text.len());
    let mut cursor = 0;
    while cursor < text.len() {
        // End of a string literal or heredoc starting here, if there is one.
        let protected_end = match bytes[cursor] {
            b'"' | b'\'' | b'`' => skip_string_span(text, cursor),
            b'<' if bytes.get(cursor + 1) == Some(&b'<') => skip_heredoc_body(text, cursor),
            _ => None,
        };

        let rest = &text[cursor..];
        if let Some(end) = protected_end {
            stripped.push_str(&text[cursor..end]);
            cursor = end;
        } else if let Some(tag) = TAGS.iter().find(|tag| rest.starts_with(**tag)) {
            stripped.push('\n');
            cursor += tag.len();
        } else {
            // Copy exactly one character so `cursor` stays on a char boundary.
            let width = rest.chars().next().map_or(1, char::len_utf8);
            stripped.push_str(&rest[..width]);
            cursor += width;
        }
    }
    Cow::Owned(stripped)
}
/// Matches a `<tag>…</tag>` block that begins exactly at byte offset `start`.
///
/// Returns the body between the tags and the byte offset just past the
/// closing tag. The closing tag is the first textual occurrence of
/// `</tag>` after the opener; no nesting or quoting is taken into account.
///
/// Returns `None` when `src` does not start with `<tag>` at `start`, when no
/// closing tag follows, or when `start` is past the end of `src` or not on a
/// character boundary.
pub(super) fn match_block<'a>(src: &'a str, start: usize, tag: &str) -> Option<(&'a str, usize)> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");

    // `get` rather than `src[start..]`: a bad offset becomes `None`, not a panic.
    let after_open = src.get(start..)?.strip_prefix(open.as_str())?;
    let body_len = after_open.find(close.as_str())?;

    let body_end = start + open.len() + body_len;
    Some((&after_open[..body_len], body_end + close.len()))
}
/// Renders a tool call as `name(<pretty-printed JSON args>)`.
///
/// If the arguments cannot be serialised, `{}` is used in their place.
pub(super) fn render_canonical_call(name: &str, args: &serde_json::Value) -> String {
    let rendered_args = match serde_json::to_string_pretty(args) {
        Ok(json) => json,
        Err(_) => String::from("{}"),
    };

    let mut call = String::with_capacity(name.len() + rendered_args.len() + 2);
    call.push_str(name);
    call.push('(');
    call.push_str(&rendered_args);
    call.push(')');
    call
}
/// Returns `s` shortened to at most `max` characters for display.
///
/// `max` counts `char`s, not bytes. When `s` has more than `max` characters
/// the first `max` are kept and a single `…` is appended; otherwise `s` is
/// returned unchanged (as an owned copy).
pub(super) fn preview_str(s: &str, max: usize) -> String {
    // The `max`-th char (0-based) exists only if `s` is longer than `max`;
    // its byte offset is where to cut. This stops scanning at the cut point
    // and avoids materialising every char of a long input.
    match s.char_indices().nth(max) {
        Some((cut, _)) => format!("{}…", &s[..cut]),
        None => s.to_string(),
    }
}
/// Reports whether the first byte at or after `open_paren_idx` that is not a
/// space or tab is `{`.
///
/// Scanning begins AT `open_paren_idx` (that byte is inspected, not skipped),
/// and newlines are not skipped. An index past the end of `text` yields
/// `false`.
///
/// NOTE(review): despite the parameter name, a `(` at `open_paren_idx` makes
/// this return `false`; presumably callers pass the position where the paren
/// would have been — confirm against call sites.
pub(super) fn has_object_literal_arg_start(text: &str, open_paren_idx: usize) -> bool {
    let tail = text.as_bytes().get(open_paren_idx..).unwrap_or(&[]);
    let first_significant = tail.iter().find(|byte| !matches!(**byte, b' ' | b'\t'));
    first_significant == Some(&b'{')
}
/// Parses a TypeScript-style object literal from the start of `text`.
///
/// Leading whitespace and comments are skipped first. On success returns the
/// parsed object together with the parser position after the value (as
/// reported by `TsValueParser::position`).
///
/// # Errors
///
/// Returns a `TOOL CALL PARSE ERROR` message mentioning `name` when the value
/// fails to parse, or when it parses to something other than an object.
pub(super) fn parse_object_literal_from(
    text: &str,
    name: &str,
) -> Result<(serde_json::Value, usize), String> {
    let mut parser = TsValueParser::new(text);
    parser.skip_ws_and_comments();

    let parsed = match parser.parse_value() {
        Ok(value) => value,
        Err(error) => {
            return Err(format!(
                "TOOL CALL PARSE ERROR: `{name}{{...}}` — {error}. \
                 Tool arguments must be a TypeScript object literal."
            ));
        }
    };

    match parsed {
        object @ serde_json::Value::Object(_) => Ok((object, parser.position())),
        other => Err(format!(
            "TOOL CALL PARSE ERROR: `{name}{{...}}` — expected an object literal argument, got `{other}`."
        )),
    }
}
/// Unwraps `text` when, after trimming, it is exactly one code wrapper.
///
/// Two shapes are accepted:
///
/// * a fenced block: starts with three backticks, has a newline ending the
///   opener line (whatever follows the backticks on that line is discarded),
///   and ends with three backticks;
/// * an inline span: starts and ends with a single backtick and contains no
///   other backtick.
///
/// The inner text is returned trimmed. Anything else yields `None`; text
/// that starts with three backticks is only ever considered as a fenced
/// block.
pub(super) fn unwrap_exact_code_wrapper(text: &str) -> Option<&str> {
    let body = text.trim();

    if let Some(fenced) = body.strip_prefix("```") {
        let (_opener_line, payload) = fenced.split_once('\n')?;
        return payload.strip_suffix("```").map(str::trim);
    }

    body.strip_prefix('`')
        .and_then(|after_open| after_open.strip_suffix('`'))
        .filter(|inner| !inner.contains('`'))
        .map(str::trim)
}
/// Caps every run of consecutive `\n` characters at two.
///
/// Only directly adjacent newlines count as a run: any other character,
/// including spaces or `\r`, starts a new run.
pub(super) fn collapse_blank_lines(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    let mut run = 0usize;
    for ch in text.chars() {
        run = if ch == '\n' { run + 1 } else { 0 };
        // Third and later newlines of a run are dropped.
        if run < 3 {
            collapsed.push(ch);
        }
    }
    collapsed
}
/// Removes fenced code blocks whose body is empty or whitespace-only.
///
/// A match is: an opener line (optional leading spaces/tabs, three
/// backticks, then anything up to the newline), followed by only
/// whitespace, followed by three backticks that END their line (optional
/// trailing spaces/tabs, optional `\r`). The closing line's newline is
/// removed along with the block.
///
/// The closing fence must end its line. Without that requirement the
/// closing fence of one block plus the opener of the next block (for
/// example a line starting with three backticks and `python`), separated
/// only by blank lines, would look like an empty fence and both would be
/// deleted, merging two real code blocks.
///
/// NOTE: a bare closing fence followed, after blank lines only, by a bare
/// opening fence (no info string) is still indistinguishable from an empty
/// fence for this pattern.
pub(super) fn strip_empty_fences(text: &str) -> String {
    static EMPTY_FENCE_RE: OnceLock<regex::Regex> = OnceLock::new();
    let re = EMPTY_FENCE_RE.get_or_init(|| {
        // `(?m)` makes `^`/`$` line anchors; `\r?$` accepts CRLF line ends.
        regex::Regex::new(r"(?m)^[ \t]*```[^\n]*\n\s*```[ \t]*\r?$\n?")
            .expect("strip_empty_fences regex is statically valid")
    });
    re.replace_all(text, "").into_owned()
}
/// Location of a heredoc found by `scan_heredoc`, as byte offsets into the
/// scanned source string.
pub(crate) struct HeredocSpan {
/// Byte range of the heredoc body, excluding the line break (or, for the
/// escaped form, the `\n` escape) that precedes the closing tag.
pub content: std::ops::Range<usize>,
/// Byte offset just past the closing tag. For the escaped form this also
/// skips a `\n` escape that immediately follows the tag.
pub end: usize,
/// `true` when the heredoc used literal `\n` escape sequences instead of
/// real newlines; `content` is then still in escaped form.
pub escaped: bool,
}
/// Reasons `scan_heredoc` can fail to recognise a complete heredoc.
pub(crate) enum HeredocError {
/// The input does not start with `<<` at the given offset, or no tag
/// characters follow it.
MissingTag,
/// The opener `<<TAG` is not followed by a newline (or a literal `\n`
/// escape).
MissingNewline { tag: String },
/// No line starting with the closing tag was found before the end of input.
Unterminated { tag: String },
}
/// Scans a heredoc (`<<TAG`, `<<'TAG'` or `<<"TAG"`) starting at byte offset
/// `start` of `src`.
///
/// The tag is one or more ASCII alphanumerics or `_`. After the opener an
/// optional `\r` and then a newline must follow; the body runs until the
/// first line that, after leading whitespace, begins with the tag followed
/// by a non-identifier character or the end of the line.
///
/// If a literal backslash-`n` follows the opener instead of a real newline,
/// scanning is delegated to `scan_escaped_heredoc_body`.
///
/// # Errors
///
/// * `MissingTag` — no `<<` at `start`, or no tag characters after it.
/// * `MissingNewline` — the opener is not followed by a (real or escaped)
///   newline.
/// * `Unterminated` — the input ends before a closing line is found.
pub(crate) fn scan_heredoc(src: &str, start: usize) -> Result<HeredocSpan, HeredocError> {
    fn is_tag_byte(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || byte == b'_'
    }

    let bytes = src.as_bytes();
    let at = |idx: usize| bytes.get(idx).copied();

    if at(start) != Some(b'<') || at(start + 1) != Some(b'<') {
        return Err(HeredocError::MissingTag);
    }

    // Optional opening quote around the tag.
    let mut cursor = start + 2;
    let opening_quote = at(cursor).filter(|byte| matches!(byte, b'\'' | b'"'));
    if opening_quote.is_some() {
        cursor += 1;
    }

    let tag_start = cursor;
    while at(cursor).is_some_and(is_tag_byte) {
        cursor += 1;
    }
    if cursor == tag_start {
        return Err(HeredocError::MissingTag);
    }
    let tag = src[tag_start..cursor].to_string();

    // A matching closing quote is consumed when present; it is not required.
    if opening_quote.is_some() && at(cursor) == opening_quote {
        cursor += 1;
    }
    if at(cursor) == Some(b'\r') {
        cursor += 1;
    }
    match at(cursor) {
        Some(b'\n') => cursor += 1,
        Some(b'\\') if at(cursor + 1) == Some(b'n') => {
            return scan_escaped_heredoc_body(src, cursor, tag);
        }
        _ => return Err(HeredocError::MissingNewline { tag }),
    }

    let content_start = cursor;
    let mut line_start = content_start;
    for piece in src[content_start..].split_inclusive('\n') {
        let line = piece.strip_suffix('\n').unwrap_or(piece);
        let candidate = line.trim_start();
        let indent = line.len() - candidate.len();

        let closes = candidate.strip_prefix(tag.as_str()).is_some_and(|rest| {
            rest.chars()
                .next()
                .is_none_or(|ch| !(ch.is_ascii_alphanumeric() || ch == '_'))
        });
        if closes {
            // Drop the line break that separates the body from the closer.
            let raw = &src[content_start..line_start];
            let body = raw.strip_suffix('\n').unwrap_or(raw);
            let body = body.strip_suffix('\r').unwrap_or(body);
            return Ok(HeredocSpan {
                content: content_start..content_start + body.len(),
                end: line_start + indent + tag.len(),
                escaped: false,
            });
        }
        line_start += piece.len();
    }
    Err(HeredocError::Unterminated { tag })
}
/// Scans the body of a heredoc whose line breaks are literal backslash-`n`
/// escape sequences rather than real newlines.
///
/// `esc_nl_start` is the offset of the `\n` escape that follows the opener;
/// the body starts two bytes later. Each `\n` escape ends a logical line,
/// and a logical line that begins (after whitespace) with `tag` closes the
/// heredoc. An escaped backslash (`\\`) is skipped as a unit so that a
/// following `n` is not mistaken for a line break. The text remaining at end
/// of input is checked as a final logical line.
///
/// # Errors
///
/// Returns `Unterminated` when no closing line is found.
fn scan_escaped_heredoc_body(
    src: &str,
    esc_nl_start: usize,
    tag: String,
) -> Result<HeredocSpan, HeredocError> {
    let bytes = src.as_bytes();
    let content_start = esc_nl_start + 2;
    let mut cursor = content_start;
    let mut line_start = content_start;

    while cursor < bytes.len() {
        // Byte following a backslash at `cursor`, if `cursor` is a backslash.
        let escaped = if bytes[cursor] == b'\\' {
            bytes.get(cursor + 1).copied()
        } else {
            None
        };
        match escaped {
            Some(b'\\') => cursor += 2,
            Some(b'n') => {
                if let Some(span) =
                    escaped_close_at(src, content_start, line_start, cursor, &tag)
                {
                    return Ok(span);
                }
                cursor += 2;
                line_start = cursor;
            }
            _ => cursor += src[cursor..].chars().next().map_or(1, char::len_utf8),
        }
    }

    match escaped_close_at(src, content_start, line_start, bytes.len(), &tag) {
        Some(span) => Ok(span),
        None => Err(HeredocError::Unterminated { tag }),
    }
}
/// Checks whether the logical line `src[line_start..line_end]` closes an
/// escaped heredoc, and builds the resulting span if it does.
///
/// The line closes the heredoc when, after leading whitespace, it starts
/// with `tag` and the tag is followed by a non-identifier character or the
/// end of the line. The body ends two bytes before `line_start` (dropping
/// the preceding `\n` escape), but never before `content_start`. The span's
/// `end` is just past the tag, plus a `\n` escape directly after it.
fn escaped_close_at(
    src: &str,
    content_start: usize,
    line_start: usize,
    line_end: usize,
    tag: &str,
) -> Option<HeredocSpan> {
    let line = &src[line_start..line_end];
    let candidate = line.trim_start();
    let indent = line.len() - candidate.len();

    let trailing = candidate.strip_prefix(tag)?;
    if let Some(next) = trailing.chars().next() {
        if next.is_ascii_alphanumeric() || next == '_' {
            // The tag is only a prefix of a longer word.
            return None;
        }
    }

    let after_tag = line_start + indent + tag.len();
    let followed_by_escape = src
        .as_bytes()
        .get(after_tag..)
        .is_some_and(|tail| tail.starts_with(b"\\n"));
    let end = if followed_by_escape {
        after_tag + 2
    } else {
        after_tag
    };

    let content_end = content_start.max(line_start.saturating_sub(2));
    Some(HeredocSpan {
        content: content_start..content_end,
        end,
        escaped: true,
    })
}
/// Decodes backslash escapes in an escaped heredoc body.
///
/// `\n`, `\t`, `\r`, `\"` and `\\` are replaced by the character they
/// denote. Any other escape is kept as written (backslash included), and a
/// lone trailing backslash is kept as well.
pub(crate) fn unescape_heredoc_body(raw: &str) -> String {
    // Maps the character after a backslash to its decoded form, if known.
    fn decode(escape: char) -> Option<char> {
        match escape {
            'n' => Some('\n'),
            't' => Some('\t'),
            'r' => Some('\r'),
            '"' | '\\' => Some(escape),
            _ => None,
        }
    }

    let mut decoded = String::with_capacity(raw.len());
    let mut rest = raw.chars();
    while let Some(current) = rest.next() {
        if current != '\\' {
            decoded.push(current);
            continue;
        }
        match rest.next() {
            None => decoded.push('\\'),
            Some(escape) => match decode(escape) {
                Some(replacement) => decoded.push(replacement),
                None => {
                    decoded.push('\\');
                    decoded.push(escape);
                }
            },
        }
    }
    decoded
}
/// Returns the byte offset just past the heredoc starting at `start`, or
/// `None` when `scan_heredoc` reports any error there.
pub(super) fn skip_heredoc_body(src: &str, start: usize) -> Option<usize> {
    match scan_heredoc(src, start) {
        Ok(span) => Some(span.end),
        Err(_) => None,
    }
}
/// Outcome of `find_close_tag`.
pub(super) enum CloseScan {
/// The needle starts at this byte offset.
Found(usize),
/// The scan ran into an unterminated string literal or an incomplete
/// heredoc before finding the needle.
NeedMore,
/// The end of the input was reached without finding the needle.
NotFound,
}
/// Returns the byte offset just past the quoted string that opens at `start`.
///
/// The byte at `start` must be `"`, `'` or a backtick; the string ends at
/// the next unescaped occurrence of that same quote. A backslash escapes the
/// character after it. Returns `None` when `start` is not such a quote or
/// the string is never closed.
fn skip_string_span(src: &str, start: usize) -> Option<usize> {
    let quote = match src.as_bytes().get(start) {
        Some(&byte @ (b'"' | b'\'' | b'`')) => char::from(byte),
        _ => return None,
    };

    // The quote is ASCII, so `start + 1` is a character boundary.
    let body_start = start + 1;
    let mut chars = src[body_start..].char_indices();
    while let Some((offset, ch)) = chars.next() {
        if ch == '\\' {
            // Skip whatever character is being escaped.
            chars.next();
        } else if ch == quote {
            return Some(body_start + offset + 1);
        }
    }
    None
}
/// Searches `src` from byte offset `from` for `needle`, ignoring occurrences
/// inside string literals and heredocs.
///
/// Quoted strings (`"`, `'`, backtick) and heredocs are skipped as a whole.
/// A `<<` that is not a heredoc opener (`MissingTag`) is treated as ordinary
/// text.
///
/// Returns `Found(i)` with the offset where `needle` starts, `NeedMore` when
/// an unterminated string or an incomplete heredoc is encountered first, and
/// `NotFound` when the end of `src` is reached.
pub(super) fn find_close_tag(src: &str, from: usize, needle: &str) -> CloseScan {
    let bytes = src.as_bytes();
    let mut cursor = from;

    while cursor < src.len() {
        let current = bytes[cursor];

        if matches!(current, b'"' | b'\'' | b'`') {
            let Some(after) = skip_string_span(src, cursor) else {
                return CloseScan::NeedMore;
            };
            cursor = after;
            continue;
        }

        if current == b'<' && bytes.get(cursor + 1) == Some(&b'<') {
            match scan_heredoc(src, cursor) {
                Ok(span) => {
                    cursor = span.end;
                    continue;
                }
                // Not a heredoc after all: fall through to the needle check.
                Err(HeredocError::MissingTag) => {}
                Err(HeredocError::MissingNewline { .. } | HeredocError::Unterminated { .. }) => {
                    return CloseScan::NeedMore;
                }
            }
        }

        let rest = &src[cursor..];
        if rest.starts_with(needle) {
            return CloseScan::Found(cursor);
        }
        cursor += rest.chars().next().map_or(1, char::len_utf8);
    }
    CloseScan::NotFound
}
/// Matches a `<tag>…</tag>` block that begins exactly at byte offset `start`,
/// locating the closing tag with `find_close_tag`.
///
/// Unlike a plain textual search, a `</tag>` inside a string literal or a
/// heredoc in the body does not end the block. Returns the body and the byte
/// offset just past the closing tag.
///
/// Returns `None` when `src` does not start with `<tag>` at `start`, when
/// the closing tag is not found (including the `NeedMore` case), or when
/// `start` is past the end of `src` or not on a character boundary.
pub(super) fn match_tool_call_block<'a>(
    src: &'a str,
    start: usize,
    tag: &str,
) -> Option<(&'a str, usize)> {
    let open = format!("<{tag}>");
    // `get` rather than `src[start..]`: a bad offset becomes `None`, not a panic.
    if !src.get(start..)?.starts_with(open.as_str()) {
        return None;
    }

    let body_start = start + open.len();
    let close = format!("</{tag}>");
    match find_close_tag(src, body_start, &close) {
        CloseScan::Found(close_at) => Some((&src[body_start..close_at], close_at + close.len())),
        CloseScan::NeedMore | CloseScan::NotFound => None,
    }
}
/// Returns the length in bytes of the ASCII identifier at the start of
/// `bytes`.
///
/// The first byte must be an ASCII letter, `_` or `$`; following bytes may
/// also be ASCII digits. Returns `None` when `bytes` is empty or does not
/// start with a valid first byte.
pub(crate) fn ident_length(bytes: &[u8]) -> Option<usize> {
    fn is_ident_start(byte: u8) -> bool {
        byte.is_ascii_alphabetic() || byte == b'_' || byte == b'$'
    }
    fn is_ident_continue(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'$'
    }

    let (&head, tail) = bytes.split_first()?;
    if !is_ident_start(head) {
        return None;
    }
    let tail_len = tail
        .iter()
        .take_while(|byte| is_ident_continue(**byte))
        .count();
    Some(1 + tail_len)
}
/// Parses the argument list of a TypeScript-style call `name( … )`.
///
/// `text` is expected to begin with the tool name: the opening `(` is looked
/// up at byte offset `name.len()`. An empty argument list yields an empty
/// object. On success returns the argument object and the number of bytes
/// of `text` consumed, counted from its start through the closing `)`.
///
/// # Errors
///
/// Returns a `TOOL CALL PARSE ERROR` message when the `(` is missing, the
/// argument fails to parse, the closing `)` is missing, or the argument is
/// not an object literal. The missing-`)` check runs before the object check.
pub(crate) fn parse_ts_call_from(
    text: &str,
    name: String,
) -> Result<(serde_json::Value, usize), String> {
    let paren_open = name.len();
    if text.as_bytes().get(paren_open) != Some(&b'(') {
        return Err(format!(
            "TOOL CALL PARSE ERROR: `{name}(` expected immediately after the tool name."
        ));
    }

    let args_start = paren_open + 1;
    let mut parser = TsValueParser::new(&text[args_start..]);
    parser.skip_ws_and_comments();

    let args_value = if parser.peek() == Some(b')') {
        // `name()` — no arguments at all.
        serde_json::Value::Object(serde_json::Map::new())
    } else {
        match parser.parse_value() {
            Ok(value) => value,
            Err(error) => {
                return Err(format!(
                    "TOOL CALL PARSE ERROR: `{name}(...)` — {error}. \
                     Tool arguments must be a TypeScript object literal: `{{ key: value, key: value }}`."
                ));
            }
        }
    };

    parser.skip_ws_and_comments();
    if parser.peek() != Some(b')') {
        return Err(format!(
            "TOOL CALL PARSE ERROR: `{name}(...)` — missing closing `)`. \
             Every tool call must be a complete TypeScript expression."
        ));
    }

    // Name and `(`, then what the parser consumed, then the closing `)`.
    let total_consumed = args_start + parser.position() + 1;
    match args_value {
        object @ serde_json::Value::Object(_) => Ok((object, total_consumed)),
        other => Err(format!(
            "TOOL CALL PARSE ERROR: `{name}(...)` — expected an object literal argument, \
             got `{other}`. Wrap the value in braces: `{name}({{ key: value }})`."
        )),
    }
}