use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::borrow::Cow;
use tracing::warn;
/// Returns `raw` after passing it through
/// `crate::traits::extract_primary_message_content` with an empty second argument.
///
/// NOTE(review): presumably the helper removes diagnostics appended after the
/// primary tool output (as this function's name suggests) — confirm against the
/// helper's definition, which is not visible in this file.
pub(super) fn strip_appended_diagnostics(raw: &str) -> Cow<'_, str> {
crate::traits::extract_primary_message_content(raw, &[])
}
/// Coarse classification of a failed tool result.
///
/// Within this file, `Transient` is produced for timeouts, rate limits,
/// connection problems and retryable HTTP statuses, while `Semantic` is the
/// fallback for every other output recognized as a failure.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum ToolFailureClass {
// Fallback class for recognized failures that match no transient marker.
Semantic,
// Timeouts, rate limits, network trouble, 5xx and a few 4xx HTTP statuses.
Transient,
}
/// Finer-grained failure category produced by `classify_execution_failure_kind`.
///
/// Serialized and deserialized with snake_case variant names
/// (e.g. `tool_contract_failure`).
// Every variant ends in `Failure`, which trips clippy's `enum_variant_names` lint.
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub(super) enum ExecutionFailureKind {
// Output matched a tool-contract marker, or the caller flagged a contract violation.
ToolContractFailure,
// Metadata indicated a timeout/transport problem, or the failure was transient.
ToolInvocationFailure,
// Output matched an environment marker (missing file, permissions, credentials, ...).
EnvironmentFailure,
// Any remaining semantic failure.
LogicFailure,
}
/// Returns the semantic-failure limit associated with `tool_name`.
///
/// `cli_agent` gets 2, `http_request` gets 5, and every other tool gets 3.
pub(super) fn semantic_failure_limit(tool_name: &str) -> usize {
    const DEFAULT_LIMIT: usize = 3;
    const OVERRIDES: &[(&str, usize)] = &[("cli_agent", 2), ("http_request", 5)];

    OVERRIDES
        .iter()
        .find(|(name, _)| *name == tool_name)
        .map_or(DEFAULT_LIMIT, |(_, limit)| *limit)
}
/// Returns `true` when `haystack` contains at least one of `needles` as a substring.
///
/// An empty `needles` slice yields `false`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for candidate in needles {
        if haystack.contains(*candidate) {
            return true;
        }
    }
    false
}
/// Lazily compiled, case-insensitive pattern that finds an HTTP status code
/// (three digits, first digit 1-5) in free text.
///
/// Each alternative carries its own capture group for the code:
/// `HTTP/<version> NNN`, `HTTP NNN`, `status code NNN`, `status: NNN` /
/// `status=NNN`, quoted JSON-style keys such as `"status_code": NNN`,
/// `attempt <n>: NNN`, and `code: NNN` / `http code=NNN`.
static HTTP_STATUS_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(?i)\bhttp/\d(?:\.\d+)?\s+([1-5][0-9]{2})\b|\bhttp\s+([1-5][0-9]{2})\b|\bstatus\s+code\s+([1-5][0-9]{2})\b|\bstatus\s*[:=]\s*([1-5][0-9]{2})\b|"(?:status|status_code|statusCode|http_status|httpStatus)"\s*:\s*"?([1-5][0-9]{2})"?|\battempt\s+\d+\s*[:=-]\s*([1-5][0-9]{2})\b|\b(?:http\s*)?code\s*[:=]\s*([1-5][0-9]{2})\b"#)
// The pattern is a literal, so a compile failure is a programming error.
.expect("http status regex must compile")
});
/// Lazily compiled, case-insensitive pattern matching `http(s)://[sub.]httpbin.org/status/NNN`
/// URLs; capture group 1 is the three-digit status code (first digit 1-5).
static HTTPBIN_STATUS_URL_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r#"(?i)\bhttps?://(?:[^/\s]+\.)?httpbin\.org/status/([1-5][0-9]{2})(?:[/?#][^\s]*)?"#,
)
// The pattern is a literal, so a compile failure is a programming error.
.expect("httpbin status url regex must compile")
});
/// Lazily compiled, case-insensitive pattern that captures a (possibly negative)
/// integer exit code from markers such as `exit: 2`, `exit code 2`,
/// `exit code: 2`, `exit code=2` and `[exit code: 2`.
static EXIT_CODE_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)(?:\bexit\s*:\s*|\bexit\s*code\s*[:=]?\s*|\[exit\s*code:\s*)(-?\d+)")
// The pattern is a literal, so a compile failure is a programming error.
.expect("exit code regex must compile")
});
/// Maps an HTTP status code onto a failure class.
///
/// Codes of 500 and above, plus 408, 409, 425 and 429, are `Transient`; the
/// remaining codes from 400 upward are `Semantic`; anything below 400 is not a
/// failure and yields `None`.
fn classify_http_status(status: u16) -> Option<ToolFailureClass> {
    match status {
        408 | 409 | 425 | 429 => Some(ToolFailureClass::Transient),
        500..=u16::MAX => Some(ToolFailureClass::Transient),
        400..=499 => Some(ToolFailureClass::Semantic),
        _ => None,
    }
}
fn extract_status_from_value(value: &Value) -> Option<u16> {
match value {
Value::Number(n) => n.as_u64().and_then(|v| u16::try_from(v).ok()),
Value::String(s) => s.parse::<u16>().ok(),
_ => None,
}
}
/// Classifies an already-lowercased error message as transient or semantic.
///
/// Session-scoped unavailability is checked first and is always `Semantic`,
/// even though it would otherwise match the transient "temporarily unavailable"
/// marker. Any other transient marker yields `Transient`; everything else is
/// `Semantic`.
fn classify_text_error(lower: &str) -> ToolFailureClass {
    const SESSION_SCOPED_MARKERS: &[&str] = &[
        "temporarily unavailable in this session",
        "unavailable in this session",
    ];
    const TRANSIENT_MARKERS: &[&str] = &[
        "rate limit",
        "too many requests",
        "timed out",
        "timeout",
        "temporarily unavailable",
        "service unavailable",
        "bad gateway",
        "gateway timeout",
        "network error",
        "connection reset",
        "connection refused",
        "connection aborted",
        "connection failed",
        "retry later",
        "try again later",
        "econnreset",
        "etimedout",
        "ehostunreach",
        "unreachable",
        "dns",
    ];

    let session_scoped = contains_any(lower, SESSION_SCOPED_MARKERS);
    if !session_scoped && contains_any(lower, TRANSIENT_MARKERS) {
        ToolFailureClass::Transient
    } else {
        ToolFailureClass::Semantic
    }
}
/// Returns `true` when the already-lowercased text contains a marker of a
/// tool-contract problem (bad/missing arguments, malformed payloads, unknown
/// actions, ...).
fn looks_like_tool_contract_error(lower: &str) -> bool {
    const CONTRACT_MARKERS: &[&str] = &[
        "missing required parameter",
        "missing required field",
        "invalid arguments",
        "invalid argument",
        "invalid json",
        "invalid request format",
        "failed to parse",
        "parse error",
        "expected object",
        "schema mismatch",
        "unexpected field",
        "unknown action",
        "requires `goal_id`",
        "requires 'goal_id'",
        "\"goal_id\" is required",
        "tool-only parameters were embedded in the url",
    ];

    contains_any(lower, CONTRACT_MARKERS)
}
/// Returns `true` when the already-lowercased text contains a marker of an
/// environment problem (missing files, permissions, credentials, missing
/// commands or services).
fn looks_like_environment_error(lower: &str) -> bool {
    const ENVIRONMENT_MARKERS: &[&str] = &[
        "file not found",
        "no such file",
        "no such directory",
        "does not exist",
        "path is a directory",
        "not a git repository",
        "permission denied",
        "operation not permitted",
        "access denied",
        "unauthorized",
        "forbidden",
        "missing auth",
        "missing api key",
        "credentials",
        "command not found",
        "service not running",
        "not configured",
    ];

    contains_any(lower, ENVIRONMENT_MARKERS)
}
/// Returns `true` when structured call metadata points at an invocation-level
/// failure: either the call timed out, or its transport error (compared
/// case-insensitively) contains a known network/timeout/rate-limit marker.
fn metadata_indicates_tool_invocation_failure(metadata: &crate::traits::ToolCallMetadata) -> bool {
    const TRANSPORT_MARKERS: &[&str] = &[
        "timed out",
        "timeout",
        "connection refused",
        "connection reset",
        "broken pipe",
        "network",
        "rate limit",
        "429",
        "503",
        "502",
        "504",
        "econnrefused",
        "econnreset",
        "etimedout",
        "ehostunreach",
        "dns",
    ];

    if metadata.timed_out {
        return true;
    }
    match metadata.transport_error.as_ref() {
        Some(message) => contains_any(&message.to_ascii_lowercase(), TRANSPORT_MARKERS),
        None => false,
    }
}
/// Returns `true` for the tools whose "file not found"-style output is treated
/// as a file-lookup miss.
fn is_file_lookup_tool(tool_name: &str) -> bool {
    const FILE_LOOKUP_TOOLS: &[&str] = &[
        "read_file",
        "write_file",
        "edit_file",
        "search_files",
        "project_inspect",
        "send_file",
    ];

    FILE_LOOKUP_TOOLS.iter().any(|known| *known == tool_name)
}
/// Returns `true` when the already-lowercased text contains a marker of a
/// missing file or path.
fn looks_like_file_lookup_miss(lower: &str) -> bool {
    const MISS_MARKERS: &[&str] = &[
        "file not found",
        "no such file",
        "no such file or directory",
        "does not exist",
        "enoent",
    ];

    contains_any(lower, MISS_MARKERS)
}
/// Returns `true` when `tool_name` is a file-lookup tool and the
/// already-lowercased `lower_text` reads like a missing-file message.
pub(super) fn is_file_lookup_miss_for_tool(tool_name: &str, lower_text: &str) -> bool {
    if !is_file_lookup_tool(tool_name) {
        return false;
    }
    looks_like_file_lookup_miss(lower_text)
}
/// Returns `true` when the already-lowercased text contains any generic error
/// marker (explicit error prefixes, exceptions, auth/permission problems,
/// missing resources, HTTP status lines, timeouts or connection failures).
fn looks_like_error_signal(lower: &str) -> bool {
    const ERROR_SIGNALS: &[&str] = &[
        "error:",
        "failed to ",
        "exception",
        "traceback",
        "unknown tool",
        "not a real tool",
        "invalid",
        "missing required",
        "permission denied",
        "unauthorized",
        "forbidden",
        "file not found",
        "command not found",
        "resource not found",
        "404 not found",
        "no such file",
        "status code ",
        "http/",
        "timed out",
        "timeout",
        "rate limit",
        "too many requests",
        "connection reset",
        "connection refused",
        "connection aborted",
        "connection failed",
        "connection timed out",
        "temporarily unavailable in this session",
        "unavailable in this session",
    ];

    contains_any(lower, ERROR_SIGNALS)
}
/// Recursively inspects a JSON value for signs of an error and classifies it.
///
/// Returns `None` when nothing in `value` looks like a failure. For objects the
/// checks run in this order: status-like keys, explicit `success`/`ok` == false,
/// a non-null `error` member, an `errors` member, a `message` string, and
/// finally every nested value. Arrays yield the first classified element, and
/// strings are classified only when they contain an error signal.
fn classify_json_error(value: &Value) -> Option<ToolFailureClass> {
match value {
Value::Object(map) => {
// 1. Status-like keys: the first key whose value maps to a failing HTTP status wins.
for key in [
"status",
"status_code",
"statusCode",
"http_status",
"httpStatus",
"code",
] {
if let Some(status_value) = map.get(key) {
if let Some(status) = extract_status_from_value(status_value) {
if let Some(kind) = classify_http_status(status) {
return Some(kind);
}
}
}
}
// 2. Explicit failure flag: always a failure; refine the class via `error`
//    or `message` when possible, otherwise default to Semantic.
if map.get("success").and_then(Value::as_bool) == Some(false)
|| map.get("ok").and_then(Value::as_bool) == Some(false)
{
if let Some(error_value) = map.get("error") {
if !error_value.is_null() {
if let Some(kind) = classify_json_error(error_value) {
return Some(kind);
}
}
}
if let Some(message) = map.get("message").and_then(Value::as_str) {
let lower = message.to_ascii_lowercase();
if looks_like_error_signal(&lower) {
return Some(classify_text_error(&lower));
}
}
return Some(ToolFailureClass::Semantic);
}
// 3. A non-null `error` member is a failure even without a failure flag.
if let Some(error_value) = map.get("error") {
if !error_value.is_null() {
if let Some(kind) = classify_json_error(error_value) {
return Some(kind);
}
return Some(ToolFailureClass::Semantic);
}
}
// 4. A non-empty `errors` member (array, object or non-blank string) is a failure.
if let Some(errors_value) = map.get("errors") {
match errors_value {
Value::Array(arr) if !arr.is_empty() => {
for entry in arr {
if let Some(kind) = classify_json_error(entry) {
return Some(kind);
}
}
return Some(ToolFailureClass::Semantic);
}
Value::Object(obj) if !obj.is_empty() => {
return Some(ToolFailureClass::Semantic)
}
Value::String(s) if !s.trim().is_empty() => {
let lower = s.to_ascii_lowercase();
return Some(classify_text_error(&lower));
}
_ => {}
}
}
// 5. A `message` string counts only when it contains an error signal.
if let Some(message) = map.get("message").and_then(Value::as_str) {
let lower = message.to_ascii_lowercase();
if looks_like_error_signal(&lower) {
return Some(classify_text_error(&lower));
}
}
// 6. Otherwise recurse into every member value and take the first hit.
for nested in map.values() {
if let Some(kind) = classify_json_error(nested) {
return Some(kind);
}
}
None
}
// Arrays: first element that classifies as a failure.
Value::Array(arr) => arr.iter().find_map(classify_json_error),
// Strings: only classified when they contain an error signal.
Value::String(s) => {
let lower = s.to_ascii_lowercase();
if looks_like_error_signal(&lower) {
Some(classify_text_error(&lower))
} else {
None
}
}
_ => None,
}
}
/// Extracts the first HTTP status code that `HTTP_STATUS_RE` finds in `text`.
///
/// Only the first regex match is examined; the code comes from whichever
/// capture group of that match participated and parses as `u16`.
fn extract_http_status_from_text(text: &str) -> Option<u16> {
    HTTP_STATUS_RE.captures(text).and_then(|groups| {
        groups
            .iter()
            // Group 0 is the whole match; the status lives in a numbered group.
            .skip(1)
            .flatten()
            .find_map(|hit| hit.as_str().parse::<u16>().ok())
    })
}
/// Returns the status code embedded in an `httpbin.org/status/NNN` URL found in
/// `command`, or `None` when the command contains no such URL.
fn extract_httpbin_status_hint_from_command(command: &str) -> Option<u16> {
    HTTPBIN_STATUS_URL_RE
        .captures(command)
        .and_then(|groups| groups.get(1))
        .and_then(|code| code.as_str().parse::<u16>().ok())
}
/// Returns `true` when the tool output, after `strip_appended_diagnostics` and
/// trimming, is empty or is the `(no output)` placeholder (ASCII
/// case-insensitive).
fn looks_like_empty_tool_output(text: &str) -> bool {
    let stripped = strip_appended_diagnostics(text);
    let body = stripped.trim();
    if body.is_empty() {
        return true;
    }
    body.eq_ignore_ascii_case("(no output)")
}
/// Scans free text for top-level `{ ... }` spans, parses each as JSON and
/// returns the first failure class that `classify_json_error` reports.
///
/// The scan tracks double-quoted strings (with backslash escapes) so braces
/// inside strings do not affect nesting depth. It stops after 8 balanced
/// candidates, whether or not they parsed as JSON.
///
/// NOTE(review): string tracking is also active outside any brace span, so an
/// unbalanced `"` earlier in the text appears to hide a following JSON object
/// from the scan — confirm whether that is intended.
fn classify_embedded_json_error(text: &str) -> Option<ToolFailureClass> {
let mut in_string = false;
let mut escaped = false;
let mut depth = 0usize;
// Byte offset of the `{` that opened the current top-level candidate.
let mut start: Option<usize> = None;
// Number of balanced top-level spans examined so far.
let mut candidates = 0usize;
for (idx, ch) in text.char_indices() {
if in_string {
// The character after a backslash is consumed without interpretation.
if escaped {
escaped = false;
continue;
}
match ch {
'\\' => escaped = true,
'"' => in_string = false,
_ => {}
}
continue;
}
match ch {
'"' => in_string = true,
'{' => {
if depth == 0 {
start = Some(idx);
}
depth = depth.saturating_add(1);
}
'}' => {
// A stray closing brace outside any candidate is ignored.
if depth == 0 {
continue;
}
depth -= 1;
if depth == 0 {
if let Some(s) = start.take() {
// `idx` is a byte offset; include the closing brace in the slice.
let end = idx + ch.len_utf8();
if let Ok(value) = serde_json::from_str::<Value>(&text[s..end]) {
if let Some(kind) = classify_json_error(&value) {
return Some(kind);
}
}
candidates += 1;
// Bound the work done on large outputs.
if candidates >= 8 {
break;
}
}
}
}
_ => {}
}
}
None
}
/// Returns the first exit code that `EXIT_CODE_RE` finds in `text`, provided it
/// parses as `i32` and is not zero.
fn extract_nonzero_exit_code(text: &str) -> Option<i32> {
    EXIT_CODE_RE
        .captures(text)
        .and_then(|groups| groups.get(1))
        .and_then(|digits| digits.as_str().parse::<i32>().ok())
        .filter(|code| *code != 0)
}
/// Returns `true` for tools in the data-content set; `classify_tool_result_failure`
/// stops classifying their output after its explicit-prefix check.
fn is_data_content_tool(tool_name: &str) -> bool {
    const DATA_CONTENT_TOOLS: &[&str] = &[
        "read_file",
        "search_files",
        "read_channel_history",
        "web_search",
        "policy_metrics",
    ];

    DATA_CONTENT_TOOLS.iter().any(|known| *known == tool_name)
}
/// Returns `true` for the tools in the external-data set (`web_fetch` and
/// `http_request`).
fn is_external_data_tool(tool_name: &str) -> bool {
    tool_name == "web_fetch" || tool_name == "http_request"
}
/// Returns `true` when the tool arguments are JSON with `"action": "list"` and
/// the output is the memory report (starts with `**Stored Memories**`) or the
/// exact empty-state message `No memories stored.`.
///
/// Missing or unparsable arguments yield `false`.
fn manage_memories_list_output_is_report(result_text: &str, tool_arguments: Option<&str>) -> bool {
    let Some(raw_arguments) = tool_arguments else {
        return false;
    };
    let Ok(parsed) = serde_json::from_str::<Value>(raw_arguments) else {
        return false;
    };
    if parsed.get("action").and_then(Value::as_str) != Some("list") {
        return false;
    }

    let stripped = strip_appended_diagnostics(result_text);
    let report = stripped.trim_start();
    report == "No memories stored." || report.starts_with("**Stored Memories**")
}
pub(super) fn classify_tool_result_failure(
tool_name: &str,
result_text: &str,
) -> Option<ToolFailureClass> {
let cleaned = strip_appended_diagnostics(result_text);
let cleaned = cleaned.trim();
if cleaned.is_empty() {
return None;
}
let lower = cleaned.to_ascii_lowercase();
if is_file_lookup_miss_for_tool(tool_name, &lower) {
return Some(ToolFailureClass::Transient);
}
if cleaned.starts_with("ERROR:")
|| cleaned.starts_with("Error:")
|| cleaned.starts_with("Failed to ")
|| cleaned.starts_with("Request blocked:")
|| cleaned.starts_with("Blocked:")
|| cleaned.starts_with("[SYSTEM] BLOCKED:")
{
return Some(classify_text_error(&lower));
}
if is_data_content_tool(tool_name) {
return None;
}
if let Some(status) = extract_http_status_from_text(cleaned) {
if let Some(kind) = classify_http_status(status) {
return Some(kind);
}
}
if is_external_data_tool(tool_name) {
return None;
}
if cleaned.starts_with('{') || cleaned.starts_with('[') {
if let Ok(value) = serde_json::from_str::<Value>(cleaned) {
if let Some(kind) = classify_json_error(&value) {
return Some(kind);
}
}
}
if let Some(kind) = classify_embedded_json_error(cleaned) {
return Some(kind);
}
if let Some(exit_code) = extract_nonzero_exit_code(cleaned) {
let _ = exit_code;
if matches!(tool_name, "terminal" | "run_command") && cleaned.len() > 500 {
return None;
}
return Some(classify_text_error(&lower));
}
if lower == "null" {
return Some(ToolFailureClass::Semantic);
}
if looks_like_error_signal(&lower) {
if matches!(tool_name, "terminal" | "run_command") && cleaned.len() > 500 {
return None;
}
return Some(classify_text_error(&lower));
}
None
}
pub(super) fn classify_tool_result_failure_with_args(
tool_name: &str,
result_text: &str,
tool_arguments: Option<&str>,
) -> Option<ToolFailureClass> {
if tool_name == "manage_memories"
&& manage_memories_list_output_is_report(result_text, tool_arguments)
{
return None;
}
let classified = classify_tool_result_failure(tool_name, result_text);
if classified.is_some() {
return classified;
}
if !matches!(tool_name, "terminal" | "run_command")
|| !looks_like_empty_tool_output(result_text)
{
return None;
}
let command = tool_arguments.and_then(extract_command_from_args)?;
let hinted_status = extract_httpbin_status_hint_from_command(&command)?;
classify_http_status(hinted_status)
}
pub(super) fn classify_tool_result_failure_with_context(
tool_name: &str,
result_text: &str,
tool_arguments: Option<&str>,
metadata: Option<&crate::traits::ToolCallMetadata>,
) -> Option<ToolFailureClass> {
if let Some(meta) = metadata {
if let Some(ref transport_err) = meta.transport_error {
let lower_err = transport_err.to_ascii_lowercase();
let is_transport = contains_any(
&lower_err,
&[
"timed out",
"timeout",
"connection refused",
"connection reset",
"broken pipe",
"network",
"rate limit",
"429",
"503",
"502",
"504",
"econnrefused",
"econnreset",
"etimedout",
"ehostunreach",
"dns",
],
);
if is_transport {
return Some(ToolFailureClass::Transient);
}
}
if meta.timed_out {
return Some(ToolFailureClass::Transient);
}
if let Some(code) = meta.exit_code {
if code != 0 {
let is_test_runner = tool_arguments.is_some_and(|args| {
let lower = args.to_ascii_lowercase();
lower.contains("test")
|| lower.contains("pytest")
|| lower.contains("jest")
|| lower.contains("mocha")
|| lower.contains("rspec")
|| lower.contains("cargo test")
|| lower.contains("go test")
|| lower.contains("phpunit")
|| lower.contains("unittest")
|| lower.contains("npm run test")
|| lower.contains("yarn test")
|| lower.contains("make test")
});
if !is_test_runner {
return Some(ToolFailureClass::Semantic);
}
}
if matches!(tool_name, "terminal" | "run_command") {
return None;
}
}
if let Some(status) = meta.http_status {
if let Some(kind) = classify_http_status(status) {
return Some(kind);
}
if matches!(tool_name, "http_request" | "web_fetch") {
return None;
}
}
}
classify_tool_result_failure_with_args(tool_name, result_text, tool_arguments)
}
pub(super) fn classify_execution_failure_kind(
tool_name: &str,
result_text: &str,
tool_arguments: Option<&str>,
metadata: Option<&crate::traits::ToolCallMetadata>,
had_deterministic_contract_violation: bool,
) -> Option<ExecutionFailureKind> {
if had_deterministic_contract_violation {
return Some(ExecutionFailureKind::ToolContractFailure);
}
let lower = strip_appended_diagnostics(result_text).to_ascii_lowercase();
if looks_like_tool_contract_error(&lower) {
return Some(ExecutionFailureKind::ToolContractFailure);
}
if looks_like_environment_error(&lower) {
return Some(ExecutionFailureKind::EnvironmentFailure);
}
if metadata.is_some_and(metadata_indicates_tool_invocation_failure) {
return Some(ExecutionFailureKind::ToolInvocationFailure);
}
classify_tool_result_failure_with_context(tool_name, result_text, tool_arguments, metadata).map(
|failure_class| match failure_class {
ToolFailureClass::Transient => ExecutionFailureKind::ToolInvocationFailure,
ToolFailureClass::Semantic => ExecutionFailureKind::LogicFailure,
},
)
}
/// Picks the most informative line of an error message, truncated to 200
/// characters.
///
/// Lines are trimmed and blank lines ignored. Selection priority:
/// 1. the first line containing one of the error markers;
/// 2. the first line whose first `missing`/`Missing` is followed by a space,
///    `:`, `,` or the end of the line;
/// 3. the first non-blank line.
///
/// Returns an empty string when there is no non-blank line.
pub(super) fn extract_key_error_line(error_text: &str) -> String {
    const MAX_CHARS: usize = 200;
    const MARKERS: &[&str] = &[
        "error:",
        "Error:",
        "ERROR:",
        "fatal",
        "FATAL",
        "unable to",
        "Unable to",
        "command not found",
        "not found",
        "does not exist",
        "No such",
        "cannot",
        "Cannot",
        "denied",
        "Denied",
        "failed",
        "Failed",
    ];
    const MISSING_WORDS: &[&str] = &["missing", "Missing"];

    fn mentions_marker(line: &str) -> bool {
        MARKERS.iter().any(|marker| line.contains(*marker))
    }

    // Only the first occurrence of each word is inspected, and it must be
    // followed by a delimiter (or end the line) to count.
    fn mentions_missing_word(line: &str) -> bool {
        MISSING_WORDS.iter().any(|word| {
            line.find(*word).is_some_and(|at| {
                matches!(
                    line.as_bytes().get(at + word.len()).copied(),
                    None | Some(b' ' | b':' | b',')
                )
            })
        })
    }

    let cleaned = strip_appended_diagnostics(error_text);
    let non_blank_lines = || cleaned.lines().map(str::trim).filter(|line| !line.is_empty());

    let chosen = non_blank_lines()
        .find(|line| mentions_marker(line))
        .or_else(|| non_blank_lines().find(|line| mentions_missing_word(line)))
        .or_else(|| non_blank_lines().next());

    match chosen {
        Some(line) => line.chars().take(MAX_CHARS).collect(),
        None => String::new(),
    }
}
/// Builds a compact, single-line summary of the user's request.
///
/// Quotes, backticks and bracket characters are treated as whitespace, other
/// control characters are dropped, runs of whitespace collapse to one space,
/// and the result is trimmed. The summary is cut to `max_chars` characters with
/// `...` appended when anything was cut. A request with no remaining text
/// yields `No user-request summary available`.
pub(super) fn build_task_boundary_hint(user_text: &str, max_chars: usize) -> String {
    const NEUTRALIZED: &[char] = &['"', '\'', '`', '[', ']', '{', '}', '<', '>'];

    // Step 1: turn separators into spaces and drop control characters. The
    // newline/tab check must precede the control check because those
    // characters are themselves control characters.
    let sanitized: String = user_text
        .chars()
        .filter_map(|ch| {
            if matches!(ch, '\n' | '\r' | '\t') || NEUTRALIZED.contains(&ch) {
                Some(' ')
            } else if ch.is_control() {
                None
            } else {
                Some(ch)
            }
        })
        .collect();

    // Step 2: collapse whitespace runs and trim in one go.
    let words: Vec<&str> = sanitized.split_whitespace().collect();
    if words.is_empty() {
        return "No user-request summary available".to_string();
    }
    let summary = words.join(" ");

    // Step 3: truncate on a character boundary, marking the cut.
    match summary.char_indices().nth(max_chars) {
        Some((cut_at, _)) => format!("{}...", &summary[..cut_at]),
        None => summary,
    }
}
pub(super) fn merge_consecutive_messages(messages: &mut Vec<Value>) {
if messages.len() <= 1 {
return;
}
let mut i = 1;
while i < messages.len() {
let prev_role = messages[i - 1]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("")
.to_string();
let curr_role = messages[i]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("")
.to_string();
if (curr_role == "assistant" || curr_role == "user") && prev_role == curr_role {
let curr_content = messages[i]
.get("content")
.and_then(|c| c.as_str())
.unwrap_or("")
.to_string();
let prev_content = messages[i - 1]
.get("content")
.and_then(|c| c.as_str())
.unwrap_or("")
.to_string();
let merged = if prev_content.is_empty() {
curr_content
} else if curr_content.is_empty() {
prev_content
} else {
format!("{}\n{}", prev_content, curr_content)
};
messages[i - 1]["content"] = json!(merged);
if curr_role == "assistant" {
if let Some(curr_tcs) = messages[i]
.get("tool_calls")
.and_then(|v| v.as_array())
.cloned()
{
if let Some(prev_tcs) = messages[i - 1]
.get_mut("tool_calls")
.and_then(|v| v.as_array_mut())
{
prev_tcs.extend(curr_tcs);
} else {
messages[i - 1]["tool_calls"] = json!(curr_tcs);
}
}
}
messages.remove(i);
} else {
i += 1;
}
}
}
// Unit tests for `build_task_boundary_hint`.
#[cfg(test)]
mod task_boundary_hint_tests {
use super::build_task_boundary_hint;
// Brackets, quotes and backticks become separators; whitespace runs collapse
// to a single space and the result is trimmed.
#[test]
fn strips_control_markers_and_collapses_whitespace() {
let hint = build_task_boundary_hint(
"  [SYSTEM]\n\tbuild   {site} <now> with \"quotes\" and `ticks`  ",
120,
);
assert_eq!(hint, "SYSTEM build site now with quotes and ticks");
}
// Text longer than `max_chars` is cut and suffixed with "...".
#[test]
fn truncates_with_ellipsis() {
let hint = build_task_boundary_hint("abcdefghijk", 5);
assert_eq!(hint, "abcde...");
}
}
// Unit tests for the tool-result failure classifiers in this file.
#[cfg(test)]
mod tool_error_detection_tests {
use super::{
classify_execution_failure_kind, classify_tool_result_failure,
classify_tool_result_failure_with_args, classify_tool_result_failure_with_context,
ExecutionFailureKind, ToolFailureClass,
};
// --- Text-only classification (`classify_tool_result_failure`) ---
#[test]
fn detects_prefixed_transient_error() {
let result = "Error: request timed out while connecting to api";
let classified = classify_tool_result_failure("http_request", result);
assert_eq!(classified, Some(ToolFailureClass::Transient));
}
#[test]
fn detects_request_blocked_as_semantic_error() {
let result =
"Request blocked: tool-only parameters were embedded in the URL (auth_profile, body).";
let classified = classify_tool_result_failure("http_request", result);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
#[test]
fn detects_json_semantic_error() {
let result = r#"{"error":"invalid arguments: missing required field"}"#;
let classified = classify_tool_result_failure("manage_memories", result);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
// `policy_metrics` is a data-content tool, so key names containing "failed" or
// "invalid" must not be mistaken for errors.
#[test]
fn policy_metrics_output_not_classified_as_error() {
let result = r#"{"metrics":{"tokens_failed_tasks_total":0,"llm_payload_invalid_total":0,"no_progress_iterations_total":0},"derived":{"response_total":5}}"#;
let classified = classify_tool_result_failure("policy_metrics", result);
assert_eq!(
classified, None,
"policy_metrics output should not be classified as an error"
);
}
// Covers several textual shapes of HTTP status reporting.
#[test]
fn detects_http_status_failures() {
let semantic = classify_tool_result_failure("http_request", "Status: 404 Not Found");
assert_eq!(semantic, Some(ToolFailureClass::Semantic));
let semantic_plain = classify_tool_result_failure(
"web_fetch",
"Error fetching https://example.com: HTTP 400 Bad Request",
);
assert_eq!(semantic_plain, Some(ToolFailureClass::Semantic));
let transient =
classify_tool_result_failure("http_request", "HTTP/1.1 503 Service Unavailable");
assert_eq!(transient, Some(ToolFailureClass::Transient));
let transient_h2 = classify_tool_result_failure("http_request", "HTTP/2 503");
assert_eq!(transient_h2, Some(ToolFailureClass::Transient));
let transient_attempt = classify_tool_result_failure("terminal", "Attempt 2: 503");
assert_eq!(transient_attempt, Some(ToolFailureClass::Transient));
let semantic_attempt = classify_tool_result_failure("terminal", "Attempt 1: 404");
assert_eq!(semantic_attempt, Some(ToolFailureClass::Semantic));
}
// "temporarily unavailable in this session" must not be treated as retryable.
#[test]
fn treats_session_scoped_unavailability_as_semantic_error() {
let result = "Filesystem MCP server is temporarily unavailable in this session.";
let classified = classify_tool_result_failure("manage_skills", result);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
let failure_kind =
classify_execution_failure_kind("manage_skills", result, None, None, false);
assert_eq!(failure_kind, Some(ExecutionFailureKind::LogicFailure));
}
#[test]
fn treats_file_lookup_miss_as_transient_for_file_tools() {
let classified = classify_tool_result_failure(
"read_file",
"Error: ENOENT: no such file or directory, open '/tmp/missing.txt'",
);
assert_eq!(classified, Some(ToolFailureClass::Transient));
}
#[test]
fn keeps_non_file_tool_enoent_as_semantic() {
let classified = classify_tool_result_failure(
"terminal",
"Error: ENOENT: no such file or directory, open '/tmp/missing.txt'",
);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
// --- Argument-aware classification (`classify_tool_result_failure_with_args`) ---
#[test]
fn infers_httpbin_status_for_empty_curl_output() {
let args = r#"{"action":"run","command":"curl -s https://httpbin.org/status/503"}"#;
let classified =
classify_tool_result_failure_with_args("terminal", "(no output)", Some(args));
assert_eq!(classified, Some(ToolFailureClass::Transient));
}
// The stored memory text mentions "HTTP 401 Unauthorized" but is report content.
#[test]
fn manage_memories_list_output_is_not_classified_as_error() {
let args = r#"{"action":"list"}"#;
let result = "**Stored Memories** (showing 2 of 2 facts)\n\n- **api_auth_resolution**: Uses manage_oauth to resolve HTTP 401 Unauthorized errors";
let classified =
classify_tool_result_failure_with_args("manage_memories", result, Some(args));
assert_eq!(classified, None);
}
#[test]
fn does_not_infer_httpbin_status_when_output_is_non_empty() {
let args = r#"{"action":"run","command":"curl -s https://httpbin.org/status/503"}"#;
let classified = classify_tool_result_failure_with_args("terminal", "ok", Some(args));
assert_eq!(classified, None);
}
#[test]
fn does_not_infer_non_httpbin_status_urls() {
let args = r#"{"action":"run","command":"curl -s https://example.com/status/503"}"#;
let classified =
classify_tool_result_failure_with_args("terminal", "(no output)", Some(args));
assert_eq!(classified, None);
}
// --- Exit-code markers and embedded JSON in text ---
#[test]
fn detects_nonzero_exit_without_prefix() {
let classified = classify_tool_result_failure("run_command", "$ make test (exit: 2, 22ms)");
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
#[test]
fn detects_terminal_style_exit_code_marker() {
let classified =
classify_tool_result_failure("terminal", "[Process pid=123 finished]\n[exit code: 42]");
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
#[test]
fn detects_embedded_json_error_payload_inside_wrapper_text() {
let wrapped = "[UNTRUSTED EXTERNAL DATA from 'web_fetch']\n{\"error\":\"not found\",\"status\":404}\n[END UNTRUSTED EXTERNAL DATA]";
let classified = classify_tool_result_failure("web_fetch", wrapped);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
#[test]
fn detects_embedded_json_error_payload_with_command_header() {
let result =
"$ curl -s https://api.example.com (exit: 0, 9ms)\n\n{\"error\":\"not found\"}";
let classified = classify_tool_result_failure("run_command", result);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
// --- False-positive guards ---
#[test]
fn does_not_flag_success_output() {
let classified = classify_tool_result_failure("terminal", "$ cargo test (exit: 0, 1.2s)");
assert_eq!(classified, None);
}
#[test]
fn does_not_flag_generic_connection_success_text() {
let classified = classify_tool_result_failure(
"terminal",
"Connection established successfully to upstream service",
);
assert_eq!(classified, None);
}
#[test]
fn does_not_flag_generic_not_found_summary_text() {
let classified = classify_tool_result_failure(
"search_files",
"Search complete: 24 files scanned, 0 patterns not found",
);
assert_eq!(classified, None);
}
// --- Metadata-aware classification (`classify_tool_result_failure_with_context`) ---
#[test]
fn structured_exit_code_zero_prevents_terminal_text_false_positive() {
let metadata = crate::traits::ToolCallMetadata {
exit_code: Some(0),
..Default::default()
};
let classified = classify_tool_result_failure_with_context(
"terminal",
"pytest summary includes: error: expected failure pattern",
None,
Some(&metadata),
);
assert_eq!(classified, None);
}
#[test]
fn structured_exit_code_nonzero_marks_terminal_failure() {
let metadata = crate::traits::ToolCallMetadata {
exit_code: Some(2),
..Default::default()
};
let classified = classify_tool_result_failure_with_context(
"terminal",
"all good text otherwise",
None,
Some(&metadata),
);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
// A transport error without any network marker falls back to text classification.
#[test]
fn transport_error_semantic_not_classified_as_transient() {
let metadata = crate::traits::ToolCallMetadata {
transport_error: Some("Path is a directory, not a file: /tmp/test86".to_string()),
..Default::default()
};
let classified = classify_tool_result_failure_with_context(
"read_file",
"Error: Path is a directory, not a file: /tmp/test86",
None,
Some(&metadata),
);
assert_eq!(classified, Some(ToolFailureClass::Semantic));
}
#[test]
fn transport_error_actual_timeout_still_transient() {
let metadata = crate::traits::ToolCallMetadata {
transport_error: Some("connection refused".to_string()),
..Default::default()
};
let classified = classify_tool_result_failure_with_context(
"web_fetch",
"Error: connection refused",
None,
Some(&metadata),
);
assert_eq!(classified, Some(ToolFailureClass::Transient));
}
// NOTE(review): the test name says "not_transient" but the assertion expects
// `Transient` (file-lookup misses from file tools are classified as Transient
// by the text classifier) — consider renaming the test to match.
#[test]
fn transport_error_file_not_found_not_transient() {
let metadata = crate::traits::ToolCallMetadata {
transport_error: Some("File not found: /tmp/missing.txt".to_string()),
..Default::default()
};
let classified = classify_tool_result_failure_with_context(
"read_file",
"Error: File not found: /tmp/missing.txt",
None,
Some(&metadata),
);
assert_eq!(classified, Some(ToolFailureClass::Transient));
}
// --- Execution failure kinds (`classify_execution_failure_kind`) ---
#[test]
fn execution_failure_kind_detects_tool_contract_errors() {
let classified = classify_execution_failure_kind(
"manage_memories",
r#"{"error":"invalid arguments: missing required field"}"#,
None,
None,
false,
);
assert_eq!(classified, Some(ExecutionFailureKind::ToolContractFailure));
}
#[test]
fn execution_failure_kind_detects_environment_errors() {
let classified = classify_execution_failure_kind(
"read_file",
"Error: File not found: /tmp/missing.txt",
None,
None,
false,
);
assert_eq!(classified, Some(ExecutionFailureKind::EnvironmentFailure));
}
#[test]
fn execution_failure_kind_detects_transport_failures() {
let metadata = crate::traits::ToolCallMetadata {
transport_error: Some("connection refused".to_string()),
..Default::default()
};
let classified = classify_execution_failure_kind(
"web_fetch",
"Error: connection refused",
None,
Some(&metadata),
false,
);
assert_eq!(
classified,
Some(ExecutionFailureKind::ToolInvocationFailure)
);
}
#[test]
fn execution_failure_kind_detects_logic_failures() {
let metadata = crate::traits::ToolCallMetadata {
exit_code: Some(2),
..Default::default()
};
let classified = classify_execution_failure_kind(
"terminal",
"tests failed",
None,
Some(&metadata),
false,
);
assert_eq!(classified, Some(ExecutionFailureKind::LogicFailure));
}
// --- Structured HTTP status in metadata ---
#[test]
fn classify_uses_structured_http_status_over_text_scraping() {
let meta = crate::traits::ToolCallMetadata {
http_status: Some(403),
..Default::default()
};
let result = classify_tool_result_failure_with_context(
"http_request",
"some response body without status line",
None,
Some(&meta),
);
assert_eq!(result, Some(ToolFailureClass::Semantic));
}
#[test]
fn classify_structured_http_status_transient() {
let meta = crate::traits::ToolCallMetadata {
http_status: Some(429),
..Default::default()
};
let result = classify_tool_result_failure_with_context(
"http_request",
"rate limited",
None,
Some(&meta),
);
assert_eq!(result, Some(ToolFailureClass::Transient));
}
#[test]
fn classify_structured_http_200_is_not_error() {
let meta = crate::traits::ToolCallMetadata {
http_status: Some(200),
..Default::default()
};
let result = classify_tool_result_failure_with_context(
"http_request",
"success body",
None,
Some(&meta),
);
assert_eq!(result, None);
}
}
/// Repairs a chat message list in place so that tool calls, tool results and
/// role ordering are mutually consistent.
///
/// The passes run in order:
/// 1. merge consecutive same-role user/assistant messages;
/// 2. drop `tool` messages whose `tool_call_id` matches no assistant tool call;
/// 3. strip assistant tool calls that have no `tool` result, drop assistant
///    messages left with neither content nor tool calls, and merge again;
/// 4. if the first non-system message is not a `user` message, drop everything
///    between the system prefix and the first `user` message;
/// 5. merge an assistant message carrying tool calls into a directly preceding
///    assistant message;
/// 6. strip tool calls from an assistant message that does not directly follow
///    a `user` or `tool` message, removing it if it then has no content.
pub(super) fn fixup_message_ordering(messages: &mut Vec<Value>) {
// Pass 1: collapse runs of same-role user/assistant messages.
merge_consecutive_messages(messages);
// Pass 2: collect every tool-call id issued by an assistant message...
let assistant_tool_call_ids: std::collections::HashSet<String> = messages
.iter()
.filter(|m| m.get("role").and_then(|r| r.as_str()) == Some("assistant"))
.filter_map(|m| m.get("tool_calls"))
.filter_map(|tcs| tcs.as_array())
.flat_map(|arr| arr.iter())
.filter_map(|tc| tc.get("id").and_then(|id| id.as_str()))
.map(|s| s.to_string())
.collect();
// ...and drop tool messages that answer none of them. A tool message without
// a `tool_call_id` is looked up as "" and is therefore dropped unless some
// call has an empty id.
messages.retain(|m| {
if m.get("role").and_then(|r| r.as_str()) != Some("tool") {
return true;
}
let tc_id = m
.get("tool_call_id")
.and_then(|id| id.as_str())
.unwrap_or("");
if assistant_tool_call_ids.contains(tc_id) {
true
} else {
warn!(tool_call_id = tc_id, "Dropping orphaned tool message");
false
}
});
// Pass 3: collect the ids that still have a tool result...
let tool_result_ids: std::collections::HashSet<String> = messages
.iter()
.filter(|m| m.get("role").and_then(|r| r.as_str()) == Some("tool"))
.filter_map(|m| m.get("tool_call_id").and_then(|id| id.as_str()))
.map(|s| s.to_string())
.collect();
// ...and keep only the assistant tool calls that were answered.
for m in messages.iter_mut() {
if m.get("role").and_then(|r| r.as_str()) != Some("assistant") {
continue;
}
if let Some(tcs) = m.get("tool_calls").and_then(|v| v.as_array()).cloned() {
let kept: Vec<Value> = tcs
.into_iter()
.filter(|tc| {
tc.get("id")
.and_then(|id| id.as_str())
.is_some_and(|id| tool_result_ids.contains(id))
})
.collect();
if kept.is_empty() {
// Remove the key entirely rather than leaving an empty array.
m.as_object_mut().map(|o| o.remove("tool_calls"));
} else {
m["tool_calls"] = Value::Array(kept);
}
}
}
// Drop assistant messages that now have neither string content nor tool calls.
messages.retain(|m| {
if m.get("role").and_then(|r| r.as_str()) != Some("assistant") {
return true;
}
let has_content = m
.get("content")
.and_then(|c| c.as_str())
.is_some_and(|s| !s.is_empty());
let has_tool_calls = m
.get("tool_calls")
.and_then(|v| v.as_array())
.is_some_and(|a| !a.is_empty());
has_content || has_tool_calls
});
// Removals above may have made same-role messages adjacent again.
merge_consecutive_messages(messages);
// Pass 4: the first non-system message must be a user message. If no user
// message exists at all, nothing is dropped.
if let Some(first_non_system) = messages
.iter()
.position(|m| m.get("role").and_then(|r| r.as_str()) != Some("system"))
{
let first_role = messages[first_non_system]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("");
if first_role != "user" {
if let Some(first_user_rel) = messages[first_non_system..]
.iter()
.position(|m| m.get("role").and_then(|r| r.as_str()) == Some("user"))
{
let abs_end = first_non_system + first_user_rel;
warn!(
dropped = abs_end - first_non_system,
"Dropping leading non-user messages to satisfy provider ordering"
);
messages.drain(first_non_system..abs_end);
}
}
}
// Pass 5: merge an assistant message with tool calls into a directly
// preceding assistant message (content joined by newline, tool calls appended).
let mut i = 1;
while i < messages.len() {
let prev_role = messages[i - 1]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("");
let curr_role = messages[i]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("");
let curr_has_tc = messages[i]
.get("tool_calls")
.and_then(|v| v.as_array())
.is_some_and(|a| !a.is_empty());
if prev_role == "assistant" && curr_role == "assistant" && curr_has_tc {
warn!(
"Pass 5: Found consecutive assistant messages, merging to satisfy Gemini constraint"
);
let curr_content = messages[i]
.get("content")
.and_then(|c| c.as_str())
.unwrap_or("")
.to_string();
let prev_content = messages[i - 1]
.get("content")
.and_then(|c| c.as_str())
.unwrap_or("")
.to_string();
// The previous content is only rewritten when there is something to append.
if !curr_content.is_empty() {
let merged = if prev_content.is_empty() {
curr_content
} else {
format!("{}\n{}", prev_content, curr_content)
};
messages[i - 1]["content"] = json!(merged);
}
if let Some(curr_tcs) = messages[i]
.get("tool_calls")
.and_then(|v| v.as_array())
.cloned()
{
if let Some(prev_tcs) = messages[i - 1]
.get_mut("tool_calls")
.and_then(|v| v.as_array_mut())
{
prev_tcs.extend(curr_tcs);
} else {
messages[i - 1]["tool_calls"] = json!(curr_tcs);
}
}
// `i` is not advanced: the next message shifts into this slot.
messages.remove(i);
} else {
i += 1;
}
}
// Pass 6: an assistant message with tool calls must directly follow a user
// or tool message; otherwise its tool calls are stripped.
// NOTE(review): tool messages answering the stripped calls are not removed
// here, which looks like it could leave orphaned tool results — confirm.
let mut i = 1;
while i < messages.len() {
let curr_role = messages[i]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("");
let curr_has_tc = messages[i]
.get("tool_calls")
.and_then(|v| v.as_array())
.is_some_and(|a| !a.is_empty());
if curr_role == "assistant" && curr_has_tc {
let prev_role = messages[i - 1]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("");
if prev_role != "user" && prev_role != "tool" {
warn!(
prev_role,
"Pass 6: Stripping tool_calls from assistant that doesn't follow user/tool"
);
messages[i].as_object_mut().map(|o| o.remove("tool_calls"));
let has_content = messages[i]
.get("content")
.and_then(|c| c.as_str())
.is_some_and(|s| !s.is_empty());
if !has_content {
// Nothing left in the message; remove it and re-check this index.
messages.remove(i);
continue;
}
}
}
i += 1;
}
}
pub(super) fn collapse_repeated_tool_errors(messages: &mut [Value]) -> usize {
let Some(boundary) = messages
.iter()
.rposition(|m| m.get("role").and_then(|r| r.as_str()) == Some("user"))
else {
return 0;
};
let mut collapsed = 0usize;
let mut last_error_idx_by_tool: HashMap<String, usize> = HashMap::new();
for idx in boundary.saturating_add(1)..messages.len() {
let role = messages[idx]
.get("role")
.and_then(|r| r.as_str())
.unwrap_or("");
if role != "tool" {
continue;
}
let name = messages[idx]
.get("name")
.and_then(|n| n.as_str())
.unwrap_or("")
.to_string();
if name.is_empty() {
continue;
}
let content = messages[idx]
.get("content")
.and_then(|c| c.as_str())
.unwrap_or("");
let is_error = classify_tool_result_failure(&name, content).is_some();
if is_error {
if let Some(prev_idx) = last_error_idx_by_tool.insert(name.clone(), idx) {
messages[prev_idx]["content"] = json!(format!(
"Error: (previous {} error collapsed; see the most recent {} error for details)",
name, name
));
collapsed += 1;
}
} else {
last_error_idx_by_tool.remove(&name);
}
}
collapsed
}
/// Returns the string stored under the top-level `command` key of `args_json`.
///
/// Yields `None` when the input is not valid JSON, the key is absent, or the
/// value under it is not a JSON string.
pub(super) fn extract_command_from_args(args_json: &str) -> Option<String> {
    let parsed: Value = serde_json::from_str(args_json).ok()?;
    let command = parsed.get("command")?.as_str()?;
    Some(command.to_owned())
}
pub(super) fn extract_file_path_from_args(args_json: &str) -> Option<String> {
let v: Value = serde_json::from_str(args_json).ok()?;
for key in ["file_path", "path", "file", "filename"] {
if let Some(s) = v.get(key).and_then(|x| x.as_str()) {
return Some(s.to_string());
}
}
None
}
pub(super) fn extract_send_file_dedupe_key_from_args(args_json: &str) -> Option<String> {
let parsed = serde_json::from_str::<Value>(args_json).ok()?;
let file_path = parsed.get("file_path")?.as_str()?.trim();
if file_path.is_empty() {
return None;
}
let expanded_path = shellexpand::tilde(file_path).to_string();
let caption = parsed
.get("caption")
.and_then(|v| v.as_str())
.unwrap_or("")
.trim();
Some(format!("{}|{}", expanded_path, caption))
}
/// Reports whether `session_id` starts with `event_` or contains the
/// substring `trigger` anywhere (both checks are case-sensitive).
pub(super) fn is_trigger_session(session_id: &str) -> bool {
    if session_id.starts_with("event_") {
        return true;
    }
    session_id.contains("trigger")
}
/// Hashes a tool call by feeding `name` and then `arguments` into a fresh
/// `DefaultHasher` and returning the resulting 64-bit digest.
///
/// Equal `(name, arguments)` pairs always produce the same value within a
/// build; the argument string is hashed verbatim, without JSON normalisation.
pub(super) fn hash_tool_call(name: &str, arguments: &str) -> u64 {
    use std::collections::hash_map::DefaultHasher;

    let mut state = DefaultHasher::new();
    // Feed each part separately, in order, exactly as two `str` hashes.
    for part in [name, arguments] {
        part.hash(&mut state);
    }
    state.finish()
}
#[cfg(test)]
mod extract_key_error_line_tests {
    use super::extract_key_error_line;

    /// Each sample output must yield a line containing its key error phrase.
    #[test]
    fn finds_error_patterns() {
        let samples = [
            (
                "$ ddev drush en entity_reference (exit: 1, 3.2s)\n\n\
                 Unable to install modules entity_reference due to missing modules entity_reference.",
                "Unable to install modules entity_reference",
            ),
            ("bash: drush: command not found", "command not found"),
            (
                "Error: ENOENT: no such file or directory, open '/app/config.json'",
                "Error: ENOENT",
            ),
        ];

        for (raw_output, expected_fragment) in samples {
            let key_line = extract_key_error_line(raw_output);
            assert!(key_line.contains(expected_fragment));
        }
    }

    /// Appended `[SYSTEM]` / `[DIAGNOSTIC]` sections must not leak into the result.
    #[test]
    fn strips_coaching_markers() {
        let annotated = "Unable to install modules entity_reference\n\n\
                         [SYSTEM] This tool has errored 2 semantic times. Do NOT retry it.\n\n\
                         [DIAGNOSTIC] Similar errors resolved before:\n- Try another approach";

        let key_line = extract_key_error_line(annotated);

        assert!(key_line.contains("Unable to install modules"));
        for marker in ["[SYSTEM]", "[DIAGNOSTIC]"] {
            assert!(!key_line.contains(marker));
        }
    }

    /// Output without a recognised pattern is returned as-is (single line).
    #[test]
    fn fallback_first_line() {
        let plain = "Some unexpected output that doesn't match patterns";
        assert_eq!(extract_key_error_line(plain), plain);
    }

    /// Empty, blank, and coaching-only inputs all produce an empty result.
    #[test]
    fn empty_input() {
        for blank in ["", " ", "\n\n[SYSTEM] coaching message only"] {
            assert!(extract_key_error_line(blank).is_empty());
        }
    }

    /// "missing" followed by either a colon or a space is picked up.
    #[test]
    fn missing_pattern_matches_with_boundary() {
        let samples = [
            ("missing: required field 'name'", "missing:"),
            ("missing modules entity_reference", "missing modules"),
        ];

        for (raw_output, expected_fragment) in samples {
            assert!(extract_key_error_line(raw_output).contains(expected_fragment));
        }
    }

    /// A very long error line is capped at 200 bytes.
    #[test]
    fn truncates_long_lines() {
        let mut oversized = String::from("Error: ");
        oversized.push_str(&"x".repeat(300));

        let key_line = extract_key_error_line(&oversized);

        assert!(key_line.len() <= 200);
    }
}
// Test-only module: compiled only under `cfg(test)`, with its source loaded
// from `message_ordering_tests.rs` via the `#[path]` attribute below.
#[cfg(test)]
#[path = "message_ordering_tests.rs"]
mod message_ordering_tests;