use crate::providers::{ChatMessage, ToolCall};
/// Context-usage threshold at which automatic history compaction triggers
/// (presumably a percentage of the context window — TODO confirm caller).
pub const AUTO_COMPACT_THRESHOLD: usize = 85;
/// Context-usage threshold at which a warning is surfaced (presumably a
/// percentage, just below the auto-compact threshold — TODO confirm caller).
pub const CONTEXT_WARN_THRESHOLD: usize = 80;
/// Rough characters-per-token ratio used by `estimate_tokens`.
pub const CHARS_PER_TOKEN: f64 = 3.5;
/// Flat token overhead charged per message in `estimate_tokens`
/// (role tags, separators, etc.).
pub const PER_MESSAGE_OVERHEAD: usize = 10;
/// Extra token allowance for the system prompt; not referenced in this
/// chunk — NOTE(review): verify it is used by the context-budget caller.
pub const SYSTEM_PROMPT_OVERHEAD: usize = 100;
/// Rough token-count estimate for a list of chat messages.
///
/// For each message, counts the bytes of its text content plus the JSON
/// serialization of its tool calls (if any), converts characters to tokens
/// via [`CHARS_PER_TOKEN`], and adds [`PER_MESSAGE_OVERHEAD`] per message.
/// Serialization failures contribute zero characters.
pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
    let mut total = 0usize;
    for message in messages {
        let text_chars = match message.content.as_deref() {
            Some(text) => text.len(),
            None => 0,
        };
        let tool_chars = match &message.tool_calls {
            Some(calls) => serde_json::to_string(calls)
                .map(|json| json.len())
                .unwrap_or(0),
            None => 0,
        };
        let chars = text_chars + tool_chars;
        total += (chars as f64 / CHARS_PER_TOKEN) as usize + PER_MESSAGE_OVERHEAD;
    }
    total
}
/// Synthetic assistant message injected by `assemble_messages` when a plain
/// user turn directly follows another user-side message, so providers see a
/// strict user/assistant alternation.
pub const INTERRUPTED_TURN_SENTINEL: &str = "[Turn interrupted — pick up from where you left off.]";
/// Build the provider-facing message list from the system prompt plus the
/// persisted history.
///
/// Whenever a plain user message (role `"user"`, no `tool_call_id`) directly
/// follows another user-side entry (`"user"` or `"tool"`), a synthetic
/// assistant turn containing [`INTERRUPTED_TURN_SENTINEL`] is inserted first
/// so the transcript keeps a user/assistant alternation.
pub fn assemble_messages(
    system_message: &ChatMessage,
    history: &[crate::db::Message],
) -> Vec<ChatMessage> {
    let mut out: Vec<ChatMessage> = Vec::with_capacity(history.len() + 1);
    out.push(system_message.clone());
    for record in history {
        let role = record.role.as_str();
        // A "plain" user message is one typed by the user, not a tool result.
        if role == "user" && record.tool_call_id.is_none() {
            let needs_sentinel =
                matches!(out.last(), Some(prev) if prev.role == "user" || prev.role == "tool");
            if needs_sentinel {
                out.push(ChatMessage::text("assistant", INTERRUPTED_TURN_SENTINEL));
            }
        }
        // Tool calls are stored as serialized JSON; unparseable data is
        // silently dropped (treated as no tool calls), matching storage format.
        let parsed_calls: Option<Vec<ToolCall>> = record
            .tool_calls
            .as_deref()
            .and_then(|json| serde_json::from_str(json).ok());
        out.push(ChatMessage {
            role: role.to_string(),
            content: record.content.clone(),
            tool_calls: parsed_calls,
            tool_call_id: record.tool_call_id.clone(),
            images: None,
        });
    }
    out
}
/// Heuristic: does this error look like a transient 5xx-style server failure?
///
/// Scans the lowercased, alternate-formatted (`{:#}`) error chain for HTTP
/// status codes and standard reason phrases, so context layers are included.
pub fn is_server_error(err: &anyhow::Error) -> bool {
    const MARKERS: [&str; 6] = [
        "500",
        "502",
        "503",
        "internal server error",
        "bad gateway",
        "service unavailable",
    ];
    let haystack = format!("{err:#}").to_lowercase();
    MARKERS.iter().any(|&marker| haystack.contains(marker))
}
/// Heuristic: does this error look like rate limiting or provider overload?
///
/// Scans the lowercased, alternate-formatted (`{:#}`) error chain for
/// 429/529 status codes and common rate-limit/overload phrasings.
pub fn is_rate_limit_error(err: &anyhow::Error) -> bool {
    const MARKERS: [&str; 7] = [
        "429",
        "529",
        "rate limit",
        "rate_limit",
        "too many requests",
        "quota exceeded",
        "overloaded",
    ];
    let haystack = format!("{err:#}").to_lowercase();
    MARKERS.iter().any(|&marker| haystack.contains(marker))
}
/// Maximum retry attempts after a rate-limit error — presumably consumed by
/// the retry loop together with `rate_limit_backoff`; confirm against caller.
pub const RATE_LIMIT_MAX_RETRIES: u32 = 5;
/// Exponential backoff for rate-limit retries: 1s, 2s, 4s, …, capped at 32s.
///
/// The exponent is clamped *before* exponentiation: the previous
/// `2u64.pow(attempt).min(32)` would panic in debug builds (and wrap in
/// release) for `attempt >= 64`. Since the cap is 2^5 = 32, clamping the
/// exponent to 5 yields the identical duration for every attempt value
/// while remaining overflow-free.
pub fn rate_limit_backoff(attempt: u32) -> std::time::Duration {
    let secs = 2u64.pow(attempt.min(5));
    std::time::Duration::from_secs(secs)
}
/// Heuristic: does this error indicate the prompt exceeded the model's
/// context window or request-size limit?
///
/// Scans the lowercased, alternate-formatted (`{:#}`) error chain.
pub fn is_context_overflow_error(err: &anyhow::Error) -> bool {
    let haystack = format!("{err:#}").to_lowercase();
    let simple = [
        "too long",
        "context_length_exceeded",
        "maximum context length",
        "token limit",
        "exceeds the model",
        "request too large",
    ];
    if simple.iter().any(|&marker| haystack.contains(marker)) {
        return true;
    }
    // A bare "413" is ambiguous; only treat it as overflow when the message
    // also says "too large".
    haystack.contains("413") && haystack.contains("too large")
}
/// Heuristic: does this error indicate the provider rejected image/vision
/// input (unsupported model, invalid image data, etc.)?
///
/// Scans the lowercased, alternate-formatted (`{:#}`) error chain.
pub fn is_image_rejection_error(err: &anyhow::Error) -> bool {
    let haystack = format!("{err:#}").to_lowercase();
    let mentions = |needle: &str| haystack.contains(needle);
    // "image" paired with support/validity complaints.
    if mentions("image") && (mentions("support") || mentions("invalid")) {
        return true;
    }
    // "vision"/"multimodal" paired with a not-supported phrasing.
    for topic in ["vision", "multimodal"] {
        if mentions(topic) && (mentions("support") || mentions("not") || mentions("unavailable")) {
            return true;
        }
    }
    false
}
#[cfg(test)]
// NOTE(review): these tests construct `Message`/`Role` from
// `crate::persistence`, while `assemble_messages` takes
// `&[crate::db::Message]` — presumably one module re-exports the other's
// type; confirm they are the same struct.
mod tests {
use super::*;
use crate::persistence::{Message, Role};
// Fixture: a minimal persisted message with the given role, optional content,
// and optional tool_call_id; every other field is zero/empty/None.
fn msg(role: &str, content: Option<&str>, tool_call_id: Option<&str>) -> Message {
Message {
id: 0,
session_id: String::new(),
// Unparseable role strings fall back to Role::User.
role: role.parse().unwrap_or(Role::User),
content: content.map(Into::into),
full_content: None,
tool_calls: None,
tool_call_id: tool_call_id.map(Into::into),
prompt_tokens: None,
completion_tokens: None,
cache_read_tokens: None,
cache_creation_tokens: None,
thinking_tokens: None,
thinking_content: None,
created_at: None,
}
}
// Shared system prompt used as the head of every assembled list.
fn system() -> ChatMessage {
ChatMessage::text("system", "You are helpful.")
}
// A clean user/assistant alternation must pass through with no sentinel.
#[test]
fn no_sentinel_for_clean_conversation() {
let history = vec![
msg("user", Some("hello"), None),
msg("assistant", Some("hi!"), None),
msg("user", Some("refactor X"), None),
msg("assistant", Some("done"), None),
];
let out = assemble_messages(&system(), &history);
assert_eq!(out.len(), 5, "no sentinel expected; got {out:?}");
assert!(
out.iter()
.all(|m| m.content.as_deref() != Some(INTERRUPTED_TURN_SENTINEL)),
"sentinel must not appear in clean conversation",
);
}
// Two consecutive user messages: a sentinel assistant turn goes between them.
#[test]
fn sentinel_injected_for_user_after_user() {
let history = vec![
msg("user", Some("refactor X"), None),
msg("user", Some("continue"), None),
];
let out = assemble_messages(&system(), &history);
assert_eq!(out.len(), 4, "expected sentinel; got {out:?}");
assert_eq!(out[2].role, "assistant");
assert_eq!(out[2].content.as_deref(), Some(INTERRUPTED_TURN_SENTINEL));
assert_eq!(out[3].content.as_deref(), Some("continue"));
}
// A user message directly after a tool result also needs the sentinel,
// because "tool" counts as the user side of the conversation.
#[test]
fn sentinel_injected_for_user_after_tool_result() {
let history = vec![
msg("user", Some("read the file"), None),
msg("assistant", Some("sure"), None),
msg("tool", Some("file contents"), Some("tc_1")),
msg("user", Some("continue"), None),
];
let out = assemble_messages(&system(), &history);
assert_eq!(
out.len(),
6,
"expected sentinel after tool result; got {out:?}"
);
assert_eq!(out[4].role, "assistant");
assert_eq!(out[4].content.as_deref(), Some(INTERRUPTED_TURN_SENTINEL));
}
// A trailing tool result by itself must not trigger a sentinel.
#[test]
fn no_sentinel_before_tool_result() {
let history = vec![
msg("user", Some("read it"), None),
msg("assistant", Some("ok"), None),
msg("tool", Some("contents"), Some("tc_1")),
];
let out = assemble_messages(&system(), &history);
assert_eq!(out.len(), 4);
assert!(
out.iter().all(|m| m.role != "assistant"
|| m.content.as_deref() != Some(INTERRUPTED_TURN_SENTINEL))
);
}
// Back-to-back tool results (parallel tool calls) stay adjacent — only a
// *plain* user message triggers the sentinel check.
#[test]
fn no_sentinel_between_consecutive_tool_results() {
let history = vec![
msg("user", Some("do stuff"), None),
msg("assistant", Some("calling tools"), None),
msg("tool", Some("r1"), Some("tc_1")),
msg("tool", Some("r2"), Some("tc_2")),
];
let out = assemble_messages(&system(), &history);
assert_eq!(out.len(), 5); }
// The first user message follows only the system prompt, so no sentinel.
#[test]
fn no_sentinel_for_first_user_message() {
let history = vec![msg("user", Some("hello"), None)];
let out = assemble_messages(&system(), &history);
assert_eq!(out.len(), 2); assert_eq!(out[1].role, "user");
}
// Positive and negative phrasings for the context-overflow classifier.
#[test]
fn test_is_context_overflow_error() {
assert!(is_context_overflow_error(&anyhow::anyhow!(
"Anthropic API returned 400: prompt is too long"
)));
assert!(is_context_overflow_error(&anyhow::anyhow!(
"context_length_exceeded: max 200000 tokens"
)));
assert!(is_context_overflow_error(&anyhow::anyhow!(
"maximum context length exceeded"
)));
assert!(is_context_overflow_error(&anyhow::anyhow!(
"request exceeds the model's input limit"
)));
assert!(!is_context_overflow_error(&anyhow::anyhow!(
"rate limit exceeded"
)));
assert!(!is_context_overflow_error(&anyhow::anyhow!(
"connection refused"
)));
}
// Positive and negative phrasings for the rate-limit classifier.
#[test]
fn test_is_rate_limit_error() {
assert!(is_rate_limit_error(&anyhow::anyhow!(
"429 Too Many Requests"
)));
assert!(is_rate_limit_error(&anyhow::anyhow!("529 API overloaded")));
assert!(is_rate_limit_error(&anyhow::anyhow!("rate limit exceeded")));
assert!(is_rate_limit_error(&anyhow::anyhow!("rate_limit_exceeded")));
assert!(is_rate_limit_error(&anyhow::anyhow!("too many requests")));
assert!(is_rate_limit_error(&anyhow::anyhow!("quota exceeded")));
assert!(is_rate_limit_error(&anyhow::anyhow!(
"Anthropic API is overloaded"
)));
assert!(!is_rate_limit_error(&anyhow::anyhow!("prompt is too long")));
assert!(!is_rate_limit_error(&anyhow::anyhow!("connection refused")));
}
// Backoff doubles per attempt (1s, 2s, 4s, 8s, …) and caps at 32s.
#[test]
fn test_rate_limit_backoff() {
assert_eq!(rate_limit_backoff(0).as_secs(), 1);
assert_eq!(rate_limit_backoff(1).as_secs(), 2);
assert_eq!(rate_limit_backoff(2).as_secs(), 4);
assert_eq!(rate_limit_backoff(3).as_secs(), 8);
assert_eq!(rate_limit_backoff(10).as_secs(), 32); }
// Sanity-check the estimate is in a plausible band, not an exact count.
#[test]
fn test_estimate_tokens() {
let messages = vec![
ChatMessage::text("system", "You are helpful."),
ChatMessage::text("user", "Hello world"),
];
let tokens = estimate_tokens(&messages);
assert!(tokens > 20 && tokens < 40, "tokens={tokens}");
}
// 5xx status codes anywhere in the message classify as server errors.
#[test]
fn test_is_server_error_http_codes() {
for code in ["500", "502", "503"] {
let err = anyhow::anyhow!("HTTP {code} from provider");
assert!(is_server_error(&err), "{code} should be server error");
}
}
// Reason-phrase text (no status code) also classifies as a server error.
#[test]
fn test_is_server_error_text_patterns() {
let patterns = [
"internal server error",
"bad gateway",
"service unavailable",
];
for text in patterns {
let err = anyhow::anyhow!("{text}");
assert!(is_server_error(&err), "'{text}' should be server error");
}
}
// Matching is done on a lowercased copy, so case must not matter.
#[test]
fn test_is_server_error_case_insensitive() {
let err = anyhow::anyhow!("Internal Server Error from upstream");
assert!(is_server_error(&err));
}
// 429 is a rate-limit concern, not a server error.
#[test]
fn test_is_not_server_error_for_rate_limit() {
let err = anyhow::anyhow!("429 Too Many Requests");
assert!(
!is_server_error(&err),
"rate limit should not be server error"
);
}
// Auth failures (4xx) must not classify as server errors.
#[test]
fn test_is_not_server_error_for_auth() {
let err = anyhow::anyhow!("401 Unauthorized");
assert!(!is_server_error(&err));
}
// Representative provider messages that should classify as image rejection.
#[test]
fn test_is_image_rejection_error_matches() {
assert!(is_image_rejection_error(&anyhow::anyhow!(
"LLM API returned 400: This model does not support image input"
)));
assert!(is_image_rejection_error(&anyhow::anyhow!(
"Invalid image. The model does not support vision input."
)));
assert!(is_image_rejection_error(&anyhow::anyhow!(
"multimodal content is not supported by this endpoint"
)));
assert!(is_image_rejection_error(&anyhow::anyhow!(
"Vision capability not available"
)));
assert!(is_image_rejection_error(&anyhow::anyhow!(
"400 Bad Request: Images are not supported for this model"
)));
assert!(is_image_rejection_error(&anyhow::anyhow!(
"400 Bad Request: Invalid image: unable to decode image data"
)));
}
// Messages that merely mention image/vision/multimodal in other contexts
// must not be misclassified as image rejection.
#[test]
fn test_is_image_rejection_error_no_false_positives() {
assert!(!is_image_rejection_error(&anyhow::anyhow!(
"rate limit exceeded"
)));
assert!(!is_image_rejection_error(&anyhow::anyhow!(
"prompt is too long"
)));
assert!(!is_image_rejection_error(&anyhow::anyhow!(
"502 bad gateway"
)));
assert!(!is_image_rejection_error(&anyhow::anyhow!(
"failed to load image/png from request body"
)));
assert!(!is_image_rejection_error(&anyhow::anyhow!(
"Invalid API key for vision endpoint"
)));
assert!(!is_image_rejection_error(&anyhow::anyhow!(
"multimodal endpoint rate limit"
)));
}
}