use crate::providers::{ChatMessage, ToolCall};
// Threshold at which a pre-flight context compaction kicks in.
// NOTE(review): presumably a percentage of the context window — confirm against the caller.
pub const PREFLIGHT_COMPACT_THRESHOLD: usize = 90;
// Rough characters-per-token ratio used by `estimate_tokens` below.
pub const CHARS_PER_TOKEN: f64 = 3.5;
// Flat token overhead added per message in `estimate_tokens` (role markers, framing, etc.).
pub const PER_MESSAGE_OVERHEAD: usize = 10;
// Extra token allowance for the system prompt.
// NOTE(review): not used in this file's visible code — confirm consumer.
pub const SYSTEM_PROMPT_OVERHEAD: usize = 100;
/// Heuristically estimate the token count of a message list.
///
/// Counts the characters of each message's text content plus its tool calls
/// (serialized to JSON; serialization failure counts as zero), divides by
/// `CHARS_PER_TOKEN`, and adds `PER_MESSAGE_OVERHEAD` per message.
pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
    let mut total = 0usize;
    for message in messages {
        let content_chars = message.content.as_deref().map_or(0, str::len);
        let tool_call_chars = match message.tool_calls.as_ref() {
            Some(calls) => serde_json::to_string(calls).map(|json| json.len()).unwrap_or(0),
            None => 0,
        };
        let char_count = content_chars + tool_call_chars;
        total += (char_count as f64 / CHARS_PER_TOKEN) as usize + PER_MESSAGE_OVERHEAD;
    }
    total
}
/// Build the full chat transcript sent to the provider: the system message
/// followed by every stored history row converted to a `ChatMessage`.
///
/// Tool calls are stored as JSON text in the DB; rows whose JSON fails to
/// parse get `tool_calls: None` rather than erroring out.
pub fn assemble_messages(
    system_message: &ChatMessage,
    history: &[crate::db::Message],
) -> Vec<ChatMessage> {
    let converted = history.iter().map(|row| ChatMessage {
        role: row.role.as_str().to_string(),
        content: row.content.clone(),
        tool_calls: row
            .tool_calls
            .as_deref()
            .and_then(|raw| serde_json::from_str::<Vec<ToolCall>>(raw).ok()),
        tool_call_id: row.tool_call_id.clone(),
        images: None,
    });
    std::iter::once(system_message.clone()).chain(converted).collect()
}
/// Returns `true` if the error's (lowercased) full chain message looks like a
/// provider rate-limit / quota error.
pub fn is_rate_limit_error(err: &anyhow::Error) -> bool {
    const MARKERS: [&str; 5] = [
        "429",
        "rate limit",
        "rate_limit",
        "too many requests",
        "quota exceeded",
    ];
    let msg = format!("{err:#}").to_lowercase();
    MARKERS.iter().any(|marker| msg.contains(marker))
}
/// Maximum number of retries attempted after rate-limit errors.
pub const RATE_LIMIT_MAX_RETRIES: u32 = 5;

/// Exponential backoff for rate-limited requests: 1s, 2s, 4s, 8s, ... capped at 32s.
pub fn rate_limit_backoff(attempt: u32) -> std::time::Duration {
    // Clamp the exponent BEFORE `pow`: the cap is 32 = 2^5, so any exponent above 5
    // is equivalent — and `2u64.pow(attempt)` would overflow (panicking in debug
    // builds) once `attempt >= 64`. Clamping first makes the function total.
    let secs = 2u64.pow(attempt.min(5));
    std::time::Duration::from_secs(secs)
}
/// Returns `true` if the error's (lowercased) full chain message indicates the
/// request exceeded the model's context window.
pub fn is_context_overflow_error(err: &anyhow::Error) -> bool {
    const MARKERS: [&str; 6] = [
        "too long",
        "context_length_exceeded",
        "maximum context length",
        "token limit",
        "exceeds the model",
        "request too large",
    ];
    let msg = format!("{err:#}").to_lowercase();
    // A bare "413" is ambiguous; only treat it as overflow when paired with "too large".
    MARKERS.iter().any(|marker| msg.contains(marker))
        || (msg.contains("413") && msg.contains("too large"))
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_context_overflow_error() {
        // Messages that must classify as context overflow.
        let overflow = [
            "Anthropic API returned 400: prompt is too long",
            "context_length_exceeded: max 200000 tokens",
            "maximum context length exceeded",
            "request exceeds the model's input limit",
        ];
        for text in overflow {
            assert!(is_context_overflow_error(&anyhow::anyhow!(text)));
        }
        // Unrelated errors must not classify as overflow.
        for text in ["rate limit exceeded", "connection refused"] {
            assert!(!is_context_overflow_error(&anyhow::anyhow!(text)));
        }
    }

    #[test]
    fn test_is_rate_limit_error() {
        let rate_limited = [
            "429 Too Many Requests",
            "rate limit exceeded",
            "rate_limit_exceeded",
            "too many requests",
            "quota exceeded",
        ];
        for text in rate_limited {
            assert!(is_rate_limit_error(&anyhow::anyhow!(text)));
        }
        for text in ["prompt is too long", "connection refused"] {
            assert!(!is_rate_limit_error(&anyhow::anyhow!(text)));
        }
    }

    #[test]
    fn test_rate_limit_backoff() {
        // Doubles each attempt, capped at 32 seconds.
        for (attempt, expected) in [(0, 1), (1, 2), (2, 4), (3, 8), (10, 32)] {
            assert_eq!(rate_limit_backoff(attempt).as_secs(), expected);
        }
    }

    #[test]
    fn test_estimate_tokens() {
        let messages = vec![
            ChatMessage::text("system", "You are helpful."),
            ChatMessage::text("user", "Hello world"),
        ];
        let tokens = estimate_tokens(&messages);
        // Loose band: heuristic estimate for two short messages.
        assert!(tokens > 20 && tokens < 40, "tokens={tokens}");
    }
}