use crate::messages::AssistantMessage;
/// Substrings of provider error messages that indicate the request did not
/// fit into the model's context window.
///
/// `is_context_overflow` tests an error message against every entry. The
/// provider hints reflect the sample messages exercised by this module's
/// unit tests; entries without a hint are generic phrasings.
const OVERFLOW_PATTERNS: &[&str] = &[
    "prompt is too long",                    // Anthropic
    "request_too_large",                     // Anthropic
    "input is too long for requested model", // Bedrock
    "exceeds the context window",            // OpenAI
    "exceeds the maximum number of tokens",  // Google
    "maximum prompt length",                 // xAI
    "reduce the length of the messages",     // Groq
    "maximum context length",
    "exceeds the limit of",
    "exceeds the available context size",    // llama.cpp
    "greater than the context length",
    "context window exceeds limit",          // MiniMax
    "exceeded model token limit",            // Kimi
    "too large for model with",              // Mistral
    "model_context_window_exceeded",
    "prompt too long",
    "context_length_exceeded",
    "context length exceeded",
    "too many tokens",
    "token limit exceeded",
];
/// Substrings that mark an error as throttling / availability related.
///
/// These take precedence over `OVERFLOW_PATTERNS`: a message such as
/// "Throttling error: Too many tokens, please wait" mentions tokens but is
/// not a context overflow.
const NON_OVERFLOW_PATTERNS: &[&str] = &[
    "Throttling error:",
    "Service unavailable:",
    "rate limit",
    "too many requests",
];
pub fn is_context_overflow(message: &AssistantMessage, context_window: Option<usize>) -> bool {
if message.stop_reason == crate::types::StopReason::Error {
if let Some(ref error_msg) = message.error_message {
let is_non_overflow = NON_OVERFLOW_PATTERNS
.iter()
.any(|p: &&str| error_msg.contains(p));
if !is_non_overflow {
let is_overflow = OVERFLOW_PATTERNS
.iter()
.any(|p: &&str| error_msg.contains(p));
if is_overflow {
return true;
}
}
if (error_msg.contains("400") || error_msg.contains("413"))
&& (error_msg.contains("no body") || error_msg.trim().len() < 50)
{
return true;
}
}
}
let Some(window) = context_window else {
return false;
};
if message.stop_reason == crate::types::StopReason::Stop {
let input_tokens = message.usage.input + message.usage.cache_read;
if input_tokens > window {
return true;
}
}
if message.stop_reason == crate::types::StopReason::Length && message.usage.output == 0 {
let input_tokens = message.usage.input + message.usage.cache_read;
if input_tokens >= (window as f64 * 0.99) as usize {
return true;
}
}
false
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{Cost, StopReason, Usage};

    /// Creates a bare test message that ended with `stop_reason`.
    fn message_with_stop_reason(stop_reason: StopReason) -> AssistantMessage {
        let mut message =
            AssistantMessage::new(crate::types::Api::OpenAiCompletions, "test", "test-model");
        message.stop_reason = stop_reason;
        message
    }

    /// Creates a test message with the given stop reason and token usage
    /// (no cache traffic, default cost).
    fn message_with_usage(stop_reason: StopReason, input: usize, output: usize) -> AssistantMessage {
        let mut message = message_with_stop_reason(stop_reason);
        message.usage = Usage {
            input,
            output,
            total_tokens: input + output,
            cache_read: 0,
            cache_write: 0,
            cost: Cost::default(),
        };
        message
    }

    /// A message that failed with the given error text.
    fn make_error_message(error: &str) -> AssistantMessage {
        let mut message = message_with_stop_reason(StopReason::Error);
        message.error_message = Some(error.to_string());
        message
    }

    /// A message that completed normally.
    fn make_success_message(input: usize, output: usize) -> AssistantMessage {
        message_with_usage(StopReason::Stop, input, output)
    }

    /// A message that was cut off for length.
    fn make_length_message(input: usize, output: usize) -> AssistantMessage {
        message_with_usage(StopReason::Length, input, output)
    }

    /// Classifies an error text without a known context window.
    fn error_is_overflow(error: &str) -> bool {
        is_context_overflow(&make_error_message(error), None)
    }

    // --- provider error messages that must be detected ---

    #[test]
    fn test_anthropic_overflow() {
        assert!(error_is_overflow(
            "prompt is too long: 213462 tokens > 200000 maximum"
        ));
    }

    #[test]
    fn test_anthropic_request_too_large() {
        assert!(error_is_overflow(
            "request_too_large: Request exceeds maximum size"
        ));
    }

    #[test]
    fn test_openai_overflow() {
        assert!(error_is_overflow(
            "Your input exceeds the context window of this model"
        ));
    }

    #[test]
    fn test_google_overflow() {
        assert!(error_is_overflow(
            "The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)"
        ));
    }

    #[test]
    fn test_xai_overflow() {
        assert!(error_is_overflow(
            "This model's maximum prompt length is 131072 but the request contains 537812 tokens"
        ));
    }

    #[test]
    fn test_groq_overflow() {
        assert!(error_is_overflow(
            "Please reduce the length of the messages or completion"
        ));
    }

    #[test]
    fn test_mistral_overflow() {
        assert!(error_is_overflow(
            "Prompt contains X tokens ... too large for model with Y maximum context length"
        ));
    }

    // --- throttling errors that must not be detected ---

    #[test]
    fn test_non_overflow_rate_limit() {
        assert!(!error_is_overflow("rate limit exceeded"));
    }

    #[test]
    fn test_non_overflow_throttling() {
        assert!(!error_is_overflow(
            "Throttling error: Too many tokens, please wait"
        ));
    }

    // --- usage-based detection ---

    #[test]
    fn test_silent_overflow() {
        let message = make_success_message(150_000, 500);
        assert!(is_context_overflow(&message, Some(128_000)));
    }

    #[test]
    fn test_no_silent_overflow() {
        let message = make_success_message(100_000, 500);
        assert!(!is_context_overflow(&message, Some(128_000)));
    }

    #[test]
    fn test_length_stop_overflow() {
        let message = make_length_message(127_500, 0);
        assert!(is_context_overflow(&message, Some(128_000)));
    }

    #[test]
    fn test_length_stop_no_overflow() {
        let message = make_length_message(100_000, 0);
        assert!(!is_context_overflow(&message, Some(128_000)));
    }

    #[test]
    fn test_length_stop_with_output() {
        let message = make_length_message(100_000, 500);
        assert!(!is_context_overflow(&message, Some(128_000)));
    }

    #[test]
    fn test_no_error_no_overflow() {
        let message = make_success_message(100, 50);
        assert!(!is_context_overflow(&message, None));
    }

    // --- further provider error messages ---

    #[test]
    fn test_cerebras_overflow() {
        assert!(error_is_overflow("400 status code (no body)"));
    }

    #[test]
    fn test_bedrock_overflow() {
        assert!(error_is_overflow("input is too long for requested model"));
    }

    #[test]
    fn test_llamacpp_overflow() {
        assert!(error_is_overflow(
            "the request exceeds the available context size, try increasing it"
        ));
    }

    #[test]
    fn test_minimax_overflow() {
        assert!(error_is_overflow(
            "invalid params, context window exceeds limit"
        ));
    }

    #[test]
    fn test_kimi_overflow() {
        assert!(error_is_overflow(
            "Your request exceeded model token limit: 128000 (requested: 200000)"
        ));
    }

    #[test]
    fn test_generic_context_length_exceeded() {
        assert!(error_is_overflow("context_length_exceeded"));
    }

    #[test]
    fn test_service_unavailable_not_overflow() {
        assert!(!error_is_overflow(
            "Service unavailable: too many tokens, try again later"
        ));
    }
}