use serde::{Deserialize, Serialize};
pub const LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
pub const DEFAULT_AUTO_COMPACT_MAX_CONTEXT_WINDOW_TOKENS: u32 = 262_144;
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MessageRequest {
pub model: String,
pub messages: Vec<Message>,
pub max_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub system: Option<SystemPrompt>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tools: Option<Vec<Tool>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub stream: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum SystemPrompt {
Text(String),
Blocks(Vec<SystemBlock>),
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SystemBlock {
#[serde(rename = "type")]
pub block_type: String,
pub text: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_control: Option<CacheControl>,
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ImageUrlContent {
pub url: String,
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Message {
pub role: String,
pub content: Vec<ContentBlock>,
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")]
pub enum ContentBlock {
#[serde(rename = "text")]
Text {
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "image_url")]
ImageUrl { image_url: ImageUrlContent },
#[serde(rename = "thinking")]
Thinking {
thinking: String,
#[serde(skip_serializing_if = "Option::is_none", default)]
signature: Option<String>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: serde_json::Value,
#[serde(skip_serializing_if = "Option::is_none")]
caller: Option<ToolCaller>,
},
#[serde(rename = "tool_result")]
ToolResult {
tool_use_id: String,
content: String,
#[serde(skip_serializing_if = "Option::is_none")]
is_error: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
content_blocks: Option<Vec<serde_json::Value>>,
},
#[serde(rename = "server_tool_use")]
ServerToolUse {
id: String,
name: String,
input: serde_json::Value,
},
#[serde(rename = "tool_search_tool_result")]
ToolSearchToolResult {
tool_use_id: String,
content: serde_json::Value,
},
#[serde(rename = "code_execution_tool_result")]
CodeExecutionToolResult {
tool_use_id: String,
content: serde_json::Value,
},
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CacheControl {
#[serde(rename = "type")]
pub cache_type: String,
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ToolCaller {
#[serde(rename = "type")]
pub caller_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_id: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Tool {
#[serde(rename = "type", skip_serializing_if = "Option::is_none")]
pub tool_type: Option<String>,
pub name: String,
pub description: String,
pub input_schema: serde_json::Value,
#[serde(skip_serializing_if = "Option::is_none")]
pub allowed_callers: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub defer_loading: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub input_examples: Option<Vec<serde_json::Value>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub strict: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_control: Option<CacheControl>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ContainerInfo {
pub id: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub expires_at: Option<String>,
}
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
pub struct ServerToolUsage {
#[serde(skip_serializing_if = "Option::is_none")]
pub code_execution_requests: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_search_requests: Option<u32>,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MessageResponse {
pub id: String,
pub r#type: String,
pub role: String,
pub content: Vec<ContentBlock>,
pub model: String,
pub stop_reason: Option<String>,
pub stop_sequence: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub container: Option<ContainerInfo>,
pub usage: Usage,
}
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
pub struct Usage {
pub input_tokens: u32,
pub output_tokens: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_hit_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_miss_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_replay_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub server_tool_use: Option<ServerToolUsage>,
}
#[must_use]
pub fn context_window_for_model(model: &str) -> Option<u32> {
if let Some(window) = crate::model_catalog::resolved_context_window(model) {
return Some(window);
}
let lower = model.to_lowercase();
if let Some(explicit_window) = explicit_context_window_hint(&lower) {
return Some(explicit_window);
}
if lower.contains("deepseek") {
if lower.contains("v4") {
return Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS);
}
return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
}
if is_openai_gpt_55_api_model(&lower) {
return Some(1_050_000);
}
if is_openai_codex_model(&lower) {
return Some(400_000);
}
if let Some(window) = known_context_window_for_model(&lower) {
return Some(window);
}
if lower.contains("claude") {
return Some(200_000);
}
None
}
fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
match model_lower {
"gpt-5-codex" | "gpt-5.3-codex" => Some(400_000),
"claude-opus-4-8" | "claude-sonnet-4-6" => Some(1_000_000),
"claude-haiku-4-5" => Some(200_000),
"trinity-mini" => Some(128_000),
"arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "trinity-large-preview" => {
Some(262_144)
}
"google/gemma-4-31b-it"
| "google/gemma-4-31b-it:free"
| "google/gemma-4-26b-a4b-it"
| "google/gemma-4-26b-a4b-it:free"
| "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"
| "qwen/qwen3.6-35b-a3b"
| "qwen/qwen3.6-max-preview"
| "qwen/qwen3.6-27b"
| "tencent/hy3-preview"
| "moonshotai/kimi-k2.7-code"
| "moonshotai/kimi-k2.6"
| "moonshotai/kimi-k2.6:free"
| "kimi-k2.7-code"
| "kimi-k2.6"
| "kimi-for-coding" => Some(262_144),
"minimax-m2.7"
| "minimax/minimax-2.7"
| "minimax-m2.7-highspeed"
| "minimax-m2.5"
| "minimax-m2.5-highspeed"
| "minimax-m2.1"
| "minimax-m2.1-highspeed"
| "minimax-m2" => Some(204_800),
"z-ai/glm-5.1" | "z-ai/glm-5v-turbo" | "glm-5.1" | "glm-5v-turbo" => Some(202_752),
"z-ai/glm-5.2" | "glm-5.2" => Some(1_000_000),
"minimax/minimax-m3" | "minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => {
Some(1_000_000)
}
"nvidia/nemotron-3-ultra-550b-a55b" | "nvidia/nemotron-3-ultra-550b-a55b:free" => {
Some(1_000_000)
}
"xiaomi/mimo-v2.5-pro" | "xiaomi/mimo-v2.5" | "mimo-v2.5-pro" | "mimo-v2.5" => {
Some(1_000_000)
}
"mimo-v2.5-asr"
| "mimo-v2.5-tts"
| "mimo-v2.5-tts-voicedesign"
| "mimo-v2.5-tts-voiceclone"
| "mimo-v2-tts" => Some(8_000),
_ => None,
}
}
#[must_use]
pub fn max_output_tokens_for_model(model: &str) -> Option<u32> {
if let Some(max_output) = crate::model_catalog::resolved_max_output(model) {
return Some(max_output);
}
let lower = model.to_lowercase();
if lower.contains("deepseek") && lower.contains("v4") {
return Some(384_000);
}
if is_openai_gpt_55_api_model(&lower) || is_openai_codex_model(&lower) {
return Some(128_000);
}
match lower.as_str() {
"gpt-5-codex" | "gpt-5.3-codex" => Some(128_000),
"claude-opus-4-8" => Some(128_000),
"claude-sonnet-4-6" | "claude-haiku-4-5" => Some(64_000),
"arcee-ai/trinity-large-thinking"
| "trinity-large-thinking"
| "moonshotai/kimi-k2.7-code"
| "moonshotai/kimi-k2.6"
| "kimi-k2.7-code"
| "kimi-k2.6"
| "kimi-for-coding" => Some(262_144),
"minimax/minimax-m3" | "minimax-m3" => Some(524_288),
"qwen/qwen3.6-35b-a3b" | "qwen/qwen3.6-27b" => Some(262_140),
"qwen/qwen3.6-flash" | "qwen/qwen3.6-max-preview" | "qwen/qwen3.6-plus" => Some(65_536),
"z-ai/glm-5.1" | "z-ai/glm-5.2" | "glm-5.1" | "glm-5.2" => Some(131_072),
"xiaomi/mimo-v2.5-pro" | "xiaomi/mimo-v2.5" | "mimo-v2.5-pro" | "mimo-v2.5" => {
Some(131_072)
}
"mimo-v2.5-asr" => Some(2_048),
"mimo-v2.5-tts"
| "mimo-v2.5-tts-voicedesign"
| "mimo-v2.5-tts-voiceclone"
| "mimo-v2-tts" => Some(8_192),
"nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free" => Some(65_536),
"nvidia/nemotron-3-ultra-550b-a55b" => Some(16_384),
"nvidia/nemotron-3-ultra-550b-a55b:free" => Some(65_536),
"google/gemma-4-31b-it" => Some(16_384),
"google/gemma-4-31b-it:free" | "google/gemma-4-26b-a4b-it:free" => Some(32_768),
_ => None,
}
}
#[must_use]
pub fn model_supports_reasoning(model: &str) -> bool {
if let Some(supports_reasoning) = crate::model_catalog::resolved_supports_reasoning(model) {
return supports_reasoning;
}
let lower = model.to_lowercase();
if lower.contains("deepseek") && lower.contains("v4") {
return true;
}
if lower.starts_with("kimi-") {
return true;
}
matches!(
lower.as_str(),
"claude-opus-4-8"
| "claude-sonnet-4-6"
| "gpt-5-codex"
| "gpt-5.3-codex"
| "arcee-ai/trinity-large-thinking"
| "trinity-large-thinking"
| "google/gemma-4-31b-it"
| "google/gemma-4-31b-it:free"
| "google/gemma-4-26b-a4b-it"
| "google/gemma-4-26b-a4b-it:free"
| "moonshotai/kimi-k2.7-code"
| "moonshotai/kimi-k2.6"
| "moonshotai/kimi-k2.6:free"
| "kimi-k2.7-code"
| "kimi-k2.6"
| "kimi-for-coding"
| "minimax/minimax-m3"
| "minimax/minimax-2.7"
| "minimax-m3"
| "minimax-m2.7"
| "minimax-m2.7-highspeed"
| "minimax-m2.5"
| "minimax-m2.5-highspeed"
| "minimax-m2.1"
| "minimax-m2.1-highspeed"
| "minimax-m2"
| "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"
| "nvidia/nemotron-3-ultra-550b-a55b"
| "nvidia/nemotron-3-ultra-550b-a55b:free"
| "qwen/qwen3.6-flash"
| "qwen/qwen3.6-35b-a3b"
| "qwen/qwen3.6-max-preview"
| "qwen/qwen3.6-27b"
| "qwen/qwen3.6-plus"
| "tencent/hy3-preview"
| "xiaomi/mimo-v2.5-pro"
| "xiaomi/mimo-v2.5"
| "mimo-v2.5-pro"
| "mimo-v2.5"
| "z-ai/glm-5.1"
| "z-ai/glm-5.2"
| "glm-5.1"
| "glm-5.2"
) || is_openai_gpt_55_api_model(&lower)
|| is_openai_codex_model(&lower)
}
fn is_openai_gpt_55_api_model(model_lower: &str) -> bool {
matches!(model_lower, "gpt-5.5" | "gpt-5.5-pro")
|| has_date_snapshot_suffix(model_lower, "gpt-5.5-")
|| has_date_snapshot_suffix(model_lower, "gpt-5.5-pro-")
}
fn is_openai_codex_model(model_lower: &str) -> bool {
matches!(
model_lower,
"gpt-5-codex"
| "gpt-5.1-codex"
| "gpt-5.1-codex-mini"
| "gpt-5.1-codex-max"
| "gpt-5.2-codex"
| "gpt-5.3-codex"
| "codex-gpt-5.5"
| "chatgpt-gpt-5.5"
| "gpt-5.5-codex"
| "gpt-5.5-codex-preview"
| "codex-gpt-5.5-preview"
| "chatgpt-gpt-5.5-preview"
)
}
fn has_date_snapshot_suffix(model_lower: &str, prefix: &str) -> bool {
let Some(rest) = model_lower.strip_prefix(prefix) else {
return false;
};
let bytes = rest.as_bytes();
bytes.len() == 10
&& bytes[4] == b'-'
&& bytes[7] == b'-'
&& bytes
.iter()
.enumerate()
.all(|(idx, byte)| idx == 4 || idx == 7 || byte.is_ascii_digit())
}
fn explicit_context_window_hint(model_lower: &str) -> Option<u32> {
let bytes = model_lower.as_bytes();
let mut i = 0usize;
while i < bytes.len() {
if bytes[i].is_ascii_digit() {
let start = i;
while i < bytes.len() && bytes[i].is_ascii_digit() {
i += 1;
}
if i >= bytes.len() || bytes[i] != b'k' {
continue;
}
let before_ok = start == 0 || !bytes[start - 1].is_ascii_alphanumeric();
let after_ok = i + 1 >= bytes.len() || !bytes[i + 1].is_ascii_alphanumeric();
if !before_ok || !after_ok {
continue;
}
if let Ok(kilo_tokens) = model_lower[start..i].parse::<u32>()
&& (8..=1024).contains(&kilo_tokens)
{
return Some(kilo_tokens.saturating_mul(1000));
}
} else {
i += 1;
}
}
None
}
#[must_use]
pub fn compaction_threshold_for_model_at_percent(model: &str, percent: f64) -> usize {
let Some(window) = context_window_for_model(model) else {
return DEFAULT_COMPACTION_TOKEN_THRESHOLD;
};
let percent = percent.clamp(10.0, 100.0);
let threshold = (f64::from(window) * percent / 100.0).round();
let threshold = if threshold.is_finite() && threshold > 0.0 {
threshold as u64
} else {
u64::from(window) * u64::from(COMPACTION_THRESHOLD_PERCENT) / 100
};
usize::try_from(threshold).unwrap_or(DEFAULT_COMPACTION_TOKEN_THRESHOLD)
}
#[must_use]
pub fn auto_compact_default_for_model(model: &str) -> bool {
context_window_for_model(model)
.is_some_and(|window| window <= DEFAULT_AUTO_COMPACT_MAX_CONTEXT_WINDOW_TOKENS)
}
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone)]
#[serde(tag = "type")]
pub enum StreamEvent {
#[serde(rename = "message_start")]
MessageStart { message: MessageResponse },
#[serde(rename = "content_block_start")]
ContentBlockStart {
index: u32,
content_block: ContentBlockStart,
},
#[serde(rename = "content_block_delta")]
ContentBlockDelta { index: u32, delta: Delta },
#[serde(rename = "content_block_stop")]
ContentBlockStop { index: u32 },
#[serde(rename = "message_delta")]
MessageDelta {
delta: MessageDelta,
usage: Option<Usage>,
},
#[serde(rename = "message_stop")]
MessageStop,
#[serde(rename = "ping")]
Ping,
#[serde(rename = "error")]
Error { error: serde_json::Value },
}
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone)]
#[serde(tag = "type")]
pub enum ContentBlockStart {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "thinking")]
Thinking { thinking: String },
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: serde_json::Value, #[serde(skip_serializing_if = "Option::is_none")]
caller: Option<ToolCaller>,
},
#[serde(rename = "server_tool_use")]
ServerToolUse {
id: String,
name: String,
input: serde_json::Value,
},
}
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Deserialize, Clone)]
#[serde(tag = "type")]
pub enum Delta {
#[serde(rename = "text_delta")]
TextDelta { text: String },
#[serde(rename = "thinking_delta")]
ThinkingDelta { thinking: String },
#[serde(rename = "input_json_delta")]
InputJsonDelta { partial_json: String },
#[serde(rename = "signature_delta")]
SignatureDelta { signature: String },
}
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone)]
pub struct MessageDelta {
pub stop_reason: Option<String>,
pub stop_sequence: Option<String>,
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::BTreeMap;
#[test]
fn v4_snapshots_preserve_context_window() {
assert_eq!(
context_window_for_model("deepseek-v4-flash-20260423"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-v4-pro-20260423"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
}
#[test]
fn unknown_legacy_deepseek_models_map_to_128k_context_window() {
assert_eq!(
context_window_for_model("deepseek-coder"),
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-v3.2-0324"),
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
}
#[test]
fn deepseek_v4_models_map_to_1m_context_window() {
assert_eq!(
context_window_for_model("deepseek-v4-pro"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-v4-flash"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-ai/deepseek-v4-pro"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
}
#[test]
fn recent_openrouter_large_models_have_static_windows() {
for (model, expected_window) in [
("arcee-ai/trinity-large-thinking", 262_144),
("trinity-large-thinking", 262_144),
(concat!("qwen/", "qwen3.6-flash"), 1_000_000),
(concat!("qwen/", "qwen3.6-35b-a3b"), 262_144),
(concat!("qwen/", "qwen3.6-max-preview"), 262_144),
(concat!("qwen/", "qwen3.6-plus"), 1_000_000),
(concat!("xiaomi/", "mimo-v2.5-pro"), 1_000_000),
("mimo-v2.5-pro", 1_000_000),
("mimo-v2.5", 1_000_000),
("minimax/minimax-m3", 1_000_000),
("minimax/minimax-2.7", 204_800),
("moonshotai/kimi-k2.7-code", 262_144),
("moonshotai/kimi-k2.6", 262_144),
("google/gemma-4-31b-it", 262_144),
("z-ai/glm-5.1", 202_752),
("z-ai/glm-5.2", 1_000_000),
] {
assert_eq!(context_window_for_model(model), Some(expected_window));
assert!(model_supports_reasoning(model));
}
}
#[test]
fn openai_api_and_codex_models_have_verified_context_metadata() {
for model in [
"gpt-5.5",
"gpt-5.5-pro",
"gpt-5.5-2026-04-23",
"gpt-5.5-pro-2026-04-23",
] {
assert_eq!(context_window_for_model(model), Some(1_050_000));
assert_eq!(max_output_tokens_for_model(model), Some(128_000));
assert!(model_supports_reasoning(model));
assert_eq!(
compaction_threshold_for_model_at_percent(model, 80.0),
840_000
);
}
for model in [
"gpt-5-codex",
"gpt-5.1-codex",
"gpt-5.1-codex-mini",
"gpt-5.1-codex-max",
"gpt-5.2-codex",
"gpt-5.3-codex",
"codex-gpt-5.5",
"chatgpt-gpt-5.5",
"gpt-5.5-codex",
"gpt-5.5-codex-preview",
] {
assert_eq!(context_window_for_model(model), Some(400_000));
assert_eq!(max_output_tokens_for_model(model), Some(128_000));
assert!(model_supports_reasoning(model));
assert_eq!(
compaction_threshold_for_model_at_percent(model, 80.0),
320_000
);
}
assert_eq!(context_window_for_model("gpt-5.5-nano"), None);
assert_eq!(max_output_tokens_for_model("gpt-5.5-nano"), None);
assert!(!model_supports_reasoning("gpt-5.5-nano"));
}
#[test]
fn model_metadata_catalog_override_flows_through_models_chokepoint() {
let _lock = crate::model_catalog::test_catalog_lock();
let mut overrides = BTreeMap::new();
overrides.insert(
"catalog-only-model".to_string(),
crate::model_catalog::CatalogEntry {
id: "catalog-only-model".to_string(),
context_window: Some(777_000),
max_output: Some(55_000),
supports_reasoning: Some(true),
input_usd_per_million: None,
output_usd_per_million: None,
modalities: Vec::new(),
supported_parameters: Vec::new(),
provider_model_id: None,
provenance: crate::model_catalog::MetadataProvenance::UserOverride,
},
);
let catalog = crate::model_catalog::MergedCatalog::from_sources(
overrides,
None,
crate::model_catalog::bundled_catalog(),
chrono::Utc::now(),
);
let _guard = crate::model_catalog::replace_active_catalog_for_test(catalog);
assert_eq!(
context_window_for_model("catalog-only-model"),
Some(777_000)
);
assert_eq!(
max_output_tokens_for_model("catalog-only-model"),
Some(55_000)
);
assert!(model_supports_reasoning("catalog-only-model"));
}
#[test]
fn moonshot_native_kimi_ids_support_reasoning_including_coding_route() {
assert!(model_supports_reasoning("kimi-k2.7-code"));
assert!(model_supports_reasoning("kimi-k2.6"));
assert!(model_supports_reasoning("kimi-for-coding"));
assert!(model_supports_reasoning("kimi-k2.5"));
}
#[test]
fn arcee_direct_models_have_static_windows_without_reasoning_flag() {
assert_eq!(
context_window_for_model("trinity-large-preview"),
Some(262_144)
);
assert!(!model_supports_reasoning("trinity-large-preview"));
assert_eq!(context_window_for_model("trinity-mini"), Some(128_000));
assert!(!model_supports_reasoning("trinity-mini"));
}
#[test]
fn recent_openrouter_large_models_have_known_output_caps() {
assert_eq!(
max_output_tokens_for_model("arcee-ai/trinity-large-thinking"),
Some(262_144)
);
assert_eq!(
max_output_tokens_for_model("trinity-large-thinking"),
Some(262_144)
);
assert_eq!(
max_output_tokens_for_model(concat!("qwen/", "qwen3.6-flash")),
Some(65_536)
);
assert_eq!(
max_output_tokens_for_model(concat!("qwen/", "qwen3.6-max-preview")),
Some(65_536)
);
assert_eq!(
max_output_tokens_for_model(concat!("qwen/", "qwen3.6-plus")),
Some(65_536)
);
assert_eq!(
max_output_tokens_for_model(concat!("xiaomi/", "mimo-v2.5-pro")),
Some(131_072)
);
assert_eq!(max_output_tokens_for_model("mimo-v2.5-pro"), Some(131_072));
assert_eq!(max_output_tokens_for_model("mimo-v2.5"), Some(131_072));
assert_eq!(
max_output_tokens_for_model("minimax/minimax-m3"),
Some(524_288)
);
assert_eq!(max_output_tokens_for_model("z-ai/glm-5.1"), Some(131_072));
assert_eq!(max_output_tokens_for_model("z-ai/glm-5.2"), Some(131_072));
}
#[test]
fn bare_provider_model_ids_mirror_vendor_prefixed_rows() {
for (model, expected_window) in [
("kimi-k2.7-code", 262_144),
("kimi-k2.6", 262_144),
("minimax-m3", 1_000_000),
("minimax-m2.7", 204_800),
("minimax-m2.5-highspeed", 204_800),
("minimax-m2", 204_800),
("glm-5.1", 202_752),
("glm-5.2", 1_000_000),
] {
assert_eq!(context_window_for_model(model), Some(expected_window));
assert!(model_supports_reasoning(model));
}
assert_eq!(context_window_for_model("kimi-for-coding"), Some(262_144));
assert!(model_supports_reasoning("kimi-for-coding"));
assert_eq!(context_window_for_model("glm-5v-turbo"), Some(202_752));
assert!(!model_supports_reasoning("glm-5v-turbo"));
assert_eq!(max_output_tokens_for_model("kimi-k2.7-code"), Some(262_144));
assert_eq!(max_output_tokens_for_model("kimi-k2.6"), Some(262_144));
assert_eq!(
max_output_tokens_for_model("kimi-for-coding"),
Some(262_144)
);
assert_eq!(max_output_tokens_for_model("minimax-m3"), Some(524_288));
assert_eq!(max_output_tokens_for_model("glm-5.1"), Some(131_072));
assert_eq!(max_output_tokens_for_model("glm-5.2"), Some(131_072));
}
#[test]
fn deepseek_models_with_k_suffix_use_hint() {
assert_eq!(context_window_for_model("deepseek-v3.2-32k"), Some(32_000));
assert_eq!(
context_window_for_model("deepseek-v3.2-256k-preview"),
Some(256_000)
);
assert_eq!(
context_window_for_model("deepseek-v3.2-2k-preview"),
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
}
#[test]
fn compaction_threshold_scales_with_context_window() {
assert_eq!(
compaction_threshold_for_model_at_percent("deepseek-v3.2-128k", 80.0),
102_400
);
assert_eq!(
compaction_threshold_for_model_at_percent("unknown-model", 80.0),
102_400
);
}
#[test]
fn compaction_scales_for_deepseek_v4_1m_context() {
assert_eq!(
compaction_threshold_for_model_at_percent("deepseek-v4-pro", 80.0),
800_000
);
}
#[test]
fn compaction_threshold_honors_configured_percent() {
assert_eq!(
compaction_threshold_for_model_at_percent("deepseek-v4-pro", 75.0),
750_000
);
assert_eq!(
compaction_threshold_for_model_at_percent("trinity-large-thinking", 80.0),
209_715
);
}
#[test]
fn auto_compaction_defaults_on_for_256k_class_models_only() {
assert!(auto_compact_default_for_model("trinity-large-thinking"));
assert!(auto_compact_default_for_model("deepseek-v3.2-128k"));
assert!(!auto_compact_default_for_model("deepseek-v4-pro"));
assert!(!auto_compact_default_for_model("mimo-v2.5-pro"));
assert!(!auto_compact_default_for_model("unknown-model"));
}
}