use serde::{Deserialize, Serialize};
/// Context window (in tokens) for pre-v4 DeepSeek models: 128k.
pub const LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
/// Context window (in tokens) for DeepSeek v4 models: 1M.
pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
/// Compaction threshold used when the model's context window is unknown.
/// Equals 80% of the legacy 128k window (128_000 * 80 / 100 = 102_400).
pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
/// Compaction triggers at this percentage of the model's context window.
const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
/// Request body for the provider's Messages endpoint.
///
/// Optional fields are omitted from the serialized JSON entirely
/// (`skip_serializing_if = "Option::is_none"`) rather than sent as `null`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MessageRequest {
pub model: String,
pub messages: Vec<Message>,
pub max_tokens: u32,
/// System prompt, as plain text or structured blocks (see [`SystemPrompt`]).
#[serde(skip_serializing_if = "Option::is_none")]
pub system: Option<SystemPrompt>,
/// Tool definitions the model may invoke.
#[serde(skip_serializing_if = "Option::is_none")]
pub tools: Option<Vec<Tool>>,
/// Tool-selection policy, passed through as free-form JSON.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<serde_json::Value>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<serde_json::Value>,
/// Extended-thinking configuration, passed through as free-form JSON.
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking: Option<serde_json::Value>,
/// Reasoning-effort hint, forwarded verbatim to the API.
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<String>,
/// When `Some(true)`, the server streams [`StreamEvent`]s instead of a
/// single [`MessageResponse`].
#[serde(skip_serializing_if = "Option::is_none")]
pub stream: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
}
/// System prompt in either of the two wire shapes the API accepts: a bare
/// string or a list of typed blocks. `#[serde(untagged)]` means serde tries
/// each variant in declaration order when deserializing.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum SystemPrompt {
Text(String),
Blocks(Vec<SystemBlock>),
}
/// One block of a structured system prompt.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SystemBlock {
/// Block discriminator; serialized under the JSON key "type".
#[serde(rename = "type")]
pub block_type: String,
pub text: String,
/// Optional prompt-caching marker for this block.
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_control: Option<CacheControl>,
}
/// A single conversation turn: a role string plus ordered content blocks.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Message {
pub role: String,
pub content: Vec<ContentBlock>,
}
/// A typed content block inside a message; the wire discriminator is the
/// JSON "type" field (`#[serde(tag = "type")]`).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")]
pub enum ContentBlock {
/// Plain text, optionally marked for prompt caching.
#[serde(rename = "text")]
Text {
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
/// Model reasoning ("thinking") text.
#[serde(rename = "thinking")]
Thinking { thinking: String },
/// A tool invocation requested by the model.
#[serde(rename = "tool_use")]
ToolUse {
id: String,
name: String,
input: serde_json::Value,
/// Who initiated the call — see [`ToolCaller`].
#[serde(skip_serializing_if = "Option::is_none")]
caller: Option<ToolCaller>,
},
/// The client's result for an earlier `tool_use` block, keyed by its id.
#[serde(rename = "tool_result")]
ToolResult {
tool_use_id: String,
content: String,
/// `Some(true)` marks the result as an error.
#[serde(skip_serializing_if = "Option::is_none")]
is_error: Option<bool>,
/// Optional structured result blocks alongside the plain `content` string.
#[serde(skip_serializing_if = "Option::is_none")]
content_blocks: Option<Vec<serde_json::Value>>,
},
/// A tool invocation executed on the provider's side.
#[serde(rename = "server_tool_use")]
ServerToolUse {
id: String,
name: String,
input: serde_json::Value,
},
/// Result of a server-side tool search.
#[serde(rename = "tool_search_tool_result")]
ToolSearchToolResult {
tool_use_id: String,
content: serde_json::Value,
},
/// Result of server-side code execution.
#[serde(rename = "code_execution_tool_result")]
CodeExecutionToolResult {
tool_use_id: String,
content: serde_json::Value,
},
}
/// Prompt-caching directive; `cache_type` is serialized under the JSON key
/// "type".
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CacheControl {
#[serde(rename = "type")]
pub cache_type: String,
}
/// Identifies what initiated a tool call; `caller_type` is serialized under
/// the JSON key "type".
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ToolCaller {
#[serde(rename = "type")]
pub caller_type: String,
/// Id of the originating tool, when applicable.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_id: Option<String>,
}
/// A tool definition sent with a [`MessageRequest`].
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Tool {
/// Optional tool-type discriminator; serialized under the JSON key "type".
#[serde(rename = "type", skip_serializing_if = "Option::is_none")]
pub tool_type: Option<String>,
pub name: String,
pub description: String,
/// JSON Schema describing the tool's arguments.
pub input_schema: serde_json::Value,
// NOTE(review): values presumably match ToolCaller "type" strings —
// confirm against the provider documentation.
#[serde(skip_serializing_if = "Option::is_none")]
pub allowed_callers: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub defer_loading: Option<bool>,
/// Example inputs, passed through as free-form JSON.
#[serde(skip_serializing_if = "Option::is_none")]
pub input_examples: Option<Vec<serde_json::Value>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub strict: Option<bool>,
/// Optional prompt-caching marker for this tool definition.
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_control: Option<CacheControl>,
}
/// Handle to a server-side container returned with a response.
/// NOTE(review): presumably the code-execution sandbox — confirm.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ContainerInfo {
pub id: String,
/// Expiry timestamp, passed through as the raw string the API sent.
#[serde(skip_serializing_if = "Option::is_none")]
pub expires_at: Option<String>,
}
/// Per-category counts of server-side tool invocations, reported in
/// [`Usage::server_tool_use`].
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
pub struct ServerToolUsage {
#[serde(skip_serializing_if = "Option::is_none")]
pub code_execution_requests: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_search_requests: Option<u32>,
}
/// Complete (non-streaming) response from the Messages endpoint. Also
/// carried inside the `message_start` event of a stream.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct MessageResponse {
pub id: String,
/// Object-type discriminator from the API; raw-identifier field because
/// `type` is a Rust keyword.
pub r#type: String,
pub role: String,
pub content: Vec<ContentBlock>,
pub model: String,
/// Why generation stopped; `None` when not (yet) reported.
pub stop_reason: Option<String>,
pub stop_sequence: Option<String>,
/// Container metadata, when the response used one.
#[serde(skip_serializing_if = "Option::is_none")]
pub container: Option<ContainerInfo>,
pub usage: Usage,
}
/// Token accounting attached to a response.
#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq, Eq)]
pub struct Usage {
pub input_tokens: u32,
pub output_tokens: u32,
// Prompt-cache hit/miss split; field names follow the provider API.
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_hit_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_miss_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_replay_tokens: Option<u32>,
/// Per-category counts of server-side tool invocations.
#[serde(skip_serializing_if = "Option::is_none")]
pub server_tool_use: Option<ServerToolUsage>,
}
/// Best-effort lookup of a model's context-window size in tokens.
///
/// Matching is case-insensitive and substring-based:
/// * DeepSeek ids first honor an explicit `<N>k` hint embedded in the name,
///   then fall back to 1M for "v4" ids and 128k for everything else.
/// * Claude ids map to 200k.
/// * Anything else returns `None`.
#[must_use]
pub fn context_window_for_model(model: &str) -> Option<u32> {
    let normalized = model.to_lowercase();
    if normalized.contains("deepseek") {
        // An explicit "<N>k" suffix always wins over the family default.
        let family_default = if normalized.contains("v4") {
            DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
        } else {
            LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
        };
        return Some(deepseek_context_window_hint(&normalized).unwrap_or(family_default));
    }
    normalized.contains("claude").then_some(200_000)
}
/// Extracts an explicit context-window hint of the form `<N>k` from a
/// lowercased model id, e.g. "deepseek-v3.2-32k" -> `Some(32_000)`.
///
/// The digit run must be bounded by non-alphanumeric characters on both
/// sides and `N` must lie in 8..=1024 (kilotokens), so version fragments
/// like "v4", date stamps, or embedded tokens like "a32kb" never match.
fn deepseek_context_window_hint(model_lower: &str) -> Option<u32> {
    let bytes = model_lower.as_bytes();
    let mut pos = 0usize;
    while pos < bytes.len() {
        if !bytes[pos].is_ascii_digit() {
            pos += 1;
            continue;
        }
        // Consume a maximal run of ASCII digits.
        let run_start = pos;
        while pos < bytes.len() && bytes[pos].is_ascii_digit() {
            pos += 1;
        }
        // The run must be immediately followed by a literal 'k'.
        if bytes.get(pos) != Some(&b'k') {
            continue;
        }
        // Reject runs embedded in a larger alphanumeric token.
        let left_clear = run_start == 0 || !bytes[run_start - 1].is_ascii_alphanumeric();
        let right_clear = bytes
            .get(pos + 1)
            .map_or(true, |b| !b.is_ascii_alphanumeric());
        if !(left_clear && right_clear) {
            continue;
        }
        if let Ok(kilo) = model_lower[run_start..pos].parse::<u32>() {
            // Only plausible window sizes (8k..=1024k) count as hints;
            // otherwise keep scanning for a later match.
            if (8..=1024).contains(&kilo) {
                return Some(kilo.saturating_mul(1000));
            }
        }
    }
    None
}
/// Token count at which conversation compaction should kick in for `model`.
///
/// Computed as `COMPACTION_THRESHOLD_PERCENT` (80%) of the model's context
/// window; unknown models fall back to `DEFAULT_COMPACTION_TOKEN_THRESHOLD`.
#[must_use]
pub fn compaction_threshold_for_model(model: &str) -> usize {
    context_window_for_model(model)
        .and_then(|window| {
            // Widen to u64 before multiplying so the percentage scaling
            // cannot overflow u32, then narrow back down checked.
            let scaled = u64::from(window) * u64::from(COMPACTION_THRESHOLD_PERCENT) / 100;
            usize::try_from(scaled).ok()
        })
        .unwrap_or(DEFAULT_COMPACTION_TOKEN_THRESHOLD)
}
/// Compaction threshold for a model with an optional reasoning-effort hint.
///
/// The effort hint is currently ignored: the threshold depends only on the
/// model's context window. The parameter is kept so callers don't need to
/// change if effort-sensitive thresholds are introduced later.
#[must_use]
pub fn compaction_threshold_for_model_and_effort(
model: &str,
_reasoning_effort: Option<&str>,
) -> usize {
compaction_threshold_for_model(model)
}
/// Server-sent events emitted while streaming a response, tagged by the
/// JSON "type" field. Deserialize-only: these are received, never sent.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone)]
#[serde(tag = "type")]
pub enum StreamEvent {
/// Opens the stream; carries the initial [`MessageResponse`].
#[serde(rename = "message_start")]
MessageStart { message: MessageResponse },
/// A new content block begins at `index`.
#[serde(rename = "content_block_start")]
ContentBlockStart {
index: u32,
content_block: ContentBlockStart,
},
/// Incremental payload for the block at `index`.
#[serde(rename = "content_block_delta")]
ContentBlockDelta { index: u32, delta: Delta },
/// The block at `index` is complete.
#[serde(rename = "content_block_stop")]
ContentBlockStop { index: u32 },
/// Trailing stop metadata plus optional usage totals.
#[serde(rename = "message_delta")]
MessageDelta {
delta: MessageDelta,
usage: Option<Usage>,
},
/// End of the message.
#[serde(rename = "message_stop")]
MessageStop,
/// Keep-alive; carries no data.
#[serde(rename = "ping")]
Ping,
}
/// Initial payload carried by a `content_block_start` stream event, tagged
/// by the JSON "type" field. Deserialize-only: received, never sent.
///
/// NOTE(review): for `tool_use`, `input` is presumably completed by later
/// `input_json_delta` frames — confirm against the stream-handling code.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone)]
#[serde(tag = "type")]
pub enum ContentBlockStart {
    /// Beginning of a plain text block.
    #[serde(rename = "text")]
    Text { text: String },
    /// Beginning of a model "thinking" block.
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// Beginning of a tool invocation requested by the model.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        // This enum only derives Deserialize, so the serialize-side
        // `skip_serializing_if` attribute this field used to carry was
        // inert; a missing "caller" key still deserializes to `None`.
        caller: Option<ToolCaller>,
    },
    /// Beginning of a provider-executed (server-side) tool invocation.
    #[serde(rename = "server_tool_use")]
    ServerToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
/// Incremental content carried by `content_block_delta` events, tagged by
/// the JSON "type" field.
#[allow(clippy::enum_variant_names)]
#[derive(Debug, Deserialize, Clone)]
#[serde(tag = "type")]
pub enum Delta {
/// Appended text for a text block.
#[serde(rename = "text_delta")]
TextDelta { text: String },
/// Appended text for a thinking block.
#[serde(rename = "thinking_delta")]
ThinkingDelta { thinking: String },
/// A fragment of the JSON-encoded tool input.
#[serde(rename = "input_json_delta")]
InputJsonDelta { partial_json: String },
}
/// Stop metadata delivered by a `message_delta` stream event.
#[allow(dead_code)]
#[derive(Debug, Deserialize, Clone)]
pub struct MessageDelta {
pub stop_reason: Option<String>,
pub stop_sequence: Option<String>,
}
// Unit tests for the context-window and compaction-threshold helpers.
#[cfg(test)]
mod tests {
use super::*;
// Dated v4 snapshot ids still contain "v4" and keep the 1M window.
#[test]
fn v4_snapshots_preserve_context_window() {
assert_eq!(
context_window_for_model("deepseek-v4-flash-20260423"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-v4-pro-20260423"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
}
// Any non-v4 DeepSeek id without a "<N>k" hint falls back to 128k.
#[test]
fn unknown_legacy_deepseek_models_map_to_128k_context_window() {
assert_eq!(
context_window_for_model("deepseek-coder"),
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-v3.2-0324"),
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
}
// Matching is substring-based, so org-prefixed ids also resolve.
#[test]
fn deepseek_v4_models_map_to_1m_context_window() {
assert_eq!(
context_window_for_model("deepseek-v4-pro"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-v4-flash"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
assert_eq!(
context_window_for_model("deepseek-ai/deepseek-v4-pro"),
Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS)
);
}
// Explicit "<N>k" hints override the family default; a hint below the
// 8k..=1024k plausibility range is ignored and falls back to 128k.
#[test]
fn deepseek_models_with_k_suffix_use_hint() {
assert_eq!(context_window_for_model("deepseek-v3.2-32k"), Some(32_000));
assert_eq!(
context_window_for_model("deepseek-v3.2-256k-preview"),
Some(256_000)
);
assert_eq!(
context_window_for_model("deepseek-v3.2-2k-preview"),
Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
);
}
// 80% of 128_000 is 102_400, which is also the unknown-model default.
#[test]
fn compaction_threshold_scales_with_context_window() {
assert_eq!(
compaction_threshold_for_model("deepseek-v3.2-128k"),
102_400
);
assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400);
}
// 80% of the 1M v4 window.
#[test]
fn compaction_scales_for_deepseek_v4_1m_context() {
assert_eq!(compaction_threshold_for_model("deepseek-v4-pro"), 800_000);
}
// The effort-aware wrapper ignores the effort hint entirely.
#[test]
fn v4_replacement_compaction_ignores_reasoning_effort() {
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("off")),
800_000
);
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("high")),
800_000
);
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("max")),
800_000
);
}
// Non-v4 and unknown models are unaffected by effort hints.
#[test]
fn v4_soft_caps_only_apply_to_v4_models() {
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v3.2-128k", Some("max")),
102_400
);
assert_eq!(
compaction_threshold_for_model_and_effort("unknown-model", Some("max")),
102_400
);
}
// Missing or unrecognized effort values behave like any other effort.
#[test]
fn v4_replacement_compaction_defaults_to_late_guard_when_effort_unknown() {
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", None),
800_000
);
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("unknown")),
800_000
);
}
}