use std::time::Duration;
use crate::agent_surface::ReasoningEffort;
use crate::client::DeepSeekClient;
use crate::config::Config;
use crate::llm_client::LlmClient;
use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
/// Picks a model for `input` using purely local rules, without any network call.
///
/// `deepseek-v4-pro` is chosen when the lowercased request mentions one of the
/// "complex work" keywords, or when the request is longer than 500 characters
/// (counted as Unicode scalar values, not bytes). Every other request gets
/// `deepseek-v4-flash`.
///
/// `_current_model` is accepted for interface compatibility and is not consulted.
#[must_use]
pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
    const PRO_MODEL: &str = "deepseek-v4-pro";
    const FLASH_MODEL: &str = "deepseek-v4-flash";
    const LONG_REQUEST_CHARS: usize = 500;
    const COMPLEX_KEYWORDS: [&str; 12] = [
        "refactor",
        "architecture",
        "design",
        "debug",
        "security",
        "review",
        "audit",
        "migrate",
        "optimize",
        "rewrite",
        "implement",
        "analyze",
    ];

    let normalized = input.to_lowercase();
    // Substring match: "redesigned" still counts as mentioning "design".
    let mentions_complex_work = COMPLEX_KEYWORDS
        .iter()
        .any(|keyword| normalized.contains(*keyword));

    let chosen = if mentions_complex_work || input.chars().count() > LONG_REQUEST_CHARS {
        PRO_MODEL
    } else {
        FLASH_MODEL
    };
    chosen.to_string()
}
/// Model / reasoning-effort pair extracted from a routing classifier reply.
///
/// Built by `parse_auto_route_recommendation`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AutoRouteRecommendation {
/// Canonical model name as returned by `normalize_auto_route_model`
/// (`deepseek-v4-pro` or `deepseek-v4-flash`).
pub model: String,
/// Recommended reasoning effort; `None` when the reply carried no
/// recognizable effort value.
pub reasoning_effort: Option<ReasoningEffort>,
}
/// Identifies which mechanism produced an auto-route decision.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoRouteSource {
    /// The decision came from the flash-model routing request.
    FlashRouter,
    /// The decision came from the local fallback heuristic.
    Heuristic,
}

impl AutoRouteSource {
    /// Returns the stable, kebab-case label for this source.
    #[must_use]
    pub fn label(self) -> &'static str {
        match self {
            Self::FlashRouter => "flash-router",
            Self::Heuristic => "heuristic",
        }
    }
}
/// Final auto-route decision together with the mechanism that produced it.
///
/// Returned by `resolve_auto_route_with_flash`, either from the flash router
/// or from `fallback_auto_route`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AutoRouteSelection {
/// Name of the model selected for the request.
pub model: String,
/// Selected reasoning effort; `None` when the flash router's reply did not
/// include a recognizable effort.
pub reasoning_effort: Option<ReasoningEffort>,
/// Which mechanism made the decision.
pub source: AutoRouteSource,
}
/// System prompt sent with the routing request in
/// `auto_route_flash_recommendation`.
///
/// It instructs the classifier to answer with a compact JSON object holding a
/// `model` and a `thinking` field; `parse_auto_route_recommendation` reads
/// those fields back out of the reply.
pub const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = "\
You are the Zagens agent runtime auto-routing classifier. Return only compact JSON: \
{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
Use deepseek-v4-flash for trivial, conversational, status, or single-step work. \
Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, \
tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. \
Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \
agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.";
pub fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteRecommendation> {
let json = extract_first_json_object(raw)?;
let value: serde_json::Value = serde_json::from_str(json).ok()?;
let model = value.get("model").and_then(serde_json::Value::as_str)?;
let model = normalize_auto_route_model(model)?;
let reasoning_effort = value
.get("thinking")
.or_else(|| value.get("reasoning_effort"))
.or_else(|| value.get("effort"))
.and_then(serde_json::Value::as_str)
.and_then(parse_auto_route_reasoning_effort);
Some(AutoRouteRecommendation {
model: model.to_string(),
reasoning_effort,
})
}
/// Returns the first balanced `{ ... }` span found in `raw`.
///
/// Scanning starts at the first `{` and stops at the `}` that closes it.
/// Braces inside double-quoted string literals are ignored, and backslash
/// escapes inside those strings are honored, so `{"a":"}"}` is returned whole.
/// Anything after the closing brace (trailing prose, a second object) is
/// excluded from the returned slice.
///
/// Returns `None` when `raw` contains no `{` or when the first `{` is never
/// closed. The span is only brace-balanced; it is not validated as JSON.
fn extract_first_json_object(raw: &str) -> Option<&str> {
    let start = raw.find('{')?;

    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Scanning bytes is UTF-8 safe: every delimiter of interest is ASCII and
    // ASCII bytes never occur inside a multi-byte sequence.
    for (offset, &byte) in raw.as_bytes()[start..].iter().enumerate() {
        if in_string {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                in_string = false;
            }
            continue;
        }
        match byte {
            b'"' => in_string = true,
            b'{' => depth += 1,
            b'}' => {
                // The scan begins on `{`, so depth is at least 1 here.
                depth -= 1;
                if depth == 0 {
                    return Some(&raw[start..=start + offset]);
                }
            }
            _ => {}
        }
    }
    None
}
/// Maps a model name or alias to its canonical model identifier.
///
/// Surrounding whitespace is ignored and comparison is ASCII case-insensitive.
/// Accepted aliases are `deepseek-v4-pro` / `v4-pro` / `pro` and
/// `deepseek-v4-flash` / `v4-flash` / `flash`; anything else yields `None`.
fn normalize_auto_route_model(model: &str) -> Option<&'static str> {
    const PRO_ALIASES: [&str; 3] = ["deepseek-v4-pro", "v4-pro", "pro"];
    const FLASH_ALIASES: [&str; 3] = ["deepseek-v4-flash", "v4-flash", "flash"];

    let candidate = model.trim();
    let is_one_of = |aliases: &[&str]| {
        aliases
            .iter()
            .any(|alias| candidate.eq_ignore_ascii_case(alias))
    };

    if is_one_of(&PRO_ALIASES) {
        Some("deepseek-v4-pro")
    } else if is_one_of(&FLASH_ALIASES) {
        Some("deepseek-v4-flash")
    } else {
        None
    }
}
/// Parses a textual effort level from a router reply.
///
/// Input is trimmed and compared ASCII case-insensitively. Levels below
/// "high" (`low`, `minimal`, `medium`, `mid`) are mapped to
/// [`ReasoningEffort::High`], the same mapping `normalize_auto_route_effort`
/// applies. Unrecognized text yields `None`.
fn parse_auto_route_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
    let token = effort.trim().to_ascii_lowercase();
    let level = match token.as_str() {
        "off" | "disabled" | "none" | "false" => ReasoningEffort::Off,
        "low" | "minimal" | "medium" | "mid" | "high" => ReasoningEffort::High,
        "max" | "maximum" | "xhigh" => ReasoningEffort::Max,
        _ => return None,
    };
    Some(level)
}
/// Raises `Low` and `Medium` effort to `High`; every other level is returned
/// unchanged.
#[must_use]
pub fn normalize_auto_route_effort(effort: ReasoningEffort) -> ReasoningEffort {
    if matches!(effort, ReasoningEffort::Low | ReasoningEffort::Medium) {
        ReasoningEffort::High
    } else {
        effort
    }
}
pub async fn resolve_auto_route_with_flash(
config: &Config,
latest_request: &str,
recent_context: &str,
selected_model_mode: &str,
selected_thinking_mode: &str,
) -> AutoRouteSelection {
match auto_route_flash_recommendation(
config,
latest_request,
recent_context,
selected_model_mode,
selected_thinking_mode,
)
.await
{
Ok(Some(recommendation)) => AutoRouteSelection {
model: recommendation.model,
reasoning_effort: recommendation.reasoning_effort,
source: AutoRouteSource::FlashRouter,
},
Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode),
}
}
/// Builds a selection without the flash router.
///
/// The model comes from `auto_model_heuristic`; the effort comes from
/// `crate::auto_reasoning::select`, passed through
/// `normalize_auto_route_effort`. The result is tagged
/// [`AutoRouteSource::Heuristic`].
fn fallback_auto_route(latest_request: &str, selected_model_mode: &str) -> AutoRouteSelection {
    let model = auto_model_heuristic(latest_request, selected_model_mode);
    // NOTE(review): the meaning of the `false` flag is defined in `auto_reasoning` — confirm there.
    let raw_effort = crate::auto_reasoning::select(false, latest_request);
    let reasoning_effort = Some(normalize_auto_route_effort(raw_effort));

    AutoRouteSelection {
        model,
        reasoning_effort,
        source: AutoRouteSource::Heuristic,
    }
}
/// Asks `deepseek-v4-flash` to classify the request and parses its answer.
///
/// Sends a single non-streaming request (temperature 0, at most 96 output
/// tokens) with `AUTO_MODEL_ROUTER_SYSTEM_PROMPT` as the system prompt, and
/// waits at most four seconds for the reply.
///
/// Returns `Ok(None)` when the reply cannot be parsed into a recommendation,
/// and always under `cfg!(test)` (presumably to keep tests off the network).
///
/// # Errors
///
/// Fails if the client cannot be constructed, the request times out, or the
/// request itself returns an error.
async fn auto_route_flash_recommendation(
    config: &Config,
    latest_request: &str,
    recent_context: &str,
    selected_model_mode: &str,
    selected_thinking_mode: &str,
) -> anyhow::Result<Option<AutoRouteRecommendation>> {
    if cfg!(test) {
        return Ok(None);
    }

    let client = DeepSeekClient::new(config)?;

    let prompt = auto_route_prompt(
        latest_request,
        recent_context,
        selected_model_mode,
        selected_thinking_mode,
    );
    let user_message = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: prompt,
            cache_control: None,
        }],
    };
    let system_prompt = SystemPrompt::Text(AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string());

    let request = MessageRequest {
        model: "deepseek-v4-flash".to_string(),
        messages: vec![user_message],
        max_tokens: 96,
        system: Some(system_prompt),
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        reasoning_effort: Some("off".to_string()),
        stream: Some(false),
        temperature: Some(0.0),
        top_p: None,
    };

    // First `?` surfaces the timeout, second `?` surfaces the request error.
    let deadline = Duration::from_secs(4);
    let response = tokio::time::timeout(deadline, client.create_message(request)).await??;

    let reply = message_response_text(&response);
    Ok(parse_auto_route_recommendation(&reply))
}
/// Renders the user message sent to the routing classifier.
///
/// A blank (empty or whitespace-only) `recent_context` is replaced by the
/// placeholder `No prior context.`. `latest_request` is capped at 4,000
/// characters via `truncate_for_auto_router`; `recent_context` is inserted
/// as given.
fn auto_route_prompt(
    latest_request: &str,
    recent_context: &str,
    selected_model_mode: &str,
    selected_thinking_mode: &str,
) -> String {
    let context_section = if recent_context.trim().is_empty() {
        "No prior context."
    } else {
        recent_context
    };
    let request_section = truncate_for_auto_router(latest_request, 4_000);

    format!(
        "Session mode: agent\nSelected model mode: {}\nSelected thinking mode: {}\n\nRecent context:\n{}\n\nLatest user request:\n{}\n\nReturn JSON only.",
        selected_model_mode, selected_thinking_mode, context_section, request_section
    )
}
/// Flattens the content blocks of `response` into one newline-separated string.
///
/// Text, tool-result and thinking blocks contribute their text as-is. A
/// tool-use block contributes a `[tool call: <name>]` marker. Any other block
/// kind is skipped.
fn message_response_text(response: &MessageResponse) -> String {
    response
        .content
        .iter()
        .fold(String::new(), |mut joined, block| {
            match block {
                ContentBlock::Text { text, .. } => append_router_text(&mut joined, text),
                ContentBlock::ToolResult { content, .. } => {
                    append_router_text(&mut joined, content);
                }
                ContentBlock::Thinking { thinking } => {
                    append_router_text(&mut joined, thinking);
                }
                ContentBlock::ToolUse { name, .. } => {
                    let marker = format!("[tool call: {name}]");
                    append_router_text(&mut joined, &marker);
                }
                _ => {}
            }
            joined
        })
}
/// Appends `text` to `out`, preceded by a newline unless `out` is still empty.
fn append_router_text(out: &mut String, text: &str) {
    let separator = if out.is_empty() { "" } else { "\n" };
    out.push_str(separator);
    out.push_str(text);
}
/// Limits `text` to `max_chars` characters.
///
/// Characters are Unicode scalar values, so multi-byte text is never split
/// mid-character. When anything was cut off, `...` is appended; otherwise the
/// text is returned unchanged.
fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
    // The character at index `max_chars` exists only if the text is too long;
    // its byte offset is exactly where the kept prefix ends.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}