use crate::llm::api::{DeltaSender, LlmRequestPayload, LlmResult, ThinkingConfig};
use crate::llm::provider::{LlmProvider, LlmProviderChat};
use crate::value::VmError;
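/// Parses the GPT generation out of a model name, returning `(major, minor)`.
///
/// Provider prefixes are ignored (`"openai/gpt-5.4"` → `(5, 4)`), dotted and
/// dashed forms both parse (`"gpt-5.4"`, `"gpt-5-4"`), a missing minor version
/// defaults to `0` (`"gpt-5"` → `(5, 0)`), and non-numeric tails such as
/// `"gpt-4o"` yield `None`.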
pub(crate) fn gpt_generation(model: &str) -> Option<(u32, u32)> {
let lower = model.to_lowercase();
    let stripped = lower
        .rsplit_once('/')
        .map_or(lower.as_str(), |(_, tail)| tail);
let needle = "gpt-";
let idx = stripped.find(needle)?;
let tail = &stripped[idx + needle.len()..];
if let Some((major, rest)) = tail.split_once('.') {
if let Ok(major) = major.parse::<u32>() {
let minor_str: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect();
if let Ok(minor) = minor_str.parse::<u32>() {
return Some((major, minor));
}
}
}
let mut parts = tail.split('-');
if let Some(major_str) = parts.next() {
if let Ok(major) = major_str.parse::<u32>() {
if let Some(minor_str) = parts.next() {
if let Ok(minor) = minor_str.parse::<u32>() {
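                    // Four-or-more-digit segments are date stamps (e.g. "gpt-5-20260115"),
                    // not minor versions; treat them as minor version 0.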
let minor = if minor >= 1000 { 0 } else { minor };
return Some((major, minor));
}
}
return Some((major, 0));
}
}
None
}
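/// Whether `model` is GPT generation 5.4 or newer, the cutoff for native tool
/// search support.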
#[allow(dead_code)]
pub(crate) fn gpt_model_supports_tool_search(model: &str) -> bool {
    gpt_generation(model).is_some_and(|generation| generation >= (5, 4))
}
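/// Chat provider for OpenAI-compatible endpoints (OpenAI itself, OpenRouter,
/// Together, Ollama, local servers), distinguished only by `provider_name`.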
pub(crate) struct OpenAiCompatibleProvider {
provider_name: String,
}
impl OpenAiCompatibleProvider {
pub(crate) fn new(name: String) -> Self {
Self {
provider_name: name,
}
}
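    /// Delegates HTTP error classification (terminal vs. transient, context
    /// overflow, rate limits) to the shared provider-aware classifier.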
pub(crate) fn classify_http_error(
provider: &str,
status: reqwest::StatusCode,
retry_after: Option<&str>,
body: &str,
) -> crate::llm::api::LlmErrorInfo {
crate::llm::api::classify_provider_http_error(provider, status, retry_after, body)
}
}
impl LlmProvider for OpenAiCompatibleProvider {
fn name(&self) -> &str {
&self.provider_name
}
fn transform_request(&self, body: &mut serde_json::Value) {
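        // OpenRouter does not support `chat_template_kwargs`; strip it so the
        // request carries only fields the endpoint recognizes.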
if self.provider_name.to_lowercase().contains("openrouter") {
if let Some(obj) = body.as_object_mut() {
obj.remove("chat_template_kwargs");
}
}
}
}
impl LlmProviderChat for OpenAiCompatibleProvider {
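    /// Object-safe entry point that boxes the async `chat_impl` future.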
fn chat<'a>(
&'a self,
request: &'a LlmRequestPayload,
delta_tx: Option<DeltaSender>,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<LlmResult, VmError>> + 'a>> {
Box::pin(self.chat_impl(request, delta_tx))
}
}
impl OpenAiCompatibleProvider {
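    /// Builds an OpenAI-style chat completions request body, applying
    /// per-provider and per-model quirks from the capability table: token-limit
    /// field name, reasoning controls, response format, and chat template kwargs.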
pub(crate) fn build_request_body(
opts: &LlmRequestPayload,
force_string_content: bool,
) -> serde_json::Value {
let mut msgs = Vec::new();
if let Some(ref sys) = opts.system {
msgs.push(serde_json::json!({"role": "system", "content": sys}));
}
msgs.extend(opts.messages.iter().cloned().map(|mut message| {
if let Some(object) = message.as_object_mut() {
if let Some(content) = object.get("content").cloned() {
object.insert(
"content".to_string(),
crate::llm::content::openai_content(&content),
);
}
}
message
}));
if let Some(ref prefill) = opts.prefill {
msgs.push(serde_json::json!({
"role": "assistant",
"content": prefill,
}));
}
msgs = crate::llm::api::normalize_openai_style_messages(msgs, force_string_content);
let mut body = serde_json::json!({
"model": opts.model,
"messages": msgs,
});
let caps = crate::llm::capabilities::lookup(&opts.provider, &opts.model);
if opts.max_tokens > 0 {
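            // Newer OpenAI reasoning models reject the legacy `max_tokens` field
            // and require `max_completion_tokens`; the capability table records
            // which models need it.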
let token_limit_field = if caps.requires_completion_tokens {
"max_completion_tokens"
} else {
"max_tokens"
};
body[token_limit_field] = serde_json::json!(opts.max_tokens);
}
if let Some(temp) = opts.temperature {
body["temperature"] = serde_json::json!(temp);
}
if let Some(top_p) = opts.top_p {
body["top_p"] = serde_json::json!(top_p);
}
if opts.logprobs {
body["logprobs"] = serde_json::json!(true);
if let Some(top_logprobs) = opts.top_logprobs.filter(|value| *value > 0) {
body["top_logprobs"] = serde_json::json!(top_logprobs);
}
}
if let Some(ref stop) = opts.stop {
body["stop"] = serde_json::json!(stop);
}
if let Some(seed) = opts.seed {
body["seed"] = serde_json::json!(seed);
}
if let Some(fp) = opts.frequency_penalty {
body["frequency_penalty"] = serde_json::json!(fp);
}
if let Some(pp) = opts.presence_penalty {
body["presence_penalty"] = serde_json::json!(pp);
}
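        // Reasoning controls differ by provider: OpenRouter takes a unified
        // `reasoning` object, Together mixes `reasoning` and `reasoning_effort`
        // per model, and plain OpenAI-style APIs take top-level `reasoning_effort`.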
if opts.provider == "openrouter" {
if let Some(reasoning) = openrouter_reasoning_config(&opts.thinking) {
body["reasoning"] = reasoning;
}
} else {
if opts.provider == "together" && !caps.honors_chat_template_kwargs {
if let Some(reasoning) = together_reasoning_config(&opts.thinking, &caps) {
body["reasoning"] = reasoning;
}
}
if caps.reasoning_effort_supported {
if let ThinkingConfig::Effort { level } = &opts.thinking {
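                // Only OpenAI accepts an explicit "none" effort; for other
                // providers, disabling reasoning means omitting the field.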
if *level != crate::llm::api::ReasoningEffort::None || opts.provider == "openai"
{
body["reasoning_effort"] = serde_json::json!(level.as_str());
}
}
}
}
match &opts.output_format {
crate::llm::api::OutputFormat::Text => {}
crate::llm::api::OutputFormat::JsonObject => {
body["response_format"] = serde_json::json!({"type": "json_object"});
}
crate::llm::api::OutputFormat::JsonSchema { schema, strict } => {
body["response_format"] = serde_json::json!({
"type": "json_schema",
"json_schema": {
"name": "response",
"schema": schema,
"strict": strict,
}
});
}
}
if let Some(ref tools) = opts.native_tools {
if !tools.is_empty() {
body["tools"] = serde_json::json!(tools);
}
}
if let Some(ref tc) = opts.tool_choice {
body["tool_choice"] = tc.clone();
}
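        // `chat_template_kwargs` is only honored by self-hosted OpenAI-compatible
        // servers (e.g. vLLM); providers that drop or reject the field are gated
        // out via the capability table.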
if caps.honors_chat_template_kwargs {
let mut chat_template_kwargs = serde_json::json!({
"enable_thinking": opts.thinking.is_enabled(),
});
if opts.prefill.is_some() {
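            // With a prefill, the final assistant message must be continued in
            // place rather than opening a fresh generation turn.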
chat_template_kwargs["add_generation_prompt"] = serde_json::json!(false);
chat_template_kwargs["continue_final_message"] = serde_json::json!(true);
}
if caps.preserve_thinking {
chat_template_kwargs["preserve_thinking"] = serde_json::json!(true);
}
body["chat_template_kwargs"] = chat_template_kwargs;
}
body
}
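    /// Builds the request body, applies provider-specific transforms, and
    /// dispatches it through the shared LLM API call path.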
pub(crate) async fn chat_impl(
&self,
request: &LlmRequestPayload,
delta_tx: Option<DeltaSender>,
) -> Result<LlmResult, VmError> {
let mut body = Self::build_request_body(request, false);
self.transform_request(&mut body);
        crate::llm::api::vm_call_llm_api_with_body(request, delta_tx, body, false, false).await
}
}
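/// Maps `ThinkingConfig` onto OpenRouter's unified `reasoning` request object:
/// enabled/adaptive thinking becomes `{"enabled": true}`, an explicit budget
/// becomes `{"max_tokens": n}`, effort levels become `{"effort": "..."}`, and
/// `ReasoningEffort::None` disables reasoning outright.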
fn openrouter_reasoning_config(thinking: &ThinkingConfig) -> Option<serde_json::Value> {
match thinking {
ThinkingConfig::Disabled => None,
ThinkingConfig::Enabled {
budget_tokens: None,
} => Some(serde_json::json!({
"enabled": true
})),
ThinkingConfig::Enabled {
budget_tokens: Some(max_tokens),
} => Some(serde_json::json!({
"max_tokens": max_tokens
})),
ThinkingConfig::Adaptive => Some(serde_json::json!({
"enabled": true
})),
ThinkingConfig::Effort {
level: crate::llm::api::ReasoningEffort::None,
} => Some(serde_json::json!({
"enabled": false
})),
ThinkingConfig::Effort { level } => Some(serde_json::json!({
"effort": level.as_str()
})),
}
}
fn together_reasoning_config(
thinking: &ThinkingConfig,
caps: &crate::llm::capabilities::Capabilities,
) -> Option<serde_json::Value> {
let supports_enabled = caps.thinking_modes.iter().any(|mode| mode == "enabled");
if !supports_enabled {
return None;
}
match thinking {
ThinkingConfig::Disabled
| ThinkingConfig::Effort {
level: crate::llm::api::ReasoningEffort::None,
} => Some(serde_json::json!({ "enabled": false })),
        ThinkingConfig::Enabled { .. }
        | ThinkingConfig::Adaptive
        | ThinkingConfig::Effort { .. } => Some(serde_json::json!({ "enabled": true })),
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm::api::{
LlmErrorKind, LlmErrorReason, LlmRequestPayload, ReasoningEffort, ThinkingConfig,
};
use serde_json::json;
#[test]
fn tool_search_supported_for_gpt_5_4_and_up() {
assert!(gpt_model_supports_tool_search("gpt-5.4"));
assert!(gpt_model_supports_tool_search("gpt-5.4-preview"));
assert!(gpt_model_supports_tool_search("gpt-5.4-turbo"));
assert!(gpt_model_supports_tool_search("gpt-5-4"));
assert!(gpt_model_supports_tool_search("gpt-5.5"));
assert!(gpt_model_supports_tool_search("gpt-6.0"));
}
#[test]
fn tool_search_unsupported_for_pre_5_4() {
assert!(!gpt_model_supports_tool_search("gpt-4o"));
assert!(!gpt_model_supports_tool_search("gpt-4.1"));
assert!(!gpt_model_supports_tool_search("gpt-4-turbo"));
assert!(!gpt_model_supports_tool_search("gpt-3.5-turbo"));
assert!(!gpt_model_supports_tool_search("gpt-5.0"));
assert!(!gpt_model_supports_tool_search("gpt-5.3-preview"));
assert!(!gpt_model_supports_tool_search("gpt-5"));
}
#[test]
fn tool_search_unsupported_for_non_gpt() {
assert!(!gpt_model_supports_tool_search("claude-opus-4-7"));
assert!(!gpt_model_supports_tool_search("llama-3.1-70b"));
assert!(!gpt_model_supports_tool_search(""));
}
#[test]
fn gpt_generation_handles_openrouter_prefix() {
assert_eq!(gpt_generation("openai/gpt-5.4-preview"), Some((5, 4)));
assert_eq!(gpt_generation("azure/gpt-5.5-turbo"), Some((5, 5)));
assert!(gpt_model_supports_tool_search("openai/gpt-5.4"));
assert!(!gpt_model_supports_tool_search("openai/gpt-4o"));
}
#[test]
fn gpt_generation_ignores_date_suffix_as_minor() {
assert_eq!(gpt_generation("gpt-5-20260115"), Some((5, 0)));
assert!(!gpt_model_supports_tool_search("gpt-5-20260115"));
}
#[test]
fn native_tool_search_variants_lists_hosted_first() {
let provider = OpenAiCompatibleProvider::new("openai".to_string());
let variants = provider.native_tool_search_variants("gpt-5.4-preview");
assert_eq!(variants, vec!["hosted".to_string(), "client".to_string()]);
}
#[test]
fn native_tool_search_variants_empty_for_old_model() {
let provider = OpenAiCompatibleProvider::new("openai".to_string());
assert!(provider.native_tool_search_variants("gpt-4o").is_empty());
}
#[test]
fn classifies_openai_context_length_as_terminal_context_overflow() {
let info = OpenAiCompatibleProvider::classify_http_error(
"openai",
reqwest::StatusCode::BAD_REQUEST,
None,
r#"{"error":{"code":"context_length_exceeded","message":"maximum context length"}}"#,
);
assert_eq!(info.kind, LlmErrorKind::Terminal);
assert_eq!(info.reason, LlmErrorReason::ContextOverflow);
}
#[test]
fn classifies_openai_rate_limit_as_transient_rate_limit() {
let info = OpenAiCompatibleProvider::classify_http_error(
"openai",
reqwest::StatusCode::TOO_MANY_REQUESTS,
Some("5"),
r#"{"error":{"type":"rate_limit_error","message":"slow down"}}"#,
);
assert_eq!(info.kind, LlmErrorKind::Transient);
assert_eq!(info.reason, LlmErrorReason::RateLimit);
assert!(info.message.contains("retry-after: 5"));
}
#[test]
fn supports_defer_loading_matches_tool_search_gate() {
let provider = OpenAiCompatibleProvider::new("openai".to_string());
assert!(provider.supports_defer_loading("gpt-5.4"));
assert!(!provider.supports_defer_loading("gpt-4o"));
}
#[test]
fn openrouter_thinking_enabled_maps_to_reasoning_enabled() {
let provider = OpenAiCompatibleProvider::new("openrouter".to_string());
let mut payload = base_request_payload();
payload.thinking = ThinkingConfig::Enabled {
budget_tokens: None,
};
let mut body = OpenAiCompatibleProvider::build_request_body(&payload, false);
provider.transform_request(&mut body);
assert_eq!(body["reasoning"]["enabled"], true);
assert!(body.get("chat_template_kwargs").is_none());
}
#[test]
fn openrouter_thinking_budget_maps_to_reasoning_max_tokens() {
let provider = OpenAiCompatibleProvider::new("openrouter".to_string());
let mut payload = base_request_payload();
payload.thinking = ThinkingConfig::Enabled {
budget_tokens: Some(2048),
};
let mut body = OpenAiCompatibleProvider::build_request_body(&payload, false);
provider.transform_request(&mut body);
assert_eq!(body["reasoning"]["max_tokens"], 2048);
assert!(body.get("chat_template_kwargs").is_none());
}
#[test]
fn qwen36_emits_preserve_thinking_in_chat_template_kwargs() {
let mut payload = base_request_payload();
payload.provider = "local".to_string();
payload.model = "Qwen/Qwen3.6-35B-A3B".to_string();
payload.thinking = ThinkingConfig::Enabled {
budget_tokens: None,
};
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(
body["chat_template_kwargs"]["preserve_thinking"], true,
"Qwen3.6 should request preserve_thinking so <think> blocks survive across agentic turns"
);
assert_eq!(body["chat_template_kwargs"]["enable_thinking"], true);
}
#[test]
fn ollama_qwen35_does_not_emit_chat_template_kwargs() {
let mut payload = base_request_payload();
payload.provider = "ollama".to_string();
payload.model = "qwen3.5:35b-a3b-coding-nvfp4".to_string();
payload.thinking = ThinkingConfig::Enabled {
budget_tokens: None,
};
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert!(
body.get("chat_template_kwargs").is_none(),
"Ollama silently drops chat_template_kwargs today; gate them so strict validation would not break requests"
);
}
#[test]
fn qwen35_local_disables_thinking_when_absent() {
let mut payload = base_request_payload();
payload.provider = "local".to_string();
payload.model = "Qwen/Qwen3.5-Coder-32B".to_string();
payload.thinking = ThinkingConfig::Disabled;
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["chat_template_kwargs"]["enable_thinking"], false);
}
#[test]
fn openai_effort_maps_to_reasoning_effort() {
let mut payload = base_request_payload();
payload.provider = "openai".to_string();
payload.model = "o3".to_string();
payload.thinking = ThinkingConfig::Effort {
level: ReasoningEffort::High,
};
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["reasoning_effort"], "high");
assert_eq!(body["max_completion_tokens"], 64);
assert!(body.get("max_tokens").is_none());
assert!(body.get("reasoning").is_none());
}
#[test]
fn openai_none_effort_maps_to_reasoning_effort_none() {
let mut payload = base_request_payload();
payload.provider = "openai".to_string();
payload.model = "gpt-5.5".to_string();
payload.thinking = ThinkingConfig::Effort {
level: ReasoningEffort::None,
};
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["reasoning_effort"], "none");
}
#[test]
fn together_hybrid_reasoning_uses_reasoning_enabled() {
let mut payload = base_request_payload();
payload.provider = "together".to_string();
payload.model = "moonshotai/Kimi-K2.5".to_string();
payload.thinking = ThinkingConfig::Enabled {
budget_tokens: None,
};
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["reasoning"]["enabled"], true);
assert!(body.get("chat_template_kwargs").is_none());
assert!(body.get("reasoning_effort").is_none());
}
#[test]
fn together_gpt_oss_effort_uses_reasoning_effort() {
let mut payload = base_request_payload();
payload.provider = "together".to_string();
payload.model = "openai/gpt-oss-120b".to_string();
payload.thinking = ThinkingConfig::Effort {
level: ReasoningEffort::Medium,
};
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["reasoning_effort"], "medium");
assert!(body.get("reasoning").is_none());
}
#[test]
fn openai_non_reasoning_model_uses_legacy_max_tokens() {
let mut payload = base_request_payload();
payload.provider = "openai".to_string();
payload.model = "gpt-4o".to_string();
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["max_tokens"], 64);
assert!(body.get("max_completion_tokens").is_none());
assert!(body.get("reasoning_effort").is_none());
}
#[test]
fn openrouter_effort_maps_to_nested_reasoning_effort() {
let provider = OpenAiCompatibleProvider::new("openrouter".to_string());
let mut payload = base_request_payload();
payload.thinking = ThinkingConfig::Effort {
level: ReasoningEffort::Medium,
};
let mut body = OpenAiCompatibleProvider::build_request_body(&payload, false);
provider.transform_request(&mut body);
assert_eq!(body["reasoning"]["effort"], "medium");
assert!(body.get("reasoning_effort").is_none());
}
#[test]
fn image_content_maps_to_openai_image_url_block() {
let mut payload = base_request_payload();
payload.provider = "openai".to_string();
payload.model = "gpt-4o".to_string();
payload.messages = vec![json!({
"role": "user",
"content": [
{"type": "text", "text": "caption"},
{"type": "image", "base64": "iVBORw0KGgo=", "media_type": "image/png", "detail": "low"}
],
})];
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["messages"][0]["content"][0]["text"], "caption");
assert_eq!(
body["messages"][0]["content"][1],
json!({
"type": "image_url",
"image_url": {
"url": "data:image/png;base64,iVBORw0KGgo=",
"detail": "low",
}
})
);
}
#[test]
fn image_url_content_maps_to_openai_image_url_block() {
let mut payload = base_request_payload();
payload.provider = "openai".to_string();
payload.model = "gpt-4o".to_string();
payload.messages = vec![json!({
"role": "user",
"content": [
{"type": "image", "url": "https://example.com/image.png", "media_type": "image/png", "detail": "high"}
],
})];
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(
body["messages"][0]["content"][0],
json!({
"type": "image_url",
"image_url": {
"url": "https://example.com/image.png",
"detail": "high",
}
})
);
}
#[test]
fn output_format_json_schema_maps_to_openai_response_format() {
let mut payload = base_request_payload();
payload.output_format = crate::llm::api::OutputFormat::JsonSchema {
schema: serde_json::json!({
"type": "object",
"properties": {"answer": {"type": "string"}},
"required": ["answer"],
}),
strict: false,
};
let body = OpenAiCompatibleProvider::build_request_body(&payload, false);
assert_eq!(body["response_format"]["type"], "json_schema");
assert_eq!(
body["response_format"]["json_schema"]["schema"]["properties"]["answer"]["type"],
"string"
);
assert_eq!(
body["response_format"]["json_schema"]["strict"],
serde_json::json!(false)
);
}
fn base_request_payload() -> LlmRequestPayload {
LlmRequestPayload {
provider: "openrouter".to_string(),
model: "google/gemini-2.5-pro".to_string(),
api_key: String::new(),
fallback_chain: Vec::new(),
route_fallbacks: Vec::new(),
session_id: None,
messages: vec![json!({"role": "user", "content": "hello"})],
system: None,
max_tokens: 64,
temperature: Some(0.0),
top_p: None,
top_k: None,
logprobs: false,
top_logprobs: None,
stop: None,
seed: None,
frequency_penalty: None,
presence_penalty: None,
output_format: crate::llm::api::OutputFormat::Text,
response_format: None,
json_schema: None,
thinking: ThinkingConfig::Disabled,
anthropic_beta_features: Vec::new(),
vision: false,
native_tools: None,
tool_choice: None,
cache: false,
timeout: None,
stream: false,
provider_overrides: None,
prefill: None,
}
}
}