use crate::value::VmValue;
/// Unbounded channel sender used to stream incremental response text ("deltas") out of an LLM call.
pub(crate) type DeltaSender = tokio::sync::mpsc::UnboundedSender<String>;
/// Requested reasoning/thinking effort level for providers that expose one.
///
/// Serializes in lowercase (e.g. `XHigh` -> `"xhigh"`), matching the strings
/// returned by `as_str`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub(crate) enum ReasoningEffort {
    /// Explicitly no reasoning; `ThinkingConfig::is_disabled` treats this as disabled.
    None,
    Minimal,
    Low,
    Medium,
    High,
    /// Extra-high effort.
    XHigh,
}
impl ReasoningEffort {
pub(crate) fn as_str(self) -> &'static str {
match self {
ReasoningEffort::None => "none",
ReasoningEffort::Minimal => "minimal",
ReasoningEffort::Low => "low",
ReasoningEffort::Medium => "medium",
ReasoningEffort::High => "high",
ReasoningEffort::XHigh => "xhigh",
}
}
}
/// Provider-agnostic extended-thinking configuration.
///
/// Serializes internally tagged as `mode` in snake_case.
#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub(crate) enum ThinkingConfig {
    /// Thinking turned off (the default).
    #[default]
    Disabled,
    /// Thinking on, optionally bounded by a provider-side token budget.
    Enabled {
        budget_tokens: Option<u32>,
    },
    /// Let the provider decide how much to think.
    Adaptive,
    /// Fixed effort level; `ReasoningEffort::None` counts as disabled.
    Effort {
        level: ReasoningEffort,
    },
}
impl ThinkingConfig {
    /// True when no thinking should happen: either `Disabled`, or an explicit
    /// `Effort` level of `ReasoningEffort::None`.
    pub(crate) fn is_disabled(&self) -> bool {
        match self {
            Self::Disabled => true,
            Self::Effort {
                level: ReasoningEffort::None,
            } => true,
            _ => false,
        }
    }
    /// Complement of [`Self::is_disabled`].
    pub(crate) fn is_enabled(&self) -> bool {
        !self.is_disabled()
    }
}
/// Desired shape of the model's output.
///
/// Serializes internally tagged as `kind` in snake_case.
#[derive(Clone, Debug, Default, PartialEq, serde::Serialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub(crate) enum OutputFormat {
    /// Free-form text (the default).
    #[default]
    Text,
    /// Any syntactically valid JSON object, no schema enforced.
    JsonObject,
    /// JSON constrained by an explicit schema; `strict` requests exact conformance.
    JsonSchema {
        schema: serde_json::Value,
        strict: bool,
    },
}
impl OutputFormat {
    /// True for any format other than plain text.
    pub(crate) fn is_structured(&self) -> bool {
        match self {
            Self::Text => false,
            _ => true,
        }
    }
    /// The attached JSON schema, when this is the `JsonSchema` variant.
    pub(crate) fn schema(&self) -> Option<&serde_json::Value> {
        if let Self::JsonSchema { schema, .. } = self {
            Some(schema)
        } else {
            None
        }
    }
}
/// Which search algorithm backs tool lookup.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchVariant {
    Bm25,
    Regex,
    Hybrid,
}
impl ToolSearchVariant {
    /// Short lowercase identifier for logs and wire formats.
    pub(crate) fn as_short(self) -> &'static str {
        match self {
            Self::Bm25 => "bm25",
            Self::Regex => "regex",
            Self::Hybrid => "hybrid",
        }
    }
}
/// Where tool search executes: provider-native, client-side, or auto-selected.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchMode {
    Auto,
    Native,
    Client,
}
/// Pairing of a search variant with an execution mode.
#[derive(Clone, Debug)]
pub(crate) struct ToolSearchConfig {
    pub variant: ToolSearchVariant,
    pub mode: ToolSearchMode,
}
impl ToolSearchConfig {
    /// The stock configuration: BM25 search with auto-selected execution mode.
    pub(crate) fn default_bm25_auto() -> Self {
        ToolSearchConfig {
            variant: ToolSearchVariant::Bm25,
            mode: ToolSearchMode::Auto,
        }
    }
}
/// How a concrete provider/model target is chosen for a call.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum LlmRoutePolicy {
    /// Caller picked the target directly; no routing logic involved.
    Manual,
    /// Always route to the named target.
    Always(String),
    /// Pick the cheapest candidate at or above the named quality tier.
    CheapestOverQuality(String),
    /// Pick the fastest candidate at or above the named quality tier.
    FastestOverQuality(String),
    /// Ordered candidate list resolved according to the named strategy.
    PreferenceList {
        targets: Vec<String>,
        strategy: String,
    },
}
impl LlmRoutePolicy {
    /// Human-readable label for logging/telemetry, e.g. `always(gpt-4o)`.
    pub(crate) fn as_label(&self) -> String {
        match self {
            Self::Manual => String::from("manual"),
            Self::Always(target) => format!("always({target})"),
            Self::CheapestOverQuality(target) => {
                format!("cheapest_over_quality({target})")
            }
            Self::FastestOverQuality(target) => {
                format!("fastest_over_quality({target})")
            }
            Self::PreferenceList { targets, strategy } => {
                let joined = targets.join(",");
                format!("preference_list({strategy}: {joined})")
            }
        }
    }
}
/// One candidate weighed during routing, with the data the router used.
/// Field order is kept as-is because it defines the serialized output order.
#[derive(Clone, Debug, serde::Serialize, PartialEq)]
pub(crate) struct LlmRouteAlternative {
    pub provider: String,
    pub model: String,
    pub quality_tier: String,
    /// Whether this target was usable at decision time.
    pub available: bool,
    /// Whether this is the target the router ultimately picked.
    pub selected: bool,
    pub cost_per_1k_in: Option<f64>,
    pub cost_per_1k_out: Option<f64>,
    pub latency_p50_ms: Option<u64>,
    /// Router-provided explanation for selecting or rejecting this candidate.
    pub reason: String,
}
/// Record of a routing decision: which policy ran, what was requested,
/// what was selected, and every alternative that was considered.
#[derive(Clone, Debug, serde::Serialize, PartialEq)]
pub(crate) struct LlmRoutingDecision {
    /// Policy label (see `LlmRoutePolicy::as_label`).
    pub policy: String,
    pub requested_quality: Option<String>,
    pub selected_provider: String,
    pub selected_model: String,
    pub alternatives: Vec<LlmRouteAlternative>,
}
/// A concrete provider/model pair to retry with when the primary target fails.
#[derive(Clone, Debug, serde::Serialize, PartialEq)]
pub(crate) struct LlmRouteFallback {
    pub provider: String,
    pub model: String,
}
/// Full, VM-facing set of options for a single LLM call.
///
/// This is the rich in-process form; the `Send`-safe subset that is actually
/// handed to provider backends is `LlmRequestPayload`, built via its
/// `From<&LlmCallOptions>` impl.
#[derive(Clone)]
pub(crate) struct LlmCallOptions {
    pub provider: String,
    pub model: String,
    pub api_key: String,
    /// How this provider/model was chosen (manual or routed).
    pub route_policy: LlmRoutePolicy,
    /// Ordered fallback target strings to try after the primary fails.
    pub fallback_chain: Vec<String>,
    pub route_fallbacks: Vec<LlmRouteFallback>,
    /// Trace of the routing decision, when a router (not manual choice) ran.
    pub routing_decision: Option<LlmRoutingDecision>,
    pub session_id: Option<String>,
    /// Chat messages as provider-shaped JSON values.
    pub messages: Vec<serde_json::Value>,
    /// System prompt; may be rewritten by `apply_thinking_disable_directive`
    /// during payload conversion.
    pub system: Option<String>,
    pub transcript_summary: Option<String>,
    pub max_tokens: i64,
    // Sampling controls — `None` defers to the provider's defaults.
    pub temperature: Option<f64>,
    pub top_p: Option<f64>,
    pub top_k: Option<i64>,
    pub logprobs: bool,
    pub top_logprobs: Option<i64>,
    pub stop: Option<Vec<String>>,
    pub seed: Option<i64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,
    // Structured-output controls.
    pub output_format: OutputFormat,
    pub response_format: Option<String>,
    pub json_schema: Option<serde_json::Value>,
    pub output_schema: Option<serde_json::Value>,
    pub output_validation: Option<String>,
    /// Extended-thinking configuration.
    pub thinking: ThinkingConfig,
    /// Caller-requested anthropic-beta features, merged with capability
    /// defaults in `anthropic_beta_features_for_request`.
    pub anthropic_beta_features: Vec<String>,
    pub vision: bool,
    /// VM-local tool value; intentionally not carried into `LlmRequestPayload`
    /// (which must be `Send` — see the tests in this module).
    pub tools: Option<VmValue>,
    /// Tool definitions already in provider-native JSON shape.
    pub native_tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<serde_json::Value>,
    // NOTE(review): unused at runtime (hence the allow) — confirm intended use
    // before removing.
    #[allow(dead_code)]
    pub tool_search: Option<ToolSearchConfig>,
    pub cache: bool,
    /// Overall request timeout in seconds; see `resolve_timeout` for defaulting.
    pub timeout: Option<u64>,
    pub idle_timeout: Option<u64>,
    pub stream: bool,
    /// Raw provider-specific overrides passed through to the request.
    pub provider_overrides: Option<serde_json::Value>,
    pub budget: Option<crate::llm::cost::LlmBudgetEnvelope>,
    /// Assistant-message prefill text, for providers that support it.
    pub prefill: Option<String>,
    pub structural_experiment:
        Option<crate::llm::structural_experiments::StructuralExperimentConfig>,
    pub applied_structural_experiment:
        Option<crate::llm::structural_experiments::AppliedStructuralExperiment>,
}
/// Resolves the request timeout in seconds.
///
/// Precedence: an explicit value wins; otherwise the `HARN_LLM_TIMEOUT`
/// environment variable is used when it parses as `u64`; otherwise 120.
fn resolve_timeout(explicit: Option<u64>) -> u64 {
    if let Some(secs) = explicit {
        return secs;
    }
    std::env::var("HARN_LLM_TIMEOUT")
        .ok()
        .and_then(|raw| raw.parse().ok())
        .unwrap_or(120)
}
impl LlmCallOptions {
    /// Effective timeout in seconds (explicit value, then env var, then 120).
    pub(crate) fn resolve_timeout(&self) -> u64 {
        resolve_timeout(self.timeout)
    }
    /// Resolved set of anthropic-beta features for this request: the model's
    /// capability defaults, plus caller extras, plus the interleaved-thinking
    /// beta when thinking is `Enabled`/`Adaptive` and the model supports it.
    /// Duplicates are never added.
    pub(crate) fn anthropic_beta_features_for_request(&self) -> Vec<String> {
        let caps = crate::llm::capabilities::lookup(&self.provider, &self.model);
        let mut features = caps.anthropic_beta_features;
        self.anthropic_beta_features
            .iter()
            .for_each(|feature| push_unique_anthropic_beta_feature(&mut features, feature));
        // Only `Enabled`/`Adaptive` trigger the beta — `Effort { .. }` does not.
        let thinking_active = matches!(
            self.thinking,
            ThinkingConfig::Enabled { .. } | ThinkingConfig::Adaptive
        );
        if thinking_active && caps.interleaved_thinking_supported {
            push_unique_anthropic_beta_feature(
                &mut features,
                crate::llm::providers::anthropic::ANTHROPIC_INTERLEAVED_THINKING_BETA,
            );
        }
        features
    }
}
/// Appends `feature` to `features` unless an equal entry is already present.
pub(crate) fn push_unique_anthropic_beta_feature(features: &mut Vec<String>, feature: &str) {
    let is_new = features.iter().all(|existing| existing != feature);
    if is_new {
        features.push(feature.to_owned());
    }
}
/// `Send`-safe, serializable subset of `LlmCallOptions` handed to provider
/// backends. Built via `From<&LlmCallOptions>`, which also resolves beta
/// features and may prepend a thinking-disable directive to `system`.
/// Field order is kept as-is because it defines the serialized output order.
#[derive(Clone, Debug, serde::Serialize)]
pub(crate) struct LlmRequestPayload {
    pub provider: String,
    pub model: String,
    pub api_key: String,
    pub fallback_chain: Vec<String>,
    pub route_fallbacks: Vec<LlmRouteFallback>,
    pub messages: Vec<serde_json::Value>,
    /// System prompt, possibly prefixed with a model-specific
    /// thinking-disable directive (see `apply_thinking_disable_directive`).
    pub system: Option<String>,
    pub max_tokens: i64,
    pub temperature: Option<f64>,
    pub top_p: Option<f64>,
    pub top_k: Option<i64>,
    pub logprobs: bool,
    pub top_logprobs: Option<i64>,
    pub stop: Option<Vec<String>>,
    pub seed: Option<i64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,
    pub output_format: OutputFormat,
    pub response_format: Option<String>,
    pub json_schema: Option<serde_json::Value>,
    pub thinking: ThinkingConfig,
    /// Fully resolved feature set (capabilities + caller extras), not the raw
    /// caller list — see `anthropic_beta_features_for_request`.
    pub anthropic_beta_features: Vec<String>,
    pub vision: bool,
    pub native_tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<serde_json::Value>,
    pub cache: bool,
    pub timeout: Option<u64>,
    pub stream: bool,
    pub provider_overrides: Option<serde_json::Value>,
    pub prefill: Option<String>,
    pub session_id: Option<String>,
}
impl LlmRequestPayload {
    /// Effective timeout in seconds (explicit value, then `HARN_LLM_TIMEOUT`
    /// env var, then 120) — delegates to the module-level `resolve_timeout`.
    pub(crate) fn resolve_timeout(&self) -> u64 {
        resolve_timeout(self.timeout)
    }
}
impl From<&LlmCallOptions> for LlmRequestPayload {
    /// Projects the VM-facing options onto the wire payload.
    ///
    /// Differences from a plain field copy:
    /// - `anthropic_beta_features` is the *resolved* set (capability defaults
    ///   plus caller extras plus the interleaved-thinking beta when eligible);
    /// - after construction, `apply_thinking_disable_directive` may prepend a
    ///   model-specific "no thinking" directive to `system`.
    /// VM-local fields (`tools`, `route_policy`, budget, experiments, …) are
    /// intentionally not carried over.
    fn from(opts: &LlmCallOptions) -> Self {
        let mut payload = Self {
            provider: opts.provider.clone(),
            model: opts.model.clone(),
            api_key: opts.api_key.clone(),
            fallback_chain: opts.fallback_chain.clone(),
            route_fallbacks: opts.route_fallbacks.clone(),
            messages: opts.messages.clone(),
            system: opts.system.clone(),
            max_tokens: opts.max_tokens,
            temperature: opts.temperature,
            top_p: opts.top_p,
            top_k: opts.top_k,
            logprobs: opts.logprobs,
            top_logprobs: opts.top_logprobs,
            stop: opts.stop.clone(),
            seed: opts.seed,
            frequency_penalty: opts.frequency_penalty,
            presence_penalty: opts.presence_penalty,
            output_format: opts.output_format.clone(),
            response_format: opts.response_format.clone(),
            json_schema: opts.json_schema.clone(),
            thinking: opts.thinking.clone(),
            anthropic_beta_features: opts.anthropic_beta_features_for_request(),
            vision: opts.vision,
            native_tools: opts.native_tools.clone(),
            tool_choice: opts.tool_choice.clone(),
            cache: opts.cache,
            timeout: opts.timeout,
            stream: opts.stream,
            provider_overrides: opts.provider_overrides.clone(),
            prefill: opts.prefill.clone(),
            session_id: opts.session_id.clone(),
        };
        apply_thinking_disable_directive(&mut payload);
        payload
    }
}
/// When thinking is disabled and the model's capabilities declare a
/// thinking-disable directive (e.g. `/no_think` for Qwen3 on Ollama),
/// prepends that directive to the payload's system prompt.
///
/// No-ops when: thinking is enabled, the model has no directive, the
/// directive is blank, or the directive is already the leading token of the
/// system prompt (idempotent).
fn apply_thinking_disable_directive(payload: &mut LlmRequestPayload) {
    if !payload.thinking.is_disabled() {
        return;
    }
    let caps = crate::llm::capabilities::lookup(&payload.provider, &payload.model);
    let Some(directive) = caps.thinking_disable_directive.as_deref() else {
        return;
    };
    let directive = directive.trim();
    if directive.is_empty() {
        return;
    }
    // Idempotency check: the directive counts as present only when it is a
    // complete leading token (followed by whitespace or end of string).
    // A bare `starts_with(directive)` would false-positive on a longer word
    // sharing the prefix (e.g. "/no_think" vs. "/no_thinking ...") and skip
    // the directive when it is actually needed.
    let already_present = payload
        .system
        .as_deref()
        .and_then(|sys| sys.trim_start().strip_prefix(directive))
        .map_or(false, |rest| {
            rest.is_empty() || rest.starts_with(char::is_whitespace)
        });
    if already_present {
        return;
    }
    let new_system = match payload.system.as_deref().filter(|s| !s.is_empty()) {
        Some(existing) => format!("{directive}\n{existing}"),
        None => directive.to_string(),
    };
    payload.system = Some(new_system);
}
/// Test fixture: a fully populated `LlmCallOptions` for the given provider.
/// Every optional knob is set (including VM-local ones like `tools`) so
/// conversion tests can verify what carries over to `LlmRequestPayload`
/// and what gets dropped.
#[cfg(test)]
pub(super) fn base_opts(provider: &str) -> LlmCallOptions {
    use std::rc::Rc;
    LlmCallOptions {
        provider: provider.to_string(),
        model: "test-model".to_string(),
        api_key: String::new(),
        route_policy: LlmRoutePolicy::Manual,
        fallback_chain: Vec::new(),
        route_fallbacks: Vec::new(),
        routing_decision: None,
        session_id: None,
        messages: vec![serde_json::json!({"role": "user", "content": "hello"})],
        system: None,
        transcript_summary: Some("summary".to_string()),
        max_tokens: 64,
        temperature: Some(0.2),
        top_p: Some(0.8),
        top_k: Some(40),
        logprobs: false,
        top_logprobs: None,
        stop: Some(vec!["STOP".to_string()]),
        seed: Some(7),
        frequency_penalty: Some(0.1),
        presence_penalty: Some(0.2),
        output_format: OutputFormat::JsonSchema {
            schema: serde_json::json!({"type": "object"}),
            strict: true,
        },
        response_format: Some("json".to_string()),
        json_schema: Some(serde_json::json!({"type": "object"})),
        output_schema: Some(serde_json::json!({"type": "object"})),
        output_validation: Some("error".to_string()),
        thinking: ThinkingConfig::Disabled,
        anthropic_beta_features: Vec::new(),
        vision: false,
        // VM-local (Rc-backed) value — must NOT appear in the payload.
        tools: Some(VmValue::String(Rc::from("vm-local-tools"))),
        native_tools: Some(vec![
            serde_json::json!({"type": "function", "function": {"name": "tool"}}),
        ]),
        tool_choice: Some(serde_json::json!({
            "type": "function",
            "function": {"name": "tool"}
        })),
        tool_search: None,
        cache: true,
        stream: true,
        timeout: Some(5),
        idle_timeout: None,
        provider_overrides: Some(serde_json::json!({"custom_flag": true})),
        budget: None,
        prefill: None,
        structural_experiment: None,
        applied_structural_experiment: None,
    }
}
#[cfg(test)]
mod tests {
    use super::{base_opts, LlmRequestPayload, ThinkingConfig};
    // Compile-time probe: instantiating this requires `T: Send`.
    fn assert_send<T: Send>() {}
    #[test]
    fn request_payload_is_send_safe_and_drops_vm_local_fields() {
        let payload = LlmRequestPayload::from(&base_opts("openai"));
        // The payload must be Send (it crosses task boundaries); the Rc-backed
        // `tools` field of LlmCallOptions has no counterpart here.
        assert_send::<LlmRequestPayload>();
        assert_eq!(payload.provider, "openai");
        assert_eq!(payload.model, "test-model");
        // Provider-native tool config and overrides must survive conversion.
        assert!(payload.native_tools.is_some());
        assert!(payload.tool_choice.is_some());
        assert_eq!(
            payload.provider_overrides,
            Some(serde_json::json!({"custom_flag": true}))
        );
    }
    // NOTE(review): the qwen3.5-on-ollama cases below rely on
    // crate::llm::capabilities declaring "/no_think" as that model's
    // thinking_disable_directive.
    #[test]
    fn thinking_disable_directive_prepended_for_qwen3_when_disabled() {
        let mut opts = base_opts("ollama");
        opts.model = "qwen3.5:30b".to_string();
        opts.system = Some("you are an agent.".to_string());
        opts.thinking = ThinkingConfig::Disabled;
        let payload = LlmRequestPayload::from(&opts);
        assert_eq!(
            payload.system.as_deref(),
            Some("/no_think\nyou are an agent."),
            "Qwen3-on-Ollama with thinking: false should auto-prepend /no_think to system",
        );
    }
    #[test]
    fn thinking_disable_directive_skipped_when_thinking_enabled() {
        let mut opts = base_opts("ollama");
        opts.model = "qwen3.5:30b".to_string();
        opts.system = Some("you are an agent.".to_string());
        opts.thinking = ThinkingConfig::Enabled {
            budget_tokens: None,
        };
        let payload = LlmRequestPayload::from(&opts);
        assert_eq!(payload.system.as_deref(), Some("you are an agent."));
    }
    #[test]
    fn thinking_disable_directive_idempotent_when_already_present() {
        let mut opts = base_opts("ollama");
        opts.model = "qwen3.5:30b".to_string();
        opts.system = Some("/no_think\nyou are an agent.".to_string());
        opts.thinking = ThinkingConfig::Disabled;
        let payload = LlmRequestPayload::from(&opts);
        assert_eq!(
            payload.system.as_deref(),
            Some("/no_think\nyou are an agent."),
            "Should not double-prepend /no_think when already at the head of system",
        );
    }
    #[test]
    fn thinking_disable_directive_creates_system_when_none() {
        let mut opts = base_opts("ollama");
        opts.model = "qwen3.5:30b".to_string();
        opts.system = None;
        opts.thinking = ThinkingConfig::Disabled;
        let payload = LlmRequestPayload::from(&opts);
        assert_eq!(payload.system.as_deref(), Some("/no_think"));
    }
    #[test]
    fn thinking_disable_directive_noop_for_provider_without_capability() {
        let mut opts = base_opts("anthropic");
        opts.model = "claude-haiku-4-7".to_string();
        opts.system = Some("you are an agent.".to_string());
        opts.thinking = ThinkingConfig::Disabled;
        let payload = LlmRequestPayload::from(&opts);
        assert_eq!(
            payload.system.as_deref(),
            Some("you are an agent."),
            "Anthropic has no thinking_disable_directive — system should be untouched",
        );
    }
}