use crate::value::VmValue;
/// Sending half of an unbounded Tokio MPSC channel whose messages are `String`s.
// NOTE(review): the name suggests the strings are streamed text deltas — confirm at the use sites.
pub(crate) type DeltaSender = tokio::sync::mpsc::UnboundedSender<String>;
/// Thinking configuration attached to an LLM call.
///
/// Stored as `Option<ThinkingConfig>` on both `LlmCallOptions` and `LlmRequestPayload`,
/// so "no thinking configuration" is expressed as `None` rather than as a variant here.
#[derive(Clone, Debug, serde::Serialize)]
pub(crate) enum ThinkingConfig {
    /// Thinking requested without an explicit budget value.
    Enabled,
    /// Thinking requested together with an `i64` budget.
    // NOTE(review): the unit of the budget (presumably tokens) is not visible here — confirm.
    WithBudget(i64),
}
/// Tool-search variant selector.
///
/// Each variant has a short string form (see `ToolSearchVariant::as_short`) and maps to the
/// same-named `ToolSearchStrategy` via `ToolSearchStrategy::default_for_variant`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchVariant {
    /// Short form `"bm25"`.
    Bm25,
    /// Short form `"regex"`.
    Regex,
}
impl ToolSearchVariant {
pub(crate) fn as_short(self) -> &'static str {
match self {
ToolSearchVariant::Bm25 => "bm25",
ToolSearchVariant::Regex => "regex",
}
}
}
/// Strategy used for tool search.
///
/// `Bm25` and `Regex` are the strategies that `is_in_tree` reports as in-tree and that
/// `as_in_tree` can convert to `crate::llm::tool_search::InTreeStrategy`; `Semantic` and
/// `Host` have no such conversion (calling `as_in_tree` on them panics).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchStrategy {
    /// Short form `"bm25"`; in-tree.
    Bm25,
    /// Short form `"regex"`; in-tree.
    Regex,
    /// Short form `"semantic"`; not in-tree.
    Semantic,
    /// Short form `"host"`; not in-tree.
    Host,
}
impl ToolSearchStrategy {
    /// Returns the short lowercase identifier of this strategy
    /// (`"bm25"`, `"regex"`, `"semantic"` or `"host"`).
    pub(crate) fn as_short(self) -> &'static str {
        match self {
            Self::Bm25 => "bm25",
            Self::Regex => "regex",
            Self::Semantic => "semantic",
            Self::Host => "host",
        }
    }

    /// Returns `true` for the strategies that [`Self::as_in_tree`] can convert
    /// (`Bm25` and `Regex`), `false` for `Semantic` and `Host`.
    // The `dead_code` allowance suggests nothing calls this yet.
    #[allow(dead_code)]
    pub(crate) fn is_in_tree(self) -> bool {
        match self {
            Self::Bm25 | Self::Regex => true,
            Self::Semantic | Self::Host => false,
        }
    }

    /// Converts an in-tree strategy to its `InTreeStrategy` counterpart.
    ///
    /// # Panics
    ///
    /// Panics (via `unreachable!`) when called on `Semantic` or `Host`; callers are expected
    /// to have ruled those out beforehand.
    pub(crate) fn as_in_tree(self) -> crate::llm::tool_search::InTreeStrategy {
        use crate::llm::tool_search::InTreeStrategy;

        match self {
            Self::Bm25 => InTreeStrategy::Bm25,
            Self::Regex => InTreeStrategy::Regex,
            // Spelled out instead of `_` so a newly added variant forces a decision here.
            Self::Semantic | Self::Host => unreachable!("as_in_tree called on {self:?}"),
        }
    }

    /// Returns the strategy that shares its name with `variant`.
    pub(crate) fn default_for_variant(variant: ToolSearchVariant) -> Self {
        match variant {
            ToolSearchVariant::Bm25 => Self::Bm25,
            ToolSearchVariant::Regex => Self::Regex,
        }
    }
}
/// Tool-search mode selector stored in `ToolSearchConfig::mode`.
// NOTE(review): the variant semantics are not visible in this file. Presumably `Native` and
// `Client` pick where the search runs and `Auto` lets the runtime choose — confirm at the
// use sites before relying on this.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum ToolSearchMode {
    /// The mode used by `ToolSearchConfig::default_bm25_auto`.
    Auto,
    Native,
    Client,
}
/// Configuration for tool search.
///
/// `effective_strategy` and `effective_name` resolve the optional `strategy` and `name`
/// fields to concrete values.
#[derive(Clone, Debug)]
pub(crate) struct ToolSearchConfig {
    /// Search variant; also determines the fallback strategy when `strategy` is `None`.
    pub variant: ToolSearchVariant,
    /// Search mode.
    pub mode: ToolSearchMode,
    /// List of strings.
    // NOTE(review): presumably the names of tools that are always loaded — confirm.
    pub always_loaded: Vec<String>,
    /// Explicit strategy. When `None`, `effective_strategy` falls back to
    /// `ToolSearchStrategy::default_for_variant(variant)`.
    pub strategy: Option<ToolSearchStrategy>,
    /// Optional `i64` budget.
    // NOTE(review): presumably a token budget, as the name says — how it is enforced is not
    // visible here.
    pub budget_tokens: Option<i64>,
    /// Explicit name. When `None`, `effective_name` returns `"__harn_tool_search"`.
    pub name: Option<String>,
    /// Boolean flag; `false` in `default_bm25_auto`.
    // NOTE(review): presumably controls whether a stub listing is included — confirm.
    pub include_stub_listing: bool,
    /// Ordered map from a string key to a JSON value.
    // NOTE(review): presumably deferred tool bodies keyed by tool name — confirm.
    pub deferred_bodies: std::collections::BTreeMap<String, serde_json::Value>,
}
impl ToolSearchConfig {
pub(crate) fn default_bm25_auto() -> Self {
Self {
variant: ToolSearchVariant::Bm25,
mode: ToolSearchMode::Auto,
always_loaded: Vec::new(),
strategy: None,
budget_tokens: None,
name: None,
include_stub_listing: false,
deferred_bodies: std::collections::BTreeMap::new(),
}
}
pub(crate) fn effective_strategy(&self) -> ToolSearchStrategy {
self.strategy
.unwrap_or_else(|| ToolSearchStrategy::default_for_variant(self.variant))
}
pub(crate) fn effective_name(&self) -> &str {
self.name.as_deref().unwrap_or("__harn_tool_search")
}
}
/// Routing policy for an LLM call, stored in `LlmCallOptions::route_policy`.
///
/// Every variant except `Manual` carries a `String` that `as_label` renders as the policy's
/// target, e.g. `always(<target>)`.
// NOTE(review): how each policy picks a provider/model is not visible in this file; the
// variant names are the only hint.
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum LlmRoutePolicy {
    /// Label `manual`; carries no target.
    Manual,
    /// Label `always(<target>)`.
    Always(String),
    /// Label `cheapest_over_quality(<target>)`.
    CheapestOverQuality(String),
    /// Label `fastest_over_quality(<target>)`.
    FastestOverQuality(String),
}
impl LlmRoutePolicy {
pub(crate) fn as_label(&self) -> String {
match self {
Self::Manual => "manual".to_string(),
Self::Always(target) => format!("always({target})"),
Self::CheapestOverQuality(target) => format!("cheapest_over_quality({target})"),
Self::FastestOverQuality(target) => format!("fastest_over_quality({target})"),
}
}
}
/// One candidate recorded in `LlmRoutingDecision::alternatives`.
///
/// Serializable; all fields are plain data.
#[derive(Clone, Debug, serde::Serialize, PartialEq)]
pub(crate) struct LlmRouteAlternative {
    /// Provider of this candidate.
    pub provider: String,
    /// Model of this candidate.
    pub model: String,
    /// Quality tier, as a free-form string.
    pub quality_tier: String,
    /// Whether the candidate was available.
    pub available: bool,
    /// Whether the candidate was the one selected.
    pub selected: bool,
    /// Optional input cost figure.
    // NOTE(review): presumably cost per 1k input tokens; the currency is not visible here.
    pub cost_per_1k_in: Option<f64>,
    /// Optional output cost figure.
    // NOTE(review): presumably cost per 1k output tokens; the currency is not visible here.
    pub cost_per_1k_out: Option<f64>,
    /// Optional latency figure.
    // NOTE(review): presumably the median (p50) latency in milliseconds, per the name.
    pub latency_p50_ms: Option<u64>,
    /// Free-form explanation attached to this candidate.
    pub reason: String,
}
/// Serializable record of a routing outcome, stored in `LlmCallOptions::routing_decision`.
#[derive(Clone, Debug, serde::Serialize, PartialEq)]
pub(crate) struct LlmRoutingDecision {
    /// Policy, as a string.
    // NOTE(review): presumably the output of `LlmRoutePolicy::as_label` — confirm where this
    // struct is built.
    pub policy: String,
    /// Requested quality, if any.
    pub requested_quality: Option<String>,
    /// Provider that was selected.
    pub selected_provider: String,
    /// Model that was selected.
    pub selected_model: String,
    /// The candidates that were recorded for this decision.
    pub alternatives: Vec<LlmRouteAlternative>,
}
/// Full set of options for one LLM call.
///
/// `LlmRequestPayload::from(&LlmCallOptions)` copies a subset of these fields. The fields it
/// leaves behind are marked "not copied into `LlmRequestPayload`" below.
// NOTE(review): unlike `LlmRequestPayload`, this struct derives only `Clone` (no `Debug` or
// `Serialize`).
#[derive(Clone)]
pub(crate) struct LlmCallOptions {
    /// Provider name.
    pub provider: String,
    /// Model name.
    pub model: String,
    /// API key as a plain string.
    pub api_key: String,
    /// Routing policy. Not copied into `LlmRequestPayload`.
    pub route_policy: LlmRoutePolicy,
    /// List of strings.
    // NOTE(review): presumably fallback provider/model identifiers tried in order — confirm.
    pub fallback_chain: Vec<String>,
    /// Recorded routing outcome, if any. Not copied into `LlmRequestPayload`.
    pub routing_decision: Option<LlmRoutingDecision>,
    /// Messages as JSON values.
    pub messages: Vec<serde_json::Value>,
    /// Optional system string.
    pub system: Option<String>,
    /// Optional transcript summary. Not copied into `LlmRequestPayload`.
    pub transcript_summary: Option<String>,
    /// Token limit (signed 64-bit).
    pub max_tokens: i64,
    // Optional generation parameters; each is copied unchanged into `LlmRequestPayload`.
    pub temperature: Option<f64>,
    pub top_p: Option<f64>,
    pub top_k: Option<i64>,
    pub stop: Option<Vec<String>>,
    pub seed: Option<i64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,
    /// Optional response format, as a string.
    pub response_format: Option<String>,
    /// Optional JSON schema value.
    pub json_schema: Option<serde_json::Value>,
    /// Optional output schema value. Not copied into `LlmRequestPayload`.
    pub output_schema: Option<serde_json::Value>,
    /// Optional output validation setting, as a string. Not copied into `LlmRequestPayload`.
    pub output_validation: Option<String>,
    /// Optional thinking configuration.
    pub thinking: Option<ThinkingConfig>,
    /// Tools as a VM value. Not copied into `LlmRequestPayload`.
    pub tools: Option<VmValue>,
    /// Tools as JSON values; this is the form that is copied into `LlmRequestPayload`.
    pub native_tools: Option<Vec<serde_json::Value>>,
    /// Optional tool choice as a JSON value.
    pub tool_choice: Option<serde_json::Value>,
    /// Optional tool-search configuration. Not copied into `LlmRequestPayload`.
    // The `dead_code` allowance suggests the field is not read everywhere it is set.
    #[allow(dead_code)] pub tool_search: Option<ToolSearchConfig>,
    /// Boolean cache flag.
    pub cache: bool,
    /// Explicit timeout. When `None`, `resolve_timeout` falls back to the `HARN_LLM_TIMEOUT`
    /// environment variable and then to `120`.
    // NOTE(review): the unit (presumably seconds) is not established in this file.
    pub timeout: Option<u64>,
    /// Optional idle timeout. Not copied into `LlmRequestPayload`.
    // NOTE(review): unit and exact semantics are not visible here.
    pub idle_timeout: Option<u64>,
    /// Boolean stream flag.
    pub stream: bool,
    /// Optional provider-specific overrides as a JSON value.
    pub provider_overrides: Option<serde_json::Value>,
    /// Optional prefill string.
    pub prefill: Option<String>,
    /// Optional structural-experiment configuration. Not copied into `LlmRequestPayload`.
    pub structural_experiment:
        Option<crate::llm::structural_experiments::StructuralExperimentConfig>,
    /// Optional applied structural experiment. Not copied into `LlmRequestPayload`.
    pub applied_structural_experiment:
        Option<crate::llm::structural_experiments::AppliedStructuralExperiment>,
}
/// Resolves the effective timeout for an LLM call.
///
/// Precedence:
/// 1. `explicit`, when it is `Some`;
/// 2. the `HARN_LLM_TIMEOUT` environment variable, when it parses as a `u64`
///    (leading and trailing whitespace is ignored);
/// 3. the built-in default of `120`.
///
/// The environment is only consulted when `explicit` is `None`. An unset, non-Unicode or
/// unparsable variable falls through to the default.
// NOTE(review): the unit (presumably seconds) is not established in this file.
fn resolve_timeout(explicit: Option<u64>) -> u64 {
    const ENV_VAR: &str = "HARN_LLM_TIMEOUT";
    const DEFAULT_TIMEOUT: u64 = 120;

    explicit.unwrap_or_else(|| {
        std::env::var(ENV_VAR)
            .ok()
            // Trim first: `u64::from_str` rejects surrounding whitespace, so a value such as
            // "45\n" would otherwise be silently replaced by the default.
            .and_then(|raw| raw.trim().parse::<u64>().ok())
            .unwrap_or(DEFAULT_TIMEOUT)
    })
}
impl LlmCallOptions {
    /// Returns the effective timeout for these options.
    ///
    /// Delegates to the module-level `resolve_timeout` with `self.timeout`, so an explicit
    /// value wins and `None` falls back to `HARN_LLM_TIMEOUT`, then to `120`.
    pub(crate) fn resolve_timeout(&self) -> u64 {
        let explicit = self.timeout;
        resolve_timeout(explicit)
    }
}
/// Serializable subset of `LlmCallOptions`, built by `From<&LlmCallOptions>`.
///
/// It has no `VmValue` field, and the in-file unit test asserts that the type is `Send`.
// NOTE(review): the `Debug` and `Serialize` derives cover every field, including `api_key`,
// and no skip/redaction attribute is present — confirm the key never reaches logs or
// persisted output through this type.
#[derive(Clone, Debug, serde::Serialize)]
pub(crate) struct LlmRequestPayload {
    /// Provider name.
    pub provider: String,
    /// Model name.
    pub model: String,
    /// API key as a plain string.
    pub api_key: String,
    /// List of strings.
    // NOTE(review): presumably fallback provider/model identifiers tried in order — confirm.
    pub fallback_chain: Vec<String>,
    /// Messages as JSON values.
    pub messages: Vec<serde_json::Value>,
    /// Optional system string.
    pub system: Option<String>,
    /// Token limit (signed 64-bit).
    pub max_tokens: i64,
    // Optional generation parameters, copied unchanged from `LlmCallOptions`.
    pub temperature: Option<f64>,
    pub top_p: Option<f64>,
    pub top_k: Option<i64>,
    pub stop: Option<Vec<String>>,
    pub seed: Option<i64>,
    pub frequency_penalty: Option<f64>,
    pub presence_penalty: Option<f64>,
    /// Optional response format, as a string.
    pub response_format: Option<String>,
    /// Optional JSON schema value.
    pub json_schema: Option<serde_json::Value>,
    /// Optional thinking configuration.
    pub thinking: Option<ThinkingConfig>,
    /// Tools as JSON values.
    pub native_tools: Option<Vec<serde_json::Value>>,
    /// Optional tool choice as a JSON value.
    pub tool_choice: Option<serde_json::Value>,
    /// Boolean cache flag.
    pub cache: bool,
    /// Explicit timeout. When `None`, `resolve_timeout` falls back to the `HARN_LLM_TIMEOUT`
    /// environment variable and then to `120`.
    // NOTE(review): the unit (presumably seconds) is not established in this file.
    pub timeout: Option<u64>,
    /// Boolean stream flag.
    pub stream: bool,
    /// Optional provider-specific overrides as a JSON value.
    pub provider_overrides: Option<serde_json::Value>,
    /// Optional prefill string.
    pub prefill: Option<String>,
}
impl LlmRequestPayload {
    /// Returns the effective timeout for this payload.
    ///
    /// Delegates to the module-level `resolve_timeout` with `self.timeout`, so an explicit
    /// value wins and `None` falls back to `HARN_LLM_TIMEOUT`, then to `120`.
    pub(crate) fn resolve_timeout(&self) -> u64 {
        let explicit = self.timeout;
        resolve_timeout(explicit)
    }
}
impl From<&LlmCallOptions> for LlmRequestPayload {
fn from(opts: &LlmCallOptions) -> Self {
Self {
provider: opts.provider.clone(),
model: opts.model.clone(),
api_key: opts.api_key.clone(),
fallback_chain: opts.fallback_chain.clone(),
messages: opts.messages.clone(),
system: opts.system.clone(),
max_tokens: opts.max_tokens,
temperature: opts.temperature,
top_p: opts.top_p,
top_k: opts.top_k,
stop: opts.stop.clone(),
seed: opts.seed,
frequency_penalty: opts.frequency_penalty,
presence_penalty: opts.presence_penalty,
response_format: opts.response_format.clone(),
json_schema: opts.json_schema.clone(),
thinking: opts.thinking.clone(),
native_tools: opts.native_tools.clone(),
tool_choice: opts.tool_choice.clone(),
cache: opts.cache,
timeout: opts.timeout,
stream: opts.stream,
provider_overrides: opts.provider_overrides.clone(),
prefill: opts.prefill.clone(),
}
}
}
/// Test-only fixture: returns `LlmCallOptions` for `provider` with model `"test-model"`.
///
/// Most optional fields are populated with fixed sample values so tests can observe whether
/// they are forwarded; `system`, `thinking`, `tool_search`, `idle_timeout`, `prefill`,
/// `routing_decision` and both structural-experiment fields are left as `None`.
#[cfg(test)]
pub(super) fn base_opts(provider: &str) -> LlmCallOptions {
    let user_message = serde_json::json!({"role": "user", "content": "hello"});
    let function_tool = serde_json::json!({"type": "function", "function": {"name": "tool"}});
    let forced_choice = serde_json::json!({
        "type": "function",
        "function": {"name": "tool"}
    });

    LlmCallOptions {
        // Target and routing.
        provider: provider.to_owned(),
        model: String::from("test-model"),
        api_key: String::new(),
        route_policy: LlmRoutePolicy::Manual,
        fallback_chain: vec![],
        routing_decision: None,

        // Conversation content.
        messages: vec![user_message],
        system: None,
        transcript_summary: Some(String::from("summary")),

        // Generation parameters.
        max_tokens: 64,
        temperature: Some(0.2),
        top_p: Some(0.8),
        top_k: Some(40),
        stop: Some(vec![String::from("STOP")]),
        seed: Some(7),
        frequency_penalty: Some(0.1),
        presence_penalty: Some(0.2),

        // Output shaping.
        response_format: Some(String::from("json")),
        json_schema: Some(serde_json::json!({"type": "object"})),
        output_schema: Some(serde_json::json!({"type": "object"})),
        output_validation: Some(String::from("error")),
        thinking: None,

        // Tools: one VM-side value plus one JSON-side definition.
        tools: Some(VmValue::String(std::rc::Rc::from("vm-local-tools"))),
        native_tools: Some(vec![function_tool]),
        tool_choice: Some(forced_choice),
        tool_search: None,

        // Transport settings.
        cache: true,
        timeout: Some(5),
        idle_timeout: None,
        stream: true,
        provider_overrides: Some(serde_json::json!({"custom_flag": true})),
        prefill: None,

        structural_experiment: None,
        applied_structural_experiment: None,
    }
}
#[cfg(test)]
mod tests {
    use super::{base_opts, LlmRequestPayload};

    /// Compile-time probe: a call to this only type-checks when `T: Send`.
    fn assert_send<T: Send>() {}

    #[test]
    fn request_payload_is_send_safe_and_drops_vm_local_fields() {
        // Breaks the build, rather than failing at runtime, if the payload stops being `Send`.
        assert_send::<LlmRequestPayload>();

        let options = base_opts("openai");
        let payload = LlmRequestPayload::from(&options);

        assert_eq!(payload.provider, "openai");
        assert_eq!(payload.model, "test-model");

        // The JSON-side tool fields survive the conversion.
        assert!(payload.native_tools.is_some());
        assert!(payload.tool_choice.is_some());

        let expected_overrides = serde_json::json!({"custom_flag": true});
        assert_eq!(payload.provider_overrides, Some(expected_overrides));
    }
}