heartbit_core/config/agent.rs
1#![allow(missing_docs)]
2use serde::{Deserialize, Serialize};
3
4use crate::agent::routing::RoutingMode;
5
6use super::guardrails::GuardrailsConfig;
7
8pub use crate::types::{DispatchMode, SpawnConfig};
9
10/// Context window management strategy.
11#[derive(Debug, Clone, Deserialize, PartialEq)]
12#[serde(tag = "type", rename_all = "snake_case")]
13pub enum ContextStrategyConfig {
14 /// No trimming (default).
15 Unlimited,
16 /// Sliding window: trim old messages to stay within `max_tokens`.
17 SlidingWindow { max_tokens: u32 },
18 /// Summarize: compress old messages when context exceeds `threshold` tokens.
19 Summarize { threshold: u32 },
20}
21
22/// Per-agent provider override. When set on an agent, overrides the
23/// orchestrator's default provider for that agent only.
24#[derive(Debug, Clone, Deserialize)]
25pub struct AgentProviderConfig {
26 pub name: String,
27 pub model: String,
28 /// Custom API endpoint URL (overrides the default for the provider).
29 /// Useful for self-hosted models, Azure, or proxies.
30 #[serde(default)]
31 pub base_url: Option<String>,
32 /// Direct API key (alternative to environment variable).
33 /// Prefer env vars in production; this is for testing/local dev.
34 #[serde(default)]
35 pub api_key: Option<String>,
36 /// Enable Anthropic prompt caching for this agent.
37 #[serde(default)]
38 pub prompt_caching: bool,
39 /// Per-agent model cascading override.
40 pub cascade: Option<super::provider::CascadeConfig>,
41}
42
43/// Orchestrator-level settings with sensible defaults.
44#[derive(Debug, Deserialize)]
45pub struct OrchestratorConfig {
46 #[serde(default = "default_max_turns")]
47 pub max_turns: usize,
48 #[serde(default = "default_max_tokens")]
49 pub max_tokens: u32,
50 /// Context window management strategy for the orchestrator's own conversation.
51 pub context_strategy: Option<ContextStrategyConfig>,
52 /// Token threshold for summarization of the orchestrator's own context.
53 pub summarize_threshold: Option<u32>,
54 /// Timeout in seconds for the orchestrator's own tool calls.
55 pub tool_timeout_seconds: Option<u64>,
56 /// Maximum byte size for tool output on the orchestrator's own tools.
57 pub max_tool_output_bytes: Option<usize>,
58 /// Wall-clock deadline in seconds for the entire orchestrator run.
59 pub run_timeout_seconds: Option<u64>,
60 /// Enable the `form_squad` tool for dynamic agent squad formation.
61 /// When `None` (default), auto-enabled when there are >= 2 agents.
62 /// Set to `false` to disable for a simpler prompt with fewer tokens.
63 pub enable_squads: Option<bool>,
64 /// Reasoning/thinking effort level. Enables extended thinking on models
65 /// that support it (e.g., Qwen3 via OpenRouter, Claude with extended thinking).
66 /// Valid values: "high", "medium", "low", "none".
67 pub reasoning_effort: Option<String>,
68 /// Enable reflection prompts after tool results. When true, the agent pauses
69 /// to assess tool outputs before deciding the next action (Reflexion/CRITIC pattern).
70 pub enable_reflection: Option<bool>,
71 /// Tool output compression threshold in bytes. Outputs exceeding this size
72 /// are compressed via an LLM call that preserves factual content.
73 pub tool_output_compression_threshold: Option<usize>,
74 /// Maximum number of tool definitions sent per LLM turn. When agents have
75 /// many tools, filtering to the most relevant reduces context usage and cost.
76 pub max_tools_per_turn: Option<usize>,
77 /// Tool profile for pre-filtering tool definitions. Valid values:
78 /// "conversational", "standard", "full". Defaults to no filtering.
79 pub tool_profile: Option<String>,
80 /// Maximum consecutive identical tool-call turns before doom loop detection
81 /// triggers. When reached, tool calls get error results instead of executing.
82 pub max_identical_tool_calls: Option<u32>,
83 /// Maximum consecutive fuzzy-identical tool-call turns before doom loop detection.
84 /// Fuzzy matching compares sorted tool names (ignoring inputs).
85 pub max_fuzzy_identical_tool_calls: Option<u32>,
86 /// Maximum number of tool calls allowed in a single LLM turn. When a turn
87 /// contains more tool calls than this limit, the excess calls are rejected
88 /// with an error result (per-turn cap, not cumulative).
89 pub max_tool_calls_per_turn: Option<u32>,
90 /// Dispatch mode for orchestrator delegation. When `Sequential`, the
91 /// delegate_task schema constrains `maxItems: 1` so the LLM dispatches
92 /// one agent at a time. Defaults to `Parallel` when absent.
93 pub dispatch_mode: Option<DispatchMode>,
94 /// Task routing strategy: `auto` (default), `always_orchestrate`, `single_agent`.
95 /// `auto` uses heuristic scoring + capability matching to route simple tasks
96 /// to a single agent and complex tasks to the orchestrator.
97 #[serde(default)]
98 pub routing: RoutingMode,
99 /// Escalate from single-agent to orchestrator on failure. Default: true.
100 /// When a single-agent run fails with MaxTurnsExceeded, doom loop, or
101 /// excessive compaction, the task is re-run through the orchestrator.
102 #[serde(default = "super::default_true")]
103 pub escalation: bool,
104 /// Append the multi-agent collaboration prompt to sub-agent system prompts.
105 /// Teaches sub-agents blackboard protocol, dedup, cross-verification, and
106 /// structured execution. Default: true.
107 #[serde(default)]
108 pub multi_agent_prompt: Option<bool>,
109 /// Dynamic agent spawning configuration. When present, enables the `spawn_agent`
110 /// tool on the orchestrator, allowing the LLM to create specialist agents at runtime.
111 pub spawn: Option<SpawnConfig>,
112 /// Per-tenant in-flight token cap for the `TenantTokenTracker`.
113 /// When `None`, in-flight token tracking is disabled (effectively unbounded).
114 /// Must be > 0 when set.
115 #[serde(default, skip_serializing_if = "Option::is_none")]
116 pub max_tokens_in_flight_per_tenant: Option<usize>,
117}
118
119pub(super) fn default_max_turns() -> usize {
120 10
121}
122
123pub(super) fn default_max_tokens() -> u32 {
124 4096
125}
126
127impl Default for OrchestratorConfig {
128 fn default() -> Self {
129 Self {
130 max_turns: default_max_turns(),
131 max_tokens: default_max_tokens(),
132 context_strategy: None,
133 summarize_threshold: None,
134 tool_timeout_seconds: None,
135 max_tool_output_bytes: None,
136 run_timeout_seconds: None,
137 enable_squads: None,
138 reasoning_effort: None,
139 enable_reflection: None,
140 tool_output_compression_threshold: None,
141 max_tools_per_turn: None,
142 tool_profile: None,
143 max_identical_tool_calls: None,
144 max_fuzzy_identical_tool_calls: None,
145 max_tool_calls_per_turn: None,
146 dispatch_mode: None,
147 routing: RoutingMode::default(),
148 escalation: true,
149 multi_agent_prompt: None,
150 spawn: None,
151 max_tokens_in_flight_per_tenant: None,
152 }
153 }
154}
155
156/// An MCP server entry: a bare URL string, a full HTTP config with auth, or a
157/// stdio command to spawn as a child process.
158///
159/// Supports backward-compatible TOML: bare strings (`"http://..."`) deserialize
160/// as `Simple`, inline tables with `url` (`{ url = "...", auth_header = "..." }`)
161/// as `Full`, and inline tables with `command` (`{ command = "npx", args = [...] }`)
162/// as `Stdio`.
163#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
164#[serde(untagged)]
165pub enum McpServerEntry {
166 /// Bare URL string (backward-compatible).
167 Simple(String),
168 /// Full HTTP entry with optional auth header.
169 Full {
170 url: String,
171 #[serde(default)]
172 auth_header: Option<String>,
173 /// RFC 8707 resource indicator — audience for exchanged tokens.
174 /// Defaults to the `url` value when absent.
175 #[serde(default)]
176 resource: Option<String>,
177 /// OAuth scopes required by this MCP server (e.g., `["gmail.readonly"]`).
178 #[serde(default)]
179 scopes: Option<Vec<String>>,
180 },
181 /// Stdio transport — spawn a child process communicating via stdin/stdout.
182 Stdio {
183 command: String,
184 #[serde(default)]
185 args: Vec<String>,
186 #[serde(default)]
187 env: std::collections::HashMap<String, String>,
188 },
189}
190
191impl McpServerEntry {
192 /// Get the server URL (empty string for stdio entries).
193 pub fn url(&self) -> &str {
194 match self {
195 McpServerEntry::Simple(url) => url,
196 McpServerEntry::Full { url, .. } => url,
197 McpServerEntry::Stdio { .. } => "",
198 }
199 }
200
201 /// Get the optional auth header value.
202 pub fn auth_header(&self) -> Option<&str> {
203 match self {
204 McpServerEntry::Simple(_) => None,
205 McpServerEntry::Full { auth_header, .. } => auth_header.as_deref(),
206 McpServerEntry::Stdio { .. } => None,
207 }
208 }
209
210 /// Whether this entry uses stdio transport.
211 pub fn is_stdio(&self) -> bool {
212 matches!(self, McpServerEntry::Stdio { .. })
213 }
214
215 /// Get the RFC 8707 resource indicator (audience for token exchange).
216 /// Returns the explicit `resource` if set, otherwise falls back to the URL.
217 pub fn resource(&self) -> Option<&str> {
218 match self {
219 McpServerEntry::Simple(url) => Some(url.as_str()),
220 McpServerEntry::Full { resource, url, .. } => {
221 Some(resource.as_deref().unwrap_or(url.as_str()))
222 }
223 McpServerEntry::Stdio { .. } => None,
224 }
225 }
226
227 /// Get the OAuth scopes configured for this MCP server.
228 pub fn scopes(&self) -> Option<&[String]> {
229 match self {
230 McpServerEntry::Full { scopes, .. } => scopes.as_deref(),
231 _ => None,
232 }
233 }
234
235 /// Human-readable description for logging.
236 pub fn display_name(&self) -> String {
237 match self {
238 McpServerEntry::Simple(url) => url.clone(),
239 McpServerEntry::Full { url, .. } => url.clone(),
240 McpServerEntry::Stdio { command, args, .. } => {
241 if args.is_empty() {
242 command.clone()
243 } else {
244 format!("{} {}", command, args.join(" "))
245 }
246 }
247 }
248 }
249}
250
251/// How MCP resources are surfaced to agents.
252#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
253#[serde(rename_all = "snake_case")]
254pub enum McpResourceMode {
255 /// Resources become callable tools (agent decides when to read).
256 #[default]
257 Tools,
258 /// Pre-fetch resource content and inject into system prompt.
259 Context,
260 /// Skip resource discovery entirely.
261 None,
262}
263
264/// A sub-agent defined in the configuration file.
265#[derive(Debug, Deserialize)]
266pub struct AgentConfig {
267 pub name: String,
268 pub description: String,
269 #[serde(default)]
270 pub system_prompt: String,
271 /// Agent template to use as a base. The template provides default values
272 /// for system_prompt, max_tokens, max_turns, and other settings.
273 /// User-specified values override template defaults.
274 #[serde(default)]
275 pub template: Option<String>,
276 /// Skills to auto-inject into the system prompt at config resolution time.
277 /// Each skill name maps to a bundled or filesystem SKILL.md file.
278 #[serde(default)]
279 pub skills: Vec<String>,
280 #[serde(default)]
281 pub mcp_servers: Vec<McpServerEntry>,
282 /// A2A agent endpoints to discover and register as tools.
283 #[serde(default)]
284 pub a2a_agents: Vec<McpServerEntry>,
285 /// Context window management strategy for this agent.
286 pub context_strategy: Option<ContextStrategyConfig>,
287 /// Token threshold at which to trigger automatic summarization.
288 /// Only valid when `context_strategy` is not `SlidingWindow`.
289 pub summarize_threshold: Option<u32>,
290 /// Timeout in seconds for individual tool executions.
291 pub tool_timeout_seconds: Option<u64>,
292 /// Maximum byte size for individual tool output. Results exceeding this
293 /// limit are truncated with a `[truncated]` suffix.
294 pub max_tool_output_bytes: Option<usize>,
295 /// Per-agent turn limit. Overrides the orchestrator default when set.
296 pub max_turns: Option<usize>,
297 /// Per-agent token limit. Overrides the orchestrator default when set.
298 pub max_tokens: Option<u32>,
299 /// Optional JSON Schema for structured output. Expressed as an inline
300 /// TOML table that maps to the JSON Schema object. When set, the agent
301 /// receives a synthetic `__respond__` tool and returns structured JSON.
302 pub response_schema: Option<serde_json::Value>,
303 /// Wall-clock deadline in seconds for this agent's run.
304 pub run_timeout_seconds: Option<u64>,
305 /// Optional per-agent LLM provider override. When set, this agent uses
306 /// a different model/provider instead of the orchestrator's default.
307 pub provider: Option<AgentProviderConfig>,
308 /// Reasoning/thinking effort level. Overrides the orchestrator default.
309 /// Valid values: "high", "medium", "low", "none".
310 pub reasoning_effort: Option<String>,
311 /// Enable reflection prompts after tool results. Overrides the orchestrator default.
312 pub enable_reflection: Option<bool>,
313 /// Tool output compression threshold in bytes. Overrides the orchestrator default.
314 pub tool_output_compression_threshold: Option<usize>,
315 /// Maximum tools per turn for this agent. Overrides the orchestrator default.
316 pub max_tools_per_turn: Option<usize>,
317 /// Tool profile for pre-filtering tool definitions. Valid values:
318 /// "conversational" (memory + question only), "standard" (builtins only),
319 /// "full" (all tools). When absent, no pre-filtering is applied.
320 pub tool_profile: Option<String>,
321 /// Maximum consecutive identical tool-call turns before doom loop detection.
322 /// Overrides the orchestrator default.
323 pub max_identical_tool_calls: Option<u32>,
324 /// Maximum consecutive fuzzy-identical tool-call turns before doom loop detection.
325 /// Fuzzy matching compares sorted tool names (ignoring inputs). Overrides orchestrator default.
326 pub max_fuzzy_identical_tool_calls: Option<u32>,
327 /// Maximum number of tool calls allowed in a single LLM turn. Overrides the orchestrator default.
328 pub max_tool_calls_per_turn: Option<u32>,
329 /// Session pruning: truncate old tool results to save tokens.
330 /// When set, enables session-level pruning before each LLM call.
331 pub session_prune: Option<SessionPruneConfigToml>,
332 /// Enable recursive (cluster-then-summarize) summarization for long conversations.
333 pub recursive_summarization: Option<bool>,
334 /// Cumulative importance threshold for memory reflection triggers.
335 /// When the sum of stored memory importance values exceeds this threshold,
336 /// the store tool appends a reflection hint to guide the agent.
337 pub reflection_threshold: Option<u32>,
338 /// When true, run memory consolidation at session end (clusters related
339 /// episodic memories into semantic summaries). Requires memory and adds
340 /// LLM calls at session end.
341 pub consolidate_on_exit: Option<bool>,
342 /// Hard limit on cumulative tokens (input + output) across all turns.
343 /// When exceeded, the agent returns an error with partial usage data.
344 pub max_total_tokens: Option<u64>,
345 /// Per-agent guardrails override. When set, overrides the top-level
346 /// `[guardrails]` section for this agent.
347 pub guardrails: Option<GuardrailsConfig>,
348 /// LRU response cache capacity (number of entries). When set, identical
349 /// LLM requests (same system prompt, messages, tool names) return cached
350 /// responses without calling the LLM. Only non-streaming calls are cached.
351 #[serde(default)]
352 pub response_cache_size: Option<usize>,
353 /// How MCP resources are surfaced to the agent.
354 /// `"tools"` (default) — resources become callable tools.
355 /// `"context"` — pre-fetch and inject into system prompt.
356 /// `"none"` — skip resource discovery.
357 #[serde(default)]
358 pub mcp_resources: McpResourceMode,
359 /// Enable dangerous tools (bash) for this agent. Default: false in daemon mode.
360 #[serde(default)]
361 pub dangerous_tools: bool,
362 /// Audit mode: "full" (default) or "metadata_only".
363 /// MetadataOnly strips user content from audit records.
364 #[serde(default)]
365 pub audit_mode: Option<String>,
366 /// Optional allowlist of builtin tool names for this agent.
367 /// When set, only listed builtins are included. When absent, all builtins load.
368 /// Empty list `[]` disables all builtins (MCP-only agent).
369 #[serde(default)]
370 pub builtin_tools: Option<Vec<String>>,
371}
372
373/// TOML representation of session pruning configuration.
374#[derive(Debug, Clone, Deserialize)]
375pub struct SessionPruneConfigToml {
376 /// Number of recent message pairs to keep at full fidelity. Default: 2.
377 #[serde(default = "default_keep_recent_n")]
378 pub keep_recent_n: usize,
379 /// Maximum bytes for a pruned tool result. Default: 200.
380 #[serde(default = "default_pruned_max_bytes")]
381 pub pruned_tool_result_max_bytes: usize,
382 /// Whether to preserve the first user message (task). Default: true.
383 #[serde(default = "default_preserve_task")]
384 pub preserve_task: bool,
385}
386
/// Serde fallback for `SessionPruneConfigToml::keep_recent_n`.
fn default_keep_recent_n() -> usize {
    const KEEP_RECENT_N: usize = 2;
    KEEP_RECENT_N
}
390
/// Serde fallback for `SessionPruneConfigToml::pruned_tool_result_max_bytes`.
fn default_pruned_max_bytes() -> usize {
    const PRUNED_MAX_BYTES: usize = 200;
    PRUNED_MAX_BYTES
}
394
/// Serde fallback for `SessionPruneConfigToml::preserve_task`.
fn default_preserve_task() -> bool {
    const PRESERVE_TASK: bool = true;
    PRESERVE_TASK
}
398
399impl AgentConfig {
400 /// Clone all fields of this config into a new `AgentConfig`.
401 ///
402 /// `AgentConfig` intentionally does not derive `Clone` (to keep the derive
403 /// list short and avoid accidental copies in hot paths). Use this method
404 /// when an explicit copy is needed (e.g., template resolution).
405 pub fn clone_config(&self) -> Self {
406 Self {
407 name: self.name.clone(),
408 description: self.description.clone(),
409 system_prompt: self.system_prompt.clone(),
410 template: self.template.clone(),
411 skills: self.skills.clone(),
412 mcp_servers: self.mcp_servers.clone(),
413 a2a_agents: self.a2a_agents.clone(),
414 context_strategy: self.context_strategy.clone(),
415 summarize_threshold: self.summarize_threshold,
416 tool_timeout_seconds: self.tool_timeout_seconds,
417 max_tool_output_bytes: self.max_tool_output_bytes,
418 max_turns: self.max_turns,
419 max_tokens: self.max_tokens,
420 response_schema: self.response_schema.clone(),
421 run_timeout_seconds: self.run_timeout_seconds,
422 provider: self.provider.clone(),
423 reasoning_effort: self.reasoning_effort.clone(),
424 enable_reflection: self.enable_reflection,
425 tool_output_compression_threshold: self.tool_output_compression_threshold,
426 max_tools_per_turn: self.max_tools_per_turn,
427 tool_profile: self.tool_profile.clone(),
428 max_identical_tool_calls: self.max_identical_tool_calls,
429 max_fuzzy_identical_tool_calls: self.max_fuzzy_identical_tool_calls,
430 max_tool_calls_per_turn: self.max_tool_calls_per_turn,
431 session_prune: self.session_prune.clone(),
432 recursive_summarization: self.recursive_summarization,
433 reflection_threshold: self.reflection_threshold,
434 consolidate_on_exit: self.consolidate_on_exit,
435 max_total_tokens: self.max_total_tokens,
436 guardrails: self.guardrails.clone(),
437 response_cache_size: self.response_cache_size,
438 mcp_resources: self.mcp_resources,
439 dangerous_tools: self.dangerous_tools,
440 audit_mode: self.audit_mode.clone(),
441 builtin_tools: self.builtin_tools.clone(),
442 }
443 }
444}
445
446impl AgentProviderConfig {
447 /// Clone via Option::as_ref → clone pattern for non-Clone containers.
448 pub fn take_ref(opt: &Option<Self>) -> Option<Self> {
449 opt.clone()
450 }
451}
452
453impl SessionPruneConfigToml {
454 /// Clone via Option::as_ref → clone pattern for non-Clone containers.
455 pub fn take_ref(opt: &Option<Self>) -> Option<Self> {
456 opt.clone()
457 }
458}