// heartbit_core/config/agent.rs

#![allow(missing_docs)]
use std::fmt;

use serde::{Deserialize, Serialize};

use crate::agent::routing::RoutingMode;

use super::guardrails::GuardrailsConfig;

pub use crate::types::{DispatchMode, SpawnConfig};
9
10/// Context window management strategy.
11#[derive(Debug, Clone, Deserialize, PartialEq)]
12#[serde(tag = "type", rename_all = "snake_case")]
13pub enum ContextStrategyConfig {
14    /// No trimming (default).
15    Unlimited,
16    /// Sliding window: trim old messages to stay within `max_tokens`.
17    SlidingWindow { max_tokens: u32 },
18    /// Summarize: compress old messages when context exceeds `threshold` tokens.
19    Summarize { threshold: u32 },
20}
21
22/// Per-agent provider override. When set on an agent, overrides the
23/// orchestrator's default provider for that agent only.
24#[derive(Debug, Clone, Deserialize)]
25pub struct AgentProviderConfig {
26    pub name: String,
27    pub model: String,
28    /// Custom API endpoint URL (overrides the default for the provider).
29    /// Useful for self-hosted models, Azure, or proxies.
30    #[serde(default)]
31    pub base_url: Option<String>,
32    /// Direct API key (alternative to environment variable).
33    /// Prefer env vars in production; this is for testing/local dev.
34    #[serde(default)]
35    pub api_key: Option<String>,
36    /// Enable Anthropic prompt caching for this agent.
37    #[serde(default)]
38    pub prompt_caching: bool,
39    /// Per-agent model cascading override.
40    pub cascade: Option<super::provider::CascadeConfig>,
41}
42
43/// Orchestrator-level settings with sensible defaults.
44#[derive(Debug, Deserialize)]
45pub struct OrchestratorConfig {
46    #[serde(default = "default_max_turns")]
47    pub max_turns: usize,
48    #[serde(default = "default_max_tokens")]
49    pub max_tokens: u32,
50    /// Context window management strategy for the orchestrator's own conversation.
51    pub context_strategy: Option<ContextStrategyConfig>,
52    /// Token threshold for summarization of the orchestrator's own context.
53    pub summarize_threshold: Option<u32>,
54    /// Timeout in seconds for the orchestrator's own tool calls.
55    pub tool_timeout_seconds: Option<u64>,
56    /// Maximum byte size for tool output on the orchestrator's own tools.
57    pub max_tool_output_bytes: Option<usize>,
58    /// Wall-clock deadline in seconds for the entire orchestrator run.
59    pub run_timeout_seconds: Option<u64>,
60    /// Enable the `form_squad` tool for dynamic agent squad formation.
61    /// When `None` (default), auto-enabled when there are >= 2 agents.
62    /// Set to `false` to disable for a simpler prompt with fewer tokens.
63    pub enable_squads: Option<bool>,
64    /// Reasoning/thinking effort level. Enables extended thinking on models
65    /// that support it (e.g., Qwen3 via OpenRouter, Claude with extended thinking).
66    /// Valid values: "high", "medium", "low", "none".
67    pub reasoning_effort: Option<String>,
68    /// Enable reflection prompts after tool results. When true, the agent pauses
69    /// to assess tool outputs before deciding the next action (Reflexion/CRITIC pattern).
70    pub enable_reflection: Option<bool>,
71    /// Tool output compression threshold in bytes. Outputs exceeding this size
72    /// are compressed via an LLM call that preserves factual content.
73    pub tool_output_compression_threshold: Option<usize>,
74    /// Maximum number of tool definitions sent per LLM turn. When agents have
75    /// many tools, filtering to the most relevant reduces context usage and cost.
76    pub max_tools_per_turn: Option<usize>,
77    /// Tool profile for pre-filtering tool definitions. Valid values:
78    /// "conversational", "standard", "full". Defaults to no filtering.
79    pub tool_profile: Option<String>,
80    /// Maximum consecutive identical tool-call turns before doom loop detection
81    /// triggers. When reached, tool calls get error results instead of executing.
82    pub max_identical_tool_calls: Option<u32>,
83    /// Maximum consecutive fuzzy-identical tool-call turns before doom loop detection.
84    /// Fuzzy matching compares sorted tool names (ignoring inputs).
85    pub max_fuzzy_identical_tool_calls: Option<u32>,
86    /// Maximum number of tool calls allowed in a single LLM turn. When a turn
87    /// contains more tool calls than this limit, the excess calls are rejected
88    /// with an error result (per-turn cap, not cumulative).
89    pub max_tool_calls_per_turn: Option<u32>,
90    /// Dispatch mode for orchestrator delegation. When `Sequential`, the
91    /// delegate_task schema constrains `maxItems: 1` so the LLM dispatches
92    /// one agent at a time. Defaults to `Parallel` when absent.
93    pub dispatch_mode: Option<DispatchMode>,
94    /// Task routing strategy: `auto` (default), `always_orchestrate`, `single_agent`.
95    /// `auto` uses heuristic scoring + capability matching to route simple tasks
96    /// to a single agent and complex tasks to the orchestrator.
97    #[serde(default)]
98    pub routing: RoutingMode,
99    /// Escalate from single-agent to orchestrator on failure. Default: true.
100    /// When a single-agent run fails with MaxTurnsExceeded, doom loop, or
101    /// excessive compaction, the task is re-run through the orchestrator.
102    #[serde(default = "super::default_true")]
103    pub escalation: bool,
104    /// Append the multi-agent collaboration prompt to sub-agent system prompts.
105    /// Teaches sub-agents blackboard protocol, dedup, cross-verification, and
106    /// structured execution. Default: true.
107    #[serde(default)]
108    pub multi_agent_prompt: Option<bool>,
109    /// Dynamic agent spawning configuration. When present, enables the `spawn_agent`
110    /// tool on the orchestrator, allowing the LLM to create specialist agents at runtime.
111    pub spawn: Option<SpawnConfig>,
112    /// Per-tenant in-flight token cap for the `TenantTokenTracker`.
113    /// When `None`, in-flight token tracking is disabled (effectively unbounded).
114    /// Must be > 0 when set.
115    #[serde(default, skip_serializing_if = "Option::is_none")]
116    pub max_tokens_in_flight_per_tenant: Option<usize>,
117}
118
119pub(super) fn default_max_turns() -> usize {
120    10
121}
122
123pub(super) fn default_max_tokens() -> u32 {
124    4096
125}
126
127impl Default for OrchestratorConfig {
128    fn default() -> Self {
129        Self {
130            max_turns: default_max_turns(),
131            max_tokens: default_max_tokens(),
132            context_strategy: None,
133            summarize_threshold: None,
134            tool_timeout_seconds: None,
135            max_tool_output_bytes: None,
136            run_timeout_seconds: None,
137            enable_squads: None,
138            reasoning_effort: None,
139            enable_reflection: None,
140            tool_output_compression_threshold: None,
141            max_tools_per_turn: None,
142            tool_profile: None,
143            max_identical_tool_calls: None,
144            max_fuzzy_identical_tool_calls: None,
145            max_tool_calls_per_turn: None,
146            dispatch_mode: None,
147            routing: RoutingMode::default(),
148            escalation: true,
149            multi_agent_prompt: None,
150            spawn: None,
151            max_tokens_in_flight_per_tenant: None,
152        }
153    }
154}
155
156/// An MCP server entry: a bare URL string, a full HTTP config with auth, or a
157/// stdio command to spawn as a child process.
158///
159/// Supports backward-compatible TOML: bare strings (`"http://..."`) deserialize
160/// as `Simple`, inline tables with `url` (`{ url = "...", auth_header = "..." }`)
161/// as `Full`, and inline tables with `command` (`{ command = "npx", args = [...] }`)
162/// as `Stdio`.
163#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
164#[serde(untagged)]
165pub enum McpServerEntry {
166    /// Bare URL string (backward-compatible).
167    Simple(String),
168    /// Full HTTP entry with optional auth header.
169    Full {
170        url: String,
171        #[serde(default)]
172        auth_header: Option<String>,
173        /// RFC 8707 resource indicator — audience for exchanged tokens.
174        /// Defaults to the `url` value when absent.
175        #[serde(default)]
176        resource: Option<String>,
177        /// OAuth scopes required by this MCP server (e.g., `["gmail.readonly"]`).
178        #[serde(default)]
179        scopes: Option<Vec<String>>,
180    },
181    /// Stdio transport — spawn a child process communicating via stdin/stdout.
182    Stdio {
183        command: String,
184        #[serde(default)]
185        args: Vec<String>,
186        #[serde(default)]
187        env: std::collections::HashMap<String, String>,
188    },
189}
190
191impl McpServerEntry {
192    /// Get the server URL (empty string for stdio entries).
193    pub fn url(&self) -> &str {
194        match self {
195            McpServerEntry::Simple(url) => url,
196            McpServerEntry::Full { url, .. } => url,
197            McpServerEntry::Stdio { .. } => "",
198        }
199    }
200
201    /// Get the optional auth header value.
202    pub fn auth_header(&self) -> Option<&str> {
203        match self {
204            McpServerEntry::Simple(_) => None,
205            McpServerEntry::Full { auth_header, .. } => auth_header.as_deref(),
206            McpServerEntry::Stdio { .. } => None,
207        }
208    }
209
210    /// Whether this entry uses stdio transport.
211    pub fn is_stdio(&self) -> bool {
212        matches!(self, McpServerEntry::Stdio { .. })
213    }
214
215    /// Get the RFC 8707 resource indicator (audience for token exchange).
216    /// Returns the explicit `resource` if set, otherwise falls back to the URL.
217    pub fn resource(&self) -> Option<&str> {
218        match self {
219            McpServerEntry::Simple(url) => Some(url.as_str()),
220            McpServerEntry::Full { resource, url, .. } => {
221                Some(resource.as_deref().unwrap_or(url.as_str()))
222            }
223            McpServerEntry::Stdio { .. } => None,
224        }
225    }
226
227    /// Get the OAuth scopes configured for this MCP server.
228    pub fn scopes(&self) -> Option<&[String]> {
229        match self {
230            McpServerEntry::Full { scopes, .. } => scopes.as_deref(),
231            _ => None,
232        }
233    }
234
235    /// Human-readable description for logging.
236    pub fn display_name(&self) -> String {
237        match self {
238            McpServerEntry::Simple(url) => url.clone(),
239            McpServerEntry::Full { url, .. } => url.clone(),
240            McpServerEntry::Stdio { command, args, .. } => {
241                if args.is_empty() {
242                    command.clone()
243                } else {
244                    format!("{} {}", command, args.join(" "))
245                }
246            }
247        }
248    }
249}
250
251/// How MCP resources are surfaced to agents.
252#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
253#[serde(rename_all = "snake_case")]
254pub enum McpResourceMode {
255    /// Resources become callable tools (agent decides when to read).
256    #[default]
257    Tools,
258    /// Pre-fetch resource content and inject into system prompt.
259    Context,
260    /// Skip resource discovery entirely.
261    None,
262}
263
264/// A sub-agent defined in the configuration file.
265#[derive(Debug, Deserialize)]
266pub struct AgentConfig {
267    pub name: String,
268    pub description: String,
269    #[serde(default)]
270    pub system_prompt: String,
271    /// Agent template to use as a base. The template provides default values
272    /// for system_prompt, max_tokens, max_turns, and other settings.
273    /// User-specified values override template defaults.
274    #[serde(default)]
275    pub template: Option<String>,
276    /// Skills to auto-inject into the system prompt at config resolution time.
277    /// Each skill name maps to a bundled or filesystem SKILL.md file.
278    #[serde(default)]
279    pub skills: Vec<String>,
280    #[serde(default)]
281    pub mcp_servers: Vec<McpServerEntry>,
282    /// A2A agent endpoints to discover and register as tools.
283    #[serde(default)]
284    pub a2a_agents: Vec<McpServerEntry>,
285    /// Context window management strategy for this agent.
286    pub context_strategy: Option<ContextStrategyConfig>,
287    /// Token threshold at which to trigger automatic summarization.
288    /// Only valid when `context_strategy` is not `SlidingWindow`.
289    pub summarize_threshold: Option<u32>,
290    /// Timeout in seconds for individual tool executions.
291    pub tool_timeout_seconds: Option<u64>,
292    /// Maximum byte size for individual tool output. Results exceeding this
293    /// limit are truncated with a `[truncated]` suffix.
294    pub max_tool_output_bytes: Option<usize>,
295    /// Per-agent turn limit. Overrides the orchestrator default when set.
296    pub max_turns: Option<usize>,
297    /// Per-agent token limit. Overrides the orchestrator default when set.
298    pub max_tokens: Option<u32>,
299    /// Optional JSON Schema for structured output. Expressed as an inline
300    /// TOML table that maps to the JSON Schema object. When set, the agent
301    /// receives a synthetic `__respond__` tool and returns structured JSON.
302    pub response_schema: Option<serde_json::Value>,
303    /// Wall-clock deadline in seconds for this agent's run.
304    pub run_timeout_seconds: Option<u64>,
305    /// Optional per-agent LLM provider override. When set, this agent uses
306    /// a different model/provider instead of the orchestrator's default.
307    pub provider: Option<AgentProviderConfig>,
308    /// Reasoning/thinking effort level. Overrides the orchestrator default.
309    /// Valid values: "high", "medium", "low", "none".
310    pub reasoning_effort: Option<String>,
311    /// Enable reflection prompts after tool results. Overrides the orchestrator default.
312    pub enable_reflection: Option<bool>,
313    /// Tool output compression threshold in bytes. Overrides the orchestrator default.
314    pub tool_output_compression_threshold: Option<usize>,
315    /// Maximum tools per turn for this agent. Overrides the orchestrator default.
316    pub max_tools_per_turn: Option<usize>,
317    /// Tool profile for pre-filtering tool definitions. Valid values:
318    /// "conversational" (memory + question only), "standard" (builtins only),
319    /// "full" (all tools). When absent, no pre-filtering is applied.
320    pub tool_profile: Option<String>,
321    /// Maximum consecutive identical tool-call turns before doom loop detection.
322    /// Overrides the orchestrator default.
323    pub max_identical_tool_calls: Option<u32>,
324    /// Maximum consecutive fuzzy-identical tool-call turns before doom loop detection.
325    /// Fuzzy matching compares sorted tool names (ignoring inputs). Overrides orchestrator default.
326    pub max_fuzzy_identical_tool_calls: Option<u32>,
327    /// Maximum number of tool calls allowed in a single LLM turn. Overrides the orchestrator default.
328    pub max_tool_calls_per_turn: Option<u32>,
329    /// Session pruning: truncate old tool results to save tokens.
330    /// When set, enables session-level pruning before each LLM call.
331    pub session_prune: Option<SessionPruneConfigToml>,
332    /// Enable recursive (cluster-then-summarize) summarization for long conversations.
333    pub recursive_summarization: Option<bool>,
334    /// Cumulative importance threshold for memory reflection triggers.
335    /// When the sum of stored memory importance values exceeds this threshold,
336    /// the store tool appends a reflection hint to guide the agent.
337    pub reflection_threshold: Option<u32>,
338    /// When true, run memory consolidation at session end (clusters related
339    /// episodic memories into semantic summaries). Requires memory and adds
340    /// LLM calls at session end.
341    pub consolidate_on_exit: Option<bool>,
342    /// Hard limit on cumulative tokens (input + output) across all turns.
343    /// When exceeded, the agent returns an error with partial usage data.
344    pub max_total_tokens: Option<u64>,
345    /// Per-agent guardrails override. When set, overrides the top-level
346    /// `[guardrails]` section for this agent.
347    pub guardrails: Option<GuardrailsConfig>,
348    /// LRU response cache capacity (number of entries). When set, identical
349    /// LLM requests (same system prompt, messages, tool names) return cached
350    /// responses without calling the LLM. Only non-streaming calls are cached.
351    #[serde(default)]
352    pub response_cache_size: Option<usize>,
353    /// How MCP resources are surfaced to the agent.
354    /// `"tools"` (default) — resources become callable tools.
355    /// `"context"` — pre-fetch and inject into system prompt.
356    /// `"none"` — skip resource discovery.
357    #[serde(default)]
358    pub mcp_resources: McpResourceMode,
359    /// Enable dangerous tools (bash) for this agent. Default: false in daemon mode.
360    #[serde(default)]
361    pub dangerous_tools: bool,
362    /// Audit mode: "full" (default) or "metadata_only".
363    /// MetadataOnly strips user content from audit records.
364    #[serde(default)]
365    pub audit_mode: Option<String>,
366    /// Optional allowlist of builtin tool names for this agent.
367    /// When set, only listed builtins are included. When absent, all builtins load.
368    /// Empty list `[]` disables all builtins (MCP-only agent).
369    #[serde(default)]
370    pub builtin_tools: Option<Vec<String>>,
371}
372
373/// TOML representation of session pruning configuration.
374#[derive(Debug, Clone, Deserialize)]
375pub struct SessionPruneConfigToml {
376    /// Number of recent message pairs to keep at full fidelity. Default: 2.
377    #[serde(default = "default_keep_recent_n")]
378    pub keep_recent_n: usize,
379    /// Maximum bytes for a pruned tool result. Default: 200.
380    #[serde(default = "default_pruned_max_bytes")]
381    pub pruned_tool_result_max_bytes: usize,
382    /// Whether to preserve the first user message (task). Default: true.
383    #[serde(default = "default_preserve_task")]
384    pub preserve_task: bool,
385}
386
/// Serde fallback for `SessionPruneConfigToml::keep_recent_n`.
fn default_keep_recent_n() -> usize {
    2
}
390
/// Serde fallback for `SessionPruneConfigToml::pruned_tool_result_max_bytes`.
fn default_pruned_max_bytes() -> usize {
    200
}
394
/// Serde fallback for `SessionPruneConfigToml::preserve_task`.
fn default_preserve_task() -> bool {
    true
}
398
399impl AgentConfig {
400    /// Clone all fields of this config into a new `AgentConfig`.
401    ///
402    /// `AgentConfig` intentionally does not derive `Clone` (to keep the derive
403    /// list short and avoid accidental copies in hot paths). Use this method
404    /// when an explicit copy is needed (e.g., template resolution).
405    pub fn clone_config(&self) -> Self {
406        Self {
407            name: self.name.clone(),
408            description: self.description.clone(),
409            system_prompt: self.system_prompt.clone(),
410            template: self.template.clone(),
411            skills: self.skills.clone(),
412            mcp_servers: self.mcp_servers.clone(),
413            a2a_agents: self.a2a_agents.clone(),
414            context_strategy: self.context_strategy.clone(),
415            summarize_threshold: self.summarize_threshold,
416            tool_timeout_seconds: self.tool_timeout_seconds,
417            max_tool_output_bytes: self.max_tool_output_bytes,
418            max_turns: self.max_turns,
419            max_tokens: self.max_tokens,
420            response_schema: self.response_schema.clone(),
421            run_timeout_seconds: self.run_timeout_seconds,
422            provider: self.provider.clone(),
423            reasoning_effort: self.reasoning_effort.clone(),
424            enable_reflection: self.enable_reflection,
425            tool_output_compression_threshold: self.tool_output_compression_threshold,
426            max_tools_per_turn: self.max_tools_per_turn,
427            tool_profile: self.tool_profile.clone(),
428            max_identical_tool_calls: self.max_identical_tool_calls,
429            max_fuzzy_identical_tool_calls: self.max_fuzzy_identical_tool_calls,
430            max_tool_calls_per_turn: self.max_tool_calls_per_turn,
431            session_prune: self.session_prune.clone(),
432            recursive_summarization: self.recursive_summarization,
433            reflection_threshold: self.reflection_threshold,
434            consolidate_on_exit: self.consolidate_on_exit,
435            max_total_tokens: self.max_total_tokens,
436            guardrails: self.guardrails.clone(),
437            response_cache_size: self.response_cache_size,
438            mcp_resources: self.mcp_resources,
439            dangerous_tools: self.dangerous_tools,
440            audit_mode: self.audit_mode.clone(),
441            builtin_tools: self.builtin_tools.clone(),
442        }
443    }
444}
445
446impl AgentProviderConfig {
447    /// Clone via Option::as_ref → clone pattern for non-Clone containers.
448    pub fn take_ref(opt: &Option<Self>) -> Option<Self> {
449        opt.clone()
450    }
451}
452
453impl SessionPruneConfigToml {
454    /// Clone via Option::as_ref → clone pattern for non-Clone containers.
455    pub fn take_ref(opt: &Option<Self>) -> Option<Self> {
456        opt.clone()
457    }
458}