heartbit-core 2026.507.3

The Rust agentic framework — agents, tools, LLM providers, memory, evaluation.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
#![allow(missing_docs)]
use serde::{Deserialize, Serialize};

use crate::agent::routing::RoutingMode;

use super::guardrails::GuardrailsConfig;

pub use crate::types::{DispatchMode, SpawnConfig};

/// Context window management strategy.
///
/// Deserialized as an internally tagged enum: the `type` key selects the
/// variant by its snake_case name (`unlimited`, `sliding_window`,
/// `summarize`), and the variant's fields sit alongside that key.
#[derive(Debug, Clone, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContextStrategyConfig {
    /// No trimming (default).
    Unlimited,
    /// Sliding window: trim old messages to stay within `max_tokens`.
    SlidingWindow { max_tokens: u32 },
    /// Summarize: compress old messages when context exceeds `threshold` tokens.
    Summarize { threshold: u32 },
}

/// Per-agent provider override. When set on an agent, overrides the
/// orchestrator's default provider for that agent only.
///
/// `name` and `model` are required; every other field may be omitted.
#[derive(Debug, Clone, Deserialize)]
pub struct AgentProviderConfig {
    /// Provider identifier for this agent.
    /// NOTE(review): presumably matched against the set of supported provider
    /// names when the config is resolved — confirm against the resolver.
    pub name: String,
    /// Model identifier to request from that provider.
    pub model: String,
    /// Custom API endpoint URL (overrides the default for the provider).
    /// Useful for self-hosted models, Azure, or proxies.
    #[serde(default)]
    pub base_url: Option<String>,
    /// Direct API key (alternative to environment variable).
    /// Prefer env vars in production; this is for testing/local dev.
    #[serde(default)]
    pub api_key: Option<String>,
    /// Enable Anthropic prompt caching for this agent.
    /// Defaults to `false` when omitted.
    #[serde(default)]
    pub prompt_caching: bool,
    /// Per-agent model cascading override.
    pub cascade: Option<super::provider::CascadeConfig>,
}

/// Orchestrator-level settings with sensible defaults.
///
/// Every field may be omitted from the input. `max_turns` and `max_tokens`
/// fall back to the helper functions named in their `serde(default = ...)`
/// attributes; the `Option` fields are `None` when absent.
#[derive(Debug, Deserialize)]
pub struct OrchestratorConfig {
    /// Turn limit for the orchestrator, and the default that agents inherit
    /// unless they set their own. Falls back to `default_max_turns()` (10).
    #[serde(default = "default_max_turns")]
    pub max_turns: usize,
    /// Token limit for the orchestrator, and the default that agents inherit
    /// unless they set their own. Falls back to `default_max_tokens()` (4096).
    #[serde(default = "default_max_tokens")]
    pub max_tokens: u32,
    /// Context window management strategy for the orchestrator's own conversation.
    pub context_strategy: Option<ContextStrategyConfig>,
    /// Token threshold for summarization of the orchestrator's own context.
    pub summarize_threshold: Option<u32>,
    /// Timeout in seconds for the orchestrator's own tool calls.
    pub tool_timeout_seconds: Option<u64>,
    /// Maximum byte size for tool output on the orchestrator's own tools.
    pub max_tool_output_bytes: Option<usize>,
    /// Wall-clock deadline in seconds for the entire orchestrator run.
    pub run_timeout_seconds: Option<u64>,
    /// Enable the `form_squad` tool for dynamic agent squad formation.
    /// When `None` (default), auto-enabled when there are >= 2 agents.
    /// Set to `false` to disable for a simpler prompt with fewer tokens.
    pub enable_squads: Option<bool>,
    /// Reasoning/thinking effort level. Enables extended thinking on models
    /// that support it (e.g., Qwen3 via OpenRouter, Claude with extended thinking).
    /// Valid values: "high", "medium", "low", "none".
    pub reasoning_effort: Option<String>,
    /// Enable reflection prompts after tool results. When true, the agent pauses
    /// to assess tool outputs before deciding the next action (Reflexion/CRITIC pattern).
    pub enable_reflection: Option<bool>,
    /// Tool output compression threshold in bytes. Outputs exceeding this size
    /// are compressed via an LLM call that preserves factual content.
    pub tool_output_compression_threshold: Option<usize>,
    /// Maximum number of tool definitions sent per LLM turn. When agents have
    /// many tools, filtering to the most relevant reduces context usage and cost.
    pub max_tools_per_turn: Option<usize>,
    /// Tool profile for pre-filtering tool definitions. Valid values:
    /// "conversational", "standard", "full". Defaults to no filtering.
    pub tool_profile: Option<String>,
    /// Maximum consecutive identical tool-call turns before doom loop detection
    /// triggers. When reached, tool calls get error results instead of executing.
    pub max_identical_tool_calls: Option<u32>,
    /// Maximum consecutive fuzzy-identical tool-call turns before doom loop detection.
    /// Fuzzy matching compares sorted tool names (ignoring inputs).
    pub max_fuzzy_identical_tool_calls: Option<u32>,
    /// Maximum number of tool calls allowed in a single LLM turn. When a turn
    /// contains more tool calls than this limit, the excess calls are rejected
    /// with an error result (per-turn cap, not cumulative).
    pub max_tool_calls_per_turn: Option<u32>,
    /// Dispatch mode for orchestrator delegation. When `Sequential`, the
    /// delegate_task schema constrains `maxItems: 1` so the LLM dispatches
    /// one agent at a time. Defaults to `Parallel` when absent.
    pub dispatch_mode: Option<DispatchMode>,
    /// Task routing strategy: `auto` (default), `always_orchestrate`, `single_agent`.
    /// `auto` uses heuristic scoring + capability matching to route simple tasks
    /// to a single agent and complex tasks to the orchestrator.
    #[serde(default)]
    pub routing: RoutingMode,
    /// Escalate from single-agent to orchestrator on failure. Default: true.
    /// When a single-agent run fails with MaxTurnsExceeded, doom loop, or
    /// excessive compaction, the task is re-run through the orchestrator.
    #[serde(default = "super::default_true")]
    pub escalation: bool,
    /// Append the multi-agent collaboration prompt to sub-agent system prompts.
    /// Teaches sub-agents blackboard protocol, dedup, cross-verification, and
    /// structured execution. Default: true.
    ///
    /// NOTE(review): the stored value is `None` when the key is omitted, so the
    /// documented `true` default has to be applied wherever this field is
    /// read — confirm at the call site.
    #[serde(default)]
    pub multi_agent_prompt: Option<bool>,
    /// Dynamic agent spawning configuration. When present, enables the `spawn_agent`
    /// tool on the orchestrator, allowing the LLM to create specialist agents at runtime.
    pub spawn: Option<SpawnConfig>,
    /// Per-tenant in-flight token cap for the `TenantTokenTracker`.
    /// When `None`, in-flight token tracking is disabled (effectively unbounded).
    /// Must be > 0 when set.
    ///
    /// NOTE(review): the `> 0` requirement is not enforced by this type, and
    /// `skip_serializing_if` only takes effect if `Serialize` is derived —
    /// this struct currently derives `Deserialize` only.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_tokens_in_flight_per_tenant: Option<usize>,
}

/// Fallback turn limit used when `max_turns` is absent from the
/// orchestrator configuration.
pub(super) fn default_max_turns() -> usize {
    const DEFAULT_MAX_TURNS: usize = 10;
    DEFAULT_MAX_TURNS
}

/// Fallback token limit used when `max_tokens` is absent from the
/// orchestrator configuration.
pub(super) fn default_max_tokens() -> u32 {
    const DEFAULT_MAX_TOKENS: u32 = 4096;
    DEFAULT_MAX_TOKENS
}

impl Default for OrchestratorConfig {
    /// Builds an orchestrator configuration with nothing customised.
    ///
    /// The turn and token limits come from `default_max_turns()` and
    /// `default_max_tokens()`, routing uses `RoutingMode::default()`,
    /// escalation is enabled, and every optional setting is left unset.
    fn default() -> Self {
        // The only fields whose default is something other than `None`.
        let max_turns = default_max_turns();
        let max_tokens = default_max_tokens();
        let routing = RoutingMode::default();
        let escalation = true;

        Self {
            max_turns,
            max_tokens,
            routing,
            escalation,

            // Context management.
            context_strategy: None,
            summarize_threshold: None,

            // Timeouts and output limits.
            tool_timeout_seconds: None,
            max_tool_output_bytes: None,
            run_timeout_seconds: None,
            tool_output_compression_threshold: None,

            // Tool selection and loop protection.
            max_tools_per_turn: None,
            tool_profile: None,
            max_identical_tool_calls: None,
            max_fuzzy_identical_tool_calls: None,
            max_tool_calls_per_turn: None,

            // Reasoning behaviour.
            reasoning_effort: None,
            enable_reflection: None,

            // Multi-agent coordination.
            enable_squads: None,
            dispatch_mode: None,
            multi_agent_prompt: None,
            spawn: None,
            max_tokens_in_flight_per_tenant: None,
        }
    }
}

/// An MCP server entry: a bare URL string, a full HTTP config with auth, or a
/// stdio command to spawn as a child process.
///
/// Supports backward-compatible TOML: bare strings (`"http://..."`) deserialize
/// as `Simple`, inline tables with `url` (`{ url = "...", auth_header = "..." }`)
/// as `Full`, and inline tables with `command` (`{ command = "npx", args = [...] }`)
/// as `Stdio`.
///
/// The enum is `untagged`, so serde tries the variants in declaration order and
/// keeps the first one that fits; reordering the variants changes how ambiguous
/// input is interpreted.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum McpServerEntry {
    /// Bare URL string (backward-compatible).
    Simple(String),
    /// Full HTTP entry with optional auth header.
    Full {
        /// Server URL. The only required key of this variant.
        url: String,
        /// Optional auth header value; `None` when omitted.
        #[serde(default)]
        auth_header: Option<String>,
        /// RFC 8707 resource indicator — audience for exchanged tokens.
        /// Defaults to the `url` value when absent.
        #[serde(default)]
        resource: Option<String>,
        /// OAuth scopes required by this MCP server (e.g., `["gmail.readonly"]`).
        #[serde(default)]
        scopes: Option<Vec<String>>,
    },
    /// Stdio transport — spawn a child process communicating via stdin/stdout.
    Stdio {
        /// Program to spawn. The only required key of this variant.
        command: String,
        /// Arguments for `command`; an empty list when omitted.
        #[serde(default)]
        args: Vec<String>,
        /// Environment variable map for the child process; empty when omitted.
        /// NOTE(review): whether these extend or replace the inherited
        /// environment is decided by the spawning code — confirm there.
        #[serde(default)]
        env: std::collections::HashMap<String, String>,
    },
}

impl McpServerEntry {
    /// Get the server URL (empty string for stdio entries).
    pub fn url(&self) -> &str {
        match self {
            McpServerEntry::Simple(url) => url,
            McpServerEntry::Full { url, .. } => url,
            McpServerEntry::Stdio { .. } => "",
        }
    }

    /// Get the optional auth header value.
    pub fn auth_header(&self) -> Option<&str> {
        match self {
            McpServerEntry::Simple(_) => None,
            McpServerEntry::Full { auth_header, .. } => auth_header.as_deref(),
            McpServerEntry::Stdio { .. } => None,
        }
    }

    /// Whether this entry uses stdio transport.
    pub fn is_stdio(&self) -> bool {
        matches!(self, McpServerEntry::Stdio { .. })
    }

    /// Get the RFC 8707 resource indicator (audience for token exchange).
    /// Returns the explicit `resource` if set, otherwise falls back to the URL.
    pub fn resource(&self) -> Option<&str> {
        match self {
            McpServerEntry::Simple(url) => Some(url.as_str()),
            McpServerEntry::Full { resource, url, .. } => {
                Some(resource.as_deref().unwrap_or(url.as_str()))
            }
            McpServerEntry::Stdio { .. } => None,
        }
    }

    /// Get the OAuth scopes configured for this MCP server.
    pub fn scopes(&self) -> Option<&[String]> {
        match self {
            McpServerEntry::Full { scopes, .. } => scopes.as_deref(),
            _ => None,
        }
    }

    /// Human-readable description for logging.
    pub fn display_name(&self) -> String {
        match self {
            McpServerEntry::Simple(url) => url.clone(),
            McpServerEntry::Full { url, .. } => url.clone(),
            McpServerEntry::Stdio { command, args, .. } => {
                if args.is_empty() {
                    command.clone()
                } else {
                    format!("{} {}", command, args.join(" "))
                }
            }
        }
    }
}

/// How MCP resources are surfaced to agents.
///
/// Variants are (de)serialized by their snake_case names: `tools`, `context`,
/// `none`. `Tools` is the `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum McpResourceMode {
    /// Resources become callable tools (agent decides when to read).
    #[default]
    Tools,
    /// Pre-fetch resource content and inject into system prompt.
    Context,
    /// Skip resource discovery entirely.
    None,
}

/// A sub-agent defined in the configuration file.
///
/// Only `name` and `description` are required. Fields marked
/// `#[serde(default)]` take their type's default when omitted, and `Option`
/// fields are `None` when omitted.
#[derive(Debug, Deserialize)]
pub struct AgentConfig {
    /// Agent name (required).
    pub name: String,
    /// Description of the agent (required).
    /// NOTE(review): presumably what the orchestrator sees when choosing a
    /// delegate — confirm against the prompt builder.
    pub description: String,
    /// System prompt for the agent; an empty string when omitted.
    #[serde(default)]
    pub system_prompt: String,
    /// Agent template to use as a base. The template provides default values
    /// for system_prompt, max_tokens, max_turns, and other settings.
    /// User-specified values override template defaults.
    #[serde(default)]
    pub template: Option<String>,
    /// Skills to auto-inject into the system prompt at config resolution time.
    /// Each skill name maps to a bundled or filesystem SKILL.md file.
    #[serde(default)]
    pub skills: Vec<String>,
    /// MCP server entries for this agent; an empty list when omitted.
    #[serde(default)]
    pub mcp_servers: Vec<McpServerEntry>,
    /// A2A agent endpoints to discover and register as tools.
    #[serde(default)]
    pub a2a_agents: Vec<McpServerEntry>,
    /// Context window management strategy for this agent.
    pub context_strategy: Option<ContextStrategyConfig>,
    /// Token threshold at which to trigger automatic summarization.
    /// Only valid when `context_strategy` is not `SlidingWindow`.
    pub summarize_threshold: Option<u32>,
    /// Timeout in seconds for individual tool executions.
    pub tool_timeout_seconds: Option<u64>,
    /// Maximum byte size for individual tool output. Results exceeding this
    /// limit are truncated with a `[truncated]` suffix.
    pub max_tool_output_bytes: Option<usize>,
    /// Per-agent turn limit. Overrides the orchestrator default when set.
    pub max_turns: Option<usize>,
    /// Per-agent token limit. Overrides the orchestrator default when set.
    pub max_tokens: Option<u32>,
    /// Optional JSON Schema for structured output. Expressed as an inline
    /// TOML table that maps to the JSON Schema object. When set, the agent
    /// receives a synthetic `__respond__` tool and returns structured JSON.
    pub response_schema: Option<serde_json::Value>,
    /// Wall-clock deadline in seconds for this agent's run.
    pub run_timeout_seconds: Option<u64>,
    /// Optional per-agent LLM provider override. When set, this agent uses
    /// a different model/provider instead of the orchestrator's default.
    pub provider: Option<AgentProviderConfig>,
    /// Reasoning/thinking effort level. Overrides the orchestrator default.
    /// Valid values: "high", "medium", "low", "none".
    pub reasoning_effort: Option<String>,
    /// Enable reflection prompts after tool results. Overrides the orchestrator default.
    pub enable_reflection: Option<bool>,
    /// Tool output compression threshold in bytes. Overrides the orchestrator default.
    pub tool_output_compression_threshold: Option<usize>,
    /// Maximum tools per turn for this agent. Overrides the orchestrator default.
    pub max_tools_per_turn: Option<usize>,
    /// Tool profile for pre-filtering tool definitions. Valid values:
    /// "conversational" (memory + question only), "standard" (builtins only),
    /// "full" (all tools). When absent, no pre-filtering is applied.
    pub tool_profile: Option<String>,
    /// Maximum consecutive identical tool-call turns before doom loop detection.
    /// Overrides the orchestrator default.
    pub max_identical_tool_calls: Option<u32>,
    /// Maximum consecutive fuzzy-identical tool-call turns before doom loop detection.
    /// Fuzzy matching compares sorted tool names (ignoring inputs). Overrides orchestrator default.
    pub max_fuzzy_identical_tool_calls: Option<u32>,
    /// Maximum number of tool calls allowed in a single LLM turn. Overrides the orchestrator default.
    pub max_tool_calls_per_turn: Option<u32>,
    /// Session pruning: truncate old tool results to save tokens.
    /// When set, enables session-level pruning before each LLM call.
    pub session_prune: Option<SessionPruneConfigToml>,
    /// Enable recursive (cluster-then-summarize) summarization for long conversations.
    pub recursive_summarization: Option<bool>,
    /// Cumulative importance threshold for memory reflection triggers.
    /// When the sum of stored memory importance values exceeds this threshold,
    /// the store tool appends a reflection hint to guide the agent.
    pub reflection_threshold: Option<u32>,
    /// When true, run memory consolidation at session end (clusters related
    /// episodic memories into semantic summaries). Requires memory and adds
    /// LLM calls at session end.
    pub consolidate_on_exit: Option<bool>,
    /// Hard limit on cumulative tokens (input + output) across all turns.
    /// When exceeded, the agent returns an error with partial usage data.
    pub max_total_tokens: Option<u64>,
    /// Per-agent guardrails override. When set, overrides the top-level
    /// `[guardrails]` section for this agent.
    pub guardrails: Option<GuardrailsConfig>,
    /// LRU response cache capacity (number of entries). When set, identical
    /// LLM requests (same system prompt, messages, tool names) return cached
    /// responses without calling the LLM. Only non-streaming calls are cached.
    #[serde(default)]
    pub response_cache_size: Option<usize>,
    /// How MCP resources are surfaced to the agent.
    /// `"tools"` (default) — resources become callable tools.
    /// `"context"` — pre-fetch and inject into system prompt.
    /// `"none"` — skip resource discovery.
    #[serde(default)]
    pub mcp_resources: McpResourceMode,
    /// Enable dangerous tools (bash) for this agent. Default: false in daemon mode.
    #[serde(default)]
    pub dangerous_tools: bool,
    /// Audit mode: "full" (default) or "metadata_only".
    /// MetadataOnly strips user content from audit records.
    #[serde(default)]
    pub audit_mode: Option<String>,
    /// Optional allowlist of builtin tool names for this agent.
    /// When set, only listed builtins are included. When absent, all builtins load.
    /// Empty list `[]` disables all builtins (MCP-only agent).
    #[serde(default)]
    pub builtin_tools: Option<Vec<String>>,
}

/// TOML representation of session pruning configuration.
///
/// Every key may be omitted; each missing key is filled in by the helper
/// function named in its `serde(default = ...)` attribute.
#[derive(Debug, Clone, Deserialize)]
pub struct SessionPruneConfigToml {
    /// Number of recent message pairs to keep at full fidelity. Default: 2.
    #[serde(default = "default_keep_recent_n")]
    pub keep_recent_n: usize,
    /// Maximum bytes for a pruned tool result. Default: 200.
    #[serde(default = "default_pruned_max_bytes")]
    pub pruned_tool_result_max_bytes: usize,
    /// Whether to preserve the first user message (task). Default: true.
    #[serde(default = "default_preserve_task")]
    pub preserve_task: bool,
}

/// Serde fallback for `SessionPruneConfigToml::keep_recent_n`.
fn default_keep_recent_n() -> usize {
    const KEEP_RECENT_N: usize = 2;
    KEEP_RECENT_N
}

/// Serde fallback for `SessionPruneConfigToml::pruned_tool_result_max_bytes`.
fn default_pruned_max_bytes() -> usize {
    const PRUNED_MAX_BYTES: usize = 200;
    PRUNED_MAX_BYTES
}

/// Serde fallback for `SessionPruneConfigToml::preserve_task`.
fn default_preserve_task() -> bool {
    const PRESERVE_TASK: bool = true;
    PRESERVE_TASK
}

impl AgentConfig {
    /// Produce an independent copy of this configuration.
    ///
    /// `AgentConfig` deliberately does not derive `Clone`, so that copies are
    /// always explicit and never happen by accident in hot paths. Call this
    /// when a duplicate is genuinely required (e.g., template resolution).
    pub fn clone_config(&self) -> Self {
        // Destructure without `..` so that a field added to the struct must be
        // handled here before the crate compiles again.
        let Self {
            name,
            description,
            system_prompt,
            template,
            skills,
            mcp_servers,
            a2a_agents,
            context_strategy,
            summarize_threshold,
            tool_timeout_seconds,
            max_tool_output_bytes,
            max_turns,
            max_tokens,
            response_schema,
            run_timeout_seconds,
            provider,
            reasoning_effort,
            enable_reflection,
            tool_output_compression_threshold,
            max_tools_per_turn,
            tool_profile,
            max_identical_tool_calls,
            max_fuzzy_identical_tool_calls,
            max_tool_calls_per_turn,
            session_prune,
            recursive_summarization,
            reflection_threshold,
            consolidate_on_exit,
            max_total_tokens,
            guardrails,
            response_cache_size,
            mcp_resources,
            dangerous_tools,
            audit_mode,
            builtin_tools,
        } = self;

        // Heap-backed fields are cloned; plain `Copy` values are dereferenced.
        Self {
            name: name.clone(),
            description: description.clone(),
            system_prompt: system_prompt.clone(),
            template: template.clone(),
            skills: skills.clone(),
            mcp_servers: mcp_servers.clone(),
            a2a_agents: a2a_agents.clone(),
            context_strategy: context_strategy.clone(),
            summarize_threshold: *summarize_threshold,
            tool_timeout_seconds: *tool_timeout_seconds,
            max_tool_output_bytes: *max_tool_output_bytes,
            max_turns: *max_turns,
            max_tokens: *max_tokens,
            response_schema: response_schema.clone(),
            run_timeout_seconds: *run_timeout_seconds,
            provider: provider.clone(),
            reasoning_effort: reasoning_effort.clone(),
            enable_reflection: *enable_reflection,
            tool_output_compression_threshold: *tool_output_compression_threshold,
            max_tools_per_turn: *max_tools_per_turn,
            tool_profile: tool_profile.clone(),
            max_identical_tool_calls: *max_identical_tool_calls,
            max_fuzzy_identical_tool_calls: *max_fuzzy_identical_tool_calls,
            max_tool_calls_per_turn: *max_tool_calls_per_turn,
            session_prune: session_prune.clone(),
            recursive_summarization: *recursive_summarization,
            reflection_threshold: *reflection_threshold,
            consolidate_on_exit: *consolidate_on_exit,
            max_total_tokens: *max_total_tokens,
            guardrails: guardrails.clone(),
            response_cache_size: *response_cache_size,
            mcp_resources: *mcp_resources,
            dangerous_tools: *dangerous_tools,
            audit_mode: audit_mode.clone(),
            builtin_tools: builtin_tools.clone(),
        }
    }
}

impl AgentProviderConfig {
    /// Return an owned copy of an optional provider config held behind a
    /// shared reference.
    ///
    /// Despite the name, nothing is taken: `opt` is left untouched and the
    /// result is a clone of its contents (`None` stays `None`).
    pub fn take_ref(opt: &Option<Self>) -> Option<Self> {
        opt.as_ref().cloned()
    }
}

impl SessionPruneConfigToml {
    /// Return an owned copy of an optional pruning config held behind a
    /// shared reference.
    ///
    /// Despite the name, nothing is taken: `opt` is left untouched and the
    /// result is a clone of its contents (`None` stays `None`).
    pub fn take_ref(opt: &Option<Self>) -> Option<Self> {
        opt.as_ref().cloned()
    }
}