heartbit-core 2026.506.2

The Rust agentic framework — agents, tools, LLM providers, memory, evaluation.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
#![allow(missing_docs)]
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;

use crate::error::Error;
use crate::knowledge::KnowledgeBase;
use crate::llm::LlmProvider;
use crate::llm::types::ToolDefinition;
use crate::memory::Memory;
use crate::tool::Tool;
use crate::tool::builtins::OnQuestion;

use super::audit;
use super::cache;
use super::context::ContextStrategy;
use super::events::OnEvent;
use super::guardrail::Guardrail;
use super::instructions;
use super::observability;
use super::permission;
use super::pruner;
use super::runner::{AgentRunner, OnInput, RESOURCEFULNESS_GUIDELINES};
use super::tool_filter;

/// Builder for [`AgentRunner`].
///
/// Construct via [`AgentRunner::builder`], configure the agent with chainable
/// setter methods, then call [`build`](AgentRunnerBuilder::build) to produce
/// the runner. Build validates several invariants: `on_input` and
/// `structured_schema` are mutually exclusive, `max_tool_calls_per_turn = 0` is
/// rejected, and turn/token limits must be non-zero. For multi-agent scenarios
/// use [`OrchestratorBuilder`](crate::agent::orchestrator::OrchestratorBuilder)
/// instead, which internally wraps an `AgentRunner` with sub-agent delegation.
///
/// Every `Option` field left as `None` means "feature not configured"; the
/// per-field notes below state what `build` does with each value.
pub struct AgentRunnerBuilder<P: LlmProvider> {
    /// LLM provider handed unchanged to the built runner.
    pub(super) provider: Arc<P>,
    /// Agent name. Must be non-empty at build time; also passed to the memory
    /// tools when a memory store is attached.
    pub(super) name: String,
    /// Base system prompt. `build` prepends `instruction_text` (if any) and
    /// appends the workspace hint, resourcefulness guidelines and current date.
    pub(super) system_prompt: String,
    /// Explicitly registered tools, in registration order. Order matters:
    /// `build` deduplicates by name and keeps the first registration.
    pub(super) tools: Vec<Arc<dyn Tool>>,
    /// Turn limit forwarded to the runner. Zero is rejected at build time.
    pub(super) max_turns: usize,
    /// Token limit forwarded to the runner as `max_tokens`. Zero is rejected at
    /// build time. NOTE(review): presumably the per-response output cap (as
    /// opposed to the cumulative `max_total_tokens`) — confirm against the runner.
    pub(super) max_tokens: u32,
    /// Context management strategy. `None` resolves to
    /// `ContextStrategy::Unlimited` at build time.
    pub(super) context_strategy: Option<ContextStrategy>,
    /// Token threshold that triggers automatic summarization. Cannot be
    /// combined with `ContextStrategy::SlidingWindow` (rejected at build time).
    pub(super) summarize_threshold: Option<u32>,
    /// Optional memory store. When set, memory tools are added at build time
    /// and the store itself is also handed to the runner.
    pub(super) memory: Option<Arc<dyn Memory>>,
    /// Optional knowledge base. When set, knowledge tools scoped to the audit
    /// tenant/user are added at build time.
    pub(super) knowledge_base: Option<Arc<dyn KnowledgeBase>>,
    /// Optional streaming-text callback.
    pub(super) on_text: Option<Arc<crate::llm::OnText>>,
    /// Optional human-in-the-loop approval callback for tool calls.
    pub(super) on_approval: Option<Arc<crate::llm::OnApproval>>,
    /// Optional timeout applied to individual tool executions.
    pub(super) tool_timeout: Option<Duration>,
    /// Optional maximum byte size for individual tool output content.
    pub(super) max_tool_output_bytes: Option<usize>,
    /// Optional JSON Schema for structured output. When set, `build` appends a
    /// synthetic `__respond__` tool definition carrying this schema.
    /// Mutually exclusive with `on_input`.
    pub(super) structured_schema: Option<serde_json::Value>,
    /// Optional callback receiving structured agent events.
    pub(super) on_event: Option<Arc<OnEvent>>,
    /// Guardrails in registration order.
    pub(super) guardrails: Vec<Arc<dyn Guardrail>>,
    /// Optional callback for structured user questions. When set, a
    /// `QuestionTool` wrapping it is added at build time.
    pub(super) on_question: Option<Arc<OnQuestion>>,
    /// Optional interactive-mode input callback. Mutually exclusive with
    /// `structured_schema`.
    pub(super) on_input: Option<Arc<OnInput>>,
    /// Optional wall-clock deadline for the entire run.
    pub(super) run_timeout: Option<Duration>,
    /// Optional reasoning/thinking effort level.
    pub(super) reasoning_effort: Option<crate::llm::types::ReasoningEffort>,
    /// Reflection flag forwarded unchanged to the runner.
    pub(super) enable_reflection: bool,
    /// Tool-output compression threshold forwarded to the runner. `Some(0)` is
    /// rejected at build time.
    pub(super) tool_output_compression_threshold: Option<usize>,
    /// Limit on the number of tool *definitions* offered to the LLM per turn.
    /// `Some(0)` is rejected at build time.
    pub(super) max_tools_per_turn: Option<usize>,
    /// Optional static tool profile used to pre-filter tool definitions.
    pub(super) tool_profile: Option<tool_filter::ToolProfile>,
    /// Limit on consecutive identical tool-call turns ("doom loop" detection).
    /// `Some(0)` is rejected at build time.
    pub(super) max_identical_tool_calls: Option<u32>,
    /// Limit on consecutive fuzzy-identical tool-call turns (same tool names,
    /// inputs ignored). `Some(0)` is rejected at build time.
    pub(super) max_fuzzy_identical_tool_calls: Option<u32>,
    /// Hard cap on the number of tool invocations the LLM may emit per turn.
    /// Distinct from `max_tools_per_turn` (which limits tool *definitions* offered
    /// to the LLM). `None` = unlimited. Zero is rejected at build time.
    pub(super) max_tool_calls_per_turn: Option<u32>,
    /// Declarative permission rules for tool calls; wrapped in a
    /// `parking_lot::RwLock` when the runner is built.
    pub(super) permission_rules: permission::PermissionRuleset,
    /// Instruction file contents to prepend to the system prompt.
    pub(super) instruction_text: Option<String>,
    /// Optional shared store for persisted AlwaysAllow/AlwaysDeny decisions.
    pub(super) learned_permissions: Option<Arc<std::sync::Mutex<permission::LearnedPermissions>>>,
    /// Optional LSP manager used to collect diagnostics after file-modifying tools.
    pub(super) lsp_manager: Option<Arc<crate::lsp::LspManager>>,
    /// Optional session pruning configuration.
    pub(super) session_prune_config: Option<pruner::SessionPruneConfig>,
    /// Whether recursive (cluster-then-summarize) summarization is enabled.
    pub(super) enable_recursive_summarization: bool,
    /// Cumulative-importance threshold passed to the memory tools at build
    /// time. Only consumed when `memory` is set.
    pub(super) reflection_threshold: Option<u32>,
    /// Whether automatic memory consolidation runs at session end.
    pub(super) consolidate_on_exit: bool,
    /// Explicit observability mode. Resolved together with the
    /// observability environment variable at build time.
    pub(super) observability_mode: Option<observability::ObservabilityMode>,
    /// Optional workspace root for file tool path resolution and system prompt.
    pub(super) workspace: Option<std::path::PathBuf>,
    /// Hard limit on cumulative tokens (input + output) across all turns.
    pub(super) max_total_tokens: Option<u64>,
    /// Controls whether audit records include full content or metadata only.
    pub(super) audit_mode: audit::AuditMode,
    /// Optional audit trail for recording untruncated agent decisions.
    pub(super) audit_trail: Option<Arc<dyn audit::AuditTrail>>,
    /// Optional user context for multi-tenant audit enrichment.
    pub(super) audit_user_id: Option<String>,
    /// Optional tenant context for multi-tenant audit enrichment. Together with
    /// `audit_user_id` it also determines the tenant scope given to the memory
    /// and knowledge tools at build time.
    pub(super) audit_tenant_id: Option<String>,
    /// Delegation chain for audit records (e.g., `["heartbit-agent"]` when acting OBO user).
    pub(super) audit_delegation_chain: Vec<String>,
    /// Optional LRU response cache size. When set, builds a `ResponseCache`.
    pub(super) response_cache_size: Option<usize>,
    /// Optional per-tenant in-flight token tracker. When set, the runner calls
    /// `tracker.adjust(&scope, delta)` after each LLM response to reconcile
    /// actual usage against the estimate. Has no effect when `audit_tenant_id`
    /// is unset.
    pub(super) tenant_tracker: Option<Arc<crate::agent::tenant_tracker::TenantTokenTracker>>,
}

impl<P: LlmProvider> AgentRunnerBuilder<P> {
    /// Set the agent name.
    ///
    /// The name must be non-empty when [`build`](Self::build) is called. It is
    /// also passed to the memory tools when a memory store is attached.
    pub fn name(mut self, name: impl Into<String>) -> Self {
        self.name = name.into();
        self
    }

    /// Set the base system prompt.
    ///
    /// [`build`](Self::build) may extend it: instruction text is prepended, and
    /// a workspace hint, resourcefulness guidelines (only when power tools are
    /// registered) and the current UTC date/time are appended.
    pub fn system_prompt(mut self, prompt: impl Into<String>) -> Self {
        self.system_prompt = prompt.into();
        self
    }

    /// Register a single tool.
    ///
    /// Tools are deduplicated by name at `build()` time with first-wins
    /// semantics, so registration order matters (see [`tools`](Self::tools)).
    pub fn tool(mut self, tool: Arc<dyn Tool>) -> Self {
        self.tools.push(tool);
        self
    }

    /// Register a batch of tools.
    ///
    /// SECURITY (F-MCP-2): when MCP-discovered tools and builtins coexist,
    /// **register the trusted builtins first**. `build()` deduplicates by
    /// name with first-wins semantics, so a hostile MCP server that exports a
    /// tool named `bash` will be shadowed by the local `bash` builtin only if
    /// the builtin was added before. A collision is logged via
    /// `tracing::error!`; `build()` itself does not write to the audit trail.
    pub fn tools(mut self, tools: Vec<Arc<dyn Tool>>) -> Self {
        self.tools.extend(tools);
        self
    }

    /// Set the turn limit for a run. Zero is rejected at build time.
    pub fn max_turns(mut self, max_turns: usize) -> Self {
        self.max_turns = max_turns;
        self
    }

    /// Set the `max_tokens` value forwarded to the runner. Zero is rejected at
    /// build time.
    ///
    /// Distinct from [`max_total_tokens`](Self::max_total_tokens), which is a
    /// cumulative budget across all turns.
    pub fn max_tokens(mut self, max_tokens: u32) -> Self {
        self.max_tokens = max_tokens;
        self
    }

    /// Set the context management strategy.
    ///
    /// Default: `ContextStrategy::Unlimited`. `SlidingWindow` cannot be combined
    /// with [`summarize_threshold`](Self::summarize_threshold); `build()`
    /// rejects that combination.
    pub fn context_strategy(mut self, strategy: ContextStrategy) -> Self {
        self.context_strategy = Some(strategy);
        self
    }

    /// Set the token threshold at which to trigger automatic summarization.
    pub fn summarize_threshold(mut self, threshold: u32) -> Self {
        self.summarize_threshold = Some(threshold);
        self
    }

    /// Attach a memory store to the agent. Memory tools (store, recall, update,
    /// forget, consolidate) are created at `build()` time using the builder's `name`.
    ///
    /// Call `.name()` before or after `.memory()` — the agent name is resolved at build.
    pub fn memory(mut self, memory: Arc<dyn Memory>) -> Self {
        self.memory = Some(memory);
        self
    }

    /// Attach a knowledge base to the agent. The `knowledge_search` tool is
    /// added at `build()` time.
    pub fn knowledge(mut self, kb: Arc<dyn KnowledgeBase>) -> Self {
        self.knowledge_base = Some(kb);
        self
    }

    /// Set a callback for streaming text output. When set, the agent uses
    /// `stream_complete` instead of `complete`, calling the callback for each
    /// text delta as it arrives from the LLM.
    ///
    /// The callback must not panic. A panic inside the callback will propagate
    /// through the agent loop and abort the run.
    pub fn on_text(mut self, callback: Arc<crate::llm::OnText>) -> Self {
        self.on_text = Some(callback);
        self
    }

    /// Set a callback for human-in-the-loop approval before tool execution.
    ///
    /// When set, the callback is invoked with the list of tool calls before
    /// each execution round. If it returns `false`, tool execution is denied
    /// and the agent receives error results, allowing the LLM to adjust.
    pub fn on_approval(mut self, callback: Arc<crate::llm::OnApproval>) -> Self {
        self.on_approval = Some(callback);
        self
    }

    /// Set a timeout for individual tool executions. If a tool does not
    /// complete within this duration, the execution is cancelled and an
    /// error result is returned to the LLM.
    ///
    /// Default: `None` (no timeout).
    pub fn tool_timeout(mut self, timeout: Duration) -> Self {
        self.tool_timeout = Some(timeout);
        self
    }

    /// Set a maximum byte size for individual tool output content.
    ///
    /// Tool results exceeding this limit are truncated with a
    /// `[truncated: N bytes omitted]` suffix, preventing oversized results
    /// from blowing out the context window.
    ///
    /// Default: `None` (no truncation).
    pub fn max_tool_output_bytes(mut self, max: usize) -> Self {
        self.max_tool_output_bytes = Some(max);
        self
    }

    /// Set a JSON Schema for structured output. The agent will receive a
    /// synthetic `__respond__` tool with this schema. When the LLM calls
    /// `__respond__`, its input is extracted as `AgentOutput::structured`.
    ///
    /// The agent can still use regular tools before producing output.
    ///
    /// While a schema is set, the `__respond__` name is reserved: a registered
    /// tool with that name is dropped (and logged) at `build()` time.
    pub fn structured_schema(mut self, schema: serde_json::Value) -> Self {
        self.structured_schema = Some(schema);
        self
    }

    /// Set a callback for structured agent events. Events are emitted at key
    /// points in the agent loop: run start/end, turn transitions, LLM responses,
    /// tool call start/completion, approval decisions, and context summarization.
    pub fn on_event(mut self, callback: Arc<OnEvent>) -> Self {
        self.on_event = Some(callback);
        self
    }

    /// Add a single guardrail. Multiple guardrails are evaluated in order;
    /// first `Deny` wins.
    pub fn guardrail(mut self, guardrail: Arc<dyn Guardrail>) -> Self {
        self.guardrails.push(guardrail);
        self
    }

    /// Add multiple guardrails at once.
    pub fn guardrails(mut self, guardrails: Vec<Arc<dyn Guardrail>>) -> Self {
        self.guardrails.extend(guardrails);
        self
    }

    /// Set a callback for structured questions to the user. When set, a
    /// `question` tool is added at `build()` time allowing the agent to
    /// ask the user structured questions with predefined options.
    pub fn on_question(mut self, callback: Arc<OnQuestion>) -> Self {
        self.on_question = Some(callback);
        self
    }

    /// Set a callback for interactive mode. When set and the LLM returns
    /// text without tool calls, the callback is invoked to get the next
    /// user message. Return `Some(message)` to continue the conversation
    /// or `None` to end the session.
    pub fn on_input(mut self, callback: Arc<OnInput>) -> Self {
        self.on_input = Some(callback);
        self
    }

    /// Set a wall-clock deadline for the entire run. If the agent does not
    /// complete within this duration, `Error::RunTimeout` is returned.
    ///
    /// Default: `None` (no deadline).
    pub fn run_timeout(mut self, timeout: Duration) -> Self {
        self.run_timeout = Some(timeout);
        self
    }

    /// Set the reasoning/thinking effort level. Enables extended thinking
    /// on models that support it (e.g., Qwen3 via OpenRouter, Claude).
    ///
    /// Default: `None` (no reasoning).
    pub fn reasoning_effort(mut self, effort: crate::llm::types::ReasoningEffort) -> Self {
        self.reasoning_effort = Some(effort);
        self
    }

    /// Toggle the runner's reflection flag. The value is forwarded unchanged
    /// to the built runner.
    pub fn enable_reflection(mut self, enabled: bool) -> Self {
        self.enable_reflection = enabled;
        self
    }

    /// Set the tool-output compression threshold forwarded to the runner.
    /// Zero is rejected at build time.
    ///
    /// NOTE(review): presumably the output size above which tool results are
    /// compressed — confirm units and semantics against the runner.
    pub fn tool_output_compression_threshold(mut self, threshold: usize) -> Self {
        self.tool_output_compression_threshold = Some(threshold);
        self
    }

    /// Limit the number of tool *definitions* offered to the LLM per turn.
    /// Zero is rejected at build time.
    ///
    /// Not to be confused with
    /// [`max_tool_calls_per_turn`](Self::max_tool_calls_per_turn), which caps
    /// the invocations in the LLM's response.
    pub fn max_tools_per_turn(mut self, max: usize) -> Self {
        self.max_tools_per_turn = Some(max);
        self
    }

    /// Set a static tool profile to pre-filter tools before dynamic selection.
    ///
    /// When set, tool definitions are filtered to the profile's subset before
    /// `max_tools_per_turn` scoring applies. Use `ToolProfile::Conversational`
    /// for chat-only agents, `Standard` for code agents, `Full` for all tools.
    pub fn tool_profile(mut self, profile: tool_filter::ToolProfile) -> Self {
        self.tool_profile = Some(profile);
        self
    }

    /// Set the maximum number of consecutive identical tool-call turns before
    /// the agent receives an error result instead of executing the tools.
    ///
    /// This detects "doom loops" where the LLM keeps repeating the exact same
    /// tool calls. After `max` consecutive identical turns, all tool calls in
    /// the turn receive an error result asking the LLM to try a different approach.
    ///
    /// Default: `None` (no detection).
    pub fn max_identical_tool_calls(mut self, max: u32) -> Self {
        self.max_identical_tool_calls = Some(max);
        self
    }

    /// Set the maximum number of consecutive fuzzy-identical tool-call turns
    /// before the agent receives an error result. Fuzzy matching compares sorted
    /// tool names (ignoring inputs), catching loops where the agent retries the
    /// same tools with different arguments.
    ///
    /// Default: `None` (no fuzzy detection).
    pub fn max_fuzzy_identical_tool_calls(mut self, max: u32) -> Self {
        self.max_fuzzy_identical_tool_calls = Some(max);
        self
    }

    /// Cap the number of tool *invocations* the LLM may emit per turn.
    /// When the LLM returns more tool_use blocks than `cap`, the run
    /// returns `Error::Agent` (wrapped in `Error::WithPartialUsage`) and
    /// no tools are dispatched.
    ///
    /// **Distinct from `max_tools_per_turn`**: that one limits the *tool
    /// definitions* offered to the LLM before it responds (pre-filter).
    /// This one caps the *invocations* in the LLM's actual response
    /// (post-response). Both can be set independently.
    ///
    /// Default: `None` (unlimited). Recommended for production: 8.
    /// Zero is rejected at build time.
    pub fn max_tool_calls_per_turn(mut self, cap: u32) -> Self {
        self.max_tool_calls_per_turn = Some(cap);
        self
    }

    /// Set declarative permission rules for tool calls.
    ///
    /// Rules are evaluated per tool call before the `on_approval` callback.
    /// `Allow` executes without asking, `Deny` returns an error result,
    /// `Ask` falls through to the `on_approval` callback.
    pub fn permission_rules(mut self, rules: permission::PermissionRuleset) -> Self {
        self.permission_rules = rules;
        self
    }

    /// Set learned permissions for persisting AlwaysAllow/AlwaysDeny decisions.
    ///
    /// When set, approval decisions with `AlwaysAllow` or `AlwaysDeny` are
    /// saved to disk and injected into the live permission ruleset.
    pub fn learned_permissions(
        mut self,
        learned: Arc<std::sync::Mutex<permission::LearnedPermissions>>,
    ) -> Self {
        self.learned_permissions = Some(learned);
        self
    }

    /// Set an LSP manager for collecting diagnostics after file-modifying tools.
    ///
    /// When set, after any tool named `write`, `edit`, or `patch` completes,
    /// the manager reads the modified file and collects diagnostics from the
    /// language server. Diagnostics are appended to the tool result so the
    /// LLM sees compilation errors immediately.
    pub fn lsp_manager(mut self, manager: Arc<crate::lsp::LspManager>) -> Self {
        self.lsp_manager = Some(manager);
        self
    }

    /// Enable session pruning to reduce token usage by truncating old tool results.
    pub fn session_prune_config(mut self, config: pruner::SessionPruneConfig) -> Self {
        self.session_prune_config = Some(config);
        self
    }

    /// Enable recursive (cluster-then-summarize) summarization for long conversations.
    pub fn enable_recursive_summarization(mut self, enable: bool) -> Self {
        self.enable_recursive_summarization = enable;
        self
    }

    /// Set cumulative importance threshold for memory reflection triggers.
    /// When the sum of stored memory importance values exceeds this threshold,
    /// the store tool appends a reflection hint to guide the agent.
    pub fn reflection_threshold(mut self, threshold: u32) -> Self {
        self.reflection_threshold = Some(threshold);
        self
    }

    /// Enable automatic memory consolidation at session end.
    ///
    /// When enabled, clusters related episodic memories by keyword overlap
    /// and merges them into semantic summaries. Requires memory to be configured.
    /// Adds LLM calls at session end (one per cluster).
    pub fn consolidate_on_exit(mut self, enable: bool) -> Self {
        self.consolidate_on_exit = enable;
        self
    }

    /// Set the observability verbosity mode for this agent.
    ///
    /// Controls how much detail is recorded in tracing spans:
    /// - `Production`: span names + durations only (near-zero overhead)
    /// - `Analysis`: + metrics (tokens, latencies, costs)
    /// - `Debug`: + full payloads (truncated to 4KB)
    ///
    /// When not set, resolved via `HEARTBIT_OBSERVABILITY` env var or default (`Production`).
    pub fn observability_mode(mut self, mode: observability::ObservabilityMode) -> Self {
        self.observability_mode = Some(mode);
        self
    }

    /// Provide pre-loaded instruction text to prepend to the system prompt.
    ///
    /// Use [`instructions::load_instructions`] to load from file paths, or
    /// [`instructions::discover_instruction_files`] to auto-discover them.
    ///
    /// Empty text is ignored: it neither sets nor clears a previously
    /// configured value.
    pub fn instruction_text(mut self, text: impl Into<String>) -> Self {
        let text = text.into();
        if !text.is_empty() {
            self.instruction_text = Some(text);
        }
        self
    }

    /// Set a hard limit on cumulative tokens (input + output) across all turns.
    ///
    /// When the total tokens consumed exceed this limit, the agent returns
    /// `Error::BudgetExceeded` with partial usage data.
    ///
    /// Default: `None` (no budget).
    pub fn max_total_tokens(mut self, max: u64) -> Self {
        self.max_total_tokens = Some(max);
        self
    }

    /// Set the audit mode controlling what data is stored in audit records.
    ///
    /// - `Full` (default): all content is recorded.
    /// - `MetadataOnly`: user content fields are replaced with `[stripped]`.
    pub fn audit_mode(mut self, mode: audit::AuditMode) -> Self {
        self.audit_mode = mode;
        self
    }

    /// Attach an audit trail for recording untruncated agent decisions.
    ///
    /// When set, every LLM response, tool call, tool result, run completion,
    /// run failure, and guardrail denial is recorded with full payloads.
    /// Recording is best-effort: failures are logged, never abort the agent.
    pub fn audit_trail(mut self, trail: Arc<dyn audit::AuditTrail>) -> Self {
        self.audit_trail = Some(trail);
        self
    }

    /// Set user context for multi-tenant audit enrichment.
    /// When set, all `AuditRecord` entries include the user and tenant IDs.
    ///
    /// The same IDs determine the tenant scope handed to the memory and
    /// knowledge tools at `build()` time.
    pub fn audit_user_context(
        mut self,
        user_id: impl Into<String>,
        tenant_id: impl Into<String>,
    ) -> Self {
        self.audit_user_id = Some(user_id.into());
        self.audit_tenant_id = Some(tenant_id.into());
        self
    }

    /// Set the delegation chain for audit records.
    ///
    /// Populated when the daemon acts on behalf of a user via RFC 8693 token exchange.
    /// The chain records which agent(s) are in the delegation path.
    pub fn audit_delegation_chain(mut self, chain: Vec<String>) -> Self {
        self.audit_delegation_chain = chain;
        self
    }

    /// Enable an LRU response cache with the given maximum number of entries.
    /// Identical requests (same system prompt, messages, and tool names) return
    /// cached responses without calling the LLM. Only non-streaming calls are cached.
    /// Size must be at least 1.
    pub fn response_cache_size(mut self, size: usize) -> Self {
        self.response_cache_size = Some(size);
        self
    }

    /// Set the agent's workspace directory. When set, file tools resolve
    /// relative paths against this directory, BashTool starts here, and a
    /// workspace hint is appended to the system prompt.
    pub fn workspace(mut self, path: impl Into<std::path::PathBuf>) -> Self {
        self.workspace = Some(path.into());
        self
    }

    /// Optional per-tenant in-flight token tracker. When set, the runner
    /// calls `tracker.adjust(&scope, delta)` after each LLM response,
    /// reconciling the per-tenant `in_flight` counter against the
    /// estimated reservation made at submit time. Has no effect when
    /// `audit_tenant_id` is unset.
    pub fn tenant_tracker(
        mut self,
        tracker: Arc<crate::agent::tenant_tracker::TenantTokenTracker>,
    ) -> Self {
        self.tenant_tracker = Some(tracker);
        self
    }

    /// Validate the configuration and assemble the [`AgentRunner`].
    ///
    /// Besides validation, this:
    /// - adds memory, knowledge and question tools for the configured
    ///   integrations, after the explicitly registered tools;
    /// - deduplicates tools by name, keeping the first registration;
    /// - appends the synthetic `__respond__` tool definition when a structured
    ///   schema is set (the name is reserved in that case);
    /// - finalizes the system prompt (instruction text, workspace hint,
    ///   resourcefulness guidelines, current UTC date/time).
    ///
    /// # Errors
    ///
    /// Returns `Error::Config` when:
    /// - `name` is empty;
    /// - `max_turns` or `max_tokens` is zero;
    /// - `summarize_threshold` is combined with `ContextStrategy::SlidingWindow`;
    /// - both `on_input` and `structured_schema` are set;
    /// - any of `max_tools_per_turn`, `tool_output_compression_threshold`,
    ///   `max_identical_tool_calls`, `max_fuzzy_identical_tool_calls`,
    ///   `max_tool_calls_per_turn`, `max_total_tokens` or `response_cache_size`
    ///   is explicitly set to zero.
    pub fn build(self) -> Result<AgentRunner<P>, Error> {
        if self.name.is_empty() {
            return Err(Error::Config("agent name must not be empty".into()));
        }
        if self.max_turns == 0 {
            return Err(Error::Config("max_turns must be at least 1".into()));
        }
        if self.max_tokens == 0 {
            return Err(Error::Config("max_tokens must be at least 1".into()));
        }
        if matches!(
            self.context_strategy,
            Some(ContextStrategy::SlidingWindow { .. })
        ) && self.summarize_threshold.is_some()
        {
            return Err(Error::Config(
                "cannot use summarize_threshold with SlidingWindow context strategy".into(),
            ));
        }
        if self.on_input.is_some() && self.structured_schema.is_some() {
            return Err(Error::Config(
                "on_input (interactive mode) and structured_schema are mutually exclusive".into(),
            ));
        }
        if self.max_tools_per_turn == Some(0) {
            return Err(Error::Config(
                "max_tools_per_turn must be at least 1".into(),
            ));
        }
        if self.tool_output_compression_threshold == Some(0) {
            return Err(Error::Config(
                "tool_output_compression_threshold must be at least 1".into(),
            ));
        }
        if self.max_identical_tool_calls == Some(0) {
            return Err(Error::Config(
                "max_identical_tool_calls must be at least 1".into(),
            ));
        }
        if self.max_fuzzy_identical_tool_calls == Some(0) {
            return Err(Error::Config(
                "max_fuzzy_identical_tool_calls must be at least 1".into(),
            ));
        }
        if self.max_tool_calls_per_turn == Some(0) {
            return Err(Error::Config(
                "max_tool_calls_per_turn must be > 0 if set".into(),
            ));
        }
        if self.max_total_tokens == Some(0) {
            return Err(Error::Config("max_total_tokens must be at least 1".into()));
        }
        if self.response_cache_size == Some(0) {
            return Err(Error::Config(
                "response_cache_size must be at least 1".into(),
            ));
        }

        // Collect all tools, including memory and knowledge tools. Explicitly
        // registered tools come first so they win name collisions below.
        let mut all_tools = self.tools;
        let memory_scope = crate::auth::TenantScope::from_audit_fields(
            self.audit_tenant_id.as_deref(),
            self.audit_user_id.as_deref(),
        );
        // Keep a second handle: the store itself is also handed to the runner.
        let memory_ref = self.memory.clone();
        if let Some(memory) = self.memory {
            all_tools.extend(crate::memory::tools::memory_tools_with_reflection(
                memory,
                &self.name,
                memory_scope,
                self.reflection_threshold,
            ));
        }
        if let Some(kb) = self.knowledge_base {
            // SECURITY (F-KB-1): scope the KB tool to this runner's tenant.
            let kb_scope = crate::auth::TenantScope::from_audit_fields(
                self.audit_tenant_id.as_deref(),
                self.audit_user_id.as_deref(),
            );
            all_tools.extend(crate::knowledge::tools::knowledge_tools(kb, kb_scope));
        }
        if let Some(on_question) = self.on_question {
            all_tools.push(Arc::new(crate::tool::builtins::QuestionTool::new(
                on_question,
            )));
        }

        let mut tools: HashMap<String, Arc<dyn Tool>> = HashMap::with_capacity(all_tools.len());
        let mut tool_defs: Vec<ToolDefinition> = Vec::with_capacity(all_tools.len());

        // With structured output enabled, the synthetic `__respond__`
        // definition is appended after this loop and never passes through the
        // dedup below. Reserve its name so a registered tool cannot produce a
        // second definition with the same name in `tool_defs`.
        let reserved_name: Option<String> = self
            .structured_schema
            .as_ref()
            .map(|_| crate::llm::types::RESPOND_TOOL_NAME.into());

        use std::collections::hash_map::Entry;
        for t in all_tools {
            let def = t.definition();
            if reserved_name.as_deref() == Some(def.name.as_str()) {
                tracing::error!(
                    tool = %def.name,
                    "tool name is reserved for structured output; dropping registration"
                );
                continue;
            }
            // Only the name is cloned (for the map key); the definition itself
            // is moved into `tool_defs` rather than deep-copied.
            match tools.entry(def.name.clone()) {
                Entry::Occupied(_) => {
                    // SECURITY (F-MCP-2): elevate the log level — a duplicate tool
                    // name is a potential MCP-shadowing attempt. The existing
                    // first-wins behavior is preserved (so trusted builtins added
                    // first take precedence) and the event is visible in the logs.
                    tracing::error!(
                        tool = %def.name,
                        "duplicate tool name (potential MCP-shadowing); keeping first registration"
                    );
                }
                Entry::Vacant(slot) => {
                    slot.insert(t);
                    tool_defs.push(def);
                }
            }
        }

        // Inject the synthetic __respond__ tool for structured output.
        // Only the ToolDefinition is added — there's no Tool impl because
        // the execute loop intercepts __respond__ calls before tool dispatch.
        if let Some(ref schema) = self.structured_schema {
            tool_defs.push(ToolDefinition {
                name: crate::llm::types::RESPOND_TOOL_NAME.into(),
                description: crate::llm::types::RESPOND_TOOL_DESCRIPTION.into(),
                input_schema: schema.clone(),
            });
        }

        // Prepend instruction text to the system prompt if provided.
        let mut system_prompt = match self.instruction_text {
            Some(ref text) => instructions::prepend_instructions(&self.system_prompt, text),
            None => self.system_prompt,
        };

        // Append workspace hint to the system prompt if configured.
        if let Some(ref ws) = self.workspace {
            system_prompt.push_str(&format!(
                "\n\nYour workspace directory is {}. You can freely create, organize, and manage \
                 files there. Use it for notes, intermediate results, generated artifacts, and \
                 anything you want to persist. Paths can be relative (resolved against workspace) \
                 or absolute.",
                ws.display()
            ));
        }

        // Append resourcefulness guidelines only when the agent has power tools
        // (bash, write, patch, edit) that make the guidance relevant. Saves ~180
        // tokens for conversational-only agents.
        let has_power_tools = tool_defs
            .iter()
            .any(|t| matches!(t.name.as_str(), "bash" | "write" | "patch" | "edit"));
        if has_power_tools {
            system_prompt.push_str(RESOURCEFULNESS_GUIDELINES);
        }

        // Inject current date/time so the model knows "today". The timestamp is
        // taken once, here, so it reflects build time rather than run time.
        use chrono::Utc;
        system_prompt.push_str(&format!(
            "\n\nCurrent date and time: {} UTC",
            Utc::now().format("%A, %B %-d, %Y %H:%M")
        ));

        Ok(AgentRunner {
            provider: self.provider,
            name: self.name,
            system_prompt,
            tools,
            tool_defs,
            max_turns: self.max_turns,
            max_tokens: self.max_tokens,
            context_strategy: self.context_strategy.unwrap_or(ContextStrategy::Unlimited),
            summarize_threshold: self.summarize_threshold,
            on_text: self.on_text,
            on_approval: self.on_approval,
            tool_timeout: self.tool_timeout,
            max_tool_output_bytes: self.max_tool_output_bytes,
            structured_schema: self.structured_schema,
            on_event: self.on_event,
            guardrails: self.guardrails,
            on_input: self.on_input,
            run_timeout: self.run_timeout,
            reasoning_effort: self.reasoning_effort,
            enable_reflection: self.enable_reflection,
            tool_output_compression_threshold: self.tool_output_compression_threshold,
            max_tools_per_turn: self.max_tools_per_turn,
            tool_profile: self.tool_profile,
            max_identical_tool_calls: self.max_identical_tool_calls,
            max_fuzzy_identical_tool_calls: self.max_fuzzy_identical_tool_calls,
            max_tool_calls_per_turn: self.max_tool_calls_per_turn,
            permission_rules: parking_lot::RwLock::new(self.permission_rules),
            learned_permissions: self.learned_permissions,
            lsp_manager: self.lsp_manager,
            session_prune_config: self.session_prune_config,
            memory: memory_ref,
            enable_recursive_summarization: self.enable_recursive_summarization,
            consolidate_on_exit: self.consolidate_on_exit,
            // The explicit builder setting is passed alongside the env key;
            // precedence is decided inside `ObservabilityMode::resolve`.
            observability_mode: observability::ObservabilityMode::resolve(
                observability::OBSERVABILITY_ENV_KEY,
                None,
                self.observability_mode,
            ),
            max_total_tokens: self.max_total_tokens,
            audit_mode: self.audit_mode,
            audit_trail: self.audit_trail,
            audit_user_id: self.audit_user_id,
            audit_tenant_id: self.audit_tenant_id,
            audit_delegation_chain: self.audit_delegation_chain,
            response_cache: self.response_cache_size.map(cache::ResponseCache::new),
            tenant_tracker: self.tenant_tracker,
            cumulative_actual_tokens: std::sync::atomic::AtomicUsize::new(0),
        })
    }
}