koda_core/tools/mod.rs
1//! Tool registry and execution engine.
2//!
3//! Each tool is a function that takes JSON arguments and returns a string result.
4//! Path validation is enforced here to prevent directory traversal.
5//!
6//! ## Available tools
7//!
8//! | Tool | Module | Effect | Description |
9//! |---|---|---|---|
10//! | **Read** | `file_tools` | ReadOnly | Read file contents with line numbers |
11//! | **Write** | `file_tools` | LocalMutation | Create or overwrite a file |
12//! | **Edit** | `file_tools` | LocalMutation | Find-and-replace in an existing file |
13//! | **Delete** | `file_tools` | Destructive | Delete a file |
14//! | **List** | `file_tools` | ReadOnly | List files and directories |
15//! | **Bash** | `shell` | LocalMutation | Execute shell commands (with background mode) |
16//! | **Grep** | `grep` | ReadOnly | Recursive text search (respects .gitignore) |
17//! | **Glob** | `glob_tool` | ReadOnly | Find files by glob pattern |
//! | **WebFetch** | `web_fetch` | ReadOnly | Fetch URL content (HTML→text; GET-only) |
//! | **WebSearch** | `web_search` | ReadOnly | Web search via DuckDuckGo |
//! | **InvokeAgent** | `agent` | ReadOnly | Delegate task to a sub-agent (inherits parent's mode) |
21//! | **ListAgents** | `agent` | ReadOnly | List available sub-agents |
22//! | **MemoryRead** | `memory` | ReadOnly | Read project/global memory |
23//! | **MemoryWrite** | `memory` | LocalMutation | Save facts to memory |
24//! | **TodoWrite** | `todo` | LocalMutation | Update task list |
25//! | **AskUser** | `ask_user` | ReadOnly | Ask the user a question |
26//! | **ActivateSkill** | `skills` | ReadOnly | Load a skill's instructions |
27//! | **ListSkills** | `skills` | ReadOnly | List available skills |
28//! | **ListBackgroundTasks** | `bg_task_tools` | ReadOnly | Snapshot background tasks owned by the caller |
29//! | **CancelTask** | `bg_task_tools` | ReadOnly | Cancel a background agent or process |
30//! | **WaitTask** | `bg_task_tools` | ReadOnly | Block until a background task finishes (max 300 s) |
31//!
32//! ## Safety model
33//!
34//! Every tool call is classified by `ToolEffect` and checked against the
35//! current approval mode before execution. See
36//! `classify_tool` for the effect of each tool.
37
/// Effect classification for tool calls.
///
/// Two-axis model: what does the tool touch (local vs. remote)
/// and how severe are its effects (read vs. mutate vs. destroy)?
///
/// Variants serialize with their PascalCase names (e.g. `"ReadOnly"`),
/// via `#[serde(rename_all = "PascalCase")]`.
///
/// # Examples
///
/// ```
/// use koda_core::tools::{ToolEffect, classify_tool};
///
/// assert_eq!(classify_tool("Read"), ToolEffect::ReadOnly);
/// assert_eq!(classify_tool("Write"), ToolEffect::LocalMutation);
/// assert_eq!(classify_tool("Delete"), ToolEffect::Destructive);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "PascalCase")]
pub enum ToolEffect {
    /// No side-effects: file reads, grep, git status.
    ReadOnly,
    /// Side-effects on remote services only: GitHub API, WebFetch POST.
    RemoteAction,
    /// Mutates local filesystem or state: Write, Edit, Delete, MemoryWrite.
    LocalMutation,
    /// Irreversible or high-blast-radius: rm -rf, git push --force, DROP TABLE.
    Destructive,
}
64
65/// Classify a built-in tool by name.
66///
67/// For `Bash`, this returns the *default* classification (`LocalMutation`);
68/// the actual effect depends on the command string and must be refined
69/// via [`crate::bash_safety::classify_bash_command`].
70///
71/// Unknown tools default to `LocalMutation` (conservative — always asks).
72///
73/// For MCP tools (names containing `__`), call
74/// [`ToolRegistry::classify_tool_with_mcp`] instead to use server-provided
75/// annotations.
76pub fn classify_tool(name: &str) -> ToolEffect {
77 match name {
78 // Pure reads — zero side-effects
79 "Read" | "List" | "Grep" | "Glob" | "MemoryRead" | "ListAgents" | "ListSkills"
80 | "ActivateSkill" | "RecallContext" | "AskUser" => ToolEffect::ReadOnly,
81
82 // Remote actions — side-effects on remote services only
83 "WebFetch" => ToolEffect::ReadOnly, // GET-only fetch
84 "WebSearch" => ToolEffect::ReadOnly, // read-only search
85 "InvokeAgent" => ToolEffect::ReadOnly, // sub-agents inherit parent's mode
86
87 // Background task management (Layer 2 of #996). ListBackgroundTasks
88 // is a pure read; CancelTask / WaitTask signal but don't write
89 // files — they're idempotent observation/control of work the
90 // model already started. Treating as ReadOnly avoids an approval
91 // prompt every time the model checks on a bg task.
92 "ListBackgroundTasks" | "CancelTask" | "WaitTask" => ToolEffect::ReadOnly,
93
94 // Local mutations — write to filesystem or local state
95 "Write" | "Edit" | "MemoryWrite" | "TodoWrite" => ToolEffect::LocalMutation,
96
97 // Bash — default to LocalMutation; refined by classify_bash_command()
98 "Bash" => ToolEffect::LocalMutation,
99
100 // Delete is destructive (irreversible without undo)
101 "Delete" => ToolEffect::Destructive,
102
103 // MCP tools — use annotations-based classification.
104 name if crate::mcp::is_mcp_tool_name(name) => ToolEffect::RemoteAction,
105
106 // Unknown tools — default to LocalMutation (conservative)
107 _ => ToolEffect::LocalMutation,
108 }
109}
110
111/// Returns true if the tool performs a mutating operation.
112///
113/// Convenience wrapper over [`classify_tool`] for call sites that only
114/// need a bool (e.g., loop guard).
115///
116/// ```
117/// use koda_core::tools::is_mutating_tool;
118///
119/// assert!(!is_mutating_tool("Read"));
120/// assert!(is_mutating_tool("Write"));
121/// assert!(is_mutating_tool("Delete"));
122/// ```
123pub fn is_mutating_tool(name: &str) -> bool {
124 !matches!(classify_tool(name), ToolEffect::ReadOnly)
125}
126
/// Sub-agent invocation tools (`InvokeAgent`, `ListAgents`).
pub mod agent;
/// User-question tool (`AskUser`). Note: executed by the inference loop,
/// not by `ToolRegistry::execute` (see the `AskUser` arm there).
pub mod ask_user;
/// Background process registry — tracks `Bash` runs spawned with
/// `background: true` (see `ToolRegistry::bg_registry`).
pub mod bg_process;
/// Background-task management tools — `ListBackgroundTasks`,
/// `CancelTask`, `WaitTask` (Layer 2 of #996).
pub mod bg_task_tools;
/// File CRUD tools (`Read`, `Write`, `Edit`, `Delete`, `List`).
pub mod file_tools;
/// Fuzzy-matching helpers. NOTE(review): not registered as a tool here;
/// presumably supports approximate matching for file tools — confirm.
pub mod fuzzy;
/// Glob pattern search tool (`Glob`).
pub mod glob_tool;
/// Recursive text search tool (`Grep`).
pub mod grep;
/// Project memory read/write tools (`MemoryRead`, `MemoryWrite`).
pub mod memory;
/// On-demand conversation history retrieval (`RecallContext`).
pub mod recall;
/// Shell command execution tool (`Bash`).
pub mod shell;
/// Skill discovery and activation tools (`ListSkills`, `ActivateSkill`).
pub mod skill_tools;
/// Session-scoped task list tool (`TodoWrite`).
pub mod todo;
/// Pre-flight validation for tool calls (runs before approval).
pub mod validate;
/// HTTP fetch tool (`WebFetch`).
pub mod web_fetch;
/// Web search tool (`WebSearch`).
pub mod web_search;
157
158use anyhow::Result;
159use koda_sandbox::fs::{FileSystem, LocalFileSystem};
160use path_clean::PathClean;
161use serde_json::Value;
162use std::collections::HashMap;
163use std::path::{Path, PathBuf};
164use std::sync::Arc;
165use std::time::SystemTime;
166
167use crate::output_caps::OutputCaps;
168
169use crate::providers::ToolDefinition;
170
/// Shared file-read cache: tracks `(size, mtime, sha256_hex)` per cache key.
///
/// The SHA-256 field is populated on full-file reads and used by `edit_file`
/// to detect whether the file changed between when the model last read it and
/// when it attempts an edit (Gemini CLI strategy, better than mtime-only because
/// mtime has 1-second granularity and can miss sub-second bash mutations).
///
/// `sha256_hex` is empty for line-range reads where only a slice was fetched.
///
/// Wrapped in `Arc` so parent and sub-agent `ToolRegistry` instances
/// share the same cache — reads by one agent benefit all others.
pub type FileReadCache = Arc<std::sync::Mutex<HashMap<String, (u64, SystemTime, String)>>>;

/// Tracks which tool last wrote each absolute file path.
///
/// Keyed by canonical `PathBuf`; value is `(tool_name, when)` using a
/// monotonic `Instant`. Populated on every successful Write and Edit so
/// the validation layer can include the responsible tool in staleness
/// error messages (#804 item 7).
pub type LastWriterCache = Arc<std::sync::Mutex<HashMap<PathBuf, (String, std::time::Instant)>>>;

/// Tracks the most recent successful Bash invocation.
///
/// Stores `(command_snippet, when)`. Only the latest call is kept — enough
/// context to tell the model "Bash ran 2s ago, it may have changed the file".
/// The snippet is capped at 72 chars when recorded (see `execute`'s Bash arm).
pub type LastBashCache = Arc<std::sync::Mutex<Option<(String, std::time::Instant)>>>;
197
/// Result of executing a tool.
///
/// The `success` field is set automatically by `ToolRegistry::execute()` —
/// `Ok(…)` → `true`, `Err(…)` → `false`. Individual tool functions just
/// return `Result<String>`.
///
/// ```
/// use koda_core::tools::ToolResult;
///
/// let ok = ToolResult { output: "done".into(), success: true, full_output: None };
/// assert!(ok.success);
/// ```
#[derive(Debug, Clone)]
pub struct ToolResult {
    /// The tool's output string (model-facing; may be a summary for Bash).
    pub output: String,
    /// Whether the tool executed successfully.
    ///
    /// Set automatically by `ToolRegistry::execute()` — `Ok(…)` → `true`,
    /// `Err(…)` → `false`. Individual tools never set this directly;
    /// they just return `Result<String>`.
    pub success: bool,
    /// Full untruncated output, stored separately in DB for later retrieval.
    ///
    /// Only populated by Bash when output exceeds the summary threshold.
    /// `RecallContext` can search this to retrieve details the model didn't
    /// see in its context window.
    pub full_output: Option<String>,
}
227
/// The tool registry: maps tool names to their definitions and handlers.
///
/// One registry exists per agent; sub-agents get their own instance but
/// may share the parent's `FileReadCache` (see `with_shared_cache`).
pub struct ToolRegistry {
    /// Root against which relative tool paths are resolved.
    project_root: PathBuf,
    /// Built-in tool definitions, keyed by tool name.
    definitions: HashMap<String, ToolDefinition>,
    /// Shared `(size, mtime, sha256)` cache for file reads.
    read_cache: FileReadCache,
    /// Filesystem abstraction — `LocalFileSystem` by default; swap to
    /// `SandboxedFileSystem` when a sandbox slot is active (Phase 2d, #934).
    /// Explicit `+ Send + Sync` is required: trait objects don't
    /// auto-inherit auto-traits from the supertrait, so without these
    /// bounds `ToolRegistry` becomes `!Send` and any future holding
    /// it (e.g. `execute_sub_agent`) cannot be `tokio::spawn`'d.
    fs: Arc<dyn FileSystem + Send + Sync>,
    /// Per-file last-writer tracking for richer staleness errors (#804 item 7).
    last_writer: LastWriterCache,
    /// Most recent Bash invocation for staleness error context (#804 item 7).
    last_bash: LastBashCache,
    /// Undo stack for file mutations.
    pub undo: std::sync::Mutex<crate::undo::UndoStack>,
    /// Discovered skills.
    pub skill_registry: crate::skills::SkillRegistry,
    /// Database handle for tools that need session access (RecallContext).
    db: std::sync::RwLock<Option<std::sync::Arc<crate::db::Database>>>,
    /// Current session ID (for RecallContext).
    session_id: std::sync::RwLock<Option<String>>,
    /// Context-scaled output caps for all tools.
    pub caps: OutputCaps,
    /// Background process registry — tracks processes spawned with `background: true`.
    /// Dropped (SIGTERM all) when the session ends.
    pub bg_registry: bg_process::BgRegistry,
    /// Trust mode — determines sandbox configuration for Bash tool.
    trust: crate::trust::TrustMode,
    /// Active sandbox policy. Phase 5 PR-2 of #934 wires this through
    /// the Bash dispatch path so per-agent variation becomes possible.
    /// Today every constructor seeds it with `SandboxPolicy::strict_default()`
    /// so behavior is byte-for-byte unchanged — PR-3 starts populating it
    /// with non-default values via [`crate::sandbox::policy_for_agent`].
    sandbox_policy: koda_sandbox::SandboxPolicy,
    /// MCP connection manager — owns all MCP server connections (#662).
    /// `None` until attached via `set_mcp_manager()`.
    mcp_manager: std::sync::RwLock<Option<Arc<tokio::sync::RwLock<crate::mcp::McpManager>>>>,
    /// Loopback port of the per-session HTTP CONNECT proxy (Phase 3b of
    /// #934). When `Some`, [`crate::sandbox::build`] attaches the
    /// canonical `HTTPS_PROXY`/`NO_PROXY`/etc. env-var bouquet to every
    /// Bash invocation so child processes route HTTP through the proxy.
    /// `None` (default) preserves the pre-3b unfiltered behavior —
    /// session code opts in by calling [`Self::set_proxy_port`].
    proxy_port: std::sync::RwLock<Option<u16>>,
    /// Loopback port of the per-session SOCKS5 proxy (Phase 3d.1 of
    /// #934). When `Some`, [`crate::sandbox::build`] appends
    /// `ALL_PROXY=socks5h://127.0.0.1:port` (+ lowercase alias) so
    /// raw-TCP clients (git over ssh, gRPC) that ignore `HTTPS_PROXY`
    /// also route through the hostname-filtered proxy. Independent
    /// from `proxy_port` so tests can attach one without the other.
    socks5_port: std::sync::RwLock<Option<u16>>,
}
283
impl ToolRegistry {
    /// Create a new registry with all built-in tools.
    ///
    /// `max_context_tokens` scales all output caps (see `OutputCaps`).
    /// Uses the `Safe` trust mode; use [`Self::with_trust`] to pick another.
    pub fn new(project_root: PathBuf, max_context_tokens: usize) -> Self {
        Self::with_trust(
            project_root,
            max_context_tokens,
            crate::trust::TrustMode::Safe,
        )
    }

    /// Create a new registry with a specific trust mode.
    ///
    /// Registers every built-in tool definition, discovers skills under
    /// `project_root`, and seeds all shared caches empty.
    pub fn with_trust(
        project_root: PathBuf,
        max_context_tokens: usize,
        trust: crate::trust::TrustMode,
    ) -> Self {
        let mut definitions = HashMap::new();

        // Register all built-in tools. Later inserts with duplicate names
        // would silently overwrite earlier ones — tool names are assumed
        // unique across modules.
        for def in file_tools::definitions() {
            definitions.insert(def.name.clone(), def);
        }

        for def in grep::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in shell::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in agent::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in bg_task_tools::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in ask_user::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in glob_tool::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in web_fetch::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in web_search::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in todo::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in memory::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in skill_tools::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        // RecallContext — on-demand history retrieval
        let recall_def = recall::definition();
        definitions.insert(recall_def.name.clone(), recall_def);
        let skill_registry = crate::skills::SkillRegistry::discover(&project_root);

        Self {
            project_root,
            definitions,
            read_cache: Arc::new(std::sync::Mutex::new(HashMap::new())),
            fs: Arc::new(LocalFileSystem::new()),
            last_writer: Arc::new(std::sync::Mutex::new(HashMap::new())),
            last_bash: Arc::new(std::sync::Mutex::new(None)),
            undo: std::sync::Mutex::new(crate::undo::UndoStack::new()),
            skill_registry,
            db: std::sync::RwLock::new(None),
            session_id: std::sync::RwLock::new(None),
            caps: OutputCaps::for_context(max_context_tokens),
            bg_registry: bg_process::BgRegistry::new(),
            trust,
            // Phase 5 PR-2 of #934: seed with strict_default(). Callers
            // can override via [`Self::with_sandbox_policy`] (sub-agent
            // dispatch does this; the main agent inherits the default).
            sandbox_policy: koda_sandbox::SandboxPolicy::strict_default(),
            mcp_manager: std::sync::RwLock::new(None),
            proxy_port: std::sync::RwLock::new(None),
            socks5_port: std::sync::RwLock::new(None),
        }
    }

    /// Share an existing file-read cache (e.g. from the parent agent).
    ///
    /// Sub-agents that share the parent's cache avoid redundant disk reads
    /// for files already loaded in the same session.
    pub fn with_shared_cache(mut self, cache: FileReadCache) -> Self {
        self.read_cache = cache;
        self
    }

    /// Override the active sandbox policy.
    ///
    /// Phase 5 PR-2 of #934. Builder-style; chains after `with_trust`
    /// (or `new`). Sub-agent dispatch uses this to install the policy
    /// produced by [`crate::sandbox::policy_for_agent`] on the child's
    /// registry. The main agent path doesn't call this and inherits
    /// the `strict_default()` seed from `with_trust` — byte-for-byte
    /// unchanged behavior in PR-2.
    pub fn with_sandbox_policy(mut self, policy: koda_sandbox::SandboxPolicy) -> Self {
        self.sandbox_policy = policy;
        self
    }

    /// Borrow the active sandbox policy. Used by the Bash dispatch
    /// path to thread the per-registry policy into
    /// [`crate::sandbox::build`].
    pub fn sandbox_policy(&self) -> &koda_sandbox::SandboxPolicy {
        &self.sandbox_policy
    }

    /// Inject a custom [`FileSystem`] implementation.
    ///
    /// Call this after construction to swap `LocalFileSystem` for
    /// `SandboxedFileSystem` when a sandbox slot is ready (#934).
    pub fn set_fs(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }

    /// Get a clone of the `Arc` file-read cache for sharing with sub-agents.
    pub fn file_read_cache(&self) -> FileReadCache {
        Arc::clone(&self.read_cache)
    }

    /// Get a clone of the last-writer cache for passing to validation.
    pub fn last_writer_cache(&self) -> LastWriterCache {
        Arc::clone(&self.last_writer)
    }

    /// Get a clone of the last-bash cache for passing to validation.
    pub fn last_bash_cache(&self) -> LastBashCache {
        Arc::clone(&self.last_bash)
    }

    /// Attach database + session for tools that need history access.
    ///
    /// Lock-poisoning is non-fatal: on a poisoned lock the previous
    /// value is silently kept.
    pub fn set_session(&self, db: std::sync::Arc<crate::db::Database>, session_id: String) {
        if let Ok(mut guard) = self.db.write() {
            *guard = Some(db);
        }
        if let Ok(mut guard) = self.session_id.write() {
            *guard = Some(session_id);
        }
    }

    /// Attach an MCP connection manager and register its tools (#662).
    ///
    /// Called after MCP servers have connected and discovered their tools.
    /// Tool definitions are merged into the registry so the LLM can see them.
    pub fn set_mcp_manager(&self, manager: Arc<tokio::sync::RwLock<crate::mcp::McpManager>>) {
        if let Ok(mut guard) = self.mcp_manager.write() {
            *guard = Some(manager);
        }
    }

    /// Get the MCP manager (if attached).
    pub fn mcp_manager(&self) -> Option<Arc<tokio::sync::RwLock<crate::mcp::McpManager>>> {
        self.mcp_manager.read().ok().and_then(|guard| guard.clone())
    }

    /// Attach (or detach) the per-session HTTP CONNECT proxy port.
    ///
    /// Called from [`crate::session::KodaSession::new`] after spawning
    /// the always-on [`koda_sandbox::BuiltInProxy`]. Pass `None` to
    /// detach (Bash invocations revert to unfiltered network access —
    /// only used in standalone-ToolRegistry tests; production sessions
    /// keep this set for their full lifetime). Lock-poisoning is
    /// non-fatal — we silently keep the previous value, matching the
    /// precedent set by `set_mcp_manager`.
    pub fn set_proxy_port(&self, port: Option<u16>) {
        if let Ok(mut guard) = self.proxy_port.write() {
            *guard = port;
        }
    }

    /// Current proxy port, if one has been attached. Read by the Bash
    /// dispatch path; threaded into [`crate::sandbox::build`] which
    /// turns it into the env-var bouquet on the spawned `Command`.
    pub fn proxy_port(&self) -> Option<u16> {
        self.proxy_port.read().ok().and_then(|guard| *guard)
    }

    /// Attach (or detach) the per-session SOCKS5 proxy port. Mirrors
    /// [`Self::set_proxy_port`] — see that fn's docs for the
    /// lock-poisoning policy.
    pub fn set_socks5_port(&self, port: Option<u16>) {
        if let Ok(mut guard) = self.socks5_port.write() {
            *guard = port;
        }
    }

    /// Current SOCKS5 port, if one has been attached. Threaded into
    /// [`crate::sandbox::build`] which appends `ALL_PROXY` to the
    /// spawned `Command`'s env.
    pub fn socks5_port(&self) -> Option<u16> {
        self.socks5_port.read().ok().and_then(|guard| *guard)
    }

    /// Classify a tool, using MCP annotations when available.
    ///
    /// For built-in tools, delegates to `classify_tool()`.
    /// For MCP tools, looks up cached annotations in the manager.
    pub fn classify_tool_with_mcp(&self, name: &str) -> ToolEffect {
        if crate::mcp::is_mcp_tool_name(name) {
            // try_read (non-blocking) — this is a sync fn and must not
            // block on the async RwLock.
            if let Some(mgr) = self.mcp_manager()
                && let Ok(mgr) = mgr.try_read()
            {
                return mgr.classify_tool(name);
            }
            // Fallback: no manager or lock contention.
            return ToolEffect::RemoteAction;
        }
        classify_tool(name)
    }

    /// Get all built-in tool names.
    /// Used by wiring tests to verify every tool is properly integrated.
    pub fn all_builtin_tool_names(&self) -> Vec<String> {
        let mut names: Vec<String> = self.definitions.keys().cloned().collect();
        names.sort();
        names
    }

    /// Check whether a tool name is known.
    pub fn has_tool(&self, name: &str) -> bool {
        self.definitions.contains_key(name)
    }

    /// List all available skills as `(name, description, source)` tuples.
    pub fn list_skills(&self) -> Vec<(String, String, String)> {
        self.skill_registry
            .list()
            .into_iter()
            .map(|m| {
                let source = match m.source {
                    crate::skills::SkillSource::BuiltIn => "built-in",
                    crate::skills::SkillSource::User => "user",
                    crate::skills::SkillSource::Project => "project",
                };
                (m.name.clone(), m.description.clone(), source.to_string())
            })
            .collect()
    }

    /// Search skills by query, returning `(name, description, source)` tuples.
    pub fn search_skills(&self, query: &str) -> Vec<(String, String, String)> {
        self.skill_registry
            .search(query)
            .into_iter()
            .map(|m| {
                let source = match m.source {
                    crate::skills::SkillSource::BuiltIn => "built-in",
                    crate::skills::SkillSource::User => "user",
                    crate::skills::SkillSource::Project => "project",
                };
                (m.name.clone(), m.description.clone(), source.to_string())
            })
            .collect()
    }

    /// Get tool definitions, optionally filtered by allow/deny lists.
    ///
    /// Includes MCP tool definitions if a manager is attached.
    ///
    /// - `allowed` non-empty → only those tools (allowlist).
    /// - `denied` non-empty → all tools except those (denylist).
    /// - Both empty → all tools.
    /// - If both are specified, allowlist wins (deny is ignored).
    pub fn get_definitions(&self, allowed: &[String], denied: &[String]) -> Vec<ToolDefinition> {
        let mut defs: Vec<ToolDefinition> = if !allowed.is_empty() {
            allowed
                .iter()
                .filter_map(|name| self.definitions.get(name).cloned())
                .collect()
        } else if !denied.is_empty() {
            self.definitions
                .values()
                .filter(|d| !denied.contains(&d.name))
                .cloned()
                .collect()
        } else {
            self.definitions.values().cloned().collect()
        };

        // Append MCP tool definitions, applying the same filter mode.
        // try_read: sync fn, must not block on the async RwLock — on
        // contention MCP definitions are simply omitted this call.
        if let Some(mgr) = self.mcp_manager()
            && let Ok(mgr) = mgr.try_read()
        {
            let mcp_defs = mgr.all_tool_definitions();
            if !allowed.is_empty() {
                // Allowlist mode: only include MCP tools in the allowlist.
                for def in mcp_defs {
                    if allowed.contains(&def.name) {
                        defs.push(def);
                    }
                }
            } else if !denied.is_empty() {
                // Denylist mode: include MCP tools not in the denylist.
                for def in mcp_defs {
                    if !denied.contains(&def.name) {
                        defs.push(def);
                    }
                }
            } else {
                // No filter: include all MCP tools.
                defs.extend(mcp_defs);
            }
        }

        defs
    }

    /// Execute a tool by name with the given JSON arguments.
    ///
    /// Empty or whitespace-only `arguments` are treated as `{}` (no args)
    /// so that tools can fall through to their own defaults instead of
    /// surfacing a raw JSON parse error. See #513.
    ///
    /// `sink_for_streaming` is an optional `(sink, call_id)` pair. When
    /// provided, the Bash tool streams each output line as a
    /// `ToolOutputLine` event in real-time.
    ///
    /// NOTE(review): `bg_task_tools` definitions are registered in
    /// `with_trust` but have no arm in the match below — presumably
    /// dispatched upstream like `InvokeAgent`/`AskUser`; confirm.
    pub async fn execute(
        &self,
        name: &str,
        arguments: &str,
        sink_for_streaming: Option<(&dyn crate::engine::EngineSink, &str)>,
        // Phase E of #996: forwarded to `Bash` so that bg-shell
        // entries are tagged with the calling agent's invocation id.
        // Every other tool ignores this. Top-level callers pass `None`.
        caller_spawner: Option<u32>,
    ) -> ToolResult {
        // Normalize empty/whitespace-only args to "{}" (#513).
        let raw = arguments.trim();
        let raw = if raw.is_empty() { "{}" } else { raw };
        let args: Value = match serde_json::from_str(raw) {
            Ok(v) => v,
            Err(e) => {
                return ToolResult {
                    output: format!("Invalid JSON arguments: {e}"),
                    success: false,
                    full_output: None,
                };
            }
        };

        // Log the arg length only, not the content — args may be large
        // or contain sensitive data.
        tracing::info!(
            "Executing tool: {name} with args: [{} chars]",
            arguments.len()
        );

        // Snapshot file before mutation (for /undo). Note this uses
        // crate::undo::is_mutating_tool, not this module's free function
        // of the same name.
        if let Some(file_path) = crate::undo::is_mutating_tool(name)
            .then(|| crate::undo::extract_file_path(name, &args))
            .flatten()
        {
            let resolved = self.project_root.join(&file_path);
            if let Ok(mut undo) = self.undo.lock() {
                undo.snapshot(&resolved);
            }
        }

        let result = match name {
            // File tools
            "Read" => {
                file_tools::read_file(&self.project_root, &args, &self.read_cache, &*self.fs).await
            }
            "Write" => file_tools::write_file(&self.project_root, &args, &*self.fs).await,
            "Edit" => {
                file_tools::edit_file(&self.project_root, &args, &self.read_cache, &*self.fs).await
            }
            "Delete" => file_tools::delete_file(&self.project_root, &args).await,
            "List" => {
                file_tools::list_files(&self.project_root, &args, self.caps.list_entries).await
            }

            // Search tools
            "Grep" => {
                grep::grep(&self.project_root, &args, self.caps.grep_matches, &*self.fs).await
            }
            "Glob" => {
                glob_tool::glob_search(&self.project_root, &args, self.caps.glob_results, &*self.fs)
                    .await
            }

            // Shell — returns ShellOutput with summary + full output, so
            // it bypasses the common Ok/Err handling below via `return`.
            "Bash" => {
                let shell_result = shell::run_shell_command(
                    &self.project_root,
                    &args,
                    self.caps.shell_output_lines,
                    &self.bg_registry,
                    sink_for_streaming,
                    &self.trust,
                    self.sandbox_policy(),
                    self.proxy_port(),
                    self.socks5_port(),
                    caller_spawner,
                )
                .await;
                return match shell_result {
                    Ok(so) => {
                        // Record the invocation so validate_edit can hint at it
                        // in staleness error messages (#804 item 7).
                        // Snippet capped at 72 chars.
                        let snippet = args["command"]
                            .as_str()
                            .unwrap_or("")
                            .chars()
                            .take(72)
                            .collect::<String>();
                        if !snippet.is_empty()
                            && let Ok(mut guard) = self.last_bash.lock()
                        {
                            *guard = Some((snippet, std::time::Instant::now()));
                        }
                        ToolResult {
                            output: so.summary,
                            success: true,
                            full_output: so.full_output,
                        }
                    }
                    Err(e) => ToolResult {
                        output: format!("Error: {e}"),
                        success: false,
                        full_output: None,
                    },
                };
            }

            // Web
            "WebFetch" => web_fetch::web_fetch(&args, self.caps.web_body_chars).await,
            "WebSearch" => web_search::web_search(&args).await,
            "TodoWrite" => {
                let db_opt = self.db.read().ok().and_then(|g| g.clone());
                let sid_opt = self.session_id.read().ok().and_then(|g| g.clone());
                match (db_opt, sid_opt) {
                    (Some(db), Some(sid)) => match todo::todo_write(&db, &sid, &args).await {
                        Ok(outcome) => {
                            // #1077 Phase A: surface the transition to
                            // every client (TUI / ACP / headless) via
                            // EngineEvent::TodoUpdate. The dedup-nudge
                            // path returns an empty diff so we suppress
                            // the event there — unchanged-list writes
                            // are a no-op for clients, only a reminder
                            // for the model.
                            if !outcome.diff.is_empty()
                                && let Some((sink, _call_id)) = sink_for_streaming
                            {
                                sink.emit(crate::engine::EngineEvent::TodoUpdate {
                                    items: outcome.items.clone(),
                                    diff: outcome.diff.clone(),
                                });
                            }
                            Ok(outcome.message)
                        }
                        Err(e) => Err(e),
                    },
                    // No DB/session attached — soft failure, not an Err.
                    _ => Ok("TodoWrite requires an active session.".to_string()),
                }
            }

            // Memory
            "MemoryRead" => memory::memory_read(&self.project_root).await,
            "MemoryWrite" => memory::memory_write(&self.project_root, &args).await,

            // Agent tools
            "ListAgents" => {
                let detail = args["detail"].as_bool().unwrap_or(false);
                if detail {
                    Ok(agent::list_agents_detail(&self.project_root))
                } else {
                    let agents = agent::list_agents(&self.project_root);
                    if agents.is_empty() {
                        Ok("No sub-agents configured.".to_string())
                    } else {
                        // One "  name — desc" line per agent; non-built-in
                        // sources are tagged with "[source]".
                        let lines: Vec<String> = agents
                            .iter()
                            .map(|(name, desc, source)| {
                                if source == "built-in" {
                                    format!("  {name} — {desc}")
                                } else {
                                    format!("  {name} — {desc} [{source}]")
                                }
                            })
                            .collect();
                        Ok(lines.join("\n"))
                    }
                }
            }
            // Skill tools
            "ListSkills" => Ok(skill_tools::list_skills(&self.skill_registry, &args)),
            "ActivateSkill" => Ok(skill_tools::activate_skill(&self.skill_registry, &args)),

            // Recall context tool
            "RecallContext" => {
                let db_opt = self.db.read().ok().and_then(|g| g.clone());
                let sid_opt = self.session_id.read().ok().and_then(|g| g.clone());
                if let (Some(db), Some(sid)) = (db_opt, sid_opt) {
                    Ok(recall::recall_context(&db, &sid, &args).await)
                } else {
                    Ok("RecallContext requires an active session.".to_string())
                }
            }

            "InvokeAgent" => {
                // Handled by tool_dispatch.rs before reaching here.
                // This branch should not be reached in normal flow.
                return ToolResult {
                    output: "InvokeAgent is handled by the inference loop.".to_string(),
                    success: false,
                    full_output: None,
                };
            }

            "AskUser" => {
                // Handled by execute_tools_sequential (needs sink + cmd_rx).
                // This branch should not be reached in normal flow.
                return ToolResult {
                    output: "AskUser is handled by the inference loop.".to_string(),
                    success: false,
                    full_output: None,
                };
            }

            other => {
                // MCP tool dispatch (#662): route `server__tool` calls
                // to the appropriate MCP server.
                if crate::mcp::is_mcp_tool_name(other) {
                    if let Some(mgr) = self.mcp_manager() {
                        // Scope the read guard so it's dropped before the
                        // match on the result.
                        let result = {
                            let mgr = mgr.read().await;
                            mgr.call_tool(other, args.clone()).await
                        };
                        return match result {
                            Ok(output) => ToolResult {
                                output,
                                success: true,
                                full_output: None,
                            },
                            Err(e) => ToolResult {
                                output: format!("Error: {e}"),
                                success: false,
                                full_output: None,
                            },
                        };
                    }
                    return ToolResult {
                        output: format!(
                            "MCP tool '{other}' not available — \
                             no MCP servers connected."
                        ),
                        success: false,
                        full_output: None,
                    };
                }

                // Detect garbled tool names (JSON blobs, very long strings)
                // — a sign the model can't do structured tool calling.
                let warning = if other.contains('{') || other.len() > 64 {
                    format!(
                        "Unknown tool: {other}. \
                         This model appears to struggle with tool calling. \
                         Consider switching to a model with native function-call support."
                    )
                } else {
                    format!("Unknown tool: {other}")
                };
                Err(anyhow::anyhow!(warning))
            }
        };

        match result {
            Ok(output) => {
                // Record successful Write/Edit so the validation layer can
                // name the responsible tool in staleness error messages.
                if matches!(name, "Write" | "Edit")
                    && let Some(path) =
                        crate::file_tracker::resolve_file_path_from_args(&args, &self.project_root)
                    && let Ok(mut guard) = self.last_writer.lock()
                {
                    guard.insert(path, (name.to_string(), std::time::Instant::now()));
                }
                ToolResult {
                    output,
                    success: true,
                    full_output: None,
                }
            }
            Err(e) => ToolResult {
                output: format!("Error: {e}"),
                success: false,
                full_output: None,
            },
        }
    }
}
883
884/// Validate and resolve a path, preventing directory traversal.
885///
886/// Works for both existing and non-existing files (no `canonicalize!`).
887/// Relative paths are joined to `project_root`; absolute paths must
888/// still be within `project_root` **or** under an allowed tempdir
889/// (`/tmp`, `/private/tmp`, `/var/tmp`, or `$TMPDIR`).
890///
891/// # Examples
892///
893/// ```
894/// use koda_core::tools::safe_resolve_path;
895/// use std::path::Path;
896///
897/// let root = Path::new("/home/user/project");
898///
899/// // Relative paths resolve within project
900/// let p = safe_resolve_path(root, "src/main.rs").unwrap();
901/// assert_eq!(p, Path::new("/home/user/project/src/main.rs"));
902///
903/// // Traversal is blocked
904/// assert!(safe_resolve_path(root, "../../etc/passwd").is_err());
905///
906/// // Tempdirs are allowed (matches the kernel sandbox policy)
907/// assert!(safe_resolve_path(root, "/tmp/scratch.txt").is_ok());
908/// ```
909pub fn safe_resolve_path(project_root: &Path, requested: &str) -> Result<PathBuf> {
910 // NOTE: used only for Write / Edit / Delete. Read-only tools call
911 // resolve_path_unrestricted — see docs/src/sandbox.md for the rationale.
912 let requested_path = Path::new(requested);
913
914 // Build absolute path and normalize (removes .., . etc.)
915 let resolved = if requested_path.is_absolute() {
916 requested_path.to_path_buf().clean()
917 } else {
918 project_root.join(requested_path).clean()
919 };
920
921 // Security check: must be within project root OR an allowed tempdir.
922 // Only Write / Edit / Delete are gated here — reads are unrestricted
923 // (see resolve_path_unrestricted and docs/src/sandbox.md).
924 //
925 // The tempdir allow-list keeps in-process policy in sync with the
926 // kernel sandbox (Seatbelt on macOS, bwrap on Linux), which already
927 // permits writes to /tmp + cache dirs. Pre-fix this layer was the
928 // outlier (#947): `bash -c 'cat > /tmp/x'` succeeded but `Write /tmp/x`
929 // was rejected, blocking common scratch-file workflows.
930 if !resolved.starts_with(project_root) && !is_allowed_write_root(&resolved) {
931 anyhow::bail!(
932 "Path {requested:?} is outside the project root ({project_root:?}) \
933 and not under a writable tempdir (/tmp, /var/tmp, $TMPDIR). \
934 Write, Edit, and Delete are restricted to the project directory \
935 and tempdirs to prevent accidental modification of files \
936 elsewhere. Tell the user: to write outside these locations, \
937 restart koda from a parent directory that contains both paths."
938 );
939 }
940
941 // Defense in depth: even within an allowed tempdir, never let writes
942 // touch koda's own credential store. (`is_fully_denied` matches the
943 // path against the credential-config denylist used by the read-only
944 // tools, keeping all three perimeters — read, write, sandbox — in sync.)
945 if crate::sandbox::is_fully_denied(&resolved) {
946 anyhow::bail!(
947 "Path {requested:?} is denied: this path contains koda's \
948 internal secrets and cannot be modified by tool calls."
949 );
950 }
951
952 Ok(resolved)
953}
954
/// Returns true if `path` lives under a system tempdir that the kernel
/// sandbox (Seatbelt / bwrap) already permits writes to.
///
/// Deliberately mirrors the `(subpath "/tmp")` / `(subpath "/private/tmp")`
/// allow rules in `sandbox.rs`, so the in-process file tools accept exactly
/// the paths that `bash -c 'cat > ...'` can write.
///
/// The check is logical (no `canonicalize`) to match `safe_resolve_path`'s
/// handling of not-yet-existing files; the kernel sandbox remains the real
/// enforcer at runtime, and this helper only keeps policy symmetric.
fn is_allowed_write_root(path: &Path) -> bool {
    // Fixed roots the kernel sandbox always allows. `/private/tmp` is
    // macOS's realpath of `/tmp` (which is a symlink there).
    let under_fixed_root = ["/tmp", "/private/tmp", "/var/tmp"]
        .iter()
        .any(|root| path.starts_with(root));

    // Per-user $TMPDIR (macOS: /var/folders/.../T/, Linux: usually /tmp),
    // resolved at call time so test environments overriding TMPDIR are
    // honoured. `temp_dir()` is infallible — falls back to /tmp on Unix.
    under_fixed_root || path.starts_with(std::env::temp_dir())
}
981
982/// Normalise a path without enforcing any scope restriction.
983///
984/// Low-level primitive — **tool implementations should call
985/// [`resolve_read_path`] instead**, which adds the fully-denied list check
986/// that keeps in-process policy in sync with the subprocess sandbox.
987///
988/// Relative paths are resolved against `project_root`; absolute paths are
989/// cleaned in-place. The result may point anywhere on the filesystem.
990pub(crate) fn resolve_path_unrestricted(project_root: &Path, requested: &str) -> PathBuf {
991 let path = Path::new(requested);
992 if path.is_absolute() {
993 path.to_path_buf().clean()
994 } else {
995 project_root.join(path).clean()
996 }
997}
998
999/// Normalise a read-only path and enforce the fully-denied list.
1000///
1001/// This is the entry-point for **all read-only tools** (Read, List, Grep,
1002/// Glob). It wraps `resolve_path_unrestricted` with a check against
1003/// `sandbox::is_fully_denied` so that the same paths blocked by the
1004/// subprocess sandbox (bwrap / Seatbelt) are also blocked when the model
1005/// accesses them through in-process tools.
1006///
1007/// Currently the only denied path is `~/.config/koda/db` — koda's own SQLite
1008/// database containing plaintext API keys. Ordinary credential directories
1009/// (`~/.ssh`, `~/.aws`, …) are readable, matching the Bash sandbox policy.
1010///
1011/// See issue #884 for Option B (OS-level enforcement via sandboxed worker).
1012pub fn resolve_read_path(project_root: &Path, requested: &str) -> Result<PathBuf> {
1013 let resolved = resolve_path_unrestricted(project_root, requested);
1014 if crate::sandbox::is_fully_denied(&resolved) {
1015 anyhow::bail!(
1016 "Access to {requested:?} is denied: this path contains koda's \
1017 internal secrets and cannot be read by model tool calls."
1018 );
1019 }
1020 Ok(resolved)
1021}
1022
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Fixture project root. Purely logical — these tests never touch the
    /// real filesystem, because `safe_resolve_path` never stats paths.
    fn root() -> PathBuf {
        PathBuf::from("/home/user/project")
    }

    // ── Phase 3b: proxy port wiring (Bash → sandbox::build) ──────────

    #[test]
    fn proxy_port_defaults_to_none() {
        // Standalone ToolRegistry (no KodaSession) starts with no port —
        // production sessions overwrite this in `KodaSession::new`.
        let registry = ToolRegistry::new(root(), 100_000);
        assert_eq!(registry.proxy_port(), None);
    }

    #[test]
    fn proxy_port_round_trips_through_setter() {
        let registry = ToolRegistry::new(root(), 100_000);
        registry.set_proxy_port(Some(31415));
        assert_eq!(registry.proxy_port(), Some(31415));
    }

    // ── Phase 3d.2: SOCKS5 port wiring (Bash → sandbox::build) ───────

    #[test]
    fn socks5_port_defaults_to_none() {
        let registry = ToolRegistry::new(root(), 100_000);
        assert_eq!(registry.socks5_port(), None);
    }

    #[test]
    fn socks5_port_round_trips_through_setter() {
        let registry = ToolRegistry::new(root(), 100_000);
        registry.set_socks5_port(Some(27182));
        assert_eq!(registry.socks5_port(), Some(27182));
    }

    #[test]
    fn socks5_and_http_ports_are_independent() {
        // Setting one must not clobber the other — the two proxies are
        // spawned independently and may live or die independently.
        let registry = ToolRegistry::new(root(), 100_000);
        registry.set_proxy_port(Some(8080));
        registry.set_socks5_port(Some(1080));
        assert_eq!(registry.proxy_port(), Some(8080));
        assert_eq!(registry.socks5_port(), Some(1080));
        registry.set_proxy_port(None);
        assert_eq!(registry.socks5_port(), Some(1080));
    }

    // ── safe_resolve_path: traversal + write-perimeter checks ────────

    #[test]
    fn test_relative_path_resolves_inside_root() {
        let result = safe_resolve_path(&root(), "src/main.rs").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project/src/main.rs"));
    }

    #[test]
    fn test_dot_path_resolves_to_root() {
        let result = safe_resolve_path(&root(), ".").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project"));
    }

    #[test]
    fn test_new_file_in_new_dir_resolves() {
        // Logical (no-canonicalize) resolution: parent dirs need not exist.
        let result = safe_resolve_path(&root(), "src/brand_new/feature.rs").unwrap();
        assert_eq!(
            result,
            PathBuf::from("/home/user/project/src/brand_new/feature.rs")
        );
    }

    #[test]
    fn test_dotdot_traversal_blocked() {
        let result = safe_resolve_path(&root(), "../../etc/passwd");
        assert!(result.is_err());
    }

    #[test]
    fn test_dotdot_sneaky_traversal_blocked() {
        // `..` buried mid-path must clean out of the root and still be caught.
        let result = safe_resolve_path(&root(), "src/../../etc/passwd");
        assert!(result.is_err());
    }

    #[test]
    fn test_absolute_path_inside_root_allowed() {
        let result = safe_resolve_path(&root(), "/home/user/project/src/lib.rs").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project/src/lib.rs"));
    }

    #[test]
    fn test_absolute_path_outside_root_blocked() {
        let result = safe_resolve_path(&root(), "/etc/shadow");
        assert!(result.is_err());
    }

    #[test]
    fn test_outside_root_error_is_actionable_for_user() {
        let err = safe_resolve_path(&root(), "../../etc/passwd").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("outside the project root"),
            "error must say 'outside the project root'; got: {msg}"
        );
        assert!(
            msg.contains("Tell the user"),
            "error must direct model to surface this to the user; got: {msg}"
        );
        // Must NOT suggest Bash — that would bypass the file-tool safety layer.
        assert!(
            !msg.contains("Bash"),
            "error must not suggest Bash as a workaround; got: {msg}"
        );
    }

    #[test]
    fn test_empty_path_resolves_to_root() {
        let result = safe_resolve_path(&root(), "").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project"));
    }

    // ── resolve_read_path ──────────────────────────────────────────────────

    #[test]
    fn read_path_allows_project_file() {
        let p = resolve_read_path(&root(), "src/lib.rs").unwrap();
        assert_eq!(p, PathBuf::from("/home/user/project/src/lib.rs"));
    }

    #[test]
    fn read_path_allows_outside_project() {
        // Reads outside the project root are intentionally unrestricted.
        let p = resolve_read_path(&root(), "/etc/hosts").unwrap();
        assert_eq!(p, PathBuf::from("/etc/hosts"));
    }

    #[test]
    fn read_path_blocks_koda_db() {
        let home = std::env::var("HOME").unwrap_or_else(|_| "/home/user".into());
        let koda_db = format!("{home}/.config/koda/db/koda.db");
        let err = resolve_read_path(&root(), &koda_db).unwrap_err();
        assert!(
            err.to_string().contains("denied"),
            "expected 'denied' in error, got: {err}"
        );
    }

    // ── #947: writes to tempdirs ─────────────────────────────────────
    //
    // The kernel sandbox (Seatbelt / bwrap) explicitly permits writes to
    // /tmp + cache dirs. Pre-fix, `safe_resolve_path` rejected absolute
    // paths outside `project_root`, so `bash -c 'cat > /tmp/x'` succeeded
    // but `Write /tmp/x` failed — forcing models into shell heredoc
    // workarounds that often quote-escape badly. These tests lock in the
    // symmetry between the two perimeters.

    #[test]
    fn write_path_allows_tmp() {
        let p = safe_resolve_path(&root(), "/tmp/koda-scratch.txt").unwrap();
        assert_eq!(p, PathBuf::from("/tmp/koda-scratch.txt"));
    }

    #[test]
    fn write_path_allows_private_tmp_macos_realpath() {
        // macOS resolves /tmp → /private/tmp via a symlink. Some tools (`find`,
        // `realpath`) emit the realpath form, so absolute paths beginning
        // with /private/tmp must also be accepted.
        let p = safe_resolve_path(&root(), "/private/tmp/koda-scratch.txt").unwrap();
        assert_eq!(p, PathBuf::from("/private/tmp/koda-scratch.txt"));
    }

    #[test]
    fn write_path_allows_var_tmp() {
        let p = safe_resolve_path(&root(), "/var/tmp/koda-scratch.txt").unwrap();
        assert_eq!(p, PathBuf::from("/var/tmp/koda-scratch.txt"));
    }

    #[test]
    fn write_path_allows_per_user_tmpdir() {
        // Whatever `std::env::temp_dir()` returns on this host — macOS gives
        // /var/folders/.../T/, Linux usually /tmp. Either way it's writable.
        let tmpdir = std::env::temp_dir();
        let target = tmpdir.join("koda-scratch.txt");
        let p = safe_resolve_path(&root(), target.to_str().unwrap()).unwrap();
        assert_eq!(p, target.clean());
    }

    #[test]
    fn write_path_blocks_etc_hosts() {
        // System config dirs stay denied — only tempdirs are added.
        let err = safe_resolve_path(&root(), "/etc/hosts").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("outside the project root"),
            "system paths must still be rejected; got: {msg}"
        );
    }

    #[test]
    fn write_path_blocks_ssh_authorized_keys() {
        // Credential dirs in $HOME stay denied — they're outside both
        // project_root and any tempdir, so the existing perimeter holds.
        let home = std::env::var("HOME").unwrap_or_else(|_| "/home/user".into());
        let target = format!("{home}/.ssh/authorized_keys");
        assert!(
            safe_resolve_path(&root(), &target).is_err(),
            "~/.ssh writes must remain blocked"
        );
    }

    #[test]
    fn write_path_blocks_koda_db_even_via_tmp_traversal() {
        // Defense in depth: even if a model crafts a path that lands in a
        // tempdir but cleans into koda's own credential store, `is_fully_denied`
        // catches it. Constructed path: `/tmp/../<home>/.config/koda/db/x`
        // cleans to `<home>/.config/koda/db/x` — NOT a tempdir, NOT project,
        // hits the standard "outside the project root" path. So this is
        // already covered by the primary check; this test pins it down.
        let home = std::env::var("HOME").unwrap_or_else(|_| "/home/user".into());
        let target = format!("/tmp/../{home}/.config/koda/db/koda.db");
        assert!(
            safe_resolve_path(&root(), &target).is_err(),
            "traversal out of /tmp must not bypass the gate"
        );
    }

    #[test]
    fn write_path_traversal_inside_tmp_stays_in_tmp() {
        // /tmp/foo/../bar cleans to /tmp/bar — still in /tmp, still allowed.
        let p = safe_resolve_path(&root(), "/tmp/foo/../bar").unwrap();
        assert_eq!(p, PathBuf::from("/tmp/bar"));
    }

    // ── #1077 Phase A: TodoWrite event-emission contract ─────────
    //
    // The dispatch arm in `execute()` must:
    //   1. emit `EngineEvent::TodoUpdate` with structured items+diff on
    //      accepted writes that change the persisted list;
    //   2. emit nothing on the dedup-nudge path (empty diff);
    //   3. always return the model-facing message string regardless.
    //
    // These are the contract a future TUI / ACP renderer will rely on.
    // If you find yourself loosening any of them, revisit `DESIGN.md
    // § Progress Tracking: Model-Owned, History-Persisted,
    // Engine-Surfaced` first — the suppression rule in particular is
    // load-bearing for not spamming clients on idempotent rewrites.

    /// Build a `ToolRegistry` wired to a throwaway DB + fresh session,
    /// mirroring the wiring `KodaSession::new` performs. Returns the
    /// `TempDir` so it stays alive for the duration of the test.
    async fn registry_with_session() -> (
        ToolRegistry,
        tempfile::TempDir,
        std::sync::Arc<crate::db::Database>,
        String,
    ) {
        use crate::persistence::Persistence;
        let dir = tempfile::TempDir::new().unwrap();
        let db = std::sync::Arc::new(
            crate::db::Database::open(&dir.path().join("test.db"))
                .await
                .unwrap(),
        );
        let sid = db.create_session("koda", dir.path()).await.unwrap();
        let registry = ToolRegistry::new(dir.path().to_path_buf(), 100_000);
        // Wire DB + session id the same way KodaSession::new does.
        *registry.db.write().unwrap() = Some(db.clone());
        *registry.session_id.write().unwrap() = Some(sid.clone());
        (registry, dir, db, sid)
    }

    #[tokio::test]
    async fn todo_write_emits_todo_update_event_on_first_write() {
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let sink = crate::engine::sink::TestSink::new();
        let result = registry
            .execute(
                "TodoWrite",
                r#"{"todos":[{"content":"Add tests","status":"pending","priority":"high"}]}"#,
                Some((&sink, "call-1")),
                None,
            )
            .await;
        assert!(result.success, "first write must succeed: {result:?}");
        assert_eq!(sink.len(), 1, "first write must emit exactly one event");
        match &sink.events()[0] {
            crate::engine::EngineEvent::TodoUpdate { items, diff } => {
                assert_eq!(items.len(), 1);
                assert_eq!(items[0].content, "Add tests");
                assert_eq!(diff.added.len(), 1, "first write → everything in added");
                assert!(diff.changed.is_empty());
                assert!(diff.removed.is_empty());
            }
            other => panic!("expected TodoUpdate, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn todo_write_suppresses_event_on_unchanged_rewrite() {
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let payload = r#"{"todos":[{"content":"A","status":"pending","priority":"high"}]}"#;

        // First write: should emit.
        let sink1 = crate::engine::sink::TestSink::new();
        registry
            .execute("TodoWrite", payload, Some((&sink1, "c1")), None)
            .await;
        assert_eq!(sink1.len(), 1);

        // Identical second write: must NOT emit. The dedup-nudge
        // message goes back to the model, but clients see nothing.
        let sink2 = crate::engine::sink::TestSink::new();
        let result2 = registry
            .execute("TodoWrite", payload, Some((&sink2, "c2")), None)
            .await;
        assert!(result2.success);
        assert!(
            result2.output.contains("unchanged"),
            "model-facing message must still nudge: {}",
            result2.output
        );
        assert_eq!(
            sink2.len(),
            0,
            "unchanged rewrite must NOT emit a TodoUpdate event"
        );
    }

    #[tokio::test]
    async fn todo_write_returns_model_message_even_without_sink() {
        // Production paths sometimes call `execute` with `None` for
        // the sink (top-level tool runs that aren't streaming). Must
        // still succeed and return the formatted message.
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let result = registry
            .execute(
                "TodoWrite",
                r#"{"todos":[{"content":"X","status":"pending","priority":"low"}]}"#,
                None,
                None,
            )
            .await;
        assert!(result.success);
        assert!(result.output.contains("0/1 done"));
    }

    #[tokio::test]
    async fn todo_write_rejects_two_in_progress_at_dispatch() {
        // Engine-enforced single-in-progress: must surface as a
        // failed ToolResult, not a successful one with a warning.
        // Models notice failures more reliably than warnings.
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let sink = crate::engine::sink::TestSink::new();
        let result = registry
            .execute(
                "TodoWrite",
                r#"{"todos":[
                    {"content":"A","status":"in_progress","priority":"high"},
                    {"content":"B","status":"in_progress","priority":"medium"}
                ]}"#,
                Some((&sink, "c1")),
                None,
            )
            .await;
        assert!(
            !result.success,
            "two in_progress must produce a failed ToolResult"
        );
        assert!(
            result.output.contains("Only one task"),
            "failure message must explain the rule: {}",
            result.output
        );
        assert_eq!(sink.len(), 0, "failed validation must not emit an event");
    }
}
1399
1400// ── Tool action descriptions ──────────────────────────────────
1401
1402/// Generate a human-readable description of a tool action for approval prompts.
1403pub fn describe_action(tool_name: &str, args: &serde_json::Value) -> String {
1404 match tool_name {
1405 "Bash" => {
1406 let cmd = args
1407 .get("command")
1408 .or(args.get("cmd"))
1409 .and_then(|v| v.as_str())
1410 .unwrap_or("?");
1411 let bg = args
1412 .get("background")
1413 .and_then(|v| v.as_bool())
1414 .unwrap_or(false);
1415 if bg {
1416 format!("[bg] {cmd}")
1417 } else {
1418 cmd.to_string()
1419 }
1420 }
1421 "Delete" => {
1422 let path = args
1423 .get("file_path")
1424 .or(args.get("path"))
1425 .and_then(|v| v.as_str())
1426 .unwrap_or("?");
1427 let recursive = args
1428 .get("recursive")
1429 .and_then(|v| v.as_bool())
1430 .unwrap_or(false);
1431 if recursive {
1432 format!("Delete directory (recursive): {path}")
1433 } else {
1434 format!("Delete: {path}")
1435 }
1436 }
1437 "Write" => {
1438 let path = args
1439 .get("path")
1440 .or(args.get("file_path"))
1441 .and_then(|v| v.as_str())
1442 .unwrap_or("?");
1443 let overwrite = args
1444 .get("overwrite")
1445 .and_then(|v| v.as_bool())
1446 .unwrap_or(false);
1447 if overwrite {
1448 format!("Overwrite file: {path}")
1449 } else {
1450 format!("Create file: {path}")
1451 }
1452 }
1453 "Edit" => {
1454 let path = if let Some(payload) = args.get("payload") {
1455 payload
1456 .get("file_path")
1457 .or(payload.get("path"))
1458 .and_then(|v| v.as_str())
1459 .unwrap_or("?")
1460 } else {
1461 args.get("file_path")
1462 .or(args.get("path"))
1463 .and_then(|v| v.as_str())
1464 .unwrap_or("?")
1465 };
1466 format!("Edit file: {path}")
1467 }
1468 "WebFetch" => {
1469 let url = args.get("url").and_then(|v| v.as_str()).unwrap_or("?");
1470 format!("Fetch URL: {url}")
1471 }
1472 "WebSearch" => {
1473 let q = args.get("query").and_then(|v| v.as_str()).unwrap_or("?");
1474 format!("Web search: {q}")
1475 }
1476 "TodoWrite" => {
1477 let n = args
1478 .get("todos")
1479 .and_then(|v| v.as_array())
1480 .map(|a| a.len())
1481 .unwrap_or(0);
1482 format!("Update todo list ({n} tasks)")
1483 }
1484 "MemoryWrite" => {
1485 let fact = args.get("fact").and_then(|v| v.as_str()).unwrap_or("?");
1486 let preview = if fact.len() > 60 {
1487 format!("{}…", &fact[..57])
1488 } else {
1489 fact.to_string()
1490 };
1491 format!("Save to memory: {preview}")
1492 }
1493 _ => format!("Execute: {tool_name}"),
1494 }
1495}
1496
#[cfg(test)]
mod describe_action_tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_describe_bash() {
        let desc = describe_action("Bash", &json!({"command": "cargo build"}));
        assert!(desc.contains("cargo build"));
    }

    #[test]
    fn test_describe_delete() {
        let desc = describe_action("Delete", &json!({"file_path": "old.rs"}));
        assert!(desc.contains("old.rs"));
    }

    #[test]
    fn test_describe_edit() {
        // Edit args can arrive nested under `payload` — the path must
        // still be surfaced in the description.
        let desc = describe_action("Edit", &json!({"payload": {"file_path": "src/main.rs"}}));
        assert!(desc.contains("src/main.rs"));
    }

    #[test]
    fn test_describe_write() {
        let desc = describe_action("Write", &json!({"path": "new.rs"}));
        assert!(desc.contains("Create file"));
        assert!(desc.contains("new.rs"));
    }

    #[test]
    fn test_describe_write_overwrite() {
        // `overwrite: true` flips the verb from "Create" to "Overwrite".
        let desc = describe_action("Write", &json!({"path": "x.rs", "overwrite": true}));
        assert!(desc.contains("Overwrite"));
    }

    #[test]
    fn test_get_definitions_deny_list() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        let denied = vec![
            "Write".to_string(),
            "Edit".to_string(),
            "Delete".to_string(),
        ];
        let defs = registry.get_definitions(&[], &denied);
        let names: Vec<&str> = defs.iter().map(|d| d.name.as_str()).collect();
        assert!(!names.contains(&"Write"));
        assert!(!names.contains(&"Edit"));
        assert!(!names.contains(&"Delete"));
        assert!(names.contains(&"Read"));
        assert!(names.contains(&"Grep"));
    }

    #[test]
    fn test_get_definitions_allow_list_wins_over_deny() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        let allowed = vec!["Read".to_string(), "Write".to_string()];
        let denied = vec!["Write".to_string()];
        // allow wins — Write should be present
        let defs = registry.get_definitions(&allowed, &denied);
        let names: Vec<&str> = defs.iter().map(|d| d.name.as_str()).collect();
        assert_eq!(names.len(), 2);
        assert!(names.contains(&"Read"));
        assert!(names.contains(&"Write"));
    }

    #[test]
    fn test_get_definitions_both_empty_returns_all() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        let all = registry.get_definitions(&[], &[]);
        assert!(all.len() > 10, "Should have many tools");
    }

    // ── Phase 5 PR-2 of #934: SandboxPolicy threading on ToolRegistry ──
    //
    // The Bash dispatch path now reads `self.sandbox_policy()` instead
    // of synthesizing `strict_default()` inline. These tests pin:
    //   1. The default seed is `strict_default()` so unchanged callers
    //      preserve byte-for-byte behavior.
    //   2. `with_sandbox_policy` actually replaces the field (the
    //      threading is real, not a stub).
    //   3. The accessor returns the most recent setter's value (no
    //      caching/aliasing surprises).

    #[test]
    fn registry_sandbox_policy_defaults_to_strict() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        assert_eq!(
            *registry.sandbox_policy(),
            koda_sandbox::SandboxPolicy::strict_default(),
            "PR-2 contract: ToolRegistry::new must seed strict_default() so \
             pre-PR callers see unchanged behavior"
        );
    }

    #[test]
    fn with_sandbox_policy_overrides_the_default() {
        // Build a deliberately-non-default policy by mutating one field.
        // We don't care which field — only that round-tripping through
        // `with_sandbox_policy` preserves the override and the default
        // would not match.
        let mut custom = koda_sandbox::SandboxPolicy::strict_default();
        custom
            .fs
            .allow_write
            .push(koda_sandbox::PathPattern::new("/pr2-marker"));

        let registry =
            ToolRegistry::new(PathBuf::from("/tmp"), 128_000).with_sandbox_policy(custom.clone());

        assert_eq!(
            *registry.sandbox_policy(),
            custom,
            "with_sandbox_policy must replace the field, not no-op"
        );
        assert_ne!(
            *registry.sandbox_policy(),
            koda_sandbox::SandboxPolicy::strict_default(),
            "sanity: the override is observably different from the default"
        );
    }
}