koda_core/tools/mod.rs
1//! Tool registry and execution engine.
2//!
3//! Each tool is a function that takes JSON arguments and returns a string result.
4//! Path validation is enforced here to prevent directory traversal.
5//!
6//! ## Available tools
7//!
8//! | Tool | Module | Effect | Description |
9//! |---|---|---|---|
10//! | **Read** | `file_tools` | ReadOnly | Read file contents with line numbers |
11//! | **Write** | `file_tools` | LocalMutation | Create or overwrite a file |
12//! | **Edit** | `file_tools` | LocalMutation | Find-and-replace in an existing file |
13//! | **Delete** | `file_tools` | Destructive | Delete a file |
14//! | **List** | `file_tools` | ReadOnly | List files and directories |
15//! | **Bash** | `shell` | LocalMutation | Execute shell commands (with background mode) |
16//! | **Grep** | `grep` | ReadOnly | Recursive text search (respects .gitignore) |
17//! | **Glob** | `glob_tool` | ReadOnly | Find files by glob pattern |
//! | **WebFetch** | `web_fetch` | ReadOnly | Fetch URL content (HTML→text; GET-only) |
//! | **WebSearch** | `web_search` | ReadOnly | Web search via DuckDuckGo |
//! | **InvokeAgent** | `agent` | ReadOnly | Delegate task to a sub-agent (inherits parent's mode) |
21//! | **ListAgents** | `agent` | ReadOnly | List available sub-agents |
22//! | **MemoryRead** | `memory` | ReadOnly | Read project/global memory |
23//! | **MemoryWrite** | `memory` | LocalMutation | Save facts to memory |
24//! | **TodoWrite** | `todo` | LocalMutation | Update task list |
25//! | **AskUser** | `ask_user` | ReadOnly | Ask the user a question |
26//! | **ActivateSkill** | `skills` | ReadOnly | Load a skill's instructions |
27//! | **ListSkills** | `skills` | ReadOnly | List available skills |
28//! | **ListBackgroundTasks** | `bg_task_tools` | ReadOnly | Snapshot background tasks owned by the caller |
29//! | **CancelTask** | `bg_task_tools` | ReadOnly | Cancel a background agent or process |
30//! | **WaitTask** | `bg_task_tools` | ReadOnly | Block until a background task finishes (max 300 s) |
31//!
32//! ## Safety model
33//!
34//! Every tool call is classified by `ToolEffect` and checked against the
35//! current approval mode before execution. See
36//! `classify_tool` for the effect of each tool.
37
/// Effect classification for tool calls.
///
/// Two-axis model: what does the tool touch (local vs. remote)
/// and how severe are its effects (read vs. mutate vs. destroy)?
///
/// Variants serialize with their PascalCase names (e.g. `"ReadOnly"`),
/// via `#[serde(rename_all = "PascalCase")]`.
///
/// # Examples
///
/// ```
/// use koda_core::tools::{ToolEffect, classify_tool};
///
/// assert_eq!(classify_tool("Read"), ToolEffect::ReadOnly);
/// assert_eq!(classify_tool("Write"), ToolEffect::LocalMutation);
/// assert_eq!(classify_tool("Delete"), ToolEffect::Destructive);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "PascalCase")]
pub enum ToolEffect {
    /// No side-effects: file reads, grep, git status.
    ReadOnly,
    /// Side-effects on remote services only: GitHub API, WebFetch POST.
    RemoteAction,
    /// Mutates local filesystem or state: Write, Edit, Delete, MemoryWrite.
    LocalMutation,
    /// Irreversible or high-blast-radius: rm -rf, git push --force, DROP TABLE.
    Destructive,
}
64
65/// Classify a built-in tool by name.
66///
67/// For `Bash`, this returns the *default* classification (`LocalMutation`);
68/// the actual effect depends on the command string and must be refined
69/// via [`crate::bash_safety::classify_bash_command`].
70///
71/// Unknown tools default to `LocalMutation` (conservative — always asks).
72///
73/// For MCP tools (names containing `__`), call
74/// [`ToolRegistry::classify_tool_with_mcp`] instead to use server-provided
75/// annotations.
76pub fn classify_tool(name: &str) -> ToolEffect {
77 match name {
78 // Pure reads — zero side-effects
79 "Read" | "List" | "Grep" | "Glob" | "MemoryRead" | "ListAgents" | "ListSkills"
80 | "ActivateSkill" | "RecallContext" | "AskUser" => ToolEffect::ReadOnly,
81
82 // Remote actions — side-effects on remote services only
83 "WebFetch" => ToolEffect::ReadOnly, // GET-only fetch
84 "WebSearch" => ToolEffect::ReadOnly, // read-only search
85 "InvokeAgent" => ToolEffect::ReadOnly, // sub-agents inherit parent's mode
86
87 // Background task management (Layer 2 of #996). ListBackgroundTasks
88 // is a pure read; CancelTask / WaitTask signal but don't write
89 // files — they're idempotent observation/control of work the
90 // model already started. Treating as ReadOnly avoids an approval
91 // prompt every time the model checks on a bg task.
92 "ListBackgroundTasks" | "CancelTask" | "WaitTask" => ToolEffect::ReadOnly,
93
94 // Local mutations — write to filesystem or local state
95 "Write" | "Edit" | "MemoryWrite" | "TodoWrite" => ToolEffect::LocalMutation,
96
97 // Bash — default to LocalMutation; refined by classify_bash_command()
98 "Bash" => ToolEffect::LocalMutation,
99
100 // Delete is destructive (irreversible without undo)
101 "Delete" => ToolEffect::Destructive,
102
103 // MCP tools — use annotations-based classification.
104 name if crate::mcp::is_mcp_tool_name(name) => ToolEffect::RemoteAction,
105
106 // Unknown tools — default to LocalMutation (conservative)
107 _ => ToolEffect::LocalMutation,
108 }
109}
110
111/// Returns true if the tool performs a mutating operation.
112///
113/// Convenience wrapper over [`classify_tool`] for call sites that only
114/// need a bool (e.g., loop guard).
115///
116/// ```
117/// use koda_core::tools::is_mutating_tool;
118///
119/// assert!(!is_mutating_tool("Read"));
120/// assert!(is_mutating_tool("Write"));
121/// assert!(is_mutating_tool("Delete"));
122/// ```
123pub fn is_mutating_tool(name: &str) -> bool {
124 !matches!(classify_tool(name), ToolEffect::ReadOnly)
125}
126
/// Sub-agent invocation tools (`InvokeAgent`, `ListAgents`).
pub mod agent;
/// User-question tool (`AskUser`). Note: executed by the inference loop,
/// not by `ToolRegistry::execute` (see the `AskUser` arm there).
pub mod ask_user;
/// Background process registry — tracks `Bash` runs spawned with
/// `background: true` (see `ToolRegistry::bg_registry`).
pub mod bg_process;
/// Background-task management tools — `ListBackgroundTasks`,
/// `CancelTask`, `WaitTask` (Layer 2 of #996).
pub mod bg_task_tools;
/// File CRUD tools (`Read`, `Write`, `Edit`, `Delete`, `List`).
pub mod file_tools;
/// Fuzzy-matching helpers. NOTE(review): not registered as a tool here;
/// presumably supports approximate matching for file tools — confirm.
pub mod fuzzy;
/// Glob pattern search tool (`Glob`).
pub mod glob_tool;
/// Recursive text search tool (`Grep`).
pub mod grep;
/// Project memory read/write tools (`MemoryRead`, `MemoryWrite`).
pub mod memory;
/// On-demand conversation history retrieval (`RecallContext`).
pub mod recall;
/// Shell command execution tool (`Bash`).
pub mod shell;
/// Skill discovery and activation tools (`ListSkills`, `ActivateSkill`).
pub mod skill_tools;
/// Session-scoped task list tool (`TodoWrite`).
pub mod todo;
/// Pre-flight validation for tool calls (runs before approval).
pub mod validate;
/// HTTP fetch tool (`WebFetch`).
pub mod web_fetch;
/// Web search tool (`WebSearch`).
pub mod web_search;
157
158use anyhow::Result;
159use koda_sandbox::fs::{FileSystem, LocalFileSystem};
160use path_clean::PathClean;
161use serde_json::Value;
162use std::collections::HashMap;
163use std::path::{Path, PathBuf};
164use std::sync::Arc;
165use std::time::SystemTime;
166
167use crate::output_caps::OutputCaps;
168
169use crate::providers::ToolDefinition;
170
/// Shared file-read cache: tracks `(size, mtime, sha256_hex)` per cache key.
///
/// The SHA-256 field is populated on full-file reads and used by `edit_file`
/// to detect whether the file changed between when the model last read it and
/// when it attempts an edit (Gemini CLI strategy, better than mtime-only because
/// mtime has 1-second granularity and can miss sub-second bash mutations).
///
/// `sha256_hex` is empty for line-range reads where only a slice was fetched.
///
/// Wrapped in `Arc` so parent and sub-agent `ToolRegistry` instances
/// share the same cache — reads by one agent benefit all others.
pub type FileReadCache = Arc<std::sync::Mutex<HashMap<String, (u64, SystemTime, String)>>>;

/// Tracks which tool last wrote each absolute file path.
///
/// Keyed by canonical `PathBuf`; value is `(tool_name, when)` using a
/// monotonic `Instant`. Populated on every successful Write and Edit so
/// the validation layer can include the responsible tool in staleness
/// error messages (#804 item 7).
pub type LastWriterCache = Arc<std::sync::Mutex<HashMap<PathBuf, (String, std::time::Instant)>>>;

/// Tracks the most recent successful Bash invocation.
///
/// Stores `(command_snippet, when)`. Only the latest call is kept — enough
/// context to tell the model "Bash ran 2s ago, it may have changed the file".
/// The snippet is capped at 72 chars when recorded (see `execute`'s Bash arm).
pub type LastBashCache = Arc<std::sync::Mutex<Option<(String, std::time::Instant)>>>;
197
/// Result of executing a tool.
///
/// The `success` field is set automatically by `ToolRegistry::execute()` —
/// `Ok(…)` → `true`, `Err(…)` → `false`. Individual tool functions just
/// return `Result<String>`.
///
/// ```
/// use koda_core::tools::ToolResult;
///
/// let ok = ToolResult { output: "done".into(), success: true, full_output: None };
/// assert!(ok.success);
/// ```
#[derive(Debug, Clone)]
pub struct ToolResult {
    /// The tool's output string (model-facing; may be a summary for Bash).
    pub output: String,
    /// Whether the tool executed successfully.
    ///
    /// Set automatically by `ToolRegistry::execute()` — `Ok(…)` → `true`,
    /// `Err(…)` → `false`. Individual tools never set this directly;
    /// they just return `Result<String>`.
    pub success: bool,
    /// Full untruncated output, stored separately in DB for later retrieval.
    ///
    /// Only populated by Bash when output exceeds the summary threshold.
    /// `RecallContext` can search this to retrieve details the model didn't
    /// see in its context window.
    pub full_output: Option<String>,
}
227
/// The tool registry: maps tool names to their definitions and handlers.
///
/// One registry exists per agent; sub-agents get their own instance but
/// may share the parent's `FileReadCache` (see `with_shared_cache`).
pub struct ToolRegistry {
    /// Root against which relative tool paths are resolved.
    project_root: PathBuf,
    /// Built-in tool definitions, keyed by tool name.
    definitions: HashMap<String, ToolDefinition>,
    /// Shared `(size, mtime, sha256)` cache for file reads.
    read_cache: FileReadCache,
    /// Filesystem abstraction — `LocalFileSystem` by default; swap to
    /// `SandboxedFileSystem` when a sandbox slot is active (Phase 2d, #934).
    /// Explicit `+ Send + Sync` is required: trait objects don't
    /// auto-inherit auto-traits from the supertrait, so without these
    /// bounds `ToolRegistry` becomes `!Send` and any future holding
    /// it (e.g. `execute_sub_agent`) cannot be `tokio::spawn`'d.
    fs: Arc<dyn FileSystem + Send + Sync>,
    /// Per-file last-writer tracking for richer staleness errors (#804 item 7).
    last_writer: LastWriterCache,
    /// Most recent Bash invocation for staleness error context (#804 item 7).
    last_bash: LastBashCache,
    /// Undo stack for file mutations.
    pub undo: std::sync::Mutex<crate::undo::UndoStack>,
    /// Discovered skills.
    pub skill_registry: crate::skills::SkillRegistry,
    /// Database handle for tools that need session access (RecallContext).
    db: std::sync::RwLock<Option<std::sync::Arc<crate::db::Database>>>,
    /// Current session ID (for RecallContext).
    session_id: std::sync::RwLock<Option<String>>,
    /// Context-scaled output caps for all tools.
    pub caps: OutputCaps,
    /// Background process registry — tracks processes spawned with `background: true`.
    /// Dropped (SIGTERM all) when the session ends.
    pub bg_registry: bg_process::BgRegistry,
    /// Trust mode — determines sandbox configuration for Bash tool.
    trust: crate::trust::TrustMode,
    /// Active sandbox policy. Phase 5 PR-2 of #934 wires this through
    /// the Bash dispatch path so per-agent variation becomes possible.
    /// Today every constructor seeds it with `SandboxPolicy::strict_default()`
    /// so behavior is byte-for-byte unchanged — PR-3 starts populating it
    /// with non-default values via [`crate::sandbox::policy_for_agent`].
    sandbox_policy: koda_sandbox::SandboxPolicy,
    /// MCP connection manager — owns all MCP server connections (#662).
    /// `None` until attached via `set_mcp_manager()`.
    mcp_manager: std::sync::RwLock<Option<Arc<tokio::sync::RwLock<crate::mcp::McpManager>>>>,
    /// Loopback port of the per-session HTTP CONNECT proxy (Phase 3b of
    /// #934). When `Some`, [`crate::sandbox::build`] attaches the
    /// canonical `HTTPS_PROXY`/`NO_PROXY`/etc. env-var bouquet to every
    /// Bash invocation so child processes route HTTP through the proxy.
    /// `None` (default) preserves the pre-3b unfiltered behavior —
    /// session code opts in by calling [`Self::set_proxy_port`].
    proxy_port: std::sync::RwLock<Option<u16>>,
    /// Loopback port of the per-session SOCKS5 proxy (Phase 3d.1 of
    /// #934). When `Some`, [`crate::sandbox::build`] appends
    /// `ALL_PROXY=socks5h://127.0.0.1:port` (+ lowercase alias) so
    /// raw-TCP clients (git over ssh, gRPC) that ignore `HTTPS_PROXY`
    /// also route through the hostname-filtered proxy. Independent
    /// from `proxy_port` so tests can attach one without the other.
    socks5_port: std::sync::RwLock<Option<u16>>,
}
283
impl ToolRegistry {
    /// Create a new registry with all built-in tools.
    ///
    /// `max_context_tokens` scales all output caps (see `OutputCaps`).
    /// Uses the `Safe` trust mode; use [`Self::with_trust`] to pick another.
    pub fn new(project_root: PathBuf, max_context_tokens: usize) -> Self {
        Self::with_trust(
            project_root,
            max_context_tokens,
            crate::trust::TrustMode::Safe,
        )
    }

    /// Create a new registry with a specific trust mode.
    ///
    /// Registers every built-in tool definition, discovers skills under
    /// `project_root`, and seeds all shared caches empty.
    pub fn with_trust(
        project_root: PathBuf,
        max_context_tokens: usize,
        trust: crate::trust::TrustMode,
    ) -> Self {
        let mut definitions = HashMap::new();

        // Register all built-in tools. Later inserts with duplicate names
        // would silently overwrite earlier ones — tool names are assumed
        // unique across modules.
        for def in file_tools::definitions() {
            definitions.insert(def.name.clone(), def);
        }

        for def in grep::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in shell::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in agent::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in bg_task_tools::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in ask_user::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in glob_tool::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in web_fetch::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in web_search::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in todo::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in memory::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        for def in skill_tools::definitions() {
            definitions.insert(def.name.clone(), def);
        }
        // RecallContext — on-demand history retrieval
        let recall_def = recall::definition();
        definitions.insert(recall_def.name.clone(), recall_def);
        let skill_registry = crate::skills::SkillRegistry::discover(&project_root);

        Self {
            project_root,
            definitions,
            read_cache: Arc::new(std::sync::Mutex::new(HashMap::new())),
            fs: Arc::new(LocalFileSystem::new()),
            last_writer: Arc::new(std::sync::Mutex::new(HashMap::new())),
            last_bash: Arc::new(std::sync::Mutex::new(None)),
            undo: std::sync::Mutex::new(crate::undo::UndoStack::new()),
            skill_registry,
            db: std::sync::RwLock::new(None),
            session_id: std::sync::RwLock::new(None),
            caps: OutputCaps::for_context(max_context_tokens),
            bg_registry: bg_process::BgRegistry::new(),
            trust,
            // Phase 5 PR-2 of #934: seed with strict_default(). Callers
            // can override via [`Self::with_sandbox_policy`] (sub-agent
            // dispatch does this; the main agent inherits the default).
            sandbox_policy: koda_sandbox::SandboxPolicy::strict_default(),
            mcp_manager: std::sync::RwLock::new(None),
            proxy_port: std::sync::RwLock::new(None),
            socks5_port: std::sync::RwLock::new(None),
        }
    }

    /// Share an existing file-read cache (e.g. from the parent agent).
    ///
    /// Sub-agents that share the parent's cache avoid redundant disk reads
    /// for files already loaded in the same session.
    pub fn with_shared_cache(mut self, cache: FileReadCache) -> Self {
        self.read_cache = cache;
        self
    }

    /// Override the active sandbox policy.
    ///
    /// Phase 5 PR-2 of #934. Builder-style; chains after `with_trust`
    /// (or `new`). Sub-agent dispatch uses this to install the policy
    /// produced by [`crate::sandbox::policy_for_agent`] on the child's
    /// registry. The main agent path doesn't call this and inherits
    /// the `strict_default()` seed from `with_trust` — byte-for-byte
    /// unchanged behavior in PR-2.
    pub fn with_sandbox_policy(mut self, policy: koda_sandbox::SandboxPolicy) -> Self {
        self.sandbox_policy = policy;
        self
    }

    /// Borrow the active sandbox policy. Used by the Bash dispatch
    /// path to thread the per-registry policy into
    /// [`crate::sandbox::build`].
    pub fn sandbox_policy(&self) -> &koda_sandbox::SandboxPolicy {
        &self.sandbox_policy
    }

    /// Inject a custom [`FileSystem`] implementation.
    ///
    /// Call this after construction to swap `LocalFileSystem` for
    /// `SandboxedFileSystem` when a sandbox slot is ready (#934).
    pub fn set_fs(&mut self, fs: Arc<dyn FileSystem + Send + Sync>) {
        self.fs = fs;
    }

    /// Get a clone of the `Arc` file-read cache for sharing with sub-agents.
    pub fn file_read_cache(&self) -> FileReadCache {
        Arc::clone(&self.read_cache)
    }

    /// Get a clone of the last-writer cache for passing to validation.
    pub fn last_writer_cache(&self) -> LastWriterCache {
        Arc::clone(&self.last_writer)
    }

    /// Get a clone of the last-bash cache for passing to validation.
    pub fn last_bash_cache(&self) -> LastBashCache {
        Arc::clone(&self.last_bash)
    }

    /// Attach database + session for tools that need history access.
    ///
    /// Lock-poisoning is non-fatal: on a poisoned lock the previous
    /// value is silently kept.
    pub fn set_session(&self, db: std::sync::Arc<crate::db::Database>, session_id: String) {
        if let Ok(mut guard) = self.db.write() {
            *guard = Some(db);
        }
        if let Ok(mut guard) = self.session_id.write() {
            *guard = Some(session_id);
        }
    }

    /// Attach an MCP connection manager and register its tools (#662).
    ///
    /// Called after MCP servers have connected and discovered their tools.
    /// Tool definitions are merged into the registry so the LLM can see them.
    pub fn set_mcp_manager(&self, manager: Arc<tokio::sync::RwLock<crate::mcp::McpManager>>) {
        if let Ok(mut guard) = self.mcp_manager.write() {
            *guard = Some(manager);
        }
    }

    /// Get the MCP manager (if attached).
    pub fn mcp_manager(&self) -> Option<Arc<tokio::sync::RwLock<crate::mcp::McpManager>>> {
        self.mcp_manager.read().ok().and_then(|guard| guard.clone())
    }

    /// Attach (or detach) the per-session HTTP CONNECT proxy port.
    ///
    /// Called from [`crate::session::KodaSession::new`] after spawning
    /// the always-on [`koda_sandbox::BuiltInProxy`]. Pass `None` to
    /// detach (Bash invocations revert to unfiltered network access —
    /// only used in standalone-ToolRegistry tests; production sessions
    /// keep this set for their full lifetime). Lock-poisoning is
    /// non-fatal — we silently keep the previous value, matching the
    /// precedent set by `set_mcp_manager`.
    pub fn set_proxy_port(&self, port: Option<u16>) {
        if let Ok(mut guard) = self.proxy_port.write() {
            *guard = port;
        }
    }

    /// Current proxy port, if one has been attached. Read by the Bash
    /// dispatch path; threaded into [`crate::sandbox::build`] which
    /// turns it into the env-var bouquet on the spawned `Command`.
    pub fn proxy_port(&self) -> Option<u16> {
        self.proxy_port.read().ok().and_then(|guard| *guard)
    }

    /// Attach (or detach) the per-session SOCKS5 proxy port. Mirrors
    /// [`Self::set_proxy_port`] — see that fn's docs for the
    /// lock-poisoning policy.
    pub fn set_socks5_port(&self, port: Option<u16>) {
        if let Ok(mut guard) = self.socks5_port.write() {
            *guard = port;
        }
    }

    /// Current SOCKS5 port, if one has been attached. Threaded into
    /// [`crate::sandbox::build`] which appends `ALL_PROXY` to the
    /// spawned `Command`'s env.
    pub fn socks5_port(&self) -> Option<u16> {
        self.socks5_port.read().ok().and_then(|guard| *guard)
    }

    /// Classify a tool, using MCP annotations when available.
    ///
    /// For built-in tools, delegates to `classify_tool()`.
    /// For MCP tools, looks up cached annotations in the manager.
    pub fn classify_tool_with_mcp(&self, name: &str) -> ToolEffect {
        if crate::mcp::is_mcp_tool_name(name) {
            // try_read (non-blocking) — this is a sync fn and must not
            // block on the async RwLock.
            if let Some(mgr) = self.mcp_manager()
                && let Ok(mgr) = mgr.try_read()
            {
                return mgr.classify_tool(name);
            }
            // Fallback: no manager or lock contention.
            return ToolEffect::RemoteAction;
        }
        classify_tool(name)
    }

    /// Get all built-in tool names.
    /// Used by wiring tests to verify every tool is properly integrated.
    pub fn all_builtin_tool_names(&self) -> Vec<String> {
        let mut names: Vec<String> = self.definitions.keys().cloned().collect();
        names.sort();
        names
    }

    /// Check whether a tool name is known.
    pub fn has_tool(&self, name: &str) -> bool {
        self.definitions.contains_key(name)
    }

    /// List all available skills as `(name, description, source)` tuples.
    pub fn list_skills(&self) -> Vec<(String, String, String)> {
        self.skill_registry
            .list()
            .into_iter()
            .map(|m| {
                let source = match m.source {
                    crate::skills::SkillSource::BuiltIn => "built-in",
                    crate::skills::SkillSource::User => "user",
                    crate::skills::SkillSource::Project => "project",
                };
                (m.name.clone(), m.description.clone(), source.to_string())
            })
            .collect()
    }

    /// Search skills by query, returning `(name, description, source)` tuples.
    pub fn search_skills(&self, query: &str) -> Vec<(String, String, String)> {
        self.skill_registry
            .search(query)
            .into_iter()
            .map(|m| {
                let source = match m.source {
                    crate::skills::SkillSource::BuiltIn => "built-in",
                    crate::skills::SkillSource::User => "user",
                    crate::skills::SkillSource::Project => "project",
                };
                (m.name.clone(), m.description.clone(), source.to_string())
            })
            .collect()
    }

    /// Get tool definitions, optionally filtered by allow/deny lists.
    ///
    /// Includes MCP tool definitions if a manager is attached.
    ///
    /// - `allowed` non-empty → only those tools (allowlist).
    /// - `denied` non-empty → all tools except those (denylist).
    /// - Both empty → all tools.
    /// - If both are specified, allowlist wins (deny is ignored).
    pub fn get_definitions(&self, allowed: &[String], denied: &[String]) -> Vec<ToolDefinition> {
        let mut defs: Vec<ToolDefinition> = if !allowed.is_empty() {
            allowed
                .iter()
                .filter_map(|name| self.definitions.get(name).cloned())
                .collect()
        } else if !denied.is_empty() {
            self.definitions
                .values()
                .filter(|d| !denied.contains(&d.name))
                .cloned()
                .collect()
        } else {
            self.definitions.values().cloned().collect()
        };

        // Append MCP tool definitions, applying the same filter mode.
        // try_read: sync fn, must not block on the async RwLock — on
        // contention MCP definitions are simply omitted this call.
        if let Some(mgr) = self.mcp_manager()
            && let Ok(mgr) = mgr.try_read()
        {
            let mcp_defs = mgr.all_tool_definitions();
            if !allowed.is_empty() {
                // Allowlist mode: only include MCP tools in the allowlist.
                for def in mcp_defs {
                    if allowed.contains(&def.name) {
                        defs.push(def);
                    }
                }
            } else if !denied.is_empty() {
                // Denylist mode: include MCP tools not in the denylist.
                for def in mcp_defs {
                    if !denied.contains(&def.name) {
                        defs.push(def);
                    }
                }
            } else {
                // No filter: include all MCP tools.
                defs.extend(mcp_defs);
            }
        }

        defs
    }

    /// Execute a tool by name with the given JSON arguments.
    ///
    /// Empty or whitespace-only `arguments` are treated as `{}` (no args)
    /// so that tools can fall through to their own defaults instead of
    /// surfacing a raw JSON parse error. See #513.
    ///
    /// `sink_for_streaming` is an optional `(sink, call_id)` pair. When
    /// provided, the Bash tool streams each output line as a
    /// `ToolOutputLine` event in real-time.
    ///
    /// NOTE(review): `bg_task_tools` definitions are registered in
    /// `with_trust` but have no arm in the match below — presumably
    /// dispatched upstream like `InvokeAgent`/`AskUser`; confirm.
    pub async fn execute(
        &self,
        name: &str,
        arguments: &str,
        sink_for_streaming: Option<(&dyn crate::engine::EngineSink, &str)>,
        // Phase E of #996: forwarded to `Bash` so that bg-shell
        // entries are tagged with the calling agent's invocation id.
        // Every other tool ignores this. Top-level callers pass `None`.
        caller_spawner: Option<u32>,
    ) -> ToolResult {
        // Normalize empty/whitespace-only args to "{}" (#513).
        let raw = arguments.trim();
        let raw = if raw.is_empty() { "{}" } else { raw };
        let args: Value = match serde_json::from_str(raw) {
            Ok(v) => v,
            Err(e) => {
                return ToolResult {
                    output: format!("Invalid JSON arguments: {e}"),
                    success: false,
                    full_output: None,
                };
            }
        };

        // Log the arg length only, not the content — args may be large
        // or contain sensitive data.
        tracing::info!(
            "Executing tool: {name} with args: [{} chars]",
            arguments.len()
        );

        // Snapshot file before mutation (for /undo). Note this uses
        // crate::undo::is_mutating_tool, not this module's free function
        // of the same name.
        if let Some(file_path) = crate::undo::is_mutating_tool(name)
            .then(|| crate::undo::extract_file_path(name, &args))
            .flatten()
        {
            let resolved = self.project_root.join(&file_path);
            if let Ok(mut undo) = self.undo.lock() {
                undo.snapshot(&resolved);
            }
        }

        let result = match name {
            // File tools
            "Read" => {
                file_tools::read_file(&self.project_root, &args, &self.read_cache, &*self.fs).await
            }
            "Write" => file_tools::write_file(&self.project_root, &args, &*self.fs).await,
            "Edit" => {
                file_tools::edit_file(&self.project_root, &args, &self.read_cache, &*self.fs).await
            }
            "Delete" => file_tools::delete_file(&self.project_root, &args).await,
            "List" => {
                file_tools::list_files(&self.project_root, &args, self.caps.list_entries).await
            }

            // Search tools
            "Grep" => {
                grep::grep(&self.project_root, &args, self.caps.grep_matches, &*self.fs).await
            }
            "Glob" => {
                glob_tool::glob_search(&self.project_root, &args, self.caps.glob_results, &*self.fs)
                    .await
            }

            // Shell — returns ShellOutput with summary + full output, so
            // it bypasses the common Ok/Err handling below via `return`.
            "Bash" => {
                let shell_result = shell::run_shell_command(
                    &self.project_root,
                    &args,
                    self.caps.shell_output_lines,
                    &self.bg_registry,
                    sink_for_streaming,
                    &self.trust,
                    self.sandbox_policy(),
                    self.proxy_port(),
                    self.socks5_port(),
                    caller_spawner,
                )
                .await;
                return match shell_result {
                    Ok(so) => {
                        // Record the invocation so validate_edit can hint at it
                        // in staleness error messages (#804 item 7).
                        // Snippet capped at 72 chars.
                        let snippet = args["command"]
                            .as_str()
                            .unwrap_or("")
                            .chars()
                            .take(72)
                            .collect::<String>();
                        if !snippet.is_empty()
                            && let Ok(mut guard) = self.last_bash.lock()
                        {
                            *guard = Some((snippet, std::time::Instant::now()));
                        }
                        ToolResult {
                            output: so.summary,
                            success: true,
                            full_output: so.full_output,
                        }
                    }
                    Err(e) => ToolResult {
                        output: format!("Error: {e}"),
                        success: false,
                        full_output: None,
                    },
                };
            }

            // Web
            "WebFetch" => web_fetch::web_fetch(&args, self.caps.web_body_chars).await,
            "WebSearch" => web_search::web_search(&args).await,
            "TodoWrite" => {
                let db_opt = self.db.read().ok().and_then(|g| g.clone());
                let sid_opt = self.session_id.read().ok().and_then(|g| g.clone());
                match (db_opt, sid_opt) {
                    (Some(db), Some(sid)) => match todo::todo_write(&db, &sid, &args).await {
                        Ok(outcome) => {
                            // #1077 Phase A: surface the transition to
                            // every client (TUI / ACP / headless) via
                            // EngineEvent::TodoUpdate. The dedup-nudge
                            // path returns an empty diff so we suppress
                            // the event there — unchanged-list writes
                            // are a no-op for clients, only a reminder
                            // for the model.
                            if !outcome.diff.is_empty()
                                && let Some((sink, _call_id)) = sink_for_streaming
                            {
                                sink.emit(crate::engine::EngineEvent::TodoUpdate {
                                    items: outcome.items.clone(),
                                    diff: outcome.diff.clone(),
                                });
                            }
                            Ok(outcome.message)
                        }
                        Err(e) => Err(e),
                    },
                    // No DB/session attached — soft failure, not an Err.
                    _ => Ok("TodoWrite requires an active session.".to_string()),
                }
            }

            // Memory
            "MemoryRead" => memory::memory_read(&self.project_root).await,
            "MemoryWrite" => memory::memory_write(&self.project_root, &args).await,

            // Agent tools
            "ListAgents" => {
                let detail = args["detail"].as_bool().unwrap_or(false);
                if detail {
                    Ok(agent::list_agents_detail(&self.project_root))
                } else {
                    let agents = agent::list_agents(&self.project_root);
                    if agents.is_empty() {
                        Ok("No sub-agents configured.".to_string())
                    } else {
                        // One "  name — desc" line per agent; non-built-in
                        // sources are tagged with "[source]".
                        let lines: Vec<String> = agents
                            .iter()
                            .map(|(name, desc, source)| {
                                if source == "built-in" {
                                    format!("  {name} — {desc}")
                                } else {
                                    format!("  {name} — {desc} [{source}]")
                                }
                            })
                            .collect();
                        Ok(lines.join("\n"))
                    }
                }
            }
            // Skill tools
            "ListSkills" => Ok(skill_tools::list_skills(&self.skill_registry, &args)),
            "ActivateSkill" => Ok(skill_tools::activate_skill(&self.skill_registry, &args)),

            // Recall context tool
            "RecallContext" => {
                let db_opt = self.db.read().ok().and_then(|g| g.clone());
                let sid_opt = self.session_id.read().ok().and_then(|g| g.clone());
                if let (Some(db), Some(sid)) = (db_opt, sid_opt) {
                    Ok(recall::recall_context(&db, &sid, &args).await)
                } else {
                    Ok("RecallContext requires an active session.".to_string())
                }
            }

            "InvokeAgent" => {
                // Handled by tool_dispatch.rs before reaching here.
                // This branch should not be reached in normal flow.
                return ToolResult {
                    output: "InvokeAgent is handled by the inference loop.".to_string(),
                    success: false,
                    full_output: None,
                };
            }

            "AskUser" => {
                // Handled by execute_tools_sequential (needs sink + cmd_rx).
                // This branch should not be reached in normal flow.
                return ToolResult {
                    output: "AskUser is handled by the inference loop.".to_string(),
                    success: false,
                    full_output: None,
                };
            }

            other => {
                // MCP tool dispatch (#662): route `server__tool` calls
                // to the appropriate MCP server.
                if crate::mcp::is_mcp_tool_name(other) {
                    if let Some(mgr) = self.mcp_manager() {
                        // Scope the read guard so it's dropped before the
                        // match on the result.
                        let result = {
                            let mgr = mgr.read().await;
                            mgr.call_tool(other, args.clone()).await
                        };
                        return match result {
                            Ok(output) => ToolResult {
                                output,
                                success: true,
                                full_output: None,
                            },
                            Err(e) => ToolResult {
                                output: format!("Error: {e}"),
                                success: false,
                                full_output: None,
                            },
                        };
                    }
                    return ToolResult {
                        output: format!(
                            "MCP tool '{other}' not available — \
                             no MCP servers connected."
                        ),
                        success: false,
                        full_output: None,
                    };
                }

                // Detect garbled tool names (JSON blobs, very long strings)
                // — a sign the model can't do structured tool calling.
                let warning = if other.contains('{') || other.len() > 64 {
                    format!(
                        "Unknown tool: {other}. \
                         This model appears to struggle with tool calling. \
                         Consider switching to a model with native function-call support."
                    )
                } else {
                    format!("Unknown tool: {other}")
                };
                Err(anyhow::anyhow!(warning))
            }
        };

        match result {
            Ok(output) => {
                // Record successful Write/Edit so the validation layer can
                // name the responsible tool in staleness error messages.
                if matches!(name, "Write" | "Edit")
                    && let Some(path) =
                        crate::file_tracker::resolve_file_path_from_args(&args, &self.project_root)
                    && let Ok(mut guard) = self.last_writer.lock()
                {
                    guard.insert(path, (name.to_string(), std::time::Instant::now()));
                }
                ToolResult {
                    output,
                    success: true,
                    full_output: None,
                }
            }
            Err(e) => ToolResult {
                output: format!("Error: {e}"),
                success: false,
                full_output: None,
            },
        }
    }
}
883
884/// Validate and resolve a path, preventing directory traversal.
885///
886/// Works for both existing and non-existing files (no `canonicalize!`).
887/// Relative paths are joined to `project_root`; absolute paths must
888/// still be within `project_root` **or** under an allowed tempdir
889/// (`/tmp`, `/private/tmp`, `/var/tmp`, or `$TMPDIR`).
890///
891/// # Examples
892///
893/// ```
894/// use koda_core::tools::safe_resolve_path;
895/// use std::path::Path;
896///
897/// let root = Path::new("/home/user/project");
898///
899/// // Relative paths resolve within project
900/// let p = safe_resolve_path(root, "src/main.rs").unwrap();
901/// assert_eq!(p, Path::new("/home/user/project/src/main.rs"));
902///
903/// // Traversal is blocked
904/// assert!(safe_resolve_path(root, "../../etc/passwd").is_err());
905///
906/// // Tempdirs are allowed (matches the kernel sandbox policy)
907/// assert!(safe_resolve_path(root, "/tmp/scratch.txt").is_ok());
908/// ```
909pub fn safe_resolve_path(project_root: &Path, requested: &str) -> Result<PathBuf> {
910 // NOTE: used only for Write / Edit / Delete. Read-only tools call
911 // resolve_path_unrestricted — see docs/src/sandbox.md for the rationale.
912 let requested_path = Path::new(requested);
913
914 // Build absolute path and normalize (removes .., . etc.)
915 let resolved = if requested_path.is_absolute() {
916 requested_path.to_path_buf().clean()
917 } else {
918 project_root.join(requested_path).clean()
919 };
920
921 // Security check: must be within project root OR an allowed tempdir.
922 // Only Write / Edit / Delete are gated here — reads are unrestricted
923 // (see resolve_path_unrestricted and docs/src/sandbox.md).
924 //
925 // The tempdir allow-list keeps in-process policy in sync with the
926 // kernel sandbox (Seatbelt on macOS, bwrap on Linux), which already
927 // permits writes to /tmp + cache dirs. Pre-fix this layer was the
928 // outlier (#947): `bash -c 'cat > /tmp/x'` succeeded but `Write /tmp/x`
929 // was rejected, blocking common scratch-file workflows.
930 if !resolved.starts_with(project_root) && !is_allowed_write_root(&resolved) {
931 anyhow::bail!(
932 "Path {requested:?} is outside the project root ({project_root:?}) \
933 and not under a writable tempdir (/tmp, /var/tmp, $TMPDIR). \
934 Write, Edit, and Delete are restricted to the project directory \
935 and tempdirs to prevent accidental modification of files \
936 elsewhere. Tell the user: to write outside these locations, \
937 restart koda from a parent directory that contains both paths."
938 );
939 }
940
941 // Defense in depth: even within an allowed tempdir, never let writes
942 // touch koda's own credential store. (`is_fully_denied` matches the
943 // path against the credential-config denylist used by the read-only
944 // tools, keeping all three perimeters — read, write, sandbox — in sync.)
945 if crate::sandbox::is_fully_denied(&resolved) {
946 anyhow::bail!(
947 "Path {requested:?} is denied: this path contains koda's \
948 internal secrets and cannot be modified by tool calls."
949 );
950 }
951
952 Ok(resolved)
953}
954
/// Returns true if `path` lives under a system tempdir that the kernel
/// sandbox (Seatbelt / bwrap) already permits writes to.
///
/// Deliberately mirrors the `(subpath "/tmp")` / `(subpath "/private/tmp")`
/// allow rules in `sandbox.rs`, so the in-process file tools accept exactly
/// the paths that `bash -c 'cat > ...'` can write.
///
/// The check is logical (no `canonicalize`) to match `safe_resolve_path`'s
/// handling of not-yet-existing files; the kernel sandbox remains the real
/// enforcer at runtime, and this helper only keeps policy symmetric.
fn is_allowed_write_root(path: &Path) -> bool {
    // Fixed roots the kernel sandbox always allows. `/private/tmp` is
    // macOS's realpath of `/tmp` (which is a symlink there).
    let under_fixed_root = ["/tmp", "/private/tmp", "/var/tmp"]
        .iter()
        .any(|root| path.starts_with(root));

    // Per-user $TMPDIR (macOS: /var/folders/.../T/, Linux: usually /tmp),
    // resolved at call time so test environments overriding TMPDIR are
    // honoured. `temp_dir()` is infallible — falls back to /tmp on Unix.
    under_fixed_root || path.starts_with(std::env::temp_dir())
}
981
982/// Normalise a path without enforcing any scope restriction.
983///
984/// Low-level primitive — **tool implementations should call
985/// [`resolve_read_path`] instead**, which adds the fully-denied list check
986/// that keeps in-process policy in sync with the subprocess sandbox.
987///
988/// Relative paths are resolved against `project_root`; absolute paths are
989/// cleaned in-place. The result may point anywhere on the filesystem.
990pub(crate) fn resolve_path_unrestricted(project_root: &Path, requested: &str) -> PathBuf {
991 let path = Path::new(requested);
992 if path.is_absolute() {
993 path.to_path_buf().clean()
994 } else {
995 project_root.join(path).clean()
996 }
997}
998
999/// Normalise a read-only path and enforce the fully-denied list.
1000///
1001/// This is the entry-point for **all read-only tools** (Read, List, Grep,
1002/// Glob). It wraps `resolve_path_unrestricted` with a check against
1003/// `sandbox::is_fully_denied` so that the same paths blocked by the
1004/// subprocess sandbox (bwrap / Seatbelt) are also blocked when the model
1005/// accesses them through in-process tools.
1006///
1007/// Currently the only denied path is `~/.config/koda/db` — koda's own SQLite
1008/// database containing plaintext API keys. Ordinary credential directories
1009/// (`~/.ssh`, `~/.aws`, …) are readable, matching the Bash sandbox policy.
1010///
1011/// See issue #884 for Option B (OS-level enforcement via sandboxed worker).
1012pub fn resolve_read_path(project_root: &Path, requested: &str) -> Result<PathBuf> {
1013 let resolved = resolve_path_unrestricted(project_root, requested);
1014 if crate::sandbox::is_fully_denied(&resolved) {
1015 anyhow::bail!(
1016 "Access to {requested:?} is denied: this path contains koda's \
1017 internal secrets and cannot be read by model tool calls."
1018 );
1019 }
1020 Ok(resolved)
1021}
1022
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Fixture project root. Purely logical — these tests never touch the
    /// real filesystem, because `safe_resolve_path` never stats paths.
    fn root() -> PathBuf {
        PathBuf::from("/home/user/project")
    }

    // ── Phase 3b: proxy port wiring (Bash → sandbox::build) ──────────

    #[test]
    fn proxy_port_defaults_to_none() {
        // Standalone ToolRegistry (no KodaSession) starts with no port —
        // production sessions overwrite this in `KodaSession::new`.
        let registry = ToolRegistry::new(root(), 100_000);
        assert_eq!(registry.proxy_port(), None);
    }

    #[test]
    fn proxy_port_round_trips_through_setter() {
        let registry = ToolRegistry::new(root(), 100_000);
        registry.set_proxy_port(Some(31415));
        assert_eq!(registry.proxy_port(), Some(31415));
    }

    // ── Phase 3d.2: SOCKS5 port wiring (Bash → sandbox::build) ───────

    #[test]
    fn socks5_port_defaults_to_none() {
        let registry = ToolRegistry::new(root(), 100_000);
        assert_eq!(registry.socks5_port(), None);
    }

    #[test]
    fn socks5_port_round_trips_through_setter() {
        let registry = ToolRegistry::new(root(), 100_000);
        registry.set_socks5_port(Some(27182));
        assert_eq!(registry.socks5_port(), Some(27182));
    }

    #[test]
    fn socks5_and_http_ports_are_independent() {
        // Setting one must not clobber the other — the two proxies are
        // spawned independently and may live or die independently.
        let registry = ToolRegistry::new(root(), 100_000);
        registry.set_proxy_port(Some(8080));
        registry.set_socks5_port(Some(1080));
        assert_eq!(registry.proxy_port(), Some(8080));
        assert_eq!(registry.socks5_port(), Some(1080));
        registry.set_proxy_port(None);
        assert_eq!(registry.socks5_port(), Some(1080));
    }

    // ── safe_resolve_path: traversal + write-perimeter checks ────────

    #[test]
    fn test_relative_path_resolves_inside_root() {
        let result = safe_resolve_path(&root(), "src/main.rs").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project/src/main.rs"));
    }

    #[test]
    fn test_dot_path_resolves_to_root() {
        let result = safe_resolve_path(&root(), ".").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project"));
    }

    #[test]
    fn test_new_file_in_new_dir_resolves() {
        // Logical (no-canonicalize) resolution: parent dirs need not exist.
        let result = safe_resolve_path(&root(), "src/brand_new/feature.rs").unwrap();
        assert_eq!(
            result,
            PathBuf::from("/home/user/project/src/brand_new/feature.rs")
        );
    }

    #[test]
    fn test_dotdot_traversal_blocked() {
        let result = safe_resolve_path(&root(), "../../etc/passwd");
        assert!(result.is_err());
    }

    #[test]
    fn test_dotdot_sneaky_traversal_blocked() {
        // `..` buried mid-path must clean out of the root and still be caught.
        let result = safe_resolve_path(&root(), "src/../../etc/passwd");
        assert!(result.is_err());
    }

    #[test]
    fn test_absolute_path_inside_root_allowed() {
        let result = safe_resolve_path(&root(), "/home/user/project/src/lib.rs").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project/src/lib.rs"));
    }

    #[test]
    fn test_absolute_path_outside_root_blocked() {
        let result = safe_resolve_path(&root(), "/etc/shadow");
        assert!(result.is_err());
    }

    #[test]
    fn test_outside_root_error_is_actionable_for_user() {
        let err = safe_resolve_path(&root(), "../../etc/passwd").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("outside the project root"),
            "error must say 'outside the project root'; got: {msg}"
        );
        assert!(
            msg.contains("Tell the user"),
            "error must direct model to surface this to the user; got: {msg}"
        );
        // Must NOT suggest Bash — that would bypass the file-tool safety layer.
        assert!(
            !msg.contains("Bash"),
            "error must not suggest Bash as a workaround; got: {msg}"
        );
    }

    #[test]
    fn test_empty_path_resolves_to_root() {
        let result = safe_resolve_path(&root(), "").unwrap();
        assert_eq!(result, PathBuf::from("/home/user/project"));
    }

    // ── resolve_read_path ──────────────────────────────────────────────────

    #[test]
    fn read_path_allows_project_file() {
        let p = resolve_read_path(&root(), "src/lib.rs").unwrap();
        assert_eq!(p, PathBuf::from("/home/user/project/src/lib.rs"));
    }

    #[test]
    fn read_path_allows_outside_project() {
        // Reads outside the project root are intentionally unrestricted.
        let p = resolve_read_path(&root(), "/etc/hosts").unwrap();
        assert_eq!(p, PathBuf::from("/etc/hosts"));
    }

    #[test]
    fn read_path_blocks_koda_db() {
        let home = std::env::var("HOME").unwrap_or_else(|_| "/home/user".into());
        let koda_db = format!("{home}/.config/koda/db/koda.db");
        let err = resolve_read_path(&root(), &koda_db).unwrap_err();
        assert!(
            err.to_string().contains("denied"),
            "expected 'denied' in error, got: {err}"
        );
    }

    // ── #947: writes to tempdirs ─────────────────────────────────────
    //
    // The kernel sandbox (Seatbelt / bwrap) explicitly permits writes to
    // /tmp + cache dirs. Pre-fix, `safe_resolve_path` rejected absolute
    // paths outside `project_root`, so `bash -c 'cat > /tmp/x'` succeeded
    // but `Write /tmp/x` failed — forcing models into shell heredoc
    // workarounds that often quote-escape badly. These tests lock in the
    // symmetry between the two perimeters.

    #[test]
    fn write_path_allows_tmp() {
        let p = safe_resolve_path(&root(), "/tmp/koda-scratch.txt").unwrap();
        assert_eq!(p, PathBuf::from("/tmp/koda-scratch.txt"));
    }

    #[test]
    fn write_path_allows_private_tmp_macos_realpath() {
        // macOS resolves /tmp → /private/tmp via a symlink. Some tools (`find`,
        // `realpath`) emit the realpath form, so absolute paths beginning
        // with /private/tmp must also be accepted.
        let p = safe_resolve_path(&root(), "/private/tmp/koda-scratch.txt").unwrap();
        assert_eq!(p, PathBuf::from("/private/tmp/koda-scratch.txt"));
    }

    #[test]
    fn write_path_allows_var_tmp() {
        let p = safe_resolve_path(&root(), "/var/tmp/koda-scratch.txt").unwrap();
        assert_eq!(p, PathBuf::from("/var/tmp/koda-scratch.txt"));
    }

    #[test]
    fn write_path_allows_per_user_tmpdir() {
        // Whatever `std::env::temp_dir()` returns on this host — macOS gives
        // /var/folders/.../T/, Linux usually /tmp. Either way it's writable.
        let tmpdir = std::env::temp_dir();
        let target = tmpdir.join("koda-scratch.txt");
        let p = safe_resolve_path(&root(), target.to_str().unwrap()).unwrap();
        assert_eq!(p, target.clean());
    }

    #[test]
    fn write_path_blocks_etc_hosts() {
        // System config dirs stay denied — only tempdirs are added.
        let err = safe_resolve_path(&root(), "/etc/hosts").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("outside the project root"),
            "system paths must still be rejected; got: {msg}"
        );
    }

    #[test]
    fn write_path_blocks_ssh_authorized_keys() {
        // Credential dirs in $HOME stay denied — they're outside both
        // project_root and any tempdir, so the existing perimeter holds.
        let home = std::env::var("HOME").unwrap_or_else(|_| "/home/user".into());
        let target = format!("{home}/.ssh/authorized_keys");
        assert!(
            safe_resolve_path(&root(), &target).is_err(),
            "~/.ssh writes must remain blocked"
        );
    }

    #[test]
    fn write_path_blocks_koda_db_even_via_tmp_traversal() {
        // Defense in depth: even if a model crafts a path that lands in a
        // tempdir but cleans into koda's own credential store, `is_fully_denied`
        // catches it. Constructed path: `/tmp/../<home>/.config/koda/db/x`
        // cleans to `<home>/.config/koda/db/x` — NOT a tempdir, NOT project,
        // hits the standard "outside the project root" path. So this is
        // already covered by the primary check; this test pins it down.
        let home = std::env::var("HOME").unwrap_or_else(|_| "/home/user".into());
        let target = format!("/tmp/../{home}/.config/koda/db/koda.db");
        assert!(
            safe_resolve_path(&root(), &target).is_err(),
            "traversal out of /tmp must not bypass the gate"
        );
    }

    #[test]
    fn write_path_traversal_inside_tmp_stays_in_tmp() {
        // /tmp/foo/../bar cleans to /tmp/bar — still in /tmp, still allowed.
        let p = safe_resolve_path(&root(), "/tmp/foo/../bar").unwrap();
        assert_eq!(p, PathBuf::from("/tmp/bar"));
    }

    // ── #1077 Phase A: TodoWrite event-emission contract ─────────
    //
    // The dispatch arm in `execute()` must:
    //   1. emit `EngineEvent::TodoUpdate` with structured items+diff on
    //      accepted writes that change the persisted list;
    //   2. emit nothing on the dedup-nudge path (empty diff);
    //   3. always return the model-facing message string regardless.
    //
    // These are the contract a future TUI / ACP renderer will rely on.
    // If you find yourself loosening any of them, revisit `DESIGN.md
    // § Progress Tracking: Model-Owned, History-Persisted,
    // Engine-Surfaced` first — the suppression rule in particular is
    // load-bearing for not spamming clients on idempotent rewrites.

    /// Build a `ToolRegistry` wired to a throwaway DB + fresh session,
    /// mirroring the wiring `KodaSession::new` performs. Returns the
    /// `TempDir` so it stays alive for the duration of the test.
    async fn registry_with_session() -> (
        ToolRegistry,
        tempfile::TempDir,
        std::sync::Arc<crate::db::Database>,
        String,
    ) {
        use crate::persistence::Persistence;
        let dir = tempfile::TempDir::new().unwrap();
        let db = std::sync::Arc::new(
            crate::db::Database::open(&dir.path().join("test.db"))
                .await
                .unwrap(),
        );
        let sid = db.create_session("koda", dir.path()).await.unwrap();
        let registry = ToolRegistry::new(dir.path().to_path_buf(), 100_000);
        // Wire DB + session id the same way KodaSession::new does.
        *registry.db.write().unwrap() = Some(db.clone());
        *registry.session_id.write().unwrap() = Some(sid.clone());
        (registry, dir, db, sid)
    }

    #[tokio::test]
    async fn todo_write_emits_todo_update_event_on_first_write() {
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let sink = crate::engine::sink::TestSink::new();
        let result = registry
            .execute(
                "TodoWrite",
                r#"{"todos":[{"content":"Add tests","status":"pending","priority":"high"}]}"#,
                Some((&sink, "call-1")),
                None,
            )
            .await;
        assert!(result.success, "first write must succeed: {result:?}");
        assert_eq!(sink.len(), 1, "first write must emit exactly one event");
        match &sink.events()[0] {
            crate::engine::EngineEvent::TodoUpdate { items, diff } => {
                assert_eq!(items.len(), 1);
                assert_eq!(items[0].content, "Add tests");
                assert_eq!(diff.added.len(), 1, "first write → everything in added");
                assert!(diff.changed.is_empty());
                assert!(diff.removed.is_empty());
            }
            other => panic!("expected TodoUpdate, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn todo_write_suppresses_event_on_unchanged_rewrite() {
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let payload = r#"{"todos":[{"content":"A","status":"pending","priority":"high"}]}"#;

        // First write: should emit.
        let sink1 = crate::engine::sink::TestSink::new();
        registry
            .execute("TodoWrite", payload, Some((&sink1, "c1")), None)
            .await;
        assert_eq!(sink1.len(), 1);

        // Identical second write: must NOT emit. The dedup-nudge
        // message goes back to the model, but clients see nothing.
        let sink2 = crate::engine::sink::TestSink::new();
        let result2 = registry
            .execute("TodoWrite", payload, Some((&sink2, "c2")), None)
            .await;
        assert!(result2.success);
        assert!(
            result2.output.contains("unchanged"),
            "model-facing message must still nudge: {}",
            result2.output
        );
        assert_eq!(
            sink2.len(),
            0,
            "unchanged rewrite must NOT emit a TodoUpdate event"
        );
    }

    #[tokio::test]
    async fn todo_write_returns_model_message_even_without_sink() {
        // Production paths sometimes call `execute` with `None` for
        // the sink (top-level tool runs that aren't streaming). Must
        // still succeed and return the formatted message.
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let result = registry
            .execute(
                "TodoWrite",
                r#"{"todos":[{"content":"X","status":"pending","priority":"low"}]}"#,
                None,
                None,
            )
            .await;
        assert!(result.success);
        assert!(result.output.contains("0/1 done"));
    }

    #[tokio::test]
    async fn todo_write_rejects_two_in_progress_at_dispatch() {
        // Engine-enforced single-in-progress: must surface as a
        // failed ToolResult, not a successful one with a warning.
        // Models notice failures more reliably than warnings.
        let (registry, _dir, _db, _sid) = registry_with_session().await;
        let sink = crate::engine::sink::TestSink::new();
        let result = registry
            .execute(
                "TodoWrite",
                r#"{"todos":[
                    {"content":"A","status":"in_progress","priority":"high"},
                    {"content":"B","status":"in_progress","priority":"medium"}
                ]}"#,
                Some((&sink, "c1")),
                None,
            )
            .await;
        assert!(
            !result.success,
            "two in_progress must produce a failed ToolResult"
        );
        assert!(
            result.output.contains("Only one task"),
            "failure message must explain the rule: {}",
            result.output
        );
        assert_eq!(sink.len(), 0, "failed validation must not emit an event");
    }
}
1399
1400// ── Tool action descriptions ──────────────────────────────────
1401
1402/// Generate a human-readable description of a tool action for approval prompts.
1403pub fn describe_action(tool_name: &str, args: &serde_json::Value) -> String {
1404 match tool_name {
1405 "Bash" => {
1406 let cmd = args
1407 .get("command")
1408 .or(args.get("cmd"))
1409 .and_then(|v| v.as_str())
1410 .unwrap_or("?");
1411 let bg = args
1412 .get("background")
1413 .and_then(|v| v.as_bool())
1414 .unwrap_or(false);
1415 if bg {
1416 format!("[bg] {cmd}")
1417 } else {
1418 cmd.to_string()
1419 }
1420 }
1421 "Delete" => {
1422 let path = args
1423 .get("file_path")
1424 .or(args.get("path"))
1425 .and_then(|v| v.as_str())
1426 .unwrap_or("?");
1427 let recursive = args
1428 .get("recursive")
1429 .and_then(|v| v.as_bool())
1430 .unwrap_or(false);
1431 if recursive {
1432 format!("Delete directory (recursive): {path}")
1433 } else {
1434 format!("Delete: {path}")
1435 }
1436 }
1437 "Write" => {
1438 let path = args
1439 .get("path")
1440 .or(args.get("file_path"))
1441 .and_then(|v| v.as_str())
1442 .unwrap_or("?");
1443 let overwrite = args
1444 .get("overwrite")
1445 .and_then(|v| v.as_bool())
1446 .unwrap_or(false);
1447 if overwrite {
1448 format!("Overwrite file: {path}")
1449 } else {
1450 format!("Create file: {path}")
1451 }
1452 }
1453 "Edit" => {
1454 let path = if let Some(payload) = args.get("payload") {
1455 payload
1456 .get("file_path")
1457 .or(payload.get("path"))
1458 .and_then(|v| v.as_str())
1459 .unwrap_or("?")
1460 } else {
1461 args.get("file_path")
1462 .or(args.get("path"))
1463 .and_then(|v| v.as_str())
1464 .unwrap_or("?")
1465 };
1466 format!("Edit file: {path}")
1467 }
1468 "WebFetch" => {
1469 let url = args.get("url").and_then(|v| v.as_str()).unwrap_or("?");
1470 format!("Fetch URL: {url}")
1471 }
1472 "WebSearch" => {
1473 let q = args.get("query").and_then(|v| v.as_str()).unwrap_or("?");
1474 format!("Web search: {q}")
1475 }
1476 "TodoWrite" => {
1477 let n = args
1478 .get("todos")
1479 .and_then(|v| v.as_array())
1480 .map(|a| a.len())
1481 .unwrap_or(0);
1482 format!("Update todo list ({n} tasks)")
1483 }
1484 "MemoryWrite" => {
1485 let fact = args.get("fact").and_then(|v| v.as_str()).unwrap_or("?");
1486 let preview = if fact.len() > 60 {
1487 format!("{}…", &fact[..57])
1488 } else {
1489 fact.to_string()
1490 };
1491 format!("Save to memory: {preview}")
1492 }
1493 _ => format!("Execute: {tool_name}"),
1494 }
1495}
1496
#[cfg(test)]
mod describe_action_tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_describe_bash() {
        let desc = describe_action("Bash", &json!({"command": "cargo build"}));
        assert!(desc.contains("cargo build"));
    }

    #[test]
    fn test_describe_delete() {
        let desc = describe_action("Delete", &json!({"file_path": "old.rs"}));
        assert!(desc.contains("old.rs"));
    }

    #[test]
    fn test_describe_edit() {
        // Edit args can arrive nested under `payload` — the path must
        // still be surfaced in the description.
        let desc = describe_action("Edit", &json!({"payload": {"file_path": "src/main.rs"}}));
        assert!(desc.contains("src/main.rs"));
    }

    #[test]
    fn test_describe_write() {
        let desc = describe_action("Write", &json!({"path": "new.rs"}));
        assert!(desc.contains("Create file"));
        assert!(desc.contains("new.rs"));
    }

    #[test]
    fn test_describe_write_overwrite() {
        // `overwrite: true` flips the verb from "Create" to "Overwrite".
        let desc = describe_action("Write", &json!({"path": "x.rs", "overwrite": true}));
        assert!(desc.contains("Overwrite"));
    }

    #[test]
    fn test_get_definitions_deny_list() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        let denied = vec![
            "Write".to_string(),
            "Edit".to_string(),
            "Delete".to_string(),
        ];
        let defs = registry.get_definitions(&[], &denied);
        let names: Vec<&str> = defs.iter().map(|d| d.name.as_str()).collect();
        assert!(!names.contains(&"Write"));
        assert!(!names.contains(&"Edit"));
        assert!(!names.contains(&"Delete"));
        assert!(names.contains(&"Read"));
        assert!(names.contains(&"Grep"));
    }

    #[test]
    fn test_get_definitions_allow_list_wins_over_deny() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        let allowed = vec!["Read".to_string(), "Write".to_string()];
        let denied = vec!["Write".to_string()];
        // allow wins — Write should be present
        let defs = registry.get_definitions(&allowed, &denied);
        let names: Vec<&str> = defs.iter().map(|d| d.name.as_str()).collect();
        assert_eq!(names.len(), 2);
        assert!(names.contains(&"Read"));
        assert!(names.contains(&"Write"));
    }

    #[test]
    fn test_get_definitions_both_empty_returns_all() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        let all = registry.get_definitions(&[], &[]);
        assert!(all.len() > 10, "Should have many tools");
    }

    // ── Phase 5 PR-2 of #934: SandboxPolicy threading on ToolRegistry ──
    //
    // The Bash dispatch path now reads `self.sandbox_policy()` instead
    // of synthesizing `strict_default()` inline. These tests pin:
    //   1. The default seed is `strict_default()` so unchanged callers
    //      preserve byte-for-byte behavior.
    //   2. `with_sandbox_policy` actually replaces the field (the
    //      threading is real, not a stub).
    //   3. The accessor returns the most recent setter's value (no
    //      caching/aliasing surprises).

    #[test]
    fn registry_sandbox_policy_defaults_to_strict() {
        let registry = ToolRegistry::new(PathBuf::from("/tmp"), 128_000);
        assert_eq!(
            *registry.sandbox_policy(),
            koda_sandbox::SandboxPolicy::strict_default(),
            "PR-2 contract: ToolRegistry::new must seed strict_default() so \
             pre-PR callers see unchanged behavior"
        );
    }

    #[test]
    fn with_sandbox_policy_overrides_the_default() {
        // Build a deliberately-non-default policy by mutating one field.
        // We don't care which field — only that round-tripping through
        // `with_sandbox_policy` preserves the override and the default
        // would not match.
        let mut custom = koda_sandbox::SandboxPolicy::strict_default();
        custom
            .fs
            .allow_write
            .push(koda_sandbox::PathPattern::new("/pr2-marker"));

        let registry =
            ToolRegistry::new(PathBuf::from("/tmp"), 128_000).with_sandbox_policy(custom.clone());

        assert_eq!(
            *registry.sandbox_policy(),
            custom,
            "with_sandbox_policy must replace the field, not no-op"
        );
        assert_ne!(
            *registry.sandbox_policy(),
            koda_sandbox::SandboxPolicy::strict_default(),
            "sanity: the override is observably different from the default"
        );
    }
}