Skip to main content

hematite/agent/
tool_registry.rs

1use crate::agent::inference::{tool_metadata_for_name, ToolDefinition, ToolFunction};
2use serde_json::Value;
3
4fn make_tool(name: &str, description: &str, parameters: Value) -> ToolDefinition {
5    ToolDefinition {
6        tool_type: "function".into(),
7        function: ToolFunction {
8            name: name.into(),
9            description: description.into(),
10            parameters,
11        },
12        metadata: tool_metadata_for_name(name),
13    }
14}
15
16/// Returns the full set of tools exposed to the model.
17pub fn get_tools() -> Vec<ToolDefinition> {
18    let os = std::env::consts::OS;
19    let mut tools = vec![
20        make_tool(
21            "shell",
22            &format!(
23                "Execute a command in the host shell ({os}). \
24                     Use this ONLY for building, testing, or advanced system operations that have no dedicated Hematite tool. \
25                     FORBIDDEN: Never use shell to run `mkdir`, `rm`, `cat`, `head`, `tail`, or `write-file` equivalents. \
26                     Use the dedicated surgical tools (create_directory, read_file, tail_file) instead. \
27                     Output is capped at 64KB. Prefer non-interactive commands."
28            ),
29            serde_json::json!({
30                "type": "object",
31                "properties": {
32                    "command": {
33                        "type": "string",
34                        "description": "The command to run"
35                    },
36                    "reason": {
37                        "type": "string",
38                        "description": "For risky shell calls, explain what this command is verifying or changing."
39                    },
40                    "timeout_secs": {
41                        "type": "integer",
42                        "description": "Optional timeout in seconds (default 60)"
43                    }
44                },
45                "required": ["command"]
46            }),
47        ),
48        make_tool(
49            "run_code",
50            "Execute a short JavaScript/TypeScript or Python snippet in a sandboxed subprocess. \
51             No network access, no filesystem escape, hard 10-second timeout. \
52             Use this to verify logic, test algorithms, compute values, or test functions \
53             when you need real output rather than a guess. \
54             ALWAYS include the `language` field — there is no default. \
55             \
56             JAVASCRIPT/TYPESCRIPT (language: \"javascript\"): \
57             Runs via Deno, NOT Node.js. `require()` does not exist — never use it. \
58             URL imports (e.g. from 'https://deno.land/...') are blocked — network is off. \
59             Use built-in Web APIs only: `crypto.subtle`, `TextEncoder`, `URL`, `atob`/`btoa`, etc. \
60             SHA-256 example: \
61               const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode('hello')); \
62               console.log([...new Uint8Array(buf)].map(b=>b.toString(16).padStart(2,'0')).join('')); \
63             \
64             PYTHON (language: \"python\"): \
65             Standard library is available. `hashlib`, `json`, `math`, `datetime`, `re`, `itertools` all work. \
66             `subprocess`, `socket`, `urllib`, `requests` are blocked. \
67             SHA-256 example: import hashlib; print(hashlib.sha256(b'hello').hexdigest()) \
68             \
69             Do NOT use this tool for PowerShell or shell scripting. This is strictly for high-precision computation in JavaScript, TypeScript, or Python only. \
70             Do NOT fall back to shell to run deno, python, or node — use this tool directly.",
71            serde_json::json!({
72                "type": "object",
73                "properties": {
74                    "language": {
75                        "type": "string",
76                        "enum": ["javascript", "typescript", "python"],
77                        "description": "The language to run. javascript/typescript requires Deno; python requires Python 3."
78                    },
79                    "code": {
80                        "type": "string",
81                        "description": "The code to execute. Keep it short and self-contained. Print results to stdout."
82                    },
83                    "timeout_seconds": {
84                        "type": "integer",
85                        "description": "Max execution time in seconds (default 10, max 60). Use higher values for longer computations."
86                    }
87                },
88                "required": ["language", "code"]
89            }),
90        ),
91
92        make_tool(
93            "trace_runtime_flow",
94            "Return an authoritative read-only trace of Hematite runtime flow. \
95             Use this for architecture questions about keyboard input to final output, \
96             reasoning/specular separation, startup wiring, runtime subsystems, \
97             voice synthesis and Ctrl+T toggle, or \
98             session reset commands like /clear, /new, and /forget. Prefer this over guessing.",
99            serde_json::json!({
100                "type": "object",
101                "properties": {
102                    "topic": {
103                        "type": "string",
104                        "enum": ["user_turn", "session_reset", "reasoning_split", "runtime_subsystems", "startup", "voice"],
105                        "description": "Which verified runtime report to return. Use 'voice' for any question about Ctrl+T, voice toggle, or TTS pipeline. Use 'user_turn' for keyboard-to-output flow. Use 'session_reset' for /clear, /forget, /new. Use 'startup' for startup wiring. Use 'reasoning_split' for specular/thought routing. Use 'runtime_subsystems' for background subsystem overview."
106                    },
107                    "input": {
108                        "type": "string",
109                        "description": "Optional user input to label a normal user-turn trace"
110                    },
111                    "command": {
112                        "type": "string",
113                        "enum": ["/clear", "/new", "/forget", "all"],
114                        "description": "Optional reset command when topic=session_reset"
115                    }
116                },
117                "required": ["topic"]
118            }),
119        ),
120        make_tool(
121            "describe_toolchain",
122            "Return an authoritative read-only description of Hematite's actual tool surface and investigation strategy. \
123             Use this for tooling-discipline questions, best-tool selection, or read-only plans for tracing runtime behavior. \
124             Prefer this over improvising tool names or investigation steps from memory.",
125            serde_json::json!({
126                "type": "object",
127                "properties": {
128                    "topic": {
129                        "type": "string",
130                        "enum": ["read_only_codebase", "user_turn_plan", "voice_latency_plan", "host_inspection_plan", "all"],
131                        "description": "Which authoritative toolchain report to return"
132                    },
133                    "question": {
134                        "type": "string",
135                        "description": "Optional user question to label or tailor the read-only investigation plan"
136                    }
137                }
138            }),
139        ),
140        make_tool(
141            "inspect_host",
142            "Return a structured read-only inspection of the current machine and environment. \
143             Prefer this over raw shell for questions about OS configuration (firewall, power, uptime), plain-English system health reports, silicon health and high-fidelity hardware telemetry (NVIDIA clocks/fans/power, CPU frequency averaging), installed developer tools, PATH issues, package-manager and environment health, network state, service state, running processes, desktop items, Downloads size, listening ports, repo health, or directory/disk summaries. \
144             For high-performance hardware testing, use topic=disk_benchmark to measure real-time kernel disk queue intensity. \
145             For remediation questions phrased like 'how do I fix cargo not found', 'how do I fix port 3000 already in use', or 'how do I fix LM Studio not reachable', use topic=fix_plan instead of diagnosis-only topics like env_doctor, path, or ports. \
146             Use topic=summary for a compact host snapshot, topic=toolchains for common dev tool versions, topic=path for PATH analysis, topic=env_doctor for package-manager and PATH health, topic=fix_plan for structured remediation plans, topic=network for adapters/IPs/gateways/DNS, topic=services for service status and startup mode, \
147             topic=processes for top processes by memory/cpu and real-time disk/network I/O stats (look for [I/O R:N/W:N] tags to identify disk-heavy processes), \
148             topic=desktop or topic=downloads for known folders, topic=ports for listening endpoints, topic=repo_doctor for a structured workspace health report, \
149             topic=log_check for recent critical/error events from system event logs or journalctl, topic=startup_items for programs and services that run at boot (registry Run keys and startup folders on Windows; systemd enabled units on Linux), \
150             topic=health_report for a plain-English tiered system health verdict (disk, RAM, tools, recent errors), \
151             topic=storage for all drives with capacity/free space plus large developer cache directories, \
152             topic=hardware for CPU model/cores, RAM size/speed, GPU name/driver, motherboard, BIOS, and display configuration, \
153             topic=updates for Windows Update status (last install date, pending update count, WU service state), \
154             topic=security for Windows Defender real-time protection status, last scan date, signature age, firewall profile states, Windows activation, and UAC state, \
155             topic=pending_reboot to check whether a system restart is required and why (Windows Update, CBS, file rename operations), \
156             topic=disk_health for physical drive health via Get-PhysicalDisk and SMART failure prediction, \
157             topic=battery for charge level, status, estimated runtime, and wear level (laptops only — reports no battery on desktops), \
158             topic=recent_crashes for BSOD and unexpected shutdown events plus application crash/hang events from the Windows event log, \
159             topic=scheduled_tasks for all non-disabled scheduled tasks including name, path, last run time, and executable, \
160             topic=dev_conflicts for cross-tool environment conflict detection (Node.js version managers, Python 2 vs 3 ambiguity, conda env shadowing, Rust toolchain path conflicts, Git identity/signing config, duplicate PATH entries), \
161             topic=bitlocker for drive encryption status (BitLocker on Windows, LUKS on Linux), \
162             topic=ad_user for Active Directory / Managed Identity details (SID, group memberships, domain role), \
163             topic=user_accounts for Local User and Group diagnostics (Built-in Administrators, local account state), \
164             topic=rdp for Remote Desktop configuration, port, and active sessions, \
165             topic=shadow_copies for Volume Shadow Copies (VSS) and system restore points, \
166             topic=pagefile for Windows page file configuration and current usage, \
167             topic=windows_features for enabled Windows optional features (IIS, Hyper-V, etc.), \
168             topic=printers for installed printers and active print jobs, \
169             topic=winrm for Windows Remote Management (WinRM) and PS Remoting status, \
170             topic=network_stats for adapter throughput (RX/TX), errors, and dropped packets, \
171             topic=udp_ports for active UDP listeners and notable port annotations, \
172             topic=gpo for applied Group Policy Objects, topic=certificates for local personal certificates, topic=integrity for Windows component store health (SFC/DISM state), topic=domain for Active Directory and domain join status, \
173             topic=device_health for identifying malfunctioning hardware with ConfigManager error codes (Yellow Bangs), topic=drivers for auditing active system drivers and their states, topic=peripherals for enumerating connected USB, input, and display hardware, \
174             topic=sessions for auditing active and disconnected user logon sessions, \
175             topic=ad_user for specific Active Directory user identity, SID, and group membership auditing, \
176             topic=dns_lookup for precision DNS record queries (SRV, MX, TXT), \
177             topic=hyperv for local Hyper-V VM inventory and real-time load, \
178             topic=ip_config for detailed adapter configuration and DHCP lease state, \
179             topic=disk_benchmark for high-performance silicon-aware stress testing, \
180             and topic=directory or topic=disk for arbitrary paths.",
181            serde_json::json!({
182                "type": "object",
183                "properties": {
184                    "topic": {
185                        "type": "string",
186                        "enum": ["summary", "toolchains", "path", "env_doctor", "fix_plan", "network", "services", "processes", "desktop", "downloads", "directory", "disk", "ports", "repo_doctor", "log_check", "startup_items", "health_report", "storage", "hardware", "updates", "security", "pending_reboot", "disk_health", "battery", "recent_crashes", "scheduled_tasks", "dev_conflicts", "os_config", "bitlocker", "rdp", "shadow_copies", "pagefile", "windows_features", "printers", "winrm", "network_stats", "udp_ports", "gpo", "certificates", "integrity", "domain", "device_health", "drivers", "peripherals", "disk_benchmark", "permissions", "login_history", "registry_audit", "share_access", "thermal", "activation", "patch_history", "ad_user", "dns_lookup", "hyperv", "ip_config"],
187                        "description": "Which structured host inspection to run. Use topic=ad_user for domain identity audit, topic=dns_lookup for SRV/MX records, topic=hyperv for VM load, and topic=ip_config for detailed adapter info."
188                    },
189                    "name": {
190                        "type": "string",
191                        "description": "Optional when topic=processes or topic=services. Case-insensitive substring filter for process or service names."
192                    },
193                    "issue": {
194                        "type": "string",
195                        "description": "Optional when topic=fix_plan. Plain-English issue description such as 'cargo not found', 'port 3000 already in use', or 'LM Studio not reachable on localhost:1234'."
196                    },
197                    "path": {
198                        "type": "string",
199                        "description": "Required when topic=directory. Optional for topic=disk or topic=repo_doctor. Absolute or relative path to inspect."
200                    },
201                    "port": {
202                        "type": "integer",
203                        "description": "Optional when topic=ports or topic=fix_plan. Filter the result to one listening TCP port or anchor a port-conflict fix plan."
204                    },
205                    "max_entries": {
206                        "type": "integer",
207                        "description": "Optional cap for listed entries. Defaults to 10 and is capped internally."
208                    }
209                }
210            }),
211        ),
212        make_tool(
213            "resolve_host_issue",
214            "A safe, bounded tool for remediating OS and environment issues automatically with user approval. \
215             Use this to fix missing dependencies, restart stuck services, or clear disk space instead of using raw shell. \
216             The user will be prompted to approve the action. Keep targets exact.",
217            serde_json::json!({
218                "type": "object",
219                "properties": {
220                    "action": {
221                        "type": "string",
222                        "enum": ["install_package", "restart_service", "clear_temp"],
223                        "description": "The type of remediation to perform."
224                    },
225                    "target": {
226                        "type": "string",
227                        "description": "The specific target (e.g., 'python' for install_package, or 'docker' for restart_service). Optional for clear_temp."
228                    }
229                },
230                "required": ["action"]
231            }),
232        ),
233        make_tool(
234            "run_hematite_maintainer_workflow",
235            "Run one of Hematite's known maintainer or release workflows with explicit approval. \
236             Prefer this over raw shell when the user explicitly asks to run one of Hematite's own scripts such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`. \
237             Use workflow=clean for cleanup, workflow=package_windows for rebuilding the local Windows portable or installer, and workflow=release for the normal version bump/tag/push/publish flow. \
238             Keep this tool constrained to Hematite's own known workflows instead of inventing ad hoc shell commands or pretending to run arbitrary project scripts.",
239            serde_json::json!({
240                "type": "object",
241                "properties": {
242                    "workflow": {
243                        "type": "string",
244                        "enum": ["clean", "package_windows", "release"],
245                        "description": "Which known Hematite maintainer workflow to run."
246                    },
247                    "deep": {
248                        "type": "boolean",
249                        "description": "For workflow=clean. Also remove heavy build/runtime artifacts such as target/ and vein.db."
250                    },
251                    "reset": {
252                        "type": "boolean",
253                        "description": "For workflow=clean. Reset PLAN/TASK state in addition to normal cleanup."
254                    },
255                    "prune_dist": {
256                        "type": "boolean",
257                        "description": "For workflow=clean. Keep only the current Cargo.toml version under dist/."
258                    },
259                    "installer": {
260                        "type": "boolean",
261                        "description": "For workflow=package_windows. Also build the Windows installer."
262                    },
263                    "add_to_path": {
264                        "type": "boolean",
265                        "description": "For workflow=package_windows or workflow=release. Update the user PATH to the rebuilt portable."
266                    },
267                    "version": {
268                        "type": "string",
269                        "description": "For workflow=release. Exact semantic version such as 0.4.5."
270                    },
271                    "bump": {
272                        "type": "string",
273                        "enum": ["patch", "minor", "major"],
274                        "description": "For workflow=release. Ask release.ps1 to calculate the next version."
275                    },
276                    "push": {
277                        "type": "boolean",
278                        "description": "For workflow=release. Push main and the new tag."
279                    },
280                    "skip_installer": {
281                        "type": "boolean",
282                        "description": "For workflow=release. Skip the Windows installer build."
283                    },
284                    "publish_crates": {
285                        "type": "boolean",
286                        "description": "For workflow=release. Publish hematite-cli to crates.io after a successful push."
287                    },
288                    "publish_voice_crate": {
289                        "type": "boolean",
290                        "description": "For workflow=release. Publish hematite-kokoros first, then hematite-cli."
291                    }
292                },
293                "required": ["workflow"]
294            }),
295        ),
296        make_tool(
297            "run_workspace_workflow",
298            "Run an approval-gated workflow or script in the locked project workspace root. \
299             Use this for the current project's build, test, lint, fix, package.json scripts, just/task/make targets, explicit local script paths, or an exact workspace command. \
300             FORBIDDEN: The `command` field MUST be a real executable shell command (e.g. `npm install`, `cargo build`). \
301             NEVER put natural language, user-requests, or conversational intent into the `command` field. \
302             This tool is for the active workspace, not for Hematite's own maintainer scripts.",
303            serde_json::json!({
304                "type": "object",
305                "properties": {
306                    "workflow": {
307                        "type": "string",
308                        "enum": ["build", "test", "lint", "fix", "package_script", "task", "just", "make", "script_path", "command"],
309                        "description": "Which workspace workflow to run."
310                    },
311                    "name": {
312                        "type": "string",
313                        "description": "Required for workflow=package_script, task, just, or make. The script or target name."
314                    },
315                    "path": {
316                        "type": "string",
317                        "description": "Required for workflow=script_path. Relative path to a script inside the locked workspace root."
318                    },
319                    "command": {
320                        "type": "string",
321                        "description": "Required for workflow=command. Exact command to execute from the locked workspace root."
322                    },
323                    "timeout_ms": {
324                        "type": "integer",
325                        "description": "Optional timeout override in milliseconds."
326                    }
327                },
328                "required": ["workflow"]
329            }),
330        ),
331        make_tool(
332            "read_file",
333            "Read the contents of a file. For large files, use 'offset' and 'limit' to navigate.",
334            serde_json::json!({
335                "type": "object",
336                "properties": {
337                    "path": {
338                        "type": "string",
339                        "description": "Path to the file, relative to the project root"
340                    },
341                    "offset": {
342                        "type": "integer",
343                        "description": "Starting line number (0-indexed)"
344                    },
345                    "limit": {
346                        "type": "integer",
347                        "description": "Number of lines to read"
348                    }
349                },
350                "required": ["path"]
351            }),
352        ),
353        make_tool(
354            "lsp_definitions",
355            "Get the precise definition location (file:line:char) for a symbol at a specific position. \
356             Use this to jump to function/struct source code accurately.",
357            serde_json::json!({
358                "type": "object",
359                "properties": {
360                    "path": { "type": "string", "description": "File path" },
361                    "line": { "type": "integer", "description": "0-indexed line" },
362                    "character": { "type": "integer", "description": "0-indexed character" }
363                },
364                "required": ["path", "line", "character"]
365            }),
366        ),
367        make_tool(
368            "lsp_references",
369            "Find all locations where a symbol is used across the entire workspace. \
370             Use this to understand the impact of a refactor or discover internal API users.",
371            serde_json::json!({
372                "type": "object",
373                "properties": {
374                    "path": { "type": "string", "description": "File path" },
375                    "line": { "type": "integer", "description": "0-indexed line" },
376                    "character": { "type": "integer", "description": "0-indexed character" }
377                },
378                "required": ["path", "line", "character"]
379            }),
380        ),
381        make_tool(
382            "lsp_hover",
383            "Get hover information (documentation, function signature, type details) for a symbol. \
384             Use this for rapid spatial awareness without opening every file.",
385            serde_json::json!({
386                "type": "object",
387                "properties": {
388                    "path": { "type": "string", "description": "File path" },
389                    "line": { "type": "integer", "description": "0-indexed line" },
390                    "character": { "type": "integer", "description": "0-indexed character" }
391                },
392                "required": ["path", "line", "character"]
393            }),
394        ),
395        make_tool(
396            "lsp_rename_symbol",
397            "Rename a symbol project-wide using the Language Server. Ensures all references are updated safely.",
398            serde_json::json!({
399                "type": "object",
400                "properties": {
401                    "path": { "type": "string", "description": "File path" },
402                    "line": { "type": "integer", "description": "0-indexed line" },
403                    "character": { "type": "integer", "description": "0-indexed character" },
404                    "new_name": { "type": "string", "description": "The new name for the symbol" }
405                },
406                "required": ["path", "line", "character", "new_name"]
407            }),
408        ),
409        make_tool(
410            "lsp_get_diagnostics",
411            "Get a list of current compiler errors and warnings for a specific file. \
412             Use this to verify your code compiles and and to find exactly where errors are located.",
413            serde_json::json!({
414                "type": "object",
415                "properties": {
416                    "path": { "type": "string", "description": "File path" }
417                },
418                "required": ["path"]
419            }),
420        ),
421        make_tool(
422            "vision_analyze",
423            "Send an image file (screenshot, diagram, or UI mockup) to the multimodal vision model for technical analysis. \
424             Use this to identify UI bugs, confirm visual states, or understand architectural diagrams.",
425            serde_json::json!({
426                "type": "object",
427                "properties": {
428                    "path": { "type": "string", "description": "Absolute or relative path to the image file." },
429                    "prompt": { "type": "string", "description": "The specific question or analysis request for the vision model." }
430                },
431                "required": ["path", "prompt"]
432            }),
433        ),
434        make_tool(
435            "patch_hunk",
436            "Replace a specific line range [start_line, end_line] with new content. \
437             This is the most precise way to edit code and avoids search string failures.",
438            serde_json::json!({
439                "type": "object",
440                "properties": {
441                    "path": { "type": "string", "description": "File path" },
442                    "start_line": { "type": "integer", "description": "Starting line (1-indexed)" },
443                    "end_line": { "type": "integer", "description": "Ending line (inclusive)" },
444                    "replacement": { "type": "string", "description": "The new content for this range" }
445                },
446                "required": ["path", "start_line", "end_line", "replacement"]
447            }),
448        ),
449        make_tool(
450            "multi_search_replace",
451            "Replace multiple existing code blocks in a single file with new content. \
452             Each hunk specifies an EXACT 'search' string and a 'replace' string. \
453             The 'search' string MUST exactly match the existing file contents (including whitespace). \
454             This is the safest and most reliable way to make multiple structural edits.",
455            serde_json::json!({
456                "type": "object",
457                "properties": {
458                    "path": { "type": "string", "description": "File path" },
459                    "hunks": {
460                        "type": "array",
461                        "items": {
462                            "type": "object",
463                            "properties": {
464                                "search": { "type": "string", "description": "Exact existing text to find and replace" },
465                                "replace": { "type": "string", "description": "The new replacement text" }
466                            },
467                            "required": ["search", "replace"]
468                        }
469                    }
470                },
471                "required": ["path", "hunks"]
472            }),
473        ),
474        make_tool(
475            "write_file",
476            "Write content to a file, creating it (and any parent dirs) if needed. \
477             Overwrites existing files. \
478             SOVEREIGN PATHING: For files in common areas, use `@DESKTOP/file.txt`, `@DOCUMENTS/file.txt`, `@DOWNLOADS/file.txt`, or `@HOME/file.txt` to ensure 100% path accuracy.",
479            serde_json::json!({
480                "type": "object",
481                "properties": {
482                    "path": { "type": "string", "description": "File path" },
483                    "content": { "type": "string", "description": "Full file content to write" }
484                },
485                "required": ["path", "content"]
486            }),
487        ),
488        make_tool(
489            "create_directory",
490            "Authoritatively create a new directory (and any parent dirs) if they do not exist. \
491             Use this instead of raw shell (mkdir) for all filesystem organization. \
492             Supports both relative paths and absolute paths. \
493             SOVEREIGN PATHING: For directories in common areas, use `@DESKTOP/folder`, `@DOCUMENTS/folder`, `@DOWNLOADS/folder`, or `@HOME/folder` to ensure 100% path accuracy.",
494            serde_json::json!({
495                "type": "object",
496                "properties": {
497                    "path": { "type": "string", "description": "Relative or absolute directory path" }
498                },
499                "required": ["path"]
500            }),
501        ),
502        make_tool(
503            "research_web",
504            "Perform a zero-cost technical search using DuckDuckGo. \
505             Use this to find documentation, latest API changes, or solutions to complex errors \
506             when your internal knowledge is insufficient. Returns snippets and URLs.",
507            serde_json::json!({
508                "type": "object",
509                "properties": {
510                    "query": { "type": "string", "description": "The technical search query" }
511                },
512                "required": ["query"]
513            }),
514        ),
515        make_tool(
516            "fetch_docs",
517            "Fetch a URL and convert it to clean Markdown. Use this to 'read' the documentation \
518             links found via research_web. This tool uses a proxy to bypass IP blocks.",
519            serde_json::json!({
520                "type": "object",
521                "properties": {
522                    "url": { "type": "string", "description": "The URL of the documentation to fetch" }
523                },
524                "required": ["url"]
525            }),
526        ),
527        make_tool(
528            "edit_file",
529            "Edit a file by replacing an exact string with another. \
530             The 'search' string does NOT need perfectly matching indentation (it is fuzzy), \
531             but the non-whitespace text must match exactly. Use this for targeted edits.",
532            serde_json::json!({
533                "type": "object",
534                "properties": {
535                    "path": { "type": "string", "description": "File path" },
536                    "search": {
537                        "type": "string",
538                        "description": "The exact text to find (must match whitespace/indentation precisely)"
539                    },
540                    "replace": {
541                        "type": "string",
542                        "description": "The replacement text"
543                    }
544                },
545                "required": ["path", "search", "replace"]
546            }),
547        ),
548        make_tool(
549            "auto_pin_context",
550            "Select 1-3 core files to 'Lock' into prioritized memory. \
551             Use this to ensure the most important architecture files \
552             are always visible during complex refactorings.",
553            serde_json::json!({
554                "type": "object",
555                "properties": {
556                    "paths": {
557                        "type": "array",
558                        "items": { "type": "string" }
559                    },
560                    "reason": { "type": "string" }
561                },
562                "required": ["paths", "reason"]
563            }),
564        ),
565        make_tool(
566            "list_pinned",
567            "List all files currently pinned in the model's active context.",
568            serde_json::json!({
569                "type": "object",
570                "properties": {}
571            }),
572        ),
573        make_tool(
574            "list_files",
575            "List files in a directory, optionally filtered by extension.",
576            serde_json::json!({
577                "type": "object",
578                "properties": {
579                    "path": {
580                        "type": "string",
581                        "description": "Directory to list (default: current dir)"
582                    },
583                    "extension": {
584                        "type": "string",
585                        "description": "Only return files with this extension, e.g. 'rs', 'toml' (no dot)"
586                    }
587                },
588                "required": []
589            }),
590        ),
591        make_tool(
592            "tail_file",
593            "Read the last N lines of a file — useful for log files, test output, \
594             build artifacts, and any large file where only the tail is relevant. \
595             Supports an optional grep filter to show only matching lines from the tail. \
596             Use this instead of read_file when you only need the end of a large file.",
597            serde_json::json!({
598                "type": "object",
599                "properties": {
600                    "path": {
601                        "type": "string",
602                        "description": "Path to the file, relative to the project root"
603                    },
604                    "lines": {
605                        "type": "integer",
606                        "description": "Number of lines to return from the end (default: 50, max: 500)"
607                    },
608                    "grep": {
609                        "type": "string",
610                        "description": "Optional regex pattern — only return lines matching this pattern (applied before the tail slice)"
611                    }
612                },
613                "required": ["path"]
614            }),
615        ),
616        make_tool(
617            "grep_files",
618            "Search file contents for a regex pattern. Supports context lines, files-only mode, \
619             and pagination. Returns file:line:content format by default.",
620            serde_json::json!({
621                "type": "object",
622                "properties": {
623                    "pattern": {
624                        "type": "string",
625                        "description": "Regex pattern to search for (case-insensitive by default)"
626                    },
627                    "path": {
628                        "type": "string",
629                        "description": "Directory to search (default: current dir)"
630                    },
631                    "extension": {
632                        "type": "string",
633                        "description": "Only search files with this extension, e.g. 'rs'"
634                    },
635                    "mode": {
636                        "type": "string",
637                        "enum": ["content", "files_only"],
638                        "description": "'content' (default) returns matching lines; 'files_only' returns only filenames"
639                    },
640                    "context": {
641                        "type": "integer",
642                        "description": "Lines of context before AND after each match (like rg -C)"
643                    },
644                    "before": {
645                        "type": "integer",
646                        "description": "Lines of context before each match (overrides context)"
647                    },
648                    "after": {
649                        "type": "integer",
650                        "description": "Lines of context after each match (overrides context)"
651                    },
652                    "head_limit": {
653                        "type": "integer",
654                        "description": "Max hunks (or files in files_only) to return (default: 50)"
655                    },
656                    "offset": {
657                        "type": "integer",
658                        "description": "Skip first N hunks/files - for pagination (default: 0)"
659                    }
660                },
661                "required": ["pattern"]
662            }),
663        ),
664        make_tool(
665            "git_commit",
666            "Stage all changes (git add -A) and create a commit. You MUST use 'Conventional Commits' (e.g. 'feat: description').",
667            serde_json::json!({
668                "type": "object",
669                "properties": {
670                    "message": { "type": "string", "description": "Commit message (Conventional Commit style)" }
671                },
672                "required": ["message"]
673            }),
674        ),
675        make_tool(
676            "git_push",
677            "Push current branched changes to the remote origin. Requires an existing remote connection.",
678            serde_json::json!({
679                "type": "object",
680                "properties": {},
681                "required": []
682            }),
683        ),
684        make_tool(
685            "git_remote",
686            "View or manage git remotes. Use this for onboarding to GitHub/GitLab services.",
687            serde_json::json!({
688                "type": "object",
689                "properties": {
690                    "action": {
691                        "type": "string",
692                        "enum": ["list", "add", "remove"],
693                        "description": "Operation to perform"
694                    },
695                    "name": { "type": "string", "description": "Remote name (e.g. origin)" },
696                    "url": { "type": "string", "description": "Remote URL (for 'add' action)" }
697                },
698                "required": ["action"]
699            }),
700        ),
701        make_tool(
702            "git_onboarding",
703            "High-level wizard to connect this repository to a remote host (GitHub/GitLab). \
704             Handles adding the remote and performing the initial tracking push in one step.",
705            serde_json::json!({
706                "type": "object",
707                "properties": {
708                    "url": { "type": "string", "description": "The remote repository URL (HTTPS or SSH)" },
709                    "name": { "type": "string", "description": "The remote name (default: origin)" },
710                    "push": { "type": "boolean", "description": "Whether to perform an initial push to establish tracking (default: false)" }
711                },
712                "required": ["url"]
713            }),
714        ),
715        make_tool(
716            "verify_build",
717            "Run project verification for build, test, lint, or fix workflows. \
718             Prefer per-project verify profiles from `.hematite/settings.json`, and fall back to \
719             auto-detected defaults when no profile is configured. Returns BUILD OK or BUILD FAILED \
720             with command output. ALWAYS call this after scaffolding a new project or making structural changes.",
721            serde_json::json!({
722                "type": "object",
723                "properties": {
724                    "action": {
725                        "type": "string",
726                        "enum": ["build", "test", "lint", "fix"],
727                        "description": "Which verification action to run. Defaults to build."
728                    },
729                    "profile": {
730                        "type": "string",
731                        "description": "Optional named verify profile from `.hematite/settings.json`."
732                    },
733                    "timeout_secs": {
734                        "type": "integer",
735                        "description": "Optional timeout override for this verification run."
736                    }
737                }
738            }),
739        ),
740        make_tool(
741            "git_worktree",
742            "Manage Git worktrees - isolated working directories on separate branches. \
743             Use 'add' to create a safe sandbox for risky/experimental work, \
744             'list' to see all worktrees, 'remove' to clean up, 'prune' to remove stale entries.",
745            serde_json::json!({
746                "type": "object",
747                "properties": {
748                    "action": {
749                        "type": "string",
750                        "enum": ["list", "add", "remove", "prune"],
751                        "description": "Worktree operation to perform"
752                    },
753                    "path": {
754                        "type": "string",
755                        "description": "Directory path for the new worktree (required for add/remove)"
756                    },
757                    "branch": {
758                        "type": "string",
759                        "description": "Branch name for the worktree (add only; defaults to path basename)"
760                    }
761                },
762                "required": ["action"]
763            }),
764        ),
765        make_tool(
766            "clarify",
767            "Ask the user a clarifying question when you genuinely cannot proceed without \
768             more information. Use this ONLY when you are blocked and cannot make a \
769             reasonable assumption. Do NOT use it to ask permission - just act.",
770            serde_json::json!({
771                "type": "object",
772                "properties": {
773                    "question": {
774                        "type": "string",
775                        "description": "The specific question to ask the user"
776                    }
777                },
778                "required": ["question"]
779            }),
780        ),
781        make_tool(
782            "manage_tasks",
783            "Manage the persistent task ledger in .hematite/TASK.md. Use this to track long-term goals across restarts.",
784            crate::tools::tasks::get_tasks_params(),
785        ),
786        make_tool(
787            "maintain_plan",
788            "Document the architectural strategy and session blueprint in .hematite/PLAN.md. Use this to maintain context across restarts.",
789            crate::tools::plan::get_plan_params(),
790        ),
791        make_tool(
792            "generate_walkthrough",
793            "Generate a final session report in .hematite/WALKTHROUGH.md including achievements and verification results.",
794            crate::tools::plan::get_walkthrough_params(),
795        ),
796        make_tool(
797            "swarm",
798            "Delegate high-volume parallel tasks to a swarm of background workers. \
799             Use this for large-scale refactors, multi-file research, or parallel documentation updates. \
800             You must provide a 'tasks' array where each task has an 'id', 'target' (file), and 'instruction'.",
801            serde_json::json!({
802                "type": "object",
803                "properties": {
804                    "tasks": {
805                        "type": "array",
806                        "items": {
807                            "type": "object",
808                            "properties": {
809                                "id": { "type": "string" },
810                                "target": { "type": "string", "description": "Target file or directory" },
811                                "instruction": { "type": "string", "description": "Specific task for this worker" }
812                            },
813                            "required": ["id", "target", "instruction"]
814                        }
815                    },
816                    "max_workers": {
817                        "type": "integer",
818                        "description": "Max parallel workers (default 3, auto-throttled by hardware)",
819                        "default": 3
820                    }
821                },
822                "required": ["tasks"]
823            }),
824        ),
825    ];
826
827    let lsp_defs = crate::tools::lsp_tools::get_lsp_definitions();
828    tools.push(make_tool(
829        "lsp_search_symbol",
830        "Find the location (file/line) of any function, struct, or variable in the entire project workspace. \
831         This is the fastest 'Golden Path' for navigating to a symbol by name.",
832        serde_json::json!({
833            "type": "object",
834            "properties": {
835                "query": { "type": "string", "description": "The name of the symbol to find (e.g. 'initialize_mcp')" }
836            },
837            "required": ["query"]
838        }),
839    ));
840    for def in lsp_defs {
841        let name = def["name"].as_str().unwrap();
842        tools.push(ToolDefinition {
843            tool_type: "function".into(),
844            function: ToolFunction {
845                name: name.into(),
846                description: def["description"].as_str().unwrap().into(),
847                parameters: def["parameters"].clone(),
848            },
849            metadata: tool_metadata_for_name(name),
850        });
851    }
852
853    tools
854}
855
856pub async fn dispatch_builtin_tool(name: &str, args: &Value) -> Result<String, String> {
857    match name {
858        "shell" => crate::tools::shell::execute(args).await,
859        "run_code" => crate::tools::code_sandbox::execute(args).await,
860        "trace_runtime_flow" => crate::tools::runtime_trace::trace_runtime_flow(args).await,
861        "describe_toolchain" => crate::tools::toolchain::describe_toolchain(args).await,
862        "inspect_host" => crate::tools::host_inspect::inspect_host(args).await,
863        "resolve_host_issue" => crate::tools::host_inspect::resolve_host_issue(args).await,
864        "run_hematite_maintainer_workflow" => {
865            crate::tools::repo_script::run_hematite_maintainer_workflow(args).await
866        }
867        "run_workspace_workflow" => crate::tools::workspace_workflow::run_workspace_workflow(args).await,
868        "read_file" => crate::tools::file_ops::read_file(args).await,
869        "inspect_lines" => crate::tools::file_ops::inspect_lines(args).await,
870        "tail_file" => crate::tools::file_ops::tail_file(args).await,
871        "write_file" => crate::tools::file_ops::write_file(args).await,
872        "create_directory" => crate::tools::file_ops::create_directory(args).await,
873        "edit_file" => crate::tools::file_ops::edit_file(args).await,
874        "patch_hunk" => crate::tools::file_ops::patch_hunk(args).await,
875        "multi_search_replace" => crate::tools::file_ops::multi_search_replace(args).await,
876        "list_files" => crate::tools::file_ops::list_files(args).await,
877        "grep_files" => crate::tools::file_ops::grep_files(args).await,
878        "git_commit" => crate::tools::git::execute(args).await,
879        "git_push" => crate::tools::git::execute_push(args).await,
880        "git_remote" => crate::tools::git::execute_remote(args).await,
881        "git_onboarding" => crate::tools::git_onboarding::execute(args).await,
882        "verify_build" => crate::tools::verify_build::execute(args).await,
883        "git_worktree" => crate::tools::git::execute_worktree(args).await,
884        "health" => crate::tools::health::execute(args).await,
885        "research_web" => crate::tools::research::execute_search(args).await,
886        "fetch_docs" => crate::tools::research::execute_fetch(args).await,
887        "manage_tasks" => crate::tools::tasks::manage_tasks(args).await,
888        "maintain_plan" => crate::tools::plan::maintain_plan(args).await,
889        "generate_walkthrough" => crate::tools::plan::generate_walkthrough(args).await,
890        "clarify" => {
891            let q = args.get("question").and_then(|v| v.as_str()).unwrap_or("?");
892            Ok(format!("[clarify] {q}"))
893        }
894        "vision_analyze" => Err(
895            "Tool 'vision_analyze' must be dispatched by ConversationManager (it requires hardware engine access)."
896                .into(),
897        ),
898        other => {
899            if other.contains('.') || other.contains('/') || other.contains('\\') {
900                Err(format!(
901                    "'{}' is a PATH, not a tool. You correctly identified the location, but you MUST use `read_file` or `list_files` (internal) or `powershell` (external) to access it.",
902                    other
903                ))
904            } else if matches!(other.to_lowercase().as_str(), "hematite" | "assistant" | "ai") {
905                Err(format!(
906                    "'{}' is YOUR IDENTITY, not a tool. Use list_files or read_file to explore the codebase.",
907                    other
908                ))
909            } else if matches!(
910                other.to_lowercase().as_str(),
911                "thought" | "think" | "reasoning" | "thinking" | "internal"
912            ) {
913                Err(format!(
914                    "'{}' is NOT a tool - it is a reasoning tag. Output your answer as plain text after your <think> block.",
915                    other
916                ))
917            } else {
918                Err(format!("Unknown tool: '{}'", other))
919            }
920        }
921    }
922}
923
924pub fn get_mutation_label(name: &str, args: &Value) -> Option<String> {
925    match name {
926        "shell" => {
927            let cmd = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
928            if cmd.contains("rm ") || cmd.contains("del ") {
929                Some("Destructive File Deletion".into())
930            } else if cmd.contains("mkdir ") {
931                Some("Directory Creation".into())
932            } else {
933                Some("Execute Shell Command".into())
934            }
935        }
936        "write_file" => {
937            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
938            Some(format!("Create/Overwrite File: {}", path))
939        }
940        "create_directory" => {
941            let path = args
942                .get("path")
943                .and_then(|v| v.as_str())
944                .unwrap_or("folder");
945            Some(format!("Create Directory: {}", path))
946        }
947        "edit_file" | "patch_hunk" | "multi_search_replace" => {
948            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
949            Some(format!("Surgical Code Mutation: {}", path))
950        }
951        "git_commit" => Some("Permanent Version History Commit".into()),
952        "git_push" => Some("Remote Origin Synchronisation (Push)".into()),
953        "resolve_host_issue" => Some("System-Level Host Remediation".into()),
954        "run_workspace_workflow" => Some("Automated Workspace Re-alignment".into()),
955        _ => None,
956    }
957}