Skip to main content

hematite/agent/
tool_registry.rs

1use crate::agent::config::HematiteConfig;
2use crate::agent::inference::tool_metadata_for_name;
3use crate::agent::types::{ToolDefinition, ToolFunction};
4use serde_json::Value;
5
6fn make_tool(name: &str, description: &str, parameters: Value) -> ToolDefinition {
7    ToolDefinition {
8        tool_type: "function".into(),
9        function: ToolFunction {
10            name: name.into(),
11            description: description.into(),
12            parameters,
13        },
14        metadata: tool_metadata_for_name(name),
15    }
16}
17
18/// Returns the full set of tools exposed to the model.
19pub fn get_tools() -> Vec<ToolDefinition> {
20    let os = std::env::consts::OS;
21    let mut tools = vec![
22        make_tool(
23            "shell",
24            &format!(
25                "Execute a command in the host shell ({os}). \
26                     Use this ONLY for building, testing, or advanced system operations that have no dedicated Hematite tool. \
27                     FORBIDDEN: Never use shell to run `mkdir`, `rm`, `cat`, `head`, `tail`, or `write-file` equivalents. \
28                     Use the dedicated surgical tools (create_directory, read_file, tail_file) instead. \
29                     Output is capped at 64KB. Prefer non-interactive commands."
30            ),
31            serde_json::json!({
32                "type": "object",
33                "properties": {
34                    "command": {
35                        "type": "string",
36                        "description": "The command to run"
37                    },
38                    "reason": {
39                        "type": "string",
40                        "description": "For risky shell calls, explain what this command is verifying or changing."
41                    },
42                    "timeout_secs": {
43                        "type": "integer",
44                        "description": "Optional timeout in seconds (default 60)"
45                    }
46                },
47                "required": ["command"]
48            }),
49        ),
50        make_tool(
51            "run_code",
52            "Execute a short JavaScript/TypeScript or Python snippet in a sandboxed subprocess. \
53             No network access, no filesystem escape, hard 10-second timeout. \
54             Use this to verify logic, test algorithms, compute values, or test functions \
55             when you need real output rather than a guess. \
56             ALWAYS include the `language` field — there is no default. \
57             \
58             JAVASCRIPT/TYPESCRIPT (language: \"javascript\"): \
59             Runs via Deno, NOT Node.js. `require()` does not exist — never use it. \
60             URL imports (e.g. from 'https://deno.land/...') are blocked — network is off. \
61             Use built-in Web APIs only: `crypto.subtle`, `TextEncoder`, `URL`, `atob`/`btoa`, etc. \
62             SHA-256 example: \
63               const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode('hello')); \
64               console.log([...new Uint8Array(buf)].map(b=>b.toString(16).padStart(2,'0')).join('')); \
65             \
66             PYTHON (language: \"python\"): \
67             Standard library is available. `hashlib`, `json`, `math`, `datetime`, `re`, `itertools` all work. \
68             `subprocess`, `socket`, `urllib`, `requests` are blocked. \
69             SHA-256 example: import hashlib; print(hashlib.sha256(b'hello').hexdigest()) \
70             \
71             Do NOT use this tool for PowerShell or shell scripting. This is strictly for high-precision computation in JavaScript, TypeScript, or Python only. \
72             Do NOT fall back to shell to run deno, python, or node — use this tool directly.",
73            serde_json::json!({
74                "type": "object",
75                "properties": {
76                    "language": {
77                        "type": "string",
78                        "enum": ["javascript", "typescript", "python"],
79                        "description": "The language to run. javascript/typescript requires Deno; python requires Python 3."
80                    },
81                    "code": {
82                        "type": "string",
83                        "description": "The code to execute. Keep it short and self-contained. Print results to stdout."
84                    },
85                    "timeout_seconds": {
86                        "type": "integer",
87                        "description": "Max execution time in seconds (default 10, max 60). Use higher values for longer computations."
88                    }
89                },
90                "required": ["language", "code"]
91            }),
92        ),
93
94        make_tool(
95            "trace_runtime_flow",
96            "Return an authoritative read-only trace of Hematite runtime flow. \
97             Use this for architecture questions about keyboard input to final output, \
98             reasoning/specular separation, startup wiring, runtime subsystems, \
99             voice synthesis and Ctrl+T toggle, or \
100             session reset commands like /clear, /new, and /forget. Prefer this over guessing.",
101            serde_json::json!({
102                "type": "object",
103                "properties": {
104                    "topic": {
105                        "type": "string",
106                        "enum": ["user_turn", "session_reset", "reasoning_split", "runtime_subsystems", "startup", "voice"],
107                        "description": "Which verified runtime report to return. Use 'voice' for any question about Ctrl+T, voice toggle, or TTS pipeline. Use 'user_turn' for keyboard-to-output flow. Use 'session_reset' for /clear, /forget, /new. Use 'startup' for startup wiring. Use 'reasoning_split' for specular/thought routing. Use 'runtime_subsystems' for background subsystem overview."
108                    },
109                    "input": {
110                        "type": "string",
111                        "description": "Optional user input to label a normal user-turn trace"
112                    },
113                    "command": {
114                        "type": "string",
115                        "enum": ["/clear", "/new", "/forget", "all"],
116                        "description": "Optional reset command when topic=session_reset"
117                    }
118                },
119                "required": ["topic"]
120            }),
121        ),
122        make_tool(
123            "describe_toolchain",
124            "Return an authoritative read-only description of Hematite's actual tool surface and investigation strategy. \
125             Use this for tooling-discipline questions, best-tool selection, or read-only plans for tracing runtime behavior. \
126             Prefer this over improvising tool names or investigation steps from memory.",
127            serde_json::json!({
128                "type": "object",
129                "properties": {
130                    "topic": {
131                        "type": "string",
132                        "enum": ["read_only_codebase", "user_turn_plan", "voice_latency_plan", "host_inspection_plan", "all"],
133                        "description": "Which authoritative toolchain report to return"
134                    },
135                    "question": {
136                        "type": "string",
137                        "description": "Optional user question to label or tailor the read-only investigation plan"
138                    }
139                }
140            }),
141        ),
142        make_tool(
143            "inspect_host",
144            "Return a structured read-only inspection of the current machine and environment. \
145             Prefer this over raw shell for questions about OS configuration (firewall, power, uptime), plain-English system health reports, silicon health and high-fidelity hardware telemetry (NVIDIA clocks/fans/power, CPU frequency averaging), installed developer tools, PATH issues, package-manager and environment health, network state, service state, running processes, desktop items, Downloads size, listening ports, repo health, or directory/disk summaries. \
146             For high-performance hardware testing, use topic=disk_benchmark to measure real-time kernel disk queue intensity. \
147             For remediation questions phrased like 'how do I fix cargo not found', 'how do I fix port 3000 already in use', or 'how do I fix LM Studio not reachable', use topic=fix_plan instead of diagnosis-only topics like env_doctor, path, or ports. \
148             Use topic=summary for a compact host snapshot, topic=toolchains for common dev tool versions, topic=path for PATH analysis, topic=env_doctor for package-manager and PATH health, topic=fix_plan for structured remediation plans, topic=network for adapters/IPs/gateways/DNS, topic=services for service status and startup mode, \
149             topic=processes for top processes by memory/cpu and real-time disk/network I/O stats (look for [I/O R:N/W:N] tags to identify disk-heavy processes), \
150             topic=desktop or topic=downloads for known folders, topic=ports for listening endpoints, topic=repo_doctor for a structured workspace health report, \
151             topic=log_check for recent critical/error events from system event logs or journalctl, topic=startup_items for programs and services that run at boot (registry Run keys and startup folders on Windows; systemd enabled units on Linux), \
152             topic=health_report for a plain-English tiered system health verdict (disk, RAM, tools, recent errors), \
153             topic=storage for all drives with capacity/free space plus large developer cache directories, \
154             topic=hardware for CPU model/cores, RAM size/speed, GPU name/driver, motherboard, BIOS, and display configuration, \
155             topic=updates for Windows Update status (last install date, pending update count, WU service state), \
156             topic=security for Windows Defender real-time protection status, last scan date, signature age, firewall profile states, Windows activation, and UAC state, \
157             topic=pending_reboot to check whether a system restart is required and why (Windows Update, CBS, file rename operations), \
158             topic=disk_health for physical drive health via Get-PhysicalDisk and SMART failure prediction, \
159             topic=battery for charge level, status, estimated runtime, and wear level (laptops only — reports no battery on desktops), \
160             topic=recent_crashes for BSOD and unexpected shutdown events plus application crash/hang events from the Windows event log, \
161             topic=scheduled_tasks for all non-disabled scheduled tasks including name, path, last run time, and executable, \
162             topic=dev_conflicts for cross-tool environment conflict detection (Node.js version managers, Python 2 vs 3 ambiguity, conda env shadowing, Rust toolchain path conflicts, Git identity/signing config, duplicate PATH entries), \
163             topic=bitlocker for drive encryption status (BitLocker on Windows, LUKS on Linux), \
164             topic=ad_user for Active Directory / Managed Identity details (SID, group memberships, domain role), \
165             topic=user_accounts for Local User and Group diagnostics (Built-in Administrators, local account state), \
166             topic=rdp for Remote Desktop configuration, port, and active sessions, \
167             topic=shadow_copies for Volume Shadow Copies (VSS) and system restore points, \
168             topic=pagefile for Windows page file configuration and current usage, \
169             topic=windows_features for enabled Windows optional features (IIS, Hyper-V, etc.), \
170             topic=printers for installed printers and active print jobs, \
171             topic=winrm for Windows Remote Management (WinRM) and PS Remoting status, \
172             topic=network_stats for adapter throughput (RX/TX), errors, and dropped packets, \
173             topic=udp_ports for active UDP listeners and notable port annotations, \
174             topic=gpo for applied Group Policy Objects, topic=certificates for local personal certificates, topic=integrity for Windows component store health (SFC/DISM state), topic=domain for Active Directory and domain join status, \
175             topic=device_health for identifying malfunctioning hardware with ConfigManager error codes (Yellow Bangs), topic=drivers for auditing active system drivers and their states, topic=peripherals for enumerating connected USB, input, and display hardware, \
176             topic=sessions for auditing active and disconnected user logon sessions, \
177             topic=ad_user for specific Active Directory user identity, SID, and group membership auditing, \
178             topic=dns_lookup for precision DNS record queries (SRV, MX, TXT), \
179             topic=mdm_enrollment for Intune/MDM enrollment state, Azure AD join, and device management health, \
180             topic=hyperv for local Hyper-V VM inventory and real-time load, \
181             topic=ip_config for detailed adapter configuration and DHCP lease state, \
182             topic=disk_benchmark for high-performance silicon-aware stress testing, \
183             and topic=directory or topic=disk for arbitrary paths.",
184            serde_json::json!({
185                "type": "object",
186                "properties": {
187                    "topic": {
188                        "type": "string",
189                        "enum": ["summary", "toolchains", "path", "env_doctor", "fix_plan", "network", "services", "processes", "desktop", "downloads", "directory", "disk", "ports", "repo_doctor", "log_check", "startup_items", "health_report", "storage", "hardware", "updates", "security", "pending_reboot", "disk_health", "battery", "recent_crashes", "scheduled_tasks", "dev_conflicts", "os_config", "bitlocker", "rdp", "shadow_copies", "pagefile", "windows_features", "printers", "winrm", "network_stats", "udp_ports", "gpo", "certificates", "integrity", "domain", "device_health", "drivers", "peripherals", "disk_benchmark", "permissions", "login_history", "registry_audit", "share_access", "thermal", "activation", "patch_history", "ad_user", "dns_lookup", "hyperv", "ip_config", "mdm_enrollment"],
190                        "description": "Which structured host inspection to run. Use topic=ad_user for domain identity audit, topic=dns_lookup for SRV/MX records, topic=hyperv for VM load, topic=ip_config for detailed adapter info, and topic=mdm_enrollment for Intune/MDM enrollment state."
191                    },
192                    "name": {
193                        "type": "string",
194                        "description": "Optional when topic=processes or topic=services. Case-insensitive substring filter for process or service names."
195                    },
196                    "issue": {
197                        "type": "string",
198                        "description": "Optional when topic=fix_plan. Plain-English issue description such as 'cargo not found', 'port 3000 already in use', or 'LM Studio not reachable on localhost:1234'."
199                    },
200                    "path": {
201                        "type": "string",
202                        "description": "Required when topic=directory. Optional for topic=disk or topic=repo_doctor. Absolute or relative path to inspect."
203                    },
204                    "port": {
205                        "type": "integer",
206                        "description": "Optional when topic=ports or topic=fix_plan. Filter the result to one listening TCP port or anchor a port-conflict fix plan."
207                    },
208                    "max_entries": {
209                        "type": "integer",
210                        "description": "Optional cap for listed entries. Defaults to 10 and is capped internally."
211                    }
212                }
213            }),
214        ),
215        make_tool(
216            "resolve_host_issue",
217            "A safe, bounded tool for remediating OS and environment issues automatically with user approval. \
218             Use this to fix missing dependencies, restart stuck services, or clear disk space instead of using raw shell. \
219             The user will be prompted to approve the action. Keep targets exact.",
220            serde_json::json!({
221                "type": "object",
222                "properties": {
223                    "action": {
224                        "type": "string",
225                        "enum": ["install_package", "restart_service", "clear_temp"],
226                        "description": "The type of remediation to perform."
227                    },
228                    "target": {
229                        "type": "string",
230                        "description": "The specific target (e.g., 'python' for install_package, or 'docker' for restart_service). Optional for clear_temp."
231                    }
232                },
233                "required": ["action"]
234            }),
235        ),
236        make_tool(
237            "run_hematite_maintainer_workflow",
238            "Run one of Hematite's known maintainer or release workflows with explicit approval. \
239             Prefer this over raw shell when the user explicitly asks to run one of Hematite's own scripts such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`. \
240             Use workflow=clean for cleanup, workflow=package_windows for rebuilding the local Windows portable or installer, and workflow=release for the normal version bump/tag/push/publish flow. \
241             Keep this tool constrained to Hematite's own known workflows instead of inventing ad hoc shell commands or pretending to run arbitrary project scripts.",
242            serde_json::json!({
243                "type": "object",
244                "properties": {
245                    "workflow": {
246                        "type": "string",
247                        "enum": ["clean", "package_windows", "release"],
248                        "description": "Which known Hematite maintainer workflow to run."
249                    },
250                    "deep": {
251                        "type": "boolean",
252                        "description": "For workflow=clean. Also remove heavy build/runtime artifacts such as target/ and vein.db."
253                    },
254                    "reset": {
255                        "type": "boolean",
256                        "description": "For workflow=clean. Reset PLAN/TASK state in addition to normal cleanup."
257                    },
258                    "prune_dist": {
259                        "type": "boolean",
260                        "description": "For workflow=clean. Keep only the current Cargo.toml version under dist/."
261                    },
262                    "installer": {
263                        "type": "boolean",
264                        "description": "For workflow=package_windows. Also build the Windows installer."
265                    },
266                    "add_to_path": {
267                        "type": "boolean",
268                        "description": "For workflow=package_windows or workflow=release. Update the user PATH to the rebuilt portable."
269                    },
270                    "version": {
271                        "type": "string",
272                        "description": "For workflow=release. Exact semantic version such as 0.4.5."
273                    },
274                    "bump": {
275                        "type": "string",
276                        "enum": ["patch", "minor", "major"],
277                        "description": "For workflow=release. Ask release.ps1 to calculate the next version."
278                    },
279                    "push": {
280                        "type": "boolean",
281                        "description": "For workflow=release. Push main and the new tag."
282                    },
283                    "skip_installer": {
284                        "type": "boolean",
285                        "description": "For workflow=release. Skip the Windows installer build."
286                    },
287                    "publish_crates": {
288                        "type": "boolean",
289                        "description": "For workflow=release. Publish hematite-cli to crates.io after a successful push."
290                    },
291                    "publish_voice_crate": {
292                        "type": "boolean",
293                        "description": "For workflow=release. Publish hematite-kokoros first, then hematite-cli."
294                    }
295                },
296                "required": ["workflow"]
297            }),
298        ),
299        make_tool(
300            "run_workspace_workflow",
301            "Run an approval-gated workflow or script in the locked project workspace root. \
302             Use this for the current project's build, test, lint, fix, package.json scripts, just/task/make targets, explicit local script paths, exact workspace commands, or typed website server control. \
303             Website workflows are preferred when working on a local web app because they give Hematite a structured start/probe/validate/status/stop loop with stored runtime metadata instead of improvised shell. \
304             FORBIDDEN: The `command` field MUST be a real executable shell command (e.g. `npm install`, `cargo build`). \
305             NEVER put natural language, user-requests, or conversational intent into the `command` field. \
306             This tool is for the active workspace, not for Hematite's own maintainer scripts.",
307            serde_json::json!({
308                "type": "object",
309                "properties": {
310                    "workflow": {
311                        "type": "string",
312                        "enum": ["build", "test", "lint", "fix", "package_script", "task", "just", "make", "script_path", "command", "website_start", "website_probe", "website_validate", "website_status", "website_stop"],
313                        "description": "Which workspace workflow to run."
314                    },
315                    "name": {
316                        "type": "string",
317                        "description": "Required for workflow=package_script, task, just, or make. The script or target name."
318                    },
319                    "path": {
320                        "type": "string",
321                        "description": "Required for workflow=script_path. Relative path to a script inside the locked workspace root."
322                    },
323                    "command": {
324                        "type": "string",
325                        "description": "Required for workflow=command. Exact command to execute from the locked workspace root."
326                    },
327                    "mode": {
328                        "type": "string",
329                        "enum": ["dev", "preview", "start"],
330                        "description": "Optional for workflow=website_start. Which website server mode to infer. Defaults to dev."
331                    },
332                    "script": {
333                        "type": "string",
334                        "description": "Optional for workflow=website_start. Exact package.json script to run instead of inferring one."
335                    },
336                    "url": {
337                        "type": "string",
338                        "description": "Optional for workflow=website_start, website_probe, or website_validate. Explicit local URL to probe, such as http://127.0.0.1:5173/."
339                    },
340                    "host": {
341                        "type": "string",
342                        "description": "Optional for workflow=website_start. Host used when constructing an inferred probe URL. Defaults to 127.0.0.1."
343                    },
344                    "port": {
345                        "type": "integer",
346                        "description": "Optional for workflow=website_start. Port used when constructing an inferred probe URL."
347                    },
348                    "label": {
349                        "type": "string",
350                        "description": "Optional for website workflows. Logical server name for storing runtime metadata. Defaults to default."
351                    },
352                    "routes": {
353                        "type": "array",
354                        "items": { "type": "string" },
355                        "description": "Optional for workflow=website_validate. Relative routes or absolute URLs to validate, such as [\"/\", \"/pricing\", \"/about\"]."
356                    },
357                    "asset_limit": {
358                        "type": "integer",
359                        "description": "Optional for workflow=website_validate. Maximum number of linked local assets to probe after route validation."
360                    },
361                    "request_timeout_ms": {
362                        "type": "integer",
363                        "description": "Optional for workflow=website_start. Per-request HTTP timeout used by the readiness probe."
364                    },
365                    "timeout_ms": {
366                        "type": "integer",
367                        "description": "Optional timeout override in milliseconds. For website_start this is the boot/readiness timeout. For website_probe and website_status it is the probe timeout."
368                    }
369                },
370                "required": ["workflow"]
371            }),
372        ),
373        make_tool(
374            "read_file",
375            "Read the contents of a file. For large files, use 'offset' and 'limit' to navigate.",
376            serde_json::json!({
377                "type": "object",
378                "properties": {
379                    "path": {
380                        "type": "string",
381                        "description": "Path to the file, relative to the project root"
382                    },
383                    "offset": {
384                        "type": "integer",
385                        "description": "Starting line number (0-indexed)"
386                    },
387                    "limit": {
388                        "type": "integer",
389                        "description": "Number of lines to read"
390                    }
391                },
392                "required": ["path"]
393            }),
394        ),
395        make_tool(
396            "lsp_definitions",
397            "Get the precise definition location (file:line:char) for a symbol at a specific position. \
398             Use this to jump to function/struct source code accurately.",
399            serde_json::json!({
400                "type": "object",
401                "properties": {
402                    "path": { "type": "string", "description": "File path" },
403                    "line": { "type": "integer", "description": "0-indexed line" },
404                    "character": { "type": "integer", "description": "0-indexed character" }
405                },
406                "required": ["path", "line", "character"]
407            }),
408        ),
409        make_tool(
410            "lsp_references",
411            "Find all locations where a symbol is used across the entire workspace. \
412             Use this to understand the impact of a refactor or discover internal API users.",
413            serde_json::json!({
414                "type": "object",
415                "properties": {
416                    "path": { "type": "string", "description": "File path" },
417                    "line": { "type": "integer", "description": "0-indexed line" },
418                    "character": { "type": "integer", "description": "0-indexed character" }
419                },
420                "required": ["path", "line", "character"]
421            }),
422        ),
423        make_tool(
424            "lsp_hover",
425            "Get hover information (documentation, function signature, type details) for a symbol. \
426             Use this for rapid spatial awareness without opening every file.",
427            serde_json::json!({
428                "type": "object",
429                "properties": {
430                    "path": { "type": "string", "description": "File path" },
431                    "line": { "type": "integer", "description": "0-indexed line" },
432                    "character": { "type": "integer", "description": "0-indexed character" }
433                },
434                "required": ["path", "line", "character"]
435            }),
436        ),
437        make_tool(
438            "lsp_rename_symbol",
439            "Rename a symbol project-wide using the Language Server. Ensures all references are updated safely.",
440            serde_json::json!({
441                "type": "object",
442                "properties": {
443                    "path": { "type": "string", "description": "File path" },
444                    "line": { "type": "integer", "description": "0-indexed line" },
445                    "character": { "type": "integer", "description": "0-indexed character" },
446                    "new_name": { "type": "string", "description": "The new name for the symbol" }
447                },
448                "required": ["path", "line", "character", "new_name"]
449            }),
450        ),
451        make_tool(
452            "lsp_get_diagnostics",
453            "Get a list of current compiler errors and warnings for a specific file. \
454             Use this to verify your code compiles and and to find exactly where errors are located.",
455            serde_json::json!({
456                "type": "object",
457                "properties": {
458                    "path": { "type": "string", "description": "File path" }
459                },
460                "required": ["path"]
461            }),
462        ),
463        make_tool(
464            "vision_analyze",
465            "Send an image file (screenshot, diagram, or UI mockup) to the multimodal vision model for technical analysis. \
466             Use this to identify UI bugs, confirm visual states, or understand architectural diagrams.",
467            serde_json::json!({
468                "type": "object",
469                "properties": {
470                    "path": { "type": "string", "description": "Absolute or relative path to the image file." },
471                    "prompt": { "type": "string", "description": "The specific question or analysis request for the vision model." }
472                },
473                "required": ["path", "prompt"]
474            }),
475        ),
476        make_tool(
477            "patch_hunk",
478            "Replace a specific line range [start_line, end_line] with new content. \
479             This is the most precise way to edit code and avoids search string failures.",
480            serde_json::json!({
481                "type": "object",
482                "properties": {
483                    "path": { "type": "string", "description": "File path" },
484                    "start_line": { "type": "integer", "description": "Starting line (1-indexed)" },
485                    "end_line": { "type": "integer", "description": "Ending line (inclusive)" },
486                    "replacement": { "type": "string", "description": "The new content for this range" }
487                },
488                "required": ["path", "start_line", "end_line", "replacement"]
489            }),
490        ),
491        make_tool(
492            "multi_search_replace",
493            "Replace multiple existing code blocks in a single file with new content. \
494             Each hunk specifies an EXACT 'search' string and a 'replace' string. \
495             The 'search' string MUST exactly match the existing file contents (including whitespace). \
496             This is the safest and most reliable way to make multiple structural edits.",
497            serde_json::json!({
498                "type": "object",
499                "properties": {
500                    "path": { "type": "string", "description": "File path" },
501                    "hunks": {
502                        "type": "array",
503                        "items": {
504                            "type": "object",
505                            "properties": {
506                                "search": { "type": "string", "description": "Exact existing text to find and replace" },
507                                "replace": { "type": "string", "description": "The new replacement text" }
508                            },
509                            "required": ["search", "replace"]
510                        }
511                    }
512                },
513                "required": ["path", "hunks"]
514            }),
515        ),
516        make_tool(
517            "write_file",
518            "Write content to a file, creating it (and any parent dirs) if needed. \
519             Overwrites existing files. \
520             SOVEREIGN PATHING: For files in common areas, use `@DESKTOP/file.txt`, `@DOCUMENTS/file.txt`, `@DOWNLOADS/file.txt`, or `@HOME/file.txt` to ensure 100% path accuracy.",
521            serde_json::json!({
522                "type": "object",
523                "properties": {
524                    "path": { "type": "string", "description": "File path" },
525                    "content": { "type": "string", "description": "Full file content to write" }
526                },
527                "required": ["path", "content"]
528            }),
529        ),
530        make_tool(
531            "create_directory",
532            "Authoritatively create a new directory (and any parent dirs) if they do not exist. \
533             Use this instead of raw shell (mkdir) for all filesystem organization. \
534             Supports both relative paths and absolute paths. \
535             SOVEREIGN PATHING: For directories in common areas, use `@DESKTOP/folder`, `@DOCUMENTS/folder`, `@DOWNLOADS/folder`, or `@HOME/folder` to ensure 100% path accuracy.",
536            serde_json::json!({
537                "type": "object",
538                "properties": {
539                    "path": { "type": "string", "description": "Relative or absolute directory path" }
540                },
541                "required": ["path"]
542            }),
543        ),
544        make_tool(
545            "research_web",
546            "Perform a zero-cost technical search using DuckDuckGo. \
547             Use this to find documentation, latest API changes, or solutions to complex errors \
548             when your internal knowledge is insufficient. Returns snippets and URLs.",
549            serde_json::json!({
550                "type": "object",
551                "properties": {
552                    "query": { "type": "string", "description": "The technical search query" }
553                },
554                "required": ["query"]
555            }),
556        ),
557        make_tool(
558            "fetch_docs",
559            "Fetch a URL and convert it to clean Markdown. Use this to 'read' the documentation \
560             links found via research_web. This tool uses a proxy to bypass IP blocks.",
561            serde_json::json!({
562                "type": "object",
563                "properties": {
564                    "url": { "type": "string", "description": "The URL of the documentation to fetch" }
565                },
566                "required": ["url"]
567            }),
568        ),
569        make_tool(
570            "edit_file",
571            "Edit a file by replacing an exact string with another. \
572             The 'search' string does NOT need perfectly matching indentation (it is fuzzy), \
573             but the non-whitespace text must match exactly. Use this for targeted edits.",
574            serde_json::json!({
575                "type": "object",
576                "properties": {
577                    "path": { "type": "string", "description": "File path" },
578                    "search": {
579                        "type": "string",
580                        "description": "The exact text to find (must match whitespace/indentation precisely)"
581                    },
582                    "replace": {
583                        "type": "string",
584                        "description": "The replacement text"
585                    }
586                },
587                "required": ["path", "search", "replace"]
588            }),
589        ),
590        make_tool(
591            "auto_pin_context",
592            "Select 1-3 core files to 'Lock' into prioritized memory. \
593             Use this to ensure the most important architecture files \
594             are always visible during complex refactorings.",
595            serde_json::json!({
596                "type": "object",
597                "properties": {
598                    "paths": {
599                        "type": "array",
600                        "items": { "type": "string" }
601                    },
602                    "reason": { "type": "string" }
603                },
604                "required": ["paths", "reason"]
605            }),
606        ),
607        make_tool(
608            "list_pinned",
609            "List all files currently pinned in the model's active context.",
610            serde_json::json!({
611                "type": "object",
612                "properties": {}
613            }),
614        ),
615        make_tool(
616            "list_files",
617            "List files in a directory, optionally filtered by extension.",
618            serde_json::json!({
619                "type": "object",
620                "properties": {
621                    "path": {
622                        "type": "string",
623                        "description": "Directory to list (default: current dir)"
624                    },
625                    "extension": {
626                        "type": "string",
627                        "description": "Only return files with this extension, e.g. 'rs', 'toml' (no dot)"
628                    }
629                },
630                "required": []
631            }),
632        ),
633        make_tool(
634            "tail_file",
635            "Read the last N lines of a file — useful for log files, test output, \
636             build artifacts, and any large file where only the tail is relevant. \
637             Supports an optional grep filter to show only matching lines from the tail. \
638             Use this instead of read_file when you only need the end of a large file.",
639            serde_json::json!({
640                "type": "object",
641                "properties": {
642                    "path": {
643                        "type": "string",
644                        "description": "Path to the file, relative to the project root"
645                    },
646                    "lines": {
647                        "type": "integer",
648                        "description": "Number of lines to return from the end (default: 50, max: 500)"
649                    },
650                    "grep": {
651                        "type": "string",
652                        "description": "Optional regex pattern — only return lines matching this pattern (applied before the tail slice)"
653                    }
654                },
655                "required": ["path"]
656            }),
657        ),
658        make_tool(
659            "grep_files",
660            "Search file contents for a regex pattern. Supports context lines, files-only mode, \
661             and pagination. Returns file:line:content format by default.",
662            serde_json::json!({
663                "type": "object",
664                "properties": {
665                    "pattern": {
666                        "type": "string",
667                        "description": "Regex pattern to search for (case-insensitive by default)"
668                    },
669                    "path": {
670                        "type": "string",
671                        "description": "Directory to search (default: current dir)"
672                    },
673                    "extension": {
674                        "type": "string",
675                        "description": "Only search files with this extension, e.g. 'rs'"
676                    },
677                    "mode": {
678                        "type": "string",
679                        "enum": ["content", "files_only"],
680                        "description": "'content' (default) returns matching lines; 'files_only' returns only filenames"
681                    },
682                    "context": {
683                        "type": "integer",
684                        "description": "Lines of context before AND after each match (like rg -C)"
685                    },
686                    "before": {
687                        "type": "integer",
688                        "description": "Lines of context before each match (overrides context)"
689                    },
690                    "after": {
691                        "type": "integer",
692                        "description": "Lines of context after each match (overrides context)"
693                    },
694                    "head_limit": {
695                        "type": "integer",
696                        "description": "Max hunks (or files in files_only) to return (default: 50)"
697                    },
698                    "offset": {
699                        "type": "integer",
700                        "description": "Skip first N hunks/files - for pagination (default: 0)"
701                    }
702                },
703                "required": ["pattern"]
704            }),
705        ),
706        make_tool(
707            "github_ops",
708            "Interact with GitHub via the `gh` CLI. Requires `gh` installed and `gh auth login` completed. \
709             Use for pull requests, issues, CI run status, and repo metadata. \
710             Never use `shell` to call `gh` — use this tool instead.",
711            serde_json::json!({
712                "type": "object",
713                "properties": {
714                    "action": {
715                        "type": "string",
716                        "enum": [
717                            "pr_list", "pr_view", "pr_create", "pr_status", "pr_checks", "pr_merge",
718                            "issue_list", "issue_view", "issue_create",
719                            "ci_status", "run_view",
720                            "repo_view", "release_list"
721                        ],
722                        "description": "GitHub operation to perform"
723                    },
724                    "title": { "type": "string", "description": "PR or issue title (for create actions)" },
725                    "body": { "type": "string", "description": "PR or issue body (for create actions)" },
726                    "base": { "type": "string", "description": "Base branch for PR (default: main)" },
727                    "draft": { "type": "boolean", "description": "Create PR as draft" },
728                    "pr": { "type": "string", "description": "PR number or URL (for view/checks/merge)" },
729                    "number": { "description": "Issue number (for issue_view)" },
730                    "state": { "type": "string", "enum": ["open", "closed", "all"], "description": "Filter state for listings" },
731                    "strategy": { "type": "string", "enum": ["merge", "squash", "rebase"], "description": "Merge strategy for pr_merge" },
732                    "branch": { "type": "string", "description": "Branch name for ci_status (defaults to current branch)" },
733                    "run_id": { "type": "string", "description": "Run ID for run_view" },
734                    "limit": { "type": "integer", "description": "Max results to return (default 10)" }
735                },
736                "required": ["action"]
737            }),
738        ),
739        make_tool(
740            "git_commit",
741            "Stage all changes (git add -A) and create a commit. You MUST use 'Conventional Commits' (e.g. 'feat: description').",
742            serde_json::json!({
743                "type": "object",
744                "properties": {
745                    "message": { "type": "string", "description": "Commit message (Conventional Commit style)" }
746                },
747                "required": ["message"]
748            }),
749        ),
750        make_tool(
751            "git_push",
752            "Push current branched changes to the remote origin. Requires an existing remote connection.",
753            serde_json::json!({
754                "type": "object",
755                "properties": {},
756                "required": []
757            }),
758        ),
759        make_tool(
760            "git_remote",
761            "View or manage git remotes. Use this for onboarding to GitHub/GitLab services.",
762            serde_json::json!({
763                "type": "object",
764                "properties": {
765                    "action": {
766                        "type": "string",
767                        "enum": ["list", "add", "remove"],
768                        "description": "Operation to perform"
769                    },
770                    "name": { "type": "string", "description": "Remote name (e.g. origin)" },
771                    "url": { "type": "string", "description": "Remote URL (for 'add' action)" }
772                },
773                "required": ["action"]
774            }),
775        ),
776        make_tool(
777            "git_onboarding",
778            "High-level wizard to connect this repository to a remote host (GitHub/GitLab). \
779             Handles adding the remote and performing the initial tracking push in one step.",
780            serde_json::json!({
781                "type": "object",
782                "properties": {
783                    "url": { "type": "string", "description": "The remote repository URL (HTTPS or SSH)" },
784                    "name": { "type": "string", "description": "The remote name (default: origin)" },
785                    "push": { "type": "boolean", "description": "Whether to perform an initial push to establish tracking (default: false)" }
786                },
787                "required": ["url"]
788            }),
789        ),
790        make_tool(
791            "verify_build",
792            "Run project verification for build, test, lint, or fix workflows. \
793             Prefer per-project verify profiles from `.hematite/settings.json`, and fall back to \
794             auto-detected defaults when no profile is configured. Returns BUILD OK or BUILD FAILED \
795             with command output. ALWAYS call this after scaffolding a new project or making structural changes.",
796            serde_json::json!({
797                "type": "object",
798                "properties": {
799                    "action": {
800                        "type": "string",
801                        "enum": ["build", "test", "lint", "fix"],
802                        "description": "Which verification action to run. Defaults to build."
803                    },
804                    "profile": {
805                        "type": "string",
806                        "description": "Optional named verify profile from `.hematite/settings.json`."
807                    },
808                    "timeout_secs": {
809                        "type": "integer",
810                        "description": "Optional timeout override for this verification run."
811                    }
812                }
813            }),
814        ),
815        make_tool(
816            "git_worktree",
817            "Manage Git worktrees - isolated working directories on separate branches. \
818             Use 'add' to create a safe sandbox for risky/experimental work, \
819             'list' to see all worktrees, 'remove' to clean up, 'prune' to remove stale entries.",
820            serde_json::json!({
821                "type": "object",
822                "properties": {
823                    "action": {
824                        "type": "string",
825                        "enum": ["list", "add", "remove", "prune"],
826                        "description": "Worktree operation to perform"
827                    },
828                    "path": {
829                        "type": "string",
830                        "description": "Directory path for the new worktree (required for add/remove)"
831                    },
832                    "branch": {
833                        "type": "string",
834                        "description": "Branch name for the worktree (add only; defaults to path basename)"
835                    }
836                },
837                "required": ["action"]
838            }),
839        ),
840        make_tool(
841            "clarify",
842            "Ask the user a clarifying question when you genuinely cannot proceed without \
843             more information. Use this ONLY when you are blocked and cannot make a \
844             reasonable assumption. Do NOT use it to ask permission - just act.",
845            serde_json::json!({
846                "type": "object",
847                "properties": {
848                    "question": {
849                        "type": "string",
850                        "description": "The specific question to ask the user"
851                    }
852                },
853                "required": ["question"]
854            }),
855        ),
856        make_tool(
857            "manage_tasks",
858            "Manage the persistent task ledger in .hematite/TASK.md. Use this to track long-term goals across restarts.",
859            crate::tools::tasks::get_tasks_params(),
860        ),
861        make_tool(
862            "maintain_plan",
863            "Document the architectural strategy and session blueprint in .hematite/PLAN.md. Use this to maintain context across restarts.",
864            crate::tools::plan::get_plan_params(),
865        ),
866        make_tool(
867            "generate_walkthrough",
868            "Generate a final session report in .hematite/WALKTHROUGH.md including achievements and verification results.",
869            crate::tools::plan::get_walkthrough_params(),
870        ),
871        make_tool(
872            "swarm",
873            "Delegate high-volume parallel tasks to a swarm of background workers. \
874             Use this for large-scale refactors, multi-file research, or parallel documentation updates. \
875             You must provide a 'tasks' array where each task has an 'id', 'target' (file), and 'instruction'.",
876            serde_json::json!({
877                "type": "object",
878                "properties": {
879                    "tasks": {
880                        "type": "array",
881                        "items": {
882                            "type": "object",
883                            "properties": {
884                                "id": { "type": "string" },
885                                "target": { "type": "string", "description": "Target file or directory" },
886                                "instruction": { "type": "string", "description": "Specific task for this worker" }
887                            },
888                            "required": ["id", "target", "instruction"]
889                        }
890                    },
891                    "max_workers": {
892                        "type": "integer",
893                        "description": "Max parallel workers (default 3, auto-throttled by hardware)",
894                        "default": 3
895                    }
896                },
897                "required": ["tasks"]
898            }),
899        ),
900    ];
901
902    let lsp_defs = crate::tools::lsp_tools::get_lsp_definitions();
903    tools.push(make_tool(
904        "lsp_search_symbol",
905        "Find the location (file/line) of any function, struct, or variable in the entire project workspace. \
906         This is the fastest 'Golden Path' for navigating to a symbol by name.",
907        serde_json::json!({
908            "type": "object",
909            "properties": {
910                "query": { "type": "string", "description": "The name of the symbol to find (e.g. 'initialize_mcp')" }
911            },
912            "required": ["query"]
913        }),
914    ));
915    for def in lsp_defs {
916        let name = def["name"].as_str().unwrap();
917        tools.push(ToolDefinition {
918            tool_type: "function".into(),
919            function: ToolFunction {
920                name: name.into(),
921                description: def["description"].as_str().unwrap().into(),
922                parameters: def["parameters"].clone(),
923            },
924            metadata: tool_metadata_for_name(name),
925        });
926    }
927
928    tools
929}
930
931pub async fn dispatch_builtin_tool(
932    name: &str,
933    args: &Value,
934    config: &HematiteConfig,
935    budget_tokens: usize,
936) -> Result<String, String> {
937    match name {
938        "shell" => crate::tools::shell::execute(args, budget_tokens).await,
939        "run_code" => crate::tools::code_sandbox::execute(args).await,
940        "trace_runtime_flow" => crate::tools::runtime_trace::trace_runtime_flow(args).await,
941        "describe_toolchain" => crate::tools::toolchain::describe_toolchain(args).await,
942        "inspect_host" => crate::tools::host_inspect::inspect_host(args).await,
943        "resolve_host_issue" => crate::tools::host_inspect::resolve_host_issue(args).await,
944        "run_hematite_maintainer_workflow" => {
945            crate::tools::repo_script::run_hematite_maintainer_workflow(args).await
946        }
947        "run_workspace_workflow" => crate::tools::workspace_workflow::run_workspace_workflow(args).await,
948        "read_file" => crate::tools::file_ops::read_file(args, budget_tokens).await,
949        "inspect_lines" => crate::tools::file_ops::inspect_lines(args).await,
950        "tail_file" => crate::tools::file_ops::tail_file(args).await,
951        "write_file" => crate::tools::file_ops::write_file(args).await,
952        "create_directory" => crate::tools::file_ops::create_directory(args).await,
953        "edit_file" => crate::tools::file_ops::edit_file(args).await,
954        "patch_hunk" => crate::tools::file_ops::patch_hunk(args).await,
955        "multi_search_replace" => crate::tools::file_ops::multi_search_replace(args).await,
956        "list_files" => crate::tools::file_ops::list_files(args, budget_tokens).await,
957        "grep_files" => crate::tools::file_ops::grep_files(args, budget_tokens).await,
958        "github_ops" => crate::tools::github::execute(args).await,
959        "git_commit" => crate::tools::git::execute(args).await,
960        "git_push" => crate::tools::git::execute_push(args).await,
961        "git_remote" => crate::tools::git::execute_remote(args).await,
962        "git_onboarding" => crate::tools::git_onboarding::execute(args).await,
963        "verify_build" => crate::tools::verify_build::execute(args).await,
964        "git_worktree" => crate::tools::git::execute_worktree(args).await,
965        "health" => crate::tools::health::execute(args).await,
966        "research_web" => {
967            crate::tools::research::execute_search(args, config.searx_url.clone()).await
968        }
969        "fetch_docs" => crate::tools::research::execute_fetch(args).await,
970        "manage_tasks" => crate::tools::tasks::manage_tasks(args).await,
971        "maintain_plan" => crate::tools::plan::maintain_plan(args).await,
972        "generate_walkthrough" => crate::tools::plan::generate_walkthrough(args).await,
973        "clarify" => {
974            let q = args.get("question").and_then(|v| v.as_str()).unwrap_or("?");
975            Ok(format!("[clarify] {q}"))
976        }
977        "vision_analyze" => Err(
978            "Tool 'vision_analyze' must be dispatched by ConversationManager (it requires hardware engine access)."
979                .into(),
980        ),
981        other => {
982            if other.contains('.') || other.contains('/') || other.contains('\\') {
983                Err(format!(
984                    "'{}' is a PATH, not a tool. You correctly identified the location, but you MUST use `read_file` or `list_files` (internal) or `powershell` (external) to access it.",
985                    other
986                ))
987            } else if matches!(other.to_lowercase().as_str(), "hematite" | "assistant" | "ai") {
988                Err(format!(
989                    "'{}' is YOUR IDENTITY, not a tool. Use list_files or read_file to explore the codebase.",
990                    other
991                ))
992            } else if matches!(
993                other.to_lowercase().as_str(),
994                "thought" | "think" | "reasoning" | "thinking" | "internal"
995            ) {
996                Err(format!(
997                    "'{}' is NOT a tool - it is a reasoning tag. Output your answer as plain text after your <think> block.",
998                    other
999                ))
1000            } else {
1001                Err(format!("Unknown tool: '{}'", other))
1002            }
1003        }
1004    }
1005}
1006
1007pub fn get_mutation_label(name: &str, args: &Value) -> Option<String> {
1008    match name {
1009        "shell" => {
1010            let cmd = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1011            if cmd.contains("rm ") || cmd.contains("del ") {
1012                Some("Destructive File Deletion".into())
1013            } else if cmd.contains("mkdir ") {
1014                Some("Directory Creation".into())
1015            } else {
1016                Some("Execute Shell Command".into())
1017            }
1018        }
1019        "write_file" => {
1020            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
1021            Some(format!("Create/Overwrite File: {}", path))
1022        }
1023        "create_directory" => {
1024            let path = args
1025                .get("path")
1026                .and_then(|v| v.as_str())
1027                .unwrap_or("folder");
1028            Some(format!("Create Directory: {}", path))
1029        }
1030        "edit_file" | "patch_hunk" | "multi_search_replace" => {
1031            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
1032            Some(format!("Surgical Code Mutation: {}", path))
1033        }
1034        "github_ops" => {
1035            let action = args.get("action").and_then(|v| v.as_str()).unwrap_or("?");
1036            match action {
1037                "pr_create" | "pr_merge" | "issue_create" => Some(format!("GitHub: {}", action)),
1038                _ => None,
1039            }
1040        }
1041        "git_commit" => Some("Permanent Version History Commit".into()),
1042        "git_push" => Some("Remote Origin Synchronisation (Push)".into()),
1043        "resolve_host_issue" => Some("System-Level Host Remediation".into()),
1044        "run_workspace_workflow" => Some("Automated Workspace Re-alignment".into()),
1045        _ => None,
1046    }
1047}