Skip to main content

hematite/agent/
tool_registry.rs

1use crate::agent::inference::{tool_metadata_for_name, ToolDefinition, ToolFunction};
2use serde_json::Value;
3
4fn make_tool(name: &str, description: &str, parameters: Value) -> ToolDefinition {
5    ToolDefinition {
6        tool_type: "function".into(),
7        function: ToolFunction {
8            name: name.into(),
9            description: description.into(),
10            parameters,
11        },
12        metadata: tool_metadata_for_name(name),
13    }
14}
15
16/// Returns the full set of tools exposed to the model.
17pub fn get_tools() -> Vec<ToolDefinition> {
18    let os = std::env::consts::OS;
19    let mut tools = vec![
20        make_tool(
21            "shell",
22            &format!(
23                "Execute a command in the host shell ({os}). \
24                     Use this for building, testing, or system operations. \
25                     Output is capped at 64KB. Prefer non-interactive commands."
26            ),
27            serde_json::json!({
28                "type": "object",
29                "properties": {
30                    "command": {
31                        "type": "string",
32                        "description": "The command to run"
33                    },
34                    "reason": {
35                        "type": "string",
36                        "description": "For risky shell calls, explain what this command is verifying or changing."
37                    },
38                    "timeout_secs": {
39                        "type": "integer",
40                        "description": "Optional timeout in seconds (default 60)"
41                    }
42                },
43                "required": ["command"]
44            }),
45        ),
46        make_tool(
47            "run_code",
48            "Execute a short JavaScript/TypeScript or Python snippet in a sandboxed subprocess. \
49             No network access, no filesystem escape, hard 10-second timeout. \
50             Use this to verify logic, test algorithms, compute values, or test functions \
51             when you need real output rather than a guess. \
52             ALWAYS include the `language` field — there is no default. \
53             \
54             JAVASCRIPT/TYPESCRIPT (language: \"javascript\"): \
55             Runs via Deno, NOT Node.js. `require()` does not exist — never use it. \
56             URL imports (e.g. from 'https://deno.land/...') are blocked — network is off. \
57             Use built-in Web APIs only: `crypto.subtle`, `TextEncoder`, `URL`, `atob`/`btoa`, etc. \
58             SHA-256 example: \
59               const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode('hello')); \
60               console.log([...new Uint8Array(buf)].map(b=>b.toString(16).padStart(2,'0')).join('')); \
61             \
62             PYTHON (language: \"python\"): \
63             Standard library is available. `hashlib`, `json`, `math`, `datetime`, `re`, `itertools` all work. \
64             `subprocess`, `socket`, `urllib`, `requests` are blocked. \
65             SHA-256 example: import hashlib; print(hashlib.sha256(b'hello').hexdigest()) \
66             \
67             Do NOT use this tool for PowerShell or shell scripting. This is strictly for high-precision computation in JavaScript, TypeScript, or Python only. \
68             Do NOT fall back to shell to run deno, python, or node — use this tool directly.",
69            serde_json::json!({
70                "type": "object",
71                "properties": {
72                    "language": {
73                        "type": "string",
74                        "enum": ["javascript", "typescript", "python"],
75                        "description": "The language to run. javascript/typescript requires Deno; python requires Python 3."
76                    },
77                    "code": {
78                        "type": "string",
79                        "description": "The code to execute. Keep it short and self-contained. Print results to stdout."
80                    },
81                    "timeout_seconds": {
82                        "type": "integer",
83                        "description": "Max execution time in seconds (default 10, max 60). Use higher values for longer computations."
84                    }
85                },
86                "required": ["language", "code"]
87            }),
88        ),
89
90        make_tool(
91            "trace_runtime_flow",
92            "Return an authoritative read-only trace of Hematite runtime flow. \
93             Use this for architecture questions about keyboard input to final output, \
94             reasoning/specular separation, startup wiring, runtime subsystems, \
95             voice synthesis and Ctrl+T toggle, or \
96             session reset commands like /clear, /new, and /forget. Prefer this over guessing.",
97            serde_json::json!({
98                "type": "object",
99                "properties": {
100                    "topic": {
101                        "type": "string",
102                        "enum": ["user_turn", "session_reset", "reasoning_split", "runtime_subsystems", "startup", "voice"],
103                        "description": "Which verified runtime report to return. Use 'voice' for any question about Ctrl+T, voice toggle, or TTS pipeline. Use 'user_turn' for keyboard-to-output flow. Use 'session_reset' for /clear, /forget, /new. Use 'startup' for startup wiring. Use 'reasoning_split' for specular/thought routing. Use 'runtime_subsystems' for background subsystem overview."
104                    },
105                    "input": {
106                        "type": "string",
107                        "description": "Optional user input to label a normal user-turn trace"
108                    },
109                    "command": {
110                        "type": "string",
111                        "enum": ["/clear", "/new", "/forget", "all"],
112                        "description": "Optional reset command when topic=session_reset"
113                    }
114                },
115                "required": ["topic"]
116            }),
117        ),
118        make_tool(
119            "describe_toolchain",
120            "Return an authoritative read-only description of Hematite's actual tool surface and investigation strategy. \
121             Use this for tooling-discipline questions, best-tool selection, or read-only plans for tracing runtime behavior. \
122             Prefer this over improvising tool names or investigation steps from memory.",
123            serde_json::json!({
124                "type": "object",
125                "properties": {
126                    "topic": {
127                        "type": "string",
128                        "enum": ["read_only_codebase", "user_turn_plan", "voice_latency_plan", "host_inspection_plan", "all"],
129                        "description": "Which authoritative toolchain report to return"
130                    },
131                    "question": {
132                        "type": "string",
133                        "description": "Optional user question to label or tailor the read-only investigation plan"
134                    }
135                }
136            }),
137        ),
138        make_tool(
139            "inspect_host",
140            "Return a structured read-only inspection of the current machine and environment. \
141             Prefer this over raw shell for questions about OS configuration (firewall, power, uptime), plain-English system health reports, installed developer tools, PATH issues, package-manager and environment health, network state, service state, running processes, desktop items, Downloads size, listening ports, repo health, or directory/disk summaries. \
142             For high-performance hardware testing, use topic=disk_benchmark to measure real-time kernel disk queue intensity. \
143             For remediation questions phrased like 'how do I fix cargo not found', 'how do I fix port 3000 already in use', or 'how do I fix LM Studio not reachable', use topic=fix_plan instead of diagnosis-only topics like env_doctor, path, or ports. \
144             Use topic=summary for a compact host snapshot, topic=toolchains for common dev tool versions, topic=path for PATH analysis, topic=env_doctor for package-manager and PATH health, topic=fix_plan for structured remediation plans, topic=network for adapters/IPs/gateways/DNS, topic=services for service status and startup mode, \
145             topic=processes for top processes by memory/cpu and real-time disk/network I/O stats (look for [I/O R:N/W:N] tags to identify disk-heavy processes), \
146             topic=desktop or topic=downloads for known folders, topic=ports for listening endpoints, topic=repo_doctor for a structured workspace health report, \
147             topic=log_check for recent critical/error events from system event logs or journalctl, topic=startup_items for programs and services that run at boot (registry Run keys and startup folders on Windows; systemd enabled units on Linux), \
148             topic=health_report for a plain-English tiered system health verdict (disk, RAM, tools, recent errors), \
149             topic=storage for all drives with capacity/free space plus large developer cache directories, \
150             topic=hardware for CPU model/cores, RAM size/speed, GPU name/driver, motherboard, BIOS, and display configuration, \
151             topic=updates for Windows Update status (last install date, pending update count, WU service state), \
152             topic=security for Windows Defender real-time protection status, last scan date, signature age, firewall profile states, Windows activation, and UAC state, \
153             topic=pending_reboot to check whether a system restart is required and why (Windows Update, CBS, file rename operations), \
154             topic=disk_health for physical drive health via Get-PhysicalDisk and SMART failure prediction, \
155             topic=battery for charge level, status, estimated runtime, and wear level (laptops only — reports no battery on desktops), \
156             topic=recent_crashes for BSOD and unexpected shutdown events plus application crash/hang events from the Windows event log, \
157             topic=scheduled_tasks for all non-disabled scheduled tasks including name, path, last run time, and executable, \
158             topic=dev_conflicts for cross-tool environment conflict detection (Node.js version managers, Python 2 vs 3 ambiguity, conda env shadowing, Rust toolchain path conflicts, Git identity/signing config, duplicate PATH entries), \
159             topic=bitlocker for drive encryption status (BitLocker on Windows, LUKS on Linux), \
160             topic=ad_user for Active Directory / Managed Identity details (SID, group memberships, domain role), \
161             topic=user_accounts for Local User and Group diagnostics (Built-in Administrators, local account state), \
162             topic=rdp for Remote Desktop configuration, port, and active sessions, \
163             topic=shadow_copies for Volume Shadow Copies (VSS) and system restore points, \
164             topic=pagefile for Windows page file configuration and current usage, \
165             topic=windows_features for enabled Windows optional features (IIS, Hyper-V, etc.), \
166             topic=printers for installed printers and active print jobs, \
167             topic=winrm for Windows Remote Management (WinRM) and PS Remoting status, \
168             topic=network_stats for adapter throughput (RX/TX), errors, and dropped packets, \
169             topic=udp_ports for active UDP listeners and notable port annotations, \
170             topic=gpo for applied Group Policy Objects, topic=certificates for local personal certificates, topic=integrity for Windows component store health (SFC/DISM state), topic=domain for Active Directory and domain join status, \
171             topic=device_health for identifying malfunctioning hardware with ConfigManager error codes (Yellow Bangs), topic=drivers for auditing active system drivers and their states, topic=peripherals for enumerating connected USB, input, and display hardware, \
172             topic=sessions for auditing active and disconnected user logon sessions, \
173             topic=ad_user for specific Active Directory user identity, SID, and group membership auditing, \
174             topic=dns_lookup for precision DNS record queries (SRV, MX, TXT), \
175             topic=hyperv for local Hyper-V VM inventory and real-time load, \
176             topic=ip_config for detailed adapter configuration and DHCP lease state, \
177             topic=disk_benchmark for high-performance silicon-aware stress testing, \
178             and topic=directory or topic=disk for arbitrary paths.",
179            serde_json::json!({
180                "type": "object",
181                "properties": {
182                    "topic": {
183                        "type": "string",
184                        "enum": ["summary", "toolchains", "path", "env_doctor", "fix_plan", "network", "services", "processes", "desktop", "downloads", "directory", "disk", "ports", "repo_doctor", "log_check", "startup_items", "health_report", "storage", "hardware", "updates", "security", "pending_reboot", "disk_health", "battery", "recent_crashes", "scheduled_tasks", "dev_conflicts", "os_config", "bitlocker", "rdp", "shadow_copies", "pagefile", "windows_features", "printers", "winrm", "network_stats", "udp_ports", "gpo", "certificates", "integrity", "domain", "device_health", "drivers", "peripherals", "disk_benchmark", "permissions", "login_history", "registry_audit", "share_access", "thermal", "activation", "patch_history", "ad_user", "dns_lookup", "hyperv", "ip_config"],
185                        "description": "Which structured host inspection to run. Use topic=ad_user for domain identity audit, topic=dns_lookup for SRV/MX records, topic=hyperv for VM load, and topic=ip_config for detailed adapter info."
186                    },
187                    "name": {
188                        "type": "string",
189                        "description": "Optional when topic=processes or topic=services. Case-insensitive substring filter for process or service names."
190                    },
191                    "issue": {
192                        "type": "string",
193                        "description": "Optional when topic=fix_plan. Plain-English issue description such as 'cargo not found', 'port 3000 already in use', or 'LM Studio not reachable on localhost:1234'."
194                    },
195                    "path": {
196                        "type": "string",
197                        "description": "Required when topic=directory. Optional for topic=disk or topic=repo_doctor. Absolute or relative path to inspect."
198                    },
199                    "port": {
200                        "type": "integer",
201                        "description": "Optional when topic=ports or topic=fix_plan. Filter the result to one listening TCP port or anchor a port-conflict fix plan."
202                    },
203                    "max_entries": {
204                        "type": "integer",
205                        "description": "Optional cap for listed entries. Defaults to 10 and is capped internally."
206                    }
207                }
208            }),
209        ),
210        make_tool(
211            "resolve_host_issue",
212            "A safe, bounded tool for remediating OS and environment issues automatically with user approval. \
213             Use this to fix missing dependencies, restart stuck services, or clear disk space instead of using raw shell. \
214             The user will be prompted to approve the action. Keep targets exact.",
215            serde_json::json!({
216                "type": "object",
217                "properties": {
218                    "action": {
219                        "type": "string",
220                        "enum": ["install_package", "restart_service", "clear_temp"],
221                        "description": "The type of remediation to perform."
222                    },
223                    "target": {
224                        "type": "string",
225                        "description": "The specific target (e.g., 'python' for install_package, or 'docker' for restart_service). Optional for clear_temp."
226                    }
227                },
228                "required": ["action"]
229            }),
230        ),
231        make_tool(
232            "run_hematite_maintainer_workflow",
233            "Run one of Hematite's known maintainer or release workflows with explicit approval. \
234             Prefer this over raw shell when the user explicitly asks to run one of Hematite's own scripts such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`. \
235             Use workflow=clean for cleanup, workflow=package_windows for rebuilding the local Windows portable or installer, and workflow=release for the normal version bump/tag/push/publish flow. \
236             Keep this tool constrained to Hematite's own known workflows instead of inventing ad hoc shell commands or pretending to run arbitrary project scripts.",
237            serde_json::json!({
238                "type": "object",
239                "properties": {
240                    "workflow": {
241                        "type": "string",
242                        "enum": ["clean", "package_windows", "release"],
243                        "description": "Which known Hematite maintainer workflow to run."
244                    },
245                    "deep": {
246                        "type": "boolean",
247                        "description": "For workflow=clean. Also remove heavy build/runtime artifacts such as target/ and vein.db."
248                    },
249                    "reset": {
250                        "type": "boolean",
251                        "description": "For workflow=clean. Reset PLAN/TASK state in addition to normal cleanup."
252                    },
253                    "prune_dist": {
254                        "type": "boolean",
255                        "description": "For workflow=clean. Keep only the current Cargo.toml version under dist/."
256                    },
257                    "installer": {
258                        "type": "boolean",
259                        "description": "For workflow=package_windows. Also build the Windows installer."
260                    },
261                    "add_to_path": {
262                        "type": "boolean",
263                        "description": "For workflow=package_windows or workflow=release. Update the user PATH to the rebuilt portable."
264                    },
265                    "version": {
266                        "type": "string",
267                        "description": "For workflow=release. Exact semantic version such as 0.4.5."
268                    },
269                    "bump": {
270                        "type": "string",
271                        "enum": ["patch", "minor", "major"],
272                        "description": "For workflow=release. Ask release.ps1 to calculate the next version."
273                    },
274                    "push": {
275                        "type": "boolean",
276                        "description": "For workflow=release. Push main and the new tag."
277                    },
278                    "skip_installer": {
279                        "type": "boolean",
280                        "description": "For workflow=release. Skip the Windows installer build."
281                    },
282                    "publish_crates": {
283                        "type": "boolean",
284                        "description": "For workflow=release. Publish hematite-cli to crates.io after a successful push."
285                    },
286                    "publish_voice_crate": {
287                        "type": "boolean",
288                        "description": "For workflow=release. Publish hematite-kokoros first, then hematite-cli."
289                    }
290                },
291                "required": ["workflow"]
292            }),
293        ),
294        make_tool(
295            "run_workspace_workflow",
296            "Run an approval-gated workflow or script in the locked project workspace root. \
297             Use this for the current project's build, test, lint, fix, package.json scripts, just/task/make targets, explicit local script paths, or an exact workspace command. \
298             This tool is for the active workspace, not for Hematite's own maintainer scripts.",
299            serde_json::json!({
300                "type": "object",
301                "properties": {
302                    "workflow": {
303                        "type": "string",
304                        "enum": ["build", "test", "lint", "fix", "package_script", "task", "just", "make", "script_path", "command"],
305                        "description": "Which workspace workflow to run."
306                    },
307                    "name": {
308                        "type": "string",
309                        "description": "Required for workflow=package_script, task, just, or make. The script or target name."
310                    },
311                    "path": {
312                        "type": "string",
313                        "description": "Required for workflow=script_path. Relative path to a script inside the locked workspace root."
314                    },
315                    "command": {
316                        "type": "string",
317                        "description": "Required for workflow=command. Exact command to execute from the locked workspace root."
318                    },
319                    "timeout_ms": {
320                        "type": "integer",
321                        "description": "Optional timeout override in milliseconds."
322                    }
323                },
324                "required": ["workflow"]
325            }),
326        ),
327        make_tool(
328            "read_file",
329            "Read the contents of a file. For large files, use 'offset' and 'limit' to navigate.",
330            serde_json::json!({
331                "type": "object",
332                "properties": {
333                    "path": {
334                        "type": "string",
335                        "description": "Path to the file, relative to the project root"
336                    },
337                    "offset": {
338                        "type": "integer",
339                        "description": "Starting line number (0-indexed)"
340                    },
341                    "limit": {
342                        "type": "integer",
343                        "description": "Number of lines to read"
344                    }
345                },
346                "required": ["path"]
347            }),
348        ),
349        make_tool(
350            "lsp_definitions",
351            "Get the precise definition location (file:line:char) for a symbol at a specific position. \
352             Use this to jump to function/struct source code accurately.",
353            serde_json::json!({
354                "type": "object",
355                "properties": {
356                    "path": { "type": "string", "description": "File path" },
357                    "line": { "type": "integer", "description": "0-indexed line" },
358                    "character": { "type": "integer", "description": "0-indexed character" }
359                },
360                "required": ["path", "line", "character"]
361            }),
362        ),
363        make_tool(
364            "lsp_references",
365            "Find all locations where a symbol is used across the entire workspace. \
366             Use this to understand the impact of a refactor or discover internal API users.",
367            serde_json::json!({
368                "type": "object",
369                "properties": {
370                    "path": { "type": "string", "description": "File path" },
371                    "line": { "type": "integer", "description": "0-indexed line" },
372                    "character": { "type": "integer", "description": "0-indexed character" }
373                },
374                "required": ["path", "line", "character"]
375            }),
376        ),
377        make_tool(
378            "lsp_hover",
379            "Get hover information (documentation, function signature, type details) for a symbol. \
380             Use this for rapid spatial awareness without opening every file.",
381            serde_json::json!({
382                "type": "object",
383                "properties": {
384                    "path": { "type": "string", "description": "File path" },
385                    "line": { "type": "integer", "description": "0-indexed line" },
386                    "character": { "type": "integer", "description": "0-indexed character" }
387                },
388                "required": ["path", "line", "character"]
389            }),
390        ),
391        make_tool(
392            "lsp_rename_symbol",
393            "Rename a symbol project-wide using the Language Server. Ensures all references are updated safely.",
394            serde_json::json!({
395                "type": "object",
396                "properties": {
397                    "path": { "type": "string", "description": "File path" },
398                    "line": { "type": "integer", "description": "0-indexed line" },
399                    "character": { "type": "integer", "description": "0-indexed character" },
400                    "new_name": { "type": "string", "description": "The new name for the symbol" }
401                },
402                "required": ["path", "line", "character", "new_name"]
403            }),
404        ),
405        make_tool(
406            "lsp_get_diagnostics",
407            "Get a list of current compiler errors and warnings for a specific file. \
408             Use this to verify your code compiles and and to find exactly where errors are located.",
409            serde_json::json!({
410                "type": "object",
411                "properties": {
412                    "path": { "type": "string", "description": "File path" }
413                },
414                "required": ["path"]
415            }),
416        ),
417        make_tool(
418            "vision_analyze",
419            "Send an image file (screenshot, diagram, or UI mockup) to the multimodal vision model for technical analysis. \
420             Use this to identify UI bugs, confirm visual states, or understand architectural diagrams.",
421            serde_json::json!({
422                "type": "object",
423                "properties": {
424                    "path": { "type": "string", "description": "Absolute or relative path to the image file." },
425                    "prompt": { "type": "string", "description": "The specific question or analysis request for the vision model." }
426                },
427                "required": ["path", "prompt"]
428            }),
429        ),
430        make_tool(
431            "patch_hunk",
432            "Replace a specific line range [start_line, end_line] with new content. \
433             This is the most precise way to edit code and avoids search string failures.",
434            serde_json::json!({
435                "type": "object",
436                "properties": {
437                    "path": { "type": "string", "description": "File path" },
438                    "start_line": { "type": "integer", "description": "Starting line (1-indexed)" },
439                    "end_line": { "type": "integer", "description": "Ending line (inclusive)" },
440                    "replacement": { "type": "string", "description": "The new content for this range" }
441                },
442                "required": ["path", "start_line", "end_line", "replacement"]
443            }),
444        ),
445        make_tool(
446            "multi_search_replace",
447            "Replace multiple existing code blocks in a single file with new content. \
448             Each hunk specifies an EXACT 'search' string and a 'replace' string. \
449             The 'search' string MUST exactly match the existing file contents (including whitespace). \
450             This is the safest and most reliable way to make multiple structural edits.",
451            serde_json::json!({
452                "type": "object",
453                "properties": {
454                    "path": { "type": "string", "description": "File path" },
455                    "hunks": {
456                        "type": "array",
457                        "items": {
458                            "type": "object",
459                            "properties": {
460                                "search": { "type": "string", "description": "Exact existing text to find and replace" },
461                                "replace": { "type": "string", "description": "The new replacement text" }
462                            },
463                            "required": ["search", "replace"]
464                        }
465                    }
466                },
467                "required": ["path", "hunks"]
468            }),
469        ),
470        make_tool(
471            "write_file",
472            "Write content to a file, creating it (and any parent dirs) if needed. \
473             Overwrites existing files.",
474            serde_json::json!({
475                "type": "object",
476                "properties": {
477                    "path": { "type": "string", "description": "File path" },
478                    "content": { "type": "string", "description": "Full file content to write" }
479                },
480                "required": ["path", "content"]
481            }),
482        ),
483        make_tool(
484            "research_web",
485            "Perform a zero-cost technical search using DuckDuckGo. \
486             Use this to find documentation, latest API changes, or solutions to complex errors \
487             when your internal knowledge is insufficient. Returns snippets and URLs.",
488            serde_json::json!({
489                "type": "object",
490                "properties": {
491                    "query": { "type": "string", "description": "The technical search query" }
492                },
493                "required": ["query"]
494            }),
495        ),
496        make_tool(
497            "fetch_docs",
498            "Fetch a URL and convert it to clean Markdown. Use this to 'read' the documentation \
499             links found via research_web. This tool uses a proxy to bypass IP blocks.",
500            serde_json::json!({
501                "type": "object",
502                "properties": {
503                    "url": { "type": "string", "description": "The URL of the documentation to fetch" }
504                },
505                "required": ["url"]
506            }),
507        ),
508        make_tool(
509            "edit_file",
510            "Edit a file by replacing an exact string with another. \
511             The 'search' string does NOT need perfectly matching indentation (it is fuzzy), \
512             but the non-whitespace text must match exactly. Use this for targeted edits.",
513            serde_json::json!({
514                "type": "object",
515                "properties": {
516                    "path": { "type": "string", "description": "File path" },
517                    "search": {
518                        "type": "string",
519                        "description": "The exact text to find (must match whitespace/indentation precisely)"
520                    },
521                    "replace": {
522                        "type": "string",
523                        "description": "The replacement text"
524                    }
525                },
526                "required": ["path", "search", "replace"]
527            }),
528        ),
529        make_tool(
530            "auto_pin_context",
531            "Select 1-3 core files to 'Lock' into prioritized memory. \
532             Use this to ensure the most important architecture files \
533             are always visible during complex refactorings.",
534            serde_json::json!({
535                "type": "object",
536                "properties": {
537                    "paths": {
538                        "type": "array",
539                        "items": { "type": "string" }
540                    },
541                    "reason": { "type": "string" }
542                },
543                "required": ["paths", "reason"]
544            }),
545        ),
546        make_tool(
547            "list_pinned",
548            "List all files currently pinned in the model's active context.",
549            serde_json::json!({
550                "type": "object",
551                "properties": {}
552            }),
553        ),
554        make_tool(
555            "list_files",
556            "List files in a directory, optionally filtered by extension.",
557            serde_json::json!({
558                "type": "object",
559                "properties": {
560                    "path": {
561                        "type": "string",
562                        "description": "Directory to list (default: current dir)"
563                    },
564                    "extension": {
565                        "type": "string",
566                        "description": "Only return files with this extension, e.g. 'rs', 'toml' (no dot)"
567                    }
568                },
569                "required": []
570            }),
571        ),
572        make_tool(
573            "tail_file",
574            "Read the last N lines of a file — useful for log files, test output, \
575             build artifacts, and any large file where only the tail is relevant. \
576             Supports an optional grep filter to show only matching lines from the tail. \
577             Use this instead of read_file when you only need the end of a large file.",
578            serde_json::json!({
579                "type": "object",
580                "properties": {
581                    "path": {
582                        "type": "string",
583                        "description": "Path to the file, relative to the project root"
584                    },
585                    "lines": {
586                        "type": "integer",
587                        "description": "Number of lines to return from the end (default: 50, max: 500)"
588                    },
589                    "grep": {
590                        "type": "string",
591                        "description": "Optional regex pattern — only return lines matching this pattern (applied before the tail slice)"
592                    }
593                },
594                "required": ["path"]
595            }),
596        ),
597        make_tool(
598            "grep_files",
599            "Search file contents for a regex pattern. Supports context lines, files-only mode, \
600             and pagination. Returns file:line:content format by default.",
601            serde_json::json!({
602                "type": "object",
603                "properties": {
604                    "pattern": {
605                        "type": "string",
606                        "description": "Regex pattern to search for (case-insensitive by default)"
607                    },
608                    "path": {
609                        "type": "string",
610                        "description": "Directory to search (default: current dir)"
611                    },
612                    "extension": {
613                        "type": "string",
614                        "description": "Only search files with this extension, e.g. 'rs'"
615                    },
616                    "mode": {
617                        "type": "string",
618                        "enum": ["content", "files_only"],
619                        "description": "'content' (default) returns matching lines; 'files_only' returns only filenames"
620                    },
621                    "context": {
622                        "type": "integer",
623                        "description": "Lines of context before AND after each match (like rg -C)"
624                    },
625                    "before": {
626                        "type": "integer",
627                        "description": "Lines of context before each match (overrides context)"
628                    },
629                    "after": {
630                        "type": "integer",
631                        "description": "Lines of context after each match (overrides context)"
632                    },
633                    "head_limit": {
634                        "type": "integer",
635                        "description": "Max hunks (or files in files_only) to return (default: 50)"
636                    },
637                    "offset": {
638                        "type": "integer",
639                        "description": "Skip first N hunks/files - for pagination (default: 0)"
640                    }
641                },
642                "required": ["pattern"]
643            }),
644        ),
645        make_tool(
646            "git_commit",
647            "Stage all changes (git add -A) and create a commit. You MUST use 'Conventional Commits' (e.g. 'feat: description').",
648            serde_json::json!({
649                "type": "object",
650                "properties": {
651                    "message": { "type": "string", "description": "Commit message (Conventional Commit style)" }
652                },
653                "required": ["message"]
654            }),
655        ),
656        make_tool(
657            "git_push",
658            "Push current branched changes to the remote origin. Requires an existing remote connection.",
659            serde_json::json!({
660                "type": "object",
661                "properties": {},
662                "required": []
663            }),
664        ),
665        make_tool(
666            "git_remote",
667            "View or manage git remotes. Use this for onboarding to GitHub/GitLab services.",
668            serde_json::json!({
669                "type": "object",
670                "properties": {
671                    "action": {
672                        "type": "string",
673                        "enum": ["list", "add", "remove"],
674                        "description": "Operation to perform"
675                    },
676                    "name": { "type": "string", "description": "Remote name (e.g. origin)" },
677                    "url": { "type": "string", "description": "Remote URL (for 'add' action)" }
678                },
679                "required": ["action"]
680            }),
681        ),
682        make_tool(
683            "git_onboarding",
684            "High-level wizard to connect this repository to a remote host (GitHub/GitLab). \
685             Handles adding the remote and performing the initial tracking push in one step.",
686            serde_json::json!({
687                "type": "object",
688                "properties": {
689                    "url": { "type": "string", "description": "The remote repository URL (HTTPS or SSH)" },
690                    "name": { "type": "string", "description": "The remote name (default: origin)" },
691                    "push": { "type": "boolean", "description": "Whether to perform an initial push to establish tracking (default: false)" }
692                },
693                "required": ["url"]
694            }),
695        ),
696        make_tool(
697            "verify_build",
698            "Run project verification for build, test, lint, or fix workflows. \
699             Prefer per-project verify profiles from `.hematite/settings.json`, and fall back to \
700             auto-detected defaults when no profile is configured. Returns BUILD OK or BUILD FAILED \
701             with command output. ALWAYS call this after scaffolding a new project or making structural changes.",
702            serde_json::json!({
703                "type": "object",
704                "properties": {
705                    "action": {
706                        "type": "string",
707                        "enum": ["build", "test", "lint", "fix"],
708                        "description": "Which verification action to run. Defaults to build."
709                    },
710                    "profile": {
711                        "type": "string",
712                        "description": "Optional named verify profile from `.hematite/settings.json`."
713                    },
714                    "timeout_secs": {
715                        "type": "integer",
716                        "description": "Optional timeout override for this verification run."
717                    }
718                }
719            }),
720        ),
721        make_tool(
722            "git_worktree",
723            "Manage Git worktrees - isolated working directories on separate branches. \
724             Use 'add' to create a safe sandbox for risky/experimental work, \
725             'list' to see all worktrees, 'remove' to clean up, 'prune' to remove stale entries.",
726            serde_json::json!({
727                "type": "object",
728                "properties": {
729                    "action": {
730                        "type": "string",
731                        "enum": ["list", "add", "remove", "prune"],
732                        "description": "Worktree operation to perform"
733                    },
734                    "path": {
735                        "type": "string",
736                        "description": "Directory path for the new worktree (required for add/remove)"
737                    },
738                    "branch": {
739                        "type": "string",
740                        "description": "Branch name for the worktree (add only; defaults to path basename)"
741                    }
742                },
743                "required": ["action"]
744            }),
745        ),
746        make_tool(
747            "clarify",
748            "Ask the user a clarifying question when you genuinely cannot proceed without \
749             more information. Use this ONLY when you are blocked and cannot make a \
750             reasonable assumption. Do NOT use it to ask permission - just act.",
751            serde_json::json!({
752                "type": "object",
753                "properties": {
754                    "question": {
755                        "type": "string",
756                        "description": "The specific question to ask the user"
757                    }
758                },
759                "required": ["question"]
760            }),
761        ),
762        make_tool(
763            "manage_tasks",
764            "Manage the persistent task ledger in .hematite/TASK.md. Use this to track long-term goals across restarts.",
765            crate::tools::tasks::get_tasks_params(),
766        ),
767        make_tool(
768            "maintain_plan",
769            "Document the architectural strategy and session blueprint in .hematite/PLAN.md. Use this to maintain context across restarts.",
770            crate::tools::plan::get_plan_params(),
771        ),
772        make_tool(
773            "generate_walkthrough",
774            "Generate a final session report in .hematite/WALKTHROUGH.md including achievements and verification results.",
775            crate::tools::plan::get_walkthrough_params(),
776        ),
777        make_tool(
778            "swarm",
779            "Delegate high-volume parallel tasks to a swarm of background workers. \
780             Use this for large-scale refactors, multi-file research, or parallel documentation updates. \
781             You must provide a 'tasks' array where each task has an 'id', 'target' (file), and 'instruction'.",
782            serde_json::json!({
783                "type": "object",
784                "properties": {
785                    "tasks": {
786                        "type": "array",
787                        "items": {
788                            "type": "object",
789                            "properties": {
790                                "id": { "type": "string" },
791                                "target": { "type": "string", "description": "Target file or directory" },
792                                "instruction": { "type": "string", "description": "Specific task for this worker" }
793                            },
794                            "required": ["id", "target", "instruction"]
795                        }
796                    },
797                    "max_workers": {
798                        "type": "integer",
799                        "description": "Max parallel workers (default 3, auto-throttled by hardware)",
800                        "default": 3
801                    }
802                },
803                "required": ["tasks"]
804            }),
805        ),
806    ];
807
808    let lsp_defs = crate::tools::lsp_tools::get_lsp_definitions();
809    tools.push(make_tool(
810        "lsp_search_symbol",
811        "Find the location (file/line) of any function, struct, or variable in the entire project workspace. \
812         This is the fastest 'Golden Path' for navigating to a symbol by name.",
813        serde_json::json!({
814            "type": "object",
815            "properties": {
816                "query": { "type": "string", "description": "The name of the symbol to find (e.g. 'initialize_mcp')" }
817            },
818            "required": ["query"]
819        }),
820    ));
821    for def in lsp_defs {
822        let name = def["name"].as_str().unwrap();
823        tools.push(ToolDefinition {
824            tool_type: "function".into(),
825            function: ToolFunction {
826                name: name.into(),
827                description: def["description"].as_str().unwrap().into(),
828                parameters: def["parameters"].clone(),
829            },
830            metadata: tool_metadata_for_name(name),
831        });
832    }
833
834    tools
835}
836
837pub async fn dispatch_builtin_tool(name: &str, args: &Value) -> Result<String, String> {
838    match name {
839        "shell" => crate::tools::shell::execute(args).await,
840        "run_code" => crate::tools::code_sandbox::execute(args).await,
841        "trace_runtime_flow" => crate::tools::runtime_trace::trace_runtime_flow(args).await,
842        "describe_toolchain" => crate::tools::toolchain::describe_toolchain(args).await,
843        "inspect_host" => crate::tools::host_inspect::inspect_host(args).await,
844        "resolve_host_issue" => crate::tools::host_inspect::resolve_host_issue(args).await,
845        "run_hematite_maintainer_workflow" => {
846            crate::tools::repo_script::run_hematite_maintainer_workflow(args).await
847        }
848        "run_workspace_workflow" => crate::tools::workspace_workflow::run_workspace_workflow(args).await,
849        "read_file" => crate::tools::file_ops::read_file(args).await,
850        "inspect_lines" => crate::tools::file_ops::inspect_lines(args).await,
851        "tail_file" => crate::tools::file_ops::tail_file(args).await,
852        "write_file" => crate::tools::file_ops::write_file(args).await,
853        "edit_file" => crate::tools::file_ops::edit_file(args).await,
854        "patch_hunk" => crate::tools::file_ops::patch_hunk(args).await,
855        "multi_search_replace" => crate::tools::file_ops::multi_search_replace(args).await,
856        "list_files" => crate::tools::file_ops::list_files(args).await,
857        "grep_files" => crate::tools::file_ops::grep_files(args).await,
858        "git_commit" => crate::tools::git::execute(args).await,
859        "git_push" => crate::tools::git::execute_push(args).await,
860        "git_remote" => crate::tools::git::execute_remote(args).await,
861        "git_onboarding" => crate::tools::git_onboarding::execute(args).await,
862        "verify_build" => crate::tools::verify_build::execute(args).await,
863        "git_worktree" => crate::tools::git::execute_worktree(args).await,
864        "health" => crate::tools::health::execute(args).await,
865        "research_web" => crate::tools::research::execute_search(args).await,
866        "fetch_docs" => crate::tools::research::execute_fetch(args).await,
867        "manage_tasks" => crate::tools::tasks::manage_tasks(args).await,
868        "maintain_plan" => crate::tools::plan::maintain_plan(args).await,
869        "generate_walkthrough" => crate::tools::plan::generate_walkthrough(args).await,
870        "clarify" => {
871            let q = args.get("question").and_then(|v| v.as_str()).unwrap_or("?");
872            Ok(format!("[clarify] {q}"))
873        }
874        "vision_analyze" => Err(
875            "Tool 'vision_analyze' must be dispatched by ConversationManager (it requires hardware engine access)."
876                .into(),
877        ),
878        other => {
879            if other.contains('.') || other.contains('/') || other.contains('\\') {
880                Err(format!(
881                    "'{}' is a PATH, not a tool. You correctly identified the location, but you MUST use `read_file` or `list_files` (internal) or `powershell` (external) to access it.",
882                    other
883                ))
884            } else if matches!(other.to_lowercase().as_str(), "hematite" | "assistant" | "ai") {
885                Err(format!(
886                    "'{}' is YOUR IDENTITY, not a tool. Use list_files or read_file to explore the codebase.",
887                    other
888                ))
889            } else if matches!(
890                other.to_lowercase().as_str(),
891                "thought" | "think" | "reasoning" | "thinking" | "internal"
892            ) {
893                Err(format!(
894                    "'{}' is NOT a tool - it is a reasoning tag. Output your answer as plain text after your <think> block.",
895                    other
896                ))
897            } else {
898                Err(format!("Unknown tool: '{}'", other))
899            }
900        }
901    }
902}