1use crate::agent::config::HematiteConfig;
2use crate::agent::inference::{tool_metadata_for_name, ToolDefinition, ToolFunction};
3use serde_json::Value;
4
5fn make_tool(name: &str, description: &str, parameters: Value) -> ToolDefinition {
6 ToolDefinition {
7 tool_type: "function".into(),
8 function: ToolFunction {
9 name: name.into(),
10 description: description.into(),
11 parameters,
12 },
13 metadata: tool_metadata_for_name(name),
14 }
15}
16
17pub fn get_tools() -> Vec<ToolDefinition> {
19 let os = std::env::consts::OS;
20 let mut tools = vec![
21 make_tool(
22 "shell",
23 &format!(
24 "Execute a command in the host shell ({os}). \
25 Use this ONLY for building, testing, or advanced system operations that have no dedicated Hematite tool. \
26 FORBIDDEN: Never use shell to run `mkdir`, `rm`, `cat`, `head`, `tail`, or `write-file` equivalents. \
27 Use the dedicated surgical tools (create_directory, read_file, tail_file) instead. \
28 Output is capped at 64KB. Prefer non-interactive commands."
29 ),
30 serde_json::json!({
31 "type": "object",
32 "properties": {
33 "command": {
34 "type": "string",
35 "description": "The command to run"
36 },
37 "reason": {
38 "type": "string",
39 "description": "For risky shell calls, explain what this command is verifying or changing."
40 },
41 "timeout_secs": {
42 "type": "integer",
43 "description": "Optional timeout in seconds (default 60)"
44 }
45 },
46 "required": ["command"]
47 }),
48 ),
49 make_tool(
50 "run_code",
51 "Execute a short JavaScript/TypeScript or Python snippet in a sandboxed subprocess. \
52 No network access, no filesystem escape, hard 10-second timeout. \
53 Use this to verify logic, test algorithms, compute values, or test functions \
54 when you need real output rather than a guess. \
55 ALWAYS include the `language` field — there is no default. \
56 \
57 JAVASCRIPT/TYPESCRIPT (language: \"javascript\"): \
58 Runs via Deno, NOT Node.js. `require()` does not exist — never use it. \
59 URL imports (e.g. from 'https://deno.land/...') are blocked — network is off. \
60 Use built-in Web APIs only: `crypto.subtle`, `TextEncoder`, `URL`, `atob`/`btoa`, etc. \
61 SHA-256 example: \
62 const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode('hello')); \
63 console.log([...new Uint8Array(buf)].map(b=>b.toString(16).padStart(2,'0')).join('')); \
64 \
65 PYTHON (language: \"python\"): \
66 Standard library is available. `hashlib`, `json`, `math`, `datetime`, `re`, `itertools` all work. \
67 `subprocess`, `socket`, `urllib`, `requests` are blocked. \
68 SHA-256 example: import hashlib; print(hashlib.sha256(b'hello').hexdigest()) \
69 \
70 Do NOT use this tool for PowerShell or shell scripting. This is strictly for high-precision computation in JavaScript, TypeScript, or Python only. \
71 Do NOT fall back to shell to run deno, python, or node — use this tool directly.",
72 serde_json::json!({
73 "type": "object",
74 "properties": {
75 "language": {
76 "type": "string",
77 "enum": ["javascript", "typescript", "python"],
78 "description": "The language to run. javascript/typescript requires Deno; python requires Python 3."
79 },
80 "code": {
81 "type": "string",
82 "description": "The code to execute. Keep it short and self-contained. Print results to stdout."
83 },
84 "timeout_seconds": {
85 "type": "integer",
86 "description": "Max execution time in seconds (default 10, max 60). Use higher values for longer computations."
87 }
88 },
89 "required": ["language", "code"]
90 }),
91 ),
92
93 make_tool(
94 "trace_runtime_flow",
95 "Return an authoritative read-only trace of Hematite runtime flow. \
96 Use this for architecture questions about keyboard input to final output, \
97 reasoning/specular separation, startup wiring, runtime subsystems, \
98 voice synthesis and Ctrl+T toggle, or \
99 session reset commands like /clear, /new, and /forget. Prefer this over guessing.",
100 serde_json::json!({
101 "type": "object",
102 "properties": {
103 "topic": {
104 "type": "string",
105 "enum": ["user_turn", "session_reset", "reasoning_split", "runtime_subsystems", "startup", "voice"],
106 "description": "Which verified runtime report to return. Use 'voice' for any question about Ctrl+T, voice toggle, or TTS pipeline. Use 'user_turn' for keyboard-to-output flow. Use 'session_reset' for /clear, /forget, /new. Use 'startup' for startup wiring. Use 'reasoning_split' for specular/thought routing. Use 'runtime_subsystems' for background subsystem overview."
107 },
108 "input": {
109 "type": "string",
110 "description": "Optional user input to label a normal user-turn trace"
111 },
112 "command": {
113 "type": "string",
114 "enum": ["/clear", "/new", "/forget", "all"],
115 "description": "Optional reset command when topic=session_reset"
116 }
117 },
118 "required": ["topic"]
119 }),
120 ),
121 make_tool(
122 "describe_toolchain",
123 "Return an authoritative read-only description of Hematite's actual tool surface and investigation strategy. \
124 Use this for tooling-discipline questions, best-tool selection, or read-only plans for tracing runtime behavior. \
125 Prefer this over improvising tool names or investigation steps from memory.",
126 serde_json::json!({
127 "type": "object",
128 "properties": {
129 "topic": {
130 "type": "string",
131 "enum": ["read_only_codebase", "user_turn_plan", "voice_latency_plan", "host_inspection_plan", "all"],
132 "description": "Which authoritative toolchain report to return"
133 },
134 "question": {
135 "type": "string",
136 "description": "Optional user question to label or tailor the read-only investigation plan"
137 }
138 }
139 }),
140 ),
141 make_tool(
142 "inspect_host",
143 "Return a structured read-only inspection of the current machine and environment. \
144 Prefer this over raw shell for questions about OS configuration (firewall, power, uptime), plain-English system health reports, silicon health and high-fidelity hardware telemetry (NVIDIA clocks/fans/power, CPU frequency averaging), installed developer tools, PATH issues, package-manager and environment health, network state, service state, running processes, desktop items, Downloads size, listening ports, repo health, or directory/disk summaries. \
145 For high-performance hardware testing, use topic=disk_benchmark to measure real-time kernel disk queue intensity. \
146 For remediation questions phrased like 'how do I fix cargo not found', 'how do I fix port 3000 already in use', or 'how do I fix LM Studio not reachable', use topic=fix_plan instead of diagnosis-only topics like env_doctor, path, or ports. \
147 Use topic=summary for a compact host snapshot, topic=toolchains for common dev tool versions, topic=path for PATH analysis, topic=env_doctor for package-manager and PATH health, topic=fix_plan for structured remediation plans, topic=network for adapters/IPs/gateways/DNS, topic=services for service status and startup mode, \
148 topic=processes for top processes by memory/cpu and real-time disk/network I/O stats (look for [I/O R:N/W:N] tags to identify disk-heavy processes), \
149 topic=desktop or topic=downloads for known folders, topic=ports for listening endpoints, topic=repo_doctor for a structured workspace health report, \
150 topic=log_check for recent critical/error events from system event logs or journalctl, topic=startup_items for programs and services that run at boot (registry Run keys and startup folders on Windows; systemd enabled units on Linux), \
151 topic=health_report for a plain-English tiered system health verdict (disk, RAM, tools, recent errors), \
152 topic=storage for all drives with capacity/free space plus large developer cache directories, \
153 topic=hardware for CPU model/cores, RAM size/speed, GPU name/driver, motherboard, BIOS, and display configuration, \
154 topic=updates for Windows Update status (last install date, pending update count, WU service state), \
155 topic=security for Windows Defender real-time protection status, last scan date, signature age, firewall profile states, Windows activation, and UAC state, \
156 topic=pending_reboot to check whether a system restart is required and why (Windows Update, CBS, file rename operations), \
157 topic=disk_health for physical drive health via Get-PhysicalDisk and SMART failure prediction, \
158 topic=battery for charge level, status, estimated runtime, and wear level (laptops only — reports no battery on desktops), \
159 topic=recent_crashes for BSOD and unexpected shutdown events plus application crash/hang events from the Windows event log, \
160 topic=scheduled_tasks for all non-disabled scheduled tasks including name, path, last run time, and executable, \
161 topic=dev_conflicts for cross-tool environment conflict detection (Node.js version managers, Python 2 vs 3 ambiguity, conda env shadowing, Rust toolchain path conflicts, Git identity/signing config, duplicate PATH entries), \
162 topic=bitlocker for drive encryption status (BitLocker on Windows, LUKS on Linux), \
163 topic=ad_user for Active Directory / Managed Identity details (SID, group memberships, domain role), \
164 topic=user_accounts for Local User and Group diagnostics (Built-in Administrators, local account state), \
165 topic=rdp for Remote Desktop configuration, port, and active sessions, \
166 topic=shadow_copies for Volume Shadow Copies (VSS) and system restore points, \
167 topic=pagefile for Windows page file configuration and current usage, \
168 topic=windows_features for enabled Windows optional features (IIS, Hyper-V, etc.), \
169 topic=printers for installed printers and active print jobs, \
170 topic=winrm for Windows Remote Management (WinRM) and PS Remoting status, \
171 topic=network_stats for adapter throughput (RX/TX), errors, and dropped packets, \
172 topic=udp_ports for active UDP listeners and notable port annotations, \
173 topic=gpo for applied Group Policy Objects, topic=certificates for local personal certificates, topic=integrity for Windows component store health (SFC/DISM state), topic=domain for Active Directory and domain join status, \
174 topic=device_health for identifying malfunctioning hardware with ConfigManager error codes (Yellow Bangs), topic=drivers for auditing active system drivers and their states, topic=peripherals for enumerating connected USB, input, and display hardware, \
175 topic=sessions for auditing active and disconnected user logon sessions, \
176 topic=ad_user for specific Active Directory user identity, SID, and group membership auditing, \
177 topic=dns_lookup for precision DNS record queries (SRV, MX, TXT), \
178 topic=hyperv for local Hyper-V VM inventory and real-time load, \
179 topic=ip_config for detailed adapter configuration and DHCP lease state, \
180 topic=disk_benchmark for high-performance silicon-aware stress testing, \
181 and topic=directory or topic=disk for arbitrary paths.",
182 serde_json::json!({
183 "type": "object",
184 "properties": {
185 "topic": {
186 "type": "string",
187 "enum": ["summary", "toolchains", "path", "env_doctor", "fix_plan", "network", "services", "processes", "desktop", "downloads", "directory", "disk", "ports", "repo_doctor", "log_check", "startup_items", "health_report", "storage", "hardware", "updates", "security", "pending_reboot", "disk_health", "battery", "recent_crashes", "scheduled_tasks", "dev_conflicts", "os_config", "bitlocker", "rdp", "shadow_copies", "pagefile", "windows_features", "printers", "winrm", "network_stats", "udp_ports", "gpo", "certificates", "integrity", "domain", "device_health", "drivers", "peripherals", "disk_benchmark", "permissions", "login_history", "registry_audit", "share_access", "thermal", "activation", "patch_history", "ad_user", "dns_lookup", "hyperv", "ip_config"],
188 "description": "Which structured host inspection to run. Use topic=ad_user for domain identity audit, topic=dns_lookup for SRV/MX records, topic=hyperv for VM load, and topic=ip_config for detailed adapter info."
189 },
190 "name": {
191 "type": "string",
192 "description": "Optional when topic=processes or topic=services. Case-insensitive substring filter for process or service names."
193 },
194 "issue": {
195 "type": "string",
196 "description": "Optional when topic=fix_plan. Plain-English issue description such as 'cargo not found', 'port 3000 already in use', or 'LM Studio not reachable on localhost:1234'."
197 },
198 "path": {
199 "type": "string",
200 "description": "Required when topic=directory. Optional for topic=disk or topic=repo_doctor. Absolute or relative path to inspect."
201 },
202 "port": {
203 "type": "integer",
204 "description": "Optional when topic=ports or topic=fix_plan. Filter the result to one listening TCP port or anchor a port-conflict fix plan."
205 },
206 "max_entries": {
207 "type": "integer",
208 "description": "Optional cap for listed entries. Defaults to 10 and is capped internally."
209 }
210 }
211 }),
212 ),
213 make_tool(
214 "resolve_host_issue",
215 "A safe, bounded tool for remediating OS and environment issues automatically with user approval. \
216 Use this to fix missing dependencies, restart stuck services, or clear disk space instead of using raw shell. \
217 The user will be prompted to approve the action. Keep targets exact.",
218 serde_json::json!({
219 "type": "object",
220 "properties": {
221 "action": {
222 "type": "string",
223 "enum": ["install_package", "restart_service", "clear_temp"],
224 "description": "The type of remediation to perform."
225 },
226 "target": {
227 "type": "string",
228 "description": "The specific target (e.g., 'python' for install_package, or 'docker' for restart_service). Optional for clear_temp."
229 }
230 },
231 "required": ["action"]
232 }),
233 ),
234 make_tool(
235 "run_hematite_maintainer_workflow",
236 "Run one of Hematite's known maintainer or release workflows with explicit approval. \
237 Prefer this over raw shell when the user explicitly asks to run one of Hematite's own scripts such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`. \
238 Use workflow=clean for cleanup, workflow=package_windows for rebuilding the local Windows portable or installer, and workflow=release for the normal version bump/tag/push/publish flow. \
239 Keep this tool constrained to Hematite's own known workflows instead of inventing ad hoc shell commands or pretending to run arbitrary project scripts.",
240 serde_json::json!({
241 "type": "object",
242 "properties": {
243 "workflow": {
244 "type": "string",
245 "enum": ["clean", "package_windows", "release"],
246 "description": "Which known Hematite maintainer workflow to run."
247 },
248 "deep": {
249 "type": "boolean",
250 "description": "For workflow=clean. Also remove heavy build/runtime artifacts such as target/ and vein.db."
251 },
252 "reset": {
253 "type": "boolean",
254 "description": "For workflow=clean. Reset PLAN/TASK state in addition to normal cleanup."
255 },
256 "prune_dist": {
257 "type": "boolean",
258 "description": "For workflow=clean. Keep only the current Cargo.toml version under dist/."
259 },
260 "installer": {
261 "type": "boolean",
262 "description": "For workflow=package_windows. Also build the Windows installer."
263 },
264 "add_to_path": {
265 "type": "boolean",
266 "description": "For workflow=package_windows or workflow=release. Update the user PATH to the rebuilt portable."
267 },
268 "version": {
269 "type": "string",
270 "description": "For workflow=release. Exact semantic version such as 0.4.5."
271 },
272 "bump": {
273 "type": "string",
274 "enum": ["patch", "minor", "major"],
275 "description": "For workflow=release. Ask release.ps1 to calculate the next version."
276 },
277 "push": {
278 "type": "boolean",
279 "description": "For workflow=release. Push main and the new tag."
280 },
281 "skip_installer": {
282 "type": "boolean",
283 "description": "For workflow=release. Skip the Windows installer build."
284 },
285 "publish_crates": {
286 "type": "boolean",
287 "description": "For workflow=release. Publish hematite-cli to crates.io after a successful push."
288 },
289 "publish_voice_crate": {
290 "type": "boolean",
291 "description": "For workflow=release. Publish hematite-kokoros first, then hematite-cli."
292 }
293 },
294 "required": ["workflow"]
295 }),
296 ),
297 make_tool(
298 "run_workspace_workflow",
299 "Run an approval-gated workflow or script in the locked project workspace root. \
300 Use this for the current project's build, test, lint, fix, package.json scripts, just/task/make targets, explicit local script paths, exact workspace commands, or typed website server control. \
301 Website workflows are preferred when working on a local web app because they give Hematite a structured start/probe/validate/status/stop loop with stored runtime metadata instead of improvised shell. \
302 FORBIDDEN: The `command` field MUST be a real executable shell command (e.g. `npm install`, `cargo build`). \
303 NEVER put natural language, user-requests, or conversational intent into the `command` field. \
304 This tool is for the active workspace, not for Hematite's own maintainer scripts.",
305 serde_json::json!({
306 "type": "object",
307 "properties": {
308 "workflow": {
309 "type": "string",
310 "enum": ["build", "test", "lint", "fix", "package_script", "task", "just", "make", "script_path", "command", "website_start", "website_probe", "website_validate", "website_status", "website_stop"],
311 "description": "Which workspace workflow to run."
312 },
313 "name": {
314 "type": "string",
315 "description": "Required for workflow=package_script, task, just, or make. The script or target name."
316 },
317 "path": {
318 "type": "string",
319 "description": "Required for workflow=script_path. Relative path to a script inside the locked workspace root."
320 },
321 "command": {
322 "type": "string",
323 "description": "Required for workflow=command. Exact command to execute from the locked workspace root."
324 },
325 "mode": {
326 "type": "string",
327 "enum": ["dev", "preview", "start"],
328 "description": "Optional for workflow=website_start. Which website server mode to infer. Defaults to dev."
329 },
330 "script": {
331 "type": "string",
332 "description": "Optional for workflow=website_start. Exact package.json script to run instead of inferring one."
333 },
334 "url": {
335 "type": "string",
336 "description": "Optional for workflow=website_start, website_probe, or website_validate. Explicit local URL to probe, such as http://127.0.0.1:5173/."
337 },
338 "host": {
339 "type": "string",
340 "description": "Optional for workflow=website_start. Host used when constructing an inferred probe URL. Defaults to 127.0.0.1."
341 },
342 "port": {
343 "type": "integer",
344 "description": "Optional for workflow=website_start. Port used when constructing an inferred probe URL."
345 },
346 "label": {
347 "type": "string",
348 "description": "Optional for website workflows. Logical server name for storing runtime metadata. Defaults to default."
349 },
350 "routes": {
351 "type": "array",
352 "items": { "type": "string" },
353 "description": "Optional for workflow=website_validate. Relative routes or absolute URLs to validate, such as [\"/\", \"/pricing\", \"/about\"]."
354 },
355 "asset_limit": {
356 "type": "integer",
357 "description": "Optional for workflow=website_validate. Maximum number of linked local assets to probe after route validation."
358 },
359 "request_timeout_ms": {
360 "type": "integer",
361 "description": "Optional for workflow=website_start. Per-request HTTP timeout used by the readiness probe."
362 },
363 "timeout_ms": {
364 "type": "integer",
365 "description": "Optional timeout override in milliseconds. For website_start this is the boot/readiness timeout. For website_probe and website_status it is the probe timeout."
366 }
367 },
368 "required": ["workflow"]
369 }),
370 ),
371 make_tool(
372 "read_file",
373 "Read the contents of a file. For large files, use 'offset' and 'limit' to navigate.",
374 serde_json::json!({
375 "type": "object",
376 "properties": {
377 "path": {
378 "type": "string",
379 "description": "Path to the file, relative to the project root"
380 },
381 "offset": {
382 "type": "integer",
383 "description": "Starting line number (0-indexed)"
384 },
385 "limit": {
386 "type": "integer",
387 "description": "Number of lines to read"
388 }
389 },
390 "required": ["path"]
391 }),
392 ),
393 make_tool(
394 "lsp_definitions",
395 "Get the precise definition location (file:line:char) for a symbol at a specific position. \
396 Use this to jump to function/struct source code accurately.",
397 serde_json::json!({
398 "type": "object",
399 "properties": {
400 "path": { "type": "string", "description": "File path" },
401 "line": { "type": "integer", "description": "0-indexed line" },
402 "character": { "type": "integer", "description": "0-indexed character" }
403 },
404 "required": ["path", "line", "character"]
405 }),
406 ),
407 make_tool(
408 "lsp_references",
409 "Find all locations where a symbol is used across the entire workspace. \
410 Use this to understand the impact of a refactor or discover internal API users.",
411 serde_json::json!({
412 "type": "object",
413 "properties": {
414 "path": { "type": "string", "description": "File path" },
415 "line": { "type": "integer", "description": "0-indexed line" },
416 "character": { "type": "integer", "description": "0-indexed character" }
417 },
418 "required": ["path", "line", "character"]
419 }),
420 ),
421 make_tool(
422 "lsp_hover",
423 "Get hover information (documentation, function signature, type details) for a symbol. \
424 Use this for rapid spatial awareness without opening every file.",
425 serde_json::json!({
426 "type": "object",
427 "properties": {
428 "path": { "type": "string", "description": "File path" },
429 "line": { "type": "integer", "description": "0-indexed line" },
430 "character": { "type": "integer", "description": "0-indexed character" }
431 },
432 "required": ["path", "line", "character"]
433 }),
434 ),
435 make_tool(
436 "lsp_rename_symbol",
437 "Rename a symbol project-wide using the Language Server. Ensures all references are updated safely.",
438 serde_json::json!({
439 "type": "object",
440 "properties": {
441 "path": { "type": "string", "description": "File path" },
442 "line": { "type": "integer", "description": "0-indexed line" },
443 "character": { "type": "integer", "description": "0-indexed character" },
444 "new_name": { "type": "string", "description": "The new name for the symbol" }
445 },
446 "required": ["path", "line", "character", "new_name"]
447 }),
448 ),
449 make_tool(
450 "lsp_get_diagnostics",
451 "Get a list of current compiler errors and warnings for a specific file. \
452 Use this to verify your code compiles and and to find exactly where errors are located.",
453 serde_json::json!({
454 "type": "object",
455 "properties": {
456 "path": { "type": "string", "description": "File path" }
457 },
458 "required": ["path"]
459 }),
460 ),
461 make_tool(
462 "vision_analyze",
463 "Send an image file (screenshot, diagram, or UI mockup) to the multimodal vision model for technical analysis. \
464 Use this to identify UI bugs, confirm visual states, or understand architectural diagrams.",
465 serde_json::json!({
466 "type": "object",
467 "properties": {
468 "path": { "type": "string", "description": "Absolute or relative path to the image file." },
469 "prompt": { "type": "string", "description": "The specific question or analysis request for the vision model." }
470 },
471 "required": ["path", "prompt"]
472 }),
473 ),
474 make_tool(
475 "patch_hunk",
476 "Replace a specific line range [start_line, end_line] with new content. \
477 This is the most precise way to edit code and avoids search string failures.",
478 serde_json::json!({
479 "type": "object",
480 "properties": {
481 "path": { "type": "string", "description": "File path" },
482 "start_line": { "type": "integer", "description": "Starting line (1-indexed)" },
483 "end_line": { "type": "integer", "description": "Ending line (inclusive)" },
484 "replacement": { "type": "string", "description": "The new content for this range" }
485 },
486 "required": ["path", "start_line", "end_line", "replacement"]
487 }),
488 ),
489 make_tool(
490 "multi_search_replace",
491 "Replace multiple existing code blocks in a single file with new content. \
492 Each hunk specifies an EXACT 'search' string and a 'replace' string. \
493 The 'search' string MUST exactly match the existing file contents (including whitespace). \
494 This is the safest and most reliable way to make multiple structural edits.",
495 serde_json::json!({
496 "type": "object",
497 "properties": {
498 "path": { "type": "string", "description": "File path" },
499 "hunks": {
500 "type": "array",
501 "items": {
502 "type": "object",
503 "properties": {
504 "search": { "type": "string", "description": "Exact existing text to find and replace" },
505 "replace": { "type": "string", "description": "The new replacement text" }
506 },
507 "required": ["search", "replace"]
508 }
509 }
510 },
511 "required": ["path", "hunks"]
512 }),
513 ),
514 make_tool(
515 "write_file",
516 "Write content to a file, creating it (and any parent dirs) if needed. \
517 Overwrites existing files. \
518 SOVEREIGN PATHING: For files in common areas, use `@DESKTOP/file.txt`, `@DOCUMENTS/file.txt`, `@DOWNLOADS/file.txt`, or `@HOME/file.txt` to ensure 100% path accuracy.",
519 serde_json::json!({
520 "type": "object",
521 "properties": {
522 "path": { "type": "string", "description": "File path" },
523 "content": { "type": "string", "description": "Full file content to write" }
524 },
525 "required": ["path", "content"]
526 }),
527 ),
528 make_tool(
529 "create_directory",
530 "Authoritatively create a new directory (and any parent dirs) if they do not exist. \
531 Use this instead of raw shell (mkdir) for all filesystem organization. \
532 Supports both relative paths and absolute paths. \
533 SOVEREIGN PATHING: For directories in common areas, use `@DESKTOP/folder`, `@DOCUMENTS/folder`, `@DOWNLOADS/folder`, or `@HOME/folder` to ensure 100% path accuracy.",
534 serde_json::json!({
535 "type": "object",
536 "properties": {
537 "path": { "type": "string", "description": "Relative or absolute directory path" }
538 },
539 "required": ["path"]
540 }),
541 ),
542 make_tool(
543 "research_web",
544 "Perform a zero-cost technical search using DuckDuckGo. \
545 Use this to find documentation, latest API changes, or solutions to complex errors \
546 when your internal knowledge is insufficient. Returns snippets and URLs.",
547 serde_json::json!({
548 "type": "object",
549 "properties": {
550 "query": { "type": "string", "description": "The technical search query" }
551 },
552 "required": ["query"]
553 }),
554 ),
555 make_tool(
556 "fetch_docs",
557 "Fetch a URL and convert it to clean Markdown. Use this to 'read' the documentation \
558 links found via research_web. This tool uses a proxy to bypass IP blocks.",
559 serde_json::json!({
560 "type": "object",
561 "properties": {
562 "url": { "type": "string", "description": "The URL of the documentation to fetch" }
563 },
564 "required": ["url"]
565 }),
566 ),
567 make_tool(
568 "edit_file",
569 "Edit a file by replacing an exact string with another. \
570 The 'search' string does NOT need perfectly matching indentation (it is fuzzy), \
571 but the non-whitespace text must match exactly. Use this for targeted edits.",
572 serde_json::json!({
573 "type": "object",
574 "properties": {
575 "path": { "type": "string", "description": "File path" },
576 "search": {
577 "type": "string",
578 "description": "The exact text to find (must match whitespace/indentation precisely)"
579 },
580 "replace": {
581 "type": "string",
582 "description": "The replacement text"
583 }
584 },
585 "required": ["path", "search", "replace"]
586 }),
587 ),
588 make_tool(
589 "auto_pin_context",
590 "Select 1-3 core files to 'Lock' into prioritized memory. \
591 Use this to ensure the most important architecture files \
592 are always visible during complex refactorings.",
593 serde_json::json!({
594 "type": "object",
595 "properties": {
596 "paths": {
597 "type": "array",
598 "items": { "type": "string" }
599 },
600 "reason": { "type": "string" }
601 },
602 "required": ["paths", "reason"]
603 }),
604 ),
605 make_tool(
606 "list_pinned",
607 "List all files currently pinned in the model's active context.",
608 serde_json::json!({
609 "type": "object",
610 "properties": {}
611 }),
612 ),
613 make_tool(
614 "list_files",
615 "List files in a directory, optionally filtered by extension.",
616 serde_json::json!({
617 "type": "object",
618 "properties": {
619 "path": {
620 "type": "string",
621 "description": "Directory to list (default: current dir)"
622 },
623 "extension": {
624 "type": "string",
625 "description": "Only return files with this extension, e.g. 'rs', 'toml' (no dot)"
626 }
627 },
628 "required": []
629 }),
630 ),
631 make_tool(
632 "tail_file",
633 "Read the last N lines of a file — useful for log files, test output, \
634 build artifacts, and any large file where only the tail is relevant. \
635 Supports an optional grep filter to show only matching lines from the tail. \
636 Use this instead of read_file when you only need the end of a large file.",
637 serde_json::json!({
638 "type": "object",
639 "properties": {
640 "path": {
641 "type": "string",
642 "description": "Path to the file, relative to the project root"
643 },
644 "lines": {
645 "type": "integer",
646 "description": "Number of lines to return from the end (default: 50, max: 500)"
647 },
648 "grep": {
649 "type": "string",
650 "description": "Optional regex pattern — only return lines matching this pattern (applied before the tail slice)"
651 }
652 },
653 "required": ["path"]
654 }),
655 ),
656 make_tool(
657 "grep_files",
658 "Search file contents for a regex pattern. Supports context lines, files-only mode, \
659 and pagination. Returns file:line:content format by default.",
660 serde_json::json!({
661 "type": "object",
662 "properties": {
663 "pattern": {
664 "type": "string",
665 "description": "Regex pattern to search for (case-insensitive by default)"
666 },
667 "path": {
668 "type": "string",
669 "description": "Directory to search (default: current dir)"
670 },
671 "extension": {
672 "type": "string",
673 "description": "Only search files with this extension, e.g. 'rs'"
674 },
675 "mode": {
676 "type": "string",
677 "enum": ["content", "files_only"],
678 "description": "'content' (default) returns matching lines; 'files_only' returns only filenames"
679 },
680 "context": {
681 "type": "integer",
682 "description": "Lines of context before AND after each match (like rg -C)"
683 },
684 "before": {
685 "type": "integer",
686 "description": "Lines of context before each match (overrides context)"
687 },
688 "after": {
689 "type": "integer",
690 "description": "Lines of context after each match (overrides context)"
691 },
692 "head_limit": {
693 "type": "integer",
694 "description": "Max hunks (or files in files_only) to return (default: 50)"
695 },
696 "offset": {
697 "type": "integer",
698 "description": "Skip first N hunks/files - for pagination (default: 0)"
699 }
700 },
701 "required": ["pattern"]
702 }),
703 ),
704 make_tool(
705 "git_commit",
706 "Stage all changes (git add -A) and create a commit. You MUST use 'Conventional Commits' (e.g. 'feat: description').",
707 serde_json::json!({
708 "type": "object",
709 "properties": {
710 "message": { "type": "string", "description": "Commit message (Conventional Commit style)" }
711 },
712 "required": ["message"]
713 }),
714 ),
715 make_tool(
716 "git_push",
717 "Push current branched changes to the remote origin. Requires an existing remote connection.",
718 serde_json::json!({
719 "type": "object",
720 "properties": {},
721 "required": []
722 }),
723 ),
724 make_tool(
725 "git_remote",
726 "View or manage git remotes. Use this for onboarding to GitHub/GitLab services.",
727 serde_json::json!({
728 "type": "object",
729 "properties": {
730 "action": {
731 "type": "string",
732 "enum": ["list", "add", "remove"],
733 "description": "Operation to perform"
734 },
735 "name": { "type": "string", "description": "Remote name (e.g. origin)" },
736 "url": { "type": "string", "description": "Remote URL (for 'add' action)" }
737 },
738 "required": ["action"]
739 }),
740 ),
741 make_tool(
742 "git_onboarding",
743 "High-level wizard to connect this repository to a remote host (GitHub/GitLab). \
744 Handles adding the remote and performing the initial tracking push in one step.",
745 serde_json::json!({
746 "type": "object",
747 "properties": {
748 "url": { "type": "string", "description": "The remote repository URL (HTTPS or SSH)" },
749 "name": { "type": "string", "description": "The remote name (default: origin)" },
750 "push": { "type": "boolean", "description": "Whether to perform an initial push to establish tracking (default: false)" }
751 },
752 "required": ["url"]
753 }),
754 ),
755 make_tool(
756 "verify_build",
757 "Run project verification for build, test, lint, or fix workflows. \
758 Prefer per-project verify profiles from `.hematite/settings.json`, and fall back to \
759 auto-detected defaults when no profile is configured. Returns BUILD OK or BUILD FAILED \
760 with command output. ALWAYS call this after scaffolding a new project or making structural changes.",
761 serde_json::json!({
762 "type": "object",
763 "properties": {
764 "action": {
765 "type": "string",
766 "enum": ["build", "test", "lint", "fix"],
767 "description": "Which verification action to run. Defaults to build."
768 },
769 "profile": {
770 "type": "string",
771 "description": "Optional named verify profile from `.hematite/settings.json`."
772 },
773 "timeout_secs": {
774 "type": "integer",
775 "description": "Optional timeout override for this verification run."
776 }
777 }
778 }),
779 ),
780 make_tool(
781 "git_worktree",
782 "Manage Git worktrees - isolated working directories on separate branches. \
783 Use 'add' to create a safe sandbox for risky/experimental work, \
784 'list' to see all worktrees, 'remove' to clean up, 'prune' to remove stale entries.",
785 serde_json::json!({
786 "type": "object",
787 "properties": {
788 "action": {
789 "type": "string",
790 "enum": ["list", "add", "remove", "prune"],
791 "description": "Worktree operation to perform"
792 },
793 "path": {
794 "type": "string",
795 "description": "Directory path for the new worktree (required for add/remove)"
796 },
797 "branch": {
798 "type": "string",
799 "description": "Branch name for the worktree (add only; defaults to path basename)"
800 }
801 },
802 "required": ["action"]
803 }),
804 ),
805 make_tool(
806 "clarify",
807 "Ask the user a clarifying question when you genuinely cannot proceed without \
808 more information. Use this ONLY when you are blocked and cannot make a \
809 reasonable assumption. Do NOT use it to ask permission - just act.",
810 serde_json::json!({
811 "type": "object",
812 "properties": {
813 "question": {
814 "type": "string",
815 "description": "The specific question to ask the user"
816 }
817 },
818 "required": ["question"]
819 }),
820 ),
821 make_tool(
822 "manage_tasks",
823 "Manage the persistent task ledger in .hematite/TASK.md. Use this to track long-term goals across restarts.",
824 crate::tools::tasks::get_tasks_params(),
825 ),
826 make_tool(
827 "maintain_plan",
828 "Document the architectural strategy and session blueprint in .hematite/PLAN.md. Use this to maintain context across restarts.",
829 crate::tools::plan::get_plan_params(),
830 ),
831 make_tool(
832 "generate_walkthrough",
833 "Generate a final session report in .hematite/WALKTHROUGH.md including achievements and verification results.",
834 crate::tools::plan::get_walkthrough_params(),
835 ),
836 make_tool(
837 "swarm",
838 "Delegate high-volume parallel tasks to a swarm of background workers. \
839 Use this for large-scale refactors, multi-file research, or parallel documentation updates. \
840 You must provide a 'tasks' array where each task has an 'id', 'target' (file), and 'instruction'.",
841 serde_json::json!({
842 "type": "object",
843 "properties": {
844 "tasks": {
845 "type": "array",
846 "items": {
847 "type": "object",
848 "properties": {
849 "id": { "type": "string" },
850 "target": { "type": "string", "description": "Target file or directory" },
851 "instruction": { "type": "string", "description": "Specific task for this worker" }
852 },
853 "required": ["id", "target", "instruction"]
854 }
855 },
856 "max_workers": {
857 "type": "integer",
858 "description": "Max parallel workers (default 3, auto-throttled by hardware)",
859 "default": 3
860 }
861 },
862 "required": ["tasks"]
863 }),
864 ),
865 ];
866
867 let lsp_defs = crate::tools::lsp_tools::get_lsp_definitions();
868 tools.push(make_tool(
869 "lsp_search_symbol",
870 "Find the location (file/line) of any function, struct, or variable in the entire project workspace. \
871 This is the fastest 'Golden Path' for navigating to a symbol by name.",
872 serde_json::json!({
873 "type": "object",
874 "properties": {
875 "query": { "type": "string", "description": "The name of the symbol to find (e.g. 'initialize_mcp')" }
876 },
877 "required": ["query"]
878 }),
879 ));
880 for def in lsp_defs {
881 let name = def["name"].as_str().unwrap();
882 tools.push(ToolDefinition {
883 tool_type: "function".into(),
884 function: ToolFunction {
885 name: name.into(),
886 description: def["description"].as_str().unwrap().into(),
887 parameters: def["parameters"].clone(),
888 },
889 metadata: tool_metadata_for_name(name),
890 });
891 }
892
893 tools
894}
895
896pub async fn dispatch_builtin_tool(
897 name: &str,
898 args: &Value,
899 config: &HematiteConfig,
900) -> Result<String, String> {
901 match name {
902 "shell" => crate::tools::shell::execute(args).await,
903 "run_code" => crate::tools::code_sandbox::execute(args).await,
904 "trace_runtime_flow" => crate::tools::runtime_trace::trace_runtime_flow(args).await,
905 "describe_toolchain" => crate::tools::toolchain::describe_toolchain(args).await,
906 "inspect_host" => crate::tools::host_inspect::inspect_host(args).await,
907 "resolve_host_issue" => crate::tools::host_inspect::resolve_host_issue(args).await,
908 "run_hematite_maintainer_workflow" => {
909 crate::tools::repo_script::run_hematite_maintainer_workflow(args).await
910 }
911 "run_workspace_workflow" => crate::tools::workspace_workflow::run_workspace_workflow(args).await,
912 "read_file" => crate::tools::file_ops::read_file(args).await,
913 "inspect_lines" => crate::tools::file_ops::inspect_lines(args).await,
914 "tail_file" => crate::tools::file_ops::tail_file(args).await,
915 "write_file" => crate::tools::file_ops::write_file(args).await,
916 "create_directory" => crate::tools::file_ops::create_directory(args).await,
917 "edit_file" => crate::tools::file_ops::edit_file(args).await,
918 "patch_hunk" => crate::tools::file_ops::patch_hunk(args).await,
919 "multi_search_replace" => crate::tools::file_ops::multi_search_replace(args).await,
920 "list_files" => crate::tools::file_ops::list_files(args).await,
921 "grep_files" => crate::tools::file_ops::grep_files(args).await,
922 "git_commit" => crate::tools::git::execute(args).await,
923 "git_push" => crate::tools::git::execute_push(args).await,
924 "git_remote" => crate::tools::git::execute_remote(args).await,
925 "git_onboarding" => crate::tools::git_onboarding::execute(args).await,
926 "verify_build" => crate::tools::verify_build::execute(args).await,
927 "git_worktree" => crate::tools::git::execute_worktree(args).await,
928 "health" => crate::tools::health::execute(args).await,
929 "research_web" => {
930 crate::tools::research::execute_search(args, config.searx_url.clone()).await
931 }
932 "fetch_docs" => crate::tools::research::execute_fetch(args).await,
933 "manage_tasks" => crate::tools::tasks::manage_tasks(args).await,
934 "maintain_plan" => crate::tools::plan::maintain_plan(args).await,
935 "generate_walkthrough" => crate::tools::plan::generate_walkthrough(args).await,
936 "clarify" => {
937 let q = args.get("question").and_then(|v| v.as_str()).unwrap_or("?");
938 Ok(format!("[clarify] {q}"))
939 }
940 "vision_analyze" => Err(
941 "Tool 'vision_analyze' must be dispatched by ConversationManager (it requires hardware engine access)."
942 .into(),
943 ),
944 other => {
945 if other.contains('.') || other.contains('/') || other.contains('\\') {
946 Err(format!(
947 "'{}' is a PATH, not a tool. You correctly identified the location, but you MUST use `read_file` or `list_files` (internal) or `powershell` (external) to access it.",
948 other
949 ))
950 } else if matches!(other.to_lowercase().as_str(), "hematite" | "assistant" | "ai") {
951 Err(format!(
952 "'{}' is YOUR IDENTITY, not a tool. Use list_files or read_file to explore the codebase.",
953 other
954 ))
955 } else if matches!(
956 other.to_lowercase().as_str(),
957 "thought" | "think" | "reasoning" | "thinking" | "internal"
958 ) {
959 Err(format!(
960 "'{}' is NOT a tool - it is a reasoning tag. Output your answer as plain text after your <think> block.",
961 other
962 ))
963 } else {
964 Err(format!("Unknown tool: '{}'", other))
965 }
966 }
967 }
968}
969
970pub fn get_mutation_label(name: &str, args: &Value) -> Option<String> {
971 match name {
972 "shell" => {
973 let cmd = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
974 if cmd.contains("rm ") || cmd.contains("del ") {
975 Some("Destructive File Deletion".into())
976 } else if cmd.contains("mkdir ") {
977 Some("Directory Creation".into())
978 } else {
979 Some("Execute Shell Command".into())
980 }
981 }
982 "write_file" => {
983 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
984 Some(format!("Create/Overwrite File: {}", path))
985 }
986 "create_directory" => {
987 let path = args
988 .get("path")
989 .and_then(|v| v.as_str())
990 .unwrap_or("folder");
991 Some(format!("Create Directory: {}", path))
992 }
993 "edit_file" | "patch_hunk" | "multi_search_replace" => {
994 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
995 Some(format!("Surgical Code Mutation: {}", path))
996 }
997 "git_commit" => Some("Permanent Version History Commit".into()),
998 "git_push" => Some("Remote Origin Synchronisation (Push)".into()),
999 "resolve_host_issue" => Some("System-Level Host Remediation".into()),
1000 "run_workspace_workflow" => Some("Automated Workspace Re-alignment".into()),
1001 _ => None,
1002 }
1003}