1use crate::agent::config::HematiteConfig;
2use crate::agent::inference::tool_metadata_for_name;
3use crate::agent::types::{ToolDefinition, ToolFunction};
4use serde_json::Value;
5
6fn make_tool(name: &str, description: &str, parameters: Value) -> ToolDefinition {
7 ToolDefinition {
8 tool_type: "function".into(),
9 function: ToolFunction {
10 name: name.into(),
11 description: description.into(),
12 parameters,
13 },
14 metadata: tool_metadata_for_name(name),
15 }
16}
17
18pub fn get_tools() -> Vec<ToolDefinition> {
20 let os = std::env::consts::OS;
21 let mut tools = vec![
22 make_tool(
23 "shell",
24 &format!(
25 "Execute a command in the host shell ({os}). \
26 Use this ONLY for building, testing, or advanced system operations that have no dedicated Hematite tool. \
27 FORBIDDEN: Never use shell to run `mkdir`, `rm`, `cat`, `head`, `tail`, or `write-file` equivalents. \
28 Use the dedicated surgical tools (create_directory, read_file, tail_file) instead. \
29 Output is capped at 64KB. Prefer non-interactive commands."
30 ),
31 serde_json::json!({
32 "type": "object",
33 "properties": {
34 "command": {
35 "type": "string",
36 "description": "The command to run"
37 },
38 "reason": {
39 "type": "string",
40 "description": "For risky shell calls, explain what this command is verifying or changing."
41 },
42 "timeout_secs": {
43 "type": "integer",
44 "description": "Optional timeout in seconds (default 60)"
45 }
46 },
47 "required": ["command"]
48 }),
49 ),
50 make_tool(
51 "run_code",
52 "Execute a short JavaScript/TypeScript or Python snippet in a sandboxed subprocess. \
53 No network access, no filesystem escape, hard 10-second timeout. \
54 Use this to verify logic, test algorithms, compute values, or test functions \
55 when you need real output rather than a guess. \
56 ALWAYS include the `language` field — there is no default. \
57 \
58 JAVASCRIPT/TYPESCRIPT (language: \"javascript\"): \
59 Runs via Deno, NOT Node.js. `require()` does not exist — never use it. \
60 URL imports (e.g. from 'https://deno.land/...') are blocked — network is off. \
61 Use built-in Web APIs only: `crypto.subtle`, `TextEncoder`, `URL`, `atob`/`btoa`, etc. \
62 SHA-256 example: \
63 const buf = await crypto.subtle.digest('SHA-256', new TextEncoder().encode('hello')); \
64 console.log([...new Uint8Array(buf)].map(b=>b.toString(16).padStart(2,'0')).join('')); \
65 \
66 PYTHON (language: \"python\"): \
67 Standard library is available. `hashlib`, `json`, `math`, `datetime`, `re`, `itertools` all work. \
68 `subprocess`, `socket`, `urllib`, `requests` are blocked. \
69 SHA-256 example: import hashlib; print(hashlib.sha256(b'hello').hexdigest()) \
70 \
71 Do NOT use this tool for PowerShell or shell scripting. This is strictly for high-precision computation in JavaScript, TypeScript, or Python only. \
72 Do NOT fall back to shell to run deno, python, or node — use this tool directly.",
73 serde_json::json!({
74 "type": "object",
75 "properties": {
76 "language": {
77 "type": "string",
78 "enum": ["javascript", "typescript", "python"],
79 "description": "The language to run. javascript/typescript requires Deno; python requires Python 3."
80 },
81 "code": {
82 "type": "string",
83 "description": "The code to execute. Keep it short and self-contained. Print results to stdout."
84 },
85 "timeout_seconds": {
86 "type": "integer",
87 "description": "Max execution time in seconds (default 10, max 60). Use higher values for longer computations."
88 }
89 },
90 "required": ["language", "code"]
91 }),
92 ),
93 make_tool(
94 "query_data",
95 "Execute an analytical SQL query against a local file (CSV, JSON, or SQLite .db) using SQLite semantics. \
96 Use this for high-precision data analysis, aggregation, and filtering without writing custom scripts. \
97 For CSV and JSON files, the table name is always 'source'. \
98 For SQLite (.db) files, use the actual table names defined in the schema. \
99 Results are returned as a formatted table (max 100 rows).",
100 serde_json::json!({
101 "type": "object",
102 "properties": {
103 "sql": { "type": "string", "description": "The SQL query to run (e.g. SELECT count(*), category FROM source GROUP BY category;)" },
104 "path": { "type": "string", "description": "Relative path to the data file (CSV, JSON, or .db) inside the project root." },
105 "explain": { "type": "boolean", "description": "If true, returns the SQL execution plan (EXPLAIN QUERY PLAN) instead of the results." }
106 },
107 "required": ["sql", "path"]
108 }),
109 ),
110 make_tool(
111 "export_as_table",
112 "Persist a structured list of objects (JSON array) to a local CSV or SQLite file. \
113 Use this to save research results, system snapshots, or data analysis outputs for later use. \
114 Hematite will automatically create the table schema or CSV header based on the object keys.",
115 serde_json::json!({
116 "type": "object",
117 "properties": {
118 "items": { "type": "array", "items": { "type": "object" }, "description": "The list of JSON objects to export." },
119 "path": { "type": "string", "description": "Relative path to save the file (e.g. 'results.csv' or 'audit.db')." },
120 "format": { "type": "string", "enum": ["csv", "sqlite"], "description": "The output format (default: csv)." }
121 },
122 "required": ["items", "path"]
123 }),
124 ),
125 make_tool(
126 "analyze_trends",
127 "Perform statistical analysis and generate an ASCII histogram from a SQL query result. \
128 This tool pipes SQL data into a Python sandbox to calculate Mean, Median, StdDev, and distribution. \
129 Use this to find patterns, anomalies, or trends in large datasets without manual calculation.",
130 serde_json::json!({
131 "type": "object",
132 "properties": {
133 "sql": { "type": "string", "description": "The SQL query to run (must return at least one numeric column)." },
134 "path": { "type": "string", "description": "Relative path to the data file (.db, .csv, or .json)." }
135 },
136 "required": ["sql", "path"]
137 }),
138 ),
139 make_tool(
140 "scientific_compute",
141 "Advanced computational research: symbolic math, unit-safety, complexity auditing, ledger memory, and dataset math.",
142 serde_json::json!({
143 "type": "object",
144 "properties": {
145 "mode": { "type": "string", "enum": ["symbolic", "units", "complexity", "ledger", "dataset"] },
146 "expr": { "type": "string", "description": "Equation/expression for symbolic mode." },
147 "calculation": { "type": "string", "description": "Calculation for units mode (e.g. 10m/2s)." },
148 "snippet": { "type": "string", "description": "Python snippet for complexity auditing (loop over n)." },
149 "target": { "type": "string", "enum": ["solve", "simplify", "integrate", "diff"], "description": "Symbolic operation." },
150 "latex": { "type": "boolean", "description": "Toggle LaTeX output for symbolic mode." },
151 "action": { "type": "string", "enum": ["read", "append"], "description": "Ledger action." },
152 "content": { "type": "string", "description": "Derivation content for ledger append." },
153 "path": { "type": "string", "description": "Path to dataset (.db, .csv, .json) for dataset mode." },
154 "sql": { "type": "string", "description": "SQL query to fetch data for dataset mode." },
155 "python_op": { "type": "string", "description": "Python operation for dataset mode (e.g. 'sum(vals)/len(vals)')." }
156 },
157 "required": ["mode"]
158 }),
159 ),
160
161 make_tool(
162 "trace_runtime_flow",
163 "Return an authoritative read-only trace of Hematite runtime flow. \
164 Use this for architecture questions about keyboard input to final output, \
165 reasoning/specular separation, startup wiring, runtime subsystems, \
166 voice synthesis and Ctrl+T toggle, or \
167 session reset commands like /clear, /new, and /forget. Prefer this over guessing.",
168 serde_json::json!({
169 "type": "object",
170 "properties": {
171 "topic": {
172 "type": "string",
173 "enum": ["user_turn", "session_reset", "reasoning_split", "runtime_subsystems", "startup", "voice"],
174 "description": "Which verified runtime report to return. Use 'voice' for any question about Ctrl+T, voice toggle, or TTS pipeline. Use 'user_turn' for keyboard-to-output flow. Use 'session_reset' for /clear, /forget, /new. Use 'startup' for startup wiring. Use 'reasoning_split' for specular/thought routing. Use 'runtime_subsystems' for background subsystem overview."
175 },
176 "input": {
177 "type": "string",
178 "description": "Optional user input to label a normal user-turn trace"
179 },
180 "command": {
181 "type": "string",
182 "enum": ["/clear", "/new", "/forget", "all"],
183 "description": "Optional reset command when topic=session_reset"
184 }
185 },
186 "required": ["topic"]
187 }),
188 ),
189 make_tool(
190 "describe_toolchain",
191 "Return an authoritative read-only description of Hematite's actual tool surface and investigation strategy. \
192 Use this for tooling-discipline questions, best-tool selection, or read-only plans for tracing runtime behavior. \
193 Prefer this over improvising tool names or investigation steps from memory.",
194 serde_json::json!({
195 "type": "object",
196 "properties": {
197 "topic": {
198 "type": "string",
199 "enum": ["read_only_codebase", "user_turn_plan", "voice_latency_plan", "host_inspection_plan", "all"],
200 "description": "Which authoritative toolchain report to return"
201 },
202 "question": {
203 "type": "string",
204 "description": "Optional user question to label or tailor the read-only investigation plan"
205 }
206 }
207 }),
208 ),
209 make_tool(
210 "inspect_host",
211 "Return a structured read-only inspection of the current machine and environment. \
212 Prefer this over raw shell for questions about OS configuration (firewall, power, uptime), plain-English system health reports, silicon health and high-fidelity hardware telemetry (NVIDIA clocks/fans/power, CPU frequency averaging), installed developer tools, PATH issues, package-manager and environment health, network state, service state, running processes, desktop items, Downloads size, listening ports, repo health, or directory/disk summaries. \
213 For high-performance hardware testing, use topic=disk_benchmark to measure real-time kernel disk queue intensity. \
214 For remediation questions phrased like 'how do I fix cargo not found', 'how do I fix port 3000 already in use', or 'how do I fix LM Studio not reachable', use topic=fix_plan instead of diagnosis-only topics like env_doctor, path, or ports. \
215 Use topic=summary for a compact host snapshot, topic=toolchains for common dev tool versions, topic=path for PATH analysis, topic=env_doctor for package-manager and PATH health, topic=fix_plan for structured remediation plans, topic=network for adapters/IPs/gateways/DNS, topic=services for service status and startup mode, \
216 topic=processes for top processes by memory/cpu and real-time disk/network I/O stats (look for [I/O R:N/W:N] tags to identify disk-heavy processes), \
217 topic=desktop or topic=downloads for known folders, topic=ports for listening endpoints, topic=repo_doctor for a structured workspace health report, \
218 topic=log_check for recent critical/error events from system event logs or journalctl, topic=startup_items for programs and services that run at boot (registry Run keys and startup folders on Windows; systemd enabled units on Linux), \
219 topic=health_report for a plain-English tiered system health verdict (disk, RAM, tools, recent errors), \
220 topic=storage for all drives with capacity/free space plus large developer cache directories, \
221 topic=hardware for CPU model/cores, RAM size/speed, GPU name/driver, motherboard, BIOS, and display configuration, \
222 topic=updates for Windows Update status (last install date, pending update count, WU service state), \
223 topic=security for Windows Defender real-time protection status, last scan date, signature age, firewall profile states, Windows activation, and UAC state, \
224 topic=pending_reboot to check whether a system restart is required and why (Windows Update, CBS, file rename operations), \
225 topic=disk_health for physical drive health via Get-PhysicalDisk and SMART failure prediction, \
226 topic=battery for charge level, status, estimated runtime, and wear level (laptops only — reports no battery on desktops), \
227 topic=recent_crashes for BSOD and unexpected shutdown events plus application crash/hang events from the Windows event log, \
228 topic=scheduled_tasks for all non-disabled scheduled tasks including name, path, last run time, and executable, \
229 topic=dev_conflicts for cross-tool environment conflict detection (Node.js version managers, Python 2 vs 3 ambiguity, conda env shadowing, Rust toolchain path conflicts, Git identity/signing config, duplicate PATH entries), \
230 topic=bitlocker for drive encryption status (BitLocker on Windows, LUKS on Linux), \
231 topic=ad_user for Active Directory / Managed Identity details (SID, group memberships, domain role), \
232 topic=user_accounts for Local User and Group diagnostics (Built-in Administrators, local account state), \
233 topic=rdp for Remote Desktop configuration, port, and active sessions, \
234 topic=shadow_copies for Volume Shadow Copies (VSS) and system restore points, \
235 topic=pagefile for Windows page file configuration and current usage, \
236 topic=windows_features for enabled Windows optional features (IIS, Hyper-V, etc.), \
237 topic=printers for installed printers and active print jobs, \
238 topic=winrm for Windows Remote Management (WinRM) and PS Remoting status, \
239 topic=network_stats for adapter throughput (RX/TX), errors, and dropped packets, \
240 topic=udp_ports for active UDP listeners and notable port annotations, \
241 topic=gpo for applied Group Policy Objects, topic=certificates for local personal certificates, topic=integrity for Windows component store health (SFC/DISM state), topic=domain for Active Directory and domain join status, \
242 topic=device_health for identifying malfunctioning hardware with ConfigManager error codes (Yellow Bangs), topic=drivers for auditing active system drivers and their states, topic=peripherals for enumerating connected USB, input, and display hardware, \
243 topic=sessions for auditing active and disconnected user logon sessions, \
244 topic=ad_user for specific Active Directory user identity, SID, and group membership auditing, \
245 topic=dns_lookup for precision DNS record queries (SRV, MX, TXT), \
246 topic=mdm_enrollment for Intune/MDM enrollment state, Azure AD join, and device management health, \
247 topic=hyperv for local Hyper-V VM inventory and real-time load, \
248 topic=ip_config for detailed adapter configuration and DHCP lease state, \
249 topic=disk_benchmark for high-performance silicon-aware stress testing, \
250 topic=storage_spaces for Windows Storage Spaces pools, virtual disks, physical disk health, and Linux mdadm/LVM, \
251 topic=defender_quarantine for Windows Defender threat detections, quarantine history, and scan summary, \
252 topic=domain_health for domain controller connectivity, LDAP port tests, dsregcmd join state, and GPO last refresh, \
253 topic=service_dependencies for service dependency graph (what requires what, restart cascade planning), \
254 topic=wmi_health for WMI repository integrity, winmgmt verify, and repair steps, \
255 topic=local_security_policy for password/lockout policy, LM compatibility level, and UAC settings, \
256 topic=usb_history for USB device connection history from the USBSTOR registry, \
257 topic=print_spooler for Print Spooler state, PrintNightmare (CVE-2021-34527) hardening check, and print queue, \
258 and topic=directory or topic=disk for arbitrary paths.",
259 serde_json::json!({
260 "type": "object",
261 "properties": {
262 "topic": {
263 "type": "string",
264 "enum": ["summary", "toolchains", "path", "env_doctor", "fix_plan", "network", "services", "processes", "desktop", "downloads", "directory", "disk", "ports", "repo_doctor", "log_check", "startup_items", "health_report", "storage", "hardware", "updates", "security", "pending_reboot", "disk_health", "battery", "recent_crashes", "scheduled_tasks", "dev_conflicts", "os_config", "bitlocker", "rdp", "shadow_copies", "pagefile", "windows_features", "printers", "winrm", "network_stats", "udp_ports", "gpo", "certificates", "integrity", "domain", "domain_health", "device_health", "drivers", "peripherals", "disk_benchmark", "permissions", "login_history", "registry_audit", "share_access", "thermal", "activation", "patch_history", "ad_user", "dns_lookup", "hyperv", "ip_config", "mdm_enrollment", "storage_spaces", "defender_quarantine", "service_dependencies", "wmi_health", "local_security_policy", "usb_history", "print_spooler"],
265 "description": "Which structured host inspection to run. Use topic=ad_user for domain identity audit, topic=dns_lookup for SRV/MX records, topic=hyperv for VM load, topic=ip_config for detailed adapter info, topic=mdm_enrollment for Intune/MDM enrollment state, topic=storage_spaces for Windows Storage Spaces/RAID pools, topic=defender_quarantine for Defender threat history, topic=domain_health for DC connectivity and LDAP tests, topic=service_dependencies for restart cascade planning, topic=wmi_health for WMI repository integrity, topic=local_security_policy for password/lockout/NTLMv2 policy, topic=usb_history for USB forensics, and topic=print_spooler for PrintNightmare check."
266 },
267 "name": {
268 "type": "string",
269 "description": "Optional when topic=processes or topic=services. Case-insensitive substring filter for process or service names."
270 },
271 "issue": {
272 "type": "string",
273 "description": "Optional when topic=fix_plan. Plain-English issue description such as 'cargo not found', 'port 3000 already in use', or 'LM Studio not reachable on localhost:1234'."
274 },
275 "path": {
276 "type": "string",
277 "description": "Required when topic=directory. Optional for topic=disk or topic=repo_doctor. Absolute or relative path to inspect."
278 },
279 "port": {
280 "type": "integer",
281 "description": "Optional when topic=ports or topic=fix_plan. Filter the result to one listening TCP port or anchor a port-conflict fix plan."
282 },
283 "max_entries": {
284 "type": "integer",
285 "description": "Optional cap for listed entries. Defaults to 10 and is capped internally."
286 }
287 }
288 }),
289 ),
290 make_tool(
291 "resolve_host_issue",
292 "A safe, bounded tool for remediating OS and environment issues automatically with user approval. \
293 Use this to fix missing dependencies, restart stuck services, or clear disk space instead of using raw shell. \
294 The user will be prompted to approve the action. Keep targets exact.",
295 serde_json::json!({
296 "type": "object",
297 "properties": {
298 "action": {
299 "type": "string",
300 "enum": ["install_package", "restart_service", "clear_temp"],
301 "description": "The type of remediation to perform."
302 },
303 "target": {
304 "type": "string",
305 "description": "The specific target (e.g., 'python' for install_package, or 'docker' for restart_service). Optional for clear_temp."
306 }
307 },
308 "required": ["action"]
309 }),
310 ),
311 make_tool(
312 "run_hematite_maintainer_workflow",
313 "Run one of Hematite's known maintainer or release workflows with explicit approval. \
314 Prefer this over raw shell when the user explicitly asks to run one of Hematite's own scripts such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`. \
315 Use workflow=clean for cleanup, workflow=package_windows for rebuilding the local Windows portable or installer, and workflow=release for the normal version bump/tag/push/publish flow. \
316 Keep this tool constrained to Hematite's own known workflows instead of inventing ad hoc shell commands or pretending to run arbitrary project scripts.",
317 serde_json::json!({
318 "type": "object",
319 "properties": {
320 "workflow": {
321 "type": "string",
322 "enum": ["clean", "package_windows", "release"],
323 "description": "Which known Hematite maintainer workflow to run."
324 },
325 "deep": {
326 "type": "boolean",
327 "description": "For workflow=clean. Also remove heavy build/runtime artifacts such as target/ and vein.db."
328 },
329 "reset": {
330 "type": "boolean",
331 "description": "For workflow=clean. Reset PLAN/TASK state in addition to normal cleanup."
332 },
333 "prune_dist": {
334 "type": "boolean",
335 "description": "For workflow=clean. Keep only the current Cargo.toml version under dist/."
336 },
337 "installer": {
338 "type": "boolean",
339 "description": "For workflow=package_windows. Also build the Windows installer."
340 },
341 "add_to_path": {
342 "type": "boolean",
343 "description": "For workflow=package_windows or workflow=release. Update the user PATH to the rebuilt portable."
344 },
345 "version": {
346 "type": "string",
347 "description": "For workflow=release. Exact semantic version such as 0.4.5."
348 },
349 "bump": {
350 "type": "string",
351 "enum": ["patch", "minor", "major"],
352 "description": "For workflow=release. Ask release.ps1 to calculate the next version."
353 },
354 "push": {
355 "type": "boolean",
356 "description": "For workflow=release. Push main and the new tag."
357 },
358 "skip_installer": {
359 "type": "boolean",
360 "description": "For workflow=release. Skip the Windows installer build."
361 },
362 "publish_crates": {
363 "type": "boolean",
364 "description": "For workflow=release. Publish hematite-cli to crates.io after a successful push."
365 },
366 "publish_voice_crate": {
367 "type": "boolean",
368 "description": "For workflow=release. Publish hematite-kokoros first, then hematite-cli."
369 }
370 },
371 "required": ["workflow"]
372 }),
373 ),
374 make_tool(
375 "run_workspace_workflow",
376 "Run an approval-gated workflow or script in the locked project workspace root. \
377 Use this for the current project's build, test, lint, fix, package.json scripts, just/task/make targets, explicit local script paths, exact workspace commands, or typed website server control. \
378 Website workflows are preferred when working on a local web app because they give Hematite a structured start/probe/validate/status/stop loop with stored runtime metadata instead of improvised shell. \
379 FORBIDDEN: The `command` field MUST be a real executable shell command (e.g. `npm install`, `cargo build`). \
380 NEVER put natural language, user-requests, or conversational intent into the `command` field. \
381 This tool is for the active workspace, not for Hematite's own maintainer scripts.",
382 serde_json::json!({
383 "type": "object",
384 "properties": {
385 "workflow": {
386 "type": "string",
387 "enum": ["build", "test", "lint", "fix", "package_script", "task", "just", "make", "script_path", "command", "website_start", "website_probe", "website_validate", "website_status", "website_stop"],
388 "description": "Which workspace workflow to run."
389 },
390 "name": {
391 "type": "string",
392 "description": "Required for workflow=package_script, task, just, or make. The script or target name."
393 },
394 "path": {
395 "type": "string",
396 "description": "Required for workflow=script_path. Relative path to a script inside the locked workspace root."
397 },
398 "command": {
399 "type": "string",
400 "description": "Required for workflow=command. Exact command to execute from the locked workspace root."
401 },
402 "mode": {
403 "type": "string",
404 "enum": ["dev", "preview", "start"],
405 "description": "Optional for workflow=website_start. Which website server mode to infer. Defaults to dev."
406 },
407 "script": {
408 "type": "string",
409 "description": "Optional for workflow=website_start. Exact package.json script to run instead of inferring one."
410 },
411 "url": {
412 "type": "string",
413 "description": "Optional for workflow=website_start, website_probe, or website_validate. Explicit local URL to probe, such as http://127.0.0.1:5173/."
414 },
415 "host": {
416 "type": "string",
417 "description": "Optional for workflow=website_start. Host used when constructing an inferred probe URL. Defaults to 127.0.0.1."
418 },
419 "port": {
420 "type": "integer",
421 "description": "Optional for workflow=website_start. Port used when constructing an inferred probe URL."
422 },
423 "label": {
424 "type": "string",
425 "description": "Optional for website workflows. Logical server name for storing runtime metadata. Defaults to default."
426 },
427 "routes": {
428 "type": "array",
429 "items": { "type": "string" },
430 "description": "Optional for workflow=website_validate. Relative routes or absolute URLs to validate, such as [\"/\", \"/pricing\", \"/about\"]."
431 },
432 "asset_limit": {
433 "type": "integer",
434 "description": "Optional for workflow=website_validate. Maximum number of linked local assets to probe after route validation."
435 },
436 "request_timeout_ms": {
437 "type": "integer",
438 "description": "Optional for workflow=website_start. Per-request HTTP timeout used by the readiness probe."
439 },
440 "timeout_ms": {
441 "type": "integer",
442 "description": "Optional timeout override in milliseconds. For website_start this is the boot/readiness timeout. For website_probe and website_status it is the probe timeout."
443 }
444 },
445 "required": ["workflow"]
446 }),
447 ),
448 make_tool(
449 "read_file",
450 "Read the contents of a file. For large files, use 'offset' and 'limit' to navigate.",
451 serde_json::json!({
452 "type": "object",
453 "properties": {
454 "path": {
455 "type": "string",
456 "description": "Path to the file, relative to the project root"
457 },
458 "offset": {
459 "type": "integer",
460 "description": "Starting line number (0-indexed)"
461 },
462 "limit": {
463 "type": "integer",
464 "description": "Number of lines to read"
465 }
466 },
467 "required": ["path"]
468 }),
469 ),
470 make_tool(
471 "lsp_definitions",
472 "Get the precise definition location (file:line:char) for a symbol at a specific position. \
473 Use this to jump to function/struct source code accurately.",
474 serde_json::json!({
475 "type": "object",
476 "properties": {
477 "path": { "type": "string", "description": "File path" },
478 "line": { "type": "integer", "description": "0-indexed line" },
479 "character": { "type": "integer", "description": "0-indexed character" }
480 },
481 "required": ["path", "line", "character"]
482 }),
483 ),
484 make_tool(
485 "lsp_references",
486 "Find all locations where a symbol is used across the entire workspace. \
487 Use this to understand the impact of a refactor or discover internal API users.",
488 serde_json::json!({
489 "type": "object",
490 "properties": {
491 "path": { "type": "string", "description": "File path" },
492 "line": { "type": "integer", "description": "0-indexed line" },
493 "character": { "type": "integer", "description": "0-indexed character" }
494 },
495 "required": ["path", "line", "character"]
496 }),
497 ),
498 make_tool(
499 "lsp_hover",
500 "Get hover information (documentation, function signature, type details) for a symbol. \
501 Use this for rapid spatial awareness without opening every file.",
502 serde_json::json!({
503 "type": "object",
504 "properties": {
505 "path": { "type": "string", "description": "File path" },
506 "line": { "type": "integer", "description": "0-indexed line" },
507 "character": { "type": "integer", "description": "0-indexed character" }
508 },
509 "required": ["path", "line", "character"]
510 }),
511 ),
512 make_tool(
513 "lsp_rename_symbol",
514 "Rename a symbol project-wide using the Language Server. Ensures all references are updated safely.",
515 serde_json::json!({
516 "type": "object",
517 "properties": {
518 "path": { "type": "string", "description": "File path" },
519 "line": { "type": "integer", "description": "0-indexed line" },
520 "character": { "type": "integer", "description": "0-indexed character" },
521 "new_name": { "type": "string", "description": "The new name for the symbol" }
522 },
523 "required": ["path", "line", "character", "new_name"]
524 }),
525 ),
526 make_tool(
527 "lsp_get_diagnostics",
528 "Get a list of current compiler errors and warnings for a specific file. \
529 Use this to verify your code compiles and and to find exactly where errors are located.",
530 serde_json::json!({
531 "type": "object",
532 "properties": {
533 "path": { "type": "string", "description": "File path" }
534 },
535 "required": ["path"]
536 }),
537 ),
538 make_tool(
539 "vision_analyze",
540 "Send an image file (screenshot, diagram, or UI mockup) to the multimodal vision model for technical analysis. \
541 Use this to identify UI bugs, confirm visual states, or understand architectural diagrams.",
542 serde_json::json!({
543 "type": "object",
544 "properties": {
545 "path": { "type": "string", "description": "Absolute or relative path to the image file." },
546 "prompt": { "type": "string", "description": "The specific question or analysis request for the vision model." }
547 },
548 "required": ["path", "prompt"]
549 }),
550 ),
551 make_tool(
552 "patch_hunk",
553 "Replace a specific line range [start_line, end_line] with new content. \
554 This is the most precise way to edit code and avoids search string failures.",
555 serde_json::json!({
556 "type": "object",
557 "properties": {
558 "path": { "type": "string", "description": "File path" },
559 "start_line": { "type": "integer", "description": "Starting line (1-indexed)" },
560 "end_line": { "type": "integer", "description": "Ending line (inclusive)" },
561 "replacement": { "type": "string", "description": "The new content for this range" }
562 },
563 "required": ["path", "start_line", "end_line", "replacement"]
564 }),
565 ),
566 make_tool(
567 "multi_search_replace",
568 "Replace multiple existing code blocks in a single file with new content. \
569 Each hunk specifies an EXACT 'search' string and a 'replace' string. \
570 The 'search' string MUST exactly match the existing file contents (including whitespace). \
571 This is the safest and most reliable way to make multiple structural edits.",
572 serde_json::json!({
573 "type": "object",
574 "properties": {
575 "path": { "type": "string", "description": "File path" },
576 "hunks": {
577 "type": "array",
578 "items": {
579 "type": "object",
580 "properties": {
581 "search": { "type": "string", "description": "Exact existing text to find and replace" },
582 "replace": { "type": "string", "description": "The new replacement text" }
583 },
584 "required": ["search", "replace"]
585 }
586 }
587 },
588 "required": ["path", "hunks"]
589 }),
590 ),
591 make_tool(
592 "write_file",
593 "Write content to a file, creating it (and any parent dirs) if needed. \
594 Overwrites existing files. \
595 SOVEREIGN PATHING: For files in common areas, use `@DESKTOP/file.txt`, `@DOCUMENTS/file.txt`, `@DOWNLOADS/file.txt`, or `@HOME/file.txt` to ensure 100% path accuracy.",
596 serde_json::json!({
597 "type": "object",
598 "properties": {
599 "path": { "type": "string", "description": "File path" },
600 "content": { "type": "string", "description": "Full file content to write" }
601 },
602 "required": ["path", "content"]
603 }),
604 ),
605 make_tool(
606 "create_directory",
607 "Authoritatively create a new directory (and any parent dirs) if they do not exist. \
608 Use this instead of raw shell (mkdir) for all filesystem organization. \
609 Supports both relative paths and absolute paths. \
610 SOVEREIGN PATHING: For directories in common areas, use `@DESKTOP/folder`, `@DOCUMENTS/folder`, `@DOWNLOADS/folder`, or `@HOME/folder` to ensure 100% path accuracy.",
611 serde_json::json!({
612 "type": "object",
613 "properties": {
614 "path": { "type": "string", "description": "Relative or absolute directory path" }
615 },
616 "required": ["path"]
617 }),
618 ),
619 make_tool(
620 "research_web",
621 "Perform a zero-cost technical search using DuckDuckGo. \
622 Use this to find documentation, latest API changes, or solutions to complex errors \
623 when your internal knowledge is insufficient. Returns snippets and URLs.",
624 serde_json::json!({
625 "type": "object",
626 "properties": {
627 "query": { "type": "string", "description": "The technical search query" }
628 },
629 "required": ["query"]
630 }),
631 ),
632 make_tool(
633 "fetch_docs",
634 "Fetch a URL and convert it to clean Markdown. Use this to 'read' the documentation \
635 links found via research_web. This tool uses a proxy to bypass IP blocks.",
636 serde_json::json!({
637 "type": "object",
638 "properties": {
639 "url": { "type": "string", "description": "The URL of the documentation to fetch" }
640 },
641 "required": ["url"]
642 }),
643 ),
644 make_tool(
645 "edit_file",
646 "Edit a file by replacing an exact string with another. \
647 The 'search' string does NOT need perfectly matching indentation (it is fuzzy), \
648 but the non-whitespace text must match exactly. Use this for targeted edits.",
649 serde_json::json!({
650 "type": "object",
651 "properties": {
652 "path": { "type": "string", "description": "File path" },
653 "search": {
654 "type": "string",
655 "description": "The exact text to find (must match whitespace/indentation precisely)"
656 },
657 "replace": {
658 "type": "string",
659 "description": "The replacement text"
660 }
661 },
662 "required": ["path", "search", "replace"]
663 }),
664 ),
665 make_tool(
666 "auto_pin_context",
667 "Select 1-3 core files to 'Lock' into prioritized memory. \
668 Use this to ensure the most important architecture files \
669 are always visible during complex refactorings.",
670 serde_json::json!({
671 "type": "object",
672 "properties": {
673 "paths": {
674 "type": "array",
675 "items": { "type": "string" }
676 },
677 "reason": { "type": "string" }
678 },
679 "required": ["paths", "reason"]
680 }),
681 ),
682 make_tool(
683 "list_pinned",
684 "List all files currently pinned in the model's active context.",
685 serde_json::json!({
686 "type": "object",
687 "properties": {}
688 }),
689 ),
690 make_tool(
691 "list_files",
692 "List files in a directory, optionally filtered by extension.",
693 serde_json::json!({
694 "type": "object",
695 "properties": {
696 "path": {
697 "type": "string",
698 "description": "Directory to list (default: current dir)"
699 },
700 "extension": {
701 "type": "string",
702 "description": "Only return files with this extension, e.g. 'rs', 'toml' (no dot)"
703 }
704 },
705 "required": []
706 }),
707 ),
708 make_tool(
709 "tail_file",
710 "Read the last N lines of a file — useful for log files, test output, \
711 build artifacts, and any large file where only the tail is relevant. \
712 Supports an optional grep filter to show only matching lines from the tail. \
713 Use this instead of read_file when you only need the end of a large file.",
714 serde_json::json!({
715 "type": "object",
716 "properties": {
717 "path": {
718 "type": "string",
719 "description": "Path to the file, relative to the project root"
720 },
721 "lines": {
722 "type": "integer",
723 "description": "Number of lines to return from the end (default: 50, max: 500)"
724 },
725 "grep": {
726 "type": "string",
727 "description": "Optional regex pattern — only return lines matching this pattern (applied before the tail slice)"
728 }
729 },
730 "required": ["path"]
731 }),
732 ),
733 make_tool(
734 "grep_files",
735 "Search file contents for a regex pattern. Supports context lines, files-only mode, \
736 and pagination. Returns file:line:content format by default.",
737 serde_json::json!({
738 "type": "object",
739 "properties": {
740 "pattern": {
741 "type": "string",
742 "description": "Regex pattern to search for (case-insensitive by default)"
743 },
744 "path": {
745 "type": "string",
746 "description": "Directory to search (default: current dir)"
747 },
748 "extension": {
749 "type": "string",
750 "description": "Only search files with this extension, e.g. 'rs'"
751 },
752 "mode": {
753 "type": "string",
754 "enum": ["content", "files_only"],
755 "description": "'content' (default) returns matching lines; 'files_only' returns only filenames"
756 },
757 "context": {
758 "type": "integer",
759 "description": "Lines of context before AND after each match (like rg -C)"
760 },
761 "before": {
762 "type": "integer",
763 "description": "Lines of context before each match (overrides context)"
764 },
765 "after": {
766 "type": "integer",
767 "description": "Lines of context after each match (overrides context)"
768 },
769 "head_limit": {
770 "type": "integer",
771 "description": "Max hunks (or files in files_only) to return (default: 50)"
772 },
773 "offset": {
774 "type": "integer",
775 "description": "Skip first N hunks/files - for pagination (default: 0)"
776 }
777 },
778 "required": ["pattern"]
779 }),
780 ),
781 make_tool(
782 "github_ops",
783 "Interact with GitHub via the `gh` CLI. Requires `gh` installed and `gh auth login` completed. \
784 Use for pull requests, issues, CI run status, and repo metadata. \
785 Never use `shell` to call `gh` — use this tool instead.",
786 serde_json::json!({
787 "type": "object",
788 "properties": {
789 "action": {
790 "type": "string",
791 "enum": [
792 "pr_list", "pr_view", "pr_create", "pr_status", "pr_checks", "pr_merge",
793 "issue_list", "issue_view", "issue_create",
794 "ci_status", "run_view",
795 "repo_view", "release_list"
796 ],
797 "description": "GitHub operation to perform"
798 },
799 "title": { "type": "string", "description": "PR or issue title (for create actions)" },
800 "body": { "type": "string", "description": "PR or issue body (for create actions)" },
801 "base": { "type": "string", "description": "Base branch for PR (default: main)" },
802 "draft": { "type": "boolean", "description": "Create PR as draft" },
803 "pr": { "type": "string", "description": "PR number or URL (for view/checks/merge)" },
804 "number": { "description": "Issue number (for issue_view)" },
805 "state": { "type": "string", "enum": ["open", "closed", "all"], "description": "Filter state for listings" },
806 "strategy": { "type": "string", "enum": ["merge", "squash", "rebase"], "description": "Merge strategy for pr_merge" },
807 "branch": { "type": "string", "description": "Branch name for ci_status (defaults to current branch)" },
808 "run_id": { "type": "string", "description": "Run ID for run_view" },
809 "limit": { "type": "integer", "description": "Max results to return (default 10)" }
810 },
811 "required": ["action"]
812 }),
813 ),
814 make_tool(
815 "git_commit",
816 "Stage all changes (git add -A) and create a commit. You MUST use 'Conventional Commits' (e.g. 'feat: description').",
817 serde_json::json!({
818 "type": "object",
819 "properties": {
820 "message": { "type": "string", "description": "Commit message (Conventional Commit style)" }
821 },
822 "required": ["message"]
823 }),
824 ),
825 make_tool(
826 "git_push",
827 "Push current branched changes to the remote origin. Requires an existing remote connection.",
828 serde_json::json!({
829 "type": "object",
830 "properties": {},
831 "required": []
832 }),
833 ),
834 make_tool(
835 "git_remote",
836 "View or manage git remotes. Use this for onboarding to GitHub/GitLab services.",
837 serde_json::json!({
838 "type": "object",
839 "properties": {
840 "action": {
841 "type": "string",
842 "enum": ["list", "add", "remove"],
843 "description": "Operation to perform"
844 },
845 "name": { "type": "string", "description": "Remote name (e.g. origin)" },
846 "url": { "type": "string", "description": "Remote URL (for 'add' action)" }
847 },
848 "required": ["action"]
849 }),
850 ),
851 make_tool(
852 "git_onboarding",
853 "High-level wizard to connect this repository to a remote host (GitHub/GitLab). \
854 Handles adding the remote and performing the initial tracking push in one step.",
855 serde_json::json!({
856 "type": "object",
857 "properties": {
858 "url": { "type": "string", "description": "The remote repository URL (HTTPS or SSH)" },
859 "name": { "type": "string", "description": "The remote name (default: origin)" },
860 "push": { "type": "boolean", "description": "Whether to perform an initial push to establish tracking (default: false)" }
861 },
862 "required": ["url"]
863 }),
864 ),
865 make_tool(
866 "verify_build",
867 "Run project verification for build, test, lint, or fix workflows. \
868 Prefer per-project verify profiles from `.hematite/settings.json`, and fall back to \
869 auto-detected defaults when no profile is configured. Returns BUILD OK or BUILD FAILED \
870 with command output. ALWAYS call this after scaffolding a new project or making structural changes.",
871 serde_json::json!({
872 "type": "object",
873 "properties": {
874 "action": {
875 "type": "string",
876 "enum": ["build", "test", "lint", "fix"],
877 "description": "Which verification action to run. Defaults to build."
878 },
879 "profile": {
880 "type": "string",
881 "description": "Optional named verify profile from `.hematite/settings.json`."
882 },
883 "timeout_secs": {
884 "type": "integer",
885 "description": "Optional timeout override for this verification run."
886 }
887 }
888 }),
889 ),
890 make_tool(
891 "git_worktree",
892 "Manage Git worktrees - isolated working directories on separate branches. \
893 Use 'add' to create a safe sandbox for risky/experimental work, \
894 'list' to see all worktrees, 'remove' to clean up, 'prune' to remove stale entries.",
895 serde_json::json!({
896 "type": "object",
897 "properties": {
898 "action": {
899 "type": "string",
900 "enum": ["list", "add", "remove", "prune"],
901 "description": "Worktree operation to perform"
902 },
903 "path": {
904 "type": "string",
905 "description": "Directory path for the new worktree (required for add/remove)"
906 },
907 "branch": {
908 "type": "string",
909 "description": "Branch name for the worktree (add only; defaults to path basename)"
910 }
911 },
912 "required": ["action"]
913 }),
914 ),
915 make_tool(
916 "clarify",
917 "Ask the user a clarifying question when you genuinely cannot proceed without \
918 more information. Use this ONLY when you are blocked and cannot make a \
919 reasonable assumption. Do NOT use it to ask permission - just act.",
920 serde_json::json!({
921 "type": "object",
922 "properties": {
923 "question": {
924 "type": "string",
925 "description": "The specific question to ask the user"
926 }
927 },
928 "required": ["question"]
929 }),
930 ),
931 make_tool(
932 "manage_tasks",
933 "Manage the persistent task ledger in .hematite/TASK.md. Use this to track long-term goals across restarts.",
934 crate::tools::tasks::get_tasks_params(),
935 ),
936 make_tool(
937 "maintain_plan",
938 "Document the architectural strategy and session blueprint in .hematite/PLAN.md. Use this to maintain context across restarts.",
939 crate::tools::plan::get_plan_params(),
940 ),
941 make_tool(
942 "generate_walkthrough",
943 "Generate a final session report in .hematite/WALKTHROUGH.md including achievements and verification results.",
944 crate::tools::plan::get_walkthrough_params(),
945 ),
946 make_tool(
947 "swarm",
948 "Delegate high-volume parallel tasks to a swarm of background workers. \
949 Use this for large-scale refactors, multi-file research, or parallel documentation updates. \
950 You must provide a 'tasks' array where each task has an 'id', 'target' (file), and 'instruction'.",
951 serde_json::json!({
952 "type": "object",
953 "properties": {
954 "tasks": {
955 "type": "array",
956 "items": {
957 "type": "object",
958 "properties": {
959 "id": { "type": "string" },
960 "target": { "type": "string", "description": "Target file or directory" },
961 "instruction": { "type": "string", "description": "Specific task for this worker" }
962 },
963 "required": ["id", "target", "instruction"]
964 }
965 },
966 "max_workers": {
967 "type": "integer",
968 "description": "Max parallel workers (default 3, auto-throttled by hardware)",
969 "default": 3
970 }
971 },
972 "required": ["tasks"]
973 }),
974 ),
975 ];
976
977 let lsp_defs = crate::tools::lsp_tools::get_lsp_definitions();
978 tools.push(make_tool(
979 "lsp_search_symbol",
980 "Find the location (file/line) of any function, struct, or variable in the entire project workspace. \
981 This is the fastest 'Golden Path' for navigating to a symbol by name.",
982 serde_json::json!({
983 "type": "object",
984 "properties": {
985 "query": { "type": "string", "description": "The name of the symbol to find (e.g. 'initialize_mcp')" }
986 },
987 "required": ["query"]
988 }),
989 ));
990 for def in lsp_defs {
991 let name = def["name"].as_str().unwrap();
992 tools.push(ToolDefinition {
993 tool_type: "function".into(),
994 function: ToolFunction {
995 name: name.into(),
996 description: def["description"].as_str().unwrap().into(),
997 parameters: def["parameters"].clone(),
998 },
999 metadata: tool_metadata_for_name(name),
1000 });
1001 }
1002
1003 tools
1004}
1005
1006pub async fn dispatch_builtin_tool(
1007 name: &str,
1008 args: &Value,
1009 config: &HematiteConfig,
1010 budget_tokens: usize,
1011) -> Result<String, String> {
1012 match name {
1013 "shell" => crate::tools::shell::execute(args, budget_tokens).await,
1014 "run_code" => crate::tools::code_sandbox::execute(args).await,
1015 "query_data" => crate::tools::data_query::query_data(args).await,
1016 "export_as_table" => crate::tools::data_query::export_as_table(args).await,
1017 "analyze_trends" => crate::tools::data_query::analyze_trends(args).await,
1018 "scientific_compute" => crate::tools::scientific::scientific_compute(args).await,
1019 "trace_runtime_flow" => crate::tools::runtime_trace::trace_runtime_flow(args).await,
1020 "describe_toolchain" => crate::tools::toolchain::describe_toolchain(args).await,
1021 "inspect_host" => crate::tools::host_inspect::inspect_host(args).await,
1022 "resolve_host_issue" => crate::tools::host_inspect::resolve_host_issue(args).await,
1023 "run_hematite_maintainer_workflow" => {
1024 crate::tools::repo_script::run_hematite_maintainer_workflow(args).await
1025 }
1026 "run_workspace_workflow" => crate::tools::workspace_workflow::run_workspace_workflow(args).await,
1027 "read_file" => crate::tools::file_ops::read_file(args, budget_tokens).await,
1028 "inspect_lines" => crate::tools::file_ops::inspect_lines(args).await,
1029 "tail_file" => crate::tools::file_ops::tail_file(args).await,
1030 "write_file" => crate::tools::file_ops::write_file(args).await,
1031 "create_directory" => crate::tools::file_ops::create_directory(args).await,
1032 "edit_file" => crate::tools::file_ops::edit_file(args).await,
1033 "patch_hunk" => crate::tools::file_ops::patch_hunk(args).await,
1034 "multi_search_replace" => crate::tools::file_ops::multi_search_replace(args).await,
1035 "list_files" => crate::tools::file_ops::list_files(args, budget_tokens).await,
1036 "grep_files" => crate::tools::file_ops::grep_files(args, budget_tokens).await,
1037 "github_ops" => crate::tools::github::execute(args).await,
1038 "git_commit" => crate::tools::git::execute(args).await,
1039 "git_push" => crate::tools::git::execute_push(args).await,
1040 "git_remote" => crate::tools::git::execute_remote(args).await,
1041 "git_onboarding" => crate::tools::git_onboarding::execute(args).await,
1042 "verify_build" => crate::tools::verify_build::execute(args).await,
1043 "git_worktree" => crate::tools::git::execute_worktree(args).await,
1044 "health" => crate::tools::health::execute(args).await,
1045 "research_web" => {
1046 crate::tools::research::execute_search(args, config.searx_url.clone()).await
1047 }
1048 "fetch_docs" => crate::tools::research::execute_fetch(args).await,
1049 "manage_tasks" => crate::tools::tasks::manage_tasks(args).await,
1050 "maintain_plan" => crate::tools::plan::maintain_plan(args).await,
1051 "generate_walkthrough" => crate::tools::plan::generate_walkthrough(args).await,
1052 "clarify" => {
1053 let q = args.get("question").and_then(|v| v.as_str()).unwrap_or("?");
1054 Ok(format!("[clarify] {q}"))
1055 }
1056 "vision_analyze" => Err(
1057 "Tool 'vision_analyze' must be dispatched by ConversationManager (it requires hardware engine access)."
1058 .into(),
1059 ),
1060 other => {
1061 if other.contains('.') || other.contains('/') || other.contains('\\') {
1062 Err(format!(
1063 "'{}' is a PATH, not a tool. You correctly identified the location, but you MUST use `read_file` or `list_files` (internal) or `powershell` (external) to access it.",
1064 other
1065 ))
1066 } else if matches!(other.to_lowercase().as_str(), "hematite" | "assistant" | "ai") {
1067 Err(format!(
1068 "'{}' is YOUR IDENTITY, not a tool. Use list_files or read_file to explore the codebase.",
1069 other
1070 ))
1071 } else if matches!(
1072 other.to_lowercase().as_str(),
1073 "thought" | "think" | "reasoning" | "thinking" | "internal"
1074 ) {
1075 Err(format!(
1076 "'{}' is NOT a tool - it is a reasoning tag. Output your answer as plain text after your <think> block.",
1077 other
1078 ))
1079 } else {
1080 Err(format!("Unknown tool: '{}'", other))
1081 }
1082 }
1083 }
1084}
1085
1086pub fn get_mutation_label(name: &str, args: &Value) -> Option<String> {
1087 match name {
1088 "shell" => {
1089 let cmd = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1090 if cmd.contains("rm ") || cmd.contains("del ") {
1091 Some("Destructive File Deletion".into())
1092 } else if cmd.contains("mkdir ") {
1093 Some("Directory Creation".into())
1094 } else {
1095 Some("Execute Shell Command".into())
1096 }
1097 }
1098 "write_file" => {
1099 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
1100 Some(format!("Create/Overwrite File: {}", path))
1101 }
1102 "create_directory" => {
1103 let path = args
1104 .get("path")
1105 .and_then(|v| v.as_str())
1106 .unwrap_or("folder");
1107 Some(format!("Create Directory: {}", path))
1108 }
1109 "edit_file" | "patch_hunk" | "multi_search_replace" => {
1110 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
1111 Some(format!("Surgical Code Mutation: {}", path))
1112 }
1113 "github_ops" => {
1114 let action = args.get("action").and_then(|v| v.as_str()).unwrap_or("?");
1115 match action {
1116 "pr_create" | "pr_merge" | "issue_create" => Some(format!("GitHub: {}", action)),
1117 _ => None,
1118 }
1119 }
1120 "git_commit" => Some("Permanent Version History Commit".into()),
1121 "git_push" => Some("Remote Origin Synchronisation (Push)".into()),
1122 "resolve_host_issue" => Some("System-Level Host Remediation".into()),
1123 "run_workspace_workflow" => Some("Automated Workspace Re-alignment".into()),
1124 _ => None,
1125 }
1126}