1use serde_json::Value;
2
3pub async fn describe_toolchain(args: &Value) -> Result<String, String> {
4 let topic = args.get("topic").and_then(|v| v.as_str()).unwrap_or("all");
5 let question =
6 normalize_question_label(args.get("question").and_then(|v| v.as_str()).unwrap_or(""));
7
8 match topic {
9 "read_only_codebase" => Ok(describe_read_only_codebase_tools()),
10 "user_turn_plan" => Ok(describe_user_turn_plan(question)),
11 "voice_latency_plan" => Ok(describe_voice_latency_plan(question)),
12 "host_inspection_plan" => Ok(describe_host_inspection_plan(question)),
13 "all" => Ok(format!(
14 "{}\n\n{}",
15 describe_read_only_codebase_tools(),
16 describe_best_plan_for_question(question)
17 )),
18 other => Err(format!(
19 "Unknown topic '{}'. Use one of: read_only_codebase, user_turn_plan, voice_latency_plan, host_inspection_plan, all.",
20 other
21 )),
22 }
23}
24
25fn describe_best_plan_for_question(question: &str) -> String {
26 if is_voice_latency_question(question) {
27 describe_voice_latency_plan(question)
28 } else if is_host_inspection_question(question) {
29 describe_host_inspection_plan(question)
30 } else {
31 describe_user_turn_plan(question)
32 }
33}
34
/// Returns `true` when `question` mentions voice together with a lag-related term.
///
/// Matching is a case-insensitive substring search: the text must contain
/// "voice" and at least one of "lag", "behind visible text", or "latency".
fn is_voice_latency_question(question: &str) -> bool {
    const LAG_TERMS: [&str; 3] = ["lag", "behind visible text", "latency"];

    let text = question.to_lowercase();
    // Any text containing "voice output" also contains "voice", so a single
    // check on "voice" covers both phrasings.
    if !text.contains("voice") {
        return false;
    }
    LAG_TERMS.iter().any(|term| text.contains(*term))
}
42
/// Returns `true` when `question` looks like it is about the host machine
/// (PATH, package managers, network, services, ports, processes, folders, ...).
///
/// Matching is case-insensitive. A term only counts when it starts at a word
/// boundary, i.e. at the beginning of the text or right after a
/// non-alphanumeric character. Plain substring matching would classify words
/// such as "support", "important", "reported" (contain "port"), "conversion"
/// (contains "version"), or "empathy" (contains "path") as host questions.
/// Inflected forms that extend a term, such as "ports", "services", or
/// "processes", still match because only the leading boundary is required.
fn is_host_inspection_question(question: &str) -> bool {
    // Plural entries are unnecessary: a plural starts with its singular term.
    const HOST_TERMS: &[&str] = &[
        "path",
        "package manager",
        "environment",
        "env doctor",
        "network",
        "adapter",
        "dns",
        "gateway",
        "ip address",
        "service",
        "daemon",
        "startup type",
        "desktop",
        "downloads",
        "toolchain",
        "installed",
        "version",
        "directory",
        "folder",
        "computer",
        "machine",
        "port",
        "process",
    ];

    let lower = question.to_lowercase();

    // `idx` always comes from `match_indices`, so it is a valid char boundary.
    let starts_a_word = |idx: usize| {
        lower[..idx]
            .chars()
            .next_back()
            .map_or(true, |prev| !prev.is_alphanumeric())
    };

    HOST_TERMS.iter().any(|term| {
        lower
            .match_indices(*term)
            .any(|(idx, _)| starts_a_word(idx))
    })
}
75
/// Extracts the bare question text from a possibly larger prompt.
///
/// The input is trimmed first. If it contains a `Question:` tag and non-blank
/// text follows the tag, the result is that text cut at the earliest of the
/// markers `Requirements:`, `Requirement:`, or `Initial Investigation Order`
/// (when any is present), trimmed again. In every other case the trimmed input
/// is returned as-is.
fn normalize_question_label(question: &str) -> &str {
    const QUESTION_TAG: &str = "Question:";
    const END_MARKERS: [&str; 3] = [
        "Requirements:",
        "Requirement:",
        "Initial Investigation Order",
    ];

    let whole = question.trim();

    let body = match whole.find(QUESTION_TAG) {
        Some(tag_at) => whole[tag_at + QUESTION_TAG.len()..].trim(),
        None => return whole,
    };
    if body.is_empty() {
        // Nothing usable after the tag: keep the full trimmed text.
        return whole;
    }

    // Cut at whichever marker appears first; keep everything when none appears.
    let cut = END_MARKERS
        .iter()
        .filter_map(|marker| body.find(*marker))
        .min()
        .unwrap_or(body.len());

    body[..cut].trim()
}
102
/// Returns the static catalogue of Hematite's read-only tools.
///
/// The text is made of a title, one section per tool family, and a closing
/// "Best Read-Only Toolchain" section, each separated by a blank line.
fn describe_read_only_codebase_tools() -> String {
    const TITLE: &str = "Verified Hematite read-only toolchain";

    const TEXT_SEARCH: &str = "Text search and file inspection\n\
        - `map_project`\n\
        Good for: first-pass spatial awareness of the repository layout, likely entrypoints, core owner files, and a small set of extracted top symbols.\n\
        Bad for: exact control flow, full call graphs, or precise line-level inspection.\n\
        Choose it over another tool when: you need a compact architecture map before diving into files or LSP.\n\
        - `list_files`\n\
        Good for: enumerating files in a directory, optionally narrowed by extension.\n\
        Bad for: content search or semantic understanding.\n\
        Choose it over another tool when: you know the directory area but need concrete file candidates.\n\
        - `grep_files`\n\
        Good for: fast textual search across many files, including regex and context lines.\n\
        Bad for: exact symbol definitions, types, or call relationships.\n\
        Choose it over another tool when: you know a string pattern but not the owning symbol.\n\
        - `read_file`\n\
        Good for: reading a full file or a large chunk once you know the target path.\n\
        Bad for: precise line-range inspection in very large files.\n\
        Choose it over another tool when: you already know the file and need broad local context.\n\
        - `inspect_lines`\n\
        Good for: tight, line-ranged inspection after you know the relevant window.\n\
        Bad for: first-pass exploration or cross-file search.\n\
        Choose it over another tool when: you want exact nearby lines without rereading the whole file.";

    const SEMANTIC: &str = "Semantic and LSP tools\n\
        - `lsp_search_symbol`\n\
        Good for: jumping to a named symbol quickly across the workspace.\n\
        Bad for: fuzzy textual patterns or unknown names.\n\
        Choose it over another tool when: you know the symbol name and want the fastest semantic entry point.\n\
        - `lsp_definitions`\n\
        Good for: confirming the exact definition site of a symbol at a position.\n\
        Bad for: finding every caller or usage.\n\
        Choose it over another tool when: you already have a coordinate and need the true definition.\n\
        - `lsp_references`\n\
        Good for: tracing who uses a symbol across the project.\n\
        Bad for: initial discovery when you do not know the symbol yet.\n\
        Choose it over another tool when: you need impact analysis or call-flow expansion.\n\
        - `lsp_hover`\n\
        Good for: quick type and documentation context at a position.\n\
        Bad for: ownership mapping or full call graphs.\n\
        Choose it over another tool when: you need a compact semantic summary before deeper reading.\n\
        - `lsp_get_diagnostics`\n\
        Good for: current compiler and analysis errors on a file.\n\
        Bad for: architecture understanding.\n\
        Choose it over another tool when: you need to validate file health or check active breakage.\n\
        Conditional: usefulness depends on the language server being available and healthy.";

    const RUNTIME: &str = "Runtime and control-flow tools\n\
        - `trace_runtime_flow`\n\
        Good for: authoritative runtime/control-flow questions such as user turns, startup, session reset, and reasoning separation.\n\
        Bad for: arbitrary feature ownership outside the built-in runtime reports.\n\
        Choose it over another tool when: the user asks how data or events move through Hematite.";

    const WEB: &str = "Web research and docs\n\
        - `research_web`\n\
        Good for: external technical search when repo context is not enough.\n\
        Bad for: internal code truth.\n\
        Choose it over another tool when: you need current docs, standards, or API changes outside the repo.\n\
        Conditional: only relevant when external information is needed.\n\
        - `fetch_docs`\n\
        Good for: reading a specific documentation URL found elsewhere.\n\
        Bad for: discovery.\n\
        Choose it over another tool when: you already have the URL and want readable docs.\n\
        Conditional: usually paired with `research_web`.";

    const VISION: &str = "Vision\n\
        - `vision_analyze`\n\
        Good for: screenshots, diagrams, and visual state confirmation.\n\
        Bad for: source-of-truth code tracing.\n\
        Choose it over another tool when: the input is visual rather than textual.\n\
        Conditional: only relevant when an image is available and the vision path is enabled.";

    const SHELL_AND_CONTEXT: &str = "Shell and context management\n\
        - `inspect_host`\n\
        Good for: structured read-only inspection of the current machine such as common developer tool versions, PATH analysis, environment/package-manager health, grounded fix plans for common workstation failures, network snapshots, service snapshots, process snapshots, desktop items, Downloads summaries, listening ports, repo-doctor checks, and arbitrary directory or disk-size reports.\n\
        Bad for: custom build commands, arbitrary process control, or any mutation.\n\
        Choose it over another tool when: the user is asking about the host machine rather than repo internals and the question fits one of its built-in topics.\n\
        - `shell`\n\
        Good for: builds, tests, environment checks, and OS-level read-only inspection.\n\
        Bad for: precise code understanding when built-in file and LSP tools are available.\n\
        Choose it over another tool when: you need runtime verification, a custom command, or host information that `inspect_host` cannot answer directly.\n\
        - `auto_pin_context`\n\
        Good for: keeping 1-3 critical files in active memory during a complex investigation.\n\
        Bad for: discovery by itself.\n\
        Choose it over another tool when: the task spans several important files and you need them held stable.\n\
        - `list_pinned`\n\
        Good for: confirming what is pinned right now.\n\
        Bad for: learning anything new about the codebase.\n\
        Choose it over another tool when: you want to inspect or audit the current pinned set.";

    const MCP: &str = "Optional external surface\n\
        - `mcp__*` tools\n\
        Good for: optional external capabilities from configured MCP servers.\n\
        Bad for: baseline assumptions about Hematite's built-in tool surface.\n\
        Choose them over another tool when: a configured MCP server is active and directly relevant.\n\
        Conditional: they only exist when MCP servers are configured and loaded.";

    const BEST_TOOLCHAIN: &str = "Best Read-Only Toolchain\n\
        - Start with `trace_runtime_flow` for runtime wiring questions.\n\
        - Use `map_project` only when ownership or structure is still unclear.\n\
        - Use `grep_files` for textual discovery, then switch to `read_file` or `inspect_lines` for exact local context.\n\
        - Use `lsp_search_symbol`, `lsp_definitions`, `lsp_references`, and `lsp_hover` for semantic confirmation once you know the area.\n\
        - Use `inspect_host` before `shell` for read-only questions about PATH, installed tools, environment/package-manager health, grounded fix plans for common workstation failures, network state, service state, running processes, desktop items, Downloads size, listening ports, repo-health summaries, or directory/disk summaries.\n\
        - If the user asks how to fix a common workstation problem such as `cargo not found`, `port 3000 already in use`, or `LM Studio not reachable`, use `fix_plan` first instead of `env_doctor`, `path`, or `ports`.\n\
        - If `env_doctor` answers a PATH/package-manager sanity question, stop there unless the user explicitly asks for the raw PATH list.\n\
        - Use `shell` only when the answer requires runtime verification or host-state information beyond `inspect_host`.\n\
        - Use `research_web`, `fetch_docs`, and `vision_analyze` only when the question truly depends on external docs or images.";

    // The `\` line continuations above drop the following line's leading
    // whitespace, so the source indentation never reaches the output.
    [
        TITLE,
        TEXT_SEARCH,
        SEMANTIC,
        RUNTIME,
        WEB,
        VISION,
        SHELL_AND_CONTEXT,
        MCP,
        BEST_TOOLCHAIN,
    ]
    .join("\n\n")
}
205
/// Renders the read-only investigation plan for user-turn (TUI -> model -> TUI)
/// questions.
///
/// `question` is echoed in the heading using debug formatting (quoted and
/// escaped). When it is blank, a built-in default question is shown instead.
fn describe_user_turn_plan(question: &str) -> String {
    const DEFAULT_QUESTION: &str =
        "How does Hematite move a user message from the TUI to the model and back?";

    // Static part of the plan; only the heading depends on the question.
    const PLAN_BODY: &str = "1. `trace_runtime_flow`\n\
        Why first: it is the most authoritative built-in tool for runtime/control-flow questions and already knows the exact Hematite event path categories such as `user_turn`.\n\
        Use: request the `user_turn` report first so you get the verified top-level path before reading source.\n\
        2. `read_file`\n\
        Why second: once the runtime trace identifies the owning files, read the specific owners directly instead of guessing from memory.\n\
        Use: inspect `src/main.rs`, `src/ui/tui.rs`, `src/agent/conversation.rs`, and `src/agent/inference.rs` in broad chunks.\n\
        3. `inspect_lines`\n\
        Why third: after the broad read, narrow to the exact line windows that contain `run_app`, `run_agent_task`, `ConversationManager::run_turn`, and the relevant `InferenceEvent` handling.\n\
        Use: confirm the exact local flow without rereading unrelated code.\n\
        4. `lsp_search_symbol`\n\
        Why fourth: if a specific symbol from the trace needs precise navigation, this is the fastest semantic jump.\n\
        Use: search for symbols like `run_app`, `run_agent_task`, `ConversationManager::run_turn`, `InferenceEvent`, `extract_think_block`, or `strip_think_blocks` only after the trace names them.\n\
        5. `lsp_definitions`\n\
        Why fifth: confirm the true definition site when a symbol appears in several places or when the file read is ambiguous.\n\
        Use: anchor the investigation on the exact definition instead of a textual match.\n\
        6. `lsp_references`\n\
        Why sixth: expand outward from a confirmed symbol to see who calls it and where the next handoff occurs.\n\
        Use: trace the path from TUI submit code into the agent loop and then into inference handling.\n\
        7. `lsp_hover`\n\
        Why seventh: fill semantic gaps quickly without extra reading when a type or event payload is unclear.\n\
        Use: confirm what an enum variant or function signature carries at that point in the flow.\n\
        8. `auto_pin_context`\n\
        Why eighth: once the 2-3 core files are obvious, pin them so a longer investigation does not drift.\n\
        Use: pin the owner files after the first pass, not before.\n\
        9. `shell`\n\
        Why last and only if needed: runtime verification belongs after source truth, not before it.\n\
        Use: only when you need a build, a health check, or another host-level confirmation that the static code reading cannot provide.\n\n\
        Tools I would not start with\n\
        - `map_project`: useful for initial orientation, but unnecessary if `trace_runtime_flow` already identifies the owner files.\n\
        - `grep_files`: useful for fuzzy discovery, but weaker than `trace_runtime_flow` plus LSP once the target path is a known runtime flow.\n\
        - `research_web`, `fetch_docs`, `vision_analyze`: not first-choice tools for this repo-local runtime question.\n\n\
        Best Read-Only Toolchain\n\
        `trace_runtime_flow` -> `read_file` -> `inspect_lines` -> `lsp_search_symbol` -> `lsp_definitions` / `lsp_references` -> `lsp_hover` -> `auto_pin_context` -> optional `shell`";

    // Blank input selects the default; otherwise the question is shown untrimmed.
    let label = match question.trim() {
        "" => DEFAULT_QUESTION,
        _ => question,
    };

    format!(
        "Concrete read-only investigation plan for: {:?}\n\n{}",
        label, PLAN_BODY
    )
}
251
/// Renders the read-only investigation plan for questions about voice output
/// lagging behind visible text.
///
/// `question` is echoed in the heading using debug formatting (quoted and
/// escaped). When it is blank, a built-in default question is shown instead.
fn describe_voice_latency_plan(question: &str) -> String {
    const DEFAULT_QUESTION: &str = "If I needed to understand why Hematite's voice output can lag behind visible text, what tools would I choose first, in order, and why?";

    // Static part of the plan; only the heading depends on the question.
    const PLAN_BODY: &str = "1. `trace_runtime_flow`\n\
        Why first: it is the only authoritative built-in runtime/control-flow report, and it already covers the visible text path and the voice path inside a normal `user_turn` trace.\n\
        Use: request the `user_turn` report first so you can see where visible `InferenceEvent::Token` handling and `app.voice_manager.speak(...)` diverge.\n\
        2. `read_file`\n\
        Why second: once the high-level flow is confirmed, read the owner files directly instead of inventing helper layers.\n\
        Use: inspect `src/ui/tui.rs` for `InferenceEvent::Token`, `InferenceEvent::MutedToken`, and `InferenceEvent::Done` handling, then inspect `src/ui/voice.rs` for `VoiceManager::new`, `VoiceManager::speak`, and `VoiceManager::flush`.\n\
        3. `inspect_lines`\n\
        Why third: narrow to the exact windows where visible text is appended and where voice work is queued or flushed.\n\
        Use: inspect the token-handling block in `src/ui/tui.rs` and the queueing / synthesis blocks in `src/ui/voice.rs` without rereading the full files.\n\
        4. `lsp_search_symbol`\n\
        Why fourth: if you need precise navigation after the first file read, this is the fastest semantic jump.\n\
        Use: search for `VoiceManager`, `VoiceManager::speak`, `VoiceManager::flush`, and `run_app`.\n\
        5. `lsp_references`\n\
        Why fifth: confirm every place where the TUI calls into the voice path and where the relevant voice methods are used.\n\
        Use: trace who calls `VoiceManager::speak` and `VoiceManager::flush` to see whether lag is created before queueing, during streaming, or at turn finalization.\n\
        6. `lsp_hover`\n\
        Why sixth: quickly confirm type signatures and payload details for `InferenceEvent` handling and voice methods without extra full-file reading.\n\
        Use: inspect the event variants and the `VoiceManager` method surfaces when the control-flow meaning is still unclear.\n\
        7. `lsp_definitions`\n\
        Why seventh: anchor the final understanding on the true definition sites if a search result or reference set is ambiguous.\n\
        Use: confirm exact definition coordinates for `VoiceManager` methods and the relevant `InferenceEvent` enum variants.\n\
        8. `shell`\n\
        Why last and only if needed: shell is for runtime verification after the source investigation, not before it.\n\
        Use: only if you need to confirm host-level load or reproduce the lag under observation after the static code path is understood.\n\n\
        Built-in authoritative tool note\n\
        - `trace_runtime_flow` is authoritative for part of this question because it already describes the visible chat path and the voice path inside a `user_turn` trace.\n\
        - It is not sufficient by itself to explain why lag happens inside `VoiceManager`, so the next step is direct file reading in `src/ui/tui.rs` and `src/ui/voice.rs`.\n\n\
        Tools I would not start with\n\
        - `mcp__*` tools: optional external surface, not the baseline for this built-in voice investigation.\n\
        - `research_web`, `fetch_docs`, `vision_analyze`: not first-choice tools for a repo-local voice-latency question.\n\
        - `map_project`: useful if ownership were unclear, but unnecessary here because the runtime trace and symbol names already point to the likely owners.\n\n\
        Initial Investigation Order\n\
        `trace_runtime_flow` -> `read_file` -> `inspect_lines` -> `lsp_search_symbol` -> `lsp_references` -> `lsp_hover` -> `lsp_definitions` -> optional `shell`";

    // Blank input selects the default; otherwise the question is shown untrimmed.
    let label = match question.trim() {
        "" => DEFAULT_QUESTION,
        _ => question,
    };

    format!(
        "Concrete read-only investigation plan for: {:?}\n\n{}",
        label, PLAN_BODY
    )
}
297
/// Renders the read-only investigation plan for host-machine questions.
///
/// `question` is echoed in the heading using debug formatting (quoted and
/// escaped). When it is blank, a built-in default question is shown instead.
fn describe_host_inspection_plan(question: &str) -> String {
    const DEFAULT_QUESTION: &str = "What is the best read-only tool order for checking my machine state, installed tools, PATH, environment/package-manager health, network adapters, services, desktop items, or folder sizes?";

    // Static part of the plan; only the heading depends on the question.
    const PLAN_BODY: &str = "1. `inspect_host`\n\
        Why first: it is the built-in structured host-inspection tool, so it can answer common machine-state questions without forcing the model to invent shell commands.\n\
        Use: start with the closest topic such as `summary`, `toolchains`, `path`, `env_doctor`, `fix_plan`, `network`, `services`, `processes`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
        2. `shell`\n\
        Why second and only if needed: shell is still the fallback for custom host checks that go beyond `inspect_host`, but it should not be the first move for routine read-only inspection.\n\
        Use: confirm a special case, run a project-specific command, or inspect host state that has no structured built-in topic yet.\n\
        3. `read_file` / `list_files`\n\
        Why third and conditional: if the question shifts from host state back into the workspace, move to file tools instead of staying in shell.\n\
        Use: inspect repo files, logs, or config once the machine-level question identifies the relevant path.\n\n\
        Tools I would not start with\n\
        - `grep_files`: useful for repo text search, but not the right first tool for PATH or desktop questions.\n\
        - `trace_runtime_flow`: useful for Hematite runtime architecture, not machine-state inspection.\n\
        - `research_web`, `fetch_docs`, `vision_analyze`: only relevant if the question expands beyond the local machine.\n\n\
        Initial Investigation Order\n\
        `inspect_host` -> optional `shell` -> optional repo/file tools";

    // Blank input selects the default; otherwise the question is shown untrimmed.
    let label = match question.trim() {
        "" => DEFAULT_QUESTION,
        _ => question,
    };

    format!(
        "Concrete read-only investigation plan for: {:?}\n\n{}",
        label, PLAN_BODY
    )
}