Skip to main content

hematite/agent/
conversation.rs

1use crate::agent::architecture_summary::{
2    build_architecture_overview_answer, prune_architecture_trace_batch,
3    prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4    prune_redirected_shell_batch, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7    build_about_answer, build_architect_session_reset_plan, build_authorization_policy_answer,
8    build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9    build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10    build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11    build_session_reset_semantics_answer, build_tool_classes_answer,
12    build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13    build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16    ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17    ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20    action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21    is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24    attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25    RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28    classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
29    needs_computation_sandbox, preferred_host_inspection_topic, preferred_maintainer_workflow,
30    preferred_workspace_workflow, DirectAnswerKind, QueryIntentClass,
31};
32use crate::agent::tool_registry::dispatch_builtin_tool;
33// SystemPromptBuilder is no longer used — InferenceEngine::build_system_prompt() is canonical.
34use crate::agent::compaction::{self, CompactionConfig};
35use crate::ui::gpu_monitor::GpuState;
36
37use serde_json::Value;
38use std::sync::Arc;
39use tokio::sync::{mpsc, Mutex};
40// -- Session persistence -------------------------------------------------------
41
/// One user submission: the typed text plus at most one document and one
/// image attachment.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
    /// Text the user entered for this turn.
    pub text: String,
    /// Document attached to the turn, if any.
    pub attached_document: Option<AttachedDocument>,
    /// Image attached to the turn, if any.
    pub attached_image: Option<AttachedImage>,
}

/// A document attached to a user turn, carried inline as text.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name of the document.
    pub name: String,
    /// Full textual content of the document.
    pub content: String,
}

/// An image attached to a user turn, referenced by path.
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name of the image.
    pub name: String,
    /// Location of the image as a string path.
    pub path: String,
}

impl UserTurn {
    /// Creates a text-only turn; both attachment slots are left empty.
    pub fn text(text: impl Into<String>) -> Self {
        Self {
            text: text.into(),
            ..Self::default()
        }
    }
}
70
/// Serialized form of a session. `load_checkpoint` and `load_session_data`
/// deserialize this from the file located by `session_path()`.
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
    /// Running summary stored with the session; `None` when none was saved.
    running_summary: Option<String>,
    /// Session memory stored with the session; defaults when the key is absent.
    #[serde(default)]
    session_memory: crate::agent::compaction::SessionMemory,
    /// Last user message from the previous session — shown as resume hint on startup.
    #[serde(default)]
    last_goal: Option<String>,
    /// Number of real inference turns completed in the previous session.
    #[serde(default)]
    turn_count: u32,
}
83
/// Snapshot of the previous session, surfaced on startup when a workspace is
/// resumed after a restart or crash.
///
/// Built by `load_checkpoint` from the persisted `SavedSession`.
pub struct CheckpointResume {
    /// Non-blank last goal recorded by the previous session.
    pub last_goal: String,
    /// Turn count recorded by the previous session (never zero here).
    pub turn_count: u32,
    /// Up to four entries from the saved working set, sorted.
    pub working_files: Vec<String>,
    /// `successful` flag of the last recorded verification, if one exists.
    pub last_verify_ok: Option<bool>,
}
92
93/// Load the prior-session checkpoint from `.hematite/session.json`.
94/// Returns `None` when there is no prior session or it has no real turns.
95pub fn load_checkpoint() -> Option<CheckpointResume> {
96    let path = session_path();
97    let data = std::fs::read_to_string(&path).ok()?;
98    let saved: SavedSession = serde_json::from_str(&data).ok()?;
99    let goal = saved.last_goal.filter(|g| !g.trim().is_empty())?;
100    if saved.turn_count == 0 {
101        return None;
102    }
103    let mut working_files: Vec<String> = saved
104        .session_memory
105        .working_set
106        .into_iter()
107        .take(4)
108        .collect();
109    working_files.sort();
110    let last_verify_ok = saved.session_memory.last_verification.map(|v| v.successful);
111    Some(CheckpointResume {
112        last_goal: goal,
113        turn_count: saved.turn_count,
114        working_files,
115        last_verify_ok,
116    })
117}
118
/// Bookkeeping used by the action-grounding checks. All fields start at their
/// `Default` values.
// NOTE(review): the per-field notes below are inferred from names and types
// only; the code that reads and writes them is outside this view — confirm.
#[derive(Default)]
struct ActionGroundingState {
    // Presumably the index of the current turn — confirm against the updater.
    turn_index: u64,
    // Presumably path -> turn index at which the path was observed — confirm.
    observed_paths: std::collections::HashMap<String, u64>,
    // Presumably path -> turn index at which the path was inspected — confirm.
    inspected_paths: std::collections::HashMap<String, u64>,
    // Presumably the turn of the most recent verify_build run, if any — confirm.
    last_verify_build_turn: Option<u64>,
    last_verify_build_ok: bool,
    last_failed_build_paths: Vec<String>,
    code_changed_since_verify: bool,
    /// Track topics redirected from shell to inspect_host in the current turn to break loops.
    redirected_host_inspection_topics: std::collections::HashMap<String, u64>,
}
131
/// Scope guard over a shared boolean flag: when the guard is dropped the flag
/// is reset to `false`.
struct PlanExecutionGuard {
    /// Flag cleared on drop.
    flag: Arc<std::sync::atomic::AtomicBool>,
}

impl Drop for PlanExecutionGuard {
    /// Clears the flag with sequentially-consistent ordering.
    fn drop(&mut self) {
        use std::sync::atomic::Ordering;

        self.flag.store(false, Ordering::SeqCst);
    }
}
141
/// Operator-selectable workflow mode for the agent.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    #[default]
    Auto,
    Ask,
    Code,
    Architect,
    ReadOnly,
    /// Clean conversational mode: lighter prompt, no coding-agent scaffolding,
    /// tools available but not pushed. Vein RAG still runs for context.
    Chat,
    /// Teacher/guide mode: inspect the real machine state first, then walk the
    /// user through the admin/config task as a grounded, numbered tutorial.
    /// Never executes write operations itself; the user performs them manually.
    Teach,
}

impl WorkflowMode {
    /// Upper-case label for this mode.
    fn label(self) -> &'static str {
        match self {
            Self::Auto => "AUTO",
            Self::Ask => "ASK",
            Self::Code => "CODE",
            Self::Architect => "ARCHITECT",
            Self::ReadOnly => "READ-ONLY",
            Self::Chat => "CHAT",
            Self::Teach => "TEACH",
        }
    }

    /// True for `Ask`, `Architect`, `ReadOnly` and `Teach`; false otherwise.
    fn is_read_only(self) -> bool {
        match self {
            Self::Ask | Self::Architect | Self::ReadOnly | Self::Teach => true,
            Self::Auto | Self::Code | Self::Chat => false,
        }
    }

    /// True only for `Chat`.
    pub(crate) fn is_chat(self) -> bool {
        self == Self::Chat
    }
}
186
187fn session_path() -> std::path::PathBuf {
188    if let Ok(overridden) = std::env::var("HEMATITE_SESSION_PATH") {
189        return std::path::PathBuf::from(overridden);
190    }
191    crate::tools::file_ops::workspace_root()
192        .join(".hematite")
193        .join("session.json")
194}
195
196fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
197    let path = session_path();
198    if !path.exists() {
199        return (None, crate::agent::compaction::SessionMemory::default());
200    }
201    let Ok(data) = std::fs::read_to_string(&path) else {
202        return (None, crate::agent::compaction::SessionMemory::default());
203    };
204    let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
205        return (None, crate::agent::compaction::SessionMemory::default());
206    };
207    (saved.running_summary, saved.session_memory)
208}
209
210fn reset_task_files() {
211    let root = crate::tools::file_ops::workspace_root();
212    let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
213    let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
214    let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
215    let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
216    let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
217    let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
218}
219
220fn purge_persistent_memory() {
221    let root = crate::tools::file_ops::workspace_root();
222    let mem_dir = root.join(".hematite").join("memories");
223    if mem_dir.exists() {
224        let _ = std::fs::remove_dir_all(&mem_dir);
225        let _ = std::fs::create_dir_all(&mem_dir);
226    }
227
228    let log_dir = root.join(".hematite_logs");
229    if log_dir.exists() {
230        if let Ok(entries) = std::fs::read_dir(&log_dir) {
231            for entry in entries.flatten() {
232                let _ = std::fs::write(entry.path(), "");
233            }
234        }
235    }
236}
237
238fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
239    let mut out = prompt.trim().to_string();
240    if let Some(doc) = user_turn.attached_document.as_ref() {
241        out = format!(
242            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
243            doc.name, doc.content, out
244        );
245    }
246    if let Some(image) = user_turn.attached_image.as_ref() {
247        out = if out.is_empty() {
248            format!("[Attached image: {}]", image.name)
249        } else {
250            format!("[Attached image: {}]\n\n{}", image.name, out)
251        };
252    }
253    out
254}
255
256fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
257    let mut prefixes = Vec::new();
258    if let Some(doc) = user_turn.attached_document.as_ref() {
259        prefixes.push(format!("[Attached document: {}]", doc.name));
260    }
261    if let Some(image) = user_turn.attached_image.as_ref() {
262        prefixes.push(format!("[Attached image: {}]", image.name));
263    }
264    if prefixes.is_empty() {
265        prompt.to_string()
266    } else if prompt.trim().is_empty() {
267        prefixes.join("\n")
268    } else {
269        format!("{}\n{}", prefixes.join("\n"), prompt)
270    }
271}
272
/// Coarse classification of a runtime failure, used to pick tags, operator
/// guidance and recovery behaviour.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    ContextWindow,
    ProviderDegraded,
    ToolArgMalformed,
    ToolPolicyBlocked,
    ToolLoop,
    VerificationFailed,
    EmptyModelResponse,
    Unknown,
}

impl RuntimeFailureClass {
    /// Stable snake_case tag for this class.
    fn tag(self) -> &'static str {
        match self {
            Self::ContextWindow => "context_window",
            Self::ProviderDegraded => "provider_degraded",
            Self::ToolArgMalformed => "tool_arg_malformed",
            Self::ToolPolicyBlocked => "tool_policy_blocked",
            Self::ToolLoop => "tool_loop",
            Self::VerificationFailed => "verification_failed",
            Self::EmptyModelResponse => "empty_model_response",
            Self::Unknown => "unknown",
        }
    }

    /// One-sentence advice for the operator on how to respond to this class.
    fn operator_guidance(self) -> &'static str {
        match self {
            Self::ContextWindow => "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.",
            Self::ProviderDegraded => "Retry once automatically, then narrow the turn or restart LM Studio if it persists.",
            Self::ToolArgMalformed => "Retry with repaired or narrower tool arguments instead of repeating the same malformed call.",
            Self::ToolPolicyBlocked => "Stay inside the allowed workflow or switch modes before retrying.",
            Self::ToolLoop => "Stop repeating the same failing tool pattern and switch to a narrower recovery step.",
            Self::VerificationFailed => "Fix the build or test failure before treating the task as complete.",
            Self::EmptyModelResponse => "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.",
            Self::Unknown => "Inspect the latest grounded tool results or provider status before retrying.",
        }
    }
}
328
329fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
330    let lower = detail.to_ascii_lowercase();
331    if lower.contains("context_window_blocked")
332        || lower.contains("context ceiling reached")
333        || lower.contains("exceeds the")
334        || ((lower.contains("n_keep") && lower.contains("n_ctx"))
335            || lower.contains("context length")
336            || lower.contains("keep from the initial prompt")
337            || lower.contains("prompt is greater than the context length"))
338    {
339        RuntimeFailureClass::ContextWindow
340    } else if lower.contains("empty response from model")
341        || lower.contains("model returned an empty response")
342    {
343        RuntimeFailureClass::EmptyModelResponse
344    } else if lower.contains("lm studio unreachable")
345        || lower.contains("lm studio error")
346        || lower.contains("request failed")
347        || lower.contains("response parse error")
348        || lower.contains("provider degraded")
349    {
350        RuntimeFailureClass::ProviderDegraded
351    } else if lower.contains("missing required argument")
352        || lower.contains("json repair failed")
353        || lower.contains("invalid pattern")
354        || lower.contains("invalid line range")
355    {
356        RuntimeFailureClass::ToolArgMalformed
357    } else if lower.contains("action blocked:")
358        || lower.contains("access denied")
359        || lower.contains("declined by user")
360    {
361        RuntimeFailureClass::ToolPolicyBlocked
362    } else if lower.contains("too many consecutive tool errors")
363        || lower.contains("repeated tool failures")
364        || lower.contains("stuck in a loop")
365    {
366        RuntimeFailureClass::ToolLoop
367    } else if lower.contains("build failed")
368        || lower.contains("verification failed")
369        || lower.contains("verify_build")
370    {
371        RuntimeFailureClass::VerificationFailed
372    } else {
373        RuntimeFailureClass::Unknown
374    }
375}
376
377fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
378    format!(
379        "[failure:{}] {} Detail: {}",
380        class.tag(),
381        class.operator_guidance(),
382        detail.trim()
383    )
384}
385
386fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
387    match class {
388        RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
389        RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
390        RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
391        _ => None,
392    }
393}
394
395fn checkpoint_state_for_runtime_failure(
396    class: RuntimeFailureClass,
397) -> Option<OperatorCheckpointState> {
398    match class {
399        RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
400        RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
401        RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
402        RuntimeFailureClass::VerificationFailed => {
403            Some(OperatorCheckpointState::BlockedVerification)
404        }
405        _ => None,
406    }
407}
408
409fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
410    match class {
411        RuntimeFailureClass::ProviderDegraded => {
412            "LM Studio degraded during the turn; retrying once before surfacing a failure."
413        }
414        RuntimeFailureClass::EmptyModelResponse => {
415            "The model returned an empty reply; retrying once before surfacing a failure."
416        }
417        _ => "Runtime recovery in progress.",
418    }
419}
420
421fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
422    match class {
423        RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
424        RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
425        RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
426        RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
427        _ => "Operator checkpoint updated.",
428    }
429}
430
431fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
432    match class {
433        RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
434        RuntimeFailureClass::ProviderDegraded => {
435            "LM Studio degraded and did not recover cleanly; operator action is now required."
436        }
437        RuntimeFailureClass::EmptyModelResponse => {
438            "LM Studio returned an empty reply after recovery; operator action is now required."
439        }
440        RuntimeFailureClass::ToolLoop => {
441            "Repeated failing tool pattern detected; Hematite stopped the loop."
442        }
443        _ => "Runtime failure surfaced to the operator.",
444    }
445}
446
447fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
448    matches!(
449        class,
450        RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
451    )
452}
453
454fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
455    match class {
456        RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
457        RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
458        RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
459        RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
460        RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
461        RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
462        RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
463    }
464}
465
466fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
467    format!(
468        "{} [{}]",
469        plan.recipe.scenario.label(),
470        plan.recipe.steps_summary()
471    )
472}
473
474fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
475    match decision {
476        RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
477        RecoveryDecision::Escalate {
478            recipe,
479            attempts_made,
480            ..
481        } => format!(
482            "{} escalated after {} / {} [{}]",
483            recipe.scenario.label(),
484            attempts_made,
485            recipe.max_attempts.max(1),
486            recipe.steps_summary()
487        ),
488    }
489}
490
491/// Parse file paths from cargo/compiler error output.
492/// Handles lines like `  --> src/foo/bar.rs:34:12` and `error: could not compile`.
493fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
494    let root = crate::tools::file_ops::workspace_root();
495    let mut paths: Vec<String> = output
496        .lines()
497        .filter_map(|line| {
498            let trimmed = line.trim_start();
499            // Cargo error location: "--> path/to/file.rs:line:col"
500            let after_arrow = trimmed.strip_prefix("--> ")?;
501            let file_part = after_arrow.split(':').next()?;
502            if file_part.is_empty() || file_part.starts_with('<') {
503                return None;
504            }
505            let p = std::path::Path::new(file_part);
506            let resolved = if p.is_absolute() {
507                p.to_path_buf()
508            } else {
509                root.join(p)
510            };
511            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
512        })
513        .collect();
514    paths.sort();
515    paths.dedup();
516    paths
517}
518
519fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
520    match mode {
521        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
522        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
523        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
524        WorkflowMode::Teach => "Workflow mode TEACH is a guided walkthrough mode. I will inspect the real state of your machine first, then give you a numbered step-by-step tutorial so you can perform the task yourself. I do not execute write operations in TEACH mode — I show you exactly how to do it.".to_string(),
525        _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
526    }
527}
528
/// Fixed instruction block describing the expected shape of an architect
/// handoff: rules first, then the six required headings, then a closing
/// reminder. Lines are separated by `\n` with no trailing newline.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow`.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented."
    )
}
544
/// Canonical prompt text that requests execution of the current plan.
fn implement_current_plan_prompt() -> &'static str {
    const PROMPT: &str = "Implement the current plan.";
    PROMPT
}
548
549fn architect_handoff_operator_note(plan: &crate::tools::plan::PlanHandoff) -> String {
550    format!(
551        "Implementation handoff saved to `.hematite/PLAN.md`.\nNext step: run `/implement-plan` to execute it in `/code`, or use `/code {}` directly.\nPlan: {}",
552        implement_current_plan_prompt().to_ascii_lowercase(),
553        plan.summary_line()
554    )
555}
556
557fn is_current_plan_execution_request(user_input: &str) -> bool {
558    let lower = user_input.trim().to_ascii_lowercase();
559    lower == "/implement-plan"
560        || lower == implement_current_plan_prompt().to_ascii_lowercase()
561        || lower
562            == implement_current_plan_prompt()
563                .trim_end_matches('.')
564                .to_ascii_lowercase()
565        || lower.contains("implement the current plan")
566}
567
568fn is_plan_scoped_tool(name: &str) -> bool {
569    crate::agent::inference::tool_metadata_for_name(name).plan_scope
570}
571
572fn is_current_plan_irrelevant_tool(name: &str) -> bool {
573    !crate::agent::inference::tool_metadata_for_name(name).plan_scope
574}
575
576fn is_non_mutating_plan_step_tool(name: &str) -> bool {
577    let metadata = crate::agent::inference::tool_metadata_for_name(name);
578    metadata.plan_scope && !metadata.mutates_workspace
579}
580
581fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
582    let trimmed = user_input.trim();
583    for (prefix, mode) in [
584        ("/ask", WorkflowMode::Ask),
585        ("/code", WorkflowMode::Code),
586        ("/architect", WorkflowMode::Architect),
587        ("/read-only", WorkflowMode::ReadOnly),
588        ("/auto", WorkflowMode::Auto),
589        ("/teach", WorkflowMode::Teach),
590    ] {
591        if let Some(rest) = trimmed.strip_prefix(prefix) {
592            let rest = rest.trim();
593            if !rest.is_empty() {
594                return Some((mode, rest));
595            }
596        }
597    }
598    None
599}
600
601// Tool catalogue
602
/// Returns the full set of tools exposed to the model.
///
/// Thin wrapper that forwards to `crate::agent::tool_registry::get_tools`.
pub fn get_tools() -> Vec<ToolDefinition> {
    crate::agent::tool_registry::get_tools()
}
607
/// Central per-conversation state: chat history, the inference engine handle,
/// tool definitions, session memory, and handles to the supporting subsystems
/// (MCP, LSP, voice, swarm, vein, git, GPU telemetry).
pub struct ConversationManager {
    /// Full conversation history in OpenAI format.
    pub history: Vec<ChatMessage>,
    /// Shared handle to the inference engine.
    pub engine: Arc<InferenceEngine>,
    /// Tool definitions held for this conversation.
    pub tools: Vec<ToolDefinition>,
    /// Shared, async-locked MCP manager.
    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
    // NOTE(review): `professional`, `brief`, `snark` and `chaos` look like
    // response-style settings; their consumers are outside this view — confirm.
    pub professional: bool,
    pub brief: bool,
    pub snark: u8,
    pub chaos: u8,
    /// Model to use for simple read-only tasks (optional, user-supplied via --fast-model).
    pub fast_model: Option<String>,
    /// Model to use for complex write/build tasks (optional, user-supplied via --think-model).
    pub think_model: Option<String>,
    /// Files where whitespace auto-correction fired this session.
    pub correction_hints: Vec<String>,
    /// Running background summary of pruned older messages.
    pub running_summary: Option<String>,
    /// Live hardware telemetry handle.
    pub gpu_state: Arc<GpuState>,
    /// Local RAG memory — FTS5-indexed project source.
    pub vein: crate::memory::vein::Vein,
    /// Append-only session transcript logger.
    pub transcript: crate::agent::transcript::TranscriptLogger,
    /// Thread-safe cancellation signal for the current agent turn.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Shared Git remote state (for persistent connectivity checks).
    pub git_state: Arc<crate::agent::git_monitor::GitState>,
    /// Reasoning think-mode override. None = let model decide. Some(true) = force /think.
    /// Some(false) = force /no_think (fast mode, 3-5x quicker for simple tasks).
    pub think_mode: Option<bool>,
    /// Currently selected workflow mode.
    workflow_mode: WorkflowMode,
    /// Layer 6: Dynamic Task Context (extracted during compaction)
    pub session_memory: crate::agent::compaction::SessionMemory,
    /// Shared handle to the swarm coordinator.
    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    /// Shared handle to the voice manager.
    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
    /// Personality description for the current Rusty soul — used in chat mode system prompt.
    pub soul_personality: String,
    /// Shared, async-locked LSP manager.
    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
    /// Active reasoning summary extracted from the previous model turn (Gemma-4 Native).
    pub reasoning_history: Option<String>,
    /// Layer 8: Active Reference Pinning (Context Locked)
    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
    /// Hard action-grounding state for proof-before-action checks.
    action_grounding: Arc<Mutex<ActionGroundingState>>,
    /// True only during `/code Implement the current plan.` style execution turns.
    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
    /// Typed per-turn recovery attempt tracking.
    recovery_context: RecoveryContext,
    /// L1 context block — hot files summary injected into the system prompt.
    /// Built once after vein init and updated as edits accumulate heat.
    pub l1_context: Option<String>,
    /// Condensed AST repository layout for the active project.
    pub repo_map: Option<String>,
    /// Number of real inference turns completed this session.
    pub turn_count: u32,
    /// Last user message sent to the model — persisted as checkpoint goal.
    pub last_goal: Option<String>,
}
667
668impl ConversationManager {
669    fn vein_docs_only_mode(&self) -> bool {
670        !crate::tools::file_ops::is_project_workspace()
671    }
672
673    fn refresh_vein_index(&mut self) -> usize {
674        let count = if self.vein_docs_only_mode() {
675            let root = crate::tools::file_ops::workspace_root();
676            tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
677        } else {
678            tokio::task::block_in_place(|| self.vein.index_project())
679        };
680        self.l1_context = self.vein.l1_context();
681        count
682    }
683
684    fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
685        let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
686        let workspace_mode = if self.vein_docs_only_mode() {
687            "docs-only (outside a project workspace)"
688        } else {
689            "project workspace"
690        };
691        let active_room = snapshot.active_room.as_deref().unwrap_or("none");
692        let mut out = format!(
693            "Vein Inspection\n\
694             Workspace mode: {workspace_mode}\n\
695             Indexed this pass: {indexed_this_pass}\n\
696             Indexed source files: {}\n\
697             Indexed docs: {}\n\
698             Indexed session exchanges: {}\n\
699             Embedded source/doc chunks: {}\n\
700             Embeddings available: {}\n\
701             Active room bias: {active_room}\n\
702             L1 hot-files block: {}\n",
703            snapshot.indexed_source_files,
704            snapshot.indexed_docs,
705            snapshot.indexed_session_exchanges,
706            snapshot.embedded_source_doc_chunks,
707            if snapshot.has_any_embeddings {
708                "yes"
709            } else {
710                "no"
711            },
712            if snapshot.l1_ready {
713                "ready"
714            } else {
715                "not built yet"
716            },
717        );
718
719        if snapshot.hot_files.is_empty() {
720            out.push_str("Hot files: none yet.\n");
721            return out;
722        }
723
724        out.push_str("\nHot files by room:\n");
725        let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
726            std::collections::BTreeMap::new();
727        for file in &snapshot.hot_files {
728            by_room.entry(file.room.as_str()).or_default().push(file);
729        }
730        for (room, files) in by_room {
731            out.push_str(&format!("[{}]\n", room));
732            for file in files {
733                out.push_str(&format!(
734                    "- {} [{} edit{}]\n",
735                    file.path,
736                    file.heat,
737                    if file.heat == 1 { "" } else { "s" }
738                ));
739            }
740        }
741
742        out
743    }
744
745    fn latest_user_prompt(&self) -> Option<&str> {
746        self.history
747            .iter()
748            .rev()
749            .find(|msg| msg.role == "user")
750            .map(|msg| msg.content.as_str())
751    }
752
753    async fn emit_direct_response(
754        &mut self,
755        tx: &mpsc::Sender<InferenceEvent>,
756        raw_user_input: &str,
757        effective_user_input: &str,
758        response: &str,
759    ) {
760        self.history.push(ChatMessage::user(effective_user_input));
761        self.history.push(ChatMessage::assistant_text(response));
762        self.transcript.log_user(raw_user_input);
763        self.transcript.log_agent(response);
764        for chunk in chunk_text(response, 8) {
765            if !chunk.is_empty() {
766                let _ = tx.send(InferenceEvent::Token(chunk)).await;
767            }
768        }
769        let _ = tx.send(InferenceEvent::Done).await;
770        self.trim_history(80);
771        self.refresh_session_memory();
772        self.save_session();
773    }
774
775    async fn emit_operator_checkpoint(
776        &mut self,
777        tx: &mpsc::Sender<InferenceEvent>,
778        state: OperatorCheckpointState,
779        summary: impl Into<String>,
780    ) {
781        let summary = summary.into();
782        self.session_memory
783            .record_checkpoint(state.label(), summary.clone());
784        let _ = tx
785            .send(InferenceEvent::OperatorCheckpoint { state, summary })
786            .await;
787    }
788
789    async fn emit_recovery_recipe_summary(
790        &mut self,
791        tx: &mpsc::Sender<InferenceEvent>,
792        state: impl Into<String>,
793        summary: impl Into<String>,
794    ) {
795        let state = state.into();
796        let summary = summary.into();
797        self.session_memory.record_recovery(state, summary.clone());
798        let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
799    }
800
801    async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
802        let _ = tx
803            .send(InferenceEvent::ProviderStatus {
804                state: ProviderRuntimeState::Live,
805                summary: String::new(),
806            })
807            .await;
808        self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
809            .await;
810    }
811
812    async fn emit_prompt_pressure_for_messages(
813        &self,
814        tx: &mpsc::Sender<InferenceEvent>,
815        messages: &[ChatMessage],
816    ) {
817        let context_length = self.engine.current_context_length();
818        let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
819            crate::agent::inference::estimate_prompt_pressure(
820                messages,
821                &self.tools,
822                context_length,
823            );
824        let _ = tx
825            .send(InferenceEvent::PromptPressure {
826                estimated_input_tokens,
827                reserved_output_tokens,
828                estimated_total_tokens,
829                context_length,
830                percent,
831            })
832            .await;
833    }
834
835    async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
836        let context_length = self.engine.current_context_length();
837        let _ = tx
838            .send(InferenceEvent::PromptPressure {
839                estimated_input_tokens: 0,
840                reserved_output_tokens: 0,
841                estimated_total_tokens: 0,
842                context_length,
843                percent: 0,
844            })
845            .await;
846    }
847
848    async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
849        let context_length = self.engine.current_context_length();
850        let vram_ratio = self.gpu_state.ratio();
851        let config = CompactionConfig::adaptive(context_length, vram_ratio);
852        let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
853        let percent = if config.max_estimated_tokens == 0 {
854            0
855        } else {
856            ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
857        };
858
859        let _ = tx
860            .send(InferenceEvent::CompactionPressure {
861                estimated_tokens,
862                threshold_tokens: config.max_estimated_tokens,
863                percent,
864            })
865            .await;
866    }
867
868    async fn refresh_runtime_profile_and_report(
869        &mut self,
870        tx: &mpsc::Sender<InferenceEvent>,
871        reason: &str,
872    ) -> Option<(String, usize, bool)> {
873        let refreshed = self.engine.refresh_runtime_profile().await;
874        if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
875            let _ = tx
876                .send(InferenceEvent::RuntimeProfile {
877                    model_id: model_id.clone(),
878                    context_length: *context_length,
879                })
880                .await;
881            self.transcript.log_system(&format!(
882                "Runtime profile refresh ({}): model={} ctx={} changed={}",
883                reason, model_id, context_length, changed
884            ));
885        }
886        refreshed
887    }
888
889    pub fn new(
890        engine: Arc<InferenceEngine>,
891        professional: bool,
892        brief: bool,
893        snark: u8,
894        chaos: u8,
895        soul_personality: String,
896        fast_model: Option<String>,
897        think_model: Option<String>,
898        gpu_state: Arc<GpuState>,
899        git_state: Arc<crate::agent::git_monitor::GitState>,
900        swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
901        voice_manager: Arc<crate::ui::voice::VoiceManager>,
902    ) -> Self {
903        let (saved_summary, saved_memory) = load_session_data();
904
905        // Build the initial mcp_manager
906        let mcp_manager = Arc::new(tokio::sync::Mutex::new(
907            crate::agent::mcp_manager::McpManager::new(),
908        ));
909
910        // Build the initial system prompt using the canonical InferenceEngine path.
911        let dynamic_instructions =
912            engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
913
914        let history = vec![ChatMessage::system(&dynamic_instructions)];
915
916        let vein_path = crate::tools::file_ops::workspace_root()
917            .join(".hematite")
918            .join("vein.db");
919        let vein_base_url = engine.base_url.clone();
920        let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
921            .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
922
923        Self {
924            history,
925            engine,
926            tools: get_tools(),
927            mcp_manager,
928            professional,
929            brief,
930            snark,
931            chaos,
932            fast_model,
933            think_model,
934            correction_hints: Vec::new(),
935            running_summary: saved_summary,
936            gpu_state,
937            vein,
938            transcript: crate::agent::transcript::TranscriptLogger::new(),
939            cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
940            git_state,
941            think_mode: None,
942            workflow_mode: WorkflowMode::Auto,
943            session_memory: saved_memory,
944            swarm_coordinator,
945            voice_manager,
946            soul_personality,
947            lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
948                crate::tools::file_ops::workspace_root(),
949            ))),
950            reasoning_history: None,
951            pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
952            action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
953            plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
954            recovery_context: RecoveryContext::default(),
955            l1_context: None,
956            repo_map: None,
957            turn_count: 0,
958            last_goal: None,
959        }
960    }
961
962    /// Index the project into The Vein. Call once after construction.
963    /// Uses block_in_place so the tokio runtime thread isn't parked.
964    pub fn initialize_vein(&mut self) -> usize {
965        self.refresh_vein_index()
966    }
967
968    /// Generate the AST Repo Map. Call once after construction or when resetting context.
969    pub fn initialize_repo_map(&mut self) {
970        if !self.vein_docs_only_mode() {
971            let root = crate::tools::file_ops::workspace_root();
972            let hot = self.vein.hot_files_weighted(10);
973            let gen = crate::memory::repo_map::RepoMapGenerator::new(&root).with_hot_files(&hot);
974            match tokio::task::block_in_place(|| gen.generate()) {
975                Ok(map) => self.repo_map = Some(map),
976                Err(e) => {
977                    self.repo_map = Some(format!("Repo Map generation failed: {}", e));
978                }
979            }
980        }
981    }
982
983    /// Re-generate the repo map after a file edit so rankings stay fresh.
984    /// Lightweight (~100-200ms) — called after successful mutations.
985    fn refresh_repo_map(&mut self) {
986        self.initialize_repo_map();
987    }
988
989    fn save_session(&self) {
990        let path = session_path();
991        if let Some(parent) = path.parent() {
992            let _ = std::fs::create_dir_all(parent);
993        }
994        let saved = SavedSession {
995            running_summary: self.running_summary.clone(),
996            session_memory: self.session_memory.clone(),
997            last_goal: self.last_goal.clone(),
998            turn_count: self.turn_count,
999        };
1000        if let Ok(json) = serde_json::to_string(&saved) {
1001            let _ = std::fs::write(&path, json);
1002        }
1003    }
1004
1005    fn save_empty_session(&self) {
1006        let path = session_path();
1007        if let Some(parent) = path.parent() {
1008            let _ = std::fs::create_dir_all(parent);
1009        }
1010        let saved = SavedSession {
1011            running_summary: None,
1012            session_memory: crate::agent::compaction::SessionMemory::default(),
1013            last_goal: None,
1014            turn_count: 0,
1015        };
1016        if let Ok(json) = serde_json::to_string(&saved) {
1017            let _ = std::fs::write(&path, json);
1018        }
1019    }
1020
1021    fn refresh_session_memory(&mut self) {
1022        let current_plan = self.session_memory.current_plan.clone();
1023        let previous_memory = self.session_memory.clone();
1024        self.session_memory = compaction::extract_memory(&self.history);
1025        self.session_memory.current_plan = current_plan;
1026        self.session_memory
1027            .inherit_runtime_ledger_from(&previous_memory);
1028    }
1029
1030    fn build_chat_system_prompt(&self) -> String {
1031        let species = &self.engine.species;
1032        let personality = &self.soul_personality;
1033        format!(
1034            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
1035             {personality}\n\n\
1036             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
1037             Rules:\n\
1038             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
1039             - Answer directly. One paragraph is usually right.\n\
1040             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
1041             - Don't narrate your reasoning or mention tool names unprompted.\n\
1042             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
1043             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
1044             - If the user wants the full coding harness, they can type `/agent`.\n",
1045        )
1046    }
1047
1048    fn append_session_handoff(&self, system_msg: &mut String) {
1049        let has_summary = self
1050            .running_summary
1051            .as_ref()
1052            .map(|s| !s.trim().is_empty())
1053            .unwrap_or(false);
1054        let has_memory = self.session_memory.has_signal();
1055
1056        if !has_summary && !has_memory {
1057            return;
1058        }
1059
1060        system_msg.push_str(
1061            "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
1062             This is compact carry-over from earlier work on this machine.\n\
1063             Use it only when it helps the current request.\n\
1064             Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
1065        );
1066
1067        if has_memory {
1068            system_msg.push_str("\n## Active Task Memory\n");
1069            system_msg.push_str(&self.session_memory.to_prompt());
1070        }
1071
1072        if let Some(summary) = self.running_summary.as_deref() {
1073            if !summary.trim().is_empty() {
1074                system_msg.push_str("\n## Compacted Session Summary\n");
1075                system_msg.push_str(summary);
1076                system_msg.push('\n');
1077            }
1078        }
1079    }
1080
1081    fn set_workflow_mode(&mut self, mode: WorkflowMode) {
1082        self.workflow_mode = mode;
1083    }
1084
1085    fn current_plan_summary(&self) -> Option<String> {
1086        self.session_memory
1087            .current_plan
1088            .as_ref()
1089            .filter(|plan| plan.has_signal())
1090            .map(|plan| plan.summary_line())
1091    }
1092
1093    fn current_plan_allowed_paths(&self) -> Vec<String> {
1094        self.session_memory
1095            .current_plan
1096            .as_ref()
1097            .map(|plan| {
1098                plan.target_files
1099                    .iter()
1100                    .map(|path| normalize_workspace_path(path))
1101                    .collect()
1102            })
1103            .unwrap_or_default()
1104    }
1105
1106    fn persist_architect_handoff(
1107        &mut self,
1108        response: &str,
1109    ) -> Option<crate::tools::plan::PlanHandoff> {
1110        if self.workflow_mode != WorkflowMode::Architect {
1111            return None;
1112        }
1113        let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1114            return None;
1115        };
1116        let _ = crate::tools::plan::save_plan_handoff(&plan);
1117        self.session_memory.current_plan = Some(plan.clone());
1118        Some(plan)
1119    }
1120
1121    async fn begin_grounded_turn(&self) -> u64 {
1122        let mut state = self.action_grounding.lock().await;
1123        state.turn_index += 1;
1124        state.turn_index
1125    }
1126
1127    async fn reset_action_grounding(&self) {
1128        let mut state = self.action_grounding.lock().await;
1129        *state = ActionGroundingState::default();
1130    }
1131
1132    async fn record_read_observation(&self, path: &str) {
1133        let normalized = normalize_workspace_path(path);
1134        let mut state = self.action_grounding.lock().await;
1135        let turn = state.turn_index;
1136        // read_file returns full file content with line numbers — sufficient for
1137        // the model to know exact text before editing, so it satisfies the
1138        // line-inspection grounding check too.
1139        state.observed_paths.insert(normalized.clone(), turn);
1140        state.inspected_paths.insert(normalized, turn);
1141    }
1142
1143    async fn record_line_inspection(&self, path: &str) {
1144        let normalized = normalize_workspace_path(path);
1145        let mut state = self.action_grounding.lock().await;
1146        let turn = state.turn_index;
1147        state.observed_paths.insert(normalized.clone(), turn);
1148        state.inspected_paths.insert(normalized, turn);
1149    }
1150
1151    async fn record_verify_build_result(&self, ok: bool, output: &str) {
1152        let mut state = self.action_grounding.lock().await;
1153        let turn = state.turn_index;
1154        state.last_verify_build_turn = Some(turn);
1155        state.last_verify_build_ok = ok;
1156        if ok {
1157            state.code_changed_since_verify = false;
1158            state.last_failed_build_paths.clear();
1159        } else {
1160            state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1161        }
1162    }
1163
1164    fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
1165        self.session_memory.record_verification(ok, summary);
1166    }
1167
1168    async fn record_successful_mutation(&self, path: Option<&str>) {
1169        let mut state = self.action_grounding.lock().await;
1170        state.code_changed_since_verify = match path {
1171            Some(p) => is_code_like_path(p),
1172            None => true,
1173        };
1174    }
1175
1176    async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
1177        if self
1178            .plan_execution_active
1179            .load(std::sync::atomic::Ordering::SeqCst)
1180        {
1181            if is_current_plan_irrelevant_tool(name) {
1182                return Err(format!(
1183                    "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
1184                    name
1185                ));
1186            }
1187
1188            if is_plan_scoped_tool(name) {
1189                let allowed_paths = self.current_plan_allowed_paths();
1190                if !allowed_paths.is_empty() {
1191                    let in_allowed = match name {
1192                        "auto_pin_context" => args
1193                            .get("paths")
1194                            .and_then(|v| v.as_array())
1195                            .map(|paths| {
1196                                !paths.is_empty()
1197                                    && paths.iter().all(|v| {
1198                                        v.as_str()
1199                                            .map(normalize_workspace_path)
1200                                            .map(|p| allowed_paths.contains(&p))
1201                                            .unwrap_or(false)
1202                                    })
1203                            })
1204                            .unwrap_or(false),
1205                        "grep_files" | "list_files" => args
1206                            .get("path")
1207                            .and_then(|v| v.as_str())
1208                            .map(normalize_workspace_path)
1209                            .map(|p| allowed_paths.contains(&p))
1210                            .unwrap_or(false),
1211                        _ => action_target_path(name, args)
1212                            .map(|p| allowed_paths.contains(&p))
1213                            .unwrap_or(false),
1214                    };
1215
1216                    if !in_allowed {
1217                        let allowed = allowed_paths
1218                            .iter()
1219                            .map(|p| format!("`{}`", p))
1220                            .collect::<Vec<_>>()
1221                            .join(", ");
1222                        return Err(format!(
1223                            "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
1224                            allowed
1225                        ));
1226                    }
1227                }
1228            }
1229
1230            if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
1231                if let Some(target) = action_target_path(name, args) {
1232                    let state = self.action_grounding.lock().await;
1233                    let recently_inspected = state
1234                        .inspected_paths
1235                        .get(&target)
1236                        .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1237                        .unwrap_or(false);
1238                    drop(state);
1239                    if !recently_inspected {
1240                        return Err(format!(
1241                            "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
1242                            name, target
1243                        ));
1244                    }
1245                }
1246            }
1247        }
1248
1249        if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
1250            return Err(
1251                "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
1252                    .to_string(),
1253            );
1254        }
1255
1256        if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
1257            if name == "shell" {
1258                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1259                let risk = crate::tools::guard::classify_bash_risk(command);
1260                if !matches!(risk, crate::tools::RiskLevel::Safe) {
1261                    return Err(format!(
1262                        "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
1263                        self.workflow_mode.label()
1264                    ));
1265                }
1266            } else {
1267                return Err(format!(
1268                    "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
1269                    self.workflow_mode.label()
1270                ));
1271            }
1272        }
1273
1274        let normalized_target = action_target_path(name, args);
1275        if let Some(target) = normalized_target.as_deref() {
1276            if matches!(
1277                name,
1278                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1279            ) {
1280                if let Some(prompt) = self.latest_user_prompt() {
1281                    if docs_edit_without_explicit_request(prompt, target) {
1282                        return Err(format!(
1283                            "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
1284                            target
1285                        ));
1286                    }
1287                }
1288            }
1289            let path_exists = std::path::Path::new(target).exists();
1290            if path_exists {
1291                let state = self.action_grounding.lock().await;
1292                let pinned = self.pinned_files.lock().await;
1293                let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
1294                drop(pinned);
1295
1296                // edit_file and multi_search_replace match text exactly, so they need a
1297                // tighter evidence bar than a plain read. Require inspect_lines on the
1298                // target within the last 3 turns. A read_file in the *same* turn is also
1299                // accepted (the model just loaded the file and is making an immediate edit).
1300                let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
1301                let recently_inspected = state
1302                    .inspected_paths
1303                    .get(target)
1304                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1305                    .unwrap_or(false);
1306                let same_turn_read = state
1307                    .observed_paths
1308                    .get(target)
1309                    .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
1310                    .unwrap_or(false);
1311                let recent_observed = state
1312                    .observed_paths
1313                    .get(target)
1314                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1315                    .unwrap_or(false);
1316
1317                if needs_exact_window {
1318                    if !recently_inspected && !same_turn_read && !pinned_match {
1319                        return Err(format!(
1320                            "Action blocked: `{}` on '{}' requires a line-level inspection first. \
1321                             Use `inspect_lines` on the target region to get the exact current text \
1322                             (whitespace and indentation included), then retry the edit.",
1323                            name, target
1324                        ));
1325                    }
1326                } else if !recent_observed && !pinned_match {
1327                    return Err(format!(
1328                        "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
1329                        name, target
1330                    ));
1331                }
1332            }
1333        }
1334
1335        if is_mcp_mutating_tool(name) {
1336            return Err(format!(
1337                "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
1338                name
1339            ));
1340        }
1341
1342        if is_mcp_workspace_read_tool(name) {
1343            return Err(format!(
1344                "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
1345                name
1346            ));
1347        }
1348
1349        // Phase gate: if the build is broken, constrain edits to files that cargo flagged.
1350        // This prevents the model from wandering to unrelated files after a failed verify.
1351        if matches!(
1352            name,
1353            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1354        ) {
1355            if let Some(target) = normalized_target.as_deref() {
1356                let state = self.action_grounding.lock().await;
1357                if state.code_changed_since_verify
1358                    && !state.last_verify_build_ok
1359                    && !state.last_failed_build_paths.is_empty()
1360                    && !state.last_failed_build_paths.iter().any(|p| p == target)
1361                {
1362                    let files = state
1363                        .last_failed_build_paths
1364                        .iter()
1365                        .map(|p| format!("`{}`", p))
1366                        .collect::<Vec<_>>()
1367                        .join(", ");
1368                    return Err(format!(
1369                        "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
1370                        files
1371                    ));
1372                }
1373            }
1374        }
1375
1376        if name == "git_commit" || name == "git_push" {
1377            let state = self.action_grounding.lock().await;
1378            if state.code_changed_since_verify && !state.last_verify_build_ok {
1379                return Err(format!(
1380                    "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
1381                    name
1382                ));
1383            }
1384        }
1385
1386        if name == "shell" {
1387            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1388            if shell_looks_like_structured_host_inspection(command) {
1389                // Auto-redirect: silently call inspect_host with the right topic instead of
1390                // returning a block error that the model may fail to recover from.
1391                // Derive topic from the shell command itself first; fall back to the user prompt.
1392                let topic = preferred_host_inspection_topic(command)
1393                    .or_else(|| {
1394                        self.latest_user_prompt()
1395                            .and_then(|p| preferred_host_inspection_topic(p))
1396                    })
1397                    .unwrap_or("summary")
1398                    .to_string();
1399
1400                {
1401                    let mut state = self.action_grounding.lock().await;
1402                    let current_turn = state.turn_index;
1403                    if let Some(turn) = state.redirected_host_inspection_topics.get(&topic) {
1404                        if *turn == current_turn {
1405                            return Err(format!(
1406                                "Action already handled: The diagnostic data for topic `{topic}` was already provided in a previous redirected shell result this turn. Do not retry the same shell command."
1407                            ));
1408                        }
1409                    }
1410                    state
1411                        .redirected_host_inspection_topics
1412                        .insert(topic.clone(), current_turn);
1413                }
1414
1415                let path_val = self
1416                    .latest_user_prompt()
1417                    .and_then(|p| {
1418                        // Very basic heuristic for path extraction: look for strings with dots/slashes
1419                        p.split_whitespace()
1420                            .find(|w| w.contains('.') || w.contains('/') || w.contains('\\'))
1421                            .map(|s| {
1422                                s.trim_matches(|c: char| {
1423                                    !c.is_alphanumeric() && c != '.' && c != '/' && c != '\\'
1424                                })
1425                            })
1426                    })
1427                    .unwrap_or("");
1428
1429                let redirect_args = if !path_val.is_empty() {
1430                    serde_json::json!({ "topic": topic, "path": path_val })
1431                } else {
1432                    serde_json::json!({ "topic": topic })
1433                };
1434
1435                let result = crate::tools::host_inspect::inspect_host(&redirect_args).await;
1436                return match result {
1437                    Ok(output) => Err(format!(
1438                        "[successfully auto-redirected shell→inspect_host(topic=\"{topic}\")]\n\n{output}\n\n[Note: Shell is blocked for host inspection. The diagnostic data above fulfills your request. Call inspect_host directly with the correct topic for any further diagnostics.]"
1439                    )),
1440                    Err(e) => Err(format!(
1441                        "Redirection to native tool `{topic}` failed: {e}\n\nAction blocked: use `inspect_host(topic: \"{topic}\")` instead of raw `shell` for host-inspection questions. Available topics: updates, security, pending_reboot, disk_health, battery, recent_crashes, scheduled_tasks, dev_conflicts, health_report, storage, hardware, resource_load, processes, network, services, ports, env_doctor, fix_plan, connectivity, wifi, connections, vpn, proxy, firewall_rules, traceroute, dns_cache, arp, route_table, docker, wsl, ssh, env, hosts_file, installed_software, git_config, databases, disk_benchmark, directory.",
1442                    )),
1443                };
1444            }
1445            let reason = args
1446                .get("reason")
1447                .and_then(|v| v.as_str())
1448                .unwrap_or("")
1449                .trim();
1450            let risk = crate::tools::guard::classify_bash_risk(command);
1451            if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
1452                return Err(
1453                    "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
1454                        .to_string(),
1455                );
1456            }
1457        }
1458
1459        Ok(())
1460    }
1461
1462    fn build_action_receipt(
1463        &self,
1464        name: &str,
1465        args: &Value,
1466        output: &str,
1467        is_error: bool,
1468    ) -> Option<ChatMessage> {
1469        if is_error || !is_destructive_tool(name) {
1470            return None;
1471        }
1472
1473        let mut receipt = String::from("[ACTION RECEIPT]\n");
1474        receipt.push_str(&format!("- tool: {}\n", name));
1475        if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1476            receipt.push_str(&format!("- target: {}\n", path));
1477        }
1478        if name == "shell" {
1479            if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1480                receipt.push_str(&format!("- command: {}\n", command));
1481            }
1482            if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1483                if !reason.trim().is_empty() {
1484                    receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1485                }
1486            }
1487        }
1488        let first_line = output.lines().next().unwrap_or(output).trim();
1489        receipt.push_str(&format!("- outcome: {}\n", first_line));
1490        Some(ChatMessage::system(&receipt))
1491    }
1492
1493    fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1494        self.tools
1495            .retain(|tool| !tool.function.name.starts_with("mcp__"));
1496        self.tools
1497            .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1498                tool_type: "function".into(),
1499                function: ToolFunction {
1500                    name: tool.name.clone(),
1501                    description: tool.description.clone().unwrap_or_default(),
1502                    parameters: tool.input_schema.clone(),
1503                },
1504                metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1505            }));
1506    }
1507
1508    async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1509        let summary = {
1510            let mcp = self.mcp_manager.lock().await;
1511            mcp.runtime_report()
1512        };
1513        let _ = tx
1514            .send(InferenceEvent::McpStatus {
1515                state: summary.state,
1516                summary: summary.summary,
1517            })
1518            .await;
1519    }
1520
1521    async fn refresh_mcp_tools(
1522        &mut self,
1523        tx: &mpsc::Sender<InferenceEvent>,
1524    ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1525        let mcp_tools = {
1526            let mut mcp = self.mcp_manager.lock().await;
1527            match mcp.initialize_all().await {
1528                Ok(()) => mcp.discover_tools().await,
1529                Err(e) => {
1530                    drop(mcp);
1531                    self.replace_mcp_tool_definitions(&[]);
1532                    self.emit_mcp_runtime_status(tx).await;
1533                    return Err(e.into());
1534                }
1535            }
1536        };
1537
1538        self.replace_mcp_tool_definitions(&mcp_tools);
1539        self.emit_mcp_runtime_status(tx).await;
1540        Ok(mcp_tools)
1541    }
1542
1543    /// Spawns and initializes all configured MCP servers, discovering their tools.
1544    pub async fn initialize_mcp(
1545        &mut self,
1546        tx: &mpsc::Sender<InferenceEvent>,
1547    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1548        let _ = self.refresh_mcp_tools(tx).await?;
1549        Ok(())
1550    }
1551
1552    /// Run one user turn through the full agentic loop.
1553    ///
1554    /// Adds the user message, calls the model, executes any tools, and loops
1555    /// until the model produces a final text reply.  All progress is streamed
1556    /// as `InferenceEvent` values via `tx`.
1557    pub async fn run_turn(
1558        &mut self,
1559        user_turn: &UserTurn,
1560        tx: mpsc::Sender<InferenceEvent>,
1561        yolo: bool,
1562    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1563        let user_input = user_turn.text.as_str();
1564        // ── Fast-path reset commands: handled locally, no network I/O needed ──
1565        if user_input.trim() == "/new" {
1566            self.history.clear();
1567            self.reasoning_history = None;
1568            self.session_memory.clear();
1569            self.running_summary = None;
1570            self.correction_hints.clear();
1571            self.pinned_files.lock().await.clear();
1572            self.reset_action_grounding().await;
1573            reset_task_files();
1574            let _ = std::fs::remove_file(session_path());
1575            self.save_empty_session();
1576            self.emit_compaction_pressure(&tx).await;
1577            self.emit_prompt_pressure_idle(&tx).await;
1578            for chunk in chunk_text(
1579                "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1580                8,
1581            ) {
1582                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1583            }
1584            let _ = tx.send(InferenceEvent::Done).await;
1585            return Ok(());
1586        }
1587
1588        if user_input.trim() == "/forget" {
1589            self.history.clear();
1590            self.reasoning_history = None;
1591            self.session_memory.clear();
1592            self.running_summary = None;
1593            self.correction_hints.clear();
1594            self.pinned_files.lock().await.clear();
1595            self.reset_action_grounding().await;
1596            reset_task_files();
1597            purge_persistent_memory();
1598            tokio::task::block_in_place(|| self.vein.reset());
1599            let _ = std::fs::remove_file(session_path());
1600            self.save_empty_session();
1601            self.emit_compaction_pressure(&tx).await;
1602            self.emit_prompt_pressure_idle(&tx).await;
1603            for chunk in chunk_text(
1604                "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1605                8,
1606            ) {
1607                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1608            }
1609            let _ = tx.send(InferenceEvent::Done).await;
1610            return Ok(());
1611        }
1612
1613        if user_input.trim() == "/vein-inspect" {
1614            let indexed = self.refresh_vein_index();
1615            let report = self.build_vein_inspection_report(indexed);
1616            let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1617            let _ = tx
1618                .send(InferenceEvent::VeinStatus {
1619                    file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1620                    embedded_count: snapshot.embedded_source_doc_chunks,
1621                    docs_only: self.vein_docs_only_mode(),
1622                })
1623                .await;
1624            for chunk in chunk_text(&report, 8) {
1625                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1626            }
1627            let _ = tx.send(InferenceEvent::Done).await;
1628            return Ok(());
1629        }
1630
1631        if user_input.trim() == "/workspace-profile" {
1632            let root = crate::tools::file_ops::workspace_root();
1633            let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1634            let report = crate::agent::workspace_profile::profile_report(&root);
1635            for chunk in chunk_text(&report, 8) {
1636                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1637            }
1638            let _ = tx.send(InferenceEvent::Done).await;
1639            return Ok(());
1640        }
1641
1642        if user_input.trim() == "/rules" {
1643            let root = crate::tools::file_ops::workspace_root();
1644            let rules_path = root.join(".hematite").join("rules.md");
1645            let report = if rules_path.exists() {
1646                match std::fs::read_to_string(&rules_path) {
1647                    Ok(content) => format!(
1648                        "## Behavioral Rules (.hematite/rules.md)\n\n{}\n\n---\nTo update: ask Hematite to edit your rules, or open `.hematite/rules.md` directly. Changes take effect on the next turn.",
1649                        content.trim()
1650                    ),
1651                    Err(e) => format!("Error reading .hematite/rules.md: {e}"),
1652                }
1653            } else {
1654                format!(
1655                    "No behavioral rules file found at `.hematite/rules.md`.\n\nCreate it to add custom behavioral guidelines — they are injected into the system prompt on every turn and apply to any model you load.\n\nExample: ask Hematite to \"create a rules.md with simplicity-first and surgical-edit guidelines\" and it will write the file for you.\n\nExpected path: {}",
1656                    rules_path.display()
1657                )
1658            };
1659            for chunk in chunk_text(&report, 8) {
1660                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1661            }
1662            let _ = tx.send(InferenceEvent::Done).await;
1663            return Ok(());
1664        }
1665
1666        if user_input.trim() == "/vein-reset" {
1667            tokio::task::block_in_place(|| self.vein.reset());
1668            let _ = tx
1669                .send(InferenceEvent::VeinStatus {
1670                    file_count: 0,
1671                    embedded_count: 0,
1672                    docs_only: self.vein_docs_only_mode(),
1673                })
1674                .await;
1675            for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1676                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1677            }
1678            let _ = tx.send(InferenceEvent::Done).await;
1679            return Ok(());
1680        }
1681
1682        // Reload config every turn (edits apply immediately, no restart needed).
1683        let config = crate::agent::config::load_config();
1684        self.recovery_context.clear();
1685        let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1686        if !manual_runtime_refresh {
1687            if let Some((model_id, context_length, changed)) = self
1688                .refresh_runtime_profile_and_report(&tx, "turn_start")
1689                .await
1690            {
1691                if changed {
1692                    let _ = tx
1693                        .send(InferenceEvent::Thought(format!(
1694                            "Runtime refresh: using model `{}` with CTX {} for this turn.",
1695                            model_id, context_length
1696                        )))
1697                        .await;
1698                }
1699            }
1700        }
1701        self.emit_compaction_pressure(&tx).await;
1702        let current_model = self.engine.current_model();
1703        self.engine.set_gemma_native_formatting(
1704            crate::agent::config::effective_gemma_native_formatting(&config, &current_model),
1705        );
1706        let _turn_id = self.begin_grounded_turn().await;
1707        let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1708        let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1709            Ok(tools) => tools,
1710            Err(e) => {
1711                let _ = tx
1712                    .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1713                    .await;
1714                Vec::new()
1715            }
1716        };
1717
1718        // Apply config model overrides (config takes precedence over CLI flags).
1719        let effective_fast = config
1720            .fast_model
1721            .clone()
1722            .or_else(|| self.fast_model.clone());
1723        let effective_think = config
1724            .think_model
1725            .clone()
1726            .or_else(|| self.think_model.clone());
1727
1728        // ── /lsp: start language servers manually if needed ──────────────────
1729        if user_input.trim() == "/lsp" {
1730            let mut lsp = self.lsp_manager.lock().await;
1731            match lsp.start_servers().await {
1732                Ok(_) => {
1733                    let _ = tx
1734                        .send(InferenceEvent::MutedToken(
1735                            "LSP: Servers Initialized OK.".to_string(),
1736                        ))
1737                        .await;
1738                }
1739                Err(e) => {
1740                    let _ = tx
1741                        .send(InferenceEvent::Error(format!(
1742                            "LSP: Failed to start servers - {}",
1743                            e
1744                        )))
1745                        .await;
1746                }
1747            }
1748            let _ = tx.send(InferenceEvent::Done).await;
1749            return Ok(());
1750        }
1751
1752        if user_input.trim() == "/runtime-refresh" {
1753            match self
1754                .refresh_runtime_profile_and_report(&tx, "manual_command")
1755                .await
1756            {
1757                Some((model_id, context_length, changed)) => {
1758                    let msg = if changed {
1759                        format!(
1760                            "Runtime profile refreshed. Model: {} | CTX: {}",
1761                            model_id, context_length
1762                        )
1763                    } else {
1764                        format!(
1765                            "Runtime profile unchanged. Model: {} | CTX: {}",
1766                            model_id, context_length
1767                        )
1768                    };
1769                    for chunk in chunk_text(&msg, 8) {
1770                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1771                    }
1772                }
1773                None => {
1774                    let _ = tx
1775                        .send(InferenceEvent::Error(
1776                            "Runtime refresh failed: LM Studio profile could not be read."
1777                                .to_string(),
1778                        ))
1779                        .await;
1780                }
1781            }
1782            let _ = tx.send(InferenceEvent::Done).await;
1783            return Ok(());
1784        }
1785
1786        if user_input.trim() == "/ask" {
1787            self.set_workflow_mode(WorkflowMode::Ask);
1788            for chunk in chunk_text(
1789                "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1790                8,
1791            ) {
1792                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1793            }
1794            let _ = tx.send(InferenceEvent::Done).await;
1795            return Ok(());
1796        }
1797
1798        if user_input.trim() == "/code" {
1799            self.set_workflow_mode(WorkflowMode::Code);
1800            let mut message =
1801                "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1802            if let Some(plan) = self.current_plan_summary() {
1803                message.push_str(&format!(" Current plan: {plan}."));
1804            }
1805            for chunk in chunk_text(&message, 8) {
1806                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1807            }
1808            let _ = tx.send(InferenceEvent::Done).await;
1809            return Ok(());
1810        }
1811
1812        if user_input.trim() == "/architect" {
1813            self.set_workflow_mode(WorkflowMode::Architect);
1814            let mut message =
1815                "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement. When the handoff is ready, use `/implement-plan` or switch to `/code` to execute it.".to_string();
1816            if let Some(plan) = self.current_plan_summary() {
1817                message.push_str(&format!(" Existing plan: {plan}."));
1818            }
1819            for chunk in chunk_text(&message, 8) {
1820                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1821            }
1822            let _ = tx.send(InferenceEvent::Done).await;
1823            return Ok(());
1824        }
1825
1826        if user_input.trim() == "/read-only" {
1827            self.set_workflow_mode(WorkflowMode::ReadOnly);
1828            for chunk in chunk_text(
1829                "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1830                8,
1831            ) {
1832                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1833            }
1834            let _ = tx.send(InferenceEvent::Done).await;
1835            return Ok(());
1836        }
1837
1838        if user_input.trim() == "/auto" {
1839            self.set_workflow_mode(WorkflowMode::Auto);
1840            for chunk in chunk_text(
1841                "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1842                8,
1843            ) {
1844                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1845            }
1846            let _ = tx.send(InferenceEvent::Done).await;
1847            return Ok(());
1848        }
1849
1850        if user_input.trim() == "/chat" {
1851            self.set_workflow_mode(WorkflowMode::Chat);
1852            let _ = tx.send(InferenceEvent::Done).await;
1853            return Ok(());
1854        }
1855
1856        if user_input.trim() == "/teach" {
1857            self.set_workflow_mode(WorkflowMode::Teach);
1858            for chunk in chunk_text(
1859                "Workflow mode: TEACH. I will inspect your actual machine state first, then walk you through any admin, config, or write task as a grounded, numbered tutorial. I will not execute write operations — I will show you exactly how to do each step yourself.",
1860                8,
1861            ) {
1862                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1863            }
1864            let _ = tx.send(InferenceEvent::Done).await;
1865            return Ok(());
1866        }
1867
1868        if user_input.trim() == "/reroll" {
1869            let soul = crate::ui::hatch::generate_soul_random();
1870            self.snark = soul.snark;
1871            self.chaos = soul.chaos;
1872            self.soul_personality = soul.personality.clone();
            // Update the engine's species name so build_chat_system_prompt uses it.
            // NOTE: engine is an Arc, so the species field can only be mutated via
            // Arc::get_mut, which succeeds only when no other Arc or Weak pointers exist.
            // If it fails (e.g. swarm workers hold refs), the species update is skipped.
1877            let species = soul.species.clone();
1878            if let Some(eng) = Arc::get_mut(&mut self.engine) {
1879                eng.species = species.clone();
1880            }
1881            let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1882            let _ = tx
1883                .send(InferenceEvent::SoulReroll {
1884                    species: soul.species.clone(),
1885                    rarity: soul.rarity.label().to_string(),
1886                    shiny: soul.shiny,
1887                    personality: soul.personality.clone(),
1888                })
1889                .await;
1890            for chunk in chunk_text(
1891                &format!(
1892                    "A new companion awakens!\n[{}{}] {} — \"{}\"",
1893                    soul.rarity.label(),
1894                    shiny_tag,
1895                    soul.species,
1896                    soul.personality
1897                ),
1898                8,
1899            ) {
1900                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1901            }
1902            let _ = tx.send(InferenceEvent::Done).await;
1903            return Ok(());
1904        }
1905
1906        if user_input.trim() == "/agent" {
1907            self.set_workflow_mode(WorkflowMode::Auto);
1908            let _ = tx.send(InferenceEvent::Done).await;
1909            return Ok(());
1910        }
1911
1912        let implement_plan_alias = user_input.trim() == "/implement-plan";
1913        if implement_plan_alias
1914            && !self
1915                .session_memory
1916                .current_plan
1917                .as_ref()
1918                .map(|plan| plan.has_signal())
1919                .unwrap_or(false)
1920        {
1921            for chunk in chunk_text(
1922                "No saved architect handoff is active. Run `/architect` first, or switch to `/code` with an explicit implementation request.",
1923                8,
1924            ) {
1925                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1926            }
1927            let _ = tx.send(InferenceEvent::Done).await;
1928            return Ok(());
1929        }
1930
1931        let mut effective_user_input = if implement_plan_alias {
1932            self.set_workflow_mode(WorkflowMode::Code);
1933            implement_current_plan_prompt().to_string()
1934        } else {
1935            user_input.trim().to_string()
1936        };
1937        if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1938            self.set_workflow_mode(mode);
1939            effective_user_input = rest.to_string();
1940        }
1941        let transcript_user_input = if implement_plan_alias {
1942            transcript_user_turn_text(user_turn, "/implement-plan")
1943        } else {
1944            transcript_user_turn_text(user_turn, &effective_user_input)
1945        };
1946        effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1947        let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1948            && is_current_plan_execution_request(&effective_user_input)
1949            && self
1950                .session_memory
1951                .current_plan
1952                .as_ref()
1953                .map(|plan| plan.has_signal())
1954                .unwrap_or(false);
1955        self.plan_execution_active
1956            .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1957        let _plan_execution_guard = PlanExecutionGuard {
1958            flag: self.plan_execution_active.clone(),
1959        };
1960        let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1961
        // ── Direct answers: built-in responses that skip the model entirely ──
1963        if let Some(answer_kind) = intent.direct_answer {
1964            match answer_kind {
1965                DirectAnswerKind::About => {
1966                    let response = build_about_answer();
1967                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1968                        .await;
1969                    return Ok(());
1970                }
1971                DirectAnswerKind::LanguageCapability => {
1972                    let response = build_language_capability_answer();
1973                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1974                        .await;
1975                    return Ok(());
1976                }
1977                DirectAnswerKind::UnsafeWorkflowPressure => {
1978                    let response = build_unsafe_workflow_pressure_answer();
1979                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1980                        .await;
1981                    return Ok(());
1982                }
1983                DirectAnswerKind::SessionMemory => {
1984                    let response = build_session_memory_answer();
1985                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1986                        .await;
1987                    return Ok(());
1988                }
1989                DirectAnswerKind::RecoveryRecipes => {
1990                    let response = build_recovery_recipes_answer();
1991                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1992                        .await;
1993                    return Ok(());
1994                }
1995                DirectAnswerKind::McpLifecycle => {
1996                    let response = build_mcp_lifecycle_answer();
1997                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1998                        .await;
1999                    return Ok(());
2000                }
2001                DirectAnswerKind::AuthorizationPolicy => {
2002                    let response = build_authorization_policy_answer();
2003                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2004                        .await;
2005                    return Ok(());
2006                }
2007                DirectAnswerKind::ToolClasses => {
2008                    let response = build_tool_classes_answer();
2009                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2010                        .await;
2011                    return Ok(());
2012                }
2013                DirectAnswerKind::ToolRegistryOwnership => {
2014                    let response = build_tool_registry_ownership_answer();
2015                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2016                        .await;
2017                    return Ok(());
2018                }
2019                DirectAnswerKind::SessionResetSemantics => {
2020                    let response = build_session_reset_semantics_answer();
2021                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2022                        .await;
2023                    return Ok(());
2024                }
2025                DirectAnswerKind::ProductSurface => {
2026                    let response = build_product_surface_answer();
2027                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2028                        .await;
2029                    return Ok(());
2030                }
2031                DirectAnswerKind::ReasoningSplit => {
2032                    let response = build_reasoning_split_answer();
2033                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2034                        .await;
2035                    return Ok(());
2036                }
2037                DirectAnswerKind::Identity => {
2038                    let response = build_identity_answer();
2039                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2040                        .await;
2041                    return Ok(());
2042                }
2043                DirectAnswerKind::WorkflowModes => {
2044                    let response = build_workflow_modes_answer();
2045                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2046                        .await;
2047                    return Ok(());
2048                }
2049                DirectAnswerKind::GemmaNative => {
2050                    let response = build_gemma_native_answer();
2051                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2052                        .await;
2053                    return Ok(());
2054                }
2055                DirectAnswerKind::GemmaNativeSettings => {
2056                    let response = build_gemma_native_settings_answer();
2057                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2058                        .await;
2059                    return Ok(());
2060                }
2061                DirectAnswerKind::VerifyProfiles => {
2062                    let response = build_verify_profiles_answer();
2063                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2064                        .await;
2065                    return Ok(());
2066                }
2067                DirectAnswerKind::Toolchain => {
2068                    let lower = effective_user_input.to_lowercase();
2069                    let topic = if (lower.contains("voice output") || lower.contains("voice"))
2070                        && (lower.contains("lag")
2071                            || lower.contains("behind visible text")
2072                            || lower.contains("latency"))
2073                    {
2074                        "voice_latency_plan"
2075                    } else {
2076                        "all"
2077                    };
2078                    let response =
2079                        crate::tools::toolchain::describe_toolchain(&serde_json::json!({
2080                            "topic": topic,
2081                            "question": effective_user_input,
2082                        }))
2083                        .await
2084                        .unwrap_or_else(|e| format!("Error: {}", e));
2085                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2086                        .await;
2087                    return Ok(());
2088                }
2089                DirectAnswerKind::ArchitectSessionResetPlan => {
2090                    let plan = build_architect_session_reset_plan();
2091                    let response = plan.to_markdown();
2092                    let _ = crate::tools::plan::save_plan_handoff(&plan);
2093                    self.session_memory.current_plan = Some(plan);
2094                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2095                        .await;
2096                    return Ok(());
2097                }
2098            }
2099        }
2100
2101        if matches!(
2102            self.workflow_mode,
2103            WorkflowMode::Ask | WorkflowMode::ReadOnly
2104        ) && looks_like_mutation_request(&effective_user_input)
2105        {
2106            let response = build_mode_redirect_answer(self.workflow_mode);
2107            self.history.push(ChatMessage::user(&effective_user_input));
2108            self.history.push(ChatMessage::assistant_text(&response));
2109            self.transcript.log_user(&transcript_user_input);
2110            self.transcript.log_agent(&response);
2111            for chunk in chunk_text(&response, 8) {
2112                if !chunk.is_empty() {
2113                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2114                }
2115            }
2116            let _ = tx.send(InferenceEvent::Done).await;
2117            self.trim_history(80);
2118            self.refresh_session_memory();
2119            self.save_session();
2120            return Ok(());
2121        }
2122
2123        if user_input.trim() == "/think" {
2124            self.think_mode = Some(true);
2125            for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
2126                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2127            }
2128            let _ = tx.send(InferenceEvent::Done).await;
2129            return Ok(());
2130        }
2131        if user_input.trim() == "/no_think" {
2132            self.think_mode = Some(false);
2133            for chunk in chunk_text(
2134                "Think mode: OFF — fast mode enabled (no chain-of-thought).",
2135                8,
2136            ) {
2137                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2138            }
2139            let _ = tx.send(InferenceEvent::Done).await;
2140            return Ok(());
2141        }
2142
2143        // ── /pin: add file to active context ────────────────────────────────
2144        if user_input.trim_start().starts_with("/pin ") {
2145            let path = user_input.trim_start()[5..].trim();
2146            match std::fs::read_to_string(path) {
2147                Ok(content) => {
2148                    self.pinned_files
2149                        .lock()
2150                        .await
2151                        .insert(path.to_string(), content);
2152                    let msg = format!(
2153                        "Pinned: {} — this file is now locked in model context.",
2154                        path
2155                    );
2156                    for chunk in chunk_text(&msg, 8) {
2157                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
2158                    }
2159                }
2160                Err(e) => {
2161                    let _ = tx
2162                        .send(InferenceEvent::Error(format!(
2163                            "Failed to pin {}: {}",
2164                            path, e
2165                        )))
2166                        .await;
2167                }
2168            }
2169            let _ = tx.send(InferenceEvent::Done).await;
2170            return Ok(());
2171        }
2172
2173        // ── /unpin: remove file from active context ──────────────────────────
2174        if user_input.trim_start().starts_with("/unpin ") {
2175            let path = user_input.trim_start()[7..].trim();
2176            if self.pinned_files.lock().await.remove(path).is_some() {
2177                let msg = format!("Unpinned: {} — file removed from active context.", path);
2178                for chunk in chunk_text(&msg, 8) {
2179                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2180                }
2181            } else {
2182                let _ = tx
2183                    .send(InferenceEvent::Error(format!(
2184                        "File {} was not pinned.",
2185                        path
2186                    )))
2187                    .await;
2188            }
2189            let _ = tx.send(InferenceEvent::Done).await;
2190            return Ok(());
2191        }
2192
2193        // ── Normal processing ───────────────────────────────────────────────
2194
2195        // Build the base system prompt for this turn from the built-in tools and the already-discovered MCP tools (`mcp_tools`).
2196        let tiny_context_mode = self.engine.current_context_length() <= 8_192;
2197        let mut base_prompt = self.engine.build_system_prompt(
2198            self.snark,
2199            self.chaos,
2200            self.brief,
2201            self.professional,
2202            &self.tools,
2203            self.reasoning_history.as_deref(),
2204            &mcp_tools,
2205        );
2206        if !tiny_context_mode {
2207            if let Some(hint) = &config.context_hint {
2208                if !hint.trim().is_empty() {
2209                    base_prompt.push_str(&format!(
2210                        "\n\n# Project Context (from .hematite/settings.json)\n{}",
2211                        hint
2212                    ));
2213                }
2214            }
2215            if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
2216                &crate::tools::file_ops::workspace_root(),
2217            ) {
2218                base_prompt.push_str(&format!("\n\n{}", profile_block));
2219            }
2220            // L1: inject hot-files block if available (persists across sessions via vein.db).
2221            if let Some(ref l1) = self.l1_context {
2222                base_prompt.push_str(&format!("\n\n{}", l1));
2223            }
2224            if let Some(ref repo_map_block) = self.repo_map {
2225                base_prompt.push_str(&format!("\n\n{}", repo_map_block));
2226            }
2227        }
2228        let grounded_trace_mode = intent.grounded_trace_mode
2229            || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2230        let capability_mode =
2231            intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2232        let toolchain_mode =
2233            intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2234        let host_inspection_mode = intent.host_inspection_mode;
2235        let maintainer_workflow_mode = intent.maintainer_workflow_mode
2236            || preferred_maintainer_workflow(&effective_user_input).is_some();
2237        let workspace_workflow_mode = intent.workspace_workflow_mode
2238            || preferred_workspace_workflow(&effective_user_input).is_some();
2239        let fix_plan_mode =
2240            preferred_host_inspection_topic(&effective_user_input) == Some("fix_plan");
2241        let architecture_overview_mode = intent.architecture_overview_mode;
2242        let capability_needs_repo = intent.capability_needs_repo;
2243        let mut system_msg = build_system_with_corrections(
2244            &base_prompt,
2245            &self.correction_hints,
2246            &self.gpu_state,
2247            &self.git_state,
2248            &config,
2249        );
2250        if tiny_context_mode {
2251            system_msg.push_str(
2252                "\n\n# TINY CONTEXT TURN MODE\n\
2253                 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2254            );
2255        }
2256        if !tiny_context_mode && grounded_trace_mode {
2257            system_msg.push_str(
2258                "\n\n# GROUNDED TRACE MODE\n\
2259                 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2260                 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2261                 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2262                 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2263                 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2264                 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2265                 Do not invent names such as synthetic channels or subsystems.\n\
2266                 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2267                For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2268            );
2269        }
2270        if !tiny_context_mode && capability_mode {
2271            system_msg.push_str(
2272                "\n\n# CAPABILITY QUESTION MODE\n\
2273                 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2274                 Answer from stable Hematite capabilities and current runtime state.\n\
2275                 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2276                 Do NOT call repo-inspection tools like `read_file` or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2277                 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2278                 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2279                 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2280                 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2281                 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2282                 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2283                 Keep the answer short, plain, and ASCII-first.\n"
2284            );
2285        }
2286        if !tiny_context_mode && toolchain_mode {
2287            system_msg.push_str(
2288                "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2289                 This turn is about Hematite's real built-in tools and how to choose them.\n\
2290                 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2291                 Use only real built-in tool names.\n\
2292                 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2293                 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2294                 Be explicit about which tools are optional or conditional.\n"
2295            );
2296        }
2297        if !tiny_context_mode && host_inspection_mode {
2298            system_msg.push_str(
2299                 "\n\n# HOST INSPECTION MODE\n\
2300                   This turn is about the local machine and environment, not repository architecture.\n\
2301                 Prefer `inspect_host` before raw `shell` for PATH analysis, installed developer tool versions, environment/package-manager health, grounded fix plans, network snapshots, service snapshots, process snapshots, desktop item counts, Downloads summaries, listening ports, repo-doctor checks, and directory/disk-size reports.\n\
2302                 Use the closest built-in topic first: `summary`, `toolchains`, `path`, `env_doctor`, `fix_plan`, `network`, `services`, `processes`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
2303                 If the user asks how to fix a common workstation problem such as `cargo not found`, `port 3000 already in use`, or `LM Studio not reachable`, use `fix_plan` first instead of `env_doctor`, `path`, or `ports`.\n\
2304                 If `env_doctor` answers the question, stop there. Do not follow with `path` unless the user explicitly asks for raw PATH entries.\n\
2305                 Only use `shell` if the host question truly goes beyond `inspect_host`.\n"
2306              );
2307        }
2308        if !tiny_context_mode && fix_plan_mode {
2309            system_msg.push_str(
2310                "\n\n# FIX PLAN MODE\n\
2311                 This turn is a workstation remediation question, not just a diagnosis question.\n\
2312                 Call `inspect_host` with `topic=fix_plan` first.\n\
2313                 Do not start with `path`, `toolchains`, `env_doctor`, or `ports` unless the user explicitly asks for diagnosis details instead of a fix plan.\n\
2314                 Keep the answer grounded, stepwise, and approval-aware.\n"
2315            );
2316        }
2317        if !tiny_context_mode && maintainer_workflow_mode {
2318            system_msg.push_str(
2319                "\n\n# HEMATITE MAINTAINER WORKFLOW MODE\n\
2320                 This turn asks Hematite to run one of Hematite's own maintainer workflows, not invent an ad hoc shell command.\n\
2321                 Prefer `run_hematite_maintainer_workflow` for existing Hematite workflows such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`.\n\
2322                 Use workflow `clean` for cleanup, workflow `package_windows` for rebuilding the local portable or installer, and workflow `release` for the normal version bump/tag/push/publish flow.\n\
2323                 Do not treat this as a generic current-workspace script runner. Only fall back to raw `shell` if the user asks for a script or command outside those Hematite maintainer workflows.\n"
2324            );
2325        }
2326        if !tiny_context_mode && workspace_workflow_mode {
2327            system_msg.push_str(
2328                "\n\n# WORKSPACE WORKFLOW MODE\n\
2329                 This turn asks Hematite to run something in the active project workspace, not in Hematite's own source tree.\n\
2330                 Prefer `run_workspace_workflow` for the current project's build, test, lint, fix, package scripts, just/task/make targets, local repo scripts, or an exact workspace command.\n\
2331                 This tool always runs from the locked workspace root.\n\
2332                 If no real project workspace is locked, say so and tell the user to relaunch Hematite in the target project directory.\n\
2333                 Do not use `run_hematite_maintainer_workflow` unless the request is specifically about Hematite's own cleanup, packaging, or release scripts.\n"
2334            );
2335        }
2336
2337        if !tiny_context_mode && architecture_overview_mode {
2338            system_msg.push_str(
2339                "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2340                 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow.\n\
2341                 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2342                 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2343            );
2344        }
2345
2346        // ── Inject Workflow Mode Guidance ────────────────────────────────────
2347        system_msg.push_str(&format!(
2348            "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2349            self.workflow_mode.label()
2350        ));
2351        if tiny_context_mode {
2352            system_msg
2353                .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2354        } else {
2355            match self.workflow_mode {
2356                WorkflowMode::Auto => system_msg.push_str(
2357                    "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2358                ),
2359                WorkflowMode::Ask => system_msg.push_str(
2360                    "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2361                ),
2362                WorkflowMode::Code => system_msg.push_str(
2363                    "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2364                ),
2365                WorkflowMode::Architect => system_msg.push_str(
2366                    "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2367                ),
2368                WorkflowMode::ReadOnly => system_msg.push_str(
2369                    "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2370                ),
2371                WorkflowMode::Teach => system_msg.push_str(
2372                    "TEACH means you are a senior technician giving the user a grounded, numbered walkthrough. \
2373                     MANDATORY PROTOCOL for every admin/config/write task:\n\
2374                     1. Call inspect_host with the most relevant topic(s) FIRST to observe the actual machine state.\n\
2375                     2. Then deliver a numbered step-by-step tutorial that references what you actually observed — exact commands, exact paths, exact values.\n\
2376                     3. End with a verification step the user can run to confirm success.\n\
2377                     4. Do NOT execute write operations yourself. You are the teacher; the user performs the steps.\n\
2378                     5. Treat the user as capable — give precise instructions, not hedged warnings.\n\
2379                     Relevant inspect_host topics for common tasks: hardware (driver installs), security (firewall), ssh (SSH keys), wsl (WSL setup), env (PATH/env vars), services (service config), recent_crashes (troubleshooting), disk_health (storage issues).\n",
2380                ),
2381                WorkflowMode::Chat => {} // replaced by build_chat_system_prompt below
2382            }
2383        }
2384        if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2385            system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2386            system_msg.push_str(architect_handoff_contract());
2387            system_msg.push('\n');
2388        }
2389        if !tiny_context_mode && implement_current_plan {
2390            system_msg.push_str(
2391                "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2392                 The user explicitly asked you to implement the current saved plan.\n\
2393                 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2394                 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2395                 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2396                 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2397            );
2398            if let Some(plan) = self.session_memory.current_plan.as_ref() {
2399                if !plan.target_files.is_empty() {
2400                    system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2401                    for path in &plan.target_files {
2402                        system_msg.push_str(&format!("- {}\n", path));
2403                    }
2404                }
2405            }
2406        }
2407        if !tiny_context_mode {
2408            let pinned = self.pinned_files.lock().await;
2409            if !pinned.is_empty() {
2410                system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2411                system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
2412                for (path, content) in pinned.iter() {
2413                    system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2414                }
2415            }
2416        }
2417        if !tiny_context_mode {
2418            self.append_session_handoff(&mut system_msg);
2419        }
2420        // In chat mode, replace the full harness prompt with a clean conversational surface.
2421        // The harness prompt (built above) is discarded — Rusty personality takes over.
2422        let system_msg = if self.workflow_mode.is_chat() {
2423            self.build_chat_system_prompt()
2424        } else {
2425            system_msg
2426        };
2427        if self.history.is_empty() || self.history[0].role != "system" {
2428            self.history.insert(0, ChatMessage::system(&system_msg));
2429        } else {
2430            self.history[0] = ChatMessage::system(&system_msg);
2431        }
2432
2433        // Ensure a clean state for the new turn.
2434        self.cancel_token
2435            .store(false, std::sync::atomic::Ordering::SeqCst);
2436
2437        // [Official Gemma-4 Spec] Purge reasoning history for new user turns.
2438        // History from previous turns must not be fed back into the prompt to prevent duplication.
2439        self.reasoning_history = None;
2440
2441        let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2442        let user_content = match self.think_mode {
2443            Some(true) => format!("/think\n{}", effective_user_input),
2444            Some(false) => format!("/no_think\n{}", effective_user_input),
2445            // For non-Gemma models (Qwen etc.) default to /think so the model uses
2446            // hybrid thinking — it decides how much reasoning each turn needs.
2447            // Gemma handles reasoning via <|think|> in the system prompt instead.
2448            // Chat mode and quick tool calls skip /think — fast direct answers.
2449            None if !is_gemma
2450                && !self.workflow_mode.is_chat()
2451                && !is_quick_tool_request(&effective_user_input) =>
2452            {
2453                format!("/think\n{}", effective_user_input)
2454            }
2455            None => effective_user_input.clone(),
2456        };
2457        if let Some(image) = user_turn.attached_image.as_ref() {
2458            let image_url =
2459                crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2460                    .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2461            self.history
2462                .push(ChatMessage::user_with_image(&user_content, &image_url));
2463        } else {
2464            self.history.push(ChatMessage::user(&user_content));
2465        }
2466        self.transcript.log_user(&transcript_user_input);
2467
2468        // Incremental re-index and Vein context injection. Ordinary chat mode
2469        // still skips repo-snippet noise, but docs-only workspaces and explicit
2470        // session-recall prompts should keep Vein memory available.
2471        let vein_docs_only = self.vein_docs_only_mode();
2472        let allow_vein_context = !self.workflow_mode.is_chat()
2473            || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2474        let (vein_context, vein_paths) = if allow_vein_context {
2475            self.refresh_vein_index();
2476            let _ = tx
2477                .send(InferenceEvent::VeinStatus {
2478                    file_count: self.vein.file_count(),
2479                    embedded_count: self.vein.embedded_chunk_count(),
2480                    docs_only: vein_docs_only,
2481                })
2482                .await;
2483            match self.build_vein_context(&effective_user_input) {
2484                Some((ctx, paths)) => (Some(ctx), paths),
2485                None => (None, Vec::new()),
2486            }
2487        } else {
2488            (None, Vec::new())
2489        };
2490        if !vein_paths.is_empty() {
2491            let _ = tx
2492                .send(InferenceEvent::VeinContext { paths: vein_paths })
2493                .await;
2494        }
2495
2496        // Route: pick fast vs think model based on the complexity of this request.
2497        let routed_model = route_model(
2498            &effective_user_input,
2499            effective_fast.as_deref(),
2500            effective_think.as_deref(),
2501        )
2502        .map(|s| s.to_string());
2503
2504        let mut loop_intervention: Option<String> = None;
2505
2506        // ── Harness pre-run: multi-topic host inspection ─────────────────────
2507        // When the user asks for 2+ distinct inspect_host topics in one message,
2508        // run them all here and inject the combined results as a loop_intervention
2509        // so the model receives data instead of having to orchestrate tool calls.
2510        // This prevents the model from collapsing multiple topics into a generic
2511        // one, burning the tool loop budget, or retrying via shell.
2512        {
2513            let topics = crate::agent::routing::all_host_inspection_topics(&effective_user_input);
2514            if topics.len() >= 2 {
2515                let _ = tx
2516                    .send(InferenceEvent::Thought(format!(
2517                        "Harness pre-run: {} host inspection topics detected — running all before model turn.",
2518                        topics.len()
2519                    )))
2520                    .await;
2521
2522                let topic_list = topics.join(", ");
2523                let mut combined = format!(
2524                    "## HARNESS PRE-RUN RESULTS\n\
2525                     The harness already ran inspect_host for the following topics: {topic_list}.\n\
2526                     Use the tool results in context to answer. Do NOT repeat these tool calls.\n\n"
2527                );
2528
2529                let mut tool_calls = Vec::new();
2530                let mut tool_msgs = Vec::new();
2531
2532                for topic in &topics {
2533                    let call_id = format!("prerun_{topic}");
2534                    let args_val = serde_json::json!({ "topic": *topic, "max_entries": 20 });
2535                    let args_str = serde_json::to_string(&args_val).unwrap_or_default();
2536
2537                    tool_calls.push(crate::agent::inference::ToolCallResponse {
2538                        id: call_id.clone(),
2539                        call_type: "function".to_string(),
2540                        function: crate::agent::inference::ToolCallFn {
2541                            name: "inspect_host".to_string(),
2542                            arguments: args_str,
2543                        },
2544                    });
2545
2546                    let label = format!("### inspect_host(topic=\"{topic}\")\n");
2547                    let _ = tx
2548                        .send(InferenceEvent::ToolCallStart {
2549                            id: call_id.clone(),
2550                            name: "inspect_host".to_string(),
2551                            args: format!("inspect host {topic}"),
2552                        })
2553                        .await;
2554
2555                    match crate::tools::host_inspect::inspect_host(&args_val).await {
2556                        Ok(out) => {
2557                            let _ = tx
2558                                .send(InferenceEvent::ToolCallResult {
2559                                    id: call_id.clone(),
2560                                    name: "inspect_host".to_string(),
2561                                    output: out.chars().take(300).collect::<String>() + "...",
2562                                    is_error: false,
2563                                })
2564                                .await;
2565                            combined.push_str(&label);
2566                            combined.push_str(&out);
2567                            combined.push_str("\n\n");
2568                            tool_msgs.push(ChatMessage::tool_result_for_model(
2569                                &call_id,
2570                                "inspect_host",
2571                                &out,
2572                                &self.engine.current_model(),
2573                            ));
2574                        }
2575                        Err(e) => {
2576                            let err_msg = format!("Error: {e}");
2577                            combined.push_str(&label);
2578                            combined.push_str(&err_msg);
2579                            combined.push_str("\n\n");
2580                            tool_msgs.push(ChatMessage::tool_result_for_model(
2581                                &call_id,
2582                                "inspect_host",
2583                                &err_msg,
2584                                &self.engine.current_model(),
2585                            ));
2586                        }
2587                    }
2588                }
2589
2590                // Add the simulated turn to history so the model sees it as context.
2591                self.history
2592                    .push(ChatMessage::assistant_tool_calls("", tool_calls));
2593                for msg in tool_msgs {
2594                    self.history.push(msg);
2595                }
2596
2597                loop_intervention = Some(combined);
2598            }
2599        }
2600
2601        // ── Computation Integrity: nudge model toward run_code for precise math ──
2602        // When the query involves exact numeric computation (hashes, financial math,
2603        // statistics, date arithmetic, unit conversions, algorithmic checks), inject
2604        // a brief pre-turn reminder so the model reaches for run_code instead of
2605        // answering from training-data memory. Only fires when no harness pre-run
2606        // already set a loop_intervention.
2607        if loop_intervention.is_none() && needs_computation_sandbox(&effective_user_input) {
2608            loop_intervention = Some(
2609                "COMPUTATION INTEGRITY NOTICE: This query involves precise numeric computation. \
2610                 Do NOT answer from training-data memory — memory answers for math are guesses. \
2611                 Use `run_code` to compute the real result and return the actual output. \
2612                 IMPORTANT: the `run_code` tool defaults to JavaScript (Deno). \
2613                 If you write Python code, you MUST pass `language: \"python\"` explicitly. \
2614                 If you write JavaScript/TypeScript, omit the language field or pass `language: \"javascript\"`. \
2615                 Write the code, run it, return the result."
2616                    .to_string(),
2617            );
2618        }
2619
2620        let mut implementation_started = false;
2621        let mut non_mutating_plan_steps = 0usize;
2622        let non_mutating_plan_soft_cap = 5usize;
2623        let non_mutating_plan_hard_cap = 8usize;
2624        let mut overview_runtime_trace: Option<String> = None;
2625
2626        // Safety cap – never spin forever on a broken model.
2627        let max_iters = 25;
2628        let mut consecutive_errors = 0;
2629        let mut first_iter = true;
2630        let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2631        // Track identical tool results within this turn to detect logical loops.
2632        let _result_counts: std::collections::HashMap<String, usize> =
2633            std::collections::HashMap::new();
2634        // Track the count of identical (name, args) calls to detect infinite tool loops.
2635        let mut repeat_counts: std::collections::HashMap<String, usize> =
2636            std::collections::HashMap::new();
2637        let mut completed_tool_cache: std::collections::HashMap<String, CachedToolResult> =
2638            std::collections::HashMap::new();
2639        let mut successful_read_targets: std::collections::HashSet<String> =
2640            std::collections::HashSet::new();
2641        // (path, offset) pairs — catches repeated reads at the same non-zero offset.
2642        let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2643            std::collections::HashSet::new();
2644        let mut successful_grep_targets: std::collections::HashSet<String> =
2645            std::collections::HashSet::new();
2646        let mut no_match_grep_targets: std::collections::HashSet<String> =
2647            std::collections::HashSet::new();
2648        let mut broad_grep_targets: std::collections::HashSet<String> =
2649            std::collections::HashSet::new();
2650
2651        // Track the index of the message that started THIS turn, so compaction doesn't summarize it.
2652        let mut turn_anchor = self.history.len().saturating_sub(1);
2653
2654        for _iter in 0..max_iters {
2655            let mut mutation_occurred = false;
2656            // Priority Check: External Cancellation (via Esc key in TUI)
2657            if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2658                self.cancel_token
2659                    .store(false, std::sync::atomic::Ordering::SeqCst);
2660                let _ = tx
2661                    .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2662                    .await;
2663                let _ = tx.send(InferenceEvent::Done).await;
2664                return Ok(());
2665            }
2666
2667            // ── Intelligence Surge: Proactive Compaction Check ──────────────────────
2668            if self
2669                .compact_history_if_needed(&tx, Some(turn_anchor))
2670                .await?
2671            {
2672                // After compaction, history is [system, summary, turn_anchor, ...]
2673                // The new turn_anchor is index 2.
2674                turn_anchor = 2;
2675            }
2676
2677            // On the first iteration inject Vein context into the system message
2678            // (skipped when implementing the current plan). Subsequent iterations use
2679            // the plain slice — tool results are now in history so Vein context would be redundant.
2680            let inject_vein = first_iter && !implement_current_plan;
2681            let messages = if implement_current_plan {
2682                first_iter = false;
2683                self.context_window_slice_from(turn_anchor)
2684            } else {
2685                first_iter = false;
2686                self.context_window_slice()
2687            };
2688
2689            // Use the canonical system prompt from history[0] which was built
2690            // by InferenceEngine::build_system_prompt() + build_system_with_corrections()
2691            // and includes GPU state, git context, permissions, and instruction files.
2692            let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2693                // Gemma 4 handles multiple system messages natively.
2694                // Standard models (Qwen, etc.) reject a second system message — merge into history[0].
2695                if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2696                    let mut msgs = vec![self.history[0].clone()];
2697                    msgs.push(ChatMessage::system(&intervention));
2698                    msgs
2699                } else {
2700                    let merged =
2701                        format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2702                    vec![ChatMessage::system(&merged)]
2703                }
2704            } else {
2705                vec![self.history[0].clone()]
2706            };
2707
2708            // Inject Vein context into the system message on the first iteration.
2709            // Vein results are merged in the same way as loop_intervention so standard
2710            // models (Qwen etc.) only ever see one system message.
2711            if inject_vein {
2712                if let Some(ref ctx) = vein_context.as_ref() {
2713                    if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2714                        prompt_msgs.push(ChatMessage::system(ctx));
2715                    } else {
2716                        let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2717                        prompt_msgs[0] = ChatMessage::system(&merged);
2718                    }
2719                }
2720            }
2721            prompt_msgs.extend(messages);
2722            if let Some(budget_note) =
2723                enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2724            {
2725                self.emit_operator_checkpoint(
2726                    &tx,
2727                    OperatorCheckpointState::BudgetReduced,
2728                    budget_note,
2729                )
2730                .await;
2731                let recipe = plan_recovery(
2732                    RecoveryScenario::PromptBudgetPressure,
2733                    &self.recovery_context,
2734                );
2735                self.emit_recovery_recipe_summary(
2736                    &tx,
2737                    recipe.recipe.scenario.label(),
2738                    compact_recovery_plan_summary(&recipe),
2739                )
2740                .await;
2741            }
2742            self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2743                .await;
2744
2745            let (mut text, mut tool_calls, usage, finish_reason) = match self
2746                .engine
2747                .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2748                .await
2749            {
2750                Ok(result) => result,
2751                Err(e) => {
2752                    let class = classify_runtime_failure(&e);
2753                    if should_retry_runtime_failure(class) {
2754                        if self.recovery_context.consume_transient_retry() {
2755                            let label = match class {
2756                                RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2757                                _ => "empty_model_response",
2758                            };
2759                            self.transcript.log_system(&format!(
2760                                "Automatic provider recovery triggered: {}",
2761                                e.trim()
2762                            ));
2763                            self.emit_recovery_recipe_summary(
2764                                &tx,
2765                                label,
2766                                compact_runtime_recovery_summary(class),
2767                            )
2768                            .await;
2769                            let _ = tx
2770                                .send(InferenceEvent::ProviderStatus {
2771                                    state: ProviderRuntimeState::Recovering,
2772                                    summary: compact_runtime_recovery_summary(class).into(),
2773                                })
2774                                .await;
2775                            self.emit_operator_checkpoint(
2776                                &tx,
2777                                OperatorCheckpointState::RecoveringProvider,
2778                                compact_runtime_recovery_summary(class),
2779                            )
2780                            .await;
2781                            continue;
2782                        }
2783                    }
2784
2785                    self.emit_runtime_failure(&tx, class, &e).await;
2786                    break;
2787                }
2788            };
2789            self.emit_provider_live(&tx).await;
2790
2791            // Update TUI token counter with actual usage from LM Studio.
2792            if let Some(ref u) = usage {
2793                let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2794            }
2795
2796            // Fallback safety net: if native tool markup leaked past the inference-layer
2797            // extractor, recover it here instead of treating it as plain assistant text.
2798            if tool_calls
2799                .as_ref()
2800                .map(|calls| calls.is_empty())
2801                .unwrap_or(true)
2802            {
2803                if let Some(raw_text) = text.as_deref() {
2804                    let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2805                    if !native_calls.is_empty() {
2806                        tool_calls = Some(native_calls);
2807                        let stripped =
2808                            crate::agent::inference::strip_native_tool_call_text(raw_text);
2809                        text = if stripped.trim().is_empty() {
2810                            None
2811                        } else {
2812                            Some(stripped)
2813                        };
2814                    }
2815                }
2816            }
2817
2818            // Treat empty tool_calls arrays (Some(vec![])) the same as None –
2819            // the model returned text only; an empty array causes an infinite loop.
2820            let tool_calls = tool_calls.filter(|c| !c.is_empty());
2821            let near_context_ceiling = usage
2822                .as_ref()
2823                .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2824                .unwrap_or(false);
2825
2826            if let Some(calls) = tool_calls {
2827                let (calls, prune_trace_note) =
2828                    prune_architecture_trace_batch(calls, architecture_overview_mode);
2829                if let Some(note) = prune_trace_note {
2830                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2831                }
2832
2833                let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2834                    calls,
2835                    self.workflow_mode.is_read_only(),
2836                    architecture_overview_mode,
2837                );
2838                if let Some(note) = prune_bloat_note {
2839                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2840                }
2841
2842                let (calls, prune_note) = prune_authoritative_tool_batch(
2843                    calls,
2844                    grounded_trace_mode,
2845                    &effective_user_input,
2846                );
2847                if let Some(note) = prune_note {
2848                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2849                }
2850
2851                let (calls, prune_redir_note) = prune_redirected_shell_batch(calls);
2852                if let Some(note) = prune_redir_note {
2853                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2854                }
2855
2856                let (calls, batch_note) = order_batch_reads_first(calls);
2857                if let Some(note) = batch_note {
2858                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2859                }
2860
2861                if let Some(repeated_path) = calls
2862                    .iter()
2863                    .filter(|c| {
2864                        let parsed = serde_json::from_str::<Value>(
2865                            &crate::agent::inference::normalize_tool_argument_string(
2866                                &c.function.name,
2867                                &c.function.arguments,
2868                            ),
2869                        )
2870                        .ok();
2871                        let offset = parsed
2872                            .as_ref()
2873                            .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2874                            .unwrap_or(0);
2875                        // Catch re-reads near the top (offset < 200, original behaviour) AND
2876                        // repeated reads at the exact same larger offset (catches targeted loops).
2877                        if offset < 200 {
2878                            return true;
2879                        }
2880                        if let Some(path) = parsed
2881                            .as_ref()
2882                            .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2883                        {
2884                            let normalized = normalize_workspace_path(path);
2885                            return successful_read_regions.contains(&(normalized, offset));
2886                        }
2887                        false
2888                    })
2889                    .filter_map(|c| repeated_read_target(&c.function))
2890                    .find(|path| successful_read_targets.contains(path))
2891                {
2892                    loop_intervention = Some(format!(
2893                        "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2894                        repeated_path
2895                    ));
2896                    let _ = tx
2897                        .send(InferenceEvent::Thought(
2898                            "Read discipline: preventing repeated full-file reads on the same path."
2899                                .into(),
2900                        ))
2901                        .await;
2902                    continue;
2903                }
2904
2905                if capability_mode
2906                    && !capability_needs_repo
2907                    && calls
2908                        .iter()
2909                        .all(|c| is_capability_probe_tool(&c.function.name))
2910                {
2911                    loop_intervention = Some(
2912                        "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2913                         Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2914                         Do not mention raw `mcp__*` names unless they are active and directly relevant."
2915                            .to_string(),
2916                    );
2917                    let _ = tx
2918                        .send(InferenceEvent::Thought(
2919                            "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2920                                .into(),
2921                        ))
2922                        .await;
2923                    continue;
2924                }
2925
2926                // VOCAL AGENT: If the model provided reasoning alongside tools,
2927                // stream it to the SPECULAR panel now using the hardened extraction.
2928                let raw_content = text.as_deref().unwrap_or(" ");
2929
2930                if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2931                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2932                    // Reasoning is silent (hidden in SPECULAR only).
2933                    self.reasoning_history = Some(thought);
2934                }
2935
2936                // [Gemma-4 Protocol] Keep raw content (including thoughts) during tool loops.
2937                // Thoughts are only stripped before the 'final' user turn.
2938                let stored_tool_call_content = if implement_current_plan {
2939                    cap_output(raw_content, 1200)
2940                } else {
2941                    raw_content.to_string()
2942                };
2943                self.history.push(ChatMessage::assistant_tool_calls(
2944                    &stored_tool_call_content,
2945                    calls.clone(),
2946                ));
2947
2948                // ── LAYER 4: Parallel Tool Orchestration (Batching) ────────────────────
2949                let mut results = Vec::new();
2950                let gemma4_model =
2951                    crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2952                let latest_user_prompt = self.latest_user_prompt();
2953                let mut seen_call_keys = std::collections::HashSet::new();
2954                let mut deduped_calls = Vec::new();
2955                for call in calls.clone() {
2956                    let (normalized_name, normalized_args) = normalized_tool_call_for_execution(
2957                        &call.function.name,
2958                        &call.function.arguments,
2959                        gemma4_model,
2960                        latest_user_prompt,
2961                    );
2962                    let key = canonical_tool_call_key(&normalized_name, &normalized_args);
2963                    if seen_call_keys.insert(key) {
2964                        let repeat_guard_exempt = matches!(
2965                            normalized_name.as_str(),
2966                            "verify_build" | "git_commit" | "git_push"
2967                        );
2968                        if !repeat_guard_exempt {
2969                            if let Some(cached) = completed_tool_cache
2970                                .get(&canonical_tool_call_key(&normalized_name, &normalized_args))
2971                            {
2972                                let _ = tx
2973                                    .send(InferenceEvent::Thought(
2974                                        "Cached tool result reused: identical built-in invocation already completed earlier in this turn."
2975                                            .to_string(),
2976                                    ))
2977                                    .await;
2978                                loop_intervention = Some(format!(
2979                                    "STOP. You already called `{}` with identical arguments earlier in this turn and already have that result in conversation history. Do not call it again. Use the existing result to answer or choose a different next step.",
2980                                    cached.tool_name
2981                                ));
2982                                continue;
2983                            }
2984                        }
2985                        deduped_calls.push(call);
2986                    } else {
2987                        let _ = tx
2988                            .send(InferenceEvent::Thought(
2989                                "Duplicate tool call skipped: identical built-in invocation already ran this turn."
2990                                    .to_string(),
2991                            ))
2992                            .await;
2993                    }
2994                }
2995
2996                // Partition tool calls: Parallel Read vs Serial Mutating
2997                let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = deduped_calls
2998                    .into_iter()
2999                    .partition(|c| is_parallel_safe(&c.function.name));
3000
3001                // 1. Concurrent Execution (ParallelRead)
3002                if !parallel_calls.is_empty() {
3003                    let mut tasks = Vec::new();
3004                    for call in parallel_calls {
3005                        let tx_clone = tx.clone();
3006                        let config_clone = config.clone();
3007                        // Carry the real call ID into the outcome
3008                        let call_with_id = call.clone();
3009                        tasks.push(self.process_tool_call(
3010                            call_with_id.function,
3011                            config_clone,
3012                            yolo,
3013                            tx_clone,
3014                            call_with_id.id,
3015                        ));
3016                    }
3017                    // Run the parallel-safe calls concurrently and wait for all of them to finish.
3018                    results.extend(futures::future::join_all(tasks).await);
3019                }
3020
3021                // 2. Sequential Execution (SerialMutating)
3022                for call in serial_calls {
3023                    results.push(
3024                        self.process_tool_call(
3025                            call.function,
3026                            config.clone(),
3027                            yolo,
3028                            tx.clone(),
3029                            call.id,
3030                        )
3031                        .await,
3032                    );
3033                }
3034
3035                // 3. Collate Messages into History & UI
3036                let mut authoritative_tool_output: Option<String> = None;
3037                let mut blocked_policy_output: Option<String> = None;
3038                let mut recoverable_policy_intervention: Option<String> = None;
3039                let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
3040                let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
3041                    None;
3042                for res in results {
3043                    let call_id = res.call_id.clone();
3044                    let tool_name = res.tool_name.clone();
3045                    let final_output = res.output.clone();
3046                    let is_error = res.is_error;
3047                    for msg in res.msg_results {
3048                        self.history.push(msg);
3049                    }
3050
3051                    // Update State for Verification Loop
3052                    if matches!(
3053                        tool_name.as_str(),
3054                        "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
3055                    ) {
3056                        mutation_occurred = true;
3057                        implementation_started = true;
3058                        // Heat tracking: bump L1 score for the edited file.
3059                        if !is_error {
3060                            let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
3061                            if !path.is_empty() {
3062                                self.vein.bump_heat(path);
3063                                self.l1_context = self.vein.l1_context();
3064                            }
3065                            // Refresh repo map so PageRank accounts for the new edit.
3066                            self.refresh_repo_map();
3067                        }
3068                    }
3069
3070                    if tool_name == "verify_build" {
3071                        self.record_session_verification(
3072                            !is_error
3073                                && (final_output.contains("BUILD OK")
3074                                    || final_output.contains("BUILD SUCCESS")
3075                                    || final_output.contains("BUILD OKAY")),
3076                            if is_error {
3077                                "Explicit verify_build failed."
3078                            } else {
3079                                "Explicit verify_build passed."
3080                            },
3081                        );
3082                    }
3083
3084                    // Update Repeat Guard
3085                    let call_key = format!(
3086                        "{}:{}",
3087                        tool_name,
3088                        serde_json::to_string(&res.args).unwrap_or_default()
3089                    );
3090                    let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
3091                    *repeat_count += 1;
3092
3093                    // verify_build is legitimately called multiple times in fix-verify loops; git_commit and git_push are exempted from the repeat guard as well.
3094                    let repeat_guard_exempt = matches!(
3095                        tool_name.as_str(),
3096                        "verify_build" | "git_commit" | "git_push"
3097                    );
3098                    if *repeat_count >= 3 && !repeat_guard_exempt {
3099                        loop_intervention = Some(format!(
3100                            "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
3101                             Do not call it again. Either answer directly from what you already know, \
3102                             use a different tool or approach, or ask the user for clarification.",
3103                            tool_name, *repeat_count
3104                        ));
3105                        let _ = tx
3106                            .send(InferenceEvent::Thought(format!(
3107                                "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
3108                                tool_name, *repeat_count
3109                            )))
3110                            .await;
3111                    }
3112
3113                    if is_error {
3114                        consecutive_errors += 1;
3115                    } else {
3116                        consecutive_errors = 0;
3117                    }
3118
3119                    if consecutive_errors >= 3 {
3120                        loop_intervention = Some(
3121                            "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
3122                             STOP all tool calls immediately. Analyze why your previous 3 calls failed \
3123                             (check for hallucinations or invalid arguments) and ask the user for \
3124                             clarification if you cannot proceed.".to_string()
3125                        );
3126                    }
3127
3128                    if consecutive_errors >= 4 {
3129                        self.emit_runtime_failure(
3130                            &tx,
3131                            RuntimeFailureClass::ToolLoop,
3132                            "Hard termination: too many consecutive tool errors.",
3133                        )
3134                        .await;
3135                        return Ok(());
3136                    }
3137
3138                    let _ = tx
3139                        .send(InferenceEvent::ToolCallResult {
3140                            id: call_id.clone(),
3141                            name: tool_name.clone(),
3142                            output: final_output.clone(),
3143                            is_error,
3144                        })
3145                        .await;
3146
3147                    let repeat_guard_exempt = matches!(
3148                        tool_name.as_str(),
3149                        "verify_build" | "git_commit" | "git_push"
3150                    );
3151                    if !repeat_guard_exempt {
3152                        completed_tool_cache.insert(
3153                            canonical_tool_call_key(&tool_name, &res.args),
3154                            CachedToolResult {
3155                                tool_name: tool_name.clone(),
3156                            },
3157                        );
3158                    }
3159
3160                    // Cap output before history
3161                    let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
3162                        self.engine.current_context_length(),
3163                    );
3164                    let capped = if implement_current_plan {
3165                        cap_output(&final_output, 1200)
3166                    } else if compact_ctx
3167                        && (tool_name == "read_file" || tool_name == "inspect_lines")
3168                    {
3169                        // Compact context: cap file reads tightly and add a navigation hint on truncation.
3170                        let limit = 3000usize;
3171                        if final_output.len() > limit {
3172                            let total_lines = final_output.lines().count();
3173                            let mut split_at = limit;
3174                            while !final_output.is_char_boundary(split_at) && split_at > 0 {
3175                                split_at -= 1;
3176                            }
3177                            let scratch = write_output_to_scratch(&final_output, &tool_name)
3178                                .map(|p| format!(" Full file also saved to '{p}'."))
3179                                .unwrap_or_default();
3180                            format!(
3181                                "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.{}]",
3182                                &final_output[..split_at],
3183                                total_lines,
3184                                total_lines.saturating_sub(150),
3185                                scratch,
3186                            )
3187                        } else {
3188                            final_output.clone()
3189                        }
3190                    } else {
3191                        cap_output_for_tool(&final_output, 8000, &tool_name)
3192                    };
3193                    self.history.push(ChatMessage::tool_result_for_model(
3194                        &call_id,
3195                        &tool_name,
3196                        &capped,
3197                        &self.engine.current_model(),
3198                    ));
3199
3200                    if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
3201                    {
3202                        overview_runtime_trace =
3203                            Some(summarize_runtime_trace_output(&final_output));
3204                    }
3205
3206                    if !architecture_overview_mode
3207                        && !is_error
3208                        && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
3209                            || (toolchain_mode && tool_name == "describe_toolchain"))
3210                    {
3211                        authoritative_tool_output = Some(final_output.clone());
3212                    }
3213
3214                    if !is_error && tool_name == "read_file" {
3215                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3216                            let normalized = normalize_workspace_path(path);
3217                            let read_offset =
3218                                res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
3219                            successful_read_targets.insert(normalized.clone());
3220                            successful_read_regions.insert((normalized.clone(), read_offset));
3221                        }
3222                    }
3223
3224                    if !is_error && tool_name == "grep_files" {
3225                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3226                            let normalized = normalize_workspace_path(path);
3227                            if final_output.starts_with("No matches for ") {
3228                                no_match_grep_targets.insert(normalized);
3229                            } else if grep_output_is_high_fanout(&final_output) {
3230                                broad_grep_targets.insert(normalized);
3231                            } else {
3232                                successful_grep_targets.insert(normalized);
3233                            }
3234                        }
3235                    }
3236
3237                    if is_error
3238                        && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
3239                        && (final_output.contains("search string not found")
3240                            || final_output.contains("search string is too short")
3241                            || final_output.contains("search string matched"))
3242                    {
3243                        if let Some(target) = action_target_path(&tool_name, &res.args) {
3244                            let guidance = if final_output.contains("matched") {
3245                                format!(
3246                                    "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
3247                                    tool_name, target
3248                                )
3249                            } else {
3250                                format!(
3251                                    "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
3252                                    tool_name, target
3253                                )
3254                            };
3255                            loop_intervention = Some(guidance);
3256                            *repeat_count = 0;
3257                        }
3258                    }
3259
3260                    // When guard.rs blocks a shell call with the run_code redirect hint,
3261                    // force the model to recover with run_code instead of giving up.
3262                    if is_error
3263                        && tool_name == "shell"
3264                        && final_output.contains("Use the run_code tool instead")
3265                        && loop_intervention.is_none()
3266                    {
3267                        loop_intervention = Some(
3268                            "STOP. Shell was blocked because this is a computation task. \
3269                             You MUST use `run_code` now — write the code and run it. \
3270                             Do NOT output an error message or give up. \
3271                             Call `run_code` with the appropriate language and code to compute the answer. \
3272                             If writing Python, pass `language: \"python\"`. \
3273                             If writing JavaScript, omit language or pass `language: \"javascript\"`."
3274                                .to_string(),
3275                        );
3276                    }
3277
3278                    // When run_code fails with a Deno parse error, the model likely sent Python
3279                    // code without specifying language: "python". Force a corrective retry.
3280                    if is_error
3281                        && tool_name == "run_code"
3282                        && (final_output.contains("source code could not be parsed")
3283                            || final_output.contains("Expected ';'")
3284                            || final_output.contains("Expected '}'")
3285                            || final_output.contains("is not defined")
3286                                && final_output.contains("deno"))
3287                        && loop_intervention.is_none()
3288                    {
3289                        loop_intervention = Some(
3290                            "STOP. run_code failed with a JavaScript parse error — you likely wrote Python \
3291                             code but forgot to pass `language: \"python\"`. \
3292                             Retry run_code with `language: \"python\"` and the same code. \
3293                             Do NOT fall back to shell. Do NOT give up."
3294                                .to_string(),
3295                        );
3296                    }
3297
3298                    if res.blocked_by_policy
3299                        && is_mcp_workspace_read_tool(&tool_name)
3300                        && recoverable_policy_intervention.is_none()
3301                    {
3302                        recoverable_policy_intervention = Some(
3303                            "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
3304                        );
3305                        recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
3306                        recoverable_policy_checkpoint = Some((
3307                            OperatorCheckpointState::BlockedPolicy,
3308                            "MCP workspace read blocked; rerouting to built-in file tools."
3309                                .to_string(),
3310                        ));
3311                    } else if res.blocked_by_policy
3312                        && implement_current_plan
3313                        && is_current_plan_irrelevant_tool(&tool_name)
3314                        && recoverable_policy_intervention.is_none()
3315                    {
3316                        recoverable_policy_intervention = Some(format!(
3317                            "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
3318                            tool_name
3319                        ));
3320                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
3321                        recoverable_policy_checkpoint = Some((
3322                            OperatorCheckpointState::BlockedPolicy,
3323                            format!(
3324                                "Current-plan execution blocked unrelated tool `{}`.",
3325                                tool_name
3326                            ),
3327                        ));
3328                    } else if res.blocked_by_policy
3329                        && implement_current_plan
3330                        && final_output.contains("requires recent file evidence")
3331                        && recoverable_policy_intervention.is_none()
3332                    {
3333                        let target = action_target_path(&tool_name, &res.args)
3334                            .unwrap_or_else(|| "the target file".to_string());
3335                        recoverable_policy_intervention = Some(format!(
3336                            "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
3337                        ));
3338                        recoverable_policy_recipe =
3339                            Some(RecoveryScenario::RecentFileEvidenceMissing);
3340                        recoverable_policy_checkpoint = Some((
3341                            OperatorCheckpointState::BlockedRecentFileEvidence,
3342                            format!("Edit blocked on `{target}`; recent file evidence missing."),
3343                        ));
3344                    } else if res.blocked_by_policy
3345                        && implement_current_plan
3346                        && final_output.contains("requires an exact local line window first")
3347                        && recoverable_policy_intervention.is_none()
3348                    {
3349                        let target = action_target_path(&tool_name, &res.args)
3350                            .unwrap_or_else(|| "the target file".to_string());
3351                        recoverable_policy_intervention = Some(format!(
3352                            "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
3353                        ));
3354                        recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
3355                        recoverable_policy_checkpoint = Some((
3356                            OperatorCheckpointState::BlockedExactLineWindow,
3357                            format!("Edit blocked on `{target}`; exact line window required."),
3358                        ));
3359                    } else if res.blocked_by_policy
3360                        && (final_output.contains("Prefer `")
3361                            || final_output.contains("Prefer tool"))
3362                        && recoverable_policy_intervention.is_none()
3363                    {
3364                        recoverable_policy_intervention = Some(final_output.clone());
3365                        recoverable_policy_recipe = Some(RecoveryScenario::PolicyCorrection);
3366                        recoverable_policy_checkpoint = Some((
3367                            OperatorCheckpointState::BlockedPolicy,
3368                            "Action blocked by policy; self-correction triggered using tool recommendation."
3369                                .to_string(),
3370                        ));
3371                    } else if res.blocked_by_policy && blocked_policy_output.is_none() {
3372                        blocked_policy_output = Some(final_output.clone());
3373                    }
3374
3375                    if *repeat_count >= 5 {
3376                        let _ = tx.send(InferenceEvent::Done).await;
3377                        return Ok(());
3378                    }
3379
3380                    if implement_current_plan
3381                        && !implementation_started
3382                        && !is_error
3383                        && is_non_mutating_plan_step_tool(&tool_name)
3384                    {
3385                        non_mutating_plan_steps += 1;
3386                    }
3387                }
3388
3389                if let Some(intervention) = recoverable_policy_intervention {
3390                    if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
3391                        self.emit_operator_checkpoint(&tx, state, summary).await;
3392                    }
3393                    if let Some(scenario) = recoverable_policy_recipe.take() {
3394                        let recipe = plan_recovery(scenario, &self.recovery_context);
3395                        self.emit_recovery_recipe_summary(
3396                            &tx,
3397                            recipe.recipe.scenario.label(),
3398                            compact_recovery_plan_summary(&recipe),
3399                        )
3400                        .await;
3401                    }
3402                    loop_intervention = Some(intervention);
3403                    let _ = tx
3404                        .send(InferenceEvent::Thought(
3405                            "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
3406                                .into(),
3407                        ))
3408                        .await;
3409                    continue;
3410                }
3411
3412                if architecture_overview_mode {
3413                    match overview_runtime_trace.as_deref() {
3414                        Some(runtime_trace) => {
3415                            let response = build_architecture_overview_answer(runtime_trace);
3416                            self.history.push(ChatMessage::assistant_text(&response));
3417                            self.transcript.log_agent(&response);
3418
3419                            for chunk in chunk_text(&response, 8) {
3420                                if !chunk.is_empty() {
3421                                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
3422                                }
3423                            }
3424
3425                            let _ = tx.send(InferenceEvent::Done).await;
3426                            break;
3427                        }
3428                        None => {
3429                            loop_intervention = Some(
3430                                "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
3431                                    .to_string(),
3432                            );
3433                            continue;
3434                        }
3435                    }
3436                }
3437
3438                if implement_current_plan
3439                    && !implementation_started
3440                    && non_mutating_plan_steps >= non_mutating_plan_hard_cap
3441                {
3442                    let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
3443                    self.history.push(ChatMessage::assistant_text(&msg));
3444                    self.transcript.log_agent(&msg);
3445
3446                    for chunk in chunk_text(&msg, 8) {
3447                        if !chunk.is_empty() {
3448                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3449                        }
3450                    }
3451
3452                    let _ = tx.send(InferenceEvent::Done).await;
3453                    break;
3454                }
3455
3456                if let Some(blocked_output) = blocked_policy_output {
3457                    self.emit_operator_checkpoint(
3458                        &tx,
3459                        OperatorCheckpointState::BlockedPolicy,
3460                        "A blocked tool path was surfaced directly to the operator.",
3461                    )
3462                    .await;
3463                    self.history
3464                        .push(ChatMessage::assistant_text(&blocked_output));
3465                    self.transcript.log_agent(&blocked_output);
3466
3467                    for chunk in chunk_text(&blocked_output, 8) {
3468                        if !chunk.is_empty() {
3469                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3470                        }
3471                    }
3472
3473                    let _ = tx.send(InferenceEvent::Done).await;
3474                    break;
3475                }
3476
3477                if let Some(tool_output) = authoritative_tool_output {
3478                    self.history.push(ChatMessage::assistant_text(&tool_output));
3479                    self.transcript.log_agent(&tool_output);
3480
3481                    for chunk in chunk_text(&tool_output, 8) {
3482                        if !chunk.is_empty() {
3483                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3484                        }
3485                    }
3486
3487                    let _ = tx.send(InferenceEvent::Done).await;
3488                    break;
3489                }
3490
3491                if implement_current_plan && !implementation_started {
3492                    let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3493                    if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3494                        loop_intervention = Some(format!(
3495                            "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3496                            base
3497                        ));
3498                    } else {
3499                        loop_intervention = Some(base.to_string());
3500                    }
3501                } else if self.workflow_mode == WorkflowMode::Architect {
3502                    loop_intervention = Some(
3503                        format!(
3504                            "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3505                            architect_handoff_contract()
3506                        ),
3507                    );
3508                }
3509
3510                // 4. Auto-Verification Loop (The Perfect Bake)
3511                if mutation_occurred && !yolo {
3512                    let _ = tx
3513                        .send(InferenceEvent::Thought(
3514                            "Self-Verification: Running 'cargo check' to ensure build integrity..."
3515                                .into(),
3516                        ))
3517                        .await;
3518                    let verify_res = self.auto_verify_build().await;
3519                    let verify_ok = verify_res.contains("BUILD SUCCESS");
3520                    self.record_verify_build_result(verify_ok, &verify_res)
3521                        .await;
3522                    self.record_session_verification(
3523                        verify_ok,
3524                        if verify_ok {
3525                            "Automatic build verification passed."
3526                        } else {
3527                            "Automatic build verification failed."
3528                        },
3529                    );
3530                    self.history.push(ChatMessage::system(&format!(
3531                        "\n# SYSTEM VERIFICATION\n{verify_res}"
3532                    )));
3533                    let _ = tx
3534                        .send(InferenceEvent::Thought(
3535                            "Verification turn injected into history.".into(),
3536                        ))
3537                        .await;
3538                }
3539
3540                // Continue loop – the model will respond to the results.
3541                continue;
3542            } else if let Some(response_text) = text {
3543                if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3544                    if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3545                        let cleaned = build_session_reset_semantics_answer();
3546                        self.history.push(ChatMessage::assistant_text(&cleaned));
3547                        self.transcript.log_agent(&cleaned);
3548                        for chunk in chunk_text(&cleaned, 8) {
3549                            if !chunk.is_empty() {
3550                                let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3551                            }
3552                        }
3553                        let _ = tx.send(InferenceEvent::Done).await;
3554                        break;
3555                    }
3556
3557                    let warning = format_runtime_failure(
3558                        RuntimeFailureClass::ContextWindow,
3559                        "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3560                    );
3561                    self.history.push(ChatMessage::assistant_text(&warning));
3562                    self.transcript.log_agent(&warning);
3563                    let _ = tx
3564                        .send(InferenceEvent::Thought(
3565                            "Length recovery: model hit the context ceiling before completing the answer."
3566                                .into(),
3567                        ))
3568                        .await;
3569                    for chunk in chunk_text(&warning, 8) {
3570                        if !chunk.is_empty() {
3571                            let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3572                        }
3573                    }
3574                    let _ = tx.send(InferenceEvent::Done).await;
3575                    break;
3576                }
3577
3578                if response_text.contains("<|tool_call")
3579                    || response_text.contains("[END_TOOL_REQUEST]")
3580                    || response_text.contains("<|tool_response")
3581                    || response_text.contains("<tool_response|>")
3582                {
3583                    loop_intervention = Some(
3584                        "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3585                    );
3586                    continue;
3587                }
3588
3589                // 1. Process and route the reasoning block to SPECULAR.
3590                if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3591                {
3592                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3593                    // Persist for history audit (stripped from next turn by Volatile Reasoning rule).
3594                    // This will be summarized in the next turn's system prompt.
3595                    self.reasoning_history = Some(thought);
3596                }
3597
3598                // 2. Process and stream the final answer to the chat interface.
3599                let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3600
3601                if implement_current_plan && !implementation_started {
3602                    loop_intervention = Some(
3603                        "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3604                    );
3605                    continue;
3606                }
3607
3608                // [Hardened Interface] Strictly respect the stripper.
3609                // If it's empty after stripping think blocks, the model thought through its
3610                // answer but forgot to emit it (common with Qwen3 models in architect/ask mode).
3611                // Nudge it rather than silently dropping the turn.
3612                if cleaned.is_empty() {
3613                    loop_intervention = Some(
3614                        "Your response was empty after your reasoning. You must now emit your complete written response based on your analysis. Do not reason further — write your answer now."
3615                            .to_string(),
3616                    );
3617                    continue;
3618                }
3619
3620                let architect_handoff = self.persist_architect_handoff(&cleaned);
3621                self.history.push(ChatMessage::assistant_text(&cleaned));
3622                self.transcript.log_agent(&cleaned);
3623
3624                // Send in smooth chunks for that professional UI feel.
3625                for chunk in chunk_text(&cleaned, 8) {
3626                    if !chunk.is_empty() {
3627                        let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3628                    }
3629                }
3630
3631                if let Some(plan) = architect_handoff.as_ref() {
3632                    let note = architect_handoff_operator_note(plan);
3633                    self.history.push(ChatMessage::system(&note));
3634                    self.transcript.log_system(&note);
3635                    let _ = tx
3636                        .send(InferenceEvent::MutedToken(format!("\n{}", note)))
3637                        .await;
3638                }
3639
3640                let _ = tx.send(InferenceEvent::Done).await;
3641                break;
3642            } else {
3643                let detail = "Model returned an empty response.";
3644                let class = classify_runtime_failure(detail);
3645                if should_retry_runtime_failure(class) {
3646                    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3647                        if let RecoveryDecision::Attempt(plan) =
3648                            attempt_recovery(scenario, &mut self.recovery_context)
3649                        {
3650                            self.transcript.log_system(
3651                                "Automatic provider recovery triggered: model returned an empty response.",
3652                            );
3653                            self.emit_recovery_recipe_summary(
3654                                &tx,
3655                                plan.recipe.scenario.label(),
3656                                compact_recovery_plan_summary(&plan),
3657                            )
3658                            .await;
3659                            let _ = tx
3660                                .send(InferenceEvent::ProviderStatus {
3661                                    state: ProviderRuntimeState::Recovering,
3662                                    summary: compact_runtime_recovery_summary(class).into(),
3663                                })
3664                                .await;
3665                            self.emit_operator_checkpoint(
3666                                &tx,
3667                                OperatorCheckpointState::RecoveringProvider,
3668                                compact_runtime_recovery_summary(class),
3669                            )
3670                            .await;
3671                            continue;
3672                        }
3673                    }
3674                }
3675
3676                self.emit_runtime_failure(&tx, class, detail).await;
3677                break;
3678            }
3679        }
3680
3681        self.trim_history(80);
3682        self.refresh_session_memory();
3683        // Record the goal and increment the turn counter before persisting.
3684        self.last_goal = Some(user_input.chars().take(300).collect());
3685        self.turn_count = self.turn_count.saturating_add(1);
3686        self.save_session();
3687        self.emit_compaction_pressure(&tx).await;
3688        Ok(())
3689    }
3690
3691    async fn emit_runtime_failure(
3692        &mut self,
3693        tx: &mpsc::Sender<InferenceEvent>,
3694        class: RuntimeFailureClass,
3695        detail: &str,
3696    ) {
3697        if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3698            let decision = preview_recovery_decision(scenario, &self.recovery_context);
3699            self.emit_recovery_recipe_summary(
3700                tx,
3701                scenario.label(),
3702                compact_recovery_decision_summary(&decision),
3703            )
3704            .await;
3705            let needs_refresh = match &decision {
3706                RecoveryDecision::Attempt(plan) => plan
3707                    .recipe
3708                    .steps
3709                    .contains(&RecoveryStep::RefreshRuntimeProfile),
3710                RecoveryDecision::Escalate { recipe, .. } => {
3711                    recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3712                }
3713            };
3714            if needs_refresh {
3715                if let Some((model_id, context_length, changed)) = self
3716                    .refresh_runtime_profile_and_report(tx, "context_window_failure")
3717                    .await
3718                {
3719                    let note = if changed {
3720                        format!(
3721                            "Runtime refresh after context-window failure: model {} | CTX {}",
3722                            model_id, context_length
3723                        )
3724                    } else {
3725                        format!(
3726                            "Runtime refresh after context-window failure confirms model {} | CTX {}",
3727                            model_id, context_length
3728                        )
3729                    };
3730                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3731                }
3732            }
3733        }
3734        if let Some(state) = provider_state_for_runtime_failure(class) {
3735            let _ = tx
3736                .send(InferenceEvent::ProviderStatus {
3737                    state,
3738                    summary: compact_runtime_failure_summary(class).into(),
3739                })
3740                .await;
3741        }
3742        if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3743            self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3744                .await;
3745        }
3746        let formatted = format_runtime_failure(class, detail);
3747        self.history.push(ChatMessage::system(&format!(
3748            "# RUNTIME FAILURE\n{}",
3749            formatted
3750        )));
3751        self.transcript.log_system(&formatted);
3752        let _ = tx.send(InferenceEvent::Error(formatted)).await;
3753        let _ = tx.send(InferenceEvent::Done).await;
3754    }
3755
3756    /// [Task Analyzer] Run 'cargo check' and return a concise summary for the model.
3757    async fn auto_verify_build(&self) -> String {
3758        match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3759            Ok(out) => {
3760                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3761                    + &cap_output(&out, 2000)
3762            }
3763            Err(e) => format!(
3764                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3765                cap_output(&e, 2000)
3766            ),
3767        }
3768    }
3769
3770    /// Triggers an LLM call to summarize old messages if history exceeds the VRAM character limit.
3771    /// Triggers the Deterministic Smart Compaction algorithm to shrink history while preserving context.
3772    /// Triggers the Recursive Context Compactor.
3773    async fn compact_history_if_needed(
3774        &mut self,
3775        tx: &mpsc::Sender<InferenceEvent>,
3776        anchor_index: Option<usize>,
3777    ) -> Result<bool, String> {
3778        let vram_ratio = self.gpu_state.ratio();
3779        let context_length = self.engine.current_context_length();
3780        let config = CompactionConfig::adaptive(context_length, vram_ratio);
3781
3782        if !compaction::should_compact(&self.history, context_length, vram_ratio) {
3783            return Ok(false);
3784        }
3785
3786        let _ = tx
3787            .send(InferenceEvent::Thought(format!(
3788                "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
3789                context_length / 1000,
3790                vram_ratio * 100.0,
3791                config.max_estimated_tokens / 1000,
3792            )))
3793            .await;
3794
3795        let result = compaction::compact_history(
3796            &self.history,
3797            self.running_summary.as_deref(),
3798            config,
3799            anchor_index,
3800        );
3801
3802        let removed_message_count = self.history.len().saturating_sub(result.messages.len());
3803        self.history = result.messages;
3804        self.running_summary = result.summary;
3805
3806        // Layer 6: Memory Synthesis (Task Context Persistence)
3807        let previous_memory = self.session_memory.clone();
3808        self.session_memory = compaction::extract_memory(&self.history);
3809        self.session_memory
3810            .inherit_runtime_ledger_from(&previous_memory);
3811        self.session_memory.record_compaction(
3812            removed_message_count,
3813            format!(
3814                "Compacted history around active task '{}' and preserved {} working-set file(s).",
3815                self.session_memory.current_task,
3816                self.session_memory.working_set.len()
3817            ),
3818        );
3819        self.emit_compaction_pressure(tx).await;
3820
3821        // Jinja alignment: preserved slice may start with assistant/tool messages.
3822        // Strip any leading non-user messages so the first non-system message is always user.
3823        let first_non_sys = self
3824            .history
3825            .iter()
3826            .position(|m| m.role != "system")
3827            .unwrap_or(self.history.len());
3828        if first_non_sys < self.history.len() {
3829            if let Some(user_offset) = self.history[first_non_sys..]
3830                .iter()
3831                .position(|m| m.role == "user")
3832            {
3833                if user_offset > 0 {
3834                    self.history
3835                        .drain(first_non_sys..first_non_sys + user_offset);
3836                }
3837            }
3838        }
3839
3840        let _ = tx
3841            .send(InferenceEvent::Thought(format!(
3842                "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
3843                self.session_memory.current_task,
3844                self.session_memory.working_set.len()
3845            )))
3846            .await;
3847        let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
3848        self.emit_recovery_recipe_summary(
3849            tx,
3850            recipe.recipe.scenario.label(),
3851            compact_recovery_plan_summary(&recipe),
3852        )
3853        .await;
3854        self.emit_operator_checkpoint(
3855            tx,
3856            OperatorCheckpointState::HistoryCompacted,
3857            format!(
3858                "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
3859                self.session_memory.current_task,
3860                self.session_memory.working_set.len()
3861            ),
3862        )
3863        .await;
3864
3865        Ok(true)
3866    }
3867
3868    /// Query The Vein for context relevant to the user's message.
3869    /// Runs hybrid BM25 + semantic search (semantic requires embedding model in LM Studio).
3870    /// Returns a formatted system message string, or None if nothing useful found.
3871    fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3872        // Skip trivial / very short inputs.
3873        if query.trim().split_whitespace().count() < 3 {
3874            return None;
3875        }
3876
3877        let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3878        if results.is_empty() {
3879            return None;
3880        }
3881
3882        let semantic_active = self.vein.has_any_embeddings();
3883        let header = if semantic_active {
3884            "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3885             Use this to answer without needing extra read_file calls where possible.\n\n"
3886        } else {
3887            "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3888             Use this to answer without needing extra read_file calls where possible.\n\n"
3889        };
3890
3891        let mut ctx = String::from(header);
3892        let mut paths: Vec<String> = Vec::new();
3893
3894        let mut total = 0usize;
3895        const MAX_CTX_CHARS: usize = 1_500;
3896
3897        for r in results {
3898            if total >= MAX_CTX_CHARS {
3899                break;
3900            }
3901            let snippet = if r.content.len() > 500 {
3902                format!("{}...", &r.content[..500])
3903            } else {
3904                r.content.clone()
3905            };
3906            ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3907            total += snippet.len() + r.path.len() + 10;
3908            if !paths.contains(&r.path) {
3909                paths.push(r.path);
3910            }
3911        }
3912
3913        Some((ctx, paths))
3914    }
3915
3916    /// Returns the conversation history (WITHOUT the system prompt) for the context window.
3917    /// This ensures we don't have redundant system blocks and prevents Jinja crashes.
3918    fn context_window_slice(&self) -> Vec<ChatMessage> {
3919        let mut result = Vec::new();
3920
3921        // Skip index 0 (the raw system message) and any stray system messages in history.
3922        if self.history.len() > 1 {
3923            for m in &self.history[1..] {
3924                if m.role == "system" {
3925                    continue;
3926                }
3927
3928                let mut sanitized = m.clone();
3929                // DEEP SANITIZE: LM Studio Jinja templates for Qwen crash on truly empty content.
3930                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3931                    sanitized.content = MessageContent::Text(" ".into());
3932                }
3933                result.push(sanitized);
3934            }
3935        }
3936
3937        // Jinja Guard: The first message after the system prompt MUST be 'user'.
3938        // If not (e.g. because of compaction), we insert a tiny anchor.
3939        if !result.is_empty() && result[0].role != "user" {
3940            result.insert(0, ChatMessage::user("Continuing previous context..."));
3941        }
3942
3943        result
3944    }
3945
3946    fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
3947        let mut result = Vec::new();
3948
3949        if self.history.len() > 1 {
3950            let start = start_idx.max(1).min(self.history.len());
3951            for m in &self.history[start..] {
3952                if m.role == "system" {
3953                    continue;
3954                }
3955
3956                let mut sanitized = m.clone();
3957                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3958                    sanitized.content = MessageContent::Text(" ".into());
3959                }
3960                result.push(sanitized);
3961            }
3962        }
3963
3964        if !result.is_empty() && result[0].role != "user" {
3965            result.insert(0, ChatMessage::user("Continuing current plan execution..."));
3966        }
3967
3968        result
3969    }
3970
3971    /// Drop old turns from the middle of history.
3972    fn trim_history(&mut self, max_messages: usize) {
3973        if self.history.len() <= max_messages {
3974            return;
3975        }
3976        // Always keep [0] (system prompt).
3977        let excess = self.history.len() - max_messages;
3978        self.history.drain(1..=excess);
3979    }
3980
3981    /// P1: Attempt to fix malformed JSON tool arguments by asking the model to re-output them.
3982    async fn repair_tool_args(
3983        &self,
3984        tool_name: &str,
3985        bad_json: &str,
3986        tx: &mpsc::Sender<InferenceEvent>,
3987    ) -> Result<Value, String> {
3988        let _ = tx
3989            .send(InferenceEvent::Thought(format!(
3990                "Attempting to repair malformed JSON for '{}'...",
3991                tool_name
3992            )))
3993            .await;
3994
3995        let prompt = format!(
3996            "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
3997            tool_name, bad_json
3998        );
3999
4000        let messages = vec![
4001            ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
4002            ChatMessage::user(&prompt),
4003        ];
4004
4005        // Use fast model for speed if available.
4006        let (text, _, _, _) = self
4007            .engine
4008            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4009            .await
4010            .map_err(|e| e.to_string())?;
4011
4012        let cleaned = text
4013            .unwrap_or_default()
4014            .trim()
4015            .trim_start_matches("```json")
4016            .trim_start_matches("```")
4017            .trim_end_matches("```")
4018            .trim()
4019            .to_string();
4020
4021        serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
4022    }
4023
4024    /// P2: Run a fast validation step after file writes to check for subtle logic errors.
4025    async fn run_critic_check(
4026        &self,
4027        path: &str,
4028        content: &str,
4029        tx: &mpsc::Sender<InferenceEvent>,
4030    ) -> Option<String> {
4031        // Only run for source code files.
4032        let ext = std::path::Path::new(path)
4033            .extension()
4034            .and_then(|e| e.to_str())
4035            .unwrap_or("");
4036        const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
4037        if !CRITIC_EXTS.contains(&ext) {
4038            return None;
4039        }
4040
4041        let _ = tx
4042            .send(InferenceEvent::Thought(format!(
4043                "CRITIC: Reviewing changes to '{}'...",
4044                path
4045            )))
4046            .await;
4047
4048        let truncated = cap_output(content, 4000);
4049
4050        let prompt = format!(
4051            "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
4052            path, ext, truncated
4053        );
4054
4055        let messages = vec![
4056            ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
4057            ChatMessage::user(&prompt)
4058        ];
4059
4060        let (text, _, _, _) = self
4061            .engine
4062            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4063            .await
4064            .ok()?;
4065
4066        let critique = text?.trim().to_string();
4067        if critique.to_uppercase().contains("PASS") || critique.is_empty() {
4068            None
4069        } else {
4070            Some(critique)
4071        }
4072    }
4073}
4074
4075// ── Tool dispatcher ───────────────────────────────────────────────────────────
4076
4077pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
4078    dispatch_builtin_tool(name, args).await
4079}
4080
/// Turns raw prompt text into an issue description for a `fix_plan` request.
///
/// Surrounding whitespace is removed, then a leading `/think` or `/no_think`
/// directive (if present) is stripped along with any whitespace that follows
/// it. Returns `None` when nothing is left.
fn normalize_fix_plan_issue_text(text: &str) -> Option<String> {
    let body = text.trim();

    let without_directive = match body.strip_prefix("/think") {
        Some(rest) => rest,
        None => body.strip_prefix("/no_think").unwrap_or(body),
    };

    // A full trim also removes the newline(s) that usually follow a directive.
    let issue = without_directive.trim();
    if issue.is_empty() {
        None
    } else {
        Some(issue.to_string())
    }
}
4092
4093fn fill_missing_fix_plan_issue(tool_name: &str, args: &mut Value, fallback_issue: Option<&str>) {
4094    if tool_name != "inspect_host" {
4095        return;
4096    }
4097
4098    let Some(topic) = args.get("topic").and_then(|v| v.as_str()) else {
4099        return;
4100    };
4101    if topic != "fix_plan" {
4102        return;
4103    }
4104
4105    let issue_missing = args
4106        .get("issue")
4107        .and_then(|v| v.as_str())
4108        .map(str::trim)
4109        .is_none_or(|value| value.is_empty());
4110    if !issue_missing {
4111        return;
4112    }
4113
4114    let Some(fallback_issue) = fallback_issue.and_then(normalize_fix_plan_issue_text) else {
4115        return;
4116    };
4117
4118    let Value::Object(map) = args else {
4119        return;
4120    };
4121    map.insert(
4122        "issue".to_string(),
4123        Value::String(fallback_issue.to_string()),
4124    );
4125}
4126
4127fn should_rewrite_shell_to_fix_plan(
4128    tool_name: &str,
4129    args: &Value,
4130    latest_user_prompt: Option<&str>,
4131) -> bool {
4132    if tool_name != "shell" {
4133        return false;
4134    }
4135    let Some(prompt) = latest_user_prompt else {
4136        return false;
4137    };
4138    if preferred_host_inspection_topic(prompt) != Some("fix_plan") {
4139        return false;
4140    }
4141    let command = args
4142        .get("command")
4143        .and_then(|value| value.as_str())
4144        .unwrap_or("");
4145    shell_looks_like_structured_host_inspection(command)
4146}
4147
4148fn extract_release_arg(command: &str, flag: &str) -> Option<String> {
4149    let pattern = format!(r#"(?i){}\s+['"]?([^'" \r\n]+)['"]?"#, regex::escape(flag));
4150    let regex = regex::Regex::new(&pattern).ok()?;
4151    let captures = regex.captures(command)?;
4152    captures.get(1).map(|m| m.as_str().to_string())
4153}
4154
4155fn infer_maintainer_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4156    let workflow = preferred_maintainer_workflow(prompt)?;
4157    let lower = prompt.to_ascii_lowercase();
4158    match workflow {
4159        "clean" => Some(serde_json::json!({
4160            "workflow": "clean",
4161            "deep": lower.contains("deep clean")
4162                || lower.contains("deep cleanup")
4163                || lower.contains("deep"),
4164            "reset": lower.contains("reset"),
4165            "prune_dist": lower.contains("prune dist")
4166                || lower.contains("prune old dist")
4167                || lower.contains("prune old artifacts")
4168                || lower.contains("old dist artifacts")
4169                || lower.contains("old artifacts"),
4170        })),
4171        "package_windows" => Some(serde_json::json!({
4172            "workflow": "package_windows",
4173            "installer": lower.contains("installer") || lower.contains("setup.exe"),
4174            "add_to_path": lower.contains("addtopath")
4175                || lower.contains("add to path")
4176                || lower.contains("update path")
4177                || lower.contains("refresh path"),
4178        })),
4179        "release" => {
4180            let version = regex::Regex::new(r#"(?i)\b(\d+\.\d+\.\d+)\b"#)
4181                .ok()
4182                .and_then(|re| re.captures(prompt))
4183                .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()));
4184            let bump = if lower.contains("patch") {
4185                Some("patch")
4186            } else if lower.contains("minor") {
4187                Some("minor")
4188            } else if lower.contains("major") {
4189                Some("major")
4190            } else {
4191                None
4192            };
4193            let mut args = serde_json::json!({
4194                "workflow": "release",
4195                "push": lower.contains(" push") || lower.starts_with("push ") || lower.contains(" and push"),
4196                "add_to_path": lower.contains("addtopath")
4197                    || lower.contains("add to path")
4198                    || lower.contains("update path"),
4199                "skip_installer": lower.contains("skip installer"),
4200                "publish_crates": lower.contains("publish crates") || lower.contains("crates.io"),
4201                "publish_voice_crate": lower.contains("publish voice crate")
4202                    || lower.contains("publish hematite-kokoros"),
4203            });
4204            if let Some(version) = version {
4205                args["version"] = Value::String(version);
4206            }
4207            if let Some(bump) = bump {
4208                args["bump"] = Value::String(bump.to_string());
4209            }
4210            Some(args)
4211        }
4212        _ => None,
4213    }
4214}
4215
4216fn infer_workspace_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4217    let workflow = preferred_workspace_workflow(prompt)?;
4218    let lower = prompt.to_ascii_lowercase();
4219    let trimmed = prompt.trim();
4220
4221    if let Some(command) = extract_workspace_command_from_prompt(trimmed) {
4222        return Some(serde_json::json!({
4223            "workflow": "command",
4224            "command": command,
4225        }));
4226    }
4227
4228    if let Some(path) = extract_workspace_script_path_from_prompt(trimmed) {
4229        return Some(serde_json::json!({
4230            "workflow": "script_path",
4231            "path": path,
4232        }));
4233    }
4234
4235    match workflow {
4236        "build" | "test" | "lint" | "fix" => Some(serde_json::json!({
4237            "workflow": workflow,
4238        })),
4239        "script" => {
4240            let package_script = if lower.contains("npm run ") {
4241                extract_word_after(&lower, "npm run ")
4242            } else if lower.contains("pnpm run ") {
4243                extract_word_after(&lower, "pnpm run ")
4244            } else if lower.contains("bun run ") {
4245                extract_word_after(&lower, "bun run ")
4246            } else if lower.contains("yarn ") {
4247                extract_word_after(&lower, "yarn ")
4248            } else {
4249                None
4250            };
4251
4252            if let Some(name) = package_script {
4253                return Some(serde_json::json!({
4254                    "workflow": "package_script",
4255                    "name": name,
4256                }));
4257            }
4258
4259            if let Some(name) = extract_word_after(&lower, "just ") {
4260                return Some(serde_json::json!({
4261                    "workflow": "just",
4262                    "name": name,
4263                }));
4264            }
4265            if let Some(name) = extract_word_after(&lower, "make ") {
4266                return Some(serde_json::json!({
4267                    "workflow": "make",
4268                    "name": name,
4269                }));
4270            }
4271            if let Some(name) = extract_word_after(&lower, "task ") {
4272                return Some(serde_json::json!({
4273                    "workflow": "task",
4274                    "name": name,
4275                }));
4276            }
4277
4278            None
4279        }
4280        _ => None,
4281    }
4282}
4283
/// Extracts an explicit workspace command (e.g. `cargo test`) from a prompt.
///
/// Known command prefixes are tried in a fixed priority order. For the first
/// prefix that occurs in the prompt (case-insensitively) at a word boundary,
/// everything from that occurrence to the end of the prompt is returned, with
/// surrounding whitespace and backticks removed. Returns `None` when no
/// prefix matches.
///
/// A match only counts when it is not preceded by a letter, digit, `_` or
/// `-`. Without that check, `"pnpm install"` would be returned as
/// `"npm install"` and `"multitask scheduler"` would yield `"task scheduler"`.
fn extract_workspace_command_from_prompt(prompt: &str) -> Option<String> {
    const COMMAND_PREFIXES: &[&str] = &[
        "cargo ",
        "npm ",
        "pnpm ",
        "yarn ",
        "bun ",
        "pytest",
        "go build",
        "go test",
        "make ",
        "just ",
        "task ",
        "./gradlew",
        ".\\gradlew",
    ];

    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '-'
    }

    // ASCII lowercasing never changes byte lengths, so byte offsets found in
    // `lower` are valid (and on character boundaries) in `prompt` as well.
    let lower = prompt.to_ascii_lowercase();

    COMMAND_PREFIXES.iter().find_map(|prefix| {
        lower
            .match_indices(*prefix)
            .map(|(index, _)| index)
            .find(|&index| !lower[..index].ends_with(is_word_char))
            .map(|index| prompt[index..].trim().trim_matches('`').to_string())
    })
}
4307
/// Finds the first `scripts/...` script path mentioned in a prompt or command.
///
/// Backslashes are treated as forward slashes. Each whitespace-separated
/// token has surrounding quotes, backticks, commas and parentheses removed,
/// trailing periods removed, and any leading `./` stripped. The first token
/// that then starts with `scripts/` and ends (case-insensitively) with a
/// known script extension is returned in its normalized form.
///
/// A leading period is deliberately not treated as punctuation: stripping it
/// would turn `./scripts/x.sh` into `/scripts/x.sh`, which never matches.
fn extract_workspace_script_path_from_prompt(prompt: &str) -> Option<String> {
    const SCRIPT_EXTENSIONS: &[&str] =
        &[".ps1", ".sh", ".py", ".cmd", ".bat", ".js", ".mjs", ".cjs"];

    let normalized = prompt.replace('\\', "/");
    normalized.split_whitespace().find_map(|token| {
        let candidate = token
            .trim_start_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | ')' | '('))
            .trim_end_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('))
            .trim_start_matches("./");
        if !candidate.starts_with("scripts/") {
            return None;
        }
        // Lowercase once per candidate rather than once per extension.
        let lowered = candidate.to_ascii_lowercase();
        SCRIPT_EXTENSIONS
            .iter()
            .any(|ext| lowered.ends_with(ext))
            .then(|| candidate.to_string())
    })
}
4324
/// Returns the first whitespace-delimited word that follows the first
/// occurrence of `prefix` in `haystack`.
///
/// Surrounding backticks, quotes, commas, periods and parentheses are removed
/// from the word. Returns `None` when `prefix` does not occur, when nothing
/// follows it, or when the following token consists only of such punctuation
/// (so callers never receive an empty name).
fn extract_word_after(haystack: &str, prefix: &str) -> Option<String> {
    let start = haystack.find(prefix)? + prefix.len();
    let word = haystack[start..]
        .split_whitespace()
        .next()?
        .trim_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('));
    (!word.is_empty()).then(|| word.to_string())
}
4338
4339fn rewrite_shell_to_maintainer_workflow_args(command: &str) -> Option<Value> {
4340    let lower = command.to_ascii_lowercase();
4341    if lower.contains("clean.ps1") {
4342        return Some(serde_json::json!({
4343            "workflow": "clean",
4344            "deep": lower.contains("-deep"),
4345            "reset": lower.contains("-reset"),
4346            "prune_dist": lower.contains("-prunedist"),
4347        }));
4348    }
4349    if lower.contains("package-windows.ps1") {
4350        return Some(serde_json::json!({
4351            "workflow": "package_windows",
4352            "installer": lower.contains("-installer"),
4353            "add_to_path": lower.contains("-addtopath"),
4354        }));
4355    }
4356    if lower.contains("release.ps1") {
4357        let version = extract_release_arg(command, "-Version");
4358        let bump = extract_release_arg(command, "-Bump");
4359        if version.is_none() && bump.is_none() {
4360            return Some(serde_json::json!({
4361                "workflow": "release"
4362            }));
4363        }
4364        let mut args = serde_json::json!({
4365            "workflow": "release",
4366            "push": lower.contains("-push"),
4367            "add_to_path": lower.contains("-addtopath"),
4368            "skip_installer": lower.contains("-skipinstaller"),
4369            "publish_crates": lower.contains("-publishcrates"),
4370            "publish_voice_crate": lower.contains("-publishvoicecrate"),
4371        });
4372        if let Some(version) = version {
4373            args["version"] = Value::String(version);
4374        }
4375        if let Some(bump) = bump {
4376            args["bump"] = Value::String(bump);
4377        }
4378        return Some(args);
4379    }
4380    None
4381}
4382
4383fn rewrite_shell_to_workspace_workflow_args(command: &str) -> Option<Value> {
4384    let lower = command.to_ascii_lowercase();
4385    if lower.contains("clean.ps1")
4386        || lower.contains("package-windows.ps1")
4387        || lower.contains("release.ps1")
4388    {
4389        return None;
4390    }
4391
4392    if let Some(path) = extract_workspace_script_path_from_prompt(command) {
4393        return Some(serde_json::json!({
4394            "workflow": "script_path",
4395            "path": path,
4396        }));
4397    }
4398
4399    let looks_like_workspace_command = [
4400        "cargo ",
4401        "npm ",
4402        "pnpm ",
4403        "yarn ",
4404        "bun ",
4405        "pytest",
4406        "go build",
4407        "go test",
4408        "make ",
4409        "just ",
4410        "task ",
4411        "./gradlew",
4412        ".\\gradlew",
4413    ]
4414    .iter()
4415    .any(|needle| lower.contains(needle));
4416
4417    if looks_like_workspace_command {
4418        Some(serde_json::json!({
4419            "workflow": "command",
4420            "command": command.trim(),
4421        }))
4422    } else {
4423        None
4424    }
4425}
4426
4427fn rewrite_host_tool_call(
4428    tool_name: &mut String,
4429    args: &mut Value,
4430    latest_user_prompt: Option<&str>,
4431) {
4432    if *tool_name == "shell" {
4433        let command = args
4434            .get("command")
4435            .and_then(|value| value.as_str())
4436            .unwrap_or("");
4437        if let Some(maintainer_workflow_args) = rewrite_shell_to_maintainer_workflow_args(command) {
4438            *tool_name = "run_hematite_maintainer_workflow".to_string();
4439            *args = maintainer_workflow_args;
4440            return;
4441        }
4442        if let Some(workspace_workflow_args) = rewrite_shell_to_workspace_workflow_args(command) {
4443            *tool_name = "run_workspace_workflow".to_string();
4444            *args = workspace_workflow_args;
4445            return;
4446        }
4447    }
4448    if *tool_name != "run_hematite_maintainer_workflow" {
4449        if let Some(prompt_args) =
4450            latest_user_prompt.and_then(infer_maintainer_workflow_args_from_prompt)
4451        {
4452            *tool_name = "run_hematite_maintainer_workflow".to_string();
4453            *args = prompt_args;
4454            return;
4455        }
4456    }
4457    if *tool_name != "run_workspace_workflow" {
4458        if let Some(prompt_args) =
4459            latest_user_prompt.and_then(infer_workspace_workflow_args_from_prompt)
4460        {
4461            *tool_name = "run_workspace_workflow".to_string();
4462            *args = prompt_args;
4463            return;
4464        }
4465    }
4466    if should_rewrite_shell_to_fix_plan(tool_name, args, latest_user_prompt) {
4467        *tool_name = "inspect_host".to_string();
4468        *args = serde_json::json!({
4469            "topic": "fix_plan"
4470        });
4471    }
4472    fill_missing_fix_plan_issue(tool_name, args, latest_user_prompt);
4473}
4474
4475fn canonical_tool_call_key(tool_name: &str, args: &Value) -> String {
4476    format!(
4477        "{}:{}",
4478        tool_name,
4479        serde_json::to_string(args).unwrap_or_default()
4480    )
4481}
4482
4483fn normalized_tool_call_for_execution(
4484    tool_name: &str,
4485    raw_arguments: &str,
4486    gemma4_model: bool,
4487    latest_user_prompt: Option<&str>,
4488) -> (String, Value) {
4489    let normalized_arguments = if gemma4_model {
4490        crate::agent::inference::normalize_tool_argument_string(tool_name, raw_arguments)
4491    } else {
4492        raw_arguments.to_string()
4493    };
4494    let mut normalized_name = tool_name.to_string();
4495    let mut args = serde_json::from_str::<Value>(&normalized_arguments)
4496        .unwrap_or(Value::Object(Default::default()));
4497    rewrite_host_tool_call(&mut normalized_name, &mut args, latest_user_prompt);
4498    (normalized_name, args)
4499}
4500
/// Test-only helper: returns the canonical key of a raw tool call after it
/// has been normalized the same way as for execution.
#[cfg(test)]
fn normalized_tool_call_key_for_dedupe(
    tool_name: &str,
    raw_arguments: &str,
    gemma4_model: bool,
    latest_user_prompt: Option<&str>,
) -> String {
    let (resolved_name, resolved_args) = normalized_tool_call_for_execution(
        tool_name,
        raw_arguments,
        gemma4_model,
        latest_user_prompt,
    );
    canonical_tool_call_key(&resolved_name, &resolved_args)
}
4516
4517impl ConversationManager {
4518    /// Checks if a tool call is authorized given the current configuration and mode.
4519    fn check_authorization(
4520        &self,
4521        name: &str,
4522        args: &serde_json::Value,
4523        config: &crate::agent::config::HematiteConfig,
4524        yolo_flag: bool,
4525    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
4526        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
4527    }
4528
4529    /// Layer 4: Isolated tool execution logic. Does not mutate 'self' to allow parallelism.
4530    async fn process_tool_call(
4531        &self,
4532        mut call: ToolCallFn,
4533        config: crate::agent::config::HematiteConfig,
4534        yolo: bool,
4535        tx: mpsc::Sender<InferenceEvent>,
4536        real_id: String,
4537    ) -> ToolExecutionOutcome {
4538        let mut msg_results = Vec::new();
4539        let gemma4_model =
4540            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
4541        let normalized_arguments = if gemma4_model {
4542            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
4543        } else {
4544            call.arguments.clone()
4545        };
4546
4547        // 1. Argument Parsing & Repair
4548        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
4549            Ok(v) => v,
4550            Err(_) => {
4551                match self
4552                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
4553                    .await
4554                {
4555                    Ok(v) => v,
4556                    Err(e) => {
4557                        let _ = tx
4558                            .send(InferenceEvent::Thought(format!(
4559                                "JSON Repair failed: {}",
4560                                e
4561                            )))
4562                            .await;
4563                        Value::Object(Default::default())
4564                    }
4565                }
4566            }
4567        };
4568        let last_user_prompt = self
4569            .history
4570            .iter()
4571            .rev()
4572            .find(|message| message.role == "user")
4573            .map(|message| message.content.as_str());
4574        rewrite_host_tool_call(&mut call.name, &mut args, last_user_prompt);
4575
4576        let display = format_tool_display(&call.name, &args);
4577        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
4578        let auth = self.check_authorization(&call.name, &args, &config, yolo);
4579
4580        // 2. Permission Check
4581        let decision_result = match precondition_result {
4582            Err(e) => Err(e),
4583            Ok(_) => match auth {
4584                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
4585                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
4586                    reason,
4587                    source: _,
4588                } => {
4589                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
4590                    let _ = tx
4591                        .send(InferenceEvent::ApprovalRequired {
4592                            id: real_id.clone(),
4593                            name: call.name.clone(),
4594                            display: format!("{}\nWhy: {}", display, reason),
4595                            diff: None,
4596                            responder: approve_tx,
4597                        })
4598                        .await;
4599
4600                    match approve_rx.await {
4601                        Ok(true) => Ok(()),
4602                        _ => Err("Declined by user".into()),
4603                    }
4604                }
4605                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
4606                    reason, ..
4607                } => Err(reason),
4608            },
4609        };
4610        let blocked_by_policy =
4611            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
4612
4613        // 3. Execution (Local or MCP)
4614        let (output, is_error) = match decision_result {
4615            Err(e) if e.starts_with("[auto-redirected shell→inspect_host") => (e, false),
4616            Err(e) => (format!("Error: {}", e), true),
4617            Ok(_) => {
4618                let _ = tx
4619                    .send(InferenceEvent::ToolCallStart {
4620                        id: real_id.clone(),
4621                        name: call.name.clone(),
4622                        args: display.clone(),
4623                    })
4624                    .await;
4625
4626                let result = if call.name.starts_with("lsp_") {
4627                    let lsp = self.lsp_manager.clone();
4628                    let path = args
4629                        .get("path")
4630                        .and_then(|v| v.as_str())
4631                        .unwrap_or("")
4632                        .to_string();
4633                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4634                    let character =
4635                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4636
4637                    match call.name.as_str() {
4638                        "lsp_definitions" => {
4639                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
4640                                .await
4641                        }
4642                        "lsp_references" => {
4643                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
4644                                .await
4645                        }
4646                        "lsp_hover" => {
4647                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
4648                        }
4649                        "lsp_search_symbol" => {
4650                            let query = args
4651                                .get("query")
4652                                .and_then(|v| v.as_str())
4653                                .unwrap_or_default()
4654                                .to_string();
4655                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
4656                        }
4657                        "lsp_rename_symbol" => {
4658                            let new_name = args
4659                                .get("new_name")
4660                                .and_then(|v| v.as_str())
4661                                .unwrap_or_default()
4662                                .to_string();
4663                            crate::tools::lsp_tools::lsp_rename_symbol(
4664                                lsp, path, line, character, new_name,
4665                            )
4666                            .await
4667                        }
4668                        "lsp_get_diagnostics" => {
4669                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
4670                        }
4671                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
4672                    }
4673                } else if call.name == "auto_pin_context" {
4674                    let pts = args.get("paths").and_then(|v| v.as_array());
4675                    let reason = args
4676                        .get("reason")
4677                        .and_then(|v| v.as_str())
4678                        .unwrap_or("uninformed scoping");
4679                    if let Some(arr) = pts {
4680                        let mut pinned = Vec::new();
4681                        {
4682                            let mut guard = self.pinned_files.lock().await;
4683                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; // 25MB Safety Valve
4684
4685                            for v in arr.iter().take(3) {
4686                                if let Some(p) = v.as_str() {
4687                                    if let Ok(meta) = std::fs::metadata(p) {
4688                                        if meta.len() > MAX_PINNED_SIZE {
4689                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
4690                                            continue;
4691                                        }
4692                                        if let Ok(content) = std::fs::read_to_string(p) {
4693                                            guard.insert(p.to_string(), content);
4694                                            pinned.push(p.to_string());
4695                                        }
4696                                    }
4697                                }
4698                            }
4699                        }
4700                        let msg = format!(
4701                            "Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
4702                            pinned.join(", "),
4703                            reason
4704                        );
4705                        let _ = tx
4706                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
4707                            .await;
4708                        Ok(msg)
4709                    } else {
4710                        Err("Missing 'paths' array for auto_pin_context.".to_string())
4711                    }
4712                } else if call.name == "list_pinned" {
4713                    let paths_msg = {
4714                        let pinned = self.pinned_files.lock().await;
4715                        if pinned.is_empty() {
4716                            "No files are currently pinned.".to_string()
4717                        } else {
4718                            let paths: Vec<_> = pinned.keys().cloned().collect();
4719                            format!(
4720                                "Currently pinned files in active memory:\n- {}",
4721                                paths.join("\n- ")
4722                            )
4723                        }
4724                    };
4725                    Ok(paths_msg)
4726                } else if call.name.starts_with("mcp__") {
4727                    let mut mcp = self.mcp_manager.lock().await;
4728                    match mcp.call_tool(&call.name, &args).await {
4729                        Ok(res) => Ok(res),
4730                        Err(e) => Err(e.to_string()),
4731                    }
4732                } else if call.name == "swarm" {
4733                    // ── Swarm Orchestration ──
4734                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
4735                    let max_workers = args
4736                        .get("max_workers")
4737                        .and_then(|v| v.as_u64())
4738                        .unwrap_or(3) as usize;
4739
4740                    let mut task_objs = Vec::new();
4741                    if let Value::Array(arr) = tasks_val {
4742                        for v in arr {
4743                            let id = v
4744                                .get("id")
4745                                .and_then(|x| x.as_str())
4746                                .unwrap_or("?")
4747                                .to_string();
4748                            let target = v
4749                                .get("target")
4750                                .and_then(|x| x.as_str())
4751                                .unwrap_or("?")
4752                                .to_string();
4753                            let instruction = v
4754                                .get("instruction")
4755                                .and_then(|x| x.as_str())
4756                                .unwrap_or("?")
4757                                .to_string();
4758                            task_objs.push(crate::agent::parser::WorkerTask {
4759                                id,
4760                                target,
4761                                instruction,
4762                            });
4763                        }
4764                    }
4765
4766                    if task_objs.is_empty() {
4767                        Err("No tasks provided for swarm.".to_string())
4768                    } else {
4769                        let (swarm_tx_internal, mut swarm_rx_internal) =
4770                            tokio::sync::mpsc::channel(32);
4771                        let tx_forwarder = tx.clone();
4772
4773                        // Bridge SwarmMessage -> InferenceEvent
4774                        tokio::spawn(async move {
4775                            while let Some(msg) = swarm_rx_internal.recv().await {
4776                                match msg {
4777                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
4778                                        let _ = tx_forwarder
4779                                            .send(InferenceEvent::Thought(format!(
4780                                                "Swarm [{}]: {}% complete",
4781                                                id, p
4782                                            )))
4783                                            .await;
4784                                    }
4785                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
4786                                        worker_id,
4787                                        file_path,
4788                                        before: _,
4789                                        after: _,
4790                                        tx,
4791                                    } => {
4792                                        let (approve_tx, approve_rx) =
4793                                            tokio::sync::oneshot::channel::<bool>();
4794                                        let display = format!(
4795                                            "Swarm worker [{}]: Integrated changes into {:?}",
4796                                            worker_id, file_path
4797                                        );
4798                                        let _ = tx_forwarder
4799                                            .send(InferenceEvent::ApprovalRequired {
4800                                                id: format!("swarm_{}", worker_id),
4801                                                name: "swarm_apply".to_string(),
4802                                                display,
4803                                                diff: None,
4804                                                responder: approve_tx,
4805                                            })
4806                                            .await;
4807                                        if let Ok(approved) = approve_rx.await {
4808                                            let response = if approved {
4809                                                crate::agent::swarm::ReviewResponse::Accept
4810                                            } else {
4811                                                crate::agent::swarm::ReviewResponse::Reject
4812                                            };
4813                                            let _ = tx.send(response);
4814                                        }
4815                                    }
4816                                    crate::agent::swarm::SwarmMessage::Done => {}
4817                                }
4818                            }
4819                        });
4820
4821                        let coordinator = self.swarm_coordinator.clone();
4822                        match coordinator
4823                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
4824                            .await
4825                        {
4826                            Ok(_) => Ok(
4827                                "Swarm execution completed. Check files for integration results."
4828                                    .to_string(),
4829                            ),
4830                            Err(e) => Err(format!("Swarm failure: {}", e)),
4831                        }
4832                    }
4833                } else if call.name == "vision_analyze" {
4834                    crate::tools::vision::vision_analyze(&self.engine, &args).await
4835                } else if matches!(
4836                    call.name.as_str(),
4837                    "edit_file" | "patch_hunk" | "multi_search_replace"
4838                ) && !yolo
4839                {
4840                    // ── Diff preview gate ─────────────────────────────────────
4841                    // Compute what the edit would look like before applying it.
4842                    // If we can build a diff, require user Y/N in the TUI.
4843                    let diff_result = match call.name.as_str() {
4844                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
4845                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
4846                        _ => crate::tools::file_ops::compute_msr_diff(&args),
4847                    };
4848                    match diff_result {
4849                        Ok(diff_text) => {
4850                            let path_label =
4851                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
4852                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
4853                            let _ = tx
4854                                .send(InferenceEvent::ApprovalRequired {
4855                                    id: real_id.clone(),
4856                                    name: call.name.clone(),
4857                                    display: format!("Edit preview: {}", path_label),
4858                                    diff: Some(diff_text),
4859                                    responder: appr_tx,
4860                                })
4861                                .await;
4862                            match appr_rx.await {
4863                                Ok(true) => dispatch_tool(&call.name, &args).await,
4864                                _ => Err("Edit declined by user.".into()),
4865                            }
4866                        }
4867                        // Diff computation failed (e.g. search string not found yet) —
4868                        // fall through and let the tool return its own error.
4869                        Err(_) => dispatch_tool(&call.name, &args).await,
4870                    }
4871                } else if call.name == "verify_build" {
4872                    // Stream build output line-by-line to the SPECULAR panel so
4873                    // the operator sees live compiler progress during long builds.
4874                    crate::tools::verify_build::execute_streaming(&args, tx.clone()).await
4875                } else if call.name == "shell" {
4876                    // Stream shell output line-by-line to the SPECULAR panel so
4877                    // the operator sees live progress during long commands.
4878                    crate::tools::shell::execute_streaming(&args, tx.clone()).await
4879                } else {
4880                    dispatch_tool(&call.name, &args).await
4881                };
4882
4883                match result {
4884                    Ok(o) => (o, false),
4885                    Err(e) => (format!("Error: {}", e), true),
4886                }
4887            }
4888        };
4889
4890        // ── Session Economics ────────────────────────────────────────────────
4891        {
4892            if let Ok(mut econ) = self.engine.economics.lock() {
4893                econ.record_tool(&call.name, !is_error);
4894            }
4895        }
4896
4897        if !is_error {
4898            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
4899                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
4900                    if call.name == "inspect_lines" {
4901                        self.record_line_inspection(path).await;
4902                    } else {
4903                        self.record_read_observation(path).await;
4904                    }
4905                }
4906            }
4907
4908            if call.name == "verify_build" {
4909                let ok = output.contains("BUILD OK")
4910                    || output.contains("BUILD SUCCESS")
4911                    || output.contains("BUILD OKAY");
4912                self.record_verify_build_result(ok, &output).await;
4913            }
4914
4915            if matches!(
4916                call.name.as_str(),
4917                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4918            ) || is_mcp_mutating_tool(&call.name)
4919            {
4920                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
4921                    .await;
4922            }
4923
4924            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
4925                msg_results.push(receipt);
4926            }
4927        }
4928
4929        // 4. Critic Check (Specular Tier 2)
4930        // Gated: Only run on code files with substantive content to avoid burning tokens
4931        // on trivial doc/config edits.
4932        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
4933            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
4934            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
4935            let ext = std::path::Path::new(path)
4936                .extension()
4937                .and_then(|e| e.to_str())
4938                .unwrap_or("");
4939            const SKIP_EXTS: &[&str] = &[
4940                "md",
4941                "toml",
4942                "json",
4943                "txt",
4944                "yml",
4945                "yaml",
4946                "cfg",
4947                "csv",
4948                "lock",
4949                "gitignore",
4950            ];
4951            let line_count = content.lines().count();
4952            if !path.is_empty()
4953                && !content.is_empty()
4954                && !SKIP_EXTS.contains(&ext)
4955                && line_count >= 50
4956            {
4957                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
4958                    msg_results.push(ChatMessage::system(&format!(
4959                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
4960                        path, critique
4961                    )));
4962                }
4963            }
4964        }
4965
4966        ToolExecutionOutcome {
4967            call_id: real_id,
4968            tool_name: call.name,
4969            args,
4970            output,
4971            is_error,
4972            blocked_by_policy,
4973            msg_results,
4974        }
4975    }
4976}
4977
4978/// The result of an isolated tool execution.
4979/// Used to bridge Parallel/Serial execution back to the main history.
4980struct ToolExecutionOutcome {
4981    call_id: String,
4982    tool_name: String,
4983    args: Value,
4984    output: String,
4985    is_error: bool,
4986    blocked_by_policy: bool,
4987    msg_results: Vec<ChatMessage>,
4988}
4989
/// Minimal record of a previously executed tool call.
///
/// NOTE(review): presumably stored in a result cache keyed elsewhere in this
/// file — the consumer is not visible from this definition.
#[derive(Clone)]
struct CachedToolResult {
    /// Name of the tool whose result was recorded.
    tool_name: String,
}
4994
/// Reports whether `path` ends in a file extension this module treats as
/// source code.
///
/// The comparison ignores ASCII case. Paths without an extension, or whose
/// extension is not valid UTF-8, are never considered code-like.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    std::path::Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |ext| {
            CODE_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
5023
5024// ── Display helpers ───────────────────────────────────────────────────────────
5025
5026pub fn format_tool_display(name: &str, args: &Value) -> String {
5027    let get = |key: &str| {
5028        args.get(key)
5029            .and_then(|v| v.as_str())
5030            .unwrap_or("")
5031            .to_string()
5032    };
5033    match name {
5034        "shell" => format!("$ {}", get("command")),
5035
5036        "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
5037        "describe_toolchain" => format!("describe toolchain {}", get("topic")),
5038        "inspect_host" => format!("inspect host {}", get("topic")),
5039        _ => format!("{} {:?}", name, args),
5040    }
5041}
5042
5043// ── Text utilities ────────────────────────────────────────────────────────────
5044
/// Heuristically decides whether a shell command is really a host-inspection
/// query (versions, network, services, registry, disks, ...).
///
/// The command is lowercased (ASCII) and tested against a fixed needle table.
/// A plain needle matches when it occurs anywhere in the command. A needle
/// containing `.*` is treated as an ordered wildcard: each fragment must occur
/// in the command, in order, with arbitrary text in between. Previously such
/// needles were compared literally and therefore never matched real commands
/// (e.g. `Get-Content C:\Windows\System32\drivers\etc\hosts`).
///
/// Returns `true` as soon as any needle matches.
pub(crate) fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const NEEDLES: &[&str] = &[
        "$env:path",
        "pathvariable",
        "pip --version",
        "pipx --version",
        "winget --version",
        "choco",
        "scoop",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "tasklist",
        "ipconfig",
        "get-netipconfiguration",
        "get-netadapter",
        "route print",
        "ifconfig",
        "ip addr",
        "ip route",
        "resolv.conf",
        "get-service",
        "sc query",
        "systemctl",
        "service --status-all",
        "get-process",
        "working set",
        "ps -eo",
        "ps aux",
        "desktop",
        "downloads",
        "get-netfirewallprofile",
        "win32_powerplan",
        "win32_operatingsystem",
        "win32_processor",
        "wmic",
        "loadpercentage",
        "totalvisiblememory",
        "freephysicalmemory",
        "get-wmiobject",
        "get-ciminstance",
        "get-cpu",
        "processorname",
        "clockspeed",
        "top memory",
        "top cpu",
        "resource usage",
        "powercfg",
        "uptime",
        "lastbootuptime",
        // registry reads for OS/version/update/security info — always use inspect_host
        "hklm:",
        "hkcu:",
        "hklm:\\",
        "hkcu:\\",
        "currentversion",
        "productname",
        "displayversion",
        "get-itemproperty",
        "get-itempropertyvalue",
        // updates
        "get-windowsupdatelog",
        "windowsupdatelog",
        "microsoft.update.session",
        "createupdatesearcher",
        "wuauserv",
        "usoclient",
        "get-hotfix",
        "wu_",
        // security / defender
        "get-mpcomputerstatus",
        "get-mppreference",
        "get-mpthreat",
        "start-mpscan",
        "win32_computersecurity",
        "softwarelicensingproduct",
        "enablelua",
        "get-netfirewallrule",
        "netfirewallprofile",
        "antivirus",
        "defenderstatus",
        // disk health / smart
        "get-physicaldisk",
        "get-disk",
        "msstoragedriver_failurepredic",
        "win32_diskdrive",
        "smartstatus",
        "diskstatus",
        "get-counter",
        "intensity",
        "benchmark",
        "thrash",
        "get-item",
        "test-path",
        // gpo / certs / integrity / domain
        "gpresult",
        "applied gpo",
        "cert:\\",
        "cert:",
        "component based servicing",
        "componentstore",
        "get-computerinfo",
        "win32_computersystem",
        // battery
        "win32_battery",
        "batterystaticdata",
        "batteryfullchargedcapacity",
        "batterystatus",
        "estimatedchargeremaining",
        // crashes / event log (broader)
        "get-winevent",
        "eventid",
        "bugcheck",
        "kernelpower",
        "win32_ntlogevent",
        "filterhashtable",
        // scheduled tasks
        "get-scheduledtask",
        "get-scheduledtaskinfo",
        "schtasks",
        "taskscheduler",
        // general cim/wmi diagnostic queries — always use inspect_host
        "get-ciminstance win32",
        "get-wmiobject win32",
        // network admin — always use inspect_host
        "arp -",
        "arp -a",
        "tracert ",
        "traceroute ",
        "tracepath ",
        "get-dnsclientcache",
        "ipconfig /displaydns",
        "get-netroute",
        "route print",
        "ip neigh",
        "netsh winhttp show proxy",
        "get-itemproperty.*proxy",
        "get-netadapter",
        "netsh wlan show",
        "test-netconnection",
        "resolve-dnsname",
        "get-netfirewallrule",
        // docker / wsl / ssh — always use inspect_host
        "docker ps",
        "docker info",
        "docker images",
        "docker container",
        "docker compose ls",
        "wsl --list",
        "wsl -l",
        "wsl --status",
        "wsl --version",
        "ssh -v",
        "get-service sshd",
        "get-service -name sshd",
        "cat ~/.ssh",
        "ls ~/.ssh",
        "ls -la ~/.ssh",
        // env / hosts / git config
        "get-childitem env:",
        "dir env:",
        "printenv",
        "[environment]::getenvironmentvariable",
        "get-content.*hosts",
        "cat /etc/hosts",
        "type c:\\windows\\system32\\drivers\\etc\\hosts",
        "git config --global --list",
        "git config --list",
        "git config --global",
        // database services
        "get-service mysql",
        "get-service postgresql",
        "get-service mongodb",
        "get-service redis",
        "get-service mssql",
        "get-service mariadb",
        "systemctl status postgresql",
        "systemctl status mysql",
        "systemctl status mongod",
        "systemctl status redis",
        // installed software
        "winget list",
        "get-package",
        "get-itempropert.*uninstall",
        "dpkg --get-selections",
        "rpm -qa",
        "brew list",
        // user accounts
        "get-localuser",
        "get-localgroupmember",
        "net user",
        "query user",
        "net localgroup administrators",
        // audit policy
        "auditpol /get",
        "auditpol",
        // shares
        "get-smbshare",
        "get-smbserverconfiguration",
        "net share",
        "net use",
        // dns servers
        "get-dnsclientserveraddress",
        "get-dnsclientdohserveraddress",
        "get-dnsclientglobalsetting",
    ];

    // Matches `needle` against `haystack`, treating every `.*` in the needle as
    // "any text". A needle without `.*` degenerates to a plain substring test.
    fn needle_matches(haystack: &str, needle: &str) -> bool {
        let mut rest = haystack;
        for fragment in needle.split(".*") {
            match rest.find(fragment) {
                // Continue searching strictly after this fragment so the
                // fragments are required to appear in order.
                Some(at) => rest = &rest[at + fragment.len()..],
                None => return false,
            }
        }
        true
    }

    let lower = command.to_ascii_lowercase();
    NEEDLES.iter().any(|needle| needle_matches(&lower, needle))
}
5276
5277// Moved strip_think_blocks to inference.rs
5278
5279fn cap_output(text: &str, max_bytes: usize) -> String {
5280    cap_output_for_tool(text, max_bytes, "output")
5281}
5282
5283/// Cap tool output at `max_bytes`. When the output exceeds the cap, write the
5284/// full content to `.hematite/scratch/<tool_name>_<timestamp>.txt` and include
5285/// the path in the truncation notice so the model can read the rest with
5286/// `read_file` instead of losing it entirely.
5287fn cap_output_for_tool(text: &str, max_bytes: usize, tool_name: &str) -> String {
5288    if text.len() <= max_bytes {
5289        return text.to_string();
5290    }
5291
5292    // Write full output to scratch so the model can access it.
5293    let scratch_path = write_output_to_scratch(text, tool_name);
5294
5295    let mut split_at = max_bytes;
5296    while !text.is_char_boundary(split_at) && split_at > 0 {
5297        split_at -= 1;
5298    }
5299
5300    let tail = match &scratch_path {
5301        Some(p) => format!(
5302            "\n... [output truncated — full output ({} bytes, {} lines) saved to '{}' — use read_file to access the rest]",
5303            text.len(),
5304            text.lines().count(),
5305            p
5306        ),
5307        None => format!("\n... [output capped at {}B]", max_bytes),
5308    };
5309
5310    format!("{}{}", &text[..split_at], tail)
5311}
5312
5313/// Write text to `.hematite/scratch/<tool>_<timestamp>.txt`.
5314/// Returns the relative path on success, None if the write fails.
5315fn write_output_to_scratch(text: &str, tool_name: &str) -> Option<String> {
5316    let root = crate::tools::file_ops::workspace_root();
5317    let scratch_dir = root.join(".hematite").join("scratch");
5318    if std::fs::create_dir_all(&scratch_dir).is_err() {
5319        return None;
5320    }
5321    let ts = std::time::SystemTime::now()
5322        .duration_since(std::time::UNIX_EPOCH)
5323        .map(|d| d.as_secs())
5324        .unwrap_or(0);
5325    // Sanitize tool name for use in filename
5326    let safe_name: String = tool_name
5327        .chars()
5328        .map(|c| {
5329            if c.is_alphanumeric() || c == '_' {
5330                c
5331            } else {
5332                '_'
5333            }
5334        })
5335        .collect();
5336    let filename = format!("{}_{}.txt", safe_name, ts);
5337    let abs_path = scratch_dir.join(&filename);
5338    if std::fs::write(&abs_path, text).is_err() {
5339        return None;
5340    }
5341    Some(format!(".hematite/scratch/{}", filename))
5342}
5343
/// Running tally of the reductions applied while shrinking a prompt to fit
/// its token budget. All counters start at zero via `Default`.
#[derive(Default)]
struct PromptBudgetStats {
    /// Tool results whose content was replaced by a short summary.
    summarized_tool_results: usize,
    /// Tool results collapsed further — presumably to a fixed placeholder;
    /// confirm against the budget enforcement code.
    collapsed_tool_results: usize,
    /// Chat messages that were shortened (per the field name; the increment
    /// site is outside this definition).
    trimmed_chat_messages: usize,
    /// Messages removed outright (per the field name; the increment site is
    /// outside this definition).
    dropped_messages: usize,
}
5351
5352fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
5353    crate::agent::inference::estimate_message_batch_tokens(messages)
5354}
5355
5356fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
5357    let budget = compaction::SummaryCompressionBudget {
5358        max_chars,
5359        max_lines: 3,
5360        max_line_chars: max_chars.clamp(80, 240),
5361    };
5362    let compressed = compaction::compress_summary(text, budget).summary;
5363    if compressed.is_empty() {
5364        String::new()
5365    } else {
5366        compressed
5367    }
5368}
5369
5370fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
5371    let tool_name = message.name.as_deref().unwrap_or("tool");
5372    let body = summarize_prompt_blob(message.content.as_str(), 320);
5373    format!(
5374        "[Prompt-budget summary of prior `{}` result]\n{}",
5375        tool_name, body
5376    )
5377}
5378
5379fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
5380    let role = message.role.as_str();
5381    let body = summarize_prompt_blob(message.content.as_str(), 240);
5382    format!(
5383        "[Prompt-budget summary of earlier {} message]\n{}",
5384        role, body
5385    )
5386}
5387
5388fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
5389    if messages.len() > 1 && messages[1].role != "user" {
5390        messages.insert(1, ChatMessage::user("Continuing previous context..."));
5391    }
5392}
5393
5394fn enforce_prompt_budget(
5395    prompt_msgs: &mut Vec<ChatMessage>,
5396    context_length: usize,
5397) -> Option<String> {
5398    let target_tokens = ((context_length as f64) * 0.68) as usize;
5399    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5400        return None;
5401    }
5402
5403    let mut stats = PromptBudgetStats::default();
5404
5405    // 1. Summarize the newest large tool outputs first.
5406    let mut tool_indices: Vec<usize> = prompt_msgs
5407        .iter()
5408        .enumerate()
5409        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5410        .collect();
5411    for idx in tool_indices.iter().rev().copied() {
5412        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5413            break;
5414        }
5415        let original = prompt_msgs[idx].content.as_str().to_string();
5416        if original.len() > 1200 {
5417            prompt_msgs[idx].content =
5418                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
5419            stats.summarized_tool_results += 1;
5420        }
5421    }
5422
5423    // 2. Collapse older tool results aggressively, keeping only the most recent two verbatim/summarized.
5424    tool_indices = prompt_msgs
5425        .iter()
5426        .enumerate()
5427        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5428        .collect();
5429    if tool_indices.len() > 2 {
5430        for idx in tool_indices
5431            .iter()
5432            .take(tool_indices.len().saturating_sub(2))
5433            .copied()
5434        {
5435            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5436                break;
5437            }
5438            prompt_msgs[idx].content = MessageContent::Text(
5439                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
5440            );
5441            stats.collapsed_tool_results += 1;
5442        }
5443    }
5444
5445    // 3. Trim older long chat messages, but preserve the final user request.
5446    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5447    for idx in 1..prompt_msgs.len() {
5448        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5449            break;
5450        }
5451        if Some(idx) == last_user_idx {
5452            continue;
5453        }
5454        let role = prompt_msgs[idx].role.as_str();
5455        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
5456            prompt_msgs[idx].content =
5457                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
5458            stats.trimmed_chat_messages += 1;
5459        }
5460    }
5461
5462    // 4. Drop the oldest non-system context until we fit, preserving the latest user request.
5463    let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5464    let mut idx = 1usize;
5465    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
5466        if Some(idx) == preserve_last_user_idx {
5467            idx += 1;
5468            if idx >= prompt_msgs.len() {
5469                break;
5470            }
5471            continue;
5472        }
5473        if idx >= prompt_msgs.len() {
5474            break;
5475        }
5476        prompt_msgs.remove(idx);
5477        stats.dropped_messages += 1;
5478    }
5479
5480    normalize_prompt_start(prompt_msgs);
5481
5482    let new_tokens = estimate_prompt_tokens(prompt_msgs);
5483    if stats.summarized_tool_results == 0
5484        && stats.collapsed_tool_results == 0
5485        && stats.trimmed_chat_messages == 0
5486        && stats.dropped_messages == 0
5487    {
5488        return None;
5489    }
5490
5491    Some(format!(
5492        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
5493        new_tokens,
5494        target_tokens,
5495        stats.summarized_tool_results,
5496        stats.collapsed_tool_results,
5497        stats.trimmed_chat_messages,
5498        stats.dropped_messages
5499    ))
5500}
5501
/// Returns true for short, direct tool-use requests that don't benefit from deep reasoning.
///
/// A request qualifies when it mentions `run_code` / `run code` anywhere (any length),
/// or when it is under 120 bytes and contains one of the compute-style keywords.
/// Matching is case-insensitive substring matching. Intended to let callers skip the
/// auto-/think prepend so the model calls the tool immediately instead of
/// deliberating over a trivial task.
fn is_quick_tool_request(input: &str) -> bool {
    // Short compute/test requests — "calculate X", "test this", "execute Y".
    const COMPUTE_KEYWORDS: [&str; 10] = [
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];
    const SHORT_REQUEST_BYTES: usize = 120;

    let lowered = input.to_lowercase();

    // Explicit run_code requests — sandbox calls need no reasoning warmup.
    let names_sandbox = ["run_code", "run code"]
        .iter()
        .any(|name| lowered.contains(*name));

    names_sandbox
        || (input.len() < SHORT_REQUEST_BYTES
            && COMPUTE_KEYWORDS
                .iter()
                .any(|keyword| lowered.contains(*keyword)))
}
5531
/// Splits `text` into chunks of roughly `words_per_chunk` whitespace-separated tokens.
///
/// A chunk is closed once it has absorbed `words_per_chunk` separators (a space or
/// a newline each count as one); separators stay attached to the end of the token
/// that precedes them, so concatenating the chunks reproduces `text`. Any trailing
/// remainder becomes a final, shorter chunk. Empty input yields no chunks.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    let is_separator = |ch: char| ch == ' ' || ch == '\n';

    let mut chunks = Vec::new();
    let mut pending = String::new();
    let mut separators_seen = 0usize;

    // Each piece is a run of non-separator characters plus at most one trailing
    // separator; only the last piece can lack the separator.
    for piece in text.split_inclusive(is_separator) {
        pending.push_str(piece);
        if !piece.ends_with(is_separator) {
            continue;
        }
        separators_seen += 1;
        if separators_seen >= words_per_chunk {
            chunks.push(std::mem::take(&mut pending));
            separators_seen = 0;
        }
    }

    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
5553
5554fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
5555    if call.name != "read_file" {
5556        return None;
5557    }
5558    let normalized_arguments =
5559        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
5560    let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
5561    let path = args.get("path").and_then(|v| v.as_str())?;
5562    Some(normalize_workspace_path(path))
5563}
5564
5565fn order_batch_reads_first(
5566    calls: Vec<crate::agent::inference::ToolCallResponse>,
5567) -> (
5568    Vec<crate::agent::inference::ToolCallResponse>,
5569    Option<String>,
5570) {
5571    let has_reads = calls.iter().any(|c| {
5572        matches!(
5573            c.function.name.as_str(),
5574            "read_file" | "inspect_lines" | "grep_files" | "list_files"
5575        )
5576    });
5577    let has_edits = calls.iter().any(|c| {
5578        matches!(
5579            c.function.name.as_str(),
5580            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5581        )
5582    });
5583    if has_reads && has_edits {
5584        let reads: Vec<_> = calls
5585            .into_iter()
5586            .filter(|c| {
5587                !matches!(
5588                    c.function.name.as_str(),
5589                    "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5590                )
5591            })
5592            .collect();
5593        let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
5594        (reads, note)
5595    } else {
5596        (calls, None)
5597    }
5598}
5599
/// Reports whether grep output describes a broad, high-fanout search.
///
/// Only the first line of `output` is inspected. The hunk count is taken from the
/// first `", "`-separated part of the form `<number> hunk(s)`, and the match count
/// from the leading space-delimited token when it is a number; either defaults to
/// zero when absent or unparsable. The result is true at 8 or more hunks, or 12 or
/// more matches. Empty output is never high-fanout.
fn grep_output_is_high_fanout(output: &str) -> bool {
    let first_line = match output.lines().next() {
        Some(line) => line,
        None => return false,
    };

    let hunks = first_line
        .split(", ")
        .filter_map(|part| part.strip_suffix(" hunk(s)"))
        .find_map(|digits| digits.parse::<usize>().ok())
        .unwrap_or(0);

    let leading_token = first_line
        .split_once(' ')
        .map_or(first_line, |(head, _rest)| head);
    let matches = leading_token.parse::<usize>().unwrap_or(0);

    matches >= 12 || hunks >= 8
}
5618
5619fn build_system_with_corrections(
5620    base: &str,
5621    hints: &[String],
5622    gpu: &Arc<GpuState>,
5623    git: &Arc<crate::agent::git_monitor::GitState>,
5624    config: &crate::agent::config::HematiteConfig,
5625) -> String {
5626    let mut system_msg = base.to_string();
5627
5628    // Inject Permission Mode.
5629    system_msg.push_str("\n\n# Permission Mode\n");
5630    let mode_label = match config.mode {
5631        crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
5632        crate::agent::config::PermissionMode::Developer => "DEVELOPER",
5633        crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
5634    };
5635    system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
5636
5637    if config.mode == crate::agent::config::PermissionMode::ReadOnly {
5638        system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
5639    } else {
5640        system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
5641    }
5642
5643    // Inject live hardware status.
5644    let (used, total) = gpu.read();
5645    if total > 0 {
5646        system_msg.push_str("\n\n# Terminal Hardware Context\n");
5647        system_msg.push_str(&format!(
5648            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
5649            gpu.gpu_name(),
5650            used as f64 / 1024.0,
5651            total as f64 / 1024.0,
5652            gpu.ratio() * 100.0
5653        ));
5654        system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
5655    }
5656
5657    // Inject Git Repository context.
5658    system_msg.push_str("\n\n# Git Repository Context\n");
5659    let git_status_label = git.label();
5660    let git_url = git.url();
5661    system_msg.push_str(&format!(
5662        "REMOTE STATUS: {} | URL: {}\n",
5663        git_status_label, git_url
5664    ));
5665
5666    // Live Snapshots (Status/Diff)
5667    let root = crate::tools::file_ops::workspace_root();
5668    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
5669        system_msg.push_str("\nGit status snapshot:\n");
5670        system_msg.push_str(&status_snapshot);
5671        system_msg.push_str("\n");
5672    }
5673
5674    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
5675        system_msg.push_str("\nGit diff snapshot:\n");
5676        system_msg.push_str(&diff_snapshot);
5677        system_msg.push_str("\n");
5678    }
5679
5680    if git_status_label == "NONE" {
5681        system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
5682    } else if git_status_label == "BEHIND" {
5683        system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
5684    }
5685
5686    // NOTE: Instruction files (CLAUDE.md, HEMATITE.md, etc.) are already injected
5687    // by InferenceEngine::build_system_prompt() via load_instruction_files().
5688    // Injecting them again here would double the token cost (~4K wasted per turn).
5689
5690    if hints.is_empty() {
5691        return system_msg;
5692    }
5693    system_msg.push_str("\n\n# Formatting Corrections\n");
5694    system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
5695    for hint in hints {
5696        system_msg.push_str(&format!("- {}\n", hint));
5697    }
5698    system_msg
5699}
5700
/// Picks a model override for `user_input` based on keyword heuristics.
///
/// Matching is case-insensitive substring matching. Requests mentioning a
/// build/change verb (refactor, rewrite, implement, create, fix, debug) route to
/// `think_model` when one is configured. Otherwise requests mentioning a lookup
/// verb (what, show, find, list, status) route to `fast_model` when one is
/// configured. Returns `None` when neither rule applies.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_KEYWORDS: [&str; 6] = ["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_KEYWORDS: [&str; 5] = ["what", "show", "find", "list", "status"];

    fn mentions_any(haystack: &str, keywords: &[&str]) -> bool {
        keywords.iter().any(|keyword| haystack.contains(*keyword))
    }

    let lowered = user_input.to_lowercase();

    if think_model.is_some() && mentions_any(&lowered, &THINK_KEYWORDS) {
        think_model
    } else if fast_model.is_some() && mentions_any(&lowered, &FAST_KEYWORDS) {
        fast_model
    } else {
        None
    }
}
5726
5727fn is_parallel_safe(name: &str) -> bool {
5728    let metadata = crate::agent::inference::tool_metadata_for_name(name);
5729    !metadata.mutates_workspace && !metadata.external_surface
5730}
5731
/// Decides whether a chat-mode query should consult the Vein.
///
/// Always true in docs-only mode. Otherwise true when the query (ASCII
/// case-insensitive) contains one of the recall phrases below, or contains a
/// 10-character run of digits and hyphens whose fifth character is a hyphen
/// (for example an ISO-style date such as `2026-04-09`).
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const RECALL_PHRASES: [&str; 12] = [
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    if docs_only_mode {
        return true;
    }

    let lowered = query.to_ascii_lowercase();
    if RECALL_PHRASES
        .iter()
        .any(|phrase| lowered.contains(*phrase))
    {
        return true;
    }

    // Tokens consist solely of ASCII digits and hyphens, so byte index 4 is also
    // character index 4; the length guard keeps the index in bounds.
    lowered
        .split(|ch: char| !ch.is_ascii_digit() && ch != '-')
        .any(|token| token.len() == 10 && token.as_bytes()[4] == b'-')
}
5758
5759#[cfg(test)]
5760mod tests {
5761    use super::*;
5762
5763    #[test]
5764    fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
5765        let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
5766        let class = classify_runtime_failure(detail);
5767        assert_eq!(class, RuntimeFailureClass::ContextWindow);
5768        assert_eq!(class.tag(), "context_window");
5769        assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
5770    }
5771
5772    #[test]
5773    fn runtime_failure_maps_to_provider_and_checkpoint_state() {
5774        assert_eq!(
5775            provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
5776            Some(ProviderRuntimeState::ContextWindow)
5777        );
5778        assert_eq!(
5779            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
5780            Some(OperatorCheckpointState::BlockedContextWindow)
5781        );
5782        assert_eq!(
5783            provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
5784            Some(ProviderRuntimeState::Degraded)
5785        );
5786        assert_eq!(
5787            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
5788            None
5789        );
5790    }
5791
5792    #[test]
5793    fn intent_router_treats_tool_registry_ownership_as_product_truth() {
5794        let intent = classify_query_intent(
5795            WorkflowMode::ReadOnly,
5796            "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
5797        );
5798        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5799        assert_eq!(
5800            intent.direct_answer,
5801            Some(DirectAnswerKind::ToolRegistryOwnership)
5802        );
5803    }
5804
5805    #[test]
5806    fn intent_router_treats_tool_classes_as_product_truth() {
5807        let intent = classify_query_intent(
5808            WorkflowMode::ReadOnly,
5809            "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
5810        );
5811        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5812        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
5813    }
5814
5815    #[test]
5816    fn tool_registry_ownership_answer_mentions_new_owner_file() {
5817        let answer = build_tool_registry_ownership_answer();
5818        assert!(answer.contains("src/agent/tool_registry.rs"));
5819        assert!(answer.contains("builtin dispatch path"));
5820        assert!(answer.contains("src/agent/conversation.rs"));
5821    }
5822
5823    #[test]
5824    fn intent_router_treats_mcp_lifecycle_as_product_truth() {
5825        let intent = classify_query_intent(
5826            WorkflowMode::ReadOnly,
5827            "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
5828        );
5829        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5830        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
5831    }
5832
5833    #[test]
5834    fn intent_router_short_circuits_unsafe_commit_pressure() {
5835        let intent = classify_query_intent(
5836            WorkflowMode::Auto,
5837            "Make a code change, skip verification, and commit it immediately.",
5838        );
5839        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5840        assert_eq!(
5841            intent.direct_answer,
5842            Some(DirectAnswerKind::UnsafeWorkflowPressure)
5843        );
5844    }
5845
5846    #[test]
5847    fn unsafe_workflow_pressure_answer_requires_verification() {
5848        let answer = build_unsafe_workflow_pressure_answer();
5849        assert!(answer.contains("should not skip verification"));
5850        assert!(answer.contains("run the appropriate verification path"));
5851        assert!(answer.contains("only then commit"));
5852    }
5853
5854    #[test]
5855    fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
5856        let intent = classify_query_intent(
5857            WorkflowMode::ReadOnly,
5858            "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.",
5859        );
5860        assert_eq!(intent.primary_class, QueryIntentClass::RepoArchitecture);
5861        assert!(intent.architecture_overview_mode);
5862        assert_eq!(intent.direct_answer, None);
5863    }
5864
5865    #[test]
5866    fn intent_router_marks_host_inspection_questions() {
5867        let intent = classify_query_intent(
5868            WorkflowMode::Auto,
5869            "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.",
5870        );
5871        assert!(intent.host_inspection_mode);
5872        assert_eq!(
5873            preferred_host_inspection_topic(
5874                "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development."
5875            ),
5876            Some("summary")
5877        );
5878    }
5879
5880    #[test]
5881    fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
5882        assert!(should_use_vein_in_chat(
5883            "What did we decide on 2026-04-09 about docs-only mode?",
5884            false
5885        ));
5886        assert!(should_use_vein_in_chat("Summarize these local notes", true));
5887        assert!(!should_use_vein_in_chat("Tell me a joke", false));
5888    }
5889
5890    #[test]
5891    fn shell_host_inspection_guard_matches_path_and_version_commands() {
5892        assert!(shell_looks_like_structured_host_inspection(
5893            "$env:PATH -split ';'"
5894        ));
5895        assert!(shell_looks_like_structured_host_inspection(
5896            "cargo --version"
5897        ));
5898        assert!(shell_looks_like_structured_host_inspection(
5899            "Get-NetTCPConnection -LocalPort 3000"
5900        ));
5901        assert!(shell_looks_like_structured_host_inspection(
5902            "netstat -ano | findstr :3000"
5903        ));
5904        assert!(shell_looks_like_structured_host_inspection(
5905            "Get-Process | Sort-Object WS -Descending"
5906        ));
5907        assert!(shell_looks_like_structured_host_inspection("ipconfig /all"));
5908        assert!(shell_looks_like_structured_host_inspection("Get-Service"));
5909        assert!(shell_looks_like_structured_host_inspection(
5910            "winget --version"
5911        ));
5912    }
5913
5914    #[test]
5915    fn intent_router_picks_ports_for_listening_port_questions() {
5916        assert_eq!(
5917            preferred_host_inspection_topic(
5918                "Show me what is listening on port 3000 and whether anything unexpected is exposed."
5919            ),
5920            Some("ports")
5921        );
5922    }
5923
5924    #[test]
5925    fn intent_router_picks_processes_for_host_process_questions() {
5926        assert_eq!(
5927            preferred_host_inspection_topic(
5928                "Show me what processes are using the most RAM right now."
5929            ),
5930            Some("processes")
5931        );
5932    }
5933
5934    #[test]
5935    fn intent_router_picks_network_for_adapter_questions() {
5936        assert_eq!(
5937            preferred_host_inspection_topic(
5938                "Show me my active network adapters, IP addresses, gateways, and DNS servers."
5939            ),
5940            Some("network")
5941        );
5942    }
5943
5944    #[test]
5945    fn intent_router_picks_services_for_service_questions() {
5946        assert_eq!(
5947            preferred_host_inspection_topic(
5948                "Show me the running services and startup types that matter for a normal dev machine."
5949            ),
5950            Some("services")
5951        );
5952    }
5953
5954    #[test]
5955    fn intent_router_picks_env_doctor_for_package_manager_questions() {
5956        assert_eq!(
5957            preferred_host_inspection_topic(
5958                "Run an environment doctor on this machine and tell me whether my PATH and package managers look sane."
5959            ),
5960            Some("env_doctor")
5961        );
5962    }
5963
5964    #[test]
5965    fn intent_router_picks_fix_plan_for_host_remediation_questions() {
5966        assert_eq!(
5967            preferred_host_inspection_topic("How do I fix cargo not found on this machine?"),
5968            Some("fix_plan")
5969        );
5970        assert_eq!(
5971            preferred_host_inspection_topic(
5972                "How do I fix Hematite when LM Studio is not reachable on localhost:1234?"
5973            ),
5974            Some("fix_plan")
5975        );
5976    }
5977
5978    #[test]
5979    fn fill_missing_fix_plan_issue_backfills_last_user_prompt() {
5980        let mut args = serde_json::json!({
5981            "topic": "fix_plan"
5982        });
5983
5984        fill_missing_fix_plan_issue(
5985            "inspect_host",
5986            &mut args,
5987            Some("/think\nHow do I fix cargo not found on this machine?"),
5988        );
5989
5990        assert_eq!(
5991            args.get("issue").and_then(|value| value.as_str()),
5992            Some("How do I fix cargo not found on this machine?")
5993        );
5994    }
5995
5996    #[test]
5997    fn shell_fix_question_rewrites_to_fix_plan() {
5998        let args = serde_json::json!({
5999            "command": "where cargo"
6000        });
6001
6002        assert!(should_rewrite_shell_to_fix_plan(
6003            "shell",
6004            &args,
6005            Some("How do I fix cargo not found on this machine?")
6006        ));
6007    }
6008
6009    #[test]
6010    fn fix_plan_dedupe_key_matches_rewritten_shell_probe() {
6011        let latest_user_prompt = Some("How do I fix cargo not found on this machine?");
6012        let shell_key = normalized_tool_call_key_for_dedupe(
6013            "shell",
6014            r#"{"command":"where cargo"}"#,
6015            false,
6016            latest_user_prompt,
6017        );
6018        let fix_plan_key = normalized_tool_call_key_for_dedupe(
6019            "inspect_host",
6020            r#"{"topic":"fix_plan"}"#,
6021            false,
6022            latest_user_prompt,
6023        );
6024
6025        assert_eq!(shell_key, fix_plan_key);
6026    }
6027
6028    #[test]
6029    fn shell_cleanup_script_rewrites_to_maintainer_workflow() {
6030        let (tool_name, args) = normalized_tool_call_for_execution(
6031            "shell",
6032            r#"{"command":"pwsh ./clean.ps1 -Deep -PruneDist"}"#,
6033            false,
6034            Some("Run my cleanup scripts."),
6035        );
6036
6037        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6038        assert_eq!(
6039            args.get("workflow").and_then(|value| value.as_str()),
6040            Some("clean")
6041        );
6042        assert_eq!(
6043            args.get("deep").and_then(|value| value.as_bool()),
6044            Some(true)
6045        );
6046        assert_eq!(
6047            args.get("prune_dist").and_then(|value| value.as_bool()),
6048            Some(true)
6049        );
6050    }
6051
6052    #[test]
6053    fn shell_release_script_rewrites_to_maintainer_workflow() {
6054        let (tool_name, args) = normalized_tool_call_for_execution(
6055            "shell",
6056            r#"{"command":"pwsh ./release.ps1 -Version 0.4.5 -Push -AddToPath"}"#,
6057            false,
6058            Some("Run the release flow."),
6059        );
6060
6061        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6062        assert_eq!(
6063            args.get("workflow").and_then(|value| value.as_str()),
6064            Some("release")
6065        );
6066        assert_eq!(
6067            args.get("version").and_then(|value| value.as_str()),
6068            Some("0.4.5")
6069        );
6070        assert_eq!(
6071            args.get("push").and_then(|value| value.as_bool()),
6072            Some(true)
6073        );
6074    }
6075
6076    #[test]
6077    fn explicit_cleanup_prompt_rewrites_shell_to_maintainer_workflow() {
6078        let (tool_name, args) = normalized_tool_call_for_execution(
6079            "shell",
6080            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
6081            false,
6082            Some("Run the deep cleanup and prune old dist artifacts."),
6083        );
6084
6085        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6086        assert_eq!(
6087            args.get("workflow").and_then(|value| value.as_str()),
6088            Some("clean")
6089        );
6090        assert_eq!(
6091            args.get("deep").and_then(|value| value.as_bool()),
6092            Some(true)
6093        );
6094        assert_eq!(
6095            args.get("prune_dist").and_then(|value| value.as_bool()),
6096            Some(true)
6097        );
6098    }
6099
6100    #[test]
6101    fn shell_cargo_test_rewrites_to_workspace_workflow() {
6102        let (tool_name, args) = normalized_tool_call_for_execution(
6103            "shell",
6104            r#"{"command":"cargo test"}"#,
6105            false,
6106            Some("Run cargo test in this project."),
6107        );
6108
6109        assert_eq!(tool_name, "run_workspace_workflow");
6110        assert_eq!(
6111            args.get("workflow").and_then(|value| value.as_str()),
6112            Some("command")
6113        );
6114        assert_eq!(
6115            args.get("command").and_then(|value| value.as_str()),
6116            Some("cargo test")
6117        );
6118    }
6119
6120    #[test]
6121    fn current_plan_execution_request_accepts_saved_plan_command() {
6122        assert!(is_current_plan_execution_request("/implement-plan"));
6123        assert!(is_current_plan_execution_request(
6124            "Implement the current plan."
6125        ));
6126    }
6127
6128    #[test]
6129    fn architect_operator_note_points_to_execute_path() {
6130        let plan = crate::tools::plan::PlanHandoff {
6131            goal: "Tighten startup workflow guidance".into(),
6132            target_files: vec!["src/runtime.rs".into()],
6133            ordered_steps: vec!["Update the startup banner".into()],
6134            verification: "cargo check --tests".into(),
6135            risks: vec![],
6136            open_questions: vec![],
6137        };
6138        let note = architect_handoff_operator_note(&plan);
6139        assert!(note.contains("`.hematite/PLAN.md`"));
6140        assert!(note.contains("/implement-plan"));
6141        assert!(note.contains("/code implement the current plan"));
6142    }
6143
6144    #[test]
6145    fn natural_language_test_prompt_rewrites_to_workspace_workflow() {
6146        let (tool_name, args) = normalized_tool_call_for_execution(
6147            "shell",
6148            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
6149            false,
6150            Some("Run the tests in this project."),
6151        );
6152
6153        assert_eq!(tool_name, "run_workspace_workflow");
6154        assert_eq!(
6155            args.get("workflow").and_then(|value| value.as_str()),
6156            Some("test")
6157        );
6158    }
6159
6160    #[test]
6161    fn failing_path_parser_extracts_cargo_error_locations() {
6162        let output = r#"
6163BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:
6164
6165error[E0412]: cannot find type `Foo` in this scope
6166  --> src/agent/conversation.rs:42:12
6167   |
616842 |     field: Foo,
6169   |            ^^^ not found
6170
6171error[E0308]: mismatched types
6172  --> src/tools/file_ops.rs:100:5
6173   |
6174   = note: expected `String`, found `&str`
6175"#;
6176        let paths = parse_failing_paths_from_build_output(output);
6177        assert!(
6178            paths.iter().any(|p| p.contains("conversation.rs")),
6179            "should capture conversation.rs"
6180        );
6181        assert!(
6182            paths.iter().any(|p| p.contains("file_ops.rs")),
6183            "should capture file_ops.rs"
6184        );
6185        assert_eq!(paths.len(), 2, "no duplicates");
6186    }
6187
6188    #[test]
6189    fn failing_path_parser_ignores_macro_expansions() {
6190        let output = r#"
6191  --> <macro-expansion>:1:2
6192  --> src/real/file.rs:10:5
6193"#;
6194        let paths = parse_failing_paths_from_build_output(output);
6195        assert_eq!(paths.len(), 1);
6196        assert!(paths[0].contains("file.rs"));
6197    }
6198
6199    #[test]
6200    fn intent_router_picks_updates_for_update_questions() {
6201        assert_eq!(
6202            preferred_host_inspection_topic("is my PC up to date?"),
6203            Some("updates")
6204        );
6205        assert_eq!(
6206            preferred_host_inspection_topic("are there any pending Windows updates?"),
6207            Some("updates")
6208        );
6209        assert_eq!(
6210            preferred_host_inspection_topic("check for updates on my computer"),
6211            Some("updates")
6212        );
6213    }
6214
6215    #[test]
6216    fn intent_router_picks_security_for_antivirus_questions() {
6217        assert_eq!(
6218            preferred_host_inspection_topic("is my antivirus on?"),
6219            Some("security")
6220        );
6221        assert_eq!(
6222            preferred_host_inspection_topic("is Windows Defender running?"),
6223            Some("security")
6224        );
6225        assert_eq!(
6226            preferred_host_inspection_topic("is my PC protected?"),
6227            Some("security")
6228        );
6229    }
6230
6231    #[test]
6232    fn intent_router_picks_pending_reboot_for_restart_questions() {
6233        assert_eq!(
6234            preferred_host_inspection_topic("do I need to restart my PC?"),
6235            Some("pending_reboot")
6236        );
6237        assert_eq!(
6238            preferred_host_inspection_topic("is a reboot required?"),
6239            Some("pending_reboot")
6240        );
6241        assert_eq!(
6242            preferred_host_inspection_topic("is there a pending restart waiting?"),
6243            Some("pending_reboot")
6244        );
6245    }
6246
6247    #[test]
6248    fn intent_router_picks_disk_health_for_drive_health_questions() {
6249        assert_eq!(
6250            preferred_host_inspection_topic("is my hard drive dying?"),
6251            Some("disk_health")
6252        );
6253        assert_eq!(
6254            preferred_host_inspection_topic("check the disk health and SMART status"),
6255            Some("disk_health")
6256        );
6257        assert_eq!(
6258            preferred_host_inspection_topic("is my SSD healthy?"),
6259            Some("disk_health")
6260        );
6261    }
6262
6263    #[test]
6264    fn intent_router_picks_battery_for_battery_questions() {
6265        assert_eq!(
6266            preferred_host_inspection_topic("check my battery"),
6267            Some("battery")
6268        );
6269        assert_eq!(
6270            preferred_host_inspection_topic("how is my battery life?"),
6271            Some("battery")
6272        );
6273        assert_eq!(
6274            preferred_host_inspection_topic("what is my battery wear level?"),
6275            Some("battery")
6276        );
6277    }
6278
6279    #[test]
6280    fn intent_router_picks_recent_crashes_for_bsod_questions() {
6281        assert_eq!(
6282            preferred_host_inspection_topic("why did my PC restart by itself?"),
6283            Some("recent_crashes")
6284        );
6285        assert_eq!(
6286            preferred_host_inspection_topic("did my computer BSOD recently?"),
6287            Some("recent_crashes")
6288        );
6289        assert_eq!(
6290            preferred_host_inspection_topic("show me any recent app crashes"),
6291            Some("recent_crashes")
6292        );
6293    }
6294
6295    #[test]
6296    fn intent_router_picks_scheduled_tasks_for_task_questions() {
6297        assert_eq!(
6298            preferred_host_inspection_topic("what scheduled tasks are running on this PC?"),
6299            Some("scheduled_tasks")
6300        );
6301        assert_eq!(
6302            preferred_host_inspection_topic("show me the task scheduler"),
6303            Some("scheduled_tasks")
6304        );
6305    }
6306
6307    #[test]
6308    fn intent_router_picks_dev_conflicts_for_conflict_questions() {
6309        assert_eq!(
6310            preferred_host_inspection_topic("are there any dev environment conflicts?"),
6311            Some("dev_conflicts")
6312        );
6313        assert_eq!(
6314            preferred_host_inspection_topic("why is python pointing to the wrong version?"),
6315            Some("dev_conflicts")
6316        );
6317    }
6318
6319    #[test]
6320    fn shell_guard_catches_windows_update_commands() {
6321        assert!(shell_looks_like_structured_host_inspection(
6322            "Get-WindowsUpdateLog | Select-Object -Last 50"
6323        ));
6324        assert!(shell_looks_like_structured_host_inspection(
6325            "$sess = New-Object -ComObject Microsoft.Update.Session"
6326        ));
6327        assert!(shell_looks_like_structured_host_inspection(
6328            "Get-Service wuauserv"
6329        ));
6330        assert!(shell_looks_like_structured_host_inspection(
6331            "Get-MpComputerStatus"
6332        ));
6333        assert!(shell_looks_like_structured_host_inspection(
6334            "Get-PhysicalDisk"
6335        ));
6336        assert!(shell_looks_like_structured_host_inspection(
6337            "Get-CimInstance Win32_Battery"
6338        ));
6339        assert!(shell_looks_like_structured_host_inspection(
6340            "Get-WinEvent -FilterHashtable @{Id=41}"
6341        ));
6342        assert!(shell_looks_like_structured_host_inspection(
6343            "Get-ScheduledTask | Where-Object State -ne Disabled"
6344        ));
6345    }
6346}