Skip to main content

hematite/agent/
conversation.rs

1use crate::agent::architecture_summary::{
2    build_architecture_overview_answer, prune_architecture_trace_batch,
3    prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4    prune_redirected_shell_batch, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7    build_about_answer, build_architect_session_reset_plan, build_authorization_policy_answer,
8    build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9    build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10    build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11    build_session_reset_semantics_answer, build_tool_classes_answer,
12    build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13    build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16    ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17    ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20    action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21    is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24    attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25    RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28    all_host_inspection_topics, classify_query_intent, is_capability_probe_tool,
29    looks_like_mutation_request, needs_computation_sandbox, preferred_host_inspection_topic,
30    preferred_maintainer_workflow, preferred_workspace_workflow, DirectAnswerKind,
31    QueryIntentClass,
32};
33use crate::agent::tool_registry::dispatch_builtin_tool;
34// SystemPromptBuilder is no longer used — InferenceEngine::build_system_prompt() is canonical.
35use crate::agent::compaction::{self, CompactionConfig};
36use crate::ui::gpu_monitor::GpuState;
37
38use serde_json::Value;
39use std::sync::Arc;
40use tokio::sync::{mpsc, Mutex};
41// -- Session persistence -------------------------------------------------------
42
/// One user submission: the typed text plus an optional attached document
/// and an optional attached image.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
    /// Text entered by the user.
    pub text: String,
    /// Document attached to this turn, if any.
    pub attached_document: Option<AttachedDocument>,
    /// Image attached to this turn, if any.
    pub attached_image: Option<AttachedImage>,
}
49
/// A named document attached to a [`UserTurn`].
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name, rendered inside the `[Attached document: …]` marker.
    pub name: String,
    /// Document text, inlined into the prompt by `apply_turn_attachments`.
    pub content: String,
}
55
/// A named image attached to a [`UserTurn`].
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name, rendered inside the `[Attached image: …]` marker.
    pub name: String,
    /// Location of the image.
    // NOTE(review): not read in this part of the file; presumably a filesystem
    // path consumed by the inference layer — confirm with callers.
    pub path: String,
}
61
62impl UserTurn {
63    pub fn text(text: impl Into<String>) -> Self {
64        Self {
65            text: text.into(),
66            attached_document: None,
67            attached_image: None,
68        }
69    }
70}
71
/// Serialized form of the session file (see `session_path`).
///
/// Fields marked `#[serde(default)]` fall back to their `Default` value when
/// they are missing from the stored JSON.
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
    /// Running summary text carried over between sessions, if one was stored.
    running_summary: Option<String>,
    /// Session memory persisted alongside the summary.
    #[serde(default)]
    session_memory: crate::agent::compaction::SessionMemory,
    /// Last user message from the previous session — shown as resume hint on startup.
    #[serde(default)]
    last_goal: Option<String>,
    /// Number of real inference turns completed in the previous session.
    #[serde(default)]
    turn_count: u32,
}
84
/// Snapshot of the previous session, surfaced on startup when a workspace is
/// resumed after a restart or crash.
pub struct CheckpointResume {
    /// Last goal stored by the previous session; `load_checkpoint` only returns
    /// a snapshot when this is non-blank.
    pub last_goal: String,
    /// Turn count stored by the previous session; never zero in a snapshot
    /// produced by `load_checkpoint`.
    pub turn_count: u32,
    /// Up to four entries taken from the stored working set, sorted.
    pub working_files: Vec<String>,
    /// Outcome of the last stored verification, or `None` if none was stored.
    pub last_verify_ok: Option<bool>,
}
93
94/// Load the prior-session checkpoint from `.hematite/session.json`.
95/// Returns `None` when there is no prior session or it has no real turns.
96pub fn load_checkpoint() -> Option<CheckpointResume> {
97    let path = session_path();
98    let data = std::fs::read_to_string(&path).ok()?;
99    let saved: SavedSession = serde_json::from_str(&data).ok()?;
100    let goal = saved.last_goal.filter(|g| !g.trim().is_empty())?;
101    if saved.turn_count == 0 {
102        return None;
103    }
104    let mut working_files: Vec<String> = saved
105        .session_memory
106        .working_set
107        .into_iter()
108        .take(4)
109        .collect();
110    working_files.sort();
111    let last_verify_ok = saved.session_memory.last_verification.map(|v| v.successful);
112    Some(CheckpointResume {
113        last_goal: goal,
114        turn_count: saved.turn_count,
115        working_files,
116        last_verify_ok,
117    })
118}
119
/// Bookkeeping for action grounding: which paths have been observed or
/// inspected, and what the most recent build verification reported.
// NOTE(review): the `u64` map values are presumably the `turn_index` at which
// the entry was recorded — confirm against the code that fills these maps.
#[derive(Default)]
struct ActionGroundingState {
    /// Turn counter.
    turn_index: u64,
    /// Paths recorded as observed, keyed by path.
    observed_paths: std::collections::HashMap<String, u64>,
    /// Paths recorded as inspected, keyed by path.
    inspected_paths: std::collections::HashMap<String, u64>,
    /// Turn of the last build verification, if one has run.
    last_verify_build_turn: Option<u64>,
    /// Whether the last build verification succeeded.
    last_verify_build_ok: bool,
    /// Paths associated with the last failed build.
    last_failed_build_paths: Vec<String>,
    /// Set when code has changed since the last verification.
    code_changed_since_verify: bool,
    /// Track topics redirected from shell to inspect_host in the current turn to break loops.
    redirected_host_inspection_topics: std::collections::HashMap<String, u64>,
}
132
/// Scope guard around a shared boolean flag; its `Drop` impl stores `false`
/// into the flag when the guard goes out of scope.
struct PlanExecutionGuard {
    /// Shared flag that is cleared on drop.
    flag: Arc<std::sync::atomic::AtomicBool>,
}
136
137impl Drop for PlanExecutionGuard {
138    fn drop(&mut self) {
139        self.flag.store(false, std::sync::atomic::Ordering::SeqCst);
140    }
141}
142
/// Workflow mode selected for the conversation; `Auto` is the default.
///
/// `Ask`, `Architect`, `ReadOnly`, and `Teach` are the modes that
/// `WorkflowMode::is_read_only` reports as read-only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    /// Default mode (label `AUTO`).
    #[default]
    Auto,
    /// Read-only question/answer mode (label `ASK`).
    Ask,
    /// Implementation mode (label `CODE`); not read-only.
    Code,
    /// Plan-first mode (label `ARCHITECT`); read-only.
    Architect,
    /// Hard no-mutation mode (label `READ-ONLY`).
    ReadOnly,
    /// Clean conversational mode — lighter prompt, no coding agent scaffolding,
    /// tools available but not pushed. Vein RAG still runs for context.
    Chat,
    /// Teacher/guide mode — inspect the real machine state first, then walk the user
    /// through the admin/config task as a grounded, numbered tutorial. Never executes
    /// write operations itself; instructs the user to perform them manually.
    Teach,
}
159
160impl WorkflowMode {
161    fn label(self) -> &'static str {
162        match self {
163            WorkflowMode::Auto => "AUTO",
164            WorkflowMode::Ask => "ASK",
165            WorkflowMode::Code => "CODE",
166            WorkflowMode::Architect => "ARCHITECT",
167            WorkflowMode::ReadOnly => "READ-ONLY",
168            WorkflowMode::Chat => "CHAT",
169            WorkflowMode::Teach => "TEACH",
170        }
171    }
172
173    fn is_read_only(self) -> bool {
174        matches!(
175            self,
176            WorkflowMode::Ask
177                | WorkflowMode::Architect
178                | WorkflowMode::ReadOnly
179                | WorkflowMode::Teach
180        )
181    }
182
183    pub(crate) fn is_chat(self) -> bool {
184        matches!(self, WorkflowMode::Chat)
185    }
186}
187
188fn session_path() -> std::path::PathBuf {
189    if let Ok(overridden) = std::env::var("HEMATITE_SESSION_PATH") {
190        return std::path::PathBuf::from(overridden);
191    }
192    crate::tools::file_ops::workspace_root()
193        .join(".hematite")
194        .join("session.json")
195}
196
197fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
198    let path = session_path();
199    if !path.exists() {
200        return (None, crate::agent::compaction::SessionMemory::default());
201    }
202    let Ok(data) = std::fs::read_to_string(&path) else {
203        return (None, crate::agent::compaction::SessionMemory::default());
204    };
205    let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
206        return (None, crate::agent::compaction::SessionMemory::default());
207    };
208    (saved.running_summary, saved.session_memory)
209}
210
211fn reset_task_files() {
212    let root = crate::tools::file_ops::workspace_root();
213    let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
214    let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
215    let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
216    let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
217    let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
218    let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
219}
220
221fn purge_persistent_memory() {
222    let root = crate::tools::file_ops::workspace_root();
223    let mem_dir = root.join(".hematite").join("memories");
224    if mem_dir.exists() {
225        let _ = std::fs::remove_dir_all(&mem_dir);
226        let _ = std::fs::create_dir_all(&mem_dir);
227    }
228
229    let log_dir = root.join(".hematite_logs");
230    if log_dir.exists() {
231        if let Ok(entries) = std::fs::read_dir(&log_dir) {
232            for entry in entries.flatten() {
233                let _ = std::fs::write(entry.path(), "");
234            }
235        }
236    }
237}
238
239fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
240    let mut out = prompt.trim().to_string();
241    if let Some(doc) = user_turn.attached_document.as_ref() {
242        out = format!(
243            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
244            doc.name, doc.content, out
245        );
246    }
247    if let Some(image) = user_turn.attached_image.as_ref() {
248        out = if out.is_empty() {
249            format!("[Attached image: {}]", image.name)
250        } else {
251            format!("[Attached image: {}]\n\n{}", image.name, out)
252        };
253    }
254    out
255}
256
257fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
258    let mut prefixes = Vec::new();
259    if let Some(doc) = user_turn.attached_document.as_ref() {
260        prefixes.push(format!("[Attached document: {}]", doc.name));
261    }
262    if let Some(image) = user_turn.attached_image.as_ref() {
263        prefixes.push(format!("[Attached image: {}]", image.name));
264    }
265    if prefixes.is_empty() {
266        prompt.to_string()
267    } else if prompt.trim().is_empty() {
268        prefixes.join("\n")
269    } else {
270        format!("{}\n{}", prefixes.join("\n"), prompt)
271    }
272}
273
/// Coarse category assigned to a runtime failure message by
/// `classify_runtime_failure`; drives the tag, operator guidance, retry
/// decision, and recovery scenario chosen for the failure.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    /// The prompt hit or exceeded the model's context window.
    ContextWindow,
    /// The provider was unreachable, errored, or returned an unparsable response.
    ProviderDegraded,
    /// A tool call had missing or invalid arguments.
    ToolArgMalformed,
    /// The action was blocked, denied, or declined by the user.
    ToolPolicyBlocked,
    /// Repeated failing tool calls were detected.
    ToolLoop,
    /// A build or verification step failed.
    VerificationFailed,
    /// The model returned an empty response.
    EmptyModelResponse,
    /// No known failure pattern matched.
    Unknown,
}
285
286impl RuntimeFailureClass {
287    fn tag(self) -> &'static str {
288        match self {
289            RuntimeFailureClass::ContextWindow => "context_window",
290            RuntimeFailureClass::ProviderDegraded => "provider_degraded",
291            RuntimeFailureClass::ToolArgMalformed => "tool_arg_malformed",
292            RuntimeFailureClass::ToolPolicyBlocked => "tool_policy_blocked",
293            RuntimeFailureClass::ToolLoop => "tool_loop",
294            RuntimeFailureClass::VerificationFailed => "verification_failed",
295            RuntimeFailureClass::EmptyModelResponse => "empty_model_response",
296            RuntimeFailureClass::Unknown => "unknown",
297        }
298    }
299
300    fn operator_guidance(self) -> &'static str {
301        match self {
302            RuntimeFailureClass::ContextWindow => {
303                "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying."
304            }
305            RuntimeFailureClass::ProviderDegraded => {
306                "Retry once automatically, then narrow the turn or restart LM Studio if it persists."
307            }
308            RuntimeFailureClass::ToolArgMalformed => {
309                "Retry with repaired or narrower tool arguments instead of repeating the same malformed call."
310            }
311            RuntimeFailureClass::ToolPolicyBlocked => {
312                "Stay inside the allowed workflow or switch modes before retrying."
313            }
314            RuntimeFailureClass::ToolLoop => {
315                "Stop repeating the same failing tool pattern and switch to a narrower recovery step."
316            }
317            RuntimeFailureClass::VerificationFailed => {
318                "Fix the build or test failure before treating the task as complete."
319            }
320            RuntimeFailureClass::EmptyModelResponse => {
321                "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing."
322            }
323            RuntimeFailureClass::Unknown => {
324                "Inspect the latest grounded tool results or provider status before retrying."
325            }
326        }
327    }
328}
329
330fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
331    let lower = detail.to_ascii_lowercase();
332    if lower.contains("context_window_blocked")
333        || lower.contains("context ceiling reached")
334        || lower.contains("exceeds the")
335        || ((lower.contains("n_keep") && lower.contains("n_ctx"))
336            || lower.contains("context length")
337            || lower.contains("keep from the initial prompt")
338            || lower.contains("prompt is greater than the context length"))
339    {
340        RuntimeFailureClass::ContextWindow
341    } else if lower.contains("empty response from model")
342        || lower.contains("model returned an empty response")
343    {
344        RuntimeFailureClass::EmptyModelResponse
345    } else if lower.contains("lm studio unreachable")
346        || lower.contains("lm studio error")
347        || lower.contains("request failed")
348        || lower.contains("response parse error")
349        || lower.contains("provider degraded")
350    {
351        RuntimeFailureClass::ProviderDegraded
352    } else if lower.contains("missing required argument")
353        || lower.contains("json repair failed")
354        || lower.contains("invalid pattern")
355        || lower.contains("invalid line range")
356    {
357        RuntimeFailureClass::ToolArgMalformed
358    } else if lower.contains("action blocked:")
359        || lower.contains("access denied")
360        || lower.contains("declined by user")
361    {
362        RuntimeFailureClass::ToolPolicyBlocked
363    } else if lower.contains("too many consecutive tool errors")
364        || lower.contains("repeated tool failures")
365        || lower.contains("stuck in a loop")
366    {
367        RuntimeFailureClass::ToolLoop
368    } else if lower.contains("build failed")
369        || lower.contains("verification failed")
370        || lower.contains("verify_build")
371    {
372        RuntimeFailureClass::VerificationFailed
373    } else {
374        RuntimeFailureClass::Unknown
375    }
376}
377
378fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
379    format!(
380        "[failure:{}] {} Detail: {}",
381        class.tag(),
382        class.operator_guidance(),
383        detail.trim()
384    )
385}
386
387fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
388    match class {
389        RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
390        RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
391        RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
392        _ => None,
393    }
394}
395
396fn checkpoint_state_for_runtime_failure(
397    class: RuntimeFailureClass,
398) -> Option<OperatorCheckpointState> {
399    match class {
400        RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
401        RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
402        RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
403        RuntimeFailureClass::VerificationFailed => {
404            Some(OperatorCheckpointState::BlockedVerification)
405        }
406        _ => None,
407    }
408}
409
410fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
411    match class {
412        RuntimeFailureClass::ProviderDegraded => {
413            "LM Studio degraded during the turn; retrying once before surfacing a failure."
414        }
415        RuntimeFailureClass::EmptyModelResponse => {
416            "The model returned an empty reply; retrying once before surfacing a failure."
417        }
418        _ => "Runtime recovery in progress.",
419    }
420}
421
422fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
423    match class {
424        RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
425        RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
426        RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
427        RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
428        _ => "Operator checkpoint updated.",
429    }
430}
431
432fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
433    match class {
434        RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
435        RuntimeFailureClass::ProviderDegraded => {
436            "LM Studio degraded and did not recover cleanly; operator action is now required."
437        }
438        RuntimeFailureClass::EmptyModelResponse => {
439            "LM Studio returned an empty reply after recovery; operator action is now required."
440        }
441        RuntimeFailureClass::ToolLoop => {
442            "Repeated failing tool pattern detected; Hematite stopped the loop."
443        }
444        _ => "Runtime failure surfaced to the operator.",
445    }
446}
447
448fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
449    matches!(
450        class,
451        RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
452    )
453}
454
455fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
456    match class {
457        RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
458        RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
459        RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
460        RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
461        RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
462        RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
463        RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
464    }
465}
466
467fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
468    format!(
469        "{} [{}]",
470        plan.recipe.scenario.label(),
471        plan.recipe.steps_summary()
472    )
473}
474
475fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
476    match decision {
477        RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
478        RecoveryDecision::Escalate {
479            recipe,
480            attempts_made,
481            ..
482        } => format!(
483            "{} escalated after {} / {} [{}]",
484            recipe.scenario.label(),
485            attempts_made,
486            recipe.max_attempts.max(1),
487            recipe.steps_summary()
488        ),
489    }
490}
491
492/// Parse file paths from cargo/compiler error output.
493/// Handles lines like `  --> src/foo/bar.rs:34:12` and `error: could not compile`.
494fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
495    let root = crate::tools::file_ops::workspace_root();
496    let mut paths: Vec<String> = output
497        .lines()
498        .filter_map(|line| {
499            let trimmed = line.trim_start();
500            // Cargo error location: "--> path/to/file.rs:line:col"
501            let after_arrow = trimmed.strip_prefix("--> ")?;
502            let file_part = after_arrow.split(':').next()?;
503            if file_part.is_empty() || file_part.starts_with('<') {
504                return None;
505            }
506            let p = std::path::Path::new(file_part);
507            let resolved = if p.is_absolute() {
508                p.to_path_buf()
509            } else {
510                root.join(p)
511            };
512            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
513        })
514        .collect();
515    paths.sort();
516    paths.dedup();
517    paths
518}
519
520fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
521    match mode {
522        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
523        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
524        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
525        WorkflowMode::Teach => "Workflow mode TEACH is a guided walkthrough mode. I will inspect the real state of your machine first, then give you a numbered step-by-step tutorial so you can perform the task yourself. I do not execute write operations in TEACH mode — I show you exactly how to do it.".to_string(),
526        _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
527    }
528}
529
/// Fixed output contract for ARCHITECT mode: a thirteen-line instruction block
/// that names the six required handoff headings.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow`.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented.",
    )
}
545
/// Canonical prompt text that asks for the saved plan to be implemented.
fn implement_current_plan_prompt() -> &'static str {
    const PROMPT: &str = "Implement the current plan.";
    PROMPT
}
549
550fn architect_handoff_operator_note(plan: &crate::tools::plan::PlanHandoff) -> String {
551    format!(
552        "Implementation handoff saved to `.hematite/PLAN.md`.\nNext step: run `/implement-plan` to execute it in `/code`, or use `/code {}` directly.\nPlan: {}",
553        implement_current_plan_prompt().to_ascii_lowercase(),
554        plan.summary_line()
555    )
556}
557
558fn is_current_plan_execution_request(user_input: &str) -> bool {
559    let lower = user_input.trim().to_ascii_lowercase();
560    lower == "/implement-plan"
561        || lower == implement_current_plan_prompt().to_ascii_lowercase()
562        || lower
563            == implement_current_plan_prompt()
564                .trim_end_matches('.')
565                .to_ascii_lowercase()
566        || lower.contains("implement the current plan")
567}
568
569fn is_plan_scoped_tool(name: &str) -> bool {
570    crate::agent::inference::tool_metadata_for_name(name).plan_scope
571}
572
573fn is_current_plan_irrelevant_tool(name: &str) -> bool {
574    !crate::agent::inference::tool_metadata_for_name(name).plan_scope
575}
576
577fn is_non_mutating_plan_step_tool(name: &str) -> bool {
578    let metadata = crate::agent::inference::tool_metadata_for_name(name);
579    metadata.plan_scope && !metadata.mutates_workspace
580}
581
582fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
583    let trimmed = user_input.trim();
584    for (prefix, mode) in [
585        ("/ask", WorkflowMode::Ask),
586        ("/code", WorkflowMode::Code),
587        ("/architect", WorkflowMode::Architect),
588        ("/read-only", WorkflowMode::ReadOnly),
589        ("/auto", WorkflowMode::Auto),
590        ("/teach", WorkflowMode::Teach),
591    ] {
592        if let Some(rest) = trimmed.strip_prefix(prefix) {
593            let rest = rest.trim();
594            if !rest.is_empty() {
595                return Some((mode, rest));
596            }
597        }
598    }
599    None
600}
601
602// Tool catalogue
603
/// Returns the full set of tools exposed to the model.
///
/// Thin wrapper that delegates to `crate::agent::tool_registry::get_tools`.
pub fn get_tools() -> Vec<ToolDefinition> {
    crate::agent::tool_registry::get_tools()
}
608
/// State of one agent conversation: chat history, handles to the inference
/// engine and supporting managers, session memory, and per-turn guards.
pub struct ConversationManager {
    /// Full conversation history in OpenAI format.
    pub history: Vec<ChatMessage>,
    /// Shared handle to the inference engine.
    pub engine: Arc<InferenceEngine>,
    /// Tool definitions held by this manager.
    pub tools: Vec<ToolDefinition>,
    /// Shared, mutex-guarded MCP manager.
    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
    // NOTE(review): `professional`, `brief`, `snark`, and `chaos` are not read
    // in this part of the file; presumably persona/verbosity settings —
    // confirm against the prompt-building code.
    pub professional: bool,
    pub brief: bool,
    pub snark: u8,
    pub chaos: u8,
    /// Model to use for simple read-only tasks (optional, user-supplied via --fast-model).
    pub fast_model: Option<String>,
    /// Model to use for complex write/build tasks (optional, user-supplied via --think-model).
    pub think_model: Option<String>,
    /// Files where whitespace auto-correction fired this session.
    pub correction_hints: Vec<String>,
    /// Running background summary of pruned older messages.
    pub running_summary: Option<String>,
    /// Live hardware telemetry handle.
    pub gpu_state: Arc<GpuState>,
    /// Local RAG memory — FTS5-indexed project source.
    pub vein: crate::memory::vein::Vein,
    /// Append-only session transcript logger.
    pub transcript: crate::agent::transcript::TranscriptLogger,
    /// Thread-safe cancellation signal for the current agent turn.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Shared Git remote state (for persistent connectivity checks).
    pub git_state: Arc<crate::agent::git_monitor::GitState>,
    /// Reasoning think-mode override. None = let model decide. Some(true) = force /think.
    /// Some(false) = force /no_think (fast mode, 3-5x quicker for simple tasks).
    pub think_mode: Option<bool>,
    /// Currently selected workflow mode.
    workflow_mode: WorkflowMode,
    /// Layer 6: Dynamic Task Context (extracted during compaction)
    pub session_memory: crate::agent::compaction::SessionMemory,
    /// Shared handle to the swarm coordinator.
    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    /// Shared handle to the voice manager.
    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
    /// Personality description for the current Rusty soul — used in chat mode system prompt.
    pub soul_personality: String,
    /// Shared, mutex-guarded LSP manager.
    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
    /// Active reasoning summary extracted from the previous model turn (Gemma-4 Native).
    pub reasoning_history: Option<String>,
    /// Layer 8: Active Reference Pinning (Context Locked)
    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
    /// Hard action-grounding state for proof-before-action checks.
    action_grounding: Arc<Mutex<ActionGroundingState>>,
    /// True only during `/code Implement the current plan.` style execution turns.
    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
    /// Typed per-turn recovery attempt tracking.
    recovery_context: RecoveryContext,
    /// L1 context block — hot files summary injected into the system prompt.
    /// Built once after vein init and updated as edits accumulate heat.
    pub l1_context: Option<String>,
    /// Condensed AST repository layout for the active project.
    pub repo_map: Option<String>,
    /// Number of real inference turns completed this session.
    pub turn_count: u32,
    /// Last user message sent to the model — persisted as checkpoint goal.
    pub last_goal: Option<String>,
}
668
669impl ConversationManager {
670    fn vein_docs_only_mode(&self) -> bool {
671        !crate::tools::file_ops::is_project_workspace()
672    }
673
674    fn refresh_vein_index(&mut self) -> usize {
675        let count = if self.vein_docs_only_mode() {
676            let root = crate::tools::file_ops::workspace_root();
677            tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
678        } else {
679            tokio::task::block_in_place(|| self.vein.index_project())
680        };
681        self.l1_context = self.vein.l1_context();
682        count
683    }
684
685    fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
686        let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
687        let workspace_mode = if self.vein_docs_only_mode() {
688            "docs-only (outside a project workspace)"
689        } else {
690            "project workspace"
691        };
692        let active_room = snapshot.active_room.as_deref().unwrap_or("none");
693        let mut out = format!(
694            "Vein Inspection\n\
695             Workspace mode: {workspace_mode}\n\
696             Indexed this pass: {indexed_this_pass}\n\
697             Indexed source files: {}\n\
698             Indexed docs: {}\n\
699             Indexed session exchanges: {}\n\
700             Embedded source/doc chunks: {}\n\
701             Embeddings available: {}\n\
702             Active room bias: {active_room}\n\
703             L1 hot-files block: {}\n",
704            snapshot.indexed_source_files,
705            snapshot.indexed_docs,
706            snapshot.indexed_session_exchanges,
707            snapshot.embedded_source_doc_chunks,
708            if snapshot.has_any_embeddings {
709                "yes"
710            } else {
711                "no"
712            },
713            if snapshot.l1_ready {
714                "ready"
715            } else {
716                "not built yet"
717            },
718        );
719
720        if snapshot.hot_files.is_empty() {
721            out.push_str("Hot files: none yet.\n");
722            return out;
723        }
724
725        out.push_str("\nHot files by room:\n");
726        let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
727            std::collections::BTreeMap::new();
728        for file in &snapshot.hot_files {
729            by_room.entry(file.room.as_str()).or_default().push(file);
730        }
731        for (room, files) in by_room {
732            out.push_str(&format!("[{}]\n", room));
733            for file in files {
734                out.push_str(&format!(
735                    "- {} [{} edit{}]\n",
736                    file.path,
737                    file.heat,
738                    if file.heat == 1 { "" } else { "s" }
739                ));
740            }
741        }
742
743        out
744    }
745
746    fn latest_user_prompt(&self) -> Option<&str> {
747        self.history
748            .iter()
749            .rev()
750            .find(|msg| msg.role == "user")
751            .map(|msg| msg.content.as_str())
752    }
753
754    async fn emit_direct_response(
755        &mut self,
756        tx: &mpsc::Sender<InferenceEvent>,
757        raw_user_input: &str,
758        effective_user_input: &str,
759        response: &str,
760    ) {
761        self.history.push(ChatMessage::user(effective_user_input));
762        self.history.push(ChatMessage::assistant_text(response));
763        self.transcript.log_user(raw_user_input);
764        self.transcript.log_agent(response);
765        for chunk in chunk_text(response, 8) {
766            if !chunk.is_empty() {
767                let _ = tx.send(InferenceEvent::Token(chunk)).await;
768            }
769        }
770        let _ = tx.send(InferenceEvent::Done).await;
771        self.trim_history(80);
772        self.refresh_session_memory();
773        self.save_session();
774    }
775
776    async fn emit_operator_checkpoint(
777        &mut self,
778        tx: &mpsc::Sender<InferenceEvent>,
779        state: OperatorCheckpointState,
780        summary: impl Into<String>,
781    ) {
782        let summary = summary.into();
783        self.session_memory
784            .record_checkpoint(state.label(), summary.clone());
785        let _ = tx
786            .send(InferenceEvent::OperatorCheckpoint { state, summary })
787            .await;
788    }
789
790    async fn emit_recovery_recipe_summary(
791        &mut self,
792        tx: &mpsc::Sender<InferenceEvent>,
793        state: impl Into<String>,
794        summary: impl Into<String>,
795    ) {
796        let state = state.into();
797        let summary = summary.into();
798        self.session_memory.record_recovery(state, summary.clone());
799        let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
800    }
801
802    async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
803        let _ = tx
804            .send(InferenceEvent::ProviderStatus {
805                state: ProviderRuntimeState::Live,
806                summary: String::new(),
807            })
808            .await;
809        self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
810            .await;
811    }
812
813    async fn emit_prompt_pressure_for_messages(
814        &self,
815        tx: &mpsc::Sender<InferenceEvent>,
816        messages: &[ChatMessage],
817    ) {
818        let context_length = self.engine.current_context_length();
819        let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
820            crate::agent::inference::estimate_prompt_pressure(
821                messages,
822                &self.tools,
823                context_length,
824            );
825        let _ = tx
826            .send(InferenceEvent::PromptPressure {
827                estimated_input_tokens,
828                reserved_output_tokens,
829                estimated_total_tokens,
830                context_length,
831                percent,
832            })
833            .await;
834    }
835
836    async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
837        let context_length = self.engine.current_context_length();
838        let _ = tx
839            .send(InferenceEvent::PromptPressure {
840                estimated_input_tokens: 0,
841                reserved_output_tokens: 0,
842                estimated_total_tokens: 0,
843                context_length,
844                percent: 0,
845            })
846            .await;
847    }
848
849    async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
850        let context_length = self.engine.current_context_length();
851        let vram_ratio = self.gpu_state.ratio();
852        let config = CompactionConfig::adaptive(context_length, vram_ratio);
853        let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
854        let percent = if config.max_estimated_tokens == 0 {
855            0
856        } else {
857            ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
858        };
859
860        let _ = tx
861            .send(InferenceEvent::CompactionPressure {
862                estimated_tokens,
863                threshold_tokens: config.max_estimated_tokens,
864                percent,
865            })
866            .await;
867    }
868
869    async fn refresh_runtime_profile_and_report(
870        &mut self,
871        tx: &mpsc::Sender<InferenceEvent>,
872        reason: &str,
873    ) -> Option<(String, usize, bool)> {
874        let refreshed = self.engine.refresh_runtime_profile().await;
875        if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
876            let _ = tx
877                .send(InferenceEvent::RuntimeProfile {
878                    model_id: model_id.clone(),
879                    context_length: *context_length,
880                })
881                .await;
882            self.transcript.log_system(&format!(
883                "Runtime profile refresh ({}): model={} ctx={} changed={}",
884                reason, model_id, context_length, changed
885            ));
886        }
887        refreshed
888    }
889
890    pub fn new(
891        engine: Arc<InferenceEngine>,
892        professional: bool,
893        brief: bool,
894        snark: u8,
895        chaos: u8,
896        soul_personality: String,
897        fast_model: Option<String>,
898        think_model: Option<String>,
899        gpu_state: Arc<GpuState>,
900        git_state: Arc<crate::agent::git_monitor::GitState>,
901        swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
902        voice_manager: Arc<crate::ui::voice::VoiceManager>,
903    ) -> Self {
904        let (saved_summary, saved_memory) = load_session_data();
905
906        // Build the initial mcp_manager
907        let mcp_manager = Arc::new(tokio::sync::Mutex::new(
908            crate::agent::mcp_manager::McpManager::new(),
909        ));
910
911        // Build the initial system prompt using the canonical InferenceEngine path.
912        let dynamic_instructions =
913            engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
914
915        let history = vec![ChatMessage::system(&dynamic_instructions)];
916
917        let vein_path = crate::tools::file_ops::workspace_root()
918            .join(".hematite")
919            .join("vein.db");
920        let vein_base_url = engine.base_url.clone();
921        let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
922            .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
923
924        Self {
925            history,
926            engine,
927            tools: get_tools(),
928            mcp_manager,
929            professional,
930            brief,
931            snark,
932            chaos,
933            fast_model,
934            think_model,
935            correction_hints: Vec::new(),
936            running_summary: saved_summary,
937            gpu_state,
938            vein,
939            transcript: crate::agent::transcript::TranscriptLogger::new(),
940            cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
941            git_state,
942            think_mode: None,
943            workflow_mode: WorkflowMode::Auto,
944            session_memory: saved_memory,
945            swarm_coordinator,
946            voice_manager,
947            soul_personality,
948            lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
949                crate::tools::file_ops::workspace_root(),
950            ))),
951            reasoning_history: None,
952            pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
953            action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
954            plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
955            recovery_context: RecoveryContext::default(),
956            l1_context: None,
957            repo_map: None,
958            turn_count: 0,
959            last_goal: None,
960        }
961    }
962
963    /// Index the project into The Vein. Call once after construction.
964    /// Uses block_in_place so the tokio runtime thread isn't parked.
965    pub fn initialize_vein(&mut self) -> usize {
966        self.refresh_vein_index()
967    }
968
969    /// Generate the AST Repo Map. Call once after construction or when resetting context.
970    pub fn initialize_repo_map(&mut self) {
971        if !self.vein_docs_only_mode() {
972            let root = crate::tools::file_ops::workspace_root();
973            let hot = self.vein.hot_files_weighted(10);
974            let gen = crate::memory::repo_map::RepoMapGenerator::new(&root).with_hot_files(&hot);
975            match tokio::task::block_in_place(|| gen.generate()) {
976                Ok(map) => self.repo_map = Some(map),
977                Err(e) => {
978                    self.repo_map = Some(format!("Repo Map generation failed: {}", e));
979                }
980            }
981        }
982    }
983
984    /// Re-generate the repo map after a file edit so rankings stay fresh.
985    /// Lightweight (~100-200ms) — called after successful mutations.
986    fn refresh_repo_map(&mut self) {
987        self.initialize_repo_map();
988    }
989
990    fn save_session(&self) {
991        let path = session_path();
992        if let Some(parent) = path.parent() {
993            let _ = std::fs::create_dir_all(parent);
994        }
995        let saved = SavedSession {
996            running_summary: self.running_summary.clone(),
997            session_memory: self.session_memory.clone(),
998            last_goal: self.last_goal.clone(),
999            turn_count: self.turn_count,
1000        };
1001        if let Ok(json) = serde_json::to_string(&saved) {
1002            let _ = std::fs::write(&path, json);
1003        }
1004    }
1005
1006    fn save_empty_session(&self) {
1007        let path = session_path();
1008        if let Some(parent) = path.parent() {
1009            let _ = std::fs::create_dir_all(parent);
1010        }
1011        let saved = SavedSession {
1012            running_summary: None,
1013            session_memory: crate::agent::compaction::SessionMemory::default(),
1014            last_goal: None,
1015            turn_count: 0,
1016        };
1017        if let Ok(json) = serde_json::to_string(&saved) {
1018            let _ = std::fs::write(&path, json);
1019        }
1020    }
1021
1022    fn refresh_session_memory(&mut self) {
1023        let current_plan = self.session_memory.current_plan.clone();
1024        let previous_memory = self.session_memory.clone();
1025        self.session_memory = compaction::extract_memory(&self.history);
1026        self.session_memory.current_plan = current_plan;
1027        self.session_memory
1028            .inherit_runtime_ledger_from(&previous_memory);
1029    }
1030
1031    fn build_chat_system_prompt(&self) -> String {
1032        let species = &self.engine.species;
1033        let personality = &self.soul_personality;
1034        format!(
1035            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
1036             {personality}\n\n\
1037             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
1038             Rules:\n\
1039             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
1040             - Answer directly. One paragraph is usually right.\n\
1041             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
1042             - Don't narrate your reasoning or mention tool names unprompted.\n\
1043             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
1044             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
1045             - If the user wants the full coding harness, they can type `/agent`.\n",
1046        )
1047    }
1048
1049    fn append_session_handoff(&self, system_msg: &mut String) {
1050        let has_summary = self
1051            .running_summary
1052            .as_ref()
1053            .map(|s| !s.trim().is_empty())
1054            .unwrap_or(false);
1055        let has_memory = self.session_memory.has_signal();
1056
1057        if !has_summary && !has_memory {
1058            return;
1059        }
1060
1061        system_msg.push_str(
1062            "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
1063             This is compact carry-over from earlier work on this machine.\n\
1064             Use it only when it helps the current request.\n\
1065             Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
1066        );
1067
1068        if has_memory {
1069            system_msg.push_str("\n## Active Task Memory\n");
1070            system_msg.push_str(&self.session_memory.to_prompt());
1071        }
1072
1073        if let Some(summary) = self.running_summary.as_deref() {
1074            if !summary.trim().is_empty() {
1075                system_msg.push_str("\n## Compacted Session Summary\n");
1076                system_msg.push_str(summary);
1077                system_msg.push('\n');
1078            }
1079        }
1080    }
1081
1082    fn set_workflow_mode(&mut self, mode: WorkflowMode) {
1083        self.workflow_mode = mode;
1084    }
1085
1086    fn current_plan_summary(&self) -> Option<String> {
1087        self.session_memory
1088            .current_plan
1089            .as_ref()
1090            .filter(|plan| plan.has_signal())
1091            .map(|plan| plan.summary_line())
1092    }
1093
1094    fn current_plan_allowed_paths(&self) -> Vec<String> {
1095        self.session_memory
1096            .current_plan
1097            .as_ref()
1098            .map(|plan| {
1099                plan.target_files
1100                    .iter()
1101                    .map(|path| normalize_workspace_path(path))
1102                    .collect()
1103            })
1104            .unwrap_or_default()
1105    }
1106
1107    fn persist_architect_handoff(
1108        &mut self,
1109        response: &str,
1110    ) -> Option<crate::tools::plan::PlanHandoff> {
1111        if self.workflow_mode != WorkflowMode::Architect {
1112            return None;
1113        }
1114        let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1115            return None;
1116        };
1117        let _ = crate::tools::plan::save_plan_handoff(&plan);
1118        self.session_memory.current_plan = Some(plan.clone());
1119        Some(plan)
1120    }
1121
1122    async fn begin_grounded_turn(&self) -> u64 {
1123        let mut state = self.action_grounding.lock().await;
1124        state.turn_index += 1;
1125        state.turn_index
1126    }
1127
1128    async fn reset_action_grounding(&self) {
1129        let mut state = self.action_grounding.lock().await;
1130        *state = ActionGroundingState::default();
1131    }
1132
1133    async fn record_read_observation(&self, path: &str) {
1134        let normalized = normalize_workspace_path(path);
1135        let mut state = self.action_grounding.lock().await;
1136        let turn = state.turn_index;
1137        // read_file returns full file content with line numbers — sufficient for
1138        // the model to know exact text before editing, so it satisfies the
1139        // line-inspection grounding check too.
1140        state.observed_paths.insert(normalized.clone(), turn);
1141        state.inspected_paths.insert(normalized, turn);
1142    }
1143
1144    async fn record_line_inspection(&self, path: &str) {
1145        let normalized = normalize_workspace_path(path);
1146        let mut state = self.action_grounding.lock().await;
1147        let turn = state.turn_index;
1148        state.observed_paths.insert(normalized.clone(), turn);
1149        state.inspected_paths.insert(normalized, turn);
1150    }
1151
1152    async fn record_verify_build_result(&self, ok: bool, output: &str) {
1153        let mut state = self.action_grounding.lock().await;
1154        let turn = state.turn_index;
1155        state.last_verify_build_turn = Some(turn);
1156        state.last_verify_build_ok = ok;
1157        if ok {
1158            state.code_changed_since_verify = false;
1159            state.last_failed_build_paths.clear();
1160        } else {
1161            state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1162        }
1163    }
1164
1165    fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
1166        self.session_memory.record_verification(ok, summary);
1167    }
1168
1169    async fn record_successful_mutation(&self, path: Option<&str>) {
1170        let mut state = self.action_grounding.lock().await;
1171        state.code_changed_since_verify = match path {
1172            Some(p) => is_code_like_path(p),
1173            None => true,
1174        };
1175    }
1176
1177    async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
1178        if self
1179            .plan_execution_active
1180            .load(std::sync::atomic::Ordering::SeqCst)
1181        {
1182            if is_current_plan_irrelevant_tool(name) {
1183                return Err(format!(
1184                    "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
1185                    name
1186                ));
1187            }
1188
1189            if is_plan_scoped_tool(name) {
1190                let allowed_paths = self.current_plan_allowed_paths();
1191                if !allowed_paths.is_empty() {
1192                    let in_allowed = match name {
1193                        "auto_pin_context" => args
1194                            .get("paths")
1195                            .and_then(|v| v.as_array())
1196                            .map(|paths| {
1197                                !paths.is_empty()
1198                                    && paths.iter().all(|v| {
1199                                        v.as_str()
1200                                            .map(normalize_workspace_path)
1201                                            .map(|p| allowed_paths.contains(&p))
1202                                            .unwrap_or(false)
1203                                    })
1204                            })
1205                            .unwrap_or(false),
1206                        "grep_files" | "list_files" => args
1207                            .get("path")
1208                            .and_then(|v| v.as_str())
1209                            .map(normalize_workspace_path)
1210                            .map(|p| allowed_paths.contains(&p))
1211                            .unwrap_or(false),
1212                        _ => action_target_path(name, args)
1213                            .map(|p| allowed_paths.contains(&p))
1214                            .unwrap_or(false),
1215                    };
1216
1217                    if !in_allowed {
1218                        let allowed = allowed_paths
1219                            .iter()
1220                            .map(|p| format!("`{}`", p))
1221                            .collect::<Vec<_>>()
1222                            .join(", ");
1223                        return Err(format!(
1224                            "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
1225                            allowed
1226                        ));
1227                    }
1228                }
1229            }
1230
1231            if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
1232                if let Some(target) = action_target_path(name, args) {
1233                    let state = self.action_grounding.lock().await;
1234                    let recently_inspected = state
1235                        .inspected_paths
1236                        .get(&target)
1237                        .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1238                        .unwrap_or(false);
1239                    drop(state);
1240                    if !recently_inspected {
1241                        return Err(format!(
1242                            "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
1243                            name, target
1244                        ));
1245                    }
1246                }
1247            }
1248        }
1249
1250        if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
1251            return Err(
1252                "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
1253                    .to_string(),
1254            );
1255        }
1256
1257        if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
1258            if name == "shell" {
1259                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1260                let risk = crate::tools::guard::classify_bash_risk(command);
1261                if !matches!(risk, crate::tools::RiskLevel::Safe) {
1262                    return Err(format!(
1263                        "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
1264                        self.workflow_mode.label()
1265                    ));
1266                }
1267            } else {
1268                return Err(format!(
1269                    "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
1270                    self.workflow_mode.label()
1271                ));
1272            }
1273        }
1274
1275        let normalized_target = action_target_path(name, args);
1276        if let Some(target) = normalized_target.as_deref() {
1277            if matches!(
1278                name,
1279                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1280            ) {
1281                if let Some(prompt) = self.latest_user_prompt() {
1282                    if docs_edit_without_explicit_request(prompt, target) {
1283                        return Err(format!(
1284                            "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
1285                            target
1286                        ));
1287                    }
1288                }
1289            }
1290            let path_exists = std::path::Path::new(target).exists();
1291            if path_exists {
1292                let state = self.action_grounding.lock().await;
1293                let pinned = self.pinned_files.lock().await;
1294                let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
1295                drop(pinned);
1296
1297                // edit_file and multi_search_replace match text exactly, so they need a
1298                // tighter evidence bar than a plain read. Require inspect_lines on the
1299                // target within the last 3 turns. A read_file in the *same* turn is also
1300                // accepted (the model just loaded the file and is making an immediate edit).
1301                let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
1302                let recently_inspected = state
1303                    .inspected_paths
1304                    .get(target)
1305                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1306                    .unwrap_or(false);
1307                let same_turn_read = state
1308                    .observed_paths
1309                    .get(target)
1310                    .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
1311                    .unwrap_or(false);
1312                let recent_observed = state
1313                    .observed_paths
1314                    .get(target)
1315                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1316                    .unwrap_or(false);
1317
1318                if needs_exact_window {
1319                    if !recently_inspected && !same_turn_read && !pinned_match {
1320                        return Err(format!(
1321                            "Action blocked: `{}` on '{}' requires a line-level inspection first. \
1322                             Use `inspect_lines` on the target region to get the exact current text \
1323                             (whitespace and indentation included), then retry the edit.",
1324                            name, target
1325                        ));
1326                    }
1327                } else if !recent_observed && !pinned_match {
1328                    return Err(format!(
1329                        "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
1330                        name, target
1331                    ));
1332                }
1333            }
1334        }
1335
1336        if is_mcp_mutating_tool(name) {
1337            return Err(format!(
1338                "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
1339                name
1340            ));
1341        }
1342
1343        if is_mcp_workspace_read_tool(name) {
1344            return Err(format!(
1345                "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
1346                name
1347            ));
1348        }
1349
1350        // Phase gate: if the build is broken, constrain edits to files that cargo flagged.
1351        // This prevents the model from wandering to unrelated files after a failed verify.
1352        if matches!(
1353            name,
1354            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1355        ) {
1356            if let Some(target) = normalized_target.as_deref() {
1357                let state = self.action_grounding.lock().await;
1358                if state.code_changed_since_verify
1359                    && !state.last_verify_build_ok
1360                    && !state.last_failed_build_paths.is_empty()
1361                    && !state.last_failed_build_paths.iter().any(|p| p == target)
1362                {
1363                    let files = state
1364                        .last_failed_build_paths
1365                        .iter()
1366                        .map(|p| format!("`{}`", p))
1367                        .collect::<Vec<_>>()
1368                        .join(", ");
1369                    return Err(format!(
1370                        "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
1371                        files
1372                    ));
1373                }
1374            }
1375        }
1376
1377        if name == "git_commit" || name == "git_push" {
1378            let state = self.action_grounding.lock().await;
1379            if state.code_changed_since_verify && !state.last_verify_build_ok {
1380                return Err(format!(
1381                    "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
1382                    name
1383                ));
1384            }
1385        }
1386
1387        if name == "shell" {
1388            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1389            if shell_looks_like_structured_host_inspection(command) {
1390                // Auto-redirect: silently call inspect_host with the right topic instead of
1391                // returning a block error that the model may fail to recover from.
1392                // Derive topic ONLY from the shell command itself. We do not fall back to the user prompt
1393                // here to avoid trapping secondary shell commands in a redirection loop based on the primary intent.
1394                let topic = match preferred_host_inspection_topic(command) {
1395                    Some(t) => t.to_string(),
1396                    None => return Ok(()), // Not a clear host inspection command, allow it to pass through.
1397                };
1398
1399                {
1400                    let mut state = self.action_grounding.lock().await;
1401                    let current_turn = state.turn_index;
1402                    if let Some(turn) = state.redirected_host_inspection_topics.get(&topic) {
1403                        if *turn == current_turn {
1404                             return Err(format!(
1405                                "[auto-redirected shell→inspect_host(topic=\"{topic}\")] Notice: The diagnostic data for topic `{topic}` was already provided in this turn. Using the previous result to avoid redundant tool calls."
1406                            ));
1407                        }
1408                    }
1409                    state
1410                        .redirected_host_inspection_topics
1411                        .insert(topic.clone(), current_turn);
1412                }
1413
1414                let path_val = self
1415                    .latest_user_prompt()
1416                    .and_then(|p| {
1417                        // Very basic heuristic for path extraction: look for strings with dots/slashes
1418                        p.split_whitespace()
1419                            .find(|w| w.contains('.') || w.contains('/') || w.contains('\\'))
1420                            .map(|s| {
1421                                s.trim_matches(|c: char| {
1422                                    !c.is_alphanumeric() && c != '.' && c != '/' && c != '\\'
1423                                })
1424                            })
1425                    })
1426                    .unwrap_or("");
1427
1428                let mut redirect_args = if !path_val.is_empty() {
1429                    serde_json::json!({ "topic": topic, "path": path_val })
1430                } else {
1431                    serde_json::json!({ "topic": topic })
1432                };
1433
1434                // Surgical Argument Extraction for redirected shell payloads (Robust "Wide Net" version)
1435                if topic == "ad_user" || topic == "dns_lookup" {
1436                    let cmd_lower = command.to_lowercase();
1437                    let mut identity = String::new();
1438
1439                    // 1. Explicit Identity check
1440                    if let Some(idx) = cmd_lower.find("-identity") {
1441                        let after_id = &command[idx + 9..].trim();
1442                        identity = if after_id.starts_with('\'') || after_id.starts_with('"') {
1443                            let quote = after_id.chars().next().unwrap();
1444                            after_id.split(quote).nth(1).unwrap_or("").to_string()
1445                        } else {
1446                            after_id.split_whitespace().next().unwrap_or("").to_string()
1447                        };
1448                    } 
1449                    
1450                    // 2. Wide-Net Fallback: Find the first non-cmdlet, non-parameter string
1451                    if identity.is_empty() {
1452                        let parts: Vec<&str> = command.split_whitespace().collect();
1453                        for (i, part) in parts.iter().enumerate() {
1454                            if i == 0 || part.starts_with('-') { continue; }
1455                            // Skip common cmdlets if they are in the parts list
1456                            let p_low = part.to_lowercase();
1457                            if p_low.contains("get-ad") || p_low.contains("powershell") || p_low == "-command" { continue; }
1458                            
1459                            identity = part.trim_matches(|c: char| c == '\'' || c == '"').to_string();
1460                            if !identity.is_empty() { break; }
1461                        }
1462                    }
1463
1464                    if !identity.is_empty() {
1465                        redirect_args.as_object_mut().unwrap().insert("name_filter".to_string(), serde_json::Value::String(identity));
1466                    }
1467                }
1468
1469                let result = crate::tools::host_inspect::inspect_host(&redirect_args).await;
1470                return match result {
1471                    Ok(output) => Err(format!(
1472                        "[auto-redirected shell→inspect_host(topic=\"{topic}\")]\n\n{output}\n\n[Note: Shell is blocked for host inspection. The diagnostic data above fulfills your request. Use inspect_host directly for further diagnostics.]"
1473                    )),
1474                    Err(e) => Err(format!(
1475                        "Redirection to native tool `{topic}` failed: {e}\n\nAction blocked: use `inspect_host(topic: \"{topic}\")` instead of raw `shell` for host-inspection questions. Available topics: updates, security, pending_reboot, disk_health, battery, recent_crashes, scheduled_tasks, dev_conflicts, health_report, storage, hardware, resource_load, processes, network, services, ports, env_doctor, fix_plan, connectivity, wifi, connections, vpn, proxy, firewall_rules, traceroute, dns_cache, arp, route_table, docker, wsl, ssh, env, hosts_file, installed_software, git_config, databases, disk_benchmark, directory, permissions, login_history, registry_audit, share_access.",
1476                    )),
1477                };
1478            }
1479            let reason = args
1480                .get("reason")
1481                .and_then(|v| v.as_str())
1482                .unwrap_or("")
1483                .trim();
1484            let risk = crate::tools::guard::classify_bash_risk(command);
1485            if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
1486                return Err(
1487                    "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
1488                        .to_string(),
1489                );
1490            }
1491        }
1492
1493        Ok(())
1494    }
1495
1496    fn build_action_receipt(
1497        &self,
1498        name: &str,
1499        args: &Value,
1500        output: &str,
1501        is_error: bool,
1502    ) -> Option<ChatMessage> {
1503        if is_error || !is_destructive_tool(name) {
1504            return None;
1505        }
1506
1507        let mut receipt = String::from("[ACTION RECEIPT]\n");
1508        receipt.push_str(&format!("- tool: {}\n", name));
1509        if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1510            receipt.push_str(&format!("- target: {}\n", path));
1511        }
1512        if name == "shell" {
1513            if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1514                receipt.push_str(&format!("- command: {}\n", command));
1515            }
1516            if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1517                if !reason.trim().is_empty() {
1518                    receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1519                }
1520            }
1521        }
1522        let first_line = output.lines().next().unwrap_or(output).trim();
1523        receipt.push_str(&format!("- outcome: {}\n", first_line));
1524        Some(ChatMessage::system(&receipt))
1525    }
1526
1527    fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1528        self.tools
1529            .retain(|tool| !tool.function.name.starts_with("mcp__"));
1530        self.tools
1531            .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1532                tool_type: "function".into(),
1533                function: ToolFunction {
1534                    name: tool.name.clone(),
1535                    description: tool.description.clone().unwrap_or_default(),
1536                    parameters: tool.input_schema.clone(),
1537                },
1538                metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1539            }));
1540    }
1541
1542    async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1543        let summary = {
1544            let mcp = self.mcp_manager.lock().await;
1545            mcp.runtime_report()
1546        };
1547        let _ = tx
1548            .send(InferenceEvent::McpStatus {
1549                state: summary.state,
1550                summary: summary.summary,
1551            })
1552            .await;
1553    }
1554
1555    async fn refresh_mcp_tools(
1556        &mut self,
1557        tx: &mpsc::Sender<InferenceEvent>,
1558    ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1559        let mcp_tools = {
1560            let mut mcp = self.mcp_manager.lock().await;
1561            match mcp.initialize_all().await {
1562                Ok(()) => mcp.discover_tools().await,
1563                Err(e) => {
1564                    drop(mcp);
1565                    self.replace_mcp_tool_definitions(&[]);
1566                    self.emit_mcp_runtime_status(tx).await;
1567                    return Err(e.into());
1568                }
1569            }
1570        };
1571
1572        self.replace_mcp_tool_definitions(&mcp_tools);
1573        self.emit_mcp_runtime_status(tx).await;
1574        Ok(mcp_tools)
1575    }
1576
1577    /// Spawns and initializes all configured MCP servers, discovering their tools.
1578    pub async fn initialize_mcp(
1579        &mut self,
1580        tx: &mpsc::Sender<InferenceEvent>,
1581    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1582        let _ = self.refresh_mcp_tools(tx).await?;
1583        Ok(())
1584    }
1585
1586    /// Run one user turn through the full agentic loop.
1587    ///
1588    /// Adds the user message, calls the model, executes any tools, and loops
1589    /// until the model produces a final text reply.  All progress is streamed
1590    /// as `InferenceEvent` values via `tx`.
1591    pub async fn run_turn(
1592        &mut self,
1593        user_turn: &UserTurn,
1594        tx: mpsc::Sender<InferenceEvent>,
1595        yolo: bool,
1596    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1597        let user_input = user_turn.text.as_str();
1598        // ── Fast-path reset commands: handled locally, no network I/O needed ──
1599        if user_input.trim() == "/new" {
1600            self.history.clear();
1601            self.reasoning_history = None;
1602            self.session_memory.clear();
1603            self.running_summary = None;
1604            self.correction_hints.clear();
1605            self.pinned_files.lock().await.clear();
1606            self.reset_action_grounding().await;
1607            reset_task_files();
1608            let _ = std::fs::remove_file(session_path());
1609            self.save_empty_session();
1610            self.emit_compaction_pressure(&tx).await;
1611            self.emit_prompt_pressure_idle(&tx).await;
1612            for chunk in chunk_text(
1613                "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1614                8,
1615            ) {
1616                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1617            }
1618            let _ = tx.send(InferenceEvent::Done).await;
1619            return Ok(());
1620        }
1621
1622        if user_input.trim() == "/forget" {
1623            self.history.clear();
1624            self.reasoning_history = None;
1625            self.session_memory.clear();
1626            self.running_summary = None;
1627            self.correction_hints.clear();
1628            self.pinned_files.lock().await.clear();
1629            self.reset_action_grounding().await;
1630            reset_task_files();
1631            purge_persistent_memory();
1632            tokio::task::block_in_place(|| self.vein.reset());
1633            let _ = std::fs::remove_file(session_path());
1634            self.save_empty_session();
1635            self.emit_compaction_pressure(&tx).await;
1636            self.emit_prompt_pressure_idle(&tx).await;
1637            for chunk in chunk_text(
1638                "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1639                8,
1640            ) {
1641                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1642            }
1643            let _ = tx.send(InferenceEvent::Done).await;
1644            return Ok(());
1645        }
1646
1647        if user_input.trim() == "/vein-inspect" {
1648            let indexed = self.refresh_vein_index();
1649            let report = self.build_vein_inspection_report(indexed);
1650            let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1651            let _ = tx
1652                .send(InferenceEvent::VeinStatus {
1653                    file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1654                    embedded_count: snapshot.embedded_source_doc_chunks,
1655                    docs_only: self.vein_docs_only_mode(),
1656                })
1657                .await;
1658            for chunk in chunk_text(&report, 8) {
1659                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1660            }
1661            let _ = tx.send(InferenceEvent::Done).await;
1662            return Ok(());
1663        }
1664
1665        if user_input.trim() == "/workspace-profile" {
1666            let root = crate::tools::file_ops::workspace_root();
1667            let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1668            let report = crate::agent::workspace_profile::profile_report(&root);
1669            for chunk in chunk_text(&report, 8) {
1670                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1671            }
1672            let _ = tx.send(InferenceEvent::Done).await;
1673            return Ok(());
1674        }
1675
1676        if user_input.trim() == "/rules" {
1677            let root = crate::tools::file_ops::workspace_root();
1678            let rules_path = root.join(".hematite").join("rules.md");
1679            let report = if rules_path.exists() {
1680                match std::fs::read_to_string(&rules_path) {
1681                    Ok(content) => format!(
1682                        "## Behavioral Rules (.hematite/rules.md)\n\n{}\n\n---\nTo update: ask Hematite to edit your rules, or open `.hematite/rules.md` directly. Changes take effect on the next turn.",
1683                        content.trim()
1684                    ),
1685                    Err(e) => format!("Error reading .hematite/rules.md: {e}"),
1686                }
1687            } else {
1688                format!(
1689                    "No behavioral rules file found at `.hematite/rules.md`.\n\nCreate it to add custom behavioral guidelines — they are injected into the system prompt on every turn and apply to any model you load.\n\nExample: ask Hematite to \"create a rules.md with simplicity-first and surgical-edit guidelines\" and it will write the file for you.\n\nExpected path: {}",
1690                    rules_path.display()
1691                )
1692            };
1693            for chunk in chunk_text(&report, 8) {
1694                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1695            }
1696            let _ = tx.send(InferenceEvent::Done).await;
1697            return Ok(());
1698        }
1699
1700        if user_input.trim() == "/vein-reset" {
1701            tokio::task::block_in_place(|| self.vein.reset());
1702            let _ = tx
1703                .send(InferenceEvent::VeinStatus {
1704                    file_count: 0,
1705                    embedded_count: 0,
1706                    docs_only: self.vein_docs_only_mode(),
1707                })
1708                .await;
1709            for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1710                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1711            }
1712            let _ = tx.send(InferenceEvent::Done).await;
1713            return Ok(());
1714        }
1715
1716        // Reload config every turn (edits apply immediately, no restart needed).
1717        let config = crate::agent::config::load_config();
1718        self.recovery_context.clear();
1719        let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1720        if !manual_runtime_refresh {
1721            if let Some((model_id, context_length, changed)) = self
1722                .refresh_runtime_profile_and_report(&tx, "turn_start")
1723                .await
1724            {
1725                if changed {
1726                    let _ = tx
1727                        .send(InferenceEvent::Thought(format!(
1728                            "Runtime refresh: using model `{}` with CTX {} for this turn.",
1729                            model_id, context_length
1730                        )))
1731                        .await;
1732                }
1733            }
1734        }
1735        self.emit_compaction_pressure(&tx).await;
1736        let current_model = self.engine.current_model();
1737        self.engine.set_gemma_native_formatting(
1738            crate::agent::config::effective_gemma_native_formatting(&config, &current_model),
1739        );
1740        let _turn_id = self.begin_grounded_turn().await;
1741        let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1742        let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1743            Ok(tools) => tools,
1744            Err(e) => {
1745                let _ = tx
1746                    .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1747                    .await;
1748                Vec::new()
1749            }
1750        };
1751
1752        // Apply config model overrides (config takes precedence over CLI flags).
1753        let effective_fast = config
1754            .fast_model
1755            .clone()
1756            .or_else(|| self.fast_model.clone());
1757        let effective_think = config
1758            .think_model
1759            .clone()
1760            .or_else(|| self.think_model.clone());
1761
1762        // ── /lsp: start language servers manually if needed ──────────────────
1763        if user_input.trim() == "/lsp" {
1764            let mut lsp = self.lsp_manager.lock().await;
1765            match lsp.start_servers().await {
1766                Ok(_) => {
1767                    let _ = tx
1768                        .send(InferenceEvent::MutedToken(
1769                            "LSP: Servers Initialized OK.".to_string(),
1770                        ))
1771                        .await;
1772                }
1773                Err(e) => {
1774                    let _ = tx
1775                        .send(InferenceEvent::Error(format!(
1776                            "LSP: Failed to start servers - {}",
1777                            e
1778                        )))
1779                        .await;
1780                }
1781            }
1782            let _ = tx.send(InferenceEvent::Done).await;
1783            return Ok(());
1784        }
1785
1786        if user_input.trim() == "/runtime-refresh" {
1787            match self
1788                .refresh_runtime_profile_and_report(&tx, "manual_command")
1789                .await
1790            {
1791                Some((model_id, context_length, changed)) => {
1792                    let msg = if changed {
1793                        format!(
1794                            "Runtime profile refreshed. Model: {} | CTX: {}",
1795                            model_id, context_length
1796                        )
1797                    } else {
1798                        format!(
1799                            "Runtime profile unchanged. Model: {} | CTX: {}",
1800                            model_id, context_length
1801                        )
1802                    };
1803                    for chunk in chunk_text(&msg, 8) {
1804                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1805                    }
1806                }
1807                None => {
1808                    let _ = tx
1809                        .send(InferenceEvent::Error(
1810                            "Runtime refresh failed: LM Studio profile could not be read."
1811                                .to_string(),
1812                        ))
1813                        .await;
1814                }
1815            }
1816            let _ = tx.send(InferenceEvent::Done).await;
1817            return Ok(());
1818        }
1819
1820        if user_input.trim() == "/ask" {
1821            self.set_workflow_mode(WorkflowMode::Ask);
1822            for chunk in chunk_text(
1823                "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1824                8,
1825            ) {
1826                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1827            }
1828            let _ = tx.send(InferenceEvent::Done).await;
1829            return Ok(());
1830        }
1831
1832        if user_input.trim() == "/code" {
1833            self.set_workflow_mode(WorkflowMode::Code);
1834            let mut message =
1835                "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1836            if let Some(plan) = self.current_plan_summary() {
1837                message.push_str(&format!(" Current plan: {plan}."));
1838            }
1839            for chunk in chunk_text(&message, 8) {
1840                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1841            }
1842            let _ = tx.send(InferenceEvent::Done).await;
1843            return Ok(());
1844        }
1845
1846        if user_input.trim() == "/architect" {
1847            self.set_workflow_mode(WorkflowMode::Architect);
1848            let mut message =
1849                "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement. When the handoff is ready, use `/implement-plan` or switch to `/code` to execute it.".to_string();
1850            if let Some(plan) = self.current_plan_summary() {
1851                message.push_str(&format!(" Existing plan: {plan}."));
1852            }
1853            for chunk in chunk_text(&message, 8) {
1854                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1855            }
1856            let _ = tx.send(InferenceEvent::Done).await;
1857            return Ok(());
1858        }
1859
1860        if user_input.trim() == "/read-only" {
1861            self.set_workflow_mode(WorkflowMode::ReadOnly);
1862            for chunk in chunk_text(
1863                "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1864                8,
1865            ) {
1866                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1867            }
1868            let _ = tx.send(InferenceEvent::Done).await;
1869            return Ok(());
1870        }
1871
1872        if user_input.trim() == "/auto" {
1873            self.set_workflow_mode(WorkflowMode::Auto);
1874            for chunk in chunk_text(
1875                "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1876                8,
1877            ) {
1878                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1879            }
1880            let _ = tx.send(InferenceEvent::Done).await;
1881            return Ok(());
1882        }
1883
1884        if user_input.trim() == "/chat" {
1885            self.set_workflow_mode(WorkflowMode::Chat);
1886            let _ = tx.send(InferenceEvent::Done).await;
1887            return Ok(());
1888        }
1889
1890        if user_input.trim() == "/teach" {
1891            self.set_workflow_mode(WorkflowMode::Teach);
1892            for chunk in chunk_text(
1893                "Workflow mode: TEACH. I will inspect your actual machine state first, then walk you through any admin, config, or write task as a grounded, numbered tutorial. I will not execute write operations — I will show you exactly how to do each step yourself.",
1894                8,
1895            ) {
1896                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1897            }
1898            let _ = tx.send(InferenceEvent::Done).await;
1899            return Ok(());
1900        }
1901
1902        if user_input.trim() == "/reroll" {
1903            let soul = crate::ui::hatch::generate_soul_random();
1904            self.snark = soul.snark;
1905            self.chaos = soul.chaos;
1906            self.soul_personality = soul.personality.clone();
1907            // Update the engine's species name so build_chat_system_prompt uses it
1908            // SAFETY: engine is Arc but species is a plain String field we own logically.
1909            // We use Arc::get_mut which only succeeds if this is the only strong ref.
1910            // If it fails (swarm workers hold refs), we fall back to a best-effort clone approach.
1911            let species = soul.species.clone();
1912            if let Some(eng) = Arc::get_mut(&mut self.engine) {
1913                eng.species = species.clone();
1914            }
1915            let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1916            let _ = tx
1917                .send(InferenceEvent::SoulReroll {
1918                    species: soul.species.clone(),
1919                    rarity: soul.rarity.label().to_string(),
1920                    shiny: soul.shiny,
1921                    personality: soul.personality.clone(),
1922                })
1923                .await;
1924            for chunk in chunk_text(
1925                &format!(
1926                    "A new companion awakens!\n[{}{}] {} — \"{}\"",
1927                    soul.rarity.label(),
1928                    shiny_tag,
1929                    soul.species,
1930                    soul.personality
1931                ),
1932                8,
1933            ) {
1934                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1935            }
1936            let _ = tx.send(InferenceEvent::Done).await;
1937            return Ok(());
1938        }
1939
1940        if user_input.trim() == "/agent" {
1941            self.set_workflow_mode(WorkflowMode::Auto);
1942            let _ = tx.send(InferenceEvent::Done).await;
1943            return Ok(());
1944        }
1945
1946        let implement_plan_alias = user_input.trim() == "/implement-plan";
1947        if implement_plan_alias
1948            && !self
1949                .session_memory
1950                .current_plan
1951                .as_ref()
1952                .map(|plan| plan.has_signal())
1953                .unwrap_or(false)
1954        {
1955            for chunk in chunk_text(
1956                "No saved architect handoff is active. Run `/architect` first, or switch to `/code` with an explicit implementation request.",
1957                8,
1958            ) {
1959                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1960            }
1961            let _ = tx.send(InferenceEvent::Done).await;
1962            return Ok(());
1963        }
1964
1965        let mut effective_user_input = if implement_plan_alias {
1966            self.set_workflow_mode(WorkflowMode::Code);
1967            implement_current_plan_prompt().to_string()
1968        } else {
1969            user_input.trim().to_string()
1970        };
1971        if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1972            self.set_workflow_mode(mode);
1973            effective_user_input = rest.to_string();
1974        }
1975        let transcript_user_input = if implement_plan_alias {
1976            transcript_user_turn_text(user_turn, "/implement-plan")
1977        } else {
1978            transcript_user_turn_text(user_turn, &effective_user_input)
1979        };
1980        effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1981        let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1982            && is_current_plan_execution_request(&effective_user_input)
1983            && self
1984                .session_memory
1985                .current_plan
1986                .as_ref()
1987                .map(|plan| plan.has_signal())
1988                .unwrap_or(false);
1989        self.plan_execution_active
1990            .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1991        let _plan_execution_guard = PlanExecutionGuard {
1992            flag: self.plan_execution_active.clone(),
1993        };
1994        let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1995
1996        // ── /think / /no_think: reasoning budget toggle ──────────────────────
1997        if let Some(answer_kind) = intent.direct_answer {
1998            match answer_kind {
1999                DirectAnswerKind::About => {
2000                    let response = build_about_answer();
2001                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2002                        .await;
2003                    return Ok(());
2004                }
2005                DirectAnswerKind::LanguageCapability => {
2006                    let response = build_language_capability_answer();
2007                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2008                        .await;
2009                    return Ok(());
2010                }
2011                DirectAnswerKind::UnsafeWorkflowPressure => {
2012                    let response = build_unsafe_workflow_pressure_answer();
2013                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2014                        .await;
2015                    return Ok(());
2016                }
2017                DirectAnswerKind::SessionMemory => {
2018                    let response = build_session_memory_answer();
2019                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2020                        .await;
2021                    return Ok(());
2022                }
2023                DirectAnswerKind::RecoveryRecipes => {
2024                    let response = build_recovery_recipes_answer();
2025                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2026                        .await;
2027                    return Ok(());
2028                }
2029                DirectAnswerKind::McpLifecycle => {
2030                    let response = build_mcp_lifecycle_answer();
2031                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2032                        .await;
2033                    return Ok(());
2034                }
2035                DirectAnswerKind::AuthorizationPolicy => {
2036                    let response = build_authorization_policy_answer();
2037                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2038                        .await;
2039                    return Ok(());
2040                }
2041                DirectAnswerKind::ToolClasses => {
2042                    let response = build_tool_classes_answer();
2043                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2044                        .await;
2045                    return Ok(());
2046                }
2047                DirectAnswerKind::ToolRegistryOwnership => {
2048                    let response = build_tool_registry_ownership_answer();
2049                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2050                        .await;
2051                    return Ok(());
2052                }
2053                DirectAnswerKind::SessionResetSemantics => {
2054                    let response = build_session_reset_semantics_answer();
2055                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2056                        .await;
2057                    return Ok(());
2058                }
2059                DirectAnswerKind::ProductSurface => {
2060                    let response = build_product_surface_answer();
2061                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2062                        .await;
2063                    return Ok(());
2064                }
2065                DirectAnswerKind::ReasoningSplit => {
2066                    let response = build_reasoning_split_answer();
2067                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2068                        .await;
2069                    return Ok(());
2070                }
2071                DirectAnswerKind::Identity => {
2072                    let response = build_identity_answer();
2073                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2074                        .await;
2075                    return Ok(());
2076                }
2077                DirectAnswerKind::WorkflowModes => {
2078                    let response = build_workflow_modes_answer();
2079                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2080                        .await;
2081                    return Ok(());
2082                }
2083                DirectAnswerKind::GemmaNative => {
2084                    let response = build_gemma_native_answer();
2085                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2086                        .await;
2087                    return Ok(());
2088                }
2089                DirectAnswerKind::GemmaNativeSettings => {
2090                    let response = build_gemma_native_settings_answer();
2091                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2092                        .await;
2093                    return Ok(());
2094                }
2095                DirectAnswerKind::VerifyProfiles => {
2096                    let response = build_verify_profiles_answer();
2097                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2098                        .await;
2099                    return Ok(());
2100                }
2101                DirectAnswerKind::Toolchain => {
2102                    let lower = effective_user_input.to_lowercase();
2103                    let topic = if (lower.contains("voice output") || lower.contains("voice"))
2104                        && (lower.contains("lag")
2105                            || lower.contains("behind visible text")
2106                            || lower.contains("latency"))
2107                    {
2108                        "voice_latency_plan"
2109                    } else {
2110                        "all"
2111                    };
2112                    let response =
2113                        crate::tools::toolchain::describe_toolchain(&serde_json::json!({
2114                            "topic": topic,
2115                            "question": effective_user_input,
2116                        }))
2117                        .await
2118                        .unwrap_or_else(|e| format!("Error: {}", e));
2119                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2120                        .await;
2121                    return Ok(());
2122                }
2123                DirectAnswerKind::ArchitectSessionResetPlan => {
2124                    let plan = build_architect_session_reset_plan();
2125                    let response = plan.to_markdown();
2126                    let _ = crate::tools::plan::save_plan_handoff(&plan);
2127                    self.session_memory.current_plan = Some(plan);
2128                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2129                        .await;
2130                    return Ok(());
2131                }
2132            }
2133        }
2134
2135        if matches!(
2136            self.workflow_mode,
2137            WorkflowMode::Ask | WorkflowMode::ReadOnly
2138        ) && looks_like_mutation_request(&effective_user_input)
2139        {
2140            let response = build_mode_redirect_answer(self.workflow_mode);
2141            self.history.push(ChatMessage::user(&effective_user_input));
2142            self.history.push(ChatMessage::assistant_text(&response));
2143            self.transcript.log_user(&transcript_user_input);
2144            self.transcript.log_agent(&response);
2145            for chunk in chunk_text(&response, 8) {
2146                if !chunk.is_empty() {
2147                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2148                }
2149            }
2150            let _ = tx.send(InferenceEvent::Done).await;
2151            self.trim_history(80);
2152            self.refresh_session_memory();
2153            self.save_session();
2154            return Ok(());
2155        }
2156
2157        if user_input.trim() == "/think" {
2158            self.think_mode = Some(true);
2159            for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
2160                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2161            }
2162            let _ = tx.send(InferenceEvent::Done).await;
2163            return Ok(());
2164        }
2165        if user_input.trim() == "/no_think" {
2166            self.think_mode = Some(false);
2167            for chunk in chunk_text(
2168                "Think mode: OFF — fast mode enabled (no chain-of-thought).",
2169                8,
2170            ) {
2171                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2172            }
2173            let _ = tx.send(InferenceEvent::Done).await;
2174            return Ok(());
2175        }
2176
2177        // ── /pin: add file to active context ────────────────────────────────
2178        if user_input.trim_start().starts_with("/pin ") {
2179            let path = user_input.trim_start()[5..].trim();
2180            match std::fs::read_to_string(path) {
2181                Ok(content) => {
2182                    self.pinned_files
2183                        .lock()
2184                        .await
2185                        .insert(path.to_string(), content);
2186                    let msg = format!(
2187                        "Pinned: {} — this file is now locked in model context.",
2188                        path
2189                    );
2190                    for chunk in chunk_text(&msg, 8) {
2191                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
2192                    }
2193                }
2194                Err(e) => {
2195                    let _ = tx
2196                        .send(InferenceEvent::Error(format!(
2197                            "Failed to pin {}: {}",
2198                            path, e
2199                        )))
2200                        .await;
2201                }
2202            }
2203            let _ = tx.send(InferenceEvent::Done).await;
2204            return Ok(());
2205        }
2206
2207        // ── /unpin: remove file from active context ──────────────────────────
2208        if user_input.trim_start().starts_with("/unpin ") {
2209            let path = user_input.trim_start()[7..].trim();
2210            if self.pinned_files.lock().await.remove(path).is_some() {
2211                let msg = format!("Unpinned: {} — file removed from active context.", path);
2212                for chunk in chunk_text(&msg, 8) {
2213                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2214                }
2215            } else {
2216                let _ = tx
2217                    .send(InferenceEvent::Error(format!(
2218                        "File {} was not pinned.",
2219                        path
2220                    )))
2221                    .await;
2222            }
2223            let _ = tx.send(InferenceEvent::Done).await;
2224            return Ok(());
2225        }
2226
2227        // ── Normal processing ───────────────────────────────────────────────
2228
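2229        // Build the base system prompt from the current `mcp_tools`; a context length of 8_192 or less enables tiny-context mode, which skips the optional prompt sections below.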
2230        let tiny_context_mode = self.engine.current_context_length() <= 8_192;
2231        let mut base_prompt = self.engine.build_system_prompt(
2232            self.snark,
2233            self.chaos,
2234            self.brief,
2235            self.professional,
2236            &self.tools,
2237            self.reasoning_history.as_deref(),
2238            &mcp_tools,
2239        );
2240        if !tiny_context_mode {
2241            if let Some(hint) = &config.context_hint {
2242                if !hint.trim().is_empty() {
2243                    base_prompt.push_str(&format!(
2244                        "\n\n# Project Context (from .hematite/settings.json)\n{}",
2245                        hint
2246                    ));
2247                }
2248            }
2249            if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
2250                &crate::tools::file_ops::workspace_root(),
2251            ) {
2252                base_prompt.push_str(&format!("\n\n{}", profile_block));
2253            }
2254            // L1: inject hot-files block if available (persists across sessions via vein.db).
2255            if let Some(ref l1) = self.l1_context {
2256                base_prompt.push_str(&format!("\n\n{}", l1));
2257            }
2258            if let Some(ref repo_map_block) = self.repo_map {
2259                base_prompt.push_str(&format!("\n\n{}", repo_map_block));
2260            }
2261        }
2262        let grounded_trace_mode = intent.grounded_trace_mode
2263            || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2264        let capability_mode =
2265            intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2266        let toolchain_mode =
2267            intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2268        let host_inspection_mode = intent.host_inspection_mode;
2269        let maintainer_workflow_mode = intent.maintainer_workflow_mode
2270            || preferred_maintainer_workflow(&effective_user_input).is_some();
2271        let workspace_workflow_mode = intent.workspace_workflow_mode
2272            || preferred_workspace_workflow(&effective_user_input).is_some();
2273        let fix_plan_mode =
2274            preferred_host_inspection_topic(&effective_user_input) == Some("fix_plan");
2275        let architecture_overview_mode = intent.architecture_overview_mode;
2276        let capability_needs_repo = intent.capability_needs_repo;
2277        let mut system_msg = build_system_with_corrections(
2278            &base_prompt,
2279            &self.correction_hints,
2280            &self.gpu_state,
2281            &self.git_state,
2282            &config,
2283        );
2284        if tiny_context_mode {
2285            system_msg.push_str(
2286                "\n\n# TINY CONTEXT TURN MODE\n\
2287                 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2288            );
2289        }
2290        if !tiny_context_mode && grounded_trace_mode {
2291            system_msg.push_str(
2292                "\n\n# GROUNDED TRACE MODE\n\
2293                 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2294                 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2295                 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2296                 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2297                 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2298                 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2299                 Do not invent names such as synthetic channels or subsystems.\n\
2300                 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2301                For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2302            );
2303        }
2304        if !tiny_context_mode && capability_mode {
2305            system_msg.push_str(
2306                "\n\n# CAPABILITY QUESTION MODE\n\
2307                 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2308                 Answer from stable Hematite capabilities and current runtime state.\n\
2309                 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2310                 Do NOT call repo-inspection tools like `read_file` or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2311                 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2312                 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2313                 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2314                 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2315                 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2316                 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2317                 Keep the answer short, plain, and ASCII-first.\n"
2318            );
2319        }
2320        if !tiny_context_mode && toolchain_mode {
2321            system_msg.push_str(
2322                "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2323                 This turn is about Hematite's real built-in tools and how to choose them.\n\
2324                 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2325                 Use only real built-in tool names.\n\
2326                 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2327                 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2328                 Be explicit about which tools are optional or conditional.\n"
2329            );
2330        }
2331        if !tiny_context_mode && host_inspection_mode {
2332            system_msg.push_str(
2333                 "\n\n# HOST INSPECTION MODE\n\
2334                 This turn is about the local machine. Make EXACTLY ONE `inspect_host` call using the best matching topic below, then answer. Do NOT call `summary` first. Do NOT make exploratory shell calls.\n\
2335                 - Drive space / disk usage / free space / storage across drives → `storage`\n\
2336                 - CPU model / RAM size / GPU name / hardware specs / BIOS / motherboard → `hardware`\n\
2337                 - CPU % / RAM % / what is using resources / slow machine → `resource_load`\n\
2338                 - Running processes / task manager / what is using RAM → `processes`\n\
2339                 - Windows services / daemons / service state → `services`\n\
2340                 - Listening ports / open ports / what process owns port N / which processes are listening / what is bound to a port → `ports` (waiting for inbound connections — includes PIDs and process names — do NOT also call `processes`)\n\
2341                 - Active connections / established connections / what is connected right now / outbound sessions / show me connections / network connections → `connections` (live two-way sessions, NOT listening ports)\n\
2342                 - Network adapters / IP / gateway / DNS overview → `network`\n\
2343                 - Internet / online / can I reach the internet → `connectivity`\n\
2344                 - Wi-Fi / wireless / signal strength / SSID → `wifi`\n\
2345                 - VPN tunnel / VPN adapter → `vpn`\n\
2346                 - Security / Defender / antivirus / firewall / UAC → `security`\n\
2347                 - Windows Update / pending updates → `updates`\n\
2348                 - Health report / system status overall → `health_report`\n\
2349                 - PATH entries / raw PATH → `path`\n\
2350                 - Installed developer tools / versions / toolchain → `toolchains`\n\
2351                 - Environment/package-manager conflicts → `env_doctor`\n\
2352                 - Fix a workstation problem (cargo not found, port in use, LM Studio) → `fix_plan`\n\
2353                 - Recent Windows errors / warnings / event log / event viewer / show me errors / what failed recently → `log_check` (do NOT call health_report first)\n\
2354                 - Repo / git / workspace health → `repo_doctor`\n\
2355                 - List a specific directory → `directory` (pass `path` arg)\n\
2356                 - Desktop or Downloads folder → `desktop` or `downloads`\n\
2357                 NEVER use `disk` or `directory` for storage/space questions — use `storage`.\n\
2358                 Only use `shell` if the question truly cannot be answered by any topic above.\n\
2359                 NEVER tell the user to run PowerShell, cmd, or shell commands themselves. If the data is incomplete, say so and tell them to ask a more specific question instead.\n\
2360                 NEVER expose internal tool names or API syntax (like `inspect_host(topic=...)`) in your response. Refer to capabilities in plain English: say 'ask me for a fix plan' not 'run inspect_host(topic=fix_plan)'.\n"
2361              );
2362        }
2363        if !tiny_context_mode && fix_plan_mode {
2364            system_msg.push_str(
2365                "\n\n# FIX PLAN MODE\n\
2366                 This turn is a workstation remediation question, not just a diagnosis question.\n\
2367                 Call `inspect_host` with `topic=fix_plan` first.\n\
2368                 Do not start with `path`, `toolchains`, `env_doctor`, or `ports` unless the user explicitly asks for diagnosis details instead of a fix plan.\n\
2369                 Keep the answer grounded, stepwise, and approval-aware.\n"
2370            );
2371        }
2372        if !tiny_context_mode && maintainer_workflow_mode {
2373            system_msg.push_str(
2374                "\n\n# HEMATITE MAINTAINER WORKFLOW MODE\n\
2375                 This turn asks Hematite to run one of Hematite's own maintainer workflows, not invent an ad hoc shell command.\n\
2376                 Prefer `run_hematite_maintainer_workflow` for existing Hematite workflows such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`.\n\
2377                 Use workflow `clean` for cleanup, workflow `package_windows` for rebuilding the local portable or installer, and workflow `release` for the normal version bump/tag/push/publish flow.\n\
2378                 Do not treat this as a generic current-workspace script runner. Only fall back to raw `shell` if the user asks for a script or command outside those Hematite maintainer workflows.\n"
2379            );
2380        }
2381        if !tiny_context_mode && workspace_workflow_mode {
2382            system_msg.push_str(
2383                "\n\n# WORKSPACE WORKFLOW MODE\n\
2384                 This turn asks Hematite to run something in the active project workspace, not in Hematite's own source tree.\n\
2385                 Prefer `run_workspace_workflow` for the current project's build, test, lint, fix, package scripts, just/task/make targets, local repo scripts, or an exact workspace command.\n\
2386                 This tool always runs from the locked workspace root.\n\
2387                 If no real project workspace is locked, say so and tell the user to relaunch Hematite in the target project directory.\n\
2388                 Do not use `run_hematite_maintainer_workflow` unless the request is specifically about Hematite's own cleanup, packaging, or release scripts.\n"
2389            );
2390        }
2391
2392        if !tiny_context_mode && architecture_overview_mode {
2393            system_msg.push_str(
2394                "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2395                 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow.\n\
2396                 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2397                 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2398            );
2399        }
2400
2401        // ── Inject Workflow Mode (pinned files are appended further below) ──
2402        system_msg.push_str(&format!(
2403            "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2404            self.workflow_mode.label()
2405        ));
2406        if tiny_context_mode {
2407            system_msg
2408                .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2409        } else {
2410            match self.workflow_mode {
2411                WorkflowMode::Auto => system_msg.push_str(
2412                    "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2413                ),
2414                WorkflowMode::Ask => system_msg.push_str(
2415                    "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2416                ),
2417                WorkflowMode::Code => system_msg.push_str(
2418                    "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2419                ),
2420                WorkflowMode::Architect => system_msg.push_str(
2421                    "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2422                ),
2423                WorkflowMode::ReadOnly => system_msg.push_str(
2424                    "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2425                ),
2426                WorkflowMode::Teach => system_msg.push_str(
2427                    "TEACH means you are a senior technician giving the user a grounded, numbered walkthrough. \
2428                     MANDATORY PROTOCOL for every admin/config/write task:\n\
2429                     1. Call inspect_host with the most relevant topic(s) FIRST to observe the actual machine state.\n\
2430                     2. Then deliver a numbered step-by-step tutorial that references what you actually observed — exact commands, exact paths, exact values.\n\
2431                     3. End with a verification step the user can run to confirm success.\n\
2432                     4. Do NOT execute write operations yourself. You are the teacher; the user performs the steps.\n\
2433                     5. Treat the user as capable — give precise instructions, not hedged warnings.\n\
2434                     Relevant inspect_host topics for common tasks: hardware (driver installs), security (firewall), ssh (SSH keys), wsl (WSL setup), env (PATH/env vars), services (service config), recent_crashes (troubleshooting), disk_health (storage issues).\n",
2435                ),
2436                WorkflowMode::Chat => {} // replaced by build_chat_system_prompt below
2437            }
2438        }
2439        if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2440            system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2441            system_msg.push_str(architect_handoff_contract());
2442            system_msg.push('\n');
2443        }
2444        if !tiny_context_mode && implement_current_plan {
2445            system_msg.push_str(
2446                "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2447                 The user explicitly asked you to implement the current saved plan.\n\
2448                 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2449                 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2450                 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2451                 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2452            );
2453            if let Some(plan) = self.session_memory.current_plan.as_ref() {
2454                if !plan.target_files.is_empty() {
2455                    system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2456                    for path in &plan.target_files {
2457                        system_msg.push_str(&format!("- {}\n", path));
2458                    }
2459                }
2460            }
2461        }
2462        if !tiny_context_mode {
2463            let pinned = self.pinned_files.lock().await;
2464            if !pinned.is_empty() {
2465                system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2466                system_msg.push_str("The following files are locked in your active memory for prioritized reference.\n\n");
2467                for (path, content) in pinned.iter() {
2468                    system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2469                }
2470            }
2471        }
2472        if !tiny_context_mode {
2473            self.append_session_handoff(&mut system_msg);
2474        }
2475        // In chat mode, replace the full harness prompt with a clean conversational surface.
2476        // The harness prompt (built above) is discarded — Rusty personality takes over.
2477        let system_msg = if self.workflow_mode.is_chat() {
2478            self.build_chat_system_prompt()
2479        } else {
2480            system_msg
2481        };
2482        if self.history.is_empty() || self.history[0].role != "system" {
2483            self.history.insert(0, ChatMessage::system(&system_msg));
2484        } else {
2485            self.history[0] = ChatMessage::system(&system_msg);
2486        }
2487
2488        // Ensure a clean state for the new turn.
2489        self.cancel_token
2490            .store(false, std::sync::atomic::Ordering::SeqCst);
2491
2492        // [Official Gemma-4 Spec] Purge reasoning history for new user turns.
2493        // History from previous turns must not be fed back into the prompt to prevent duplication.
2494        self.reasoning_history = None;
2495
2496        let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2497        let user_content = match self.think_mode {
2498            Some(true) => format!("/think\n{}", effective_user_input),
2499            Some(false) => format!("/no_think\n{}", effective_user_input),
2500            // For non-Gemma models (Qwen etc.) default to /think so the model uses
2501            // hybrid thinking — it decides how much reasoning each turn needs.
2502            // Gemma handles reasoning via <|think|> in the system prompt instead.
2503            // Chat mode and quick tool calls skip /think — fast direct answers.
2504            None if !is_gemma
2505                && !self.workflow_mode.is_chat()
2506                && !is_quick_tool_request(&effective_user_input) =>
2507            {
2508                format!("/think\n{}", effective_user_input)
2509            }
2510            None => effective_user_input.clone(),
2511        };
2512        if let Some(image) = user_turn.attached_image.as_ref() {
2513            let image_url =
2514                crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2515                    .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2516            self.history
2517                .push(ChatMessage::user_with_image(&user_content, &image_url));
2518        } else {
2519            self.history.push(ChatMessage::user(&user_content));
2520        }
2521        self.transcript.log_user(&transcript_user_input);
2522
2523        // Incremental re-index and Vein context injection. Ordinary chat mode
2524        // still skips repo-snippet noise, but docs-only workspaces and explicit
2525        // session-recall prompts should keep Vein memory available.
2526        let vein_docs_only = self.vein_docs_only_mode();
2527        let allow_vein_context = !self.workflow_mode.is_chat()
2528            || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2529        let (vein_context, vein_paths) = if allow_vein_context {
2530            self.refresh_vein_index();
2531            let _ = tx
2532                .send(InferenceEvent::VeinStatus {
2533                    file_count: self.vein.file_count(),
2534                    embedded_count: self.vein.embedded_chunk_count(),
2535                    docs_only: vein_docs_only,
2536                })
2537                .await;
2538            match self.build_vein_context(&effective_user_input) {
2539                Some((ctx, paths)) => (Some(ctx), paths),
2540                None => (None, Vec::new()),
2541            }
2542        } else {
2543            (None, Vec::new())
2544        };
2545        if !vein_paths.is_empty() {
2546            let _ = tx
2547                .send(InferenceEvent::VeinContext { paths: vein_paths })
2548                .await;
2549        }
2550
2551        // Route: pick fast vs think model based on the complexity of this request.
2552        let routed_model = route_model(
2553            &effective_user_input,
2554            effective_fast.as_deref(),
2555            effective_think.as_deref(),
2556        )
2557        .map(|s| s.to_string());
2558
2559        let mut loop_intervention: Option<String> = None;
2560
2561        // ── Harness pre-run: multi-topic host inspection ─────────────────────
2562        // When the user asks for 2+ distinct inspect_host topics in one message,
2563        // run them all here and inject the combined results as a loop_intervention
2564        // so the model receives data instead of having to orchestrate tool calls.
2565        // This prevents the model from collapsing multiple topics into a generic
2566        // one, burning the tool loop budget, or retrying via shell.
2567        {
2568            let topics = all_host_inspection_topics(&effective_user_input);
2569            if topics.len() >= 2 {
2570                let _ = tx
2571                    .send(InferenceEvent::Thought(format!(
2572                        "Harness pre-run: {} host inspection topics detected — running all before model turn.",
2573                        topics.len()
2574                    )))
2575                    .await;
2576
2577                let topic_list = topics.join(", ");
2578                let mut combined = format!(
2579                    "## HARNESS PRE-RUN RESULTS\n\
2580                     The harness already ran inspect_host for the following topics: {topic_list}.\n\
2581                     Use the tool results in context to answer. Do NOT repeat these tool calls.\n\n"
2582                );
2583
2584                let mut tool_calls = Vec::new();
2585                let mut tool_msgs = Vec::new();
2586
2587                for topic in &topics {
2588                    let call_id = format!("prerun_{topic}");
2589                    let args_val = serde_json::json!({ "topic": *topic, "max_entries": 20 });
2590                    let args_str = serde_json::to_string(&args_val).unwrap_or_default();
2591
2592                    tool_calls.push(crate::agent::inference::ToolCallResponse {
2593                        id: call_id.clone(),
2594                        call_type: "function".to_string(),
2595                        function: crate::agent::inference::ToolCallFn {
2596                            name: "inspect_host".to_string(),
2597                            arguments: args_str,
2598                        },
2599                    });
2600
2601                    let label = format!("### inspect_host(topic=\"{topic}\")\n");
2602                    let _ = tx
2603                        .send(InferenceEvent::ToolCallStart {
2604                            id: call_id.clone(),
2605                            name: "inspect_host".to_string(),
2606                            args: format!("inspect host {topic}"),
2607                        })
2608                        .await;
2609
2610                    match crate::tools::host_inspect::inspect_host(&args_val).await {
2611                        Ok(out) => {
2612                            let _ = tx
2613                                .send(InferenceEvent::ToolCallResult {
2614                                    id: call_id.clone(),
2615                                    name: "inspect_host".to_string(),
2616                                    output: out.chars().take(300).collect::<String>() + "...",
2617                                    is_error: false,
2618                                })
2619                                .await;
2620                            combined.push_str(&label);
2621                            combined.push_str(&out);
2622                            combined.push_str("\n\n");
2623                            tool_msgs.push(ChatMessage::tool_result_for_model(
2624                                &call_id,
2625                                "inspect_host",
2626                                &out,
2627                                &self.engine.current_model(),
2628                            ));
2629                        }
2630                        Err(e) => {
2631                            let err_msg = format!("Error: {e}");
2632                            combined.push_str(&label);
2633                            combined.push_str(&err_msg);
2634                            combined.push_str("\n\n");
2635                            tool_msgs.push(ChatMessage::tool_result_for_model(
2636                                &call_id,
2637                                "inspect_host",
2638                                &err_msg,
2639                                &self.engine.current_model(),
2640                            ));
2641                        }
2642                    }
2643                }
2644
2645                // Add the simulated turn to history so the model sees it as context.
2646                self.history
2647                    .push(ChatMessage::assistant_tool_calls("", tool_calls));
2648                for msg in tool_msgs {
2649                    self.history.push(msg);
2650                }
2651
2652                loop_intervention = Some(combined);
2653            }
2654        }
2655
2656        // ── Computation Integrity: nudge model toward run_code for precise math ──
2657        // When the query involves exact numeric computation (hashes, financial math,
2658        // statistics, date arithmetic, unit conversions, algorithmic checks), inject
2659        // a brief pre-turn reminder so the model reaches for run_code instead of
2660        // answering from training-data memory. Only fires when no harness pre-run
2661        // already set a loop_intervention.
2662        if loop_intervention.is_none() && needs_computation_sandbox(&effective_user_input) {
2663            loop_intervention = Some(
2664                "COMPUTATION INTEGRITY NOTICE: This query involves precise numeric computation. \
2665                 Do NOT answer from training-data memory — memory answers for math are guesses. \
2666                 Use `run_code` to compute the real result and return the actual output. \
2667                 IMPORTANT: the `run_code` tool defaults to JavaScript (Deno). \
2668                 If you write Python code, you MUST pass `language: \"python\"` explicitly. \
2669                 If you write JavaScript/TypeScript, omit the language field or pass `language: \"javascript\"`. \
2670                 Write the code, run it, return the result."
2671                    .to_string(),
2672            );
2673        }
2674
2675        let mut implementation_started = false;
2676        let mut non_mutating_plan_steps = 0usize;
2677        let non_mutating_plan_soft_cap = 5usize;
2678        let non_mutating_plan_hard_cap = 8usize;
2679        let mut overview_runtime_trace: Option<String> = None;
2680
2681        // Safety cap – never spin forever on a broken model.
2682        let max_iters = 25;
2683        let mut consecutive_errors = 0;
2684        let mut empty_cleaned_nudges = 0u8;
2685        let mut first_iter = true;
2686        let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2687        // Track identical tool results within this turn to detect logical loops.
2688        let _result_counts: std::collections::HashMap<String, usize> =
2689            std::collections::HashMap::new();
2690        // Track the count of identical (name, args) calls to detect infinite tool loops.
2691        let mut repeat_counts: std::collections::HashMap<String, usize> =
2692            std::collections::HashMap::new();
2693        let mut completed_tool_cache: std::collections::HashMap<String, CachedToolResult> =
2694            std::collections::HashMap::new();
2695        let mut successful_read_targets: std::collections::HashSet<String> =
2696            std::collections::HashSet::new();
2697        // (path, offset) pairs — catches repeated reads at the same non-zero offset.
2698        let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2699            std::collections::HashSet::new();
2700        let mut successful_grep_targets: std::collections::HashSet<String> =
2701            std::collections::HashSet::new();
2702        let mut no_match_grep_targets: std::collections::HashSet<String> =
2703            std::collections::HashSet::new();
2704        let mut broad_grep_targets: std::collections::HashSet<String> =
2705            std::collections::HashSet::new();
2706
2707        // Track the index of the message that started THIS turn, so compaction doesn't summarize it.
2708        let mut turn_anchor = self.history.len().saturating_sub(1);
2709
2710        for _iter in 0..max_iters {
2711            let mut mutation_occurred = false;
2712            // Priority Check: External Cancellation (via Esc key in TUI)
2713            if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2714                self.cancel_token
2715                    .store(false, std::sync::atomic::Ordering::SeqCst);
2716                let _ = tx
2717                    .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2718                    .await;
2719                let _ = tx.send(InferenceEvent::Done).await;
2720                return Ok(());
2721            }
2722
2723            // ── Intelligence Surge: Proactive Compaction Check ──────────────────────
2724            if self
2725                .compact_history_if_needed(&tx, Some(turn_anchor))
2726                .await?
2727            {
2728                // After compaction, history is [system, summary, turn_anchor, ...]
2729                // The new turn_anchor is index 2.
2730                turn_anchor = 2;
2731            }
2732
2733            // On the first iteration inject Vein context into the system message.
2734            // Subsequent iterations use the plain slice — tool results are now in
2735            // history so Vein context would be redundant.
2736            let inject_vein = first_iter && !implement_current_plan;
2737            let messages = if implement_current_plan {
2738                first_iter = false;
2739                self.context_window_slice_from(turn_anchor)
2740            } else {
2741                first_iter = false;
2742                self.context_window_slice()
2743            };
2744
2745            // Use the canonical system prompt from history[0] which was built
2746            // by InferenceEngine::build_system_prompt() + build_system_with_corrections()
2747            // and includes GPU state, git context, permissions, and instruction files.
2748            let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2749                // Gemma 4 handles multiple system messages natively.
2750                // Standard models (Qwen, etc.) reject a second system message — merge into history[0].
2751                if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2752                    let mut msgs = vec![self.history[0].clone()];
2753                    msgs.push(ChatMessage::system(&intervention));
2754                    msgs
2755                } else {
2756                    let merged =
2757                        format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2758                    vec![ChatMessage::system(&merged)]
2759                }
2760            } else {
2761                vec![self.history[0].clone()]
2762            };
2763
2764            // Inject Vein context into the system message on the first iteration.
2765            // Vein results are merged in the same way as loop_intervention so standard
2766            // models (Qwen etc.) only ever see one system message.
2767            if inject_vein {
2768                if let Some(ref ctx) = vein_context.as_ref() {
2769                    if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2770                        prompt_msgs.push(ChatMessage::system(ctx));
2771                    } else {
2772                        let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2773                        prompt_msgs[0] = ChatMessage::system(&merged);
2774                    }
2775                }
2776            }
2777            prompt_msgs.extend(messages);
2778            if let Some(budget_note) =
2779                enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2780            {
2781                self.emit_operator_checkpoint(
2782                    &tx,
2783                    OperatorCheckpointState::BudgetReduced,
2784                    budget_note,
2785                )
2786                .await;
2787                let recipe = plan_recovery(
2788                    RecoveryScenario::PromptBudgetPressure,
2789                    &self.recovery_context,
2790                );
2791                self.emit_recovery_recipe_summary(
2792                    &tx,
2793                    recipe.recipe.scenario.label(),
2794                    compact_recovery_plan_summary(&recipe),
2795                )
2796                .await;
2797            }
2798            self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2799                .await;
2800
2801            let (mut text, mut tool_calls, usage, finish_reason) = match self
2802                .engine
2803                .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2804                .await
2805            {
2806                Ok(result) => result,
2807                Err(e) => {
2808                    let class = classify_runtime_failure(&e);
2809                    if should_retry_runtime_failure(class) {
2810                        if self.recovery_context.consume_transient_retry() {
2811                            let label = match class {
2812                                RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2813                                _ => "empty_model_response",
2814                            };
2815                            self.transcript.log_system(&format!(
2816                                "Automatic provider recovery triggered: {}",
2817                                e.trim()
2818                            ));
2819                            self.emit_recovery_recipe_summary(
2820                                &tx,
2821                                label,
2822                                compact_runtime_recovery_summary(class),
2823                            )
2824                            .await;
2825                            let _ = tx
2826                                .send(InferenceEvent::ProviderStatus {
2827                                    state: ProviderRuntimeState::Recovering,
2828                                    summary: compact_runtime_recovery_summary(class).into(),
2829                                })
2830                                .await;
2831                            self.emit_operator_checkpoint(
2832                                &tx,
2833                                OperatorCheckpointState::RecoveringProvider,
2834                                compact_runtime_recovery_summary(class),
2835                            )
2836                            .await;
2837                            continue;
2838                        }
2839                    }
2840
2841                    self.emit_runtime_failure(&tx, class, &e).await;
2842                    break;
2843                }
2844            };
2845            self.emit_provider_live(&tx).await;
2846
2847            // Update TUI token counter with actual usage from LM Studio.
2848            if let Some(ref u) = usage {
2849                let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2850            }
2851
2852            // Fallback safety net: if native tool markup leaked past the inference-layer
2853            // extractor, recover it here instead of treating it as plain assistant text.
2854            if tool_calls
2855                .as_ref()
2856                .map(|calls| calls.is_empty())
2857                .unwrap_or(true)
2858            {
2859                if let Some(raw_text) = text.as_deref() {
2860                    let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2861                    if !native_calls.is_empty() {
2862                        tool_calls = Some(native_calls);
2863                        let stripped =
2864                            crate::agent::inference::strip_native_tool_call_text(raw_text);
2865                        text = if stripped.trim().is_empty() {
2866                            None
2867                        } else {
2868                            Some(stripped)
2869                        };
2870                    }
2871                }
2872            }
2873
2874            // Treat empty tool_calls arrays (Some(vec![])) the same as None –
2875            // the model returned text only; an empty array causes an infinite loop.
2876            let tool_calls = tool_calls.filter(|c| !c.is_empty());
2877            let near_context_ceiling = usage
2878                .as_ref()
2879                .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2880                .unwrap_or(false);
2881
2882            if let Some(calls) = tool_calls {
2883                let (calls, prune_trace_note) =
2884                    prune_architecture_trace_batch(calls, architecture_overview_mode);
2885                if let Some(note) = prune_trace_note {
2886                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2887                }
2888
2889                let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2890                    calls,
2891                    self.workflow_mode.is_read_only(),
2892                    architecture_overview_mode,
2893                );
2894                if let Some(note) = prune_bloat_note {
2895                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2896                }
2897
2898                let (calls, prune_note) = prune_authoritative_tool_batch(
2899                    calls,
2900                    grounded_trace_mode,
2901                    &effective_user_input,
2902                );
2903                if let Some(note) = prune_note {
2904                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2905                }
2906
2907                let (calls, prune_redir_note) = prune_redirected_shell_batch(calls);
2908                if let Some(note) = prune_redir_note {
2909                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2910                }
2911
2912                let (calls, batch_note) = order_batch_reads_first(calls);
2913                if let Some(note) = batch_note {
2914                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2915                }
2916
2917                if let Some(repeated_path) = calls
2918                    .iter()
2919                    .filter(|c| {
2920                        let parsed = serde_json::from_str::<Value>(
2921                            &crate::agent::inference::normalize_tool_argument_string(
2922                                &c.function.name,
2923                                &c.function.arguments,
2924                            ),
2925                        )
2926                        .ok();
2927                        let offset = parsed
2928                            .as_ref()
2929                            .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2930                            .unwrap_or(0);
2931                        // Catch re-reads from the top (original behaviour) AND repeated
2932                        // reads at the exact same non-zero offset (new: catches targeted loops).
2933                        if offset < 200 {
2934                            return true;
2935                        }
2936                        if let Some(path) = parsed
2937                            .as_ref()
2938                            .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2939                        {
2940                            let normalized = normalize_workspace_path(path);
2941                            return successful_read_regions.contains(&(normalized, offset));
2942                        }
2943                        false
2944                    })
2945                    .filter_map(|c| repeated_read_target(&c.function))
2946                    .find(|path| successful_read_targets.contains(path))
2947                {
2948                    loop_intervention = Some(format!(
2949                        "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2950                        repeated_path
2951                    ));
2952                    let _ = tx
2953                        .send(InferenceEvent::Thought(
2954                            "Read discipline: preventing repeated full-file reads on the same path."
2955                                .into(),
2956                        ))
2957                        .await;
2958                    continue;
2959                }
2960
2961                if capability_mode
2962                    && !capability_needs_repo
2963                    && calls
2964                        .iter()
2965                        .all(|c| is_capability_probe_tool(&c.function.name))
2966                {
2967                    loop_intervention = Some(
2968                        "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2969                         Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2970                         Do not mention raw `mcp__*` names unless they are active and directly relevant."
2971                            .to_string(),
2972                    );
2973                    let _ = tx
2974                        .send(InferenceEvent::Thought(
2975                            "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2976                                .into(),
2977                        ))
2978                        .await;
2979                    continue;
2980                }
2981
2982                // VOCAL AGENT: If the model provided reasoning alongside tools,
2983                // stream it to the SPECULAR panel now using the hardened extraction.
2984                let raw_content = text.as_deref().unwrap_or(" ");
2985
2986                if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2987                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2988                    // Reasoning is silent (hidden in SPECULAR only).
2989                    self.reasoning_history = Some(thought);
2990                }
2991
2992                // [Gemma-4 Protocol] Keep raw content (including thoughts) during tool loops.
2993                // Thoughts are only stripped before the 'final' user turn.
2994                let stored_tool_call_content = if implement_current_plan {
2995                    cap_output(raw_content, 1200)
2996                } else {
2997                    raw_content.to_string()
2998                };
2999                self.history.push(ChatMessage::assistant_tool_calls(
3000                    &stored_tool_call_content,
3001                    calls.clone(),
3002                ));
3003
3004                // ── LAYER 4: Parallel Tool Orchestration (Batching) ────────────────────
3005                let mut results = Vec::new();
3006                let gemma4_model =
3007                    crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
3008                let latest_user_prompt = self.latest_user_prompt();
3009                let mut seen_call_keys = std::collections::HashSet::new();
3010                let mut deduped_calls = Vec::new();
3011                for call in calls.clone() {
3012                    let (normalized_name, normalized_args) = normalized_tool_call_for_execution(
3013                        &call.function.name,
3014                        &call.function.arguments,
3015                        gemma4_model,
3016                        latest_user_prompt,
3017                    );
3018                    let key = canonical_tool_call_key(&normalized_name, &normalized_args);
3019                    if seen_call_keys.insert(key) {
3020                        let repeat_guard_exempt = matches!(
3021                            normalized_name.as_str(),
3022                            "verify_build" | "git_commit" | "git_push"
3023                        );
3024                        if !repeat_guard_exempt {
3025                            if let Some(cached) = completed_tool_cache
3026                                .get(&canonical_tool_call_key(&normalized_name, &normalized_args))
3027                            {
3028                                let _ = tx
3029                                    .send(InferenceEvent::Thought(
3030                                        "Cached tool result reused: identical built-in invocation already completed earlier in this turn."
3031                                            .to_string(),
3032                                    ))
3033                                    .await;
3034                                loop_intervention = Some(format!(
3035                                    "STOP. You already called `{}` with identical arguments earlier in this turn and already have that result in conversation history. Do not call it again. Use the existing result to answer or choose a different next step.",
3036                                    cached.tool_name
3037                                ));
3038                                continue;
3039                            }
3040                        }
3041                        deduped_calls.push(call);
3042                    } else {
3043                        let _ = tx
3044                            .send(InferenceEvent::Thought(
3045                                "Duplicate tool call skipped: identical built-in invocation already ran this turn."
3046                                    .to_string(),
3047                            ))
3048                            .await;
3049                    }
3050                }
3051
3052                // Partition tool calls: Parallel Read vs Serial Mutating
3053                let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = deduped_calls
3054                    .into_iter()
3055                    .partition(|c| is_parallel_safe(&c.function.name));
3056
3057                // 1. Concurrent Execution (ParallelRead)
3058                if !parallel_calls.is_empty() {
3059                    let mut tasks = Vec::new();
3060                    for call in parallel_calls {
3061                        let tx_clone = tx.clone();
3062                        let config_clone = config.clone();
3063                        // Carry the real call ID into the outcome
3064                        let call_with_id = call.clone();
3065                        tasks.push(self.process_tool_call(
3066                            call_with_id.function,
3067                            config_clone,
3068                            yolo,
3069                            tx_clone,
3070                            call_with_id.id,
3071                        ));
3072                    }
3073                    // Run all parallel-safe calls concurrently and wait until every one has finished.
3074                    results.extend(futures::future::join_all(tasks).await);
3075                }
3076
3077                // 2. Sequential Execution (SerialMutating)
3078                for call in serial_calls {
3079                    results.push(
3080                        self.process_tool_call(
3081                            call.function,
3082                            config.clone(),
3083                            yolo,
3084                            tx.clone(),
3085                            call.id,
3086                        )
3087                        .await,
3088                    );
3089                }
3090
3091                // 3. Collate Messages into History & UI
3092                let mut authoritative_tool_output: Option<String> = None;
3093                let mut blocked_policy_output: Option<String> = None;
3094                let mut recoverable_policy_intervention: Option<String> = None;
3095                let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
3096                let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
3097                    None;
3098                for res in results {
3099                    let call_id = res.call_id.clone();
3100                    let tool_name = res.tool_name.clone();
3101                    let final_output = res.output.clone();
3102                    let is_error = res.is_error;
3103                    for msg in res.msg_results {
3104                        self.history.push(msg);
3105                    }
3106
3107                    // Update State for Verification Loop
3108                    if matches!(
3109                        tool_name.as_str(),
3110                        "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
3111                    ) {
3112                        mutation_occurred = true;
3113                        implementation_started = true;
3114                        // Heat tracking: bump L1 score for the edited file.
3115                        if !is_error {
3116                            let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
3117                            if !path.is_empty() {
3118                                self.vein.bump_heat(path);
3119                                self.l1_context = self.vein.l1_context();
3120                            }
3121                            // Refresh repo map so PageRank accounts for the new edit.
3122                            self.refresh_repo_map();
3123                        }
3124                    }
3125
3126                    if tool_name == "verify_build" {
3127                        self.record_session_verification(
3128                            !is_error
3129                                && (final_output.contains("BUILD OK")
3130                                    || final_output.contains("BUILD SUCCESS")
3131                                    || final_output.contains("BUILD OKAY")),
3132                            if is_error {
3133                                "Explicit verify_build failed."
3134                            } else {
3135                                "Explicit verify_build passed."
3136                            },
3137                        );
3138                    }
3139
3140                    // Update Repeat Guard
3141                    let call_key = format!(
3142                        "{}:{}",
3143                        tool_name,
3144                        serde_json::to_string(&res.args).unwrap_or_default()
3145                    );
3146                    let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
3147                    *repeat_count += 1;
3148
3149                    // verify_build, git_commit, and git_push are exempt from the repeat guard; verify_build in particular is legitimately re-run in fix-verify loops.
3150                    let repeat_guard_exempt = matches!(
3151                        tool_name.as_str(),
3152                        "verify_build" | "git_commit" | "git_push"
3153                    );
3154                    if *repeat_count >= 3 && !repeat_guard_exempt {
3155                        loop_intervention = Some(format!(
3156                            "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
3157                             Do not call it again. Either answer directly from what you already know, \
3158                             use a different tool or approach, or ask the user for clarification.",
3159                            tool_name, *repeat_count
3160                        ));
3161                        let _ = tx
3162                            .send(InferenceEvent::Thought(format!(
3163                                "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
3164                                tool_name, *repeat_count
3165                            )))
3166                            .await;
3167                    }
3168
3169                    if is_error {
3170                        consecutive_errors += 1;
3171                    } else {
3172                        consecutive_errors = 0;
3173                    }
3174
3175                    if consecutive_errors >= 3 {
3176                        loop_intervention = Some(
3177                            "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
3178                             STOP all tool calls immediately. Analyze why your previous 3 calls failed \
3179                             (check for hallucinations or invalid arguments) and ask the user for \
3180                             clarification if you cannot proceed.".to_string()
3181                        );
3182                    }
3183
3184                    if consecutive_errors >= 4 {
3185                        self.emit_runtime_failure(
3186                            &tx,
3187                            RuntimeFailureClass::ToolLoop,
3188                            "Hard termination: too many consecutive tool errors.",
3189                        )
3190                        .await;
3191                        return Ok(());
3192                    }
3193
3194                    let _ = tx
3195                        .send(InferenceEvent::ToolCallResult {
3196                            id: call_id.clone(),
3197                            name: tool_name.clone(),
3198                            output: final_output.clone(),
3199                            is_error,
3200                        })
3201                        .await;
3202
3203                    let repeat_guard_exempt = matches!(
3204                        tool_name.as_str(),
3205                        "verify_build" | "git_commit" | "git_push"
3206                    );
3207                    if !repeat_guard_exempt {
3208                        completed_tool_cache.insert(
3209                            canonical_tool_call_key(&tool_name, &res.args),
3210                            CachedToolResult {
3211                                tool_name: tool_name.clone(),
3212                            },
3213                        );
3214                    }
3215
3216                    // Cap output before history
3217                    let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
3218                        self.engine.current_context_length(),
3219                    );
3220                    let capped = if implement_current_plan {
3221                        cap_output(&final_output, 1200)
3222                    } else if compact_ctx
3223                        && (tool_name == "read_file" || tool_name == "inspect_lines")
3224                    {
3225                        // Compact context: cap file reads tightly and add a navigation hint on truncation.
3226                        let limit = 3000usize;
3227                        if final_output.len() > limit {
3228                            let total_lines = final_output.lines().count();
3229                            let mut split_at = limit;
3230                            while !final_output.is_char_boundary(split_at) && split_at > 0 {
3231                                split_at -= 1;
3232                            }
3233                            let scratch = write_output_to_scratch(&final_output, &tool_name)
3234                                .map(|p| format!(" Full file also saved to '{p}'."))
3235                                .unwrap_or_default();
3236                            format!(
3237                                "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.{}]",
3238                                &final_output[..split_at],
3239                                total_lines,
3240                                total_lines.saturating_sub(150),
3241                                scratch,
3242                            )
3243                        } else {
3244                            final_output.clone()
3245                        }
3246                    } else {
3247                        cap_output_for_tool(&final_output, 8000, &tool_name)
3248                    };
3249                    self.history.push(ChatMessage::tool_result_for_model(
3250                        &call_id,
3251                        &tool_name,
3252                        &capped,
3253                        &self.engine.current_model(),
3254                    ));
3255
3256                    if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
3257                    {
3258                        overview_runtime_trace =
3259                            Some(summarize_runtime_trace_output(&final_output));
3260                    }
3261
3262                    if !architecture_overview_mode
3263                        && !is_error
3264                        && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
3265                            || (toolchain_mode && tool_name == "describe_toolchain"))
3266                    {
3267                        authoritative_tool_output = Some(final_output.clone());
3268                    }
3269
3270                    if !is_error && tool_name == "read_file" {
3271                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3272                            let normalized = normalize_workspace_path(path);
3273                            let read_offset =
3274                                res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
3275                            successful_read_targets.insert(normalized.clone());
3276                            successful_read_regions.insert((normalized.clone(), read_offset));
3277                        }
3278                    }
3279
3280                    if !is_error && tool_name == "grep_files" {
3281                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3282                            let normalized = normalize_workspace_path(path);
3283                            if final_output.starts_with("No matches for ") {
3284                                no_match_grep_targets.insert(normalized);
3285                            } else if grep_output_is_high_fanout(&final_output) {
3286                                broad_grep_targets.insert(normalized);
3287                            } else {
3288                                successful_grep_targets.insert(normalized);
3289                            }
3290                        }
3291                    }
3292
3293                    if is_error
3294                        && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
3295                        && (final_output.contains("search string not found")
3296                            || final_output.contains("search string is too short")
3297                            || final_output.contains("search string matched"))
3298                    {
3299                        if let Some(target) = action_target_path(&tool_name, &res.args) {
3300                            let guidance = if final_output.contains("matched") {
3301                                format!(
3302                                    "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
3303                                    tool_name, target
3304                                )
3305                            } else {
3306                                format!(
3307                                    "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
3308                                    tool_name, target
3309                                )
3310                            };
3311                            loop_intervention = Some(guidance);
3312                            *repeat_count = 0;
3313                        }
3314                    }
3315
3316                    // When guard.rs blocks a shell call with the run_code redirect hint,
3317                    // force the model to recover with run_code instead of giving up.
3318                    if is_error
3319                        && tool_name == "shell"
3320                        && final_output.contains("Use the run_code tool instead")
3321                        && loop_intervention.is_none()
3322                    {
3323                        loop_intervention = Some(
3324                            "STOP. Shell was blocked because this is a computation task. \
3325                             You MUST use `run_code` now — write the code and run it. \
3326                             Do NOT output an error message or give up. \
3327                             Call `run_code` with the appropriate language and code to compute the answer. \
3328                             If writing Python, pass `language: \"python\"`. \
3329                             If writing JavaScript, omit language or pass `language: \"javascript\"`."
3330                                .to_string(),
3331                        );
3332                    }
3333
3334                    // When run_code fails with a Deno parse error, the model likely sent Python
3335                    // code without specifying language: "python". Force a corrective retry.
3336                    if is_error
3337                        && tool_name == "run_code"
3338                        && (final_output.contains("source code could not be parsed")
3339                            || final_output.contains("Expected ';'")
3340                            || final_output.contains("Expected '}'")
3341                            || final_output.contains("is not defined")
3342                                && final_output.contains("deno"))
3343                        && loop_intervention.is_none()
3344                    {
3345                        loop_intervention = Some(
3346                            "STOP. run_code failed with a JavaScript parse error — you likely wrote Python \
3347                             code but forgot to pass `language: \"python\"`. \
3348                             Retry run_code with `language: \"python\"` and the same code. \
3349                             Do NOT fall back to shell. Do NOT give up."
3350                                .to_string(),
3351                        );
3352                    }
3353
3354                    if res.blocked_by_policy
3355                        && is_mcp_workspace_read_tool(&tool_name)
3356                        && recoverable_policy_intervention.is_none()
3357                    {
3358                        recoverable_policy_intervention = Some(
3359                            "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
3360                        );
3361                        recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
3362                        recoverable_policy_checkpoint = Some((
3363                            OperatorCheckpointState::BlockedPolicy,
3364                            "MCP workspace read blocked; rerouting to built-in file tools."
3365                                .to_string(),
3366                        ));
3367                    } else if res.blocked_by_policy
3368                        && implement_current_plan
3369                        && is_current_plan_irrelevant_tool(&tool_name)
3370                        && recoverable_policy_intervention.is_none()
3371                    {
3372                        recoverable_policy_intervention = Some(format!(
3373                            "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
3374                            tool_name
3375                        ));
3376                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
3377                        recoverable_policy_checkpoint = Some((
3378                            OperatorCheckpointState::BlockedPolicy,
3379                            format!(
3380                                "Current-plan execution blocked unrelated tool `{}`.",
3381                                tool_name
3382                            ),
3383                        ));
3384                    } else if res.blocked_by_policy
3385                        && implement_current_plan
3386                        && final_output.contains("requires recent file evidence")
3387                        && recoverable_policy_intervention.is_none()
3388                    {
3389                        let target = action_target_path(&tool_name, &res.args)
3390                            .unwrap_or_else(|| "the target file".to_string());
3391                        recoverable_policy_intervention = Some(format!(
3392                            "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
3393                        ));
3394                        recoverable_policy_recipe =
3395                            Some(RecoveryScenario::RecentFileEvidenceMissing);
3396                        recoverable_policy_checkpoint = Some((
3397                            OperatorCheckpointState::BlockedRecentFileEvidence,
3398                            format!("Edit blocked on `{target}`; recent file evidence missing."),
3399                        ));
3400                    } else if res.blocked_by_policy
3401                        && implement_current_plan
3402                        && final_output.contains("requires an exact local line window first")
3403                        && recoverable_policy_intervention.is_none()
3404                    {
3405                        let target = action_target_path(&tool_name, &res.args)
3406                            .unwrap_or_else(|| "the target file".to_string());
3407                        recoverable_policy_intervention = Some(format!(
3408                            "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
3409                        ));
3410                        recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
3411                        recoverable_policy_checkpoint = Some((
3412                            OperatorCheckpointState::BlockedExactLineWindow,
3413                            format!("Edit blocked on `{target}`; exact line window required."),
3414                        ));
3415                    } else if res.blocked_by_policy
3416                        && (final_output.contains("Prefer `")
3417                            || final_output.contains("Prefer tool"))
3418                        && recoverable_policy_intervention.is_none()
3419                    {
3420                        recoverable_policy_intervention = Some(final_output.clone());
3421                        recoverable_policy_recipe = Some(RecoveryScenario::PolicyCorrection);
3422                        recoverable_policy_checkpoint = Some((
3423                            OperatorCheckpointState::BlockedPolicy,
3424                            "Action blocked by policy; self-correction triggered using tool recommendation."
3425                                .to_string(),
3426                        ));
3427                    } else if res.blocked_by_policy && blocked_policy_output.is_none() {
3428                        blocked_policy_output = Some(final_output.clone());
3429                    }
3430
3431                    if *repeat_count >= 5 {
3432                        let _ = tx.send(InferenceEvent::Done).await;
3433                        return Ok(());
3434                    }
3435
3436                    if implement_current_plan
3437                        && !implementation_started
3438                        && !is_error
3439                        && is_non_mutating_plan_step_tool(&tool_name)
3440                    {
3441                        non_mutating_plan_steps += 1;
3442                    }
3443                }
3444
3445                if let Some(intervention) = recoverable_policy_intervention {
3446                    if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
3447                        self.emit_operator_checkpoint(&tx, state, summary).await;
3448                    }
3449                    if let Some(scenario) = recoverable_policy_recipe.take() {
3450                        let recipe = plan_recovery(scenario, &self.recovery_context);
3451                        self.emit_recovery_recipe_summary(
3452                            &tx,
3453                            recipe.recipe.scenario.label(),
3454                            compact_recovery_plan_summary(&recipe),
3455                        )
3456                        .await;
3457                    }
3458                    loop_intervention = Some(intervention);
3459                    let _ = tx
3460                        .send(InferenceEvent::Thought(
3461                            "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
3462                                .into(),
3463                        ))
3464                        .await;
3465                    continue;
3466                }
3467
3468                if architecture_overview_mode {
3469                    match overview_runtime_trace.as_deref() {
3470                        Some(runtime_trace) => {
3471                            let response = build_architecture_overview_answer(runtime_trace);
3472                            self.history.push(ChatMessage::assistant_text(&response));
3473                            self.transcript.log_agent(&response);
3474
3475                            for chunk in chunk_text(&response, 8) {
3476                                if !chunk.is_empty() {
3477                                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
3478                                }
3479                            }
3480
3481                            let _ = tx.send(InferenceEvent::Done).await;
3482                            break;
3483                        }
3484                        None => {
3485                            loop_intervention = Some(
3486                                "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
3487                                    .to_string(),
3488                            );
3489                            continue;
3490                        }
3491                    }
3492                }
3493
3494                if implement_current_plan
3495                    && !implementation_started
3496                    && non_mutating_plan_steps >= non_mutating_plan_hard_cap
3497                {
3498                    let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
3499                    self.history.push(ChatMessage::assistant_text(&msg));
3500                    self.transcript.log_agent(&msg);
3501
3502                    for chunk in chunk_text(&msg, 8) {
3503                        if !chunk.is_empty() {
3504                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3505                        }
3506                    }
3507
3508                    let _ = tx.send(InferenceEvent::Done).await;
3509                    break;
3510                }
3511
3512                if let Some(blocked_output) = blocked_policy_output {
3513                    self.emit_operator_checkpoint(
3514                        &tx,
3515                        OperatorCheckpointState::BlockedPolicy,
3516                        "A blocked tool path was surfaced directly to the operator.",
3517                    )
3518                    .await;
3519                    self.history
3520                        .push(ChatMessage::assistant_text(&blocked_output));
3521                    self.transcript.log_agent(&blocked_output);
3522
3523                    for chunk in chunk_text(&blocked_output, 8) {
3524                        if !chunk.is_empty() {
3525                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3526                        }
3527                    }
3528
3529                    let _ = tx.send(InferenceEvent::Done).await;
3530                    break;
3531                }
3532
3533                if let Some(tool_output) = authoritative_tool_output {
3534                    self.history.push(ChatMessage::assistant_text(&tool_output));
3535                    self.transcript.log_agent(&tool_output);
3536
3537                    for chunk in chunk_text(&tool_output, 8) {
3538                        if !chunk.is_empty() {
3539                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3540                        }
3541                    }
3542
3543                    let _ = tx.send(InferenceEvent::Done).await;
3544                    break;
3545                }
3546
3547                if implement_current_plan && !implementation_started {
3548                    let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3549                    if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3550                        loop_intervention = Some(format!(
3551                            "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3552                            base
3553                        ));
3554                    } else {
3555                        loop_intervention = Some(base.to_string());
3556                    }
3557                } else if self.workflow_mode == WorkflowMode::Architect {
3558                    loop_intervention = Some(
3559                        format!(
3560                            "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3561                            architect_handoff_contract()
3562                        ),
3563                    );
3564                }
3565
3566                // 4. Auto-Verification Loop (The Perfect Bake)
3567                if mutation_occurred && !yolo {
3568                    let _ = tx
3569                        .send(InferenceEvent::Thought(
3570                            "Self-Verification: Running 'cargo check' to ensure build integrity..."
3571                                .into(),
3572                        ))
3573                        .await;
3574                    let verify_res = self.auto_verify_build().await;
3575                    let verify_ok = verify_res.contains("BUILD SUCCESS");
3576                    self.record_verify_build_result(verify_ok, &verify_res)
3577                        .await;
3578                    self.record_session_verification(
3579                        verify_ok,
3580                        if verify_ok {
3581                            "Automatic build verification passed."
3582                        } else {
3583                            "Automatic build verification failed."
3584                        },
3585                    );
3586                    self.history.push(ChatMessage::system(&format!(
3587                        "\n# SYSTEM VERIFICATION\n{verify_res}"
3588                    )));
3589                    let _ = tx
3590                        .send(InferenceEvent::Thought(
3591                            "Verification turn injected into history.".into(),
3592                        ))
3593                        .await;
3594                }
3595
3596                // Continue loop – the model will respond to the results.
3597                continue;
3598            } else if let Some(response_text) = text {
3599                if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3600                    if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3601                        let cleaned = build_session_reset_semantics_answer();
3602                        self.history.push(ChatMessage::assistant_text(&cleaned));
3603                        self.transcript.log_agent(&cleaned);
3604                        for chunk in chunk_text(&cleaned, 8) {
3605                            if !chunk.is_empty() {
3606                                let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3607                            }
3608                        }
3609                        let _ = tx.send(InferenceEvent::Done).await;
3610                        break;
3611                    }
3612
3613                    let warning = format_runtime_failure(
3614                        RuntimeFailureClass::ContextWindow,
3615                        "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3616                    );
3617                    self.history.push(ChatMessage::assistant_text(&warning));
3618                    self.transcript.log_agent(&warning);
3619                    let _ = tx
3620                        .send(InferenceEvent::Thought(
3621                            "Length recovery: model hit the context ceiling before completing the answer."
3622                                .into(),
3623                        ))
3624                        .await;
3625                    for chunk in chunk_text(&warning, 8) {
3626                        if !chunk.is_empty() {
3627                            let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3628                        }
3629                    }
3630                    let _ = tx.send(InferenceEvent::Done).await;
3631                    break;
3632                }
3633
3634                if response_text.contains("<|tool_call")
3635                    || response_text.contains("[END_TOOL_REQUEST]")
3636                    || response_text.contains("<|tool_response")
3637                    || response_text.contains("<tool_response|>")
3638                {
3639                    loop_intervention = Some(
3640                        "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3641                    );
3642                    continue;
3643                }
3644
3645                // 1. Process and route the reasoning block to SPECULAR.
3646                if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3647                {
3648                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3649                    // Persist for history audit (stripped from next turn by Volatile Reasoning rule).
3650                    // This will be summarized in the next turn's system prompt.
3651                    self.reasoning_history = Some(thought);
3652                }
3653
3654                // 2. Process and stream the final answer to the chat interface.
3655                let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3656
3657                if implement_current_plan && !implementation_started {
3658                    loop_intervention = Some(
3659                        "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3660                    );
3661                    continue;
3662                }
3663
3664                // [Hardened Interface] Strictly respect the stripper.
3665                // If it's empty after stripping think blocks, the model thought through its
3666                // answer but forgot to emit it (common with Qwen3 models in architect/ask mode).
3667                // Nudge it rather than silently dropping the turn — but cap at 2 retries so a
3668                // model that keeps returning whitespace/empty doesn't spin all 25 iterations.
3669                if cleaned.is_empty() {
3670                    empty_cleaned_nudges += 1;
3671                    if empty_cleaned_nudges == 1 {
3672                        loop_intervention = Some(
3673                            "Your visible response was empty. The tool already returned data. \
3674                             Write your answer now in plain text — no <think> tags, no tool calls. \
3675                             State the key facts in 2-5 sentences and stop."
3676                                .to_string(),
3677                        );
3678                        continue;
3679                    } else if empty_cleaned_nudges == 2 {
3680                        loop_intervention = Some(
3681                            "EMPTY RESPONSE. Do NOT use <think>. Do NOT call tools. \
3682                             Write the answer in plain text right now. \
3683                             Example format: \"Your CPU is X. Your GPU is Y. You have Z GB of RAM.\""
3684                                .to_string(),
3685                        );
3686                        continue;
3687                    }
3688                    // Nudge budget exhausted — surface as a recoverable empty-response failure
3689                    // so the TUI unblocks instead of hanging for the full max_iters budget.
3690                    let class = RuntimeFailureClass::EmptyModelResponse;
3691                    self.emit_runtime_failure(
3692                        &tx,
3693                        class,
3694                        "Model returned empty content after 2 nudge attempts.",
3695                    )
3696                    .await;
3697                    break;
3698                }
3699
3700                let architect_handoff = self.persist_architect_handoff(&cleaned);
3701                self.history.push(ChatMessage::assistant_text(&cleaned));
3702                self.transcript.log_agent(&cleaned);
3703
3704                // Send in smooth chunks for that professional UI feel.
3705                for chunk in chunk_text(&cleaned, 8) {
3706                    if !chunk.is_empty() {
3707                        let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3708                    }
3709                }
3710
3711                if let Some(plan) = architect_handoff.as_ref() {
3712                    let note = architect_handoff_operator_note(plan);
3713                    self.history.push(ChatMessage::system(&note));
3714                    self.transcript.log_system(&note);
3715                    let _ = tx
3716                        .send(InferenceEvent::MutedToken(format!("\n{}", note)))
3717                        .await;
3718                }
3719
3720                let _ = tx.send(InferenceEvent::Done).await;
3721                break;
3722            } else {
3723                let detail = "Model returned an empty response.";
3724                let class = classify_runtime_failure(detail);
3725                if should_retry_runtime_failure(class) {
3726                    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3727                        if let RecoveryDecision::Attempt(plan) =
3728                            attempt_recovery(scenario, &mut self.recovery_context)
3729                        {
3730                            self.transcript.log_system(
3731                                "Automatic provider recovery triggered: model returned an empty response.",
3732                            );
3733                            self.emit_recovery_recipe_summary(
3734                                &tx,
3735                                plan.recipe.scenario.label(),
3736                                compact_recovery_plan_summary(&plan),
3737                            )
3738                            .await;
3739                            let _ = tx
3740                                .send(InferenceEvent::ProviderStatus {
3741                                    state: ProviderRuntimeState::Recovering,
3742                                    summary: compact_runtime_recovery_summary(class).into(),
3743                                })
3744                                .await;
3745                            self.emit_operator_checkpoint(
3746                                &tx,
3747                                OperatorCheckpointState::RecoveringProvider,
3748                                compact_runtime_recovery_summary(class),
3749                            )
3750                            .await;
3751                            continue;
3752                        }
3753                    }
3754                }
3755
3756                self.emit_runtime_failure(&tx, class, detail).await;
3757                break;
3758            }
3759        }
3760
3761        self.trim_history(80);
3762        self.refresh_session_memory();
3763        // Record the goal and increment the turn counter before persisting.
3764        self.last_goal = Some(user_input.chars().take(300).collect());
3765        self.turn_count = self.turn_count.saturating_add(1);
3766        self.save_session();
3767        self.emit_compaction_pressure(&tx).await;
3768        Ok(())
3769    }
3770
3771    async fn emit_runtime_failure(
3772        &mut self,
3773        tx: &mpsc::Sender<InferenceEvent>,
3774        class: RuntimeFailureClass,
3775        detail: &str,
3776    ) {
3777        if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3778            let decision = preview_recovery_decision(scenario, &self.recovery_context);
3779            self.emit_recovery_recipe_summary(
3780                tx,
3781                scenario.label(),
3782                compact_recovery_decision_summary(&decision),
3783            )
3784            .await;
3785            let needs_refresh = match &decision {
3786                RecoveryDecision::Attempt(plan) => plan
3787                    .recipe
3788                    .steps
3789                    .contains(&RecoveryStep::RefreshRuntimeProfile),
3790                RecoveryDecision::Escalate { recipe, .. } => {
3791                    recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3792                }
3793            };
3794            if needs_refresh {
3795                if let Some((model_id, context_length, changed)) = self
3796                    .refresh_runtime_profile_and_report(tx, "context_window_failure")
3797                    .await
3798                {
3799                    let note = if changed {
3800                        format!(
3801                            "Runtime refresh after context-window failure: model {} | CTX {}",
3802                            model_id, context_length
3803                        )
3804                    } else {
3805                        format!(
3806                            "Runtime refresh after context-window failure confirms model {} | CTX {}",
3807                            model_id, context_length
3808                        )
3809                    };
3810                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3811                }
3812            }
3813        }
3814        if let Some(state) = provider_state_for_runtime_failure(class) {
3815            let _ = tx
3816                .send(InferenceEvent::ProviderStatus {
3817                    state,
3818                    summary: compact_runtime_failure_summary(class).into(),
3819                })
3820                .await;
3821        }
3822        if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3823            self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3824                .await;
3825        }
3826        let formatted = format_runtime_failure(class, detail);
3827        self.history.push(ChatMessage::system(&format!(
3828            "# RUNTIME FAILURE\n{}",
3829            formatted
3830        )));
3831        self.transcript.log_system(&formatted);
3832        let _ = tx.send(InferenceEvent::Error(formatted)).await;
3833        let _ = tx.send(InferenceEvent::Done).await;
3834    }
3835
3836    /// [Task Analyzer] Run 'cargo check' and return a concise summary for the model.
3837    async fn auto_verify_build(&self) -> String {
3838        match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3839            Ok(out) => {
3840                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3841                    + &cap_output(&out, 2000)
3842            }
3843            Err(e) => format!(
3844                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3845                cap_output(&e, 2000)
3846            ),
3847        }
3848    }
3849
3850    /// Triggers an LLM call to summarize old messages if history exceeds the VRAM character limit.
3851    /// Triggers the Deterministic Smart Compaction algorithm to shrink history while preserving context.
3852    /// Triggers the Recursive Context Compactor.
3853    async fn compact_history_if_needed(
3854        &mut self,
3855        tx: &mpsc::Sender<InferenceEvent>,
3856        anchor_index: Option<usize>,
3857    ) -> Result<bool, String> {
3858        let vram_ratio = self.gpu_state.ratio();
3859        let context_length = self.engine.current_context_length();
3860        let config = CompactionConfig::adaptive(context_length, vram_ratio);
3861
3862        if !compaction::should_compact(&self.history, context_length, vram_ratio) {
3863            return Ok(false);
3864        }
3865
3866        let _ = tx
3867            .send(InferenceEvent::Thought(format!(
3868                "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
3869                context_length / 1000,
3870                vram_ratio * 100.0,
3871                config.max_estimated_tokens / 1000,
3872            )))
3873            .await;
3874
3875        let result = compaction::compact_history(
3876            &self.history,
3877            self.running_summary.as_deref(),
3878            config,
3879            anchor_index,
3880        );
3881
3882        let removed_message_count = self.history.len().saturating_sub(result.messages.len());
3883        self.history = result.messages;
3884        self.running_summary = result.summary;
3885
3886        // Layer 6: Memory Synthesis (Task Context Persistence)
3887        let previous_memory = self.session_memory.clone();
3888        self.session_memory = compaction::extract_memory(&self.history);
3889        self.session_memory
3890            .inherit_runtime_ledger_from(&previous_memory);
3891        self.session_memory.record_compaction(
3892            removed_message_count,
3893            format!(
3894                "Compacted history around active task '{}' and preserved {} working-set file(s).",
3895                self.session_memory.current_task,
3896                self.session_memory.working_set.len()
3897            ),
3898        );
3899        self.emit_compaction_pressure(tx).await;
3900
3901        // Jinja alignment: preserved slice may start with assistant/tool messages.
3902        // Strip any leading non-user messages so the first non-system message is always user.
3903        let first_non_sys = self
3904            .history
3905            .iter()
3906            .position(|m| m.role != "system")
3907            .unwrap_or(self.history.len());
3908        if first_non_sys < self.history.len() {
3909            if let Some(user_offset) = self.history[first_non_sys..]
3910                .iter()
3911                .position(|m| m.role == "user")
3912            {
3913                if user_offset > 0 {
3914                    self.history
3915                        .drain(first_non_sys..first_non_sys + user_offset);
3916                }
3917            }
3918        }
3919
3920        let _ = tx
3921            .send(InferenceEvent::Thought(format!(
3922                "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
3923                self.session_memory.current_task,
3924                self.session_memory.working_set.len()
3925            )))
3926            .await;
3927        let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
3928        self.emit_recovery_recipe_summary(
3929            tx,
3930            recipe.recipe.scenario.label(),
3931            compact_recovery_plan_summary(&recipe),
3932        )
3933        .await;
3934        self.emit_operator_checkpoint(
3935            tx,
3936            OperatorCheckpointState::HistoryCompacted,
3937            format!(
3938                "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
3939                self.session_memory.current_task,
3940                self.session_memory.working_set.len()
3941            ),
3942        )
3943        .await;
3944
3945        Ok(true)
3946    }
3947
3948    /// Query The Vein for context relevant to the user's message.
3949    /// Runs hybrid BM25 + semantic search (semantic requires embedding model in LM Studio).
3950    /// Returns a formatted system message string, or None if nothing useful found.
3951    fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3952        // Skip trivial / very short inputs.
3953        if query.trim().split_whitespace().count() < 3 {
3954            return None;
3955        }
3956
3957        let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3958        if results.is_empty() {
3959            return None;
3960        }
3961
3962        let semantic_active = self.vein.has_any_embeddings();
3963        let header = if semantic_active {
3964            "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3965             Use this to answer without needing extra read_file calls where possible.\n\n"
3966        } else {
3967            "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3968             Use this to answer without needing extra read_file calls where possible.\n\n"
3969        };
3970
3971        let mut ctx = String::from(header);
3972        let mut paths: Vec<String> = Vec::new();
3973
3974        let mut total = 0usize;
3975        const MAX_CTX_CHARS: usize = 1_500;
3976
3977        for r in results {
3978            if total >= MAX_CTX_CHARS {
3979                break;
3980            }
3981            let snippet = if r.content.len() > 500 {
3982                format!("{}...", &r.content[..500])
3983            } else {
3984                r.content.clone()
3985            };
3986            ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3987            total += snippet.len() + r.path.len() + 10;
3988            if !paths.contains(&r.path) {
3989                paths.push(r.path);
3990            }
3991        }
3992
3993        Some((ctx, paths))
3994    }
3995
3996    /// Returns the conversation history (WITHOUT the system prompt) for the context window.
3997    /// This ensures we don't have redundant system blocks and prevents Jinja crashes.
3998    fn context_window_slice(&self) -> Vec<ChatMessage> {
3999        let mut result = Vec::new();
4000
4001        // Skip index 0 (the raw system message) and any stray system messages in history.
4002        if self.history.len() > 1 {
4003            for m in &self.history[1..] {
4004                if m.role == "system" {
4005                    continue;
4006                }
4007
4008                let mut sanitized = m.clone();
4009                // DEEP SANITIZE: LM Studio Jinja templates for Qwen crash on truly empty content.
4010                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
4011                    sanitized.content = MessageContent::Text(" ".into());
4012                }
4013                result.push(sanitized);
4014            }
4015        }
4016
4017        // Jinja Guard: The first message after the system prompt MUST be 'user'.
4018        // If not (e.g. because of compaction), we insert a tiny anchor.
4019        if !result.is_empty() && result[0].role != "user" {
4020            result.insert(0, ChatMessage::user("Continuing previous context..."));
4021        }
4022
4023        result
4024    }
4025
4026    fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
4027        let mut result = Vec::new();
4028
4029        if self.history.len() > 1 {
4030            let start = start_idx.max(1).min(self.history.len());
4031            for m in &self.history[start..] {
4032                if m.role == "system" {
4033                    continue;
4034                }
4035
4036                let mut sanitized = m.clone();
4037                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
4038                    sanitized.content = MessageContent::Text(" ".into());
4039                }
4040                result.push(sanitized);
4041            }
4042        }
4043
4044        if !result.is_empty() && result[0].role != "user" {
4045            result.insert(0, ChatMessage::user("Continuing current plan execution..."));
4046        }
4047
4048        result
4049    }
4050
4051    /// Drop old turns from the middle of history.
4052    fn trim_history(&mut self, max_messages: usize) {
4053        if self.history.len() <= max_messages {
4054            return;
4055        }
4056        // Always keep [0] (system prompt).
4057        let excess = self.history.len() - max_messages;
4058        self.history.drain(1..=excess);
4059    }
4060
4061    /// P1: Attempt to fix malformed JSON tool arguments by asking the model to re-output them.
4062    async fn repair_tool_args(
4063        &self,
4064        tool_name: &str,
4065        bad_json: &str,
4066        tx: &mpsc::Sender<InferenceEvent>,
4067    ) -> Result<Value, String> {
4068        let _ = tx
4069            .send(InferenceEvent::Thought(format!(
4070                "Attempting to repair malformed JSON for '{}'...",
4071                tool_name
4072            )))
4073            .await;
4074
4075        let prompt = format!(
4076            "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
4077            tool_name, bad_json
4078        );
4079
4080        let messages = vec![
4081            ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
4082            ChatMessage::user(&prompt),
4083        ];
4084
4085        // Use fast model for speed if available.
4086        let (text, _, _, _) = self
4087            .engine
4088            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4089            .await
4090            .map_err(|e| e.to_string())?;
4091
4092        let cleaned = text
4093            .unwrap_or_default()
4094            .trim()
4095            .trim_start_matches("```json")
4096            .trim_start_matches("```")
4097            .trim_end_matches("```")
4098            .trim()
4099            .to_string();
4100
4101        serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
4102    }
4103
4104    /// P2: Run a fast validation step after file writes to check for subtle logic errors.
4105    async fn run_critic_check(
4106        &self,
4107        path: &str,
4108        content: &str,
4109        tx: &mpsc::Sender<InferenceEvent>,
4110    ) -> Option<String> {
4111        // Only run for source code files.
4112        let ext = std::path::Path::new(path)
4113            .extension()
4114            .and_then(|e| e.to_str())
4115            .unwrap_or("");
4116        const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
4117        if !CRITIC_EXTS.contains(&ext) {
4118            return None;
4119        }
4120
4121        let _ = tx
4122            .send(InferenceEvent::Thought(format!(
4123                "CRITIC: Reviewing changes to '{}'...",
4124                path
4125            )))
4126            .await;
4127
4128        let truncated = cap_output(content, 4000);
4129
4130        let prompt = format!(
4131            "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
4132            path, ext, truncated
4133        );
4134
4135        let messages = vec![
4136            ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
4137            ChatMessage::user(&prompt)
4138        ];
4139
4140        let (text, _, _, _) = self
4141            .engine
4142            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4143            .await
4144            .ok()?;
4145
4146        let critique = text?.trim().to_string();
4147        if critique.to_uppercase().contains("PASS") || critique.is_empty() {
4148            None
4149        } else {
4150            Some(critique)
4151        }
4152    }
4153}
4154
4155// ── Tool dispatcher ───────────────────────────────────────────────────────────
4156
/// Public entry point for executing a tool by name.
///
/// Forwards `name` and `args` unchanged to `dispatch_builtin_tool` and returns its
/// result as-is.
pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
    dispatch_builtin_tool(name, args).await
}
4160
/// Cleans a user prompt for use as a fix-plan issue description.
///
/// Trims surrounding whitespace and removes a leading `/think` or `/no_think`
/// directive. Returns `None` when nothing is left.
fn normalize_fix_plan_issue_text(text: &str) -> Option<String> {
    let body = text.trim();
    let directive_removed = body
        .strip_prefix("/think")
        .or_else(|| body.strip_prefix("/no_think"));
    let without_directive = match directive_removed {
        Some(rest) => rest.trim(),
        None => body,
    };
    let issue = without_directive.trim_start_matches('\n').trim();
    if issue.is_empty() {
        None
    } else {
        Some(issue.to_string())
    }
}
4172
4173fn fill_missing_fix_plan_issue(tool_name: &str, args: &mut Value, fallback_issue: Option<&str>) {
4174    if tool_name != "inspect_host" {
4175        return;
4176    }
4177
4178    let Some(topic) = args.get("topic").and_then(|v| v.as_str()) else {
4179        return;
4180    };
4181    if topic != "fix_plan" {
4182        return;
4183    }
4184
4185    let issue_missing = args
4186        .get("issue")
4187        .and_then(|v| v.as_str())
4188        .map(str::trim)
4189        .is_none_or(|value| value.is_empty());
4190    if !issue_missing {
4191        return;
4192    }
4193
4194    let Some(fallback_issue) = fallback_issue.and_then(normalize_fix_plan_issue_text) else {
4195        return;
4196    };
4197
4198    let Value::Object(map) = args else {
4199        return;
4200    };
4201    map.insert(
4202        "issue".to_string(),
4203        Value::String(fallback_issue.to_string()),
4204    );
4205}
4206
4207fn should_rewrite_shell_to_fix_plan(
4208    tool_name: &str,
4209    args: &Value,
4210    latest_user_prompt: Option<&str>,
4211) -> bool {
4212    if tool_name != "shell" {
4213        return false;
4214    }
4215    let Some(prompt) = latest_user_prompt else {
4216        return false;
4217    };
4218    if preferred_host_inspection_topic(prompt) != Some("fix_plan") {
4219        return false;
4220    }
4221    let command = args
4222        .get("command")
4223        .and_then(|value| value.as_str())
4224        .unwrap_or("");
4225    shell_looks_like_structured_host_inspection(command)
4226}
4227
4228fn extract_release_arg(command: &str, flag: &str) -> Option<String> {
4229    let pattern = format!(r#"(?i){}\s+['"]?([^'" \r\n]+)['"]?"#, regex::escape(flag));
4230    let regex = regex::Regex::new(&pattern).ok()?;
4231    let captures = regex.captures(command)?;
4232    captures.get(1).map(|m| m.as_str().to_string())
4233}
4234
4235fn infer_maintainer_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4236    let workflow = preferred_maintainer_workflow(prompt)?;
4237    let lower = prompt.to_ascii_lowercase();
4238    match workflow {
4239        "clean" => Some(serde_json::json!({
4240            "workflow": "clean",
4241            "deep": lower.contains("deep clean")
4242                || lower.contains("deep cleanup")
4243                || lower.contains("deep"),
4244            "reset": lower.contains("reset"),
4245            "prune_dist": lower.contains("prune dist")
4246                || lower.contains("prune old dist")
4247                || lower.contains("prune old artifacts")
4248                || lower.contains("old dist artifacts")
4249                || lower.contains("old artifacts"),
4250        })),
4251        "package_windows" => Some(serde_json::json!({
4252            "workflow": "package_windows",
4253            "installer": lower.contains("installer") || lower.contains("setup.exe"),
4254            "add_to_path": lower.contains("addtopath")
4255                || lower.contains("add to path")
4256                || lower.contains("update path")
4257                || lower.contains("refresh path"),
4258        })),
4259        "release" => {
4260            let version = regex::Regex::new(r#"(?i)\b(\d+\.\d+\.\d+)\b"#)
4261                .ok()
4262                .and_then(|re| re.captures(prompt))
4263                .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()));
4264            let bump = if lower.contains("patch") {
4265                Some("patch")
4266            } else if lower.contains("minor") {
4267                Some("minor")
4268            } else if lower.contains("major") {
4269                Some("major")
4270            } else {
4271                None
4272            };
4273            let mut args = serde_json::json!({
4274                "workflow": "release",
4275                "push": lower.contains(" push") || lower.starts_with("push ") || lower.contains(" and push"),
4276                "add_to_path": lower.contains("addtopath")
4277                    || lower.contains("add to path")
4278                    || lower.contains("update path"),
4279                "skip_installer": lower.contains("skip installer"),
4280                "publish_crates": lower.contains("publish crates") || lower.contains("crates.io"),
4281                "publish_voice_crate": lower.contains("publish voice crate")
4282                    || lower.contains("publish hematite-kokoros"),
4283            });
4284            if let Some(version) = version {
4285                args["version"] = Value::String(version);
4286            }
4287            if let Some(bump) = bump {
4288                args["bump"] = Value::String(bump.to_string());
4289            }
4290            Some(args)
4291        }
4292        _ => None,
4293    }
4294}
4295
4296fn infer_workspace_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4297    let workflow = preferred_workspace_workflow(prompt)?;
4298    let lower = prompt.to_ascii_lowercase();
4299    let trimmed = prompt.trim();
4300
4301    if let Some(command) = extract_workspace_command_from_prompt(trimmed) {
4302        return Some(serde_json::json!({
4303            "workflow": "command",
4304            "command": command,
4305        }));
4306    }
4307
4308    if let Some(path) = extract_workspace_script_path_from_prompt(trimmed) {
4309        return Some(serde_json::json!({
4310            "workflow": "script_path",
4311            "path": path,
4312        }));
4313    }
4314
4315    match workflow {
4316        "build" | "test" | "lint" | "fix" => Some(serde_json::json!({
4317            "workflow": workflow,
4318        })),
4319        "script" => {
4320            let package_script = if lower.contains("npm run ") {
4321                extract_word_after(&lower, "npm run ")
4322            } else if lower.contains("pnpm run ") {
4323                extract_word_after(&lower, "pnpm run ")
4324            } else if lower.contains("bun run ") {
4325                extract_word_after(&lower, "bun run ")
4326            } else if lower.contains("yarn ") {
4327                extract_word_after(&lower, "yarn ")
4328            } else {
4329                None
4330            };
4331
4332            if let Some(name) = package_script {
4333                return Some(serde_json::json!({
4334                    "workflow": "package_script",
4335                    "name": name,
4336                }));
4337            }
4338
4339            if let Some(name) = extract_word_after(&lower, "just ") {
4340                return Some(serde_json::json!({
4341                    "workflow": "just",
4342                    "name": name,
4343                }));
4344            }
4345            if let Some(name) = extract_word_after(&lower, "make ") {
4346                return Some(serde_json::json!({
4347                    "workflow": "make",
4348                    "name": name,
4349                }));
4350            }
4351            if let Some(name) = extract_word_after(&lower, "task ") {
4352                return Some(serde_json::json!({
4353                    "workflow": "task",
4354                    "name": name,
4355                }));
4356            }
4357
4358            None
4359        }
4360        _ => None,
4361    }
4362}
4363
/// Extracts an explicit workspace command (e.g. `cargo test`, `pnpm install`) from a prompt.
///
/// Known command prefixes are tried in priority order. A prefix only counts when it
/// starts the prompt or follows a non-alphanumeric character, so `"npm "` no longer
/// matches inside `"pnpm "` and `"cargo "` no longer matches inside `"escargo "`.
/// The returned command runs from the matched prefix to the end of the prompt, with
/// surrounding whitespace and backticks removed and the original casing preserved.
fn extract_workspace_command_from_prompt(prompt: &str) -> Option<String> {
    const COMMAND_PREFIXES: [&str; 13] = [
        "cargo ",
        "npm ",
        "pnpm ",
        "yarn ",
        "bun ",
        "pytest",
        "go build",
        "go test",
        "make ",
        "just ",
        "task ",
        "./gradlew",
        ".\\gradlew",
    ];

    // ASCII lowercasing keeps byte offsets identical, so indices found in `lower`
    // are valid slice positions in `prompt`.
    let lower = prompt.to_ascii_lowercase();
    for prefix in COMMAND_PREFIXES {
        let hit = lower
            .match_indices(prefix)
            .map(|(index, _)| index)
            .find(|&index| {
                lower[..index]
                    .chars()
                    .next_back()
                    .map_or(true, |before| !before.is_alphanumeric())
            });
        if let Some(index) = hit {
            return Some(prompt[index..].trim().trim_matches('`').to_string());
        }
    }
    None
}
4387
/// Finds the first `scripts/...` path with a known script extension in a prompt.
///
/// Backslashes are normalized to `/`, and surrounding quotes, backticks, commas and
/// parentheses plus sentence-final periods are stripped from each token. A leading
/// `./` is then removed, so `./scripts/build.sh` and `.\scripts\build.ps1` are
/// recognized. Previously the leading `.` was trimmed before the `./` could be
/// removed, so such paths were never matched. The returned path is relative and
/// starts with `scripts/`.
fn extract_workspace_script_path_from_prompt(prompt: &str) -> Option<String> {
    const SCRIPT_EXTENSIONS: [&str; 8] =
        [".ps1", ".sh", ".py", ".cmd", ".bat", ".js", ".mjs", ".cjs"];

    let normalized = prompt.replace('\\', "/");
    normalized.split_whitespace().find_map(|token| {
        let candidate = token
            // '.' is deliberately not stripped from the front: it belongs to "./".
            .trim_start_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | ')' | '('))
            .trim_end_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('))
            .trim_start_matches("./");
        let lowered = candidate.to_ascii_lowercase();
        let is_script = candidate.starts_with("scripts/")
            && SCRIPT_EXTENSIONS.iter().any(|ext| lowered.ends_with(ext));
        is_script.then(|| candidate.to_string())
    })
}
4404
/// Returns the first whitespace-delimited word that follows the first occurrence of
/// `prefix` in `haystack`, with surrounding quotes, backticks, commas, periods and
/// parentheses removed.
///
/// Returns `None` when `prefix` is absent, when nothing follows it, or when the
/// following token is only punctuation. The last case previously produced an empty
/// name.
fn extract_word_after(haystack: &str, prefix: &str) -> Option<String> {
    let (_, tail) = haystack.split_once(prefix)?;
    let word = tail
        .split_whitespace()
        .next()?
        .trim_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('));
    (!word.is_empty()).then(|| word.to_string())
}
4418
4419fn rewrite_shell_to_maintainer_workflow_args(command: &str) -> Option<Value> {
4420    let lower = command.to_ascii_lowercase();
4421    if lower.contains("clean.ps1") {
4422        return Some(serde_json::json!({
4423            "workflow": "clean",
4424            "deep": lower.contains("-deep"),
4425            "reset": lower.contains("-reset"),
4426            "prune_dist": lower.contains("-prunedist"),
4427        }));
4428    }
4429    if lower.contains("package-windows.ps1") {
4430        return Some(serde_json::json!({
4431            "workflow": "package_windows",
4432            "installer": lower.contains("-installer"),
4433            "add_to_path": lower.contains("-addtopath"),
4434        }));
4435    }
4436    if lower.contains("release.ps1") {
4437        let version = extract_release_arg(command, "-Version");
4438        let bump = extract_release_arg(command, "-Bump");
4439        if version.is_none() && bump.is_none() {
4440            return Some(serde_json::json!({
4441                "workflow": "release"
4442            }));
4443        }
4444        let mut args = serde_json::json!({
4445            "workflow": "release",
4446            "push": lower.contains("-push"),
4447            "add_to_path": lower.contains("-addtopath"),
4448            "skip_installer": lower.contains("-skipinstaller"),
4449            "publish_crates": lower.contains("-publishcrates"),
4450            "publish_voice_crate": lower.contains("-publishvoicecrate"),
4451        });
4452        if let Some(version) = version {
4453            args["version"] = Value::String(version);
4454        }
4455        if let Some(bump) = bump {
4456            args["bump"] = Value::String(bump);
4457        }
4458        return Some(args);
4459    }
4460    None
4461}
4462
4463fn rewrite_shell_to_workspace_workflow_args(command: &str) -> Option<Value> {
4464    let lower = command.to_ascii_lowercase();
4465    if lower.contains("clean.ps1")
4466        || lower.contains("package-windows.ps1")
4467        || lower.contains("release.ps1")
4468    {
4469        return None;
4470    }
4471
4472    if let Some(path) = extract_workspace_script_path_from_prompt(command) {
4473        return Some(serde_json::json!({
4474            "workflow": "script_path",
4475            "path": path,
4476        }));
4477    }
4478
4479    let looks_like_workspace_command = [
4480        "cargo ",
4481        "npm ",
4482        "pnpm ",
4483        "yarn ",
4484        "bun ",
4485        "pytest",
4486        "go build",
4487        "go test",
4488        "make ",
4489        "just ",
4490        "task ",
4491        "./gradlew",
4492        ".\\gradlew",
4493    ]
4494    .iter()
4495    .any(|needle| lower.contains(needle));
4496
4497    if looks_like_workspace_command {
4498        Some(serde_json::json!({
4499            "workflow": "command",
4500            "command": command.trim(),
4501        }))
4502    } else {
4503        None
4504    }
4505}
4506
4507fn rewrite_host_tool_call(
4508    tool_name: &mut String,
4509    args: &mut Value,
4510    latest_user_prompt: Option<&str>,
4511) {
4512    if *tool_name == "shell" {
4513        let command = args
4514            .get("command")
4515            .and_then(|value| value.as_str())
4516            .unwrap_or("");
4517        if let Some(maintainer_workflow_args) = rewrite_shell_to_maintainer_workflow_args(command) {
4518            *tool_name = "run_hematite_maintainer_workflow".to_string();
4519            *args = maintainer_workflow_args;
4520            return;
4521        }
4522        if let Some(workspace_workflow_args) = rewrite_shell_to_workspace_workflow_args(command) {
4523            *tool_name = "run_workspace_workflow".to_string();
4524            *args = workspace_workflow_args;
4525            return;
4526        }
4527    }
4528    if *tool_name != "run_hematite_maintainer_workflow" {
4529        if let Some(prompt_args) =
4530            latest_user_prompt.and_then(infer_maintainer_workflow_args_from_prompt)
4531        {
4532            *tool_name = "run_hematite_maintainer_workflow".to_string();
4533            *args = prompt_args;
4534            return;
4535        }
4536    }
4537    if *tool_name != "run_workspace_workflow" {
4538        if let Some(prompt_args) =
4539            latest_user_prompt.and_then(infer_workspace_workflow_args_from_prompt)
4540        {
4541            *tool_name = "run_workspace_workflow".to_string();
4542            *args = prompt_args;
4543            return;
4544        }
4545    }
4546    if should_rewrite_shell_to_fix_plan(tool_name, args, latest_user_prompt) {
4547        *tool_name = "inspect_host".to_string();
4548        *args = serde_json::json!({
4549            "topic": "fix_plan"
4550        });
4551    }
4552    fill_missing_fix_plan_issue(tool_name, args, latest_user_prompt);
4553}
4554
4555fn canonical_tool_call_key(tool_name: &str, args: &Value) -> String {
4556    format!(
4557        "{}:{}",
4558        tool_name,
4559        serde_json::to_string(args).unwrap_or_default()
4560    )
4561}
4562
4563fn normalized_tool_call_for_execution(
4564    tool_name: &str,
4565    raw_arguments: &str,
4566    gemma4_model: bool,
4567    latest_user_prompt: Option<&str>,
4568) -> (String, Value) {
4569    let normalized_arguments = if gemma4_model {
4570        crate::agent::inference::normalize_tool_argument_string(tool_name, raw_arguments)
4571    } else {
4572        raw_arguments.to_string()
4573    };
4574    let mut normalized_name = tool_name.to_string();
4575    let mut args = serde_json::from_str::<Value>(&normalized_arguments)
4576        .unwrap_or(Value::Object(Default::default()));
4577    rewrite_host_tool_call(&mut normalized_name, &mut args, latest_user_prompt);
4578    (normalized_name, args)
4579}
4580
/// Test-only helper: the dedupe key for a raw tool call after normalization.
///
/// Runs the call through `normalized_tool_call_for_execution` and feeds the
/// resulting name/arguments pair to `canonical_tool_call_key`.
#[cfg(test)]
fn normalized_tool_call_key_for_dedupe(
    tool_name: &str,
    raw_arguments: &str,
    gemma4_model: bool,
    latest_user_prompt: Option<&str>,
) -> String {
    let (effective_name, effective_args) = normalized_tool_call_for_execution(
        tool_name,
        raw_arguments,
        gemma4_model,
        latest_user_prompt,
    );
    canonical_tool_call_key(effective_name.as_str(), &effective_args)
}
4596
4597impl ConversationManager {
4598    /// Checks if a tool call is authorized given the current configuration and mode.
4599    fn check_authorization(
4600        &self,
4601        name: &str,
4602        args: &serde_json::Value,
4603        config: &crate::agent::config::HematiteConfig,
4604        yolo_flag: bool,
4605    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
4606        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
4607    }
4608
4609    /// Layer 4: Isolated tool execution logic. Does not mutate 'self' to allow parallelism.
4610    async fn process_tool_call(
4611        &self,
4612        mut call: ToolCallFn,
4613        config: crate::agent::config::HematiteConfig,
4614        yolo: bool,
4615        tx: mpsc::Sender<InferenceEvent>,
4616        real_id: String,
4617    ) -> ToolExecutionOutcome {
4618        let mut msg_results = Vec::new();
4619        let gemma4_model =
4620            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
4621        let normalized_arguments = if gemma4_model {
4622            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
4623        } else {
4624            call.arguments.clone()
4625        };
4626
4627        // 1. Argument Parsing & Repair
4628        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
4629            Ok(v) => v,
4630            Err(_) => {
4631                match self
4632                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
4633                    .await
4634                {
4635                    Ok(v) => v,
4636                    Err(e) => {
4637                        let _ = tx
4638                            .send(InferenceEvent::Thought(format!(
4639                                "JSON Repair failed: {}",
4640                                e
4641                            )))
4642                            .await;
4643                        Value::Object(Default::default())
4644                    }
4645                }
4646            }
4647        };
4648        let last_user_prompt = self
4649            .history
4650            .iter()
4651            .rev()
4652            .find(|message| message.role == "user")
4653            .map(|message| message.content.as_str());
4654        rewrite_host_tool_call(&mut call.name, &mut args, last_user_prompt);
4655
4656        let display = format_tool_display(&call.name, &args);
4657        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
4658        let auth = self.check_authorization(&call.name, &args, &config, yolo);
4659
4660        // 2. Permission Check
4661        let decision_result = match precondition_result {
4662            Err(e) => Err(e),
4663            Ok(_) => match auth {
4664                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
4665                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
4666                    reason,
4667                    source: _,
4668                } => {
4669                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
4670                    let _ = tx
4671                        .send(InferenceEvent::ApprovalRequired {
4672                            id: real_id.clone(),
4673                            name: call.name.clone(),
4674                            display: format!("{}\nWhy: {}", display, reason),
4675                            diff: None,
4676                            responder: approve_tx,
4677                        })
4678                        .await;
4679
4680                    match approve_rx.await {
4681                        Ok(true) => Ok(()),
4682                        _ => Err("Declined by user".into()),
4683                    }
4684                }
4685                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
4686                    reason, ..
4687                } => Err(reason),
4688            },
4689        };
4690        let blocked_by_policy =
4691            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
4692
4693        // 3. Execution (Local or MCP)
4694        let (output, is_error) = match decision_result {
4695            Err(e) if e.starts_with("[auto-redirected shell→inspect_host") => (e, false),
4696            Err(e) => (format!("Error: {}", e), true),
4697            Ok(_) => {
4698                let _ = tx
4699                    .send(InferenceEvent::ToolCallStart {
4700                        id: real_id.clone(),
4701                        name: call.name.clone(),
4702                        args: display.clone(),
4703                    })
4704                    .await;
4705
4706                let result = if call.name.starts_with("lsp_") {
4707                    let lsp = self.lsp_manager.clone();
4708                    let path = args
4709                        .get("path")
4710                        .and_then(|v| v.as_str())
4711                        .unwrap_or("")
4712                        .to_string();
4713                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4714                    let character =
4715                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4716
4717                    match call.name.as_str() {
4718                        "lsp_definitions" => {
4719                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
4720                                .await
4721                        }
4722                        "lsp_references" => {
4723                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
4724                                .await
4725                        }
4726                        "lsp_hover" => {
4727                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
4728                        }
4729                        "lsp_search_symbol" => {
4730                            let query = args
4731                                .get("query")
4732                                .and_then(|v| v.as_str())
4733                                .unwrap_or_default()
4734                                .to_string();
4735                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
4736                        }
4737                        "lsp_rename_symbol" => {
4738                            let new_name = args
4739                                .get("new_name")
4740                                .and_then(|v| v.as_str())
4741                                .unwrap_or_default()
4742                                .to_string();
4743                            crate::tools::lsp_tools::lsp_rename_symbol(
4744                                lsp, path, line, character, new_name,
4745                            )
4746                            .await
4747                        }
4748                        "lsp_get_diagnostics" => {
4749                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
4750                        }
4751                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
4752                    }
4753                } else if call.name == "auto_pin_context" {
4754                    let pts = args.get("paths").and_then(|v| v.as_array());
4755                    let reason = args
4756                        .get("reason")
4757                        .and_then(|v| v.as_str())
4758                        .unwrap_or("uninformed scoping");
4759                    if let Some(arr) = pts {
4760                        let mut pinned = Vec::new();
4761                        {
4762                            let mut guard = self.pinned_files.lock().await;
4763                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; // 25MB Safety Valve
4764
4765                            for v in arr.iter().take(3) {
4766                                if let Some(p) = v.as_str() {
4767                                    if let Ok(meta) = std::fs::metadata(p) {
4768                                        if meta.len() > MAX_PINNED_SIZE {
4769                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
4770                                            continue;
4771                                        }
4772                                        if let Ok(content) = std::fs::read_to_string(p) {
4773                                            guard.insert(p.to_string(), content);
4774                                            pinned.push(p.to_string());
4775                                        }
4776                                    }
4777                                }
4778                            }
4779                        }
4780                        let msg = format!(
4781                            "Autonomous Scoping: Locked {} in prioritized memory. Reason: {}",
4782                            pinned.join(", "),
4783                            reason
4784                        );
4785                        let _ = tx
4786                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
4787                            .await;
4788                        Ok(msg)
4789                    } else {
4790                        Err("Missing 'paths' array for auto_pin_context.".to_string())
4791                    }
4792                } else if call.name == "list_pinned" {
4793                    let paths_msg = {
4794                        let pinned = self.pinned_files.lock().await;
4795                        if pinned.is_empty() {
4796                            "No files are currently pinned.".to_string()
4797                        } else {
4798                            let paths: Vec<_> = pinned.keys().cloned().collect();
4799                            format!(
4800                                "Currently pinned files in active memory:\n- {}",
4801                                paths.join("\n- ")
4802                            )
4803                        }
4804                    };
4805                    Ok(paths_msg)
4806                } else if call.name.starts_with("mcp__") {
4807                    let mut mcp = self.mcp_manager.lock().await;
4808                    match mcp.call_tool(&call.name, &args).await {
4809                        Ok(res) => Ok(res),
4810                        Err(e) => Err(e.to_string()),
4811                    }
4812                } else if call.name == "swarm" {
4813                    // ── Swarm Orchestration ──
4814                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
4815                    let max_workers = args
4816                        .get("max_workers")
4817                        .and_then(|v| v.as_u64())
4818                        .unwrap_or(3) as usize;
4819
4820                    let mut task_objs = Vec::new();
4821                    if let Value::Array(arr) = tasks_val {
4822                        for v in arr {
4823                            let id = v
4824                                .get("id")
4825                                .and_then(|x| x.as_str())
4826                                .unwrap_or("?")
4827                                .to_string();
4828                            let target = v
4829                                .get("target")
4830                                .and_then(|x| x.as_str())
4831                                .unwrap_or("?")
4832                                .to_string();
4833                            let instruction = v
4834                                .get("instruction")
4835                                .and_then(|x| x.as_str())
4836                                .unwrap_or("?")
4837                                .to_string();
4838                            task_objs.push(crate::agent::parser::WorkerTask {
4839                                id,
4840                                target,
4841                                instruction,
4842                            });
4843                        }
4844                    }
4845
4846                    if task_objs.is_empty() {
4847                        Err("No tasks provided for swarm.".to_string())
4848                    } else {
4849                        let (swarm_tx_internal, mut swarm_rx_internal) =
4850                            tokio::sync::mpsc::channel(32);
4851                        let tx_forwarder = tx.clone();
4852
4853                        // Bridge SwarmMessage -> InferenceEvent
4854                        tokio::spawn(async move {
4855                            while let Some(msg) = swarm_rx_internal.recv().await {
4856                                match msg {
4857                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
4858                                        let _ = tx_forwarder
4859                                            .send(InferenceEvent::Thought(format!(
4860                                                "Swarm [{}]: {}% complete",
4861                                                id, p
4862                                            )))
4863                                            .await;
4864                                    }
4865                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
4866                                        worker_id,
4867                                        file_path,
4868                                        before: _,
4869                                        after: _,
4870                                        tx,
4871                                    } => {
4872                                        let (approve_tx, approve_rx) =
4873                                            tokio::sync::oneshot::channel::<bool>();
4874                                        let display = format!(
4875                                            "Swarm worker [{}]: Integrated changes into {:?}",
4876                                            worker_id, file_path
4877                                        );
4878                                        let _ = tx_forwarder
4879                                            .send(InferenceEvent::ApprovalRequired {
4880                                                id: format!("swarm_{}", worker_id),
4881                                                name: "swarm_apply".to_string(),
4882                                                display,
4883                                                diff: None,
4884                                                responder: approve_tx,
4885                                            })
4886                                            .await;
4887                                        if let Ok(approved) = approve_rx.await {
4888                                            let response = if approved {
4889                                                crate::agent::swarm::ReviewResponse::Accept
4890                                            } else {
4891                                                crate::agent::swarm::ReviewResponse::Reject
4892                                            };
4893                                            let _ = tx.send(response);
4894                                        }
4895                                    }
4896                                    crate::agent::swarm::SwarmMessage::Done => {}
4897                                }
4898                            }
4899                        });
4900
4901                        let coordinator = self.swarm_coordinator.clone();
4902                        match coordinator
4903                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
4904                            .await
4905                        {
4906                            Ok(_) => Ok(
4907                                "Swarm execution completed. Check files for integration results."
4908                                    .to_string(),
4909                            ),
4910                            Err(e) => Err(format!("Swarm failure: {}", e)),
4911                        }
4912                    }
4913                } else if call.name == "vision_analyze" {
4914                    crate::tools::vision::vision_analyze(&self.engine, &args).await
4915                } else if matches!(
4916                    call.name.as_str(),
4917                    "edit_file" | "patch_hunk" | "multi_search_replace"
4918                ) && !yolo
4919                {
4920                    // ── Diff preview gate ─────────────────────────────────────
4921                    // Compute what the edit would look like before applying it.
4922                    // If we can build a diff, require user Y/N in the TUI.
4923                    let diff_result = match call.name.as_str() {
4924                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
4925                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
4926                        _ => crate::tools::file_ops::compute_msr_diff(&args),
4927                    };
4928                    match diff_result {
4929                        Ok(diff_text) => {
4930                            let path_label =
4931                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
4932                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
4933                            let _ = tx
4934                                .send(InferenceEvent::ApprovalRequired {
4935                                    id: real_id.clone(),
4936                                    name: call.name.clone(),
4937                                    display: format!("Edit preview: {}", path_label),
4938                                    diff: Some(diff_text),
4939                                    responder: appr_tx,
4940                                })
4941                                .await;
4942                            match appr_rx.await {
4943                                Ok(true) => dispatch_tool(&call.name, &args).await,
4944                                _ => Err("Edit declined by user.".into()),
4945                            }
4946                        }
4947                        // Diff computation failed (e.g. search string not found yet) —
4948                        // fall through and let the tool return its own error.
4949                        Err(_) => dispatch_tool(&call.name, &args).await,
4950                    }
4951                } else if call.name == "verify_build" {
4952                    // Stream build output line-by-line to the SPECULAR panel so
4953                    // the operator sees live compiler progress during long builds.
4954                    crate::tools::verify_build::execute_streaming(&args, tx.clone()).await
4955                } else if call.name == "shell" {
4956                    // Stream shell output line-by-line to the SPECULAR panel so
4957                    // the operator sees live progress during long commands.
4958                    crate::tools::shell::execute_streaming(&args, tx.clone()).await
4959                } else {
4960                    dispatch_tool(&call.name, &args).await
4961                };
4962
4963                match result {
4964                    Ok(o) => (o, false),
4965                    Err(e) => (format!("Error: {}", e), true),
4966                }
4967            }
4968        };
4969
4970        // ── Session Economics ────────────────────────────────────────────────
4971        {
4972            if let Ok(mut econ) = self.engine.economics.lock() {
4973                econ.record_tool(&call.name, !is_error);
4974            }
4975        }
4976
4977        if !is_error {
4978            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
4979                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
4980                    if call.name == "inspect_lines" {
4981                        self.record_line_inspection(path).await;
4982                    } else {
4983                        self.record_read_observation(path).await;
4984                    }
4985                }
4986            }
4987
4988            if call.name == "verify_build" {
4989                let ok = output.contains("BUILD OK")
4990                    || output.contains("BUILD SUCCESS")
4991                    || output.contains("BUILD OKAY");
4992                self.record_verify_build_result(ok, &output).await;
4993            }
4994
4995            if matches!(
4996                call.name.as_str(),
4997                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4998            ) || is_mcp_mutating_tool(&call.name)
4999            {
5000                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
5001                    .await;
5002            }
5003
5004            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
5005                msg_results.push(receipt);
5006            }
5007        }
5008
5009        // 4. Critic Check (Specular Tier 2)
5010        // Gated: Only run on code files with substantive content to avoid burning tokens
5011        // on trivial doc/config edits.
5012        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
5013            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
5014            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
5015            let ext = std::path::Path::new(path)
5016                .extension()
5017                .and_then(|e| e.to_str())
5018                .unwrap_or("");
5019            const SKIP_EXTS: &[&str] = &[
5020                "md",
5021                "toml",
5022                "json",
5023                "txt",
5024                "yml",
5025                "yaml",
5026                "cfg",
5027                "csv",
5028                "lock",
5029                "gitignore",
5030            ];
5031            let line_count = content.lines().count();
5032            if !path.is_empty()
5033                && !content.is_empty()
5034                && !SKIP_EXTS.contains(&ext)
5035                && line_count >= 50
5036            {
5037                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
5038                    msg_results.push(ChatMessage::system(&format!(
5039                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
5040                        path, critique
5041                    )));
5042                }
5043            }
5044        }
5045
5046        ToolExecutionOutcome {
5047            call_id: real_id,
5048            tool_name: call.name,
5049            args,
5050            output,
5051            is_error,
5052            blocked_by_policy,
5053            msg_results,
5054        }
5055    }
5056}
5057
5058/// The result of an isolated tool execution.
5059/// Used to bridge Parallel/Serial execution back to the main history.
5060struct ToolExecutionOutcome {
5061    call_id: String,
5062    tool_name: String,
5063    args: Value,
5064    output: String,
5065    is_error: bool,
5066    blocked_by_policy: bool,
5067    msg_results: Vec<ChatMessage>,
5068}
5069
/// Cache entry for a tool result; it records only the name of the tool.
///
/// NOTE(review): where the cache is populated and consulted is not visible in
/// this part of the file — confirm against the callers.
#[derive(Clone)]
struct CachedToolResult {
    /// Name of the tool whose result was cached.
    tool_name: String,
}
5074
/// Reports whether `path` ends in a file extension associated with source code.
///
/// The comparison ignores ASCII case. Paths with no extension (including
/// dotfiles such as `.rs`) or with a non-UTF-8 extension are not code-like.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    match std::path::Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
    {
        Some(extension) => CODE_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(known)),
        None => false,
    }
}
5103
5104// ── Display helpers ───────────────────────────────────────────────────────────
5105
5106pub fn format_tool_display(name: &str, args: &Value) -> String {
5107    let get = |key: &str| {
5108        args.get(key)
5109            .and_then(|v| v.as_str())
5110            .unwrap_or("")
5111            .to_string()
5112    };
5113    match name {
5114        "shell" => format!("$ {}", get("command")),
5115
5116        "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
5117        "describe_toolchain" => format!("describe toolchain {}", get("topic")),
5118        "inspect_host" => format!("inspect host {}", get("topic")),
5119        _ => format!("{} {:?}", name, args),
5120    }
5121}
5122
5123// ── Text utilities ────────────────────────────────────────────────────────────
5124
5125pub(crate) fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
5126    let lower = command.to_ascii_lowercase();
5127    [
5128        "$env:path",
5129        "pathvariable",
5130        "pip --version",
5131        "pipx --version",
5132        "winget --version",
5133        "choco",
5134        "scoop",
5135        "get-childitem",
5136        "gci ",
5137        "where.exe",
5138        "where ",
5139        "cargo --version",
5140        "rustc --version",
5141        "git --version",
5142        "node --version",
5143        "npm --version",
5144        "pnpm --version",
5145        "python --version",
5146        "python3 --version",
5147        "deno --version",
5148        "go version",
5149        "dotnet --version",
5150        "uv --version",
5151        "netstat",
5152        "findstr",
5153        "get-nettcpconnection",
5154        "tcpconnection",
5155        "listening",
5156        "ss -",
5157        "ss ",
5158        "lsof",
5159        "tasklist",
5160        "ipconfig",
5161        "get-netipconfiguration",
5162        "get-netadapter",
5163        "route print",
5164        "ifconfig",
5165        "ip addr",
5166        "ip route",
5167        "resolv.conf",
5168        "get-service",
5169        "sc query",
5170        "systemctl",
5171        "service --status-all",
5172        "get-process",
5173        "working set",
5174        "ps -eo",
5175        "ps aux",
5176        "desktop",
5177        "downloads",
5178        "get-netfirewallprofile",
5179        "win32_powerplan",
5180        "win32_operatingsystem",
5181        "win32_processor",
5182        "wmic",
5183        "loadpercentage",
5184        "totalvisiblememory",
5185        "freephysicalmemory",
5186        "get-wmiobject",
5187        "get-ciminstance",
5188        "get-cpu",
5189        "processorname",
5190        "clockspeed",
5191        "top memory",
5192        "top cpu",
5193        "resource usage",
5194        "powercfg",
5195        "uptime",
5196        "lastbootuptime",
5197        // registry reads for OS/version/update/security info — always use inspect_host
5198        "hklm:",
5199        "hkcu:",
5200        "hklm:\\",
5201        "hkcu:\\",
5202        "currentversion",
5203        "productname",
5204        "displayversion",
5205        "get-itemproperty",
5206        "get-itempropertyvalue",
5207        // updates
5208        "get-windowsupdatelog",
5209        "windowsupdatelog",
5210        "microsoft.update.session",
5211        "createupdatesearcher",
5212        "wuauserv",
5213        "usoclient",
5214        "get-hotfix",
5215        "wu_",
5216        // security / defender
5217        "get-mpcomputerstatus",
5218        "get-mppreference",
5219        "get-mpthreat",
5220        "start-mpscan",
5221        "win32_computersecurity",
5222        "softwarelicensingproduct",
5223        "enablelua",
5224        "get-netfirewallrule",
5225        "netfirewallprofile",
5226        "antivirus",
5227        "defenderstatus",
5228        // disk health / smart
5229        "get-physicaldisk",
5230        "get-disk",
5231        "get-volume",
5232        "get-psdrive",
5233        "psdrive",
5234        "manage-bde",
5235        "bitlockervolume",
5236        "get-bitlockervolume",
5237        "get-smbencryptionstatus",
5238        "smbencryption",
5239        "get-netlanmanagerconnection",
5240        "lanmanager",
5241        "msstoragedriver_failurepredic",
5242        "win32_diskdrive",
5243        "smartstatus",
5244        "diskstatus",
5245        "get-counter",
5246        "intensity",
5247        "benchmark",
5248        "thrash",
5249        "get-item",
5250        "test-path",
5251        // gpo / certs / integrity / domain
5252        "gpresult",
5253        "applied gpo",
5254        "cert:\\",
5255        "cert:",
5256        "component based servicing",
5257        "componentstore",
5258        "get-computerinfo",
5259        "win32_computersystem",
5260        // battery
5261        "win32_battery",
5262        "batterystaticdata",
5263        "batteryfullchargedcapacity",
5264        "batterystatus",
5265        "estimatedchargeremaining",
5266        // crashes / event log (broader)
5267        "get-winevent",
5268        "eventid",
5269        "bugcheck",
5270        "kernelpower",
5271        "win32_ntlogevent",
5272        "filterhashtable",
5273        // scheduled tasks
5274        "get-scheduledtask",
5275        "get-scheduledtaskinfo",
5276        "schtasks",
5277        "taskscheduler",
5278        "get-acl",
5279        "icacls",
5280        "takeown",
5281        "event id 4624",
5282        "eventid 4624",
5283        "who logged in",
5284        "logon history",
5285        "login history",
5286        "get-smbshare",
5287        "net share",
5288        "mbps",
5289        "throughput",
5290        "whoami",
5291        // general cim/wmi diagnostic queries — always use inspect_host
5292        "get-ciminstance win32",
5293        "get-wmiobject win32",
5294        // network admin — always use inspect_host
5295        "arp -",
5296        "arp -a",
5297        "tracert ",
5298        "traceroute ",
5299        "tracepath ",
5300        "get-dnsclientcache",
5301        "ipconfig /displaydns",
5302        "get-netroute",
5303        "route print",
5304        "ip neigh",
5305        // active directory - always use inspect_host
5306        "get-aduser",
5307        "get-addomain",
5308        "get-adforest",
5309        "get-adgroup",
5310        "get-adcomputer",
5311        "activedirectory",
5312        "get-localuser",
5313        "get-localgroup",
5314        "get-localgroupmember",
5315        "net user",
5316        "net localgroup",
5317        "netsh winhttp show proxy",
5318        "get-itemproperty.*proxy",
5319        "get-netadapter",
5320        "netsh wlan show",
5321        "test-netconnection",
5322        "resolve-dnsname",
5323        "get-netfirewallrule",
5324        // docker / wsl / ssh — always use inspect_host
5325        "docker ps",
5326        "docker info",
5327        "docker images",
5328        "docker container",
5329        "docker compose ls",
5330        "wsl --list",
5331        "wsl -l",
5332        "wsl --status",
5333        "wsl --version",
5334        "ssh -v",
5335        "get-service sshd",
5336        "get-service -name sshd",
5337        "cat ~/.ssh",
5338        "ls ~/.ssh",
5339        "ls -la ~/.ssh",
5340        // env / hosts / git config
5341        "get-childitem env:",
5342        "dir env:",
5343        "printenv",
5344        "[environment]::getenvironmentvariable",
5345        "get-content.*hosts",
5346        "cat /etc/hosts",
5347        "type c:\\windows\\system32\\drivers\\etc\\hosts",
5348        "git config --global --list",
5349        "git config --list",
5350        "git config --global",
5351        // database services
5352        "get-service mysql",
5353        "get-service postgresql",
5354        "get-service mongodb",
5355        "get-service redis",
5356        "get-service mssql",
5357        "get-service mariadb",
5358        "systemctl status postgresql",
5359        "systemctl status mysql",
5360        "systemctl status mongod",
5361        "systemctl status redis",
5362        // installed software
5363        "winget list",
5364        "get-package",
5365        "get-itempropert.*uninstall",
5366        "dpkg --get-selections",
5367        "rpm -qa",
5368        "brew list",
5369        // user accounts
5370        "get-localuser",
5371        "get-localgroupmember",
5372        "net user",
5373        "query user",
5374        "net localgroup administrators",
5375        // audit policy
5376        "auditpol /get",
5377        "auditpol",
5378        // shares
5379        "get-smbshare",
5380        "get-smbserverconfiguration",
5381        "net share",
5382        "net use",
5383        // dns servers
5384        "get-dnsclientserveraddress",
5385        "get-dnsclientdohserveraddress",
5386        "get-dnsclientglobalsetting",
5387    ]
5388    .iter()
5389    .any(|needle| lower.contains(needle))
5390}
5391
5392// Moved strip_think_blocks to inference.rs
5393
5394fn cap_output(text: &str, max_bytes: usize) -> String {
5395    cap_output_for_tool(text, max_bytes, "output")
5396}
5397
5398/// Cap tool output at `max_bytes`. When the output exceeds the cap, write the
5399/// full content to `.hematite/scratch/<tool_name>_<timestamp>.txt` and include
5400/// the path in the truncation notice so the model can read the rest with
5401/// `read_file` instead of losing it entirely.
5402fn cap_output_for_tool(text: &str, max_bytes: usize, tool_name: &str) -> String {
5403    if text.len() <= max_bytes {
5404        return text.to_string();
5405    }
5406
5407    // Write full output to scratch so the model can access it.
5408    let scratch_path = write_output_to_scratch(text, tool_name);
5409
5410    let mut split_at = max_bytes;
5411    while !text.is_char_boundary(split_at) && split_at > 0 {
5412        split_at -= 1;
5413    }
5414
5415    let tail = match &scratch_path {
5416        Some(p) => format!(
5417            "\n... [output truncated — full output ({} bytes, {} lines) saved to '{}' — use read_file to access the rest]",
5418            text.len(),
5419            text.lines().count(),
5420            p
5421        ),
5422        None => format!("\n... [output capped at {}B]", max_bytes),
5423    };
5424
5425    format!("{}{}", &text[..split_at], tail)
5426}
5427
5428/// Write text to `.hematite/scratch/<tool>_<timestamp>.txt`.
5429/// Returns the relative path on success, None if the write fails.
5430fn write_output_to_scratch(text: &str, tool_name: &str) -> Option<String> {
5431    let root = crate::tools::file_ops::workspace_root();
5432    let scratch_dir = root.join(".hematite").join("scratch");
5433    if std::fs::create_dir_all(&scratch_dir).is_err() {
5434        return None;
5435    }
5436    let ts = std::time::SystemTime::now()
5437        .duration_since(std::time::UNIX_EPOCH)
5438        .map(|d| d.as_secs())
5439        .unwrap_or(0);
5440    // Sanitize tool name for use in filename
5441    let safe_name: String = tool_name
5442        .chars()
5443        .map(|c| {
5444            if c.is_alphanumeric() || c == '_' {
5445                c
5446            } else {
5447                '_'
5448            }
5449        })
5450        .collect();
5451    let filename = format!("{}_{}.txt", safe_name, ts);
5452    let abs_path = scratch_dir.join(&filename);
5453    if std::fs::write(&abs_path, text).is_err() {
5454        return None;
5455    }
5456    Some(format!(".hematite/scratch/{}", filename))
5457}
5458
/// Counters describing what `enforce_prompt_budget` did to a prompt.
///
/// Every field starts at zero via `Default`; each one is incremented once per
/// affected message and reported in the guard's summary notice.
#[derive(Default)]
struct PromptBudgetStats {
    /// Tool results replaced by a compact summary.
    summarized_tool_results: usize,
    /// Older tool results replaced by a fixed "omitted" placeholder.
    collapsed_tool_results: usize,
    /// Long user/assistant messages replaced by a compact summary.
    trimmed_chat_messages: usize,
    /// Messages removed from the prompt entirely.
    dropped_messages: usize,
}
5466
5467fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
5468    crate::agent::inference::estimate_message_batch_tokens(messages)
5469}
5470
5471fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
5472    let budget = compaction::SummaryCompressionBudget {
5473        max_chars,
5474        max_lines: 3,
5475        max_line_chars: max_chars.clamp(80, 240),
5476    };
5477    let compressed = compaction::compress_summary(text, budget).summary;
5478    if compressed.is_empty() {
5479        String::new()
5480    } else {
5481        compressed
5482    }
5483}
5484
5485fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
5486    let tool_name = message.name.as_deref().unwrap_or("tool");
5487    let body = summarize_prompt_blob(message.content.as_str(), 320);
5488    format!(
5489        "[Prompt-budget summary of prior `{}` result]\n{}",
5490        tool_name, body
5491    )
5492}
5493
5494fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
5495    let role = message.role.as_str();
5496    let body = summarize_prompt_blob(message.content.as_str(), 240);
5497    format!(
5498        "[Prompt-budget summary of earlier {} message]\n{}",
5499        role, body
5500    )
5501}
5502
5503fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
5504    if messages.len() > 1 && messages[1].role != "user" {
5505        messages.insert(1, ChatMessage::user("Continuing previous context..."));
5506    }
5507}
5508
5509fn enforce_prompt_budget(
5510    prompt_msgs: &mut Vec<ChatMessage>,
5511    context_length: usize,
5512) -> Option<String> {
5513    let target_tokens = ((context_length as f64) * 0.68) as usize;
5514    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5515        return None;
5516    }
5517
5518    let mut stats = PromptBudgetStats::default();
5519
5520    // 1. Summarize the newest large tool outputs first.
5521    let mut tool_indices: Vec<usize> = prompt_msgs
5522        .iter()
5523        .enumerate()
5524        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5525        .collect();
5526    for idx in tool_indices.iter().rev().copied() {
5527        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5528            break;
5529        }
5530        let original = prompt_msgs[idx].content.as_str().to_string();
5531        if original.len() > 1200 {
5532            prompt_msgs[idx].content =
5533                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
5534            stats.summarized_tool_results += 1;
5535        }
5536    }
5537
5538    // 2. Collapse older tool results aggressively, keeping only the most recent two verbatim/summarized.
5539    tool_indices = prompt_msgs
5540        .iter()
5541        .enumerate()
5542        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5543        .collect();
5544    if tool_indices.len() > 2 {
5545        for idx in tool_indices
5546            .iter()
5547            .take(tool_indices.len().saturating_sub(2))
5548            .copied()
5549        {
5550            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5551                break;
5552            }
5553            prompt_msgs[idx].content = MessageContent::Text(
5554                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
5555            );
5556            stats.collapsed_tool_results += 1;
5557        }
5558    }
5559
5560    // 3. Trim older long chat messages, but preserve the final user request.
5561    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5562    for idx in 1..prompt_msgs.len() {
5563        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5564            break;
5565        }
5566        if Some(idx) == last_user_idx {
5567            continue;
5568        }
5569        let role = prompt_msgs[idx].role.as_str();
5570        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
5571            prompt_msgs[idx].content =
5572                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
5573            stats.trimmed_chat_messages += 1;
5574        }
5575    }
5576
5577    // 4. Drop the oldest non-system context until we fit, preserving the latest user request.
5578    let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5579    let mut idx = 1usize;
5580    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
5581        if Some(idx) == preserve_last_user_idx {
5582            idx += 1;
5583            if idx >= prompt_msgs.len() {
5584                break;
5585            }
5586            continue;
5587        }
5588        if idx >= prompt_msgs.len() {
5589            break;
5590        }
5591        prompt_msgs.remove(idx);
5592        stats.dropped_messages += 1;
5593    }
5594
5595    normalize_prompt_start(prompt_msgs);
5596
5597    let new_tokens = estimate_prompt_tokens(prompt_msgs);
5598    if stats.summarized_tool_results == 0
5599        && stats.collapsed_tool_results == 0
5600        && stats.trimmed_chat_messages == 0
5601        && stats.dropped_messages == 0
5602    {
5603        return None;
5604    }
5605
5606    Some(format!(
5607        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
5608        new_tokens,
5609        target_tokens,
5610        stats.summarized_tool_results,
5611        stats.collapsed_tool_results,
5612        stats.trimmed_chat_messages,
5613        stats.dropped_messages
5614    ))
5615}
5616
/// Returns true for short, direct tool-use requests that don't benefit from
/// deep reasoning.
///
/// Used to skip the auto-/think prepend so the model calls the tool
/// immediately instead of deliberating over a trivial task. A request
/// qualifies when it mentions the code sandbox explicitly, or when it is
/// shorter than 120 bytes and contains one of the compute-style keywords.
/// Matching is case-insensitive.
fn is_quick_tool_request(input: &str) -> bool {
    // Explicit sandbox requests need no reasoning warmup, whatever their length.
    const SANDBOX_MARKERS: &[&str] = &["run_code", "run code"];
    // Short compute/test requests: "calculate X", "test this", "execute Y".
    const COMPUTE_KEYWORDS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];
    const SHORT_REQUEST_BYTES: usize = 120;

    let lowered = input.to_lowercase();
    let mentions = |needles: &[&str]| needles.iter().any(|needle| lowered.contains(*needle));

    mentions(SANDBOX_MARKERS)
        || (input.len() < SHORT_REQUEST_BYTES && mentions(COMPUTE_KEYWORDS))
}
5646
/// Split text into chunks of roughly `words_per_chunk` whitespace-separated
/// tokens.
///
/// A "word" ends at each space or newline, and the separator stays attached to
/// the word before it, so concatenating the chunks reproduces `text` exactly.
/// The final chunk may hold fewer words. A `words_per_chunk` of 0 behaves
/// like 1.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    // Every separator closes at least one word, so 0 and 1 are equivalent.
    let group_size = words_per_chunk.max(1);

    // Each piece ends with its separator, except possibly the last one.
    let pieces: Vec<&str> = text
        .split_inclusive(|ch: char| ch == ' ' || ch == '\n')
        .collect();

    pieces
        .chunks(group_size)
        .map(|group| group.concat())
        .collect()
}
5668
5669fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
5670    if call.name != "read_file" {
5671        return None;
5672    }
5673    let normalized_arguments =
5674        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
5675    let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
5676    let path = args.get("path").and_then(|v| v.as_str())?;
5677    Some(normalize_workspace_path(path))
5678}
5679
5680fn order_batch_reads_first(
5681    calls: Vec<crate::agent::inference::ToolCallResponse>,
5682) -> (
5683    Vec<crate::agent::inference::ToolCallResponse>,
5684    Option<String>,
5685) {
5686    let has_reads = calls.iter().any(|c| {
5687        matches!(
5688            c.function.name.as_str(),
5689            "read_file" | "inspect_lines" | "grep_files" | "list_files"
5690        )
5691    });
5692    let has_edits = calls.iter().any(|c| {
5693        matches!(
5694            c.function.name.as_str(),
5695            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5696        )
5697    });
5698    if has_reads && has_edits {
5699        let reads: Vec<_> = calls
5700            .into_iter()
5701            .filter(|c| {
5702                !matches!(
5703                    c.function.name.as_str(),
5704                    "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5705                )
5706            })
5707            .collect();
5708        let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
5709        (reads, note)
5710    } else {
5711        (calls, None)
5712    }
5713}
5714
/// Decide whether grep output reports a large number of hits, based solely on
/// its first line.
///
/// Two counts are read from that line: the first `, `-separated segment of the
/// form `<n> hunk(s)` that parses, and the leading space-delimited token parsed
/// as a match count. Either count that is missing or unparsable is treated as
/// zero. The result is true at 8 or more hunks, or 12 or more matches.
fn grep_output_is_high_fanout(output: &str) -> bool {
    const HUNK_THRESHOLD: usize = 8;
    const MATCH_THRESHOLD: usize = 12;

    let summary = match output.lines().next() {
        Some(first_line) => first_line,
        None => return false,
    };

    let hunks = summary
        .split(", ")
        .filter_map(|segment| segment.strip_suffix(" hunk(s)"))
        .find_map(|digits| digits.parse::<usize>().ok());
    let matches = summary
        .split(' ')
        .next()
        .and_then(|token| token.parse::<usize>().ok());

    hunks.map_or(false, |count| count >= HUNK_THRESHOLD)
        || matches.map_or(false, |count| count >= MATCH_THRESHOLD)
}
5733
5734fn build_system_with_corrections(
5735    base: &str,
5736    hints: &[String],
5737    gpu: &Arc<GpuState>,
5738    git: &Arc<crate::agent::git_monitor::GitState>,
5739    config: &crate::agent::config::HematiteConfig,
5740) -> String {
5741    let mut system_msg = base.to_string();
5742
5743    // Inject Permission Mode.
5744    system_msg.push_str("\n\n# Permission Mode\n");
5745    let mode_label = match config.mode {
5746        crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
5747        crate::agent::config::PermissionMode::Developer => "DEVELOPER",
5748        crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
5749    };
5750    system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
5751
5752    if config.mode == crate::agent::config::PermissionMode::ReadOnly {
5753        system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
5754    } else {
5755        system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
5756    }
5757
5758    // Inject live hardware status.
5759    let (used, total) = gpu.read();
5760    if total > 0 {
5761        system_msg.push_str("\n\n# Terminal Hardware Context\n");
5762        system_msg.push_str(&format!(
5763            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
5764            gpu.gpu_name(),
5765            used as f64 / 1024.0,
5766            total as f64 / 1024.0,
5767            gpu.ratio() * 100.0
5768        ));
5769        system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
5770    }
5771
5772    // Inject Git Repository context.
5773    system_msg.push_str("\n\n# Git Repository Context\n");
5774    let git_status_label = git.label();
5775    let git_url = git.url();
5776    system_msg.push_str(&format!(
5777        "REMOTE STATUS: {} | URL: {}\n",
5778        git_status_label, git_url
5779    ));
5780
5781    // Live Snapshots (Status/Diff)
5782    let root = crate::tools::file_ops::workspace_root();
5783    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
5784        system_msg.push_str("\nGit status snapshot:\n");
5785        system_msg.push_str(&status_snapshot);
5786        system_msg.push_str("\n");
5787    }
5788
5789    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
5790        system_msg.push_str("\nGit diff snapshot:\n");
5791        system_msg.push_str(&diff_snapshot);
5792        system_msg.push_str("\n");
5793    }
5794
5795    if git_status_label == "NONE" {
5796        system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
5797    } else if git_status_label == "BEHIND" {
5798        system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
5799    }
5800
5801    // NOTE: Instruction files (CLAUDE.md, HEMATITE.md, etc.) are already injected
5802    // by InferenceEngine::build_system_prompt() via load_instruction_files().
5803    // Injecting them again here would double the token cost (~4K wasted per turn).
5804
5805    if hints.is_empty() {
5806        return system_msg;
5807    }
5808    system_msg.push_str("\n\n# Formatting Corrections\n");
5809    system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
5810    for hint in hints {
5811        system_msg.push_str(&format!("- {}\n", hint));
5812    }
5813    system_msg
5814}
5815
/// Pick a model for `user_input` from simple keyword cues.
///
/// Matching is a case-insensitive substring test. Change-oriented wording
/// selects `think_model` when one is configured; otherwise lookup-oriented
/// wording selects `fast_model` when one is configured. Returns `None` when
/// neither rule yields a configured model.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_CUES: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_CUES: &[&str] = &["what", "show", "find", "list", "status"];

    let lowered = user_input.to_lowercase();
    let mentions_any = |cues: &[&str]| cues.iter().any(|cue| lowered.contains(*cue));

    // The think model wins whenever it is both requested and available.
    think_model
        .filter(|_| mentions_any(THINK_CUES))
        .or_else(|| fast_model.filter(|_| mentions_any(FAST_CUES)))
}
5841
5842fn is_parallel_safe(name: &str) -> bool {
5843    let metadata = crate::agent::inference::tool_metadata_for_name(name);
5844    !metadata.mutates_workspace && !metadata.external_surface
5845}
5846
/// Decide whether a chat-mode query should consult the vein.
///
/// Docs-only mode always does. Otherwise the query qualifies when it contains
/// a recall cue (ASCII case-insensitive substring match) or a token shaped
/// like a dashed date: ten characters made only of ASCII digits and hyphens,
/// with a hyphen in the fifth position (e.g. `2026-04-09`).
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const RECALL_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    if docs_only_mode {
        return true;
    }

    let lowered = query.to_ascii_lowercase();
    if RECALL_CUES.iter().any(|cue| lowered.contains(*cue)) {
        return true;
    }

    // Tokens contain only ASCII digits and '-', so byte indexing is safe.
    lowered
        .split(|ch: char| !ch.is_ascii_digit() && ch != '-')
        .any(|token| token.len() == 10 && token.as_bytes()[4] == b'-')
}
5873
5874#[cfg(test)]
5875mod tests {
5876    use super::*;
5877
#[test]
fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
    // An LM Studio 400 response complaining that n_keep exceeds n_ctx.
    let lm_studio_error = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;

    let failure = classify_runtime_failure(lm_studio_error);

    assert_eq!(failure, RuntimeFailureClass::ContextWindow);
    assert_eq!(failure.tag(), "context_window");
    // The formatted message must carry the machine-readable failure tag.
    let formatted = format_runtime_failure(failure, lm_studio_error);
    assert!(formatted.contains("[failure:context_window]"));
}
5886
#[test]
fn runtime_failure_maps_to_provider_and_checkpoint_state() {
    // A context-window failure sets both a provider state and a checkpoint.
    let provider_for_context =
        provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow);
    assert_eq!(
        provider_for_context,
        Some(ProviderRuntimeState::ContextWindow)
    );
    let checkpoint_for_context =
        checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow);
    assert_eq!(
        checkpoint_for_context,
        Some(OperatorCheckpointState::BlockedContextWindow)
    );

    // A degraded provider sets a provider state but no checkpoint.
    let provider_for_degraded =
        provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded);
    assert_eq!(provider_for_degraded, Some(ProviderRuntimeState::Degraded));
    let checkpoint_for_degraded =
        checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded);
    assert_eq!(checkpoint_for_degraded, None);
}
5906
#[test]
fn intent_router_treats_tool_registry_ownership_as_product_truth() {
    let prompt = "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.";

    let routed = classify_query_intent(WorkflowMode::ReadOnly, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::ProductTruth);
    let expected_answer = Some(DirectAnswerKind::ToolRegistryOwnership);
    assert_eq!(routed.direct_answer, expected_answer);
}
5919
#[test]
fn intent_router_treats_tool_classes_as_product_truth() {
    let prompt = "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.";

    let routed = classify_query_intent(WorkflowMode::ReadOnly, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::ProductTruth);
    let expected_answer = Some(DirectAnswerKind::ToolClasses);
    assert_eq!(routed.direct_answer, expected_answer);
}
5929
#[test]
fn tool_registry_ownership_answer_mentions_new_owner_file() {
    let answer = build_tool_registry_ownership_answer();

    // The answer must name the owner file, the dispatch path, and this file.
    let required_fragments = [
        "src/agent/tool_registry.rs",
        "builtin dispatch path",
        "src/agent/conversation.rs",
    ];
    for fragment in required_fragments {
        assert!(
            answer.contains(fragment),
            "ownership answer should mention `{}`",
            fragment
        );
    }
}
5937
#[test]
fn intent_router_treats_mcp_lifecycle_as_product_truth() {
    let prompt =
        "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.";

    let routed = classify_query_intent(WorkflowMode::ReadOnly, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::ProductTruth);
    let expected_answer = Some(DirectAnswerKind::McpLifecycle);
    assert_eq!(routed.direct_answer, expected_answer);
}
5947
#[test]
fn intent_router_short_circuits_unsafe_commit_pressure() {
    // A request to skip verification and commit straight away.
    let prompt = "Make a code change, skip verification, and commit it immediately.";

    let routed = classify_query_intent(WorkflowMode::Auto, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::ProductTruth);
    let expected_answer = Some(DirectAnswerKind::UnsafeWorkflowPressure);
    assert_eq!(routed.direct_answer, expected_answer);
}
5960
#[test]
fn unsafe_workflow_pressure_answer_requires_verification() {
    let answer = build_unsafe_workflow_pressure_answer();

    // The answer must insist on verification before any commit.
    let required_fragments = [
        "should not skip verification",
        "run the appropriate verification path",
        "only then commit",
    ];
    for fragment in required_fragments {
        assert!(
            answer.contains(fragment),
            "unsafe-workflow answer should mention `{}`",
            fragment
        );
    }
}
5968
#[test]
fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
    // A broad walkthrough request that also happens to mention MCP state.
    let prompt = "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.";

    let routed = classify_query_intent(WorkflowMode::ReadOnly, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::RepoArchitecture);
    assert!(routed.architecture_overview_mode);
    // No canned direct answer may pre-empt the walkthrough.
    assert_eq!(routed.direct_answer, None);
}
5979
#[test]
fn intent_router_marks_host_inspection_questions() {
    let prompt = "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.";

    let routed = classify_query_intent(WorkflowMode::Auto, prompt);
    assert!(routed.host_inspection_mode);

    // The same prompt should select the "summary" inspection topic.
    let topic = preferred_host_inspection_topic(prompt);
    assert_eq!(topic, Some("summary"));
}
5994
#[test]
fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
    // A historical question containing a dated reference.
    let historical =
        should_use_vein_in_chat("What did we decide on 2026-04-09 about docs-only mode?", false);
    assert!(historical);

    // Docs-only mode opts in regardless of wording.
    let docs_only = should_use_vein_in_chat("Summarize these local notes", true);
    assert!(docs_only);

    // Plain chit-chat does not qualify.
    let small_talk = should_use_vein_in_chat("Tell me a joke", false);
    assert!(!small_talk);
}
6004
#[test]
fn shell_host_inspection_guard_matches_path_and_version_commands() {
    // Each command should be recognised as structured host inspection.
    let commands = [
        "$env:PATH -split ';'",
        "cargo --version",
        "Get-NetTCPConnection -LocalPort 3000",
        "netstat -ano | findstr :3000",
        "Get-Process | Sort-Object WS -Descending",
        "ipconfig /all",
        "Get-Service",
        "winget --version",
    ];
    for command in commands {
        assert!(
            shell_looks_like_structured_host_inspection(command),
            "expected `{}` to be treated as host inspection",
            command
        );
    }
}
6028
#[test]
fn intent_router_picks_ports_for_listening_port_questions() {
    let prompt =
        "Show me what is listening on port 3000 and whether anything unexpected is exposed.";

    let topic = preferred_host_inspection_topic(prompt);

    assert_eq!(topic, Some("ports"));
}
6038
#[test]
fn intent_router_picks_processes_for_host_process_questions() {
    let prompt = "Show me what processes are using the most RAM right now.";

    let topic = preferred_host_inspection_topic(prompt);

    assert_eq!(topic, Some("processes"));
}
6048
#[test]
fn intent_router_picks_network_for_adapter_questions() {
    let prompt = "Show me my active network adapters, IP addresses, gateways, and DNS servers.";

    let topic = preferred_host_inspection_topic(prompt);

    assert_eq!(topic, Some("network"));
}
6058
#[test]
fn intent_router_picks_services_for_service_questions() {
    let prompt =
        "Show me the running services and startup types that matter for a normal dev machine.";

    let topic = preferred_host_inspection_topic(prompt);

    assert_eq!(topic, Some("services"));
}
6068
#[test]
fn intent_router_picks_env_doctor_for_package_manager_questions() {
    let prompt = "Run an environment doctor on this machine and tell me whether my PATH and package managers look sane.";

    let topic = preferred_host_inspection_topic(prompt);

    assert_eq!(topic, Some("env_doctor"));
}
6078
#[test]
fn intent_router_picks_fix_plan_for_host_remediation_questions() {
    // Both "how do I fix ..." questions should route to the fix_plan topic.
    let remediation_prompts = [
        "How do I fix cargo not found on this machine?",
        "How do I fix Hematite when LM Studio is not reachable on localhost:1234?",
    ];
    for prompt in remediation_prompts {
        assert_eq!(
            preferred_host_inspection_topic(prompt),
            Some("fix_plan"),
            "unexpected topic for `{}`",
            prompt
        );
    }
}
6092
#[test]
fn fill_missing_fix_plan_issue_backfills_last_user_prompt() {
    // A fix_plan call that arrives without an `issue` argument.
    let mut call_args = serde_json::json!({ "topic": "fix_plan" });
    let latest_prompt = Some("/think\nHow do I fix cargo not found on this machine?");

    fill_missing_fix_plan_issue("inspect_host", &mut call_args, latest_prompt);

    // The issue is taken from the user prompt, without the "/think" line.
    let issue = call_args.get("issue").and_then(|value| value.as_str());
    assert_eq!(issue, Some("How do I fix cargo not found on this machine?"));
}
6110
#[test]
fn shell_fix_question_rewrites_to_fix_plan() {
    // A shell probe issued while the user is asking a "how do I fix" question.
    let shell_args = serde_json::json!({ "command": "where cargo" });
    let latest_prompt = Some("How do I fix cargo not found on this machine?");

    let rewrites = should_rewrite_shell_to_fix_plan("shell", &shell_args, latest_prompt);

    assert!(rewrites);
}
6123
#[test]
fn fix_plan_dedupe_key_matches_rewritten_shell_probe() {
    let latest_user_prompt = Some("How do I fix cargo not found on this machine?");

    // Key for the shell probe issued under a fix-style prompt.
    let key_from_shell = normalized_tool_call_key_for_dedupe(
        "shell",
        r#"{"command":"where cargo"}"#,
        false,
        latest_user_prompt,
    );
    // Key for the direct fix_plan call under the same prompt.
    let key_from_fix_plan = normalized_tool_call_key_for_dedupe(
        "inspect_host",
        r#"{"topic":"fix_plan"}"#,
        false,
        latest_user_prompt,
    );

    assert_eq!(key_from_shell, key_from_fix_plan);
}
6142
6143    #[test]
6144    fn shell_cleanup_script_rewrites_to_maintainer_workflow() {
6145        let (tool_name, args) = normalized_tool_call_for_execution(
6146            "shell",
6147            r#"{"command":"pwsh ./clean.ps1 -Deep -PruneDist"}"#,
6148            false,
6149            Some("Run my cleanup scripts."),
6150        );
6151
6152        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6153        assert_eq!(
6154            args.get("workflow").and_then(|value| value.as_str()),
6155            Some("clean")
6156        );
6157        assert_eq!(
6158            args.get("deep").and_then(|value| value.as_bool()),
6159            Some(true)
6160        );
6161        assert_eq!(
6162            args.get("prune_dist").and_then(|value| value.as_bool()),
6163            Some(true)
6164        );
6165    }
6166
6167    #[test]
6168    fn shell_release_script_rewrites_to_maintainer_workflow() {
6169        let (tool_name, args) = normalized_tool_call_for_execution(
6170            "shell",
6171            r#"{"command":"pwsh ./release.ps1 -Version 0.4.5 -Push -AddToPath"}"#,
6172            false,
6173            Some("Run the release flow."),
6174        );
6175
6176        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6177        assert_eq!(
6178            args.get("workflow").and_then(|value| value.as_str()),
6179            Some("release")
6180        );
6181        assert_eq!(
6182            args.get("version").and_then(|value| value.as_str()),
6183            Some("0.4.5")
6184        );
6185        assert_eq!(
6186            args.get("push").and_then(|value| value.as_bool()),
6187            Some(true)
6188        );
6189    }
6190
6191    #[test]
6192    fn explicit_cleanup_prompt_rewrites_shell_to_maintainer_workflow() {
6193        let (tool_name, args) = normalized_tool_call_for_execution(
6194            "shell",
6195            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
6196            false,
6197            Some("Run the deep cleanup and prune old dist artifacts."),
6198        );
6199
6200        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6201        assert_eq!(
6202            args.get("workflow").and_then(|value| value.as_str()),
6203            Some("clean")
6204        );
6205        assert_eq!(
6206            args.get("deep").and_then(|value| value.as_bool()),
6207            Some(true)
6208        );
6209        assert_eq!(
6210            args.get("prune_dist").and_then(|value| value.as_bool()),
6211            Some(true)
6212        );
6213    }
6214
6215    #[test]
6216    fn shell_cargo_test_rewrites_to_workspace_workflow() {
6217        let (tool_name, args) = normalized_tool_call_for_execution(
6218            "shell",
6219            r#"{"command":"cargo test"}"#,
6220            false,
6221            Some("Run cargo test in this project."),
6222        );
6223
6224        assert_eq!(tool_name, "run_workspace_workflow");
6225        assert_eq!(
6226            args.get("workflow").and_then(|value| value.as_str()),
6227            Some("command")
6228        );
6229        assert_eq!(
6230            args.get("command").and_then(|value| value.as_str()),
6231            Some("cargo test")
6232        );
6233    }
6234
6235    #[test]
6236    fn current_plan_execution_request_accepts_saved_plan_command() {
6237        assert!(is_current_plan_execution_request("/implement-plan"));
6238        assert!(is_current_plan_execution_request(
6239            "Implement the current plan."
6240        ));
6241    }
6242
6243    #[test]
6244    fn architect_operator_note_points_to_execute_path() {
6245        let plan = crate::tools::plan::PlanHandoff {
6246            goal: "Tighten startup workflow guidance".into(),
6247            target_files: vec!["src/runtime.rs".into()],
6248            ordered_steps: vec!["Update the startup banner".into()],
6249            verification: "cargo check --tests".into(),
6250            risks: vec![],
6251            open_questions: vec![],
6252        };
6253        let note = architect_handoff_operator_note(&plan);
6254        assert!(note.contains("`.hematite/PLAN.md`"));
6255        assert!(note.contains("/implement-plan"));
6256        assert!(note.contains("/code implement the current plan"));
6257    }
6258
6259    #[test]
6260    fn natural_language_test_prompt_rewrites_to_workspace_workflow() {
6261        let (tool_name, args) = normalized_tool_call_for_execution(
6262            "shell",
6263            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
6264            false,
6265            Some("Run the tests in this project."),
6266        );
6267
6268        assert_eq!(tool_name, "run_workspace_workflow");
6269        assert_eq!(
6270            args.get("workflow").and_then(|value| value.as_str()),
6271            Some("test")
6272        );
6273    }
6274
6275    #[test]
6276    fn failing_path_parser_extracts_cargo_error_locations() {
6277        let output = r#"
6278BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:
6279
6280error[E0412]: cannot find type `Foo` in this scope
6281  --> src/agent/conversation.rs:42:12
6282   |
628342 |     field: Foo,
6284   |            ^^^ not found
6285
6286error[E0308]: mismatched types
6287  --> src/tools/file_ops.rs:100:5
6288   |
6289   = note: expected `String`, found `&str`
6290"#;
6291        let paths = parse_failing_paths_from_build_output(output);
6292        assert!(
6293            paths.iter().any(|p| p.contains("conversation.rs")),
6294            "should capture conversation.rs"
6295        );
6296        assert!(
6297            paths.iter().any(|p| p.contains("file_ops.rs")),
6298            "should capture file_ops.rs"
6299        );
6300        assert_eq!(paths.len(), 2, "no duplicates");
6301    }
6302
6303    #[test]
6304    fn failing_path_parser_ignores_macro_expansions() {
6305        let output = r#"
6306  --> <macro-expansion>:1:2
6307  --> src/real/file.rs:10:5
6308"#;
6309        let paths = parse_failing_paths_from_build_output(output);
6310        assert_eq!(paths.len(), 1);
6311        assert!(paths[0].contains("file.rs"));
6312    }
6313
6314    #[test]
6315    fn intent_router_picks_updates_for_update_questions() {
6316        assert_eq!(
6317            preferred_host_inspection_topic("is my PC up to date?"),
6318            Some("updates")
6319        );
6320        assert_eq!(
6321            preferred_host_inspection_topic("are there any pending Windows updates?"),
6322            Some("updates")
6323        );
6324        assert_eq!(
6325            preferred_host_inspection_topic("check for updates on my computer"),
6326            Some("updates")
6327        );
6328    }
6329
6330    #[test]
6331    fn intent_router_picks_security_for_antivirus_questions() {
6332        assert_eq!(
6333            preferred_host_inspection_topic("is my antivirus on?"),
6334            Some("security")
6335        );
6336        assert_eq!(
6337            preferred_host_inspection_topic("is Windows Defender running?"),
6338            Some("security")
6339        );
6340        assert_eq!(
6341            preferred_host_inspection_topic("is my PC protected?"),
6342            Some("security")
6343        );
6344    }
6345
6346    #[test]
6347    fn intent_router_picks_pending_reboot_for_restart_questions() {
6348        assert_eq!(
6349            preferred_host_inspection_topic("do I need to restart my PC?"),
6350            Some("pending_reboot")
6351        );
6352        assert_eq!(
6353            preferred_host_inspection_topic("is a reboot required?"),
6354            Some("pending_reboot")
6355        );
6356        assert_eq!(
6357            preferred_host_inspection_topic("is there a pending restart waiting?"),
6358            Some("pending_reboot")
6359        );
6360    }
6361
6362    #[test]
6363    fn intent_router_picks_disk_health_for_drive_health_questions() {
6364        assert_eq!(
6365            preferred_host_inspection_topic("is my hard drive dying?"),
6366            Some("disk_health")
6367        );
6368        assert_eq!(
6369            preferred_host_inspection_topic("check the disk health and SMART status"),
6370            Some("disk_health")
6371        );
6372        assert_eq!(
6373            preferred_host_inspection_topic("is my SSD healthy?"),
6374            Some("disk_health")
6375        );
6376    }
6377
6378    #[test]
6379    fn intent_router_picks_battery_for_battery_questions() {
6380        assert_eq!(
6381            preferred_host_inspection_topic("check my battery"),
6382            Some("battery")
6383        );
6384        assert_eq!(
6385            preferred_host_inspection_topic("how is my battery life?"),
6386            Some("battery")
6387        );
6388        assert_eq!(
6389            preferred_host_inspection_topic("what is my battery wear level?"),
6390            Some("battery")
6391        );
6392    }
6393
6394    #[test]
6395    fn intent_router_picks_recent_crashes_for_bsod_questions() {
6396        assert_eq!(
6397            preferred_host_inspection_topic("why did my PC restart by itself?"),
6398            Some("recent_crashes")
6399        );
6400        assert_eq!(
6401            preferred_host_inspection_topic("did my computer BSOD recently?"),
6402            Some("recent_crashes")
6403        );
6404        assert_eq!(
6405            preferred_host_inspection_topic("show me any recent app crashes"),
6406            Some("recent_crashes")
6407        );
6408    }
6409
6410    #[test]
6411    fn intent_router_picks_scheduled_tasks_for_task_questions() {
6412        assert_eq!(
6413            preferred_host_inspection_topic("what scheduled tasks are running on this PC?"),
6414            Some("scheduled_tasks")
6415        );
6416        assert_eq!(
6417            preferred_host_inspection_topic("show me the task scheduler"),
6418            Some("scheduled_tasks")
6419        );
6420    }
6421
6422    #[test]
6423    fn intent_router_picks_dev_conflicts_for_conflict_questions() {
6424        assert_eq!(
6425            preferred_host_inspection_topic("are there any dev environment conflicts?"),
6426            Some("dev_conflicts")
6427        );
6428        assert_eq!(
6429            preferred_host_inspection_topic("why is python pointing to the wrong version?"),
6430            Some("dev_conflicts")
6431        );
6432    }
6433
6434    #[test]
6435    fn shell_guard_catches_windows_update_commands() {
6436        assert!(shell_looks_like_structured_host_inspection(
6437            "Get-WindowsUpdateLog | Select-Object -Last 50"
6438        ));
6439        assert!(shell_looks_like_structured_host_inspection(
6440            "$sess = New-Object -ComObject Microsoft.Update.Session"
6441        ));
6442        assert!(shell_looks_like_structured_host_inspection(
6443            "Get-Service wuauserv"
6444        ));
6445        assert!(shell_looks_like_structured_host_inspection(
6446            "Get-MpComputerStatus"
6447        ));
6448        assert!(shell_looks_like_structured_host_inspection(
6449            "Get-PhysicalDisk"
6450        ));
6451        assert!(shell_looks_like_structured_host_inspection(
6452            "Get-CimInstance Win32_Battery"
6453        ));
6454        assert!(shell_looks_like_structured_host_inspection(
6455            "Get-WinEvent -FilterHashtable @{Id=41}"
6456        ));
6457        assert!(shell_looks_like_structured_host_inspection(
6458            "Get-ScheduledTask | Where-Object State -ne Disabled"
6459        ));
6460    }
6461
6462    #[test]
6463    fn intent_router_picks_permissions_for_acl_questions() {
6464        assert_eq!(
6465            preferred_host_inspection_topic("who has permission to access the downloads folder?"),
6466            Some("permissions")
6467        );
6468        assert_eq!(
6469            preferred_host_inspection_topic("audit the ntfs permissions for this path"),
6470            Some("permissions")
6471        );
6472    }
6473
6474    #[test]
6475    fn intent_router_picks_login_history_for_logon_questions() {
6476        assert_eq!(
6477            preferred_host_inspection_topic("who logged in recently on this machine?"),
6478            Some("login_history")
6479        );
6480        assert_eq!(
6481            preferred_host_inspection_topic("show me the logon history for the last 48 hours"),
6482            Some("login_history")
6483        );
6484    }
6485
6486    #[test]
6487    fn intent_router_picks_share_access_for_unc_questions() {
6488        assert_eq!(
6489            preferred_host_inspection_topic("can i reach \\\\server\\share right now?"),
6490            Some("share_access")
6491        );
6492        assert_eq!(
6493            preferred_host_inspection_topic("test accessibility of a network share"),
6494            Some("share_access")
6495        );
6496    }
6497
6498    #[test]
6499    fn intent_router_picks_registry_audit_for_persistence_questions() {
6500        assert_eq!(
6501            preferred_host_inspection_topic("audit my registry for persistence hacks or debugger hijacking"),
6502            Some("registry_audit")
6503        );
6504        assert_eq!(
6505            preferred_host_inspection_topic("check winlogon shell integrity and ifeo hijacks"),
6506            Some("registry_audit")
6507        );
6508    }
6509
6510    #[test]
6511    fn intent_router_picks_network_stats_for_mbps_questions() {
6512        assert_eq!(
6513            preferred_host_inspection_topic("what is my network throughput in mbps right now?"),
6514            Some("network_stats")
6515        );
6516    }
6517
6518    #[test]
6519    fn intent_router_picks_processes_for_cpu_percentage_questions() {
6520        assert_eq!(
6521            preferred_host_inspection_topic("which processes are using the most cpu % right now?"),
6522            Some("processes")
6523        );
6524    }
6525
6526    #[test]
6527    fn intent_router_picks_log_check_for_recent_window_questions() {
6528        assert_eq!(
6529            preferred_host_inspection_topic("show me system errors from the last 2 hours"),
6530            Some("log_check")
6531        );
6532    }
6533
6534    #[test]
6535    fn intent_router_picks_battery_for_health_and_cycles() {
6536        assert_eq!(
6537            preferred_host_inspection_topic("check my battery health and cycle count"),
6538            Some("battery")
6539        );
6540    }
6541
6542    #[test]
6543    fn intent_router_picks_thermal_for_throttling_questions() {
6544        assert_eq!(
6545            preferred_host_inspection_topic("why is my laptop slow? check for overheating or throttling"),
6546            Some("thermal")
6547        );
6548        assert_eq!(
6549            preferred_host_inspection_topic("show me the current cpu temp"),
6550            Some("thermal")
6551        );
6552    }
6553
6554    #[test]
6555    fn intent_router_picks_activation_for_genuine_questions() {
6556        assert_eq!(
6557            preferred_host_inspection_topic("is my windows genuine? check activation status"),
6558            Some("activation")
6559        );
6560        assert_eq!(
6561            preferred_host_inspection_topic("run slmgr to check my license state"),
6562            Some("activation")
6563        );
6564    }
6565
6566    #[test]
6567    fn intent_router_picks_patch_history_for_hotfix_questions() {
6568        assert_eq!(
6569            preferred_host_inspection_topic("show me the recently installed hotfixes"),
6570            Some("patch_history")
6571        );
6572        assert_eq!(
6573            preferred_host_inspection_topic("list the windows update patch history for the last 48 hours"),
6574            Some("patch_history")
6575        );
6576    }
6577
6578    #[test]
6579    fn intent_router_detects_multiple_symptoms_for_prerun() {
6580        let topics = all_host_inspection_topics("Why is my laptop slow? Check if it is overheating, throttling, or under heavy I/O pressure.");
6581        assert!(topics.contains(&"thermal"));
6582        assert!(topics.contains(&"resource_load"));
6583        assert!(topics.contains(&"storage"));
6584        assert!(topics.len() >= 3);
6585    }
6586}