hematite/agent/
conversation.rs

1use crate::agent::architecture_summary::{
2    build_architecture_overview_answer, prune_architecture_trace_batch,
3    prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4    prune_redirected_shell_batch, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7    build_about_answer, build_architect_session_reset_plan, build_authorization_policy_answer,
8    build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9    build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10    build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11    build_session_reset_semantics_answer, build_tool_classes_answer,
12    build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13    build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16    ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17    ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20    action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21    is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24    attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25    RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28    all_host_inspection_topics, classify_query_intent, is_capability_probe_tool,
29    looks_like_mutation_request, needs_computation_sandbox, preferred_host_inspection_topic,
30    preferred_maintainer_workflow, preferred_workspace_workflow, DirectAnswerKind,
31    QueryIntentClass,
32};
33use crate::agent::tool_registry::dispatch_builtin_tool;
34// SystemPromptBuilder is no longer used — InferenceEngine::build_system_prompt() is canonical.
35use crate::agent::compaction::{self, CompactionConfig};
36use crate::ui::gpu_monitor::GpuState;
37
38use serde_json::Value;
39use std::sync::Arc;
40use tokio::sync::{mpsc, Mutex};
41// -- Session persistence -------------------------------------------------------
42
43#[derive(Clone, Debug, Default)]
44pub struct UserTurn {
45    pub text: String,
46    pub attached_document: Option<AttachedDocument>,
47    pub attached_image: Option<AttachedImage>,
48}
49
/// A document attached to a user turn, carried as a display name plus its
/// textual content.
#[derive(Debug, Clone)]
pub struct AttachedDocument {
    /// Name shown in the `[Attached document: …]` marker.
    pub name: String,
    /// Document text that gets inlined into the prompt.
    pub content: String,
}
55
/// An image attached to a user turn, identified by a display name and a path.
#[derive(Debug, Clone)]
pub struct AttachedImage {
    /// Name shown in the `[Attached image: …]` marker.
    pub name: String,
    /// Location of the image (NOTE(review): path format is decided by callers
    /// outside this view).
    pub path: String,
}
61
62impl UserTurn {
63    pub fn text(text: impl Into<String>) -> Self {
64        Self {
65            text: text.into(),
66            attached_document: None,
67            attached_image: None,
68        }
69    }
70}
71
72#[derive(serde::Serialize, serde::Deserialize)]
73struct SavedSession {
74    running_summary: Option<String>,
75    #[serde(default)]
76    session_memory: crate::agent::compaction::SessionMemory,
77    /// Last user message from the previous session — shown as resume hint on startup.
78    #[serde(default)]
79    last_goal: Option<String>,
80    /// Number of real inference turns completed in the previous session.
81    #[serde(default)]
82    turn_count: u32,
83}
84
/// What is known about the previous session when a workspace is reopened
/// after a restart or crash; shown to the user at startup.
pub struct CheckpointResume {
    /// The last goal the user gave in the previous session.
    pub last_goal: String,
    /// How many turns the previous session completed.
    pub turn_count: u32,
    /// Files from the previous session's working set.
    pub working_files: Vec<String>,
    /// Outcome of the last recorded verification; `None` when none was recorded.
    pub last_verify_ok: Option<bool>,
}
93
94/// Load the prior-session checkpoint from `.hematite/session.json`.
95/// Returns `None` when there is no prior session or it has no real turns.
96pub fn load_checkpoint() -> Option<CheckpointResume> {
97    let path = session_path();
98    let data = std::fs::read_to_string(&path).ok()?;
99    let saved: SavedSession = serde_json::from_str(&data).ok()?;
100    let goal = saved.last_goal.filter(|g| !g.trim().is_empty())?;
101    if saved.turn_count == 0 {
102        return None;
103    }
104    let mut working_files: Vec<String> = saved
105        .session_memory
106        .working_set
107        .into_iter()
108        .take(4)
109        .collect();
110    working_files.sort();
111    let last_verify_ok = saved.session_memory.last_verification.map(|v| v.successful);
112    Some(CheckpointResume {
113        last_goal: goal,
114        turn_count: saved.turn_count,
115        working_files,
116        last_verify_ok,
117    })
118}
119
/// Bookkeeping behind the proof-before-action checks. Starts out empty via
/// `Default`.
#[derive(Default)]
struct ActionGroundingState {
    /// Turn counter.
    turn_index: u64,
    /// Paths that have been observed, each mapped to a `u64` marker
    /// (presumably the turn index at observation time — confirm at call sites).
    observed_paths: std::collections::HashMap<String, u64>,
    /// Paths that have been inspected, with the same kind of `u64` marker.
    inspected_paths: std::collections::HashMap<String, u64>,
    /// Turn of the most recent build verification, if any ran.
    last_verify_build_turn: Option<u64>,
    /// Whether that most recent build verification succeeded.
    last_verify_build_ok: bool,
    /// Paths recorded from the last failed build.
    last_failed_build_paths: Vec<String>,
    /// Set when code changed after the last verification.
    code_changed_since_verify: bool,
    /// Topics redirected from shell to inspect_host during the current turn;
    /// tracked so redirect loops can be broken.
    redirected_host_inspection_topics: std::collections::HashMap<String, u64>,
}
132
/// Holds a handle to a shared boolean flag for the lifetime of the guard.
struct PlanExecutionGuard {
    /// The shared flag this guard is tied to.
    flag: Arc<std::sync::atomic::AtomicBool>,
}
136
137impl Drop for PlanExecutionGuard {
138    fn drop(&mut self) {
139        self.flag.store(false, std::sync::atomic::Ordering::SeqCst);
140    }
141}
142
/// Operating mode of the agent. `Auto` is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum WorkflowMode {
    /// Default mode.
    #[default]
    Auto,
    /// Read-only question-answering mode.
    Ask,
    /// Implementation mode.
    Code,
    /// Plan-first mode (treated as read-only).
    Architect,
    /// Hard no-mutation mode.
    ReadOnly,
    /// Conversational mode with a lighter prompt and no coding-agent
    /// scaffolding; tools stay available but are not pushed, and Vein RAG
    /// still supplies context.
    Chat,
    /// Guide mode: the real machine state is inspected first, then the user is
    /// walked through the admin/config task as a grounded, numbered tutorial.
    /// Write operations are never executed here; the user performs them.
    Teach,
}
159
160impl WorkflowMode {
161    fn label(self) -> &'static str {
162        match self {
163            WorkflowMode::Auto => "AUTO",
164            WorkflowMode::Ask => "ASK",
165            WorkflowMode::Code => "CODE",
166            WorkflowMode::Architect => "ARCHITECT",
167            WorkflowMode::ReadOnly => "READ-ONLY",
168            WorkflowMode::Chat => "CHAT",
169            WorkflowMode::Teach => "TEACH",
170        }
171    }
172
173    fn is_read_only(self) -> bool {
174        matches!(
175            self,
176            WorkflowMode::Ask
177                | WorkflowMode::Architect
178                | WorkflowMode::ReadOnly
179                | WorkflowMode::Teach
180        )
181    }
182
183    pub(crate) fn is_chat(self) -> bool {
184        matches!(self, WorkflowMode::Chat)
185    }
186}
187
188fn session_path() -> std::path::PathBuf {
189    if let Ok(overridden) = std::env::var("HEMATITE_SESSION_PATH") {
190        return std::path::PathBuf::from(overridden);
191    }
192    crate::tools::file_ops::workspace_root()
193        .join(".hematite")
194        .join("session.json")
195}
196
197fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
198    let path = session_path();
199    if !path.exists() {
200        return (None, crate::agent::compaction::SessionMemory::default());
201    }
202    let Ok(data) = std::fs::read_to_string(&path) else {
203        return (None, crate::agent::compaction::SessionMemory::default());
204    };
205    let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
206        return (None, crate::agent::compaction::SessionMemory::default());
207    };
208    (saved.running_summary, saved.session_memory)
209}
210
211fn reset_task_files() {
212    let root = crate::tools::file_ops::workspace_root();
213    let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
214    let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
215    let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
216    let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
217    let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
218    let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
219}
220
221fn purge_persistent_memory() {
222    let root = crate::tools::file_ops::workspace_root();
223    let mem_dir = root.join(".hematite").join("memories");
224    if mem_dir.exists() {
225        let _ = std::fs::remove_dir_all(&mem_dir);
226        let _ = std::fs::create_dir_all(&mem_dir);
227    }
228
229    let log_dir = root.join(".hematite_logs");
230    if log_dir.exists() {
231        if let Ok(entries) = std::fs::read_dir(&log_dir) {
232            for entry in entries.flatten() {
233                let _ = std::fs::write(entry.path(), "");
234            }
235        }
236    }
237}
238
239fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
240    let mut out = prompt.trim().to_string();
241    if let Some(doc) = user_turn.attached_document.as_ref() {
242        out = format!(
243            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
244            doc.name, doc.content, out
245        );
246    }
247    if let Some(image) = user_turn.attached_image.as_ref() {
248        out = if out.is_empty() {
249            format!("[Attached image: {}]", image.name)
250        } else {
251            format!("[Attached image: {}]\n\n{}", image.name, out)
252        };
253    }
254    out
255}
256
257fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
258    let mut prefixes = Vec::new();
259    if let Some(doc) = user_turn.attached_document.as_ref() {
260        prefixes.push(format!("[Attached document: {}]", doc.name));
261    }
262    if let Some(image) = user_turn.attached_image.as_ref() {
263        prefixes.push(format!("[Attached image: {}]", image.name));
264    }
265    if prefixes.is_empty() {
266        prompt.to_string()
267    } else if prompt.trim().is_empty() {
268        prefixes.join("\n")
269    } else {
270        format!("{}\n{}", prefixes.join("\n"), prompt)
271    }
272}
273
/// Coarse category assigned to a runtime failure; drives the tag, operator
/// guidance, retry and recovery decisions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum RuntimeFailureClass {
    /// The prompt did not fit the model's context window.
    ContextWindow,
    /// The provider was unreachable or returned an error.
    ProviderDegraded,
    /// A tool call carried missing or invalid arguments.
    ToolArgMalformed,
    /// An action was blocked, denied, or declined.
    ToolPolicyBlocked,
    /// Repeated tool failures were detected.
    ToolLoop,
    /// A build or verification step failed.
    VerificationFailed,
    /// The model returned nothing.
    EmptyModelResponse,
    /// No known failure pattern matched.
    Unknown,
}
285
286impl RuntimeFailureClass {
287    fn tag(self) -> &'static str {
288        match self {
289            RuntimeFailureClass::ContextWindow => "context_window",
290            RuntimeFailureClass::ProviderDegraded => "provider_degraded",
291            RuntimeFailureClass::ToolArgMalformed => "tool_arg_malformed",
292            RuntimeFailureClass::ToolPolicyBlocked => "tool_policy_blocked",
293            RuntimeFailureClass::ToolLoop => "tool_loop",
294            RuntimeFailureClass::VerificationFailed => "verification_failed",
295            RuntimeFailureClass::EmptyModelResponse => "empty_model_response",
296            RuntimeFailureClass::Unknown => "unknown",
297        }
298    }
299
300    fn operator_guidance(self) -> &'static str {
301        match self {
302            RuntimeFailureClass::ContextWindow => {
303                "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying."
304            }
305            RuntimeFailureClass::ProviderDegraded => {
306                "Retry once automatically, then narrow the turn or restart LM Studio if it persists."
307            }
308            RuntimeFailureClass::ToolArgMalformed => {
309                "Retry with repaired or narrower tool arguments instead of repeating the same malformed call."
310            }
311            RuntimeFailureClass::ToolPolicyBlocked => {
312                "Stay inside the allowed workflow or switch modes before retrying."
313            }
314            RuntimeFailureClass::ToolLoop => {
315                "Stop repeating the same failing tool pattern and switch to a narrower recovery step."
316            }
317            RuntimeFailureClass::VerificationFailed => {
318                "Fix the build or test failure before treating the task as complete."
319            }
320            RuntimeFailureClass::EmptyModelResponse => {
321                "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing."
322            }
323            RuntimeFailureClass::Unknown => {
324                "Inspect the latest grounded tool results or provider status before retrying."
325            }
326        }
327    }
328}
329
330fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
331    let lower = detail.to_ascii_lowercase();
332    if lower.contains("context_window_blocked")
333        || lower.contains("context ceiling reached")
334        || lower.contains("exceeds the")
335        || ((lower.contains("n_keep") && lower.contains("n_ctx"))
336            || lower.contains("context length")
337            || lower.contains("keep from the initial prompt")
338            || lower.contains("prompt is greater than the context length"))
339    {
340        RuntimeFailureClass::ContextWindow
341    } else if lower.contains("empty response from model")
342        || lower.contains("model returned an empty response")
343    {
344        RuntimeFailureClass::EmptyModelResponse
345    } else if lower.contains("lm studio unreachable")
346        || lower.contains("lm studio error")
347        || lower.contains("request failed")
348        || lower.contains("response parse error")
349        || lower.contains("provider degraded")
350    {
351        RuntimeFailureClass::ProviderDegraded
352    } else if lower.contains("missing required argument")
353        || lower.contains("json repair failed")
354        || lower.contains("invalid pattern")
355        || lower.contains("invalid line range")
356    {
357        RuntimeFailureClass::ToolArgMalformed
358    } else if lower.contains("action blocked:")
359        || lower.contains("access denied")
360        || lower.contains("declined by user")
361    {
362        RuntimeFailureClass::ToolPolicyBlocked
363    } else if lower.contains("too many consecutive tool errors")
364        || lower.contains("repeated tool failures")
365        || lower.contains("stuck in a loop")
366    {
367        RuntimeFailureClass::ToolLoop
368    } else if lower.contains("build failed")
369        || lower.contains("verification failed")
370        || lower.contains("verify_build")
371    {
372        RuntimeFailureClass::VerificationFailed
373    } else {
374        RuntimeFailureClass::Unknown
375    }
376}
377
378fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
379    format!(
380        "[failure:{}] {} Detail: {}",
381        class.tag(),
382        class.operator_guidance(),
383        detail.trim()
384    )
385}
386
387fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
388    match class {
389        RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
390        RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
391        RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
392        _ => None,
393    }
394}
395
396fn checkpoint_state_for_runtime_failure(
397    class: RuntimeFailureClass,
398) -> Option<OperatorCheckpointState> {
399    match class {
400        RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
401        RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
402        RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
403        RuntimeFailureClass::VerificationFailed => {
404            Some(OperatorCheckpointState::BlockedVerification)
405        }
406        _ => None,
407    }
408}
409
410fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
411    match class {
412        RuntimeFailureClass::ProviderDegraded => {
413            "LM Studio degraded during the turn; retrying once before surfacing a failure."
414        }
415        RuntimeFailureClass::EmptyModelResponse => {
416            "The model returned an empty reply; retrying once before surfacing a failure."
417        }
418        _ => "Runtime recovery in progress.",
419    }
420}
421
422fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
423    match class {
424        RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
425        RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
426        RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
427        RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
428        _ => "Operator checkpoint updated.",
429    }
430}
431
432fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
433    match class {
434        RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
435        RuntimeFailureClass::ProviderDegraded => {
436            "LM Studio degraded and did not recover cleanly; operator action is now required."
437        }
438        RuntimeFailureClass::EmptyModelResponse => {
439            "LM Studio returned an empty reply after recovery; operator action is now required."
440        }
441        RuntimeFailureClass::ToolLoop => {
442            "Repeated failing tool pattern detected; Hematite stopped the loop."
443        }
444        _ => "Runtime failure surfaced to the operator.",
445    }
446}
447
448fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
449    matches!(
450        class,
451        RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
452    )
453}
454
455fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
456    match class {
457        RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
458        RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
459        RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
460        RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
461        RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
462        RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
463        RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
464    }
465}
466
467fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
468    format!(
469        "{} [{}]",
470        plan.recipe.scenario.label(),
471        plan.recipe.steps_summary()
472    )
473}
474
475fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
476    match decision {
477        RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
478        RecoveryDecision::Escalate {
479            recipe,
480            attempts_made,
481            ..
482        } => format!(
483            "{} escalated after {} / {} [{}]",
484            recipe.scenario.label(),
485            attempts_made,
486            recipe.max_attempts.max(1),
487            recipe.steps_summary()
488        ),
489    }
490}
491
492/// Parse file paths from cargo/compiler error output.
493/// Handles lines like `  --> src/foo/bar.rs:34:12` and `error: could not compile`.
494fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
495    let root = crate::tools::file_ops::workspace_root();
496    let mut paths: Vec<String> = output
497        .lines()
498        .filter_map(|line| {
499            let trimmed = line.trim_start();
500            // Cargo error location: "--> path/to/file.rs:line:col"
501            let after_arrow = trimmed.strip_prefix("--> ")?;
502            let file_part = after_arrow.split(':').next()?;
503            if file_part.is_empty() || file_part.starts_with('<') {
504                return None;
505            }
506            let p = std::path::Path::new(file_part);
507            let resolved = if p.is_absolute() {
508                p.to_path_buf()
509            } else {
510                root.join(p)
511            };
512            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
513        })
514        .collect();
515    paths.sort();
516    paths.dedup();
517    paths
518}
519
520fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
521    match mode {
522        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
523        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
524        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
525        WorkflowMode::Teach => "Workflow mode TEACH is a guided walkthrough mode. I will inspect the real state of your machine first, then give you a numbered step-by-step tutorial so you can perform the task yourself. I do not execute write operations in TEACH mode — I show you exactly how to do it.".to_string(),
526        _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
527    }
528}
529
/// Fixed instruction block describing the output format expected in
/// ARCHITECT mode: six ASCII headings and a concise, implementation-oriented
/// handoff.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow`.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented."
    )
}
545
/// Canonical prompt text that asks for the saved plan to be executed.
fn implement_current_plan_prompt() -> &'static str {
    const PROMPT: &str = "Implement the current plan.";
    PROMPT
}
549
550fn architect_handoff_operator_note(plan: &crate::tools::plan::PlanHandoff) -> String {
551    format!(
552        "Implementation handoff saved to `.hematite/PLAN.md`.\nNext step: run `/implement-plan` to execute it in `/code`, or use `/code {}` directly.\nPlan: {}",
553        implement_current_plan_prompt().to_ascii_lowercase(),
554        plan.summary_line()
555    )
556}
557
558fn is_current_plan_execution_request(user_input: &str) -> bool {
559    let lower = user_input.trim().to_ascii_lowercase();
560    lower == "/implement-plan"
561        || lower == implement_current_plan_prompt().to_ascii_lowercase()
562        || lower
563            == implement_current_plan_prompt()
564                .trim_end_matches('.')
565                .to_ascii_lowercase()
566        || lower.contains("implement the current plan")
567}
568
569fn is_plan_scoped_tool(name: &str) -> bool {
570    crate::agent::inference::tool_metadata_for_name(name).plan_scope
571}
572
573fn is_current_plan_irrelevant_tool(name: &str) -> bool {
574    !crate::agent::inference::tool_metadata_for_name(name).plan_scope
575}
576
577fn is_non_mutating_plan_step_tool(name: &str) -> bool {
578    let metadata = crate::agent::inference::tool_metadata_for_name(name);
579    metadata.plan_scope && !metadata.mutates_workspace
580}
581
582fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
583    let trimmed = user_input.trim();
584    for (prefix, mode) in [
585        ("/ask", WorkflowMode::Ask),
586        ("/code", WorkflowMode::Code),
587        ("/architect", WorkflowMode::Architect),
588        ("/read-only", WorkflowMode::ReadOnly),
589        ("/auto", WorkflowMode::Auto),
590        ("/teach", WorkflowMode::Teach),
591    ] {
592        if let Some(rest) = trimmed.strip_prefix(prefix) {
593            let rest = rest.trim();
594            if !rest.is_empty() {
595                return Some((mode, rest));
596            }
597        }
598    }
599    None
600}
601
602// Tool catalogue
603
604/// Returns the full set of tools exposed to the model.
605pub fn get_tools() -> Vec<ToolDefinition> {
606    crate::agent::tool_registry::get_tools()
607}
608
/// Heuristic check for text that reads like an English sentence rather than
/// a command line.
///
/// The lower-cased input is split on whitespace and flagged when any of the
/// following holds:
/// 1. it starts with a conversational opener and has at least three words;
/// 2. it contains two or more stop-words that are rare in CLI commands;
/// 3. it has five or more words and none of `-`, `/`, `\`, `.` anywhere.
///
/// Empty or whitespace-only input is never flagged.
fn is_natural_language_hallucination(input: &str) -> bool {
    const SENTENCE_OPENERS: [&str; 11] = [
        "make", "create", "i", "can", "please", "we", "let's", "go", "execute", "run", "how",
    ];
    const STOP_WORDS: [&str; 10] = [
        "the", "a", "an", "on", "my", "your", "for", "with", "into", "onto",
    ];

    let lowered = input.to_lowercase();
    let tokens: Vec<&str> = lowered.split_whitespace().collect();
    let Some(&opener) = tokens.first() else {
        return false;
    };

    // Rule 1: conversational opener plus enough words to be a sentence.
    let reads_like_sentence = tokens.len() >= 3 && SENTENCE_OPENERS.contains(&opener);

    // Rule 2: English stop-words.
    let stop_word_hits = tokens
        .iter()
        .filter(|token| STOP_WORDS.contains(*token))
        .count();

    // Rule 3: many words with no typical CLI separator in the raw input.
    let has_cli_separator = input.contains(|c: char| matches!(c, '-' | '/' | '\\' | '.'));
    let long_without_separators = tokens.len() >= 5 && !has_cli_separator;

    reads_like_sentence || stop_word_hits >= 2 || long_without_separators
}
650
651pub struct ConversationManager {
652    /// Full conversation history in OpenAI format.
653    pub history: Vec<ChatMessage>,
654    pub engine: Arc<InferenceEngine>,
655    pub tools: Vec<ToolDefinition>,
656    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
657    pub professional: bool,
658    pub brief: bool,
659    pub snark: u8,
660    pub chaos: u8,
661    /// Model to use for simple read-only tasks (optional, user-supplied via --fast-model).
662    pub fast_model: Option<String>,
663    /// Model to use for complex write/build tasks (optional, user-supplied via --think-model).
664    pub think_model: Option<String>,
665    /// Files where whitespace auto-correction fired this session.
666    pub correction_hints: Vec<String>,
667    /// Running background summary of pruned older messages.
668    pub running_summary: Option<String>,
669    /// Live hardware telemetry handle.
670    pub gpu_state: Arc<GpuState>,
671    /// Local RAG memory — FTS5-indexed project source.
672    pub vein: crate::memory::vein::Vein,
673    /// Append-only session transcript logger.
674    pub transcript: crate::agent::transcript::TranscriptLogger,
675    /// Thread-safe cancellation signal for the current agent turn.
676    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
677    /// Shared Git remote state (for persistent connectivity checks).
678    pub git_state: Arc<crate::agent::git_monitor::GitState>,
679    /// Reasoning think-mode override. None = let model decide. Some(true) = force /think.
680    /// Some(false) = force /no_think (fast mode, 3-5x quicker for simple tasks).
681    pub think_mode: Option<bool>,
682    workflow_mode: WorkflowMode,
683    /// Layer 6: Dynamic Task Context (extracted during compaction)
684    pub session_memory: crate::agent::compaction::SessionMemory,
685    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
686    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
687    /// Personality description for the current Rusty soul — used in chat mode system prompt.
688    pub soul_personality: String,
689    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
690    /// Active reasoning summary extracted from the previous model turn (Gemma-4 Native).
691    pub reasoning_history: Option<String>,
692    /// Layer 8: Active Reference Pinning (Context Locked)
693    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
694    /// Hard action-grounding state for proof-before-action checks.
695    action_grounding: Arc<Mutex<ActionGroundingState>>,
696    /// True only during `/code Implement the current plan.` style execution turns.
697    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
698    /// Typed per-turn recovery attempt tracking.
699    recovery_context: RecoveryContext,
700    /// L1 context block — hot files summary injected into the system prompt.
701    /// Built once after vein init and updated as edits accumulate heat.
702    pub l1_context: Option<String>,
703    /// Condensed AST repository layout for the active project.
704    pub repo_map: Option<String>,
705    /// Number of real inference turns completed this session.
706    pub turn_count: u32,
707    /// Last user message sent to the model — persisted as checkpoint goal.
708    pub last_goal: Option<String>,
709    /// Most recent project directory created this session (Automatic Dive-In).
710    pub latest_target_dir: Option<String>,
711}
712
713impl ConversationManager {
714    fn vein_docs_only_mode(&self) -> bool {
715        !crate::tools::file_ops::is_project_workspace()
716    }
717
718    fn refresh_vein_index(&mut self) -> usize {
719        let count = if self.vein_docs_only_mode() {
720            let root = crate::tools::file_ops::workspace_root();
721            tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
722        } else {
723            tokio::task::block_in_place(|| self.vein.index_project())
724        };
725        self.l1_context = self.vein.l1_context();
726        count
727    }
728
729    fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
730        let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
731        let workspace_mode = if self.vein_docs_only_mode() {
732            "docs-only (outside a project workspace)"
733        } else {
734            "project workspace"
735        };
736        let active_room = snapshot.active_room.as_deref().unwrap_or("none");
737        let mut out = format!(
738            "Vein Inspection\n\
739             Workspace mode: {workspace_mode}\n\
740             Indexed this pass: {indexed_this_pass}\n\
741             Indexed source files: {}\n\
742             Indexed docs: {}\n\
743             Indexed session exchanges: {}\n\
744             Embedded source/doc chunks: {}\n\
745             Embeddings available: {}\n\
746             Active room bias: {active_room}\n\
747             L1 hot-files block: {}\n",
748            snapshot.indexed_source_files,
749            snapshot.indexed_docs,
750            snapshot.indexed_session_exchanges,
751            snapshot.embedded_source_doc_chunks,
752            if snapshot.has_any_embeddings {
753                "yes"
754            } else {
755                "no"
756            },
757            if snapshot.l1_ready {
758                "ready"
759            } else {
760                "not built yet"
761            },
762        );
763
764        if snapshot.hot_files.is_empty() {
765            out.push_str("Hot files: none yet.\n");
766            return out;
767        }
768
769        out.push_str("\nHot files by room:\n");
770        let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
771            std::collections::BTreeMap::new();
772        for file in &snapshot.hot_files {
773            by_room.entry(file.room.as_str()).or_default().push(file);
774        }
775        for (room, files) in by_room {
776            out.push_str(&format!("[{}]\n", room));
777            for file in files {
778                out.push_str(&format!(
779                    "- {} [{} edit{}]\n",
780                    file.path,
781                    file.heat,
782                    if file.heat == 1 { "" } else { "s" }
783                ));
784            }
785        }
786
787        out
788    }
789
790    fn latest_user_prompt(&self) -> Option<&str> {
791        self.history
792            .iter()
793            .rev()
794            .find(|msg| msg.role == "user")
795            .map(|msg| msg.content.as_str())
796    }
797
798    async fn emit_direct_response(
799        &mut self,
800        tx: &mpsc::Sender<InferenceEvent>,
801        raw_user_input: &str,
802        effective_user_input: &str,
803        response: &str,
804    ) {
805        self.history.push(ChatMessage::user(effective_user_input));
806        self.history.push(ChatMessage::assistant_text(response));
807        self.transcript.log_user(raw_user_input);
808        self.transcript.log_agent(response);
809        for chunk in chunk_text(response, 8) {
810            if !chunk.is_empty() {
811                let _ = tx.send(InferenceEvent::Token(chunk)).await;
812            }
813        }
814        if let Some(path) = self.latest_target_dir.take() {
815            let _ = tx.send(InferenceEvent::CopyDiveInCommand(path)).await;
816        }
817        let _ = tx.send(InferenceEvent::Done).await;
818        self.trim_history(80);
819        self.refresh_session_memory();
820        self.save_session();
821    }
822
823    async fn emit_operator_checkpoint(
824        &mut self,
825        tx: &mpsc::Sender<InferenceEvent>,
826        state: OperatorCheckpointState,
827        summary: impl Into<String>,
828    ) {
829        let summary = summary.into();
830        self.session_memory
831            .record_checkpoint(state.label(), summary.clone());
832        let _ = tx
833            .send(InferenceEvent::OperatorCheckpoint { state, summary })
834            .await;
835    }
836
837    async fn emit_recovery_recipe_summary(
838        &mut self,
839        tx: &mpsc::Sender<InferenceEvent>,
840        state: impl Into<String>,
841        summary: impl Into<String>,
842    ) {
843        let state = state.into();
844        let summary = summary.into();
845        self.session_memory.record_recovery(state, summary.clone());
846        let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
847    }
848
849    async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
850        let _ = tx
851            .send(InferenceEvent::ProviderStatus {
852                state: ProviderRuntimeState::Live,
853                summary: String::new(),
854            })
855            .await;
856        self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
857            .await;
858    }
859
860    async fn emit_prompt_pressure_for_messages(
861        &self,
862        tx: &mpsc::Sender<InferenceEvent>,
863        messages: &[ChatMessage],
864    ) {
865        let context_length = self.engine.current_context_length();
866        let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
867            crate::agent::inference::estimate_prompt_pressure(
868                messages,
869                &self.tools,
870                context_length,
871            );
872        let _ = tx
873            .send(InferenceEvent::PromptPressure {
874                estimated_input_tokens,
875                reserved_output_tokens,
876                estimated_total_tokens,
877                context_length,
878                percent,
879            })
880            .await;
881    }
882
883    async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
884        let context_length = self.engine.current_context_length();
885        let _ = tx
886            .send(InferenceEvent::PromptPressure {
887                estimated_input_tokens: 0,
888                reserved_output_tokens: 0,
889                estimated_total_tokens: 0,
890                context_length,
891                percent: 0,
892            })
893            .await;
894    }
895
896    async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
897        let context_length = self.engine.current_context_length();
898        let vram_ratio = self.gpu_state.ratio();
899        let config = CompactionConfig::adaptive(context_length, vram_ratio);
900        let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
901        let percent = if config.max_estimated_tokens == 0 {
902            0
903        } else {
904            ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
905        };
906
907        let _ = tx
908            .send(InferenceEvent::CompactionPressure {
909                estimated_tokens,
910                threshold_tokens: config.max_estimated_tokens,
911                percent,
912            })
913            .await;
914    }
915
916    async fn refresh_runtime_profile_and_report(
917        &mut self,
918        tx: &mpsc::Sender<InferenceEvent>,
919        reason: &str,
920    ) -> Option<(String, usize, bool)> {
921        let refreshed = self.engine.refresh_runtime_profile().await;
922        if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
923            let _ = tx
924                .send(InferenceEvent::RuntimeProfile {
925                    model_id: model_id.clone(),
926                    context_length: *context_length,
927                })
928                .await;
929            self.transcript.log_system(&format!(
930                "Runtime profile refresh ({}): model={} ctx={} changed={}",
931                reason, model_id, context_length, changed
932            ));
933        }
934        refreshed
935    }
936
937    pub fn new(
938        engine: Arc<InferenceEngine>,
939        professional: bool,
940        brief: bool,
941        snark: u8,
942        chaos: u8,
943        soul_personality: String,
944        fast_model: Option<String>,
945        think_model: Option<String>,
946        gpu_state: Arc<GpuState>,
947        git_state: Arc<crate::agent::git_monitor::GitState>,
948        swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
949        voice_manager: Arc<crate::ui::voice::VoiceManager>,
950    ) -> Self {
951        let (saved_summary, saved_memory) = load_session_data();
952
953        // Build the initial mcp_manager
954        let mcp_manager = Arc::new(tokio::sync::Mutex::new(
955            crate::agent::mcp_manager::McpManager::new(),
956        ));
957
958        // Build the initial system prompt using the canonical InferenceEngine path.
959        let dynamic_instructions =
960            engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
961
962        let history = vec![ChatMessage::system(&dynamic_instructions)];
963
964        let vein_path = crate::tools::file_ops::workspace_root()
965            .join(".hematite")
966            .join("vein.db");
967        let vein_base_url = engine.base_url.clone();
968        let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
969            .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
970
971        Self {
972            history,
973            engine,
974            tools: get_tools(),
975            mcp_manager,
976            professional,
977            brief,
978            snark,
979            chaos,
980            fast_model,
981            think_model,
982            correction_hints: Vec::new(),
983            running_summary: saved_summary,
984            gpu_state,
985            vein,
986            transcript: crate::agent::transcript::TranscriptLogger::new(),
987            cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
988            git_state,
989            think_mode: None,
990            workflow_mode: WorkflowMode::Auto,
991            session_memory: saved_memory,
992            swarm_coordinator,
993            voice_manager,
994            soul_personality,
995            lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
996                crate::tools::file_ops::workspace_root(),
997            ))),
998            reasoning_history: None,
999            pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
1000            action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
1001            plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
1002            recovery_context: RecoveryContext::default(),
1003            l1_context: None,
1004            repo_map: None,
1005            turn_count: 0,
1006            last_goal: None,
1007            latest_target_dir: None,
1008        }
1009    }
1010
1011    async fn emit_done_events(&mut self, tx: &tokio::sync::mpsc::Sender<InferenceEvent>) {
1012        if let Some(path) = self.latest_target_dir.take() {
1013            let _ = tx.send(InferenceEvent::CopyDiveInCommand(path)).await;
1014        }
1015        let _ = tx.send(InferenceEvent::Done).await;
1016    }
1017
1018    /// Index the project into The Vein. Call once after construction.
1019    /// Uses block_in_place so the tokio runtime thread isn't parked.
1020    pub fn initialize_vein(&mut self) -> usize {
1021        self.refresh_vein_index()
1022    }
1023
1024    /// Generate the AST Repo Map. Call once after construction or when resetting context.
1025    pub fn initialize_repo_map(&mut self) {
1026        if !self.vein_docs_only_mode() {
1027            let root = crate::tools::file_ops::workspace_root();
1028            let hot = self.vein.hot_files_weighted(10);
1029            let gen = crate::memory::repo_map::RepoMapGenerator::new(&root).with_hot_files(&hot);
1030            match tokio::task::block_in_place(|| gen.generate()) {
1031                Ok(map) => self.repo_map = Some(map),
1032                Err(e) => {
1033                    self.repo_map = Some(format!("Repo Map generation failed: {}", e));
1034                }
1035            }
1036        }
1037    }
1038
1039    /// Re-generate the repo map after a file edit so rankings stay fresh.
1040    /// Lightweight (~100-200ms) — called after successful mutations.
1041    fn refresh_repo_map(&mut self) {
1042        self.initialize_repo_map();
1043    }
1044
1045    fn save_session(&self) {
1046        let path = session_path();
1047        if let Some(parent) = path.parent() {
1048            let _ = std::fs::create_dir_all(parent);
1049        }
1050        let saved = SavedSession {
1051            running_summary: self.running_summary.clone(),
1052            session_memory: self.session_memory.clone(),
1053            last_goal: self.last_goal.clone(),
1054            turn_count: self.turn_count,
1055        };
1056        if let Ok(json) = serde_json::to_string(&saved) {
1057            let _ = std::fs::write(&path, json);
1058        }
1059    }
1060
1061    fn save_empty_session(&self) {
1062        let path = session_path();
1063        if let Some(parent) = path.parent() {
1064            let _ = std::fs::create_dir_all(parent);
1065        }
1066        let saved = SavedSession {
1067            running_summary: None,
1068            session_memory: crate::agent::compaction::SessionMemory::default(),
1069            last_goal: None,
1070            turn_count: 0,
1071        };
1072        if let Ok(json) = serde_json::to_string(&saved) {
1073            let _ = std::fs::write(&path, json);
1074        }
1075    }
1076
1077    fn refresh_session_memory(&mut self) {
1078        let current_plan = self.session_memory.current_plan.clone();
1079        let previous_memory = self.session_memory.clone();
1080        self.session_memory = compaction::extract_memory(&self.history);
1081        self.session_memory.current_plan = current_plan;
1082        self.session_memory
1083            .inherit_runtime_ledger_from(&previous_memory);
1084    }
1085
1086    fn build_chat_system_prompt(&self) -> String {
1087        let species = &self.engine.species;
1088        let personality = &self.soul_personality;
1089        format!(
1090            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
1091             {personality}\n\n\
1092             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
1093             Rules:\n\
1094             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
1095             - Answer directly. One paragraph is usually right.\n\
1096             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
1097             - Don't narrate your reasoning or mention tool names unprompted.\n\
1098             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
1099             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
1100             - If the user wants the full coding harness, they can type `/agent`.\n",
1101        )
1102    }
1103
1104    fn append_session_handoff(&self, system_msg: &mut String) {
1105        let has_summary = self
1106            .running_summary
1107            .as_ref()
1108            .map(|s| !s.trim().is_empty())
1109            .unwrap_or(false);
1110        let has_memory = self.session_memory.has_signal();
1111
1112        if !has_summary && !has_memory {
1113            return;
1114        }
1115
1116        system_msg.push_str(
1117            "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
1118             This is compact carry-over from earlier work on this machine.\n\
1119             Use it only when it helps the current request.\n\
1120             Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
1121        );
1122
1123        if has_memory {
1124            system_msg.push_str("\n## Active Task Memory\n");
1125            system_msg.push_str(&self.session_memory.to_prompt());
1126        }
1127
1128        if let Some(summary) = self.running_summary.as_deref() {
1129            if !summary.trim().is_empty() {
1130                system_msg.push_str("\n## Compacted Session Summary\n");
1131                system_msg.push_str(summary);
1132                system_msg.push('\n');
1133            }
1134        }
1135    }
1136
1137    fn set_workflow_mode(&mut self, mode: WorkflowMode) {
1138        self.workflow_mode = mode;
1139    }
1140
1141    fn current_plan_summary(&self) -> Option<String> {
1142        self.session_memory
1143            .current_plan
1144            .as_ref()
1145            .filter(|plan| plan.has_signal())
1146            .map(|plan| plan.summary_line())
1147    }
1148
1149    fn current_plan_allowed_paths(&self) -> Vec<String> {
1150        self.session_memory
1151            .current_plan
1152            .as_ref()
1153            .map(|plan| {
1154                plan.target_files
1155                    .iter()
1156                    .map(|path| normalize_workspace_path(path))
1157                    .collect()
1158            })
1159            .unwrap_or_default()
1160    }
1161
1162    fn persist_architect_handoff(
1163        &mut self,
1164        response: &str,
1165    ) -> Option<crate::tools::plan::PlanHandoff> {
1166        if self.workflow_mode != WorkflowMode::Architect {
1167            return None;
1168        }
1169        let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1170            return None;
1171        };
1172        let _ = crate::tools::plan::save_plan_handoff(&plan);
1173        self.session_memory.current_plan = Some(plan.clone());
1174        Some(plan)
1175    }
1176
1177    async fn begin_grounded_turn(&self) -> u64 {
1178        let mut state = self.action_grounding.lock().await;
1179        state.turn_index += 1;
1180        state.turn_index
1181    }
1182
1183    async fn reset_action_grounding(&self) {
1184        let mut state = self.action_grounding.lock().await;
1185        *state = ActionGroundingState::default();
1186    }
1187
1188    async fn record_read_observation(&self, path: &str) {
1189        let normalized = normalize_workspace_path(path);
1190        let mut state = self.action_grounding.lock().await;
1191        let turn = state.turn_index;
1192        // read_file returns full file content with line numbers — sufficient for
1193        // the model to know exact text before editing, so it satisfies the
1194        // line-inspection grounding check too.
1195        state.observed_paths.insert(normalized.clone(), turn);
1196        state.inspected_paths.insert(normalized, turn);
1197    }
1198
1199    async fn record_line_inspection(&self, path: &str) {
1200        let normalized = normalize_workspace_path(path);
1201        let mut state = self.action_grounding.lock().await;
1202        let turn = state.turn_index;
1203        state.observed_paths.insert(normalized.clone(), turn);
1204        state.inspected_paths.insert(normalized, turn);
1205    }
1206
1207    async fn record_verify_build_result(&self, ok: bool, output: &str) {
1208        let mut state = self.action_grounding.lock().await;
1209        let turn = state.turn_index;
1210        state.last_verify_build_turn = Some(turn);
1211        state.last_verify_build_ok = ok;
1212        if ok {
1213            state.code_changed_since_verify = false;
1214            state.last_failed_build_paths.clear();
1215        } else {
1216            state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1217        }
1218    }
1219
1220    fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
1221        self.session_memory.record_verification(ok, summary);
1222    }
1223
1224    async fn record_successful_mutation(&self, path: Option<&str>) {
1225        let mut state = self.action_grounding.lock().await;
1226        state.code_changed_since_verify = match path {
1227            Some(p) => is_code_like_path(p),
1228            None => true,
1229        };
1230    }
1231
1232    async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
1233        if self
1234            .plan_execution_active
1235            .load(std::sync::atomic::Ordering::SeqCst)
1236        {
1237            if is_current_plan_irrelevant_tool(name) {
1238                return Err(format!(
1239                    "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
1240                    name
1241                ));
1242            }
1243
1244            if is_plan_scoped_tool(name) {
1245                let allowed_paths = self.current_plan_allowed_paths();
1246                if !allowed_paths.is_empty() {
1247                    let in_allowed = match name {
1248                        "auto_pin_context" => args
1249                            .get("paths")
1250                            .and_then(|v| v.as_array())
1251                            .map(|paths| {
1252                                !paths.is_empty()
1253                                    && paths.iter().all(|v| {
1254                                        v.as_str()
1255                                            .map(normalize_workspace_path)
1256                                            .map(|p| allowed_paths.contains(&p))
1257                                            .unwrap_or(false)
1258                                    })
1259                            })
1260                            .unwrap_or(false),
1261                        "grep_files" | "list_files" => args
1262                            .get("path")
1263                            .and_then(|v| v.as_str())
1264                            .map(normalize_workspace_path)
1265                            .map(|p| allowed_paths.contains(&p))
1266                            .unwrap_or(false),
1267                        _ => action_target_path(name, args)
1268                            .map(|p| allowed_paths.contains(&p))
1269                            .unwrap_or(false),
1270                    };
1271
1272                    if !in_allowed {
1273                        let allowed = allowed_paths
1274                            .iter()
1275                            .map(|p| format!("`{}`", p))
1276                            .collect::<Vec<_>>()
1277                            .join(", ");
1278                        return Err(format!(
1279                            "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
1280                            allowed
1281                        ));
1282                    }
1283                }
1284            }
1285
1286            if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
1287                if let Some(target) = action_target_path(name, args) {
1288                    let state = self.action_grounding.lock().await;
1289                    let recently_inspected = state
1290                        .inspected_paths
1291                        .get(&target)
1292                        .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1293                        .unwrap_or(false);
1294                    drop(state);
1295                    if !recently_inspected {
1296                        return Err(format!(
1297                            "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
1298                            name, target
1299                        ));
1300                    }
1301                }
1302            }
1303        }
1304
1305        if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
1306            return Err(
1307                "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
1308                    .to_string(),
1309            );
1310        }
1311
1312        if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
1313            if name == "shell" {
1314                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1315                let risk = crate::tools::guard::classify_bash_risk(command);
1316                if !matches!(risk, crate::tools::RiskLevel::Safe) {
1317                    return Err(format!(
1318                        "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
1319                        self.workflow_mode.label()
1320                    ));
1321                }
1322            } else {
1323                return Err(format!(
1324                    "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
1325                    self.workflow_mode.label()
1326                ));
1327            }
1328        }
1329
1330        let normalized_target = action_target_path(name, args);
1331        if let Some(target) = normalized_target.as_deref() {
1332            if matches!(
1333                name,
1334                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1335            ) {
1336                if let Some(prompt) = self.latest_user_prompt() {
1337                    if docs_edit_without_explicit_request(prompt, target) {
1338                        return Err(format!(
1339                            "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
1340                            target
1341                        ));
1342                    }
1343                }
1344            }
1345            let path_exists = std::path::Path::new(target).exists();
1346            if path_exists {
1347                let state = self.action_grounding.lock().await;
1348                let pinned = self.pinned_files.lock().await;
1349                let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
1350                drop(pinned);
1351
1352                // edit_file and multi_search_replace match text exactly, so they need a
1353                // tighter evidence bar than a plain read. Require inspect_lines on the
1354                // target within the last 3 turns. A read_file in the *same* turn is also
1355                // accepted (the model just loaded the file and is making an immediate edit).
1356                let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
1357                let recently_inspected = state
1358                    .inspected_paths
1359                    .get(target)
1360                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1361                    .unwrap_or(false);
1362                let same_turn_read = state
1363                    .observed_paths
1364                    .get(target)
1365                    .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
1366                    .unwrap_or(false);
1367                let recent_observed = state
1368                    .observed_paths
1369                    .get(target)
1370                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1371                    .unwrap_or(false);
1372
1373                if needs_exact_window {
1374                    if !recently_inspected && !same_turn_read && !pinned_match {
1375                        return Err(format!(
1376                            "Action blocked: `{}` on '{}' requires a line-level inspection first. \
1377                             Use `inspect_lines` on the target region to get the exact current text \
1378                             (whitespace and indentation included), then retry the edit.",
1379                            name, target
1380                        ));
1381                    }
1382                } else if !recent_observed && !pinned_match {
1383                    return Err(format!(
1384                        "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
1385                        name, target
1386                    ));
1387                }
1388            }
1389        }
1390
1391        if is_mcp_mutating_tool(name) {
1392            return Err(format!(
1393                "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
1394                name
1395            ));
1396        }
1397
1398        if is_mcp_workspace_read_tool(name) {
1399            return Err(format!(
1400                "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
1401                name
1402            ));
1403        }
1404
1405        // Phase gate: if the build is broken, constrain edits to files that cargo flagged.
1406        // This prevents the model from wandering to unrelated files after a failed verify.
1407        if matches!(
1408            name,
1409            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1410        ) {
1411            if let Some(target) = normalized_target.as_deref() {
1412                let state = self.action_grounding.lock().await;
1413                if state.code_changed_since_verify
1414                    && !state.last_verify_build_ok
1415                    && !state.last_failed_build_paths.is_empty()
1416                    && !state.last_failed_build_paths.iter().any(|p| p == target)
1417                {
1418                    let files = state
1419                        .last_failed_build_paths
1420                        .iter()
1421                        .map(|p| format!("`{}`", p))
1422                        .collect::<Vec<_>>()
1423                        .join(", ");
1424                    return Err(format!(
1425                        "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
1426                        files
1427                    ));
1428                }
1429            }
1430        }
1431
1432        if name == "git_commit" || name == "git_push" {
1433            let state = self.action_grounding.lock().await;
1434            if state.code_changed_since_verify && !state.last_verify_build_ok {
1435                return Err(format!(
1436                    "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
1437                    name
1438                ));
1439            }
1440        }
1441
1442        if name == "shell" {
1443            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1444            if shell_looks_like_structured_host_inspection(command) {
1445                // Auto-redirect: silently call inspect_host with the right topic instead of
1446                // returning a block error that the model may fail to recover from.
1447                // Derive topic ONLY from the shell command itself. We do not fall back to the user prompt
1448                // here to avoid trapping secondary shell commands in a redirection loop based on the primary intent.
1449                let topic = match preferred_host_inspection_topic(command) {
1450                    Some(t) => t.to_string(),
1451                    None => return Ok(()), // Not a clear host inspection command, allow it to pass through.
1452                };
1453
1454                {
1455                    let mut state = self.action_grounding.lock().await;
1456                    let current_turn = state.turn_index;
1457                    if let Some(turn) = state.redirected_host_inspection_topics.get(&topic) {
1458                        if *turn == current_turn {
1459                            return Err(format!(
1460                                "[auto-redirected shell→inspect_host(topic=\"{topic}\")] Notice: The diagnostic data for topic `{topic}` was already provided in this turn. Using the previous result to avoid redundant tool calls."
1461                            ));
1462                        }
1463                    }
1464                    state
1465                        .redirected_host_inspection_topics
1466                        .insert(topic.clone(), current_turn);
1467                }
1468
1469                let path_val = self
1470                    .latest_user_prompt()
1471                    .and_then(|p| {
1472                        // Very basic heuristic for path extraction: look for strings with dots/slashes
1473                        p.split_whitespace()
1474                            .find(|w| w.contains('.') || w.contains('/') || w.contains('\\'))
1475                            .map(|s| {
1476                                s.trim_matches(|c: char| {
1477                                    !c.is_alphanumeric() && c != '.' && c != '/' && c != '\\'
1478                                })
1479                            })
1480                    })
1481                    .unwrap_or("");
1482
1483                let mut redirect_args = if !path_val.is_empty() {
1484                    serde_json::json!({ "topic": topic, "path": path_val })
1485                } else {
1486                    serde_json::json!({ "topic": topic })
1487                };
1488
1489                // Surgical Argument Extraction for redirected shell payloads (Robust "Wide Net" version)
1490                if topic == "ad_user" || topic == "dns_lookup" {
1491                    let cmd_lower = command.to_lowercase();
1492                    let mut identity = String::new();
1493
1494                    // 1. Explicit Identity check
1495                    if let Some(idx) = cmd_lower.find("-identity") {
1496                        let after_id = &command[idx + 9..].trim();
1497                        identity = if after_id.starts_with('\'') || after_id.starts_with('"') {
1498                            let quote = after_id.chars().next().unwrap();
1499                            after_id.split(quote).nth(1).unwrap_or("").to_string()
1500                        } else {
1501                            after_id.split_whitespace().next().unwrap_or("").to_string()
1502                        };
1503                    }
1504
1505                    // 2. Wide-Net Fallback: Find the first non-cmdlet, non-parameter string
1506                    if identity.is_empty() {
1507                        let parts: Vec<&str> = command.split_whitespace().collect();
1508                        for (i, part) in parts.iter().enumerate() {
1509                            if i == 0 || part.starts_with('-') {
1510                                continue;
1511                            }
1512                            // Skip common cmdlets if they are in the parts list
1513                            let p_low = part.to_lowercase();
1514                            if p_low.contains("get-ad")
1515                                || p_low.contains("powershell")
1516                                || p_low == "-command"
1517                            {
1518                                continue;
1519                            }
1520
1521                            identity = part
1522                                .trim_matches(|c: char| c == '\'' || c == '"')
1523                                .to_string();
1524                            if !identity.is_empty() {
1525                                break;
1526                            }
1527                        }
1528                    }
1529
1530                    if !identity.is_empty() {
1531                        redirect_args.as_object_mut().unwrap().insert(
1532                            "name_filter".to_string(),
1533                            serde_json::Value::String(identity),
1534                        );
1535                    }
1536                }
1537
1538                let result = crate::tools::host_inspect::inspect_host(&redirect_args).await;
1539                return match result {
1540                    Ok(output) => Err(format!(
1541                        "[auto-redirected shell→inspect_host(topic=\"{topic}\")]\n\n{output}\n\n[Note: Shell is blocked for host inspection. The diagnostic data above fulfills your request. Use inspect_host directly for further diagnostics.]"
1542                    )),
1543                    Err(e) => Err(format!(
1544                        "Redirection to native tool `{topic}` failed: {e}\n\nAction blocked: use `inspect_host(topic: \"{topic}\")` instead of raw `shell` for host-inspection questions. Available topics: updates, security, pending_reboot, disk_health, battery, recent_crashes, scheduled_tasks, dev_conflicts, health_report, storage, hardware, resource_load, overclocker, processes, network, services, ports, env_doctor, fix_plan, connectivity, wifi, connections, vpn, proxy, firewall_rules, traceroute, dns_cache, arp, route_table, docker, wsl, ssh, env, hosts_file, installed_software, git_config, databases, disk_benchmark, directory, permissions, login_history, registry_audit, share_access.",
1545                    )),
1546                };
1547            }
1548            let reason = args
1549                .get("reason")
1550                .and_then(|v| v.as_str())
1551                .unwrap_or("")
1552                .trim();
1553            let risk = crate::tools::guard::classify_bash_risk(command);
1554            if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
1555                return Err(
1556                    "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
1557                        .to_string(),
1558                );
1559            }
1560        }
1561
1562        Ok(())
1563    }
1564
1565    fn build_action_receipt(
1566        &self,
1567        name: &str,
1568        args: &Value,
1569        output: &str,
1570        is_error: bool,
1571    ) -> Option<ChatMessage> {
1572        if is_error || !is_destructive_tool(name) {
1573            return None;
1574        }
1575
1576        let mut receipt = String::from("[ACTION RECEIPT]\n");
1577        receipt.push_str(&format!("- tool: {}\n", name));
1578        if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1579            receipt.push_str(&format!("- target: {}\n", path));
1580        }
1581        if name == "shell" {
1582            if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1583                receipt.push_str(&format!("- command: {}\n", command));
1584            }
1585            if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1586                if !reason.trim().is_empty() {
1587                    receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1588                }
1589            }
1590        }
1591        let first_line = output.lines().next().unwrap_or(output).trim();
1592        receipt.push_str(&format!("- outcome: {}\n", first_line));
1593        Some(ChatMessage::system(&receipt))
1594    }
1595
1596    fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1597        self.tools
1598            .retain(|tool| !tool.function.name.starts_with("mcp__"));
1599        self.tools
1600            .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1601                tool_type: "function".into(),
1602                function: ToolFunction {
1603                    name: tool.name.clone(),
1604                    description: tool.description.clone().unwrap_or_default(),
1605                    parameters: tool.input_schema.clone(),
1606                },
1607                metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1608            }));
1609    }
1610
1611    async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1612        let summary = {
1613            let mcp = self.mcp_manager.lock().await;
1614            mcp.runtime_report()
1615        };
1616        let _ = tx
1617            .send(InferenceEvent::McpStatus {
1618                state: summary.state,
1619                summary: summary.summary,
1620            })
1621            .await;
1622    }
1623
1624    async fn refresh_mcp_tools(
1625        &mut self,
1626        tx: &mpsc::Sender<InferenceEvent>,
1627    ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1628        let mcp_tools = {
1629            let mut mcp = self.mcp_manager.lock().await;
1630            match mcp.initialize_all().await {
1631                Ok(()) => mcp.discover_tools().await,
1632                Err(e) => {
1633                    drop(mcp);
1634                    self.replace_mcp_tool_definitions(&[]);
1635                    self.emit_mcp_runtime_status(tx).await;
1636                    return Err(e.into());
1637                }
1638            }
1639        };
1640
1641        self.replace_mcp_tool_definitions(&mcp_tools);
1642        self.emit_mcp_runtime_status(tx).await;
1643        Ok(mcp_tools)
1644    }
1645
1646    /// Spawns and initializes all configured MCP servers, discovering their tools.
1647    pub async fn initialize_mcp(
1648        &mut self,
1649        tx: &mpsc::Sender<InferenceEvent>,
1650    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1651        let _ = self.refresh_mcp_tools(tx).await?;
1652        Ok(())
1653    }
1654
1655    /// Run one user turn through the full agentic loop.
1656    ///
1657    /// Adds the user message, calls the model, executes any tools, and loops
1658    /// until the model produces a final text reply.  All progress is streamed
1659    /// as `InferenceEvent` values via `tx`.
1660    pub async fn run_turn(
1661        &mut self,
1662        user_turn: &UserTurn,
1663        tx: mpsc::Sender<InferenceEvent>,
1664        yolo: bool,
1665    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1666        let user_input = user_turn.text.as_str();
1667        // ── Fast-path reset commands: handled locally, no network I/O needed ──
1668        if user_input.trim() == "/new" {
1669            self.history.clear();
1670            self.reasoning_history = None;
1671            self.session_memory.clear();
1672            self.running_summary = None;
1673            self.correction_hints.clear();
1674            self.pinned_files.lock().await.clear();
1675            self.reset_action_grounding().await;
1676            reset_task_files();
1677            let _ = std::fs::remove_file(session_path());
1678            self.save_empty_session();
1679            self.emit_compaction_pressure(&tx).await;
1680            self.emit_prompt_pressure_idle(&tx).await;
1681            for chunk in chunk_text(
1682                "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1683                8,
1684            ) {
1685                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1686            }
1687            let _ = tx.send(InferenceEvent::Done).await;
1688            return Ok(());
1689        }
1690
1691        if user_input.trim() == "/forget" {
1692            self.history.clear();
1693            self.reasoning_history = None;
1694            self.session_memory.clear();
1695            self.running_summary = None;
1696            self.correction_hints.clear();
1697            self.pinned_files.lock().await.clear();
1698            self.reset_action_grounding().await;
1699            reset_task_files();
1700            purge_persistent_memory();
1701            tokio::task::block_in_place(|| self.vein.reset());
1702            let _ = std::fs::remove_file(session_path());
1703            self.save_empty_session();
1704            self.emit_compaction_pressure(&tx).await;
1705            self.emit_prompt_pressure_idle(&tx).await;
1706            for chunk in chunk_text(
1707                "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1708                8,
1709            ) {
1710                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1711            }
1712            let _ = tx.send(InferenceEvent::Done).await;
1713            return Ok(());
1714        }
1715
1716        if user_input.trim() == "/vein-inspect" {
1717            let indexed = self.refresh_vein_index();
1718            let report = self.build_vein_inspection_report(indexed);
1719            let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1720            let _ = tx
1721                .send(InferenceEvent::VeinStatus {
1722                    file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1723                    embedded_count: snapshot.embedded_source_doc_chunks,
1724                    docs_only: self.vein_docs_only_mode(),
1725                })
1726                .await;
1727            for chunk in chunk_text(&report, 8) {
1728                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1729            }
1730            let _ = tx.send(InferenceEvent::Done).await;
1731            return Ok(());
1732        }
1733
1734        if user_input.trim() == "/workspace-profile" {
1735            let root = crate::tools::file_ops::workspace_root();
1736            let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1737            let report = crate::agent::workspace_profile::profile_report(&root);
1738            for chunk in chunk_text(&report, 8) {
1739                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1740            }
1741            let _ = tx.send(InferenceEvent::Done).await;
1742            return Ok(());
1743        }
1744
1745        if user_input.trim() == "/rules" {
1746            let root = crate::tools::file_ops::workspace_root();
1747            let rules_path = root.join(".hematite").join("rules.md");
1748            let report = if rules_path.exists() {
1749                match std::fs::read_to_string(&rules_path) {
1750                    Ok(content) => format!(
1751                        "## Behavioral Rules (.hematite/rules.md)\n\n{}\n\n---\nTo update: ask Hematite to edit your rules, or open `.hematite/rules.md` directly. Changes take effect on the next turn.",
1752                        content.trim()
1753                    ),
1754                    Err(e) => format!("Error reading .hematite/rules.md: {e}"),
1755                }
1756            } else {
1757                format!(
1758                    "No behavioral rules file found at `.hematite/rules.md`.\n\nCreate it to add custom behavioral guidelines — they are injected into the system prompt on every turn and apply to any model you load.\n\nExample: ask Hematite to \"create a rules.md with simplicity-first and surgical-edit guidelines\" and it will write the file for you.\n\nExpected path: {}",
1759                    rules_path.display()
1760                )
1761            };
1762            for chunk in chunk_text(&report, 8) {
1763                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1764            }
1765            let _ = tx.send(InferenceEvent::Done).await;
1766            return Ok(());
1767        }
1768
1769        if user_input.trim() == "/vein-reset" {
1770            tokio::task::block_in_place(|| self.vein.reset());
1771            let _ = tx
1772                .send(InferenceEvent::VeinStatus {
1773                    file_count: 0,
1774                    embedded_count: 0,
1775                    docs_only: self.vein_docs_only_mode(),
1776                })
1777                .await;
1778            for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1779                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1780            }
1781            let _ = tx.send(InferenceEvent::Done).await;
1782            return Ok(());
1783        }
1784
1785        // Reload config every turn (edits apply immediately, no restart needed).
1786        let config = crate::agent::config::load_config();
1787        self.recovery_context.clear();
1788        let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1789        if !manual_runtime_refresh {
1790            if let Some((model_id, context_length, changed)) = self
1791                .refresh_runtime_profile_and_report(&tx, "turn_start")
1792                .await
1793            {
1794                if changed {
1795                    let _ = tx
1796                        .send(InferenceEvent::Thought(format!(
1797                            "Runtime refresh: using model `{}` with CTX {} for this turn.",
1798                            model_id, context_length
1799                        )))
1800                        .await;
1801                }
1802            }
1803        }
1804        self.emit_compaction_pressure(&tx).await;
1805        let current_model = self.engine.current_model();
1806        self.engine.set_gemma_native_formatting(
1807            crate::agent::config::effective_gemma_native_formatting(&config, &current_model),
1808        );
1809        let _turn_id = self.begin_grounded_turn().await;
1810        let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1811        let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1812            Ok(tools) => tools,
1813            Err(e) => {
1814                let _ = tx
1815                    .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1816                    .await;
1817                Vec::new()
1818            }
1819        };
1820
1821        // Apply config model overrides (config takes precedence over CLI flags).
1822        let effective_fast = config
1823            .fast_model
1824            .clone()
1825            .or_else(|| self.fast_model.clone());
1826        let effective_think = config
1827            .think_model
1828            .clone()
1829            .or_else(|| self.think_model.clone());
1830
1831        // ── /lsp: start language servers manually if needed ──────────────────
1832        if user_input.trim() == "/lsp" {
1833            let mut lsp = self.lsp_manager.lock().await;
1834            match lsp.start_servers().await {
1835                Ok(_) => {
1836                    let _ = tx
1837                        .send(InferenceEvent::MutedToken(
1838                            "LSP: Servers Initialized OK.".to_string(),
1839                        ))
1840                        .await;
1841                }
1842                Err(e) => {
1843                    let _ = tx
1844                        .send(InferenceEvent::Error(format!(
1845                            "LSP: Failed to start servers - {}",
1846                            e
1847                        )))
1848                        .await;
1849                }
1850            }
1851            let _ = tx.send(InferenceEvent::Done).await;
1852            return Ok(());
1853        }
1854
1855        if user_input.trim() == "/runtime-refresh" {
1856            match self
1857                .refresh_runtime_profile_and_report(&tx, "manual_command")
1858                .await
1859            {
1860                Some((model_id, context_length, changed)) => {
1861                    let msg = if changed {
1862                        format!(
1863                            "Runtime profile refreshed. Model: {} | CTX: {}",
1864                            model_id, context_length
1865                        )
1866                    } else {
1867                        format!(
1868                            "Runtime profile unchanged. Model: {} | CTX: {}",
1869                            model_id, context_length
1870                        )
1871                    };
1872                    for chunk in chunk_text(&msg, 8) {
1873                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1874                    }
1875                }
1876                None => {
1877                    let _ = tx
1878                        .send(InferenceEvent::Error(
1879                            "Runtime refresh failed: LM Studio profile could not be read."
1880                                .to_string(),
1881                        ))
1882                        .await;
1883                }
1884            }
1885            let _ = tx.send(InferenceEvent::Done).await;
1886            return Ok(());
1887        }
1888
1889        if user_input.trim() == "/ask" {
1890            self.set_workflow_mode(WorkflowMode::Ask);
1891            for chunk in chunk_text(
1892                "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1893                8,
1894            ) {
1895                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1896            }
1897            let _ = tx.send(InferenceEvent::Done).await;
1898            return Ok(());
1899        }
1900
1901        if user_input.trim() == "/code" {
1902            self.set_workflow_mode(WorkflowMode::Code);
1903            let mut message =
1904                "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1905            if let Some(plan) = self.current_plan_summary() {
1906                message.push_str(&format!(" Current plan: {plan}."));
1907            }
1908            for chunk in chunk_text(&message, 8) {
1909                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1910            }
1911            let _ = tx.send(InferenceEvent::Done).await;
1912            return Ok(());
1913        }
1914
1915        if user_input.trim() == "/architect" {
1916            self.set_workflow_mode(WorkflowMode::Architect);
1917            let mut message =
1918                "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement. When the handoff is ready, use `/implement-plan` or switch to `/code` to execute it.".to_string();
1919            if let Some(plan) = self.current_plan_summary() {
1920                message.push_str(&format!(" Existing plan: {plan}."));
1921            }
1922            for chunk in chunk_text(&message, 8) {
1923                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1924            }
1925            let _ = tx.send(InferenceEvent::Done).await;
1926            return Ok(());
1927        }
1928
1929        if user_input.trim() == "/read-only" {
1930            self.set_workflow_mode(WorkflowMode::ReadOnly);
1931            for chunk in chunk_text(
1932                "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1933                8,
1934            ) {
1935                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1936            }
1937            let _ = tx.send(InferenceEvent::Done).await;
1938            return Ok(());
1939        }
1940
1941        if user_input.trim() == "/auto" {
1942            self.set_workflow_mode(WorkflowMode::Auto);
1943            for chunk in chunk_text(
1944                "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1945                8,
1946            ) {
1947                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1948            }
1949            let _ = tx.send(InferenceEvent::Done).await;
1950            return Ok(());
1951        }
1952
1953        if user_input.trim() == "/chat" {
1954            self.set_workflow_mode(WorkflowMode::Chat);
1955            let _ = tx.send(InferenceEvent::Done).await;
1956            return Ok(());
1957        }
1958
1959        if user_input.trim() == "/teach" {
1960            self.set_workflow_mode(WorkflowMode::Teach);
1961            for chunk in chunk_text(
1962                "Workflow mode: TEACH. I will inspect your actual machine state first, then walk you through any admin, config, or write task as a grounded, numbered tutorial. I will not execute write operations — I will show you exactly how to do each step yourself.",
1963                8,
1964            ) {
1965                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1966            }
1967            let _ = tx.send(InferenceEvent::Done).await;
1968            return Ok(());
1969        }
1970
1971        if user_input.trim() == "/reroll" {
1972            let soul = crate::ui::hatch::generate_soul_random();
1973            self.snark = soul.snark;
1974            self.chaos = soul.chaos;
1975            self.soul_personality = soul.personality.clone();
1976            // Update the engine's species name so build_chat_system_prompt uses it
1977            // SAFETY: engine is Arc but species is a plain String field we own logically.
1978            // We use Arc::get_mut which only succeeds if this is the only strong ref.
1979            // If it fails (swarm workers hold refs), we fall back to a best-effort clone approach.
1980            let species = soul.species.clone();
1981            if let Some(eng) = Arc::get_mut(&mut self.engine) {
1982                eng.species = species.clone();
1983            }
1984            let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1985            let _ = tx
1986                .send(InferenceEvent::SoulReroll {
1987                    species: soul.species.clone(),
1988                    rarity: soul.rarity.label().to_string(),
1989                    shiny: soul.shiny,
1990                    personality: soul.personality.clone(),
1991                })
1992                .await;
1993            for chunk in chunk_text(
1994                &format!(
1995                    "A new companion awakens!\n[{}{}] {} — \"{}\"",
1996                    soul.rarity.label(),
1997                    shiny_tag,
1998                    soul.species,
1999                    soul.personality
2000                ),
2001                8,
2002            ) {
2003                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2004            }
2005            let _ = tx.send(InferenceEvent::Done).await;
2006            return Ok(());
2007        }
2008
2009        if user_input.trim() == "/agent" {
2010            self.set_workflow_mode(WorkflowMode::Auto);
2011            let _ = tx.send(InferenceEvent::Done).await;
2012            return Ok(());
2013        }
2014
2015        let implement_plan_alias = user_input.trim() == "/implement-plan";
2016        if implement_plan_alias
2017            && !self
2018                .session_memory
2019                .current_plan
2020                .as_ref()
2021                .map(|plan| plan.has_signal())
2022                .unwrap_or(false)
2023        {
2024            for chunk in chunk_text(
2025                "No saved architect handoff is active. Run `/architect` first, or switch to `/code` with an explicit implementation request.",
2026                8,
2027            ) {
2028                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2029            }
2030            let _ = tx.send(InferenceEvent::Done).await;
2031            return Ok(());
2032        }
2033
2034        let mut effective_user_input = if implement_plan_alias {
2035            self.set_workflow_mode(WorkflowMode::Code);
2036            implement_current_plan_prompt().to_string()
2037        } else {
2038            user_input.trim().to_string()
2039        };
2040        if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
2041            self.set_workflow_mode(mode);
2042            effective_user_input = rest.to_string();
2043        }
2044        let transcript_user_input = if implement_plan_alias {
2045            transcript_user_turn_text(user_turn, "/implement-plan")
2046        } else {
2047            transcript_user_turn_text(user_turn, &effective_user_input)
2048        };
2049        effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
2050        let implement_current_plan = self.workflow_mode == WorkflowMode::Code
2051            && is_current_plan_execution_request(&effective_user_input)
2052            && self
2053                .session_memory
2054                .current_plan
2055                .as_ref()
2056                .map(|plan| plan.has_signal())
2057                .unwrap_or(false);
2058        self.plan_execution_active
2059            .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
2060        let _plan_execution_guard = PlanExecutionGuard {
2061            flag: self.plan_execution_active.clone(),
2062        };
2063        let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
2064
2065        // ── /think / /no_think: reasoning budget toggle ──────────────────────
2066        if let Some(answer_kind) = intent.direct_answer {
2067            match answer_kind {
2068                DirectAnswerKind::About => {
2069                    let response = build_about_answer();
2070                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2071                        .await;
2072                    return Ok(());
2073                }
2074                DirectAnswerKind::LanguageCapability => {
2075                    let response = build_language_capability_answer();
2076                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2077                        .await;
2078                    return Ok(());
2079                }
2080                DirectAnswerKind::UnsafeWorkflowPressure => {
2081                    let response = build_unsafe_workflow_pressure_answer();
2082                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2083                        .await;
2084                    return Ok(());
2085                }
2086                DirectAnswerKind::SessionMemory => {
2087                    let response = build_session_memory_answer();
2088                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2089                        .await;
2090                    return Ok(());
2091                }
2092                DirectAnswerKind::RecoveryRecipes => {
2093                    let response = build_recovery_recipes_answer();
2094                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2095                        .await;
2096                    return Ok(());
2097                }
2098                DirectAnswerKind::McpLifecycle => {
2099                    let response = build_mcp_lifecycle_answer();
2100                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2101                        .await;
2102                    return Ok(());
2103                }
2104                DirectAnswerKind::AuthorizationPolicy => {
2105                    let response = build_authorization_policy_answer();
2106                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2107                        .await;
2108                    return Ok(());
2109                }
2110                DirectAnswerKind::ToolClasses => {
2111                    let response = build_tool_classes_answer();
2112                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2113                        .await;
2114                    return Ok(());
2115                }
2116                DirectAnswerKind::ToolRegistryOwnership => {
2117                    let response = build_tool_registry_ownership_answer();
2118                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2119                        .await;
2120                    return Ok(());
2121                }
2122                DirectAnswerKind::SessionResetSemantics => {
2123                    let response = build_session_reset_semantics_answer();
2124                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2125                        .await;
2126                    return Ok(());
2127                }
2128                DirectAnswerKind::ProductSurface => {
2129                    let response = build_product_surface_answer();
2130                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2131                        .await;
2132                    return Ok(());
2133                }
2134                DirectAnswerKind::ReasoningSplit => {
2135                    let response = build_reasoning_split_answer();
2136                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2137                        .await;
2138                    return Ok(());
2139                }
2140                DirectAnswerKind::Identity => {
2141                    let response = build_identity_answer();
2142                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2143                        .await;
2144                    return Ok(());
2145                }
2146                DirectAnswerKind::WorkflowModes => {
2147                    let response = build_workflow_modes_answer();
2148                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2149                        .await;
2150                    return Ok(());
2151                }
2152                DirectAnswerKind::GemmaNative => {
2153                    let response = build_gemma_native_answer();
2154                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2155                        .await;
2156                    return Ok(());
2157                }
2158                DirectAnswerKind::GemmaNativeSettings => {
2159                    let response = build_gemma_native_settings_answer();
2160                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2161                        .await;
2162                    return Ok(());
2163                }
2164                DirectAnswerKind::VerifyProfiles => {
2165                    let response = build_verify_profiles_answer();
2166                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2167                        .await;
2168                    return Ok(());
2169                }
2170                DirectAnswerKind::Toolchain => {
2171                    let lower = effective_user_input.to_lowercase();
2172                    let topic = if (lower.contains("voice output") || lower.contains("voice"))
2173                        && (lower.contains("lag")
2174                            || lower.contains("behind visible text")
2175                            || lower.contains("latency"))
2176                    {
2177                        "voice_latency_plan"
2178                    } else {
2179                        "all"
2180                    };
2181                    let response =
2182                        crate::tools::toolchain::describe_toolchain(&serde_json::json!({
2183                            "topic": topic,
2184                            "question": effective_user_input,
2185                        }))
2186                        .await
2187                        .unwrap_or_else(|e| format!("Error: {}", e));
2188                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2189                        .await;
2190                    return Ok(());
2191                }
2192                DirectAnswerKind::HostInspection => {
2193                    let topics = all_host_inspection_topics(&effective_user_input);
2194                    let response = if topics.len() >= 2 {
2195                        let mut combined = Vec::new();
2196                        for topic in topics {
2197                            let output =
2198                                crate::tools::host_inspect::inspect_host(&serde_json::json!({
2199                                    "topic": topic,
2200                                }))
2201                                .await
2202                                .unwrap_or_else(|e| format!("Error (topic {topic}): {e}"));
2203                            combined.push(format!("# Topic: {topic}\n{output}"));
2204                        }
2205                        combined.join("\n\n---\n\n")
2206                    } else {
2207                        let topic = preferred_host_inspection_topic(&effective_user_input)
2208                            .unwrap_or("summary");
2209                        crate::tools::host_inspect::inspect_host(&serde_json::json!({
2210                            "topic": topic,
2211                        }))
2212                        .await
2213                        .unwrap_or_else(|e| format!("Error: {e}"))
2214                    };
2215
2216                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2217                        .await;
2218                    return Ok(());
2219                }
2220                DirectAnswerKind::ArchitectSessionResetPlan => {
2221                    let plan = build_architect_session_reset_plan();
2222                    let response = plan.to_markdown();
2223                    let _ = crate::tools::plan::save_plan_handoff(&plan);
2224                    self.session_memory.current_plan = Some(plan);
2225                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2226                        .await;
2227                    return Ok(());
2228                }
2229            }
2230        }
2231
2232        if matches!(
2233            self.workflow_mode,
2234            WorkflowMode::Ask | WorkflowMode::ReadOnly
2235        ) && looks_like_mutation_request(&effective_user_input)
2236        {
2237            let response = build_mode_redirect_answer(self.workflow_mode);
2238            self.history.push(ChatMessage::user(&effective_user_input));
2239            self.history.push(ChatMessage::assistant_text(&response));
2240            self.transcript.log_user(&transcript_user_input);
2241            self.transcript.log_agent(&response);
2242            for chunk in chunk_text(&response, 8) {
2243                if !chunk.is_empty() {
2244                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2245                }
2246            }
2247            let _ = tx.send(InferenceEvent::Done).await;
2248            self.trim_history(80);
2249            self.refresh_session_memory();
2250            self.save_session();
2251            return Ok(());
2252        }
2253
2254        if user_input.trim() == "/think" {
2255            self.think_mode = Some(true);
2256            for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
2257                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2258            }
2259            let _ = tx.send(InferenceEvent::Done).await;
2260            return Ok(());
2261        }
2262        if user_input.trim() == "/no_think" {
2263            self.think_mode = Some(false);
2264            for chunk in chunk_text(
2265                "Think mode: OFF — fast mode enabled (no chain-of-thought).",
2266                8,
2267            ) {
2268                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2269            }
2270            let _ = tx.send(InferenceEvent::Done).await;
2271            return Ok(());
2272        }
2273
2274        // ── /pin: add file to active context ────────────────────────────────
2275        if user_input.trim_start().starts_with("/pin ") {
2276            let path = user_input.trim_start()[5..].trim();
2277            match std::fs::read_to_string(path) {
2278                Ok(content) => {
2279                    self.pinned_files
2280                        .lock()
2281                        .await
2282                        .insert(path.to_string(), content);
2283                    let msg = format!(
2284                        "Pinned: {} — this file is now locked in model context.",
2285                        path
2286                    );
2287                    for chunk in chunk_text(&msg, 8) {
2288                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
2289                    }
2290                }
2291                Err(e) => {
2292                    let _ = tx
2293                        .send(InferenceEvent::Error(format!(
2294                            "Failed to pin {}: {}",
2295                            path, e
2296                        )))
2297                        .await;
2298                }
2299            }
2300            let _ = tx.send(InferenceEvent::Done).await;
2301            return Ok(());
2302        }
2303
2304        // ── /unpin: remove file from active context ──────────────────────────
2305        if user_input.trim_start().starts_with("/unpin ") {
2306            let path = user_input.trim_start()[7..].trim();
2307            if self.pinned_files.lock().await.remove(path).is_some() {
2308                let msg = format!("Unpinned: {} — file removed from active context.", path);
2309                for chunk in chunk_text(&msg, 8) {
2310                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2311                }
2312            } else {
2313                let _ = tx
2314                    .send(InferenceEvent::Error(format!(
2315                        "File {} was not pinned.",
2316                        path
2317                    )))
2318                    .await;
2319            }
2320            let _ = tx.send(InferenceEvent::Done).await;
2321            return Ok(());
2322        }
2323
2324        // ── Normal processing ───────────────────────────────────────────────
2325
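2326        // Build the base system prompt from the built-in tools plus the MCP tools already held in `mcp_tools`. A context length of 8_192 or less turns on tiny-context mode, which skips the optional prompt sections below.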
2327        let tiny_context_mode = self.engine.current_context_length() <= 8_192;
2328        let mut base_prompt = self.engine.build_system_prompt(
2329            self.snark,
2330            self.chaos,
2331            self.brief,
2332            self.professional,
2333            &self.tools,
2334            self.reasoning_history.as_deref(),
2335            &mcp_tools,
2336        );
2337        if !tiny_context_mode {
2338            if let Some(hint) = &config.context_hint {
2339                if !hint.trim().is_empty() {
2340                    base_prompt.push_str(&format!(
2341                        "\n\n# Project Context (from .hematite/settings.json)\n{}",
2342                        hint
2343                    ));
2344                }
2345            }
2346            if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
2347                &crate::tools::file_ops::workspace_root(),
2348            ) {
2349                base_prompt.push_str(&format!("\n\n{}", profile_block));
2350            }
2351            // L1: inject hot-files block if available (persists across sessions via vein.db).
2352            if let Some(ref l1) = self.l1_context {
2353                base_prompt.push_str(&format!("\n\n{}", l1));
2354            }
2355            if let Some(ref repo_map_block) = self.repo_map {
2356                base_prompt.push_str(&format!("\n\n{}", repo_map_block));
2357            }
2358        }
2359        let grounded_trace_mode = intent.grounded_trace_mode
2360            || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2361        let capability_mode =
2362            intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2363        let toolchain_mode =
2364            intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2365        let host_inspection_mode = intent.host_inspection_mode;
2366        let maintainer_workflow_mode = intent.maintainer_workflow_mode
2367            || preferred_maintainer_workflow(&effective_user_input).is_some();
2368        let workspace_workflow_mode = intent.workspace_workflow_mode
2369            || preferred_workspace_workflow(&effective_user_input).is_some();
2370        let fix_plan_mode =
2371            preferred_host_inspection_topic(&effective_user_input) == Some("fix_plan");
2372        let architecture_overview_mode = intent.architecture_overview_mode;
2373        let capability_needs_repo = intent.capability_needs_repo;
2374        let mut system_msg = build_system_with_corrections(
2375            &base_prompt,
2376            &self.correction_hints,
2377            &self.gpu_state,
2378            &self.git_state,
2379            &config,
2380        );
2381        if tiny_context_mode {
2382            system_msg.push_str(
2383                "\n\n# TINY CONTEXT TURN MODE\n\
2384                 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2385            );
2386        }
2387        if !tiny_context_mode && grounded_trace_mode {
2388            system_msg.push_str(
2389                "\n\n# GROUNDED TRACE MODE\n\
2390                 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2391                 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2392                 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2393                 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2394                 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2395                 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2396                 Do not invent names such as synthetic channels or subsystems.\n\
2397                 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2398                For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2399            );
2400        }
2401        if !tiny_context_mode && capability_mode {
2402            system_msg.push_str(
2403                "\n\n# CAPABILITY QUESTION MODE\n\
2404                 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2405                 Answer from stable Hematite capabilities and current runtime state.\n\
2406                 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2407                 Do NOT call repo-inspection tools like `read_file` or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2408                 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2409                 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2410                 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2411                 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2412                 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2413                 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2414                 Keep the answer short, plain, and ASCII-first.\n"
2415            );
2416        }
2417        if !tiny_context_mode && toolchain_mode {
2418            system_msg.push_str(
2419                "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2420                 This turn is about Hematite's real built-in tools and how to choose them.\n\
2421                 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2422                 Use only real built-in tool names.\n\
2423                 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2424                 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2425                 Be explicit about which tools are optional or conditional.\n"
2426            );
2427        }
2428        if !tiny_context_mode && host_inspection_mode {
2429            system_msg.push_str(
2430                 "\n\n# HOST INSPECTION MODE\n\
2431                 This turn is about the local machine. Make EXACTLY ONE `inspect_host` call using the best matching topic below, then answer. Do NOT call `summary` first. Do NOT make exploratory shell calls.\n\
2432                 - Drive space / disk usage / free space / storage across drives → `storage`\n\
2433                 - CPU model / RAM size / GPU name / hardware specs / BIOS / motherboard → `hardware`\n\
2434                 - CPU % / RAM % / what is using resources / slow machine → `resource_load`\n\
2435                 - Running processes / task manager / what is using RAM → `processes`\n\
2436                 - Windows services / daemons / service state → `services`\n\
2437                 - Listening ports / open ports / what process owns port N / which processes are listening / what is bound to a port → `ports` (waiting for inbound connections — includes PIDs and process names — do NOT also call `processes`)\n\
2438                 - Active connections / established connections / what is connected right now / outbound sessions / show me connections / network connections → `connections` (live two-way sessions, NOT listening ports)\n\
2439                 - Network adapters / IP / gateway / DNS overview → `network`\n\
2440                 - Internet / online / can I reach the internet → `connectivity`\n\
2441                 - Wi-Fi / wireless / signal strength / SSID → `wifi`\n\
2442                 - VPN tunnel / VPN adapter → `vpn`\n\
2443                 - Security / Defender / antivirus / firewall / UAC → `security`\n\
2444                 - Windows Update / pending updates → `updates`\n\
2445                 - Health report / system status overall → `health_report`\n\
2446                 - PATH entries / raw PATH → `path`\n\
2447                 - Installed developer tools / versions / toolchain → `toolchains`\n\
2448                 - Environment/package-manager conflicts → `env_doctor`\n\
2449                 - Fix a workstation problem (cargo not found, port in use, LM Studio) → `fix_plan`\n\
2450                 - Recent Windows errors / warnings / event log / event viewer / show me errors / what failed recently → `log_check` (do NOT call health_report first)\n\
2451                 - Repo / git / workspace health → `repo_doctor`\n\
2452                 - List a specific directory → `directory` (pass `path` arg)\n\
2453                 - Desktop or Downloads folder → `desktop` or `downloads`\n\
2454                 NEVER use `disk` or `directory` for storage/space questions — use `storage`.\n\
2455                 Only use `shell` if the question truly cannot be answered by any topic above.\n\
2456                 NEVER tell the user to run PowerShell, cmd, or shell commands themselves. If the data is incomplete, say so and tell them to ask a more specific question instead.\n\
2457                 NEVER expose internal tool names or API syntax (like `inspect_host(topic=...)`) in your response. Refer to capabilities in plain English: say 'ask me for a fix plan' not 'run inspect_host(topic=fix_plan)'.\n"
2458              );
2459        }
2460        if !tiny_context_mode && fix_plan_mode {
2461            system_msg.push_str(
2462                "\n\n# FIX PLAN MODE\n\
2463                 This turn is a workstation remediation question, not just a diagnosis question.\n\
2464                 Call `inspect_host` with `topic=fix_plan` first.\n\
2465                 Do not start with `path`, `toolchains`, `env_doctor`, or `ports` unless the user explicitly asks for diagnosis details instead of a fix plan.\n\
2466                 Keep the answer grounded, stepwise, and approval-aware.\n"
2467            );
2468        }
2469        if !tiny_context_mode && maintainer_workflow_mode {
2470            system_msg.push_str(
2471                "\n\n# HEMATITE MAINTAINER WORKFLOW MODE\n\
2472                 This turn asks Hematite to run one of Hematite's own maintainer workflows, not invent an ad hoc shell command.\n\
2473                 Prefer `run_hematite_maintainer_workflow` for existing Hematite workflows such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`.\n\
2474                 Use workflow `clean` for cleanup, workflow `package_windows` for rebuilding the local portable or installer, and workflow `release` for the normal version bump/tag/push/publish flow.\n\
2475                 Do not treat this as a generic current-workspace script runner. Only fall back to raw `shell` if the user asks for a script or command outside those Hematite maintainer workflows.\n"
2476            );
2477        }
2478        if !tiny_context_mode && workspace_workflow_mode {
2479            system_msg.push_str(
2480                "\n\n# WORKSPACE WORKFLOW MODE\n\
2481                 This turn asks Hematite to run something in the active project workspace, not in Hematite's own source tree.\n\
2482                 Prefer `run_workspace_workflow` for the current project's build, test, lint, fix, package scripts, just/task/make targets, local repo scripts, or an exact workspace command.\n\
2483                 This tool always runs from the locked workspace root.\n\
2484                 If no real project workspace is locked, say so and tell the user to relaunch Hematite in the target project directory.\n\
2485                 Do not use `run_hematite_maintainer_workflow` unless the request is specifically about Hematite's own cleanup, packaging, or release scripts.\n"
2486            );
2487        }
2488
2489        if !tiny_context_mode && architecture_overview_mode {
2490            system_msg.push_str(
2491                "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2492                 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow.\n\
2493                 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2494                 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2495            );
2496        }
2497
2498        // ── Inject Workflow Mode Guidance ────────────────────────────────────
2499        system_msg.push_str(&format!(
2500            "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2501            self.workflow_mode.label()
2502        ));
2503        if tiny_context_mode {
2504            system_msg
2505                .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2506        } else {
2507            match self.workflow_mode {
2508                WorkflowMode::Auto => system_msg.push_str(
2509                    "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2510                ),
2511                WorkflowMode::Ask => system_msg.push_str(
2512                    "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2513                ),
2514                WorkflowMode::Code => system_msg.push_str(
2515                    "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2516                ),
2517                WorkflowMode::Architect => system_msg.push_str(
2518                    "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2519                ),
2520                WorkflowMode::ReadOnly => system_msg.push_str(
2521                    "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2522                ),
2523                WorkflowMode::Teach => system_msg.push_str(
2524                    "TEACH means you are a senior technician giving the user a grounded, numbered walkthrough. \
2525                     MANDATORY PROTOCOL for every admin/config/write task:\n\
2526                     1. Call inspect_host with the most relevant topic(s) FIRST to observe the actual machine state.\n\
2527                     2. Then deliver a numbered step-by-step tutorial that references what you actually observed — exact commands, exact paths, exact values.\n\
2528                     3. End with a verification step the user can run to confirm success.\n\
2529                     4. Do NOT execute write operations yourself. You are the teacher; the user performs the steps.\n\
2530                     5. Treat the user as capable — give precise instructions, not hedged warnings.\n\
2531                     Relevant inspect_host topics for common tasks: hardware (driver installs), overclocker (GPU/silicon vitals), security (firewall), ssh (SSH keys), wsl (WSL setup), env (PATH/env vars), services (service config), recent_crashes (troubleshooting), disk_health (storage issues).\n",
2532                ),
2533                WorkflowMode::Chat => {} // replaced by build_chat_system_prompt below
2534            }
2535        }
2536        if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2537            system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2538            system_msg.push_str(architect_handoff_contract());
2539            system_msg.push('\n');
2540        }
2541        if !tiny_context_mode && implement_current_plan {
2542            system_msg.push_str(
2543                "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2544                 The user explicitly asked you to implement the current saved plan.\n\
2545                 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2546                 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2547                 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2548                 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2549            );
2550            if let Some(plan) = self.session_memory.current_plan.as_ref() {
2551                if !plan.target_files.is_empty() {
2552                    system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2553                    for path in &plan.target_files {
2554                        system_msg.push_str(&format!("- {}\n", path));
2555                    }
2556                }
2557            }
2558        }
2559        if !tiny_context_mode {
2560            let pinned = self.pinned_files.lock().await;
2561            if !pinned.is_empty() {
2562                system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2563                system_msg.push_str("The following files are locked in your active memory for prioritized reference.\n\n");
2564                for (path, content) in pinned.iter() {
2565                    system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2566                }
2567            }
2568        }
2569        if !tiny_context_mode {
2570            self.append_session_handoff(&mut system_msg);
2571        }
2572        // In chat mode, replace the full harness prompt with a clean conversational surface.
2573        // The harness prompt (built above) is discarded — Rusty personality takes over.
2574        let system_msg = if self.workflow_mode.is_chat() {
2575            self.build_chat_system_prompt()
2576        } else {
2577            system_msg
2578        };
2579        if self.history.is_empty() || self.history[0].role != "system" {
2580            self.history.insert(0, ChatMessage::system(&system_msg));
2581        } else {
2582            self.history[0] = ChatMessage::system(&system_msg);
2583        }
2584
2585        // Ensure a clean state for the new turn.
2586        self.cancel_token
2587            .store(false, std::sync::atomic::Ordering::SeqCst);
2588
2589        // [Official Gemma-4 Spec] Purge reasoning history for new user turns.
2590        // History from previous turns must not be fed back into the prompt to prevent duplication.
2591        self.reasoning_history = None;
2592
2593        let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2594        let user_content = match self.think_mode {
2595            Some(true) => format!("/think\n{}", effective_user_input),
2596            Some(false) => format!("/no_think\n{}", effective_user_input),
2597            // For non-Gemma models (Qwen etc.) default to /think so the model uses
2598            // hybrid thinking — it decides how much reasoning each turn needs.
2599            // Gemma handles reasoning via <|think|> in the system prompt instead.
2600            // Chat mode and quick tool calls skip /think — fast direct answers.
2601            None if !is_gemma
2602                && !self.workflow_mode.is_chat()
2603                && !is_quick_tool_request(&effective_user_input) =>
2604            {
2605                format!("/think\n{}", effective_user_input)
2606            }
2607            None => effective_user_input.clone(),
2608        };
2609        if let Some(image) = user_turn.attached_image.as_ref() {
2610            let image_url =
2611                crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2612                    .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2613            self.history
2614                .push(ChatMessage::user_with_image(&user_content, &image_url));
2615        } else {
2616            self.history.push(ChatMessage::user(&user_content));
2617        }
2618        self.transcript.log_user(&transcript_user_input);
2619
2620        // Incremental re-index and Vein context injection. Ordinary chat mode
2621        // still skips repo-snippet noise, but docs-only workspaces and explicit
2622        // session-recall prompts should keep Vein memory available.
2623        let vein_docs_only = self.vein_docs_only_mode();
2624        let allow_vein_context = !self.workflow_mode.is_chat()
2625            || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2626        let (vein_context, vein_paths) = if allow_vein_context {
2627            self.refresh_vein_index();
2628            let _ = tx
2629                .send(InferenceEvent::VeinStatus {
2630                    file_count: self.vein.file_count(),
2631                    embedded_count: self.vein.embedded_chunk_count(),
2632                    docs_only: vein_docs_only,
2633                })
2634                .await;
2635            match self.build_vein_context(&effective_user_input) {
2636                Some((ctx, paths)) => (Some(ctx), paths),
2637                None => (None, Vec::new()),
2638            }
2639        } else {
2640            (None, Vec::new())
2641        };
2642        if !vein_paths.is_empty() {
2643            let _ = tx
2644                .send(InferenceEvent::VeinContext { paths: vein_paths })
2645                .await;
2646        }
2647
2648        // Route: pick fast vs think model based on the complexity of this request.
2649        let routed_model = route_model(
2650            &effective_user_input,
2651            effective_fast.as_deref(),
2652            effective_think.as_deref(),
2653        )
2654        .map(|s| s.to_string());
2655
2656        let mut loop_intervention: Option<String> = None;
2657
2658        // ── Harness pre-run: multi-topic host inspection ─────────────────────
2659        // When the user asks for 2+ distinct inspect_host topics in one message,
2660        // run them all here and inject the combined results as a loop_intervention
2661        // so the model receives data instead of having to orchestrate tool calls.
2662        // This prevents the model from collapsing multiple topics into a generic
2663        // one, burning the tool loop budget, or retrying via shell.
2664        {
2665            let topics = all_host_inspection_topics(&effective_user_input);
2666            if topics.len() >= 2 {
2667                let _ = tx
2668                    .send(InferenceEvent::Thought(format!(
2669                        "Harness pre-run: {} host inspection topics detected — running all before model turn.",
2670                        topics.len()
2671                    )))
2672                    .await;
2673
2674                let topic_list = topics.join(", ");
2675                let mut combined = format!(
2676                    "## HARNESS PRE-RUN RESULTS\n\
2677                     The harness already ran inspect_host for the following topics: {topic_list}.\n\
2678                     Use the tool results in context to answer. Do NOT repeat these tool calls.\n\n"
2679                );
2680
2681                let mut tool_calls = Vec::new();
2682                let mut tool_msgs = Vec::new();
2683
2684                for topic in &topics {
2685                    let call_id = format!("prerun_{topic}");
2686                    let args_val = serde_json::json!({ "topic": *topic, "max_entries": 20 });
2687                    let args_str = serde_json::to_string(&args_val).unwrap_or_default();
2688
2689                    tool_calls.push(crate::agent::inference::ToolCallResponse {
2690                        id: call_id.clone(),
2691                        call_type: "function".to_string(),
2692                        function: crate::agent::inference::ToolCallFn {
2693                            name: "inspect_host".to_string(),
2694                            arguments: args_str,
2695                        },
2696                    });
2697
2698                    let label = format!("### inspect_host(topic=\"{topic}\")\n");
2699                    let _ = tx
2700                        .send(InferenceEvent::ToolCallStart {
2701                            id: call_id.clone(),
2702                            name: "inspect_host".to_string(),
2703                            args: format!("inspect host {topic}"),
2704                        })
2705                        .await;
2706
2707                    match crate::tools::host_inspect::inspect_host(&args_val).await {
2708                        Ok(out) => {
2709                            let _ = tx
2710                                .send(InferenceEvent::ToolCallResult {
2711                                    id: call_id.clone(),
2712                                    name: "inspect_host".to_string(),
2713                                    output: out.chars().take(300).collect::<String>() + "...",
2714                                    is_error: false,
2715                                })
2716                                .await;
2717                            combined.push_str(&label);
2718                            combined.push_str(&out);
2719                            combined.push_str("\n\n");
2720                            tool_msgs.push(ChatMessage::tool_result_for_model(
2721                                &call_id,
2722                                "inspect_host",
2723                                &out,
2724                                &self.engine.current_model(),
2725                            ));
2726                        }
2727                        Err(e) => {
2728                            let err_msg = format!("Error: {e}");
2729                            combined.push_str(&label);
2730                            combined.push_str(&err_msg);
2731                            combined.push_str("\n\n");
2732                            tool_msgs.push(ChatMessage::tool_result_for_model(
2733                                &call_id,
2734                                "inspect_host",
2735                                &err_msg,
2736                                &self.engine.current_model(),
2737                            ));
2738                        }
2739                    }
2740                }
2741
2742                // Add the simulated turn to history so the model sees it as context.
2743                self.history
2744                    .push(ChatMessage::assistant_tool_calls("", tool_calls));
2745                for msg in tool_msgs {
2746                    self.history.push(msg);
2747                }
2748
2749                loop_intervention = Some(combined);
2750            }
2751        }
2752
2753        // ── Computation Integrity: nudge model toward run_code for precise math ──
2754        // When the query involves exact numeric computation (hashes, financial math,
2755        // statistics, date arithmetic, unit conversions, algorithmic checks), inject
2756        // a brief pre-turn reminder so the model reaches for run_code instead of
2757        // answering from training-data memory. Only fires when no harness pre-run
2758        // already set a loop_intervention.
2759        if loop_intervention.is_none() && needs_computation_sandbox(&effective_user_input) {
2760            loop_intervention = Some(
2761                "COMPUTATION INTEGRITY NOTICE: This query involves precise numeric computation. \
2762                 Do NOT answer from training-data memory — memory answers for math are guesses. \
2763                 Use `run_code` to compute the real result and return the actual output. \
2764                 IMPORTANT: the `run_code` tool defaults to JavaScript (Deno). \
2765                 If you write Python code, you MUST pass `language: \"python\"` explicitly. \
2766                 If you write JavaScript/TypeScript, omit the language field or pass `language: \"javascript\"`. \
2767                 Write the code, run it, return the result."
2768                    .to_string(),
2769            );
2770        }
2771
2772        // ── Native Tool Mandate: nudge model toward create_directory/write_file for local mutations ──
2773        if loop_intervention.is_none() && intent.surgical_filesystem_mode {
2774            loop_intervention = Some(
2775                "NATIVE TOOL MANDATE: Your request involves local directory or file creation. \
2776                 You MUST use Hematite's native surgical tools (`create_directory`, `write_file`, `update_file`, `patch_hunk`). \
2777                 External `mcp__filesystem__*` mutation tools are BLOCKED for these actions and will fail. \
2778                 Use `@DESKTOP/`, `@DOCUMENTS/`, or `@DOWNLOADS/` sovereign tokens for 100% path accuracy."
2779                    .to_string(),
2780            );
2781        }
2782
2783        let mut implementation_started = false;
2784        let mut non_mutating_plan_steps = 0usize;
2785        let non_mutating_plan_soft_cap = 5usize;
2786        let non_mutating_plan_hard_cap = 8usize;
2787        let mut overview_runtime_trace: Option<String> = None;
2788
2789        // Safety cap – never spin forever on a broken model.
2790        let max_iters = 25;
2791        let mut consecutive_errors = 0;
2792        let mut empty_cleaned_nudges = 0u8;
2793        let mut first_iter = true;
2794        let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2795        // Placeholder for tracking identical tool results within this turn (logical-loop detection); this immutable map is never populated.
2796        let _result_counts: std::collections::HashMap<String, usize> =
2797            std::collections::HashMap::new();
2798        // Track the count of identical (name, args) calls to detect infinite tool loops.
2799        let mut repeat_counts: std::collections::HashMap<String, usize> =
2800            std::collections::HashMap::new();
2801        let mut completed_tool_cache: std::collections::HashMap<String, CachedToolResult> =
2802            std::collections::HashMap::new();
2803        let mut successful_read_targets: std::collections::HashSet<String> =
2804            std::collections::HashSet::new();
2805        // (path, offset) pairs — catches repeated reads at the same non-zero offset.
2806        let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2807            std::collections::HashSet::new();
2808        let mut successful_grep_targets: std::collections::HashSet<String> =
2809            std::collections::HashSet::new();
2810        let mut no_match_grep_targets: std::collections::HashSet<String> =
2811            std::collections::HashSet::new();
2812        let mut broad_grep_targets: std::collections::HashSet<String> =
2813            std::collections::HashSet::new();
2814
2815        // Track the index of the message that started THIS turn, so compaction doesn't summarize it.
2816        let mut turn_anchor = self.history.len().saturating_sub(1);
2817
2818        for _iter in 0..max_iters {
2819            let mut mutation_occurred = false;
2820            // Priority Check: External Cancellation (via Esc key in TUI)
2821            if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2822                self.cancel_token
2823                    .store(false, std::sync::atomic::Ordering::SeqCst);
2824                let _ = tx
2825                    .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2826                    .await;
2827                let _ = tx.send(InferenceEvent::Done).await;
2828                return Ok(());
2829            }
2830
2831            // ── Intelligence Surge: Proactive Compaction Check ──────────────────────
2832            if self
2833                .compact_history_if_needed(&tx, Some(turn_anchor))
2834                .await?
2835            {
2836                // After compaction, history is [system, summary, turn_anchor, ...]
2837                // The new turn_anchor is index 2.
2838                turn_anchor = 2;
2839            }
2840
2841            // On the first iteration inject Vein context into the system message.
2842            // Subsequent iterations use the plain slice — tool results are now in
2843            // history so Vein context would be redundant.
2844            let inject_vein = first_iter && !implement_current_plan;
2845            let messages = if implement_current_plan {
2846                first_iter = false;
2847                self.context_window_slice_from(turn_anchor)
2848            } else {
2849                first_iter = false;
2850                self.context_window_slice()
2851            };
2852
2853            // Use the canonical system prompt from history[0] which was built
2854            // by InferenceEngine::build_system_prompt() + build_system_with_corrections()
2855            // and includes GPU state, git context, permissions, and instruction files.
2856            let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2857                // Gemma 4 handles multiple system messages natively.
2858                // Standard models (Qwen, etc.) reject a second system message — merge into history[0].
2859                if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2860                    let mut msgs = vec![self.history[0].clone()];
2861                    msgs.push(ChatMessage::system(&intervention));
2862                    msgs
2863                } else {
2864                    let merged =
2865                        format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2866                    vec![ChatMessage::system(&merged)]
2867                }
2868            } else {
2869                vec![self.history[0].clone()]
2870            };
2871
2872            // Inject Vein context into the system message on the first iteration.
2873            // Vein results are merged in the same way as loop_intervention so standard
2874            // models (Qwen etc.) only ever see one system message.
2875            if inject_vein {
2876                if let Some(ref ctx) = vein_context.as_ref() {
2877                    if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2878                        prompt_msgs.push(ChatMessage::system(ctx));
2879                    } else {
2880                        let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2881                        prompt_msgs[0] = ChatMessage::system(&merged);
2882                    }
2883                }
2884            }
2885            prompt_msgs.extend(messages);
2886            if let Some(budget_note) =
2887                enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2888            {
2889                self.emit_operator_checkpoint(
2890                    &tx,
2891                    OperatorCheckpointState::BudgetReduced,
2892                    budget_note,
2893                )
2894                .await;
2895                let recipe = plan_recovery(
2896                    RecoveryScenario::PromptBudgetPressure,
2897                    &self.recovery_context,
2898                );
2899                self.emit_recovery_recipe_summary(
2900                    &tx,
2901                    recipe.recipe.scenario.label(),
2902                    compact_recovery_plan_summary(&recipe),
2903                )
2904                .await;
2905            }
2906            self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2907                .await;
2908
2909            let turn_tools = if intent.sovereign_mode {
2910                self.tools
2911                    .iter()
2912                    .filter(|t| {
2913                        t.function.name != "shell" && t.function.name != "run_workspace_workflow"
2914                    })
2915                    .cloned()
2916                    .collect::<Vec<_>>()
2917            } else {
2918                self.tools.clone()
2919            };
2920
2921            let (mut text, mut tool_calls, usage, finish_reason) = match self
2922                .engine
2923                .call_with_tools(&prompt_msgs, &turn_tools, routed_model.as_deref())
2924                .await
2925            {
2926                Ok(result) => result,
2927                Err(e) => {
2928                    let class = classify_runtime_failure(&e);
2929                    if should_retry_runtime_failure(class) {
2930                        if self.recovery_context.consume_transient_retry() {
2931                            let label = match class {
2932                                RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2933                                _ => "empty_model_response",
2934                            };
2935                            self.transcript.log_system(&format!(
2936                                "Automatic provider recovery triggered: {}",
2937                                e.trim()
2938                            ));
2939                            self.emit_recovery_recipe_summary(
2940                                &tx,
2941                                label,
2942                                compact_runtime_recovery_summary(class),
2943                            )
2944                            .await;
2945                            let _ = tx
2946                                .send(InferenceEvent::ProviderStatus {
2947                                    state: ProviderRuntimeState::Recovering,
2948                                    summary: compact_runtime_recovery_summary(class).into(),
2949                                })
2950                                .await;
2951                            self.emit_operator_checkpoint(
2952                                &tx,
2953                                OperatorCheckpointState::RecoveringProvider,
2954                                compact_runtime_recovery_summary(class),
2955                            )
2956                            .await;
2957                            continue;
2958                        }
2959                    }
2960
2961                    self.emit_runtime_failure(&tx, class, &e).await;
2962                    break;
2963                }
2964            };
2965            self.emit_provider_live(&tx).await;
2966
2967            // ── LOOP GUARD: Reasoning Collapse Detection ──────────────────────────
2968            // If the model returns no text AND no tool calls, but has a massive
2969            // block of hidden reasoning (often seen as infinite newlines in small models),
2970            // trigger a safety stop to prevent token drain.
2971            if text.is_none() && tool_calls.is_none() {
2972                if let Some(reasoning) = usage.as_ref().and_then(|u| {
2973                    if u.completion_tokens > 2000 {
2974                        Some(u.completion_tokens)
2975                    } else {
2976                        None
2977                    }
2978                }) {
2979                    self.emit_operator_checkpoint(
2980                        &tx,
2981                        OperatorCheckpointState::BlockedToolLoop,
2982                        format!(
2983                            "Reasoning collapse detected ({} tokens of empty output).",
2984                            reasoning
2985                        ),
2986                    )
2987                    .await;
2988                    break;
2989                }
2990            }
2991
2992            // Update TUI token counter with actual usage from LM Studio.
2993            if let Some(ref u) = usage {
2994                let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2995            }
2996
2997            // Fallback safety net: if native tool markup leaked past the inference-layer
2998            // extractor, recover it here instead of treating it as plain assistant text.
2999            if tool_calls
3000                .as_ref()
3001                .map(|calls| calls.is_empty())
3002                .unwrap_or(true)
3003            {
3004                if let Some(raw_text) = text.as_deref() {
3005                    let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
3006                    if !native_calls.is_empty() {
3007                        tool_calls = Some(native_calls);
3008                        let stripped =
3009                            crate::agent::inference::strip_native_tool_call_text(raw_text);
3010                        text = if stripped.trim().is_empty() {
3011                            None
3012                        } else {
3013                            Some(stripped)
3014                        };
3015                    }
3016                }
3017            }
3018
3019            // Treat empty tool_calls arrays (Some(vec![])) the same as None –
3020            // the model returned text only; an empty array causes an infinite loop.
3021            let tool_calls = tool_calls.filter(|c| !c.is_empty());
3022            let near_context_ceiling = usage
3023                .as_ref()
3024                .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
3025                .unwrap_or(false);
3026
3027            if let Some(calls) = tool_calls {
3028                let (calls, prune_trace_note) =
3029                    prune_architecture_trace_batch(calls, architecture_overview_mode);
3030                if let Some(note) = prune_trace_note {
3031                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3032                }
3033
3034                let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
3035                    calls,
3036                    self.workflow_mode.is_read_only(),
3037                    architecture_overview_mode,
3038                );
3039                if let Some(note) = prune_bloat_note {
3040                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3041                }
3042
3043                let (calls, prune_note) = prune_authoritative_tool_batch(
3044                    calls,
3045                    grounded_trace_mode,
3046                    &effective_user_input,
3047                );
3048                if let Some(note) = prune_note {
3049                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3050                }
3051
3052                let (calls, prune_redir_note) = prune_redirected_shell_batch(calls);
3053                if let Some(note) = prune_redir_note {
3054                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3055                }
3056
3057                let (calls, batch_note) = order_batch_reads_first(calls);
3058                if let Some(note) = batch_note {
3059                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3060                }
3061
3062                if let Some(repeated_path) = calls
3063                    .iter()
3064                    .filter(|c| {
3065                        let parsed = serde_json::from_str::<Value>(
3066                            &crate::agent::inference::normalize_tool_argument_string(
3067                                &c.function.name,
3068                                &c.function.arguments,
3069                            ),
3070                        )
3071                        .ok();
3072                        let offset = parsed
3073                            .as_ref()
3074                            .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
3075                            .unwrap_or(0);
3076                        // Catch re-reads from the top (original behaviour) AND repeated
3077                        // reads at the exact same non-zero offset (new: catches targeted loops).
3078                        if offset < 200 {
3079                            return true;
3080                        }
3081                        if let Some(path) = parsed
3082                            .as_ref()
3083                            .and_then(|args| args.get("path").and_then(|v| v.as_str()))
3084                        {
3085                            let normalized = normalize_workspace_path(path);
3086                            return successful_read_regions.contains(&(normalized, offset));
3087                        }
3088                        false
3089                    })
3090                    .filter_map(|c| repeated_read_target(&c.function))
3091                    .find(|path| successful_read_targets.contains(path))
3092                {
3093                    loop_intervention = Some(format!(
3094                        "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
3095                        repeated_path
3096                    ));
3097                    let _ = tx
3098                        .send(InferenceEvent::Thought(
3099                            "Read discipline: preventing repeated full-file reads on the same path."
3100                                .into(),
3101                        ))
3102                        .await;
3103                    continue;
3104                }
3105
3106                if capability_mode
3107                    && !capability_needs_repo
3108                    && calls
3109                        .iter()
3110                        .all(|c| is_capability_probe_tool(&c.function.name))
3111                {
3112                    loop_intervention = Some(
3113                        "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
3114                         Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
3115                         Do not mention raw `mcp__*` names unless they are active and directly relevant."
3116                            .to_string(),
3117                    );
3118                    let _ = tx
3119                        .send(InferenceEvent::Thought(
3120                            "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
3121                                .into(),
3122                        ))
3123                        .await;
3124                    continue;
3125                }
3126
3127                // VOCAL AGENT: If the model provided reasoning alongside tools,
3128                // stream it to the SPECULAR panel now using the hardened extraction.
3129                let raw_content = text.as_deref().unwrap_or(" ");
3130
3131                if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
3132                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3133                    // Reasoning is silent (hidden in SPECULAR only).
3134                    self.reasoning_history = Some(thought);
3135                }
3136
3137                // [Gemma-4 Protocol] Keep raw content (including thoughts) during tool loops; when implementing the current plan it is first passed through cap_output with a limit of 1200.
3138                // Thoughts are only stripped before the 'final' user turn.
3139                let stored_tool_call_content = if implement_current_plan {
3140                    cap_output(raw_content, 1200)
3141                } else {
3142                    raw_content.to_string()
3143                };
3144                self.history.push(ChatMessage::assistant_tool_calls(
3145                    &stored_tool_call_content,
3146                    calls.clone(),
3147                ));
3148
3149                // ── LAYER 4: Parallel Tool Orchestration (Batching) ────────────────────
3150                let mut results = Vec::new();
3151                let gemma4_model =
3152                    crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
3153                let latest_user_prompt = self.latest_user_prompt();
3154                let mut seen_call_keys = std::collections::HashSet::new();
3155                let mut deduped_calls = Vec::new();
3156                for call in calls.clone() {
3157                    let (normalized_name, normalized_args) = normalized_tool_call_for_execution(
3158                        &call.function.name,
3159                        &call.function.arguments,
3160                        gemma4_model,
3161                        latest_user_prompt,
3162                    );
3163
3164                    // --- HALLUCINATION SANITIZER ---
3165                    if normalized_name == "shell" || normalized_name == "run_workspace_workflow" {
3166                        let cmd_val = normalized_args
3167                            .get("command")
3168                            .or_else(|| normalized_args.get("workflow"));
3169
3170                        if let Some(cmd) = cmd_val.and_then(|v| v.as_str()) {
3171                            if is_natural_language_hallucination(cmd) {
3172                                let err_msg = format!(
3173                                    "HALLUCINATION BLOCKED: You tried to pass natural language ('{}') into a command field. \
3174                                     Commands must be literal executables (e.g. `npm install`, `mkdir path`). \
3175                                     Use the correct surgical tool (like `create_directory`) instead of overthinking.",
3176                                    cmd
3177                                );
3178                                let _ = tx
3179                                    .send(InferenceEvent::Thought(format!(
3180                                        "Sanitizer error: {}",
3181                                        err_msg
3182                                    )))
3183                                    .await;
3184                                results.push(ToolExecutionOutcome {
3185                                    call_id: call.id.clone(),
3186                                    tool_name: normalized_name.clone(),
3187                                    args: normalized_args.clone(),
3188                                    output: err_msg,
3189                                    is_error: true,
3190                                    blocked_by_policy: false,
3191                                    msg_results: Vec::new(),
3192                                    latest_target_dir: None,
3193                                });
3194                                continue;
3195                            }
3196                        }
3197                    }
3198
3199                    let key = canonical_tool_call_key(&normalized_name, &normalized_args);
3200                    if seen_call_keys.insert(key) {
3201                        let repeat_guard_exempt = matches!(
3202                            normalized_name.as_str(),
3203                            "verify_build" | "git_commit" | "git_push"
3204                        );
3205                        if !repeat_guard_exempt {
3206                            if let Some(cached) = completed_tool_cache
3207                                .get(&canonical_tool_call_key(&normalized_name, &normalized_args))
3208                            {
3209                                let _ = tx
3210                                    .send(InferenceEvent::Thought(
3211                                        "Cached tool result reused: identical built-in invocation already completed earlier in this turn."
3212                                            .to_string(),
3213                                    ))
3214                                    .await;
3215                                loop_intervention = Some(format!(
3216                                    "STOP. You already called `{}` with identical arguments earlier in this turn and already have that result in conversation history. Do not call it again. Use the existing result to answer or choose a different next step.",
3217                                    cached.tool_name
3218                                ));
3219                                continue;
3220                            }
3221                        }
3222                        deduped_calls.push(call);
3223                    } else {
3224                        let _ = tx
3225                            .send(InferenceEvent::Thought(
3226                                "Duplicate tool call skipped: identical built-in invocation already ran this turn."
3227                                    .to_string(),
3228                            ))
3229                            .await;
3230                    }
3231                }
3232
3233                // Partition tool calls: Parallel Read vs Serial Mutating
3234                let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = deduped_calls
3235                    .into_iter()
3236                    .partition(|c| is_parallel_safe(&c.function.name));
3237
3238                // 1. Concurrent Execution (ParallelRead)
3239                if !parallel_calls.is_empty() {
3240                    let mut tasks = Vec::new();
3241                    for call in parallel_calls {
3242                        let tx_clone = tx.clone();
3243                        let config_clone = config.clone();
3244                        // Carry the real call ID into the outcome
3245                        let call_with_id = call.clone();
3246                        tasks.push(self.process_tool_call(
3247                            call_with_id.function,
3248                            config_clone,
3249                            yolo,
3250                            tx_clone,
3251                            call_with_id.id,
3252                        ));
3253                    }
3254                    // Run all parallel-safe calls concurrently and wait until every one has finished.
3255                    results.extend(futures::future::join_all(tasks).await);
3256                }
3257
3258                // 2. Sequential Execution (SerialMutating)
3259                for call in serial_calls {
3260                    results.push(
3261                        self.process_tool_call(
3262                            call.function,
3263                            config.clone(),
3264                            yolo,
3265                            tx.clone(),
3266                            call.id,
3267                        )
3268                        .await,
3269                    );
3270                }
3271
3272                // 3. Collate Messages into History & UI
3273                let mut authoritative_tool_output: Option<String> = None;
3274                let mut blocked_policy_output: Option<String> = None;
3275                let mut recoverable_policy_intervention: Option<String> = None;
3276                let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
3277                let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
3278                    None;
3279                for res in results {
3280                    let call_id = res.call_id.clone();
3281                    let tool_name = res.tool_name.clone();
3282                    let final_output = res.output.clone();
3283                    let is_error = res.is_error;
3284                    for msg in res.msg_results {
3285                        self.history.push(msg);
3286                    }
3287
3288                    // Update State for Verification Loop
3289                    if let Some(path) = res.latest_target_dir {
3290                        self.latest_target_dir = Some(path);
3291                    }
3292                    if matches!(
3293                        tool_name.as_str(),
3294                        "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
3295                    ) {
3296                        mutation_occurred = true;
3297                        implementation_started = true;
3298                        // Heat tracking: bump L1 score for the edited file.
3299                        if !is_error {
3300                            let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
3301                            if !path.is_empty() {
3302                                self.vein.bump_heat(path);
3303                                self.l1_context = self.vein.l1_context();
3304                            }
3305                            // Refresh repo map so PageRank accounts for the new edit.
3306                            self.refresh_repo_map();
3307                        }
3308                    }
3309
3310                    if tool_name == "verify_build" {
3311                        self.record_session_verification(
3312                            !is_error
3313                                && (final_output.contains("BUILD OK")
3314                                    || final_output.contains("BUILD SUCCESS")
3315                                    || final_output.contains("BUILD OKAY")),
3316                            if is_error {
3317                                "Explicit verify_build failed."
3318                            } else {
3319                                "Explicit verify_build passed."
3320                            },
3321                        );
3322                    }
3323
3324                    // Update Repeat Guard
3325                    let call_key = format!(
3326                        "{}:{}",
3327                        tool_name,
3328                        serde_json::to_string(&res.args).unwrap_or_default()
3329                    );
3330                    let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
3331                    *repeat_count += 1;
3332
3333                    // verify_build, git_commit, and git_push are exempt from the repeat guard (e.g. verify_build is legitimately called multiple times in fix-verify loops).
3334                    let repeat_guard_exempt = matches!(
3335                        tool_name.as_str(),
3336                        "verify_build" | "git_commit" | "git_push"
3337                    );
3338                    if *repeat_count >= 2 && !repeat_guard_exempt {
3339                        loop_intervention = Some(format!(
3340                            "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
3341                             Do not call it again. Either answer directly from what you already know, \
3342                             use a different tool or approach (e.g. if reading the same file, use grep or LSP symbols instead), \
3343                             or ask the user for clarification.",
3344                            tool_name, *repeat_count
3345                        ));
3346                        let _ = tx
3347                            .send(InferenceEvent::Thought(format!(
3348                                "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
3349                                tool_name, *repeat_count
3350                            )))
3351                            .await;
3352                    }
3353
3354                    if *repeat_count >= 3 && !repeat_guard_exempt {
3355                        self.emit_runtime_failure(
3356                            &tx,
3357                            RuntimeFailureClass::ToolLoop,
3358                            &format!("Hard termination: `{}` called {} times with identical arguments. Reasoning collapse detected.", tool_name, *repeat_count),
3359                        )
3360                        .await;
3361                        return Ok(());
3362                    }
3363
3364                    if is_error {
3365                        consecutive_errors += 1;
3366                    } else {
3367                        consecutive_errors = 0;
3368                    }
3369
3370                    if consecutive_errors >= 3 {
3371                        loop_intervention = Some(
3372                            "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
3373                             STOP all tool calls immediately. Analyze why your previous 3 calls failed \
3374                             (check for hallucinations or invalid arguments) and ask the user for \
3375                             clarification if you cannot proceed.".to_string()
3376                        );
3377                    }
3378
3379                    if consecutive_errors >= 4 {
3380                        self.emit_runtime_failure(
3381                            &tx,
3382                            RuntimeFailureClass::ToolLoop,
3383                            "Hard termination: too many consecutive tool errors.",
3384                        )
3385                        .await;
3386                        return Ok(());
3387                    }
3388
3389                    let _ = tx
3390                        .send(InferenceEvent::ToolCallResult {
3391                            id: call_id.clone(),
3392                            name: tool_name.clone(),
3393                            output: final_output.clone(),
3394                            is_error,
3395                        })
3396                        .await;
3397
3398                    let repeat_guard_exempt = matches!(
3399                        tool_name.as_str(),
3400                        "verify_build" | "git_commit" | "git_push"
3401                    );
3402                    if !repeat_guard_exempt {
3403                        completed_tool_cache.insert(
3404                            canonical_tool_call_key(&tool_name, &res.args),
3405                            CachedToolResult {
3406                                tool_name: tool_name.clone(),
3407                            },
3408                        );
3409                    }
3410
3411                    // Cap output before history
3412                    let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
3413                        self.engine.current_context_length(),
3414                    );
3415                    let capped = if implement_current_plan {
3416                        cap_output(&final_output, 1200)
3417                    } else if compact_ctx
3418                        && (tool_name == "read_file" || tool_name == "inspect_lines")
3419                    {
3420                        // Compact context: cap file reads tightly and add a navigation hint on truncation.
3421                        let limit = 3000usize;
3422                        if final_output.len() > limit {
3423                            let total_lines = final_output.lines().count();
3424                            let mut split_at = limit;
3425                            while !final_output.is_char_boundary(split_at) && split_at > 0 {
3426                                split_at -= 1;
3427                            }
3428                            let scratch = write_output_to_scratch(&final_output, &tool_name)
3429                                .map(|p| format!(" Full file also saved to '{p}'."))
3430                                .unwrap_or_default();
3431                            format!(
3432                                "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.{}]",
3433                                &final_output[..split_at],
3434                                total_lines,
3435                                total_lines.saturating_sub(150),
3436                                scratch,
3437                            )
3438                        } else {
3439                            final_output.clone()
3440                        }
3441                    } else {
3442                        cap_output_for_tool(&final_output, 8000, &tool_name)
3443                    };
3444                    self.history.push(ChatMessage::tool_result_for_model(
3445                        &call_id,
3446                        &tool_name,
3447                        &capped,
3448                        &self.engine.current_model(),
3449                    ));
3450
3451                    if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
3452                    {
3453                        overview_runtime_trace =
3454                            Some(summarize_runtime_trace_output(&final_output));
3455                    }
3456
3457                    if !architecture_overview_mode
3458                        && !is_error
3459                        && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
3460                            || (toolchain_mode && tool_name == "describe_toolchain"))
3461                    {
3462                        authoritative_tool_output = Some(final_output.clone());
3463                    }
3464
3465                    if !is_error && tool_name == "read_file" {
3466                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3467                            let normalized = normalize_workspace_path(path);
3468                            let read_offset =
3469                                res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
3470                            successful_read_targets.insert(normalized.clone());
3471                            successful_read_regions.insert((normalized.clone(), read_offset));
3472                        }
3473                    }
3474
3475                    if !is_error && tool_name == "grep_files" {
3476                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3477                            let normalized = normalize_workspace_path(path);
3478                            if final_output.starts_with("No matches for ") {
3479                                no_match_grep_targets.insert(normalized);
3480                            } else if grep_output_is_high_fanout(&final_output) {
3481                                broad_grep_targets.insert(normalized);
3482                            } else {
3483                                successful_grep_targets.insert(normalized);
3484                            }
3485                        }
3486                    }
3487
3488                    if is_error
3489                        && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
3490                        && (final_output.contains("search string not found")
3491                            || final_output.contains("search string is too short")
3492                            || final_output.contains("search string matched"))
3493                    {
3494                        if let Some(target) = action_target_path(&tool_name, &res.args) {
3495                            let guidance = if final_output.contains("matched") {
3496                                format!(
3497                                    "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
3498                                    tool_name, target
3499                                )
3500                            } else {
3501                                format!(
3502                                    "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
3503                                    tool_name, target
3504                                )
3505                            };
3506                            loop_intervention = Some(guidance);
3507                            *repeat_count = 0;
3508                        }
3509                    }
3510
3511                    // When guard.rs blocks a shell call with the run_code redirect hint,
3512                    // force the model to recover with run_code instead of giving up.
3513                    if is_error
3514                        && tool_name == "shell"
3515                        && final_output.contains("Use the run_code tool instead")
3516                        && loop_intervention.is_none()
3517                    {
3518                        loop_intervention = Some(
3519                            "STOP. Shell was blocked because this is a computation task. \
3520                             You MUST use `run_code` now — write the code and run it. \
3521                             Do NOT output an error message or give up. \
3522                             Call `run_code` with the appropriate language and code to compute the answer. \
3523                             If writing Python, pass `language: \"python\"`. \
3524                             If writing JavaScript, omit language or pass `language: \"javascript\"`."
3525                                .to_string(),
3526                        );
3527                    }
3528
3529                    // When run_code fails with a JavaScript-style parse error (or an "is not defined" error that also mentions deno),
3530                    // the model likely sent Python code without specifying language: "python". Force a corrective retry.
3531                    if is_error
3532                        && tool_name == "run_code"
3533                        && (final_output.contains("source code could not be parsed")
3534                            || final_output.contains("Expected ';'")
3535                            || final_output.contains("Expected '}'")
3536                            || final_output.contains("is not defined")
3537                                && final_output.contains("deno"))
3538                        && loop_intervention.is_none()
3539                    {
3540                        loop_intervention = Some(
3541                            "STOP. run_code failed with a JavaScript parse error — you likely wrote Python \
3542                             code but forgot to pass `language: \"python\"`. \
3543                             Retry run_code with `language: \"python\"` and the same code. \
3544                             Do NOT fall back to shell. Do NOT give up."
3545                                .to_string(),
3546                        );
3547                    }
3548
3549                    if res.blocked_by_policy
3550                        && is_mcp_workspace_read_tool(&tool_name)
3551                        && recoverable_policy_intervention.is_none()
3552                    {
3553                        recoverable_policy_intervention = Some(
3554                            "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
3555                        );
3556                        recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
3557                        recoverable_policy_checkpoint = Some((
3558                            OperatorCheckpointState::BlockedPolicy,
3559                            "MCP workspace read blocked; rerouting to built-in file tools."
3560                                .to_string(),
3561                        ));
3562                    } else if res.blocked_by_policy
3563                        && implement_current_plan
3564                        && is_current_plan_irrelevant_tool(&tool_name)
3565                        && recoverable_policy_intervention.is_none()
3566                    {
3567                        recoverable_policy_intervention = Some(format!(
3568                            "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
3569                            tool_name
3570                        ));
3571                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
3572                        recoverable_policy_checkpoint = Some((
3573                            OperatorCheckpointState::BlockedPolicy,
3574                            format!(
3575                                "Current-plan execution blocked unrelated tool `{}`.",
3576                                tool_name
3577                            ),
3578                        ));
3579                    } else if res.blocked_by_policy
3580                        && implement_current_plan
3581                        && final_output.contains("requires recent file evidence")
3582                        && recoverable_policy_intervention.is_none()
3583                    {
3584                        let target = action_target_path(&tool_name, &res.args)
3585                            .unwrap_or_else(|| "the target file".to_string());
3586                        recoverable_policy_intervention = Some(format!(
3587                            "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
3588                        ));
3589                        recoverable_policy_recipe =
3590                            Some(RecoveryScenario::RecentFileEvidenceMissing);
3591                        recoverable_policy_checkpoint = Some((
3592                            OperatorCheckpointState::BlockedRecentFileEvidence,
3593                            format!("Edit blocked on `{target}`; recent file evidence missing."),
3594                        ));
3595                    } else if res.blocked_by_policy
3596                        && implement_current_plan
3597                        && final_output.contains("requires an exact local line window first")
3598                        && recoverable_policy_intervention.is_none()
3599                    {
3600                        let target = action_target_path(&tool_name, &res.args)
3601                            .unwrap_or_else(|| "the target file".to_string());
3602                        recoverable_policy_intervention = Some(format!(
3603                            "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
3604                        ));
3605                        recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
3606                        recoverable_policy_checkpoint = Some((
3607                            OperatorCheckpointState::BlockedExactLineWindow,
3608                            format!("Edit blocked on `{target}`; exact line window required."),
3609                        ));
3610                    } else if res.blocked_by_policy
3611                        && (final_output.contains("Prefer `")
3612                            || final_output.contains("Prefer tool"))
3613                        && recoverable_policy_intervention.is_none()
3614                    {
3615                        recoverable_policy_intervention = Some(final_output.clone());
3616                        recoverable_policy_recipe = Some(RecoveryScenario::PolicyCorrection);
3617                        recoverable_policy_checkpoint = Some((
3618                            OperatorCheckpointState::BlockedPolicy,
3619                            "Action blocked by policy; self-correction triggered using tool recommendation."
3620                                .to_string(),
3621                        ));
3622                    } else if res.blocked_by_policy && blocked_policy_output.is_none() {
3623                        blocked_policy_output = Some(final_output.clone());
3624                    }
3625
3626                    if *repeat_count >= 5 {
3627                        let _ = tx.send(InferenceEvent::Done).await;
3628                        return Ok(());
3629                    }
3630
3631                    if implement_current_plan
3632                        && !implementation_started
3633                        && !is_error
3634                        && is_non_mutating_plan_step_tool(&tool_name)
3635                    {
3636                        non_mutating_plan_steps += 1;
3637                    }
3638                }
3639
3640                if let Some(intervention) = recoverable_policy_intervention {
3641                    if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
3642                        self.emit_operator_checkpoint(&tx, state, summary).await;
3643                    }
3644                    if let Some(scenario) = recoverable_policy_recipe.take() {
3645                        let recipe = plan_recovery(scenario, &self.recovery_context);
3646                        self.emit_recovery_recipe_summary(
3647                            &tx,
3648                            recipe.recipe.scenario.label(),
3649                            compact_recovery_plan_summary(&recipe),
3650                        )
3651                        .await;
3652                    }
3653                    loop_intervention = Some(intervention);
3654                    let _ = tx
3655                        .send(InferenceEvent::Thought(
3656                            "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
3657                                .into(),
3658                        ))
3659                        .await;
3660                    continue;
3661                }
3662
3663                if architecture_overview_mode {
3664                    match overview_runtime_trace.as_deref() {
3665                        Some(runtime_trace) => {
3666                            let response = build_architecture_overview_answer(runtime_trace);
3667                            self.history.push(ChatMessage::assistant_text(&response));
3668                            self.transcript.log_agent(&response);
3669
3670                            for chunk in chunk_text(&response, 8) {
3671                                if !chunk.is_empty() {
3672                                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
3673                                }
3674                            }
3675
3676                            let _ = tx.send(InferenceEvent::Done).await;
3677                            break;
3678                        }
3679                        None => {
3680                            loop_intervention = Some(
3681                                "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
3682                                    .to_string(),
3683                            );
3684                            continue;
3685                        }
3686                    }
3687                }
3688
3689                if implement_current_plan
3690                    && !implementation_started
3691                    && non_mutating_plan_steps >= non_mutating_plan_hard_cap
3692                {
3693                    let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
3694                    self.history.push(ChatMessage::assistant_text(&msg));
3695                    self.transcript.log_agent(&msg);
3696
3697                    for chunk in chunk_text(&msg, 8) {
3698                        if !chunk.is_empty() {
3699                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3700                        }
3701                    }
3702
3703                    let _ = tx.send(InferenceEvent::Done).await;
3704                    break;
3705                }
3706
3707                if let Some(blocked_output) = blocked_policy_output {
3708                    self.emit_operator_checkpoint(
3709                        &tx,
3710                        OperatorCheckpointState::BlockedPolicy,
3711                        "A blocked tool path was surfaced directly to the operator.",
3712                    )
3713                    .await;
3714                    self.history
3715                        .push(ChatMessage::assistant_text(&blocked_output));
3716                    self.transcript.log_agent(&blocked_output);
3717
3718                    for chunk in chunk_text(&blocked_output, 8) {
3719                        if !chunk.is_empty() {
3720                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3721                        }
3722                    }
3723
3724                    let _ = tx.send(InferenceEvent::Done).await;
3725                    break;
3726                }
3727
3728                if let Some(tool_output) = authoritative_tool_output {
3729                    self.history.push(ChatMessage::assistant_text(&tool_output));
3730                    self.transcript.log_agent(&tool_output);
3731
3732                    for chunk in chunk_text(&tool_output, 8) {
3733                        if !chunk.is_empty() {
3734                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3735                        }
3736                    }
3737
3738                    let _ = tx.send(InferenceEvent::Done).await;
3739                    break;
3740                }
3741
3742                if implement_current_plan && !implementation_started {
3743                    let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3744                    if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3745                        loop_intervention = Some(format!(
3746                            "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3747                            base
3748                        ));
3749                    } else {
3750                        loop_intervention = Some(base.to_string());
3751                    }
3752                } else if self.workflow_mode == WorkflowMode::Architect {
3753                    loop_intervention = Some(
3754                        format!(
3755                            "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3756                            architect_handoff_contract()
3757                        ),
3758                    );
3759                }
3760
3761                // 4. Auto-Verification Loop (The Perfect Bake)
3762                if mutation_occurred && !yolo {
3763                    let _ = tx
3764                        .send(InferenceEvent::Thought(
3765                            "Self-Verification: Running 'cargo check' to ensure build integrity..."
3766                                .into(),
3767                        ))
3768                        .await;
3769                    let verify_res = self.auto_verify_build().await;
3770                    let verify_ok = verify_res.contains("BUILD SUCCESS");
3771                    self.record_verify_build_result(verify_ok, &verify_res)
3772                        .await;
3773                    self.record_session_verification(
3774                        verify_ok,
3775                        if verify_ok {
3776                            "Automatic build verification passed."
3777                        } else {
3778                            "Automatic build verification failed."
3779                        },
3780                    );
3781                    self.history.push(ChatMessage::system(&format!(
3782                        "\n# SYSTEM VERIFICATION\n{verify_res}"
3783                    )));
3784                    let _ = tx
3785                        .send(InferenceEvent::Thought(
3786                            "Verification turn injected into history.".into(),
3787                        ))
3788                        .await;
3789                }
3790
3791                // Continue loop – the model will respond to the results.
3792                continue;
3793            } else if let Some(response_text) = text {
3794                if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3795                    if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3796                        let cleaned = build_session_reset_semantics_answer();
3797                        self.history.push(ChatMessage::assistant_text(&cleaned));
3798                        self.transcript.log_agent(&cleaned);
3799                        for chunk in chunk_text(&cleaned, 8) {
3800                            if !chunk.is_empty() {
3801                                let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3802                            }
3803                        }
3804                        let _ = tx.send(InferenceEvent::Done).await;
3805                        break;
3806                    }
3807
3808                    let warning = format_runtime_failure(
3809                        RuntimeFailureClass::ContextWindow,
3810                        "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3811                    );
3812                    self.history.push(ChatMessage::assistant_text(&warning));
3813                    self.transcript.log_agent(&warning);
3814                    let _ = tx
3815                        .send(InferenceEvent::Thought(
3816                            "Length recovery: model hit the context ceiling before completing the answer."
3817                                .into(),
3818                        ))
3819                        .await;
3820                    for chunk in chunk_text(&warning, 8) {
3821                        if !chunk.is_empty() {
3822                            let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3823                        }
3824                    }
3825                    let _ = tx.send(InferenceEvent::Done).await;
3826                    break;
3827                }
3828
3829                if response_text.contains("<|tool_call")
3830                    || response_text.contains("[END_TOOL_REQUEST]")
3831                    || response_text.contains("<|tool_response")
3832                    || response_text.contains("<tool_response|>")
3833                {
3834                    loop_intervention = Some(
3835                        "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3836                    );
3837                    continue;
3838                }
3839
3840                // 1. Process and route the reasoning block to SPECULAR.
3841                if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3842                {
3843                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3844                    // Persist for history audit (stripped from next turn by Volatile Reasoning rule).
3845                    // This will be summarized in the next turn's system prompt.
3846                    self.reasoning_history = Some(thought);
3847                }
3848
3849                // 2. Process and stream the final answer to the chat interface.
3850                let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3851
3852                if implement_current_plan && !implementation_started {
3853                    loop_intervention = Some(
3854                        "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3855                    );
3856                    continue;
3857                }
3858
3859                // [Hardened Interface] Strictly respect the stripper.
3860                // If it's empty after stripping think blocks, the model thought through its
3861                // answer but forgot to emit it (common with Qwen3 models in architect/ask mode).
3862                // Nudge it rather than silently dropping the turn — but cap at 2 retries so a
3863                // model that keeps returning whitespace/empty doesn't spin all 25 iterations.
3864                if cleaned.is_empty() {
3865                    empty_cleaned_nudges += 1;
3866                    if empty_cleaned_nudges == 1 {
3867                        loop_intervention = Some(
3868                            "Your visible response was empty. The tool already returned data. \
3869                             Write your answer now in plain text — no <think> tags, no tool calls. \
3870                             State the key facts in 2-5 sentences and stop."
3871                                .to_string(),
3872                        );
3873                        continue;
3874                    } else if empty_cleaned_nudges == 2 {
3875                        loop_intervention = Some(
3876                            "EMPTY RESPONSE. Do NOT use <think>. Do NOT call tools. \
3877                             Write the answer in plain text right now. \
3878                             Example format: \"Your CPU is X. Your GPU is Y. You have Z GB of RAM.\""
3879                                .to_string(),
3880                        );
3881                        continue;
3882                    }
3883                    // Nudge budget exhausted — surface as a recoverable empty-response failure
3884                    // so the TUI unblocks instead of hanging for the full max_iters budget.
3885                    let class = RuntimeFailureClass::EmptyModelResponse;
3886                    self.emit_runtime_failure(
3887                        &tx,
3888                        class,
3889                        "Model returned empty content after 2 nudge attempts.",
3890                    )
3891                    .await;
3892                    break;
3893                }
3894
3895                let architect_handoff = self.persist_architect_handoff(&cleaned);
3896                self.history.push(ChatMessage::assistant_text(&cleaned));
3897                self.transcript.log_agent(&cleaned);
3898
3899                // Send in smooth chunks for that professional UI feel.
3900                for chunk in chunk_text(&cleaned, 8) {
3901                    if !chunk.is_empty() {
3902                        let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3903                    }
3904                }
3905
3906                if let Some(plan) = architect_handoff.as_ref() {
3907                    let note = architect_handoff_operator_note(plan);
3908                    self.history.push(ChatMessage::system(&note));
3909                    self.transcript.log_system(&note);
3910                    let _ = tx
3911                        .send(InferenceEvent::MutedToken(format!("\n{}", note)))
3912                        .await;
3913                }
3914
3915                self.emit_done_events(&tx).await;
3916                break;
3917            } else {
3918                let detail = "Model returned an empty response.";
3919                let class = classify_runtime_failure(detail);
3920                if should_retry_runtime_failure(class) {
3921                    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3922                        if let RecoveryDecision::Attempt(plan) =
3923                            attempt_recovery(scenario, &mut self.recovery_context)
3924                        {
3925                            self.transcript.log_system(
3926                                "Automatic provider recovery triggered: model returned an empty response.",
3927                            );
3928                            self.emit_recovery_recipe_summary(
3929                                &tx,
3930                                plan.recipe.scenario.label(),
3931                                compact_recovery_plan_summary(&plan),
3932                            )
3933                            .await;
3934                            let _ = tx
3935                                .send(InferenceEvent::ProviderStatus {
3936                                    state: ProviderRuntimeState::Recovering,
3937                                    summary: compact_runtime_recovery_summary(class).into(),
3938                                })
3939                                .await;
3940                            self.emit_operator_checkpoint(
3941                                &tx,
3942                                OperatorCheckpointState::RecoveringProvider,
3943                                compact_runtime_recovery_summary(class),
3944                            )
3945                            .await;
3946                            continue;
3947                        }
3948                    }
3949                }
3950
3951                self.emit_runtime_failure(&tx, class, detail).await;
3952                break;
3953            }
3954        }
3955
3956        self.trim_history(80);
3957        self.refresh_session_memory();
3958        // Record the goal and increment the turn counter before persisting.
3959        self.last_goal = Some(user_input.chars().take(300).collect());
3960        self.turn_count = self.turn_count.saturating_add(1);
3961        self.save_session();
3962        self.emit_compaction_pressure(&tx).await;
3963        Ok(())
3964    }
3965
3966    async fn emit_runtime_failure(
3967        &mut self,
3968        tx: &mpsc::Sender<InferenceEvent>,
3969        class: RuntimeFailureClass,
3970        detail: &str,
3971    ) {
3972        if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3973            let decision = preview_recovery_decision(scenario, &self.recovery_context);
3974            self.emit_recovery_recipe_summary(
3975                tx,
3976                scenario.label(),
3977                compact_recovery_decision_summary(&decision),
3978            )
3979            .await;
3980            let needs_refresh = match &decision {
3981                RecoveryDecision::Attempt(plan) => plan
3982                    .recipe
3983                    .steps
3984                    .contains(&RecoveryStep::RefreshRuntimeProfile),
3985                RecoveryDecision::Escalate { recipe, .. } => {
3986                    recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3987                }
3988            };
3989            if needs_refresh {
3990                if let Some((model_id, context_length, changed)) = self
3991                    .refresh_runtime_profile_and_report(tx, "context_window_failure")
3992                    .await
3993                {
3994                    let note = if changed {
3995                        format!(
3996                            "Runtime refresh after context-window failure: model {} | CTX {}",
3997                            model_id, context_length
3998                        )
3999                    } else {
4000                        format!(
4001                            "Runtime refresh after context-window failure confirms model {} | CTX {}",
4002                            model_id, context_length
4003                        )
4004                    };
4005                    let _ = tx.send(InferenceEvent::Thought(note)).await;
4006                }
4007            }
4008        }
4009        if let Some(state) = provider_state_for_runtime_failure(class) {
4010            let _ = tx
4011                .send(InferenceEvent::ProviderStatus {
4012                    state,
4013                    summary: compact_runtime_failure_summary(class).into(),
4014                })
4015                .await;
4016        }
4017        if let Some(state) = checkpoint_state_for_runtime_failure(class) {
4018            self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
4019                .await;
4020        }
4021        let formatted = format_runtime_failure(class, detail);
4022        self.history.push(ChatMessage::system(&format!(
4023            "# RUNTIME FAILURE\n{}",
4024            formatted
4025        )));
4026        self.transcript.log_system(&formatted);
4027        let _ = tx.send(InferenceEvent::Error(formatted)).await;
4028        let _ = tx.send(InferenceEvent::Done).await;
4029    }
4030
4031    /// [Task Analyzer] Run 'cargo check' and return a concise summary for the model.
4032    async fn auto_verify_build(&self) -> String {
4033        match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
4034            Ok(out) => {
4035                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
4036                    + &cap_output(&out, 2000)
4037            }
4038            Err(e) => format!(
4039                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
4040                cap_output(&e, 2000)
4041            ),
4042        }
4043    }
4044
4045    /// Triggers an LLM call to summarize old messages if history exceeds the VRAM character limit.
4046    /// Triggers the Deterministic Smart Compaction algorithm to shrink history while preserving context.
4047    /// Triggers the Recursive Context Compactor.
4048    async fn compact_history_if_needed(
4049        &mut self,
4050        tx: &mpsc::Sender<InferenceEvent>,
4051        anchor_index: Option<usize>,
4052    ) -> Result<bool, String> {
4053        let vram_ratio = self.gpu_state.ratio();
4054        let context_length = self.engine.current_context_length();
4055        let config = CompactionConfig::adaptive(context_length, vram_ratio);
4056
4057        if !compaction::should_compact(&self.history, context_length, vram_ratio) {
4058            return Ok(false);
4059        }
4060
4061        let _ = tx
4062            .send(InferenceEvent::Thought(format!(
4063                "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
4064                context_length / 1000,
4065                vram_ratio * 100.0,
4066                config.max_estimated_tokens / 1000,
4067            )))
4068            .await;
4069
4070        let result = compaction::compact_history(
4071            &self.history,
4072            self.running_summary.as_deref(),
4073            config,
4074            anchor_index,
4075        );
4076
4077        let removed_message_count = self.history.len().saturating_sub(result.messages.len());
4078        self.history = result.messages;
4079        self.running_summary = result.summary;
4080
4081        // Layer 6: Memory Synthesis (Task Context Persistence)
4082        let previous_memory = self.session_memory.clone();
4083        self.session_memory = compaction::extract_memory(&self.history);
4084        self.session_memory
4085            .inherit_runtime_ledger_from(&previous_memory);
4086        self.session_memory.record_compaction(
4087            removed_message_count,
4088            format!(
4089                "Compacted history around active task '{}' and preserved {} working-set file(s).",
4090                self.session_memory.current_task,
4091                self.session_memory.working_set.len()
4092            ),
4093        );
4094        self.emit_compaction_pressure(tx).await;
4095
4096        // Jinja alignment: preserved slice may start with assistant/tool messages.
4097        // Strip any leading non-user messages so the first non-system message is always user.
4098        let first_non_sys = self
4099            .history
4100            .iter()
4101            .position(|m| m.role != "system")
4102            .unwrap_or(self.history.len());
4103        if first_non_sys < self.history.len() {
4104            if let Some(user_offset) = self.history[first_non_sys..]
4105                .iter()
4106                .position(|m| m.role == "user")
4107            {
4108                if user_offset > 0 {
4109                    self.history
4110                        .drain(first_non_sys..first_non_sys + user_offset);
4111                }
4112            }
4113        }
4114
4115        let _ = tx
4116            .send(InferenceEvent::Thought(format!(
4117                "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
4118                self.session_memory.current_task,
4119                self.session_memory.working_set.len()
4120            )))
4121            .await;
4122        let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
4123        self.emit_recovery_recipe_summary(
4124            tx,
4125            recipe.recipe.scenario.label(),
4126            compact_recovery_plan_summary(&recipe),
4127        )
4128        .await;
4129        self.emit_operator_checkpoint(
4130            tx,
4131            OperatorCheckpointState::HistoryCompacted,
4132            format!(
4133                "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
4134                self.session_memory.current_task,
4135                self.session_memory.working_set.len()
4136            ),
4137        )
4138        .await;
4139
4140        Ok(true)
4141    }
4142
4143    /// Query The Vein for context relevant to the user's message.
4144    /// Runs hybrid BM25 + semantic search (semantic requires embedding model in LM Studio).
4145    /// Returns a formatted system message string, or None if nothing useful found.
4146    fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
4147        // Skip trivial / very short inputs.
4148        if query.trim().split_whitespace().count() < 3 {
4149            return None;
4150        }
4151
4152        let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
4153        if results.is_empty() {
4154            return None;
4155        }
4156
4157        let semantic_active = self.vein.has_any_embeddings();
4158        let header = if semantic_active {
4159            "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
4160             Use this to answer without needing extra read_file calls where possible.\n\n"
4161        } else {
4162            "# Relevant context from The Vein (BM25 keyword retrieval)\n\
4163             Use this to answer without needing extra read_file calls where possible.\n\n"
4164        };
4165
4166        let mut ctx = String::from(header);
4167        let mut paths: Vec<String> = Vec::new();
4168
4169        let mut total = 0usize;
4170        const MAX_CTX_CHARS: usize = 1_500;
4171
4172        for r in results {
4173            if total >= MAX_CTX_CHARS {
4174                break;
4175            }
4176            let snippet = if r.content.len() > 500 {
4177                format!("{}...", &r.content[..500])
4178            } else {
4179                r.content.clone()
4180            };
4181            ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
4182            total += snippet.len() + r.path.len() + 10;
4183            if !paths.contains(&r.path) {
4184                paths.push(r.path);
4185            }
4186        }
4187
4188        Some((ctx, paths))
4189    }
4190
4191    /// Returns the conversation history (WITHOUT the system prompt) for the context window.
4192    /// This ensures we don't have redundant system blocks and prevents Jinja crashes.
4193    fn context_window_slice(&self) -> Vec<ChatMessage> {
4194        let mut result = Vec::new();
4195
4196        // Skip index 0 (the raw system message) and any stray system messages in history.
4197        if self.history.len() > 1 {
4198            for m in &self.history[1..] {
4199                if m.role == "system" {
4200                    continue;
4201                }
4202
4203                let mut sanitized = m.clone();
4204                // DEEP SANITIZE: LM Studio Jinja templates for Qwen crash on truly empty content.
4205                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
4206                    sanitized.content = MessageContent::Text(" ".into());
4207                }
4208                result.push(sanitized);
4209            }
4210        }
4211
4212        // Jinja Guard: The first message after the system prompt MUST be 'user'.
4213        // If not (e.g. because of compaction), we insert a tiny anchor.
4214        if !result.is_empty() && result[0].role != "user" {
4215            result.insert(0, ChatMessage::user("Continuing previous context..."));
4216        }
4217
4218        result
4219    }
4220
4221    fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
4222        let mut result = Vec::new();
4223
4224        if self.history.len() > 1 {
4225            let start = start_idx.max(1).min(self.history.len());
4226            for m in &self.history[start..] {
4227                if m.role == "system" {
4228                    continue;
4229                }
4230
4231                let mut sanitized = m.clone();
4232                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
4233                    sanitized.content = MessageContent::Text(" ".into());
4234                }
4235                result.push(sanitized);
4236            }
4237        }
4238
4239        if !result.is_empty() && result[0].role != "user" {
4240            result.insert(0, ChatMessage::user("Continuing current plan execution..."));
4241        }
4242
4243        result
4244    }
4245
4246    /// Drop old turns from the middle of history.
4247    fn trim_history(&mut self, max_messages: usize) {
4248        if self.history.len() <= max_messages {
4249            return;
4250        }
4251        // Always keep [0] (system prompt).
4252        let excess = self.history.len() - max_messages;
4253        self.history.drain(1..=excess);
4254    }
4255
4256    /// P1: Attempt to fix malformed JSON tool arguments by asking the model to re-output them.
4257    async fn repair_tool_args(
4258        &self,
4259        tool_name: &str,
4260        bad_json: &str,
4261        tx: &mpsc::Sender<InferenceEvent>,
4262    ) -> Result<Value, String> {
4263        let _ = tx
4264            .send(InferenceEvent::Thought(format!(
4265                "Attempting to repair malformed JSON for '{}'...",
4266                tool_name
4267            )))
4268            .await;
4269
4270        let prompt = format!(
4271            "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
4272            tool_name, bad_json
4273        );
4274
4275        let messages = vec![
4276            ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
4277            ChatMessage::user(&prompt),
4278        ];
4279
4280        // Use fast model for speed if available.
4281        let (text, _, _, _) = self
4282            .engine
4283            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4284            .await
4285            .map_err(|e| e.to_string())?;
4286
4287        let cleaned = text
4288            .unwrap_or_default()
4289            .trim()
4290            .trim_start_matches("```json")
4291            .trim_start_matches("```")
4292            .trim_end_matches("```")
4293            .trim()
4294            .to_string();
4295
4296        serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
4297    }
4298
4299    /// P2: Run a fast validation step after file writes to check for subtle logic errors.
4300    async fn run_critic_check(
4301        &self,
4302        path: &str,
4303        content: &str,
4304        tx: &mpsc::Sender<InferenceEvent>,
4305    ) -> Option<String> {
4306        // Only run for source code files.
4307        let ext = std::path::Path::new(path)
4308            .extension()
4309            .and_then(|e| e.to_str())
4310            .unwrap_or("");
4311        const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
4312        if !CRITIC_EXTS.contains(&ext) {
4313            return None;
4314        }
4315
4316        let _ = tx
4317            .send(InferenceEvent::Thought(format!(
4318                "CRITIC: Reviewing changes to '{}'...",
4319                path
4320            )))
4321            .await;
4322
4323        let truncated = cap_output(content, 4000);
4324
4325        let prompt = format!(
4326            "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
4327            path, ext, truncated
4328        );
4329
4330        let messages = vec![
4331            ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
4332            ChatMessage::user(&prompt)
4333        ];
4334
4335        let (text, _, _, _) = self
4336            .engine
4337            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4338            .await
4339            .ok()?;
4340
4341        let critique = text?.trim().to_string();
4342        if critique.to_uppercase().contains("PASS") || critique.is_empty() {
4343            None
4344        } else {
4345            Some(critique)
4346        }
4347    }
4348}
4349
4350// ── Tool dispatcher ───────────────────────────────────────────────────────────
4351
/// Public entry point for running a tool by name.
///
/// Forwards `name` and `args` unchanged to `dispatch_builtin_tool` and returns its
/// result as-is; no validation or transformation happens here.
pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
    dispatch_builtin_tool(name, args).await
}
4355
/// Normalizes free-form text into an issue description for the `fix_plan` topic.
///
/// Surrounding whitespace is removed, and a leading `/think` or `/no_think` directive is
/// stripped when it stands alone as a token (followed by whitespace or the end of the
/// text). Text that merely begins with those characters, such as `/thinking about it`,
/// is left intact instead of being cut mid-word.
///
/// Returns `None` when nothing but whitespace (or a bare directive) remains.
fn normalize_fix_plan_issue_text(text: &str) -> Option<String> {
    const DIRECTIVES: [&str; 2] = ["/think", "/no_think"];

    let trimmed = text.trim();
    let body = DIRECTIVES
        .iter()
        .find_map(|&directive| {
            trimmed
                .strip_prefix(directive)
                // Require a token boundary so only the directive itself is removed.
                .filter(|rest| rest.is_empty() || rest.starts_with(char::is_whitespace))
        })
        .unwrap_or(trimmed)
        .trim();

    (!body.is_empty()).then(|| body.to_string())
}
4367
4368fn fill_missing_fix_plan_issue(tool_name: &str, args: &mut Value, fallback_issue: Option<&str>) {
4369    if tool_name != "inspect_host" {
4370        return;
4371    }
4372
4373    let Some(topic) = args.get("topic").and_then(|v| v.as_str()) else {
4374        return;
4375    };
4376    if topic != "fix_plan" {
4377        return;
4378    }
4379
4380    let issue_missing = args
4381        .get("issue")
4382        .and_then(|v| v.as_str())
4383        .map(str::trim)
4384        .is_none_or(|value| value.is_empty());
4385    if !issue_missing {
4386        return;
4387    }
4388
4389    let Some(fallback_issue) = fallback_issue.and_then(normalize_fix_plan_issue_text) else {
4390        return;
4391    };
4392
4393    let Value::Object(map) = args else {
4394        return;
4395    };
4396    map.insert(
4397        "issue".to_string(),
4398        Value::String(fallback_issue.to_string()),
4399    );
4400}
4401
4402fn should_rewrite_shell_to_fix_plan(
4403    tool_name: &str,
4404    args: &Value,
4405    latest_user_prompt: Option<&str>,
4406) -> bool {
4407    if tool_name != "shell" {
4408        return false;
4409    }
4410    let Some(prompt) = latest_user_prompt else {
4411        return false;
4412    };
4413    if preferred_host_inspection_topic(prompt) != Some("fix_plan") {
4414        return false;
4415    }
4416    let command = args
4417        .get("command")
4418        .and_then(|value| value.as_str())
4419        .unwrap_or("");
4420    shell_looks_like_structured_host_inspection(command)
4421}
4422
4423fn extract_release_arg(command: &str, flag: &str) -> Option<String> {
4424    let pattern = format!(r#"(?i){}\s+['"]?([^'" \r\n]+)['"]?"#, regex::escape(flag));
4425    let regex = regex::Regex::new(&pattern).ok()?;
4426    let captures = regex.captures(command)?;
4427    captures.get(1).map(|m| m.as_str().to_string())
4428}
4429
4430fn infer_maintainer_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4431    let workflow = preferred_maintainer_workflow(prompt)?;
4432    let lower = prompt.to_ascii_lowercase();
4433    match workflow {
4434        "clean" => Some(serde_json::json!({
4435            "workflow": "clean",
4436            "deep": lower.contains("deep clean")
4437                || lower.contains("deep cleanup")
4438                || lower.contains("deep"),
4439            "reset": lower.contains("reset"),
4440            "prune_dist": lower.contains("prune dist")
4441                || lower.contains("prune old dist")
4442                || lower.contains("prune old artifacts")
4443                || lower.contains("old dist artifacts")
4444                || lower.contains("old artifacts"),
4445        })),
4446        "package_windows" => Some(serde_json::json!({
4447            "workflow": "package_windows",
4448            "installer": lower.contains("installer") || lower.contains("setup.exe"),
4449            "add_to_path": lower.contains("addtopath")
4450                || lower.contains("add to path")
4451                || lower.contains("update path")
4452                || lower.contains("refresh path"),
4453        })),
4454        "release" => {
4455            let version = regex::Regex::new(r#"(?i)\b(\d+\.\d+\.\d+)\b"#)
4456                .ok()
4457                .and_then(|re| re.captures(prompt))
4458                .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()));
4459            let bump = if lower.contains("patch") {
4460                Some("patch")
4461            } else if lower.contains("minor") {
4462                Some("minor")
4463            } else if lower.contains("major") {
4464                Some("major")
4465            } else {
4466                None
4467            };
4468            let mut args = serde_json::json!({
4469                "workflow": "release",
4470                "push": lower.contains(" push") || lower.starts_with("push ") || lower.contains(" and push"),
4471                "add_to_path": lower.contains("addtopath")
4472                    || lower.contains("add to path")
4473                    || lower.contains("update path"),
4474                "skip_installer": lower.contains("skip installer"),
4475                "publish_crates": lower.contains("publish crates") || lower.contains("crates.io"),
4476                "publish_voice_crate": lower.contains("publish voice crate")
4477                    || lower.contains("publish hematite-kokoros"),
4478            });
4479            if let Some(version) = version {
4480                args["version"] = Value::String(version);
4481            }
4482            if let Some(bump) = bump {
4483                args["bump"] = Value::String(bump.to_string());
4484            }
4485            Some(args)
4486        }
4487        _ => None,
4488    }
4489}
4490
4491fn infer_workspace_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4492    let workflow = preferred_workspace_workflow(prompt)?;
4493    let lower = prompt.to_ascii_lowercase();
4494    let trimmed = prompt.trim();
4495
4496    if let Some(command) = extract_workspace_command_from_prompt(trimmed) {
4497        return Some(serde_json::json!({
4498            "workflow": "command",
4499            "command": command,
4500        }));
4501    }
4502
4503    if let Some(path) = extract_workspace_script_path_from_prompt(trimmed) {
4504        return Some(serde_json::json!({
4505            "workflow": "script_path",
4506            "path": path,
4507        }));
4508    }
4509
4510    match workflow {
4511        "build" | "test" | "lint" | "fix" => Some(serde_json::json!({
4512            "workflow": workflow,
4513        })),
4514        "script" => {
4515            let package_script = if lower.contains("npm run ") {
4516                extract_word_after(&lower, "npm run ")
4517            } else if lower.contains("pnpm run ") {
4518                extract_word_after(&lower, "pnpm run ")
4519            } else if lower.contains("bun run ") {
4520                extract_word_after(&lower, "bun run ")
4521            } else if lower.contains("yarn ") {
4522                extract_word_after(&lower, "yarn ")
4523            } else {
4524                None
4525            };
4526
4527            if let Some(name) = package_script {
4528                return Some(serde_json::json!({
4529                    "workflow": "package_script",
4530                    "name": name,
4531                }));
4532            }
4533
4534            if let Some(name) = extract_word_after(&lower, "just ") {
4535                return Some(serde_json::json!({
4536                    "workflow": "just",
4537                    "name": name,
4538                }));
4539            }
4540            if let Some(name) = extract_word_after(&lower, "make ") {
4541                return Some(serde_json::json!({
4542                    "workflow": "make",
4543                    "name": name,
4544                }));
4545            }
4546            if let Some(name) = extract_word_after(&lower, "task ") {
4547                return Some(serde_json::json!({
4548                    "workflow": "task",
4549                    "name": name,
4550                }));
4551            }
4552
4553            None
4554        }
4555        _ => None,
4556    }
4557}
4558
/// Extracts a workspace command (e.g. `cargo test`, `pnpm run build`) embedded
/// in a prompt.
///
/// Prefixes are tried in priority order; for each one, the first occurrence that
/// starts at a token boundary wins. A token boundary is the start of the prompt
/// or a preceding byte that is not an ASCII letter or digit. Without that check
/// `"pnpm run build"` matched `"npm "` at byte 1 and was truncated to
/// `"npm run build"`, and words such as `"remake "` were read as `make`.
///
/// The returned command is the remainder of the prompt from the match onwards,
/// with surrounding whitespace and backticks removed and the original casing
/// kept. Returns `None` when no prefix matches.
fn extract_workspace_command_from_prompt(prompt: &str) -> Option<String> {
    const COMMAND_PREFIXES: &[&str] = &[
        "cargo ",
        "npm ",
        "pnpm ",
        "yarn ",
        "bun ",
        "pytest",
        "go build",
        "go test",
        "make ",
        "just ",
        "task ",
        "./gradlew",
        ".\\gradlew",
    ];

    // ASCII lowercasing keeps byte offsets identical, so an index found in
    // `lower` is a valid char boundary in `prompt` as well.
    let lower = prompt.to_ascii_lowercase();
    let bytes = lower.as_bytes();

    for &prefix in COMMAND_PREFIXES {
        let boundary_hit = lower
            .match_indices(prefix)
            .map(|(index, _)| index)
            .find(|&index| index == 0 || !bytes[index - 1].is_ascii_alphanumeric());
        if let Some(index) = boundary_hit {
            return Some(prompt[index..].trim().trim_matches('`').to_string());
        }
    }
    None
}
4582
/// Finds the first `scripts/<name>.<ext>` path mentioned in a prompt or command.
///
/// Backslashes are normalized to `/`, then each whitespace-separated token is
/// stripped of wrapping punctuation and an optional leading `./`. A token
/// qualifies when it starts with `scripts/` (case-sensitive) and ends with a
/// known script extension (case-insensitive). The normalized path is returned
/// with its original casing.
///
/// Leading and trailing punctuation are trimmed separately. The earlier
/// single `trim_matches` call also removed a leading `.`, which turned
/// `./scripts/x.sh` into `/scripts/x.sh` and made relative invocations such as
/// `./scripts/build.sh` or `.\scripts\build.ps1` impossible to recognise.
fn extract_workspace_script_path_from_prompt(prompt: &str) -> Option<String> {
    const SCRIPT_EXTENSIONS: &[&str] = &[
        ".ps1", ".sh", ".py", ".cmd", ".bat", ".js", ".mjs", ".cjs",
    ];

    let normalized = prompt.replace('\\', "/");
    normalized.split_whitespace().find_map(|token| {
        let candidate = token
            // Opening quotes/brackets only; '.' must survive for the "./" strip below.
            .trim_start_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | ')' | '('))
            .trim_start_matches("./")
            // Closing quotes/brackets and sentence punctuation.
            .trim_end_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('));

        if !candidate.starts_with("scripts/") {
            return None;
        }

        // Lowercase once per candidate rather than once per extension.
        let lowered = candidate.to_ascii_lowercase();
        SCRIPT_EXTENSIONS
            .iter()
            .any(|ext| lowered.ends_with(*ext))
            .then(|| candidate.to_string())
    })
}
4599
/// Returns the first whitespace-delimited word that follows the first
/// occurrence of `prefix` in `haystack`, with wrapping quotes, backticks,
/// commas, periods and parentheses removed.
///
/// Returns `None` when `prefix` is absent, when nothing follows it, or when the
/// following word consists solely of that punctuation. The emptiness check runs
/// after trimming; checking before it let inputs like `"npm run ..."` produce
/// `Some("")`.
fn extract_word_after(haystack: &str, prefix: &str) -> Option<String> {
    // `find` returns the start of the match, so `start` sits on a char boundary.
    let start = haystack.find(prefix)? + prefix.len();
    let word = haystack[start..]
        .split_whitespace()
        .next()?
        .trim_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('));
    (!word.is_empty()).then(|| word.to_string())
}
4613
4614fn rewrite_shell_to_maintainer_workflow_args(command: &str) -> Option<Value> {
4615    let lower = command.to_ascii_lowercase();
4616    if lower.contains("clean.ps1") {
4617        return Some(serde_json::json!({
4618            "workflow": "clean",
4619            "deep": lower.contains("-deep"),
4620            "reset": lower.contains("-reset"),
4621            "prune_dist": lower.contains("-prunedist"),
4622        }));
4623    }
4624    if lower.contains("package-windows.ps1") {
4625        return Some(serde_json::json!({
4626            "workflow": "package_windows",
4627            "installer": lower.contains("-installer"),
4628            "add_to_path": lower.contains("-addtopath"),
4629        }));
4630    }
4631    if lower.contains("release.ps1") {
4632        let version = extract_release_arg(command, "-Version");
4633        let bump = extract_release_arg(command, "-Bump");
4634        if version.is_none() && bump.is_none() {
4635            return Some(serde_json::json!({
4636                "workflow": "release"
4637            }));
4638        }
4639        let mut args = serde_json::json!({
4640            "workflow": "release",
4641            "push": lower.contains("-push"),
4642            "add_to_path": lower.contains("-addtopath"),
4643            "skip_installer": lower.contains("-skipinstaller"),
4644            "publish_crates": lower.contains("-publishcrates"),
4645            "publish_voice_crate": lower.contains("-publishvoicecrate"),
4646        });
4647        if let Some(version) = version {
4648            args["version"] = Value::String(version);
4649        }
4650        if let Some(bump) = bump {
4651            args["bump"] = Value::String(bump);
4652        }
4653        return Some(args);
4654    }
4655    None
4656}
4657
4658fn rewrite_shell_to_workspace_workflow_args(command: &str) -> Option<Value> {
4659    let lower = command.to_ascii_lowercase();
4660    if lower.contains("clean.ps1")
4661        || lower.contains("package-windows.ps1")
4662        || lower.contains("release.ps1")
4663    {
4664        return None;
4665    }
4666
4667    if let Some(path) = extract_workspace_script_path_from_prompt(command) {
4668        return Some(serde_json::json!({
4669            "workflow": "script_path",
4670            "path": path,
4671        }));
4672    }
4673
4674    let looks_like_workspace_command = [
4675        "cargo ",
4676        "npm ",
4677        "pnpm ",
4678        "yarn ",
4679        "bun ",
4680        "pytest",
4681        "go build",
4682        "go test",
4683        "make ",
4684        "just ",
4685        "task ",
4686        "./gradlew",
4687        ".\\gradlew",
4688    ]
4689    .iter()
4690    .any(|needle| lower.contains(needle));
4691
4692    if looks_like_workspace_command {
4693        Some(serde_json::json!({
4694            "workflow": "command",
4695            "command": command.trim(),
4696        }))
4697    } else {
4698        None
4699    }
4700}
4701
4702fn rewrite_host_tool_call(
4703    tool_name: &mut String,
4704    args: &mut Value,
4705    latest_user_prompt: Option<&str>,
4706) {
4707    if *tool_name == "shell" {
4708        let command = args
4709            .get("command")
4710            .and_then(|value| value.as_str())
4711            .unwrap_or("");
4712        if let Some(maintainer_workflow_args) = rewrite_shell_to_maintainer_workflow_args(command) {
4713            *tool_name = "run_hematite_maintainer_workflow".to_string();
4714            *args = maintainer_workflow_args;
4715            return;
4716        }
4717        if let Some(workspace_workflow_args) = rewrite_shell_to_workspace_workflow_args(command) {
4718            *tool_name = "run_workspace_workflow".to_string();
4719            *args = workspace_workflow_args;
4720            return;
4721        }
4722    }
4723    let is_surgical_tool = matches!(
4724        tool_name.as_str(),
4725        "create_directory"
4726            | "write_file"
4727            | "edit_file"
4728            | "patch_hunk"
4729            | "multi_replace_file_content"
4730            | "replace_file_content"
4731            | "move_file"
4732            | "delete_file"
4733    );
4734
4735    if !is_surgical_tool && *tool_name != "run_hematite_maintainer_workflow" {
4736        if let Some(prompt_args) =
4737            latest_user_prompt.and_then(infer_maintainer_workflow_args_from_prompt)
4738        {
4739            *tool_name = "run_hematite_maintainer_workflow".to_string();
4740            *args = prompt_args;
4741            return;
4742        }
4743    }
4744    if !is_surgical_tool && *tool_name != "run_workspace_workflow" {
4745        if let Some(prompt_args) =
4746            latest_user_prompt.and_then(infer_workspace_workflow_args_from_prompt)
4747        {
4748            *tool_name = "run_workspace_workflow".to_string();
4749            *args = prompt_args;
4750            return;
4751        }
4752    }
4753    if should_rewrite_shell_to_fix_plan(tool_name, args, latest_user_prompt) {
4754        *tool_name = "inspect_host".to_string();
4755        *args = serde_json::json!({
4756            "topic": "fix_plan"
4757        });
4758    }
4759    fill_missing_fix_plan_issue(tool_name, args, latest_user_prompt);
4760}
4761
4762fn canonical_tool_call_key(tool_name: &str, args: &Value) -> String {
4763    format!(
4764        "{}:{}",
4765        tool_name,
4766        serde_json::to_string(args).unwrap_or_default()
4767    )
4768}
4769
4770fn normalized_tool_call_for_execution(
4771    tool_name: &str,
4772    raw_arguments: &str,
4773    gemma4_model: bool,
4774    latest_user_prompt: Option<&str>,
4775) -> (String, Value) {
4776    let normalized_arguments = if gemma4_model {
4777        crate::agent::inference::normalize_tool_argument_string(tool_name, raw_arguments)
4778    } else {
4779        raw_arguments.to_string()
4780    };
4781    let mut normalized_name = tool_name.to_string();
4782    let mut args = serde_json::from_str::<Value>(&normalized_arguments)
4783        .unwrap_or(Value::Object(Default::default()));
4784    rewrite_host_tool_call(&mut normalized_name, &mut args, latest_user_prompt);
4785    (normalized_name, args)
4786}
4787
/// Test-only helper: normalizes a raw tool call exactly as execution would and
/// returns its canonical `"<name>:<json-args>"` key.
#[cfg(test)]
fn normalized_tool_call_key_for_dedupe(
    tool_name: &str,
    raw_arguments: &str,
    gemma4_model: bool,
    latest_user_prompt: Option<&str>,
) -> String {
    let normalized =
        normalized_tool_call_for_execution(tool_name, raw_arguments, gemma4_model, latest_user_prompt);
    canonical_tool_call_key(&normalized.0, &normalized.1)
}
4803
4804impl ConversationManager {
4805    /// Checks if a tool call is authorized given the current configuration and mode.
4806    fn check_authorization(
4807        &self,
4808        name: &str,
4809        args: &serde_json::Value,
4810        config: &crate::agent::config::HematiteConfig,
4811        yolo_flag: bool,
4812    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
4813        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
4814    }
4815
4816    /// Layer 4: Isolated tool execution logic. Does not mutate 'self' to allow parallelism.
4817    async fn process_tool_call(
4818        &self,
4819        mut call: ToolCallFn,
4820        config: crate::agent::config::HematiteConfig,
4821        yolo: bool,
4822        tx: mpsc::Sender<InferenceEvent>,
4823        real_id: String,
4824    ) -> ToolExecutionOutcome {
4825        let mut msg_results = Vec::new();
4826        let mut latest_target_dir = None;
4827        let gemma4_model =
4828            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
4829        let normalized_arguments = if gemma4_model {
4830            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
4831        } else {
4832            call.arguments.clone()
4833        };
4834
4835        // 1. Argument Parsing & Repair
4836        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
4837            Ok(v) => v,
4838            Err(_) => {
4839                match self
4840                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
4841                    .await
4842                {
4843                    Ok(v) => v,
4844                    Err(e) => {
4845                        let _ = tx
4846                            .send(InferenceEvent::Thought(format!(
4847                                "JSON Repair failed: {}",
4848                                e
4849                            )))
4850                            .await;
4851                        Value::Object(Default::default())
4852                    }
4853                }
4854            }
4855        };
4856        let last_user_prompt = self
4857            .history
4858            .iter()
4859            .rev()
4860            .find(|message| message.role == "user")
4861            .map(|message| message.content.as_str());
4862        rewrite_host_tool_call(&mut call.name, &mut args, last_user_prompt);
4863
4864        let display = format_tool_display(&call.name, &args);
4865        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
4866        let auth = self.check_authorization(&call.name, &args, &config, yolo);
4867
4868        // 2. Permission Check
4869        let decision_result = match precondition_result {
4870            Err(e) => Err(e),
4871            Ok(_) => match auth {
4872                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
4873                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
4874                    reason,
4875                    source: _,
4876                } => {
4877                    let mutation_label =
4878                        crate::agent::tool_registry::get_mutation_label(&call.name, &args);
4879                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
4880                    let _ = tx
4881                        .send(InferenceEvent::ApprovalRequired {
4882                            id: real_id.clone(),
4883                            name: call.name.clone(),
4884                            display: format!("{}\nWhy: {}", display, reason),
4885                            diff: None,
4886                            mutation_label,
4887                            responder: approve_tx,
4888                        })
4889                        .await;
4890
4891                    match approve_rx.await {
4892                        Ok(true) => Ok(()),
4893                        _ => Err("Declined by user".into()),
4894                    }
4895                }
4896                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
4897                    reason, ..
4898                } => Err(reason),
4899            },
4900        };
4901        let blocked_by_policy =
4902            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
4903
4904        // 3. Execution (Local or MCP)
4905        let (output, is_error) = match decision_result {
4906            Err(e) if e.starts_with("[auto-redirected shell→inspect_host") => (e, false),
4907            Err(e) => (format!("Error: {}", e), true),
4908            Ok(_) => {
4909                let _ = tx
4910                    .send(InferenceEvent::ToolCallStart {
4911                        id: real_id.clone(),
4912                        name: call.name.clone(),
4913                        args: display.clone(),
4914                    })
4915                    .await;
4916
4917                let result = if call.name.starts_with("lsp_") {
4918                    let lsp = self.lsp_manager.clone();
4919                    let path = args
4920                        .get("path")
4921                        .and_then(|v| v.as_str())
4922                        .unwrap_or("")
4923                        .to_string();
4924                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4925                    let character =
4926                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4927
4928                    match call.name.as_str() {
4929                        "lsp_definitions" => {
4930                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
4931                                .await
4932                        }
4933                        "lsp_references" => {
4934                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
4935                                .await
4936                        }
4937                        "lsp_hover" => {
4938                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
4939                        }
4940                        "lsp_search_symbol" => {
4941                            let query = args
4942                                .get("query")
4943                                .and_then(|v| v.as_str())
4944                                .unwrap_or_default()
4945                                .to_string();
4946                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
4947                        }
4948                        "lsp_rename_symbol" => {
4949                            let new_name = args
4950                                .get("new_name")
4951                                .and_then(|v| v.as_str())
4952                                .unwrap_or_default()
4953                                .to_string();
4954                            crate::tools::lsp_tools::lsp_rename_symbol(
4955                                lsp, path, line, character, new_name,
4956                            )
4957                            .await
4958                        }
4959                        "lsp_get_diagnostics" => {
4960                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
4961                        }
4962                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
4963                    }
4964                } else if call.name == "auto_pin_context" {
4965                    let pts = args.get("paths").and_then(|v| v.as_array());
4966                    let reason = args
4967                        .get("reason")
4968                        .and_then(|v| v.as_str())
4969                        .unwrap_or("uninformed scoping");
4970                    if let Some(arr) = pts {
4971                        let mut pinned = Vec::new();
4972                        {
4973                            let mut guard = self.pinned_files.lock().await;
4974                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; // 25MB Safety Valve
4975
4976                            for v in arr.iter().take(3) {
4977                                if let Some(p) = v.as_str() {
4978                                    if let Ok(meta) = std::fs::metadata(p) {
4979                                        if meta.len() > MAX_PINNED_SIZE {
4980                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
4981                                            continue;
4982                                        }
4983                                        if let Ok(content) = std::fs::read_to_string(p) {
4984                                            guard.insert(p.to_string(), content);
4985                                            pinned.push(p.to_string());
4986                                        }
4987                                    }
4988                                }
4989                            }
4990                        }
4991                        let msg = format!(
4992                            "Autonomous Scoping: Locked {} in prioritized memory. Reason: {}",
4993                            pinned.join(", "),
4994                            reason
4995                        );
4996                        let _ = tx
4997                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
4998                            .await;
4999                        Ok(msg)
5000                    } else {
5001                        Err("Missing 'paths' array for auto_pin_context.".to_string())
5002                    }
5003                } else if call.name == "list_pinned" {
5004                    let paths_msg = {
5005                        let pinned = self.pinned_files.lock().await;
5006                        if pinned.is_empty() {
5007                            "No files are currently pinned.".to_string()
5008                        } else {
5009                            let paths: Vec<_> = pinned.keys().cloned().collect();
5010                            format!(
5011                                "Currently pinned files in active memory:\n- {}",
5012                                paths.join("\n- ")
5013                            )
5014                        }
5015                    };
5016                    Ok(paths_msg)
5017                } else if call.name.starts_with("mcp__") {
5018                    let mut mcp = self.mcp_manager.lock().await;
5019                    match mcp.call_tool(&call.name, &args).await {
5020                        Ok(res) => Ok(res),
5021                        Err(e) => Err(e.to_string()),
5022                    }
5023                } else if call.name == "swarm" {
5024                    // ── Swarm Orchestration ──
5025                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
5026                    let max_workers = args
5027                        .get("max_workers")
5028                        .and_then(|v| v.as_u64())
5029                        .unwrap_or(3) as usize;
5030
5031                    let mut task_objs = Vec::new();
5032                    if let Value::Array(arr) = tasks_val {
5033                        for v in arr {
5034                            let id = v
5035                                .get("id")
5036                                .and_then(|x| x.as_str())
5037                                .unwrap_or("?")
5038                                .to_string();
5039                            let target = v
5040                                .get("target")
5041                                .and_then(|x| x.as_str())
5042                                .unwrap_or("?")
5043                                .to_string();
5044                            let instruction = v
5045                                .get("instruction")
5046                                .and_then(|x| x.as_str())
5047                                .unwrap_or("?")
5048                                .to_string();
5049                            task_objs.push(crate::agent::parser::WorkerTask {
5050                                id,
5051                                target,
5052                                instruction,
5053                            });
5054                        }
5055                    }
5056
5057                    if task_objs.is_empty() {
5058                        Err("No tasks provided for swarm.".to_string())
5059                    } else {
5060                        let (swarm_tx_internal, mut swarm_rx_internal) =
5061                            tokio::sync::mpsc::channel(32);
5062                        let tx_forwarder = tx.clone();
5063
5064                        // Bridge SwarmMessage -> InferenceEvent
5065                        tokio::spawn(async move {
5066                            while let Some(msg) = swarm_rx_internal.recv().await {
5067                                match msg {
5068                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
5069                                        let _ = tx_forwarder
5070                                            .send(InferenceEvent::Thought(format!(
5071                                                "Swarm [{}]: {}% complete",
5072                                                id, p
5073                                            )))
5074                                            .await;
5075                                    }
5076                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
5077                                        worker_id,
5078                                        file_path,
5079                                        before: _,
5080                                        after: _,
5081                                        tx,
5082                                    } => {
5083                                        let (approve_tx, approve_rx) =
5084                                            tokio::sync::oneshot::channel::<bool>();
5085                                        let display = format!(
5086                                            "Swarm worker [{}]: Integrated changes into {:?}",
5087                                            worker_id, file_path
5088                                        );
5089                                        let _ = tx_forwarder
5090                                            .send(InferenceEvent::ApprovalRequired {
5091                                                id: format!("swarm_{}", worker_id),
5092                                                name: "swarm_apply".to_string(),
5093                                                display,
5094                                                diff: None,
5095                                                mutation_label: Some(
5096                                                    "Swarm Agentic Integration".to_string(),
5097                                                ),
5098                                                responder: approve_tx,
5099                                            })
5100                                            .await;
5101                                        if let Ok(approved) = approve_rx.await {
5102                                            let response = if approved {
5103                                                crate::agent::swarm::ReviewResponse::Accept
5104                                            } else {
5105                                                crate::agent::swarm::ReviewResponse::Reject
5106                                            };
5107                                            let _ = tx.send(response);
5108                                        }
5109                                    }
5110                                    crate::agent::swarm::SwarmMessage::Done => {}
5111                                }
5112                            }
5113                        });
5114
5115                        let coordinator = self.swarm_coordinator.clone();
5116                        match coordinator
5117                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
5118                            .await
5119                        {
5120                            Ok(_) => Ok(
5121                                "Swarm execution completed. Check files for integration results."
5122                                    .to_string(),
5123                            ),
5124                            Err(e) => Err(format!("Swarm failure: {}", e)),
5125                        }
5126                    }
5127                } else if call.name == "vision_analyze" {
5128                    crate::tools::vision::vision_analyze(&self.engine, &args).await
5129                } else if matches!(
5130                    call.name.as_str(),
5131                    "edit_file" | "patch_hunk" | "multi_search_replace"
5132                ) && !yolo
5133                {
5134                    // ── Diff preview gate ─────────────────────────────────────
5135                    // Compute what the edit would look like before applying it.
5136                    // If we can build a diff, require user Y/N in the TUI.
5137                    let diff_result = match call.name.as_str() {
5138                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
5139                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
5140                        _ => crate::tools::file_ops::compute_msr_diff(&args),
5141                    };
5142                    match diff_result {
5143                        Ok(diff_text) => {
5144                            let path_label =
5145                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
5146                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
5147                            let mutation_label =
5148                                crate::agent::tool_registry::get_mutation_label(&call.name, &args);
5149                            let _ = tx
5150                                .send(InferenceEvent::ApprovalRequired {
5151                                    id: real_id.clone(),
5152                                    name: call.name.clone(),
5153                                    display: format!("Edit preview: {}", path_label),
5154                                    diff: Some(diff_text),
5155                                    mutation_label,
5156                                    responder: appr_tx,
5157                                })
5158                                .await;
5159                            match appr_rx.await {
5160                                Ok(true) => dispatch_tool(&call.name, &args).await,
5161                                _ => Err("Edit declined by user.".into()),
5162                            }
5163                        }
5164                        // Diff computation failed (e.g. search string not found yet) —
5165                        // fall through and let the tool return its own error.
5166                        Err(_) => dispatch_tool(&call.name, &args).await,
5167                    }
5168                } else if call.name == "verify_build" {
5169                    // Stream build output line-by-line to the SPECULAR panel so
5170                    // the operator sees live compiler progress during long builds.
5171                    crate::tools::verify_build::execute_streaming(&args, tx.clone()).await
5172                } else if call.name == "shell" {
5173                    // Stream shell output line-by-line to the SPECULAR panel so
5174                    // the operator sees live progress during long commands.
5175                    crate::tools::shell::execute_streaming(&args, tx.clone()).await
5176                } else {
5177                    dispatch_tool(&call.name, &args).await
5178                };
5179
5180                match result {
5181                    Ok(o) => (o, false),
5182                    Err(e) => (format!("Error: {}", e), true),
5183                }
5184            }
5185        };
5186
5187        // ── Session Economics ────────────────────────────────────────────────
5188        {
5189            if let Ok(mut econ) = self.engine.economics.lock() {
5190                econ.record_tool(&call.name, !is_error);
5191            }
5192        }
5193
5194        if !is_error {
5195            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
5196                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
5197                    if call.name == "inspect_lines" {
5198                        self.record_line_inspection(path).await;
5199                    } else {
5200                        self.record_read_observation(path).await;
5201                    }
5202                }
5203            }
5204
5205            if call.name == "verify_build" {
5206                let ok = output.contains("BUILD OK")
5207                    || output.contains("BUILD SUCCESS")
5208                    || output.contains("BUILD OKAY");
5209                self.record_verify_build_result(ok, &output).await;
5210            }
5211
5212            if matches!(
5213                call.name.as_str(),
5214                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5215            ) || is_mcp_mutating_tool(&call.name)
5216            {
5217                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
5218                    .await;
5219            }
5220
5221            if call.name == "create_directory" {
5222                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
5223                    let resolved = crate::tools::file_ops::resolve_candidate(path);
5224                    latest_target_dir = Some(resolved.to_string_lossy().to_string());
5225                }
5226            }
5227
5228            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
5229                msg_results.push(receipt);
5230            }
5231        }
5232
5233        // 4. Critic Check (Specular Tier 2)
5234        // Gated: Only run on code files with substantive content to avoid burning tokens
5235        // on trivial doc/config edits.
5236        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
5237            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
5238            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
5239            let ext = std::path::Path::new(path)
5240                .extension()
5241                .and_then(|e| e.to_str())
5242                .unwrap_or("");
5243            const SKIP_EXTS: &[&str] = &[
5244                "md",
5245                "toml",
5246                "json",
5247                "txt",
5248                "yml",
5249                "yaml",
5250                "cfg",
5251                "csv",
5252                "lock",
5253                "gitignore",
5254            ];
5255            let line_count = content.lines().count();
5256            if !path.is_empty()
5257                && !content.is_empty()
5258                && !SKIP_EXTS.contains(&ext)
5259                && line_count >= 50
5260            {
5261                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
5262                    msg_results.push(ChatMessage::system(&format!(
5263                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
5264                        path, critique
5265                    )));
5266                }
5267            }
5268        }
5269
5270        ToolExecutionOutcome {
5271            call_id: real_id,
5272            tool_name: call.name,
5273            args,
5274            output,
5275            is_error,
5276            blocked_by_policy,
5277            msg_results,
5278            latest_target_dir,
5279        }
5280    }
5281}
5282
/// The result of an isolated tool execution.
/// Used to bridge Parallel/Serial execution back to the main history.
struct ToolExecutionOutcome {
    /// Identifier of the tool call this outcome belongs to.
    call_id: String,
    /// Name of the tool that was invoked.
    tool_name: String,
    /// Arguments the tool was invoked with, as JSON.
    args: Value,
    /// Tool output text; on failure this carries the formatted error message.
    output: String,
    /// True when the tool invocation failed.
    is_error: bool,
    /// True when the call was stopped by a policy check.
    // NOTE(review): presumably the tool was never dispatched in that case — confirm at the construction site.
    blocked_by_policy: bool,
    /// Extra chat messages produced alongside the tool result
    /// (e.g. action receipts and critic reviews).
    msg_results: Vec<ChatMessage>,
    /// Resolved path of a directory created by a successful `create_directory`
    /// call, if any.
    latest_target_dir: Option<String>,
}
5295
/// Cached record of a tool result.
// NOTE(review): only the tool name is stored here; how the cache is keyed and
// consumed is not visible from this definition — confirm against the users.
#[derive(Clone)]
struct CachedToolResult {
    /// Name of the tool whose result was cached.
    tool_name: String,
}
5300
/// Reports whether `path` ends in a file extension commonly used for source code.
///
/// The comparison ignores ASCII case. Paths without an extension (or with a
/// non-UTF-8 extension) are never considered code-like.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    match std::path::Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
    {
        Some(found) => CODE_EXTENSIONS
            .iter()
            .any(|known| found.eq_ignore_ascii_case(known)),
        None => false,
    }
}
5329
5330// ── Display helpers ───────────────────────────────────────────────────────────
5331
5332pub fn format_tool_display(name: &str, args: &Value) -> String {
5333    let get = |key: &str| {
5334        args.get(key)
5335            .and_then(|v| v.as_str())
5336            .unwrap_or("")
5337            .to_string()
5338    };
5339    match name {
5340        "shell" => format!("$ {}", get("command")),
5341
5342        "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
5343        "describe_toolchain" => format!("describe toolchain {}", get("topic")),
5344        "inspect_host" => format!("inspect host {}", get("topic")),
5345        _ => format!("{} {:?}", name, args),
5346    }
5347}
5348
5349// ── Text utilities ────────────────────────────────────────────────────────────
5350
/// Heuristically decides whether a shell command is really a host-inspection
/// query (versions, network, services, registry, disks, ...).
///
/// The command is lowercased (ASCII) and checked against two tables:
///
/// * `LITERAL_NEEDLES` — plain substrings; any hit returns `true`.
/// * `ORDERED_PAIRS` — two substrings that must both occur, the second
///   somewhere after the first. These replace entries that used to be written
///   as `"a.*b"`: `str::contains` treats its argument literally, so those
///   regex-looking needles could never match.
///
/// Exact duplicate needles from the earlier list were removed; this does not
/// change the result.
pub(crate) fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const LITERAL_NEEDLES: &[&str] = &[
        "$env:path",
        "pathvariable",
        "pip --version",
        "pipx --version",
        "winget --version",
        "choco",
        "scoop",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "tasklist",
        "ipconfig",
        "get-netipconfiguration",
        "get-netadapter",
        "route print",
        "ifconfig",
        "ip addr",
        "ip route",
        "resolv.conf",
        "get-service",
        "sc query",
        "systemctl",
        "service --status-all",
        "get-process",
        "working set",
        "ps -eo",
        "ps aux",
        "desktop",
        "downloads",
        "get-netfirewallprofile",
        "win32_powerplan",
        "win32_operatingsystem",
        "win32_processor",
        "wmic",
        "loadpercentage",
        "totalvisiblememory",
        "freephysicalmemory",
        "get-wmiobject",
        "get-ciminstance",
        "get-cpu",
        "processorname",
        "clockspeed",
        "top memory",
        "top cpu",
        "resource usage",
        "powercfg",
        "uptime",
        "lastbootuptime",
        // registry reads for OS/version/update/security info — always use inspect_host
        "hklm:",
        "hkcu:",
        "hklm:\\",
        "hkcu:\\",
        "currentversion",
        "productname",
        "displayversion",
        "get-itemproperty",
        "get-itempropertyvalue",
        // updates
        "get-windowsupdatelog",
        "windowsupdatelog",
        "microsoft.update.session",
        "createupdatesearcher",
        "wuauserv",
        "usoclient",
        "get-hotfix",
        "wu_",
        // security / defender
        "get-mpcomputerstatus",
        "get-mppreference",
        "get-mpthreat",
        "start-mpscan",
        "win32_computersecurity",
        "softwarelicensingproduct",
        "enablelua",
        "get-netfirewallrule",
        "netfirewallprofile",
        "antivirus",
        "defenderstatus",
        // disk health / smart
        "get-physicaldisk",
        "get-disk",
        "get-volume",
        "get-psdrive",
        "psdrive",
        "manage-bde",
        "bitlockervolume",
        "get-bitlockervolume",
        "get-smbencryptionstatus",
        "smbencryption",
        "get-netlanmanagerconnection",
        "lanmanager",
        "msstoragedriver_failurepredic",
        "win32_diskdrive",
        "smartstatus",
        "diskstatus",
        "get-counter",
        "intensity",
        "benchmark",
        "thrash",
        "get-item",
        "test-path",
        // gpo / certs / integrity / domain
        "gpresult",
        "applied gpo",
        "cert:\\",
        "cert:",
        "component based servicing",
        "componentstore",
        "get-computerinfo",
        "win32_computersystem",
        // battery
        "win32_battery",
        "batterystaticdata",
        "batteryfullchargedcapacity",
        "batterystatus",
        "estimatedchargeremaining",
        // crashes / event log (broader)
        "get-winevent",
        "eventid",
        "bugcheck",
        "kernelpower",
        "win32_ntlogevent",
        "filterhashtable",
        // scheduled tasks
        "get-scheduledtask",
        "get-scheduledtaskinfo",
        "schtasks",
        "taskscheduler",
        "get-acl",
        "icacls",
        "takeown",
        "event id 4624",
        "eventid 4624",
        "who logged in",
        "logon history",
        "login history",
        "get-smbshare",
        "net share",
        "mbps",
        "throughput",
        "whoami",
        // general cim/wmi diagnostic queries — always use inspect_host
        "get-ciminstance win32",
        "get-wmiobject win32",
        // network admin — always use inspect_host
        "arp -",
        "arp -a",
        "tracert ",
        "traceroute ",
        "tracepath ",
        "get-dnsclientcache",
        "ipconfig /displaydns",
        "get-netroute",
        "ip neigh",
        // active directory - always use inspect_host
        "get-aduser",
        "get-addomain",
        "get-adforest",
        "get-adgroup",
        "get-adcomputer",
        "activedirectory",
        "get-localuser",
        "get-localgroup",
        "get-localgroupmember",
        "net user",
        "net localgroup",
        "netsh winhttp show proxy",
        "netsh wlan show",
        "test-netconnection",
        "resolve-dnsname",
        // docker / wsl / ssh — always use inspect_host
        "docker ps",
        "docker info",
        "docker images",
        "docker container",
        "docker compose ls",
        "wsl --list",
        "wsl -l",
        "wsl --status",
        "wsl --version",
        "ssh -v",
        "get-service sshd",
        "get-service -name sshd",
        "cat ~/.ssh",
        "ls ~/.ssh",
        "ls -la ~/.ssh",
        // env / hosts / git config
        "get-childitem env:",
        "dir env:",
        "printenv",
        "[environment]::getenvironmentvariable",
        "cat /etc/hosts",
        "type c:\\windows\\system32\\drivers\\etc\\hosts",
        "git config --global --list",
        "git config --list",
        "git config --global",
        // database services
        "get-service mysql",
        "get-service postgresql",
        "get-service mongodb",
        "get-service redis",
        "get-service mssql",
        "get-service mariadb",
        "systemctl status postgresql",
        "systemctl status mysql",
        "systemctl status mongod",
        "systemctl status redis",
        // installed software
        "winget list",
        "get-package",
        "dpkg --get-selections",
        "rpm -qa",
        "brew list",
        // user accounts
        "query user",
        "net localgroup administrators",
        // audit policy
        "auditpol /get",
        "auditpol",
        // shares
        "get-smbserverconfiguration",
        "net use",
        // dns servers
        "get-dnsclientserveraddress",
        "get-dnsclientdohserveraddress",
        "get-dnsclientglobalsetting",
    ];

    // (first, then): `first` must occur and `then` must occur somewhere after it.
    const ORDERED_PAIRS: &[(&str, &str)] = &[
        // proxy settings read from the registry
        ("get-itemproperty", "proxy"),
        // hosts file read via PowerShell
        ("get-content", "hosts"),
        // installed software read from the Uninstall registry keys
        ("get-itempropert", "uninstall"),
    ];

    let lower = command.to_ascii_lowercase();

    if LITERAL_NEEDLES.iter().any(|needle| lower.contains(*needle)) {
        return true;
    }

    // Checking after the earliest occurrence of `first` is sufficient: if any
    // occurrence of `first` is followed by `then`, so is the earliest one.
    ORDERED_PAIRS.iter().any(|(first, then)| {
        lower
            .find(*first)
            .map_or(false, |at| lower[at + first.len()..].contains(*then))
    })
}
5617
5618// Moved strip_think_blocks to inference.rs
5619
5620fn cap_output(text: &str, max_bytes: usize) -> String {
5621    cap_output_for_tool(text, max_bytes, "output")
5622}
5623
5624/// Cap tool output at `max_bytes`. When the output exceeds the cap, write the
5625/// full content to `.hematite/scratch/<tool_name>_<timestamp>.txt` and include
5626/// the path in the truncation notice so the model can read the rest with
5627/// `read_file` instead of losing it entirely.
5628fn cap_output_for_tool(text: &str, max_bytes: usize, tool_name: &str) -> String {
5629    if text.len() <= max_bytes {
5630        return text.to_string();
5631    }
5632
5633    // Write full output to scratch so the model can access it.
5634    let scratch_path = write_output_to_scratch(text, tool_name);
5635
5636    let mut split_at = max_bytes;
5637    while !text.is_char_boundary(split_at) && split_at > 0 {
5638        split_at -= 1;
5639    }
5640
5641    let tail = match &scratch_path {
5642        Some(p) => format!(
5643            "\n... [output truncated — full output ({} bytes, {} lines) saved to '{}' — use read_file to access the rest]",
5644            text.len(),
5645            text.lines().count(),
5646            p
5647        ),
5648        None => format!("\n... [output capped at {}B]", max_bytes),
5649    };
5650
5651    format!("{}{}", &text[..split_at], tail)
5652}
5653
5654/// Write text to `.hematite/scratch/<tool>_<timestamp>.txt`.
5655/// Returns the relative path on success, None if the write fails.
5656fn write_output_to_scratch(text: &str, tool_name: &str) -> Option<String> {
5657    let root = crate::tools::file_ops::workspace_root();
5658    let scratch_dir = root.join(".hematite").join("scratch");
5659    if std::fs::create_dir_all(&scratch_dir).is_err() {
5660        return None;
5661    }
5662    let ts = std::time::SystemTime::now()
5663        .duration_since(std::time::UNIX_EPOCH)
5664        .map(|d| d.as_secs())
5665        .unwrap_or(0);
5666    // Sanitize tool name for use in filename
5667    let safe_name: String = tool_name
5668        .chars()
5669        .map(|c| {
5670            if c.is_alphanumeric() || c == '_' {
5671                c
5672            } else {
5673                '_'
5674            }
5675        })
5676        .collect();
5677    let filename = format!("{}_{}.txt", safe_name, ts);
5678    let abs_path = scratch_dir.join(&filename);
5679    if std::fs::write(&abs_path, text).is_err() {
5680        return None;
5681    }
5682    Some(format!(".hematite/scratch/{}", filename))
5683}
5684
/// Counters describing what `enforce_prompt_budget` did to shrink a prompt.
/// All counters start at zero via `Default`.
#[derive(Default)]
struct PromptBudgetStats {
    /// Large tool results replaced by a short summary.
    summarized_tool_results: usize,
    /// Older tool results replaced by a fixed "omitted" placeholder.
    collapsed_tool_results: usize,
    /// Long user/assistant messages replaced by a short summary.
    trimmed_chat_messages: usize,
    /// Messages removed from the prompt entirely.
    dropped_messages: usize,
}
5692
5693fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
5694    crate::agent::inference::estimate_message_batch_tokens(messages)
5695}
5696
5697fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
5698    let budget = compaction::SummaryCompressionBudget {
5699        max_chars,
5700        max_lines: 3,
5701        max_line_chars: max_chars.clamp(80, 240),
5702    };
5703    let compressed = compaction::compress_summary(text, budget).summary;
5704    if compressed.is_empty() {
5705        String::new()
5706    } else {
5707        compressed
5708    }
5709}
5710
5711fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
5712    let tool_name = message.name.as_deref().unwrap_or("tool");
5713    let body = summarize_prompt_blob(message.content.as_str(), 320);
5714    format!(
5715        "[Prompt-budget summary of prior `{}` result]\n{}",
5716        tool_name, body
5717    )
5718}
5719
5720fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
5721    let role = message.role.as_str();
5722    let body = summarize_prompt_blob(message.content.as_str(), 240);
5723    format!(
5724        "[Prompt-budget summary of earlier {} message]\n{}",
5725        role, body
5726    )
5727}
5728
5729fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
5730    if messages.len() > 1 && messages[1].role != "user" {
5731        messages.insert(1, ChatMessage::user("Continuing previous context..."));
5732    }
5733}
5734
5735fn enforce_prompt_budget(
5736    prompt_msgs: &mut Vec<ChatMessage>,
5737    context_length: usize,
5738) -> Option<String> {
5739    let target_tokens = ((context_length as f64) * 0.68) as usize;
5740    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5741        return None;
5742    }
5743
5744    let mut stats = PromptBudgetStats::default();
5745
5746    // 1. Summarize the newest large tool outputs first.
5747    let mut tool_indices: Vec<usize> = prompt_msgs
5748        .iter()
5749        .enumerate()
5750        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5751        .collect();
5752    for idx in tool_indices.iter().rev().copied() {
5753        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5754            break;
5755        }
5756        let original = prompt_msgs[idx].content.as_str().to_string();
5757        if original.len() > 1200 {
5758            prompt_msgs[idx].content =
5759                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
5760            stats.summarized_tool_results += 1;
5761        }
5762    }
5763
5764    // 2. Collapse older tool results aggressively, keeping only the most recent two verbatim/summarized.
5765    tool_indices = prompt_msgs
5766        .iter()
5767        .enumerate()
5768        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5769        .collect();
5770    if tool_indices.len() > 2 {
5771        for idx in tool_indices
5772            .iter()
5773            .take(tool_indices.len().saturating_sub(2))
5774            .copied()
5775        {
5776            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5777                break;
5778            }
5779            prompt_msgs[idx].content = MessageContent::Text(
5780                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
5781            );
5782            stats.collapsed_tool_results += 1;
5783        }
5784    }
5785
5786    // 3. Trim older long chat messages, but preserve the final user request.
5787    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5788    for idx in 1..prompt_msgs.len() {
5789        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5790            break;
5791        }
5792        if Some(idx) == last_user_idx {
5793            continue;
5794        }
5795        let role = prompt_msgs[idx].role.as_str();
5796        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
5797            prompt_msgs[idx].content =
5798                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
5799            stats.trimmed_chat_messages += 1;
5800        }
5801    }
5802
5803    // 4. Drop the oldest non-system context until we fit, preserving the latest user request.
5804    let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5805    let mut idx = 1usize;
5806    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
5807        if Some(idx) == preserve_last_user_idx {
5808            idx += 1;
5809            if idx >= prompt_msgs.len() {
5810                break;
5811            }
5812            continue;
5813        }
5814        if idx >= prompt_msgs.len() {
5815            break;
5816        }
5817        prompt_msgs.remove(idx);
5818        stats.dropped_messages += 1;
5819    }
5820
5821    normalize_prompt_start(prompt_msgs);
5822
5823    let new_tokens = estimate_prompt_tokens(prompt_msgs);
5824    if stats.summarized_tool_results == 0
5825        && stats.collapsed_tool_results == 0
5826        && stats.trimmed_chat_messages == 0
5827        && stats.dropped_messages == 0
5828    {
5829        return None;
5830    }
5831
5832    Some(format!(
5833        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
5834        new_tokens,
5835        target_tokens,
5836        stats.summarized_tool_results,
5837        stats.collapsed_tool_results,
5838        stats.trimmed_chat_messages,
5839        stats.dropped_messages
5840    ))
5841}
5842
/// Returns true for short, direct tool-use requests that don't benefit from deep reasoning.
/// Used to skip the auto-/think prepend so the model calls the tool immediately
/// instead of spending thousands of tokens deliberating over a trivial task.
///
/// Two cases qualify (matching is case-insensitive):
/// - any mention of `run_code` / `run code`, regardless of length;
/// - an input under 120 bytes that contains a compute-style keyword.
fn is_quick_tool_request(input: &str) -> bool {
    // Explicit sandbox requests need no reasoning warmup.
    const SANDBOX_MARKERS: &[&str] = &["run_code", "run code"];
    // Short compute/test phrasing: "calculate X", "test this", "execute Y", ...
    const COMPUTE_MARKERS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];
    const SHORT_REQUEST_BYTES: usize = 120;

    let folded = input.to_lowercase();
    let mentions_any = |markers: &[&str]| markers.iter().any(|marker| folded.contains(*marker));

    mentions_any(SANDBOX_MARKERS)
        || (input.len() < SHORT_REQUEST_BYTES && mentions_any(COMPUTE_MARKERS))
}
5872
/// Split text into chunks of roughly `words_per_chunk` whitespace-separated tokens.
///
/// Every space or newline counts as one separator; a chunk is closed right
/// after the separator that brings the count up to `words_per_chunk`, so
/// separators stay attached to the end of their chunk. Any trailing text is
/// returned as a final, shorter chunk. Concatenating the chunks reproduces
/// `text` exactly.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut separators_seen = 0usize;

    for (offset, ch) in text.char_indices() {
        if !matches!(ch, ' ' | '\n') {
            continue;
        }
        separators_seen += 1;
        if separators_seen >= words_per_chunk {
            let piece_end = offset + ch.len_utf8();
            pieces.push(text[piece_start..piece_end].to_string());
            piece_start = piece_end;
            separators_seen = 0;
        }
    }

    if piece_start < text.len() {
        pieces.push(text[piece_start..].to_string());
    }
    pieces
}
5894
5895fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
5896    if call.name != "read_file" {
5897        return None;
5898    }
5899    let normalized_arguments =
5900        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
5901    let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
5902    let path = args.get("path").and_then(|v| v.as_str())?;
5903    Some(normalize_workspace_path(path))
5904}
5905
5906fn order_batch_reads_first(
5907    calls: Vec<crate::agent::inference::ToolCallResponse>,
5908) -> (
5909    Vec<crate::agent::inference::ToolCallResponse>,
5910    Option<String>,
5911) {
5912    let has_reads = calls.iter().any(|c| {
5913        matches!(
5914            c.function.name.as_str(),
5915            "read_file" | "inspect_lines" | "grep_files" | "list_files"
5916        )
5917    });
5918    let has_edits = calls.iter().any(|c| {
5919        matches!(
5920            c.function.name.as_str(),
5921            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5922        )
5923    });
5924    if has_reads && has_edits {
5925        let reads: Vec<_> = calls
5926            .into_iter()
5927            .filter(|c| {
5928                !matches!(
5929                    c.function.name.as_str(),
5930                    "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5931                )
5932            })
5933            .collect();
5934        let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
5935        (reads, note)
5936    } else {
5937        (calls, None)
5938    }
5939}
5940
/// Decides from the first line of grep output whether the search fanned out
/// widely.
///
/// The first line is read as a summary: its first space-separated token is
/// taken as the match count, and a `", "`-separated part of the form
/// `"<n> hunk(s)"` as the hunk count. Values that are missing or not numeric
/// count as zero. The output is high-fanout at 8 or more hunks, or 12 or more
/// matches. Empty output is never high-fanout.
fn grep_output_is_high_fanout(output: &str) -> bool {
    const HUNK_SUFFIX: &str = " hunk(s)";
    const HUNK_LIMIT: usize = 8;
    const MATCH_LIMIT: usize = 12;

    let header = match output.lines().next() {
        Some(line) => line,
        None => return false,
    };

    let hunks = header
        .split(", ")
        .filter_map(|segment| segment.strip_suffix(HUNK_SUFFIX))
        .find_map(|digits| digits.parse::<usize>().ok())
        .unwrap_or(0);
    if hunks >= HUNK_LIMIT {
        return true;
    }

    let matches = header
        .split(' ')
        .next()
        .and_then(|digits| digits.parse::<usize>().ok())
        .unwrap_or(0);
    matches >= MATCH_LIMIT
}
5959
5960fn build_system_with_corrections(
5961    base: &str,
5962    hints: &[String],
5963    gpu: &Arc<GpuState>,
5964    git: &Arc<crate::agent::git_monitor::GitState>,
5965    config: &crate::agent::config::HematiteConfig,
5966) -> String {
5967    let mut system_msg = base.to_string();
5968
5969    // Inject Permission Mode.
5970    system_msg.push_str("\n\n# Permission Mode\n");
5971    let mode_label = match config.mode {
5972        crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
5973        crate::agent::config::PermissionMode::Developer => "DEVELOPER",
5974        crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
5975    };
5976    system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
5977
5978    if config.mode == crate::agent::config::PermissionMode::ReadOnly {
5979        system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
5980    } else {
5981        system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
5982    }
5983
5984    // Inject live hardware status.
5985    let (used, total) = gpu.read();
5986    if total > 0 {
5987        system_msg.push_str("\n\n# Terminal Hardware Context\n");
5988        system_msg.push_str(&format!(
5989            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
5990            gpu.gpu_name(),
5991            used as f64 / 1024.0,
5992            total as f64 / 1024.0,
5993            gpu.ratio() * 100.0
5994        ));
5995        system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
5996    }
5997
5998    // Inject Git Repository context.
5999    system_msg.push_str("\n\n# Git Repository Context\n");
6000    let git_status_label = git.label();
6001    let git_url = git.url();
6002    system_msg.push_str(&format!(
6003        "REMOTE STATUS: {} | URL: {}\n",
6004        git_status_label, git_url
6005    ));
6006
6007    // Live Snapshots (Status/Diff)
6008    let root = crate::tools::file_ops::workspace_root();
6009    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
6010        system_msg.push_str("\nGit status snapshot:\n");
6011        system_msg.push_str(&status_snapshot);
6012        system_msg.push_str("\n");
6013    }
6014
6015    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
6016        system_msg.push_str("\nGit diff snapshot:\n");
6017        system_msg.push_str(&diff_snapshot);
6018        system_msg.push_str("\n");
6019    }
6020
6021    if git_status_label == "NONE" {
6022        system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
6023    } else if git_status_label == "BEHIND" {
6024        system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
6025    }
6026
6027    // NOTE: Instruction files (CLAUDE.md, HEMATITE.md, etc.) are already injected
6028    // by InferenceEngine::build_system_prompt() via load_instruction_files().
6029    // Injecting them again here would double the token cost (~4K wasted per turn).
6030
6031    if hints.is_empty() {
6032        return system_msg;
6033    }
6034    system_msg.push_str("\n\n# Formatting Corrections\n");
6035    system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
6036    for hint in hints {
6037        system_msg.push_str(&format!("- {}\n", hint));
6038    }
6039    system_msg
6040}
6041
/// Picks a model override from keywords in the user's input.
///
/// Matching is a case-insensitive substring search. Inputs that mention a
/// "heavy" verb (refactor, rewrite, implement, create, fix, debug) select
/// `think_model` when one is configured. Otherwise inputs that mention a
/// "lookup" word (what, show, find, list, status) select `fast_model` when
/// one is configured. Returns `None` when neither applies.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_WORDS: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_WORDS: &[&str] = &["what", "show", "find", "list", "status"];

    let lowered = user_input.to_lowercase();
    let mentions_any = |words: &[&str]| words.iter().any(|word| lowered.contains(*word));

    let wants_think = mentions_any(THINK_WORDS);
    let wants_fast = mentions_any(FAST_WORDS);

    think_model
        .filter(|_| wants_think)
        .or_else(|| fast_model.filter(|_| wants_fast))
}
6067
6068fn is_parallel_safe(name: &str) -> bool {
6069    let metadata = crate::agent::inference::tool_metadata_for_name(name);
6070    !metadata.mutates_workspace && !metadata.external_surface
6071}
6072
/// Decides whether a chat query should consult the vein.
///
/// Always true in docs-only mode. Otherwise true when the query (ASCII
/// case-insensitive) contains one of the recall cues below, or contains a
/// run of digits and dashes that is exactly ten characters long with a dash
/// in the fifth position (e.g. a `YYYY-MM-DD` style date).
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const RECALL_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    if docs_only_mode {
        return true;
    }

    let folded = query.to_ascii_lowercase();
    if RECALL_CUES.iter().any(|cue| folded.contains(*cue)) {
        return true;
    }

    // Tokens consist solely of ASCII digits and dashes, so byte indexing is
    // equivalent to character indexing here.
    folded
        .split(|ch: char| !ch.is_ascii_digit() && ch != '-')
        .any(|token| token.len() == 10 && token.as_bytes()[4] == b'-')
}
6099
// Unit tests for conversation-level helpers: runtime-failure classification, the
// query intent router, host-inspection topic selection, shell-call rewriting, plan
// execution detection, and build-output path parsing.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Runtime failure classification ----

    // An LM Studio "n_keep >= n_ctx" 400 error must classify as a context-window
    // failure and carry the matching tag in the formatted message.
    #[test]
    fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
        let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
        let class = classify_runtime_failure(detail);
        assert_eq!(class, RuntimeFailureClass::ContextWindow);
        assert_eq!(class.tag(), "context_window");
        assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
    }

    // Context-window failures map to both a provider state and a blocking checkpoint;
    // provider degradation maps to a provider state only.
    #[test]
    fn runtime_failure_maps_to_provider_and_checkpoint_state() {
        assert_eq!(
            provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
            Some(ProviderRuntimeState::ContextWindow)
        );
        assert_eq!(
            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
            Some(OperatorCheckpointState::BlockedContextWindow)
        );
        assert_eq!(
            provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
            Some(ProviderRuntimeState::Degraded)
        );
        assert_eq!(
            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
            None
        );
    }

    // ---- Intent router: product-truth direct answers ----

    #[test]
    fn intent_router_treats_tool_registry_ownership_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(
            intent.direct_answer,
            Some(DirectAnswerKind::ToolRegistryOwnership)
        );
    }

    #[test]
    fn intent_router_treats_tool_classes_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
    }

    // The canned ownership answer must name both the registry file and this file.
    #[test]
    fn tool_registry_ownership_answer_mentions_new_owner_file() {
        let answer = build_tool_registry_ownership_answer();
        assert!(answer.contains("src/agent/tool_registry.rs"));
        assert!(answer.contains("builtin dispatch path"));
        assert!(answer.contains("src/agent/conversation.rs"));
    }

    #[test]
    fn intent_router_treats_mcp_lifecycle_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
    }

    // A request to skip verification and commit immediately is answered directly
    // with the unsafe-workflow-pressure response, even in Auto mode.
    #[test]
    fn intent_router_short_circuits_unsafe_commit_pressure() {
        let intent = classify_query_intent(
            WorkflowMode::Auto,
            "Make a code change, skip verification, and commit it immediately.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(
            intent.direct_answer,
            Some(DirectAnswerKind::UnsafeWorkflowPressure)
        );
    }

    #[test]
    fn unsafe_workflow_pressure_answer_requires_verification() {
        let answer = build_unsafe_workflow_pressure_answer();
        assert!(answer.contains("should not skip verification"));
        assert!(answer.contains("run the appropriate verification path"));
        assert!(answer.contains("only then commit"));
    }

    // A broad "how is it wired" prompt that also mentions MCP must route to the
    // architecture overview rather than a narrow direct answer.
    #[test]
    fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::RepoArchitecture);
        assert!(intent.architecture_overview_mode);
        assert_eq!(intent.direct_answer, None);
    }

    // ---- Host inspection routing ----

    // A multi-part PATH/tooling readiness prompt enables host-inspection mode and
    // resolves to the "summary" topic.
    #[test]
    fn intent_router_marks_host_inspection_questions() {
        let intent = classify_query_intent(
            WorkflowMode::Auto,
            "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.",
        );
        assert!(intent.host_inspection_mode);
        assert_eq!(
            preferred_host_inspection_topic(
                "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development."
            ),
            Some("summary")
        );
    }

    // Vein lookup is used for history-style prompts and whenever docs-only mode is
    // on, but not for an unrelated prompt.
    #[test]
    fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
        assert!(should_use_vein_in_chat(
            "What did we decide on 2026-04-09 about docs-only mode?",
            false
        ));
        assert!(should_use_vein_in_chat("Summarize these local notes", true));
        assert!(!should_use_vein_in_chat("Tell me a joke", false));
    }

    // Raw shell commands that probe PATH, tool versions, ports, processes, network,
    // or services must be recognised as structured host inspection.
    #[test]
    fn shell_host_inspection_guard_matches_path_and_version_commands() {
        assert!(shell_looks_like_structured_host_inspection(
            "$env:PATH -split ';'"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "cargo --version"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-NetTCPConnection -LocalPort 3000"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "netstat -ano | findstr :3000"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-Process | Sort-Object WS -Descending"
        ));
        assert!(shell_looks_like_structured_host_inspection("ipconfig /all"));
        assert!(shell_looks_like_structured_host_inspection("Get-Service"));
        assert!(shell_looks_like_structured_host_inspection(
            "winget --version"
        ));
    }

    #[test]
    fn intent_router_picks_ports_for_listening_port_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me what is listening on port 3000 and whether anything unexpected is exposed."
            ),
            Some("ports")
        );
    }

    #[test]
    fn intent_router_picks_processes_for_host_process_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me what processes are using the most RAM right now."
            ),
            Some("processes")
        );
    }

    #[test]
    fn intent_router_picks_network_for_adapter_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me my active network adapters, IP addresses, gateways, and DNS servers."
            ),
            Some("network")
        );
    }

    #[test]
    fn intent_router_picks_services_for_service_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me the running services and startup types that matter for a normal dev machine."
            ),
            Some("services")
        );
    }

    #[test]
    fn intent_router_picks_env_doctor_for_package_manager_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Run an environment doctor on this machine and tell me whether my PATH and package managers look sane."
            ),
            Some("env_doctor")
        );
    }

    // ---- Fix-plan routing and shell rewriting ----

    #[test]
    fn intent_router_picks_fix_plan_for_host_remediation_questions() {
        assert_eq!(
            preferred_host_inspection_topic("How do I fix cargo not found on this machine?"),
            Some("fix_plan")
        );
        assert_eq!(
            preferred_host_inspection_topic(
                "How do I fix Hematite when LM Studio is not reachable on localhost:1234?"
            ),
            Some("fix_plan")
        );
    }

    // When a fix_plan call arrives without an "issue", it is filled from the latest
    // user prompt; the expected value shows the leading "/think" line is dropped.
    #[test]
    fn fill_missing_fix_plan_issue_backfills_last_user_prompt() {
        let mut args = serde_json::json!({
            "topic": "fix_plan"
        });

        fill_missing_fix_plan_issue(
            "inspect_host",
            &mut args,
            Some("/think\nHow do I fix cargo not found on this machine?"),
        );

        assert_eq!(
            args.get("issue").and_then(|value| value.as_str()),
            Some("How do I fix cargo not found on this machine?")
        );
    }

    #[test]
    fn shell_fix_question_rewrites_to_fix_plan() {
        let args = serde_json::json!({
            "command": "where cargo"
        });

        assert!(should_rewrite_shell_to_fix_plan(
            "shell",
            &args,
            Some("How do I fix cargo not found on this machine?")
        ));
    }

    // A shell probe and an explicit fix_plan call issued for the same prompt must
    // produce the same dedupe key.
    #[test]
    fn fix_plan_dedupe_key_matches_rewritten_shell_probe() {
        let latest_user_prompt = Some("How do I fix cargo not found on this machine?");
        let shell_key = normalized_tool_call_key_for_dedupe(
            "shell",
            r#"{"command":"where cargo"}"#,
            false,
            latest_user_prompt,
        );
        let fix_plan_key = normalized_tool_call_key_for_dedupe(
            "inspect_host",
            r#"{"topic":"fix_plan"}"#,
            false,
            latest_user_prompt,
        );

        assert_eq!(shell_key, fix_plan_key);
    }

    // ---- Shell -> workflow tool normalization ----

    // Invoking clean.ps1 via shell is rewritten to the maintainer workflow tool,
    // carrying the -Deep and -PruneDist switches over as boolean arguments.
    #[test]
    fn shell_cleanup_script_rewrites_to_maintainer_workflow() {
        let (tool_name, args) = normalized_tool_call_for_execution(
            "shell",
            r#"{"command":"pwsh ./clean.ps1 -Deep -PruneDist"}"#,
            false,
            Some("Run my cleanup scripts."),
        );

        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
        assert_eq!(
            args.get("workflow").and_then(|value| value.as_str()),
            Some("clean")
        );
        assert_eq!(
            args.get("deep").and_then(|value| value.as_bool()),
            Some(true)
        );
        assert_eq!(
            args.get("prune_dist").and_then(|value| value.as_bool()),
            Some(true)
        );
    }

    // Invoking release.ps1 via shell is rewritten likewise, preserving the version
    // string and the push flag.
    #[test]
    fn shell_release_script_rewrites_to_maintainer_workflow() {
        let (tool_name, args) = normalized_tool_call_for_execution(
            "shell",
            r#"{"command":"pwsh ./release.ps1 -Version 0.4.5 -Push -AddToPath"}"#,
            false,
            Some("Run the release flow."),
        );

        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
        assert_eq!(
            args.get("workflow").and_then(|value| value.as_str()),
            Some("release")
        );
        assert_eq!(
            args.get("version").and_then(|value| value.as_str()),
            Some("0.4.5")
        );
        assert_eq!(
            args.get("push").and_then(|value| value.as_bool()),
            Some(true)
        );
    }

    // Even when the shell command itself is unrelated, an explicit cleanup prompt
    // drives the rewrite and its wording sets the deep/prune_dist flags.
    #[test]
    fn explicit_cleanup_prompt_rewrites_shell_to_maintainer_workflow() {
        let (tool_name, args) = normalized_tool_call_for_execution(
            "shell",
            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
            false,
            Some("Run the deep cleanup and prune old dist artifacts."),
        );

        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
        assert_eq!(
            args.get("workflow").and_then(|value| value.as_str()),
            Some("clean")
        );
        assert_eq!(
            args.get("deep").and_then(|value| value.as_bool()),
            Some(true)
        );
        assert_eq!(
            args.get("prune_dist").and_then(|value| value.as_bool()),
            Some(true)
        );
    }

    // A literal `cargo test` shell call becomes a workspace workflow of kind
    // "command" that keeps the original command text.
    #[test]
    fn shell_cargo_test_rewrites_to_workspace_workflow() {
        let (tool_name, args) = normalized_tool_call_for_execution(
            "shell",
            r#"{"command":"cargo test"}"#,
            false,
            Some("Run cargo test in this project."),
        );

        assert_eq!(tool_name, "run_workspace_workflow");
        assert_eq!(
            args.get("workflow").and_then(|value| value.as_str()),
            Some("command")
        );
        assert_eq!(
            args.get("command").and_then(|value| value.as_str()),
            Some("cargo test")
        );
    }

    // ---- Plan execution and architect handoff ----

    // Both the slash command and the natural-language phrasing count as a request
    // to execute the current plan.
    #[test]
    fn current_plan_execution_request_accepts_saved_plan_command() {
        assert!(is_current_plan_execution_request("/implement-plan"));
        assert!(is_current_plan_execution_request(
            "Implement the current plan."
        ));
    }

    // The operator note for a plan handoff must mention the plan file and both
    // ways of starting execution.
    #[test]
    fn architect_operator_note_points_to_execute_path() {
        let plan = crate::tools::plan::PlanHandoff {
            goal: "Tighten startup workflow guidance".into(),
            target_files: vec!["src/runtime.rs".into()],
            ordered_steps: vec!["Update the startup banner".into()],
            verification: "cargo check --tests".into(),
            risks: vec![],
            open_questions: vec![],
        };
        let note = architect_handoff_operator_note(&plan);
        assert!(note.contains("`.hematite/PLAN.md`"));
        assert!(note.contains("/implement-plan"));
        assert!(note.contains("/code implement the current plan"));
    }

    // A natural-language "run the tests" prompt rewrites an unrelated shell call to
    // the workspace workflow of kind "test".
    #[test]
    fn natural_language_test_prompt_rewrites_to_workspace_workflow() {
        let (tool_name, args) = normalized_tool_call_for_execution(
            "shell",
            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
            false,
            Some("Run the tests in this project."),
        );

        assert_eq!(tool_name, "run_workspace_workflow");
        assert_eq!(
            args.get("workflow").and_then(|value| value.as_str()),
            Some("test")
        );
    }

    // ---- Build output parsing ----

    // Two distinct `-->` locations in cargo output yield exactly two paths.
    #[test]
    fn failing_path_parser_extracts_cargo_error_locations() {
        let output = r#"
BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:

error[E0412]: cannot find type `Foo` in this scope
  --> src/agent/conversation.rs:42:12
   |
42 |     field: Foo,
   |            ^^^ not found

error[E0308]: mismatched types
  --> src/tools/file_ops.rs:100:5
   |
   = note: expected `String`, found `&str`
"#;
        let paths = parse_failing_paths_from_build_output(output);
        assert!(
            paths.iter().any(|p| p.contains("conversation.rs")),
            "should capture conversation.rs"
        );
        assert!(
            paths.iter().any(|p| p.contains("file_ops.rs")),
            "should capture file_ops.rs"
        );
        assert_eq!(paths.len(), 2, "no duplicates");
    }

    // Locations inside `<macro-expansion>` are not real files and must be dropped.
    #[test]
    fn failing_path_parser_ignores_macro_expansions() {
        let output = r#"
  --> <macro-expansion>:1:2
  --> src/real/file.rs:10:5
"#;
        let paths = parse_failing_paths_from_build_output(output);
        assert_eq!(paths.len(), 1);
        assert!(paths[0].contains("file.rs"));
    }

    // ---- Host inspection topics: system health ----

    #[test]
    fn intent_router_picks_updates_for_update_questions() {
        assert_eq!(
            preferred_host_inspection_topic("is my PC up to date?"),
            Some("updates")
        );
        assert_eq!(
            preferred_host_inspection_topic("are there any pending Windows updates?"),
            Some("updates")
        );
        assert_eq!(
            preferred_host_inspection_topic("check for updates on my computer"),
            Some("updates")
        );
    }

    #[test]
    fn intent_router_picks_security_for_antivirus_questions() {
        assert_eq!(
            preferred_host_inspection_topic("is my antivirus on?"),
            Some("security")
        );
        assert_eq!(
            preferred_host_inspection_topic("is Windows Defender running?"),
            Some("security")
        );
        assert_eq!(
            preferred_host_inspection_topic("is my PC protected?"),
            Some("security")
        );
    }

    #[test]
    fn intent_router_picks_pending_reboot_for_restart_questions() {
        assert_eq!(
            preferred_host_inspection_topic("do I need to restart my PC?"),
            Some("pending_reboot")
        );
        assert_eq!(
            preferred_host_inspection_topic("is a reboot required?"),
            Some("pending_reboot")
        );
        assert_eq!(
            preferred_host_inspection_topic("is there a pending restart waiting?"),
            Some("pending_reboot")
        );
    }

    #[test]
    fn intent_router_picks_disk_health_for_drive_health_questions() {
        assert_eq!(
            preferred_host_inspection_topic("is my hard drive dying?"),
            Some("disk_health")
        );
        assert_eq!(
            preferred_host_inspection_topic("check the disk health and SMART status"),
            Some("disk_health")
        );
        assert_eq!(
            preferred_host_inspection_topic("is my SSD healthy?"),
            Some("disk_health")
        );
    }

    #[test]
    fn intent_router_picks_battery_for_battery_questions() {
        assert_eq!(
            preferred_host_inspection_topic("check my battery"),
            Some("battery")
        );
        assert_eq!(
            preferred_host_inspection_topic("how is my battery life?"),
            Some("battery")
        );
        assert_eq!(
            preferred_host_inspection_topic("what is my battery wear level?"),
            Some("battery")
        );
    }

    // An unexplained restart is treated as a crash question, not a pending reboot.
    #[test]
    fn intent_router_picks_recent_crashes_for_bsod_questions() {
        assert_eq!(
            preferred_host_inspection_topic("why did my PC restart by itself?"),
            Some("recent_crashes")
        );
        assert_eq!(
            preferred_host_inspection_topic("did my computer BSOD recently?"),
            Some("recent_crashes")
        );
        assert_eq!(
            preferred_host_inspection_topic("show me any recent app crashes"),
            Some("recent_crashes")
        );
    }

    #[test]
    fn intent_router_picks_scheduled_tasks_for_task_questions() {
        assert_eq!(
            preferred_host_inspection_topic("what scheduled tasks are running on this PC?"),
            Some("scheduled_tasks")
        );
        assert_eq!(
            preferred_host_inspection_topic("show me the task scheduler"),
            Some("scheduled_tasks")
        );
    }

    #[test]
    fn intent_router_picks_dev_conflicts_for_conflict_questions() {
        assert_eq!(
            preferred_host_inspection_topic("are there any dev environment conflicts?"),
            Some("dev_conflicts")
        );
        assert_eq!(
            preferred_host_inspection_topic("why is python pointing to the wrong version?"),
            Some("dev_conflicts")
        );
    }

    // PowerShell commands for updates, Defender, disks, battery, event logs, and
    // scheduled tasks must all be caught by the structured host-inspection guard.
    #[test]
    fn shell_guard_catches_windows_update_commands() {
        assert!(shell_looks_like_structured_host_inspection(
            "Get-WindowsUpdateLog | Select-Object -Last 50"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "$sess = New-Object -ComObject Microsoft.Update.Session"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-Service wuauserv"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-MpComputerStatus"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-PhysicalDisk"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-CimInstance Win32_Battery"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-WinEvent -FilterHashtable @{Id=41}"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-ScheduledTask | Where-Object State -ne Disabled"
        ));
    }

    // ---- Host inspection topics: access, audit, and telemetry ----

    #[test]
    fn intent_router_picks_permissions_for_acl_questions() {
        assert_eq!(
            preferred_host_inspection_topic("who has permission to access the downloads folder?"),
            Some("permissions")
        );
        assert_eq!(
            preferred_host_inspection_topic("audit the ntfs permissions for this path"),
            Some("permissions")
        );
    }

    #[test]
    fn intent_router_picks_login_history_for_logon_questions() {
        assert_eq!(
            preferred_host_inspection_topic("who logged in recently on this machine?"),
            Some("login_history")
        );
        assert_eq!(
            preferred_host_inspection_topic("show me the logon history for the last 48 hours"),
            Some("login_history")
        );
    }

    // The first prompt contains a UNC path (`\\server\share`, escaped in the literal).
    #[test]
    fn intent_router_picks_share_access_for_unc_questions() {
        assert_eq!(
            preferred_host_inspection_topic("can i reach \\\\server\\share right now?"),
            Some("share_access")
        );
        assert_eq!(
            preferred_host_inspection_topic("test accessibility of a network share"),
            Some("share_access")
        );
    }

    #[test]
    fn intent_router_picks_registry_audit_for_persistence_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "audit my registry for persistence hacks or debugger hijacking"
            ),
            Some("registry_audit")
        );
        assert_eq!(
            preferred_host_inspection_topic("check winlogon shell integrity and ifeo hijacks"),
            Some("registry_audit")
        );
    }

    #[test]
    fn intent_router_picks_network_stats_for_mbps_questions() {
        assert_eq!(
            preferred_host_inspection_topic("what is my network throughput in mbps right now?"),
            Some("network_stats")
        );
    }

    #[test]
    fn intent_router_picks_processes_for_cpu_percentage_questions() {
        assert_eq!(
            preferred_host_inspection_topic("which processes are using the most cpu % right now?"),
            Some("processes")
        );
    }

    #[test]
    fn intent_router_picks_log_check_for_recent_window_questions() {
        assert_eq!(
            preferred_host_inspection_topic("show me system errors from the last 2 hours"),
            Some("log_check")
        );
    }

    #[test]
    fn intent_router_picks_battery_for_health_and_cycles() {
        assert_eq!(
            preferred_host_inspection_topic("check my battery health and cycle count"),
            Some("battery")
        );
    }

    #[test]
    fn intent_router_picks_thermal_for_throttling_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "why is my laptop slow? check for overheating or throttling"
            ),
            Some("thermal")
        );
        assert_eq!(
            preferred_host_inspection_topic("show me the current cpu temp"),
            Some("thermal")
        );
    }

    #[test]
    fn intent_router_picks_activation_for_genuine_questions() {
        assert_eq!(
            preferred_host_inspection_topic("is my windows genuine? check activation status"),
            Some("activation")
        );
        assert_eq!(
            preferred_host_inspection_topic("run slmgr to check my license state"),
            Some("activation")
        );
    }

    // "windows update patch history" must resolve to patch_history, not to another topic.
    #[test]
    fn intent_router_picks_patch_history_for_hotfix_questions() {
        assert_eq!(
            preferred_host_inspection_topic("show me the recently installed hotfixes"),
            Some("patch_history")
        );
        assert_eq!(
            preferred_host_inspection_topic(
                "list the windows update patch history for the last 48 hours"
            ),
            Some("patch_history")
        );
    }

    // A prompt naming several symptoms must yield every matching topic, not just
    // the single preferred one.
    #[test]
    fn intent_router_detects_multiple_symptoms_for_prerun() {
        let topics = all_host_inspection_topics("Why is my laptop slow? Check if it is overheating, throttling, or under heavy I/O pressure.");
        assert!(topics.contains(&"thermal"));
        assert!(topics.contains(&"resource_load"));
        assert!(topics.contains(&"storage"));
        assert!(topics.len() >= 3);
    }
}
hematite/agent/conversation.rs

hematite/agent/
conversation.rs