hematite/agent/
conversation.rs

1use crate::agent::architecture_summary::{
2    build_architecture_overview_answer, prune_architecture_trace_batch,
3    prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4    prune_redirected_shell_batch, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7    build_about_answer, build_architect_session_reset_plan, build_authorization_policy_answer,
8    build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9    build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10    build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11    build_session_reset_semantics_answer, build_tool_classes_answer,
12    build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13    build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16    ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17    ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20    action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21    is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24    attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25    RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28    classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
29    needs_computation_sandbox, preferred_host_inspection_topic, preferred_maintainer_workflow,
30    preferred_workspace_workflow, DirectAnswerKind, QueryIntentClass,
31};
32use crate::agent::tool_registry::dispatch_builtin_tool;
33// SystemPromptBuilder is no longer used — InferenceEngine::build_system_prompt() is canonical.
34use crate::agent::compaction::{self, CompactionConfig};
35use crate::ui::gpu_monitor::GpuState;
36
37use serde_json::Value;
38use std::sync::Arc;
39use tokio::sync::{mpsc, Mutex};
40// -- Session persistence -------------------------------------------------------
41
/// One user message together with the optional attachments sent with it.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
    /// Text of the user's message.
    pub text: String,
    /// Document attached to this turn, if any.
    pub attached_document: Option<AttachedDocument>,
    /// Image attached to this turn, if any.
    pub attached_image: Option<AttachedImage>,
}

/// A named document whose content travels with a user turn.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name used in the `[Attached document: ...]` marker.
    pub name: String,
    /// Document text.
    pub content: String,
}

/// A named image reference that travels with a user turn.
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name used in the `[Attached image: ...]` marker.
    pub name: String,
    /// Location of the image (presumably a filesystem path — confirm with the caller).
    pub path: String,
}

impl UserTurn {
    /// Builds a turn that carries only text and no attachments.
    pub fn text(text: impl Into<String>) -> Self {
        Self {
            text: text.into(),
            ..Self::default()
        }
    }
}
70
/// Serialized form of the session file located by `session_path()`.
///
/// `load_checkpoint` and `load_session_data` both deserialize this shape.
/// Fields marked `#[serde(default)]` may be absent from the stored JSON.
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
    /// Running summary text restored by `load_session_data`, if one was stored.
    running_summary: Option<String>,
    /// Session memory restored alongside the summary.
    #[serde(default)]
    session_memory: crate::agent::compaction::SessionMemory,
    /// Last user message from the previous session — shown as resume hint on startup.
    #[serde(default)]
    last_goal: Option<String>,
    /// Number of real inference turns completed in the previous session.
    #[serde(default)]
    turn_count: u32,
}
83
/// Snapshot of the previous session, surfaced on startup when a workspace is
/// resumed after a restart or crash.
pub struct CheckpointResume {
    /// Non-blank last goal recorded by the previous session.
    pub last_goal: String,
    /// Turn count recorded by the previous session (never zero when produced by `load_checkpoint`).
    pub turn_count: u32,
    /// Up to four entries from the saved working set, sorted.
    pub working_files: Vec<String>,
    /// Outcome of the last recorded verification, or `None` when none was recorded.
    pub last_verify_ok: Option<bool>,
}
92
93/// Load the prior-session checkpoint from `.hematite/session.json`.
94/// Returns `None` when there is no prior session or it has no real turns.
95pub fn load_checkpoint() -> Option<CheckpointResume> {
96    let path = session_path();
97    let data = std::fs::read_to_string(&path).ok()?;
98    let saved: SavedSession = serde_json::from_str(&data).ok()?;
99    let goal = saved.last_goal.filter(|g| !g.trim().is_empty())?;
100    if saved.turn_count == 0 {
101        return None;
102    }
103    let mut working_files: Vec<String> = saved
104        .session_memory
105        .working_set
106        .into_iter()
107        .take(4)
108        .collect();
109    working_files.sort();
110    let last_verify_ok = saved.session_memory.last_verification.map(|v| v.successful);
111    Some(CheckpointResume {
112        last_goal: goal,
113        turn_count: saved.turn_count,
114        working_files,
115        last_verify_ok,
116    })
117}
118
/// Bookkeeping for the proof-before-action checks.
///
/// NOTE(review): the `u64` values in the maps look like turn indices
/// (compare `turn_index`) — confirm against the code that fills them.
#[derive(Default)]
struct ActionGroundingState {
    turn_index: u64,
    observed_paths: std::collections::HashMap<String, u64>,
    inspected_paths: std::collections::HashMap<String, u64>,
    last_verify_build_turn: Option<u64>,
    last_verify_build_ok: bool,
    last_failed_build_paths: Vec<String>,
    code_changed_since_verify: bool,
    /// Track topics redirected from shell to inspect_host in the current turn to break loops.
    redirected_host_inspection_topics: std::collections::HashMap<String, u64>,
}
131
/// Scope guard that resets the shared flag to `false` when dropped.
struct PlanExecutionGuard {
    flag: Arc<std::sync::atomic::AtomicBool>,
}

impl Drop for PlanExecutionGuard {
    /// Clears the flag so later readers no longer see it set.
    fn drop(&mut self) {
        use std::sync::atomic::Ordering;

        let shared = &self.flag;
        shared.store(false, Ordering::SeqCst);
    }
}
141
/// Operating mode that decides how the agent treats a request.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    #[default]
    Auto,
    Ask,
    Code,
    Architect,
    ReadOnly,
    /// Clean conversational mode — lighter prompt, no coding agent scaffolding,
    /// tools available but not pushed. Vein RAG still runs for context.
    Chat,
    /// Teacher/guide mode — inspect the real machine state first, then walk the user
    /// through the admin/config task as a grounded, numbered tutorial. Never executes
    /// write operations itself; instructs the user to perform them manually.
    Teach,
}

impl WorkflowMode {
    /// Upper-case label for this mode.
    fn label(self) -> &'static str {
        match self {
            Self::Auto => "AUTO",
            Self::Ask => "ASK",
            Self::Code => "CODE",
            Self::Architect => "ARCHITECT",
            Self::ReadOnly => "READ-ONLY",
            Self::Chat => "CHAT",
            Self::Teach => "TEACH",
        }
    }

    /// True for the modes that must not mutate anything.
    fn is_read_only(self) -> bool {
        match self {
            Self::Ask | Self::Architect | Self::ReadOnly | Self::Teach => true,
            Self::Auto | Self::Code | Self::Chat => false,
        }
    }

    /// True only for the conversational `Chat` mode.
    pub(crate) fn is_chat(self) -> bool {
        self == Self::Chat
    }
}
186
187fn session_path() -> std::path::PathBuf {
188    if let Ok(overridden) = std::env::var("HEMATITE_SESSION_PATH") {
189        return std::path::PathBuf::from(overridden);
190    }
191    crate::tools::file_ops::workspace_root()
192        .join(".hematite")
193        .join("session.json")
194}
195
196fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
197    let path = session_path();
198    if !path.exists() {
199        return (None, crate::agent::compaction::SessionMemory::default());
200    }
201    let Ok(data) = std::fs::read_to_string(&path) else {
202        return (None, crate::agent::compaction::SessionMemory::default());
203    };
204    let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
205        return (None, crate::agent::compaction::SessionMemory::default());
206    };
207    (saved.running_summary, saved.session_memory)
208}
209
210fn reset_task_files() {
211    let root = crate::tools::file_ops::workspace_root();
212    let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
213    let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
214    let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
215    let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
216    let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
217    let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
218}
219
220fn purge_persistent_memory() {
221    let root = crate::tools::file_ops::workspace_root();
222    let mem_dir = root.join(".hematite").join("memories");
223    if mem_dir.exists() {
224        let _ = std::fs::remove_dir_all(&mem_dir);
225        let _ = std::fs::create_dir_all(&mem_dir);
226    }
227
228    let log_dir = root.join(".hematite_logs");
229    if log_dir.exists() {
230        if let Ok(entries) = std::fs::read_dir(&log_dir) {
231            for entry in entries.flatten() {
232                let _ = std::fs::write(entry.path(), "");
233            }
234        }
235    }
236}
237
238fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
239    let mut out = prompt.trim().to_string();
240    if let Some(doc) = user_turn.attached_document.as_ref() {
241        out = format!(
242            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
243            doc.name, doc.content, out
244        );
245    }
246    if let Some(image) = user_turn.attached_image.as_ref() {
247        out = if out.is_empty() {
248            format!("[Attached image: {}]", image.name)
249        } else {
250            format!("[Attached image: {}]\n\n{}", image.name, out)
251        };
252    }
253    out
254}
255
256fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
257    let mut prefixes = Vec::new();
258    if let Some(doc) = user_turn.attached_document.as_ref() {
259        prefixes.push(format!("[Attached document: {}]", doc.name));
260    }
261    if let Some(image) = user_turn.attached_image.as_ref() {
262        prefixes.push(format!("[Attached image: {}]", image.name));
263    }
264    if prefixes.is_empty() {
265        prompt.to_string()
266    } else if prompt.trim().is_empty() {
267        prefixes.join("\n")
268    } else {
269        format!("{}\n{}", prefixes.join("\n"), prompt)
270    }
271}
272
/// Coarse category assigned to a runtime failure.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    ContextWindow,
    ProviderDegraded,
    ToolArgMalformed,
    ToolPolicyBlocked,
    ToolLoop,
    VerificationFailed,
    EmptyModelResponse,
    Unknown,
}

impl RuntimeFailureClass {
    /// Short snake_case identifier for this class.
    fn tag(self) -> &'static str {
        match self {
            Self::ContextWindow => "context_window",
            Self::ProviderDegraded => "provider_degraded",
            Self::ToolArgMalformed => "tool_arg_malformed",
            Self::ToolPolicyBlocked => "tool_policy_blocked",
            Self::ToolLoop => "tool_loop",
            Self::VerificationFailed => "verification_failed",
            Self::EmptyModelResponse => "empty_model_response",
            Self::Unknown => "unknown",
        }
    }

    /// One-paragraph advice for the operator on how to react to this class.
    fn operator_guidance(self) -> &'static str {
        let guidance = match self {
            Self::ContextWindow => "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.",
            Self::ProviderDegraded => "Retry once automatically, then narrow the turn or restart LM Studio if it persists.",
            Self::ToolArgMalformed => "Retry with repaired or narrower tool arguments instead of repeating the same malformed call.",
            Self::ToolPolicyBlocked => "Stay inside the allowed workflow or switch modes before retrying.",
            Self::ToolLoop => "Stop repeating the same failing tool pattern and switch to a narrower recovery step.",
            Self::VerificationFailed => "Fix the build or test failure before treating the task as complete.",
            Self::EmptyModelResponse => "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.",
            Self::Unknown => "Inspect the latest grounded tool results or provider status before retrying.",
        };
        guidance
    }
}
328
329fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
330    let lower = detail.to_ascii_lowercase();
331    if lower.contains("context_window_blocked")
332        || lower.contains("context ceiling reached")
333        || lower.contains("exceeds the")
334        || ((lower.contains("n_keep") && lower.contains("n_ctx"))
335            || lower.contains("context length")
336            || lower.contains("keep from the initial prompt")
337            || lower.contains("prompt is greater than the context length"))
338    {
339        RuntimeFailureClass::ContextWindow
340    } else if lower.contains("empty response from model")
341        || lower.contains("model returned an empty response")
342    {
343        RuntimeFailureClass::EmptyModelResponse
344    } else if lower.contains("lm studio unreachable")
345        || lower.contains("lm studio error")
346        || lower.contains("request failed")
347        || lower.contains("response parse error")
348        || lower.contains("provider degraded")
349    {
350        RuntimeFailureClass::ProviderDegraded
351    } else if lower.contains("missing required argument")
352        || lower.contains("json repair failed")
353        || lower.contains("invalid pattern")
354        || lower.contains("invalid line range")
355    {
356        RuntimeFailureClass::ToolArgMalformed
357    } else if lower.contains("action blocked:")
358        || lower.contains("access denied")
359        || lower.contains("declined by user")
360    {
361        RuntimeFailureClass::ToolPolicyBlocked
362    } else if lower.contains("too many consecutive tool errors")
363        || lower.contains("repeated tool failures")
364        || lower.contains("stuck in a loop")
365    {
366        RuntimeFailureClass::ToolLoop
367    } else if lower.contains("build failed")
368        || lower.contains("verification failed")
369        || lower.contains("verify_build")
370    {
371        RuntimeFailureClass::VerificationFailed
372    } else {
373        RuntimeFailureClass::Unknown
374    }
375}
376
377fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
378    format!(
379        "[failure:{}] {} Detail: {}",
380        class.tag(),
381        class.operator_guidance(),
382        detail.trim()
383    )
384}
385
386fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
387    match class {
388        RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
389        RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
390        RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
391        _ => None,
392    }
393}
394
395fn checkpoint_state_for_runtime_failure(
396    class: RuntimeFailureClass,
397) -> Option<OperatorCheckpointState> {
398    match class {
399        RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
400        RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
401        RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
402        RuntimeFailureClass::VerificationFailed => {
403            Some(OperatorCheckpointState::BlockedVerification)
404        }
405        _ => None,
406    }
407}
408
409fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
410    match class {
411        RuntimeFailureClass::ProviderDegraded => {
412            "LM Studio degraded during the turn; retrying once before surfacing a failure."
413        }
414        RuntimeFailureClass::EmptyModelResponse => {
415            "The model returned an empty reply; retrying once before surfacing a failure."
416        }
417        _ => "Runtime recovery in progress.",
418    }
419}
420
421fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
422    match class {
423        RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
424        RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
425        RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
426        RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
427        _ => "Operator checkpoint updated.",
428    }
429}
430
431fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
432    match class {
433        RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
434        RuntimeFailureClass::ProviderDegraded => {
435            "LM Studio degraded and did not recover cleanly; operator action is now required."
436        }
437        RuntimeFailureClass::EmptyModelResponse => {
438            "LM Studio returned an empty reply after recovery; operator action is now required."
439        }
440        RuntimeFailureClass::ToolLoop => {
441            "Repeated failing tool pattern detected; Hematite stopped the loop."
442        }
443        _ => "Runtime failure surfaced to the operator.",
444    }
445}
446
447fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
448    matches!(
449        class,
450        RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
451    )
452}
453
454fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
455    match class {
456        RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
457        RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
458        RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
459        RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
460        RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
461        RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
462        RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
463    }
464}
465
466fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
467    format!(
468        "{} [{}]",
469        plan.recipe.scenario.label(),
470        plan.recipe.steps_summary()
471    )
472}
473
474fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
475    match decision {
476        RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
477        RecoveryDecision::Escalate {
478            recipe,
479            attempts_made,
480            ..
481        } => format!(
482            "{} escalated after {} / {} [{}]",
483            recipe.scenario.label(),
484            attempts_made,
485            recipe.max_attempts.max(1),
486            recipe.steps_summary()
487        ),
488    }
489}
490
491/// Parse file paths from cargo/compiler error output.
492/// Handles lines like `  --> src/foo/bar.rs:34:12` and `error: could not compile`.
493fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
494    let root = crate::tools::file_ops::workspace_root();
495    let mut paths: Vec<String> = output
496        .lines()
497        .filter_map(|line| {
498            let trimmed = line.trim_start();
499            // Cargo error location: "--> path/to/file.rs:line:col"
500            let after_arrow = trimmed.strip_prefix("--> ")?;
501            let file_part = after_arrow.split(':').next()?;
502            if file_part.is_empty() || file_part.starts_with('<') {
503                return None;
504            }
505            let p = std::path::Path::new(file_part);
506            let resolved = if p.is_absolute() {
507                p.to_path_buf()
508            } else {
509                root.join(p)
510            };
511            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
512        })
513        .collect();
514    paths.sort();
515    paths.dedup();
516    paths
517}
518
519fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
520    match mode {
521        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
522        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
523        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
524        WorkflowMode::Teach => "Workflow mode TEACH is a guided walkthrough mode. I will inspect the real state of your machine first, then give you a numbered step-by-step tutorial so you can perform the task yourself. I do not execute write operations in TEACH mode — I show you exactly how to do it.".to_string(),
525        _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
526    }
527}
528
/// Fixed instruction block that spells out the ARCHITECT handoff format:
/// a short preamble followed by the six required `#` headings.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow`.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented.",
    )
}
544
/// Canonical prompt text that asks for the saved plan to be executed.
fn implement_current_plan_prompt() -> &'static str {
    const PROMPT: &str = "Implement the current plan.";
    PROMPT
}
548
549fn architect_handoff_operator_note(plan: &crate::tools::plan::PlanHandoff) -> String {
550    format!(
551        "Implementation handoff saved to `.hematite/PLAN.md`.\nNext step: run `/implement-plan` to execute it in `/code`, or use `/code {}` directly.\nPlan: {}",
552        implement_current_plan_prompt().to_ascii_lowercase(),
553        plan.summary_line()
554    )
555}
556
557fn is_current_plan_execution_request(user_input: &str) -> bool {
558    let lower = user_input.trim().to_ascii_lowercase();
559    lower == "/implement-plan"
560        || lower == implement_current_plan_prompt().to_ascii_lowercase()
561        || lower
562            == implement_current_plan_prompt()
563                .trim_end_matches('.')
564                .to_ascii_lowercase()
565        || lower.contains("implement the current plan")
566}
567
568fn is_plan_scoped_tool(name: &str) -> bool {
569    crate::agent::inference::tool_metadata_for_name(name).plan_scope
570}
571
572fn is_current_plan_irrelevant_tool(name: &str) -> bool {
573    !crate::agent::inference::tool_metadata_for_name(name).plan_scope
574}
575
576fn is_non_mutating_plan_step_tool(name: &str) -> bool {
577    let metadata = crate::agent::inference::tool_metadata_for_name(name);
578    metadata.plan_scope && !metadata.mutates_workspace
579}
580
581fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
582    let trimmed = user_input.trim();
583    for (prefix, mode) in [
584        ("/ask", WorkflowMode::Ask),
585        ("/code", WorkflowMode::Code),
586        ("/architect", WorkflowMode::Architect),
587        ("/read-only", WorkflowMode::ReadOnly),
588        ("/auto", WorkflowMode::Auto),
589        ("/teach", WorkflowMode::Teach),
590    ] {
591        if let Some(rest) = trimmed.strip_prefix(prefix) {
592            let rest = rest.trim();
593            if !rest.is_empty() {
594                return Some((mode, rest));
595            }
596        }
597    }
598    None
599}
600
601// Tool catalogue
602
/// Returns the full set of tools exposed to the model.
///
/// Thin wrapper that forwards to `crate::agent::tool_registry::get_tools()`.
pub fn get_tools() -> Vec<ToolDefinition> {
    crate::agent::tool_registry::get_tools()
}
607
/// State for one conversation: the message history, shared handles to the
/// engine and supporting services, and the per-session bookkeeping fields
/// described below.
pub struct ConversationManager {
    /// Full conversation history in OpenAI format.
    pub history: Vec<ChatMessage>,
    /// Shared handle to the inference engine.
    pub engine: Arc<InferenceEngine>,
    /// Tool definitions held by this manager (presumably the set advertised to the model — confirm).
    pub tools: Vec<ToolDefinition>,
    /// Shared, mutex-guarded MCP manager.
    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
    // NOTE(review): `professional`, `brief`, `snark` and `chaos` look like
    // presentation/personality settings; their exact meaning is not visible here.
    pub professional: bool,
    pub brief: bool,
    pub snark: u8,
    pub chaos: u8,
    /// Model to use for simple read-only tasks (optional, user-supplied via --fast-model).
    pub fast_model: Option<String>,
    /// Model to use for complex write/build tasks (optional, user-supplied via --think-model).
    pub think_model: Option<String>,
    /// Files where whitespace auto-correction fired this session.
    pub correction_hints: Vec<String>,
    /// Running background summary of pruned older messages.
    pub running_summary: Option<String>,
    /// Live hardware telemetry handle.
    pub gpu_state: Arc<GpuState>,
    /// Local RAG memory — FTS5-indexed project source.
    pub vein: crate::memory::vein::Vein,
    /// Append-only session transcript logger.
    pub transcript: crate::agent::transcript::TranscriptLogger,
    /// Thread-safe cancellation signal for the current agent turn.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Shared Git remote state (for persistent connectivity checks).
    pub git_state: Arc<crate::agent::git_monitor::GitState>,
    /// Reasoning think-mode override. None = let model decide. Some(true) = force /think.
    /// Some(false) = force /no_think (fast mode, 3-5x quicker for simple tasks).
    pub think_mode: Option<bool>,
    /// Currently selected workflow mode.
    workflow_mode: WorkflowMode,
    /// Layer 6: Dynamic Task Context (extracted during compaction)
    pub session_memory: crate::agent::compaction::SessionMemory,
    /// Shared handle to the swarm coordinator.
    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    /// Shared handle to the voice manager.
    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
    /// Personality description for the current Rusty soul — used in chat mode system prompt.
    pub soul_personality: String,
    /// Shared, mutex-guarded LSP manager.
    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
    /// Active reasoning summary extracted from the previous model turn (Gemma-4 Native).
    pub reasoning_history: Option<String>,
    /// Layer 8: Active Reference Pinning (Context Locked)
    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
    /// Hard action-grounding state for proof-before-action checks.
    action_grounding: Arc<Mutex<ActionGroundingState>>,
    /// True only during `/code Implement the current plan.` style execution turns.
    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
    /// Typed per-turn recovery attempt tracking.
    recovery_context: RecoveryContext,
    /// L1 context block — hot files summary injected into the system prompt.
    /// Built once after vein init and updated as edits accumulate heat.
    pub l1_context: Option<String>,
    /// Condensed AST repository layout for the active project.
    pub repo_map: Option<String>,
    /// Number of real inference turns completed this session.
    pub turn_count: u32,
    /// Last user message sent to the model — persisted as checkpoint goal.
    pub last_goal: Option<String>,
}
667
668impl ConversationManager {
669    fn vein_docs_only_mode(&self) -> bool {
670        !crate::tools::file_ops::is_project_workspace()
671    }
672
673    fn refresh_vein_index(&mut self) -> usize {
674        let count = if self.vein_docs_only_mode() {
675            let root = crate::tools::file_ops::workspace_root();
676            tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
677        } else {
678            tokio::task::block_in_place(|| self.vein.index_project())
679        };
680        self.l1_context = self.vein.l1_context();
681        count
682    }
683
684    fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
685        let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
686        let workspace_mode = if self.vein_docs_only_mode() {
687            "docs-only (outside a project workspace)"
688        } else {
689            "project workspace"
690        };
691        let active_room = snapshot.active_room.as_deref().unwrap_or("none");
692        let mut out = format!(
693            "Vein Inspection\n\
694             Workspace mode: {workspace_mode}\n\
695             Indexed this pass: {indexed_this_pass}\n\
696             Indexed source files: {}\n\
697             Indexed docs: {}\n\
698             Indexed session exchanges: {}\n\
699             Embedded source/doc chunks: {}\n\
700             Embeddings available: {}\n\
701             Active room bias: {active_room}\n\
702             L1 hot-files block: {}\n",
703            snapshot.indexed_source_files,
704            snapshot.indexed_docs,
705            snapshot.indexed_session_exchanges,
706            snapshot.embedded_source_doc_chunks,
707            if snapshot.has_any_embeddings {
708                "yes"
709            } else {
710                "no"
711            },
712            if snapshot.l1_ready {
713                "ready"
714            } else {
715                "not built yet"
716            },
717        );
718
719        if snapshot.hot_files.is_empty() {
720            out.push_str("Hot files: none yet.\n");
721            return out;
722        }
723
724        out.push_str("\nHot files by room:\n");
725        let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
726            std::collections::BTreeMap::new();
727        for file in &snapshot.hot_files {
728            by_room.entry(file.room.as_str()).or_default().push(file);
729        }
730        for (room, files) in by_room {
731            out.push_str(&format!("[{}]\n", room));
732            for file in files {
733                out.push_str(&format!(
734                    "- {} [{} edit{}]\n",
735                    file.path,
736                    file.heat,
737                    if file.heat == 1 { "" } else { "s" }
738                ));
739            }
740        }
741
742        out
743    }
744
745    fn latest_user_prompt(&self) -> Option<&str> {
746        self.history
747            .iter()
748            .rev()
749            .find(|msg| msg.role == "user")
750            .map(|msg| msg.content.as_str())
751    }
752
    /// Answers a turn with a ready-made `response`, without calling the model.
    ///
    /// The exchange is recorded in `history` (using `effective_user_input`)
    /// and in the transcript (using `raw_user_input`). The response is then
    /// streamed to `tx` as `Token` events followed by `Done`, after which the
    /// history is trimmed and the session memory is refreshed and saved.
    /// Channel send errors are ignored.
    async fn emit_direct_response(
        &mut self,
        tx: &mpsc::Sender<InferenceEvent>,
        raw_user_input: &str,
        effective_user_input: &str,
        response: &str,
    ) {
        // History gets the effective input; the transcript keeps what the user typed.
        self.history.push(ChatMessage::user(effective_user_input));
        self.history.push(ChatMessage::assistant_text(response));
        self.transcript.log_user(raw_user_input);
        self.transcript.log_agent(response);
        // Stream the response in pieces produced by `chunk_text`, skipping empty ones.
        for chunk in chunk_text(response, 8) {
            if !chunk.is_empty() {
                let _ = tx.send(InferenceEvent::Token(chunk)).await;
            }
        }
        let _ = tx.send(InferenceEvent::Done).await;
        // Housekeeping after the turn: trim, refresh the memory, then save.
        self.trim_history(80);
        self.refresh_session_memory();
        self.save_session();
    }
774
775    async fn emit_operator_checkpoint(
776        &mut self,
777        tx: &mpsc::Sender<InferenceEvent>,
778        state: OperatorCheckpointState,
779        summary: impl Into<String>,
780    ) {
781        let summary = summary.into();
782        self.session_memory
783            .record_checkpoint(state.label(), summary.clone());
784        let _ = tx
785            .send(InferenceEvent::OperatorCheckpoint { state, summary })
786            .await;
787    }
788
789    async fn emit_recovery_recipe_summary(
790        &mut self,
791        tx: &mpsc::Sender<InferenceEvent>,
792        state: impl Into<String>,
793        summary: impl Into<String>,
794    ) {
795        let state = state.into();
796        let summary = summary.into();
797        self.session_memory.record_recovery(state, summary.clone());
798        let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
799    }
800
801    async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
802        let _ = tx
803            .send(InferenceEvent::ProviderStatus {
804                state: ProviderRuntimeState::Live,
805                summary: String::new(),
806            })
807            .await;
808        self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
809            .await;
810    }
811
812    async fn emit_prompt_pressure_for_messages(
813        &self,
814        tx: &mpsc::Sender<InferenceEvent>,
815        messages: &[ChatMessage],
816    ) {
817        let context_length = self.engine.current_context_length();
818        let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
819            crate::agent::inference::estimate_prompt_pressure(
820                messages,
821                &self.tools,
822                context_length,
823            );
824        let _ = tx
825            .send(InferenceEvent::PromptPressure {
826                estimated_input_tokens,
827                reserved_output_tokens,
828                estimated_total_tokens,
829                context_length,
830                percent,
831            })
832            .await;
833    }
834
835    async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
836        let context_length = self.engine.current_context_length();
837        let _ = tx
838            .send(InferenceEvent::PromptPressure {
839                estimated_input_tokens: 0,
840                reserved_output_tokens: 0,
841                estimated_total_tokens: 0,
842                context_length,
843                percent: 0,
844            })
845            .await;
846    }
847
848    async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
849        let context_length = self.engine.current_context_length();
850        let vram_ratio = self.gpu_state.ratio();
851        let config = CompactionConfig::adaptive(context_length, vram_ratio);
852        let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
853        let percent = if config.max_estimated_tokens == 0 {
854            0
855        } else {
856            ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
857        };
858
859        let _ = tx
860            .send(InferenceEvent::CompactionPressure {
861                estimated_tokens,
862                threshold_tokens: config.max_estimated_tokens,
863                percent,
864            })
865            .await;
866    }
867
868    async fn refresh_runtime_profile_and_report(
869        &mut self,
870        tx: &mpsc::Sender<InferenceEvent>,
871        reason: &str,
872    ) -> Option<(String, usize, bool)> {
873        let refreshed = self.engine.refresh_runtime_profile().await;
874        if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
875            let _ = tx
876                .send(InferenceEvent::RuntimeProfile {
877                    model_id: model_id.clone(),
878                    context_length: *context_length,
879                })
880                .await;
881            self.transcript.log_system(&format!(
882                "Runtime profile refresh ({}): model={} ctx={} changed={}",
883                reason, model_id, context_length, changed
884            ));
885        }
886        refreshed
887    }
888
889    pub fn new(
890        engine: Arc<InferenceEngine>,
891        professional: bool,
892        brief: bool,
893        snark: u8,
894        chaos: u8,
895        soul_personality: String,
896        fast_model: Option<String>,
897        think_model: Option<String>,
898        gpu_state: Arc<GpuState>,
899        git_state: Arc<crate::agent::git_monitor::GitState>,
900        swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
901        voice_manager: Arc<crate::ui::voice::VoiceManager>,
902    ) -> Self {
903        let (saved_summary, saved_memory) = load_session_data();
904
905        // Build the initial mcp_manager
906        let mcp_manager = Arc::new(tokio::sync::Mutex::new(
907            crate::agent::mcp_manager::McpManager::new(),
908        ));
909
910        // Build the initial system prompt using the canonical InferenceEngine path.
911        let dynamic_instructions =
912            engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
913
914        let history = vec![ChatMessage::system(&dynamic_instructions)];
915
916        let vein_path = crate::tools::file_ops::workspace_root()
917            .join(".hematite")
918            .join("vein.db");
919        let vein_base_url = engine.base_url.clone();
920        let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
921            .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
922
923        Self {
924            history,
925            engine,
926            tools: get_tools(),
927            mcp_manager,
928            professional,
929            brief,
930            snark,
931            chaos,
932            fast_model,
933            think_model,
934            correction_hints: Vec::new(),
935            running_summary: saved_summary,
936            gpu_state,
937            vein,
938            transcript: crate::agent::transcript::TranscriptLogger::new(),
939            cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
940            git_state,
941            think_mode: None,
942            workflow_mode: WorkflowMode::Auto,
943            session_memory: saved_memory,
944            swarm_coordinator,
945            voice_manager,
946            soul_personality,
947            lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
948                crate::tools::file_ops::workspace_root(),
949            ))),
950            reasoning_history: None,
951            pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
952            action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
953            plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
954            recovery_context: RecoveryContext::default(),
955            l1_context: None,
956            repo_map: None,
957            turn_count: 0,
958            last_goal: None,
959        }
960    }
961
    /// Index the project into The Vein. Call once after construction.
    ///
    /// Thin wrapper: forwards to `refresh_vein_index` and returns its `usize` result
    /// unchanged (presumably a count of indexed items — confirm against that helper).
    /// NOTE(review): no `block_in_place` call is made here; any such behavior would live
    /// inside `refresh_vein_index` — confirm before relying on it.
    pub fn initialize_vein(&mut self) -> usize {
        self.refresh_vein_index()
    }
967
968    /// Generate the AST Repo Map. Call once after construction or when resetting context.
969    pub fn initialize_repo_map(&mut self) {
970        if !self.vein_docs_only_mode() {
971            let root = crate::tools::file_ops::workspace_root();
972            let hot = self.vein.hot_files_weighted(10);
973            let gen = crate::memory::repo_map::RepoMapGenerator::new(&root).with_hot_files(&hot);
974            match tokio::task::block_in_place(|| gen.generate()) {
975                Ok(map) => self.repo_map = Some(map),
976                Err(e) => {
977                    self.repo_map = Some(format!("Repo Map generation failed: {}", e));
978                }
979            }
980        }
981    }
982
    /// Re-generate the repo map after a file edit so rankings stay fresh.
    ///
    /// Thin wrapper: simply re-runs `initialize_repo_map`, so it is likewise a no-op in
    /// docs-only mode.
    /// NOTE(review): described as lightweight (~100-200ms) and called after successful
    /// mutations — neither is verifiable from this function; confirm against callers.
    fn refresh_repo_map(&mut self) {
        self.initialize_repo_map();
    }
988
989    fn save_session(&self) {
990        let path = session_path();
991        if let Some(parent) = path.parent() {
992            let _ = std::fs::create_dir_all(parent);
993        }
994        let saved = SavedSession {
995            running_summary: self.running_summary.clone(),
996            session_memory: self.session_memory.clone(),
997            last_goal: self.last_goal.clone(),
998            turn_count: self.turn_count,
999        };
1000        if let Ok(json) = serde_json::to_string(&saved) {
1001            let _ = std::fs::write(&path, json);
1002        }
1003    }
1004
1005    fn save_empty_session(&self) {
1006        let path = session_path();
1007        if let Some(parent) = path.parent() {
1008            let _ = std::fs::create_dir_all(parent);
1009        }
1010        let saved = SavedSession {
1011            running_summary: None,
1012            session_memory: crate::agent::compaction::SessionMemory::default(),
1013            last_goal: None,
1014            turn_count: 0,
1015        };
1016        if let Ok(json) = serde_json::to_string(&saved) {
1017            let _ = std::fs::write(&path, json);
1018        }
1019    }
1020
1021    fn refresh_session_memory(&mut self) {
1022        let current_plan = self.session_memory.current_plan.clone();
1023        let previous_memory = self.session_memory.clone();
1024        self.session_memory = compaction::extract_memory(&self.history);
1025        self.session_memory.current_plan = current_plan;
1026        self.session_memory
1027            .inherit_runtime_ledger_from(&previous_memory);
1028    }
1029
1030    fn build_chat_system_prompt(&self) -> String {
1031        let species = &self.engine.species;
1032        let personality = &self.soul_personality;
1033        format!(
1034            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
1035             {personality}\n\n\
1036             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
1037             Rules:\n\
1038             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
1039             - Answer directly. One paragraph is usually right.\n\
1040             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
1041             - Don't narrate your reasoning or mention tool names unprompted.\n\
1042             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
1043             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
1044             - If the user wants the full coding harness, they can type `/agent`.\n",
1045        )
1046    }
1047
1048    fn append_session_handoff(&self, system_msg: &mut String) {
1049        let has_summary = self
1050            .running_summary
1051            .as_ref()
1052            .map(|s| !s.trim().is_empty())
1053            .unwrap_or(false);
1054        let has_memory = self.session_memory.has_signal();
1055
1056        if !has_summary && !has_memory {
1057            return;
1058        }
1059
1060        system_msg.push_str(
1061            "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
1062             This is compact carry-over from earlier work on this machine.\n\
1063             Use it only when it helps the current request.\n\
1064             Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
1065        );
1066
1067        if has_memory {
1068            system_msg.push_str("\n## Active Task Memory\n");
1069            system_msg.push_str(&self.session_memory.to_prompt());
1070        }
1071
1072        if let Some(summary) = self.running_summary.as_deref() {
1073            if !summary.trim().is_empty() {
1074                system_msg.push_str("\n## Compacted Session Summary\n");
1075                system_msg.push_str(summary);
1076                system_msg.push('\n');
1077            }
1078        }
1079    }
1080
    /// Replaces the active workflow mode with `mode`. No other state is touched here.
    fn set_workflow_mode(&mut self, mode: WorkflowMode) {
        self.workflow_mode = mode;
    }
1084
1085    fn current_plan_summary(&self) -> Option<String> {
1086        self.session_memory
1087            .current_plan
1088            .as_ref()
1089            .filter(|plan| plan.has_signal())
1090            .map(|plan| plan.summary_line())
1091    }
1092
1093    fn current_plan_allowed_paths(&self) -> Vec<String> {
1094        self.session_memory
1095            .current_plan
1096            .as_ref()
1097            .map(|plan| {
1098                plan.target_files
1099                    .iter()
1100                    .map(|path| normalize_workspace_path(path))
1101                    .collect()
1102            })
1103            .unwrap_or_default()
1104    }
1105
1106    fn persist_architect_handoff(
1107        &mut self,
1108        response: &str,
1109    ) -> Option<crate::tools::plan::PlanHandoff> {
1110        if self.workflow_mode != WorkflowMode::Architect {
1111            return None;
1112        }
1113        let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1114            return None;
1115        };
1116        let _ = crate::tools::plan::save_plan_handoff(&plan);
1117        self.session_memory.current_plan = Some(plan.clone());
1118        Some(plan)
1119    }
1120
1121    async fn begin_grounded_turn(&self) -> u64 {
1122        let mut state = self.action_grounding.lock().await;
1123        state.turn_index += 1;
1124        state.turn_index
1125    }
1126
1127    async fn reset_action_grounding(&self) {
1128        let mut state = self.action_grounding.lock().await;
1129        *state = ActionGroundingState::default();
1130    }
1131
1132    async fn record_read_observation(&self, path: &str) {
1133        let normalized = normalize_workspace_path(path);
1134        let mut state = self.action_grounding.lock().await;
1135        let turn = state.turn_index;
1136        // read_file returns full file content with line numbers — sufficient for
1137        // the model to know exact text before editing, so it satisfies the
1138        // line-inspection grounding check too.
1139        state.observed_paths.insert(normalized.clone(), turn);
1140        state.inspected_paths.insert(normalized, turn);
1141    }
1142
1143    async fn record_line_inspection(&self, path: &str) {
1144        let normalized = normalize_workspace_path(path);
1145        let mut state = self.action_grounding.lock().await;
1146        let turn = state.turn_index;
1147        state.observed_paths.insert(normalized.clone(), turn);
1148        state.inspected_paths.insert(normalized, turn);
1149    }
1150
1151    async fn record_verify_build_result(&self, ok: bool, output: &str) {
1152        let mut state = self.action_grounding.lock().await;
1153        let turn = state.turn_index;
1154        state.last_verify_build_turn = Some(turn);
1155        state.last_verify_build_ok = ok;
1156        if ok {
1157            state.code_changed_since_verify = false;
1158            state.last_failed_build_paths.clear();
1159        } else {
1160            state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1161        }
1162    }
1163
    /// Forwards a verification outcome (`ok` plus a summary convertible to `String`) to
    /// `session_memory.record_verification`. Nothing else is updated here.
    fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
        self.session_memory.record_verification(ok, summary);
    }
1167
1168    async fn record_successful_mutation(&self, path: Option<&str>) {
1169        let mut state = self.action_grounding.lock().await;
1170        state.code_changed_since_verify = match path {
1171            Some(p) => is_code_like_path(p),
1172            None => true,
1173        };
1174    }
1175
    /// Gatekeeper evaluated for a tool call (`name`, `args`): returns `Ok(())` when the
    /// call may proceed, or `Err(message)` carrying a model-facing explanation.
    ///
    /// Gates are evaluated in this order; the first one that trips returns:
    /// 1. While `plan_execution_active` is set: tools flagged by
    ///    `is_current_plan_irrelevant_tool` are rejected, plan-scoped tools must target
    ///    the saved plan's files (when the plan lists any), and `edit_file` /
    ///    `multi_search_replace` / `patch_hunk` need an inspection of the target within
    ///    the last 3 turns.
    /// 2. Read-only workflow modes: `auto_pin_context` is blocked; for destructive tools,
    ///    `shell` is blocked unless its command classifies as `Safe`, everything else is
    ///    blocked outright.
    /// 3. Writes/edits to a target that `docs_edit_without_explicit_request` flags for
    ///    the latest user prompt.
    /// 4. For a target path that exists on disk: `edit_file` / `multi_search_replace`
    ///    need a recent inspection, a same-turn observation, or a pinned file; other
    ///    tools need an observation within 3 turns or a pinned file.
    /// 5. External MCP mutation tools and MCP workspace-read tools.
    /// 6. Writes/edits to files outside `last_failed_build_paths` while the last verify
    ///    failed and code has changed since.
    /// 7. `git_commit` / `git_push` while code has changed and the last verify was not OK.
    /// 8. `shell`: commands that look like host inspection are executed through
    ///    `inspect_host` instead, and the result is returned as `Err` (even on success,
    ///    the `Err` text carries the diagnostic output); risky commands need a non-empty
    ///    `reason` argument.
    async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
        // Gate 1: restrictions that only apply while a saved plan is being executed.
        if self
            .plan_execution_active
            .load(std::sync::atomic::Ordering::SeqCst)
        {
            if is_current_plan_irrelevant_tool(name) {
                return Err(format!(
                    "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
                    name
                ));
            }

            if is_plan_scoped_tool(name) {
                let allowed_paths = self.current_plan_allowed_paths();
                // A plan without target files imposes no path restriction.
                if !allowed_paths.is_empty() {
                    let in_allowed = match name {
                        // Every requested pin must be a plan file; an empty or missing
                        // `paths` array does not qualify.
                        "auto_pin_context" => args
                            .get("paths")
                            .and_then(|v| v.as_array())
                            .map(|paths| {
                                !paths.is_empty()
                                    && paths.iter().all(|v| {
                                        v.as_str()
                                            .map(normalize_workspace_path)
                                            .map(|p| allowed_paths.contains(&p))
                                            .unwrap_or(false)
                                    })
                            })
                            .unwrap_or(false),
                        // The `path` argument must itself be one of the plan files.
                        "grep_files" | "list_files" => args
                            .get("path")
                            .and_then(|v| v.as_str())
                            .map(normalize_workspace_path)
                            .map(|p| allowed_paths.contains(&p))
                            .unwrap_or(false),
                        _ => action_target_path(name, args)
                            .map(|p| allowed_paths.contains(&p))
                            .unwrap_or(false),
                    };

                    if !in_allowed {
                        let allowed = allowed_paths
                            .iter()
                            .map(|p| format!("`{}`", p))
                            .collect::<Vec<_>>()
                            .join(", ");
                        return Err(format!(
                            "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
                            allowed
                        ));
                    }
                }
            }

            if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
                if let Some(target) = action_target_path(name, args) {
                    let state = self.action_grounding.lock().await;
                    // "Recent" means recorded at most 3 turns before the current one.
                    let recently_inspected = state
                        .inspected_paths
                        .get(&target)
                        .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                        .unwrap_or(false);
                    // Release the lock before building the error message.
                    drop(state);
                    if !recently_inspected {
                        return Err(format!(
                            "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
                            name, target
                        ));
                    }
                }
            }
        }

        // Gate 2: read-only workflow modes.
        if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
            return Err(
                "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
                    .to_string(),
            );
        }

        if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
            if name == "shell" {
                // Shell stays usable in read-only modes for commands classified as Safe.
                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
                let risk = crate::tools::guard::classify_bash_risk(command);
                if !matches!(risk, crate::tools::RiskLevel::Safe) {
                    return Err(format!(
                        "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
                        self.workflow_mode.label()
                    ));
                }
            } else {
                return Err(format!(
                    "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
                    self.workflow_mode.label()
                ));
            }
        }

        // Gates 3 and 4: checks that depend on the tool's target path, if it has one.
        let normalized_target = action_target_path(name, args);
        if let Some(target) = normalized_target.as_deref() {
            if matches!(
                name,
                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
            ) {
                if let Some(prompt) = self.latest_user_prompt() {
                    if docs_edit_without_explicit_request(prompt, target) {
                        return Err(format!(
                            "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
                            target
                        ));
                    }
                }
            }
            // Evidence is only required for paths that already exist on disk.
            // NOTE(review): `target` is checked as given, so a relative path resolves
            // against the process working directory — confirm that matches the workspace.
            let path_exists = std::path::Path::new(target).exists();
            if path_exists {
                let state = self.action_grounding.lock().await;
                let pinned = self.pinned_files.lock().await;
                let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
                drop(pinned);

                // edit_file and multi_search_replace match text exactly, so they need a
                // tighter evidence bar than a plain read. Require inspect_lines on the
                // target within the last 3 turns. A read_file in the *same* turn is also
                // accepted (the model just loaded the file and is making an immediate edit).
                let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
                let recently_inspected = state
                    .inspected_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                    .unwrap_or(false);
                let same_turn_read = state
                    .observed_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
                    .unwrap_or(false);
                let recent_observed = state
                    .observed_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                    .unwrap_or(false);

                // A pinned file satisfies the evidence requirement in both branches.
                if needs_exact_window {
                    if !recently_inspected && !same_turn_read && !pinned_match {
                        return Err(format!(
                            "Action blocked: `{}` on '{}' requires a line-level inspection first. \
                             Use `inspect_lines` on the target region to get the exact current text \
                             (whitespace and indentation included), then retry the edit.",
                            name, target
                        ));
                    }
                } else if !recent_observed && !pinned_match {
                    return Err(format!(
                        "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
                        name, target
                    ));
                }
            }
        }

        // Gate 5: external MCP tools are rejected in favor of the built-in paths.
        if is_mcp_mutating_tool(name) {
            return Err(format!(
                "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
                name
            ));
        }

        if is_mcp_workspace_read_tool(name) {
            return Err(format!(
                "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
                name
            ));
        }

        // Phase gate: if the build is broken, constrain edits to files that cargo flagged.
        // This prevents the model from wandering to unrelated files after a failed verify.
        if matches!(
            name,
            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
        ) {
            if let Some(target) = normalized_target.as_deref() {
                let state = self.action_grounding.lock().await;
                // Only enforced when failing paths were actually recorded; an empty list
                // leaves all files editable.
                if state.code_changed_since_verify
                    && !state.last_verify_build_ok
                    && !state.last_failed_build_paths.is_empty()
                    && !state.last_failed_build_paths.iter().any(|p| p == target)
                {
                    let files = state
                        .last_failed_build_paths
                        .iter()
                        .map(|p| format!("`{}`", p))
                        .collect::<Vec<_>>()
                        .join(", ");
                    return Err(format!(
                        "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
                        files
                    ));
                }
            }
        }

        // Gate 7: commits and pushes.
        // NOTE(review): this blocks only while `last_verify_build_ok` is false. An edit
        // made after a successful verify raises `code_changed_since_verify` but leaves
        // `last_verify_build_ok` true, so it is not blocked here — confirm that is intended.
        if name == "git_commit" || name == "git_push" {
            let state = self.action_grounding.lock().await;
            if state.code_changed_since_verify && !state.last_verify_build_ok {
                return Err(format!(
                    "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
                    name
                ));
            }
        }

        // Gate 8: shell-specific handling.
        if name == "shell" {
            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
            if shell_looks_like_structured_host_inspection(command) {
                // Auto-redirect: silently call inspect_host with the right topic instead of
                // returning a block error that the model may fail to recover from.
                // Derive topic from the shell command itself first; fall back to the user prompt.
                let topic = preferred_host_inspection_topic(command)
                    .or_else(|| {
                        self.latest_user_prompt()
                            .and_then(|p| preferred_host_inspection_topic(p))
                    })
                    .unwrap_or("summary")
                    .to_string();

                // Scoped so the grounding lock is released before `inspect_host` runs.
                {
                    let mut state = self.action_grounding.lock().await;
                    let current_turn = state.turn_index;
                    // Refuse to repeat a redirect for the same topic within one turn.
                    if let Some(turn) = state.redirected_host_inspection_topics.get(&topic) {
                        if *turn == current_turn {
                            return Err(format!(
                                "Action already handled: The diagnostic data for topic `{topic}` was already provided in a previous redirected shell result this turn. Do not retry the same shell command."
                            ));
                        }
                    }
                    state
                        .redirected_host_inspection_topics
                        .insert(topic.clone(), current_turn);
                }

                let path_val = self
                    .latest_user_prompt()
                    .and_then(|p| {
                        // Very basic heuristic for path extraction: look for strings with dots/slashes
                        // (first whitespace-separated word wins; surrounding punctuation other
                        // than `.`, `/` and `\` is trimmed).
                        p.split_whitespace()
                            .find(|w| w.contains('.') || w.contains('/') || w.contains('\\'))
                            .map(|s| {
                                s.trim_matches(|c: char| {
                                    !c.is_alphanumeric() && c != '.' && c != '/' && c != '\\'
                                })
                            })
                    })
                    .unwrap_or("");

                let redirect_args = if !path_val.is_empty() {
                    serde_json::json!({ "topic": topic, "path": path_val })
                } else {
                    serde_json::json!({ "topic": topic })
                };

                let result = crate::tools::host_inspect::inspect_host(&redirect_args).await;
                // Both arms return `Err`: the shell call itself never runs, and the
                // message carries either the inspect_host output or the failure.
                return match result {
                    Ok(output) => Err(format!(
                        "[successfully auto-redirected shell→inspect_host(topic=\"{topic}\")]\n\n{output}\n\n[Note: Shell is blocked for host inspection. The diagnostic data above fulfills your request. Call inspect_host directly with the correct topic for any further diagnostics.]"
                    )),
                    Err(e) => Err(format!(
                        "Redirection to native tool `{topic}` failed: {e}\n\nAction blocked: use `inspect_host(topic: \"{topic}\")` instead of raw `shell` for host-inspection questions. Available topics: updates, security, pending_reboot, disk_health, battery, recent_crashes, scheduled_tasks, dev_conflicts, health_report, storage, hardware, resource_load, processes, network, services, ports, env_doctor, fix_plan, connectivity, wifi, connections, vpn, proxy, firewall_rules, traceroute, dns_cache, arp, route_table, docker, wsl, ssh, env, hosts_file, installed_software, git_config, databases, disk_benchmark, directory.",
                    )),
                };
            }
            // Non-redirected shell: anything not classified Safe must state a reason.
            let reason = args
                .get("reason")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .trim();
            let risk = crate::tools::guard::classify_bash_risk(command);
            if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
                return Err(
                    "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
                        .to_string(),
                );
            }
        }

        Ok(())
    }
1461
1462    fn build_action_receipt(
1463        &self,
1464        name: &str,
1465        args: &Value,
1466        output: &str,
1467        is_error: bool,
1468    ) -> Option<ChatMessage> {
1469        if is_error || !is_destructive_tool(name) {
1470            return None;
1471        }
1472
1473        let mut receipt = String::from("[ACTION RECEIPT]\n");
1474        receipt.push_str(&format!("- tool: {}\n", name));
1475        if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1476            receipt.push_str(&format!("- target: {}\n", path));
1477        }
1478        if name == "shell" {
1479            if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1480                receipt.push_str(&format!("- command: {}\n", command));
1481            }
1482            if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1483                if !reason.trim().is_empty() {
1484                    receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1485                }
1486            }
1487        }
1488        let first_line = output.lines().next().unwrap_or(output).trim();
1489        receipt.push_str(&format!("- outcome: {}\n", first_line));
1490        Some(ChatMessage::system(&receipt))
1491    }
1492
1493    fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1494        self.tools
1495            .retain(|tool| !tool.function.name.starts_with("mcp__"));
1496        self.tools
1497            .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1498                tool_type: "function".into(),
1499                function: ToolFunction {
1500                    name: tool.name.clone(),
1501                    description: tool.description.clone().unwrap_or_default(),
1502                    parameters: tool.input_schema.clone(),
1503                },
1504                metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1505            }));
1506    }
1507
1508    async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1509        let summary = {
1510            let mcp = self.mcp_manager.lock().await;
1511            mcp.runtime_report()
1512        };
1513        let _ = tx
1514            .send(InferenceEvent::McpStatus {
1515                state: summary.state,
1516                summary: summary.summary,
1517            })
1518            .await;
1519    }
1520
1521    async fn refresh_mcp_tools(
1522        &mut self,
1523        tx: &mpsc::Sender<InferenceEvent>,
1524    ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1525        let mcp_tools = {
1526            let mut mcp = self.mcp_manager.lock().await;
1527            match mcp.initialize_all().await {
1528                Ok(()) => mcp.discover_tools().await,
1529                Err(e) => {
1530                    drop(mcp);
1531                    self.replace_mcp_tool_definitions(&[]);
1532                    self.emit_mcp_runtime_status(tx).await;
1533                    return Err(e.into());
1534                }
1535            }
1536        };
1537
1538        self.replace_mcp_tool_definitions(&mcp_tools);
1539        self.emit_mcp_runtime_status(tx).await;
1540        Ok(mcp_tools)
1541    }
1542
1543    /// Spawns and initializes all configured MCP servers, discovering their tools.
1544    pub async fn initialize_mcp(
1545        &mut self,
1546        tx: &mpsc::Sender<InferenceEvent>,
1547    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1548        let _ = self.refresh_mcp_tools(tx).await?;
1549        Ok(())
1550    }
1551
1552    /// Run one user turn through the full agentic loop.
1553    ///
1554    /// Adds the user message, calls the model, executes any tools, and loops
1555    /// until the model produces a final text reply.  All progress is streamed
1556    /// as `InferenceEvent` values via `tx`.
1557    pub async fn run_turn(
1558        &mut self,
1559        user_turn: &UserTurn,
1560        tx: mpsc::Sender<InferenceEvent>,
1561        yolo: bool,
1562    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1563        let user_input = user_turn.text.as_str();
1564        // ── Fast-path reset commands: handled locally, no network I/O needed ──
1565        if user_input.trim() == "/new" {
1566            self.history.clear();
1567            self.reasoning_history = None;
1568            self.session_memory.clear();
1569            self.running_summary = None;
1570            self.correction_hints.clear();
1571            self.pinned_files.lock().await.clear();
1572            self.reset_action_grounding().await;
1573            reset_task_files();
1574            let _ = std::fs::remove_file(session_path());
1575            self.save_empty_session();
1576            self.emit_compaction_pressure(&tx).await;
1577            self.emit_prompt_pressure_idle(&tx).await;
1578            for chunk in chunk_text(
1579                "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1580                8,
1581            ) {
1582                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1583            }
1584            let _ = tx.send(InferenceEvent::Done).await;
1585            return Ok(());
1586        }
1587
1588        if user_input.trim() == "/forget" {
1589            self.history.clear();
1590            self.reasoning_history = None;
1591            self.session_memory.clear();
1592            self.running_summary = None;
1593            self.correction_hints.clear();
1594            self.pinned_files.lock().await.clear();
1595            self.reset_action_grounding().await;
1596            reset_task_files();
1597            purge_persistent_memory();
1598            tokio::task::block_in_place(|| self.vein.reset());
1599            let _ = std::fs::remove_file(session_path());
1600            self.save_empty_session();
1601            self.emit_compaction_pressure(&tx).await;
1602            self.emit_prompt_pressure_idle(&tx).await;
1603            for chunk in chunk_text(
1604                "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1605                8,
1606            ) {
1607                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1608            }
1609            let _ = tx.send(InferenceEvent::Done).await;
1610            return Ok(());
1611        }
1612
1613        if user_input.trim() == "/vein-inspect" {
1614            let indexed = self.refresh_vein_index();
1615            let report = self.build_vein_inspection_report(indexed);
1616            let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1617            let _ = tx
1618                .send(InferenceEvent::VeinStatus {
1619                    file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1620                    embedded_count: snapshot.embedded_source_doc_chunks,
1621                    docs_only: self.vein_docs_only_mode(),
1622                })
1623                .await;
1624            for chunk in chunk_text(&report, 8) {
1625                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1626            }
1627            let _ = tx.send(InferenceEvent::Done).await;
1628            return Ok(());
1629        }
1630
1631        if user_input.trim() == "/workspace-profile" {
1632            let root = crate::tools::file_ops::workspace_root();
1633            let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1634            let report = crate::agent::workspace_profile::profile_report(&root);
1635            for chunk in chunk_text(&report, 8) {
1636                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1637            }
1638            let _ = tx.send(InferenceEvent::Done).await;
1639            return Ok(());
1640        }
1641
1642        if user_input.trim() == "/rules" {
1643            let root = crate::tools::file_ops::workspace_root();
1644            let rules_path = root.join(".hematite").join("rules.md");
1645            let report = if rules_path.exists() {
1646                match std::fs::read_to_string(&rules_path) {
1647                    Ok(content) => format!(
1648                        "## Behavioral Rules (.hematite/rules.md)\n\n{}\n\n---\nTo update: ask Hematite to edit your rules, or open `.hematite/rules.md` directly. Changes take effect on the next turn.",
1649                        content.trim()
1650                    ),
1651                    Err(e) => format!("Error reading .hematite/rules.md: {e}"),
1652                }
1653            } else {
1654                format!(
1655                    "No behavioral rules file found at `.hematite/rules.md`.\n\nCreate it to add custom behavioral guidelines — they are injected into the system prompt on every turn and apply to any model you load.\n\nExample: ask Hematite to \"create a rules.md with simplicity-first and surgical-edit guidelines\" and it will write the file for you.\n\nExpected path: {}",
1656                    rules_path.display()
1657                )
1658            };
1659            for chunk in chunk_text(&report, 8) {
1660                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1661            }
1662            let _ = tx.send(InferenceEvent::Done).await;
1663            return Ok(());
1664        }
1665
1666        if user_input.trim() == "/vein-reset" {
1667            tokio::task::block_in_place(|| self.vein.reset());
1668            let _ = tx
1669                .send(InferenceEvent::VeinStatus {
1670                    file_count: 0,
1671                    embedded_count: 0,
1672                    docs_only: self.vein_docs_only_mode(),
1673                })
1674                .await;
1675            for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1676                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1677            }
1678            let _ = tx.send(InferenceEvent::Done).await;
1679            return Ok(());
1680        }
1681
1682        // Reload config every turn (edits apply immediately, no restart needed).
1683        let config = crate::agent::config::load_config();
1684        self.recovery_context.clear();
1685        let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1686        if !manual_runtime_refresh {
1687            if let Some((model_id, context_length, changed)) = self
1688                .refresh_runtime_profile_and_report(&tx, "turn_start")
1689                .await
1690            {
1691                if changed {
1692                    let _ = tx
1693                        .send(InferenceEvent::Thought(format!(
1694                            "Runtime refresh: using model `{}` with CTX {} for this turn.",
1695                            model_id, context_length
1696                        )))
1697                        .await;
1698                }
1699            }
1700        }
1701        self.emit_compaction_pressure(&tx).await;
1702        let current_model = self.engine.current_model();
1703        self.engine.set_gemma_native_formatting(
1704            crate::agent::config::effective_gemma_native_formatting(&config, &current_model),
1705        );
1706        let _turn_id = self.begin_grounded_turn().await;
1707        let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1708        let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1709            Ok(tools) => tools,
1710            Err(e) => {
1711                let _ = tx
1712                    .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1713                    .await;
1714                Vec::new()
1715            }
1716        };
1717
1718        // Apply config model overrides (config takes precedence over CLI flags).
1719        let effective_fast = config
1720            .fast_model
1721            .clone()
1722            .or_else(|| self.fast_model.clone());
1723        let effective_think = config
1724            .think_model
1725            .clone()
1726            .or_else(|| self.think_model.clone());
1727
1728        // ── /lsp: start language servers manually if needed ──────────────────
1729        if user_input.trim() == "/lsp" {
1730            let mut lsp = self.lsp_manager.lock().await;
1731            match lsp.start_servers().await {
1732                Ok(_) => {
1733                    let _ = tx
1734                        .send(InferenceEvent::MutedToken(
1735                            "LSP: Servers Initialized OK.".to_string(),
1736                        ))
1737                        .await;
1738                }
1739                Err(e) => {
1740                    let _ = tx
1741                        .send(InferenceEvent::Error(format!(
1742                            "LSP: Failed to start servers - {}",
1743                            e
1744                        )))
1745                        .await;
1746                }
1747            }
1748            let _ = tx.send(InferenceEvent::Done).await;
1749            return Ok(());
1750        }
1751
1752        if user_input.trim() == "/runtime-refresh" {
1753            match self
1754                .refresh_runtime_profile_and_report(&tx, "manual_command")
1755                .await
1756            {
1757                Some((model_id, context_length, changed)) => {
1758                    let msg = if changed {
1759                        format!(
1760                            "Runtime profile refreshed. Model: {} | CTX: {}",
1761                            model_id, context_length
1762                        )
1763                    } else {
1764                        format!(
1765                            "Runtime profile unchanged. Model: {} | CTX: {}",
1766                            model_id, context_length
1767                        )
1768                    };
1769                    for chunk in chunk_text(&msg, 8) {
1770                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1771                    }
1772                }
1773                None => {
1774                    let _ = tx
1775                        .send(InferenceEvent::Error(
1776                            "Runtime refresh failed: LM Studio profile could not be read."
1777                                .to_string(),
1778                        ))
1779                        .await;
1780                }
1781            }
1782            let _ = tx.send(InferenceEvent::Done).await;
1783            return Ok(());
1784        }
1785
1786        if user_input.trim() == "/ask" {
1787            self.set_workflow_mode(WorkflowMode::Ask);
1788            for chunk in chunk_text(
1789                "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1790                8,
1791            ) {
1792                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1793            }
1794            let _ = tx.send(InferenceEvent::Done).await;
1795            return Ok(());
1796        }
1797
1798        if user_input.trim() == "/code" {
1799            self.set_workflow_mode(WorkflowMode::Code);
1800            let mut message =
1801                "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1802            if let Some(plan) = self.current_plan_summary() {
1803                message.push_str(&format!(" Current plan: {plan}."));
1804            }
1805            for chunk in chunk_text(&message, 8) {
1806                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1807            }
1808            let _ = tx.send(InferenceEvent::Done).await;
1809            return Ok(());
1810        }
1811
1812        if user_input.trim() == "/architect" {
1813            self.set_workflow_mode(WorkflowMode::Architect);
1814            let mut message =
1815                "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement. When the handoff is ready, use `/implement-plan` or switch to `/code` to execute it.".to_string();
1816            if let Some(plan) = self.current_plan_summary() {
1817                message.push_str(&format!(" Existing plan: {plan}."));
1818            }
1819            for chunk in chunk_text(&message, 8) {
1820                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1821            }
1822            let _ = tx.send(InferenceEvent::Done).await;
1823            return Ok(());
1824        }
1825
1826        if user_input.trim() == "/read-only" {
1827            self.set_workflow_mode(WorkflowMode::ReadOnly);
1828            for chunk in chunk_text(
1829                "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1830                8,
1831            ) {
1832                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1833            }
1834            let _ = tx.send(InferenceEvent::Done).await;
1835            return Ok(());
1836        }
1837
1838        if user_input.trim() == "/auto" {
1839            self.set_workflow_mode(WorkflowMode::Auto);
1840            for chunk in chunk_text(
1841                "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1842                8,
1843            ) {
1844                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1845            }
1846            let _ = tx.send(InferenceEvent::Done).await;
1847            return Ok(());
1848        }
1849
1850        if user_input.trim() == "/chat" {
1851            self.set_workflow_mode(WorkflowMode::Chat);
1852            let _ = tx.send(InferenceEvent::Done).await;
1853            return Ok(());
1854        }
1855
1856        if user_input.trim() == "/teach" {
1857            self.set_workflow_mode(WorkflowMode::Teach);
1858            for chunk in chunk_text(
1859                "Workflow mode: TEACH. I will inspect your actual machine state first, then walk you through any admin, config, or write task as a grounded, numbered tutorial. I will not execute write operations — I will show you exactly how to do each step yourself.",
1860                8,
1861            ) {
1862                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1863            }
1864            let _ = tx.send(InferenceEvent::Done).await;
1865            return Ok(());
1866        }
1867
1868        if user_input.trim() == "/reroll" {
1869            let soul = crate::ui::hatch::generate_soul_random();
1870            self.snark = soul.snark;
1871            self.chaos = soul.chaos;
1872            self.soul_personality = soul.personality.clone();
1873            // Update the engine's species name so build_chat_system_prompt uses it
1874            // SAFETY: engine is Arc but species is a plain String field we own logically.
1875            // We use Arc::get_mut which only succeeds if this is the only strong ref.
1876            // If it fails (swarm workers hold refs), we fall back to a best-effort clone approach.
1877            let species = soul.species.clone();
1878            if let Some(eng) = Arc::get_mut(&mut self.engine) {
1879                eng.species = species.clone();
1880            }
1881            let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1882            let _ = tx
1883                .send(InferenceEvent::SoulReroll {
1884                    species: soul.species.clone(),
1885                    rarity: soul.rarity.label().to_string(),
1886                    shiny: soul.shiny,
1887                    personality: soul.personality.clone(),
1888                })
1889                .await;
1890            for chunk in chunk_text(
1891                &format!(
1892                    "A new companion awakens!\n[{}{}] {} — \"{}\"",
1893                    soul.rarity.label(),
1894                    shiny_tag,
1895                    soul.species,
1896                    soul.personality
1897                ),
1898                8,
1899            ) {
1900                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1901            }
1902            let _ = tx.send(InferenceEvent::Done).await;
1903            return Ok(());
1904        }
1905
1906        if user_input.trim() == "/agent" {
1907            self.set_workflow_mode(WorkflowMode::Auto);
1908            let _ = tx.send(InferenceEvent::Done).await;
1909            return Ok(());
1910        }
1911
1912        let implement_plan_alias = user_input.trim() == "/implement-plan";
1913        if implement_plan_alias
1914            && !self
1915                .session_memory
1916                .current_plan
1917                .as_ref()
1918                .map(|plan| plan.has_signal())
1919                .unwrap_or(false)
1920        {
1921            for chunk in chunk_text(
1922                "No saved architect handoff is active. Run `/architect` first, or switch to `/code` with an explicit implementation request.",
1923                8,
1924            ) {
1925                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1926            }
1927            let _ = tx.send(InferenceEvent::Done).await;
1928            return Ok(());
1929        }
1930
1931        let mut effective_user_input = if implement_plan_alias {
1932            self.set_workflow_mode(WorkflowMode::Code);
1933            implement_current_plan_prompt().to_string()
1934        } else {
1935            user_input.trim().to_string()
1936        };
1937        if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1938            self.set_workflow_mode(mode);
1939            effective_user_input = rest.to_string();
1940        }
1941        let transcript_user_input = if implement_plan_alias {
1942            transcript_user_turn_text(user_turn, "/implement-plan")
1943        } else {
1944            transcript_user_turn_text(user_turn, &effective_user_input)
1945        };
1946        effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1947        let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1948            && is_current_plan_execution_request(&effective_user_input)
1949            && self
1950                .session_memory
1951                .current_plan
1952                .as_ref()
1953                .map(|plan| plan.has_signal())
1954                .unwrap_or(false);
1955        self.plan_execution_active
1956            .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1957        let _plan_execution_guard = PlanExecutionGuard {
1958            flag: self.plan_execution_active.clone(),
1959        };
1960        let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1961
1962        // ── /think / /no_think: reasoning budget toggle ──────────────────────
1963        if let Some(answer_kind) = intent.direct_answer {
1964            match answer_kind {
1965                DirectAnswerKind::About => {
1966                    let response = build_about_answer();
1967                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1968                        .await;
1969                    return Ok(());
1970                }
1971                DirectAnswerKind::LanguageCapability => {
1972                    let response = build_language_capability_answer();
1973                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1974                        .await;
1975                    return Ok(());
1976                }
1977                DirectAnswerKind::UnsafeWorkflowPressure => {
1978                    let response = build_unsafe_workflow_pressure_answer();
1979                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1980                        .await;
1981                    return Ok(());
1982                }
1983                DirectAnswerKind::SessionMemory => {
1984                    let response = build_session_memory_answer();
1985                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1986                        .await;
1987                    return Ok(());
1988                }
1989                DirectAnswerKind::RecoveryRecipes => {
1990                    let response = build_recovery_recipes_answer();
1991                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1992                        .await;
1993                    return Ok(());
1994                }
1995                DirectAnswerKind::McpLifecycle => {
1996                    let response = build_mcp_lifecycle_answer();
1997                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1998                        .await;
1999                    return Ok(());
2000                }
2001                DirectAnswerKind::AuthorizationPolicy => {
2002                    let response = build_authorization_policy_answer();
2003                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2004                        .await;
2005                    return Ok(());
2006                }
2007                DirectAnswerKind::ToolClasses => {
2008                    let response = build_tool_classes_answer();
2009                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2010                        .await;
2011                    return Ok(());
2012                }
2013                DirectAnswerKind::ToolRegistryOwnership => {
2014                    let response = build_tool_registry_ownership_answer();
2015                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2016                        .await;
2017                    return Ok(());
2018                }
2019                DirectAnswerKind::SessionResetSemantics => {
2020                    let response = build_session_reset_semantics_answer();
2021                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2022                        .await;
2023                    return Ok(());
2024                }
2025                DirectAnswerKind::ProductSurface => {
2026                    let response = build_product_surface_answer();
2027                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2028                        .await;
2029                    return Ok(());
2030                }
2031                DirectAnswerKind::ReasoningSplit => {
2032                    let response = build_reasoning_split_answer();
2033                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2034                        .await;
2035                    return Ok(());
2036                }
2037                DirectAnswerKind::Identity => {
2038                    let response = build_identity_answer();
2039                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2040                        .await;
2041                    return Ok(());
2042                }
2043                DirectAnswerKind::WorkflowModes => {
2044                    let response = build_workflow_modes_answer();
2045                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2046                        .await;
2047                    return Ok(());
2048                }
2049                DirectAnswerKind::GemmaNative => {
2050                    let response = build_gemma_native_answer();
2051                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2052                        .await;
2053                    return Ok(());
2054                }
2055                DirectAnswerKind::GemmaNativeSettings => {
2056                    let response = build_gemma_native_settings_answer();
2057                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2058                        .await;
2059                    return Ok(());
2060                }
2061                DirectAnswerKind::VerifyProfiles => {
2062                    let response = build_verify_profiles_answer();
2063                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2064                        .await;
2065                    return Ok(());
2066                }
2067                DirectAnswerKind::Toolchain => {
2068                    let lower = effective_user_input.to_lowercase();
2069                    let topic = if (lower.contains("voice output") || lower.contains("voice"))
2070                        && (lower.contains("lag")
2071                            || lower.contains("behind visible text")
2072                            || lower.contains("latency"))
2073                    {
2074                        "voice_latency_plan"
2075                    } else {
2076                        "all"
2077                    };
2078                    let response =
2079                        crate::tools::toolchain::describe_toolchain(&serde_json::json!({
2080                            "topic": topic,
2081                            "question": effective_user_input,
2082                        }))
2083                        .await
2084                        .unwrap_or_else(|e| format!("Error: {}", e));
2085                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2086                        .await;
2087                    return Ok(());
2088                }
2089                DirectAnswerKind::ArchitectSessionResetPlan => {
2090                    let plan = build_architect_session_reset_plan();
2091                    let response = plan.to_markdown();
2092                    let _ = crate::tools::plan::save_plan_handoff(&plan);
2093                    self.session_memory.current_plan = Some(plan);
2094                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
2095                        .await;
2096                    return Ok(());
2097                }
2098            }
2099        }
2100
2101        if matches!(
2102            self.workflow_mode,
2103            WorkflowMode::Ask | WorkflowMode::ReadOnly
2104        ) && looks_like_mutation_request(&effective_user_input)
2105        {
2106            let response = build_mode_redirect_answer(self.workflow_mode);
2107            self.history.push(ChatMessage::user(&effective_user_input));
2108            self.history.push(ChatMessage::assistant_text(&response));
2109            self.transcript.log_user(&transcript_user_input);
2110            self.transcript.log_agent(&response);
2111            for chunk in chunk_text(&response, 8) {
2112                if !chunk.is_empty() {
2113                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2114                }
2115            }
2116            let _ = tx.send(InferenceEvent::Done).await;
2117            self.trim_history(80);
2118            self.refresh_session_memory();
2119            self.save_session();
2120            return Ok(());
2121        }
2122
2123        if user_input.trim() == "/think" {
2124            self.think_mode = Some(true);
2125            for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
2126                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2127            }
2128            let _ = tx.send(InferenceEvent::Done).await;
2129            return Ok(());
2130        }
2131        if user_input.trim() == "/no_think" {
2132            self.think_mode = Some(false);
2133            for chunk in chunk_text(
2134                "Think mode: OFF — fast mode enabled (no chain-of-thought).",
2135                8,
2136            ) {
2137                let _ = tx.send(InferenceEvent::Token(chunk)).await;
2138            }
2139            let _ = tx.send(InferenceEvent::Done).await;
2140            return Ok(());
2141        }
2142
2143        // ── /pin: add file to active context ────────────────────────────────
2144        if user_input.trim_start().starts_with("/pin ") {
2145            let path = user_input.trim_start()[5..].trim();
2146            match std::fs::read_to_string(path) {
2147                Ok(content) => {
2148                    self.pinned_files
2149                        .lock()
2150                        .await
2151                        .insert(path.to_string(), content);
2152                    let msg = format!(
2153                        "Pinned: {} — this file is now locked in model context.",
2154                        path
2155                    );
2156                    for chunk in chunk_text(&msg, 8) {
2157                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
2158                    }
2159                }
2160                Err(e) => {
2161                    let _ = tx
2162                        .send(InferenceEvent::Error(format!(
2163                            "Failed to pin {}: {}",
2164                            path, e
2165                        )))
2166                        .await;
2167                }
2168            }
2169            let _ = tx.send(InferenceEvent::Done).await;
2170            return Ok(());
2171        }
2172
2173        // ── /unpin: remove file from active context ──────────────────────────
2174        if user_input.trim_start().starts_with("/unpin ") {
2175            let path = user_input.trim_start()[7..].trim();
2176            if self.pinned_files.lock().await.remove(path).is_some() {
2177                let msg = format!("Unpinned: {} — file removed from active context.", path);
2178                for chunk in chunk_text(&msg, 8) {
2179                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2180                }
2181            } else {
2182                let _ = tx
2183                    .send(InferenceEvent::Error(format!(
2184                        "File {} was not pinned.",
2185                        path
2186                    )))
2187                    .await;
2188            }
2189            let _ = tx.send(InferenceEvent::Done).await;
2190            return Ok(());
2191        }
2192
2193        // ── Normal processing ───────────────────────────────────────────────
2194
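2195        // Build this turn's base system prompt from the built-in tools and the already-collected `mcp_tools`; a context length <= 8,192 enables tiny-context mode, which skips the optional prompt sections below.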
2196        let tiny_context_mode = self.engine.current_context_length() <= 8_192;
2197        let mut base_prompt = self.engine.build_system_prompt(
2198            self.snark,
2199            self.chaos,
2200            self.brief,
2201            self.professional,
2202            &self.tools,
2203            self.reasoning_history.as_deref(),
2204            &mcp_tools,
2205        );
2206        if !tiny_context_mode {
2207            if let Some(hint) = &config.context_hint {
2208                if !hint.trim().is_empty() {
2209                    base_prompt.push_str(&format!(
2210                        "\n\n# Project Context (from .hematite/settings.json)\n{}",
2211                        hint
2212                    ));
2213                }
2214            }
2215            if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
2216                &crate::tools::file_ops::workspace_root(),
2217            ) {
2218                base_prompt.push_str(&format!("\n\n{}", profile_block));
2219            }
2220            // L1: inject hot-files block if available (persists across sessions via vein.db).
2221            if let Some(ref l1) = self.l1_context {
2222                base_prompt.push_str(&format!("\n\n{}", l1));
2223            }
2224            if let Some(ref repo_map_block) = self.repo_map {
2225                base_prompt.push_str(&format!("\n\n{}", repo_map_block));
2226            }
2227        }
2228        let grounded_trace_mode = intent.grounded_trace_mode
2229            || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2230        let capability_mode =
2231            intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2232        let toolchain_mode =
2233            intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2234        let host_inspection_mode = intent.host_inspection_mode;
2235        let maintainer_workflow_mode = intent.maintainer_workflow_mode
2236            || preferred_maintainer_workflow(&effective_user_input).is_some();
2237        let workspace_workflow_mode = intent.workspace_workflow_mode
2238            || preferred_workspace_workflow(&effective_user_input).is_some();
2239        let fix_plan_mode =
2240            preferred_host_inspection_topic(&effective_user_input) == Some("fix_plan");
2241        let architecture_overview_mode = intent.architecture_overview_mode;
2242        let capability_needs_repo = intent.capability_needs_repo;
2243        let mut system_msg = build_system_with_corrections(
2244            &base_prompt,
2245            &self.correction_hints,
2246            &self.gpu_state,
2247            &self.git_state,
2248            &config,
2249        );
2250        if tiny_context_mode {
2251            system_msg.push_str(
2252                "\n\n# TINY CONTEXT TURN MODE\n\
2253                 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2254            );
2255        }
2256        if !tiny_context_mode && grounded_trace_mode {
2257            system_msg.push_str(
2258                "\n\n# GROUNDED TRACE MODE\n\
2259                 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2260                 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2261                 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2262                 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2263                 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2264                 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2265                 Do not invent names such as synthetic channels or subsystems.\n\
2266                 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2267                For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2268            );
2269        }
2270        if !tiny_context_mode && capability_mode {
2271            system_msg.push_str(
2272                "\n\n# CAPABILITY QUESTION MODE\n\
2273                 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2274                 Answer from stable Hematite capabilities and current runtime state.\n\
2275                 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2276                 Do NOT call repo-inspection tools like `read_file` or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2277                 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2278                 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2279                 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2280                 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2281                 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2282                 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2283                 Keep the answer short, plain, and ASCII-first.\n"
2284            );
2285        }
2286        if !tiny_context_mode && toolchain_mode {
2287            system_msg.push_str(
2288                "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2289                 This turn is about Hematite's real built-in tools and how to choose them.\n\
2290                 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2291                 Use only real built-in tool names.\n\
2292                 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2293                 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2294                 Be explicit about which tools are optional or conditional.\n"
2295            );
2296        }
2297        if !tiny_context_mode && host_inspection_mode {
2298            system_msg.push_str(
2299                 "\n\n# HOST INSPECTION MODE\n\
2300                 This turn is about the local machine. Make EXACTLY ONE `inspect_host` call using the best matching topic below, then answer. Do NOT call `summary` first. Do NOT make exploratory shell calls.\n\
2301                 - Drive space / disk usage / free space / storage across drives → `storage`\n\
2302                 - CPU model / RAM size / GPU name / hardware specs / BIOS / motherboard → `hardware`\n\
2303                 - CPU % / RAM % / what is using resources / slow machine → `resource_load`\n\
2304                 - Running processes / task manager / what is using RAM → `processes`\n\
2305                 - Windows services / daemons / service state → `services`\n\
2306                 - Listening ports / open ports / what process owns port N / which processes are listening / what is bound to a port → `ports` (waiting for inbound connections — includes PIDs and process names — do NOT also call `processes`)\n\
2307                 - Active connections / established connections / what is connected right now / outbound sessions / show me connections / network connections → `connections` (live two-way sessions, NOT listening ports)\n\
2308                 - Network adapters / IP / gateway / DNS overview → `network`\n\
2309                 - Internet / online / can I reach the internet → `connectivity`\n\
2310                 - Wi-Fi / wireless / signal strength / SSID → `wifi`\n\
2311                 - VPN tunnel / VPN adapter → `vpn`\n\
2312                 - Security / Defender / antivirus / firewall / UAC → `security`\n\
2313                 - Windows Update / pending updates → `updates`\n\
2314                 - Health report / system status overall → `health_report`\n\
2315                 - PATH entries / raw PATH → `path`\n\
2316                 - Installed developer tools / versions / toolchain → `toolchains`\n\
2317                 - Environment/package-manager conflicts → `env_doctor`\n\
2318                 - Fix a workstation problem (cargo not found, port in use, LM Studio) → `fix_plan`\n\
2319                 - Recent Windows errors / warnings / event log / event viewer / show me errors / what failed recently → `log_check` (do NOT call health_report first)\n\
2320                 - Repo / git / workspace health → `repo_doctor`\n\
2321                 - List a specific directory → `directory` (pass `path` arg)\n\
2322                 - Desktop or Downloads folder → `desktop` or `downloads`\n\
2323                 NEVER use `disk` or `directory` for storage/space questions — use `storage`.\n\
2324                 Only use `shell` if the question truly cannot be answered by any topic above.\n\
2325                 NEVER tell the user to run PowerShell, cmd, or shell commands themselves. If the data is incomplete, say so and tell them to ask a more specific question instead.\n\
2326                 NEVER expose internal tool names or API syntax (like `inspect_host(topic=...)`) in your response. Refer to capabilities in plain English: say 'ask me for a fix plan' not 'run inspect_host(topic=fix_plan)'.\n"
2327              );
2328        }
2329        if !tiny_context_mode && fix_plan_mode {
2330            system_msg.push_str(
2331                "\n\n# FIX PLAN MODE\n\
2332                 This turn is a workstation remediation question, not just a diagnosis question.\n\
2333                 Call `inspect_host` with `topic=fix_plan` first.\n\
2334                 Do not start with `path`, `toolchains`, `env_doctor`, or `ports` unless the user explicitly asks for diagnosis details instead of a fix plan.\n\
2335                 Keep the answer grounded, stepwise, and approval-aware.\n"
2336            );
2337        }
2338        if !tiny_context_mode && maintainer_workflow_mode {
2339            system_msg.push_str(
2340                "\n\n# HEMATITE MAINTAINER WORKFLOW MODE\n\
2341                 This turn asks Hematite to run one of Hematite's own maintainer workflows, not invent an ad hoc shell command.\n\
2342                 Prefer `run_hematite_maintainer_workflow` for existing Hematite workflows such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`.\n\
2343                 Use workflow `clean` for cleanup, workflow `package_windows` for rebuilding the local portable or installer, and workflow `release` for the normal version bump/tag/push/publish flow.\n\
2344                 Do not treat this as a generic current-workspace script runner. Only fall back to raw `shell` if the user asks for a script or command outside those Hematite maintainer workflows.\n"
2345            );
2346        }
2347        if !tiny_context_mode && workspace_workflow_mode {
2348            system_msg.push_str(
2349                "\n\n# WORKSPACE WORKFLOW MODE\n\
2350                 This turn asks Hematite to run something in the active project workspace, not in Hematite's own source tree.\n\
2351                 Prefer `run_workspace_workflow` for the current project's build, test, lint, fix, package scripts, just/task/make targets, local repo scripts, or an exact workspace command.\n\
2352                 This tool always runs from the locked workspace root.\n\
2353                 If no real project workspace is locked, say so and tell the user to relaunch Hematite in the target project directory.\n\
2354                 Do not use `run_hematite_maintainer_workflow` unless the request is specifically about Hematite's own cleanup, packaging, or release scripts.\n"
2355            );
2356        }
2357
2358        if !tiny_context_mode && architecture_overview_mode {
2359            system_msg.push_str(
2360                "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2361                 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow.\n\
2362                 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2363                 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2364            );
2365        }
2366
2367        // ── Workflow mode guidance (pinned files are injected further below) ─
2368        system_msg.push_str(&format!(
2369            "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2370            self.workflow_mode.label()
2371        ));
2372        if tiny_context_mode {
2373            system_msg
2374                .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2375        } else {
2376            match self.workflow_mode {
2377                WorkflowMode::Auto => system_msg.push_str(
2378                    "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2379                ),
2380                WorkflowMode::Ask => system_msg.push_str(
2381                    "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2382                ),
2383                WorkflowMode::Code => system_msg.push_str(
2384                    "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2385                ),
2386                WorkflowMode::Architect => system_msg.push_str(
2387                    "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2388                ),
2389                WorkflowMode::ReadOnly => system_msg.push_str(
2390                    "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2391                ),
2392                WorkflowMode::Teach => system_msg.push_str(
2393                    "TEACH means you are a senior technician giving the user a grounded, numbered walkthrough. \
2394                     MANDATORY PROTOCOL for every admin/config/write task:\n\
2395                     1. Call inspect_host with the most relevant topic(s) FIRST to observe the actual machine state.\n\
2396                     2. Then deliver a numbered step-by-step tutorial that references what you actually observed — exact commands, exact paths, exact values.\n\
2397                     3. End with a verification step the user can run to confirm success.\n\
2398                     4. Do NOT execute write operations yourself. You are the teacher; the user performs the steps.\n\
2399                     5. Treat the user as capable — give precise instructions, not hedged warnings.\n\
2400                     Relevant inspect_host topics for common tasks: hardware (driver installs), security (firewall), ssh (SSH keys), wsl (WSL setup), env (PATH/env vars), services (service config), recent_crashes (troubleshooting), disk_health (storage issues).\n",
2401                ),
2402                WorkflowMode::Chat => {} // replaced by build_chat_system_prompt below
2403            }
2404        }
2405        if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2406            system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2407            system_msg.push_str(architect_handoff_contract());
2408            system_msg.push('\n');
2409        }
2410        if !tiny_context_mode && implement_current_plan {
2411            system_msg.push_str(
2412                "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2413                 The user explicitly asked you to implement the current saved plan.\n\
2414                 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2415                 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2416                 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2417                 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2418            );
2419            if let Some(plan) = self.session_memory.current_plan.as_ref() {
2420                if !plan.target_files.is_empty() {
2421                    system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2422                    for path in &plan.target_files {
2423                        system_msg.push_str(&format!("- {}\n", path));
2424                    }
2425                }
2426            }
2427        }
2428        if !tiny_context_mode {
2429            let pinned = self.pinned_files.lock().await;
2430            if !pinned.is_empty() {
2431                system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2432                system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
2433                for (path, content) in pinned.iter() {
2434                    system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2435                }
2436            }
2437        }
2438        if !tiny_context_mode {
2439            self.append_session_handoff(&mut system_msg);
2440        }
2441        // In chat mode, replace the full harness prompt with a clean conversational surface.
2442        // The harness prompt (built above) is discarded — Rusty personality takes over.
2443        let system_msg = if self.workflow_mode.is_chat() {
2444            self.build_chat_system_prompt()
2445        } else {
2446            system_msg
2447        };
2448        if self.history.is_empty() || self.history[0].role != "system" {
2449            self.history.insert(0, ChatMessage::system(&system_msg));
2450        } else {
2451            self.history[0] = ChatMessage::system(&system_msg);
2452        }
2453
2454        // Ensure a clean state for the new turn.
2455        self.cancel_token
2456            .store(false, std::sync::atomic::Ordering::SeqCst);
2457
2458        // [Official Gemma-4 Spec] Purge reasoning history for new user turns.
2459        // History from previous turns must not be fed back into the prompt to prevent duplication.
2460        self.reasoning_history = None;
2461
2462        let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2463        let user_content = match self.think_mode {
2464            Some(true) => format!("/think\n{}", effective_user_input),
2465            Some(false) => format!("/no_think\n{}", effective_user_input),
2466            // For non-Gemma models (Qwen etc.) default to /think so the model uses
2467            // hybrid thinking — it decides how much reasoning each turn needs.
2468            // Gemma handles reasoning via <|think|> in the system prompt instead.
2469            // Chat mode and quick tool calls skip /think — fast direct answers.
2470            None if !is_gemma
2471                && !self.workflow_mode.is_chat()
2472                && !is_quick_tool_request(&effective_user_input) =>
2473            {
2474                format!("/think\n{}", effective_user_input)
2475            }
2476            None => effective_user_input.clone(),
2477        };
2478        if let Some(image) = user_turn.attached_image.as_ref() {
2479            let image_url =
2480                crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2481                    .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2482            self.history
2483                .push(ChatMessage::user_with_image(&user_content, &image_url));
2484        } else {
2485            self.history.push(ChatMessage::user(&user_content));
2486        }
2487        self.transcript.log_user(&transcript_user_input);
2488
2489        // Incremental re-index and Vein context injection. Ordinary chat mode
2490        // still skips repo-snippet noise, but docs-only workspaces and explicit
2491        // session-recall prompts should keep Vein memory available.
2492        let vein_docs_only = self.vein_docs_only_mode();
2493        let allow_vein_context = !self.workflow_mode.is_chat()
2494            || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2495        let (vein_context, vein_paths) = if allow_vein_context {
2496            self.refresh_vein_index();
2497            let _ = tx
2498                .send(InferenceEvent::VeinStatus {
2499                    file_count: self.vein.file_count(),
2500                    embedded_count: self.vein.embedded_chunk_count(),
2501                    docs_only: vein_docs_only,
2502                })
2503                .await;
2504            match self.build_vein_context(&effective_user_input) {
2505                Some((ctx, paths)) => (Some(ctx), paths),
2506                None => (None, Vec::new()),
2507            }
2508        } else {
2509            (None, Vec::new())
2510        };
2511        if !vein_paths.is_empty() {
2512            let _ = tx
2513                .send(InferenceEvent::VeinContext { paths: vein_paths })
2514                .await;
2515        }
2516
2517        // Route: pick fast vs think model based on the complexity of this request.
2518        let routed_model = route_model(
2519            &effective_user_input,
2520            effective_fast.as_deref(),
2521            effective_think.as_deref(),
2522        )
2523        .map(|s| s.to_string());
2524
2525        let mut loop_intervention: Option<String> = None;
2526
2527        // ── Harness pre-run: multi-topic host inspection ─────────────────────
2528        // When the user asks for 2+ distinct inspect_host topics in one message,
2529        // run them all here and inject the combined results as a loop_intervention
2530        // so the model receives data instead of having to orchestrate tool calls.
2531        // This prevents the model from collapsing multiple topics into a generic
2532        // one, burning the tool loop budget, or retrying via shell.
2533        {
2534            let topics = crate::agent::routing::all_host_inspection_topics(&effective_user_input);
2535            if topics.len() >= 2 {
2536                let _ = tx
2537                    .send(InferenceEvent::Thought(format!(
2538                        "Harness pre-run: {} host inspection topics detected — running all before model turn.",
2539                        topics.len()
2540                    )))
2541                    .await;
2542
2543                let topic_list = topics.join(", ");
2544                let mut combined = format!(
2545                    "## HARNESS PRE-RUN RESULTS\n\
2546                     The harness already ran inspect_host for the following topics: {topic_list}.\n\
2547                     Use the tool results in context to answer. Do NOT repeat these tool calls.\n\n"
2548                );
2549
2550                let mut tool_calls = Vec::new();
2551                let mut tool_msgs = Vec::new();
2552
2553                for topic in &topics {
2554                    let call_id = format!("prerun_{topic}");
2555                    let args_val = serde_json::json!({ "topic": *topic, "max_entries": 20 });
2556                    let args_str = serde_json::to_string(&args_val).unwrap_or_default();
2557
2558                    tool_calls.push(crate::agent::inference::ToolCallResponse {
2559                        id: call_id.clone(),
2560                        call_type: "function".to_string(),
2561                        function: crate::agent::inference::ToolCallFn {
2562                            name: "inspect_host".to_string(),
2563                            arguments: args_str,
2564                        },
2565                    });
2566
2567                    let label = format!("### inspect_host(topic=\"{topic}\")\n");
2568                    let _ = tx
2569                        .send(InferenceEvent::ToolCallStart {
2570                            id: call_id.clone(),
2571                            name: "inspect_host".to_string(),
2572                            args: format!("inspect host {topic}"),
2573                        })
2574                        .await;
2575
2576                    match crate::tools::host_inspect::inspect_host(&args_val).await {
2577                        Ok(out) => {
2578                            let _ = tx
2579                                .send(InferenceEvent::ToolCallResult {
2580                                    id: call_id.clone(),
2581                                    name: "inspect_host".to_string(),
2582                                    output: out.chars().take(300).collect::<String>() + "...",
2583                                    is_error: false,
2584                                })
2585                                .await;
2586                            combined.push_str(&label);
2587                            combined.push_str(&out);
2588                            combined.push_str("\n\n");
2589                            tool_msgs.push(ChatMessage::tool_result_for_model(
2590                                &call_id,
2591                                "inspect_host",
2592                                &out,
2593                                &self.engine.current_model(),
2594                            ));
2595                        }
2596                        Err(e) => {
2597                            let err_msg = format!("Error: {e}");
2598                            combined.push_str(&label);
2599                            combined.push_str(&err_msg);
2600                            combined.push_str("\n\n");
2601                            tool_msgs.push(ChatMessage::tool_result_for_model(
2602                                &call_id,
2603                                "inspect_host",
2604                                &err_msg,
2605                                &self.engine.current_model(),
2606                            ));
2607                        }
2608                    }
2609                }
2610
2611                // Add the simulated turn to history so the model sees it as context.
2612                self.history
2613                    .push(ChatMessage::assistant_tool_calls("", tool_calls));
2614                for msg in tool_msgs {
2615                    self.history.push(msg);
2616                }
2617
2618                loop_intervention = Some(combined);
2619            }
2620        }
2621
2622        // ── Computation Integrity: nudge model toward run_code for precise math ──
2623        // When the query involves exact numeric computation (hashes, financial math,
2624        // statistics, date arithmetic, unit conversions, algorithmic checks), inject
2625        // a brief pre-turn reminder so the model reaches for run_code instead of
2626        // answering from training-data memory. Only fires when no harness pre-run
2627        // already set a loop_intervention.
2628        if loop_intervention.is_none() && needs_computation_sandbox(&effective_user_input) {
2629            loop_intervention = Some(
2630                "COMPUTATION INTEGRITY NOTICE: This query involves precise numeric computation. \
2631                 Do NOT answer from training-data memory — memory answers for math are guesses. \
2632                 Use `run_code` to compute the real result and return the actual output. \
2633                 IMPORTANT: the `run_code` tool defaults to JavaScript (Deno). \
2634                 If you write Python code, you MUST pass `language: \"python\"` explicitly. \
2635                 If you write JavaScript/TypeScript, omit the language field or pass `language: \"javascript\"`. \
2636                 Write the code, run it, return the result."
2637                    .to_string(),
2638            );
2639        }
2640
2641        let mut implementation_started = false;
2642        let mut non_mutating_plan_steps = 0usize;
2643        let non_mutating_plan_soft_cap = 5usize;
2644        let non_mutating_plan_hard_cap = 8usize;
2645        let mut overview_runtime_trace: Option<String> = None;
2646
2647        // Safety cap – never spin forever on a broken model.
2648        let max_iters = 25;
2649        let mut consecutive_errors = 0;
2650        let mut empty_cleaned_nudges = 0u8;
2651        let mut first_iter = true;
2652        let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2653        // Placeholder for tracking identical tool results within this turn to detect logical loops. NOTE(review): the binding is immutable and unused in the visible code.
2654        let _result_counts: std::collections::HashMap<String, usize> =
2655            std::collections::HashMap::new();
2656        // Track the count of identical (name, args) calls to detect infinite tool loops.
2657        let mut repeat_counts: std::collections::HashMap<String, usize> =
2658            std::collections::HashMap::new();
2659        let mut completed_tool_cache: std::collections::HashMap<String, CachedToolResult> =
2660            std::collections::HashMap::new();
2661        let mut successful_read_targets: std::collections::HashSet<String> =
2662            std::collections::HashSet::new();
2663        // (path, offset) pairs — catches repeated reads at the same non-zero offset.
2664        let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2665            std::collections::HashSet::new();
2666        let mut successful_grep_targets: std::collections::HashSet<String> =
2667            std::collections::HashSet::new();
2668        let mut no_match_grep_targets: std::collections::HashSet<String> =
2669            std::collections::HashSet::new();
2670        let mut broad_grep_targets: std::collections::HashSet<String> =
2671            std::collections::HashSet::new();
2672
2673        // Track the index of the message that started THIS turn, so compaction doesn't summarize it.
2674        let mut turn_anchor = self.history.len().saturating_sub(1);
2675
2676        for _iter in 0..max_iters {
2677            let mut mutation_occurred = false;
2678            // Priority Check: External Cancellation (via Esc key in TUI)
2679            if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2680                self.cancel_token
2681                    .store(false, std::sync::atomic::Ordering::SeqCst);
2682                let _ = tx
2683                    .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2684                    .await;
2685                let _ = tx.send(InferenceEvent::Done).await;
2686                return Ok(());
2687            }
2688
2689            // ── Intelligence Surge: Proactive Compaction Check ──────────────────────
2690            if self
2691                .compact_history_if_needed(&tx, Some(turn_anchor))
2692                .await?
2693            {
2694                // After compaction, history is [system, summary, turn_anchor, ...]
2695                // The new turn_anchor is index 2.
2696                turn_anchor = 2;
2697            }
2698
2699            // On the first iteration (unless implementing the current plan) inject Vein
2700            // context into the system message. Subsequent iterations use the plain slice —
2701            // tool results are now in history so Vein context would be redundant.
2702            let inject_vein = first_iter && !implement_current_plan;
2703            let messages = if implement_current_plan {
2704                first_iter = false;
2705                self.context_window_slice_from(turn_anchor)
2706            } else {
2707                first_iter = false;
2708                self.context_window_slice()
2709            };
2710
2711            // Use the canonical system prompt from history[0] which was built
2712            // by InferenceEngine::build_system_prompt() + build_system_with_corrections()
2713            // and includes GPU state, git context, permissions, and instruction files.
2714            let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2715                // Gemma 4 handles multiple system messages natively.
2716                // Standard models (Qwen, etc.) reject a second system message — merge into history[0].
2717                if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2718                    let mut msgs = vec![self.history[0].clone()];
2719                    msgs.push(ChatMessage::system(&intervention));
2720                    msgs
2721                } else {
2722                    let merged =
2723                        format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2724                    vec![ChatMessage::system(&merged)]
2725                }
2726            } else {
2727                vec![self.history[0].clone()]
2728            };
2729
2730            // Inject Vein context into the system message on the first iteration.
2731            // Vein results are merged in the same way as loop_intervention so standard
2732            // models (Qwen etc.) only ever see one system message.
2733            if inject_vein {
2734                if let Some(ref ctx) = vein_context.as_ref() {
2735                    if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2736                        prompt_msgs.push(ChatMessage::system(ctx));
2737                    } else {
2738                        let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2739                        prompt_msgs[0] = ChatMessage::system(&merged);
2740                    }
2741                }
2742            }
2743            prompt_msgs.extend(messages);
2744            if let Some(budget_note) =
2745                enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2746            {
2747                self.emit_operator_checkpoint(
2748                    &tx,
2749                    OperatorCheckpointState::BudgetReduced,
2750                    budget_note,
2751                )
2752                .await;
2753                let recipe = plan_recovery(
2754                    RecoveryScenario::PromptBudgetPressure,
2755                    &self.recovery_context,
2756                );
2757                self.emit_recovery_recipe_summary(
2758                    &tx,
2759                    recipe.recipe.scenario.label(),
2760                    compact_recovery_plan_summary(&recipe),
2761                )
2762                .await;
2763            }
2764            self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2765                .await;
2766
2767            let (mut text, mut tool_calls, usage, finish_reason) = match self
2768                .engine
2769                .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2770                .await
2771            {
2772                Ok(result) => result,
2773                Err(e) => {
2774                    let class = classify_runtime_failure(&e);
2775                    if should_retry_runtime_failure(class) {
2776                        if self.recovery_context.consume_transient_retry() {
2777                            let label = match class {
2778                                RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2779                                _ => "empty_model_response",
2780                            };
2781                            self.transcript.log_system(&format!(
2782                                "Automatic provider recovery triggered: {}",
2783                                e.trim()
2784                            ));
2785                            self.emit_recovery_recipe_summary(
2786                                &tx,
2787                                label,
2788                                compact_runtime_recovery_summary(class),
2789                            )
2790                            .await;
2791                            let _ = tx
2792                                .send(InferenceEvent::ProviderStatus {
2793                                    state: ProviderRuntimeState::Recovering,
2794                                    summary: compact_runtime_recovery_summary(class).into(),
2795                                })
2796                                .await;
2797                            self.emit_operator_checkpoint(
2798                                &tx,
2799                                OperatorCheckpointState::RecoveringProvider,
2800                                compact_runtime_recovery_summary(class),
2801                            )
2802                            .await;
2803                            continue;
2804                        }
2805                    }
2806
2807                    self.emit_runtime_failure(&tx, class, &e).await;
2808                    break;
2809                }
2810            };
2811            self.emit_provider_live(&tx).await;
2812
2813            // Update TUI token counter with actual usage from LM Studio.
2814            if let Some(ref u) = usage {
2815                let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2816            }
2817
2818            // Fallback safety net: if native tool markup leaked past the inference-layer
2819            // extractor, recover it here instead of treating it as plain assistant text.
2820            if tool_calls
2821                .as_ref()
2822                .map(|calls| calls.is_empty())
2823                .unwrap_or(true)
2824            {
2825                if let Some(raw_text) = text.as_deref() {
2826                    let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2827                    if !native_calls.is_empty() {
2828                        tool_calls = Some(native_calls);
2829                        let stripped =
2830                            crate::agent::inference::strip_native_tool_call_text(raw_text);
2831                        text = if stripped.trim().is_empty() {
2832                            None
2833                        } else {
2834                            Some(stripped)
2835                        };
2836                    }
2837                }
2838            }
2839
2840            // Treat empty tool_calls arrays (Some(vec![])) the same as None –
2841            // the model returned text only; an empty array causes an infinite loop.
2842            let tool_calls = tool_calls.filter(|c| !c.is_empty());
2843            let near_context_ceiling = usage
2844                .as_ref()
2845                .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2846                .unwrap_or(false);
2847
2848            if let Some(calls) = tool_calls {
2849                let (calls, prune_trace_note) =
2850                    prune_architecture_trace_batch(calls, architecture_overview_mode);
2851                if let Some(note) = prune_trace_note {
2852                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2853                }
2854
2855                let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2856                    calls,
2857                    self.workflow_mode.is_read_only(),
2858                    architecture_overview_mode,
2859                );
2860                if let Some(note) = prune_bloat_note {
2861                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2862                }
2863
2864                let (calls, prune_note) = prune_authoritative_tool_batch(
2865                    calls,
2866                    grounded_trace_mode,
2867                    &effective_user_input,
2868                );
2869                if let Some(note) = prune_note {
2870                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2871                }
2872
2873                let (calls, prune_redir_note) = prune_redirected_shell_batch(calls);
2874                if let Some(note) = prune_redir_note {
2875                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2876                }
2877
2878                let (calls, batch_note) = order_batch_reads_first(calls);
2879                if let Some(note) = batch_note {
2880                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2881                }
2882
2883                if let Some(repeated_path) = calls
2884                    .iter()
2885                    .filter(|c| {
2886                        let parsed = serde_json::from_str::<Value>(
2887                            &crate::agent::inference::normalize_tool_argument_string(
2888                                &c.function.name,
2889                                &c.function.arguments,
2890                            ),
2891                        )
2892                        .ok();
2893                        let offset = parsed
2894                            .as_ref()
2895                            .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2896                            .unwrap_or(0);
2897                        // Catch re-reads from the top (original behaviour) AND repeated
2898                        // reads at the exact same non-zero offset (new: catches targeted loops).
2899                        if offset < 200 {
2900                            return true;
2901                        }
2902                        if let Some(path) = parsed
2903                            .as_ref()
2904                            .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2905                        {
2906                            let normalized = normalize_workspace_path(path);
2907                            return successful_read_regions.contains(&(normalized, offset));
2908                        }
2909                        false
2910                    })
2911                    .filter_map(|c| repeated_read_target(&c.function))
2912                    .find(|path| successful_read_targets.contains(path))
2913                {
2914                    loop_intervention = Some(format!(
2915                        "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2916                        repeated_path
2917                    ));
2918                    let _ = tx
2919                        .send(InferenceEvent::Thought(
2920                            "Read discipline: preventing repeated full-file reads on the same path."
2921                                .into(),
2922                        ))
2923                        .await;
2924                    continue;
2925                }
2926
2927                if capability_mode
2928                    && !capability_needs_repo
2929                    && calls
2930                        .iter()
2931                        .all(|c| is_capability_probe_tool(&c.function.name))
2932                {
2933                    loop_intervention = Some(
2934                        "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2935                         Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2936                         Do not mention raw `mcp__*` names unless they are active and directly relevant."
2937                            .to_string(),
2938                    );
2939                    let _ = tx
2940                        .send(InferenceEvent::Thought(
2941                            "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2942                                .into(),
2943                        ))
2944                        .await;
2945                    continue;
2946                }
2947
2948                // VOCAL AGENT: If the model provided reasoning alongside tools,
2949                // stream it to the SPECULAR panel now using the hardened extraction.
2950                let raw_content = text.as_deref().unwrap_or(" ");
2951
2952                if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2953                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2954                    // Reasoning is silent (hidden in SPECULAR only).
2955                    self.reasoning_history = Some(thought);
2956                }
2957
2958                // [Gemma-4 Protocol] Keep raw content (including thoughts) during tool loops.
2959                // Thoughts are only stripped before the 'final' user turn.
2960                let stored_tool_call_content = if implement_current_plan {
2961                    cap_output(raw_content, 1200)
2962                } else {
2963                    raw_content.to_string()
2964                };
2965                self.history.push(ChatMessage::assistant_tool_calls(
2966                    &stored_tool_call_content,
2967                    calls.clone(),
2968                ));
2969
2970                // ── LAYER 4: Parallel Tool Orchestration (Batching) ────────────────────
2971                let mut results = Vec::new();
2972                let gemma4_model =
2973                    crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2974                let latest_user_prompt = self.latest_user_prompt();
2975                let mut seen_call_keys = std::collections::HashSet::new();
2976                let mut deduped_calls = Vec::new();
2977                for call in calls.clone() {
2978                    let (normalized_name, normalized_args) = normalized_tool_call_for_execution(
2979                        &call.function.name,
2980                        &call.function.arguments,
2981                        gemma4_model,
2982                        latest_user_prompt,
2983                    );
2984                    let key = canonical_tool_call_key(&normalized_name, &normalized_args);
2985                    if seen_call_keys.insert(key) {
2986                        let repeat_guard_exempt = matches!(
2987                            normalized_name.as_str(),
2988                            "verify_build" | "git_commit" | "git_push"
2989                        );
2990                        if !repeat_guard_exempt {
2991                            if let Some(cached) = completed_tool_cache
2992                                .get(&canonical_tool_call_key(&normalized_name, &normalized_args))
2993                            {
2994                                let _ = tx
2995                                    .send(InferenceEvent::Thought(
2996                                        "Cached tool result reused: identical built-in invocation already completed earlier in this turn."
2997                                            .to_string(),
2998                                    ))
2999                                    .await;
3000                                loop_intervention = Some(format!(
3001                                    "STOP. You already called `{}` with identical arguments earlier in this turn and already have that result in conversation history. Do not call it again. Use the existing result to answer or choose a different next step.",
3002                                    cached.tool_name
3003                                ));
3004                                continue;
3005                            }
3006                        }
3007                        deduped_calls.push(call);
3008                    } else {
3009                        let _ = tx
3010                            .send(InferenceEvent::Thought(
3011                                "Duplicate tool call skipped: identical built-in invocation already ran this turn."
3012                                    .to_string(),
3013                            ))
3014                            .await;
3015                    }
3016                }
3017
3018                // Partition tool calls: Parallel Read vs Serial Mutating
3019                let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = deduped_calls
3020                    .into_iter()
3021                    .partition(|c| is_parallel_safe(&c.function.name));
3022
3023                // 1. Concurrent Execution (ParallelRead)
3024                if !parallel_calls.is_empty() {
3025                    let mut tasks = Vec::new();
3026                    for call in parallel_calls {
3027                        let tx_clone = tx.clone();
3028                        let config_clone = config.clone();
3029                        // Carry the real call ID into the outcome
3030                        let call_with_id = call.clone();
3031                        tasks.push(self.process_tool_call(
3032                            call_with_id.function,
3033                            config_clone,
3034                            yolo,
3035                            tx_clone,
3036                            call_with_id.id,
3037                        ));
3038                    }
3039                    // Run all parallel-safe tasks concurrently and wait until every one has finished.
3040                    results.extend(futures::future::join_all(tasks).await);
3041                }
3042
3043                // 2. Sequential Execution (SerialMutating)
3044                for call in serial_calls {
3045                    results.push(
3046                        self.process_tool_call(
3047                            call.function,
3048                            config.clone(),
3049                            yolo,
3050                            tx.clone(),
3051                            call.id,
3052                        )
3053                        .await,
3054                    );
3055                }
3056
3057                // 3. Collate Messages into History & UI
3058                let mut authoritative_tool_output: Option<String> = None;
3059                let mut blocked_policy_output: Option<String> = None;
3060                let mut recoverable_policy_intervention: Option<String> = None;
3061                let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
3062                let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
3063                    None;
3064                for res in results {
3065                    let call_id = res.call_id.clone();
3066                    let tool_name = res.tool_name.clone();
3067                    let final_output = res.output.clone();
3068                    let is_error = res.is_error;
3069                    for msg in res.msg_results {
3070                        self.history.push(msg);
3071                    }
3072
3073                    // Update State for Verification Loop
3074                    if matches!(
3075                        tool_name.as_str(),
3076                        "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
3077                    ) {
3078                        mutation_occurred = true;
3079                        implementation_started = true;
3080                        // Heat tracking: bump L1 score for the edited file.
3081                        if !is_error {
3082                            let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
3083                            if !path.is_empty() {
3084                                self.vein.bump_heat(path);
3085                                self.l1_context = self.vein.l1_context();
3086                            }
3087                            // Refresh repo map so PageRank accounts for the new edit.
3088                            self.refresh_repo_map();
3089                        }
3090                    }
3091
3092                    if tool_name == "verify_build" {
3093                        self.record_session_verification(
3094                            !is_error
3095                                && (final_output.contains("BUILD OK")
3096                                    || final_output.contains("BUILD SUCCESS")
3097                                    || final_output.contains("BUILD OKAY")),
3098                            if is_error {
3099                                "Explicit verify_build failed."
3100                            } else {
3101                                "Explicit verify_build passed."
3102                            },
3103                        );
3104                    }
3105
3106                    // Update Repeat Guard
3107                    let call_key = format!(
3108                        "{}:{}",
3109                        tool_name,
3110                        serde_json::to_string(&res.args).unwrap_or_default()
3111                    );
3112                    let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
3113                    *repeat_count += 1;
3114
3115                    // Exempt from the repeat guard: verify_build is legitimately called multiple times in fix-verify loops; git_commit and git_push are exempted too.
3116                    let repeat_guard_exempt = matches!(
3117                        tool_name.as_str(),
3118                        "verify_build" | "git_commit" | "git_push"
3119                    );
3120                    if *repeat_count >= 3 && !repeat_guard_exempt {
3121                        loop_intervention = Some(format!(
3122                            "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
3123                             Do not call it again. Either answer directly from what you already know, \
3124                             use a different tool or approach, or ask the user for clarification.",
3125                            tool_name, *repeat_count
3126                        ));
3127                        let _ = tx
3128                            .send(InferenceEvent::Thought(format!(
3129                                "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
3130                                tool_name, *repeat_count
3131                            )))
3132                            .await;
3133                    }
3134
3135                    if is_error {
3136                        consecutive_errors += 1;
3137                    } else {
3138                        consecutive_errors = 0;
3139                    }
3140
3141                    if consecutive_errors >= 3 {
3142                        loop_intervention = Some(
3143                            "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
3144                             STOP all tool calls immediately. Analyze why your previous 3 calls failed \
3145                             (check for hallucinations or invalid arguments) and ask the user for \
3146                             clarification if you cannot proceed.".to_string()
3147                        );
3148                    }
3149
3150                    if consecutive_errors >= 4 {
3151                        self.emit_runtime_failure(
3152                            &tx,
3153                            RuntimeFailureClass::ToolLoop,
3154                            "Hard termination: too many consecutive tool errors.",
3155                        )
3156                        .await;
3157                        return Ok(());
3158                    }
3159
3160                    let _ = tx
3161                        .send(InferenceEvent::ToolCallResult {
3162                            id: call_id.clone(),
3163                            name: tool_name.clone(),
3164                            output: final_output.clone(),
3165                            is_error,
3166                        })
3167                        .await;
3168
3169                    let repeat_guard_exempt = matches!(
3170                        tool_name.as_str(),
3171                        "verify_build" | "git_commit" | "git_push"
3172                    );
3173                    if !repeat_guard_exempt {
3174                        completed_tool_cache.insert(
3175                            canonical_tool_call_key(&tool_name, &res.args),
3176                            CachedToolResult {
3177                                tool_name: tool_name.clone(),
3178                            },
3179                        );
3180                    }
3181
3182                    // Cap output before history
3183                    let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
3184                        self.engine.current_context_length(),
3185                    );
3186                    let capped = if implement_current_plan {
3187                        cap_output(&final_output, 1200)
3188                    } else if compact_ctx
3189                        && (tool_name == "read_file" || tool_name == "inspect_lines")
3190                    {
3191                        // Compact context: cap file reads tightly and add a navigation hint on truncation.
3192                        let limit = 3000usize;
3193                        if final_output.len() > limit {
3194                            let total_lines = final_output.lines().count();
3195                            let mut split_at = limit;
3196                            while !final_output.is_char_boundary(split_at) && split_at > 0 {
3197                                split_at -= 1;
3198                            }
3199                            let scratch = write_output_to_scratch(&final_output, &tool_name)
3200                                .map(|p| format!(" Full file also saved to '{p}'."))
3201                                .unwrap_or_default();
3202                            format!(
3203                                "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.{}]",
3204                                &final_output[..split_at],
3205                                total_lines,
3206                                total_lines.saturating_sub(150),
3207                                scratch,
3208                            )
3209                        } else {
3210                            final_output.clone()
3211                        }
3212                    } else {
3213                        cap_output_for_tool(&final_output, 8000, &tool_name)
3214                    };
3215                    self.history.push(ChatMessage::tool_result_for_model(
3216                        &call_id,
3217                        &tool_name,
3218                        &capped,
3219                        &self.engine.current_model(),
3220                    ));
3221
3222                    if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
3223                    {
3224                        overview_runtime_trace =
3225                            Some(summarize_runtime_trace_output(&final_output));
3226                    }
3227
3228                    if !architecture_overview_mode
3229                        && !is_error
3230                        && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
3231                            || (toolchain_mode && tool_name == "describe_toolchain"))
3232                    {
3233                        authoritative_tool_output = Some(final_output.clone());
3234                    }
3235
3236                    if !is_error && tool_name == "read_file" {
3237                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3238                            let normalized = normalize_workspace_path(path);
3239                            let read_offset =
3240                                res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
3241                            successful_read_targets.insert(normalized.clone());
3242                            successful_read_regions.insert((normalized.clone(), read_offset));
3243                        }
3244                    }
3245
3246                    if !is_error && tool_name == "grep_files" {
3247                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
3248                            let normalized = normalize_workspace_path(path);
3249                            if final_output.starts_with("No matches for ") {
3250                                no_match_grep_targets.insert(normalized);
3251                            } else if grep_output_is_high_fanout(&final_output) {
3252                                broad_grep_targets.insert(normalized);
3253                            } else {
3254                                successful_grep_targets.insert(normalized);
3255                            }
3256                        }
3257                    }
3258
3259                    if is_error
3260                        && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
3261                        && (final_output.contains("search string not found")
3262                            || final_output.contains("search string is too short")
3263                            || final_output.contains("search string matched"))
3264                    {
3265                        if let Some(target) = action_target_path(&tool_name, &res.args) {
3266                            let guidance = if final_output.contains("matched") {
3267                                format!(
3268                                    "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
3269                                    tool_name, target
3270                                )
3271                            } else {
3272                                format!(
3273                                    "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
3274                                    tool_name, target
3275                                )
3276                            };
3277                            loop_intervention = Some(guidance);
3278                            *repeat_count = 0;
3279                        }
3280                    }
3281
3282                    // When guard.rs blocks a shell call with the run_code redirect hint,
3283                    // force the model to recover with run_code instead of giving up.
3284                    if is_error
3285                        && tool_name == "shell"
3286                        && final_output.contains("Use the run_code tool instead")
3287                        && loop_intervention.is_none()
3288                    {
3289                        loop_intervention = Some(
3290                            "STOP. Shell was blocked because this is a computation task. \
3291                             You MUST use `run_code` now — write the code and run it. \
3292                             Do NOT output an error message or give up. \
3293                             Call `run_code` with the appropriate language and code to compute the answer. \
3294                             If writing Python, pass `language: \"python\"`. \
3295                             If writing JavaScript, omit language or pass `language: \"javascript\"`."
3296                                .to_string(),
3297                        );
3298                    }
3299
3300                    // When run_code fails with a Deno parse error, the model likely sent Python
3301                    // code without specifying language: "python". Force a corrective retry.
3302                    if is_error
3303                        && tool_name == "run_code"
3304                        && (final_output.contains("source code could not be parsed")
3305                            || final_output.contains("Expected ';'")
3306                            || final_output.contains("Expected '}'")
3307                            || final_output.contains("is not defined")
3308                                && final_output.contains("deno"))
3309                        && loop_intervention.is_none()
3310                    {
3311                        loop_intervention = Some(
3312                            "STOP. run_code failed with a JavaScript parse error — you likely wrote Python \
3313                             code but forgot to pass `language: \"python\"`. \
3314                             Retry run_code with `language: \"python\"` and the same code. \
3315                             Do NOT fall back to shell. Do NOT give up."
3316                                .to_string(),
3317                        );
3318                    }
3319
3320                    if res.blocked_by_policy
3321                        && is_mcp_workspace_read_tool(&tool_name)
3322                        && recoverable_policy_intervention.is_none()
3323                    {
3324                        recoverable_policy_intervention = Some(
3325                            "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
3326                        );
3327                        recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
3328                        recoverable_policy_checkpoint = Some((
3329                            OperatorCheckpointState::BlockedPolicy,
3330                            "MCP workspace read blocked; rerouting to built-in file tools."
3331                                .to_string(),
3332                        ));
3333                    } else if res.blocked_by_policy
3334                        && implement_current_plan
3335                        && is_current_plan_irrelevant_tool(&tool_name)
3336                        && recoverable_policy_intervention.is_none()
3337                    {
3338                        recoverable_policy_intervention = Some(format!(
3339                            "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
3340                            tool_name
3341                        ));
3342                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
3343                        recoverable_policy_checkpoint = Some((
3344                            OperatorCheckpointState::BlockedPolicy,
3345                            format!(
3346                                "Current-plan execution blocked unrelated tool `{}`.",
3347                                tool_name
3348                            ),
3349                        ));
3350                    } else if res.blocked_by_policy
3351                        && implement_current_plan
3352                        && final_output.contains("requires recent file evidence")
3353                        && recoverable_policy_intervention.is_none()
3354                    {
3355                        let target = action_target_path(&tool_name, &res.args)
3356                            .unwrap_or_else(|| "the target file".to_string());
3357                        recoverable_policy_intervention = Some(format!(
3358                            "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
3359                        ));
3360                        recoverable_policy_recipe =
3361                            Some(RecoveryScenario::RecentFileEvidenceMissing);
3362                        recoverable_policy_checkpoint = Some((
3363                            OperatorCheckpointState::BlockedRecentFileEvidence,
3364                            format!("Edit blocked on `{target}`; recent file evidence missing."),
3365                        ));
3366                    } else if res.blocked_by_policy
3367                        && implement_current_plan
3368                        && final_output.contains("requires an exact local line window first")
3369                        && recoverable_policy_intervention.is_none()
3370                    {
3371                        let target = action_target_path(&tool_name, &res.args)
3372                            .unwrap_or_else(|| "the target file".to_string());
3373                        recoverable_policy_intervention = Some(format!(
3374                            "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
3375                        ));
3376                        recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
3377                        recoverable_policy_checkpoint = Some((
3378                            OperatorCheckpointState::BlockedExactLineWindow,
3379                            format!("Edit blocked on `{target}`; exact line window required."),
3380                        ));
3381                    } else if res.blocked_by_policy
3382                        && (final_output.contains("Prefer `")
3383                            || final_output.contains("Prefer tool"))
3384                        && recoverable_policy_intervention.is_none()
3385                    {
3386                        recoverable_policy_intervention = Some(final_output.clone());
3387                        recoverable_policy_recipe = Some(RecoveryScenario::PolicyCorrection);
3388                        recoverable_policy_checkpoint = Some((
3389                            OperatorCheckpointState::BlockedPolicy,
3390                            "Action blocked by policy; self-correction triggered using tool recommendation."
3391                                .to_string(),
3392                        ));
3393                    } else if res.blocked_by_policy && blocked_policy_output.is_none() {
3394                        blocked_policy_output = Some(final_output.clone());
3395                    }
3396
3397                    if *repeat_count >= 5 {
3398                        let _ = tx.send(InferenceEvent::Done).await;
3399                        return Ok(());
3400                    }
3401
3402                    if implement_current_plan
3403                        && !implementation_started
3404                        && !is_error
3405                        && is_non_mutating_plan_step_tool(&tool_name)
3406                    {
3407                        non_mutating_plan_steps += 1;
3408                    }
3409                }
3410
3411                if let Some(intervention) = recoverable_policy_intervention {
3412                    if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
3413                        self.emit_operator_checkpoint(&tx, state, summary).await;
3414                    }
3415                    if let Some(scenario) = recoverable_policy_recipe.take() {
3416                        let recipe = plan_recovery(scenario, &self.recovery_context);
3417                        self.emit_recovery_recipe_summary(
3418                            &tx,
3419                            recipe.recipe.scenario.label(),
3420                            compact_recovery_plan_summary(&recipe),
3421                        )
3422                        .await;
3423                    }
3424                    loop_intervention = Some(intervention);
3425                    let _ = tx
3426                        .send(InferenceEvent::Thought(
3427                            "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
3428                                .into(),
3429                        ))
3430                        .await;
3431                    continue;
3432                }
3433
3434                if architecture_overview_mode {
3435                    match overview_runtime_trace.as_deref() {
3436                        Some(runtime_trace) => {
3437                            let response = build_architecture_overview_answer(runtime_trace);
3438                            self.history.push(ChatMessage::assistant_text(&response));
3439                            self.transcript.log_agent(&response);
3440
3441                            for chunk in chunk_text(&response, 8) {
3442                                if !chunk.is_empty() {
3443                                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
3444                                }
3445                            }
3446
3447                            let _ = tx.send(InferenceEvent::Done).await;
3448                            break;
3449                        }
3450                        None => {
3451                            loop_intervention = Some(
3452                                "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
3453                                    .to_string(),
3454                            );
3455                            continue;
3456                        }
3457                    }
3458                }
3459
3460                if implement_current_plan
3461                    && !implementation_started
3462                    && non_mutating_plan_steps >= non_mutating_plan_hard_cap
3463                {
3464                    let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
3465                    self.history.push(ChatMessage::assistant_text(&msg));
3466                    self.transcript.log_agent(&msg);
3467
3468                    for chunk in chunk_text(&msg, 8) {
3469                        if !chunk.is_empty() {
3470                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3471                        }
3472                    }
3473
3474                    let _ = tx.send(InferenceEvent::Done).await;
3475                    break;
3476                }
3477
3478                if let Some(blocked_output) = blocked_policy_output {
3479                    self.emit_operator_checkpoint(
3480                        &tx,
3481                        OperatorCheckpointState::BlockedPolicy,
3482                        "A blocked tool path was surfaced directly to the operator.",
3483                    )
3484                    .await;
3485                    self.history
3486                        .push(ChatMessage::assistant_text(&blocked_output));
3487                    self.transcript.log_agent(&blocked_output);
3488
3489                    for chunk in chunk_text(&blocked_output, 8) {
3490                        if !chunk.is_empty() {
3491                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3492                        }
3493                    }
3494
3495                    let _ = tx.send(InferenceEvent::Done).await;
3496                    break;
3497                }
3498
3499                if let Some(tool_output) = authoritative_tool_output {
3500                    self.history.push(ChatMessage::assistant_text(&tool_output));
3501                    self.transcript.log_agent(&tool_output);
3502
3503                    for chunk in chunk_text(&tool_output, 8) {
3504                        if !chunk.is_empty() {
3505                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3506                        }
3507                    }
3508
3509                    let _ = tx.send(InferenceEvent::Done).await;
3510                    break;
3511                }
3512
3513                if implement_current_plan && !implementation_started {
3514                    let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3515                    if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3516                        loop_intervention = Some(format!(
3517                            "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3518                            base
3519                        ));
3520                    } else {
3521                        loop_intervention = Some(base.to_string());
3522                    }
3523                } else if self.workflow_mode == WorkflowMode::Architect {
3524                    loop_intervention = Some(
3525                        format!(
3526                            "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3527                            architect_handoff_contract()
3528                        ),
3529                    );
3530                }
3531
3532                // 4. Auto-Verification Loop (The Perfect Bake)
3533                if mutation_occurred && !yolo {
3534                    let _ = tx
3535                        .send(InferenceEvent::Thought(
3536                            "Self-Verification: Running 'cargo check' to ensure build integrity..."
3537                                .into(),
3538                        ))
3539                        .await;
3540                    let verify_res = self.auto_verify_build().await;
3541                    let verify_ok = verify_res.contains("BUILD SUCCESS");
3542                    self.record_verify_build_result(verify_ok, &verify_res)
3543                        .await;
3544                    self.record_session_verification(
3545                        verify_ok,
3546                        if verify_ok {
3547                            "Automatic build verification passed."
3548                        } else {
3549                            "Automatic build verification failed."
3550                        },
3551                    );
3552                    self.history.push(ChatMessage::system(&format!(
3553                        "\n# SYSTEM VERIFICATION\n{verify_res}"
3554                    )));
3555                    let _ = tx
3556                        .send(InferenceEvent::Thought(
3557                            "Verification turn injected into history.".into(),
3558                        ))
3559                        .await;
3560                }
3561
3562                // Continue loop – the model will respond to the results.
3563                continue;
3564            } else if let Some(response_text) = text {
3565                if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3566                    if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3567                        let cleaned = build_session_reset_semantics_answer();
3568                        self.history.push(ChatMessage::assistant_text(&cleaned));
3569                        self.transcript.log_agent(&cleaned);
3570                        for chunk in chunk_text(&cleaned, 8) {
3571                            if !chunk.is_empty() {
3572                                let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3573                            }
3574                        }
3575                        let _ = tx.send(InferenceEvent::Done).await;
3576                        break;
3577                    }
3578
3579                    let warning = format_runtime_failure(
3580                        RuntimeFailureClass::ContextWindow,
3581                        "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3582                    );
3583                    self.history.push(ChatMessage::assistant_text(&warning));
3584                    self.transcript.log_agent(&warning);
3585                    let _ = tx
3586                        .send(InferenceEvent::Thought(
3587                            "Length recovery: model hit the context ceiling before completing the answer."
3588                                .into(),
3589                        ))
3590                        .await;
3591                    for chunk in chunk_text(&warning, 8) {
3592                        if !chunk.is_empty() {
3593                            let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3594                        }
3595                    }
3596                    let _ = tx.send(InferenceEvent::Done).await;
3597                    break;
3598                }
3599
3600                if response_text.contains("<|tool_call")
3601                    || response_text.contains("[END_TOOL_REQUEST]")
3602                    || response_text.contains("<|tool_response")
3603                    || response_text.contains("<tool_response|>")
3604                {
3605                    loop_intervention = Some(
3606                        "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3607                    );
3608                    continue;
3609                }
3610
3611                // 1. Process and route the reasoning block to SPECULAR.
3612                if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3613                {
3614                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3615                    // Persist for history audit (stripped from next turn by Volatile Reasoning rule).
3616                    // This will be summarized in the next turn's system prompt.
3617                    self.reasoning_history = Some(thought);
3618                }
3619
3620                // 2. Process and stream the final answer to the chat interface.
3621                let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3622
3623                if implement_current_plan && !implementation_started {
3624                    loop_intervention = Some(
3625                        "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3626                    );
3627                    continue;
3628                }
3629
3630                // [Hardened Interface] Strictly respect the stripper.
3631                // If it's empty after stripping think blocks, the model thought through its
3632                // answer but forgot to emit it (common with Qwen3 models in architect/ask mode).
3633                // Nudge it rather than silently dropping the turn — but cap at 2 retries so a
3634                // model that keeps returning whitespace/empty doesn't spin all 25 iterations.
3635                if cleaned.is_empty() {
3636                    empty_cleaned_nudges += 1;
3637                    if empty_cleaned_nudges == 1 {
3638                        loop_intervention = Some(
3639                            "Your visible response was empty. The tool already returned data. \
3640                             Write your answer now in plain text — no <think> tags, no tool calls. \
3641                             State the key facts in 2-5 sentences and stop."
3642                                .to_string(),
3643                        );
3644                        continue;
3645                    } else if empty_cleaned_nudges == 2 {
3646                        loop_intervention = Some(
3647                            "EMPTY RESPONSE. Do NOT use <think>. Do NOT call tools. \
3648                             Write the answer in plain text right now. \
3649                             Example format: \"Your CPU is X. Your GPU is Y. You have Z GB of RAM.\""
3650                                .to_string(),
3651                        );
3652                        continue;
3653                    }
3654                    // Nudge budget exhausted — surface as a recoverable empty-response failure
3655                    // so the TUI unblocks instead of hanging for the full max_iters budget.
3656                    let class = RuntimeFailureClass::EmptyModelResponse;
3657                    self.emit_runtime_failure(
3658                        &tx,
3659                        class,
3660                        "Model returned empty content after 2 nudge attempts.",
3661                    )
3662                    .await;
3663                    break;
3664                }
3665
3666                let architect_handoff = self.persist_architect_handoff(&cleaned);
3667                self.history.push(ChatMessage::assistant_text(&cleaned));
3668                self.transcript.log_agent(&cleaned);
3669
3670                // Send in smooth chunks for that professional UI feel.
3671                for chunk in chunk_text(&cleaned, 8) {
3672                    if !chunk.is_empty() {
3673                        let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3674                    }
3675                }
3676
3677                if let Some(plan) = architect_handoff.as_ref() {
3678                    let note = architect_handoff_operator_note(plan);
3679                    self.history.push(ChatMessage::system(&note));
3680                    self.transcript.log_system(&note);
3681                    let _ = tx
3682                        .send(InferenceEvent::MutedToken(format!("\n{}", note)))
3683                        .await;
3684                }
3685
3686                let _ = tx.send(InferenceEvent::Done).await;
3687                break;
3688            } else {
3689                let detail = "Model returned an empty response.";
3690                let class = classify_runtime_failure(detail);
3691                if should_retry_runtime_failure(class) {
3692                    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3693                        if let RecoveryDecision::Attempt(plan) =
3694                            attempt_recovery(scenario, &mut self.recovery_context)
3695                        {
3696                            self.transcript.log_system(
3697                                "Automatic provider recovery triggered: model returned an empty response.",
3698                            );
3699                            self.emit_recovery_recipe_summary(
3700                                &tx,
3701                                plan.recipe.scenario.label(),
3702                                compact_recovery_plan_summary(&plan),
3703                            )
3704                            .await;
3705                            let _ = tx
3706                                .send(InferenceEvent::ProviderStatus {
3707                                    state: ProviderRuntimeState::Recovering,
3708                                    summary: compact_runtime_recovery_summary(class).into(),
3709                                })
3710                                .await;
3711                            self.emit_operator_checkpoint(
3712                                &tx,
3713                                OperatorCheckpointState::RecoveringProvider,
3714                                compact_runtime_recovery_summary(class),
3715                            )
3716                            .await;
3717                            continue;
3718                        }
3719                    }
3720                }
3721
3722                self.emit_runtime_failure(&tx, class, detail).await;
3723                break;
3724            }
3725        }
3726
3727        self.trim_history(80);
3728        self.refresh_session_memory();
3729        // Record the goal and increment the turn counter before persisting.
3730        self.last_goal = Some(user_input.chars().take(300).collect());
3731        self.turn_count = self.turn_count.saturating_add(1);
3732        self.save_session();
3733        self.emit_compaction_pressure(&tx).await;
3734        Ok(())
3735    }
3736
3737    async fn emit_runtime_failure(
3738        &mut self,
3739        tx: &mpsc::Sender<InferenceEvent>,
3740        class: RuntimeFailureClass,
3741        detail: &str,
3742    ) {
3743        if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3744            let decision = preview_recovery_decision(scenario, &self.recovery_context);
3745            self.emit_recovery_recipe_summary(
3746                tx,
3747                scenario.label(),
3748                compact_recovery_decision_summary(&decision),
3749            )
3750            .await;
3751            let needs_refresh = match &decision {
3752                RecoveryDecision::Attempt(plan) => plan
3753                    .recipe
3754                    .steps
3755                    .contains(&RecoveryStep::RefreshRuntimeProfile),
3756                RecoveryDecision::Escalate { recipe, .. } => {
3757                    recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3758                }
3759            };
3760            if needs_refresh {
3761                if let Some((model_id, context_length, changed)) = self
3762                    .refresh_runtime_profile_and_report(tx, "context_window_failure")
3763                    .await
3764                {
3765                    let note = if changed {
3766                        format!(
3767                            "Runtime refresh after context-window failure: model {} | CTX {}",
3768                            model_id, context_length
3769                        )
3770                    } else {
3771                        format!(
3772                            "Runtime refresh after context-window failure confirms model {} | CTX {}",
3773                            model_id, context_length
3774                        )
3775                    };
3776                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3777                }
3778            }
3779        }
3780        if let Some(state) = provider_state_for_runtime_failure(class) {
3781            let _ = tx
3782                .send(InferenceEvent::ProviderStatus {
3783                    state,
3784                    summary: compact_runtime_failure_summary(class).into(),
3785                })
3786                .await;
3787        }
3788        if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3789            self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3790                .await;
3791        }
3792        let formatted = format_runtime_failure(class, detail);
3793        self.history.push(ChatMessage::system(&format!(
3794            "# RUNTIME FAILURE\n{}",
3795            formatted
3796        )));
3797        self.transcript.log_system(&formatted);
3798        let _ = tx.send(InferenceEvent::Error(formatted)).await;
3799        let _ = tx.send(InferenceEvent::Done).await;
3800    }
3801
3802    /// [Task Analyzer] Run 'cargo check' and return a concise summary for the model.
3803    async fn auto_verify_build(&self) -> String {
3804        match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3805            Ok(out) => {
3806                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3807                    + &cap_output(&out, 2000)
3808            }
3809            Err(e) => format!(
3810                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3811                cap_output(&e, 2000)
3812            ),
3813        }
3814    }
3815
3816    /// Triggers an LLM call to summarize old messages if history exceeds the VRAM character limit.
3817    /// Triggers the Deterministic Smart Compaction algorithm to shrink history while preserving context.
3818    /// Triggers the Recursive Context Compactor.
3819    async fn compact_history_if_needed(
3820        &mut self,
3821        tx: &mpsc::Sender<InferenceEvent>,
3822        anchor_index: Option<usize>,
3823    ) -> Result<bool, String> {
3824        let vram_ratio = self.gpu_state.ratio();
3825        let context_length = self.engine.current_context_length();
3826        let config = CompactionConfig::adaptive(context_length, vram_ratio);
3827
3828        if !compaction::should_compact(&self.history, context_length, vram_ratio) {
3829            return Ok(false);
3830        }
3831
3832        let _ = tx
3833            .send(InferenceEvent::Thought(format!(
3834                "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
3835                context_length / 1000,
3836                vram_ratio * 100.0,
3837                config.max_estimated_tokens / 1000,
3838            )))
3839            .await;
3840
3841        let result = compaction::compact_history(
3842            &self.history,
3843            self.running_summary.as_deref(),
3844            config,
3845            anchor_index,
3846        );
3847
3848        let removed_message_count = self.history.len().saturating_sub(result.messages.len());
3849        self.history = result.messages;
3850        self.running_summary = result.summary;
3851
3852        // Layer 6: Memory Synthesis (Task Context Persistence)
3853        let previous_memory = self.session_memory.clone();
3854        self.session_memory = compaction::extract_memory(&self.history);
3855        self.session_memory
3856            .inherit_runtime_ledger_from(&previous_memory);
3857        self.session_memory.record_compaction(
3858            removed_message_count,
3859            format!(
3860                "Compacted history around active task '{}' and preserved {} working-set file(s).",
3861                self.session_memory.current_task,
3862                self.session_memory.working_set.len()
3863            ),
3864        );
3865        self.emit_compaction_pressure(tx).await;
3866
3867        // Jinja alignment: preserved slice may start with assistant/tool messages.
3868        // Strip any leading non-user messages so the first non-system message is always user.
3869        let first_non_sys = self
3870            .history
3871            .iter()
3872            .position(|m| m.role != "system")
3873            .unwrap_or(self.history.len());
3874        if first_non_sys < self.history.len() {
3875            if let Some(user_offset) = self.history[first_non_sys..]
3876                .iter()
3877                .position(|m| m.role == "user")
3878            {
3879                if user_offset > 0 {
3880                    self.history
3881                        .drain(first_non_sys..first_non_sys + user_offset);
3882                }
3883            }
3884        }
3885
3886        let _ = tx
3887            .send(InferenceEvent::Thought(format!(
3888                "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
3889                self.session_memory.current_task,
3890                self.session_memory.working_set.len()
3891            )))
3892            .await;
3893        let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
3894        self.emit_recovery_recipe_summary(
3895            tx,
3896            recipe.recipe.scenario.label(),
3897            compact_recovery_plan_summary(&recipe),
3898        )
3899        .await;
3900        self.emit_operator_checkpoint(
3901            tx,
3902            OperatorCheckpointState::HistoryCompacted,
3903            format!(
3904                "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
3905                self.session_memory.current_task,
3906                self.session_memory.working_set.len()
3907            ),
3908        )
3909        .await;
3910
3911        Ok(true)
3912    }
3913
3914    /// Query The Vein for context relevant to the user's message.
3915    /// Runs hybrid BM25 + semantic search (semantic requires embedding model in LM Studio).
3916    /// Returns a formatted system message string, or None if nothing useful found.
3917    fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3918        // Skip trivial / very short inputs.
3919        if query.trim().split_whitespace().count() < 3 {
3920            return None;
3921        }
3922
3923        let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3924        if results.is_empty() {
3925            return None;
3926        }
3927
3928        let semantic_active = self.vein.has_any_embeddings();
3929        let header = if semantic_active {
3930            "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3931             Use this to answer without needing extra read_file calls where possible.\n\n"
3932        } else {
3933            "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3934             Use this to answer without needing extra read_file calls where possible.\n\n"
3935        };
3936
3937        let mut ctx = String::from(header);
3938        let mut paths: Vec<String> = Vec::new();
3939
3940        let mut total = 0usize;
3941        const MAX_CTX_CHARS: usize = 1_500;
3942
3943        for r in results {
3944            if total >= MAX_CTX_CHARS {
3945                break;
3946            }
3947            let snippet = if r.content.len() > 500 {
3948                format!("{}...", &r.content[..500])
3949            } else {
3950                r.content.clone()
3951            };
3952            ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3953            total += snippet.len() + r.path.len() + 10;
3954            if !paths.contains(&r.path) {
3955                paths.push(r.path);
3956            }
3957        }
3958
3959        Some((ctx, paths))
3960    }
3961
3962    /// Returns the conversation history (WITHOUT the system prompt) for the context window.
3963    /// This ensures we don't have redundant system blocks and prevents Jinja crashes.
3964    fn context_window_slice(&self) -> Vec<ChatMessage> {
3965        let mut result = Vec::new();
3966
3967        // Skip index 0 (the raw system message) and any stray system messages in history.
3968        if self.history.len() > 1 {
3969            for m in &self.history[1..] {
3970                if m.role == "system" {
3971                    continue;
3972                }
3973
3974                let mut sanitized = m.clone();
3975                // DEEP SANITIZE: LM Studio Jinja templates for Qwen crash on truly empty content.
3976                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3977                    sanitized.content = MessageContent::Text(" ".into());
3978                }
3979                result.push(sanitized);
3980            }
3981        }
3982
3983        // Jinja Guard: The first message after the system prompt MUST be 'user'.
3984        // If not (e.g. because of compaction), we insert a tiny anchor.
3985        if !result.is_empty() && result[0].role != "user" {
3986            result.insert(0, ChatMessage::user("Continuing previous context..."));
3987        }
3988
3989        result
3990    }
3991
3992    fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
3993        let mut result = Vec::new();
3994
3995        if self.history.len() > 1 {
3996            let start = start_idx.max(1).min(self.history.len());
3997            for m in &self.history[start..] {
3998                if m.role == "system" {
3999                    continue;
4000                }
4001
4002                let mut sanitized = m.clone();
4003                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
4004                    sanitized.content = MessageContent::Text(" ".into());
4005                }
4006                result.push(sanitized);
4007            }
4008        }
4009
4010        if !result.is_empty() && result[0].role != "user" {
4011            result.insert(0, ChatMessage::user("Continuing current plan execution..."));
4012        }
4013
4014        result
4015    }
4016
4017    /// Drop old turns from the middle of history.
4018    fn trim_history(&mut self, max_messages: usize) {
4019        if self.history.len() <= max_messages {
4020            return;
4021        }
4022        // Always keep [0] (system prompt).
4023        let excess = self.history.len() - max_messages;
4024        self.history.drain(1..=excess);
4025    }
4026
4027    /// P1: Attempt to fix malformed JSON tool arguments by asking the model to re-output them.
4028    async fn repair_tool_args(
4029        &self,
4030        tool_name: &str,
4031        bad_json: &str,
4032        tx: &mpsc::Sender<InferenceEvent>,
4033    ) -> Result<Value, String> {
4034        let _ = tx
4035            .send(InferenceEvent::Thought(format!(
4036                "Attempting to repair malformed JSON for '{}'...",
4037                tool_name
4038            )))
4039            .await;
4040
4041        let prompt = format!(
4042            "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
4043            tool_name, bad_json
4044        );
4045
4046        let messages = vec![
4047            ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
4048            ChatMessage::user(&prompt),
4049        ];
4050
4051        // Use fast model for speed if available.
4052        let (text, _, _, _) = self
4053            .engine
4054            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4055            .await
4056            .map_err(|e| e.to_string())?;
4057
4058        let cleaned = text
4059            .unwrap_or_default()
4060            .trim()
4061            .trim_start_matches("```json")
4062            .trim_start_matches("```")
4063            .trim_end_matches("```")
4064            .trim()
4065            .to_string();
4066
4067        serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
4068    }
4069
4070    /// P2: Run a fast validation step after file writes to check for subtle logic errors.
4071    async fn run_critic_check(
4072        &self,
4073        path: &str,
4074        content: &str,
4075        tx: &mpsc::Sender<InferenceEvent>,
4076    ) -> Option<String> {
4077        // Only run for source code files.
4078        let ext = std::path::Path::new(path)
4079            .extension()
4080            .and_then(|e| e.to_str())
4081            .unwrap_or("");
4082        const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
4083        if !CRITIC_EXTS.contains(&ext) {
4084            return None;
4085        }
4086
4087        let _ = tx
4088            .send(InferenceEvent::Thought(format!(
4089                "CRITIC: Reviewing changes to '{}'...",
4090                path
4091            )))
4092            .await;
4093
4094        let truncated = cap_output(content, 4000);
4095
4096        let prompt = format!(
4097            "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
4098            path, ext, truncated
4099        );
4100
4101        let messages = vec![
4102            ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
4103            ChatMessage::user(&prompt)
4104        ];
4105
4106        let (text, _, _, _) = self
4107            .engine
4108            .call_with_tools(&messages, &[], self.fast_model.as_deref())
4109            .await
4110            .ok()?;
4111
4112        let critique = text?.trim().to_string();
4113        if critique.to_uppercase().contains("PASS") || critique.is_empty() {
4114            None
4115        } else {
4116            Some(critique)
4117        }
4118    }
4119}
4120
4121// ── Tool dispatcher ───────────────────────────────────────────────────────────
4122
/// Public entry point for executing a tool by name.
///
/// Forwards `name` and `args` unchanged to `dispatch_builtin_tool` and returns its result
/// as-is (a `String` on success, a `String` error on failure).
pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
    dispatch_builtin_tool(name, args).await
}
4126
/// Cleans a user prompt for use as the `issue` argument of a fix-plan request.
///
/// Surrounding whitespace is trimmed and a leading `/think` or `/no_think` directive is
/// removed (plain prefix match, checked in that order). Returns `None` when nothing but
/// whitespace remains.
fn normalize_fix_plan_issue_text(text: &str) -> Option<String> {
    let body = text.trim();
    let without_directive = match body.strip_prefix("/think") {
        Some(rest) => rest.trim(),
        None => match body.strip_prefix("/no_think") {
            Some(rest) => rest.trim(),
            None => body,
        },
    };
    let issue = without_directive.trim_start_matches('\n').trim();
    if issue.is_empty() {
        None
    } else {
        Some(issue.to_string())
    }
}
4138
4139fn fill_missing_fix_plan_issue(tool_name: &str, args: &mut Value, fallback_issue: Option<&str>) {
4140    if tool_name != "inspect_host" {
4141        return;
4142    }
4143
4144    let Some(topic) = args.get("topic").and_then(|v| v.as_str()) else {
4145        return;
4146    };
4147    if topic != "fix_plan" {
4148        return;
4149    }
4150
4151    let issue_missing = args
4152        .get("issue")
4153        .and_then(|v| v.as_str())
4154        .map(str::trim)
4155        .is_none_or(|value| value.is_empty());
4156    if !issue_missing {
4157        return;
4158    }
4159
4160    let Some(fallback_issue) = fallback_issue.and_then(normalize_fix_plan_issue_text) else {
4161        return;
4162    };
4163
4164    let Value::Object(map) = args else {
4165        return;
4166    };
4167    map.insert(
4168        "issue".to_string(),
4169        Value::String(fallback_issue.to_string()),
4170    );
4171}
4172
4173fn should_rewrite_shell_to_fix_plan(
4174    tool_name: &str,
4175    args: &Value,
4176    latest_user_prompt: Option<&str>,
4177) -> bool {
4178    if tool_name != "shell" {
4179        return false;
4180    }
4181    let Some(prompt) = latest_user_prompt else {
4182        return false;
4183    };
4184    if preferred_host_inspection_topic(prompt) != Some("fix_plan") {
4185        return false;
4186    }
4187    let command = args
4188        .get("command")
4189        .and_then(|value| value.as_str())
4190        .unwrap_or("");
4191    shell_looks_like_structured_host_inspection(command)
4192}
4193
4194fn extract_release_arg(command: &str, flag: &str) -> Option<String> {
4195    let pattern = format!(r#"(?i){}\s+['"]?([^'" \r\n]+)['"]?"#, regex::escape(flag));
4196    let regex = regex::Regex::new(&pattern).ok()?;
4197    let captures = regex.captures(command)?;
4198    captures.get(1).map(|m| m.as_str().to_string())
4199}
4200
4201fn infer_maintainer_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4202    let workflow = preferred_maintainer_workflow(prompt)?;
4203    let lower = prompt.to_ascii_lowercase();
4204    match workflow {
4205        "clean" => Some(serde_json::json!({
4206            "workflow": "clean",
4207            "deep": lower.contains("deep clean")
4208                || lower.contains("deep cleanup")
4209                || lower.contains("deep"),
4210            "reset": lower.contains("reset"),
4211            "prune_dist": lower.contains("prune dist")
4212                || lower.contains("prune old dist")
4213                || lower.contains("prune old artifacts")
4214                || lower.contains("old dist artifacts")
4215                || lower.contains("old artifacts"),
4216        })),
4217        "package_windows" => Some(serde_json::json!({
4218            "workflow": "package_windows",
4219            "installer": lower.contains("installer") || lower.contains("setup.exe"),
4220            "add_to_path": lower.contains("addtopath")
4221                || lower.contains("add to path")
4222                || lower.contains("update path")
4223                || lower.contains("refresh path"),
4224        })),
4225        "release" => {
4226            let version = regex::Regex::new(r#"(?i)\b(\d+\.\d+\.\d+)\b"#)
4227                .ok()
4228                .and_then(|re| re.captures(prompt))
4229                .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()));
4230            let bump = if lower.contains("patch") {
4231                Some("patch")
4232            } else if lower.contains("minor") {
4233                Some("minor")
4234            } else if lower.contains("major") {
4235                Some("major")
4236            } else {
4237                None
4238            };
4239            let mut args = serde_json::json!({
4240                "workflow": "release",
4241                "push": lower.contains(" push") || lower.starts_with("push ") || lower.contains(" and push"),
4242                "add_to_path": lower.contains("addtopath")
4243                    || lower.contains("add to path")
4244                    || lower.contains("update path"),
4245                "skip_installer": lower.contains("skip installer"),
4246                "publish_crates": lower.contains("publish crates") || lower.contains("crates.io"),
4247                "publish_voice_crate": lower.contains("publish voice crate")
4248                    || lower.contains("publish hematite-kokoros"),
4249            });
4250            if let Some(version) = version {
4251                args["version"] = Value::String(version);
4252            }
4253            if let Some(bump) = bump {
4254                args["bump"] = Value::String(bump.to_string());
4255            }
4256            Some(args)
4257        }
4258        _ => None,
4259    }
4260}
4261
4262fn infer_workspace_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
4263    let workflow = preferred_workspace_workflow(prompt)?;
4264    let lower = prompt.to_ascii_lowercase();
4265    let trimmed = prompt.trim();
4266
4267    if let Some(command) = extract_workspace_command_from_prompt(trimmed) {
4268        return Some(serde_json::json!({
4269            "workflow": "command",
4270            "command": command,
4271        }));
4272    }
4273
4274    if let Some(path) = extract_workspace_script_path_from_prompt(trimmed) {
4275        return Some(serde_json::json!({
4276            "workflow": "script_path",
4277            "path": path,
4278        }));
4279    }
4280
4281    match workflow {
4282        "build" | "test" | "lint" | "fix" => Some(serde_json::json!({
4283            "workflow": workflow,
4284        })),
4285        "script" => {
4286            let package_script = if lower.contains("npm run ") {
4287                extract_word_after(&lower, "npm run ")
4288            } else if lower.contains("pnpm run ") {
4289                extract_word_after(&lower, "pnpm run ")
4290            } else if lower.contains("bun run ") {
4291                extract_word_after(&lower, "bun run ")
4292            } else if lower.contains("yarn ") {
4293                extract_word_after(&lower, "yarn ")
4294            } else {
4295                None
4296            };
4297
4298            if let Some(name) = package_script {
4299                return Some(serde_json::json!({
4300                    "workflow": "package_script",
4301                    "name": name,
4302                }));
4303            }
4304
4305            if let Some(name) = extract_word_after(&lower, "just ") {
4306                return Some(serde_json::json!({
4307                    "workflow": "just",
4308                    "name": name,
4309                }));
4310            }
4311            if let Some(name) = extract_word_after(&lower, "make ") {
4312                return Some(serde_json::json!({
4313                    "workflow": "make",
4314                    "name": name,
4315                }));
4316            }
4317            if let Some(name) = extract_word_after(&lower, "task ") {
4318                return Some(serde_json::json!({
4319                    "workflow": "task",
4320                    "name": name,
4321                }));
4322            }
4323
4324            None
4325        }
4326        _ => None,
4327    }
4328}
4329
/// Pulls a build/test command line out of a free-form prompt.
///
/// The known command prefixes are tried in list order (not by position in the prompt) with
/// a case-insensitive substring search. For the first prefix found, everything from its
/// first occurrence to the end of the prompt is returned, with surrounding whitespace and
/// backticks removed and the original casing kept.
fn extract_workspace_command_from_prompt(prompt: &str) -> Option<String> {
    const COMMAND_PREFIXES: [&str; 13] = [
        "cargo ",
        "npm ",
        "pnpm ",
        "yarn ",
        "bun ",
        "pytest",
        "go build",
        "go test",
        "make ",
        "just ",
        "task ",
        "./gradlew",
        ".\\gradlew",
    ];

    // ASCII lowercasing keeps byte offsets identical, so an index found in `lowered` is a
    // valid index into `prompt`.
    let lowered = prompt.to_ascii_lowercase();
    COMMAND_PREFIXES
        .iter()
        .find_map(|prefix| lowered.find(*prefix))
        .map(|start| prompt[start..].trim().trim_matches('`').to_string())
}
4353
/// Finds the first `scripts/…` script path mentioned in a prompt or command line.
///
/// Backslashes are normalised to `/`. Each whitespace-separated token is stripped of
/// surrounding quotes, backticks, commas and parentheses, of trailing dots, and of any
/// leading `./`. The token is returned (original casing) when it starts with `scripts/` and
/// ends, case-insensitively, with one of the recognised script extensions.
fn extract_workspace_script_path_from_prompt(prompt: &str) -> Option<String> {
    const SCRIPT_EXTENSIONS: [&str; 8] =
        [".ps1", ".sh", ".py", ".cmd", ".bat", ".js", ".mjs", ".cjs"];

    /// Quoting / punctuation characters that may wrap a path inside prose.
    fn is_wrapper(c: char) -> bool {
        matches!(c, '`' | '"' | '\'' | ',' | ')' | '(')
    }

    let normalized = prompt.replace('\\', "/");
    normalized.split_whitespace().find_map(|token| {
        // '.' is stripped only from the end (sentence punctuation). Stripping it from the
        // start as well would turn "./scripts/x.sh" into "/scripts/x.sh" before the "./"
        // prefix could be removed, so relative paths would never match.
        let candidate = token
            .trim_start_matches(is_wrapper)
            .trim_end_matches(|c: char| is_wrapper(c) || c == '.')
            .trim_start_matches("./");
        if !candidate.starts_with("scripts/") {
            return None;
        }
        let lowered = candidate.to_ascii_lowercase();
        SCRIPT_EXTENSIONS
            .iter()
            .any(|ext| lowered.ends_with(ext))
            .then(|| candidate.to_string())
    })
}
4370
/// Returns the first whitespace-delimited word that follows `prefix` in `haystack`.
///
/// Surrounding backticks, quotes, commas, dots and parentheses are stripped from the word.
/// Returns `None` when `prefix` is absent, nothing follows it, or the following token
/// consists only of such punctuation (so callers never receive an empty name).
fn extract_word_after(haystack: &str, prefix: &str) -> Option<String> {
    let start = haystack.find(prefix)? + prefix.len();
    let word = haystack[start..]
        .split_whitespace()
        .next()?
        .trim_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('));
    // Check emptiness after trimming: a token such as "..." reduces to nothing.
    (!word.is_empty()).then(|| word.to_string())
}
4384
4385fn rewrite_shell_to_maintainer_workflow_args(command: &str) -> Option<Value> {
4386    let lower = command.to_ascii_lowercase();
4387    if lower.contains("clean.ps1") {
4388        return Some(serde_json::json!({
4389            "workflow": "clean",
4390            "deep": lower.contains("-deep"),
4391            "reset": lower.contains("-reset"),
4392            "prune_dist": lower.contains("-prunedist"),
4393        }));
4394    }
4395    if lower.contains("package-windows.ps1") {
4396        return Some(serde_json::json!({
4397            "workflow": "package_windows",
4398            "installer": lower.contains("-installer"),
4399            "add_to_path": lower.contains("-addtopath"),
4400        }));
4401    }
4402    if lower.contains("release.ps1") {
4403        let version = extract_release_arg(command, "-Version");
4404        let bump = extract_release_arg(command, "-Bump");
4405        if version.is_none() && bump.is_none() {
4406            return Some(serde_json::json!({
4407                "workflow": "release"
4408            }));
4409        }
4410        let mut args = serde_json::json!({
4411            "workflow": "release",
4412            "push": lower.contains("-push"),
4413            "add_to_path": lower.contains("-addtopath"),
4414            "skip_installer": lower.contains("-skipinstaller"),
4415            "publish_crates": lower.contains("-publishcrates"),
4416            "publish_voice_crate": lower.contains("-publishvoicecrate"),
4417        });
4418        if let Some(version) = version {
4419            args["version"] = Value::String(version);
4420        }
4421        if let Some(bump) = bump {
4422            args["bump"] = Value::String(bump);
4423        }
4424        return Some(args);
4425    }
4426    None
4427}
4428
4429fn rewrite_shell_to_workspace_workflow_args(command: &str) -> Option<Value> {
4430    let lower = command.to_ascii_lowercase();
4431    if lower.contains("clean.ps1")
4432        || lower.contains("package-windows.ps1")
4433        || lower.contains("release.ps1")
4434    {
4435        return None;
4436    }
4437
4438    if let Some(path) = extract_workspace_script_path_from_prompt(command) {
4439        return Some(serde_json::json!({
4440            "workflow": "script_path",
4441            "path": path,
4442        }));
4443    }
4444
4445    let looks_like_workspace_command = [
4446        "cargo ",
4447        "npm ",
4448        "pnpm ",
4449        "yarn ",
4450        "bun ",
4451        "pytest",
4452        "go build",
4453        "go test",
4454        "make ",
4455        "just ",
4456        "task ",
4457        "./gradlew",
4458        ".\\gradlew",
4459    ]
4460    .iter()
4461    .any(|needle| lower.contains(needle));
4462
4463    if looks_like_workspace_command {
4464        Some(serde_json::json!({
4465            "workflow": "command",
4466            "command": command.trim(),
4467        }))
4468    } else {
4469        None
4470    }
4471}
4472
4473fn rewrite_host_tool_call(
4474    tool_name: &mut String,
4475    args: &mut Value,
4476    latest_user_prompt: Option<&str>,
4477) {
4478    if *tool_name == "shell" {
4479        let command = args
4480            .get("command")
4481            .and_then(|value| value.as_str())
4482            .unwrap_or("");
4483        if let Some(maintainer_workflow_args) = rewrite_shell_to_maintainer_workflow_args(command) {
4484            *tool_name = "run_hematite_maintainer_workflow".to_string();
4485            *args = maintainer_workflow_args;
4486            return;
4487        }
4488        if let Some(workspace_workflow_args) = rewrite_shell_to_workspace_workflow_args(command) {
4489            *tool_name = "run_workspace_workflow".to_string();
4490            *args = workspace_workflow_args;
4491            return;
4492        }
4493    }
4494    if *tool_name != "run_hematite_maintainer_workflow" {
4495        if let Some(prompt_args) =
4496            latest_user_prompt.and_then(infer_maintainer_workflow_args_from_prompt)
4497        {
4498            *tool_name = "run_hematite_maintainer_workflow".to_string();
4499            *args = prompt_args;
4500            return;
4501        }
4502    }
4503    if *tool_name != "run_workspace_workflow" {
4504        if let Some(prompt_args) =
4505            latest_user_prompt.and_then(infer_workspace_workflow_args_from_prompt)
4506        {
4507            *tool_name = "run_workspace_workflow".to_string();
4508            *args = prompt_args;
4509            return;
4510        }
4511    }
4512    if should_rewrite_shell_to_fix_plan(tool_name, args, latest_user_prompt) {
4513        *tool_name = "inspect_host".to_string();
4514        *args = serde_json::json!({
4515            "topic": "fix_plan"
4516        });
4517    }
4518    fill_missing_fix_plan_issue(tool_name, args, latest_user_prompt);
4519}
4520
4521fn canonical_tool_call_key(tool_name: &str, args: &Value) -> String {
4522    format!(
4523        "{}:{}",
4524        tool_name,
4525        serde_json::to_string(args).unwrap_or_default()
4526    )
4527}
4528
4529fn normalized_tool_call_for_execution(
4530    tool_name: &str,
4531    raw_arguments: &str,
4532    gemma4_model: bool,
4533    latest_user_prompt: Option<&str>,
4534) -> (String, Value) {
4535    let normalized_arguments = if gemma4_model {
4536        crate::agent::inference::normalize_tool_argument_string(tool_name, raw_arguments)
4537    } else {
4538        raw_arguments.to_string()
4539    };
4540    let mut normalized_name = tool_name.to_string();
4541    let mut args = serde_json::from_str::<Value>(&normalized_arguments)
4542        .unwrap_or(Value::Object(Default::default()));
4543    rewrite_host_tool_call(&mut normalized_name, &mut args, latest_user_prompt);
4544    (normalized_name, args)
4545}
4546
/// Test-only helper: the canonical key of a raw tool call after normalisation and rewriting.
///
/// Composes `normalized_tool_call_for_execution` with `canonical_tool_call_key`.
#[cfg(test)]
fn normalized_tool_call_key_for_dedupe(
    tool_name: &str,
    raw_arguments: &str,
    gemma4_model: bool,
    latest_user_prompt: Option<&str>,
) -> String {
    let (effective_name, effective_args) = normalized_tool_call_for_execution(
        tool_name,
        raw_arguments,
        gemma4_model,
        latest_user_prompt,
    );
    let key = canonical_tool_call_key(&effective_name, &effective_args);
    key
}
4562
4563impl ConversationManager {
    /// Checks if a tool call is authorized given the current configuration and mode.
    ///
    /// Thin delegate: `name`, `args`, `config` and `yolo_flag` are forwarded unchanged to
    /// `permission_enforcer::authorize_tool_call` and its decision is returned as-is. No
    /// state on `self` is consulted.
    ///
    /// NOTE(review): `yolo_flag` presumably relaxes approval requirements; confirm against
    /// `permission_enforcer`.
    fn check_authorization(
        &self,
        name: &str,
        args: &serde_json::Value,
        config: &crate::agent::config::HematiteConfig,
        yolo_flag: bool,
    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
    }
4574
4575    /// Layer 4: Isolated tool execution logic. Does not mutate 'self' to allow parallelism.
4576    async fn process_tool_call(
4577        &self,
4578        mut call: ToolCallFn,
4579        config: crate::agent::config::HematiteConfig,
4580        yolo: bool,
4581        tx: mpsc::Sender<InferenceEvent>,
4582        real_id: String,
4583    ) -> ToolExecutionOutcome {
4584        let mut msg_results = Vec::new();
4585        let gemma4_model =
4586            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
4587        let normalized_arguments = if gemma4_model {
4588            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
4589        } else {
4590            call.arguments.clone()
4591        };
4592
4593        // 1. Argument Parsing & Repair
4594        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
4595            Ok(v) => v,
4596            Err(_) => {
4597                match self
4598                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
4599                    .await
4600                {
4601                    Ok(v) => v,
4602                    Err(e) => {
4603                        let _ = tx
4604                            .send(InferenceEvent::Thought(format!(
4605                                "JSON Repair failed: {}",
4606                                e
4607                            )))
4608                            .await;
4609                        Value::Object(Default::default())
4610                    }
4611                }
4612            }
4613        };
4614        let last_user_prompt = self
4615            .history
4616            .iter()
4617            .rev()
4618            .find(|message| message.role == "user")
4619            .map(|message| message.content.as_str());
4620        rewrite_host_tool_call(&mut call.name, &mut args, last_user_prompt);
4621
4622        let display = format_tool_display(&call.name, &args);
4623        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
4624        let auth = self.check_authorization(&call.name, &args, &config, yolo);
4625
4626        // 2. Permission Check
4627        let decision_result = match precondition_result {
4628            Err(e) => Err(e),
4629            Ok(_) => match auth {
4630                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
4631                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
4632                    reason,
4633                    source: _,
4634                } => {
4635                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
4636                    let _ = tx
4637                        .send(InferenceEvent::ApprovalRequired {
4638                            id: real_id.clone(),
4639                            name: call.name.clone(),
4640                            display: format!("{}\nWhy: {}", display, reason),
4641                            diff: None,
4642                            responder: approve_tx,
4643                        })
4644                        .await;
4645
4646                    match approve_rx.await {
4647                        Ok(true) => Ok(()),
4648                        _ => Err("Declined by user".into()),
4649                    }
4650                }
4651                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
4652                    reason, ..
4653                } => Err(reason),
4654            },
4655        };
4656        let blocked_by_policy =
4657            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
4658
4659        // 3. Execution (Local or MCP)
4660        let (output, is_error) = match decision_result {
4661            Err(e) if e.starts_with("[auto-redirected shell→inspect_host") => (e, false),
4662            Err(e) => (format!("Error: {}", e), true),
4663            Ok(_) => {
4664                let _ = tx
4665                    .send(InferenceEvent::ToolCallStart {
4666                        id: real_id.clone(),
4667                        name: call.name.clone(),
4668                        args: display.clone(),
4669                    })
4670                    .await;
4671
4672                let result = if call.name.starts_with("lsp_") {
4673                    let lsp = self.lsp_manager.clone();
4674                    let path = args
4675                        .get("path")
4676                        .and_then(|v| v.as_str())
4677                        .unwrap_or("")
4678                        .to_string();
4679                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4680                    let character =
4681                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4682
4683                    match call.name.as_str() {
4684                        "lsp_definitions" => {
4685                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
4686                                .await
4687                        }
4688                        "lsp_references" => {
4689                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
4690                                .await
4691                        }
4692                        "lsp_hover" => {
4693                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
4694                        }
4695                        "lsp_search_symbol" => {
4696                            let query = args
4697                                .get("query")
4698                                .and_then(|v| v.as_str())
4699                                .unwrap_or_default()
4700                                .to_string();
4701                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
4702                        }
4703                        "lsp_rename_symbol" => {
4704                            let new_name = args
4705                                .get("new_name")
4706                                .and_then(|v| v.as_str())
4707                                .unwrap_or_default()
4708                                .to_string();
4709                            crate::tools::lsp_tools::lsp_rename_symbol(
4710                                lsp, path, line, character, new_name,
4711                            )
4712                            .await
4713                        }
4714                        "lsp_get_diagnostics" => {
4715                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
4716                        }
4717                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
4718                    }
4719                } else if call.name == "auto_pin_context" {
4720                    let pts = args.get("paths").and_then(|v| v.as_array());
4721                    let reason = args
4722                        .get("reason")
4723                        .and_then(|v| v.as_str())
4724                        .unwrap_or("uninformed scoping");
4725                    if let Some(arr) = pts {
4726                        let mut pinned = Vec::new();
4727                        {
4728                            let mut guard = self.pinned_files.lock().await;
4729                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; // 25MB Safety Valve
4730
4731                            for v in arr.iter().take(3) {
4732                                if let Some(p) = v.as_str() {
4733                                    if let Ok(meta) = std::fs::metadata(p) {
4734                                        if meta.len() > MAX_PINNED_SIZE {
4735                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
4736                                            continue;
4737                                        }
4738                                        if let Ok(content) = std::fs::read_to_string(p) {
4739                                            guard.insert(p.to_string(), content);
4740                                            pinned.push(p.to_string());
4741                                        }
4742                                    }
4743                                }
4744                            }
4745                        }
4746                        let msg = format!(
4747                            "Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
4748                            pinned.join(", "),
4749                            reason
4750                        );
4751                        let _ = tx
4752                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
4753                            .await;
4754                        Ok(msg)
4755                    } else {
4756                        Err("Missing 'paths' array for auto_pin_context.".to_string())
4757                    }
4758                } else if call.name == "list_pinned" {
4759                    let paths_msg = {
4760                        let pinned = self.pinned_files.lock().await;
4761                        if pinned.is_empty() {
4762                            "No files are currently pinned.".to_string()
4763                        } else {
4764                            let paths: Vec<_> = pinned.keys().cloned().collect();
4765                            format!(
4766                                "Currently pinned files in active memory:\n- {}",
4767                                paths.join("\n- ")
4768                            )
4769                        }
4770                    };
4771                    Ok(paths_msg)
4772                } else if call.name.starts_with("mcp__") {
4773                    let mut mcp = self.mcp_manager.lock().await;
4774                    match mcp.call_tool(&call.name, &args).await {
4775                        Ok(res) => Ok(res),
4776                        Err(e) => Err(e.to_string()),
4777                    }
4778                } else if call.name == "swarm" {
4779                    // ── Swarm Orchestration ──
4780                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
4781                    let max_workers = args
4782                        .get("max_workers")
4783                        .and_then(|v| v.as_u64())
4784                        .unwrap_or(3) as usize;
4785
4786                    let mut task_objs = Vec::new();
4787                    if let Value::Array(arr) = tasks_val {
4788                        for v in arr {
4789                            let id = v
4790                                .get("id")
4791                                .and_then(|x| x.as_str())
4792                                .unwrap_or("?")
4793                                .to_string();
4794                            let target = v
4795                                .get("target")
4796                                .and_then(|x| x.as_str())
4797                                .unwrap_or("?")
4798                                .to_string();
4799                            let instruction = v
4800                                .get("instruction")
4801                                .and_then(|x| x.as_str())
4802                                .unwrap_or("?")
4803                                .to_string();
4804                            task_objs.push(crate::agent::parser::WorkerTask {
4805                                id,
4806                                target,
4807                                instruction,
4808                            });
4809                        }
4810                    }
4811
4812                    if task_objs.is_empty() {
4813                        Err("No tasks provided for swarm.".to_string())
4814                    } else {
4815                        let (swarm_tx_internal, mut swarm_rx_internal) =
4816                            tokio::sync::mpsc::channel(32);
4817                        let tx_forwarder = tx.clone();
4818
4819                        // Bridge SwarmMessage -> InferenceEvent
4820                        tokio::spawn(async move {
4821                            while let Some(msg) = swarm_rx_internal.recv().await {
4822                                match msg {
4823                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
4824                                        let _ = tx_forwarder
4825                                            .send(InferenceEvent::Thought(format!(
4826                                                "Swarm [{}]: {}% complete",
4827                                                id, p
4828                                            )))
4829                                            .await;
4830                                    }
4831                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
4832                                        worker_id,
4833                                        file_path,
4834                                        before: _,
4835                                        after: _,
4836                                        tx,
4837                                    } => {
4838                                        let (approve_tx, approve_rx) =
4839                                            tokio::sync::oneshot::channel::<bool>();
4840                                        let display = format!(
4841                                            "Swarm worker [{}]: Integrated changes into {:?}",
4842                                            worker_id, file_path
4843                                        );
4844                                        let _ = tx_forwarder
4845                                            .send(InferenceEvent::ApprovalRequired {
4846                                                id: format!("swarm_{}", worker_id),
4847                                                name: "swarm_apply".to_string(),
4848                                                display,
4849                                                diff: None,
4850                                                responder: approve_tx,
4851                                            })
4852                                            .await;
4853                                        if let Ok(approved) = approve_rx.await {
4854                                            let response = if approved {
4855                                                crate::agent::swarm::ReviewResponse::Accept
4856                                            } else {
4857                                                crate::agent::swarm::ReviewResponse::Reject
4858                                            };
4859                                            let _ = tx.send(response);
4860                                        }
4861                                    }
4862                                    crate::agent::swarm::SwarmMessage::Done => {}
4863                                }
4864                            }
4865                        });
4866
4867                        let coordinator = self.swarm_coordinator.clone();
4868                        match coordinator
4869                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
4870                            .await
4871                        {
4872                            Ok(_) => Ok(
4873                                "Swarm execution completed. Check files for integration results."
4874                                    .to_string(),
4875                            ),
4876                            Err(e) => Err(format!("Swarm failure: {}", e)),
4877                        }
4878                    }
4879                } else if call.name == "vision_analyze" {
4880                    crate::tools::vision::vision_analyze(&self.engine, &args).await
4881                } else if matches!(
4882                    call.name.as_str(),
4883                    "edit_file" | "patch_hunk" | "multi_search_replace"
4884                ) && !yolo
4885                {
4886                    // ── Diff preview gate ─────────────────────────────────────
4887                    // Compute what the edit would look like before applying it.
4888                    // If we can build a diff, require user Y/N in the TUI.
4889                    let diff_result = match call.name.as_str() {
4890                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
4891                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
4892                        _ => crate::tools::file_ops::compute_msr_diff(&args),
4893                    };
4894                    match diff_result {
4895                        Ok(diff_text) => {
4896                            let path_label =
4897                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
4898                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
4899                            let _ = tx
4900                                .send(InferenceEvent::ApprovalRequired {
4901                                    id: real_id.clone(),
4902                                    name: call.name.clone(),
4903                                    display: format!("Edit preview: {}", path_label),
4904                                    diff: Some(diff_text),
4905                                    responder: appr_tx,
4906                                })
4907                                .await;
4908                            match appr_rx.await {
4909                                Ok(true) => dispatch_tool(&call.name, &args).await,
4910                                _ => Err("Edit declined by user.".into()),
4911                            }
4912                        }
4913                        // Diff computation failed (e.g. search string not found yet) —
4914                        // fall through and let the tool return its own error.
4915                        Err(_) => dispatch_tool(&call.name, &args).await,
4916                    }
4917                } else if call.name == "verify_build" {
4918                    // Stream build output line-by-line to the SPECULAR panel so
4919                    // the operator sees live compiler progress during long builds.
4920                    crate::tools::verify_build::execute_streaming(&args, tx.clone()).await
4921                } else if call.name == "shell" {
4922                    // Stream shell output line-by-line to the SPECULAR panel so
4923                    // the operator sees live progress during long commands.
4924                    crate::tools::shell::execute_streaming(&args, tx.clone()).await
4925                } else {
4926                    dispatch_tool(&call.name, &args).await
4927                };
4928
4929                match result {
4930                    Ok(o) => (o, false),
4931                    Err(e) => (format!("Error: {}", e), true),
4932                }
4933            }
4934        };
4935
4936        // ── Session Economics ────────────────────────────────────────────────
4937        {
4938            if let Ok(mut econ) = self.engine.economics.lock() {
4939                econ.record_tool(&call.name, !is_error);
4940            }
4941        }
4942
4943        if !is_error {
4944            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
4945                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
4946                    if call.name == "inspect_lines" {
4947                        self.record_line_inspection(path).await;
4948                    } else {
4949                        self.record_read_observation(path).await;
4950                    }
4951                }
4952            }
4953
4954            if call.name == "verify_build" {
4955                let ok = output.contains("BUILD OK")
4956                    || output.contains("BUILD SUCCESS")
4957                    || output.contains("BUILD OKAY");
4958                self.record_verify_build_result(ok, &output).await;
4959            }
4960
4961            if matches!(
4962                call.name.as_str(),
4963                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4964            ) || is_mcp_mutating_tool(&call.name)
4965            {
4966                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
4967                    .await;
4968            }
4969
4970            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
4971                msg_results.push(receipt);
4972            }
4973        }
4974
4975        // 4. Critic Check (Specular Tier 2)
4976        // Gated: Only run on code files with substantive content to avoid burning tokens
4977        // on trivial doc/config edits.
4978        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
4979            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
4980            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
4981            let ext = std::path::Path::new(path)
4982                .extension()
4983                .and_then(|e| e.to_str())
4984                .unwrap_or("");
4985            const SKIP_EXTS: &[&str] = &[
4986                "md",
4987                "toml",
4988                "json",
4989                "txt",
4990                "yml",
4991                "yaml",
4992                "cfg",
4993                "csv",
4994                "lock",
4995                "gitignore",
4996            ];
4997            let line_count = content.lines().count();
4998            if !path.is_empty()
4999                && !content.is_empty()
5000                && !SKIP_EXTS.contains(&ext)
5001                && line_count >= 50
5002            {
5003                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
5004                    msg_results.push(ChatMessage::system(&format!(
5005                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
5006                        path, critique
5007                    )));
5008                }
5009            }
5010        }
5011
5012        ToolExecutionOutcome {
5013            call_id: real_id,
5014            tool_name: call.name,
5015            args,
5016            output,
5017            is_error,
5018            blocked_by_policy,
5019            msg_results,
5020        }
5021    }
5022}
5023
/// The result of an isolated tool execution.
/// Used to bridge Parallel/Serial execution back to the main history.
///
/// Constructed at the end of `ConversationManager::process_tool_call`.
struct ToolExecutionOutcome {
    /// Identifier of the tool call this outcome belongs to (the `real_id` given to
    /// `process_tool_call`).
    call_id: String,
    /// Tool name as actually executed, i.e. after any host-tool rewrite.
    tool_name: String,
    /// Parsed (and possibly repaired/rewritten) JSON arguments of the call.
    args: Value,
    /// Tool output text; on failure this is the error message prefixed with `"Error: "`.
    output: String,
    /// `true` when the call was refused, declined, or the tool itself failed.
    is_error: bool,
    /// `true` when the refusal message started with `"Action blocked:"`.
    blocked_by_policy: bool,
    /// Extra chat messages produced alongside the output (action receipt, critic review).
    msg_results: Vec<ChatMessage>,
}
5035
/// Cached record of a tool result; it stores only the name of the tool.
// NOTE(review): where this cache is populated and read is not visible in this part of the file.
#[derive(Clone)]
struct CachedToolResult {
    /// Name of the tool the cached entry refers to.
    tool_name: String,
}
5040
/// Returns `true` when `path` ends in a recognised source-code file extension.
///
/// The comparison ignores ASCII case. Paths without an extension (including
/// dot-files such as `.rs`) and extensions that are not valid UTF-8 yield `false`.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    match std::path::Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
    {
        Some(found) => CODE_EXTENSIONS
            .iter()
            .any(|known| known.eq_ignore_ascii_case(found)),
        None => false,
    }
}
5069
5070// ── Display helpers ───────────────────────────────────────────────────────────
5071
5072pub fn format_tool_display(name: &str, args: &Value) -> String {
5073    let get = |key: &str| {
5074        args.get(key)
5075            .and_then(|v| v.as_str())
5076            .unwrap_or("")
5077            .to_string()
5078    };
5079    match name {
5080        "shell" => format!("$ {}", get("command")),
5081
5082        "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
5083        "describe_toolchain" => format!("describe toolchain {}", get("topic")),
5084        "inspect_host" => format!("inspect host {}", get("topic")),
5085        _ => format!("{} {:?}", name, args),
5086    }
5087}
5088
5089// ── Text utilities ────────────────────────────────────────────────────────────
5090
/// Heuristically decides whether a shell command is really a host-inspection query
/// (tool versions, network state, services, registry, disks, …).
///
/// The command is lower-cased (ASCII) and checked against:
/// * `NEEDLES` — plain substrings, any of which is sufficient;
/// * `ORDERED_PAIRS` — two fragments that must both occur, the second somewhere
///   after the first. These replace entries that used to be written as regexes
///   (e.g. `get-content.*hosts`) but were matched literally, so the `.*` could never
///   act as a wildcard and `Get-Content …\etc\hosts` slipped through.
///
/// Exact duplicate needles from the previous list were dropped; that does not change
/// the result.
// NOTE(review): several needles are very short or generic ("ss ", "where ",
// "desktop", "benchmark") and also match unrelated commands; left as-is because
// tightening them would change established routing.
pub(crate) fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const NEEDLES: &[&str] = &[
        "$env:path",
        "pathvariable",
        "pip --version",
        "pipx --version",
        "winget --version",
        "choco",
        "scoop",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "tasklist",
        "ipconfig",
        "get-netipconfiguration",
        "get-netadapter",
        "route print",
        "ifconfig",
        "ip addr",
        "ip route",
        "resolv.conf",
        "get-service",
        "sc query",
        "systemctl",
        "service --status-all",
        "get-process",
        "working set",
        "ps -eo",
        "ps aux",
        "desktop",
        "downloads",
        "get-netfirewallprofile",
        "win32_powerplan",
        "win32_operatingsystem",
        "win32_processor",
        "wmic",
        "loadpercentage",
        "totalvisiblememory",
        "freephysicalmemory",
        "get-wmiobject",
        "get-ciminstance",
        "get-cpu",
        "processorname",
        "clockspeed",
        "top memory",
        "top cpu",
        "resource usage",
        "powercfg",
        "uptime",
        "lastbootuptime",
        // registry reads for OS/version/update/security info — always use inspect_host
        "hklm:",
        "hkcu:",
        "hklm:\\",
        "hkcu:\\",
        "currentversion",
        "productname",
        "displayversion",
        "get-itemproperty",
        "get-itempropertyvalue",
        // updates
        "get-windowsupdatelog",
        "windowsupdatelog",
        "microsoft.update.session",
        "createupdatesearcher",
        "wuauserv",
        "usoclient",
        "get-hotfix",
        "wu_",
        // security / defender
        "get-mpcomputerstatus",
        "get-mppreference",
        "get-mpthreat",
        "start-mpscan",
        "win32_computersecurity",
        "softwarelicensingproduct",
        "enablelua",
        "get-netfirewallrule",
        "netfirewallprofile",
        "antivirus",
        "defenderstatus",
        // disk health / smart
        "get-physicaldisk",
        "get-disk",
        "msstoragedriver_failurepredic",
        "win32_diskdrive",
        "smartstatus",
        "diskstatus",
        "get-counter",
        "intensity",
        "benchmark",
        "thrash",
        "get-item",
        "test-path",
        // gpo / certs / integrity / domain
        "gpresult",
        "applied gpo",
        "cert:\\",
        "cert:",
        "component based servicing",
        "componentstore",
        "get-computerinfo",
        "win32_computersystem",
        // battery
        "win32_battery",
        "batterystaticdata",
        "batteryfullchargedcapacity",
        "batterystatus",
        "estimatedchargeremaining",
        // crashes / event log (broader)
        "get-winevent",
        "eventid",
        "bugcheck",
        "kernelpower",
        "win32_ntlogevent",
        "filterhashtable",
        // scheduled tasks
        "get-scheduledtask",
        "get-scheduledtaskinfo",
        "schtasks",
        "taskscheduler",
        // general cim/wmi diagnostic queries — always use inspect_host
        "get-ciminstance win32",
        "get-wmiobject win32",
        // network admin — always use inspect_host
        "arp -",
        "arp -a",
        "tracert ",
        "traceroute ",
        "tracepath ",
        "get-dnsclientcache",
        "ipconfig /displaydns",
        "get-netroute",
        "ip neigh",
        "netsh winhttp show proxy",
        "netsh wlan show",
        "test-netconnection",
        "resolve-dnsname",
        // docker / wsl / ssh — always use inspect_host
        "docker ps",
        "docker info",
        "docker images",
        "docker container",
        "docker compose ls",
        "wsl --list",
        "wsl -l",
        "wsl --status",
        "wsl --version",
        "ssh -v",
        "get-service sshd",
        "get-service -name sshd",
        "cat ~/.ssh",
        "ls ~/.ssh",
        "ls -la ~/.ssh",
        // env / hosts / git config
        "get-childitem env:",
        "dir env:",
        "printenv",
        "[environment]::getenvironmentvariable",
        "cat /etc/hosts",
        "type c:\\windows\\system32\\drivers\\etc\\hosts",
        "git config --global --list",
        "git config --list",
        "git config --global",
        // database services
        "get-service mysql",
        "get-service postgresql",
        "get-service mongodb",
        "get-service redis",
        "get-service mssql",
        "get-service mariadb",
        "systemctl status postgresql",
        "systemctl status mysql",
        "systemctl status mongod",
        "systemctl status redis",
        // installed software
        "winget list",
        "get-package",
        "dpkg --get-selections",
        "rpm -qa",
        "brew list",
        // user accounts
        "get-localuser",
        "get-localgroupmember",
        "net user",
        "query user",
        "net localgroup administrators",
        // audit policy
        "auditpol /get",
        "auditpol",
        // shares
        "get-smbshare",
        "get-smbserverconfiguration",
        "net share",
        "net use",
        // dns servers
        "get-dnsclientserveraddress",
        "get-dnsclientdohserveraddress",
        "get-dnsclientglobalsetting",
    ];

    // (first, second): matches when `second` occurs anywhere after `first`.
    const ORDERED_PAIRS: &[(&str, &str)] = &[
        // proxy settings read from the registry
        ("get-itemproperty", "proxy"),
        // hosts file read via PowerShell
        ("get-content", "hosts"),
        // installed software read from the Uninstall registry keys
        ("get-itempropert", "uninstall"),
    ];

    let lower = command.to_ascii_lowercase();

    if NEEDLES.iter().any(|needle| lower.contains(*needle)) {
        return true;
    }

    // Searching after the FIRST occurrence of the head is enough: any later
    // occurrence would leave a shorter remainder to search.
    ORDERED_PAIRS.iter().any(|(head, tail)| {
        lower
            .find(*head)
            .map_or(false, |start| lower[start + head.len()..].contains(*tail))
    })
}
5322
5323// Moved strip_think_blocks to inference.rs
5324
5325fn cap_output(text: &str, max_bytes: usize) -> String {
5326    cap_output_for_tool(text, max_bytes, "output")
5327}
5328
5329/// Cap tool output at `max_bytes`. When the output exceeds the cap, write the
5330/// full content to `.hematite/scratch/<tool_name>_<timestamp>.txt` and include
5331/// the path in the truncation notice so the model can read the rest with
5332/// `read_file` instead of losing it entirely.
5333fn cap_output_for_tool(text: &str, max_bytes: usize, tool_name: &str) -> String {
5334    if text.len() <= max_bytes {
5335        return text.to_string();
5336    }
5337
5338    // Write full output to scratch so the model can access it.
5339    let scratch_path = write_output_to_scratch(text, tool_name);
5340
5341    let mut split_at = max_bytes;
5342    while !text.is_char_boundary(split_at) && split_at > 0 {
5343        split_at -= 1;
5344    }
5345
5346    let tail = match &scratch_path {
5347        Some(p) => format!(
5348            "\n... [output truncated — full output ({} bytes, {} lines) saved to '{}' — use read_file to access the rest]",
5349            text.len(),
5350            text.lines().count(),
5351            p
5352        ),
5353        None => format!("\n... [output capped at {}B]", max_bytes),
5354    };
5355
5356    format!("{}{}", &text[..split_at], tail)
5357}
5358
5359/// Write text to `.hematite/scratch/<tool>_<timestamp>.txt`.
5360/// Returns the relative path on success, None if the write fails.
5361fn write_output_to_scratch(text: &str, tool_name: &str) -> Option<String> {
5362    let root = crate::tools::file_ops::workspace_root();
5363    let scratch_dir = root.join(".hematite").join("scratch");
5364    if std::fs::create_dir_all(&scratch_dir).is_err() {
5365        return None;
5366    }
5367    let ts = std::time::SystemTime::now()
5368        .duration_since(std::time::UNIX_EPOCH)
5369        .map(|d| d.as_secs())
5370        .unwrap_or(0);
5371    // Sanitize tool name for use in filename
5372    let safe_name: String = tool_name
5373        .chars()
5374        .map(|c| {
5375            if c.is_alphanumeric() || c == '_' {
5376                c
5377            } else {
5378                '_'
5379            }
5380        })
5381        .collect();
5382    let filename = format!("{}_{}.txt", safe_name, ts);
5383    let abs_path = scratch_dir.join(&filename);
5384    if std::fs::write(&abs_path, text).is_err() {
5385        return None;
5386    }
5387    Some(format!(".hematite/scratch/{}", filename))
5388}
5389
/// Counters describing what `enforce_prompt_budget` did to a prompt.
/// All fields start at zero via `Default`.
#[derive(Default)]
struct PromptBudgetStats {
    // Large tool results replaced by a short summary.
    summarized_tool_results: usize,
    // Older tool results replaced by a fixed "omitted" placeholder.
    collapsed_tool_results: usize,
    // Long user/assistant messages replaced by a short summary.
    trimmed_chat_messages: usize,
    // Messages removed from the prompt entirely.
    dropped_messages: usize,
}
5397
/// Estimates the token count of `messages`.
///
/// Thin local alias for `crate::agent::inference::estimate_message_batch_tokens`.
fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
    crate::agent::inference::estimate_message_batch_tokens(messages)
}
5401
5402fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
5403    let budget = compaction::SummaryCompressionBudget {
5404        max_chars,
5405        max_lines: 3,
5406        max_line_chars: max_chars.clamp(80, 240),
5407    };
5408    let compressed = compaction::compress_summary(text, budget).summary;
5409    if compressed.is_empty() {
5410        String::new()
5411    } else {
5412        compressed
5413    }
5414}
5415
5416fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
5417    let tool_name = message.name.as_deref().unwrap_or("tool");
5418    let body = summarize_prompt_blob(message.content.as_str(), 320);
5419    format!(
5420        "[Prompt-budget summary of prior `{}` result]\n{}",
5421        tool_name, body
5422    )
5423}
5424
5425fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
5426    let role = message.role.as_str();
5427    let body = summarize_prompt_blob(message.content.as_str(), 240);
5428    format!(
5429        "[Prompt-budget summary of earlier {} message]\n{}",
5430        role, body
5431    )
5432}
5433
5434fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
5435    if messages.len() > 1 && messages[1].role != "user" {
5436        messages.insert(1, ChatMessage::user("Continuing previous context..."));
5437    }
5438}
5439
5440fn enforce_prompt_budget(
5441    prompt_msgs: &mut Vec<ChatMessage>,
5442    context_length: usize,
5443) -> Option<String> {
5444    let target_tokens = ((context_length as f64) * 0.68) as usize;
5445    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5446        return None;
5447    }
5448
5449    let mut stats = PromptBudgetStats::default();
5450
5451    // 1. Summarize the newest large tool outputs first.
5452    let mut tool_indices: Vec<usize> = prompt_msgs
5453        .iter()
5454        .enumerate()
5455        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5456        .collect();
5457    for idx in tool_indices.iter().rev().copied() {
5458        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5459            break;
5460        }
5461        let original = prompt_msgs[idx].content.as_str().to_string();
5462        if original.len() > 1200 {
5463            prompt_msgs[idx].content =
5464                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
5465            stats.summarized_tool_results += 1;
5466        }
5467    }
5468
5469    // 2. Collapse older tool results aggressively, keeping only the most recent two verbatim/summarized.
5470    tool_indices = prompt_msgs
5471        .iter()
5472        .enumerate()
5473        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
5474        .collect();
5475    if tool_indices.len() > 2 {
5476        for idx in tool_indices
5477            .iter()
5478            .take(tool_indices.len().saturating_sub(2))
5479            .copied()
5480        {
5481            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5482                break;
5483            }
5484            prompt_msgs[idx].content = MessageContent::Text(
5485                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
5486            );
5487            stats.collapsed_tool_results += 1;
5488        }
5489    }
5490
5491    // 3. Trim older long chat messages, but preserve the final user request.
5492    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5493    for idx in 1..prompt_msgs.len() {
5494        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
5495            break;
5496        }
5497        if Some(idx) == last_user_idx {
5498            continue;
5499        }
5500        let role = prompt_msgs[idx].role.as_str();
5501        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
5502            prompt_msgs[idx].content =
5503                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
5504            stats.trimmed_chat_messages += 1;
5505        }
5506    }
5507
5508    // 4. Drop the oldest non-system context until we fit, preserving the latest user request.
5509    let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
5510    let mut idx = 1usize;
5511    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
5512        if Some(idx) == preserve_last_user_idx {
5513            idx += 1;
5514            if idx >= prompt_msgs.len() {
5515                break;
5516            }
5517            continue;
5518        }
5519        if idx >= prompt_msgs.len() {
5520            break;
5521        }
5522        prompt_msgs.remove(idx);
5523        stats.dropped_messages += 1;
5524    }
5525
5526    normalize_prompt_start(prompt_msgs);
5527
5528    let new_tokens = estimate_prompt_tokens(prompt_msgs);
5529    if stats.summarized_tool_results == 0
5530        && stats.collapsed_tool_results == 0
5531        && stats.trimmed_chat_messages == 0
5532        && stats.dropped_messages == 0
5533    {
5534        return None;
5535    }
5536
5537    Some(format!(
5538        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
5539        new_tokens,
5540        target_tokens,
5541        stats.summarized_tool_results,
5542        stats.collapsed_tool_results,
5543        stats.trimmed_chat_messages,
5544        stats.dropped_messages
5545    ))
5546}
5547
/// Reports whether `input` looks like a short, direct tool-use request that
/// gains nothing from a reasoning warm-up.
///
/// Callers use this to skip the auto-/think prepend so the model invokes the
/// tool right away. A request qualifies when it either mentions `run_code` /
/// `run code` (any length, case-insensitive), or is shorter than 120 bytes
/// and contains one of the compute-style cue phrases below.
fn is_quick_tool_request(input: &str) -> bool {
    // Cue phrases for short compute/test requests ("calculate X", "test this", ...).
    const COMPUTE_CUES: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];

    let lower = input.to_lowercase();

    // Explicit sandbox requests always count, regardless of length.
    let names_sandbox = lower.contains("run_code") || lower.contains("run code");

    names_sandbox || (input.len() < 120 && COMPUTE_CUES.iter().any(|cue| lower.contains(*cue)))
}
5577
/// Split `text` into consecutive chunks that each contain roughly
/// `words_per_chunk` words.
///
/// A "word" ends at every space or newline character, and the delimiter stays
/// attached to the word it ends, so concatenating the chunks reproduces
/// `text` exactly. A chunk is closed once it has accumulated
/// `words_per_chunk` delimiters; any leftover text forms a final, shorter
/// chunk. Empty input yields no chunks.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    let is_break = |ch: char| ch == ' ' || ch == '\n';

    let mut chunks = Vec::new();
    let mut pending = String::new();
    let mut breaks_seen = 0;

    // Each piece is one run of characters ending with a delimiter, except
    // possibly the last piece, which may have none.
    for piece in text.split_inclusive(is_break) {
        pending.push_str(piece);
        if !piece.ends_with(is_break) {
            continue;
        }
        breaks_seen += 1;
        if breaks_seen >= words_per_chunk {
            chunks.push(std::mem::take(&mut pending));
            breaks_seen = 0;
        }
    }

    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
5599
5600fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
5601    if call.name != "read_file" {
5602        return None;
5603    }
5604    let normalized_arguments =
5605        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
5606    let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
5607    let path = args.get("path").and_then(|v| v.as_str())?;
5608    Some(normalize_workspace_path(path))
5609}
5610
5611fn order_batch_reads_first(
5612    calls: Vec<crate::agent::inference::ToolCallResponse>,
5613) -> (
5614    Vec<crate::agent::inference::ToolCallResponse>,
5615    Option<String>,
5616) {
5617    let has_reads = calls.iter().any(|c| {
5618        matches!(
5619            c.function.name.as_str(),
5620            "read_file" | "inspect_lines" | "grep_files" | "list_files"
5621        )
5622    });
5623    let has_edits = calls.iter().any(|c| {
5624        matches!(
5625            c.function.name.as_str(),
5626            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5627        )
5628    });
5629    if has_reads && has_edits {
5630        let reads: Vec<_> = calls
5631            .into_iter()
5632            .filter(|c| {
5633                !matches!(
5634                    c.function.name.as_str(),
5635                    "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5636                )
5637            })
5638            .collect();
5639        let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
5640        (reads, note)
5641    } else {
5642        (calls, None)
5643    }
5644}
5645
/// Decide whether a grep result is broad enough to count as high fan-out.
///
/// Only the first line of `output` is examined. The hunk count is the first
/// `", "`-separated segment of the form `<number> hunk(s)`; the match count
/// is the first space-separated token when it parses as a number. Anything
/// that cannot be parsed counts as zero. The result is `true` when there are
/// at least 8 hunks or at least 12 matches, and `false` for empty output.
fn grep_output_is_high_fanout(output: &str) -> bool {
    const HUNK_THRESHOLD: usize = 8;
    const MATCH_THRESHOLD: usize = 12;

    let header = match output.lines().next() {
        Some(line) => line,
        None => return false,
    };

    let hunks = header
        .split(", ")
        .filter_map(|segment| segment.strip_suffix(" hunk(s)"))
        .find_map(|digits| digits.parse::<usize>().ok())
        .unwrap_or(0);
    if hunks >= HUNK_THRESHOLD {
        return true;
    }

    let matches = header
        .split(' ')
        .next()
        .map_or(0, |token| token.parse::<usize>().unwrap_or(0));
    matches >= MATCH_THRESHOLD
}
5664
5665fn build_system_with_corrections(
5666    base: &str,
5667    hints: &[String],
5668    gpu: &Arc<GpuState>,
5669    git: &Arc<crate::agent::git_monitor::GitState>,
5670    config: &crate::agent::config::HematiteConfig,
5671) -> String {
5672    let mut system_msg = base.to_string();
5673
5674    // Inject Permission Mode.
5675    system_msg.push_str("\n\n# Permission Mode\n");
5676    let mode_label = match config.mode {
5677        crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
5678        crate::agent::config::PermissionMode::Developer => "DEVELOPER",
5679        crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
5680    };
5681    system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
5682
5683    if config.mode == crate::agent::config::PermissionMode::ReadOnly {
5684        system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
5685    } else {
5686        system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
5687    }
5688
5689    // Inject live hardware status.
5690    let (used, total) = gpu.read();
5691    if total > 0 {
5692        system_msg.push_str("\n\n# Terminal Hardware Context\n");
5693        system_msg.push_str(&format!(
5694            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
5695            gpu.gpu_name(),
5696            used as f64 / 1024.0,
5697            total as f64 / 1024.0,
5698            gpu.ratio() * 100.0
5699        ));
5700        system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
5701    }
5702
5703    // Inject Git Repository context.
5704    system_msg.push_str("\n\n# Git Repository Context\n");
5705    let git_status_label = git.label();
5706    let git_url = git.url();
5707    system_msg.push_str(&format!(
5708        "REMOTE STATUS: {} | URL: {}\n",
5709        git_status_label, git_url
5710    ));
5711
5712    // Live Snapshots (Status/Diff)
5713    let root = crate::tools::file_ops::workspace_root();
5714    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
5715        system_msg.push_str("\nGit status snapshot:\n");
5716        system_msg.push_str(&status_snapshot);
5717        system_msg.push_str("\n");
5718    }
5719
5720    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
5721        system_msg.push_str("\nGit diff snapshot:\n");
5722        system_msg.push_str(&diff_snapshot);
5723        system_msg.push_str("\n");
5724    }
5725
5726    if git_status_label == "NONE" {
5727        system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
5728    } else if git_status_label == "BEHIND" {
5729        system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
5730    }
5731
5732    // NOTE: Instruction files (CLAUDE.md, HEMATITE.md, etc.) are already injected
5733    // by InferenceEngine::build_system_prompt() via load_instruction_files().
5734    // Injecting them again here would double the token cost (~4K wasted per turn).
5735
5736    if hints.is_empty() {
5737        return system_msg;
5738    }
5739    system_msg.push_str("\n\n# Formatting Corrections\n");
5740    system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
5741    for hint in hints {
5742        system_msg.push_str(&format!("- {}\n", hint));
5743    }
5744    system_msg
5745}
5746
/// Pick a model override for `user_input` from simple keyword cues.
///
/// Matching is case-insensitive substring search. Inputs containing a
/// "think" cue (refactor, rewrite, implement, create, fix, debug) select
/// `think_model` when one is configured. Otherwise inputs containing a
/// "fast" cue (what, show, find, list, status) select `fast_model` when one
/// is configured. Returns `None` when neither rule applies.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_CUES: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_CUES: &[&str] = &["what", "show", "find", "list", "status"];

    let text = user_input.to_lowercase();
    let mentions_any = |cues: &[&str]| cues.iter().any(|cue| text.contains(*cue));

    // The think rule is checked first, so it wins when both kinds of cue appear.
    if think_model.is_some() && mentions_any(THINK_CUES) {
        think_model
    } else if fast_model.is_some() && mentions_any(FAST_CUES) {
        fast_model
    } else {
        None
    }
}
5772
5773fn is_parallel_safe(name: &str) -> bool {
5774    let metadata = crate::agent::inference::tool_metadata_for_name(name);
5775    !metadata.mutates_workspace && !metadata.external_surface
5776}
5777
/// Decide whether a chat-mode query should consult the vein.
///
/// Always `true` in docs-only mode. Otherwise `true` when the query
/// (ASCII-lowercased) contains one of the recall cue phrases below, or
/// contains a token shaped like a date: a run of exactly ten characters drawn
/// from ASCII digits and `-` whose fifth character is `-` (e.g. `2026-04-09`).
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const RECALL_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    if docs_only_mode {
        return true;
    }

    let lower = query.to_ascii_lowercase();
    if RECALL_CUES.iter().any(|cue| lower.contains(*cue)) {
        return true;
    }

    // Tokens contain only ASCII digits and '-', so byte indexing is safe here.
    let looks_like_date = |token: &str| token.len() == 10 && token.as_bytes()[4] == b'-';
    lower
        .split(|ch: char| !ch.is_ascii_digit() && ch != '-')
        .any(looks_like_date)
}
5804
5805#[cfg(test)]
5806mod tests {
5807    use super::*;
5808
5809    #[test]
5810    fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
5811        let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
5812        let class = classify_runtime_failure(detail);
5813        assert_eq!(class, RuntimeFailureClass::ContextWindow);
5814        assert_eq!(class.tag(), "context_window");
5815        assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
5816    }
5817
5818    #[test]
5819    fn runtime_failure_maps_to_provider_and_checkpoint_state() {
5820        assert_eq!(
5821            provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
5822            Some(ProviderRuntimeState::ContextWindow)
5823        );
5824        assert_eq!(
5825            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
5826            Some(OperatorCheckpointState::BlockedContextWindow)
5827        );
5828        assert_eq!(
5829            provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
5830            Some(ProviderRuntimeState::Degraded)
5831        );
5832        assert_eq!(
5833            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
5834            None
5835        );
5836    }
5837
5838    #[test]
5839    fn intent_router_treats_tool_registry_ownership_as_product_truth() {
5840        let intent = classify_query_intent(
5841            WorkflowMode::ReadOnly,
5842            "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
5843        );
5844        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5845        assert_eq!(
5846            intent.direct_answer,
5847            Some(DirectAnswerKind::ToolRegistryOwnership)
5848        );
5849    }
5850
5851    #[test]
5852    fn intent_router_treats_tool_classes_as_product_truth() {
5853        let intent = classify_query_intent(
5854            WorkflowMode::ReadOnly,
5855            "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
5856        );
5857        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5858        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
5859    }
5860
5861    #[test]
5862    fn tool_registry_ownership_answer_mentions_new_owner_file() {
5863        let answer = build_tool_registry_ownership_answer();
5864        assert!(answer.contains("src/agent/tool_registry.rs"));
5865        assert!(answer.contains("builtin dispatch path"));
5866        assert!(answer.contains("src/agent/conversation.rs"));
5867    }
5868
5869    #[test]
5870    fn intent_router_treats_mcp_lifecycle_as_product_truth() {
5871        let intent = classify_query_intent(
5872            WorkflowMode::ReadOnly,
5873            "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
5874        );
5875        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5876        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
5877    }
5878
5879    #[test]
5880    fn intent_router_short_circuits_unsafe_commit_pressure() {
5881        let intent = classify_query_intent(
5882            WorkflowMode::Auto,
5883            "Make a code change, skip verification, and commit it immediately.",
5884        );
5885        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5886        assert_eq!(
5887            intent.direct_answer,
5888            Some(DirectAnswerKind::UnsafeWorkflowPressure)
5889        );
5890    }
5891
5892    #[test]
5893    fn unsafe_workflow_pressure_answer_requires_verification() {
5894        let answer = build_unsafe_workflow_pressure_answer();
5895        assert!(answer.contains("should not skip verification"));
5896        assert!(answer.contains("run the appropriate verification path"));
5897        assert!(answer.contains("only then commit"));
5898    }
5899
5900    #[test]
5901    fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
5902        let intent = classify_query_intent(
5903            WorkflowMode::ReadOnly,
5904            "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.",
5905        );
5906        assert_eq!(intent.primary_class, QueryIntentClass::RepoArchitecture);
5907        assert!(intent.architecture_overview_mode);
5908        assert_eq!(intent.direct_answer, None);
5909    }
5910
5911    #[test]
5912    fn intent_router_marks_host_inspection_questions() {
5913        let intent = classify_query_intent(
5914            WorkflowMode::Auto,
5915            "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.",
5916        );
5917        assert!(intent.host_inspection_mode);
5918        assert_eq!(
5919            preferred_host_inspection_topic(
5920                "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development."
5921            ),
5922            Some("summary")
5923        );
5924    }
5925
5926    #[test]
5927    fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
5928        assert!(should_use_vein_in_chat(
5929            "What did we decide on 2026-04-09 about docs-only mode?",
5930            false
5931        ));
5932        assert!(should_use_vein_in_chat("Summarize these local notes", true));
5933        assert!(!should_use_vein_in_chat("Tell me a joke", false));
5934    }
5935
5936    #[test]
5937    fn shell_host_inspection_guard_matches_path_and_version_commands() {
5938        assert!(shell_looks_like_structured_host_inspection(
5939            "$env:PATH -split ';'"
5940        ));
5941        assert!(shell_looks_like_structured_host_inspection(
5942            "cargo --version"
5943        ));
5944        assert!(shell_looks_like_structured_host_inspection(
5945            "Get-NetTCPConnection -LocalPort 3000"
5946        ));
5947        assert!(shell_looks_like_structured_host_inspection(
5948            "netstat -ano | findstr :3000"
5949        ));
5950        assert!(shell_looks_like_structured_host_inspection(
5951            "Get-Process | Sort-Object WS -Descending"
5952        ));
5953        assert!(shell_looks_like_structured_host_inspection("ipconfig /all"));
5954        assert!(shell_looks_like_structured_host_inspection("Get-Service"));
5955        assert!(shell_looks_like_structured_host_inspection(
5956            "winget --version"
5957        ));
5958    }
5959
5960    #[test]
5961    fn intent_router_picks_ports_for_listening_port_questions() {
5962        assert_eq!(
5963            preferred_host_inspection_topic(
5964                "Show me what is listening on port 3000 and whether anything unexpected is exposed."
5965            ),
5966            Some("ports")
5967        );
5968    }
5969
5970    #[test]
5971    fn intent_router_picks_processes_for_host_process_questions() {
5972        assert_eq!(
5973            preferred_host_inspection_topic(
5974                "Show me what processes are using the most RAM right now."
5975            ),
5976            Some("processes")
5977        );
5978    }
5979
5980    #[test]
5981    fn intent_router_picks_network_for_adapter_questions() {
5982        assert_eq!(
5983            preferred_host_inspection_topic(
5984                "Show me my active network adapters, IP addresses, gateways, and DNS servers."
5985            ),
5986            Some("network")
5987        );
5988    }
5989
5990    #[test]
5991    fn intent_router_picks_services_for_service_questions() {
5992        assert_eq!(
5993            preferred_host_inspection_topic(
5994                "Show me the running services and startup types that matter for a normal dev machine."
5995            ),
5996            Some("services")
5997        );
5998    }
5999
6000    #[test]
6001    fn intent_router_picks_env_doctor_for_package_manager_questions() {
6002        assert_eq!(
6003            preferred_host_inspection_topic(
6004                "Run an environment doctor on this machine and tell me whether my PATH and package managers look sane."
6005            ),
6006            Some("env_doctor")
6007        );
6008    }
6009
6010    #[test]
6011    fn intent_router_picks_fix_plan_for_host_remediation_questions() {
6012        assert_eq!(
6013            preferred_host_inspection_topic("How do I fix cargo not found on this machine?"),
6014            Some("fix_plan")
6015        );
6016        assert_eq!(
6017            preferred_host_inspection_topic(
6018                "How do I fix Hematite when LM Studio is not reachable on localhost:1234?"
6019            ),
6020            Some("fix_plan")
6021        );
6022    }
6023
6024    #[test]
6025    fn fill_missing_fix_plan_issue_backfills_last_user_prompt() {
6026        let mut args = serde_json::json!({
6027            "topic": "fix_plan"
6028        });
6029
6030        fill_missing_fix_plan_issue(
6031            "inspect_host",
6032            &mut args,
6033            Some("/think\nHow do I fix cargo not found on this machine?"),
6034        );
6035
6036        assert_eq!(
6037            args.get("issue").and_then(|value| value.as_str()),
6038            Some("How do I fix cargo not found on this machine?")
6039        );
6040    }
6041
6042    #[test]
6043    fn shell_fix_question_rewrites_to_fix_plan() {
6044        let args = serde_json::json!({
6045            "command": "where cargo"
6046        });
6047
6048        assert!(should_rewrite_shell_to_fix_plan(
6049            "shell",
6050            &args,
6051            Some("How do I fix cargo not found on this machine?")
6052        ));
6053    }
6054
6055    #[test]
6056    fn fix_plan_dedupe_key_matches_rewritten_shell_probe() {
6057        let latest_user_prompt = Some("How do I fix cargo not found on this machine?");
6058        let shell_key = normalized_tool_call_key_for_dedupe(
6059            "shell",
6060            r#"{"command":"where cargo"}"#,
6061            false,
6062            latest_user_prompt,
6063        );
6064        let fix_plan_key = normalized_tool_call_key_for_dedupe(
6065            "inspect_host",
6066            r#"{"topic":"fix_plan"}"#,
6067            false,
6068            latest_user_prompt,
6069        );
6070
6071        assert_eq!(shell_key, fix_plan_key);
6072    }
6073
6074    #[test]
6075    fn shell_cleanup_script_rewrites_to_maintainer_workflow() {
6076        let (tool_name, args) = normalized_tool_call_for_execution(
6077            "shell",
6078            r#"{"command":"pwsh ./clean.ps1 -Deep -PruneDist"}"#,
6079            false,
6080            Some("Run my cleanup scripts."),
6081        );
6082
6083        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6084        assert_eq!(
6085            args.get("workflow").and_then(|value| value.as_str()),
6086            Some("clean")
6087        );
6088        assert_eq!(
6089            args.get("deep").and_then(|value| value.as_bool()),
6090            Some(true)
6091        );
6092        assert_eq!(
6093            args.get("prune_dist").and_then(|value| value.as_bool()),
6094            Some(true)
6095        );
6096    }
6097
6098    #[test]
6099    fn shell_release_script_rewrites_to_maintainer_workflow() {
6100        let (tool_name, args) = normalized_tool_call_for_execution(
6101            "shell",
6102            r#"{"command":"pwsh ./release.ps1 -Version 0.4.5 -Push -AddToPath"}"#,
6103            false,
6104            Some("Run the release flow."),
6105        );
6106
6107        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6108        assert_eq!(
6109            args.get("workflow").and_then(|value| value.as_str()),
6110            Some("release")
6111        );
6112        assert_eq!(
6113            args.get("version").and_then(|value| value.as_str()),
6114            Some("0.4.5")
6115        );
6116        assert_eq!(
6117            args.get("push").and_then(|value| value.as_bool()),
6118            Some(true)
6119        );
6120    }
6121
6122    #[test]
6123    fn explicit_cleanup_prompt_rewrites_shell_to_maintainer_workflow() {
6124        let (tool_name, args) = normalized_tool_call_for_execution(
6125            "shell",
6126            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
6127            false,
6128            Some("Run the deep cleanup and prune old dist artifacts."),
6129        );
6130
6131        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
6132        assert_eq!(
6133            args.get("workflow").and_then(|value| value.as_str()),
6134            Some("clean")
6135        );
6136        assert_eq!(
6137            args.get("deep").and_then(|value| value.as_bool()),
6138            Some(true)
6139        );
6140        assert_eq!(
6141            args.get("prune_dist").and_then(|value| value.as_bool()),
6142            Some(true)
6143        );
6144    }
6145
6146    #[test]
6147    fn shell_cargo_test_rewrites_to_workspace_workflow() {
6148        let (tool_name, args) = normalized_tool_call_for_execution(
6149            "shell",
6150            r#"{"command":"cargo test"}"#,
6151            false,
6152            Some("Run cargo test in this project."),
6153        );
6154
6155        assert_eq!(tool_name, "run_workspace_workflow");
6156        assert_eq!(
6157            args.get("workflow").and_then(|value| value.as_str()),
6158            Some("command")
6159        );
6160        assert_eq!(
6161            args.get("command").and_then(|value| value.as_str()),
6162            Some("cargo test")
6163        );
6164    }
6165
6166    #[test]
6167    fn current_plan_execution_request_accepts_saved_plan_command() {
6168        assert!(is_current_plan_execution_request("/implement-plan"));
6169        assert!(is_current_plan_execution_request(
6170            "Implement the current plan."
6171        ));
6172    }
6173
6174    #[test]
6175    fn architect_operator_note_points_to_execute_path() {
6176        let plan = crate::tools::plan::PlanHandoff {
6177            goal: "Tighten startup workflow guidance".into(),
6178            target_files: vec!["src/runtime.rs".into()],
6179            ordered_steps: vec!["Update the startup banner".into()],
6180            verification: "cargo check --tests".into(),
6181            risks: vec![],
6182            open_questions: vec![],
6183        };
6184        let note = architect_handoff_operator_note(&plan);
6185        assert!(note.contains("`.hematite/PLAN.md`"));
6186        assert!(note.contains("/implement-plan"));
6187        assert!(note.contains("/code implement the current plan"));
6188    }
6189
6190    #[test]
6191    fn natural_language_test_prompt_rewrites_to_workspace_workflow() {
6192        let (tool_name, args) = normalized_tool_call_for_execution(
6193            "shell",
6194            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
6195            false,
6196            Some("Run the tests in this project."),
6197        );
6198
6199        assert_eq!(tool_name, "run_workspace_workflow");
6200        assert_eq!(
6201            args.get("workflow").and_then(|value| value.as_str()),
6202            Some("test")
6203        );
6204    }
6205
6206    #[test]
6207    fn failing_path_parser_extracts_cargo_error_locations() {
6208        let output = r#"
6209BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:
6210
6211error[E0412]: cannot find type `Foo` in this scope
6212  --> src/agent/conversation.rs:42:12
6213   |
621442 |     field: Foo,
6215   |            ^^^ not found
6216
6217error[E0308]: mismatched types
6218  --> src/tools/file_ops.rs:100:5
6219   |
6220   = note: expected `String`, found `&str`
6221"#;
6222        let paths = parse_failing_paths_from_build_output(output);
6223        assert!(
6224            paths.iter().any(|p| p.contains("conversation.rs")),
6225            "should capture conversation.rs"
6226        );
6227        assert!(
6228            paths.iter().any(|p| p.contains("file_ops.rs")),
6229            "should capture file_ops.rs"
6230        );
6231        assert_eq!(paths.len(), 2, "no duplicates");
6232    }
6233
6234    #[test]
6235    fn failing_path_parser_ignores_macro_expansions() {
6236        let output = r#"
6237  --> <macro-expansion>:1:2
6238  --> src/real/file.rs:10:5
6239"#;
6240        let paths = parse_failing_paths_from_build_output(output);
6241        assert_eq!(paths.len(), 1);
6242        assert!(paths[0].contains("file.rs"));
6243    }
6244
6245    #[test]
6246    fn intent_router_picks_updates_for_update_questions() {
6247        assert_eq!(
6248            preferred_host_inspection_topic("is my PC up to date?"),
6249            Some("updates")
6250        );
6251        assert_eq!(
6252            preferred_host_inspection_topic("are there any pending Windows updates?"),
6253            Some("updates")
6254        );
6255        assert_eq!(
6256            preferred_host_inspection_topic("check for updates on my computer"),
6257            Some("updates")
6258        );
6259    }
6260
6261    #[test]
6262    fn intent_router_picks_security_for_antivirus_questions() {
6263        assert_eq!(
6264            preferred_host_inspection_topic("is my antivirus on?"),
6265            Some("security")
6266        );
6267        assert_eq!(
6268            preferred_host_inspection_topic("is Windows Defender running?"),
6269            Some("security")
6270        );
6271        assert_eq!(
6272            preferred_host_inspection_topic("is my PC protected?"),
6273            Some("security")
6274        );
6275    }
6276
6277    #[test]
6278    fn intent_router_picks_pending_reboot_for_restart_questions() {
6279        assert_eq!(
6280            preferred_host_inspection_topic("do I need to restart my PC?"),
6281            Some("pending_reboot")
6282        );
6283        assert_eq!(
6284            preferred_host_inspection_topic("is a reboot required?"),
6285            Some("pending_reboot")
6286        );
6287        assert_eq!(
6288            preferred_host_inspection_topic("is there a pending restart waiting?"),
6289            Some("pending_reboot")
6290        );
6291    }
6292
6293    #[test]
6294    fn intent_router_picks_disk_health_for_drive_health_questions() {
6295        assert_eq!(
6296            preferred_host_inspection_topic("is my hard drive dying?"),
6297            Some("disk_health")
6298        );
6299        assert_eq!(
6300            preferred_host_inspection_topic("check the disk health and SMART status"),
6301            Some("disk_health")
6302        );
6303        assert_eq!(
6304            preferred_host_inspection_topic("is my SSD healthy?"),
6305            Some("disk_health")
6306        );
6307    }
6308
6309    #[test]
6310    fn intent_router_picks_battery_for_battery_questions() {
6311        assert_eq!(
6312            preferred_host_inspection_topic("check my battery"),
6313            Some("battery")
6314        );
6315        assert_eq!(
6316            preferred_host_inspection_topic("how is my battery life?"),
6317            Some("battery")
6318        );
6319        assert_eq!(
6320            preferred_host_inspection_topic("what is my battery wear level?"),
6321            Some("battery")
6322        );
6323    }
6324
6325    #[test]
6326    fn intent_router_picks_recent_crashes_for_bsod_questions() {
6327        assert_eq!(
6328            preferred_host_inspection_topic("why did my PC restart by itself?"),
6329            Some("recent_crashes")
6330        );
6331        assert_eq!(
6332            preferred_host_inspection_topic("did my computer BSOD recently?"),
6333            Some("recent_crashes")
6334        );
6335        assert_eq!(
6336            preferred_host_inspection_topic("show me any recent app crashes"),
6337            Some("recent_crashes")
6338        );
6339    }
6340
6341    #[test]
6342    fn intent_router_picks_scheduled_tasks_for_task_questions() {
6343        assert_eq!(
6344            preferred_host_inspection_topic("what scheduled tasks are running on this PC?"),
6345            Some("scheduled_tasks")
6346        );
6347        assert_eq!(
6348            preferred_host_inspection_topic("show me the task scheduler"),
6349            Some("scheduled_tasks")
6350        );
6351    }
6352
6353    #[test]
6354    fn intent_router_picks_dev_conflicts_for_conflict_questions() {
6355        assert_eq!(
6356            preferred_host_inspection_topic("are there any dev environment conflicts?"),
6357            Some("dev_conflicts")
6358        );
6359        assert_eq!(
6360            preferred_host_inspection_topic("why is python pointing to the wrong version?"),
6361            Some("dev_conflicts")
6362        );
6363    }
6364
6365    #[test]
6366    fn shell_guard_catches_windows_update_commands() {
6367        assert!(shell_looks_like_structured_host_inspection(
6368            "Get-WindowsUpdateLog | Select-Object -Last 50"
6369        ));
6370        assert!(shell_looks_like_structured_host_inspection(
6371            "$sess = New-Object -ComObject Microsoft.Update.Session"
6372        ));
6373        assert!(shell_looks_like_structured_host_inspection(
6374            "Get-Service wuauserv"
6375        ));
6376        assert!(shell_looks_like_structured_host_inspection(
6377            "Get-MpComputerStatus"
6378        ));
6379        assert!(shell_looks_like_structured_host_inspection(
6380            "Get-PhysicalDisk"
6381        ));
6382        assert!(shell_looks_like_structured_host_inspection(
6383            "Get-CimInstance Win32_Battery"
6384        ));
6385        assert!(shell_looks_like_structured_host_inspection(
6386            "Get-WinEvent -FilterHashtable @{Id=41}"
6387        ));
6388        assert!(shell_looks_like_structured_host_inspection(
6389            "Get-ScheduledTask | Where-Object State -ne Disabled"
6390        ));
6391    }
6392}
hematite/agent/conversation.rs

hematite/agent/
conversation.rs