Skip to main content

hematite/agent/
conversation.rs

1use crate::agent::architecture_summary::{
2    build_architecture_overview_answer, prune_architecture_trace_batch,
3    prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4    summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7    build_about_answer, build_architect_session_reset_plan, build_authorization_policy_answer,
8    build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9    build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10    build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11    build_session_reset_semantics_answer, build_tool_classes_answer,
12    build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13    build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16    ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17    ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20    action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21    is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24    attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25    RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28    classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
29    preferred_host_inspection_topic, preferred_maintainer_workflow, preferred_workspace_workflow,
30    DirectAnswerKind, QueryIntentClass,
31};
32use crate::agent::tool_registry::dispatch_builtin_tool;
33// SystemPromptBuilder is no longer used — InferenceEngine::build_system_prompt() is canonical.
34use crate::agent::compaction::{self, CompactionConfig};
35use crate::ui::gpu_monitor::GpuState;
36
37use serde_json::Value;
38use std::sync::Arc;
39use tokio::sync::{mpsc, Mutex};
40// -- Session persistence -------------------------------------------------------
41
/// One user message plus whatever was attached to it.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
    /// Text of the turn.
    pub text: String,
    /// Document attached to this turn, if any.
    pub attached_document: Option<AttachedDocument>,
    /// Image attached to this turn, if any.
    pub attached_image: Option<AttachedImage>,
}

impl UserTurn {
    /// Creates a turn that carries only text; both attachment slots stay `None`.
    pub fn text(text: impl Into<String>) -> Self {
        Self {
            text: text.into(),
            ..Self::default()
        }
    }
}

/// A named document whose content is carried inline with the turn.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name of the document.
    pub name: String,
    /// Document content as text.
    pub content: String,
}

/// A named image referenced by a path string.
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name of the image.
    pub name: String,
    /// Path string for the image.
    pub path: String,
}
70
/// Serialized shape of the session file that `load_session_data` parses from
/// `.hematite/session.json`.
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
    /// Running summary text, or `None` when no summary was stored.
    running_summary: Option<String>,
    /// Session memory; `#[serde(default)]` makes a missing key deserialize to
    /// `SessionMemory::default()` instead of failing the whole parse.
    #[serde(default)]
    session_memory: crate::agent::compaction::SessionMemory,
}
77
/// Bookkeeping record for action-grounding checks.
///
/// NOTE(review): the code that reads and writes these fields is outside this section;
/// the per-field notes below are inferred from names and types only and should be
/// confirmed against the call sites.
#[derive(Default)]
struct ActionGroundingState {
    // Presumably a counter identifying the current turn — TODO confirm.
    turn_index: u64,
    // Path -> u64; presumably the turn index at which the path was last observed — TODO confirm.
    observed_paths: std::collections::HashMap<String, u64>,
    // Path -> u64; presumably the turn index at which the path was last inspected — TODO confirm.
    inspected_paths: std::collections::HashMap<String, u64>,
    // Presumably the turn index of the most recent verify/build run, `None` if never run — TODO confirm.
    last_verify_build_turn: Option<u64>,
    // Presumably whether that most recent verify/build run succeeded — TODO confirm.
    last_verify_build_ok: bool,
    // Presumably the files named by the most recent failed build — TODO confirm.
    last_failed_build_paths: Vec<String>,
    // Presumably set when code was edited after the last verification — TODO confirm.
    code_changed_since_verify: bool,
}
88
/// Scope guard around a shared boolean flag: the flag is cleared when the guard is dropped.
struct PlanExecutionGuard {
    flag: Arc<std::sync::atomic::AtomicBool>,
}

impl Drop for PlanExecutionGuard {
    /// Stores `false` into the shared flag, regardless of how the guard's scope ended.
    fn drop(&mut self) {
        use std::sync::atomic::Ordering::SeqCst;

        self.flag.store(false, SeqCst);
    }
}
98
/// Workflow mode selected for the conversation; `Auto` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    #[default]
    Auto,
    Ask,
    Code,
    Architect,
    ReadOnly,
    /// Clean conversational mode — lighter prompt, no coding agent scaffolding,
    /// tools available but not pushed. Vein RAG still runs for context.
    Chat,
}

impl WorkflowMode {
    /// Upper-case label for this mode.
    fn label(self) -> &'static str {
        match self {
            Self::Auto => "AUTO",
            Self::Ask => "ASK",
            Self::Code => "CODE",
            Self::Architect => "ARCHITECT",
            Self::ReadOnly => "READ-ONLY",
            Self::Chat => "CHAT",
        }
    }

    /// `true` for `Ask`, `Architect`, and `ReadOnly`; `false` for every other mode.
    fn is_read_only(self) -> bool {
        match self {
            Self::Ask | Self::Architect | Self::ReadOnly => true,
            Self::Auto | Self::Code | Self::Chat => false,
        }
    }

    /// `true` only for `Chat`.
    pub(crate) fn is_chat(self) -> bool {
        self == Self::Chat
    }
}
135
136fn session_path() -> std::path::PathBuf {
137    crate::tools::file_ops::workspace_root()
138        .join(".hematite")
139        .join("session.json")
140}
141
142fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
143    let path = session_path();
144    if !path.exists() {
145        return (None, crate::agent::compaction::SessionMemory::default());
146    }
147    let Ok(data) = std::fs::read_to_string(&path) else {
148        return (None, crate::agent::compaction::SessionMemory::default());
149    };
150    let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
151        return (None, crate::agent::compaction::SessionMemory::default());
152    };
153    (saved.running_summary, saved.session_memory)
154}
155
156fn reset_task_files() {
157    let root = crate::tools::file_ops::workspace_root();
158    let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
159    let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
160    let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
161    let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
162    let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
163    let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
164}
165
166fn purge_persistent_memory() {
167    let root = crate::tools::file_ops::workspace_root();
168    let mem_dir = root.join(".hematite").join("memories");
169    if mem_dir.exists() {
170        let _ = std::fs::remove_dir_all(&mem_dir);
171        let _ = std::fs::create_dir_all(&mem_dir);
172    }
173
174    let log_dir = root.join(".hematite_logs");
175    if log_dir.exists() {
176        if let Ok(entries) = std::fs::read_dir(&log_dir) {
177            for entry in entries.flatten() {
178                let _ = std::fs::write(entry.path(), "");
179            }
180        }
181    }
182}
183
184fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
185    let mut out = prompt.trim().to_string();
186    if let Some(doc) = user_turn.attached_document.as_ref() {
187        out = format!(
188            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
189            doc.name, doc.content, out
190        );
191    }
192    if let Some(image) = user_turn.attached_image.as_ref() {
193        out = if out.is_empty() {
194            format!("[Attached image: {}]", image.name)
195        } else {
196            format!("[Attached image: {}]\n\n{}", image.name, out)
197        };
198    }
199    out
200}
201
202fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
203    let mut prefixes = Vec::new();
204    if let Some(doc) = user_turn.attached_document.as_ref() {
205        prefixes.push(format!("[Attached document: {}]", doc.name));
206    }
207    if let Some(image) = user_turn.attached_image.as_ref() {
208        prefixes.push(format!("[Attached image: {}]", image.name));
209    }
210    if prefixes.is_empty() {
211        prompt.to_string()
212    } else if prompt.trim().is_empty() {
213        prefixes.join("\n")
214    } else {
215        format!("{}\n{}", prefixes.join("\n"), prompt)
216    }
217}
218
/// Coarse classification of a runtime failure.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    ContextWindow,
    ProviderDegraded,
    ToolArgMalformed,
    ToolPolicyBlocked,
    ToolLoop,
    VerificationFailed,
    EmptyModelResponse,
    Unknown,
}

impl RuntimeFailureClass {
    /// Stable snake_case tag for this class.
    fn tag(self) -> &'static str {
        match self {
            Self::ContextWindow => "context_window",
            Self::ProviderDegraded => "provider_degraded",
            Self::ToolArgMalformed => "tool_arg_malformed",
            Self::ToolPolicyBlocked => "tool_policy_blocked",
            Self::ToolLoop => "tool_loop",
            Self::VerificationFailed => "verification_failed",
            Self::EmptyModelResponse => "empty_model_response",
            Self::Unknown => "unknown",
        }
    }

    /// Human-readable advice on how to proceed after a failure of this class.
    fn operator_guidance(self) -> &'static str {
        match self {
            Self::ContextWindow => "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.",
            Self::ProviderDegraded => "Retry once automatically, then narrow the turn or restart LM Studio if it persists.",
            Self::ToolArgMalformed => "Retry with repaired or narrower tool arguments instead of repeating the same malformed call.",
            Self::ToolPolicyBlocked => "Stay inside the allowed workflow or switch modes before retrying.",
            Self::ToolLoop => "Stop repeating the same failing tool pattern and switch to a narrower recovery step.",
            Self::VerificationFailed => "Fix the build or test failure before treating the task as complete.",
            Self::EmptyModelResponse => "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.",
            Self::Unknown => "Inspect the latest grounded tool results or provider status before retrying.",
        }
    }
}
274
275fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
276    let lower = detail.to_ascii_lowercase();
277    if lower.contains("context_window_blocked")
278        || lower.contains("context ceiling reached")
279        || lower.contains("exceeds the")
280        || ((lower.contains("n_keep") && lower.contains("n_ctx"))
281            || lower.contains("context length")
282            || lower.contains("keep from the initial prompt")
283            || lower.contains("prompt is greater than the context length"))
284    {
285        RuntimeFailureClass::ContextWindow
286    } else if lower.contains("empty response from model")
287        || lower.contains("model returned an empty response")
288    {
289        RuntimeFailureClass::EmptyModelResponse
290    } else if lower.contains("lm studio unreachable")
291        || lower.contains("lm studio error")
292        || lower.contains("request failed")
293        || lower.contains("response parse error")
294        || lower.contains("provider degraded")
295    {
296        RuntimeFailureClass::ProviderDegraded
297    } else if lower.contains("missing required argument")
298        || lower.contains("json repair failed")
299        || lower.contains("invalid pattern")
300        || lower.contains("invalid line range")
301    {
302        RuntimeFailureClass::ToolArgMalformed
303    } else if lower.contains("action blocked:")
304        || lower.contains("access denied")
305        || lower.contains("declined by user")
306    {
307        RuntimeFailureClass::ToolPolicyBlocked
308    } else if lower.contains("too many consecutive tool errors")
309        || lower.contains("repeated tool failures")
310        || lower.contains("stuck in a loop")
311    {
312        RuntimeFailureClass::ToolLoop
313    } else if lower.contains("build failed")
314        || lower.contains("verification failed")
315        || lower.contains("verify_build")
316    {
317        RuntimeFailureClass::VerificationFailed
318    } else {
319        RuntimeFailureClass::Unknown
320    }
321}
322
323fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
324    format!(
325        "[failure:{}] {} Detail: {}",
326        class.tag(),
327        class.operator_guidance(),
328        detail.trim()
329    )
330}
331
332fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
333    match class {
334        RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
335        RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
336        RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
337        _ => None,
338    }
339}
340
341fn checkpoint_state_for_runtime_failure(
342    class: RuntimeFailureClass,
343) -> Option<OperatorCheckpointState> {
344    match class {
345        RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
346        RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
347        RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
348        RuntimeFailureClass::VerificationFailed => {
349            Some(OperatorCheckpointState::BlockedVerification)
350        }
351        _ => None,
352    }
353}
354
355fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
356    match class {
357        RuntimeFailureClass::ProviderDegraded => {
358            "LM Studio degraded during the turn; retrying once before surfacing a failure."
359        }
360        RuntimeFailureClass::EmptyModelResponse => {
361            "The model returned an empty reply; retrying once before surfacing a failure."
362        }
363        _ => "Runtime recovery in progress.",
364    }
365}
366
367fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
368    match class {
369        RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
370        RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
371        RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
372        RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
373        _ => "Operator checkpoint updated.",
374    }
375}
376
377fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
378    match class {
379        RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
380        RuntimeFailureClass::ProviderDegraded => {
381            "LM Studio degraded and did not recover cleanly; operator action is now required."
382        }
383        RuntimeFailureClass::EmptyModelResponse => {
384            "LM Studio returned an empty reply after recovery; operator action is now required."
385        }
386        RuntimeFailureClass::ToolLoop => {
387            "Repeated failing tool pattern detected; Hematite stopped the loop."
388        }
389        _ => "Runtime failure surfaced to the operator.",
390    }
391}
392
393fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
394    matches!(
395        class,
396        RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
397    )
398}
399
400fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
401    match class {
402        RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
403        RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
404        RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
405        RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
406        RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
407        RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
408        RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
409    }
410}
411
412fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
413    format!(
414        "{} [{}]",
415        plan.recipe.scenario.label(),
416        plan.recipe.steps_summary()
417    )
418}
419
420fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
421    match decision {
422        RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
423        RecoveryDecision::Escalate {
424            recipe,
425            attempts_made,
426            ..
427        } => format!(
428            "{} escalated after {} / {} [{}]",
429            recipe.scenario.label(),
430            attempts_made,
431            recipe.max_attempts.max(1),
432            recipe.steps_summary()
433        ),
434    }
435}
436
437/// Parse file paths from cargo/compiler error output.
438/// Handles lines like `  --> src/foo/bar.rs:34:12` and `error: could not compile`.
439fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
440    let root = crate::tools::file_ops::workspace_root();
441    let mut paths: Vec<String> = output
442        .lines()
443        .filter_map(|line| {
444            let trimmed = line.trim_start();
445            // Cargo error location: "--> path/to/file.rs:line:col"
446            let after_arrow = trimmed.strip_prefix("--> ")?;
447            let file_part = after_arrow.split(':').next()?;
448            if file_part.is_empty() || file_part.starts_with('<') {
449                return None;
450            }
451            let p = std::path::Path::new(file_part);
452            let resolved = if p.is_absolute() {
453                p.to_path_buf()
454            } else {
455                root.join(p)
456            };
457            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
458        })
459        .collect();
460    paths.sort();
461    paths.dedup();
462    paths
463}
464
465fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
466    match mode {
467        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
468        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
469        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
470        _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
471    }
472}
473
/// Fixed instruction text describing the architect handoff format: a short preamble
/// followed by the six required `#` headings. The text has no trailing newline.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow`.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented.",
    )
}
489
/// Canonical prompt text for executing the current plan.
fn implement_current_plan_prompt() -> &'static str {
    const PROMPT: &str = "Implement the current plan.";
    PROMPT
}
493
494fn architect_handoff_operator_note(plan: &crate::tools::plan::PlanHandoff) -> String {
495    format!(
496        "Implementation handoff saved to `.hematite/PLAN.md`.\nNext step: run `/implement-plan` to execute it in `/code`, or use `/code {}` directly.\nPlan: {}",
497        implement_current_plan_prompt().to_ascii_lowercase(),
498        plan.summary_line()
499    )
500}
501
502fn is_current_plan_execution_request(user_input: &str) -> bool {
503    let lower = user_input.trim().to_ascii_lowercase();
504    lower == "/implement-plan"
505        || lower == implement_current_plan_prompt().to_ascii_lowercase()
506        || lower
507            == implement_current_plan_prompt()
508                .trim_end_matches('.')
509                .to_ascii_lowercase()
510        || lower.contains("implement the current plan")
511}
512
513fn is_plan_scoped_tool(name: &str) -> bool {
514    crate::agent::inference::tool_metadata_for_name(name).plan_scope
515}
516
517fn is_current_plan_irrelevant_tool(name: &str) -> bool {
518    !crate::agent::inference::tool_metadata_for_name(name).plan_scope
519}
520
521fn is_non_mutating_plan_step_tool(name: &str) -> bool {
522    let metadata = crate::agent::inference::tool_metadata_for_name(name);
523    metadata.plan_scope && !metadata.mutates_workspace
524}
525
526fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
527    let trimmed = user_input.trim();
528    for (prefix, mode) in [
529        ("/ask", WorkflowMode::Ask),
530        ("/code", WorkflowMode::Code),
531        ("/architect", WorkflowMode::Architect),
532        ("/read-only", WorkflowMode::ReadOnly),
533        ("/auto", WorkflowMode::Auto),
534    ] {
535        if let Some(rest) = trimmed.strip_prefix(prefix) {
536            let rest = rest.trim();
537            if !rest.is_empty() {
538                return Some((mode, rest));
539            }
540        }
541    }
542    None
543}
544
545// Tool catalogue
546
/// Returns the full set of tools exposed to the model.
///
/// Thin public wrapper that delegates to `crate::agent::tool_registry::get_tools`.
pub fn get_tools() -> Vec<ToolDefinition> {
    crate::agent::tool_registry::get_tools()
}
551
/// Central per-conversation state: message history, the inference engine handle,
/// tool definitions, session memory, and the shared handles used while running a turn.
pub struct ConversationManager {
    /// Full conversation history in OpenAI format.
    pub history: Vec<ChatMessage>,
    /// Shared inference engine; queried for the current context length and runtime profile.
    pub engine: Arc<InferenceEngine>,
    /// Tool definitions; also fed into prompt-pressure estimation.
    pub tools: Vec<ToolDefinition>,
    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
    // NOTE(review): `professional`, `brief`, `snark`, and `chaos` look like response-style
    // settings — their exact effect is not visible in this section; confirm at the use sites.
    pub professional: bool,
    pub brief: bool,
    pub snark: u8,
    pub chaos: u8,
    /// Model to use for simple read-only tasks (optional, user-supplied via --fast-model).
    pub fast_model: Option<String>,
    /// Model to use for complex write/build tasks (optional, user-supplied via --think-model).
    pub think_model: Option<String>,
    /// Files where whitespace auto-correction fired this session.
    pub correction_hints: Vec<String>,
    /// Running background summary of pruned older messages.
    pub running_summary: Option<String>,
    /// Live hardware telemetry handle.
    pub gpu_state: Arc<GpuState>,
    /// Local RAG memory — FTS5-indexed project source.
    pub vein: crate::memory::vein::Vein,
    /// Append-only session transcript logger.
    pub transcript: crate::agent::transcript::TranscriptLogger,
    /// Thread-safe cancellation signal for the current agent turn.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Shared Git remote state (for persistent connectivity checks).
    pub git_state: Arc<crate::agent::git_monitor::GitState>,
    /// Reasoning think-mode override. None = let model decide. Some(true) = force /think.
    /// Some(false) = force /no_think (fast mode, 3-5x quicker for simple tasks).
    pub think_mode: Option<bool>,
    /// Currently selected workflow mode.
    workflow_mode: WorkflowMode,
    /// Layer 6: Dynamic Task Context (extracted during compaction)
    pub session_memory: crate::agent::compaction::SessionMemory,
    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
    /// Personality description for the current Rusty soul — used in chat mode system prompt.
    pub soul_personality: String,
    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
    /// Active reasoning summary extracted from the previous model turn (Gemma-4 Native).
    pub reasoning_history: Option<String>,
    /// Layer 8: Active Reference Pinning (Context Locked)
    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
    /// Hard action-grounding state for proof-before-action checks.
    action_grounding: Arc<Mutex<ActionGroundingState>>,
    /// True only during `/code Implement the current plan.` style execution turns.
    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
    /// Typed per-turn recovery attempt tracking.
    recovery_context: RecoveryContext,
    /// L1 context block — hot files summary injected into the system prompt.
    /// Built once after vein init and updated as edits accumulate heat.
    pub l1_context: Option<String>,
    /// Condensed AST repository layout for the active project.
    pub repo_map: Option<String>,
}
607
608impl ConversationManager {
609    fn vein_docs_only_mode(&self) -> bool {
610        !crate::tools::file_ops::is_project_workspace()
611    }
612
613    fn refresh_vein_index(&mut self) -> usize {
614        let count = if self.vein_docs_only_mode() {
615            let root = crate::tools::file_ops::workspace_root();
616            tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
617        } else {
618            tokio::task::block_in_place(|| self.vein.index_project())
619        };
620        self.l1_context = self.vein.l1_context();
621        count
622    }
623
624    fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
625        let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
626        let workspace_mode = if self.vein_docs_only_mode() {
627            "docs-only (outside a project workspace)"
628        } else {
629            "project workspace"
630        };
631        let active_room = snapshot.active_room.as_deref().unwrap_or("none");
632        let mut out = format!(
633            "Vein Inspection\n\
634             Workspace mode: {workspace_mode}\n\
635             Indexed this pass: {indexed_this_pass}\n\
636             Indexed source files: {}\n\
637             Indexed docs: {}\n\
638             Indexed session exchanges: {}\n\
639             Embedded source/doc chunks: {}\n\
640             Embeddings available: {}\n\
641             Active room bias: {active_room}\n\
642             L1 hot-files block: {}\n",
643            snapshot.indexed_source_files,
644            snapshot.indexed_docs,
645            snapshot.indexed_session_exchanges,
646            snapshot.embedded_source_doc_chunks,
647            if snapshot.has_any_embeddings {
648                "yes"
649            } else {
650                "no"
651            },
652            if snapshot.l1_ready {
653                "ready"
654            } else {
655                "not built yet"
656            },
657        );
658
659        if snapshot.hot_files.is_empty() {
660            out.push_str("Hot files: none yet.\n");
661            return out;
662        }
663
664        out.push_str("\nHot files by room:\n");
665        let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
666            std::collections::BTreeMap::new();
667        for file in &snapshot.hot_files {
668            by_room.entry(file.room.as_str()).or_default().push(file);
669        }
670        for (room, files) in by_room {
671            out.push_str(&format!("[{}]\n", room));
672            for file in files {
673                out.push_str(&format!(
674                    "- {} [{} edit{}]\n",
675                    file.path,
676                    file.heat,
677                    if file.heat == 1 { "" } else { "s" }
678                ));
679            }
680        }
681
682        out
683    }
684
685    fn latest_user_prompt(&self) -> Option<&str> {
686        self.history
687            .iter()
688            .rev()
689            .find(|msg| msg.role == "user")
690            .map(|msg| msg.content.as_str())
691    }
692
693    async fn emit_direct_response(
694        &mut self,
695        tx: &mpsc::Sender<InferenceEvent>,
696        raw_user_input: &str,
697        effective_user_input: &str,
698        response: &str,
699    ) {
700        self.history.push(ChatMessage::user(effective_user_input));
701        self.history.push(ChatMessage::assistant_text(response));
702        self.transcript.log_user(raw_user_input);
703        self.transcript.log_agent(response);
704        for chunk in chunk_text(response, 8) {
705            if !chunk.is_empty() {
706                let _ = tx.send(InferenceEvent::Token(chunk)).await;
707            }
708        }
709        let _ = tx.send(InferenceEvent::Done).await;
710        self.trim_history(80);
711        self.refresh_session_memory();
712        self.save_session();
713    }
714
715    async fn emit_operator_checkpoint(
716        &mut self,
717        tx: &mpsc::Sender<InferenceEvent>,
718        state: OperatorCheckpointState,
719        summary: impl Into<String>,
720    ) {
721        let summary = summary.into();
722        self.session_memory
723            .record_checkpoint(state.label(), summary.clone());
724        let _ = tx
725            .send(InferenceEvent::OperatorCheckpoint { state, summary })
726            .await;
727    }
728
729    async fn emit_recovery_recipe_summary(
730        &mut self,
731        tx: &mpsc::Sender<InferenceEvent>,
732        state: impl Into<String>,
733        summary: impl Into<String>,
734    ) {
735        let state = state.into();
736        let summary = summary.into();
737        self.session_memory.record_recovery(state, summary.clone());
738        let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
739    }
740
741    async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
742        let _ = tx
743            .send(InferenceEvent::ProviderStatus {
744                state: ProviderRuntimeState::Live,
745                summary: String::new(),
746            })
747            .await;
748        self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
749            .await;
750    }
751
752    async fn emit_prompt_pressure_for_messages(
753        &self,
754        tx: &mpsc::Sender<InferenceEvent>,
755        messages: &[ChatMessage],
756    ) {
757        let context_length = self.engine.current_context_length();
758        let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
759            crate::agent::inference::estimate_prompt_pressure(
760                messages,
761                &self.tools,
762                context_length,
763            );
764        let _ = tx
765            .send(InferenceEvent::PromptPressure {
766                estimated_input_tokens,
767                reserved_output_tokens,
768                estimated_total_tokens,
769                context_length,
770                percent,
771            })
772            .await;
773    }
774
775    async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
776        let context_length = self.engine.current_context_length();
777        let _ = tx
778            .send(InferenceEvent::PromptPressure {
779                estimated_input_tokens: 0,
780                reserved_output_tokens: 0,
781                estimated_total_tokens: 0,
782                context_length,
783                percent: 0,
784            })
785            .await;
786    }
787
788    async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
789        let context_length = self.engine.current_context_length();
790        let vram_ratio = self.gpu_state.ratio();
791        let config = CompactionConfig::adaptive(context_length, vram_ratio);
792        let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
793        let percent = if config.max_estimated_tokens == 0 {
794            0
795        } else {
796            ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
797        };
798
799        let _ = tx
800            .send(InferenceEvent::CompactionPressure {
801                estimated_tokens,
802                threshold_tokens: config.max_estimated_tokens,
803                percent,
804            })
805            .await;
806    }
807
808    async fn refresh_runtime_profile_and_report(
809        &mut self,
810        tx: &mpsc::Sender<InferenceEvent>,
811        reason: &str,
812    ) -> Option<(String, usize, bool)> {
813        let refreshed = self.engine.refresh_runtime_profile().await;
814        if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
815            let _ = tx
816                .send(InferenceEvent::RuntimeProfile {
817                    model_id: model_id.clone(),
818                    context_length: *context_length,
819                })
820                .await;
821            self.transcript.log_system(&format!(
822                "Runtime profile refresh ({}): model={} ctx={} changed={}",
823                reason, model_id, context_length, changed
824            ));
825        }
826        refreshed
827    }
828
829    pub fn new(
830        engine: Arc<InferenceEngine>,
831        professional: bool,
832        brief: bool,
833        snark: u8,
834        chaos: u8,
835        soul_personality: String,
836        fast_model: Option<String>,
837        think_model: Option<String>,
838        gpu_state: Arc<GpuState>,
839        git_state: Arc<crate::agent::git_monitor::GitState>,
840        swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
841        voice_manager: Arc<crate::ui::voice::VoiceManager>,
842    ) -> Self {
843        let (saved_summary, saved_memory) = load_session_data();
844
845        // Build the initial mcp_manager
846        let mcp_manager = Arc::new(tokio::sync::Mutex::new(
847            crate::agent::mcp_manager::McpManager::new(),
848        ));
849
850        // Build the initial system prompt using the canonical InferenceEngine path.
851        let dynamic_instructions =
852            engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
853
854        let history = vec![ChatMessage::system(&dynamic_instructions)];
855
856        let vein_path = crate::tools::file_ops::workspace_root()
857            .join(".hematite")
858            .join("vein.db");
859        let vein_base_url = engine.base_url.clone();
860        let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
861            .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
862
863        Self {
864            history,
865            engine,
866            tools: get_tools(),
867            mcp_manager,
868            professional,
869            brief,
870            snark,
871            chaos,
872            fast_model,
873            think_model,
874            correction_hints: Vec::new(),
875            running_summary: saved_summary,
876            gpu_state,
877            vein,
878            transcript: crate::agent::transcript::TranscriptLogger::new(),
879            cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
880            git_state,
881            think_mode: None,
882            workflow_mode: WorkflowMode::Auto,
883            session_memory: saved_memory,
884            swarm_coordinator,
885            voice_manager,
886            soul_personality,
887            lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
888                crate::tools::file_ops::workspace_root(),
889            ))),
890            reasoning_history: None,
891            pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
892            action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
893            plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
894            recovery_context: RecoveryContext::default(),
895            l1_context: None,
896            repo_map: None,
897        }
898    }
899
    /// Index the project into The Vein. Call once after construction.
    ///
    /// This is a thin wrapper: it delegates to `refresh_vein_index` and returns
    /// that call's `usize` result unchanged.
    // NOTE(review): the earlier comment said this uses `block_in_place` so the
    // tokio runtime thread isn't parked; that would live inside
    // `refresh_vein_index`, which is not visible here — confirm.
    pub fn initialize_vein(&mut self) -> usize {
        self.refresh_vein_index()
    }
905
906    /// Generate the AST Repo Map. Call once after construction or when resetting context.
907    pub fn initialize_repo_map(&mut self) {
908        if !self.vein_docs_only_mode() {
909            let root = crate::tools::file_ops::workspace_root();
910            let hot = self.vein.hot_files_weighted(10);
911            let gen = crate::memory::repo_map::RepoMapGenerator::new(&root)
912                .with_hot_files(&hot);
913            match tokio::task::block_in_place(|| gen.generate()) {
914                Ok(map) => self.repo_map = Some(map),
915                Err(e) => {
916                    self.repo_map = Some(format!("Repo Map generation failed: {}", e));
917                }
918            }
919        }
920    }
921
    /// Re-generate the repo map after a file edit so rankings stay fresh.
    /// Lightweight (~100-200ms) — called after successful mutations.
    ///
    /// Implemented as a plain call to `initialize_repo_map`, so the same
    /// docs-only-mode skip and error handling apply.
    fn refresh_repo_map(&mut self) {
        self.initialize_repo_map();
    }
927
928    fn save_session(&self) {
929        let path = session_path();
930        if let Some(parent) = path.parent() {
931            let _ = std::fs::create_dir_all(parent);
932        }
933        let saved = SavedSession {
934            running_summary: self.running_summary.clone(),
935            session_memory: self.session_memory.clone(),
936        };
937        if let Ok(json) = serde_json::to_string(&saved) {
938            let _ = std::fs::write(&path, json);
939        }
940    }
941
942    fn save_empty_session(&self) {
943        let path = session_path();
944        if let Some(parent) = path.parent() {
945            let _ = std::fs::create_dir_all(parent);
946        }
947        let saved = SavedSession {
948            running_summary: None,
949            session_memory: crate::agent::compaction::SessionMemory::default(),
950        };
951        if let Ok(json) = serde_json::to_string(&saved) {
952            let _ = std::fs::write(&path, json);
953        }
954    }
955
956    fn refresh_session_memory(&mut self) {
957        let current_plan = self.session_memory.current_plan.clone();
958        let previous_memory = self.session_memory.clone();
959        self.session_memory = compaction::extract_memory(&self.history);
960        self.session_memory.current_plan = current_plan;
961        self.session_memory
962            .inherit_runtime_ledger_from(&previous_memory);
963    }
964
965    fn build_chat_system_prompt(&self) -> String {
966        let species = &self.engine.species;
967        let personality = &self.soul_personality;
968        format!(
969            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
970             {personality}\n\n\
971             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
972             Rules:\n\
973             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
974             - Answer directly. One paragraph is usually right.\n\
975             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
976             - Don't narrate your reasoning or mention tool names unprompted.\n\
977             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
978             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
979             - If the user wants the full coding harness, they can type `/agent`.\n",
980        )
981    }
982
983    fn append_session_handoff(&self, system_msg: &mut String) {
984        let has_summary = self
985            .running_summary
986            .as_ref()
987            .map(|s| !s.trim().is_empty())
988            .unwrap_or(false);
989        let has_memory = self.session_memory.has_signal();
990
991        if !has_summary && !has_memory {
992            return;
993        }
994
995        system_msg.push_str(
996            "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
997             This is compact carry-over from earlier work on this machine.\n\
998             Use it only when it helps the current request.\n\
999             Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
1000        );
1001
1002        if has_memory {
1003            system_msg.push_str("\n## Active Task Memory\n");
1004            system_msg.push_str(&self.session_memory.to_prompt());
1005        }
1006
1007        if let Some(summary) = self.running_summary.as_deref() {
1008            if !summary.trim().is_empty() {
1009                system_msg.push_str("\n## Compacted Session Summary\n");
1010                system_msg.push_str(summary);
1011                system_msg.push('\n');
1012            }
1013        }
1014    }
1015
    /// Replaces the active workflow mode with `mode`.
    ///
    /// Only the `workflow_mode` field is touched; no other state is reset here.
    fn set_workflow_mode(&mut self, mode: WorkflowMode) {
        self.workflow_mode = mode;
    }
1019
1020    fn current_plan_summary(&self) -> Option<String> {
1021        self.session_memory
1022            .current_plan
1023            .as_ref()
1024            .filter(|plan| plan.has_signal())
1025            .map(|plan| plan.summary_line())
1026    }
1027
1028    fn current_plan_allowed_paths(&self) -> Vec<String> {
1029        self.session_memory
1030            .current_plan
1031            .as_ref()
1032            .map(|plan| {
1033                plan.target_files
1034                    .iter()
1035                    .map(|path| normalize_workspace_path(path))
1036                    .collect()
1037            })
1038            .unwrap_or_default()
1039    }
1040
1041    fn persist_architect_handoff(
1042        &mut self,
1043        response: &str,
1044    ) -> Option<crate::tools::plan::PlanHandoff> {
1045        if self.workflow_mode != WorkflowMode::Architect {
1046            return None;
1047        }
1048        let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1049            return None;
1050        };
1051        let _ = crate::tools::plan::save_plan_handoff(&plan);
1052        self.session_memory.current_plan = Some(plan.clone());
1053        Some(plan)
1054    }
1055
1056    async fn begin_grounded_turn(&self) -> u64 {
1057        let mut state = self.action_grounding.lock().await;
1058        state.turn_index += 1;
1059        state.turn_index
1060    }
1061
1062    async fn reset_action_grounding(&self) {
1063        let mut state = self.action_grounding.lock().await;
1064        *state = ActionGroundingState::default();
1065    }
1066
1067    async fn record_read_observation(&self, path: &str) {
1068        let normalized = normalize_workspace_path(path);
1069        let mut state = self.action_grounding.lock().await;
1070        let turn = state.turn_index;
1071        // read_file returns full file content with line numbers — sufficient for
1072        // the model to know exact text before editing, so it satisfies the
1073        // line-inspection grounding check too.
1074        state.observed_paths.insert(normalized.clone(), turn);
1075        state.inspected_paths.insert(normalized, turn);
1076    }
1077
1078    async fn record_line_inspection(&self, path: &str) {
1079        let normalized = normalize_workspace_path(path);
1080        let mut state = self.action_grounding.lock().await;
1081        let turn = state.turn_index;
1082        state.observed_paths.insert(normalized.clone(), turn);
1083        state.inspected_paths.insert(normalized, turn);
1084    }
1085
1086    async fn record_verify_build_result(&self, ok: bool, output: &str) {
1087        let mut state = self.action_grounding.lock().await;
1088        let turn = state.turn_index;
1089        state.last_verify_build_turn = Some(turn);
1090        state.last_verify_build_ok = ok;
1091        if ok {
1092            state.code_changed_since_verify = false;
1093            state.last_failed_build_paths.clear();
1094        } else {
1095            state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1096        }
1097    }
1098
    /// Forwards a verification outcome (`ok` plus a summary) to
    /// `session_memory.record_verification`.
    fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
        self.session_memory.record_verification(ok, summary);
    }
1102
1103    async fn record_successful_mutation(&self, path: Option<&str>) {
1104        let mut state = self.action_grounding.lock().await;
1105        state.code_changed_since_verify = match path {
1106            Some(p) => is_code_like_path(p),
1107            None => true,
1108        };
1109    }
1110
    /// Decides whether the tool call `name` with JSON arguments `args` may run.
    ///
    /// Returns `Ok(())` when every gate passes, or `Err(message)` with an
    /// "Action blocked: …" explanation from the first gate that fails. Gates are
    /// evaluated in this order:
    ///
    /// 1. Current-plan execution limits (only while `plan_execution_active` is set).
    /// 2. Read-only workflow limits (`auto_pin_context`, destructive tools).
    /// 3. Docs-edit guard and recent-evidence requirements for existing target files.
    /// 4. External MCP mutation / workspace-read tools.
    /// 5. Broken-build phase gate for file mutations.
    /// 6. `git_commit` / `git_push` verification gate.
    /// 7. `shell` host-inspection redirect and `reason` requirement for risky commands.
    async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
        // Gate 1: restrictions that apply only while a saved plan is being executed.
        if self
            .plan_execution_active
            .load(std::sync::atomic::Ordering::SeqCst)
        {
            if is_current_plan_irrelevant_tool(name) {
                return Err(format!(
                    "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
                    name
                ));
            }



            // Plan-scoped tools must point at one of the plan's target files.
            // The check is skipped when the plan lists no target files.
            if is_plan_scoped_tool(name) {
                let allowed_paths = self.current_plan_allowed_paths();
                if !allowed_paths.is_empty() {
                    let in_allowed = match name {
                        // Every entry in a non-empty `paths` array must be an allowed path.
                        "auto_pin_context" => args
                            .get("paths")
                            .and_then(|v| v.as_array())
                            .map(|paths| {
                                !paths.is_empty()
                                    && paths.iter().all(|v| {
                                        v.as_str()
                                            .map(normalize_workspace_path)
                                            .map(|p| allowed_paths.contains(&p))
                                            .unwrap_or(false)
                                    })
                            })
                            .unwrap_or(false),
                        // The `path` argument itself must equal an allowed path;
                        // a missing or non-string `path` is rejected.
                        "grep_files" | "list_files" => args
                            .get("path")
                            .and_then(|v| v.as_str())
                            .map(normalize_workspace_path)
                            .map(|p| allowed_paths.contains(&p))
                            .unwrap_or(false),
                        // Any other plan-scoped tool: resolve its target via
                        // `action_target_path`; no resolvable target is rejected.
                        _ => action_target_path(name, args)
                            .map(|p| allowed_paths.contains(&p))
                            .unwrap_or(false),
                    };

                    if !in_allowed {
                        let allowed = allowed_paths
                            .iter()
                            .map(|p| format!("`{}`", p))
                            .collect::<Vec<_>>()
                            .join(", ");
                        return Err(format!(
                            "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
                            allowed
                        ));
                    }
                }
            }

            // During plan execution these edit tools need an inspection entry
            // for the target recorded within the last 3 turns.
            if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
                if let Some(target) = action_target_path(name, args) {
                    let state = self.action_grounding.lock().await;
                    let recently_inspected = state
                        .inspected_paths
                        .get(&target)
                        .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                        .unwrap_or(false);
                    // Release the grounding lock before building the error.
                    drop(state);
                    if !recently_inspected {
                        return Err(format!(
                            "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
                            name, target
                        ));
                    }
                }
            }
        }

        // Gate 2a: pinning more context is not allowed in read-only workflows.
        if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
            return Err(
                "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
                    .to_string(),
            );
        }

        // Gate 2b: destructive tools are blocked in read-only workflows, except
        // `shell` commands that the risk classifier rates as `Safe`.
        if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
            if name == "shell" {
                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
                let risk = crate::tools::guard::classify_bash_risk(command);
                if !matches!(risk, crate::tools::RiskLevel::Safe) {
                    return Err(format!(
                        "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
                        self.workflow_mode.label()
                    ));
                }
            } else {
                return Err(format!(
                    "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
                    self.workflow_mode.label()
                ));
            }
        }

        // Gate 3: checks that depend on the tool's resolved target path.
        let normalized_target = action_target_path(name, args);
        if let Some(target) = normalized_target.as_deref() {
            // Docs guard: file-writing tools may not touch a docs file unless
            // the latest user prompt asked for it (per
            // `docs_edit_without_explicit_request`).
            if matches!(
                name,
                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
            ) {
                if let Some(prompt) = self.latest_user_prompt() {
                    if docs_edit_without_explicit_request(prompt, target) {
                        return Err(format!(
                            "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
                            target
                        ));
                    }
                }
            }
            // Evidence requirements only apply to paths that already exist on disk.
            let path_exists = std::path::Path::new(target).exists();
            if path_exists {
                // The grounding guard stays held for the rest of this block;
                // the pinned-files guard is released right after the lookup.
                let state = self.action_grounding.lock().await;
                let pinned = self.pinned_files.lock().await;
                let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
                drop(pinned);

                // edit_file and multi_search_replace match text exactly, so they need a
                // tighter evidence bar than other tools. They pass when any of these holds:
                // an inspection entry for the target within the last 3 turns (both
                // `record_line_inspection` and `record_read_observation` write such
                // entries), an observation recorded in the *same* turn, or the file
                // being pinned.
                let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
                let recently_inspected = state
                    .inspected_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                    .unwrap_or(false);
                let same_turn_read = state
                    .observed_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
                    .unwrap_or(false);
                let recent_observed = state
                    .observed_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                    .unwrap_or(false);

                if needs_exact_window {
                    if !recently_inspected && !same_turn_read && !pinned_match {
                        return Err(format!(
                            "Action blocked: `{}` on '{}' requires a line-level inspection first. \
                             Use `inspect_lines` on the target region to get the exact current text \
                             (whitespace and indentation included), then retry the edit.",
                            name, target
                        ));
                    }
                // Every other tool with an existing target needs an observation
                // within the last 3 turns, or a pin.
                } else if !recent_observed && !pinned_match {
                    return Err(format!(
                        "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
                        name, target
                    ));
                }
            }
        }

        // Gate 4: external MCP tools that mutate, or that read the workspace,
        // are always rejected here in favor of the built-in tools.
        if is_mcp_mutating_tool(name) {
            return Err(format!(
                "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
                name
            ));
        }

        if is_mcp_workspace_read_tool(name) {
            return Err(format!(
                "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
                name
            ));
        }

        // Phase gate: if the build is broken, constrain edits to files that cargo flagged.
        // This prevents the model from wandering to unrelated files after a failed verify.
        // It only fires when code changed since the last verify, that verify failed,
        // and at least one failing path was parsed from its output.
        if matches!(
            name,
            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
        ) {
            if let Some(target) = normalized_target.as_deref() {
                let state = self.action_grounding.lock().await;
                if state.code_changed_since_verify
                    && !state.last_verify_build_ok
                    && !state.last_failed_build_paths.is_empty()
                    && !state.last_failed_build_paths.iter().any(|p| p == target)
                {
                    let files = state
                        .last_failed_build_paths
                        .iter()
                        .map(|p| format!("`{}`", p))
                        .collect::<Vec<_>>()
                        .join(", ");
                    return Err(format!(
                        "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
                        files
                    ));
                }
            }
        }

        // Gate 6: block commit/push when code changed since the last verify and
        // the last recorded verify result is not OK.
        if name == "git_commit" || name == "git_push" {
            let state = self.action_grounding.lock().await;
            if state.code_changed_since_verify && !state.last_verify_build_ok {
                return Err(format!(
                    "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
                    name
                ));
            }
        }

        // Gate 7: shell-specific rules.
        if name == "shell" {
            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
            // If the latest prompt maps to a host-inspection topic and the command
            // looks like structured host inspection, steer toward `inspect_host`.
            if let Some(prompt) = self.latest_user_prompt() {
                if let Some(topic) = preferred_host_inspection_topic(prompt) {
                    if shell_looks_like_structured_host_inspection(command) {
                        return Err(format!(
                            "Action blocked: this is a host-inspection question. Prefer `inspect_host(topic: \"{}\")` instead of raw `shell` for PATH, toolchains, environment/package-manager health, network state, service state, running processes, desktop, Downloads, listening ports, repo-doctor checks, or directory/disk summaries. Use `shell` only if `inspect_host` cannot answer the question directly.",
                            topic
                        ));
                    }
                }
            }
            // Commands not rated `Safe` must carry a non-blank `reason` argument.
            let reason = args
                .get("reason")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .trim();
            let risk = crate::tools::guard::classify_bash_risk(command);
            if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
                return Err(
                    "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
                        .to_string(),
                );
            }
        }

        Ok(())
    }
1351
1352    fn build_action_receipt(
1353        &self,
1354        name: &str,
1355        args: &Value,
1356        output: &str,
1357        is_error: bool,
1358    ) -> Option<ChatMessage> {
1359        if is_error || !is_destructive_tool(name) {
1360            return None;
1361        }
1362
1363        let mut receipt = String::from("[ACTION RECEIPT]\n");
1364        receipt.push_str(&format!("- tool: {}\n", name));
1365        if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1366            receipt.push_str(&format!("- target: {}\n", path));
1367        }
1368        if name == "shell" {
1369            if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1370                receipt.push_str(&format!("- command: {}\n", command));
1371            }
1372            if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1373                if !reason.trim().is_empty() {
1374                    receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1375                }
1376            }
1377        }
1378        let first_line = output.lines().next().unwrap_or(output).trim();
1379        receipt.push_str(&format!("- outcome: {}\n", first_line));
1380        Some(ChatMessage::system(&receipt))
1381    }
1382
1383    fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1384        self.tools
1385            .retain(|tool| !tool.function.name.starts_with("mcp__"));
1386        self.tools
1387            .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1388                tool_type: "function".into(),
1389                function: ToolFunction {
1390                    name: tool.name.clone(),
1391                    description: tool.description.clone().unwrap_or_default(),
1392                    parameters: tool.input_schema.clone(),
1393                },
1394                metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1395            }));
1396    }
1397
1398    async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1399        let summary = {
1400            let mcp = self.mcp_manager.lock().await;
1401            mcp.runtime_report()
1402        };
1403        let _ = tx
1404            .send(InferenceEvent::McpStatus {
1405                state: summary.state,
1406                summary: summary.summary,
1407            })
1408            .await;
1409    }
1410
1411    async fn refresh_mcp_tools(
1412        &mut self,
1413        tx: &mpsc::Sender<InferenceEvent>,
1414    ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1415        let mcp_tools = {
1416            let mut mcp = self.mcp_manager.lock().await;
1417            match mcp.initialize_all().await {
1418                Ok(()) => mcp.discover_tools().await,
1419                Err(e) => {
1420                    drop(mcp);
1421                    self.replace_mcp_tool_definitions(&[]);
1422                    self.emit_mcp_runtime_status(tx).await;
1423                    return Err(e.into());
1424                }
1425            }
1426        };
1427
1428        self.replace_mcp_tool_definitions(&mcp_tools);
1429        self.emit_mcp_runtime_status(tx).await;
1430        Ok(mcp_tools)
1431    }
1432
1433    /// Spawns and initializes all configured MCP servers, discovering their tools.
1434    pub async fn initialize_mcp(
1435        &mut self,
1436        tx: &mpsc::Sender<InferenceEvent>,
1437    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1438        let _ = self.refresh_mcp_tools(tx).await?;
1439        Ok(())
1440    }
1441
1442    /// Run one user turn through the full agentic loop.
1443    ///
1444    /// Adds the user message, calls the model, executes any tools, and loops
1445    /// until the model produces a final text reply.  All progress is streamed
1446    /// as `InferenceEvent` values via `tx`.
1447    pub async fn run_turn(
1448        &mut self,
1449        user_turn: &UserTurn,
1450        tx: mpsc::Sender<InferenceEvent>,
1451        yolo: bool,
1452    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1453        let user_input = user_turn.text.as_str();
1454        // ── Fast-path reset commands: handled locally, no network I/O needed ──
1455        if user_input.trim() == "/new" {
1456            self.history.clear();
1457            self.reasoning_history = None;
1458            self.session_memory.clear();
1459            self.running_summary = None;
1460            self.correction_hints.clear();
1461            self.pinned_files.lock().await.clear();
1462            self.reset_action_grounding().await;
1463            reset_task_files();
1464            let _ = std::fs::remove_file(session_path());
1465            self.save_empty_session();
1466            self.emit_compaction_pressure(&tx).await;
1467            self.emit_prompt_pressure_idle(&tx).await;
1468            for chunk in chunk_text(
1469                "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1470                8,
1471            ) {
1472                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1473            }
1474            let _ = tx.send(InferenceEvent::Done).await;
1475            return Ok(());
1476        }
1477
1478        if user_input.trim() == "/forget" {
1479            self.history.clear();
1480            self.reasoning_history = None;
1481            self.session_memory.clear();
1482            self.running_summary = None;
1483            self.correction_hints.clear();
1484            self.pinned_files.lock().await.clear();
1485            self.reset_action_grounding().await;
1486            reset_task_files();
1487            purge_persistent_memory();
1488            tokio::task::block_in_place(|| self.vein.reset());
1489            let _ = std::fs::remove_file(session_path());
1490            self.save_empty_session();
1491            self.emit_compaction_pressure(&tx).await;
1492            self.emit_prompt_pressure_idle(&tx).await;
1493            for chunk in chunk_text(
1494                "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1495                8,
1496            ) {
1497                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1498            }
1499            let _ = tx.send(InferenceEvent::Done).await;
1500            return Ok(());
1501        }
1502
1503        if user_input.trim() == "/vein-inspect" {
1504            let indexed = self.refresh_vein_index();
1505            let report = self.build_vein_inspection_report(indexed);
1506            let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1507            let _ = tx
1508                .send(InferenceEvent::VeinStatus {
1509                    file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1510                    embedded_count: snapshot.embedded_source_doc_chunks,
1511                    docs_only: self.vein_docs_only_mode(),
1512                })
1513                .await;
1514            for chunk in chunk_text(&report, 8) {
1515                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1516            }
1517            let _ = tx.send(InferenceEvent::Done).await;
1518            return Ok(());
1519        }
1520
1521        if user_input.trim() == "/workspace-profile" {
1522            let root = crate::tools::file_ops::workspace_root();
1523            let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1524            let report = crate::agent::workspace_profile::profile_report(&root);
1525            for chunk in chunk_text(&report, 8) {
1526                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1527            }
1528            let _ = tx.send(InferenceEvent::Done).await;
1529            return Ok(());
1530        }
1531
1532        if user_input.trim() == "/vein-reset" {
1533            tokio::task::block_in_place(|| self.vein.reset());
1534            let _ = tx
1535                .send(InferenceEvent::VeinStatus {
1536                    file_count: 0,
1537                    embedded_count: 0,
1538                    docs_only: self.vein_docs_only_mode(),
1539                })
1540                .await;
1541            for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1542                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1543            }
1544            let _ = tx.send(InferenceEvent::Done).await;
1545            return Ok(());
1546        }
1547
1548        // Reload config every turn (edits apply immediately, no restart needed).
1549        let config = crate::agent::config::load_config();
1550        self.recovery_context.clear();
1551        let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1552        if !manual_runtime_refresh {
1553            if let Some((model_id, context_length, changed)) = self
1554                .refresh_runtime_profile_and_report(&tx, "turn_start")
1555                .await
1556            {
1557                if changed {
1558                    let _ = tx
1559                        .send(InferenceEvent::Thought(format!(
1560                            "Runtime refresh: using model `{}` with CTX {} for this turn.",
1561                            model_id, context_length
1562                        )))
1563                        .await;
1564                }
1565            }
1566        }
1567        self.emit_compaction_pressure(&tx).await;
1568        let current_model = self.engine.current_model();
1569        self.engine.set_gemma_native_formatting(
1570            crate::agent::config::effective_gemma_native_formatting(&config, &current_model),
1571        );
1572        let _turn_id = self.begin_grounded_turn().await;
1573        let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1574        let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1575            Ok(tools) => tools,
1576            Err(e) => {
1577                let _ = tx
1578                    .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1579                    .await;
1580                Vec::new()
1581            }
1582        };
1583
1584        // Apply config model overrides (config takes precedence over CLI flags).
1585        let effective_fast = config
1586            .fast_model
1587            .clone()
1588            .or_else(|| self.fast_model.clone());
1589        let effective_think = config
1590            .think_model
1591            .clone()
1592            .or_else(|| self.think_model.clone());
1593
1594        // ── /lsp: start language servers manually if needed ──────────────────
1595        if user_input.trim() == "/lsp" {
1596            let mut lsp = self.lsp_manager.lock().await;
1597            match lsp.start_servers().await {
1598                Ok(_) => {
1599                    let _ = tx
1600                        .send(InferenceEvent::MutedToken(
1601                            "LSP: Servers Initialized OK.".to_string(),
1602                        ))
1603                        .await;
1604                }
1605                Err(e) => {
1606                    let _ = tx
1607                        .send(InferenceEvent::Error(format!(
1608                            "LSP: Failed to start servers - {}",
1609                            e
1610                        )))
1611                        .await;
1612                }
1613            }
1614            let _ = tx.send(InferenceEvent::Done).await;
1615            return Ok(());
1616        }
1617
1618        if user_input.trim() == "/runtime-refresh" {
1619            match self
1620                .refresh_runtime_profile_and_report(&tx, "manual_command")
1621                .await
1622            {
1623                Some((model_id, context_length, changed)) => {
1624                    let msg = if changed {
1625                        format!(
1626                            "Runtime profile refreshed. Model: {} | CTX: {}",
1627                            model_id, context_length
1628                        )
1629                    } else {
1630                        format!(
1631                            "Runtime profile unchanged. Model: {} | CTX: {}",
1632                            model_id, context_length
1633                        )
1634                    };
1635                    for chunk in chunk_text(&msg, 8) {
1636                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1637                    }
1638                }
1639                None => {
1640                    let _ = tx
1641                        .send(InferenceEvent::Error(
1642                            "Runtime refresh failed: LM Studio profile could not be read."
1643                                .to_string(),
1644                        ))
1645                        .await;
1646                }
1647            }
1648            let _ = tx.send(InferenceEvent::Done).await;
1649            return Ok(());
1650        }
1651
1652        if user_input.trim() == "/ask" {
1653            self.set_workflow_mode(WorkflowMode::Ask);
1654            for chunk in chunk_text(
1655                "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1656                8,
1657            ) {
1658                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1659            }
1660            let _ = tx.send(InferenceEvent::Done).await;
1661            return Ok(());
1662        }
1663
1664        if user_input.trim() == "/code" {
1665            self.set_workflow_mode(WorkflowMode::Code);
1666            let mut message =
1667                "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1668            if let Some(plan) = self.current_plan_summary() {
1669                message.push_str(&format!(" Current plan: {plan}."));
1670            }
1671            for chunk in chunk_text(&message, 8) {
1672                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1673            }
1674            let _ = tx.send(InferenceEvent::Done).await;
1675            return Ok(());
1676        }
1677
1678        if user_input.trim() == "/architect" {
1679            self.set_workflow_mode(WorkflowMode::Architect);
1680            let mut message =
1681                "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement. When the handoff is ready, use `/implement-plan` or switch to `/code` to execute it.".to_string();
1682            if let Some(plan) = self.current_plan_summary() {
1683                message.push_str(&format!(" Existing plan: {plan}."));
1684            }
1685            for chunk in chunk_text(&message, 8) {
1686                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1687            }
1688            let _ = tx.send(InferenceEvent::Done).await;
1689            return Ok(());
1690        }
1691
1692        if user_input.trim() == "/read-only" {
1693            self.set_workflow_mode(WorkflowMode::ReadOnly);
1694            for chunk in chunk_text(
1695                "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1696                8,
1697            ) {
1698                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1699            }
1700            let _ = tx.send(InferenceEvent::Done).await;
1701            return Ok(());
1702        }
1703
1704        if user_input.trim() == "/auto" {
1705            self.set_workflow_mode(WorkflowMode::Auto);
1706            for chunk in chunk_text(
1707                "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1708                8,
1709            ) {
1710                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1711            }
1712            let _ = tx.send(InferenceEvent::Done).await;
1713            return Ok(());
1714        }
1715
1716        if user_input.trim() == "/chat" {
1717            self.set_workflow_mode(WorkflowMode::Chat);
1718            let _ = tx.send(InferenceEvent::Done).await;
1719            return Ok(());
1720        }
1721
1722        if user_input.trim() == "/reroll" {
1723            let soul = crate::ui::hatch::generate_soul_random();
1724            self.snark = soul.snark;
1725            self.chaos = soul.chaos;
1726            self.soul_personality = soul.personality.clone();
1727            // Update the engine's species name so build_chat_system_prompt uses it
1728            // SAFETY: engine is Arc but species is a plain String field we own logically.
1729            // We use Arc::get_mut which only succeeds if this is the only strong ref.
1730            // If it fails (swarm workers hold refs), we fall back to a best-effort clone approach.
1731            let species = soul.species.clone();
1732            if let Some(eng) = Arc::get_mut(&mut self.engine) {
1733                eng.species = species.clone();
1734            }
1735            let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1736            let _ = tx
1737                .send(InferenceEvent::SoulReroll {
1738                    species: soul.species.clone(),
1739                    rarity: soul.rarity.label().to_string(),
1740                    shiny: soul.shiny,
1741                    personality: soul.personality.clone(),
1742                })
1743                .await;
1744            for chunk in chunk_text(
1745                &format!(
1746                    "A new companion awakens!\n[{}{}] {} — \"{}\"",
1747                    soul.rarity.label(),
1748                    shiny_tag,
1749                    soul.species,
1750                    soul.personality
1751                ),
1752                8,
1753            ) {
1754                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1755            }
1756            let _ = tx.send(InferenceEvent::Done).await;
1757            return Ok(());
1758        }
1759
1760        if user_input.trim() == "/agent" {
1761            self.set_workflow_mode(WorkflowMode::Auto);
1762            let _ = tx.send(InferenceEvent::Done).await;
1763            return Ok(());
1764        }
1765
1766        let implement_plan_alias = user_input.trim() == "/implement-plan";
1767        if implement_plan_alias
1768            && !self
1769                .session_memory
1770                .current_plan
1771                .as_ref()
1772                .map(|plan| plan.has_signal())
1773                .unwrap_or(false)
1774        {
1775            for chunk in chunk_text(
1776                "No saved architect handoff is active. Run `/architect` first, or switch to `/code` with an explicit implementation request.",
1777                8,
1778            ) {
1779                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1780            }
1781            let _ = tx.send(InferenceEvent::Done).await;
1782            return Ok(());
1783        }
1784
1785        let mut effective_user_input = if implement_plan_alias {
1786            self.set_workflow_mode(WorkflowMode::Code);
1787            implement_current_plan_prompt().to_string()
1788        } else {
1789            user_input.trim().to_string()
1790        };
1791        if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1792            self.set_workflow_mode(mode);
1793            effective_user_input = rest.to_string();
1794        }
1795        let transcript_user_input = if implement_plan_alias {
1796            transcript_user_turn_text(user_turn, "/implement-plan")
1797        } else {
1798            transcript_user_turn_text(user_turn, &effective_user_input)
1799        };
1800        effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1801        let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1802            && is_current_plan_execution_request(&effective_user_input)
1803            && self
1804                .session_memory
1805                .current_plan
1806                .as_ref()
1807                .map(|plan| plan.has_signal())
1808                .unwrap_or(false);
1809        self.plan_execution_active
1810            .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1811        let _plan_execution_guard = PlanExecutionGuard {
1812            flag: self.plan_execution_active.clone(),
1813        };
1814        let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1815
1816        // ── /think / /no_think: reasoning budget toggle ──────────────────────
1817        if let Some(answer_kind) = intent.direct_answer {
1818            match answer_kind {
1819                DirectAnswerKind::About => {
1820                    let response = build_about_answer();
1821                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1822                        .await;
1823                    return Ok(());
1824                }
1825                DirectAnswerKind::LanguageCapability => {
1826                    let response = build_language_capability_answer();
1827                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1828                        .await;
1829                    return Ok(());
1830                }
1831                DirectAnswerKind::UnsafeWorkflowPressure => {
1832                    let response = build_unsafe_workflow_pressure_answer();
1833                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1834                        .await;
1835                    return Ok(());
1836                }
1837                DirectAnswerKind::SessionMemory => {
1838                    let response = build_session_memory_answer();
1839                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1840                        .await;
1841                    return Ok(());
1842                }
1843                DirectAnswerKind::RecoveryRecipes => {
1844                    let response = build_recovery_recipes_answer();
1845                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1846                        .await;
1847                    return Ok(());
1848                }
1849                DirectAnswerKind::McpLifecycle => {
1850                    let response = build_mcp_lifecycle_answer();
1851                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1852                        .await;
1853                    return Ok(());
1854                }
1855                DirectAnswerKind::AuthorizationPolicy => {
1856                    let response = build_authorization_policy_answer();
1857                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1858                        .await;
1859                    return Ok(());
1860                }
1861                DirectAnswerKind::ToolClasses => {
1862                    let response = build_tool_classes_answer();
1863                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1864                        .await;
1865                    return Ok(());
1866                }
1867                DirectAnswerKind::ToolRegistryOwnership => {
1868                    let response = build_tool_registry_ownership_answer();
1869                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1870                        .await;
1871                    return Ok(());
1872                }
1873                DirectAnswerKind::SessionResetSemantics => {
1874                    let response = build_session_reset_semantics_answer();
1875                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1876                        .await;
1877                    return Ok(());
1878                }
1879                DirectAnswerKind::ProductSurface => {
1880                    let response = build_product_surface_answer();
1881                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1882                        .await;
1883                    return Ok(());
1884                }
1885                DirectAnswerKind::ReasoningSplit => {
1886                    let response = build_reasoning_split_answer();
1887                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1888                        .await;
1889                    return Ok(());
1890                }
1891                DirectAnswerKind::Identity => {
1892                    let response = build_identity_answer();
1893                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1894                        .await;
1895                    return Ok(());
1896                }
1897                DirectAnswerKind::WorkflowModes => {
1898                    let response = build_workflow_modes_answer();
1899                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1900                        .await;
1901                    return Ok(());
1902                }
1903                DirectAnswerKind::GemmaNative => {
1904                    let response = build_gemma_native_answer();
1905                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1906                        .await;
1907                    return Ok(());
1908                }
1909                DirectAnswerKind::GemmaNativeSettings => {
1910                    let response = build_gemma_native_settings_answer();
1911                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1912                        .await;
1913                    return Ok(());
1914                }
1915                DirectAnswerKind::VerifyProfiles => {
1916                    let response = build_verify_profiles_answer();
1917                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1918                        .await;
1919                    return Ok(());
1920                }
1921                DirectAnswerKind::Toolchain => {
1922                    let lower = effective_user_input.to_lowercase();
1923                    let topic = if (lower.contains("voice output") || lower.contains("voice"))
1924                        && (lower.contains("lag")
1925                            || lower.contains("behind visible text")
1926                            || lower.contains("latency"))
1927                    {
1928                        "voice_latency_plan"
1929                    } else {
1930                        "all"
1931                    };
1932                    let response =
1933                        crate::tools::toolchain::describe_toolchain(&serde_json::json!({
1934                            "topic": topic,
1935                            "question": effective_user_input,
1936                        }))
1937                        .await
1938                        .unwrap_or_else(|e| format!("Error: {}", e));
1939                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1940                        .await;
1941                    return Ok(());
1942                }
1943                DirectAnswerKind::ArchitectSessionResetPlan => {
1944                    let plan = build_architect_session_reset_plan();
1945                    let response = plan.to_markdown();
1946                    let _ = crate::tools::plan::save_plan_handoff(&plan);
1947                    self.session_memory.current_plan = Some(plan);
1948                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1949                        .await;
1950                    return Ok(());
1951                }
1952            }
1953        }
1954
1955        if matches!(
1956            self.workflow_mode,
1957            WorkflowMode::Ask | WorkflowMode::ReadOnly
1958        ) && looks_like_mutation_request(&effective_user_input)
1959        {
1960            let response = build_mode_redirect_answer(self.workflow_mode);
1961            self.history.push(ChatMessage::user(&effective_user_input));
1962            self.history.push(ChatMessage::assistant_text(&response));
1963            self.transcript.log_user(&transcript_user_input);
1964            self.transcript.log_agent(&response);
1965            for chunk in chunk_text(&response, 8) {
1966                if !chunk.is_empty() {
1967                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
1968                }
1969            }
1970            let _ = tx.send(InferenceEvent::Done).await;
1971            self.trim_history(80);
1972            self.refresh_session_memory();
1973            self.save_session();
1974            return Ok(());
1975        }
1976
1977        if user_input.trim() == "/think" {
1978            self.think_mode = Some(true);
1979            for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
1980                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1981            }
1982            let _ = tx.send(InferenceEvent::Done).await;
1983            return Ok(());
1984        }
1985        if user_input.trim() == "/no_think" {
1986            self.think_mode = Some(false);
1987            for chunk in chunk_text(
1988                "Think mode: OFF — fast mode enabled (no chain-of-thought).",
1989                8,
1990            ) {
1991                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1992            }
1993            let _ = tx.send(InferenceEvent::Done).await;
1994            return Ok(());
1995        }
1996
1997        // ── /pin: add file to active context ────────────────────────────────
1998        if user_input.trim_start().starts_with("/pin ") {
1999            let path = user_input.trim_start()[5..].trim();
2000            match std::fs::read_to_string(path) {
2001                Ok(content) => {
2002                    self.pinned_files
2003                        .lock()
2004                        .await
2005                        .insert(path.to_string(), content);
2006                    let msg = format!(
2007                        "Pinned: {} — this file is now locked in model context.",
2008                        path
2009                    );
2010                    for chunk in chunk_text(&msg, 8) {
2011                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
2012                    }
2013                }
2014                Err(e) => {
2015                    let _ = tx
2016                        .send(InferenceEvent::Error(format!(
2017                            "Failed to pin {}: {}",
2018                            path, e
2019                        )))
2020                        .await;
2021                }
2022            }
2023            let _ = tx.send(InferenceEvent::Done).await;
2024            return Ok(());
2025        }
2026
2027        // ── /unpin: remove file from active context ──────────────────────────
2028        if user_input.trim_start().starts_with("/unpin ") {
2029            let path = user_input.trim_start()[7..].trim();
2030            if self.pinned_files.lock().await.remove(path).is_some() {
2031                let msg = format!("Unpinned: {} — file removed from active context.", path);
2032                for chunk in chunk_text(&msg, 8) {
2033                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2034                }
2035            } else {
2036                let _ = tx
2037                    .send(InferenceEvent::Error(format!(
2038                        "File {} was not pinned.",
2039                        path
2040                    )))
2041                    .await;
2042            }
2043            let _ = tx.send(InferenceEvent::Done).await;
2044            return Ok(());
2045        }
2046
2047        // ── Normal processing ───────────────────────────────────────────────
2048
2049        // Ensure MCP is initialized and tools are discovered for this turn.
2050        let tiny_context_mode = self.engine.current_context_length() <= 8_192;
2051        let mut base_prompt = self.engine.build_system_prompt(
2052            self.snark,
2053            self.chaos,
2054            self.brief,
2055            self.professional,
2056            &self.tools,
2057            self.reasoning_history.as_deref(),
2058            &mcp_tools,
2059        );
2060        if !tiny_context_mode {
2061            if let Some(hint) = &config.context_hint {
2062                if !hint.trim().is_empty() {
2063                    base_prompt.push_str(&format!(
2064                        "\n\n# Project Context (from .hematite/settings.json)\n{}",
2065                        hint
2066                    ));
2067                }
2068            }
2069            if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
2070                &crate::tools::file_ops::workspace_root(),
2071            ) {
2072                base_prompt.push_str(&format!("\n\n{}", profile_block));
2073            }
2074            // L1: inject hot-files block if available (persists across sessions via vein.db).
2075            if let Some(ref l1) = self.l1_context {
2076                base_prompt.push_str(&format!("\n\n{}", l1));
2077            }
2078            if let Some(ref repo_map_block) = self.repo_map {
2079                base_prompt.push_str(&format!("\n\n{}", repo_map_block));
2080            }
2081        }
2082        let grounded_trace_mode = intent.grounded_trace_mode
2083            || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2084        let capability_mode =
2085            intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2086        let toolchain_mode =
2087            intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2088        let host_inspection_mode = intent.host_inspection_mode;
2089        let maintainer_workflow_mode = intent.maintainer_workflow_mode
2090            || preferred_maintainer_workflow(&effective_user_input).is_some();
2091        let workspace_workflow_mode = intent.workspace_workflow_mode
2092            || preferred_workspace_workflow(&effective_user_input).is_some();
2093        let fix_plan_mode =
2094            preferred_host_inspection_topic(&effective_user_input) == Some("fix_plan");
2095        let architecture_overview_mode = intent.architecture_overview_mode;
2096        let capability_needs_repo = intent.capability_needs_repo;
2097        let mut system_msg = build_system_with_corrections(
2098            &base_prompt,
2099            &self.correction_hints,
2100            &self.gpu_state,
2101            &self.git_state,
2102            &config,
2103        );
2104        if tiny_context_mode {
2105            system_msg.push_str(
2106                "\n\n# TINY CONTEXT TURN MODE\n\
2107                 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2108            );
2109        }
2110        if !tiny_context_mode && grounded_trace_mode {
2111            system_msg.push_str(
2112                "\n\n# GROUNDED TRACE MODE\n\
2113                 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2114                 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2115                 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2116                 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2117                 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2118                 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2119                 Do not invent names such as synthetic channels or subsystems.\n\
2120                 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2121                For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2122            );
2123        }
2124        if !tiny_context_mode && capability_mode {
2125            system_msg.push_str(
2126                "\n\n# CAPABILITY QUESTION MODE\n\
2127                 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2128                 Answer from stable Hematite capabilities and current runtime state.\n\
2129                 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2130                 Do NOT call repo-inspection tools like `read_file` or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2131                 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2132                 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2133                 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2134                 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2135                 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2136                 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2137                 Keep the answer short, plain, and ASCII-first.\n"
2138            );
2139        }
2140        if !tiny_context_mode && toolchain_mode {
2141            system_msg.push_str(
2142                "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2143                 This turn is about Hematite's real built-in tools and how to choose them.\n\
2144                 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2145                 Use only real built-in tool names.\n\
2146                 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2147                 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2148                 Be explicit about which tools are optional or conditional.\n"
2149            );
2150        }
2151        if !tiny_context_mode && host_inspection_mode {
2152            system_msg.push_str(
2153                 "\n\n# HOST INSPECTION MODE\n\
2154                   This turn is about the local machine and environment, not repository architecture.\n\
2155                 Prefer `inspect_host` before raw `shell` for PATH analysis, installed developer tool versions, environment/package-manager health, grounded fix plans, network snapshots, service snapshots, process snapshots, desktop item counts, Downloads summaries, listening ports, repo-doctor checks, and directory/disk-size reports.\n\
2156                 Use the closest built-in topic first: `summary`, `toolchains`, `path`, `env_doctor`, `fix_plan`, `network`, `services`, `processes`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
2157                 If the user asks how to fix a common workstation problem such as `cargo not found`, `port 3000 already in use`, or `LM Studio not reachable`, use `fix_plan` first instead of `env_doctor`, `path`, or `ports`.\n\
2158                 If `env_doctor` answers the question, stop there. Do not follow with `path` unless the user explicitly asks for raw PATH entries.\n\
2159                 Only use `shell` if the host question truly goes beyond `inspect_host`.\n"
2160              );
2161        }
2162        if !tiny_context_mode && fix_plan_mode {
2163            system_msg.push_str(
2164                "\n\n# FIX PLAN MODE\n\
2165                 This turn is a workstation remediation question, not just a diagnosis question.\n\
2166                 Call `inspect_host` with `topic=fix_plan` first.\n\
2167                 Do not start with `path`, `toolchains`, `env_doctor`, or `ports` unless the user explicitly asks for diagnosis details instead of a fix plan.\n\
2168                 Keep the answer grounded, stepwise, and approval-aware.\n"
2169            );
2170        }
2171        if !tiny_context_mode && maintainer_workflow_mode {
2172            system_msg.push_str(
2173                "\n\n# HEMATITE MAINTAINER WORKFLOW MODE\n\
2174                 This turn asks Hematite to run one of Hematite's own maintainer workflows, not invent an ad hoc shell command.\n\
2175                 Prefer `run_hematite_maintainer_workflow` for existing Hematite workflows such as `clean.ps1`, `scripts/package-windows.ps1`, or `release.ps1`.\n\
2176                 Use workflow `clean` for cleanup, workflow `package_windows` for rebuilding the local portable or installer, and workflow `release` for the normal version bump/tag/push/publish flow.\n\
2177                 Do not treat this as a generic current-workspace script runner. Only fall back to raw `shell` if the user asks for a script or command outside those Hematite maintainer workflows.\n"
2178            );
2179        }
2180        if !tiny_context_mode && workspace_workflow_mode {
2181            system_msg.push_str(
2182                "\n\n# WORKSPACE WORKFLOW MODE\n\
2183                 This turn asks Hematite to run something in the active project workspace, not in Hematite's own source tree.\n\
2184                 Prefer `run_workspace_workflow` for the current project's build, test, lint, fix, package scripts, just/task/make targets, local repo scripts, or an exact workspace command.\n\
2185                 This tool always runs from the locked workspace root.\n\
2186                 If no real project workspace is locked, say so and tell the user to relaunch Hematite in the target project directory.\n\
2187                 Do not use `run_hematite_maintainer_workflow` unless the request is specifically about Hematite's own cleanup, packaging, or release scripts.\n"
2188            );
2189        }
2190
2191        if !tiny_context_mode && architecture_overview_mode {
2192            system_msg.push_str(
2193                "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2194                 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow.\n\
2195                 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2196                 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2197            );
2198        }
2199
2200        // ── Inject Workflow Mode, Plan Contracts, and Pinned Files ──────────
2201        system_msg.push_str(&format!(
2202            "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2203            self.workflow_mode.label()
2204        ));
2205        if tiny_context_mode {
2206            system_msg
2207                .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2208        } else {
2209            match self.workflow_mode {
2210                WorkflowMode::Auto => system_msg.push_str(
2211                    "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2212                ),
2213                WorkflowMode::Ask => system_msg.push_str(
2214                    "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2215                ),
2216                WorkflowMode::Code => system_msg.push_str(
2217                    "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2218                ),
2219                WorkflowMode::Architect => system_msg.push_str(
2220                    "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2221                ),
2222                WorkflowMode::ReadOnly => system_msg.push_str(
2223                    "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2224                ),
2225                WorkflowMode::Chat => {} // replaced by build_chat_system_prompt below
2226            }
2227        }
2228        if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2229            system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2230            system_msg.push_str(architect_handoff_contract());
2231            system_msg.push('\n');
2232        }
2233        if !tiny_context_mode && implement_current_plan {
2234            system_msg.push_str(
2235                "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2236                 The user explicitly asked you to implement the current saved plan.\n\
2237                 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2238                 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2239                 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2240                 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2241            );
2242            if let Some(plan) = self.session_memory.current_plan.as_ref() {
2243                if !plan.target_files.is_empty() {
2244                    system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2245                    for path in &plan.target_files {
2246                        system_msg.push_str(&format!("- {}\n", path));
2247                    }
2248                }
2249            }
2250        }
2251        if !tiny_context_mode {
2252            let pinned = self.pinned_files.lock().await;
2253            if !pinned.is_empty() {
2254                system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2255                system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
2256                for (path, content) in pinned.iter() {
2257                    system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2258                }
2259            }
2260        }
2261        if !tiny_context_mode {
2262            self.append_session_handoff(&mut system_msg);
2263        }
2264        // In chat mode, replace the full harness prompt with a clean conversational surface.
2265        // The harness prompt (built above) is discarded — Rusty personality takes over.
2266        let system_msg = if self.workflow_mode.is_chat() {
2267            self.build_chat_system_prompt()
2268        } else {
2269            system_msg
2270        };
2271        if self.history.is_empty() || self.history[0].role != "system" {
2272            self.history.insert(0, ChatMessage::system(&system_msg));
2273        } else {
2274            self.history[0] = ChatMessage::system(&system_msg);
2275        }
2276
2277        // Ensure a clean state for the new turn.
2278        self.cancel_token
2279            .store(false, std::sync::atomic::Ordering::SeqCst);
2280
2281        // [Official Gemma-4 Spec] Purge reasoning history for new user turns.
2282        // History from previous turns must not be fed back into the prompt to prevent duplication.
2283        self.reasoning_history = None;
2284
2285        let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2286        let user_content = match self.think_mode {
2287            Some(true) => format!("/think\n{}", effective_user_input),
2288            Some(false) => format!("/no_think\n{}", effective_user_input),
2289            // For non-Gemma models (Qwen etc.) default to /think so the model uses
2290            // hybrid thinking — it decides how much reasoning each turn needs.
2291            // Gemma handles reasoning via <|think|> in the system prompt instead.
2292            // Chat mode and quick tool calls skip /think — fast direct answers.
2293            None if !is_gemma
2294                && !self.workflow_mode.is_chat()
2295                && !is_quick_tool_request(&effective_user_input) =>
2296            {
2297                format!("/think\n{}", effective_user_input)
2298            }
2299            None => effective_user_input.clone(),
2300        };
2301        if let Some(image) = user_turn.attached_image.as_ref() {
2302            let image_url =
2303                crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2304                    .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2305            self.history
2306                .push(ChatMessage::user_with_image(&user_content, &image_url));
2307        } else {
2308            self.history.push(ChatMessage::user(&user_content));
2309        }
2310        self.transcript.log_user(&transcript_user_input);
2311
2312        // Incremental re-index and Vein context injection. Ordinary chat mode
2313        // still skips repo-snippet noise, but docs-only workspaces and explicit
2314        // session-recall prompts should keep Vein memory available.
2315        let vein_docs_only = self.vein_docs_only_mode();
2316        let allow_vein_context = !self.workflow_mode.is_chat()
2317            || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2318        let (vein_context, vein_paths) = if allow_vein_context {
2319            self.refresh_vein_index();
2320            let _ = tx
2321                .send(InferenceEvent::VeinStatus {
2322                    file_count: self.vein.file_count(),
2323                    embedded_count: self.vein.embedded_chunk_count(),
2324                    docs_only: vein_docs_only,
2325                })
2326                .await;
2327            match self.build_vein_context(&effective_user_input) {
2328                Some((ctx, paths)) => (Some(ctx), paths),
2329                None => (None, Vec::new()),
2330            }
2331        } else {
2332            (None, Vec::new())
2333        };
2334        if !vein_paths.is_empty() {
2335            let _ = tx
2336                .send(InferenceEvent::VeinContext { paths: vein_paths })
2337                .await;
2338        }
2339
2340        // Route: pick fast vs think model based on the complexity of this request.
2341        let routed_model = route_model(
2342            &effective_user_input,
2343            effective_fast.as_deref(),
2344            effective_think.as_deref(),
2345        )
2346        .map(|s| s.to_string());
2347
2348        let mut loop_intervention: Option<String> = None;
2349        let mut implementation_started = false;
2350        let mut non_mutating_plan_steps = 0usize;
2351        let non_mutating_plan_soft_cap = 5usize;
2352        let non_mutating_plan_hard_cap = 8usize;
2353        let mut overview_runtime_trace: Option<String> = None;
2354
2355        // Safety cap – never spin forever on a broken model.
2356        let max_iters = 25;
2357        let mut consecutive_errors = 0;
2358        let mut first_iter = true;
2359        let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2360        // Track identical tool results within this turn to detect logical loops.
2361        let _result_counts: std::collections::HashMap<String, usize> =
2362            std::collections::HashMap::new();
2363        // Track the count of identical (name, args) calls to detect infinite tool loops.
2364        let mut repeat_counts: std::collections::HashMap<String, usize> =
2365            std::collections::HashMap::new();
2366        let mut completed_tool_cache: std::collections::HashMap<String, CachedToolResult> =
2367            std::collections::HashMap::new();
2368        let mut successful_read_targets: std::collections::HashSet<String> =
2369            std::collections::HashSet::new();
2370        // (path, offset) pairs — catches repeated reads at the same non-zero offset.
2371        let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2372            std::collections::HashSet::new();
2373        let mut successful_grep_targets: std::collections::HashSet<String> =
2374            std::collections::HashSet::new();
2375        let mut no_match_grep_targets: std::collections::HashSet<String> =
2376            std::collections::HashSet::new();
2377        let mut broad_grep_targets: std::collections::HashSet<String> =
2378            std::collections::HashSet::new();
2379
2380        // Track the index of the message that started THIS turn, so compaction doesn't summarize it.
2381        let mut turn_anchor = self.history.len().saturating_sub(1);
2382
2383        for _iter in 0..max_iters {
2384            let mut mutation_occurred = false;
2385            // Priority Check: External Cancellation (via Esc key in TUI)
2386            if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2387                self.cancel_token
2388                    .store(false, std::sync::atomic::Ordering::SeqCst);
2389                let _ = tx
2390                    .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2391                    .await;
2392                let _ = tx.send(InferenceEvent::Done).await;
2393                return Ok(());
2394            }
2395
2396            // ── Intelligence Surge: Proactive Compaction Check ──────────────────────
2397            if self
2398                .compact_history_if_needed(&tx, Some(turn_anchor))
2399                .await?
2400            {
2401                // After compaction, history is [system, summary, turn_anchor, ...]
2402                // The new turn_anchor is index 2.
2403                turn_anchor = 2;
2404            }
2405
2406            // On the first iteration inject Vein context into the system message,
2407            // unless the current saved plan is being implemented. Later iterations use the
2408            // plain slice — tool results are now in history so Vein context would be redundant.
2409            let inject_vein = first_iter && !implement_current_plan;
2410            let messages = if implement_current_plan {
2411                first_iter = false;
2412                self.context_window_slice_from(turn_anchor)
2413            } else {
2414                first_iter = false;
2415                self.context_window_slice()
2416            };
2417
2418            // Use the canonical system prompt from history[0] which was built
2419            // by InferenceEngine::build_system_prompt() + build_system_with_corrections()
2420            // and includes GPU state, git context, permissions, and instruction files.
2421            let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2422                // Gemma 4 handles multiple system messages natively.
2423                // Standard models (Qwen, etc.) reject a second system message — merge into history[0].
2424                if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2425                    let mut msgs = vec![self.history[0].clone()];
2426                    msgs.push(ChatMessage::system(&intervention));
2427                    msgs
2428                } else {
2429                    let merged =
2430                        format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2431                    vec![ChatMessage::system(&merged)]
2432                }
2433            } else {
2434                vec![self.history[0].clone()]
2435            };
2436
2437            // Inject Vein context into the system message on the first iteration.
2438            // Vein results are merged in the same way as loop_intervention so standard
2439            // models (Qwen etc.) only ever see one system message.
2440            if inject_vein {
2441                if let Some(ref ctx) = vein_context.as_ref() {
2442                    if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2443                        prompt_msgs.push(ChatMessage::system(ctx));
2444                    } else {
2445                        let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2446                        prompt_msgs[0] = ChatMessage::system(&merged);
2447                    }
2448                }
2449            }
2450            prompt_msgs.extend(messages);
2451            if let Some(budget_note) =
2452                enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2453            {
2454                self.emit_operator_checkpoint(
2455                    &tx,
2456                    OperatorCheckpointState::BudgetReduced,
2457                    budget_note,
2458                )
2459                .await;
2460                let recipe = plan_recovery(
2461                    RecoveryScenario::PromptBudgetPressure,
2462                    &self.recovery_context,
2463                );
2464                self.emit_recovery_recipe_summary(
2465                    &tx,
2466                    recipe.recipe.scenario.label(),
2467                    compact_recovery_plan_summary(&recipe),
2468                )
2469                .await;
2470            }
2471            self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2472                .await;
2473
2474            let (mut text, mut tool_calls, usage, finish_reason) = match self
2475                .engine
2476                .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2477                .await
2478            {
2479                Ok(result) => result,
2480                Err(e) => {
2481                    let class = classify_runtime_failure(&e);
2482                    if should_retry_runtime_failure(class) {
2483                        if self.recovery_context.consume_transient_retry() {
2484                            let label = match class {
2485                                RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2486                                _ => "empty_model_response",
2487                            };
2488                            self.transcript.log_system(&format!(
2489                                "Automatic provider recovery triggered: {}",
2490                                e.trim()
2491                            ));
2492                            self.emit_recovery_recipe_summary(
2493                                &tx,
2494                                label,
2495                                compact_runtime_recovery_summary(class),
2496                            )
2497                            .await;
2498                            let _ = tx
2499                                .send(InferenceEvent::ProviderStatus {
2500                                    state: ProviderRuntimeState::Recovering,
2501                                    summary: compact_runtime_recovery_summary(class).into(),
2502                                })
2503                                .await;
2504                            self.emit_operator_checkpoint(
2505                                &tx,
2506                                OperatorCheckpointState::RecoveringProvider,
2507                                compact_runtime_recovery_summary(class),
2508                            )
2509                            .await;
2510                            continue;
2511                        }
2512                    }
2513
2514                    self.emit_runtime_failure(&tx, class, &e).await;
2515                    break;
2516                }
2517            };
2518            self.emit_provider_live(&tx).await;
2519
2520            // Update TUI token counter with actual usage from LM Studio.
2521            if let Some(ref u) = usage {
2522                let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2523            }
2524
2525            // Fallback safety net: if native tool markup leaked past the inference-layer
2526            // extractor, recover it here instead of treating it as plain assistant text.
2527            if tool_calls
2528                .as_ref()
2529                .map(|calls| calls.is_empty())
2530                .unwrap_or(true)
2531            {
2532                if let Some(raw_text) = text.as_deref() {
2533                    let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2534                    if !native_calls.is_empty() {
2535                        tool_calls = Some(native_calls);
2536                        let stripped =
2537                            crate::agent::inference::strip_native_tool_call_text(raw_text);
2538                        text = if stripped.trim().is_empty() {
2539                            None
2540                        } else {
2541                            Some(stripped)
2542                        };
2543                    }
2544                }
2545            }
2546
2547            // Treat empty tool_calls arrays (Some(vec![])) the same as None –
2548            // the model returned text only; an empty array causes an infinite loop.
2549            let tool_calls = tool_calls.filter(|c| !c.is_empty());
2550            let near_context_ceiling = usage
2551                .as_ref()
2552                .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2553                .unwrap_or(false);
2554
2555            if let Some(calls) = tool_calls {
2556                let (calls, prune_trace_note) =
2557                    prune_architecture_trace_batch(calls, architecture_overview_mode);
2558                if let Some(note) = prune_trace_note {
2559                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2560                }
2561
2562                let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2563                    calls,
2564                    self.workflow_mode.is_read_only(),
2565                    architecture_overview_mode,
2566                );
2567                if let Some(note) = prune_bloat_note {
2568                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2569                }
2570
2571                let (calls, prune_note) = prune_authoritative_tool_batch(
2572                    calls,
2573                    grounded_trace_mode,
2574                    &effective_user_input,
2575                );
2576                if let Some(note) = prune_note {
2577                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2578                }
2579
2580                let (calls, batch_note) = order_batch_reads_first(calls);
2581                if let Some(note) = batch_note {
2582                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2583                }
2584
2585                if let Some(repeated_path) = calls
2586                    .iter()
2587                    .filter(|c| {
2588                        let parsed = serde_json::from_str::<Value>(
2589                            &crate::agent::inference::normalize_tool_argument_string(
2590                                &c.function.name,
2591                                &c.function.arguments,
2592                            ),
2593                        )
2594                        .ok();
2595                        let offset = parsed
2596                            .as_ref()
2597                            .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2598                            .unwrap_or(0);
2599                        // Catch re-reads from the top (original behaviour) AND repeated
2600                        // reads at the exact same non-zero offset (new: catches targeted loops).
2601                        if offset < 200 {
2602                            return true;
2603                        }
2604                        if let Some(path) = parsed
2605                            .as_ref()
2606                            .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2607                        {
2608                            let normalized = normalize_workspace_path(path);
2609                            return successful_read_regions.contains(&(normalized, offset));
2610                        }
2611                        false
2612                    })
2613                    .filter_map(|c| repeated_read_target(&c.function))
2614                    .find(|path| successful_read_targets.contains(path))
2615                {
2616                    loop_intervention = Some(format!(
2617                        "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2618                        repeated_path
2619                    ));
2620                    let _ = tx
2621                        .send(InferenceEvent::Thought(
2622                            "Read discipline: preventing repeated full-file reads on the same path."
2623                                .into(),
2624                        ))
2625                        .await;
2626                    continue;
2627                }
2628
2629                if capability_mode
2630                    && !capability_needs_repo
2631                    && calls
2632                        .iter()
2633                        .all(|c| is_capability_probe_tool(&c.function.name))
2634                {
2635                    loop_intervention = Some(
2636                        "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2637                         Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2638                         Do not mention raw `mcp__*` names unless they are active and directly relevant."
2639                            .to_string(),
2640                    );
2641                    let _ = tx
2642                        .send(InferenceEvent::Thought(
2643                            "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2644                                .into(),
2645                        ))
2646                        .await;
2647                    continue;
2648                }
2649
2650                // VOCAL AGENT: If the model provided reasoning alongside tools,
2651                // stream it to the SPECULAR panel now using the hardened extraction.
2652                let raw_content = text.as_deref().unwrap_or(" ");
2653
2654                if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2655                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2656                    // Reasoning is silent (hidden in SPECULAR only).
2657                    self.reasoning_history = Some(thought);
2658                }
2659
2660                // [Gemma-4 Protocol] Keep raw content (including thoughts) during tool loops.
2661                // Thoughts are only stripped before the 'final' user turn.
2662                let stored_tool_call_content = if implement_current_plan {
2663                    cap_output(raw_content, 1200)
2664                } else {
2665                    raw_content.to_string()
2666                };
2667                self.history.push(ChatMessage::assistant_tool_calls(
2668                    &stored_tool_call_content,
2669                    calls.clone(),
2670                ));
2671
2672                // ── LAYER 4: Parallel Tool Orchestration (Batching) ────────────────────
2673                let mut results = Vec::new();
2674                let gemma4_model =
2675                    crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2676                let latest_user_prompt = self.latest_user_prompt();
2677                let mut seen_call_keys = std::collections::HashSet::new();
2678                let mut deduped_calls = Vec::new();
2679                for call in calls.clone() {
2680                    let (normalized_name, normalized_args) = normalized_tool_call_for_execution(
2681                        &call.function.name,
2682                        &call.function.arguments,
2683                        gemma4_model,
2684                        latest_user_prompt,
2685                    );
2686                    let key = canonical_tool_call_key(&normalized_name, &normalized_args);
2687                    if seen_call_keys.insert(key) {
2688                        let repeat_guard_exempt = matches!(
2689                            normalized_name.as_str(),
2690                            "verify_build" | "git_commit" | "git_push"
2691                        );
2692                        if !repeat_guard_exempt {
2693                            if let Some(cached) = completed_tool_cache
2694                                .get(&canonical_tool_call_key(&normalized_name, &normalized_args))
2695                            {
2696                                let _ = tx
2697                                    .send(InferenceEvent::Thought(
2698                                        "Cached tool result reused: identical built-in invocation already completed earlier in this turn."
2699                                            .to_string(),
2700                                    ))
2701                                    .await;
2702                                loop_intervention = Some(format!(
2703                                    "STOP. You already called `{}` with identical arguments earlier in this turn and already have that result in conversation history. Do not call it again. Use the existing result to answer or choose a different next step.",
2704                                    cached.tool_name
2705                                ));
2706                                continue;
2707                            }
2708                        }
2709                        deduped_calls.push(call);
2710                    } else {
2711                        let _ = tx
2712                            .send(InferenceEvent::Thought(
2713                                "Duplicate tool call skipped: identical built-in invocation already ran this turn."
2714                                    .to_string(),
2715                            ))
2716                            .await;
2717                    }
2718                }
2719
2720                // Partition tool calls: Parallel Read vs Serial Mutating
2721                let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = deduped_calls
2722                    .into_iter()
2723                    .partition(|c| is_parallel_safe(&c.function.name));
2724
2725                // 1. Concurrent Execution (ParallelRead)
2726                if !parallel_calls.is_empty() {
2727                    let mut tasks = Vec::new();
2728                    for call in parallel_calls {
2729                        let tx_clone = tx.clone();
2730                        let config_clone = config.clone();
2731                        // Carry the real call ID into the outcome
2732                        let call_with_id = call.clone();
2733                        tasks.push(self.process_tool_call(
2734                            call_with_id.function,
2735                            config_clone,
2736                            yolo,
2737                            tx_clone,
2738                            call_with_id.id,
2739                        ));
2740                    }
2741                    // Wait for all read-only tasks to complete simultaneously.
2742                    results.extend(futures::future::join_all(tasks).await);
2743                }
2744
2745                // 2. Sequential Execution (SerialMutating)
2746                for call in serial_calls {
2747                    results.push(
2748                        self.process_tool_call(
2749                            call.function,
2750                            config.clone(),
2751                            yolo,
2752                            tx.clone(),
2753                            call.id,
2754                        )
2755                        .await,
2756                    );
2757                }
2758
2759                // 3. Collate Messages into History & UI
2760                let mut authoritative_tool_output: Option<String> = None;
2761                let mut blocked_policy_output: Option<String> = None;
2762                let mut recoverable_policy_intervention: Option<String> = None;
2763                let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
2764                let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
2765                    None;
2766                for res in results {
2767                    let call_id = res.call_id.clone();
2768                    let tool_name = res.tool_name.clone();
2769                    let final_output = res.output.clone();
2770                    let is_error = res.is_error;
2771                    for msg in res.msg_results {
2772                        self.history.push(msg);
2773                    }
2774
2775                    // Update State for Verification Loop
2776                    if matches!(
2777                        tool_name.as_str(),
2778                        "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
2779                    ) {
2780                        mutation_occurred = true;
2781                        implementation_started = true;
2782                        // Heat tracking: bump L1 score for the edited file.
2783                        if !is_error {
2784                            let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
2785                            if !path.is_empty() {
2786                                self.vein.bump_heat(path);
2787                                self.l1_context = self.vein.l1_context();
2788                            }
2789                            // Refresh repo map so PageRank accounts for the new edit.
2790                            self.refresh_repo_map();
2791                        }
2792                    }
2793
2794                    if tool_name == "verify_build" {
2795                        self.record_session_verification(
2796                            !is_error
2797                                && (final_output.contains("BUILD OK")
2798                                    || final_output.contains("BUILD SUCCESS")
2799                                    || final_output.contains("BUILD OKAY")),
2800                            if is_error {
2801                                "Explicit verify_build failed."
2802                            } else {
2803                                "Explicit verify_build passed."
2804                            },
2805                        );
2806                    }
2807
2808                    // Update Repeat Guard
2809                    let call_key = format!(
2810                        "{}:{}",
2811                        tool_name,
2812                        serde_json::to_string(&res.args).unwrap_or_default()
2813                    );
2814                    let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
2815                    *repeat_count += 1;
2816
2817                    // Repeat-guard exemptions: verify_build is legitimately called multiple times in fix-verify loops; git_commit and git_push are exempted by the same check.
2818                    let repeat_guard_exempt = matches!(
2819                        tool_name.as_str(),
2820                        "verify_build" | "git_commit" | "git_push"
2821                    );
2822                    if *repeat_count >= 3 && !repeat_guard_exempt {
2823                        loop_intervention = Some(format!(
2824                            "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
2825                             Do not call it again. Either answer directly from what you already know, \
2826                             use a different tool or approach, or ask the user for clarification.",
2827                            tool_name, *repeat_count
2828                        ));
2829                        let _ = tx
2830                            .send(InferenceEvent::Thought(format!(
2831                                "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
2832                                tool_name, *repeat_count
2833                            )))
2834                            .await;
2835                    }
2836
2837                    if is_error {
2838                        consecutive_errors += 1;
2839                    } else {
2840                        consecutive_errors = 0;
2841                    }
2842
2843                    if consecutive_errors >= 3 {
2844                        loop_intervention = Some(
2845                            "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
2846                             STOP all tool calls immediately. Analyze why your previous 3 calls failed \
2847                             (check for hallucinations or invalid arguments) and ask the user for \
2848                             clarification if you cannot proceed.".to_string()
2849                        );
2850                    }
2851
2852                    if consecutive_errors >= 4 {
2853                        self.emit_runtime_failure(
2854                            &tx,
2855                            RuntimeFailureClass::ToolLoop,
2856                            "Hard termination: too many consecutive tool errors.",
2857                        )
2858                        .await;
2859                        return Ok(());
2860                    }
2861
2862                    let _ = tx
2863                        .send(InferenceEvent::ToolCallResult {
2864                            id: call_id.clone(),
2865                            name: tool_name.clone(),
2866                            output: final_output.clone(),
2867                            is_error,
2868                        })
2869                        .await;
2870
2871                    let repeat_guard_exempt = matches!(
2872                        tool_name.as_str(),
2873                        "verify_build" | "git_commit" | "git_push"
2874                    );
2875                    if !repeat_guard_exempt {
2876                        completed_tool_cache.insert(
2877                            canonical_tool_call_key(&tool_name, &res.args),
2878                            CachedToolResult {
2879                                tool_name: tool_name.clone(),
2880                            },
2881                        );
2882                    }
2883
2884                    // Cap output before history
2885                    let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
2886                        self.engine.current_context_length(),
2887                    );
2888                    let capped = if implement_current_plan {
2889                        cap_output(&final_output, 1200)
2890                    } else if compact_ctx
2891                        && (tool_name == "read_file" || tool_name == "inspect_lines")
2892                    {
2893                        // Compact context: cap file reads tightly and add a navigation hint on truncation.
2894                        let limit = 3000usize;
2895                        if final_output.len() > limit {
2896                            let total_lines = final_output.lines().count();
2897                            let mut split_at = limit;
2898                            while !final_output.is_char_boundary(split_at) && split_at > 0 {
2899                                split_at -= 1;
2900                            }
2901                            format!(
2902                                "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.]",
2903                                &final_output[..split_at],
2904                                total_lines,
2905                                total_lines.saturating_sub(150),
2906                            )
2907                        } else {
2908                            final_output.clone()
2909                        }
2910                    } else {
2911                        cap_output(&final_output, 8000)
2912                    };
2913                    self.history.push(ChatMessage::tool_result_for_model(
2914                        &call_id,
2915                        &tool_name,
2916                        &capped,
2917                        &self.engine.current_model(),
2918                    ));
2919
2920                    if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
2921                    {
2922                        overview_runtime_trace =
2923                            Some(summarize_runtime_trace_output(&final_output));
2924                    }
2925
2926                    if !architecture_overview_mode
2927                        && !is_error
2928                        && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
2929                            || (toolchain_mode && tool_name == "describe_toolchain"))
2930                    {
2931                        authoritative_tool_output = Some(final_output.clone());
2932                    }
2933
2934                    if !is_error && tool_name == "read_file" {
2935                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2936                            let normalized = normalize_workspace_path(path);
2937                            let read_offset =
2938                                res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
2939                            successful_read_targets.insert(normalized.clone());
2940                            successful_read_regions.insert((normalized.clone(), read_offset));
2941                        }
2942                    }
2943
2944                    if !is_error && tool_name == "grep_files" {
2945                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2946                            let normalized = normalize_workspace_path(path);
2947                            if final_output.starts_with("No matches for ") {
2948                                no_match_grep_targets.insert(normalized);
2949                            } else if grep_output_is_high_fanout(&final_output) {
2950                                broad_grep_targets.insert(normalized);
2951                            } else {
2952                                successful_grep_targets.insert(normalized);
2953                            }
2954                        }
2955                    }
2956
2957                    if is_error
2958                        && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
2959                        && (final_output.contains("search string not found")
2960                            || final_output.contains("search string is too short")
2961                            || final_output.contains("search string matched"))
2962                    {
2963                        if let Some(target) = action_target_path(&tool_name, &res.args) {
2964                            let guidance = if final_output.contains("matched") {
2965                                format!(
2966                                    "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
2967                                    tool_name, target
2968                                )
2969                            } else {
2970                                format!(
2971                                    "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
2972                                    tool_name, target
2973                                )
2974                            };
2975                            loop_intervention = Some(guidance);
2976                            *repeat_count = 0;
2977                        }
2978                    }
2979
2980                    if res.blocked_by_policy
2981                        && is_mcp_workspace_read_tool(&tool_name)
2982                        && recoverable_policy_intervention.is_none()
2983                    {
2984                        recoverable_policy_intervention = Some(
2985                            "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
2986                        );
2987                        recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
2988                        recoverable_policy_checkpoint = Some((
2989                            OperatorCheckpointState::BlockedPolicy,
2990                            "MCP workspace read blocked; rerouting to built-in file tools."
2991                                .to_string(),
2992                        ));
2993
2994                    } else if res.blocked_by_policy
2995                        && implement_current_plan
2996                        && is_current_plan_irrelevant_tool(&tool_name)
2997                        && recoverable_policy_intervention.is_none()
2998                    {
2999                        recoverable_policy_intervention = Some(format!(
3000                            "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
3001                            tool_name
3002                        ));
3003                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
3004                        recoverable_policy_checkpoint = Some((
3005                            OperatorCheckpointState::BlockedPolicy,
3006                            format!(
3007                                "Current-plan execution blocked unrelated tool `{}`.",
3008                                tool_name
3009                            ),
3010                        ));
3011                    } else if res.blocked_by_policy
3012                        && implement_current_plan
3013                        && final_output.contains("requires recent file evidence")
3014                        && recoverable_policy_intervention.is_none()
3015                    {
3016                        let target = action_target_path(&tool_name, &res.args)
3017                            .unwrap_or_else(|| "the target file".to_string());
3018                        recoverable_policy_intervention = Some(format!(
3019                            "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
3020                        ));
3021                        recoverable_policy_recipe =
3022                            Some(RecoveryScenario::RecentFileEvidenceMissing);
3023                        recoverable_policy_checkpoint = Some((
3024                            OperatorCheckpointState::BlockedRecentFileEvidence,
3025                            format!("Edit blocked on `{target}`; recent file evidence missing."),
3026                        ));
3027                    } else if res.blocked_by_policy
3028                        && implement_current_plan
3029                        && final_output.contains("requires an exact local line window first")
3030                        && recoverable_policy_intervention.is_none()
3031                    {
3032                        let target = action_target_path(&tool_name, &res.args)
3033                            .unwrap_or_else(|| "the target file".to_string());
3034                        recoverable_policy_intervention = Some(format!(
3035                            "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
3036                        ));
3037                        recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
3038                        recoverable_policy_checkpoint = Some((
3039                            OperatorCheckpointState::BlockedExactLineWindow,
3040                            format!("Edit blocked on `{target}`; exact line window required."),
3041                        ));
3042                    } else if res.blocked_by_policy && blocked_policy_output.is_none() {
3043                        blocked_policy_output = Some(final_output.clone());
3044                    }
3045
3046                    if *repeat_count >= 5 {
3047                        let _ = tx.send(InferenceEvent::Done).await;
3048                        return Ok(());
3049                    }
3050
3051                    if implement_current_plan
3052                        && !implementation_started
3053                        && !is_error
3054                        && is_non_mutating_plan_step_tool(&tool_name)
3055                    {
3056                        non_mutating_plan_steps += 1;
3057                    }
3058                }
3059
3060                if let Some(intervention) = recoverable_policy_intervention {
3061                    if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
3062                        self.emit_operator_checkpoint(&tx, state, summary).await;
3063                    }
3064                    if let Some(scenario) = recoverable_policy_recipe.take() {
3065                        let recipe = plan_recovery(scenario, &self.recovery_context);
3066                        self.emit_recovery_recipe_summary(
3067                            &tx,
3068                            recipe.recipe.scenario.label(),
3069                            compact_recovery_plan_summary(&recipe),
3070                        )
3071                        .await;
3072                    }
3073                    loop_intervention = Some(intervention);
3074                    let _ = tx
3075                        .send(InferenceEvent::Thought(
3076                            "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
3077                                .into(),
3078                        ))
3079                        .await;
3080                    continue;
3081                }
3082
3083                if architecture_overview_mode {
3084                    match overview_runtime_trace.as_deref() {
3085                        Some(runtime_trace) => {
3086                            let response =
3087                                build_architecture_overview_answer(runtime_trace);
3088                            self.history.push(ChatMessage::assistant_text(&response));
3089                            self.transcript.log_agent(&response);
3090
3091                            for chunk in chunk_text(&response, 8) {
3092                                if !chunk.is_empty() {
3093                                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
3094                                }
3095                            }
3096
3097                            let _ = tx.send(InferenceEvent::Done).await;
3098                            break;
3099                        }
3100                        None => {
3101                            loop_intervention = Some(
3102                                "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
3103                                    .to_string(),
3104                            );
3105                            continue;
3106                        }
3107                    }
3108                }
3109
3110                if implement_current_plan
3111                    && !implementation_started
3112                    && non_mutating_plan_steps >= non_mutating_plan_hard_cap
3113                {
3114                    let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
3115                    self.history.push(ChatMessage::assistant_text(&msg));
3116                    self.transcript.log_agent(&msg);
3117
3118                    for chunk in chunk_text(&msg, 8) {
3119                        if !chunk.is_empty() {
3120                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3121                        }
3122                    }
3123
3124                    let _ = tx.send(InferenceEvent::Done).await;
3125                    break;
3126                }
3127
3128                if let Some(blocked_output) = blocked_policy_output {
3129                    self.emit_operator_checkpoint(
3130                        &tx,
3131                        OperatorCheckpointState::BlockedPolicy,
3132                        "A blocked tool path was surfaced directly to the operator.",
3133                    )
3134                    .await;
3135                    self.history
3136                        .push(ChatMessage::assistant_text(&blocked_output));
3137                    self.transcript.log_agent(&blocked_output);
3138
3139                    for chunk in chunk_text(&blocked_output, 8) {
3140                        if !chunk.is_empty() {
3141                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3142                        }
3143                    }
3144
3145                    let _ = tx.send(InferenceEvent::Done).await;
3146                    break;
3147                }
3148
3149                if let Some(tool_output) = authoritative_tool_output {
3150                    self.history.push(ChatMessage::assistant_text(&tool_output));
3151                    self.transcript.log_agent(&tool_output);
3152
3153                    for chunk in chunk_text(&tool_output, 8) {
3154                        if !chunk.is_empty() {
3155                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3156                        }
3157                    }
3158
3159                    let _ = tx.send(InferenceEvent::Done).await;
3160                    break;
3161                }
3162
3163                if implement_current_plan && !implementation_started {
3164                    let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3165                    if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3166                        loop_intervention = Some(format!(
3167                            "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3168                            base
3169                        ));
3170                    } else {
3171                        loop_intervention = Some(base.to_string());
3172                    }
3173                } else if self.workflow_mode == WorkflowMode::Architect {
3174                    loop_intervention = Some(
3175                        format!(
3176                            "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3177                            architect_handoff_contract()
3178                        ),
3179                    );
3180                }
3181
3182                // 4. Auto-Verification Loop (The Perfect Bake)
3183                if mutation_occurred && !yolo {
3184                    let _ = tx
3185                        .send(InferenceEvent::Thought(
3186                            "Self-Verification: Running 'cargo check' to ensure build integrity..."
3187                                .into(),
3188                        ))
3189                        .await;
3190                    let verify_res = self.auto_verify_build().await;
3191                    let verify_ok = verify_res.contains("BUILD SUCCESS");
3192                    self.record_verify_build_result(verify_ok, &verify_res)
3193                        .await;
3194                    self.record_session_verification(
3195                        verify_ok,
3196                        if verify_ok {
3197                            "Automatic build verification passed."
3198                        } else {
3199                            "Automatic build verification failed."
3200                        },
3201                    );
3202                    self.history.push(ChatMessage::system(&format!(
3203                        "\n# SYSTEM VERIFICATION\n{verify_res}"
3204                    )));
3205                    let _ = tx
3206                        .send(InferenceEvent::Thought(
3207                            "Verification turn injected into history.".into(),
3208                        ))
3209                        .await;
3210                }
3211
3212                // Continue loop – the model will respond to the results.
3213                continue;
3214            } else if let Some(response_text) = text {
3215                if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3216                    if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3217                        let cleaned = build_session_reset_semantics_answer();
3218                        self.history.push(ChatMessage::assistant_text(&cleaned));
3219                        self.transcript.log_agent(&cleaned);
3220                        for chunk in chunk_text(&cleaned, 8) {
3221                            if !chunk.is_empty() {
3222                                let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3223                            }
3224                        }
3225                        let _ = tx.send(InferenceEvent::Done).await;
3226                        break;
3227                    }
3228
3229                    let warning = format_runtime_failure(
3230                        RuntimeFailureClass::ContextWindow,
3231                        "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3232                    );
3233                    self.history.push(ChatMessage::assistant_text(&warning));
3234                    self.transcript.log_agent(&warning);
3235                    let _ = tx
3236                        .send(InferenceEvent::Thought(
3237                            "Length recovery: model hit the context ceiling before completing the answer."
3238                                .into(),
3239                        ))
3240                        .await;
3241                    for chunk in chunk_text(&warning, 8) {
3242                        if !chunk.is_empty() {
3243                            let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3244                        }
3245                    }
3246                    let _ = tx.send(InferenceEvent::Done).await;
3247                    break;
3248                }
3249
3250                if response_text.contains("<|tool_call")
3251                    || response_text.contains("[END_TOOL_REQUEST]")
3252                    || response_text.contains("<|tool_response")
3253                    || response_text.contains("<tool_response|>")
3254                {
3255                    loop_intervention = Some(
3256                        "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3257                    );
3258                    continue;
3259                }
3260
3261                // 1. Process and route the reasoning block to SPECULAR.
3262                if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3263                {
3264                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3265                    // Persist for history audit (stripped from next turn by Volatile Reasoning rule).
3266                    // This will be summarized in the next turn's system prompt.
3267                    self.reasoning_history = Some(thought);
3268                }
3269
3270                // 2. Process and stream the final answer to the chat interface.
3271                let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3272
3273                if implement_current_plan && !implementation_started {
3274                    loop_intervention = Some(
3275                        "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3276                    );
3277                    continue;
3278                }
3279
3280                // [Hardened Interface] Strictly respect the stripper.
3281                // If it's empty after stripping think blocks, the model thought through its
3282                // answer but forgot to emit it (common with Qwen3 models in architect/ask mode).
3283                // Nudge it rather than silently dropping the turn.
3284                if cleaned.is_empty() {
3285                    loop_intervention = Some(
3286                        "Your response was empty after your reasoning. You must now emit your complete written response based on your analysis. Do not reason further — write your answer now."
3287                            .to_string(),
3288                    );
3289                    continue;
3290                }
3291
3292                let architect_handoff = self.persist_architect_handoff(&cleaned);
3293                self.history.push(ChatMessage::assistant_text(&cleaned));
3294                self.transcript.log_agent(&cleaned);
3295
3296                // Send in smooth chunks for that professional UI feel.
3297                for chunk in chunk_text(&cleaned, 8) {
3298                    if !chunk.is_empty() {
3299                        let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3300                    }
3301                }
3302
3303                if let Some(plan) = architect_handoff.as_ref() {
3304                    let note = architect_handoff_operator_note(plan);
3305                    self.history.push(ChatMessage::system(&note));
3306                    self.transcript.log_system(&note);
3307                    let _ = tx
3308                        .send(InferenceEvent::MutedToken(format!("\n{}", note)))
3309                        .await;
3310                }
3311
3312                let _ = tx.send(InferenceEvent::Done).await;
3313                break;
3314            } else {
3315                let detail = "Model returned an empty response.";
3316                let class = classify_runtime_failure(detail);
3317                if should_retry_runtime_failure(class) {
3318                    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3319                        if let RecoveryDecision::Attempt(plan) =
3320                            attempt_recovery(scenario, &mut self.recovery_context)
3321                        {
3322                            self.transcript.log_system(
3323                                "Automatic provider recovery triggered: model returned an empty response.",
3324                            );
3325                            self.emit_recovery_recipe_summary(
3326                                &tx,
3327                                plan.recipe.scenario.label(),
3328                                compact_recovery_plan_summary(&plan),
3329                            )
3330                            .await;
3331                            let _ = tx
3332                                .send(InferenceEvent::ProviderStatus {
3333                                    state: ProviderRuntimeState::Recovering,
3334                                    summary: compact_runtime_recovery_summary(class).into(),
3335                                })
3336                                .await;
3337                            self.emit_operator_checkpoint(
3338                                &tx,
3339                                OperatorCheckpointState::RecoveringProvider,
3340                                compact_runtime_recovery_summary(class),
3341                            )
3342                            .await;
3343                            continue;
3344                        }
3345                    }
3346                }
3347
3348                self.emit_runtime_failure(&tx, class, detail).await;
3349                break;
3350            }
3351        }
3352
3353        self.trim_history(80);
3354        self.refresh_session_memory();
3355        self.save_session();
3356        self.emit_compaction_pressure(&tx).await;
3357        Ok(())
3358    }
3359
3360    async fn emit_runtime_failure(
3361        &mut self,
3362        tx: &mpsc::Sender<InferenceEvent>,
3363        class: RuntimeFailureClass,
3364        detail: &str,
3365    ) {
3366        if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3367            let decision = preview_recovery_decision(scenario, &self.recovery_context);
3368            self.emit_recovery_recipe_summary(
3369                tx,
3370                scenario.label(),
3371                compact_recovery_decision_summary(&decision),
3372            )
3373            .await;
3374            let needs_refresh = match &decision {
3375                RecoveryDecision::Attempt(plan) => plan
3376                    .recipe
3377                    .steps
3378                    .contains(&RecoveryStep::RefreshRuntimeProfile),
3379                RecoveryDecision::Escalate { recipe, .. } => {
3380                    recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3381                }
3382            };
3383            if needs_refresh {
3384                if let Some((model_id, context_length, changed)) = self
3385                    .refresh_runtime_profile_and_report(tx, "context_window_failure")
3386                    .await
3387                {
3388                    let note = if changed {
3389                        format!(
3390                            "Runtime refresh after context-window failure: model {} | CTX {}",
3391                            model_id, context_length
3392                        )
3393                    } else {
3394                        format!(
3395                            "Runtime refresh after context-window failure confirms model {} | CTX {}",
3396                            model_id, context_length
3397                        )
3398                    };
3399                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3400                }
3401            }
3402        }
3403        if let Some(state) = provider_state_for_runtime_failure(class) {
3404            let _ = tx
3405                .send(InferenceEvent::ProviderStatus {
3406                    state,
3407                    summary: compact_runtime_failure_summary(class).into(),
3408                })
3409                .await;
3410        }
3411        if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3412            self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3413                .await;
3414        }
3415        let formatted = format_runtime_failure(class, detail);
3416        self.history.push(ChatMessage::system(&format!(
3417            "# RUNTIME FAILURE\n{}",
3418            formatted
3419        )));
3420        self.transcript.log_system(&formatted);
3421        let _ = tx.send(InferenceEvent::Error(formatted)).await;
3422        let _ = tx.send(InferenceEvent::Done).await;
3423    }
3424
3425    /// [Task Analyzer] Run 'cargo check' and return a concise summary for the model.
3426    async fn auto_verify_build(&self) -> String {
3427        match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3428            Ok(out) => {
3429                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3430                    + &cap_output(&out, 2000)
3431            }
3432            Err(e) => format!(
3433                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3434                cap_output(&e, 2000)
3435            ),
3436        }
3437    }
3438
3439    /// Triggers an LLM call to summarize old messages if history exceeds the VRAM character limit.
3440    /// Triggers the Deterministic Smart Compaction algorithm to shrink history while preserving context.
3441    /// Triggers the Recursive Context Compactor.
3442    async fn compact_history_if_needed(
3443        &mut self,
3444        tx: &mpsc::Sender<InferenceEvent>,
3445        anchor_index: Option<usize>,
3446    ) -> Result<bool, String> {
3447        let vram_ratio = self.gpu_state.ratio();
3448        let context_length = self.engine.current_context_length();
3449        let config = CompactionConfig::adaptive(context_length, vram_ratio);
3450
3451        if !compaction::should_compact(&self.history, context_length, vram_ratio) {
3452            return Ok(false);
3453        }
3454
3455        let _ = tx
3456            .send(InferenceEvent::Thought(format!(
3457                "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
3458                context_length / 1000,
3459                vram_ratio * 100.0,
3460                config.max_estimated_tokens / 1000,
3461            )))
3462            .await;
3463
3464        let result = compaction::compact_history(
3465            &self.history,
3466            self.running_summary.as_deref(),
3467            config,
3468            anchor_index,
3469        );
3470
3471        let removed_message_count = self.history.len().saturating_sub(result.messages.len());
3472        self.history = result.messages;
3473        self.running_summary = result.summary;
3474
3475        // Layer 6: Memory Synthesis (Task Context Persistence)
3476        let previous_memory = self.session_memory.clone();
3477        self.session_memory = compaction::extract_memory(&self.history);
3478        self.session_memory
3479            .inherit_runtime_ledger_from(&previous_memory);
3480        self.session_memory.record_compaction(
3481            removed_message_count,
3482            format!(
3483                "Compacted history around active task '{}' and preserved {} working-set file(s).",
3484                self.session_memory.current_task,
3485                self.session_memory.working_set.len()
3486            ),
3487        );
3488        self.emit_compaction_pressure(tx).await;
3489
3490        // Jinja alignment: preserved slice may start with assistant/tool messages.
3491        // Strip any leading non-user messages so the first non-system message is always user.
3492        let first_non_sys = self
3493            .history
3494            .iter()
3495            .position(|m| m.role != "system")
3496            .unwrap_or(self.history.len());
3497        if first_non_sys < self.history.len() {
3498            if let Some(user_offset) = self.history[first_non_sys..]
3499                .iter()
3500                .position(|m| m.role == "user")
3501            {
3502                if user_offset > 0 {
3503                    self.history
3504                        .drain(first_non_sys..first_non_sys + user_offset);
3505                }
3506            }
3507        }
3508
3509        let _ = tx
3510            .send(InferenceEvent::Thought(format!(
3511                "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
3512                self.session_memory.current_task,
3513                self.session_memory.working_set.len()
3514            )))
3515            .await;
3516        let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
3517        self.emit_recovery_recipe_summary(
3518            tx,
3519            recipe.recipe.scenario.label(),
3520            compact_recovery_plan_summary(&recipe),
3521        )
3522        .await;
3523        self.emit_operator_checkpoint(
3524            tx,
3525            OperatorCheckpointState::HistoryCompacted,
3526            format!(
3527                "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
3528                self.session_memory.current_task,
3529                self.session_memory.working_set.len()
3530            ),
3531        )
3532        .await;
3533
3534        Ok(true)
3535    }
3536
3537    /// Query The Vein for context relevant to the user's message.
3538    /// Runs hybrid BM25 + semantic search (semantic requires embedding model in LM Studio).
3539    /// Returns a formatted system message string, or None if nothing useful found.
3540    fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3541        // Skip trivial / very short inputs.
3542        if query.trim().split_whitespace().count() < 3 {
3543            return None;
3544        }
3545
3546        let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3547        if results.is_empty() {
3548            return None;
3549        }
3550
3551        let semantic_active = self.vein.has_any_embeddings();
3552        let header = if semantic_active {
3553            "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3554             Use this to answer without needing extra read_file calls where possible.\n\n"
3555        } else {
3556            "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3557             Use this to answer without needing extra read_file calls where possible.\n\n"
3558        };
3559
3560        let mut ctx = String::from(header);
3561        let mut paths: Vec<String> = Vec::new();
3562
3563        let mut total = 0usize;
3564        const MAX_CTX_CHARS: usize = 1_500;
3565
3566        for r in results {
3567            if total >= MAX_CTX_CHARS {
3568                break;
3569            }
3570            let snippet = if r.content.len() > 500 {
3571                format!("{}...", &r.content[..500])
3572            } else {
3573                r.content.clone()
3574            };
3575            ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3576            total += snippet.len() + r.path.len() + 10;
3577            if !paths.contains(&r.path) {
3578                paths.push(r.path);
3579            }
3580        }
3581
3582        Some((ctx, paths))
3583    }
3584
3585    /// Returns the conversation history (WITHOUT the system prompt) for the context window.
3586    /// This ensures we don't have redundant system blocks and prevents Jinja crashes.
3587    fn context_window_slice(&self) -> Vec<ChatMessage> {
3588        let mut result = Vec::new();
3589
3590        // Skip index 0 (the raw system message) and any stray system messages in history.
3591        if self.history.len() > 1 {
3592            for m in &self.history[1..] {
3593                if m.role == "system" {
3594                    continue;
3595                }
3596
3597                let mut sanitized = m.clone();
3598                // DEEP SANITIZE: LM Studio Jinja templates for Qwen crash on truly empty content.
3599                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3600                    sanitized.content = MessageContent::Text(" ".into());
3601                }
3602                result.push(sanitized);
3603            }
3604        }
3605
3606        // Jinja Guard: The first message after the system prompt MUST be 'user'.
3607        // If not (e.g. because of compaction), we insert a tiny anchor.
3608        if !result.is_empty() && result[0].role != "user" {
3609            result.insert(0, ChatMessage::user("Continuing previous context..."));
3610        }
3611
3612        result
3613    }
3614
3615    fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
3616        let mut result = Vec::new();
3617
3618        if self.history.len() > 1 {
3619            let start = start_idx.max(1).min(self.history.len());
3620            for m in &self.history[start..] {
3621                if m.role == "system" {
3622                    continue;
3623                }
3624
3625                let mut sanitized = m.clone();
3626                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3627                    sanitized.content = MessageContent::Text(" ".into());
3628                }
3629                result.push(sanitized);
3630            }
3631        }
3632
3633        if !result.is_empty() && result[0].role != "user" {
3634            result.insert(0, ChatMessage::user("Continuing current plan execution..."));
3635        }
3636
3637        result
3638    }
3639
3640    /// Drop old turns from the middle of history.
3641    fn trim_history(&mut self, max_messages: usize) {
3642        if self.history.len() <= max_messages {
3643            return;
3644        }
3645        // Always keep [0] (system prompt).
3646        let excess = self.history.len() - max_messages;
3647        self.history.drain(1..=excess);
3648    }
3649
3650    /// P1: Attempt to fix malformed JSON tool arguments by asking the model to re-output them.
3651    async fn repair_tool_args(
3652        &self,
3653        tool_name: &str,
3654        bad_json: &str,
3655        tx: &mpsc::Sender<InferenceEvent>,
3656    ) -> Result<Value, String> {
3657        let _ = tx
3658            .send(InferenceEvent::Thought(format!(
3659                "Attempting to repair malformed JSON for '{}'...",
3660                tool_name
3661            )))
3662            .await;
3663
3664        let prompt = format!(
3665            "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
3666            tool_name, bad_json
3667        );
3668
3669        let messages = vec![
3670            ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
3671            ChatMessage::user(&prompt),
3672        ];
3673
3674        // Use fast model for speed if available.
3675        let (text, _, _, _) = self
3676            .engine
3677            .call_with_tools(&messages, &[], self.fast_model.as_deref())
3678            .await
3679            .map_err(|e| e.to_string())?;
3680
3681        let cleaned = text
3682            .unwrap_or_default()
3683            .trim()
3684            .trim_start_matches("```json")
3685            .trim_start_matches("```")
3686            .trim_end_matches("```")
3687            .trim()
3688            .to_string();
3689
3690        serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
3691    }
3692
3693    /// P2: Run a fast validation step after file writes to check for subtle logic errors.
3694    async fn run_critic_check(
3695        &self,
3696        path: &str,
3697        content: &str,
3698        tx: &mpsc::Sender<InferenceEvent>,
3699    ) -> Option<String> {
3700        // Only run for source code files.
3701        let ext = std::path::Path::new(path)
3702            .extension()
3703            .and_then(|e| e.to_str())
3704            .unwrap_or("");
3705        const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
3706        if !CRITIC_EXTS.contains(&ext) {
3707            return None;
3708        }
3709
3710        let _ = tx
3711            .send(InferenceEvent::Thought(format!(
3712                "CRITIC: Reviewing changes to '{}'...",
3713                path
3714            )))
3715            .await;
3716
3717        let truncated = cap_output(content, 4000);
3718
3719        let prompt = format!(
3720            "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
3721            path, ext, truncated
3722        );
3723
3724        let messages = vec![
3725            ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
3726            ChatMessage::user(&prompt)
3727        ];
3728
3729        let (text, _, _, _) = self
3730            .engine
3731            .call_with_tools(&messages, &[], self.fast_model.as_deref())
3732            .await
3733            .ok()?;
3734
3735        let critique = text?.trim().to_string();
3736        if critique.to_uppercase().contains("PASS") || critique.is_empty() {
3737            None
3738        } else {
3739            Some(critique)
3740        }
3741    }
3742}
3743
3744// ── Tool dispatcher ───────────────────────────────────────────────────────────
3745
3746pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
3747    dispatch_builtin_tool(name, args).await
3748}
3749
/// Cleans up free-form text for use as a fix-plan issue description.
///
/// Surrounding whitespace is trimmed, and one leading `/think` or `/no_think` directive is
/// removed (`/think` is tried first). Returns `None` when nothing but whitespace remains.
fn normalize_fix_plan_issue_text(text: &str) -> Option<String> {
    let body = text.trim();
    let issue = ["/think", "/no_think"]
        .iter()
        .find_map(|directive| body.strip_prefix(*directive))
        .unwrap_or(body)
        .trim();

    if issue.is_empty() {
        None
    } else {
        Some(issue.to_owned())
    }
}
3761
3762fn fill_missing_fix_plan_issue(tool_name: &str, args: &mut Value, fallback_issue: Option<&str>) {
3763    if tool_name != "inspect_host" {
3764        return;
3765    }
3766
3767    let Some(topic) = args.get("topic").and_then(|v| v.as_str()) else {
3768        return;
3769    };
3770    if topic != "fix_plan" {
3771        return;
3772    }
3773
3774    let issue_missing = args
3775        .get("issue")
3776        .and_then(|v| v.as_str())
3777        .map(str::trim)
3778        .is_none_or(|value| value.is_empty());
3779    if !issue_missing {
3780        return;
3781    }
3782
3783    let Some(fallback_issue) = fallback_issue.and_then(normalize_fix_plan_issue_text) else {
3784        return;
3785    };
3786
3787    let Value::Object(map) = args else {
3788        return;
3789    };
3790    map.insert(
3791        "issue".to_string(),
3792        Value::String(fallback_issue.to_string()),
3793    );
3794}
3795
3796fn should_rewrite_shell_to_fix_plan(
3797    tool_name: &str,
3798    args: &Value,
3799    latest_user_prompt: Option<&str>,
3800) -> bool {
3801    if tool_name != "shell" {
3802        return false;
3803    }
3804    let Some(prompt) = latest_user_prompt else {
3805        return false;
3806    };
3807    if preferred_host_inspection_topic(prompt) != Some("fix_plan") {
3808        return false;
3809    }
3810    let command = args
3811        .get("command")
3812        .and_then(|value| value.as_str())
3813        .unwrap_or("");
3814    shell_looks_like_structured_host_inspection(command)
3815}
3816
3817fn extract_release_arg(command: &str, flag: &str) -> Option<String> {
3818    let pattern = format!(r#"(?i){}\s+['"]?([^'" \r\n]+)['"]?"#, regex::escape(flag));
3819    let regex = regex::Regex::new(&pattern).ok()?;
3820    let captures = regex.captures(command)?;
3821    captures.get(1).map(|m| m.as_str().to_string())
3822}
3823
3824fn infer_maintainer_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
3825    let workflow = preferred_maintainer_workflow(prompt)?;
3826    let lower = prompt.to_ascii_lowercase();
3827    match workflow {
3828        "clean" => Some(serde_json::json!({
3829            "workflow": "clean",
3830            "deep": lower.contains("deep clean")
3831                || lower.contains("deep cleanup")
3832                || lower.contains("deep"),
3833            "reset": lower.contains("reset"),
3834            "prune_dist": lower.contains("prune dist")
3835                || lower.contains("prune old dist")
3836                || lower.contains("prune old artifacts")
3837                || lower.contains("old dist artifacts")
3838                || lower.contains("old artifacts"),
3839        })),
3840        "package_windows" => Some(serde_json::json!({
3841            "workflow": "package_windows",
3842            "installer": lower.contains("installer") || lower.contains("setup.exe"),
3843            "add_to_path": lower.contains("addtopath")
3844                || lower.contains("add to path")
3845                || lower.contains("update path")
3846                || lower.contains("refresh path"),
3847        })),
3848        "release" => {
3849            let version = regex::Regex::new(r#"(?i)\b(\d+\.\d+\.\d+)\b"#)
3850                .ok()
3851                .and_then(|re| re.captures(prompt))
3852                .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()));
3853            let bump = if lower.contains("patch") {
3854                Some("patch")
3855            } else if lower.contains("minor") {
3856                Some("minor")
3857            } else if lower.contains("major") {
3858                Some("major")
3859            } else {
3860                None
3861            };
3862            let mut args = serde_json::json!({
3863                "workflow": "release",
3864                "push": lower.contains(" push") || lower.starts_with("push ") || lower.contains(" and push"),
3865                "add_to_path": lower.contains("addtopath")
3866                    || lower.contains("add to path")
3867                    || lower.contains("update path"),
3868                "skip_installer": lower.contains("skip installer"),
3869                "publish_crates": lower.contains("publish crates") || lower.contains("crates.io"),
3870                "publish_voice_crate": lower.contains("publish voice crate")
3871                    || lower.contains("publish hematite-kokoros"),
3872            });
3873            if let Some(version) = version {
3874                args["version"] = Value::String(version);
3875            }
3876            if let Some(bump) = bump {
3877                args["bump"] = Value::String(bump.to_string());
3878            }
3879            Some(args)
3880        }
3881        _ => None,
3882    }
3883}
3884
3885fn infer_workspace_workflow_args_from_prompt(prompt: &str) -> Option<Value> {
3886    let workflow = preferred_workspace_workflow(prompt)?;
3887    let lower = prompt.to_ascii_lowercase();
3888    let trimmed = prompt.trim();
3889
3890    if let Some(command) = extract_workspace_command_from_prompt(trimmed) {
3891        return Some(serde_json::json!({
3892            "workflow": "command",
3893            "command": command,
3894        }));
3895    }
3896
3897    if let Some(path) = extract_workspace_script_path_from_prompt(trimmed) {
3898        return Some(serde_json::json!({
3899            "workflow": "script_path",
3900            "path": path,
3901        }));
3902    }
3903
3904    match workflow {
3905        "build" | "test" | "lint" | "fix" => Some(serde_json::json!({
3906            "workflow": workflow,
3907        })),
3908        "script" => {
3909            let package_script = if lower.contains("npm run ") {
3910                extract_word_after(&lower, "npm run ")
3911            } else if lower.contains("pnpm run ") {
3912                extract_word_after(&lower, "pnpm run ")
3913            } else if lower.contains("bun run ") {
3914                extract_word_after(&lower, "bun run ")
3915            } else if lower.contains("yarn ") {
3916                extract_word_after(&lower, "yarn ")
3917            } else {
3918                None
3919            };
3920
3921            if let Some(name) = package_script {
3922                return Some(serde_json::json!({
3923                    "workflow": "package_script",
3924                    "name": name,
3925                }));
3926            }
3927
3928            if let Some(name) = extract_word_after(&lower, "just ") {
3929                return Some(serde_json::json!({
3930                    "workflow": "just",
3931                    "name": name,
3932                }));
3933            }
3934            if let Some(name) = extract_word_after(&lower, "make ") {
3935                return Some(serde_json::json!({
3936                    "workflow": "make",
3937                    "name": name,
3938                }));
3939            }
3940            if let Some(name) = extract_word_after(&lower, "task ") {
3941                return Some(serde_json::json!({
3942                    "workflow": "task",
3943                    "name": name,
3944                }));
3945            }
3946
3947            None
3948        }
3949        _ => None,
3950    }
3951}
3952
/// Extracts an explicit build/test command line from the user's prompt.
///
/// Known tool prefixes are tried in priority order. The first prefix that occurs at a word
/// boundary wins, and everything from that point to the end of the prompt is returned with
/// surrounding whitespace and backticks removed. Matching is ASCII case-insensitive.
///
/// A word boundary means the match is at the start of the prompt or is preceded by a character
/// other than a letter, digit, `_`, or `-`. Without that check `npm ` matches inside
/// `pnpm test` (yielding `npm test`) and `task ` matches inside `multitask better`.
///
/// Returns `None` when no known prefix is present.
fn extract_workspace_command_from_prompt(prompt: &str) -> Option<String> {
    const COMMAND_PREFIXES: [&str; 13] = [
        "cargo ",
        "npm ",
        "pnpm ",
        "yarn ",
        "bun ",
        "pytest",
        "go build",
        "go test",
        "make ",
        "just ",
        "task ",
        "./gradlew",
        ".\\gradlew",
    ];

    // ASCII lowercasing preserves byte offsets, so indices found in `lower` are valid (and on
    // character boundaries) in `prompt` as well.
    let lower = prompt.to_ascii_lowercase();
    for prefix in COMMAND_PREFIXES {
        let boundary_hit = lower
            .match_indices(prefix)
            .map(|(index, _)| index)
            .find(|&index| {
                !lower[..index]
                    .chars()
                    .next_back()
                    .is_some_and(|c| c.is_alphanumeric() || c == '_' || c == '-')
            });
        if let Some(index) = boundary_hit {
            return Some(prompt[index..].trim().trim_matches('`').to_string());
        }
    }
    None
}
3976
/// Finds the first whitespace-separated token in `prompt` that names a script
/// under `scripts/` with a recognised script extension.
///
/// Backslashes are normalised to forward slashes before scanning, so the
/// returned path always uses `/`. Surrounding quotes, backticks, commas and
/// parentheses are stripped, trailing dots are stripped, and a leading `./`
/// is removed. The extension check is ASCII case-insensitive; the `scripts/`
/// prefix check is case-sensitive.
///
/// Leading dots are deliberately NOT stripped together with the other
/// punctuation: doing so turned `./scripts/x.sh` into `/scripts/x.sh`, which
/// then failed the `scripts/` prefix check.
fn extract_workspace_script_path_from_prompt(prompt: &str) -> Option<String> {
    const SCRIPT_EXTENSIONS: [&str; 8] = [
        ".ps1", ".sh", ".py", ".cmd", ".bat", ".js", ".mjs", ".cjs",
    ];

    let normalized = prompt.replace('\\', "/");
    for token in normalized.split_whitespace() {
        let candidate = token
            .trim_start_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | ')' | '('))
            .trim_end_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('))
            .trim_start_matches("./");
        if !candidate.starts_with("scripts/") {
            continue;
        }
        // Lowercase once per candidate instead of once per extension.
        let lowered = candidate.to_ascii_lowercase();
        if SCRIPT_EXTENSIONS.iter().any(|ext| lowered.ends_with(*ext)) {
            return Some(candidate.to_string());
        }
    }
    None
}
3993
/// Returns the first whitespace-delimited word that follows the first
/// occurrence of `prefix` in `haystack`, with surrounding quotes, backticks,
/// commas, dots and parentheses stripped.
///
/// Returns `None` when `prefix` is absent, when nothing follows it, or when
/// the following word consists solely of strippable punctuation. The last
/// case previously yielded `Some("")`, i.e. an empty name.
fn extract_word_after(haystack: &str, prefix: &str) -> Option<String> {
    let start = haystack.find(prefix)? + prefix.len();
    let word = haystack[start..]
        .split_whitespace()
        .next()?
        .trim_matches(|c: char| matches!(c, '`' | '"' | '\'' | ',' | '.' | ')' | '('));
    if word.is_empty() {
        None
    } else {
        Some(word.to_string())
    }
}
4007
4008fn rewrite_shell_to_maintainer_workflow_args(command: &str) -> Option<Value> {
4009    let lower = command.to_ascii_lowercase();
4010    if lower.contains("clean.ps1") {
4011        return Some(serde_json::json!({
4012            "workflow": "clean",
4013            "deep": lower.contains("-deep"),
4014            "reset": lower.contains("-reset"),
4015            "prune_dist": lower.contains("-prunedist"),
4016        }));
4017    }
4018    if lower.contains("package-windows.ps1") {
4019        return Some(serde_json::json!({
4020            "workflow": "package_windows",
4021            "installer": lower.contains("-installer"),
4022            "add_to_path": lower.contains("-addtopath"),
4023        }));
4024    }
4025    if lower.contains("release.ps1") {
4026        let version = extract_release_arg(command, "-Version");
4027        let bump = extract_release_arg(command, "-Bump");
4028        if version.is_none() && bump.is_none() {
4029            return Some(serde_json::json!({
4030                "workflow": "release"
4031            }));
4032        }
4033        let mut args = serde_json::json!({
4034            "workflow": "release",
4035            "push": lower.contains("-push"),
4036            "add_to_path": lower.contains("-addtopath"),
4037            "skip_installer": lower.contains("-skipinstaller"),
4038            "publish_crates": lower.contains("-publishcrates"),
4039            "publish_voice_crate": lower.contains("-publishvoicecrate"),
4040        });
4041        if let Some(version) = version {
4042            args["version"] = Value::String(version);
4043        }
4044        if let Some(bump) = bump {
4045            args["bump"] = Value::String(bump);
4046        }
4047        return Some(args);
4048    }
4049    None
4050}
4051
4052fn rewrite_shell_to_workspace_workflow_args(command: &str) -> Option<Value> {
4053    let lower = command.to_ascii_lowercase();
4054    if lower.contains("clean.ps1")
4055        || lower.contains("package-windows.ps1")
4056        || lower.contains("release.ps1")
4057    {
4058        return None;
4059    }
4060
4061    if let Some(path) = extract_workspace_script_path_from_prompt(command) {
4062        return Some(serde_json::json!({
4063            "workflow": "script_path",
4064            "path": path,
4065        }));
4066    }
4067
4068    let looks_like_workspace_command = [
4069        "cargo ",
4070        "npm ",
4071        "pnpm ",
4072        "yarn ",
4073        "bun ",
4074        "pytest",
4075        "go build",
4076        "go test",
4077        "make ",
4078        "just ",
4079        "task ",
4080        "./gradlew",
4081        ".\\gradlew",
4082    ]
4083    .iter()
4084    .any(|needle| lower.contains(needle));
4085
4086    if looks_like_workspace_command {
4087        Some(serde_json::json!({
4088            "workflow": "command",
4089            "command": command.trim(),
4090        }))
4091    } else {
4092        None
4093    }
4094}
4095
4096fn rewrite_host_tool_call(
4097    tool_name: &mut String,
4098    args: &mut Value,
4099    latest_user_prompt: Option<&str>,
4100) {
4101    if *tool_name == "shell" {
4102        let command = args
4103            .get("command")
4104            .and_then(|value| value.as_str())
4105            .unwrap_or("");
4106        if let Some(maintainer_workflow_args) = rewrite_shell_to_maintainer_workflow_args(command) {
4107            *tool_name = "run_hematite_maintainer_workflow".to_string();
4108            *args = maintainer_workflow_args;
4109            return;
4110        }
4111        if let Some(workspace_workflow_args) = rewrite_shell_to_workspace_workflow_args(command) {
4112            *tool_name = "run_workspace_workflow".to_string();
4113            *args = workspace_workflow_args;
4114            return;
4115        }
4116    }
4117    if *tool_name != "run_hematite_maintainer_workflow" {
4118        if let Some(prompt_args) =
4119            latest_user_prompt.and_then(infer_maintainer_workflow_args_from_prompt)
4120        {
4121            *tool_name = "run_hematite_maintainer_workflow".to_string();
4122            *args = prompt_args;
4123            return;
4124        }
4125    }
4126    if *tool_name != "run_workspace_workflow" {
4127        if let Some(prompt_args) =
4128            latest_user_prompt.and_then(infer_workspace_workflow_args_from_prompt)
4129        {
4130            *tool_name = "run_workspace_workflow".to_string();
4131            *args = prompt_args;
4132            return;
4133        }
4134    }
4135    if should_rewrite_shell_to_fix_plan(tool_name, args, latest_user_prompt) {
4136        *tool_name = "inspect_host".to_string();
4137        *args = serde_json::json!({
4138            "topic": "fix_plan"
4139        });
4140    }
4141    fill_missing_fix_plan_issue(tool_name, args, latest_user_prompt);
4142}
4143
4144fn canonical_tool_call_key(tool_name: &str, args: &Value) -> String {
4145    format!(
4146        "{}:{}",
4147        tool_name,
4148        serde_json::to_string(args).unwrap_or_default()
4149    )
4150}
4151
4152fn normalized_tool_call_for_execution(
4153    tool_name: &str,
4154    raw_arguments: &str,
4155    gemma4_model: bool,
4156    latest_user_prompt: Option<&str>,
4157) -> (String, Value) {
4158    let normalized_arguments = if gemma4_model {
4159        crate::agent::inference::normalize_tool_argument_string(tool_name, raw_arguments)
4160    } else {
4161        raw_arguments.to_string()
4162    };
4163    let mut normalized_name = tool_name.to_string();
4164    let mut args = serde_json::from_str::<Value>(&normalized_arguments)
4165        .unwrap_or(Value::Object(Default::default()));
4166    rewrite_host_tool_call(&mut normalized_name, &mut args, latest_user_prompt);
4167    (normalized_name, args)
4168}
4169
/// Test-only helper: normalises a raw tool call exactly as execution would
/// and returns its canonical `"<name>:<json-args>"` key.
#[cfg(test)]
fn normalized_tool_call_key_for_dedupe(
    tool_name: &str,
    raw_arguments: &str,
    gemma4_model: bool,
    latest_user_prompt: Option<&str>,
) -> String {
    let normalized = normalized_tool_call_for_execution(
        tool_name,
        raw_arguments,
        gemma4_model,
        latest_user_prompt,
    );
    let (effective_name, effective_args) = &normalized;
    canonical_tool_call_key(effective_name, effective_args)
}
4185
4186impl ConversationManager {
4187    /// Checks if a tool call is authorized given the current configuration and mode.
4188    fn check_authorization(
4189        &self,
4190        name: &str,
4191        args: &serde_json::Value,
4192        config: &crate::agent::config::HematiteConfig,
4193        yolo_flag: bool,
4194    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
4195        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
4196    }
4197
4198    /// Layer 4: Isolated tool execution logic. Does not mutate 'self' to allow parallelism.
4199    async fn process_tool_call(
4200        &self,
4201        mut call: ToolCallFn,
4202        config: crate::agent::config::HematiteConfig,
4203        yolo: bool,
4204        tx: mpsc::Sender<InferenceEvent>,
4205        real_id: String,
4206    ) -> ToolExecutionOutcome {
4207        let mut msg_results = Vec::new();
4208        let gemma4_model =
4209            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
4210        let normalized_arguments = if gemma4_model {
4211            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
4212        } else {
4213            call.arguments.clone()
4214        };
4215
4216        // 1. Argument Parsing & Repair
4217        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
4218            Ok(v) => v,
4219            Err(_) => {
4220                match self
4221                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
4222                    .await
4223                {
4224                    Ok(v) => v,
4225                    Err(e) => {
4226                        let _ = tx
4227                            .send(InferenceEvent::Thought(format!(
4228                                "JSON Repair failed: {}",
4229                                e
4230                            )))
4231                            .await;
4232                        Value::Object(Default::default())
4233                    }
4234                }
4235            }
4236        };
4237        let last_user_prompt = self
4238            .history
4239            .iter()
4240            .rev()
4241            .find(|message| message.role == "user")
4242            .map(|message| message.content.as_str());
4243        rewrite_host_tool_call(&mut call.name, &mut args, last_user_prompt);
4244
4245        let display = format_tool_display(&call.name, &args);
4246        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
4247        let auth = self.check_authorization(&call.name, &args, &config, yolo);
4248
4249        // 2. Permission Check
4250        let decision_result = match precondition_result {
4251            Err(e) => Err(e),
4252            Ok(_) => match auth {
4253                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
4254                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
4255                    reason,
4256                    source: _,
4257                } => {
4258                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
4259                    let _ = tx
4260                        .send(InferenceEvent::ApprovalRequired {
4261                            id: real_id.clone(),
4262                            name: call.name.clone(),
4263                            display: format!("{}\nWhy: {}", display, reason),
4264                            diff: None,
4265                            responder: approve_tx,
4266                        })
4267                        .await;
4268
4269                    match approve_rx.await {
4270                        Ok(true) => Ok(()),
4271                        _ => Err("Declined by user".into()),
4272                    }
4273                }
4274                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
4275                    reason, ..
4276                } => Err(reason),
4277            },
4278        };
4279        let blocked_by_policy =
4280            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
4281
4282        // 3. Execution (Local or MCP)
4283        let (output, is_error) = match decision_result {
4284            Err(e) => (format!("Error: {}", e), true),
4285            Ok(_) => {
4286                let _ = tx
4287                    .send(InferenceEvent::ToolCallStart {
4288                        id: real_id.clone(),
4289                        name: call.name.clone(),
4290                        args: display.clone(),
4291                    })
4292                    .await;
4293
4294                let result = if call.name.starts_with("lsp_") {
4295                    let lsp = self.lsp_manager.clone();
4296                    let path = args
4297                        .get("path")
4298                        .and_then(|v| v.as_str())
4299                        .unwrap_or("")
4300                        .to_string();
4301                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4302                    let character =
4303                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
4304
4305                    match call.name.as_str() {
4306                        "lsp_definitions" => {
4307                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
4308                                .await
4309                        }
4310                        "lsp_references" => {
4311                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
4312                                .await
4313                        }
4314                        "lsp_hover" => {
4315                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
4316                        }
4317                        "lsp_search_symbol" => {
4318                            let query = args
4319                                .get("query")
4320                                .and_then(|v| v.as_str())
4321                                .unwrap_or_default()
4322                                .to_string();
4323                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
4324                        }
4325                        "lsp_rename_symbol" => {
4326                            let new_name = args
4327                                .get("new_name")
4328                                .and_then(|v| v.as_str())
4329                                .unwrap_or_default()
4330                                .to_string();
4331                            crate::tools::lsp_tools::lsp_rename_symbol(
4332                                lsp, path, line, character, new_name,
4333                            )
4334                            .await
4335                        }
4336                        "lsp_get_diagnostics" => {
4337                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
4338                        }
4339                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
4340                    }
4341                } else if call.name == "auto_pin_context" {
4342                    let pts = args.get("paths").and_then(|v| v.as_array());
4343                    let reason = args
4344                        .get("reason")
4345                        .and_then(|v| v.as_str())
4346                        .unwrap_or("uninformed scoping");
4347                    if let Some(arr) = pts {
4348                        let mut pinned = Vec::new();
4349                        {
4350                            let mut guard = self.pinned_files.lock().await;
4351                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; // 25MB Safety Valve
4352
4353                            for v in arr.iter().take(3) {
4354                                if let Some(p) = v.as_str() {
4355                                    if let Ok(meta) = std::fs::metadata(p) {
4356                                        if meta.len() > MAX_PINNED_SIZE {
4357                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
4358                                            continue;
4359                                        }
4360                                        if let Ok(content) = std::fs::read_to_string(p) {
4361                                            guard.insert(p.to_string(), content);
4362                                            pinned.push(p.to_string());
4363                                        }
4364                                    }
4365                                }
4366                            }
4367                        }
4368                        let msg = format!(
4369                            "Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
4370                            pinned.join(", "),
4371                            reason
4372                        );
4373                        let _ = tx
4374                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
4375                            .await;
4376                        Ok(msg)
4377                    } else {
4378                        Err("Missing 'paths' array for auto_pin_context.".to_string())
4379                    }
4380                } else if call.name == "list_pinned" {
4381                    let paths_msg = {
4382                        let pinned = self.pinned_files.lock().await;
4383                        if pinned.is_empty() {
4384                            "No files are currently pinned.".to_string()
4385                        } else {
4386                            let paths: Vec<_> = pinned.keys().cloned().collect();
4387                            format!(
4388                                "Currently pinned files in active memory:\n- {}",
4389                                paths.join("\n- ")
4390                            )
4391                        }
4392                    };
4393                    Ok(paths_msg)
4394                } else if call.name.starts_with("mcp__") {
4395                    let mut mcp = self.mcp_manager.lock().await;
4396                    match mcp.call_tool(&call.name, &args).await {
4397                        Ok(res) => Ok(res),
4398                        Err(e) => Err(e.to_string()),
4399                    }
4400                } else if call.name == "swarm" {
4401                    // ── Swarm Orchestration ──
4402                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
4403                    let max_workers = args
4404                        .get("max_workers")
4405                        .and_then(|v| v.as_u64())
4406                        .unwrap_or(3) as usize;
4407
4408                    let mut task_objs = Vec::new();
4409                    if let Value::Array(arr) = tasks_val {
4410                        for v in arr {
4411                            let id = v
4412                                .get("id")
4413                                .and_then(|x| x.as_str())
4414                                .unwrap_or("?")
4415                                .to_string();
4416                            let target = v
4417                                .get("target")
4418                                .and_then(|x| x.as_str())
4419                                .unwrap_or("?")
4420                                .to_string();
4421                            let instruction = v
4422                                .get("instruction")
4423                                .and_then(|x| x.as_str())
4424                                .unwrap_or("?")
4425                                .to_string();
4426                            task_objs.push(crate::agent::parser::WorkerTask {
4427                                id,
4428                                target,
4429                                instruction,
4430                            });
4431                        }
4432                    }
4433
4434                    if task_objs.is_empty() {
4435                        Err("No tasks provided for swarm.".to_string())
4436                    } else {
4437                        let (swarm_tx_internal, mut swarm_rx_internal) =
4438                            tokio::sync::mpsc::channel(32);
4439                        let tx_forwarder = tx.clone();
4440
4441                        // Bridge SwarmMessage -> InferenceEvent
4442                        tokio::spawn(async move {
4443                            while let Some(msg) = swarm_rx_internal.recv().await {
4444                                match msg {
4445                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
4446                                        let _ = tx_forwarder
4447                                            .send(InferenceEvent::Thought(format!(
4448                                                "Swarm [{}]: {}% complete",
4449                                                id, p
4450                                            )))
4451                                            .await;
4452                                    }
4453                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
4454                                        worker_id,
4455                                        file_path,
4456                                        before: _,
4457                                        after: _,
4458                                        tx,
4459                                    } => {
4460                                        let (approve_tx, approve_rx) =
4461                                            tokio::sync::oneshot::channel::<bool>();
4462                                        let display = format!(
4463                                            "Swarm worker [{}]: Integrated changes into {:?}",
4464                                            worker_id, file_path
4465                                        );
4466                                        let _ = tx_forwarder
4467                                            .send(InferenceEvent::ApprovalRequired {
4468                                                id: format!("swarm_{}", worker_id),
4469                                                name: "swarm_apply".to_string(),
4470                                                display,
4471                                                diff: None,
4472                                                responder: approve_tx,
4473                                            })
4474                                            .await;
4475                                        if let Ok(approved) = approve_rx.await {
4476                                            let response = if approved {
4477                                                crate::agent::swarm::ReviewResponse::Accept
4478                                            } else {
4479                                                crate::agent::swarm::ReviewResponse::Reject
4480                                            };
4481                                            let _ = tx.send(response);
4482                                        }
4483                                    }
4484                                    crate::agent::swarm::SwarmMessage::Done => {}
4485                                }
4486                            }
4487                        });
4488
4489                        let coordinator = self.swarm_coordinator.clone();
4490                        match coordinator
4491                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
4492                            .await
4493                        {
4494                            Ok(_) => Ok(
4495                                "Swarm execution completed. Check files for integration results."
4496                                    .to_string(),
4497                            ),
4498                            Err(e) => Err(format!("Swarm failure: {}", e)),
4499                        }
4500                    }
4501                } else if call.name == "vision_analyze" {
4502                    crate::tools::vision::vision_analyze(&self.engine, &args).await
4503                } else if matches!(
4504                    call.name.as_str(),
4505                    "edit_file" | "patch_hunk" | "multi_search_replace"
4506                ) && !yolo
4507                {
4508                    // ── Diff preview gate ─────────────────────────────────────
4509                    // Compute what the edit would look like before applying it.
4510                    // If we can build a diff, require user Y/N in the TUI.
4511                    let diff_result = match call.name.as_str() {
4512                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
4513                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
4514                        _ => crate::tools::file_ops::compute_msr_diff(&args),
4515                    };
4516                    match diff_result {
4517                        Ok(diff_text) => {
4518                            let path_label =
4519                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
4520                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
4521                            let _ = tx
4522                                .send(InferenceEvent::ApprovalRequired {
4523                                    id: real_id.clone(),
4524                                    name: call.name.clone(),
4525                                    display: format!("Edit preview: {}", path_label),
4526                                    diff: Some(diff_text),
4527                                    responder: appr_tx,
4528                                })
4529                                .await;
4530                            match appr_rx.await {
4531                                Ok(true) => dispatch_tool(&call.name, &args).await,
4532                                _ => Err("Edit declined by user.".into()),
4533                            }
4534                        }
4535                        // Diff computation failed (e.g. search string not found yet) —
4536                        // fall through and let the tool return its own error.
4537                        Err(_) => dispatch_tool(&call.name, &args).await,
4538                    }
4539                } else {
4540                    dispatch_tool(&call.name, &args).await
4541                };
4542
4543                match result {
4544                    Ok(o) => (o, false),
4545                    Err(e) => (format!("Error: {}", e), true),
4546                }
4547            }
4548        };
4549
4550        // ── Session Economics ────────────────────────────────────────────────
4551        {
4552            if let Ok(mut econ) = self.engine.economics.lock() {
4553                econ.record_tool(&call.name, !is_error);
4554            }
4555        }
4556
4557        if !is_error {
4558            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
4559                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
4560                    if call.name == "inspect_lines" {
4561                        self.record_line_inspection(path).await;
4562                    } else {
4563                        self.record_read_observation(path).await;
4564                    }
4565                }
4566            }
4567
4568            if call.name == "verify_build" {
4569                let ok = output.contains("BUILD OK")
4570                    || output.contains("BUILD SUCCESS")
4571                    || output.contains("BUILD OKAY");
4572                self.record_verify_build_result(ok, &output).await;
4573            }
4574
4575            if matches!(
4576                call.name.as_str(),
4577                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4578            ) || is_mcp_mutating_tool(&call.name)
4579            {
4580                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
4581                    .await;
4582            }
4583
4584            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
4585                msg_results.push(receipt);
4586            }
4587        }
4588
4589        // 4. Critic Check (Specular Tier 2)
4590        // Gated: Only run on code files with substantive content to avoid burning tokens
4591        // on trivial doc/config edits.
4592        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
4593            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
4594            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
4595            let ext = std::path::Path::new(path)
4596                .extension()
4597                .and_then(|e| e.to_str())
4598                .unwrap_or("");
4599            const SKIP_EXTS: &[&str] = &[
4600                "md",
4601                "toml",
4602                "json",
4603                "txt",
4604                "yml",
4605                "yaml",
4606                "cfg",
4607                "csv",
4608                "lock",
4609                "gitignore",
4610            ];
4611            let line_count = content.lines().count();
4612            if !path.is_empty()
4613                && !content.is_empty()
4614                && !SKIP_EXTS.contains(&ext)
4615                && line_count >= 50
4616            {
4617                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
4618                    msg_results.push(ChatMessage::system(&format!(
4619                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
4620                        path, critique
4621                    )));
4622                }
4623            }
4624        }
4625
4626        ToolExecutionOutcome {
4627            call_id: real_id,
4628            tool_name: call.name,
4629            args,
4630            output,
4631            is_error,
4632            blocked_by_policy,
4633            msg_results,
4634        }
4635    }
4636}
4637
4638/// The result of an isolated tool execution.
4639/// Used to bridge Parallel/Serial execution back to the main history.
4640struct ToolExecutionOutcome {
4641    call_id: String,
4642    tool_name: String,
4643    args: Value,
4644    output: String,
4645    is_error: bool,
4646    blocked_by_policy: bool,
4647    msg_results: Vec<ChatMessage>,
4648}
4649
/// Cloneable record of a tool result that stores only the tool's name.
// NOTE(review): presumably used as a cache entry, going by the type name;
// its use sites are not visible from this part of the file.
#[derive(Clone)]
struct CachedToolResult {
    /// Name of the tool whose result this entry stands for.
    tool_name: String,
}
4654
/// Reports whether `path` has one of the recognised source-code file
/// extensions.
///
/// The comparison is ASCII case-insensitive. Paths without an extension,
/// or whose extension is not valid UTF-8, are not code-like.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: [&str; 19] = [
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    match std::path::Path::new(path).extension().and_then(|raw| raw.to_str()) {
        Some(extension) => CODE_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(known)),
        None => false,
    }
}
4683
4684// ── Display helpers ───────────────────────────────────────────────────────────
4685
4686pub fn format_tool_display(name: &str, args: &Value) -> String {
4687    let get = |key: &str| {
4688        args.get(key)
4689            .and_then(|v| v.as_str())
4690            .unwrap_or("")
4691            .to_string()
4692    };
4693    match name {
4694        "shell" => format!("$ {}", get("command")),
4695
4696        "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
4697        "describe_toolchain" => format!("describe toolchain {}", get("topic")),
4698        "inspect_host" => format!("inspect host {}", get("topic")),
4699        _ => format!("{} {:?}", name, args),
4700    }
4701}
4702
4703// ── Text utilities ────────────────────────────────────────────────────────────
4704
/// Heuristically detects shell commands that merely probe the host machine
/// (PATH, tool versions, ports, network, services, processes, user folders).
///
/// The command is lowercased (ASCII) and checked for any of a fixed set of
/// substrings; a single hit is enough. Matching is purely textual, so the
/// markers may appear anywhere in the command.
fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const INSPECTION_MARKERS: &[&str] = &[
        // PATH, package managers, file discovery
        "$env:path",
        "pathvariable",
        "pip --version",
        "pipx --version",
        "winget --version",
        "choco",
        "scoop",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        // Toolchain version probes
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        // Ports and sockets
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "tasklist",
        // Network configuration
        "ipconfig",
        "get-netipconfiguration",
        "get-netadapter",
        "route print",
        "ifconfig",
        "ip addr",
        "ip route",
        "resolv.conf",
        // Services
        "get-service",
        "sc query",
        "systemctl",
        "service --status-all",
        // Processes
        "get-process",
        "working set",
        "ps -eo",
        "ps aux",
        // Well-known user folders
        "desktop",
        "downloads",
    ];

    let normalized = command.to_ascii_lowercase();
    for marker in INSPECTION_MARKERS {
        if normalized.contains(*marker) {
            return true;
        }
    }
    false
}
4762
4763// Moved strip_think_blocks to inference.rs
4764
/// Limits `text` to at most `max_bytes` bytes of content.
///
/// Text that already fits is returned unchanged. Otherwise the text is cut at
/// the last UTF-8 character boundary that does not exceed `max_bytes`, and a
/// marker line stating the cap is appended after the kept prefix.
fn cap_output(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_string();
    }

    // Walk backwards from the cap to the nearest character boundary so the
    // slice below never splits a multi-byte character. Offset 0 is always a
    // boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0);
    let kept = &text[..cut];

    format!("{}\n... [output capped at {}B]", kept, max_bytes)
}
4781
/// Counters describing what `enforce_prompt_budget` did to shrink a prompt.
/// All counters start at zero via `Default`.
#[derive(Default)]
struct PromptBudgetStats {
    /// Large tool results that were replaced by a short summary.
    summarized_tool_results: usize,
    /// Older tool results whose content was replaced by an omission placeholder.
    collapsed_tool_results: usize,
    /// Long user/assistant messages that were replaced by a short summary.
    trimmed_chat_messages: usize,
    /// Messages removed from the prompt entirely.
    dropped_messages: usize,
}
4789
4790fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
4791    crate::agent::inference::estimate_message_batch_tokens(messages)
4792}
4793
4794fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
4795    let budget = compaction::SummaryCompressionBudget {
4796        max_chars,
4797        max_lines: 3,
4798        max_line_chars: max_chars.clamp(80, 240),
4799    };
4800    let compressed = compaction::compress_summary(text, budget).summary;
4801    if compressed.is_empty() {
4802        String::new()
4803    } else {
4804        compressed
4805    }
4806}
4807
4808fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
4809    let tool_name = message.name.as_deref().unwrap_or("tool");
4810    let body = summarize_prompt_blob(message.content.as_str(), 320);
4811    format!(
4812        "[Prompt-budget summary of prior `{}` result]\n{}",
4813        tool_name, body
4814    )
4815}
4816
4817fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
4818    let role = message.role.as_str();
4819    let body = summarize_prompt_blob(message.content.as_str(), 240);
4820    format!(
4821        "[Prompt-budget summary of earlier {} message]\n{}",
4822        role, body
4823    )
4824}
4825
4826fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
4827    if messages.len() > 1 && messages[1].role != "user" {
4828        messages.insert(1, ChatMessage::user("Continuing previous context..."));
4829    }
4830}
4831
4832fn enforce_prompt_budget(
4833    prompt_msgs: &mut Vec<ChatMessage>,
4834    context_length: usize,
4835) -> Option<String> {
4836    let target_tokens = ((context_length as f64) * 0.68) as usize;
4837    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4838        return None;
4839    }
4840
4841    let mut stats = PromptBudgetStats::default();
4842
4843    // 1. Summarize the newest large tool outputs first.
4844    let mut tool_indices: Vec<usize> = prompt_msgs
4845        .iter()
4846        .enumerate()
4847        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4848        .collect();
4849    for idx in tool_indices.iter().rev().copied() {
4850        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4851            break;
4852        }
4853        let original = prompt_msgs[idx].content.as_str().to_string();
4854        if original.len() > 1200 {
4855            prompt_msgs[idx].content =
4856                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
4857            stats.summarized_tool_results += 1;
4858        }
4859    }
4860
4861    // 2. Collapse older tool results aggressively, keeping only the most recent two verbatim/summarized.
4862    tool_indices = prompt_msgs
4863        .iter()
4864        .enumerate()
4865        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4866        .collect();
4867    if tool_indices.len() > 2 {
4868        for idx in tool_indices
4869            .iter()
4870            .take(tool_indices.len().saturating_sub(2))
4871            .copied()
4872        {
4873            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4874                break;
4875            }
4876            prompt_msgs[idx].content = MessageContent::Text(
4877                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
4878            );
4879            stats.collapsed_tool_results += 1;
4880        }
4881    }
4882
4883    // 3. Trim older long chat messages, but preserve the final user request.
4884    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4885    for idx in 1..prompt_msgs.len() {
4886        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4887            break;
4888        }
4889        if Some(idx) == last_user_idx {
4890            continue;
4891        }
4892        let role = prompt_msgs[idx].role.as_str();
4893        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
4894            prompt_msgs[idx].content =
4895                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
4896            stats.trimmed_chat_messages += 1;
4897        }
4898    }
4899
4900    // 4. Drop the oldest non-system context until we fit, preserving the latest user request.
4901    let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4902    let mut idx = 1usize;
4903    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
4904        if Some(idx) == preserve_last_user_idx {
4905            idx += 1;
4906            if idx >= prompt_msgs.len() {
4907                break;
4908            }
4909            continue;
4910        }
4911        if idx >= prompt_msgs.len() {
4912            break;
4913        }
4914        prompt_msgs.remove(idx);
4915        stats.dropped_messages += 1;
4916    }
4917
4918    normalize_prompt_start(prompt_msgs);
4919
4920    let new_tokens = estimate_prompt_tokens(prompt_msgs);
4921    if stats.summarized_tool_results == 0
4922        && stats.collapsed_tool_results == 0
4923        && stats.trimmed_chat_messages == 0
4924        && stats.dropped_messages == 0
4925    {
4926        return None;
4927    }
4928
4929    Some(format!(
4930        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
4931        new_tokens,
4932        target_tokens,
4933        stats.summarized_tool_results,
4934        stats.collapsed_tool_results,
4935        stats.trimmed_chat_messages,
4936        stats.dropped_messages
4937    ))
4938}
4939
/// Returns true for short, direct tool-use requests that don't benefit from deep reasoning.
/// Used to skip the auto-/think prepend so the model calls the tool immediately
/// instead of spending thousands of tokens deliberating over a trivial task.
///
/// Two kinds of input qualify:
/// - anything mentioning `run_code` / `run code`, regardless of length;
/// - inputs shorter than 120 bytes containing one of the compute-style
///   keywords ("calculate X", "test this", "execute Y", ...).
///
/// Matching is case-insensitive substring matching.
fn is_quick_tool_request(input: &str) -> bool {
    const COMPUTE_KEYWORDS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];

    let normalized = input.to_lowercase();

    // Explicit sandbox requests need no reasoning warmup.
    let mentions_sandbox = normalized.contains("run_code") || normalized.contains("run code");

    mentions_sandbox
        || (input.len() < 120
            && COMPUTE_KEYWORDS
                .iter()
                .any(|keyword| normalized.contains(*keyword)))
}
4969
/// Splits `text` into consecutive chunks that each end after
/// `words_per_chunk` word breaks, where a word break is a single space or
/// newline character.
///
/// Break characters stay attached to the chunk they terminate, so joining
/// the chunks reproduces `text` exactly. Repeated breaks each count, which
/// makes the word count approximate. Any remainder after the last full chunk
/// becomes a final, shorter chunk; empty input yields no chunks.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    fn is_word_break(ch: char) -> bool {
        ch == ' ' || ch == '\n'
    }

    let mut chunks = Vec::new();
    let mut pending = String::new();
    let mut breaks_seen = 0;

    // Each piece is a run of non-break characters followed by at most one
    // break character; only the final piece can lack the trailing break.
    for piece in text.split_inclusive(is_word_break) {
        pending.push_str(piece);
        if !piece.ends_with(is_word_break) {
            continue;
        }
        breaks_seen += 1;
        if breaks_seen >= words_per_chunk {
            chunks.push(std::mem::take(&mut pending));
            breaks_seen = 0;
        }
    }

    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
4991
4992fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
4993    if call.name != "read_file" {
4994        return None;
4995    }
4996    let normalized_arguments =
4997        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
4998    let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
4999    let path = args.get("path").and_then(|v| v.as_str())?;
5000    Some(normalize_workspace_path(path))
5001}
5002
5003fn order_batch_reads_first(
5004    calls: Vec<crate::agent::inference::ToolCallResponse>,
5005) -> (
5006    Vec<crate::agent::inference::ToolCallResponse>,
5007    Option<String>,
5008) {
5009    let has_reads = calls.iter().any(|c| {
5010        matches!(
5011            c.function.name.as_str(),
5012            "read_file" | "inspect_lines" | "grep_files" | "list_files"
5013        )
5014    });
5015    let has_edits = calls.iter().any(|c| {
5016        matches!(
5017            c.function.name.as_str(),
5018            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5019        )
5020    });
5021    if has_reads && has_edits {
5022        let reads: Vec<_> = calls
5023            .into_iter()
5024            .filter(|c| {
5025                !matches!(
5026                    c.function.name.as_str(),
5027                    "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
5028                )
5029            })
5030            .collect();
5031        let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
5032        (reads, note)
5033    } else {
5034        (calls, None)
5035    }
5036}
5037
/// Decides from the first line of grep output whether the search fanned out
/// widely.
///
/// The first line is read as a summary: its leading space-delimited token is
/// taken as the match count, and the first `", "`-separated segment of the
/// form `<number> hunk(s)` supplies the hunk count. Values that are missing
/// or fail to parse count as zero. The output is high-fanout at 12 or more
/// matches, or 8 or more hunks.
fn grep_output_is_high_fanout(output: &str) -> bool {
    let summary = match output.lines().next() {
        Some(first_line) => first_line,
        None => return false,
    };

    let leading_token = summary.split_once(' ').map_or(summary, |(head, _)| head);
    let match_count: usize = leading_token.parse().unwrap_or(0);
    if match_count >= 12 {
        return true;
    }

    let hunk_count = summary
        .split(", ")
        .filter_map(|segment| segment.strip_suffix(" hunk(s)")?.parse::<usize>().ok())
        .next()
        .unwrap_or(0);
    hunk_count >= 8
}
5056
5057fn build_system_with_corrections(
5058    base: &str,
5059    hints: &[String],
5060    gpu: &Arc<GpuState>,
5061    git: &Arc<crate::agent::git_monitor::GitState>,
5062    config: &crate::agent::config::HematiteConfig,
5063) -> String {
5064    let mut system_msg = base.to_string();
5065
5066    // Inject Permission Mode.
5067    system_msg.push_str("\n\n# Permission Mode\n");
5068    let mode_label = match config.mode {
5069        crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
5070        crate::agent::config::PermissionMode::Developer => "DEVELOPER",
5071        crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
5072    };
5073    system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
5074
5075    if config.mode == crate::agent::config::PermissionMode::ReadOnly {
5076        system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
5077    } else {
5078        system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
5079    }
5080
5081    // Inject live hardware status.
5082    let (used, total) = gpu.read();
5083    if total > 0 {
5084        system_msg.push_str("\n\n# Terminal Hardware Context\n");
5085        system_msg.push_str(&format!(
5086            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
5087            gpu.gpu_name(),
5088            used as f64 / 1024.0,
5089            total as f64 / 1024.0,
5090            gpu.ratio() * 100.0
5091        ));
5092        system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
5093    }
5094
5095    // Inject Git Repository context.
5096    system_msg.push_str("\n\n# Git Repository Context\n");
5097    let git_status_label = git.label();
5098    let git_url = git.url();
5099    system_msg.push_str(&format!(
5100        "REMOTE STATUS: {} | URL: {}\n",
5101        git_status_label, git_url
5102    ));
5103
5104    // Live Snapshots (Status/Diff)
5105    let root = crate::tools::file_ops::workspace_root();
5106    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
5107        system_msg.push_str("\nGit status snapshot:\n");
5108        system_msg.push_str(&status_snapshot);
5109        system_msg.push_str("\n");
5110    }
5111
5112    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
5113        system_msg.push_str("\nGit diff snapshot:\n");
5114        system_msg.push_str(&diff_snapshot);
5115        system_msg.push_str("\n");
5116    }
5117
5118    if git_status_label == "NONE" {
5119        system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
5120    } else if git_status_label == "BEHIND" {
5121        system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
5122    }
5123
5124    // NOTE: Instruction files (CLAUDE.md, HEMATITE.md, etc.) are already injected
5125    // by InferenceEngine::build_system_prompt() via load_instruction_files().
5126    // Injecting them again here would double the token cost (~4K wasted per turn).
5127
5128    if hints.is_empty() {
5129        return system_msg;
5130    }
5131    system_msg.push_str("\n\n# Formatting Corrections\n");
5132    system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
5133    for hint in hints {
5134        system_msg.push_str(&format!("- {}\n", hint));
5135    }
5136    system_msg
5137}
5138
/// Picks a model override for `user_input` based on keyword cues.
///
/// Inputs mentioning heavy-lifting verbs (refactor, rewrite, implement,
/// create, fix, debug) select `think_model` when one is configured.
/// Otherwise, inputs mentioning lookup-style words (what, show, find, list,
/// status) select `fast_model` when one is configured. Returns `None` when no
/// cue applies or the corresponding model is not set. Matching is
/// case-insensitive substring matching, and the think cues take priority.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_CUES: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_CUES: &[&str] = &["what", "show", "find", "list", "status"];

    let normalized = user_input.to_lowercase();
    let mentions_any = |cues: &[&str]| cues.iter().any(|cue| normalized.contains(*cue));

    let wants_think = mentions_any(THINK_CUES);
    let wants_fast = mentions_any(FAST_CUES);

    match (think_model, fast_model) {
        (Some(model), _) if wants_think => Some(model),
        (_, Some(model)) if wants_fast => Some(model),
        _ => None,
    }
}
5164
5165fn is_parallel_safe(name: &str) -> bool {
5166    let metadata = crate::agent::inference::tool_metadata_for_name(name);
5167    !metadata.mutates_workspace && !metadata.external_surface
5168}
5169
/// Decides whether a chat-mode query should consult the vein.
///
/// Always true in docs-only mode. Otherwise true when the query (compared
/// in ASCII lowercase) contains one of the history-related cue phrases, or
/// contains a date-shaped token: a run of exactly ten digit/hyphen
/// characters whose fifth character is a hyphen, such as `2026-04-09`.
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const HISTORY_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    if docs_only_mode {
        return true;
    }

    let normalized = query.to_ascii_lowercase();
    if HISTORY_CUES.iter().any(|cue| normalized.contains(*cue)) {
        return true;
    }

    // Tokens consist solely of ASCII digits and hyphens, so byte indexing on
    // a ten-byte token is equivalent to character indexing.
    normalized
        .split(|ch: char| ch != '-' && !ch.is_ascii_digit())
        .any(|token| token.len() == 10 && token.as_bytes()[4] == b'-')
}
5196
5197#[cfg(test)]
5198mod tests {
5199    use super::*;
5200
5201    #[test]
5202    fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
5203        let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
5204        let class = classify_runtime_failure(detail);
5205        assert_eq!(class, RuntimeFailureClass::ContextWindow);
5206        assert_eq!(class.tag(), "context_window");
5207        assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
5208    }
5209
5210    #[test]
5211    fn runtime_failure_maps_to_provider_and_checkpoint_state() {
5212        assert_eq!(
5213            provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
5214            Some(ProviderRuntimeState::ContextWindow)
5215        );
5216        assert_eq!(
5217            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
5218            Some(OperatorCheckpointState::BlockedContextWindow)
5219        );
5220        assert_eq!(
5221            provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
5222            Some(ProviderRuntimeState::Degraded)
5223        );
5224        assert_eq!(
5225            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
5226            None
5227        );
5228    }
5229
5230    #[test]
5231    fn intent_router_treats_tool_registry_ownership_as_product_truth() {
5232        let intent = classify_query_intent(
5233            WorkflowMode::ReadOnly,
5234            "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
5235        );
5236        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5237        assert_eq!(
5238            intent.direct_answer,
5239            Some(DirectAnswerKind::ToolRegistryOwnership)
5240        );
5241    }
5242
5243    #[test]
5244    fn intent_router_treats_tool_classes_as_product_truth() {
5245        let intent = classify_query_intent(
5246            WorkflowMode::ReadOnly,
5247            "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
5248        );
5249        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5250        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
5251    }
5252
5253    #[test]
5254    fn tool_registry_ownership_answer_mentions_new_owner_file() {
5255        let answer = build_tool_registry_ownership_answer();
5256        assert!(answer.contains("src/agent/tool_registry.rs"));
5257        assert!(answer.contains("builtin dispatch path"));
5258        assert!(answer.contains("src/agent/conversation.rs"));
5259    }
5260
5261    #[test]
5262    fn intent_router_treats_mcp_lifecycle_as_product_truth() {
5263        let intent = classify_query_intent(
5264            WorkflowMode::ReadOnly,
5265            "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
5266        );
5267        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5268        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
5269    }
5270
5271    #[test]
5272    fn intent_router_short_circuits_unsafe_commit_pressure() {
5273        let intent = classify_query_intent(
5274            WorkflowMode::Auto,
5275            "Make a code change, skip verification, and commit it immediately.",
5276        );
5277        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
5278        assert_eq!(
5279            intent.direct_answer,
5280            Some(DirectAnswerKind::UnsafeWorkflowPressure)
5281        );
5282    }
5283
5284    #[test]
5285    fn unsafe_workflow_pressure_answer_requires_verification() {
5286        let answer = build_unsafe_workflow_pressure_answer();
5287        assert!(answer.contains("should not skip verification"));
5288        assert!(answer.contains("run the appropriate verification path"));
5289        assert!(answer.contains("only then commit"));
5290    }
5291
5292    #[test]
5293    fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
5294        let intent = classify_query_intent(
5295            WorkflowMode::ReadOnly,
5296            "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.",
5297        );
5298        assert_eq!(intent.primary_class, QueryIntentClass::RepoArchitecture);
5299        assert!(intent.architecture_overview_mode);
5300        assert_eq!(intent.direct_answer, None);
5301    }
5302
5303    #[test]
5304    fn intent_router_marks_host_inspection_questions() {
5305        let intent = classify_query_intent(
5306            WorkflowMode::Auto,
5307            "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.",
5308        );
5309        assert!(intent.host_inspection_mode);
5310        assert_eq!(
5311            preferred_host_inspection_topic(
5312                "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development."
5313            ),
5314            Some("summary")
5315        );
5316    }
5317
5318    #[test]
5319    fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
5320        assert!(should_use_vein_in_chat(
5321            "What did we decide on 2026-04-09 about docs-only mode?",
5322            false
5323        ));
5324        assert!(should_use_vein_in_chat("Summarize these local notes", true));
5325        assert!(!should_use_vein_in_chat("Tell me a joke", false));
5326    }
5327
5328    #[test]
5329    fn shell_host_inspection_guard_matches_path_and_version_commands() {
5330        assert!(shell_looks_like_structured_host_inspection(
5331            "$env:PATH -split ';'"
5332        ));
5333        assert!(shell_looks_like_structured_host_inspection(
5334            "cargo --version"
5335        ));
5336        assert!(shell_looks_like_structured_host_inspection(
5337            "Get-NetTCPConnection -LocalPort 3000"
5338        ));
5339        assert!(shell_looks_like_structured_host_inspection(
5340            "netstat -ano | findstr :3000"
5341        ));
5342        assert!(shell_looks_like_structured_host_inspection(
5343            "Get-Process | Sort-Object WS -Descending"
5344        ));
5345        assert!(shell_looks_like_structured_host_inspection("ipconfig /all"));
5346        assert!(shell_looks_like_structured_host_inspection("Get-Service"));
5347        assert!(shell_looks_like_structured_host_inspection(
5348            "winget --version"
5349        ));
5350    }
5351
5352    #[test]
5353    fn intent_router_picks_ports_for_listening_port_questions() {
5354        assert_eq!(
5355            preferred_host_inspection_topic(
5356                "Show me what is listening on port 3000 and whether anything unexpected is exposed."
5357            ),
5358            Some("ports")
5359        );
5360    }
5361
5362    #[test]
5363    fn intent_router_picks_processes_for_host_process_questions() {
5364        assert_eq!(
5365            preferred_host_inspection_topic(
5366                "Show me what processes are using the most RAM right now."
5367            ),
5368            Some("processes")
5369        );
5370    }
5371
5372    #[test]
5373    fn intent_router_picks_network_for_adapter_questions() {
5374        assert_eq!(
5375            preferred_host_inspection_topic(
5376                "Show me my active network adapters, IP addresses, gateways, and DNS servers."
5377            ),
5378            Some("network")
5379        );
5380    }
5381
5382    #[test]
5383    fn intent_router_picks_services_for_service_questions() {
5384        assert_eq!(
5385            preferred_host_inspection_topic(
5386                "Show me the running services and startup types that matter for a normal dev machine."
5387            ),
5388            Some("services")
5389        );
5390    }
5391
5392    #[test]
5393    fn intent_router_picks_env_doctor_for_package_manager_questions() {
5394        assert_eq!(
5395            preferred_host_inspection_topic(
5396                "Run an environment doctor on this machine and tell me whether my PATH and package managers look sane."
5397            ),
5398            Some("env_doctor")
5399        );
5400    }
5401
5402    #[test]
5403    fn intent_router_picks_fix_plan_for_host_remediation_questions() {
5404        assert_eq!(
5405            preferred_host_inspection_topic("How do I fix cargo not found on this machine?"),
5406            Some("fix_plan")
5407        );
5408        assert_eq!(
5409            preferred_host_inspection_topic(
5410                "How do I fix Hematite when LM Studio is not reachable on localhost:1234?"
5411            ),
5412            Some("fix_plan")
5413        );
5414    }
5415
5416    #[test]
5417    fn fill_missing_fix_plan_issue_backfills_last_user_prompt() {
5418        let mut args = serde_json::json!({
5419            "topic": "fix_plan"
5420        });
5421
5422        fill_missing_fix_plan_issue(
5423            "inspect_host",
5424            &mut args,
5425            Some("/think\nHow do I fix cargo not found on this machine?"),
5426        );
5427
5428        assert_eq!(
5429            args.get("issue").and_then(|value| value.as_str()),
5430            Some("How do I fix cargo not found on this machine?")
5431        );
5432    }
5433
5434    #[test]
5435    fn shell_fix_question_rewrites_to_fix_plan() {
5436        let args = serde_json::json!({
5437            "command": "where cargo"
5438        });
5439
5440        assert!(should_rewrite_shell_to_fix_plan(
5441            "shell",
5442            &args,
5443            Some("How do I fix cargo not found on this machine?")
5444        ));
5445    }
5446
5447    #[test]
5448    fn fix_plan_dedupe_key_matches_rewritten_shell_probe() {
5449        let latest_user_prompt = Some("How do I fix cargo not found on this machine?");
5450        let shell_key = normalized_tool_call_key_for_dedupe(
5451            "shell",
5452            r#"{"command":"where cargo"}"#,
5453            false,
5454            latest_user_prompt,
5455        );
5456        let fix_plan_key = normalized_tool_call_key_for_dedupe(
5457            "inspect_host",
5458            r#"{"topic":"fix_plan"}"#,
5459            false,
5460            latest_user_prompt,
5461        );
5462
5463        assert_eq!(shell_key, fix_plan_key);
5464    }
5465
5466    #[test]
5467    fn shell_cleanup_script_rewrites_to_maintainer_workflow() {
5468        let (tool_name, args) = normalized_tool_call_for_execution(
5469            "shell",
5470            r#"{"command":"pwsh ./clean.ps1 -Deep -PruneDist"}"#,
5471            false,
5472            Some("Run my cleanup scripts."),
5473        );
5474
5475        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
5476        assert_eq!(
5477            args.get("workflow").and_then(|value| value.as_str()),
5478            Some("clean")
5479        );
5480        assert_eq!(
5481            args.get("deep").and_then(|value| value.as_bool()),
5482            Some(true)
5483        );
5484        assert_eq!(
5485            args.get("prune_dist").and_then(|value| value.as_bool()),
5486            Some(true)
5487        );
5488    }
5489
5490    #[test]
5491    fn shell_release_script_rewrites_to_maintainer_workflow() {
5492        let (tool_name, args) = normalized_tool_call_for_execution(
5493            "shell",
5494            r#"{"command":"pwsh ./release.ps1 -Version 0.4.5 -Push -AddToPath"}"#,
5495            false,
5496            Some("Run the release flow."),
5497        );
5498
5499        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
5500        assert_eq!(
5501            args.get("workflow").and_then(|value| value.as_str()),
5502            Some("release")
5503        );
5504        assert_eq!(
5505            args.get("version").and_then(|value| value.as_str()),
5506            Some("0.4.5")
5507        );
5508        assert_eq!(
5509            args.get("push").and_then(|value| value.as_bool()),
5510            Some(true)
5511        );
5512    }
5513
5514    #[test]
5515    fn explicit_cleanup_prompt_rewrites_shell_to_maintainer_workflow() {
5516        let (tool_name, args) = normalized_tool_call_for_execution(
5517            "shell",
5518            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
5519            false,
5520            Some("Run the deep cleanup and prune old dist artifacts."),
5521        );
5522
5523        assert_eq!(tool_name, "run_hematite_maintainer_workflow");
5524        assert_eq!(
5525            args.get("workflow").and_then(|value| value.as_str()),
5526            Some("clean")
5527        );
5528        assert_eq!(
5529            args.get("deep").and_then(|value| value.as_bool()),
5530            Some(true)
5531        );
5532        assert_eq!(
5533            args.get("prune_dist").and_then(|value| value.as_bool()),
5534            Some(true)
5535        );
5536    }
5537
5538    #[test]
5539    fn shell_cargo_test_rewrites_to_workspace_workflow() {
5540        let (tool_name, args) = normalized_tool_call_for_execution(
5541            "shell",
5542            r#"{"command":"cargo test"}"#,
5543            false,
5544            Some("Run cargo test in this project."),
5545        );
5546
5547        assert_eq!(tool_name, "run_workspace_workflow");
5548        assert_eq!(
5549            args.get("workflow").and_then(|value| value.as_str()),
5550            Some("command")
5551        );
5552        assert_eq!(
5553            args.get("command").and_then(|value| value.as_str()),
5554            Some("cargo test")
5555        );
5556    }
5557
5558    #[test]
5559    fn current_plan_execution_request_accepts_saved_plan_command() {
5560        assert!(is_current_plan_execution_request("/implement-plan"));
5561        assert!(is_current_plan_execution_request(
5562            "Implement the current plan."
5563        ));
5564    }
5565
5566    #[test]
5567    fn architect_operator_note_points_to_execute_path() {
5568        let plan = crate::tools::plan::PlanHandoff {
5569            goal: "Tighten startup workflow guidance".into(),
5570            target_files: vec!["src/runtime.rs".into()],
5571            ordered_steps: vec!["Update the startup banner".into()],
5572            verification: "cargo check --tests".into(),
5573            risks: vec![],
5574            open_questions: vec![],
5575        };
5576        let note = architect_handoff_operator_note(&plan);
5577        assert!(note.contains("`.hematite/PLAN.md`"));
5578        assert!(note.contains("/implement-plan"));
5579        assert!(note.contains("/code implement the current plan"));
5580    }
5581
5582    #[test]
5583    fn natural_language_test_prompt_rewrites_to_workspace_workflow() {
5584        let (tool_name, args) = normalized_tool_call_for_execution(
5585            "shell",
5586            r#"{"command":"powershell -Command \"Get-ChildItem .\""}"#,
5587            false,
5588            Some("Run the tests in this project."),
5589        );
5590
5591        assert_eq!(tool_name, "run_workspace_workflow");
5592        assert_eq!(
5593            args.get("workflow").and_then(|value| value.as_str()),
5594            Some("test")
5595        );
5596    }
5597
5598    #[test]
5599    fn failing_path_parser_extracts_cargo_error_locations() {
5600        let output = r#"
5601BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:
5602
5603error[E0412]: cannot find type `Foo` in this scope
5604  --> src/agent/conversation.rs:42:12
5605   |
560642 |     field: Foo,
5607   |            ^^^ not found
5608
5609error[E0308]: mismatched types
5610  --> src/tools/file_ops.rs:100:5
5611   |
5612   = note: expected `String`, found `&str`
5613"#;
5614        let paths = parse_failing_paths_from_build_output(output);
5615        assert!(
5616            paths.iter().any(|p| p.contains("conversation.rs")),
5617            "should capture conversation.rs"
5618        );
5619        assert!(
5620            paths.iter().any(|p| p.contains("file_ops.rs")),
5621            "should capture file_ops.rs"
5622        );
5623        assert_eq!(paths.len(), 2, "no duplicates");
5624    }
5625
5626    #[test]
5627    fn failing_path_parser_ignores_macro_expansions() {
5628        let output = r#"
5629  --> <macro-expansion>:1:2
5630  --> src/real/file.rs:10:5
5631"#;
5632        let paths = parse_failing_paths_from_build_output(output);
5633        assert_eq!(paths.len(), 1);
5634        assert!(paths[0].contains("file.rs"));
5635    }
5636}