hematite/agent/
conversation.rs

1use crate::agent::architecture_summary::{
2    build_architecture_overview_answer, prune_architecture_trace_batch,
3    prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4    summarize_project_map_output, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7    build_about_answer, build_architect_session_reset_plan, build_authorization_policy_answer,
8    build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9    build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10    build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11    build_session_reset_semantics_answer, build_tool_classes_answer,
12    build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13    build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16    ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17    ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20    action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21    is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24    attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25    RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28    classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
29    preferred_host_inspection_topic, DirectAnswerKind, QueryIntentClass,
30};
31use crate::agent::tool_registry::dispatch_builtin_tool;
32// SystemPromptBuilder is no longer used — InferenceEngine::build_system_prompt() is canonical.
33use crate::agent::compaction::{self, CompactionConfig};
34use crate::ui::gpu_monitor::GpuState;
35
36use serde_json::Value;
37use std::sync::Arc;
38use tokio::sync::{mpsc, Mutex};
39// -- Session persistence -------------------------------------------------------
40
/// One user message plus any attachments that arrived with it.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
    /// The text the user typed.
    pub text: String,
    /// Optional document attached to the turn (name + content).
    pub attached_document: Option<AttachedDocument>,
    /// Optional image attached to the turn (name + path).
    pub attached_image: Option<AttachedImage>,
}

/// A document attachment carried inline as text.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name; rendered as `[Attached document: <name>]` in prompts and transcripts.
    pub name: String,
    /// Document text that is spliced into the prompt.
    pub content: String,
}

/// An image attachment referenced by path.
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name; rendered as `[Attached image: <name>]` in prompts and transcripts.
    pub name: String,
    /// Location of the image — presumably a filesystem path; confirm against the producer.
    pub path: String,
}

impl UserTurn {
    /// Build a text-only turn with no attachments.
    pub fn text(text: impl Into<String>) -> Self {
        Self {
            text: text.into(),
            ..Self::default()
        }
    }
}
69
/// On-disk shape of the session file; `load_session_data` deserializes it with `serde_json`.
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
    // Returned as the first element of `load_session_data`'s tuple.
    running_summary: Option<String>,
    // `serde(default)`: a file that lacks this key still deserializes, falling back to
    // `SessionMemory::default()`.
    #[serde(default)]
    session_memory: crate::agent::compaction::SessionMemory,
}
76
/// Bookkeeping behind the proof-before-action checks (held by `ConversationManager`).
///
/// NOTE(review): the per-field notes below are inferred from names only — confirm against
/// the code that reads and updates this state.
#[derive(Default)]
struct ActionGroundingState {
    // Counter for the current turn; the `u64` map values below are presumably turn indices — TODO confirm.
    turn_index: u64,
    // Path -> u64; presumably the turn in which the path was last observed — TODO confirm.
    observed_paths: std::collections::HashMap<String, u64>,
    // Path -> u64; presumably the turn in which the path was last inspected — TODO confirm.
    inspected_paths: std::collections::HashMap<String, u64>,
    // `None` until a build verification has been recorded (assumed) — TODO confirm.
    last_verify_build_turn: Option<u64>,
    // Outcome flag for the most recent build verification (assumed).
    last_verify_build_ok: bool,
    // Presumably the paths reported by the last failing build — TODO confirm.
    last_failed_build_paths: Vec<String>,
    // Presumably set when code changes after the last verification — TODO confirm.
    code_changed_since_verify: bool,
}
87
/// Scope guard around a shared boolean flag: dropping the guard clears the flag.
struct PlanExecutionGuard {
    flag: Arc<std::sync::atomic::AtomicBool>,
}

impl Drop for PlanExecutionGuard {
    /// Store `false` into the shared flag when the guard goes out of scope.
    fn drop(&mut self) {
        use std::sync::atomic::Ordering;
        self.flag.store(false, Ordering::SeqCst);
    }
}
97
/// Operator-selected workflow mode for a conversation; defaults to `Auto`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    #[default]
    Auto,
    Ask,
    Code,
    Architect,
    ReadOnly,
    /// Clean conversational mode — lighter prompt, no coding agent scaffolding,
    /// tools available but not pushed. Vein RAG still runs for context.
    Chat,
}

impl WorkflowMode {
    /// Upper-case display label for the mode.
    fn label(self) -> &'static str {
        match self {
            Self::Auto => "AUTO",
            Self::Ask => "ASK",
            Self::Code => "CODE",
            Self::Architect => "ARCHITECT",
            Self::ReadOnly => "READ-ONLY",
            Self::Chat => "CHAT",
        }
    }

    /// True for the modes that must not mutate anything: Ask, Architect and ReadOnly.
    fn is_read_only(self) -> bool {
        match self {
            Self::Ask | Self::Architect | Self::ReadOnly => true,
            Self::Auto | Self::Code | Self::Chat => false,
        }
    }

    /// True only for `Chat`.
    pub(crate) fn is_chat(self) -> bool {
        self == Self::Chat
    }
}
134
135fn session_path() -> std::path::PathBuf {
136    crate::tools::file_ops::workspace_root()
137        .join(".hematite")
138        .join("session.json")
139}
140
141fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
142    let path = session_path();
143    if !path.exists() {
144        return (None, crate::agent::compaction::SessionMemory::default());
145    }
146    let Ok(data) = std::fs::read_to_string(&path) else {
147        return (None, crate::agent::compaction::SessionMemory::default());
148    };
149    let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
150        return (None, crate::agent::compaction::SessionMemory::default());
151    };
152    (saved.running_summary, saved.session_memory)
153}
154
155fn reset_task_files() {
156    let root = crate::tools::file_ops::workspace_root();
157    let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
158    let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
159    let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
160    let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
161    let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
162    let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
163}
164
165fn purge_persistent_memory() {
166    let root = crate::tools::file_ops::workspace_root();
167    let mem_dir = root.join(".hematite").join("memories");
168    if mem_dir.exists() {
169        let _ = std::fs::remove_dir_all(&mem_dir);
170        let _ = std::fs::create_dir_all(&mem_dir);
171    }
172
173    let log_dir = root.join(".hematite_logs");
174    if log_dir.exists() {
175        if let Ok(entries) = std::fs::read_dir(&log_dir) {
176            for entry in entries.flatten() {
177                let _ = std::fs::write(entry.path(), "");
178            }
179        }
180    }
181}
182
183fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
184    let mut out = prompt.trim().to_string();
185    if let Some(doc) = user_turn.attached_document.as_ref() {
186        out = format!(
187            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
188            doc.name, doc.content, out
189        );
190    }
191    if let Some(image) = user_turn.attached_image.as_ref() {
192        out = if out.is_empty() {
193            format!("[Attached image: {}]", image.name)
194        } else {
195            format!("[Attached image: {}]\n\n{}", image.name, out)
196        };
197    }
198    out
199}
200
201fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
202    let mut prefixes = Vec::new();
203    if let Some(doc) = user_turn.attached_document.as_ref() {
204        prefixes.push(format!("[Attached document: {}]", doc.name));
205    }
206    if let Some(image) = user_turn.attached_image.as_ref() {
207        prefixes.push(format!("[Attached image: {}]", image.name));
208    }
209    if prefixes.is_empty() {
210        prompt.to_string()
211    } else if prompt.trim().is_empty() {
212        prefixes.join("\n")
213    } else {
214        format!("{}\n{}", prefixes.join("\n"), prompt)
215    }
216}
217
/// Coarse category assigned to a runtime failure; see `classify_runtime_failure`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    ContextWindow,
    ProviderDegraded,
    ToolArgMalformed,
    ToolPolicyBlocked,
    ToolLoop,
    VerificationFailed,
    EmptyModelResponse,
    Unknown,
}

impl RuntimeFailureClass {
    /// Stable snake_case identifier for the class.
    fn tag(self) -> &'static str {
        match self {
            Self::ContextWindow => "context_window",
            Self::ProviderDegraded => "provider_degraded",
            Self::ToolArgMalformed => "tool_arg_malformed",
            Self::ToolPolicyBlocked => "tool_policy_blocked",
            Self::ToolLoop => "tool_loop",
            Self::VerificationFailed => "verification_failed",
            Self::EmptyModelResponse => "empty_model_response",
            Self::Unknown => "unknown",
        }
    }

    /// One-sentence (or two-sentence) advice for the operator on how to proceed.
    fn operator_guidance(self) -> &'static str {
        match self {
            Self::ContextWindow => "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.",
            Self::ProviderDegraded => "Retry once automatically, then narrow the turn or restart LM Studio if it persists.",
            Self::ToolArgMalformed => "Retry with repaired or narrower tool arguments instead of repeating the same malformed call.",
            Self::ToolPolicyBlocked => "Stay inside the allowed workflow or switch modes before retrying.",
            Self::ToolLoop => "Stop repeating the same failing tool pattern and switch to a narrower recovery step.",
            Self::VerificationFailed => "Fix the build or test failure before treating the task as complete.",
            Self::EmptyModelResponse => "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.",
            Self::Unknown => "Inspect the latest grounded tool results or provider status before retrying.",
        }
    }
}
273
274fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
275    let lower = detail.to_ascii_lowercase();
276    if lower.contains("context_window_blocked")
277        || lower.contains("context ceiling reached")
278        || lower.contains("exceeds the")
279        || ((lower.contains("n_keep") && lower.contains("n_ctx"))
280            || lower.contains("context length")
281            || lower.contains("keep from the initial prompt")
282            || lower.contains("prompt is greater than the context length"))
283    {
284        RuntimeFailureClass::ContextWindow
285    } else if lower.contains("empty response from model")
286        || lower.contains("model returned an empty response")
287    {
288        RuntimeFailureClass::EmptyModelResponse
289    } else if lower.contains("lm studio unreachable")
290        || lower.contains("lm studio error")
291        || lower.contains("request failed")
292        || lower.contains("response parse error")
293        || lower.contains("provider degraded")
294    {
295        RuntimeFailureClass::ProviderDegraded
296    } else if lower.contains("missing required argument")
297        || lower.contains("json repair failed")
298        || lower.contains("invalid pattern")
299        || lower.contains("invalid line range")
300    {
301        RuntimeFailureClass::ToolArgMalformed
302    } else if lower.contains("action blocked:")
303        || lower.contains("access denied")
304        || lower.contains("declined by user")
305    {
306        RuntimeFailureClass::ToolPolicyBlocked
307    } else if lower.contains("too many consecutive tool errors")
308        || lower.contains("repeated tool failures")
309        || lower.contains("stuck in a loop")
310    {
311        RuntimeFailureClass::ToolLoop
312    } else if lower.contains("build failed")
313        || lower.contains("verification failed")
314        || lower.contains("verify_build")
315    {
316        RuntimeFailureClass::VerificationFailed
317    } else {
318        RuntimeFailureClass::Unknown
319    }
320}
321
322fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
323    format!(
324        "[failure:{}] {} Detail: {}",
325        class.tag(),
326        class.operator_guidance(),
327        detail.trim()
328    )
329}
330
331fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
332    match class {
333        RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
334        RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
335        RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
336        _ => None,
337    }
338}
339
340fn checkpoint_state_for_runtime_failure(
341    class: RuntimeFailureClass,
342) -> Option<OperatorCheckpointState> {
343    match class {
344        RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
345        RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
346        RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
347        RuntimeFailureClass::VerificationFailed => {
348            Some(OperatorCheckpointState::BlockedVerification)
349        }
350        _ => None,
351    }
352}
353
354fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
355    match class {
356        RuntimeFailureClass::ProviderDegraded => {
357            "LM Studio degraded during the turn; retrying once before surfacing a failure."
358        }
359        RuntimeFailureClass::EmptyModelResponse => {
360            "The model returned an empty reply; retrying once before surfacing a failure."
361        }
362        _ => "Runtime recovery in progress.",
363    }
364}
365
366fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
367    match class {
368        RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
369        RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
370        RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
371        RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
372        _ => "Operator checkpoint updated.",
373    }
374}
375
376fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
377    match class {
378        RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
379        RuntimeFailureClass::ProviderDegraded => {
380            "LM Studio degraded and did not recover cleanly; operator action is now required."
381        }
382        RuntimeFailureClass::EmptyModelResponse => {
383            "LM Studio returned an empty reply after recovery; operator action is now required."
384        }
385        RuntimeFailureClass::ToolLoop => {
386            "Repeated failing tool pattern detected; Hematite stopped the loop."
387        }
388        _ => "Runtime failure surfaced to the operator.",
389    }
390}
391
392fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
393    matches!(
394        class,
395        RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
396    )
397}
398
399fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
400    match class {
401        RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
402        RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
403        RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
404        RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
405        RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
406        RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
407        RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
408    }
409}
410
411fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
412    format!(
413        "{} [{}]",
414        plan.recipe.scenario.label(),
415        plan.recipe.steps_summary()
416    )
417}
418
419fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
420    match decision {
421        RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
422        RecoveryDecision::Escalate {
423            recipe,
424            attempts_made,
425            ..
426        } => format!(
427            "{} escalated after {} / {} [{}]",
428            recipe.scenario.label(),
429            attempts_made,
430            recipe.max_attempts.max(1),
431            recipe.steps_summary()
432        ),
433    }
434}
435
436/// Parse file paths from cargo/compiler error output.
437/// Handles lines like `  --> src/foo/bar.rs:34:12` and `error: could not compile`.
438fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
439    let root = crate::tools::file_ops::workspace_root();
440    let mut paths: Vec<String> = output
441        .lines()
442        .filter_map(|line| {
443            let trimmed = line.trim_start();
444            // Cargo error location: "--> path/to/file.rs:line:col"
445            let after_arrow = trimmed.strip_prefix("--> ")?;
446            let file_part = after_arrow.split(':').next()?;
447            if file_part.is_empty() || file_part.starts_with('<') {
448                return None;
449            }
450            let p = std::path::Path::new(file_part);
451            let resolved = if p.is_absolute() {
452                p.to_path_buf()
453            } else {
454                root.join(p)
455            };
456            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
457        })
458        .collect();
459    paths.sort();
460    paths.dedup();
461    paths
462}
463
464fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
465    match mode {
466        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
467        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
468        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
469        _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
470    }
471}
472
/// Fixed instruction block describing the output format expected in ARCHITECT mode:
/// a compact handoff under six exact headings.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow` over a broad `map_project` scan.\n",
        "If you do use `map_project`, keep it minimal and scoped.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented.",
    )
}
489
/// True when the input asks to execute the saved plan, i.e. it contains the phrase
/// "implement the current plan" (ASCII case-insensitive, anywhere in the text).
///
/// The former exact-match comparisons were subsumed by this substring check and have
/// been dropped; the result is unchanged for every input.
fn is_current_plan_execution_request(user_input: &str) -> bool {
    user_input
        .to_ascii_lowercase()
        .contains("implement the current plan")
}
496
497fn is_plan_scoped_tool(name: &str) -> bool {
498    crate::agent::inference::tool_metadata_for_name(name).plan_scope
499}
500
501fn is_current_plan_irrelevant_tool(name: &str) -> bool {
502    !crate::agent::inference::tool_metadata_for_name(name).plan_scope
503}
504
505fn is_non_mutating_plan_step_tool(name: &str) -> bool {
506    let metadata = crate::agent::inference::tool_metadata_for_name(name);
507    metadata.plan_scope && !metadata.mutates_workspace
508}
509
510fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
511    let trimmed = user_input.trim();
512    for (prefix, mode) in [
513        ("/ask", WorkflowMode::Ask),
514        ("/code", WorkflowMode::Code),
515        ("/architect", WorkflowMode::Architect),
516        ("/read-only", WorkflowMode::ReadOnly),
517        ("/auto", WorkflowMode::Auto),
518    ] {
519        if let Some(rest) = trimmed.strip_prefix(prefix) {
520            let rest = rest.trim();
521            if !rest.is_empty() {
522                return Some((mode, rest));
523            }
524        }
525    }
526    None
527}
528
529// Tool catalogue
530
/// Returns the full set of tools exposed to the model.
///
/// Thin public wrapper that forwards to `crate::agent::tool_registry::get_tools`.
pub fn get_tools() -> Vec<ToolDefinition> {
    crate::agent::tool_registry::get_tools()
}
535
/// Per-session conversation state: message history, the inference engine handle, tool
/// definitions, memory layers and the shared handles the agent turn loop works with.
pub struct ConversationManager {
    /// Full conversation history in OpenAI format.
    pub history: Vec<ChatMessage>,
    /// Shared inference engine; queried for the current context length and runtime profile.
    pub engine: Arc<InferenceEngine>,
    /// Tool definitions; included when estimating prompt pressure.
    pub tools: Vec<ToolDefinition>,
    /// Shared MCP manager handle.
    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
    // NOTE(review): `professional`, `brief`, `snark` and `chaos` look like persona/tone
    // settings passed in through `new` — confirm their exact effect where they are read.
    pub professional: bool,
    pub brief: bool,
    pub snark: u8,
    pub chaos: u8,
    /// Model to use for simple read-only tasks (optional, user-supplied via --fast-model).
    pub fast_model: Option<String>,
    /// Model to use for complex write/build tasks (optional, user-supplied via --think-model).
    pub think_model: Option<String>,
    /// Files where whitespace auto-correction fired this session.
    pub correction_hints: Vec<String>,
    /// Running background summary of pruned older messages.
    pub running_summary: Option<String>,
    /// Live hardware telemetry handle.
    pub gpu_state: Arc<GpuState>,
    /// Local RAG memory — FTS5-indexed project source.
    pub vein: crate::memory::vein::Vein,
    /// Append-only session transcript logger.
    pub transcript: crate::agent::transcript::TranscriptLogger,
    /// Thread-safe cancellation signal for the current agent turn.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Shared Git remote state (for persistent connectivity checks).
    pub git_state: Arc<crate::agent::git_monitor::GitState>,
    /// Reasoning think-mode override. None = let model decide. Some(true) = force /think.
    /// Some(false) = force /no_think (fast mode, 3-5x quicker for simple tasks).
    pub think_mode: Option<bool>,
    /// Currently selected workflow mode (see `WorkflowMode`).
    workflow_mode: WorkflowMode,
    /// Layer 6: Dynamic Task Context (extracted during compaction)
    pub session_memory: crate::agent::compaction::SessionMemory,
    /// Shared swarm coordinator handle.
    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    /// Shared voice manager handle.
    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
    /// Personality description for the current Rusty soul — used in chat mode system prompt.
    pub soul_personality: String,
    /// Shared LSP manager handle.
    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
    /// Active reasoning summary extracted from the previous model turn (Gemma-4 Native).
    pub reasoning_history: Option<String>,
    /// Layer 8: Active Reference Pinning (Context Locked)
    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
    /// Hard action-grounding state for proof-before-action checks.
    action_grounding: Arc<Mutex<ActionGroundingState>>,
    /// True only during `/code Implement the current plan.` style execution turns.
    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
    /// Typed per-turn recovery attempt tracking.
    recovery_context: RecoveryContext,
    /// L1 context block — hot files summary injected into the system prompt.
    /// Built once after vein init and updated as edits accumulate heat.
    pub l1_context: Option<String>,
}
589
590impl ConversationManager {
591    fn vein_docs_only_mode(&self) -> bool {
592        !crate::tools::file_ops::is_project_workspace()
593    }
594
595    fn refresh_vein_index(&mut self) -> usize {
596        let count = if self.vein_docs_only_mode() {
597            let root = crate::tools::file_ops::workspace_root();
598            tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
599        } else {
600            tokio::task::block_in_place(|| self.vein.index_project())
601        };
602        self.l1_context = self.vein.l1_context();
603        count
604    }
605
606    fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
607        let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
608        let workspace_mode = if self.vein_docs_only_mode() {
609            "docs-only (outside a project workspace)"
610        } else {
611            "project workspace"
612        };
613        let active_room = snapshot.active_room.as_deref().unwrap_or("none");
614        let mut out = format!(
615            "Vein Inspection\n\
616             Workspace mode: {workspace_mode}\n\
617             Indexed this pass: {indexed_this_pass}\n\
618             Indexed source files: {}\n\
619             Indexed docs: {}\n\
620             Indexed session exchanges: {}\n\
621             Embedded source/doc chunks: {}\n\
622             Embeddings available: {}\n\
623             Active room bias: {active_room}\n\
624             L1 hot-files block: {}\n",
625            snapshot.indexed_source_files,
626            snapshot.indexed_docs,
627            snapshot.indexed_session_exchanges,
628            snapshot.embedded_source_doc_chunks,
629            if snapshot.has_any_embeddings {
630                "yes"
631            } else {
632                "no"
633            },
634            if snapshot.l1_ready {
635                "ready"
636            } else {
637                "not built yet"
638            },
639        );
640
641        if snapshot.hot_files.is_empty() {
642            out.push_str("Hot files: none yet.\n");
643            return out;
644        }
645
646        out.push_str("\nHot files by room:\n");
647        let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
648            std::collections::BTreeMap::new();
649        for file in &snapshot.hot_files {
650            by_room.entry(file.room.as_str()).or_default().push(file);
651        }
652        for (room, files) in by_room {
653            out.push_str(&format!("[{}]\n", room));
654            for file in files {
655                out.push_str(&format!(
656                    "- {} [{} edit{}]\n",
657                    file.path,
658                    file.heat,
659                    if file.heat == 1 { "" } else { "s" }
660                ));
661            }
662        }
663
664        out
665    }
666
667    fn latest_user_prompt(&self) -> Option<&str> {
668        self.history
669            .iter()
670            .rev()
671            .find(|msg| msg.role == "user")
672            .map(|msg| msg.content.as_str())
673    }
674
    /// Record a locally produced answer as a complete turn and stream it to the receiver.
    ///
    /// `effective_user_input` is what goes into `history`; `raw_user_input` is what is
    /// written to the transcript. The response is sent as `Token` events followed by
    /// `Done`; send errors are ignored. Afterwards history is trimmed, session memory is
    /// refreshed and the session is saved.
    async fn emit_direct_response(
        &mut self,
        tx: &mpsc::Sender<InferenceEvent>,
        raw_user_input: &str,
        effective_user_input: &str,
        response: &str,
    ) {
        self.history.push(ChatMessage::user(effective_user_input));
        self.history.push(ChatMessage::assistant_text(response));
        self.transcript.log_user(raw_user_input);
        self.transcript.log_agent(response);
        // NOTE(review): `8` is the chunking parameter of `chunk_text`; its unit is not
        // visible here — confirm. Empty chunks are skipped.
        for chunk in chunk_text(response, 8) {
            if !chunk.is_empty() {
                let _ = tx.send(InferenceEvent::Token(chunk)).await;
            }
        }
        let _ = tx.send(InferenceEvent::Done).await;
        self.trim_history(80);
        self.refresh_session_memory();
        self.save_session();
    }
696
697    async fn emit_operator_checkpoint(
698        &mut self,
699        tx: &mpsc::Sender<InferenceEvent>,
700        state: OperatorCheckpointState,
701        summary: impl Into<String>,
702    ) {
703        let summary = summary.into();
704        self.session_memory
705            .record_checkpoint(state.label(), summary.clone());
706        let _ = tx
707            .send(InferenceEvent::OperatorCheckpoint { state, summary })
708            .await;
709    }
710
711    async fn emit_recovery_recipe_summary(
712        &mut self,
713        tx: &mpsc::Sender<InferenceEvent>,
714        state: impl Into<String>,
715        summary: impl Into<String>,
716    ) {
717        let state = state.into();
718        let summary = summary.into();
719        self.session_memory.record_recovery(state, summary.clone());
720        let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
721    }
722
723    async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
724        let _ = tx
725            .send(InferenceEvent::ProviderStatus {
726                state: ProviderRuntimeState::Live,
727                summary: String::new(),
728            })
729            .await;
730        self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
731            .await;
732    }
733
734    async fn emit_prompt_pressure_for_messages(
735        &self,
736        tx: &mpsc::Sender<InferenceEvent>,
737        messages: &[ChatMessage],
738    ) {
739        let context_length = self.engine.current_context_length();
740        let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
741            crate::agent::inference::estimate_prompt_pressure(
742                messages,
743                &self.tools,
744                context_length,
745            );
746        let _ = tx
747            .send(InferenceEvent::PromptPressure {
748                estimated_input_tokens,
749                reserved_output_tokens,
750                estimated_total_tokens,
751                context_length,
752                percent,
753            })
754            .await;
755    }
756
757    async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
758        let context_length = self.engine.current_context_length();
759        let _ = tx
760            .send(InferenceEvent::PromptPressure {
761                estimated_input_tokens: 0,
762                reserved_output_tokens: 0,
763                estimated_total_tokens: 0,
764                context_length,
765                percent: 0,
766            })
767            .await;
768    }
769
770    async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
771        let context_length = self.engine.current_context_length();
772        let vram_ratio = self.gpu_state.ratio();
773        let config = CompactionConfig::adaptive(context_length, vram_ratio);
774        let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
775        let percent = if config.max_estimated_tokens == 0 {
776            0
777        } else {
778            ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
779        };
780
781        let _ = tx
782            .send(InferenceEvent::CompactionPressure {
783                estimated_tokens,
784                threshold_tokens: config.max_estimated_tokens,
785                percent,
786            })
787            .await;
788    }
789
790    async fn refresh_runtime_profile_and_report(
791        &mut self,
792        tx: &mpsc::Sender<InferenceEvent>,
793        reason: &str,
794    ) -> Option<(String, usize, bool)> {
795        let refreshed = self.engine.refresh_runtime_profile().await;
796        if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
797            let _ = tx
798                .send(InferenceEvent::RuntimeProfile {
799                    model_id: model_id.clone(),
800                    context_length: *context_length,
801                })
802                .await;
803            self.transcript.log_system(&format!(
804                "Runtime profile refresh ({}): model={} ctx={} changed={}",
805                reason, model_id, context_length, changed
806            ));
807        }
808        refreshed
809    }
810
811    pub fn new(
812        engine: Arc<InferenceEngine>,
813        professional: bool,
814        brief: bool,
815        snark: u8,
816        chaos: u8,
817        soul_personality: String,
818        fast_model: Option<String>,
819        think_model: Option<String>,
820        gpu_state: Arc<GpuState>,
821        git_state: Arc<crate::agent::git_monitor::GitState>,
822        swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
823        voice_manager: Arc<crate::ui::voice::VoiceManager>,
824    ) -> Self {
825        let (saved_summary, saved_memory) = load_session_data();
826
827        // Build the initial mcp_manager
828        let mcp_manager = Arc::new(tokio::sync::Mutex::new(
829            crate::agent::mcp_manager::McpManager::new(),
830        ));
831
832        // Build the initial system prompt using the canonical InferenceEngine path.
833        let dynamic_instructions =
834            engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
835
836        let history = vec![ChatMessage::system(&dynamic_instructions)];
837
838        let vein_path = crate::tools::file_ops::workspace_root()
839            .join(".hematite")
840            .join("vein.db");
841        let vein_base_url = engine.base_url.clone();
842        let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
843            .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
844
845        Self {
846            history,
847            engine,
848            tools: get_tools(),
849            mcp_manager,
850            professional,
851            brief,
852            snark,
853            chaos,
854            fast_model,
855            think_model,
856            correction_hints: Vec::new(),
857            running_summary: saved_summary,
858            gpu_state,
859            vein,
860            transcript: crate::agent::transcript::TranscriptLogger::new(),
861            cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
862            git_state,
863            think_mode: None,
864            workflow_mode: WorkflowMode::Auto,
865            session_memory: saved_memory,
866            swarm_coordinator,
867            voice_manager,
868            soul_personality,
869            lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
870                crate::tools::file_ops::workspace_root(),
871            ))),
872            reasoning_history: None,
873            pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
874            action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
875            plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
876            recovery_context: RecoveryContext::default(),
877            l1_context: None,
878        }
879    }
880
    /// Index the project into The Vein. Call once after construction.
    ///
    /// Delegates to `refresh_vein_index` and returns its count unchanged.
    /// NOTE(review): any `block_in_place` handling lives inside
    /// `refresh_vein_index`, which is not visible from this method — confirm
    /// there before relying on it.
    pub fn initialize_vein(&mut self) -> usize {
        self.refresh_vein_index()
    }
886
887    fn save_session(&self) {
888        let path = session_path();
889        if let Some(parent) = path.parent() {
890            let _ = std::fs::create_dir_all(parent);
891        }
892        let saved = SavedSession {
893            running_summary: self.running_summary.clone(),
894            session_memory: self.session_memory.clone(),
895        };
896        if let Ok(json) = serde_json::to_string(&saved) {
897            let _ = std::fs::write(&path, json);
898        }
899    }
900
901    fn save_empty_session(&self) {
902        let path = session_path();
903        if let Some(parent) = path.parent() {
904            let _ = std::fs::create_dir_all(parent);
905        }
906        let saved = SavedSession {
907            running_summary: None,
908            session_memory: crate::agent::compaction::SessionMemory::default(),
909        };
910        if let Ok(json) = serde_json::to_string(&saved) {
911            let _ = std::fs::write(&path, json);
912        }
913    }
914
915    fn refresh_session_memory(&mut self) {
916        let current_plan = self.session_memory.current_plan.clone();
917        let previous_memory = self.session_memory.clone();
918        self.session_memory = compaction::extract_memory(&self.history);
919        self.session_memory.current_plan = current_plan;
920        self.session_memory
921            .inherit_runtime_ledger_from(&previous_memory);
922    }
923
924    fn build_chat_system_prompt(&self) -> String {
925        let species = &self.engine.species;
926        let personality = &self.soul_personality;
927        format!(
928            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
929             {personality}\n\n\
930             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
931             Rules:\n\
932             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
933             - Answer directly. One paragraph is usually right.\n\
934             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
935             - Don't narrate your reasoning or mention tool names unprompted.\n\
936             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
937             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
938             - If the user wants the full coding harness, they can type `/agent`.\n",
939        )
940    }
941
942    fn append_session_handoff(&self, system_msg: &mut String) {
943        let has_summary = self
944            .running_summary
945            .as_ref()
946            .map(|s| !s.trim().is_empty())
947            .unwrap_or(false);
948        let has_memory = self.session_memory.has_signal();
949
950        if !has_summary && !has_memory {
951            return;
952        }
953
954        system_msg.push_str(
955            "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
956             This is compact carry-over from earlier work on this machine.\n\
957             Use it only when it helps the current request.\n\
958             Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
959        );
960
961        if has_memory {
962            system_msg.push_str("\n## Active Task Memory\n");
963            system_msg.push_str(&self.session_memory.to_prompt());
964        }
965
966        if let Some(summary) = self.running_summary.as_deref() {
967            if !summary.trim().is_empty() {
968                system_msg.push_str("\n## Compacted Session Summary\n");
969                system_msg.push_str(summary);
970                system_msg.push('\n');
971            }
972        }
973    }
974
    /// Replaces the active workflow mode with `mode`.
    fn set_workflow_mode(&mut self, mode: WorkflowMode) {
        self.workflow_mode = mode;
    }
978
979    fn current_plan_summary(&self) -> Option<String> {
980        self.session_memory
981            .current_plan
982            .as_ref()
983            .filter(|plan| plan.has_signal())
984            .map(|plan| plan.summary_line())
985    }
986
987    fn current_plan_allowed_paths(&self) -> Vec<String> {
988        self.session_memory
989            .current_plan
990            .as_ref()
991            .map(|plan| {
992                plan.target_files
993                    .iter()
994                    .map(|path| normalize_workspace_path(path))
995                    .collect()
996            })
997            .unwrap_or_default()
998    }
999
1000    fn persist_architect_handoff(&mut self, response: &str) {
1001        if self.workflow_mode != WorkflowMode::Architect {
1002            return;
1003        }
1004        let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1005            return;
1006        };
1007        let _ = crate::tools::plan::save_plan_handoff(&plan);
1008        self.session_memory.current_plan = Some(plan);
1009    }
1010
1011    async fn begin_grounded_turn(&self) -> u64 {
1012        let mut state = self.action_grounding.lock().await;
1013        state.turn_index += 1;
1014        state.turn_index
1015    }
1016
1017    async fn reset_action_grounding(&self) {
1018        let mut state = self.action_grounding.lock().await;
1019        *state = ActionGroundingState::default();
1020    }
1021
1022    async fn record_read_observation(&self, path: &str) {
1023        let normalized = normalize_workspace_path(path);
1024        let mut state = self.action_grounding.lock().await;
1025        let turn = state.turn_index;
1026        // read_file returns full file content with line numbers — sufficient for
1027        // the model to know exact text before editing, so it satisfies the
1028        // line-inspection grounding check too.
1029        state.observed_paths.insert(normalized.clone(), turn);
1030        state.inspected_paths.insert(normalized, turn);
1031    }
1032
1033    async fn record_line_inspection(&self, path: &str) {
1034        let normalized = normalize_workspace_path(path);
1035        let mut state = self.action_grounding.lock().await;
1036        let turn = state.turn_index;
1037        state.observed_paths.insert(normalized.clone(), turn);
1038        state.inspected_paths.insert(normalized, turn);
1039    }
1040
1041    async fn record_verify_build_result(&self, ok: bool, output: &str) {
1042        let mut state = self.action_grounding.lock().await;
1043        let turn = state.turn_index;
1044        state.last_verify_build_turn = Some(turn);
1045        state.last_verify_build_ok = ok;
1046        if ok {
1047            state.code_changed_since_verify = false;
1048            state.last_failed_build_paths.clear();
1049        } else {
1050            state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1051        }
1052    }
1053
    /// Forwards a verification outcome (`ok` plus a summary) to
    /// `session_memory.record_verification`.
    fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
        self.session_memory.record_verification(ok, summary);
    }
1057
1058    async fn record_successful_mutation(&self, path: Option<&str>) {
1059        let mut state = self.action_grounding.lock().await;
1060        state.code_changed_since_verify = match path {
1061            Some(p) => is_code_like_path(p),
1062            None => true,
1063        };
1064    }
1065
    /// Checks whether the tool call `name` with arguments `args` may run.
    ///
    /// Returns `Ok(())` when every gate passes, or `Err(message)` with an
    /// "Action blocked: ..." explanation from the first gate that fails.
    /// Gates are evaluated in this order:
    ///
    /// 1. Current-plan execution limits (only while `plan_execution_active`).
    /// 2. Read-only workflow limits.
    /// 3. Docs-edit guard and recent-evidence requirements for existing files.
    /// 4. External MCP mutation / filesystem-read tools.
    /// 5. Broken-build phase gate for file mutations.
    /// 6. Verification gate for `git_commit` / `git_push`.
    /// 7. `shell` checks (host-inspection redirect, required `reason`).
    async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
        // Gate 1: restrictions that apply only while a saved plan is being executed.
        if self
            .plan_execution_active
            .load(std::sync::atomic::Ordering::SeqCst)
        {
            if is_current_plan_irrelevant_tool(name) {
                return Err(format!(
                    "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
                    name
                ));
            }

            if name == "map_project" {
                return Err(
                    "Action blocked: `map_project` is too broad for current-plan execution. Use the target files from the saved plan and inspect them directly with built-in workspace tools."
                        .to_string(),
                );
            }

            if is_plan_scoped_tool(name) {
                let allowed_paths = self.current_plan_allowed_paths();
                // An empty allow-list means the plan names no target files; no path lock applies.
                if !allowed_paths.is_empty() {
                    let in_allowed = match name {
                        // Every requested pin must be a plan target; an empty or missing list fails.
                        "auto_pin_context" => args
                            .get("paths")
                            .and_then(|v| v.as_array())
                            .map(|paths| {
                                !paths.is_empty()
                                    && paths.iter().all(|v| {
                                        v.as_str()
                                            .map(normalize_workspace_path)
                                            .map(|p| allowed_paths.contains(&p))
                                            .unwrap_or(false)
                                    })
                            })
                            .unwrap_or(false),
                        // The `path` argument itself must equal a plan target (exact match only).
                        "grep_files" | "list_files" => args
                            .get("path")
                            .and_then(|v| v.as_str())
                            .map(normalize_workspace_path)
                            .map(|p| allowed_paths.contains(&p))
                            .unwrap_or(false),
                        _ => action_target_path(name, args)
                            .map(|p| allowed_paths.contains(&p))
                            .unwrap_or(false),
                    };

                    if !in_allowed {
                        let allowed = allowed_paths
                            .iter()
                            .map(|p| format!("`{}`", p))
                            .collect::<Vec<_>>()
                            .join(", ");
                        return Err(format!(
                            "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
                            allowed
                        ));
                    }
                }
            }

            // During plan execution, text-matching edits (including `patch_hunk`) need the
            // target to appear in `inspected_paths` within the last 3 turns.
            if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
                if let Some(target) = action_target_path(name, args) {
                    let state = self.action_grounding.lock().await;
                    let recently_inspected = state
                        .inspected_paths
                        .get(&target)
                        .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                        .unwrap_or(false);
                    drop(state);
                    if !recently_inspected {
                        return Err(format!(
                            "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
                            name, target
                        ));
                    }
                }
            }
        }

        // Gate 2: read-only workflows may not pin more context or run destructive tools.
        if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
            return Err(
                "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
                    .to_string(),
            );
        }

        if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
            // `shell` is still allowed in read-only mode when the command is classified as Safe.
            if name == "shell" {
                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
                let risk = crate::tools::guard::classify_bash_risk(command);
                if !matches!(risk, crate::tools::RiskLevel::Safe) {
                    return Err(format!(
                        "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
                        self.workflow_mode.label()
                    ));
                }
            } else {
                return Err(format!(
                    "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
                    self.workflow_mode.label()
                ));
            }
        }

        // Gate 3: checks tied to the tool's target path, when it has one.
        let normalized_target = action_target_path(name, args);
        if let Some(target) = normalized_target.as_deref() {
            if matches!(
                name,
                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
            ) {
                if let Some(prompt) = self.latest_user_prompt() {
                    if docs_edit_without_explicit_request(prompt, target) {
                        return Err(format!(
                            "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
                            target
                        ));
                    }
                }
            }
            // Evidence is only demanded for files that already exist; new files pass through.
            let path_exists = std::path::Path::new(target).exists();
            if path_exists {
                // NOTE(review): the grounding lock stays held while the pinned-files lock is
                // taken — confirm no other code path acquires these two in the opposite order.
                let state = self.action_grounding.lock().await;
                let pinned = self.pinned_files.lock().await;
                let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
                drop(pinned);

                // edit_file and multi_search_replace match text exactly, so they need a
                // tighter evidence bar than a plain read. Require inspect_lines on the
                // target within the last 3 turns. A read_file in the *same* turn is also
                // accepted (the model just loaded the file and is making an immediate edit).
                // A pinned file satisfies either bar.
                let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
                let recently_inspected = state
                    .inspected_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                    .unwrap_or(false);
                let same_turn_read = state
                    .observed_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
                    .unwrap_or(false);
                let recent_observed = state
                    .observed_paths
                    .get(target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                    .unwrap_or(false);

                if needs_exact_window {
                    if !recently_inspected && !same_turn_read && !pinned_match {
                        return Err(format!(
                            "Action blocked: `{}` on '{}' requires a line-level inspection first. \
                             Use `inspect_lines` on the target region to get the exact current text \
                             (whitespace and indentation included), then retry the edit.",
                            name, target
                        ));
                    }
                } else if !recent_observed && !pinned_match {
                    return Err(format!(
                        "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
                        name, target
                    ));
                }
            }
        }

        // Gate 4: external MCP tools that duplicate built-in workspace edit/read paths.
        if is_mcp_mutating_tool(name) {
            return Err(format!(
                "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
                name
            ));
        }

        if is_mcp_workspace_read_tool(name) {
            return Err(format!(
                "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
                name
            ));
        }

        // Phase gate: if the build is broken, constrain edits to files that cargo flagged.
        // This prevents the model from wandering to unrelated files after a failed verify.
        if matches!(
            name,
            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
        ) {
            if let Some(target) = normalized_target.as_deref() {
                let state = self.action_grounding.lock().await;
                if state.code_changed_since_verify
                    && !state.last_verify_build_ok
                    && !state.last_failed_build_paths.is_empty()
                    && !state.last_failed_build_paths.iter().any(|p| p == target)
                {
                    let files = state
                        .last_failed_build_paths
                        .iter()
                        .map(|p| format!("`{}`", p))
                        .collect::<Vec<_>>()
                        .join(", ");
                    return Err(format!(
                        "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
                        files
                    ));
                }
            }
        }

        // Gate 6: committing or pushing unverified code changes.
        // NOTE(review): this only blocks while `last_verify_build_ok` is false. An edit made
        // after an earlier successful verify sets `code_changed_since_verify` but appears to
        // leave `last_verify_build_ok` true, so the gate would pass — confirm that is intended.
        if name == "git_commit" || name == "git_push" {
            let state = self.action_grounding.lock().await;
            if state.code_changed_since_verify && !state.last_verify_build_ok {
                return Err(format!(
                    "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
                    name
                ));
            }
        }

        // Gate 7: raw shell usage.
        if name == "shell" {
            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
            if let Some(prompt) = self.latest_user_prompt() {
                // Redirect host-inspection questions to `inspect_host` when the command
                // looks like a structured host inspection.
                if let Some(topic) = preferred_host_inspection_topic(prompt) {
                    if shell_looks_like_structured_host_inspection(command) {
                        return Err(format!(
                            "Action blocked: this is a host-inspection question. Prefer `inspect_host(topic: \"{}\")` instead of raw `shell` for PATH, toolchains, environment/package-manager health, network state, service state, running processes, desktop, Downloads, listening ports, repo-doctor checks, or directory/disk summaries. Use `shell` only if `inspect_host` cannot answer the question directly.",
                            topic
                        ));
                    }
                }
            }
            // Any command not classified as Safe must carry a non-blank `reason`.
            let reason = args
                .get("reason")
                .and_then(|v| v.as_str())
                .unwrap_or("")
                .trim();
            let risk = crate::tools::guard::classify_bash_risk(command);
            if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
                return Err(
                    "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
                        .to_string(),
                );
            }
        }

        Ok(())
    }
1311
1312    fn build_action_receipt(
1313        &self,
1314        name: &str,
1315        args: &Value,
1316        output: &str,
1317        is_error: bool,
1318    ) -> Option<ChatMessage> {
1319        if is_error || !is_destructive_tool(name) {
1320            return None;
1321        }
1322
1323        let mut receipt = String::from("[ACTION RECEIPT]\n");
1324        receipt.push_str(&format!("- tool: {}\n", name));
1325        if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1326            receipt.push_str(&format!("- target: {}\n", path));
1327        }
1328        if name == "shell" {
1329            if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1330                receipt.push_str(&format!("- command: {}\n", command));
1331            }
1332            if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1333                if !reason.trim().is_empty() {
1334                    receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1335                }
1336            }
1337        }
1338        let first_line = output.lines().next().unwrap_or(output).trim();
1339        receipt.push_str(&format!("- outcome: {}\n", first_line));
1340        Some(ChatMessage::system(&receipt))
1341    }
1342
1343    fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1344        self.tools
1345            .retain(|tool| !tool.function.name.starts_with("mcp__"));
1346        self.tools
1347            .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1348                tool_type: "function".into(),
1349                function: ToolFunction {
1350                    name: tool.name.clone(),
1351                    description: tool.description.clone().unwrap_or_default(),
1352                    parameters: tool.input_schema.clone(),
1353                },
1354                metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1355            }));
1356    }
1357
1358    async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1359        let summary = {
1360            let mcp = self.mcp_manager.lock().await;
1361            mcp.runtime_report()
1362        };
1363        let _ = tx
1364            .send(InferenceEvent::McpStatus {
1365                state: summary.state,
1366                summary: summary.summary,
1367            })
1368            .await;
1369    }
1370
1371    async fn refresh_mcp_tools(
1372        &mut self,
1373        tx: &mpsc::Sender<InferenceEvent>,
1374    ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1375        let mcp_tools = {
1376            let mut mcp = self.mcp_manager.lock().await;
1377            match mcp.initialize_all().await {
1378                Ok(()) => mcp.discover_tools().await,
1379                Err(e) => {
1380                    drop(mcp);
1381                    self.replace_mcp_tool_definitions(&[]);
1382                    self.emit_mcp_runtime_status(tx).await;
1383                    return Err(e.into());
1384                }
1385            }
1386        };
1387
1388        self.replace_mcp_tool_definitions(&mcp_tools);
1389        self.emit_mcp_runtime_status(tx).await;
1390        Ok(mcp_tools)
1391    }
1392
1393    /// Spawns and initializes all configured MCP servers, discovering their tools.
1394    pub async fn initialize_mcp(
1395        &mut self,
1396        tx: &mpsc::Sender<InferenceEvent>,
1397    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1398        let _ = self.refresh_mcp_tools(tx).await?;
1399        Ok(())
1400    }
1401
1402    /// Run one user turn through the full agentic loop.
1403    ///
1404    /// Adds the user message, calls the model, executes any tools, and loops
1405    /// until the model produces a final text reply.  All progress is streamed
1406    /// as `InferenceEvent` values via `tx`.
1407    pub async fn run_turn(
1408        &mut self,
1409        user_turn: &UserTurn,
1410        tx: mpsc::Sender<InferenceEvent>,
1411        yolo: bool,
1412    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1413        let user_input = user_turn.text.as_str();
1414        // ── Fast-path reset commands: handled locally, no network I/O needed ──
1415        if user_input.trim() == "/new" {
1416            self.history.clear();
1417            self.reasoning_history = None;
1418            self.session_memory.clear();
1419            self.running_summary = None;
1420            self.correction_hints.clear();
1421            self.pinned_files.lock().await.clear();
1422            self.reset_action_grounding().await;
1423            reset_task_files();
1424            let _ = std::fs::remove_file(session_path());
1425            self.save_empty_session();
1426            self.emit_compaction_pressure(&tx).await;
1427            self.emit_prompt_pressure_idle(&tx).await;
1428            for chunk in chunk_text(
1429                "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1430                8,
1431            ) {
1432                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1433            }
1434            let _ = tx.send(InferenceEvent::Done).await;
1435            return Ok(());
1436        }
1437
1438        if user_input.trim() == "/forget" {
1439            self.history.clear();
1440            self.reasoning_history = None;
1441            self.session_memory.clear();
1442            self.running_summary = None;
1443            self.correction_hints.clear();
1444            self.pinned_files.lock().await.clear();
1445            self.reset_action_grounding().await;
1446            reset_task_files();
1447            purge_persistent_memory();
1448            tokio::task::block_in_place(|| self.vein.reset());
1449            let _ = std::fs::remove_file(session_path());
1450            self.save_empty_session();
1451            self.emit_compaction_pressure(&tx).await;
1452            self.emit_prompt_pressure_idle(&tx).await;
1453            for chunk in chunk_text(
1454                "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1455                8,
1456            ) {
1457                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1458            }
1459            let _ = tx.send(InferenceEvent::Done).await;
1460            return Ok(());
1461        }
1462
1463        if user_input.trim() == "/vein-inspect" {
1464            let indexed = self.refresh_vein_index();
1465            let report = self.build_vein_inspection_report(indexed);
1466            let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1467            let _ = tx
1468                .send(InferenceEvent::VeinStatus {
1469                    file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1470                    embedded_count: snapshot.embedded_source_doc_chunks,
1471                    docs_only: self.vein_docs_only_mode(),
1472                })
1473                .await;
1474            for chunk in chunk_text(&report, 8) {
1475                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1476            }
1477            let _ = tx.send(InferenceEvent::Done).await;
1478            return Ok(());
1479        }
1480
1481        if user_input.trim() == "/workspace-profile" {
1482            let root = crate::tools::file_ops::workspace_root();
1483            let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1484            let report = crate::agent::workspace_profile::profile_report(&root);
1485            for chunk in chunk_text(&report, 8) {
1486                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1487            }
1488            let _ = tx.send(InferenceEvent::Done).await;
1489            return Ok(());
1490        }
1491
1492        if user_input.trim() == "/vein-reset" {
1493            tokio::task::block_in_place(|| self.vein.reset());
1494            let _ = tx
1495                .send(InferenceEvent::VeinStatus {
1496                    file_count: 0,
1497                    embedded_count: 0,
1498                    docs_only: self.vein_docs_only_mode(),
1499                })
1500                .await;
1501            for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1502                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1503            }
1504            let _ = tx.send(InferenceEvent::Done).await;
1505            return Ok(());
1506        }
1507
1508        // Reload config every turn (edits apply immediately, no restart needed).
1509        let config = crate::agent::config::load_config();
1510        self.recovery_context.clear();
1511        let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1512        if !manual_runtime_refresh {
1513            if let Some((model_id, context_length, changed)) = self
1514                .refresh_runtime_profile_and_report(&tx, "turn_start")
1515                .await
1516            {
1517                if changed {
1518                    let _ = tx
1519                        .send(InferenceEvent::Thought(format!(
1520                            "Runtime refresh: using model `{}` with CTX {} for this turn.",
1521                            model_id, context_length
1522                        )))
1523                        .await;
1524                }
1525            }
1526        }
1527        self.emit_compaction_pressure(&tx).await;
1528        let current_model = self.engine.current_model();
1529        self.engine.set_gemma_native_formatting(
1530            crate::agent::config::effective_gemma_native_formatting(&config, &current_model),
1531        );
1532        let _turn_id = self.begin_grounded_turn().await;
1533        let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1534        let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1535            Ok(tools) => tools,
1536            Err(e) => {
1537                let _ = tx
1538                    .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1539                    .await;
1540                Vec::new()
1541            }
1542        };
1543
1544        // Apply config model overrides (config takes precedence over CLI flags).
1545        let effective_fast = config
1546            .fast_model
1547            .clone()
1548            .or_else(|| self.fast_model.clone());
1549        let effective_think = config
1550            .think_model
1551            .clone()
1552            .or_else(|| self.think_model.clone());
1553
1554        // ── /lsp: start language servers manually if needed ──────────────────
1555        if user_input.trim() == "/lsp" {
1556            let mut lsp = self.lsp_manager.lock().await;
1557            match lsp.start_servers().await {
1558                Ok(_) => {
1559                    let _ = tx
1560                        .send(InferenceEvent::MutedToken(
1561                            "LSP: Servers Initialized OK.".to_string(),
1562                        ))
1563                        .await;
1564                }
1565                Err(e) => {
1566                    let _ = tx
1567                        .send(InferenceEvent::Error(format!(
1568                            "LSP: Failed to start servers - {}",
1569                            e
1570                        )))
1571                        .await;
1572                }
1573            }
1574            let _ = tx.send(InferenceEvent::Done).await;
1575            return Ok(());
1576        }
1577
1578        if user_input.trim() == "/runtime-refresh" {
1579            match self
1580                .refresh_runtime_profile_and_report(&tx, "manual_command")
1581                .await
1582            {
1583                Some((model_id, context_length, changed)) => {
1584                    let msg = if changed {
1585                        format!(
1586                            "Runtime profile refreshed. Model: {} | CTX: {}",
1587                            model_id, context_length
1588                        )
1589                    } else {
1590                        format!(
1591                            "Runtime profile unchanged. Model: {} | CTX: {}",
1592                            model_id, context_length
1593                        )
1594                    };
1595                    for chunk in chunk_text(&msg, 8) {
1596                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1597                    }
1598                }
1599                None => {
1600                    let _ = tx
1601                        .send(InferenceEvent::Error(
1602                            "Runtime refresh failed: LM Studio profile could not be read."
1603                                .to_string(),
1604                        ))
1605                        .await;
1606                }
1607            }
1608            let _ = tx.send(InferenceEvent::Done).await;
1609            return Ok(());
1610        }
1611
1612        if user_input.trim() == "/ask" {
1613            self.set_workflow_mode(WorkflowMode::Ask);
1614            for chunk in chunk_text(
1615                "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1616                8,
1617            ) {
1618                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1619            }
1620            let _ = tx.send(InferenceEvent::Done).await;
1621            return Ok(());
1622        }
1623
1624        if user_input.trim() == "/code" {
1625            self.set_workflow_mode(WorkflowMode::Code);
1626            let mut message =
1627                "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1628            if let Some(plan) = self.current_plan_summary() {
1629                message.push_str(&format!(" Current plan: {plan}."));
1630            }
1631            for chunk in chunk_text(&message, 8) {
1632                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1633            }
1634            let _ = tx.send(InferenceEvent::Done).await;
1635            return Ok(());
1636        }
1637
1638        if user_input.trim() == "/architect" {
1639            self.set_workflow_mode(WorkflowMode::Architect);
1640            let mut message =
1641                "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement.".to_string();
1642            if let Some(plan) = self.current_plan_summary() {
1643                message.push_str(&format!(" Existing plan: {plan}."));
1644            }
1645            for chunk in chunk_text(&message, 8) {
1646                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1647            }
1648            let _ = tx.send(InferenceEvent::Done).await;
1649            return Ok(());
1650        }
1651
1652        if user_input.trim() == "/read-only" {
1653            self.set_workflow_mode(WorkflowMode::ReadOnly);
1654            for chunk in chunk_text(
1655                "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1656                8,
1657            ) {
1658                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1659            }
1660            let _ = tx.send(InferenceEvent::Done).await;
1661            return Ok(());
1662        }
1663
1664        if user_input.trim() == "/auto" {
1665            self.set_workflow_mode(WorkflowMode::Auto);
1666            for chunk in chunk_text(
1667                "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1668                8,
1669            ) {
1670                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1671            }
1672            let _ = tx.send(InferenceEvent::Done).await;
1673            return Ok(());
1674        }
1675
1676        if user_input.trim() == "/chat" {
1677            self.set_workflow_mode(WorkflowMode::Chat);
1678            let _ = tx.send(InferenceEvent::Done).await;
1679            return Ok(());
1680        }
1681
1682        if user_input.trim() == "/reroll" {
1683            let soul = crate::ui::hatch::generate_soul_random();
1684            self.snark = soul.snark;
1685            self.chaos = soul.chaos;
1686            self.soul_personality = soul.personality.clone();
1687            // Update the engine's species name so build_chat_system_prompt uses it.
1688            // NOTE: Arc::get_mut only returns Some when no other Arc/Weak pointers to the
1689            // engine exist. If others do (e.g. swarm workers hold refs), there is no fallback
1690            // here: the engine keeps its previous species; the soul fields on self still update.
1691            let species = soul.species.clone();
1692            if let Some(eng) = Arc::get_mut(&mut self.engine) {
1693                eng.species = species.clone();
1694            }
1695            let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1696            let _ = tx
1697                .send(InferenceEvent::SoulReroll {
1698                    species: soul.species.clone(),
1699                    rarity: soul.rarity.label().to_string(),
1700                    shiny: soul.shiny,
1701                    personality: soul.personality.clone(),
1702                })
1703                .await;
1704            for chunk in chunk_text(
1705                &format!(
1706                    "A new companion awakens!\n[{}{}] {} — \"{}\"",
1707                    soul.rarity.label(),
1708                    shiny_tag,
1709                    soul.species,
1710                    soul.personality
1711                ),
1712                8,
1713            ) {
1714                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1715            }
1716            let _ = tx.send(InferenceEvent::Done).await;
1717            return Ok(());
1718        }
1719
1720        if user_input.trim() == "/agent" {
1721            self.set_workflow_mode(WorkflowMode::Auto);
1722            let _ = tx.send(InferenceEvent::Done).await;
1723            return Ok(());
1724        }
1725
1726        let mut effective_user_input = user_input.trim().to_string();
1727        if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1728            self.set_workflow_mode(mode);
1729            effective_user_input = rest.to_string();
1730        }
1731        let transcript_user_input = transcript_user_turn_text(user_turn, &effective_user_input);
1732        effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1733        let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1734            && is_current_plan_execution_request(&effective_user_input)
1735            && self
1736                .session_memory
1737                .current_plan
1738                .as_ref()
1739                .map(|plan| plan.has_signal())
1740                .unwrap_or(false);
1741        self.plan_execution_active
1742            .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1743        let _plan_execution_guard = PlanExecutionGuard {
1744            flag: self.plan_execution_active.clone(),
1745        };
1746        let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1747
1748        // ── Direct answers: intents with a built-in answer are emitted here and end the turn ──
1749        if let Some(answer_kind) = intent.direct_answer {
1750            match answer_kind {
1751                DirectAnswerKind::About => {
1752                    let response = build_about_answer();
1753                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1754                        .await;
1755                    return Ok(());
1756                }
1757                DirectAnswerKind::LanguageCapability => {
1758                    let response = build_language_capability_answer();
1759                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1760                        .await;
1761                    return Ok(());
1762                }
1763                DirectAnswerKind::UnsafeWorkflowPressure => {
1764                    let response = build_unsafe_workflow_pressure_answer();
1765                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1766                        .await;
1767                    return Ok(());
1768                }
1769                DirectAnswerKind::SessionMemory => {
1770                    let response = build_session_memory_answer();
1771                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1772                        .await;
1773                    return Ok(());
1774                }
1775                DirectAnswerKind::RecoveryRecipes => {
1776                    let response = build_recovery_recipes_answer();
1777                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1778                        .await;
1779                    return Ok(());
1780                }
1781                DirectAnswerKind::McpLifecycle => {
1782                    let response = build_mcp_lifecycle_answer();
1783                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1784                        .await;
1785                    return Ok(());
1786                }
1787                DirectAnswerKind::AuthorizationPolicy => {
1788                    let response = build_authorization_policy_answer();
1789                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1790                        .await;
1791                    return Ok(());
1792                }
1793                DirectAnswerKind::ToolClasses => {
1794                    let response = build_tool_classes_answer();
1795                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1796                        .await;
1797                    return Ok(());
1798                }
1799                DirectAnswerKind::ToolRegistryOwnership => {
1800                    let response = build_tool_registry_ownership_answer();
1801                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1802                        .await;
1803                    return Ok(());
1804                }
1805                DirectAnswerKind::SessionResetSemantics => {
1806                    let response = build_session_reset_semantics_answer();
1807                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1808                        .await;
1809                    return Ok(());
1810                }
1811                DirectAnswerKind::ProductSurface => {
1812                    let response = build_product_surface_answer();
1813                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1814                        .await;
1815                    return Ok(());
1816                }
1817                DirectAnswerKind::ReasoningSplit => {
1818                    let response = build_reasoning_split_answer();
1819                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1820                        .await;
1821                    return Ok(());
1822                }
1823                DirectAnswerKind::Identity => {
1824                    let response = build_identity_answer();
1825                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1826                        .await;
1827                    return Ok(());
1828                }
1829                DirectAnswerKind::WorkflowModes => {
1830                    let response = build_workflow_modes_answer();
1831                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1832                        .await;
1833                    return Ok(());
1834                }
1835                DirectAnswerKind::GemmaNative => {
1836                    let response = build_gemma_native_answer();
1837                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1838                        .await;
1839                    return Ok(());
1840                }
1841                DirectAnswerKind::GemmaNativeSettings => {
1842                    let response = build_gemma_native_settings_answer();
1843                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1844                        .await;
1845                    return Ok(());
1846                }
1847                DirectAnswerKind::VerifyProfiles => {
1848                    let response = build_verify_profiles_answer();
1849                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1850                        .await;
1851                    return Ok(());
1852                }
1853                DirectAnswerKind::Toolchain => {
1854                    let lower = effective_user_input.to_lowercase();
1855                    let topic = if (lower.contains("voice output") || lower.contains("voice"))
1856                        && (lower.contains("lag")
1857                            || lower.contains("behind visible text")
1858                            || lower.contains("latency"))
1859                    {
1860                        "voice_latency_plan"
1861                    } else {
1862                        "all"
1863                    };
1864                    let response =
1865                        crate::tools::toolchain::describe_toolchain(&serde_json::json!({
1866                            "topic": topic,
1867                            "question": effective_user_input,
1868                        }))
1869                        .await
1870                        .unwrap_or_else(|e| format!("Error: {}", e));
1871                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1872                        .await;
1873                    return Ok(());
1874                }
1875                DirectAnswerKind::ArchitectSessionResetPlan => {
1876                    let plan = build_architect_session_reset_plan();
1877                    let response = plan.to_markdown();
1878                    let _ = crate::tools::plan::save_plan_handoff(&plan);
1879                    self.session_memory.current_plan = Some(plan);
1880                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1881                        .await;
1882                    return Ok(());
1883                }
1884            }
1885        }
1886
1887        if matches!(
1888            self.workflow_mode,
1889            WorkflowMode::Ask | WorkflowMode::ReadOnly
1890        ) && looks_like_mutation_request(&effective_user_input)
1891        {
1892            let response = build_mode_redirect_answer(self.workflow_mode);
1893            self.history.push(ChatMessage::user(&effective_user_input));
1894            self.history.push(ChatMessage::assistant_text(&response));
1895            self.transcript.log_user(&transcript_user_input);
1896            self.transcript.log_agent(&response);
1897            for chunk in chunk_text(&response, 8) {
1898                if !chunk.is_empty() {
1899                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
1900                }
1901            }
1902            let _ = tx.send(InferenceEvent::Done).await;
1903            self.trim_history(80);
1904            self.refresh_session_memory();
1905            self.save_session();
1906            return Ok(());
1907        }
1908
1909        if user_input.trim() == "/think" {
1910            self.think_mode = Some(true);
1911            for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
1912                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1913            }
1914            let _ = tx.send(InferenceEvent::Done).await;
1915            return Ok(());
1916        }
1917        if user_input.trim() == "/no_think" {
1918            self.think_mode = Some(false);
1919            for chunk in chunk_text(
1920                "Think mode: OFF — fast mode enabled (no chain-of-thought).",
1921                8,
1922            ) {
1923                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1924            }
1925            let _ = tx.send(InferenceEvent::Done).await;
1926            return Ok(());
1927        }
1928
1929        // ── /pin: add file to active context ────────────────────────────────
1930        if user_input.trim_start().starts_with("/pin ") {
1931            let path = user_input.trim_start()[5..].trim();
1932            match std::fs::read_to_string(path) {
1933                Ok(content) => {
1934                    self.pinned_files
1935                        .lock()
1936                        .await
1937                        .insert(path.to_string(), content);
1938                    let msg = format!(
1939                        "Pinned: {} — this file is now locked in model context.",
1940                        path
1941                    );
1942                    for chunk in chunk_text(&msg, 8) {
1943                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1944                    }
1945                }
1946                Err(e) => {
1947                    let _ = tx
1948                        .send(InferenceEvent::Error(format!(
1949                            "Failed to pin {}: {}",
1950                            path, e
1951                        )))
1952                        .await;
1953                }
1954            }
1955            let _ = tx.send(InferenceEvent::Done).await;
1956            return Ok(());
1957        }
1958
1959        // ── /unpin: remove file from active context ──────────────────────────
1960        if user_input.trim_start().starts_with("/unpin ") {
1961            let path = user_input.trim_start()[7..].trim();
1962            if self.pinned_files.lock().await.remove(path).is_some() {
1963                let msg = format!("Unpinned: {} — file removed from active context.", path);
1964                for chunk in chunk_text(&msg, 8) {
1965                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
1966                }
1967            } else {
1968                let _ = tx
1969                    .send(InferenceEvent::Error(format!(
1970                        "File {} was not pinned.",
1971                        path
1972                    )))
1973                    .await;
1974            }
1975            let _ = tx.send(InferenceEvent::Done).await;
1976            return Ok(());
1977        }
1978
1979        // ── Normal processing ───────────────────────────────────────────────
1980
1981        // Build the base system prompt (MCP tools were already refreshed earlier in this turn).
1982        let tiny_context_mode = self.engine.current_context_length() <= 8_192;
1983        let mut base_prompt = self.engine.build_system_prompt(
1984            self.snark,
1985            self.chaos,
1986            self.brief,
1987            self.professional,
1988            &self.tools,
1989            self.reasoning_history.as_deref(),
1990            &mcp_tools,
1991        );
1992        if !tiny_context_mode {
1993            if let Some(hint) = &config.context_hint {
1994                if !hint.trim().is_empty() {
1995                    base_prompt.push_str(&format!(
1996                        "\n\n# Project Context (from .hematite/settings.json)\n{}",
1997                        hint
1998                    ));
1999                }
2000            }
2001            if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
2002                &crate::tools::file_ops::workspace_root(),
2003            ) {
2004                base_prompt.push_str(&format!("\n\n{}", profile_block));
2005            }
2006            // L1: inject hot-files block if available (persists across sessions via vein.db).
2007            if let Some(ref l1) = self.l1_context {
2008                base_prompt.push_str(&format!("\n\n{}", l1));
2009            }
2010        }
2011        let grounded_trace_mode = intent.grounded_trace_mode
2012            || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2013        let capability_mode =
2014            intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2015        let toolchain_mode =
2016            intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2017        let host_inspection_mode = intent.host_inspection_mode;
2018        let fix_plan_mode =
2019            preferred_host_inspection_topic(&effective_user_input) == Some("fix_plan");
2020        let project_map_mode = intent.preserve_project_map_output
2021            || intent.primary_class == QueryIntentClass::RepoArchitecture;
2022        let architecture_overview_mode = intent.architecture_overview_mode;
2023        let capability_needs_repo = intent.capability_needs_repo;
2024        let mut system_msg = build_system_with_corrections(
2025            &base_prompt,
2026            &self.correction_hints,
2027            &self.gpu_state,
2028            &self.git_state,
2029            &config,
2030        );
2031        if tiny_context_mode {
2032            system_msg.push_str(
2033                "\n\n# TINY CONTEXT TURN MODE\n\
2034                 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2035            );
2036        }
2037        if !tiny_context_mode && grounded_trace_mode {
2038            system_msg.push_str(
2039                "\n\n# GROUNDED TRACE MODE\n\
2040                 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2041                 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2042                 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2043                 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2044                 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2045                 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2046                 Do not invent names such as synthetic channels or subsystems.\n\
2047                 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2048                For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2049            );
2050        }
2051        if !tiny_context_mode && capability_mode {
2052            system_msg.push_str(
2053                "\n\n# CAPABILITY QUESTION MODE\n\
2054                 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2055                 Answer from stable Hematite capabilities and current runtime state.\n\
2056                 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2057                 Do NOT call repo-inspection tools like `map_project`, `read_file`, or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2058                 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2059                 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2060                 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2061                 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2062                 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2063                 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2064                 Keep the answer short, plain, and ASCII-first.\n"
2065            );
2066        }
2067        if !tiny_context_mode && toolchain_mode {
2068            system_msg.push_str(
2069                "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2070                 This turn is about Hematite's real built-in tools and how to choose them.\n\
2071                 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2072                 Use only real built-in tool names.\n\
2073                 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2074                 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2075                 Be explicit about which tools are optional or conditional.\n"
2076            );
2077        }
2078        if !tiny_context_mode && host_inspection_mode {
2079            system_msg.push_str(
2080                 "\n\n# HOST INSPECTION MODE\n\
2081                   This turn is about the local machine and environment, not repository architecture.\n\
2082                 Prefer `inspect_host` before raw `shell` for PATH analysis, installed developer tool versions, environment/package-manager health, grounded fix plans, network snapshots, service snapshots, process snapshots, desktop item counts, Downloads summaries, listening ports, repo-doctor checks, and directory/disk-size reports.\n\
2083                 Use the closest built-in topic first: `summary`, `toolchains`, `path`, `env_doctor`, `fix_plan`, `network`, `services`, `processes`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
2084                 If the user asks how to fix a common workstation problem such as `cargo not found`, `port 3000 already in use`, or `LM Studio not reachable`, use `fix_plan` first instead of `env_doctor`, `path`, or `ports`.\n\
2085                 If `env_doctor` answers the question, stop there. Do not follow with `path` unless the user explicitly asks for raw PATH entries.\n\
2086                 Only use `shell` if the host question truly goes beyond `inspect_host`.\n"
2087              );
2088        }
2089        if !tiny_context_mode && fix_plan_mode {
2090            system_msg.push_str(
2091                "\n\n# FIX PLAN MODE\n\
2092                 This turn is a workstation remediation question, not just a diagnosis question.\n\
2093                 Call `inspect_host` with `topic=fix_plan` first.\n\
2094                 Do not start with `path`, `toolchains`, `env_doctor`, or `ports` unless the user explicitly asks for diagnosis details instead of a fix plan.\n\
2095                 Keep the answer grounded, stepwise, and approval-aware.\n"
2096            );
2097        }
2098        if !tiny_context_mode && project_map_mode {
2099            system_msg.push_str(
2100                "\n\n# PROJECT MAP DISCIPLINE MODE\n\
2101                 For repository structure, entrypoint, owner-file, or architecture-map questions, prefer `map_project` first.\n\
2102                 If `map_project` provides likely entrypoints and core owner files, preserve that grounded structure instead of rewriting it into broad prose.\n\
2103                 Do not invent new entrypoints or owner files that are not present in the tool output.\n\
2104                 Keep the final answer compact and architecture-first.\n"
2105            );
2106        }
2107        if !tiny_context_mode && architecture_overview_mode {
2108            system_msg.push_str(
2109                "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2110                 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow and `map_project` for compact structure.\n\
2111                 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2112                 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2113            );
2114        }
2115
2116        // ── Workflow Mode, Plan Contracts, then Pinned Files (Context Locking) ──
2117        system_msg.push_str(&format!(
2118            "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2119            self.workflow_mode.label()
2120        ));
2121        if tiny_context_mode {
2122            system_msg
2123                .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2124        } else {
2125            match self.workflow_mode {
2126                WorkflowMode::Auto => system_msg.push_str(
2127                    "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2128                ),
2129                WorkflowMode::Ask => system_msg.push_str(
2130                    "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2131                ),
2132                WorkflowMode::Code => system_msg.push_str(
2133                    "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2134                ),
2135                WorkflowMode::Architect => system_msg.push_str(
2136                    "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2137                ),
2138                WorkflowMode::ReadOnly => system_msg.push_str(
2139                    "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2140                ),
2141                WorkflowMode::Chat => {} // replaced by build_chat_system_prompt below
2142            }
2143        }
2144        if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2145            system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2146            system_msg.push_str(architect_handoff_contract());
2147            system_msg.push('\n');
2148        }
2149        if !tiny_context_mode && implement_current_plan {
2150            system_msg.push_str(
2151                "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2152                 The user explicitly asked you to implement the current saved plan.\n\
2153                 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2154                 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2155                 Do not call `map_project` during current-plan execution.\n\
2156                 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2157                 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2158            );
2159            if let Some(plan) = self.session_memory.current_plan.as_ref() {
2160                if !plan.target_files.is_empty() {
2161                    system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2162                    for path in &plan.target_files {
2163                        system_msg.push_str(&format!("- {}\n", path));
2164                    }
2165                }
2166            }
2167        }
2168        if !tiny_context_mode {
2169            let pinned = self.pinned_files.lock().await;
2170            if !pinned.is_empty() {
2171                system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2172                system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
2173                for (path, content) in pinned.iter() {
2174                    system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2175                }
2176            }
2177        }
2178        if !tiny_context_mode {
2179            self.append_session_handoff(&mut system_msg);
2180        }
2181        // In chat mode, replace the full harness prompt with a clean conversational surface.
2182        // The harness prompt (built above) is discarded — Rusty personality takes over.
2183        let system_msg = if self.workflow_mode.is_chat() {
2184            self.build_chat_system_prompt()
2185        } else {
2186            system_msg
2187        };
2188        if self.history.is_empty() || self.history[0].role != "system" {
2189            self.history.insert(0, ChatMessage::system(&system_msg));
2190        } else {
2191            self.history[0] = ChatMessage::system(&system_msg);
2192        }
2193
2194        // Ensure a clean state for the new turn.
2195        self.cancel_token
2196            .store(false, std::sync::atomic::Ordering::SeqCst);
2197
2198        // [Official Gemma-4 Spec] Purge reasoning history for new user turns.
2199        // History from previous turns must not be fed back into the prompt to prevent duplication.
2200        self.reasoning_history = None;
2201
2202        let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2203        let user_content = match self.think_mode {
2204            Some(true) => format!("/think\n{}", effective_user_input),
2205            Some(false) => format!("/no_think\n{}", effective_user_input),
2206            // For non-Gemma models (Qwen etc.) default to /think so the model uses
2207            // hybrid thinking — it decides how much reasoning each turn needs.
2208            // Gemma handles reasoning via <|think|> in the system prompt instead.
2209            // Chat mode and quick tool calls skip /think — fast direct answers.
2210            None if !is_gemma
2211                && !self.workflow_mode.is_chat()
2212                && !is_quick_tool_request(&effective_user_input) =>
2213            {
2214                format!("/think\n{}", effective_user_input)
2215            }
2216            None => effective_user_input.clone(),
2217        };
2218        if let Some(image) = user_turn.attached_image.as_ref() {
2219            let image_url =
2220                crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2221                    .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2222            self.history
2223                .push(ChatMessage::user_with_image(&user_content, &image_url));
2224        } else {
2225            self.history.push(ChatMessage::user(&user_content));
2226        }
2227        self.transcript.log_user(&transcript_user_input);
2228
2229        // Incremental re-index and Vein context injection. Ordinary chat mode
2230        // still skips repo-snippet noise, but docs-only workspaces and explicit
2231        // session-recall prompts should keep Vein memory available.
2232        let vein_docs_only = self.vein_docs_only_mode();
2233        let allow_vein_context = !self.workflow_mode.is_chat()
2234            || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2235        let (vein_context, vein_paths) = if allow_vein_context {
2236            self.refresh_vein_index();
2237            let _ = tx
2238                .send(InferenceEvent::VeinStatus {
2239                    file_count: self.vein.file_count(),
2240                    embedded_count: self.vein.embedded_chunk_count(),
2241                    docs_only: vein_docs_only,
2242                })
2243                .await;
2244            match self.build_vein_context(&effective_user_input) {
2245                Some((ctx, paths)) => (Some(ctx), paths),
2246                None => (None, Vec::new()),
2247            }
2248        } else {
2249            (None, Vec::new())
2250        };
2251        if !vein_paths.is_empty() {
2252            let _ = tx
2253                .send(InferenceEvent::VeinContext { paths: vein_paths })
2254                .await;
2255        }
2256
2257        // Route: pick fast vs think model based on the complexity of this request.
2258        let routed_model = route_model(
2259            &effective_user_input,
2260            effective_fast.as_deref(),
2261            effective_think.as_deref(),
2262        )
2263        .map(|s| s.to_string());
2264
2265        let mut loop_intervention: Option<String> = None;
2266        let mut implementation_started = false;
2267        let mut non_mutating_plan_steps = 0usize;
2268        let non_mutating_plan_soft_cap = 5usize;
2269        let non_mutating_plan_hard_cap = 8usize;
2270        let mut overview_project_map: Option<String> = None;
2271        let mut overview_runtime_trace: Option<String> = None;
2272
2273        // Safety cap – never spin forever on a broken model.
2274        let max_iters = 25;
2275        let mut consecutive_errors = 0;
2276        let mut first_iter = true;
2277        let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2278        // Placeholder for tracking identical tool results within this turn (logical-loop detection); immutable, so never populated here.
2279        let _result_counts: std::collections::HashMap<String, usize> =
2280            std::collections::HashMap::new();
2281        // Track the count of identical (name, args) calls to detect infinite tool loops.
2282        let mut repeat_counts: std::collections::HashMap<String, usize> =
2283            std::collections::HashMap::new();
2284        let mut completed_tool_cache: std::collections::HashMap<String, CachedToolResult> =
2285            std::collections::HashMap::new();
2286        let mut successful_read_targets: std::collections::HashSet<String> =
2287            std::collections::HashSet::new();
2288        // (path, offset) pairs — catches repeated reads at the same non-zero offset.
2289        let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2290            std::collections::HashSet::new();
2291        let mut successful_grep_targets: std::collections::HashSet<String> =
2292            std::collections::HashSet::new();
2293        let mut no_match_grep_targets: std::collections::HashSet<String> =
2294            std::collections::HashSet::new();
2295        let mut broad_grep_targets: std::collections::HashSet<String> =
2296            std::collections::HashSet::new();
2297
2298        // Track the index of the message that started THIS turn, so compaction doesn't summarize it.
2299        let mut turn_anchor = self.history.len().saturating_sub(1);
2300
2301        for _iter in 0..max_iters {
2302            let mut mutation_occurred = false;
2303            // Priority Check: External Cancellation (via Esc key in TUI)
2304            if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2305                self.cancel_token
2306                    .store(false, std::sync::atomic::Ordering::SeqCst);
2307                let _ = tx
2308                    .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2309                    .await;
2310                let _ = tx.send(InferenceEvent::Done).await;
2311                return Ok(());
2312            }
2313
2314            // ── Intelligence Surge: Proactive Compaction Check ──────────────────────
2315            if self
2316                .compact_history_if_needed(&tx, Some(turn_anchor))
2317                .await?
2318            {
2319                // After compaction, history is [system, summary, turn_anchor, ...]
2320                // The new turn_anchor is index 2.
2321                turn_anchor = 2;
2322            }
2323
2324            // On the first iteration (unless executing the saved plan) inject Vein context into the system message.
2325            // Subsequent iterations use the plain slice — tool results are now in
2326            // history so Vein context would be redundant.
2327            let inject_vein = first_iter && !implement_current_plan;
2328            let messages = if implement_current_plan {
2329                first_iter = false;
2330                self.context_window_slice_from(turn_anchor)
2331            } else {
2332                first_iter = false;
2333                self.context_window_slice()
2334            };
2335
2336            // Use the canonical system prompt from history[0] which was built
2337            // by InferenceEngine::build_system_prompt() + build_system_with_corrections()
2338            // and includes GPU state, git context, permissions, and instruction files.
2339            let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2340                // Gemma 4 handles multiple system messages natively.
2341                // Standard models (Qwen, etc.) reject a second system message — merge into history[0].
2342                if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2343                    let mut msgs = vec![self.history[0].clone()];
2344                    msgs.push(ChatMessage::system(&intervention));
2345                    msgs
2346                } else {
2347                    let merged =
2348                        format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2349                    vec![ChatMessage::system(&merged)]
2350                }
2351            } else {
2352                vec![self.history[0].clone()]
2353            };
2354
2355            // Inject Vein context into the system message on the first iteration.
2356            // Vein results are merged in the same way as loop_intervention so standard
2357            // models (Qwen etc.) only ever see one system message.
2358            if inject_vein {
2359                if let Some(ref ctx) = vein_context.as_ref() {
2360                    if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2361                        prompt_msgs.push(ChatMessage::system(ctx));
2362                    } else {
2363                        let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2364                        prompt_msgs[0] = ChatMessage::system(&merged);
2365                    }
2366                }
2367            }
2368            prompt_msgs.extend(messages);
2369            if let Some(budget_note) =
2370                enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2371            {
2372                self.emit_operator_checkpoint(
2373                    &tx,
2374                    OperatorCheckpointState::BudgetReduced,
2375                    budget_note,
2376                )
2377                .await;
2378                let recipe = plan_recovery(
2379                    RecoveryScenario::PromptBudgetPressure,
2380                    &self.recovery_context,
2381                );
2382                self.emit_recovery_recipe_summary(
2383                    &tx,
2384                    recipe.recipe.scenario.label(),
2385                    compact_recovery_plan_summary(&recipe),
2386                )
2387                .await;
2388            }
2389            self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2390                .await;
2391
2392            let (mut text, mut tool_calls, usage, finish_reason) = match self
2393                .engine
2394                .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2395                .await
2396            {
2397                Ok(result) => result,
2398                Err(e) => {
2399                    let class = classify_runtime_failure(&e);
2400                    if should_retry_runtime_failure(class) {
2401                        if self.recovery_context.consume_transient_retry() {
2402                            let label = match class {
2403                                RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2404                                _ => "empty_model_response",
2405                            };
2406                            self.transcript.log_system(&format!(
2407                                "Automatic provider recovery triggered: {}",
2408                                e.trim()
2409                            ));
2410                            self.emit_recovery_recipe_summary(
2411                                &tx,
2412                                label,
2413                                compact_runtime_recovery_summary(class),
2414                            )
2415                            .await;
2416                            let _ = tx
2417                                .send(InferenceEvent::ProviderStatus {
2418                                    state: ProviderRuntimeState::Recovering,
2419                                    summary: compact_runtime_recovery_summary(class).into(),
2420                                })
2421                                .await;
2422                            self.emit_operator_checkpoint(
2423                                &tx,
2424                                OperatorCheckpointState::RecoveringProvider,
2425                                compact_runtime_recovery_summary(class),
2426                            )
2427                            .await;
2428                            continue;
2429                        }
2430                    }
2431
2432                    self.emit_runtime_failure(&tx, class, &e).await;
2433                    break;
2434                }
2435            };
2436            self.emit_provider_live(&tx).await;
2437
2438            // Update TUI token counter with actual usage from LM Studio.
2439            if let Some(ref u) = usage {
2440                let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2441            }
2442
2443            // Fallback safety net: if native tool markup leaked past the inference-layer
2444            // extractor, recover it here instead of treating it as plain assistant text.
2445            if tool_calls
2446                .as_ref()
2447                .map(|calls| calls.is_empty())
2448                .unwrap_or(true)
2449            {
2450                if let Some(raw_text) = text.as_deref() {
2451                    let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2452                    if !native_calls.is_empty() {
2453                        tool_calls = Some(native_calls);
2454                        let stripped =
2455                            crate::agent::inference::strip_native_tool_call_text(raw_text);
2456                        text = if stripped.trim().is_empty() {
2457                            None
2458                        } else {
2459                            Some(stripped)
2460                        };
2461                    }
2462                }
2463            }
2464
2465            // Treat empty tool_calls arrays (Some(vec![])) the same as None –
2466            // the model returned text only; an empty array causes an infinite loop.
2467            let tool_calls = tool_calls.filter(|c| !c.is_empty());
2468            let near_context_ceiling = usage
2469                .as_ref()
2470                .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2471                .unwrap_or(false);
2472
2473            if let Some(calls) = tool_calls {
2474                let (calls, prune_trace_note) =
2475                    prune_architecture_trace_batch(calls, architecture_overview_mode);
2476                if let Some(note) = prune_trace_note {
2477                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2478                }
2479
2480                let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2481                    calls,
2482                    self.workflow_mode.is_read_only(),
2483                    architecture_overview_mode,
2484                );
2485                if let Some(note) = prune_bloat_note {
2486                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2487                }
2488
2489                let (calls, prune_note) = prune_authoritative_tool_batch(
2490                    calls,
2491                    grounded_trace_mode,
2492                    &effective_user_input,
2493                );
2494                if let Some(note) = prune_note {
2495                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2496                }
2497
2498                let (calls, batch_note) = order_batch_reads_first(calls);
2499                if let Some(note) = batch_note {
2500                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2501                }
2502
2503                if let Some(repeated_path) = calls
2504                    .iter()
2505                    .filter(|c| {
2506                        let parsed = serde_json::from_str::<Value>(
2507                            &crate::agent::inference::normalize_tool_argument_string(
2508                                &c.function.name,
2509                                &c.function.arguments,
2510                            ),
2511                        )
2512                        .ok();
2513                        let offset = parsed
2514                            .as_ref()
2515                            .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2516                            .unwrap_or(0);
2517                        // Catch re-reads from the top (original behaviour) AND repeated
2518                        // reads at the exact same non-zero offset (new: catches targeted loops).
2519                        if offset < 200 {
2520                            return true;
2521                        }
2522                        if let Some(path) = parsed
2523                            .as_ref()
2524                            .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2525                        {
2526                            let normalized = normalize_workspace_path(path);
2527                            return successful_read_regions.contains(&(normalized, offset));
2528                        }
2529                        false
2530                    })
2531                    .filter_map(|c| repeated_read_target(&c.function))
2532                    .find(|path| successful_read_targets.contains(path))
2533                {
2534                    loop_intervention = Some(format!(
2535                        "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2536                        repeated_path
2537                    ));
2538                    let _ = tx
2539                        .send(InferenceEvent::Thought(
2540                            "Read discipline: preventing repeated full-file reads on the same path."
2541                                .into(),
2542                        ))
2543                        .await;
2544                    continue;
2545                }
2546
2547                if capability_mode
2548                    && !capability_needs_repo
2549                    && calls
2550                        .iter()
2551                        .all(|c| is_capability_probe_tool(&c.function.name))
2552                {
2553                    loop_intervention = Some(
2554                        "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2555                         Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2556                         Do not mention raw `mcp__*` names unless they are active and directly relevant."
2557                            .to_string(),
2558                    );
2559                    let _ = tx
2560                        .send(InferenceEvent::Thought(
2561                            "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2562                                .into(),
2563                        ))
2564                        .await;
2565                    continue;
2566                }
2567
2568                // VOCAL AGENT: If the model provided reasoning alongside tools,
2569                // stream it to the SPECULAR panel now using the hardened extraction.
2570                let raw_content = text.as_deref().unwrap_or(" ");
2571
2572                if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2573                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2574                    // Reasoning is silent (hidden in SPECULAR only).
2575                    self.reasoning_history = Some(thought);
2576                }
2577
2578                // [Gemma-4 Protocol] Keep raw content (including thoughts) during tool loops.
2579                // Thoughts are only stripped before the 'final' user turn.
2580                let stored_tool_call_content = if implement_current_plan {
2581                    cap_output(raw_content, 1200)
2582                } else {
2583                    raw_content.to_string()
2584                };
2585                self.history.push(ChatMessage::assistant_tool_calls(
2586                    &stored_tool_call_content,
2587                    calls.clone(),
2588                ));
2589
2590                // ── LAYER 4: Parallel Tool Orchestration (Batching) ────────────────────
2591                let mut results = Vec::new();
2592                let gemma4_model =
2593                    crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2594                let latest_user_prompt = self.latest_user_prompt();
2595                let mut seen_call_keys = std::collections::HashSet::new();
2596                let mut deduped_calls = Vec::new();
2597                for call in calls.clone() {
2598                    let (normalized_name, normalized_args) = normalized_tool_call_for_execution(
2599                        &call.function.name,
2600                        &call.function.arguments,
2601                        gemma4_model,
2602                        latest_user_prompt,
2603                    );
2604                    let key = canonical_tool_call_key(&normalized_name, &normalized_args);
2605                    if seen_call_keys.insert(key) {
2606                        let repeat_guard_exempt = matches!(
2607                            normalized_name.as_str(),
2608                            "verify_build" | "git_commit" | "git_push"
2609                        );
2610                        if !repeat_guard_exempt {
2611                            if let Some(cached) = completed_tool_cache
2612                                .get(&canonical_tool_call_key(&normalized_name, &normalized_args))
2613                            {
2614                                let _ = tx
2615                                    .send(InferenceEvent::Thought(
2616                                        "Cached tool result reused: identical built-in invocation already completed earlier in this turn."
2617                                            .to_string(),
2618                                    ))
2619                                    .await;
2620                                loop_intervention = Some(format!(
2621                                    "STOP. You already called `{}` with identical arguments earlier in this turn and already have that result in conversation history. Do not call it again. Use the existing result to answer or choose a different next step.",
2622                                    cached.tool_name
2623                                ));
2624                                continue;
2625                            }
2626                        }
2627                        deduped_calls.push(call);
2628                    } else {
2629                        let _ = tx
2630                            .send(InferenceEvent::Thought(
2631                                "Duplicate tool call skipped: identical built-in invocation already ran this turn."
2632                                    .to_string(),
2633                            ))
2634                            .await;
2635                    }
2636                }
2637
2638                // Partition tool calls: Parallel Read vs Serial Mutating
2639                let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = deduped_calls
2640                    .into_iter()
2641                    .partition(|c| is_parallel_safe(&c.function.name));
2642
2643                // 1. Concurrent Execution (ParallelRead)
2644                if !parallel_calls.is_empty() {
2645                    let mut tasks = Vec::new();
2646                    for call in parallel_calls {
2647                        let tx_clone = tx.clone();
2648                        let config_clone = config.clone();
2649                        // Carry the real call ID into the outcome
2650                        let call_with_id = call.clone();
2651                        tasks.push(self.process_tool_call(
2652                            call_with_id.function,
2653                            config_clone,
2654                            yolo,
2655                            tx_clone,
2656                            call_with_id.id,
2657                        ));
2658                    }
2659                    // Wait for all read-only tasks to complete simultaneously.
2660                    results.extend(futures::future::join_all(tasks).await);
2661                }
2662
2663                // 2. Sequential Execution (SerialMutating)
2664                for call in serial_calls {
2665                    results.push(
2666                        self.process_tool_call(
2667                            call.function,
2668                            config.clone(),
2669                            yolo,
2670                            tx.clone(),
2671                            call.id,
2672                        )
2673                        .await,
2674                    );
2675                }
2676
2677                // 3. Collate Messages into History & UI
2678                let mut authoritative_tool_output: Option<String> = None;
2679                let mut blocked_policy_output: Option<String> = None;
2680                let mut recoverable_policy_intervention: Option<String> = None;
2681                let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
2682                let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
2683                    None;
2684                for res in results {
2685                    let call_id = res.call_id.clone();
2686                    let tool_name = res.tool_name.clone();
2687                    let final_output = res.output.clone();
2688                    let is_error = res.is_error;
2689                    for msg in res.msg_results {
2690                        self.history.push(msg);
2691                    }
2692
2693                    // Update State for Verification Loop
2694                    if matches!(
2695                        tool_name.as_str(),
2696                        "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
2697                    ) {
2698                        mutation_occurred = true;
2699                        implementation_started = true;
2700                        // Heat tracking: bump L1 score for the edited file.
2701                        if !is_error {
2702                            let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
2703                            if !path.is_empty() {
2704                                self.vein.bump_heat(path);
2705                                self.l1_context = self.vein.l1_context();
2706                            }
2707                        }
2708                    }
2709
2710                    if tool_name == "verify_build" {
2711                        self.record_session_verification(
2712                            !is_error
2713                                && (final_output.contains("BUILD OK")
2714                                    || final_output.contains("BUILD SUCCESS")
2715                                    || final_output.contains("BUILD OKAY")),
2716                            if is_error {
2717                                "Explicit verify_build failed."
2718                            } else {
2719                                "Explicit verify_build passed."
2720                            },
2721                        );
2722                    }
2723
2724                    // Update Repeat Guard
2725                    let call_key = format!(
2726                        "{}:{}",
2727                        tool_name,
2728                        serde_json::to_string(&res.args).unwrap_or_default()
2729                    );
2730                    let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
2731                    *repeat_count += 1;
2732
2733                    // verify_build is legitimately called multiple times in fix-verify loops.
2734                    let repeat_guard_exempt = matches!(
2735                        tool_name.as_str(),
2736                        "verify_build" | "git_commit" | "git_push"
2737                    );
2738                    if *repeat_count >= 3 && !repeat_guard_exempt {
2739                        loop_intervention = Some(format!(
2740                            "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
2741                             Do not call it again. Either answer directly from what you already know, \
2742                             use a different tool or approach, or ask the user for clarification.",
2743                            tool_name, *repeat_count
2744                        ));
2745                        let _ = tx
2746                            .send(InferenceEvent::Thought(format!(
2747                                "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
2748                                tool_name, *repeat_count
2749                            )))
2750                            .await;
2751                    }
2752
2753                    if is_error {
2754                        consecutive_errors += 1;
2755                    } else {
2756                        consecutive_errors = 0;
2757                    }
2758
2759                    if consecutive_errors >= 3 {
2760                        loop_intervention = Some(
2761                            "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
2762                             STOP all tool calls immediately. Analyze why your previous 3 calls failed \
2763                             (check for hallucinations or invalid arguments) and ask the user for \
2764                             clarification if you cannot proceed.".to_string()
2765                        );
2766                    }
2767
2768                    if consecutive_errors >= 4 {
2769                        self.emit_runtime_failure(
2770                            &tx,
2771                            RuntimeFailureClass::ToolLoop,
2772                            "Hard termination: too many consecutive tool errors.",
2773                        )
2774                        .await;
2775                        return Ok(());
2776                    }
2777
2778                    let _ = tx
2779                        .send(InferenceEvent::ToolCallResult {
2780                            id: call_id.clone(),
2781                            name: tool_name.clone(),
2782                            output: final_output.clone(),
2783                            is_error,
2784                        })
2785                        .await;
2786
2787                    let repeat_guard_exempt = matches!(
2788                        tool_name.as_str(),
2789                        "verify_build" | "git_commit" | "git_push"
2790                    );
2791                    if !repeat_guard_exempt {
2792                        completed_tool_cache.insert(
2793                            canonical_tool_call_key(&tool_name, &res.args),
2794                            CachedToolResult {
2795                                tool_name: tool_name.clone(),
2796                            },
2797                        );
2798                    }
2799
2800                    // Cap output before history
2801                    let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
2802                        self.engine.current_context_length(),
2803                    );
2804                    let capped = if implement_current_plan {
2805                        cap_output(&final_output, 1200)
2806                    } else if tool_name == "map_project"
2807                        && self.workflow_mode == WorkflowMode::Architect
2808                    {
2809                        cap_output(&final_output, 2500)
2810                    } else if tool_name == "map_project" {
2811                        cap_output(&final_output, 3500)
2812                    } else if compact_ctx
2813                        && (tool_name == "read_file" || tool_name == "inspect_lines")
2814                    {
2815                        // Compact context: cap file reads tightly and add a navigation hint on truncation.
2816                        let limit = 3000usize;
2817                        if final_output.len() > limit {
2818                            let total_lines = final_output.lines().count();
2819                            let mut split_at = limit;
2820                            while !final_output.is_char_boundary(split_at) && split_at > 0 {
2821                                split_at -= 1;
2822                            }
2823                            format!(
2824                                "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.]",
2825                                &final_output[..split_at],
2826                                total_lines,
2827                                total_lines.saturating_sub(150),
2828                            )
2829                        } else {
2830                            final_output.clone()
2831                        }
2832                    } else {
2833                        cap_output(&final_output, 8000)
2834                    };
2835                    self.history.push(ChatMessage::tool_result_for_model(
2836                        &call_id,
2837                        &tool_name,
2838                        &capped,
2839                        &self.engine.current_model(),
2840                    ));
2841
2842                    if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
2843                    {
2844                        overview_runtime_trace =
2845                            Some(summarize_runtime_trace_output(&final_output));
2846                    } else if architecture_overview_mode && !is_error && tool_name == "map_project"
2847                    {
2848                        overview_project_map = Some(summarize_project_map_output(&final_output));
2849                    }
2850
2851                    if !architecture_overview_mode
2852                        && !is_error
2853                        && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
2854                            || (toolchain_mode && tool_name == "describe_toolchain"))
2855                    {
2856                        authoritative_tool_output = Some(final_output.clone());
2857                    } else if !architecture_overview_mode
2858                        && !is_error
2859                        && tool_name == "map_project"
2860                        && project_map_mode
2861                        && authoritative_tool_output.is_none()
2862                    {
2863                        authoritative_tool_output =
2864                            Some(summarize_project_map_output(&final_output));
2865                    }
2866
2867                    if !is_error && tool_name == "read_file" {
2868                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2869                            let normalized = normalize_workspace_path(path);
2870                            let read_offset =
2871                                res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
2872                            successful_read_targets.insert(normalized.clone());
2873                            successful_read_regions.insert((normalized.clone(), read_offset));
2874                        }
2875                    }
2876
2877                    if !is_error && tool_name == "grep_files" {
2878                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2879                            let normalized = normalize_workspace_path(path);
2880                            if final_output.starts_with("No matches for ") {
2881                                no_match_grep_targets.insert(normalized);
2882                            } else if grep_output_is_high_fanout(&final_output) {
2883                                broad_grep_targets.insert(normalized);
2884                            } else {
2885                                successful_grep_targets.insert(normalized);
2886                            }
2887                        }
2888                    }
2889
2890                    if is_error
2891                        && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
2892                        && (final_output.contains("search string not found")
2893                            || final_output.contains("search string is too short")
2894                            || final_output.contains("search string matched"))
2895                    {
2896                        if let Some(target) = action_target_path(&tool_name, &res.args) {
2897                            let guidance = if final_output.contains("matched") {
2898                                format!(
2899                                    "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
2900                                    tool_name, target
2901                                )
2902                            } else {
2903                                format!(
2904                                    "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
2905                                    tool_name, target
2906                                )
2907                            };
2908                            loop_intervention = Some(guidance);
2909                            *repeat_count = 0;
2910                        }
2911                    }
2912
2913                    if res.blocked_by_policy
2914                        && is_mcp_workspace_read_tool(&tool_name)
2915                        && recoverable_policy_intervention.is_none()
2916                    {
2917                        recoverable_policy_intervention = Some(
2918                            "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
2919                        );
2920                        recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
2921                        recoverable_policy_checkpoint = Some((
2922                            OperatorCheckpointState::BlockedPolicy,
2923                            "MCP workspace read blocked; rerouting to built-in file tools."
2924                                .to_string(),
2925                        ));
2926                    } else if res.blocked_by_policy
2927                        && implement_current_plan
2928                        && tool_name == "map_project"
2929                        && recoverable_policy_intervention.is_none()
2930                    {
2931                        recoverable_policy_intervention = Some(
2932                            "STOP. `map_project` is blocked during plan execution. Read your planned target files directly, then edit.".to_string(),
2933                        );
2934                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2935                        recoverable_policy_checkpoint = Some((
2936                            OperatorCheckpointState::BlockedPolicy,
2937                            "`map_project` blocked for current-plan execution.".to_string(),
2938                        ));
2939                    } else if res.blocked_by_policy
2940                        && implement_current_plan
2941                        && is_current_plan_irrelevant_tool(&tool_name)
2942                        && recoverable_policy_intervention.is_none()
2943                    {
2944                        recoverable_policy_intervention = Some(format!(
2945                            "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
2946                            tool_name
2947                        ));
2948                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2949                        recoverable_policy_checkpoint = Some((
2950                            OperatorCheckpointState::BlockedPolicy,
2951                            format!(
2952                                "Current-plan execution blocked unrelated tool `{}`.",
2953                                tool_name
2954                            ),
2955                        ));
2956                    } else if res.blocked_by_policy
2957                        && implement_current_plan
2958                        && final_output.contains("requires recent file evidence")
2959                        && recoverable_policy_intervention.is_none()
2960                    {
2961                        let target = action_target_path(&tool_name, &res.args)
2962                            .unwrap_or_else(|| "the target file".to_string());
2963                        recoverable_policy_intervention = Some(format!(
2964                            "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
2965                        ));
2966                        recoverable_policy_recipe =
2967                            Some(RecoveryScenario::RecentFileEvidenceMissing);
2968                        recoverable_policy_checkpoint = Some((
2969                            OperatorCheckpointState::BlockedRecentFileEvidence,
2970                            format!("Edit blocked on `{target}`; recent file evidence missing."),
2971                        ));
2972                    } else if res.blocked_by_policy
2973                        && implement_current_plan
2974                        && final_output.contains("requires an exact local line window first")
2975                        && recoverable_policy_intervention.is_none()
2976                    {
2977                        let target = action_target_path(&tool_name, &res.args)
2978                            .unwrap_or_else(|| "the target file".to_string());
2979                        recoverable_policy_intervention = Some(format!(
2980                            "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
2981                        ));
2982                        recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
2983                        recoverable_policy_checkpoint = Some((
2984                            OperatorCheckpointState::BlockedExactLineWindow,
2985                            format!("Edit blocked on `{target}`; exact line window required."),
2986                        ));
2987                    } else if res.blocked_by_policy && blocked_policy_output.is_none() {
2988                        blocked_policy_output = Some(final_output.clone());
2989                    }
2990
2991                    if *repeat_count >= 5 {
2992                        let _ = tx.send(InferenceEvent::Done).await;
2993                        return Ok(());
2994                    }
2995
2996                    if implement_current_plan
2997                        && !implementation_started
2998                        && !is_error
2999                        && is_non_mutating_plan_step_tool(&tool_name)
3000                    {
3001                        non_mutating_plan_steps += 1;
3002                    }
3003                }
3004
3005                if let Some(intervention) = recoverable_policy_intervention {
3006                    if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
3007                        self.emit_operator_checkpoint(&tx, state, summary).await;
3008                    }
3009                    if let Some(scenario) = recoverable_policy_recipe.take() {
3010                        let recipe = plan_recovery(scenario, &self.recovery_context);
3011                        self.emit_recovery_recipe_summary(
3012                            &tx,
3013                            recipe.recipe.scenario.label(),
3014                            compact_recovery_plan_summary(&recipe),
3015                        )
3016                        .await;
3017                    }
3018                    loop_intervention = Some(intervention);
3019                    let _ = tx
3020                        .send(InferenceEvent::Thought(
3021                            "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
3022                                .into(),
3023                        ))
3024                        .await;
3025                    continue;
3026                }
3027
3028                if architecture_overview_mode {
3029                    match (
3030                        overview_project_map.as_deref(),
3031                        overview_runtime_trace.as_deref(),
3032                    ) {
3033                        (Some(project_map), Some(runtime_trace)) => {
3034                            let response =
3035                                build_architecture_overview_answer(project_map, runtime_trace);
3036                            self.history.push(ChatMessage::assistant_text(&response));
3037                            self.transcript.log_agent(&response);
3038
3039                            for chunk in chunk_text(&response, 8) {
3040                                if !chunk.is_empty() {
3041                                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
3042                                }
3043                            }
3044
3045                            let _ = tx.send(InferenceEvent::Done).await;
3046                            break;
3047                        }
3048                        (Some(_), None) => {
3049                            loop_intervention = Some(
3050                                "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
3051                                    .to_string(),
3052                            );
3053                            continue;
3054                        }
3055                        (None, Some(_)) => {
3056                            loop_intervention = Some(
3057                                "Good. You now have the grounded runtime/control-flow trace. Next, call `map_project` once to capture entrypoints and core owner files. Keep it compact and do not call broad file-read tools in this architecture overview."
3058                                    .to_string(),
3059                            );
3060                            continue;
3061                        }
3062                        (None, None) => {}
3063                    }
3064                }
3065
3066                if implement_current_plan
3067                    && !implementation_started
3068                    && non_mutating_plan_steps >= non_mutating_plan_hard_cap
3069                {
3070                    let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
3071                    self.history.push(ChatMessage::assistant_text(&msg));
3072                    self.transcript.log_agent(&msg);
3073
3074                    for chunk in chunk_text(&msg, 8) {
3075                        if !chunk.is_empty() {
3076                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3077                        }
3078                    }
3079
3080                    let _ = tx.send(InferenceEvent::Done).await;
3081                    break;
3082                }
3083
3084                if let Some(blocked_output) = blocked_policy_output {
3085                    self.emit_operator_checkpoint(
3086                        &tx,
3087                        OperatorCheckpointState::BlockedPolicy,
3088                        "A blocked tool path was surfaced directly to the operator.",
3089                    )
3090                    .await;
3091                    self.history
3092                        .push(ChatMessage::assistant_text(&blocked_output));
3093                    self.transcript.log_agent(&blocked_output);
3094
3095                    for chunk in chunk_text(&blocked_output, 8) {
3096                        if !chunk.is_empty() {
3097                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3098                        }
3099                    }
3100
3101                    let _ = tx.send(InferenceEvent::Done).await;
3102                    break;
3103                }
3104
3105                if let Some(tool_output) = authoritative_tool_output {
3106                    self.history.push(ChatMessage::assistant_text(&tool_output));
3107                    self.transcript.log_agent(&tool_output);
3108
3109                    for chunk in chunk_text(&tool_output, 8) {
3110                        if !chunk.is_empty() {
3111                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3112                        }
3113                    }
3114
3115                    let _ = tx.send(InferenceEvent::Done).await;
3116                    break;
3117                }
3118
3119                if implement_current_plan && !implementation_started {
3120                    let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3121                    if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3122                        loop_intervention = Some(format!(
3123                            "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3124                            base
3125                        ));
3126                    } else {
3127                        loop_intervention = Some(base.to_string());
3128                    }
3129                } else if self.workflow_mode == WorkflowMode::Architect {
3130                    loop_intervention = Some(
3131                        format!(
3132                            "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3133                            architect_handoff_contract()
3134                        ),
3135                    );
3136                }
3137
3138                // 4. Auto-Verification Loop (The Perfect Bake)
3139                if mutation_occurred && !yolo {
3140                    let _ = tx
3141                        .send(InferenceEvent::Thought(
3142                            "Self-Verification: Running 'cargo check' to ensure build integrity..."
3143                                .into(),
3144                        ))
3145                        .await;
3146                    let verify_res = self.auto_verify_build().await;
3147                    let verify_ok = verify_res.contains("BUILD SUCCESS");
3148                    self.record_verify_build_result(verify_ok, &verify_res)
3149                        .await;
3150                    self.record_session_verification(
3151                        verify_ok,
3152                        if verify_ok {
3153                            "Automatic build verification passed."
3154                        } else {
3155                            "Automatic build verification failed."
3156                        },
3157                    );
3158                    self.history.push(ChatMessage::system(&format!(
3159                        "\n# SYSTEM VERIFICATION\n{verify_res}"
3160                    )));
3161                    let _ = tx
3162                        .send(InferenceEvent::Thought(
3163                            "Verification turn injected into history.".into(),
3164                        ))
3165                        .await;
3166                }
3167
3168                // Continue loop – the model will respond to the results.
3169                continue;
3170            } else if let Some(response_text) = text {
3171                if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3172                    if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3173                        let cleaned = build_session_reset_semantics_answer();
3174                        self.history.push(ChatMessage::assistant_text(&cleaned));
3175                        self.transcript.log_agent(&cleaned);
3176                        for chunk in chunk_text(&cleaned, 8) {
3177                            if !chunk.is_empty() {
3178                                let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3179                            }
3180                        }
3181                        let _ = tx.send(InferenceEvent::Done).await;
3182                        break;
3183                    }
3184
3185                    let warning = format_runtime_failure(
3186                        RuntimeFailureClass::ContextWindow,
3187                        "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3188                    );
3189                    self.history.push(ChatMessage::assistant_text(&warning));
3190                    self.transcript.log_agent(&warning);
3191                    let _ = tx
3192                        .send(InferenceEvent::Thought(
3193                            "Length recovery: model hit the context ceiling before completing the answer."
3194                                .into(),
3195                        ))
3196                        .await;
3197                    for chunk in chunk_text(&warning, 8) {
3198                        if !chunk.is_empty() {
3199                            let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3200                        }
3201                    }
3202                    let _ = tx.send(InferenceEvent::Done).await;
3203                    break;
3204                }
3205
3206                if response_text.contains("<|tool_call")
3207                    || response_text.contains("[END_TOOL_REQUEST]")
3208                    || response_text.contains("<|tool_response")
3209                    || response_text.contains("<tool_response|>")
3210                {
3211                    loop_intervention = Some(
3212                        "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3213                    );
3214                    continue;
3215                }
3216
3217                // 1. Process and route the reasoning block to SPECULAR.
3218                if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3219                {
3220                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3221                    // Persist for history audit (stripped from next turn by Volatile Reasoning rule).
3222                    // This will be summarized in the next turn's system prompt.
3223                    self.reasoning_history = Some(thought);
3224                }
3225
3226                // 2. Process and stream the final answer to the chat interface.
3227                let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3228
3229                if implement_current_plan && !implementation_started {
3230                    loop_intervention = Some(
3231                        "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3232                    );
3233                    continue;
3234                }
3235
3236                // [Hardened Interface] Strictly respect the stripper.
3237                // If it's empty, we stay silent in the chat area (reasoning is in SPECULAR).
3238                if cleaned.is_empty() {
3239                    let _ = tx.send(InferenceEvent::Done).await;
3240                    break;
3241                }
3242
3243                self.persist_architect_handoff(&cleaned);
3244                self.history.push(ChatMessage::assistant_text(&cleaned));
3245                self.transcript.log_agent(&cleaned);
3246
3247                // Send in smooth chunks for that professional UI feel.
3248                for chunk in chunk_text(&cleaned, 8) {
3249                    if !chunk.is_empty() {
3250                        let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3251                    }
3252                }
3253
3254                let _ = tx.send(InferenceEvent::Done).await;
3255                break;
3256            } else {
3257                let detail = "Model returned an empty response.";
3258                let class = classify_runtime_failure(detail);
3259                if should_retry_runtime_failure(class) {
3260                    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3261                        if let RecoveryDecision::Attempt(plan) =
3262                            attempt_recovery(scenario, &mut self.recovery_context)
3263                        {
3264                            self.transcript.log_system(
3265                                "Automatic provider recovery triggered: model returned an empty response.",
3266                            );
3267                            self.emit_recovery_recipe_summary(
3268                                &tx,
3269                                plan.recipe.scenario.label(),
3270                                compact_recovery_plan_summary(&plan),
3271                            )
3272                            .await;
3273                            let _ = tx
3274                                .send(InferenceEvent::ProviderStatus {
3275                                    state: ProviderRuntimeState::Recovering,
3276                                    summary: compact_runtime_recovery_summary(class).into(),
3277                                })
3278                                .await;
3279                            self.emit_operator_checkpoint(
3280                                &tx,
3281                                OperatorCheckpointState::RecoveringProvider,
3282                                compact_runtime_recovery_summary(class),
3283                            )
3284                            .await;
3285                            continue;
3286                        }
3287                    }
3288                }
3289
3290                self.emit_runtime_failure(&tx, class, detail).await;
3291                break;
3292            }
3293        }
3294
3295        self.trim_history(80);
3296        self.refresh_session_memory();
3297        self.save_session();
3298        self.emit_compaction_pressure(&tx).await;
3299        Ok(())
3300    }
3301
3302    async fn emit_runtime_failure(
3303        &mut self,
3304        tx: &mpsc::Sender<InferenceEvent>,
3305        class: RuntimeFailureClass,
3306        detail: &str,
3307    ) {
3308        if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3309            let decision = preview_recovery_decision(scenario, &self.recovery_context);
3310            self.emit_recovery_recipe_summary(
3311                tx,
3312                scenario.label(),
3313                compact_recovery_decision_summary(&decision),
3314            )
3315            .await;
3316            let needs_refresh = match &decision {
3317                RecoveryDecision::Attempt(plan) => plan
3318                    .recipe
3319                    .steps
3320                    .contains(&RecoveryStep::RefreshRuntimeProfile),
3321                RecoveryDecision::Escalate { recipe, .. } => {
3322                    recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3323                }
3324            };
3325            if needs_refresh {
3326                if let Some((model_id, context_length, changed)) = self
3327                    .refresh_runtime_profile_and_report(tx, "context_window_failure")
3328                    .await
3329                {
3330                    let note = if changed {
3331                        format!(
3332                            "Runtime refresh after context-window failure: model {} | CTX {}",
3333                            model_id, context_length
3334                        )
3335                    } else {
3336                        format!(
3337                            "Runtime refresh after context-window failure confirms model {} | CTX {}",
3338                            model_id, context_length
3339                        )
3340                    };
3341                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3342                }
3343            }
3344        }
3345        if let Some(state) = provider_state_for_runtime_failure(class) {
3346            let _ = tx
3347                .send(InferenceEvent::ProviderStatus {
3348                    state,
3349                    summary: compact_runtime_failure_summary(class).into(),
3350                })
3351                .await;
3352        }
3353        if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3354            self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3355                .await;
3356        }
3357        let formatted = format_runtime_failure(class, detail);
3358        self.history.push(ChatMessage::system(&format!(
3359            "# RUNTIME FAILURE\n{}",
3360            formatted
3361        )));
3362        self.transcript.log_system(&formatted);
3363        let _ = tx.send(InferenceEvent::Error(formatted)).await;
3364        let _ = tx.send(InferenceEvent::Done).await;
3365    }
3366
3367    /// [Task Analyzer] Run 'cargo check' and return a concise summary for the model.
3368    async fn auto_verify_build(&self) -> String {
3369        match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3370            Ok(out) => {
3371                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3372                    + &cap_output(&out, 2000)
3373            }
3374            Err(e) => format!(
3375                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3376                cap_output(&e, 2000)
3377            ),
3378        }
3379    }
3380
    /// Compact the conversation history when `compaction::should_compact` says the
    /// current context length / VRAM ratio warrants it.
    ///
    /// Returns `Ok(false)` without touching any state when no compaction is needed.
    /// Otherwise it:
    /// - replaces `history` and `running_summary` with the output of
    ///   `compaction::compact_history` (`anchor_index` is forwarded unchanged),
    /// - rebuilds `session_memory` from the compacted history, carrying the runtime
    ///   ledger over from the previous memory and recording the compaction,
    /// - strips leading non-user messages so the first non-system message is `user`,
    /// - emits progress thoughts, the `HistoryPressure` recovery recipe summary and a
    ///   `HistoryCompacted` operator checkpoint on `tx`,
    ///
    /// and returns `Ok(true)`. No path visible in this function returns `Err`.
    async fn compact_history_if_needed(
        &mut self,
        tx: &mpsc::Sender<InferenceEvent>,
        anchor_index: Option<usize>,
    ) -> Result<bool, String> {
        let vram_ratio = self.gpu_state.ratio();
        let context_length = self.engine.current_context_length();
        let config = CompactionConfig::adaptive(context_length, vram_ratio);

        if !compaction::should_compact(&self.history, context_length, vram_ratio) {
            return Ok(false);
        }

        // Channel send failures are ignored throughout; events are best-effort.
        let _ = tx
            .send(InferenceEvent::Thought(format!(
                "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
                context_length / 1000,
                vram_ratio * 100.0,
                config.max_estimated_tokens / 1000,
            )))
            .await;

        let result = compaction::compact_history(
            &self.history,
            self.running_summary.as_deref(),
            config,
            anchor_index,
        );

        // NOTE: this count is taken before the leading-message strip further down,
        // so messages dropped there are not included in it.
        let removed_message_count = self.history.len().saturating_sub(result.messages.len());
        self.history = result.messages;
        self.running_summary = result.summary;

        // Layer 6: Memory Synthesis (Task Context Persistence)
        // Memory is re-extracted from the compacted history; the previous memory is
        // kept only long enough to inherit its runtime ledger.
        let previous_memory = self.session_memory.clone();
        self.session_memory = compaction::extract_memory(&self.history);
        self.session_memory
            .inherit_runtime_ledger_from(&previous_memory);
        self.session_memory.record_compaction(
            removed_message_count,
            format!(
                "Compacted history around active task '{}' and preserved {} working-set file(s).",
                self.session_memory.current_task,
                self.session_memory.working_set.len()
            ),
        );
        self.emit_compaction_pressure(tx).await;

        // Jinja alignment: preserved slice may start with assistant/tool messages.
        // Strip any leading non-user messages so the first non-system message is always user.
        // If no user message exists after the system block, nothing is removed.
        let first_non_sys = self
            .history
            .iter()
            .position(|m| m.role != "system")
            .unwrap_or(self.history.len());
        if first_non_sys < self.history.len() {
            if let Some(user_offset) = self.history[first_non_sys..]
                .iter()
                .position(|m| m.role == "user")
            {
                if user_offset > 0 {
                    self.history
                        .drain(first_non_sys..first_non_sys + user_offset);
                }
            }
        }

        let _ = tx
            .send(InferenceEvent::Thought(format!(
                "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
                self.session_memory.current_task,
                self.session_memory.working_set.len()
            )))
            .await;
        // `plan_recovery` only borrows the recovery context immutably here; the plan
        // is used for its operator-facing summary.
        let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
        self.emit_recovery_recipe_summary(
            tx,
            recipe.recipe.scenario.label(),
            compact_recovery_plan_summary(&recipe),
        )
        .await;
        self.emit_operator_checkpoint(
            tx,
            OperatorCheckpointState::HistoryCompacted,
            format!(
                "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
                self.session_memory.current_task,
                self.session_memory.working_set.len()
            ),
        )
        .await;

        Ok(true)
    }
3478
3479    /// Query The Vein for context relevant to the user's message.
3480    /// Runs hybrid BM25 + semantic search (semantic requires embedding model in LM Studio).
3481    /// Returns a formatted system message string, or None if nothing useful found.
3482    fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3483        // Skip trivial / very short inputs.
3484        if query.trim().split_whitespace().count() < 3 {
3485            return None;
3486        }
3487
3488        let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3489        if results.is_empty() {
3490            return None;
3491        }
3492
3493        let semantic_active = self.vein.has_any_embeddings();
3494        let header = if semantic_active {
3495            "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3496             Use this to answer without needing extra read_file calls where possible.\n\n"
3497        } else {
3498            "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3499             Use this to answer without needing extra read_file calls where possible.\n\n"
3500        };
3501
3502        let mut ctx = String::from(header);
3503        let mut paths: Vec<String> = Vec::new();
3504
3505        let mut total = 0usize;
3506        const MAX_CTX_CHARS: usize = 1_500;
3507
3508        for r in results {
3509            if total >= MAX_CTX_CHARS {
3510                break;
3511            }
3512            let snippet = if r.content.len() > 500 {
3513                format!("{}...", &r.content[..500])
3514            } else {
3515                r.content.clone()
3516            };
3517            ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3518            total += snippet.len() + r.path.len() + 10;
3519            if !paths.contains(&r.path) {
3520                paths.push(r.path);
3521            }
3522        }
3523
3524        Some((ctx, paths))
3525    }
3526
3527    /// Returns the conversation history (WITHOUT the system prompt) for the context window.
3528    /// This ensures we don't have redundant system blocks and prevents Jinja crashes.
3529    fn context_window_slice(&self) -> Vec<ChatMessage> {
3530        let mut result = Vec::new();
3531
3532        // Skip index 0 (the raw system message) and any stray system messages in history.
3533        if self.history.len() > 1 {
3534            for m in &self.history[1..] {
3535                if m.role == "system" {
3536                    continue;
3537                }
3538
3539                let mut sanitized = m.clone();
3540                // DEEP SANITIZE: LM Studio Jinja templates for Qwen crash on truly empty content.
3541                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3542                    sanitized.content = MessageContent::Text(" ".into());
3543                }
3544                result.push(sanitized);
3545            }
3546        }
3547
3548        // Jinja Guard: The first message after the system prompt MUST be 'user'.
3549        // If not (e.g. because of compaction), we insert a tiny anchor.
3550        if !result.is_empty() && result[0].role != "user" {
3551            result.insert(0, ChatMessage::user("Continuing previous context..."));
3552        }
3553
3554        result
3555    }
3556
3557    fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
3558        let mut result = Vec::new();
3559
3560        if self.history.len() > 1 {
3561            let start = start_idx.max(1).min(self.history.len());
3562            for m in &self.history[start..] {
3563                if m.role == "system" {
3564                    continue;
3565                }
3566
3567                let mut sanitized = m.clone();
3568                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3569                    sanitized.content = MessageContent::Text(" ".into());
3570                }
3571                result.push(sanitized);
3572            }
3573        }
3574
3575        if !result.is_empty() && result[0].role != "user" {
3576            result.insert(0, ChatMessage::user("Continuing current plan execution..."));
3577        }
3578
3579        result
3580    }
3581
3582    /// Drop old turns from the middle of history.
3583    fn trim_history(&mut self, max_messages: usize) {
3584        if self.history.len() <= max_messages {
3585            return;
3586        }
3587        // Always keep [0] (system prompt).
3588        let excess = self.history.len() - max_messages;
3589        self.history.drain(1..=excess);
3590    }
3591
3592    /// P1: Attempt to fix malformed JSON tool arguments by asking the model to re-output them.
3593    async fn repair_tool_args(
3594        &self,
3595        tool_name: &str,
3596        bad_json: &str,
3597        tx: &mpsc::Sender<InferenceEvent>,
3598    ) -> Result<Value, String> {
3599        let _ = tx
3600            .send(InferenceEvent::Thought(format!(
3601                "Attempting to repair malformed JSON for '{}'...",
3602                tool_name
3603            )))
3604            .await;
3605
3606        let prompt = format!(
3607            "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
3608            tool_name, bad_json
3609        );
3610
3611        let messages = vec![
3612            ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
3613            ChatMessage::user(&prompt),
3614        ];
3615
3616        // Use fast model for speed if available.
3617        let (text, _, _, _) = self
3618            .engine
3619            .call_with_tools(&messages, &[], self.fast_model.as_deref())
3620            .await
3621            .map_err(|e| e.to_string())?;
3622
3623        let cleaned = text
3624            .unwrap_or_default()
3625            .trim()
3626            .trim_start_matches("```json")
3627            .trim_start_matches("```")
3628            .trim_end_matches("```")
3629            .trim()
3630            .to_string();
3631
3632        serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
3633    }
3634
3635    /// P2: Run a fast validation step after file writes to check for subtle logic errors.
3636    async fn run_critic_check(
3637        &self,
3638        path: &str,
3639        content: &str,
3640        tx: &mpsc::Sender<InferenceEvent>,
3641    ) -> Option<String> {
3642        // Only run for source code files.
3643        let ext = std::path::Path::new(path)
3644            .extension()
3645            .and_then(|e| e.to_str())
3646            .unwrap_or("");
3647        const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
3648        if !CRITIC_EXTS.contains(&ext) {
3649            return None;
3650        }
3651
3652        let _ = tx
3653            .send(InferenceEvent::Thought(format!(
3654                "CRITIC: Reviewing changes to '{}'...",
3655                path
3656            )))
3657            .await;
3658
3659        let truncated = cap_output(content, 4000);
3660
3661        let prompt = format!(
3662            "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
3663            path, ext, truncated
3664        );
3665
3666        let messages = vec![
3667            ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
3668            ChatMessage::user(&prompt)
3669        ];
3670
3671        let (text, _, _, _) = self
3672            .engine
3673            .call_with_tools(&messages, &[], self.fast_model.as_deref())
3674            .await
3675            .ok()?;
3676
3677        let critique = text?.trim().to_string();
3678        if critique.to_uppercase().contains("PASS") || critique.is_empty() {
3679            None
3680        } else {
3681            Some(critique)
3682        }
3683    }
3684}
3685
3686// ── Tool dispatcher ───────────────────────────────────────────────────────────
3687
/// Public entry point for running a tool by name.
///
/// Forwards `name` and `args` to `dispatch_builtin_tool` and returns its result
/// unchanged.
pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
    dispatch_builtin_tool(name, args).await
}
3691
/// Reduce a user prompt to the issue text used for a `fix_plan` inspection.
///
/// Surrounding whitespace is trimmed and a leading `/think` or `/no_think`
/// directive is removed (plain prefix match, `/think` tried first). Returns `None`
/// when nothing but whitespace remains.
fn normalize_fix_plan_issue_text(text: &str) -> Option<String> {
    let candidate = text.trim();
    let issue = ["/think", "/no_think"]
        .iter()
        .find_map(|directive| candidate.strip_prefix(*directive))
        .unwrap_or(candidate)
        .trim();

    if issue.is_empty() {
        None
    } else {
        Some(issue.to_owned())
    }
}
3703
3704fn fill_missing_fix_plan_issue(tool_name: &str, args: &mut Value, fallback_issue: Option<&str>) {
3705    if tool_name != "inspect_host" {
3706        return;
3707    }
3708
3709    let Some(topic) = args.get("topic").and_then(|v| v.as_str()) else {
3710        return;
3711    };
3712    if topic != "fix_plan" {
3713        return;
3714    }
3715
3716    let issue_missing = args
3717        .get("issue")
3718        .and_then(|v| v.as_str())
3719        .map(str::trim)
3720        .is_none_or(|value| value.is_empty());
3721    if !issue_missing {
3722        return;
3723    }
3724
3725    let Some(fallback_issue) = fallback_issue.and_then(normalize_fix_plan_issue_text) else {
3726        return;
3727    };
3728
3729    let Value::Object(map) = args else {
3730        return;
3731    };
3732    map.insert(
3733        "issue".to_string(),
3734        Value::String(fallback_issue.to_string()),
3735    );
3736}
3737
3738fn should_rewrite_shell_to_fix_plan(
3739    tool_name: &str,
3740    args: &Value,
3741    latest_user_prompt: Option<&str>,
3742) -> bool {
3743    if tool_name != "shell" {
3744        return false;
3745    }
3746    let Some(prompt) = latest_user_prompt else {
3747        return false;
3748    };
3749    if preferred_host_inspection_topic(prompt) != Some("fix_plan") {
3750        return false;
3751    }
3752    let command = args
3753        .get("command")
3754        .and_then(|value| value.as_str())
3755        .unwrap_or("");
3756    shell_looks_like_structured_host_inspection(command)
3757}
3758
3759fn rewrite_host_tool_call(
3760    tool_name: &mut String,
3761    args: &mut Value,
3762    latest_user_prompt: Option<&str>,
3763) {
3764    if should_rewrite_shell_to_fix_plan(tool_name, args, latest_user_prompt) {
3765        *tool_name = "inspect_host".to_string();
3766        *args = serde_json::json!({
3767            "topic": "fix_plan"
3768        });
3769    }
3770    fill_missing_fix_plan_issue(tool_name, args, latest_user_prompt);
3771}
3772
3773fn canonical_tool_call_key(tool_name: &str, args: &Value) -> String {
3774    format!(
3775        "{}:{}",
3776        tool_name,
3777        serde_json::to_string(args).unwrap_or_default()
3778    )
3779}
3780
3781fn normalized_tool_call_for_execution(
3782    tool_name: &str,
3783    raw_arguments: &str,
3784    gemma4_model: bool,
3785    latest_user_prompt: Option<&str>,
3786) -> (String, Value) {
3787    let normalized_arguments = if gemma4_model {
3788        crate::agent::inference::normalize_tool_argument_string(tool_name, raw_arguments)
3789    } else {
3790        raw_arguments.to_string()
3791    };
3792    let mut normalized_name = tool_name.to_string();
3793    let mut args = serde_json::from_str::<Value>(&normalized_arguments)
3794        .unwrap_or(Value::Object(Default::default()));
3795    rewrite_host_tool_call(&mut normalized_name, &mut args, latest_user_prompt);
3796    (normalized_name, args)
3797}
3798
/// Test-only helper: the canonical key of a call after execution-time normalization.
#[cfg(test)]
fn normalized_tool_call_key_for_dedupe(
    tool_name: &str,
    raw_arguments: &str,
    gemma4_model: bool,
    latest_user_prompt: Option<&str>,
) -> String {
    let (effective_name, effective_args) = normalized_tool_call_for_execution(
        tool_name,
        raw_arguments,
        gemma4_model,
        latest_user_prompt,
    );

    canonical_tool_call_key(&effective_name, &effective_args)
}
3814
3815impl ConversationManager {
3816    /// Checks if a tool call is authorized given the current configuration and mode.
3817    fn check_authorization(
3818        &self,
3819        name: &str,
3820        args: &serde_json::Value,
3821        config: &crate::agent::config::HematiteConfig,
3822        yolo_flag: bool,
3823    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
3824        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
3825    }
3826
3827    /// Layer 4: Isolated tool execution logic. Does not mutate 'self' to allow parallelism.
3828    async fn process_tool_call(
3829        &self,
3830        mut call: ToolCallFn,
3831        config: crate::agent::config::HematiteConfig,
3832        yolo: bool,
3833        tx: mpsc::Sender<InferenceEvent>,
3834        real_id: String,
3835    ) -> ToolExecutionOutcome {
3836        let mut msg_results = Vec::new();
3837        let gemma4_model =
3838            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
3839        let normalized_arguments = if gemma4_model {
3840            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
3841        } else {
3842            call.arguments.clone()
3843        };
3844
3845        // 1. Argument Parsing & Repair
3846        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
3847            Ok(v) => v,
3848            Err(_) => {
3849                match self
3850                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
3851                    .await
3852                {
3853                    Ok(v) => v,
3854                    Err(e) => {
3855                        let _ = tx
3856                            .send(InferenceEvent::Thought(format!(
3857                                "JSON Repair failed: {}",
3858                                e
3859                            )))
3860                            .await;
3861                        Value::Object(Default::default())
3862                    }
3863                }
3864            }
3865        };
3866        let last_user_prompt = self
3867            .history
3868            .iter()
3869            .rev()
3870            .find(|message| message.role == "user")
3871            .map(|message| message.content.as_str());
3872        rewrite_host_tool_call(&mut call.name, &mut args, last_user_prompt);
3873
3874        if call.name == "map_project" && self.workflow_mode == WorkflowMode::Architect {
3875            if let Some(obj) = args.as_object_mut() {
3876                obj.entry("include_symbols".to_string())
3877                    .or_insert(Value::Bool(false));
3878                obj.entry("max_depth".to_string())
3879                    .or_insert(Value::Number(2_u64.into()));
3880            }
3881        } else if call.name == "map_project" && self.workflow_mode.is_read_only() {
3882            if let Some(obj) = args.as_object_mut() {
3883                obj.entry("include_symbols".to_string())
3884                    .or_insert(Value::Bool(false));
3885                obj.entry("max_depth".to_string())
3886                    .or_insert(Value::Number(3_u64.into()));
3887            }
3888        }
3889
3890        let display = format_tool_display(&call.name, &args);
3891        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
3892        let auth = self.check_authorization(&call.name, &args, &config, yolo);
3893
3894        // 2. Permission Check
3895        let decision_result = match precondition_result {
3896            Err(e) => Err(e),
3897            Ok(_) => match auth {
3898                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
3899                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
3900                    reason,
3901                    source: _,
3902                } => {
3903                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
3904                    let _ = tx
3905                        .send(InferenceEvent::ApprovalRequired {
3906                            id: real_id.clone(),
3907                            name: call.name.clone(),
3908                            display: format!("{}\nWhy: {}", display, reason),
3909                            diff: None,
3910                            responder: approve_tx,
3911                        })
3912                        .await;
3913
3914                    match approve_rx.await {
3915                        Ok(true) => Ok(()),
3916                        _ => Err("Declined by user".into()),
3917                    }
3918                }
3919                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
3920                    reason, ..
3921                } => Err(reason),
3922            },
3923        };
3924        let blocked_by_policy =
3925            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
3926
3927        // 3. Execution (Local or MCP)
3928        let (output, is_error) = match decision_result {
3929            Err(e) => (format!("Error: {}", e), true),
3930            Ok(_) => {
3931                let _ = tx
3932                    .send(InferenceEvent::ToolCallStart {
3933                        id: real_id.clone(),
3934                        name: call.name.clone(),
3935                        args: display.clone(),
3936                    })
3937                    .await;
3938
3939                let result = if call.name.starts_with("lsp_") {
3940                    let lsp = self.lsp_manager.clone();
3941                    let path = args
3942                        .get("path")
3943                        .and_then(|v| v.as_str())
3944                        .unwrap_or("")
3945                        .to_string();
3946                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
3947                    let character =
3948                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
3949
3950                    match call.name.as_str() {
3951                        "lsp_definitions" => {
3952                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
3953                                .await
3954                        }
3955                        "lsp_references" => {
3956                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
3957                                .await
3958                        }
3959                        "lsp_hover" => {
3960                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
3961                        }
3962                        "lsp_search_symbol" => {
3963                            let query = args
3964                                .get("query")
3965                                .and_then(|v| v.as_str())
3966                                .unwrap_or_default()
3967                                .to_string();
3968                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
3969                        }
3970                        "lsp_rename_symbol" => {
3971                            let new_name = args
3972                                .get("new_name")
3973                                .and_then(|v| v.as_str())
3974                                .unwrap_or_default()
3975                                .to_string();
3976                            crate::tools::lsp_tools::lsp_rename_symbol(
3977                                lsp, path, line, character, new_name,
3978                            )
3979                            .await
3980                        }
3981                        "lsp_get_diagnostics" => {
3982                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
3983                        }
3984                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
3985                    }
3986                } else if call.name == "auto_pin_context" {
3987                    let pts = args.get("paths").and_then(|v| v.as_array());
3988                    let reason = args
3989                        .get("reason")
3990                        .and_then(|v| v.as_str())
3991                        .unwrap_or("uninformed scoping");
3992                    if let Some(arr) = pts {
3993                        let mut pinned = Vec::new();
3994                        {
3995                            let mut guard = self.pinned_files.lock().await;
3996                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; // 25MB Safety Valve
3997
3998                            for v in arr.iter().take(3) {
3999                                if let Some(p) = v.as_str() {
4000                                    if let Ok(meta) = std::fs::metadata(p) {
4001                                        if meta.len() > MAX_PINNED_SIZE {
4002                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
4003                                            continue;
4004                                        }
4005                                        if let Ok(content) = std::fs::read_to_string(p) {
4006                                            guard.insert(p.to_string(), content);
4007                                            pinned.push(p.to_string());
4008                                        }
4009                                    }
4010                                }
4011                            }
4012                        }
4013                        let msg = format!(
4014                            "Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
4015                            pinned.join(", "),
4016                            reason
4017                        );
4018                        let _ = tx
4019                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
4020                            .await;
4021                        Ok(msg)
4022                    } else {
4023                        Err("Missing 'paths' array for auto_pin_context.".to_string())
4024                    }
4025                } else if call.name == "list_pinned" {
4026                    let paths_msg = {
4027                        let pinned = self.pinned_files.lock().await;
4028                        if pinned.is_empty() {
4029                            "No files are currently pinned.".to_string()
4030                        } else {
4031                            let paths: Vec<_> = pinned.keys().cloned().collect();
4032                            format!(
4033                                "Currently pinned files in active memory:\n- {}",
4034                                paths.join("\n- ")
4035                            )
4036                        }
4037                    };
4038                    Ok(paths_msg)
4039                } else if call.name.starts_with("mcp__") {
4040                    let mut mcp = self.mcp_manager.lock().await;
4041                    match mcp.call_tool(&call.name, &args).await {
4042                        Ok(res) => Ok(res),
4043                        Err(e) => Err(e.to_string()),
4044                    }
4045                } else if call.name == "swarm" {
4046                    // ── Swarm Orchestration ──
4047                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
4048                    let max_workers = args
4049                        .get("max_workers")
4050                        .and_then(|v| v.as_u64())
4051                        .unwrap_or(3) as usize;
4052
4053                    let mut task_objs = Vec::new();
4054                    if let Value::Array(arr) = tasks_val {
4055                        for v in arr {
4056                            let id = v
4057                                .get("id")
4058                                .and_then(|x| x.as_str())
4059                                .unwrap_or("?")
4060                                .to_string();
4061                            let target = v
4062                                .get("target")
4063                                .and_then(|x| x.as_str())
4064                                .unwrap_or("?")
4065                                .to_string();
4066                            let instruction = v
4067                                .get("instruction")
4068                                .and_then(|x| x.as_str())
4069                                .unwrap_or("?")
4070                                .to_string();
4071                            task_objs.push(crate::agent::parser::WorkerTask {
4072                                id,
4073                                target,
4074                                instruction,
4075                            });
4076                        }
4077                    }
4078
4079                    if task_objs.is_empty() {
4080                        Err("No tasks provided for swarm.".to_string())
4081                    } else {
4082                        let (swarm_tx_internal, mut swarm_rx_internal) =
4083                            tokio::sync::mpsc::channel(32);
4084                        let tx_forwarder = tx.clone();
4085
4086                        // Bridge SwarmMessage -> InferenceEvent
4087                        tokio::spawn(async move {
4088                            while let Some(msg) = swarm_rx_internal.recv().await {
4089                                match msg {
4090                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
4091                                        let _ = tx_forwarder
4092                                            .send(InferenceEvent::Thought(format!(
4093                                                "Swarm [{}]: {}% complete",
4094                                                id, p
4095                                            )))
4096                                            .await;
4097                                    }
4098                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
4099                                        worker_id,
4100                                        file_path,
4101                                        before: _,
4102                                        after: _,
4103                                        tx,
4104                                    } => {
4105                                        let (approve_tx, approve_rx) =
4106                                            tokio::sync::oneshot::channel::<bool>();
4107                                        let display = format!(
4108                                            "Swarm worker [{}]: Integrated changes into {:?}",
4109                                            worker_id, file_path
4110                                        );
4111                                        let _ = tx_forwarder
4112                                            .send(InferenceEvent::ApprovalRequired {
4113                                                id: format!("swarm_{}", worker_id),
4114                                                name: "swarm_apply".to_string(),
4115                                                display,
4116                                                diff: None,
4117                                                responder: approve_tx,
4118                                            })
4119                                            .await;
4120                                        if let Ok(approved) = approve_rx.await {
4121                                            let response = if approved {
4122                                                crate::agent::swarm::ReviewResponse::Accept
4123                                            } else {
4124                                                crate::agent::swarm::ReviewResponse::Reject
4125                                            };
4126                                            let _ = tx.send(response);
4127                                        }
4128                                    }
4129                                    crate::agent::swarm::SwarmMessage::Done => {}
4130                                }
4131                            }
4132                        });
4133
4134                        let coordinator = self.swarm_coordinator.clone();
4135                        match coordinator
4136                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
4137                            .await
4138                        {
4139                            Ok(_) => Ok(
4140                                "Swarm execution completed. Check files for integration results."
4141                                    .to_string(),
4142                            ),
4143                            Err(e) => Err(format!("Swarm failure: {}", e)),
4144                        }
4145                    }
4146                } else if call.name == "vision_analyze" {
4147                    crate::tools::vision::vision_analyze(&self.engine, &args).await
4148                } else if matches!(
4149                    call.name.as_str(),
4150                    "edit_file" | "patch_hunk" | "multi_search_replace"
4151                ) && !yolo
4152                {
4153                    // ── Diff preview gate ─────────────────────────────────────
4154                    // Compute what the edit would look like before applying it.
4155                    // If we can build a diff, require user Y/N in the TUI.
4156                    let diff_result = match call.name.as_str() {
4157                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
4158                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
4159                        _ => crate::tools::file_ops::compute_msr_diff(&args),
4160                    };
4161                    match diff_result {
4162                        Ok(diff_text) => {
4163                            let path_label =
4164                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
4165                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
4166                            let _ = tx
4167                                .send(InferenceEvent::ApprovalRequired {
4168                                    id: real_id.clone(),
4169                                    name: call.name.clone(),
4170                                    display: format!("Edit preview: {}", path_label),
4171                                    diff: Some(diff_text),
4172                                    responder: appr_tx,
4173                                })
4174                                .await;
4175                            match appr_rx.await {
4176                                Ok(true) => dispatch_tool(&call.name, &args).await,
4177                                _ => Err("Edit declined by user.".into()),
4178                            }
4179                        }
4180                        // Diff computation failed (e.g. search string not found yet) —
4181                        // fall through and let the tool return its own error.
4182                        Err(_) => dispatch_tool(&call.name, &args).await,
4183                    }
4184                } else {
4185                    dispatch_tool(&call.name, &args).await
4186                };
4187
4188                match result {
4189                    Ok(o) => (o, false),
4190                    Err(e) => (format!("Error: {}", e), true),
4191                }
4192            }
4193        };
4194
4195        // ── Session Economics ────────────────────────────────────────────────
4196        {
4197            if let Ok(mut econ) = self.engine.economics.lock() {
4198                econ.record_tool(&call.name, !is_error);
4199            }
4200        }
4201
4202        if !is_error {
4203            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
4204                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
4205                    if call.name == "inspect_lines" {
4206                        self.record_line_inspection(path).await;
4207                    } else {
4208                        self.record_read_observation(path).await;
4209                    }
4210                }
4211            }
4212
4213            if call.name == "verify_build" {
4214                let ok = output.contains("BUILD OK")
4215                    || output.contains("BUILD SUCCESS")
4216                    || output.contains("BUILD OKAY");
4217                self.record_verify_build_result(ok, &output).await;
4218            }
4219
4220            if matches!(
4221                call.name.as_str(),
4222                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4223            ) || is_mcp_mutating_tool(&call.name)
4224            {
4225                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
4226                    .await;
4227            }
4228
4229            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
4230                msg_results.push(receipt);
4231            }
4232        }
4233
4234        // 4. Critic Check (Specular Tier 2)
4235        // Gated: Only run on code files with substantive content to avoid burning tokens
4236        // on trivial doc/config edits.
4237        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
4238            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
4239            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
4240            let ext = std::path::Path::new(path)
4241                .extension()
4242                .and_then(|e| e.to_str())
4243                .unwrap_or("");
4244            const SKIP_EXTS: &[&str] = &[
4245                "md",
4246                "toml",
4247                "json",
4248                "txt",
4249                "yml",
4250                "yaml",
4251                "cfg",
4252                "csv",
4253                "lock",
4254                "gitignore",
4255            ];
4256            let line_count = content.lines().count();
4257            if !path.is_empty()
4258                && !content.is_empty()
4259                && !SKIP_EXTS.contains(&ext)
4260                && line_count >= 50
4261            {
4262                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
4263                    msg_results.push(ChatMessage::system(&format!(
4264                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
4265                        path, critique
4266                    )));
4267                }
4268            }
4269        }
4270
4271        ToolExecutionOutcome {
4272            call_id: real_id,
4273            tool_name: call.name,
4274            args,
4275            output,
4276            is_error,
4277            blocked_by_policy,
4278            msg_results,
4279        }
4280    }
4281}
4282
4283/// The result of an isolated tool execution.
4284/// Used to bridge Parallel/Serial execution back to the main history.
4285struct ToolExecutionOutcome {
4286    call_id: String,
4287    tool_name: String,
4288    args: Value,
4289    output: String,
4290    is_error: bool,
4291    blocked_by_policy: bool,
4292    msg_results: Vec<ChatMessage>,
4293}
4294
/// Minimal record kept for a cached tool result.
#[derive(Clone)]
struct CachedToolResult {
    /// Name of the tool associated with the cached entry.
    // NOTE(review): presumably the tool that produced the cached output — confirm at the use sites.
    tool_name: String,
}
4299
/// Reports whether `path` ends in a recognised source-code extension.
///
/// The comparison is ASCII case-insensitive. Paths without an extension (or
/// with a non-UTF-8 extension) are never considered code.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    let lowered = match std::path::Path::new(path).extension() {
        Some(raw) => raw.to_str().unwrap_or("").to_ascii_lowercase(),
        None => String::new(),
    };
    CODE_EXTENSIONS.contains(&lowered.as_str())
}
4328
4329// ── Display helpers ───────────────────────────────────────────────────────────
4330
4331pub fn format_tool_display(name: &str, args: &Value) -> String {
4332    let get = |key: &str| {
4333        args.get(key)
4334            .and_then(|v| v.as_str())
4335            .unwrap_or("")
4336            .to_string()
4337    };
4338    match name {
4339        "shell" => format!("$ {}", get("command")),
4340        "map_project" => "map project architecture".to_string(),
4341        "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
4342        "describe_toolchain" => format!("describe toolchain {}", get("topic")),
4343        "inspect_host" => format!("inspect host {}", get("topic")),
4344        _ => format!("{} {:?}", name, args),
4345    }
4346}
4347
4348// ── Text utilities ────────────────────────────────────────────────────────────
4349
/// Heuristically detects shell commands that look like host inspection
/// (PATH lookups, tool version probes, network/service/process listings, ...).
///
/// The command is lower-cased (ASCII) and checked for any of a fixed list of
/// substrings; a single hit is enough.
fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const INSPECTION_MARKERS: &[&str] = &[
        // PATH and package managers
        "$env:path",
        "pathvariable",
        "pip --version",
        "pipx --version",
        "winget --version",
        "choco",
        "scoop",
        // File and binary lookup
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        // Toolchain version probes
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        // Ports and connections
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "tasklist",
        // Network configuration
        "ipconfig",
        "get-netipconfiguration",
        "get-netadapter",
        "route print",
        "ifconfig",
        "ip addr",
        "ip route",
        "resolv.conf",
        // Services and processes
        "get-service",
        "sc query",
        "systemctl",
        "service --status-all",
        "get-process",
        "working set",
        "ps -eo",
        "ps aux",
        // Well-known user folders
        "desktop",
        "downloads",
    ];

    let haystack = command.to_ascii_lowercase();
    for marker in INSPECTION_MARKERS {
        if haystack.contains(marker) {
            return true;
        }
    }
    false
}
4407
4408// Moved strip_think_blocks to inference.rs
4409
/// Truncates `text` to at most `max_bytes` bytes, appending a notice when
/// anything was cut.
///
/// The cut point is moved back to the nearest UTF-8 character boundary so the
/// result is always valid. Text that already fits is returned unchanged.
fn cap_output(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_owned();
    }

    // Largest index <= max_bytes that does not split a character; index 0 is
    // always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&candidate| text.is_char_boundary(candidate))
        .unwrap_or(0);

    format!("{}\n... [output capped at {}B]", &text[..cut], max_bytes)
}
4426
/// Counters describing what the prompt-budget guard had to do to shrink a
/// prompt. All counters start at zero.
#[derive(Default)]
struct PromptBudgetStats {
    /// Large tool results replaced by a short summary.
    summarized_tool_results: usize,
    /// Older tool results replaced by an "omitted" placeholder.
    collapsed_tool_results: usize,
    /// Long user/assistant messages replaced by a short summary.
    trimmed_chat_messages: usize,
    /// Messages removed from the prompt entirely.
    dropped_messages: usize,
}
4434
4435fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
4436    crate::agent::inference::estimate_message_batch_tokens(messages)
4437}
4438
4439fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
4440    let budget = compaction::SummaryCompressionBudget {
4441        max_chars,
4442        max_lines: 3,
4443        max_line_chars: max_chars.clamp(80, 240),
4444    };
4445    let compressed = compaction::compress_summary(text, budget).summary;
4446    if compressed.is_empty() {
4447        String::new()
4448    } else {
4449        compressed
4450    }
4451}
4452
4453fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
4454    let tool_name = message.name.as_deref().unwrap_or("tool");
4455    let body = summarize_prompt_blob(message.content.as_str(), 320);
4456    format!(
4457        "[Prompt-budget summary of prior `{}` result]\n{}",
4458        tool_name, body
4459    )
4460}
4461
4462fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
4463    let role = message.role.as_str();
4464    let body = summarize_prompt_blob(message.content.as_str(), 240);
4465    format!(
4466        "[Prompt-budget summary of earlier {} message]\n{}",
4467        role, body
4468    )
4469}
4470
4471fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
4472    if messages.len() > 1 && messages[1].role != "user" {
4473        messages.insert(1, ChatMessage::user("Continuing previous context..."));
4474    }
4475}
4476
4477fn enforce_prompt_budget(
4478    prompt_msgs: &mut Vec<ChatMessage>,
4479    context_length: usize,
4480) -> Option<String> {
4481    let target_tokens = ((context_length as f64) * 0.68) as usize;
4482    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4483        return None;
4484    }
4485
4486    let mut stats = PromptBudgetStats::default();
4487
4488    // 1. Summarize the newest large tool outputs first.
4489    let mut tool_indices: Vec<usize> = prompt_msgs
4490        .iter()
4491        .enumerate()
4492        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4493        .collect();
4494    for idx in tool_indices.iter().rev().copied() {
4495        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4496            break;
4497        }
4498        let original = prompt_msgs[idx].content.as_str().to_string();
4499        if original.len() > 1200 {
4500            prompt_msgs[idx].content =
4501                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
4502            stats.summarized_tool_results += 1;
4503        }
4504    }
4505
4506    // 2. Collapse older tool results aggressively, keeping only the most recent two verbatim/summarized.
4507    tool_indices = prompt_msgs
4508        .iter()
4509        .enumerate()
4510        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4511        .collect();
4512    if tool_indices.len() > 2 {
4513        for idx in tool_indices
4514            .iter()
4515            .take(tool_indices.len().saturating_sub(2))
4516            .copied()
4517        {
4518            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4519                break;
4520            }
4521            prompt_msgs[idx].content = MessageContent::Text(
4522                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
4523            );
4524            stats.collapsed_tool_results += 1;
4525        }
4526    }
4527
4528    // 3. Trim older long chat messages, but preserve the final user request.
4529    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4530    for idx in 1..prompt_msgs.len() {
4531        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4532            break;
4533        }
4534        if Some(idx) == last_user_idx {
4535            continue;
4536        }
4537        let role = prompt_msgs[idx].role.as_str();
4538        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
4539            prompt_msgs[idx].content =
4540                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
4541            stats.trimmed_chat_messages += 1;
4542        }
4543    }
4544
4545    // 4. Drop the oldest non-system context until we fit, preserving the latest user request.
4546    let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4547    let mut idx = 1usize;
4548    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
4549        if Some(idx) == preserve_last_user_idx {
4550            idx += 1;
4551            if idx >= prompt_msgs.len() {
4552                break;
4553            }
4554            continue;
4555        }
4556        if idx >= prompt_msgs.len() {
4557            break;
4558        }
4559        prompt_msgs.remove(idx);
4560        stats.dropped_messages += 1;
4561    }
4562
4563    normalize_prompt_start(prompt_msgs);
4564
4565    let new_tokens = estimate_prompt_tokens(prompt_msgs);
4566    if stats.summarized_tool_results == 0
4567        && stats.collapsed_tool_results == 0
4568        && stats.trimmed_chat_messages == 0
4569        && stats.dropped_messages == 0
4570    {
4571        return None;
4572    }
4573
4574    Some(format!(
4575        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
4576        new_tokens,
4577        target_tokens,
4578        stats.summarized_tool_results,
4579        stats.collapsed_tool_results,
4580        stats.trimmed_chat_messages,
4581        stats.dropped_messages
4582    ))
4583}
4584
/// Returns true for short, direct tool-use requests that don't benefit from deep reasoning.
///
/// Used to skip the auto-/think prepend so the model calls the tool immediately
/// instead of spending thousands of tokens deliberating over a trivial task.
///
/// A request qualifies when it mentions `run_code` / `run code` (any length),
/// or when it is shorter than 120 bytes and contains one of the compute-style
/// keywords below. Matching is case-insensitive.
fn is_quick_tool_request(input: &str) -> bool {
    // Short compute/test phrasing — "calculate X", "test this", "execute Y".
    const COMPUTE_KEYWORDS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];

    let lowered = input.to_lowercase();

    // Explicit run_code requests — sandbox calls need no reasoning warmup.
    let asks_for_run_code = lowered.contains("run_code") || lowered.contains("run code");

    asks_for_run_code
        || (input.len() < 120
            && COMPUTE_KEYWORDS
                .iter()
                .any(|keyword| lowered.contains(keyword)))
}
4614
/// Splits `text` into chunks of roughly `words_per_chunk` words.
///
/// A chunk is closed right after its `words_per_chunk`-th space or newline,
/// so separators stay attached to the end of the chunk they terminate and
/// concatenating the chunks reproduces `text`. Any trailing remainder becomes
/// the final chunk; empty input yields no chunks.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buffer = String::new();
    let mut separators_seen = 0;

    for symbol in text.chars() {
        buffer.push(symbol);
        if !matches!(symbol, ' ' | '\n') {
            continue;
        }
        separators_seen += 1;
        if separators_seen >= words_per_chunk {
            // Hand the finished chunk over and start again with an empty buffer.
            pieces.push(std::mem::take(&mut buffer));
            separators_seen = 0;
        }
    }

    if !buffer.is_empty() {
        pieces.push(buffer);
    }
    pieces
}
4636
4637fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
4638    if call.name != "read_file" {
4639        return None;
4640    }
4641    let normalized_arguments =
4642        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
4643    let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
4644    let path = args.get("path").and_then(|v| v.as_str())?;
4645    Some(normalize_workspace_path(path))
4646}
4647
4648fn order_batch_reads_first(
4649    calls: Vec<crate::agent::inference::ToolCallResponse>,
4650) -> (
4651    Vec<crate::agent::inference::ToolCallResponse>,
4652    Option<String>,
4653) {
4654    let has_reads = calls.iter().any(|c| {
4655        matches!(
4656            c.function.name.as_str(),
4657            "read_file" | "inspect_lines" | "grep_files" | "list_files"
4658        )
4659    });
4660    let has_edits = calls.iter().any(|c| {
4661        matches!(
4662            c.function.name.as_str(),
4663            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4664        )
4665    });
4666    if has_reads && has_edits {
4667        let reads: Vec<_> = calls
4668            .into_iter()
4669            .filter(|c| {
4670                !matches!(
4671                    c.function.name.as_str(),
4672                    "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4673                )
4674            })
4675            .collect();
4676        let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
4677        (reads, note)
4678    } else {
4679        (calls, None)
4680    }
4681}
4682
/// Decides whether grep output describes a high-fanout search, judging only
/// by its first (summary) line.
///
/// The match count is read from the leading space-delimited token, and the
/// hunk count from the first `", "`-separated part of the form `N hunk(s)`.
/// Anything that fails to parse counts as zero. The result is `true` when
/// there are at least 8 hunks or at least 12 matches.
fn grep_output_is_high_fanout(output: &str) -> bool {
    let summary = match output.lines().next() {
        Some(first_line) => first_line,
        None => return false,
    };

    let mut hunks = 0usize;
    for segment in summary.split(", ") {
        let parsed = segment
            .strip_suffix(" hunk(s)")
            .and_then(|digits| digits.parse::<usize>().ok());
        if let Some(count) = parsed {
            hunks = count;
            break;
        }
    }

    let matches = summary
        .split(' ')
        .next()
        .and_then(|digits| digits.parse::<usize>().ok())
        .unwrap_or(0);

    matches >= 12 || hunks >= 8
}
4701
4702fn build_system_with_corrections(
4703    base: &str,
4704    hints: &[String],
4705    gpu: &Arc<GpuState>,
4706    git: &Arc<crate::agent::git_monitor::GitState>,
4707    config: &crate::agent::config::HematiteConfig,
4708) -> String {
4709    let mut system_msg = base.to_string();
4710
4711    // Inject Permission Mode.
4712    system_msg.push_str("\n\n# Permission Mode\n");
4713    let mode_label = match config.mode {
4714        crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
4715        crate::agent::config::PermissionMode::Developer => "DEVELOPER",
4716        crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
4717    };
4718    system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
4719
4720    if config.mode == crate::agent::config::PermissionMode::ReadOnly {
4721        system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
4722    } else {
4723        system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
4724    }
4725
4726    // Inject live hardware status.
4727    let (used, total) = gpu.read();
4728    if total > 0 {
4729        system_msg.push_str("\n\n# Terminal Hardware Context\n");
4730        system_msg.push_str(&format!(
4731            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
4732            gpu.gpu_name(),
4733            used as f64 / 1024.0,
4734            total as f64 / 1024.0,
4735            gpu.ratio() * 100.0
4736        ));
4737        system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
4738    }
4739
4740    // Inject Git Repository context.
4741    system_msg.push_str("\n\n# Git Repository Context\n");
4742    let git_status_label = git.label();
4743    let git_url = git.url();
4744    system_msg.push_str(&format!(
4745        "REMOTE STATUS: {} | URL: {}\n",
4746        git_status_label, git_url
4747    ));
4748
4749    // Live Snapshots (Status/Diff)
4750    let root = crate::tools::file_ops::workspace_root();
4751    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
4752        system_msg.push_str("\nGit status snapshot:\n");
4753        system_msg.push_str(&status_snapshot);
4754        system_msg.push_str("\n");
4755    }
4756
4757    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
4758        system_msg.push_str("\nGit diff snapshot:\n");
4759        system_msg.push_str(&diff_snapshot);
4760        system_msg.push_str("\n");
4761    }
4762
4763    if git_status_label == "NONE" {
4764        system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
4765    } else if git_status_label == "BEHIND" {
4766        system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
4767    }
4768
4769    // NOTE: Instruction files (CLAUDE.md, HEMATITE.md, etc.) are already injected
4770    // by InferenceEngine::build_system_prompt() via load_instruction_files().
4771    // Injecting them again here would double the token cost (~4K wasted per turn).
4772
4773    if hints.is_empty() {
4774        return system_msg;
4775    }
4776    system_msg.push_str("\n\n# Formatting Corrections\n");
4777    system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
4778    for hint in hints {
4779        system_msg.push_str(&format!("- {}\n", hint));
4780    }
4781    system_msg
4782}
4783
/// Picks a model for `user_input` using case-insensitive keyword matching.
///
/// Returns `think_model` when the input contains an editing/debugging keyword
/// and a think model is configured. Otherwise returns `fast_model` when the
/// input contains a lookup keyword and a fast model is configured. Returns
/// `None` when neither rule applies.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_KEYWORDS: [&str; 6] = ["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_KEYWORDS: [&str; 5] = ["what", "show", "find", "list", "status"];

    let lowered = user_input.to_lowercase();
    let mentions_any = |keywords: &[&str]| keywords.iter().any(|kw| lowered.contains(kw));

    let wants_think = mentions_any(&THINK_KEYWORDS);
    let wants_fast = mentions_any(&FAST_KEYWORDS);

    // The think route has priority; the fast route is only a fallback.
    think_model
        .filter(|_| wants_think)
        .or_else(|| fast_model.filter(|_| wants_fast))
}
4809
4810fn is_parallel_safe(name: &str) -> bool {
4811    let metadata = crate::agent::inference::tool_metadata_for_name(name);
4812    !metadata.mutates_workspace && !metadata.external_surface
4813}
4814
/// Decides whether a chat-mode query should use the vein.
///
/// Returns `true` when any of the following holds:
/// - `docs_only_mode` is set;
/// - the ASCII-lowercased query contains one of the history cue phrases below;
/// - the query contains a standalone `YYYY-MM-DD` shaped token.
///
/// The date check requires exactly four digits, a dash, two digits, a dash and
/// two digits. Earlier it only required a 10-character digit/dash token with a
/// dash at index 4, so strings such as `----------` or `1234-56789` were
/// mistaken for dates. Month and day ranges are intentionally not validated.
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const HISTORY_CUES: [&str; 12] = [
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    /// True when `token` has the exact shape `dddd-dd-dd`.
    fn is_iso_date_token(token: &str) -> bool {
        let bytes = token.as_bytes();
        bytes.len() == 10
            && bytes.iter().enumerate().all(|(idx, byte)| match idx {
                4 | 7 => *byte == b'-',
                _ => byte.is_ascii_digit(),
            })
    }

    if docs_only_mode {
        return true;
    }

    let lower = query.to_ascii_lowercase();
    if HISTORY_CUES.iter().any(|needle| lower.contains(needle)) {
        return true;
    }

    // Split on everything except digits and dashes so a date embedded in
    // prose ("on 2026-04-09,") becomes a token of its own.
    lower
        .split(|ch: char| !(ch.is_ascii_digit() || ch == '-'))
        .any(|token| is_iso_date_token(token))
}
4841
// Unit tests for runtime-failure classification, intent routing, direct-answer
// text, host-inspection heuristics, fix-plan rewriting, and build-output parsing.
#[cfg(test)]
mod tests {
    use super::*;

    // An LM Studio "n_keep >= n_ctx" 400 error must classify as a context-window
    // failure, and the formatted message must carry the matching failure tag.
    #[test]
    fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
        let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
        let class = classify_runtime_failure(detail);
        assert_eq!(class, RuntimeFailureClass::ContextWindow);
        assert_eq!(class.tag(), "context_window");
        assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
    }

    // ContextWindow maps to both a provider state and a blocking checkpoint;
    // ProviderDegraded maps to a provider state but to no checkpoint.
    #[test]
    fn runtime_failure_maps_to_provider_and_checkpoint_state() {
        assert_eq!(
            provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
            Some(ProviderRuntimeState::ContextWindow)
        );
        assert_eq!(
            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
            Some(OperatorCheckpointState::BlockedContextWindow)
        );
        assert_eq!(
            provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
            Some(ProviderRuntimeState::Degraded)
        );
        assert_eq!(
            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
            None
        );
    }

    // A question about who owns the tool catalog routes to the
    // ToolRegistryOwnership direct answer.
    #[test]
    fn intent_router_treats_tool_registry_ownership_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(
            intent.direct_answer,
            Some(DirectAnswerKind::ToolRegistryOwnership)
        );
    }

    // A question about runtime tool classes routes to the ToolClasses direct answer.
    #[test]
    fn intent_router_treats_tool_classes_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
    }

    // The ownership answer text must name both relevant source files and the
    // dispatch path phrase.
    #[test]
    fn tool_registry_ownership_answer_mentions_new_owner_file() {
        let answer = build_tool_registry_ownership_answer();
        assert!(answer.contains("src/agent/tool_registry.rs"));
        assert!(answer.contains("builtin dispatch path"));
        assert!(answer.contains("src/agent/conversation.rs"));
    }

    // A question about MCP server health routes to the McpLifecycle direct answer.
    #[test]
    fn intent_router_treats_mcp_lifecycle_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
    }

    // A request to skip verification and commit immediately is answered directly
    // with the UnsafeWorkflowPressure response.
    #[test]
    fn intent_router_short_circuits_unsafe_commit_pressure() {
        let intent = classify_query_intent(
            WorkflowMode::Auto,
            "Make a code change, skip verification, and commit it immediately.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(
            intent.direct_answer,
            Some(DirectAnswerKind::UnsafeWorkflowPressure)
        );
    }

    // The unsafe-workflow answer text must insist on verification before commit.
    #[test]
    fn unsafe_workflow_pressure_answer_requires_verification() {
        let answer = build_unsafe_workflow_pressure_answer();
        assert!(answer.contains("should not skip verification"));
        assert!(answer.contains("run the appropriate verification path"));
        assert!(answer.contains("only then commit"));
    }

    // A broad "how is it wired" prompt that also mentions MCP state must be
    // classified as an architecture overview with no direct answer.
    #[test]
    fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::RepoArchitecture);
        assert!(intent.architecture_overview_mode);
        assert_eq!(intent.direct_answer, None);
    }

    // A PATH / developer-tool readiness prompt enables host-inspection mode and
    // resolves to the "summary" topic.
    #[test]
    fn intent_router_marks_host_inspection_questions() {
        let intent = classify_query_intent(
            WorkflowMode::Auto,
            "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.",
        );
        assert!(intent.host_inspection_mode);
        assert_eq!(
            preferred_host_inspection_topic(
                "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development."
            ),
            Some("summary")
        );
    }

    // Covers the three branches of should_use_vein_in_chat: a dated historical
    // question, docs-only mode, and an unrelated prompt.
    #[test]
    fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
        assert!(should_use_vein_in_chat(
            "What did we decide on 2026-04-09 about docs-only mode?",
            false
        ));
        assert!(should_use_vein_in_chat("Summarize these local notes", true));
        assert!(!should_use_vein_in_chat("Tell me a joke", false));
    }

    // Each of these PATH, version, port, process, network, and service commands
    // must be recognized as structured host inspection.
    #[test]
    fn shell_host_inspection_guard_matches_path_and_version_commands() {
        assert!(shell_looks_like_structured_host_inspection(
            "$env:PATH -split ';'"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "cargo --version"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-NetTCPConnection -LocalPort 3000"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "netstat -ano | findstr :3000"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-Process | Sort-Object WS -Descending"
        ));
        assert!(shell_looks_like_structured_host_inspection("ipconfig /all"));
        assert!(shell_looks_like_structured_host_inspection("Get-Service"));
        assert!(shell_looks_like_structured_host_inspection(
            "winget --version"
        ));
    }

    // A listening-port question resolves to the "ports" topic.
    #[test]
    fn intent_router_picks_ports_for_listening_port_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me what is listening on port 3000 and whether anything unexpected is exposed."
            ),
            Some("ports")
        );
    }

    // A RAM-usage question resolves to the "processes" topic.
    #[test]
    fn intent_router_picks_processes_for_host_process_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me what processes are using the most RAM right now."
            ),
            Some("processes")
        );
    }

    // An adapter / IP / DNS question resolves to the "network" topic.
    #[test]
    fn intent_router_picks_network_for_adapter_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me my active network adapters, IP addresses, gateways, and DNS servers."
            ),
            Some("network")
        );
    }

    // A running-services question resolves to the "services" topic.
    #[test]
    fn intent_router_picks_services_for_service_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me the running services and startup types that matter for a normal dev machine."
            ),
            Some("services")
        );
    }

    // An "environment doctor" request resolves to the "env_doctor" topic.
    #[test]
    fn intent_router_picks_env_doctor_for_package_manager_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Run an environment doctor on this machine and tell me whether my PATH and package managers look sane."
            ),
            Some("env_doctor")
        );
    }

    // "How do I fix ..." questions about the host resolve to the "fix_plan" topic.
    #[test]
    fn intent_router_picks_fix_plan_for_host_remediation_questions() {
        assert_eq!(
            preferred_host_inspection_topic("How do I fix cargo not found on this machine?"),
            Some("fix_plan")
        );
        assert_eq!(
            preferred_host_inspection_topic(
                "How do I fix Hematite when LM Studio is not reachable on localhost:1234?"
            ),
            Some("fix_plan")
        );
    }

    // When a fix_plan call has no "issue" argument, it is filled from the last
    // user prompt; the expected value omits the leading "/think" line.
    #[test]
    fn fill_missing_fix_plan_issue_backfills_last_user_prompt() {
        let mut args = serde_json::json!({
            "topic": "fix_plan"
        });

        fill_missing_fix_plan_issue(
            "inspect_host",
            &mut args,
            Some("/think\nHow do I fix cargo not found on this machine?"),
        );

        assert_eq!(
            args.get("issue").and_then(|value| value.as_str()),
            Some("How do I fix cargo not found on this machine?")
        );
    }

    // A shell probe issued in response to a "how do I fix" prompt should be
    // rewritten to a fix plan.
    #[test]
    fn shell_fix_question_rewrites_to_fix_plan() {
        let args = serde_json::json!({
            "command": "where cargo"
        });

        assert!(should_rewrite_shell_to_fix_plan(
            "shell",
            &args,
            Some("How do I fix cargo not found on this machine?")
        ));
    }

    // The dedupe key of a shell probe under a fix prompt must equal the key of
    // the equivalent inspect_host fix_plan call.
    #[test]
    fn fix_plan_dedupe_key_matches_rewritten_shell_probe() {
        let latest_user_prompt = Some("How do I fix cargo not found on this machine?");
        let shell_key = normalized_tool_call_key_for_dedupe(
            "shell",
            r#"{"command":"where cargo"}"#,
            false,
            latest_user_prompt,
        );
        let fix_plan_key = normalized_tool_call_key_for_dedupe(
            "inspect_host",
            r#"{"topic":"fix_plan"}"#,
            false,
            latest_user_prompt,
        );

        assert_eq!(shell_key, fix_plan_key);
    }

    // Two cargo-style "-->" locations in different files must yield exactly two
    // captured paths.
    #[test]
    fn failing_path_parser_extracts_cargo_error_locations() {
        let output = r#"
BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:

error[E0412]: cannot find type `Foo` in this scope
  --> src/agent/conversation.rs:42:12
   |
42 |     field: Foo,
   |            ^^^ not found

error[E0308]: mismatched types
  --> src/tools/file_ops.rs:100:5
   |
   = note: expected `String`, found `&str`
"#;
        let paths = parse_failing_paths_from_build_output(output);
        assert!(
            paths.iter().any(|p| p.contains("conversation.rs")),
            "should capture conversation.rs"
        );
        assert!(
            paths.iter().any(|p| p.contains("file_ops.rs")),
            "should capture file_ops.rs"
        );
        assert_eq!(paths.len(), 2, "no duplicates");
    }

    // A "<macro-expansion>" location is skipped; only the real file path is kept.
    #[test]
    fn failing_path_parser_ignores_macro_expansions() {
        let output = r#"
  --> <macro-expansion>:1:2
  --> src/real/file.rs:10:5
"#;
        let paths = parse_failing_paths_from_build_output(output);
        assert_eq!(paths.len(), 1);
        assert!(paths[0].contains("file.rs"));
    }
}
hematite/agent/conversation.rs

hematite/agent/
conversation.rs