Skip to main content

hematite/agent/
conversation.rs

1use crate::agent::architecture_summary::{
2    build_architecture_overview_answer, prune_architecture_trace_batch,
3    prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4    summarize_project_map_output, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7    build_architect_session_reset_plan, build_authorization_policy_answer,
8    build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9    build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10    build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11    build_session_reset_semantics_answer, build_tool_classes_answer,
12    build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13    build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16    ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17    ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20    action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21    is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24    attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25    RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28    classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
29    preferred_host_inspection_topic, DirectAnswerKind, QueryIntentClass,
30};
31use crate::agent::tool_registry::dispatch_builtin_tool;
32// SystemPromptBuilder is no longer used — InferenceEngine::build_system_prompt() is canonical.
33use crate::agent::compaction::{self, CompactionConfig};
34use crate::ui::gpu_monitor::GpuState;
35
36use serde_json::Value;
37use std::sync::Arc;
38use tokio::sync::{mpsc, Mutex};
39// -- Session persistence -------------------------------------------------------
40
41#[derive(Clone, Debug, Default)]
42pub struct UserTurn {
43    pub text: String,
44    pub attached_document: Option<AttachedDocument>,
45    pub attached_image: Option<AttachedImage>,
46}
47
/// A document attached to a user turn, carried inline as text.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name shown in the `[Attached document: …]` marker.
    pub name: String,
    /// Document body that gets spliced into the prompt.
    pub content: String,
}
53
/// An image attached to a user turn, referenced by path.
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name shown in the `[Attached image: …]` marker.
    pub name: String,
    /// Location of the image; how it is resolved is decided by the consumer.
    pub path: String,
}
59
60impl UserTurn {
61    pub fn text(text: impl Into<String>) -> Self {
62        Self {
63            text: text.into(),
64            attached_document: None,
65            attached_image: None,
66        }
67    }
68}
69
70#[derive(serde::Serialize, serde::Deserialize)]
71struct SavedSession {
72    running_summary: Option<String>,
73    #[serde(default)]
74    session_memory: crate::agent::compaction::SessionMemory,
75}
76
/// Bookkeeping behind the proof-before-action checks.
///
/// NOTE(review): the field meanings below are read off the field names and
/// types only; the code that maintains them is outside this view.
#[derive(Default)]
struct ActionGroundingState {
    /// Counter identifying the current turn.
    turn_index: u64,
    /// Path -> turn number, for paths recorded as observed.
    observed_paths: std::collections::HashMap<String, u64>,
    /// Path -> turn number, for paths recorded as inspected.
    inspected_paths: std::collections::HashMap<String, u64>,
    /// Turn of the most recent build verification, if one has run.
    last_verify_build_turn: Option<u64>,
    /// Outcome flag of the most recent build verification.
    last_verify_build_ok: bool,
    /// Paths associated with the most recent failed build.
    last_failed_build_paths: Vec<String>,
    /// Flag tracking whether code changed after the last verification.
    code_changed_since_verify: bool,
}
87
/// Scope guard around a shared boolean flag: when the guard is dropped the
/// flag is reset to `false`, whichever way the scope is left.
struct PlanExecutionGuard {
    /// Shared flag cleared on drop.
    flag: Arc<std::sync::atomic::AtomicBool>,
}

impl Drop for PlanExecutionGuard {
    /// Clears the guarded flag with sequentially consistent ordering.
    fn drop(&mut self) {
        let ordering = std::sync::atomic::Ordering::SeqCst;
        self.flag.store(false, ordering);
    }
}
97
/// Workflow mode selected for the conversation. `Auto` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    #[default]
    Auto,
    Ask,
    Code,
    Architect,
    ReadOnly,
    /// Clean conversational mode — lighter prompt, no coding agent scaffolding,
    /// tools available but not pushed. Vein RAG still runs for context.
    Chat,
}

impl WorkflowMode {
    /// Upper-case display label for the mode.
    fn label(self) -> &'static str {
        match self {
            Self::Auto => "AUTO",
            Self::Ask => "ASK",
            Self::Code => "CODE",
            Self::Architect => "ARCHITECT",
            Self::ReadOnly => "READ-ONLY",
            Self::Chat => "CHAT",
        }
    }

    /// `true` for the modes that must not mutate anything:
    /// `Ask`, `Architect` and `ReadOnly`.
    fn is_read_only(self) -> bool {
        match self {
            Self::Ask | Self::Architect | Self::ReadOnly => true,
            Self::Auto | Self::Code | Self::Chat => false,
        }
    }

    /// `true` only for `Chat`.
    pub(crate) fn is_chat(self) -> bool {
        self == Self::Chat
    }
}
134
135fn session_path() -> std::path::PathBuf {
136    crate::tools::file_ops::workspace_root()
137        .join(".hematite")
138        .join("session.json")
139}
140
141fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
142    let path = session_path();
143    if !path.exists() {
144        return (None, crate::agent::compaction::SessionMemory::default());
145    }
146    let Ok(data) = std::fs::read_to_string(&path) else {
147        return (None, crate::agent::compaction::SessionMemory::default());
148    };
149    let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
150        return (None, crate::agent::compaction::SessionMemory::default());
151    };
152    (saved.running_summary, saved.session_memory)
153}
154
155fn reset_task_files() {
156    let root = crate::tools::file_ops::workspace_root();
157    let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
158    let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
159    let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
160    let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
161    let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
162    let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
163}
164
165fn purge_persistent_memory() {
166    let root = crate::tools::file_ops::workspace_root();
167    let mem_dir = root.join(".hematite").join("memories");
168    if mem_dir.exists() {
169        let _ = std::fs::remove_dir_all(&mem_dir);
170        let _ = std::fs::create_dir_all(&mem_dir);
171    }
172
173    let log_dir = root.join(".hematite_logs");
174    if log_dir.exists() {
175        if let Ok(entries) = std::fs::read_dir(&log_dir) {
176            for entry in entries.flatten() {
177                let _ = std::fs::write(entry.path(), "");
178            }
179        }
180    }
181}
182
183fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
184    let mut out = prompt.trim().to_string();
185    if let Some(doc) = user_turn.attached_document.as_ref() {
186        out = format!(
187            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
188            doc.name, doc.content, out
189        );
190    }
191    if let Some(image) = user_turn.attached_image.as_ref() {
192        out = if out.is_empty() {
193            format!("[Attached image: {}]", image.name)
194        } else {
195            format!("[Attached image: {}]\n\n{}", image.name, out)
196        };
197    }
198    out
199}
200
201fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
202    let mut prefixes = Vec::new();
203    if let Some(doc) = user_turn.attached_document.as_ref() {
204        prefixes.push(format!("[Attached document: {}]", doc.name));
205    }
206    if let Some(image) = user_turn.attached_image.as_ref() {
207        prefixes.push(format!("[Attached image: {}]", image.name));
208    }
209    if prefixes.is_empty() {
210        prompt.to_string()
211    } else if prompt.trim().is_empty() {
212        prefixes.join("\n")
213    } else {
214        format!("{}\n{}", prefixes.join("\n"), prompt)
215    }
216}
217
/// Coarse classification of a runtime failure.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    ContextWindow,
    ProviderDegraded,
    ToolArgMalformed,
    ToolPolicyBlocked,
    ToolLoop,
    VerificationFailed,
    EmptyModelResponse,
    Unknown,
}

impl RuntimeFailureClass {
    /// Stable snake_case identifier for the class.
    fn tag(self) -> &'static str {
        match self {
            Self::ContextWindow => "context_window",
            Self::ProviderDegraded => "provider_degraded",
            Self::ToolArgMalformed => "tool_arg_malformed",
            Self::ToolPolicyBlocked => "tool_policy_blocked",
            Self::ToolLoop => "tool_loop",
            Self::VerificationFailed => "verification_failed",
            Self::EmptyModelResponse => "empty_model_response",
            Self::Unknown => "unknown",
        }
    }

    /// Human-readable advice on what to do next for this class of failure.
    fn operator_guidance(self) -> &'static str {
        match self {
            Self::ContextWindow => "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.",
            Self::ProviderDegraded => "Retry once automatically, then narrow the turn or restart LM Studio if it persists.",
            Self::ToolArgMalformed => "Retry with repaired or narrower tool arguments instead of repeating the same malformed call.",
            Self::ToolPolicyBlocked => "Stay inside the allowed workflow or switch modes before retrying.",
            Self::ToolLoop => "Stop repeating the same failing tool pattern and switch to a narrower recovery step.",
            Self::VerificationFailed => "Fix the build or test failure before treating the task as complete.",
            Self::EmptyModelResponse => "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.",
            Self::Unknown => "Inspect the latest grounded tool results or provider status before retrying.",
        }
    }
}
273
274fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
275    let lower = detail.to_ascii_lowercase();
276    if lower.contains("context_window_blocked")
277        || lower.contains("context ceiling reached")
278        || lower.contains("exceeds the")
279        || ((lower.contains("n_keep") && lower.contains("n_ctx"))
280            || lower.contains("context length")
281            || lower.contains("keep from the initial prompt")
282            || lower.contains("prompt is greater than the context length"))
283    {
284        RuntimeFailureClass::ContextWindow
285    } else if lower.contains("empty response from model")
286        || lower.contains("model returned an empty response")
287    {
288        RuntimeFailureClass::EmptyModelResponse
289    } else if lower.contains("lm studio unreachable")
290        || lower.contains("lm studio error")
291        || lower.contains("request failed")
292        || lower.contains("response parse error")
293        || lower.contains("provider degraded")
294    {
295        RuntimeFailureClass::ProviderDegraded
296    } else if lower.contains("missing required argument")
297        || lower.contains("json repair failed")
298        || lower.contains("invalid pattern")
299        || lower.contains("invalid line range")
300    {
301        RuntimeFailureClass::ToolArgMalformed
302    } else if lower.contains("action blocked:")
303        || lower.contains("access denied")
304        || lower.contains("declined by user")
305    {
306        RuntimeFailureClass::ToolPolicyBlocked
307    } else if lower.contains("too many consecutive tool errors")
308        || lower.contains("repeated tool failures")
309        || lower.contains("stuck in a loop")
310    {
311        RuntimeFailureClass::ToolLoop
312    } else if lower.contains("build failed")
313        || lower.contains("verification failed")
314        || lower.contains("verify_build")
315    {
316        RuntimeFailureClass::VerificationFailed
317    } else {
318        RuntimeFailureClass::Unknown
319    }
320}
321
322fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
323    format!(
324        "[failure:{}] {} Detail: {}",
325        class.tag(),
326        class.operator_guidance(),
327        detail.trim()
328    )
329}
330
331fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
332    match class {
333        RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
334        RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
335        RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
336        _ => None,
337    }
338}
339
340fn checkpoint_state_for_runtime_failure(
341    class: RuntimeFailureClass,
342) -> Option<OperatorCheckpointState> {
343    match class {
344        RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
345        RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
346        RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
347        RuntimeFailureClass::VerificationFailed => {
348            Some(OperatorCheckpointState::BlockedVerification)
349        }
350        _ => None,
351    }
352}
353
354fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
355    match class {
356        RuntimeFailureClass::ProviderDegraded => {
357            "LM Studio degraded during the turn; retrying once before surfacing a failure."
358        }
359        RuntimeFailureClass::EmptyModelResponse => {
360            "The model returned an empty reply; retrying once before surfacing a failure."
361        }
362        _ => "Runtime recovery in progress.",
363    }
364}
365
366fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
367    match class {
368        RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
369        RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
370        RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
371        RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
372        _ => "Operator checkpoint updated.",
373    }
374}
375
376fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
377    match class {
378        RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
379        RuntimeFailureClass::ProviderDegraded => {
380            "LM Studio degraded and did not recover cleanly; operator action is now required."
381        }
382        RuntimeFailureClass::EmptyModelResponse => {
383            "LM Studio returned an empty reply after recovery; operator action is now required."
384        }
385        RuntimeFailureClass::ToolLoop => {
386            "Repeated failing tool pattern detected; Hematite stopped the loop."
387        }
388        _ => "Runtime failure surfaced to the operator.",
389    }
390}
391
392fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
393    matches!(
394        class,
395        RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
396    )
397}
398
399fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
400    match class {
401        RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
402        RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
403        RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
404        RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
405        RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
406        RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
407        RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
408    }
409}
410
411fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
412    format!(
413        "{} [{}]",
414        plan.recipe.scenario.label(),
415        plan.recipe.steps_summary()
416    )
417}
418
419fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
420    match decision {
421        RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
422        RecoveryDecision::Escalate {
423            recipe,
424            attempts_made,
425            ..
426        } => format!(
427            "{} escalated after {} / {} [{}]",
428            recipe.scenario.label(),
429            attempts_made,
430            recipe.max_attempts.max(1),
431            recipe.steps_summary()
432        ),
433    }
434}
435
436/// Parse file paths from cargo/compiler error output.
437/// Handles lines like `  --> src/foo/bar.rs:34:12` and `error: could not compile`.
438fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
439    let root = crate::tools::file_ops::workspace_root();
440    let mut paths: Vec<String> = output
441        .lines()
442        .filter_map(|line| {
443            let trimmed = line.trim_start();
444            // Cargo error location: "--> path/to/file.rs:line:col"
445            let after_arrow = trimmed.strip_prefix("--> ")?;
446            let file_part = after_arrow.split(':').next()?;
447            if file_part.is_empty() || file_part.starts_with('<') {
448                return None;
449            }
450            let p = std::path::Path::new(file_part);
451            let resolved = if p.is_absolute() {
452                p.to_path_buf()
453            } else {
454                root.join(p)
455            };
456            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
457        })
458        .collect();
459    paths.sort();
460    paths.dedup();
461    paths
462}
463
464fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
465    match mode {
466        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
467        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
468        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
469        _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
470    }
471}
472
/// Fixed instruction block describing the output format expected in
/// ARCHITECT mode: six ASCII headings, kept short, after at most a couple of
/// read-only inspection tools.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow` over a broad `map_project` scan.\n",
        "If you do use `map_project`, keep it minimal and scoped.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented."
    )
}
489
/// Returns `true` when the input asks to run the current plan, i.e. it
/// contains the phrase "implement the current plan" in any ASCII casing.
///
/// The phrase may appear anywhere in the input, so surrounding whitespace,
/// a trailing period or extra words do not matter.
fn is_current_plan_execution_request(user_input: &str) -> bool {
    // A substring test already covers the exact-match forms (with or
    // without the trailing period) and makes trimming unnecessary.
    user_input
        .to_ascii_lowercase()
        .contains("implement the current plan")
}
496
497fn is_plan_scoped_tool(name: &str) -> bool {
498    crate::agent::inference::tool_metadata_for_name(name).plan_scope
499}
500
501fn is_current_plan_irrelevant_tool(name: &str) -> bool {
502    !crate::agent::inference::tool_metadata_for_name(name).plan_scope
503}
504
505fn is_non_mutating_plan_step_tool(name: &str) -> bool {
506    let metadata = crate::agent::inference::tool_metadata_for_name(name);
507    metadata.plan_scope && !metadata.mutates_workspace
508}
509
510fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
511    let trimmed = user_input.trim();
512    for (prefix, mode) in [
513        ("/ask", WorkflowMode::Ask),
514        ("/code", WorkflowMode::Code),
515        ("/architect", WorkflowMode::Architect),
516        ("/read-only", WorkflowMode::ReadOnly),
517        ("/auto", WorkflowMode::Auto),
518    ] {
519        if let Some(rest) = trimmed.strip_prefix(prefix) {
520            let rest = rest.trim();
521            if !rest.is_empty() {
522                return Some((mode, rest));
523            }
524        }
525    }
526    None
527}
528
529// Tool catalogue
530
531/// Returns the full set of tools exposed to the model.
532pub fn get_tools() -> Vec<ToolDefinition> {
533    crate::agent::tool_registry::get_tools()
534}
535
536pub struct ConversationManager {
537    /// Full conversation history in OpenAI format.
538    pub history: Vec<ChatMessage>,
539    pub engine: Arc<InferenceEngine>,
540    pub tools: Vec<ToolDefinition>,
541    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
542    pub professional: bool,
543    pub brief: bool,
544    pub snark: u8,
545    pub chaos: u8,
546    /// Model to use for simple read-only tasks (optional, user-supplied via --fast-model).
547    pub fast_model: Option<String>,
548    /// Model to use for complex write/build tasks (optional, user-supplied via --think-model).
549    pub think_model: Option<String>,
550    /// Files where whitespace auto-correction fired this session.
551    pub correction_hints: Vec<String>,
552    /// Running background summary of pruned older messages.
553    pub running_summary: Option<String>,
554    /// Live hardware telemetry handle.
555    pub gpu_state: Arc<GpuState>,
556    /// Local RAG memory — FTS5-indexed project source.
557    pub vein: crate::memory::vein::Vein,
558    /// Append-only session transcript logger.
559    pub transcript: crate::agent::transcript::TranscriptLogger,
560    /// Thread-safe cancellation signal for the current agent turn.
561    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
562    /// Shared Git remote state (for persistent connectivity checks).
563    pub git_state: Arc<crate::agent::git_monitor::GitState>,
564    /// Reasoning think-mode override. None = let model decide. Some(true) = force /think.
565    /// Some(false) = force /no_think (fast mode, 3-5x quicker for simple tasks).
566    pub think_mode: Option<bool>,
567    workflow_mode: WorkflowMode,
568    /// Layer 6: Dynamic Task Context (extracted during compaction)
569    pub session_memory: crate::agent::compaction::SessionMemory,
570    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
571    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
572    /// Personality description for the current Rusty soul — used in chat mode system prompt.
573    pub soul_personality: String,
574    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
575    /// Active reasoning summary extracted from the previous model turn (Gemma-4 Native).
576    pub reasoning_history: Option<String>,
577    /// Layer 8: Active Reference Pinning (Context Locked)
578    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
579    /// Hard action-grounding state for proof-before-action checks.
580    action_grounding: Arc<Mutex<ActionGroundingState>>,
581    /// True only during `/code Implement the current plan.` style execution turns.
582    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
583    /// Typed per-turn recovery attempt tracking.
584    recovery_context: RecoveryContext,
585    /// L1 context block — hot files summary injected into the system prompt.
586    /// Built once after vein init and updated as edits accumulate heat.
587    pub l1_context: Option<String>,
588}
589
590impl ConversationManager {
591    fn vein_docs_only_mode(&self) -> bool {
592        !crate::tools::file_ops::is_project_workspace()
593    }
594
595    fn refresh_vein_index(&mut self) -> usize {
596        let count = if self.vein_docs_only_mode() {
597            let root = crate::tools::file_ops::workspace_root();
598            tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
599        } else {
600            tokio::task::block_in_place(|| self.vein.index_project())
601        };
602        self.l1_context = self.vein.l1_context();
603        count
604    }
605
606    fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
607        let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
608        let workspace_mode = if self.vein_docs_only_mode() {
609            "docs-only (outside a project workspace)"
610        } else {
611            "project workspace"
612        };
613        let active_room = snapshot.active_room.as_deref().unwrap_or("none");
614        let mut out = format!(
615            "Vein Inspection\n\
616             Workspace mode: {workspace_mode}\n\
617             Indexed this pass: {indexed_this_pass}\n\
618             Indexed source files: {}\n\
619             Indexed docs: {}\n\
620             Indexed session exchanges: {}\n\
621             Embedded source/doc chunks: {}\n\
622             Embeddings available: {}\n\
623             Active room bias: {active_room}\n\
624             L1 hot-files block: {}\n",
625            snapshot.indexed_source_files,
626            snapshot.indexed_docs,
627            snapshot.indexed_session_exchanges,
628            snapshot.embedded_source_doc_chunks,
629            if snapshot.has_any_embeddings {
630                "yes"
631            } else {
632                "no"
633            },
634            if snapshot.l1_ready {
635                "ready"
636            } else {
637                "not built yet"
638            },
639        );
640
641        if snapshot.hot_files.is_empty() {
642            out.push_str("Hot files: none yet.\n");
643            return out;
644        }
645
646        out.push_str("\nHot files by room:\n");
647        let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
648            std::collections::BTreeMap::new();
649        for file in &snapshot.hot_files {
650            by_room.entry(file.room.as_str()).or_default().push(file);
651        }
652        for (room, files) in by_room {
653            out.push_str(&format!("[{}]\n", room));
654            for file in files {
655                out.push_str(&format!(
656                    "- {} [{} edit{}]\n",
657                    file.path,
658                    file.heat,
659                    if file.heat == 1 { "" } else { "s" }
660                ));
661            }
662        }
663
664        out
665    }
666
667    fn latest_user_prompt(&self) -> Option<&str> {
668        self.history
669            .iter()
670            .rev()
671            .find(|msg| msg.role == "user")
672            .map(|msg| msg.content.as_str())
673    }
674
675    async fn emit_direct_response(
676        &mut self,
677        tx: &mpsc::Sender<InferenceEvent>,
678        raw_user_input: &str,
679        effective_user_input: &str,
680        response: &str,
681    ) {
682        self.history.push(ChatMessage::user(effective_user_input));
683        self.history.push(ChatMessage::assistant_text(response));
684        self.transcript.log_user(raw_user_input);
685        self.transcript.log_agent(response);
686        for chunk in chunk_text(response, 8) {
687            if !chunk.is_empty() {
688                let _ = tx.send(InferenceEvent::Token(chunk)).await;
689            }
690        }
691        let _ = tx.send(InferenceEvent::Done).await;
692        self.trim_history(80);
693        self.refresh_session_memory();
694        self.save_session();
695    }
696
697    async fn emit_operator_checkpoint(
698        &mut self,
699        tx: &mpsc::Sender<InferenceEvent>,
700        state: OperatorCheckpointState,
701        summary: impl Into<String>,
702    ) {
703        let summary = summary.into();
704        self.session_memory
705            .record_checkpoint(state.label(), summary.clone());
706        let _ = tx
707            .send(InferenceEvent::OperatorCheckpoint { state, summary })
708            .await;
709    }
710
711    async fn emit_recovery_recipe_summary(
712        &mut self,
713        tx: &mpsc::Sender<InferenceEvent>,
714        state: impl Into<String>,
715        summary: impl Into<String>,
716    ) {
717        let state = state.into();
718        let summary = summary.into();
719        self.session_memory.record_recovery(state, summary.clone());
720        let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
721    }
722
723    async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
724        let _ = tx
725            .send(InferenceEvent::ProviderStatus {
726                state: ProviderRuntimeState::Live,
727                summary: String::new(),
728            })
729            .await;
730        self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
731            .await;
732    }
733
734    async fn emit_prompt_pressure_for_messages(
735        &self,
736        tx: &mpsc::Sender<InferenceEvent>,
737        messages: &[ChatMessage],
738    ) {
739        let context_length = self.engine.current_context_length();
740        let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
741            crate::agent::inference::estimate_prompt_pressure(
742                messages,
743                &self.tools,
744                context_length,
745            );
746        let _ = tx
747            .send(InferenceEvent::PromptPressure {
748                estimated_input_tokens,
749                reserved_output_tokens,
750                estimated_total_tokens,
751                context_length,
752                percent,
753            })
754            .await;
755    }
756
757    async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
758        let context_length = self.engine.current_context_length();
759        let _ = tx
760            .send(InferenceEvent::PromptPressure {
761                estimated_input_tokens: 0,
762                reserved_output_tokens: 0,
763                estimated_total_tokens: 0,
764                context_length,
765                percent: 0,
766            })
767            .await;
768    }
769
770    async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
771        let context_length = self.engine.current_context_length();
772        let vram_ratio = self.gpu_state.ratio();
773        let config = CompactionConfig::adaptive(context_length, vram_ratio);
774        let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
775        let percent = if config.max_estimated_tokens == 0 {
776            0
777        } else {
778            ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
779        };
780
781        let _ = tx
782            .send(InferenceEvent::CompactionPressure {
783                estimated_tokens,
784                threshold_tokens: config.max_estimated_tokens,
785                percent,
786            })
787            .await;
788    }
789
790    async fn refresh_runtime_profile_and_report(
791        &mut self,
792        tx: &mpsc::Sender<InferenceEvent>,
793        reason: &str,
794    ) -> Option<(String, usize, bool)> {
795        let refreshed = self.engine.refresh_runtime_profile().await;
796        if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
797            let _ = tx
798                .send(InferenceEvent::RuntimeProfile {
799                    model_id: model_id.clone(),
800                    context_length: *context_length,
801                })
802                .await;
803            self.transcript.log_system(&format!(
804                "Runtime profile refresh ({}): model={} ctx={} changed={}",
805                reason, model_id, context_length, changed
806            ));
807        }
808        refreshed
809    }
810
811    pub fn new(
812        engine: Arc<InferenceEngine>,
813        professional: bool,
814        brief: bool,
815        snark: u8,
816        chaos: u8,
817        soul_personality: String,
818        fast_model: Option<String>,
819        think_model: Option<String>,
820        gpu_state: Arc<GpuState>,
821        git_state: Arc<crate::agent::git_monitor::GitState>,
822        swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
823        voice_manager: Arc<crate::ui::voice::VoiceManager>,
824    ) -> Self {
825        let (saved_summary, saved_memory) = load_session_data();
826
827        // Build the initial mcp_manager
828        let mcp_manager = Arc::new(tokio::sync::Mutex::new(
829            crate::agent::mcp_manager::McpManager::new(),
830        ));
831
832        // Build the initial system prompt using the canonical InferenceEngine path.
833        let dynamic_instructions =
834            engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
835
836        let history = vec![ChatMessage::system(&dynamic_instructions)];
837
838        let vein_path = crate::tools::file_ops::workspace_root()
839            .join(".hematite")
840            .join("vein.db");
841        let vein_base_url = engine.base_url.clone();
842        let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
843            .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
844
845        Self {
846            history,
847            engine,
848            tools: get_tools(),
849            mcp_manager,
850            professional,
851            brief,
852            snark,
853            chaos,
854            fast_model,
855            think_model,
856            correction_hints: Vec::new(),
857            running_summary: saved_summary,
858            gpu_state,
859            vein,
860            transcript: crate::agent::transcript::TranscriptLogger::new(),
861            cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
862            git_state,
863            think_mode: None,
864            workflow_mode: WorkflowMode::Auto,
865            session_memory: saved_memory,
866            swarm_coordinator,
867            voice_manager,
868            soul_personality,
869            lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
870                crate::tools::file_ops::workspace_root(),
871            ))),
872            reasoning_history: None,
873            pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
874            action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
875            plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
876            recovery_context: RecoveryContext::default(),
877            l1_context: None,
878        }
879    }
880
881    /// Index the project into The Vein. Call once after construction.
882    /// Uses block_in_place so the tokio runtime thread isn't parked.
883    pub fn initialize_vein(&mut self) -> usize {
884        self.refresh_vein_index()
885    }
886
887    fn save_session(&self) {
888        let path = session_path();
889        if let Some(parent) = path.parent() {
890            let _ = std::fs::create_dir_all(parent);
891        }
892        let saved = SavedSession {
893            running_summary: self.running_summary.clone(),
894            session_memory: self.session_memory.clone(),
895        };
896        if let Ok(json) = serde_json::to_string(&saved) {
897            let _ = std::fs::write(&path, json);
898        }
899    }
900
901    fn save_empty_session(&self) {
902        let path = session_path();
903        if let Some(parent) = path.parent() {
904            let _ = std::fs::create_dir_all(parent);
905        }
906        let saved = SavedSession {
907            running_summary: None,
908            session_memory: crate::agent::compaction::SessionMemory::default(),
909        };
910        if let Ok(json) = serde_json::to_string(&saved) {
911            let _ = std::fs::write(&path, json);
912        }
913    }
914
915    fn refresh_session_memory(&mut self) {
916        let current_plan = self.session_memory.current_plan.clone();
917        let previous_memory = self.session_memory.clone();
918        self.session_memory = compaction::extract_memory(&self.history);
919        self.session_memory.current_plan = current_plan;
920        self.session_memory
921            .inherit_runtime_ledger_from(&previous_memory);
922    }
923
924    fn build_chat_system_prompt(&self) -> String {
925        let species = &self.engine.species;
926        let personality = &self.soul_personality;
927        format!(
928            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
929             {personality}\n\n\
930             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
931             Rules:\n\
932             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
933             - Answer directly. One paragraph is usually right.\n\
934             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
935             - Don't narrate your reasoning or mention tool names unprompted.\n\
936             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
937             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
938             - If the user wants the full coding harness, they can type `/agent`.\n",
939        )
940    }
941
942    fn append_session_handoff(&self, system_msg: &mut String) {
943        let has_summary = self
944            .running_summary
945            .as_ref()
946            .map(|s| !s.trim().is_empty())
947            .unwrap_or(false);
948        let has_memory = self.session_memory.has_signal();
949
950        if !has_summary && !has_memory {
951            return;
952        }
953
954        system_msg.push_str(
955            "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
956             This is compact carry-over from earlier work on this machine.\n\
957             Use it only when it helps the current request.\n\
958             Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
959        );
960
961        if has_memory {
962            system_msg.push_str("\n## Active Task Memory\n");
963            system_msg.push_str(&self.session_memory.to_prompt());
964        }
965
966        if let Some(summary) = self.running_summary.as_deref() {
967            if !summary.trim().is_empty() {
968                system_msg.push_str("\n## Compacted Session Summary\n");
969                system_msg.push_str(summary);
970                system_msg.push('\n');
971            }
972        }
973    }
974
975    fn set_workflow_mode(&mut self, mode: WorkflowMode) {
976        self.workflow_mode = mode;
977    }
978
979    fn current_plan_summary(&self) -> Option<String> {
980        self.session_memory
981            .current_plan
982            .as_ref()
983            .filter(|plan| plan.has_signal())
984            .map(|plan| plan.summary_line())
985    }
986
987    fn current_plan_allowed_paths(&self) -> Vec<String> {
988        self.session_memory
989            .current_plan
990            .as_ref()
991            .map(|plan| {
992                plan.target_files
993                    .iter()
994                    .map(|path| normalize_workspace_path(path))
995                    .collect()
996            })
997            .unwrap_or_default()
998    }
999
1000    fn persist_architect_handoff(&mut self, response: &str) {
1001        if self.workflow_mode != WorkflowMode::Architect {
1002            return;
1003        }
1004        let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1005            return;
1006        };
1007        let _ = crate::tools::plan::save_plan_handoff(&plan);
1008        self.session_memory.current_plan = Some(plan);
1009    }
1010
1011    async fn begin_grounded_turn(&self) -> u64 {
1012        let mut state = self.action_grounding.lock().await;
1013        state.turn_index += 1;
1014        state.turn_index
1015    }
1016
1017    async fn reset_action_grounding(&self) {
1018        let mut state = self.action_grounding.lock().await;
1019        *state = ActionGroundingState::default();
1020    }
1021
1022    async fn record_read_observation(&self, path: &str) {
1023        let normalized = normalize_workspace_path(path);
1024        let mut state = self.action_grounding.lock().await;
1025        let turn = state.turn_index;
1026        // read_file returns full file content with line numbers — sufficient for
1027        // the model to know exact text before editing, so it satisfies the
1028        // line-inspection grounding check too.
1029        state.observed_paths.insert(normalized.clone(), turn);
1030        state.inspected_paths.insert(normalized, turn);
1031    }
1032
1033    async fn record_line_inspection(&self, path: &str) {
1034        let normalized = normalize_workspace_path(path);
1035        let mut state = self.action_grounding.lock().await;
1036        let turn = state.turn_index;
1037        state.observed_paths.insert(normalized.clone(), turn);
1038        state.inspected_paths.insert(normalized, turn);
1039    }
1040
1041    async fn record_verify_build_result(&self, ok: bool, output: &str) {
1042        let mut state = self.action_grounding.lock().await;
1043        let turn = state.turn_index;
1044        state.last_verify_build_turn = Some(turn);
1045        state.last_verify_build_ok = ok;
1046        if ok {
1047            state.code_changed_since_verify = false;
1048            state.last_failed_build_paths.clear();
1049        } else {
1050            state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1051        }
1052    }
1053
1054    fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
1055        self.session_memory.record_verification(ok, summary);
1056    }
1057
1058    async fn record_successful_mutation(&self, path: Option<&str>) {
1059        let mut state = self.action_grounding.lock().await;
1060        state.code_changed_since_verify = match path {
1061            Some(p) => is_code_like_path(p),
1062            None => true,
1063        };
1064    }
1065
1066    async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
1067        if self
1068            .plan_execution_active
1069            .load(std::sync::atomic::Ordering::SeqCst)
1070        {
1071            if is_current_plan_irrelevant_tool(name) {
1072                return Err(format!(
1073                    "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
1074                    name
1075                ));
1076            }
1077
1078            if name == "map_project" {
1079                return Err(
1080                    "Action blocked: `map_project` is too broad for current-plan execution. Use the target files from the saved plan and inspect them directly with built-in workspace tools."
1081                        .to_string(),
1082                );
1083            }
1084
1085            if is_plan_scoped_tool(name) {
1086                let allowed_paths = self.current_plan_allowed_paths();
1087                if !allowed_paths.is_empty() {
1088                    let in_allowed = match name {
1089                        "auto_pin_context" => args
1090                            .get("paths")
1091                            .and_then(|v| v.as_array())
1092                            .map(|paths| {
1093                                !paths.is_empty()
1094                                    && paths.iter().all(|v| {
1095                                        v.as_str()
1096                                            .map(normalize_workspace_path)
1097                                            .map(|p| allowed_paths.contains(&p))
1098                                            .unwrap_or(false)
1099                                    })
1100                            })
1101                            .unwrap_or(false),
1102                        "grep_files" | "list_files" => args
1103                            .get("path")
1104                            .and_then(|v| v.as_str())
1105                            .map(normalize_workspace_path)
1106                            .map(|p| allowed_paths.contains(&p))
1107                            .unwrap_or(false),
1108                        _ => action_target_path(name, args)
1109                            .map(|p| allowed_paths.contains(&p))
1110                            .unwrap_or(false),
1111                    };
1112
1113                    if !in_allowed {
1114                        let allowed = allowed_paths
1115                            .iter()
1116                            .map(|p| format!("`{}`", p))
1117                            .collect::<Vec<_>>()
1118                            .join(", ");
1119                        return Err(format!(
1120                            "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
1121                            allowed
1122                        ));
1123                    }
1124                }
1125            }
1126
1127            if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
1128                if let Some(target) = action_target_path(name, args) {
1129                    let state = self.action_grounding.lock().await;
1130                    let recently_inspected = state
1131                        .inspected_paths
1132                        .get(&target)
1133                        .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1134                        .unwrap_or(false);
1135                    drop(state);
1136                    if !recently_inspected {
1137                        return Err(format!(
1138                            "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
1139                            name, target
1140                        ));
1141                    }
1142                }
1143            }
1144        }
1145
1146        if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
1147            return Err(
1148                "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
1149                    .to_string(),
1150            );
1151        }
1152
1153        if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
1154            if name == "shell" {
1155                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1156                let risk = crate::tools::guard::classify_bash_risk(command);
1157                if !matches!(risk, crate::tools::RiskLevel::Safe) {
1158                    return Err(format!(
1159                        "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
1160                        self.workflow_mode.label()
1161                    ));
1162                }
1163            } else {
1164                return Err(format!(
1165                    "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
1166                    self.workflow_mode.label()
1167                ));
1168            }
1169        }
1170
1171        let normalized_target = action_target_path(name, args);
1172        if let Some(target) = normalized_target.as_deref() {
1173            if matches!(
1174                name,
1175                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1176            ) {
1177                if let Some(prompt) = self.latest_user_prompt() {
1178                    if docs_edit_without_explicit_request(prompt, target) {
1179                        return Err(format!(
1180                            "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
1181                            target
1182                        ));
1183                    }
1184                }
1185            }
1186            let path_exists = std::path::Path::new(target).exists();
1187            if path_exists {
1188                let state = self.action_grounding.lock().await;
1189                let pinned = self.pinned_files.lock().await;
1190                let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
1191                drop(pinned);
1192
1193                // edit_file and multi_search_replace match text exactly, so they need a
1194                // tighter evidence bar than a plain read. Require inspect_lines on the
1195                // target within the last 3 turns. A read_file in the *same* turn is also
1196                // accepted (the model just loaded the file and is making an immediate edit).
1197                let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
1198                let recently_inspected = state
1199                    .inspected_paths
1200                    .get(target)
1201                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1202                    .unwrap_or(false);
1203                let same_turn_read = state
1204                    .observed_paths
1205                    .get(target)
1206                    .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
1207                    .unwrap_or(false);
1208                let recent_observed = state
1209                    .observed_paths
1210                    .get(target)
1211                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1212                    .unwrap_or(false);
1213
1214                if needs_exact_window {
1215                    if !recently_inspected && !same_turn_read && !pinned_match {
1216                        return Err(format!(
1217                            "Action blocked: `{}` on '{}' requires a line-level inspection first. \
1218                             Use `inspect_lines` on the target region to get the exact current text \
1219                             (whitespace and indentation included), then retry the edit.",
1220                            name, target
1221                        ));
1222                    }
1223                } else if !recent_observed && !pinned_match {
1224                    return Err(format!(
1225                        "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
1226                        name, target
1227                    ));
1228                }
1229            }
1230        }
1231
1232        if is_mcp_mutating_tool(name) {
1233            return Err(format!(
1234                "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
1235                name
1236            ));
1237        }
1238
1239        if is_mcp_workspace_read_tool(name) {
1240            return Err(format!(
1241                "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
1242                name
1243            ));
1244        }
1245
1246        // Phase gate: if the build is broken, constrain edits to files that cargo flagged.
1247        // This prevents the model from wandering to unrelated files after a failed verify.
1248        if matches!(
1249            name,
1250            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1251        ) {
1252            if let Some(target) = normalized_target.as_deref() {
1253                let state = self.action_grounding.lock().await;
1254                if state.code_changed_since_verify
1255                    && !state.last_verify_build_ok
1256                    && !state.last_failed_build_paths.is_empty()
1257                    && !state.last_failed_build_paths.iter().any(|p| p == target)
1258                {
1259                    let files = state
1260                        .last_failed_build_paths
1261                        .iter()
1262                        .map(|p| format!("`{}`", p))
1263                        .collect::<Vec<_>>()
1264                        .join(", ");
1265                    return Err(format!(
1266                        "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
1267                        files
1268                    ));
1269                }
1270            }
1271        }
1272
1273        if name == "git_commit" || name == "git_push" {
1274            let state = self.action_grounding.lock().await;
1275            if state.code_changed_since_verify && !state.last_verify_build_ok {
1276                return Err(format!(
1277                    "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
1278                    name
1279                ));
1280            }
1281        }
1282
1283        if name == "shell" {
1284            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1285            if let Some(prompt) = self.latest_user_prompt() {
1286                if let Some(topic) = preferred_host_inspection_topic(prompt) {
1287                    if shell_looks_like_structured_host_inspection(command) {
1288                        return Err(format!(
1289                            "Action blocked: this is a host-inspection question. Prefer `inspect_host(topic: \"{}\")` instead of raw `shell` for PATH, toolchains, desktop, Downloads, listening ports, repo-doctor checks, or directory/disk summaries. Use `shell` only if `inspect_host` cannot answer the question directly.",
1290                            topic
1291                        ));
1292                    }
1293                }
1294            }
1295            let reason = args
1296                .get("reason")
1297                .and_then(|v| v.as_str())
1298                .unwrap_or("")
1299                .trim();
1300            let risk = crate::tools::guard::classify_bash_risk(command);
1301            if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
1302                return Err(
1303                    "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
1304                        .to_string(),
1305                );
1306            }
1307        }
1308
1309        Ok(())
1310    }
1311
1312    fn build_action_receipt(
1313        &self,
1314        name: &str,
1315        args: &Value,
1316        output: &str,
1317        is_error: bool,
1318    ) -> Option<ChatMessage> {
1319        if is_error || !is_destructive_tool(name) {
1320            return None;
1321        }
1322
1323        let mut receipt = String::from("[ACTION RECEIPT]\n");
1324        receipt.push_str(&format!("- tool: {}\n", name));
1325        if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1326            receipt.push_str(&format!("- target: {}\n", path));
1327        }
1328        if name == "shell" {
1329            if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1330                receipt.push_str(&format!("- command: {}\n", command));
1331            }
1332            if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1333                if !reason.trim().is_empty() {
1334                    receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1335                }
1336            }
1337        }
1338        let first_line = output.lines().next().unwrap_or(output).trim();
1339        receipt.push_str(&format!("- outcome: {}\n", first_line));
1340        Some(ChatMessage::system(&receipt))
1341    }
1342
1343    fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1344        self.tools
1345            .retain(|tool| !tool.function.name.starts_with("mcp__"));
1346        self.tools
1347            .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1348                tool_type: "function".into(),
1349                function: ToolFunction {
1350                    name: tool.name.clone(),
1351                    description: tool.description.clone().unwrap_or_default(),
1352                    parameters: tool.input_schema.clone(),
1353                },
1354                metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1355            }));
1356    }
1357
1358    async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1359        let summary = {
1360            let mcp = self.mcp_manager.lock().await;
1361            mcp.runtime_report()
1362        };
1363        let _ = tx
1364            .send(InferenceEvent::McpStatus {
1365                state: summary.state,
1366                summary: summary.summary,
1367            })
1368            .await;
1369    }
1370
1371    async fn refresh_mcp_tools(
1372        &mut self,
1373        tx: &mpsc::Sender<InferenceEvent>,
1374    ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1375        let mcp_tools = {
1376            let mut mcp = self.mcp_manager.lock().await;
1377            match mcp.initialize_all().await {
1378                Ok(()) => mcp.discover_tools().await,
1379                Err(e) => {
1380                    drop(mcp);
1381                    self.replace_mcp_tool_definitions(&[]);
1382                    self.emit_mcp_runtime_status(tx).await;
1383                    return Err(e.into());
1384                }
1385            }
1386        };
1387
1388        self.replace_mcp_tool_definitions(&mcp_tools);
1389        self.emit_mcp_runtime_status(tx).await;
1390        Ok(mcp_tools)
1391    }
1392
1393    /// Spawns and initializes all configured MCP servers, discovering their tools.
1394    pub async fn initialize_mcp(
1395        &mut self,
1396        tx: &mpsc::Sender<InferenceEvent>,
1397    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1398        let _ = self.refresh_mcp_tools(tx).await?;
1399        Ok(())
1400    }
1401
1402    /// Run one user turn through the full agentic loop.
1403    ///
1404    /// Adds the user message, calls the model, executes any tools, and loops
1405    /// until the model produces a final text reply.  All progress is streamed
1406    /// as `InferenceEvent` values via `tx`.
1407    pub async fn run_turn(
1408        &mut self,
1409        user_turn: &UserTurn,
1410        tx: mpsc::Sender<InferenceEvent>,
1411        yolo: bool,
1412    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1413        let user_input = user_turn.text.as_str();
1414        // ── Fast-path reset commands: handled locally, no network I/O needed ──
1415        if user_input.trim() == "/new" {
1416            self.history.clear();
1417            self.reasoning_history = None;
1418            self.session_memory.clear();
1419            self.running_summary = None;
1420            self.correction_hints.clear();
1421            self.pinned_files.lock().await.clear();
1422            self.reset_action_grounding().await;
1423            reset_task_files();
1424            let _ = std::fs::remove_file(session_path());
1425            self.save_empty_session();
1426            self.emit_compaction_pressure(&tx).await;
1427            self.emit_prompt_pressure_idle(&tx).await;
1428            for chunk in chunk_text(
1429                "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1430                8,
1431            ) {
1432                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1433            }
1434            let _ = tx.send(InferenceEvent::Done).await;
1435            return Ok(());
1436        }
1437
1438        if user_input.trim() == "/forget" {
1439            self.history.clear();
1440            self.reasoning_history = None;
1441            self.session_memory.clear();
1442            self.running_summary = None;
1443            self.correction_hints.clear();
1444            self.pinned_files.lock().await.clear();
1445            self.reset_action_grounding().await;
1446            reset_task_files();
1447            purge_persistent_memory();
1448            tokio::task::block_in_place(|| self.vein.reset());
1449            let _ = std::fs::remove_file(session_path());
1450            self.save_empty_session();
1451            self.emit_compaction_pressure(&tx).await;
1452            self.emit_prompt_pressure_idle(&tx).await;
1453            for chunk in chunk_text(
1454                "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1455                8,
1456            ) {
1457                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1458            }
1459            let _ = tx.send(InferenceEvent::Done).await;
1460            return Ok(());
1461        }
1462
1463        if user_input.trim() == "/vein-inspect" {
1464            let indexed = self.refresh_vein_index();
1465            let report = self.build_vein_inspection_report(indexed);
1466            let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1467            let _ = tx
1468                .send(InferenceEvent::VeinStatus {
1469                    file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1470                    embedded_count: snapshot.embedded_source_doc_chunks,
1471                    docs_only: self.vein_docs_only_mode(),
1472                })
1473                .await;
1474            for chunk in chunk_text(&report, 8) {
1475                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1476            }
1477            let _ = tx.send(InferenceEvent::Done).await;
1478            return Ok(());
1479        }
1480
1481        if user_input.trim() == "/workspace-profile" {
1482            let root = crate::tools::file_ops::workspace_root();
1483            let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1484            let report = crate::agent::workspace_profile::profile_report(&root);
1485            for chunk in chunk_text(&report, 8) {
1486                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1487            }
1488            let _ = tx.send(InferenceEvent::Done).await;
1489            return Ok(());
1490        }
1491
1492        if user_input.trim() == "/vein-reset" {
1493            tokio::task::block_in_place(|| self.vein.reset());
1494            let _ = tx
1495                .send(InferenceEvent::VeinStatus {
1496                    file_count: 0,
1497                    embedded_count: 0,
1498                    docs_only: self.vein_docs_only_mode(),
1499                })
1500                .await;
1501            for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1502                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1503            }
1504            let _ = tx.send(InferenceEvent::Done).await;
1505            return Ok(());
1506        }
1507
1508        // Reload config every turn (edits apply immediately, no restart needed).
1509        let config = crate::agent::config::load_config();
1510        self.recovery_context.clear();
1511        let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1512        if !manual_runtime_refresh {
1513            if let Some((model_id, context_length, changed)) = self
1514                .refresh_runtime_profile_and_report(&tx, "turn_start")
1515                .await
1516            {
1517                if changed {
1518                    let _ = tx
1519                        .send(InferenceEvent::Thought(format!(
1520                            "Runtime refresh: using model `{}` with CTX {} for this turn.",
1521                            model_id, context_length
1522                        )))
1523                        .await;
1524                }
1525            }
1526        }
1527        self.emit_compaction_pressure(&tx).await;
1528        let current_model = self.engine.current_model();
1529        self.engine.set_gemma_native_formatting(
1530            crate::agent::config::effective_gemma_native_formatting(&config, &current_model),
1531        );
1532        let _turn_id = self.begin_grounded_turn().await;
1533        let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1534        let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1535            Ok(tools) => tools,
1536            Err(e) => {
1537                let _ = tx
1538                    .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1539                    .await;
1540                Vec::new()
1541            }
1542        };
1543
1544        // Apply config model overrides (config takes precedence over CLI flags).
1545        let effective_fast = config
1546            .fast_model
1547            .clone()
1548            .or_else(|| self.fast_model.clone());
1549        let effective_think = config
1550            .think_model
1551            .clone()
1552            .or_else(|| self.think_model.clone());
1553
1554        // ── /lsp: start language servers manually if needed ──────────────────
1555        if user_input.trim() == "/lsp" {
1556            let mut lsp = self.lsp_manager.lock().await;
1557            match lsp.start_servers().await {
1558                Ok(_) => {
1559                    let _ = tx
1560                        .send(InferenceEvent::MutedToken(
1561                            "LSP: Servers Initialized OK.".to_string(),
1562                        ))
1563                        .await;
1564                }
1565                Err(e) => {
1566                    let _ = tx
1567                        .send(InferenceEvent::Error(format!(
1568                            "LSP: Failed to start servers - {}",
1569                            e
1570                        )))
1571                        .await;
1572                }
1573            }
1574            let _ = tx.send(InferenceEvent::Done).await;
1575            return Ok(());
1576        }
1577
1578        if user_input.trim() == "/runtime-refresh" {
1579            match self
1580                .refresh_runtime_profile_and_report(&tx, "manual_command")
1581                .await
1582            {
1583                Some((model_id, context_length, changed)) => {
1584                    let msg = if changed {
1585                        format!(
1586                            "Runtime profile refreshed. Model: {} | CTX: {}",
1587                            model_id, context_length
1588                        )
1589                    } else {
1590                        format!(
1591                            "Runtime profile unchanged. Model: {} | CTX: {}",
1592                            model_id, context_length
1593                        )
1594                    };
1595                    for chunk in chunk_text(&msg, 8) {
1596                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1597                    }
1598                }
1599                None => {
1600                    let _ = tx
1601                        .send(InferenceEvent::Error(
1602                            "Runtime refresh failed: LM Studio profile could not be read."
1603                                .to_string(),
1604                        ))
1605                        .await;
1606                }
1607            }
1608            let _ = tx.send(InferenceEvent::Done).await;
1609            return Ok(());
1610        }
1611
1612        if user_input.trim() == "/ask" {
1613            self.set_workflow_mode(WorkflowMode::Ask);
1614            for chunk in chunk_text(
1615                "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1616                8,
1617            ) {
1618                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1619            }
1620            let _ = tx.send(InferenceEvent::Done).await;
1621            return Ok(());
1622        }
1623
1624        if user_input.trim() == "/code" {
1625            self.set_workflow_mode(WorkflowMode::Code);
1626            let mut message =
1627                "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1628            if let Some(plan) = self.current_plan_summary() {
1629                message.push_str(&format!(" Current plan: {plan}."));
1630            }
1631            for chunk in chunk_text(&message, 8) {
1632                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1633            }
1634            let _ = tx.send(InferenceEvent::Done).await;
1635            return Ok(());
1636        }
1637
1638        if user_input.trim() == "/architect" {
1639            self.set_workflow_mode(WorkflowMode::Architect);
1640            let mut message =
1641                "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement.".to_string();
1642            if let Some(plan) = self.current_plan_summary() {
1643                message.push_str(&format!(" Existing plan: {plan}."));
1644            }
1645            for chunk in chunk_text(&message, 8) {
1646                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1647            }
1648            let _ = tx.send(InferenceEvent::Done).await;
1649            return Ok(());
1650        }
1651
1652        if user_input.trim() == "/read-only" {
1653            self.set_workflow_mode(WorkflowMode::ReadOnly);
1654            for chunk in chunk_text(
1655                "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1656                8,
1657            ) {
1658                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1659            }
1660            let _ = tx.send(InferenceEvent::Done).await;
1661            return Ok(());
1662        }
1663
1664        if user_input.trim() == "/auto" {
1665            self.set_workflow_mode(WorkflowMode::Auto);
1666            for chunk in chunk_text(
1667                "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1668                8,
1669            ) {
1670                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1671            }
1672            let _ = tx.send(InferenceEvent::Done).await;
1673            return Ok(());
1674        }
1675
1676        if user_input.trim() == "/chat" {
1677            self.set_workflow_mode(WorkflowMode::Chat);
1678            let _ = tx.send(InferenceEvent::Done).await;
1679            return Ok(());
1680        }
1681
1682        if user_input.trim() == "/reroll" {
1683            let soul = crate::ui::hatch::generate_soul_random();
1684            self.snark = soul.snark;
1685            self.chaos = soul.chaos;
1686            self.soul_personality = soul.personality.clone();
1687            // Update the engine's species name so build_chat_system_prompt uses it
1688            // NOTE: engine is an Arc, so its species field can only be mutated via Arc::get_mut,
1689            // which succeeds only when no other Arc or Weak pointers to the engine exist.
1690            // If it fails (e.g. swarm workers hold refs), the engine's species is left unchanged.
1691            let species = soul.species.clone();
1692            if let Some(eng) = Arc::get_mut(&mut self.engine) {
1693                eng.species = species.clone();
1694            }
1695            let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1696            let _ = tx
1697                .send(InferenceEvent::SoulReroll {
1698                    species: soul.species.clone(),
1699                    rarity: soul.rarity.label().to_string(),
1700                    shiny: soul.shiny,
1701                    personality: soul.personality.clone(),
1702                })
1703                .await;
1704            for chunk in chunk_text(
1705                &format!(
1706                    "A new companion awakens!\n[{}{}] {} — \"{}\"",
1707                    soul.rarity.label(),
1708                    shiny_tag,
1709                    soul.species,
1710                    soul.personality
1711                ),
1712                8,
1713            ) {
1714                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1715            }
1716            let _ = tx.send(InferenceEvent::Done).await;
1717            return Ok(());
1718        }
1719
1720        if user_input.trim() == "/agent" {
1721            self.set_workflow_mode(WorkflowMode::Auto);
1722            let _ = tx.send(InferenceEvent::Done).await;
1723            return Ok(());
1724        }
1725
1726        let mut effective_user_input = user_input.trim().to_string();
1727        if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1728            self.set_workflow_mode(mode);
1729            effective_user_input = rest.to_string();
1730        }
1731        let transcript_user_input = transcript_user_turn_text(user_turn, &effective_user_input);
1732        effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1733        let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1734            && is_current_plan_execution_request(&effective_user_input)
1735            && self
1736                .session_memory
1737                .current_plan
1738                .as_ref()
1739                .map(|plan| plan.has_signal())
1740                .unwrap_or(false);
1741        self.plan_execution_active
1742            .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1743        let _plan_execution_guard = PlanExecutionGuard {
1744            flag: self.plan_execution_active.clone(),
1745        };
1746        let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1747
1748        // ── Direct answers: emit a prebuilt response when routing selected one ──
1749        if let Some(answer_kind) = intent.direct_answer {
1750            match answer_kind {
1751                DirectAnswerKind::LanguageCapability => {
1752                    let response = build_language_capability_answer();
1753                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1754                        .await;
1755                    return Ok(());
1756                }
1757                DirectAnswerKind::UnsafeWorkflowPressure => {
1758                    let response = build_unsafe_workflow_pressure_answer();
1759                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1760                        .await;
1761                    return Ok(());
1762                }
1763                DirectAnswerKind::SessionMemory => {
1764                    let response = build_session_memory_answer();
1765                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1766                        .await;
1767                    return Ok(());
1768                }
1769                DirectAnswerKind::RecoveryRecipes => {
1770                    let response = build_recovery_recipes_answer();
1771                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1772                        .await;
1773                    return Ok(());
1774                }
1775                DirectAnswerKind::McpLifecycle => {
1776                    let response = build_mcp_lifecycle_answer();
1777                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1778                        .await;
1779                    return Ok(());
1780                }
1781                DirectAnswerKind::AuthorizationPolicy => {
1782                    let response = build_authorization_policy_answer();
1783                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1784                        .await;
1785                    return Ok(());
1786                }
1787                DirectAnswerKind::ToolClasses => {
1788                    let response = build_tool_classes_answer();
1789                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1790                        .await;
1791                    return Ok(());
1792                }
1793                DirectAnswerKind::ToolRegistryOwnership => {
1794                    let response = build_tool_registry_ownership_answer();
1795                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1796                        .await;
1797                    return Ok(());
1798                }
1799                DirectAnswerKind::SessionResetSemantics => {
1800                    let response = build_session_reset_semantics_answer();
1801                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1802                        .await;
1803                    return Ok(());
1804                }
1805                DirectAnswerKind::ProductSurface => {
1806                    let response = build_product_surface_answer();
1807                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1808                        .await;
1809                    return Ok(());
1810                }
1811                DirectAnswerKind::ReasoningSplit => {
1812                    let response = build_reasoning_split_answer();
1813                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1814                        .await;
1815                    return Ok(());
1816                }
1817                DirectAnswerKind::Identity => {
1818                    let response = build_identity_answer();
1819                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1820                        .await;
1821                    return Ok(());
1822                }
1823                DirectAnswerKind::WorkflowModes => {
1824                    let response = build_workflow_modes_answer();
1825                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1826                        .await;
1827                    return Ok(());
1828                }
1829                DirectAnswerKind::GemmaNative => {
1830                    let response = build_gemma_native_answer();
1831                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1832                        .await;
1833                    return Ok(());
1834                }
1835                DirectAnswerKind::GemmaNativeSettings => {
1836                    let response = build_gemma_native_settings_answer();
1837                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1838                        .await;
1839                    return Ok(());
1840                }
1841                DirectAnswerKind::VerifyProfiles => {
1842                    let response = build_verify_profiles_answer();
1843                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1844                        .await;
1845                    return Ok(());
1846                }
1847                DirectAnswerKind::Toolchain => {
1848                    let lower = effective_user_input.to_lowercase();
1849                    let topic = if (lower.contains("voice output") || lower.contains("voice"))
1850                        && (lower.contains("lag")
1851                            || lower.contains("behind visible text")
1852                            || lower.contains("latency"))
1853                    {
1854                        "voice_latency_plan"
1855                    } else {
1856                        "all"
1857                    };
1858                    let response =
1859                        crate::tools::toolchain::describe_toolchain(&serde_json::json!({
1860                            "topic": topic,
1861                            "question": effective_user_input,
1862                        }))
1863                        .await
1864                        .unwrap_or_else(|e| format!("Error: {}", e));
1865                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1866                        .await;
1867                    return Ok(());
1868                }
1869                DirectAnswerKind::ArchitectSessionResetPlan => {
1870                    let plan = build_architect_session_reset_plan();
1871                    let response = plan.to_markdown();
1872                    let _ = crate::tools::plan::save_plan_handoff(&plan);
1873                    self.session_memory.current_plan = Some(plan);
1874                    self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1875                        .await;
1876                    return Ok(());
1877                }
1878            }
1879        }
1880
1881        if matches!(
1882            self.workflow_mode,
1883            WorkflowMode::Ask | WorkflowMode::ReadOnly
1884        ) && looks_like_mutation_request(&effective_user_input)
1885        {
1886            let response = build_mode_redirect_answer(self.workflow_mode);
1887            self.history.push(ChatMessage::user(&effective_user_input));
1888            self.history.push(ChatMessage::assistant_text(&response));
1889            self.transcript.log_user(&transcript_user_input);
1890            self.transcript.log_agent(&response);
1891            for chunk in chunk_text(&response, 8) {
1892                if !chunk.is_empty() {
1893                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
1894                }
1895            }
1896            let _ = tx.send(InferenceEvent::Done).await;
1897            self.trim_history(80);
1898            self.refresh_session_memory();
1899            self.save_session();
1900            return Ok(());
1901        }
1902
1903        if user_input.trim() == "/think" {
1904            self.think_mode = Some(true);
1905            for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
1906                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1907            }
1908            let _ = tx.send(InferenceEvent::Done).await;
1909            return Ok(());
1910        }
1911        if user_input.trim() == "/no_think" {
1912            self.think_mode = Some(false);
1913            for chunk in chunk_text(
1914                "Think mode: OFF — fast mode enabled (no chain-of-thought).",
1915                8,
1916            ) {
1917                let _ = tx.send(InferenceEvent::Token(chunk)).await;
1918            }
1919            let _ = tx.send(InferenceEvent::Done).await;
1920            return Ok(());
1921        }
1922
1923        // ── /pin: add file to active context ────────────────────────────────
1924        if user_input.trim_start().starts_with("/pin ") {
1925            let path = user_input.trim_start()[5..].trim();
1926            match std::fs::read_to_string(path) {
1927                Ok(content) => {
1928                    self.pinned_files
1929                        .lock()
1930                        .await
1931                        .insert(path.to_string(), content);
1932                    let msg = format!(
1933                        "Pinned: {} — this file is now locked in model context.",
1934                        path
1935                    );
1936                    for chunk in chunk_text(&msg, 8) {
1937                        let _ = tx.send(InferenceEvent::Token(chunk)).await;
1938                    }
1939                }
1940                Err(e) => {
1941                    let _ = tx
1942                        .send(InferenceEvent::Error(format!(
1943                            "Failed to pin {}: {}",
1944                            path, e
1945                        )))
1946                        .await;
1947                }
1948            }
1949            let _ = tx.send(InferenceEvent::Done).await;
1950            return Ok(());
1951        }
1952
1953        // ── /unpin: remove file from active context ──────────────────────────
1954        if user_input.trim_start().starts_with("/unpin ") {
1955            let path = user_input.trim_start()[7..].trim();
1956            if self.pinned_files.lock().await.remove(path).is_some() {
1957                let msg = format!("Unpinned: {} — file removed from active context.", path);
1958                for chunk in chunk_text(&msg, 8) {
1959                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
1960                }
1961            } else {
1962                let _ = tx
1963                    .send(InferenceEvent::Error(format!(
1964                        "File {} was not pinned.",
1965                        path
1966                    )))
1967                    .await;
1968            }
1969            let _ = tx.send(InferenceEvent::Done).await;
1970            return Ok(());
1971        }
1972
1973        // ── Normal processing ───────────────────────────────────────────────
1974
1975        // Build the system prompt (MCP tools were refreshed above); a context length <= 8_192 enables tiny-context mode.
1976        let tiny_context_mode = self.engine.current_context_length() <= 8_192;
1977        let mut base_prompt = self.engine.build_system_prompt(
1978            self.snark,
1979            self.chaos,
1980            self.brief,
1981            self.professional,
1982            &self.tools,
1983            self.reasoning_history.as_deref(),
1984            &mcp_tools,
1985        );
1986        if !tiny_context_mode {
1987            if let Some(hint) = &config.context_hint {
1988                if !hint.trim().is_empty() {
1989                    base_prompt.push_str(&format!(
1990                        "\n\n# Project Context (from .hematite/settings.json)\n{}",
1991                        hint
1992                    ));
1993                }
1994            }
1995            if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
1996                &crate::tools::file_ops::workspace_root(),
1997            ) {
1998                base_prompt.push_str(&format!("\n\n{}", profile_block));
1999            }
2000            // L1: inject hot-files block if available (persists across sessions via vein.db).
2001            if let Some(ref l1) = self.l1_context {
2002                base_prompt.push_str(&format!("\n\n{}", l1));
2003            }
2004        }
2005        let grounded_trace_mode = intent.grounded_trace_mode
2006            || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2007        let capability_mode =
2008            intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2009        let toolchain_mode =
2010            intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2011        let host_inspection_mode = intent.host_inspection_mode;
2012        let project_map_mode = intent.preserve_project_map_output
2013            || intent.primary_class == QueryIntentClass::RepoArchitecture;
2014        let architecture_overview_mode = intent.architecture_overview_mode;
2015        let capability_needs_repo = intent.capability_needs_repo;
2016        let mut system_msg = build_system_with_corrections(
2017            &base_prompt,
2018            &self.correction_hints,
2019            &self.gpu_state,
2020            &self.git_state,
2021            &config,
2022        );
2023        if tiny_context_mode {
2024            system_msg.push_str(
2025                "\n\n# TINY CONTEXT TURN MODE\n\
2026                 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2027            );
2028        }
2029        if !tiny_context_mode && grounded_trace_mode {
2030            system_msg.push_str(
2031                "\n\n# GROUNDED TRACE MODE\n\
2032                 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2033                 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2034                 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2035                 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2036                 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2037                 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2038                 Do not invent names such as synthetic channels or subsystems.\n\
2039                 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2040                For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2041            );
2042        }
2043        if !tiny_context_mode && capability_mode {
2044            system_msg.push_str(
2045                "\n\n# CAPABILITY QUESTION MODE\n\
2046                 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2047                 Answer from stable Hematite capabilities and current runtime state.\n\
2048                 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2049                 Do NOT call repo-inspection tools like `map_project`, `read_file`, or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2050                 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2051                 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2052                 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2053                 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2054                 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2055                 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2056                 Keep the answer short, plain, and ASCII-first.\n"
2057            );
2058        }
2059        if !tiny_context_mode && toolchain_mode {
2060            system_msg.push_str(
2061                "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2062                 This turn is about Hematite's real built-in tools and how to choose them.\n\
2063                 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2064                 Use only real built-in tool names.\n\
2065                 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2066                 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2067                 Be explicit about which tools are optional or conditional.\n"
2068            );
2069        }
2070        if !tiny_context_mode && host_inspection_mode {
2071            system_msg.push_str(
2072                "\n\n# HOST INSPECTION MODE\n\
2073                 This turn is about the local machine and environment, not repository architecture.\n\
2074                 Prefer `inspect_host` before raw `shell` for PATH analysis, installed developer tool versions, desktop item counts, Downloads summaries, listening ports, repo-doctor checks, and directory/disk-size reports.\n\
2075                 Use the closest built-in topic first: `summary`, `toolchains`, `path`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
2076                 Only use `shell` if the host question truly goes beyond `inspect_host`.\n"
2077            );
2078        }
2079        if !tiny_context_mode && project_map_mode {
2080            system_msg.push_str(
2081                "\n\n# PROJECT MAP DISCIPLINE MODE\n\
2082                 For repository structure, entrypoint, owner-file, or architecture-map questions, prefer `map_project` first.\n\
2083                 If `map_project` provides likely entrypoints and core owner files, preserve that grounded structure instead of rewriting it into broad prose.\n\
2084                 Do not invent new entrypoints or owner files that are not present in the tool output.\n\
2085                 Keep the final answer compact and architecture-first.\n"
2086            );
2087        }
2088        if !tiny_context_mode && architecture_overview_mode {
2089            system_msg.push_str(
2090                "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2091                 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow and `map_project` for compact structure.\n\
2092                 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2093                 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2094            );
2095        }
2096
2097        // ── Inject Workflow Mode ────────────────────────────────────────────
2098        system_msg.push_str(&format!(
2099            "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2100            self.workflow_mode.label()
2101        ));
2102        if tiny_context_mode {
2103            system_msg
2104                .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2105        } else {
2106            match self.workflow_mode {
2107                WorkflowMode::Auto => system_msg.push_str(
2108                    "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2109                ),
2110                WorkflowMode::Ask => system_msg.push_str(
2111                    "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2112                ),
2113                WorkflowMode::Code => system_msg.push_str(
2114                    "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2115                ),
2116                WorkflowMode::Architect => system_msg.push_str(
2117                    "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2118                ),
2119                WorkflowMode::ReadOnly => system_msg.push_str(
2120                    "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2121                ),
2122                WorkflowMode::Chat => {} // replaced by build_chat_system_prompt below
2123            }
2124        }
2125        if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2126            system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2127            system_msg.push_str(architect_handoff_contract());
2128            system_msg.push('\n');
2129        }
2130        if !tiny_context_mode && implement_current_plan {
2131            system_msg.push_str(
2132                "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2133                 The user explicitly asked you to implement the current saved plan.\n\
2134                 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2135                 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2136                 Do not call `map_project` during current-plan execution.\n\
2137                 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2138                 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2139            );
2140            if let Some(plan) = self.session_memory.current_plan.as_ref() {
2141                if !plan.target_files.is_empty() {
2142                    system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2143                    for path in &plan.target_files {
2144                        system_msg.push_str(&format!("- {}\n", path));
2145                    }
2146                }
2147            }
2148        }
2149        if !tiny_context_mode {
2150            let pinned = self.pinned_files.lock().await;
2151            if !pinned.is_empty() {
2152                system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2153                system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
2154                for (path, content) in pinned.iter() {
2155                    system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2156                }
2157            }
2158        }
2159        if !tiny_context_mode {
2160            self.append_session_handoff(&mut system_msg);
2161        }
2162        // In chat mode, replace the full harness prompt with a clean conversational surface.
2163        // The harness prompt (built above) is discarded — Rusty personality takes over.
2164        let system_msg = if self.workflow_mode.is_chat() {
2165            self.build_chat_system_prompt()
2166        } else {
2167            system_msg
2168        };
2169        if self.history.is_empty() || self.history[0].role != "system" {
2170            self.history.insert(0, ChatMessage::system(&system_msg));
2171        } else {
2172            self.history[0] = ChatMessage::system(&system_msg);
2173        }
2174
2175        // Ensure a clean state for the new turn.
2176        self.cancel_token
2177            .store(false, std::sync::atomic::Ordering::SeqCst);
2178
2179        // [Official Gemma-4 Spec] Purge reasoning history for new user turns.
2180        // History from previous turns must not be fed back into the prompt to prevent duplication.
2181        self.reasoning_history = None;
2182
2183        let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2184        let user_content = match self.think_mode {
2185            Some(true) => format!("/think\n{}", effective_user_input),
2186            Some(false) => format!("/no_think\n{}", effective_user_input),
2187            // For non-Gemma models (Qwen etc.) default to /think so the model uses
2188            // hybrid thinking — it decides how much reasoning each turn needs.
2189            // Gemma handles reasoning via <|think|> in the system prompt instead.
2190            // Chat mode and quick tool calls skip /think — fast direct answers.
2191            None if !is_gemma
2192                && !self.workflow_mode.is_chat()
2193                && !is_quick_tool_request(&effective_user_input) =>
2194            {
2195                format!("/think\n{}", effective_user_input)
2196            }
2197            None => effective_user_input.clone(),
2198        };
2199        if let Some(image) = user_turn.attached_image.as_ref() {
2200            let image_url =
2201                crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2202                    .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2203            self.history
2204                .push(ChatMessage::user_with_image(&user_content, &image_url));
2205        } else {
2206            self.history.push(ChatMessage::user(&user_content));
2207        }
2208        self.transcript.log_user(&transcript_user_input);
2209
2210        // Incremental re-index and Vein context injection. Ordinary chat mode
2211        // still skips repo-snippet noise, but docs-only workspaces and explicit
2212        // session-recall prompts should keep Vein memory available.
2213        let vein_docs_only = self.vein_docs_only_mode();
2214        let allow_vein_context = !self.workflow_mode.is_chat()
2215            || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2216        let (vein_context, vein_paths) = if allow_vein_context {
2217            self.refresh_vein_index();
2218            let _ = tx
2219                .send(InferenceEvent::VeinStatus {
2220                    file_count: self.vein.file_count(),
2221                    embedded_count: self.vein.embedded_chunk_count(),
2222                    docs_only: vein_docs_only,
2223                })
2224                .await;
2225            match self.build_vein_context(&effective_user_input) {
2226                Some((ctx, paths)) => (Some(ctx), paths),
2227                None => (None, Vec::new()),
2228            }
2229        } else {
2230            (None, Vec::new())
2231        };
2232        if !vein_paths.is_empty() {
2233            let _ = tx
2234                .send(InferenceEvent::VeinContext { paths: vein_paths })
2235                .await;
2236        }
2237
2238        // Route: pick fast vs think model based on the complexity of this request.
2239        let routed_model = route_model(
2240            &effective_user_input,
2241            effective_fast.as_deref(),
2242            effective_think.as_deref(),
2243        )
2244        .map(|s| s.to_string());
2245
2246        let mut loop_intervention: Option<String> = None;
2247        let mut implementation_started = false;
2248        let mut non_mutating_plan_steps = 0usize;
2249        let non_mutating_plan_soft_cap = 5usize;
2250        let non_mutating_plan_hard_cap = 8usize;
2251        let mut overview_project_map: Option<String> = None;
2252        let mut overview_runtime_trace: Option<String> = None;
2253
2254        // Safety cap – never spin forever on a broken model.
2255        let max_iters = 25;
2256        let mut consecutive_errors = 0;
2257        let mut first_iter = true;
2258        let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2259        // Track identical tool results within this turn to detect logical loops.
2260        let _result_counts: std::collections::HashMap<String, usize> =
2261            std::collections::HashMap::new();
2262        // Track the count of identical (name, args) calls to detect infinite tool loops.
2263        let mut repeat_counts: std::collections::HashMap<String, usize> =
2264            std::collections::HashMap::new();
2265        let mut successful_read_targets: std::collections::HashSet<String> =
2266            std::collections::HashSet::new();
2267        // (path, offset) pairs — catches repeated reads at the same non-zero offset.
2268        let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2269            std::collections::HashSet::new();
2270        let mut successful_grep_targets: std::collections::HashSet<String> =
2271            std::collections::HashSet::new();
2272        let mut no_match_grep_targets: std::collections::HashSet<String> =
2273            std::collections::HashSet::new();
2274        let mut broad_grep_targets: std::collections::HashSet<String> =
2275            std::collections::HashSet::new();
2276
2277        // Track the index of the message that started THIS turn, so compaction doesn't summarize it.
2278        let mut turn_anchor = self.history.len().saturating_sub(1);
2279
2280        for _iter in 0..max_iters {
2281            let mut mutation_occurred = false;
2282            // Priority Check: External Cancellation (via Esc key in TUI)
2283            if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2284                self.cancel_token
2285                    .store(false, std::sync::atomic::Ordering::SeqCst);
2286                let _ = tx
2287                    .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2288                    .await;
2289                let _ = tx.send(InferenceEvent::Done).await;
2290                return Ok(());
2291            }
2292
2293            // ── Intelligence Surge: Proactive Compaction Check ──────────────────────
2294            if self
2295                .compact_history_if_needed(&tx, Some(turn_anchor))
2296                .await?
2297            {
2298                // After compaction, history is [system, summary, turn_anchor, ...]
2299                // The new turn_anchor is index 2.
2300                turn_anchor = 2;
2301            }
2302
2303            // On the first iteration inject Vein context into the system message.
2304            // Subsequent iterations use the plain slice — tool results are now in
2305            // history so Vein context would be redundant.
2306            let inject_vein = first_iter && !implement_current_plan;
2307            let messages = if implement_current_plan {
2308                first_iter = false;
2309                self.context_window_slice_from(turn_anchor)
2310            } else {
2311                first_iter = false;
2312                self.context_window_slice()
2313            };
2314
2315            // Use the canonical system prompt from history[0] which was built
2316            // by InferenceEngine::build_system_prompt() + build_system_with_corrections()
2317            // and includes GPU state, git context, permissions, and instruction files.
2318            let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2319                // Gemma 4 handles multiple system messages natively.
2320                // Standard models (Qwen, etc.) reject a second system message — merge into history[0].
2321                if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2322                    let mut msgs = vec![self.history[0].clone()];
2323                    msgs.push(ChatMessage::system(&intervention));
2324                    msgs
2325                } else {
2326                    let merged =
2327                        format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2328                    vec![ChatMessage::system(&merged)]
2329                }
2330            } else {
2331                vec![self.history[0].clone()]
2332            };
2333
2334            // Inject Vein context into the system message on the first iteration.
2335            // Vein results are merged in the same way as loop_intervention so standard
2336            // models (Qwen etc.) only ever see one system message.
2337            if inject_vein {
2338                if let Some(ref ctx) = vein_context.as_ref() {
2339                    if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2340                        prompt_msgs.push(ChatMessage::system(ctx));
2341                    } else {
2342                        let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2343                        prompt_msgs[0] = ChatMessage::system(&merged);
2344                    }
2345                }
2346            }
2347            prompt_msgs.extend(messages);
2348            if let Some(budget_note) =
2349                enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2350            {
2351                self.emit_operator_checkpoint(
2352                    &tx,
2353                    OperatorCheckpointState::BudgetReduced,
2354                    budget_note,
2355                )
2356                .await;
2357                let recipe = plan_recovery(
2358                    RecoveryScenario::PromptBudgetPressure,
2359                    &self.recovery_context,
2360                );
2361                self.emit_recovery_recipe_summary(
2362                    &tx,
2363                    recipe.recipe.scenario.label(),
2364                    compact_recovery_plan_summary(&recipe),
2365                )
2366                .await;
2367            }
2368            self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2369                .await;
2370
2371            let (mut text, mut tool_calls, usage, finish_reason) = match self
2372                .engine
2373                .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2374                .await
2375            {
2376                Ok(result) => result,
2377                Err(e) => {
2378                    let class = classify_runtime_failure(&e);
2379                    if should_retry_runtime_failure(class) {
2380                        if self.recovery_context.consume_transient_retry() {
2381                            let label = match class {
2382                                RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2383                                _ => "empty_model_response",
2384                            };
2385                            self.transcript.log_system(&format!(
2386                                "Automatic provider recovery triggered: {}",
2387                                e.trim()
2388                            ));
2389                            self.emit_recovery_recipe_summary(
2390                                &tx,
2391                                label,
2392                                compact_runtime_recovery_summary(class),
2393                            )
2394                            .await;
2395                            let _ = tx
2396                                .send(InferenceEvent::ProviderStatus {
2397                                    state: ProviderRuntimeState::Recovering,
2398                                    summary: compact_runtime_recovery_summary(class).into(),
2399                                })
2400                                .await;
2401                            self.emit_operator_checkpoint(
2402                                &tx,
2403                                OperatorCheckpointState::RecoveringProvider,
2404                                compact_runtime_recovery_summary(class),
2405                            )
2406                            .await;
2407                            continue;
2408                        }
2409                    }
2410
2411                    self.emit_runtime_failure(&tx, class, &e).await;
2412                    break;
2413                }
2414            };
2415            self.emit_provider_live(&tx).await;
2416
2417            // Update TUI token counter with actual usage from LM Studio.
2418            if let Some(ref u) = usage {
2419                let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2420            }
2421
2422            // Fallback safety net: if native tool markup leaked past the inference-layer
2423            // extractor, recover it here instead of treating it as plain assistant text.
2424            if tool_calls
2425                .as_ref()
2426                .map(|calls| calls.is_empty())
2427                .unwrap_or(true)
2428            {
2429                if let Some(raw_text) = text.as_deref() {
2430                    let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2431                    if !native_calls.is_empty() {
2432                        tool_calls = Some(native_calls);
2433                        let stripped =
2434                            crate::agent::inference::strip_native_tool_call_text(raw_text);
2435                        text = if stripped.trim().is_empty() {
2436                            None
2437                        } else {
2438                            Some(stripped)
2439                        };
2440                    }
2441                }
2442            }
2443
2444            // Treat empty tool_calls arrays (Some(vec![])) the same as None –
2445            // the model returned text only; an empty array causes an infinite loop.
2446            let tool_calls = tool_calls.filter(|c| !c.is_empty());
2447            let near_context_ceiling = usage
2448                .as_ref()
2449                .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2450                .unwrap_or(false);
2451
2452            if let Some(calls) = tool_calls {
2453                let (calls, prune_trace_note) =
2454                    prune_architecture_trace_batch(calls, architecture_overview_mode);
2455                if let Some(note) = prune_trace_note {
2456                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2457                }
2458
2459                let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2460                    calls,
2461                    self.workflow_mode.is_read_only(),
2462                    architecture_overview_mode,
2463                );
2464                if let Some(note) = prune_bloat_note {
2465                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2466                }
2467
2468                let (calls, prune_note) = prune_authoritative_tool_batch(
2469                    calls,
2470                    grounded_trace_mode,
2471                    &effective_user_input,
2472                );
2473                if let Some(note) = prune_note {
2474                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2475                }
2476
2477                let (calls, batch_note) = order_batch_reads_first(calls);
2478                if let Some(note) = batch_note {
2479                    let _ = tx.send(InferenceEvent::Thought(note)).await;
2480                }
2481
2482                if let Some(repeated_path) = calls
2483                    .iter()
2484                    .filter(|c| {
2485                        let parsed = serde_json::from_str::<Value>(
2486                            &crate::agent::inference::normalize_tool_argument_string(
2487                                &c.function.name,
2488                                &c.function.arguments,
2489                            ),
2490                        )
2491                        .ok();
2492                        let offset = parsed
2493                            .as_ref()
2494                            .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2495                            .unwrap_or(0);
2496                        // Catch re-reads near the top of the file (offset below 200; original behaviour)
2497                        // AND repeated reads at the exact same later offset (new: catches targeted loops).
2498                        if offset < 200 {
2499                            return true;
2500                        }
2501                        if let Some(path) = parsed
2502                            .as_ref()
2503                            .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2504                        {
2505                            let normalized = normalize_workspace_path(path);
2506                            return successful_read_regions.contains(&(normalized, offset));
2507                        }
2508                        false
2509                    })
2510                    .filter_map(|c| repeated_read_target(&c.function))
2511                    .find(|path| successful_read_targets.contains(path))
2512                {
2513                    loop_intervention = Some(format!(
2514                        "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2515                        repeated_path
2516                    ));
2517                    let _ = tx
2518                        .send(InferenceEvent::Thought(
2519                            "Read discipline: preventing repeated full-file reads on the same path."
2520                                .into(),
2521                        ))
2522                        .await;
2523                    continue;
2524                }
2525
2526                if capability_mode
2527                    && !capability_needs_repo
2528                    && calls
2529                        .iter()
2530                        .all(|c| is_capability_probe_tool(&c.function.name))
2531                {
2532                    loop_intervention = Some(
2533                        "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2534                         Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2535                         Do not mention raw `mcp__*` names unless they are active and directly relevant."
2536                            .to_string(),
2537                    );
2538                    let _ = tx
2539                        .send(InferenceEvent::Thought(
2540                            "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2541                                .into(),
2542                        ))
2543                        .await;
2544                    continue;
2545                }
2546
2547                // VOCAL AGENT: If the model provided reasoning alongside tools,
2548                // stream it to the SPECULAR panel now using the hardened extraction.
2549                let raw_content = text.as_deref().unwrap_or(" ");
2550
2551                if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2552                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2553                    // Reasoning is silent (hidden in SPECULAR only).
2554                    self.reasoning_history = Some(thought);
2555                }
2556
2557                // [Gemma-4 Protocol] Keep raw content (including thoughts) during tool loops.
2558                // Thoughts are only stripped before the 'final' user turn.
2559                let stored_tool_call_content = if implement_current_plan {
2560                    cap_output(raw_content, 1200)
2561                } else {
2562                    raw_content.to_string()
2563                };
2564                self.history.push(ChatMessage::assistant_tool_calls(
2565                    &stored_tool_call_content,
2566                    calls.clone(),
2567                ));
2568
2569                // ── LAYER 4: Parallel Tool Orchestration (Batching) ────────────────────
2570                let mut results = Vec::new();
2571
2572                // Partition tool calls: Parallel Read vs Serial Mutating
2573                let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = calls
2574                    .clone()
2575                    .into_iter()
2576                    .partition(|c| is_parallel_safe(&c.function.name));
2577
2578                // 1. Concurrent Execution (ParallelRead)
2579                if !parallel_calls.is_empty() {
2580                    let mut tasks = Vec::new();
2581                    for call in parallel_calls {
2582                        let tx_clone = tx.clone();
2583                        let config_clone = config.clone();
2584                        // Carry the real call ID into the outcome
2585                        let call_with_id = call.clone();
2586                        tasks.push(self.process_tool_call(
2587                            call_with_id.function,
2588                            config_clone,
2589                            yolo,
2590                            tx_clone,
2591                            call_with_id.id,
2592                        ));
2593                    }
2594                    // Wait for all read-only tasks to complete simultaneously.
2595                    results.extend(futures::future::join_all(tasks).await);
2596                }
2597
2598                // 2. Sequential Execution (SerialMutating)
2599                for call in serial_calls {
2600                    results.push(
2601                        self.process_tool_call(
2602                            call.function,
2603                            config.clone(),
2604                            yolo,
2605                            tx.clone(),
2606                            call.id,
2607                        )
2608                        .await,
2609                    );
2610                }
2611
2612                // 3. Collate Messages into History & UI
2613                let mut authoritative_tool_output: Option<String> = None;
2614                let mut blocked_policy_output: Option<String> = None;
2615                let mut recoverable_policy_intervention: Option<String> = None;
2616                let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
2617                let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
2618                    None;
2619                for res in results {
2620                    let call_id = res.call_id.clone();
2621                    let tool_name = res.tool_name.clone();
2622                    let final_output = res.output.clone();
2623                    let is_error = res.is_error;
2624                    for msg in res.msg_results {
2625                        self.history.push(msg);
2626                    }
2627
2628                    // Update State for Verification Loop
2629                    if matches!(
2630                        tool_name.as_str(),
2631                        "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
2632                    ) {
2633                        mutation_occurred = true;
2634                        implementation_started = true;
2635                        // Heat tracking: bump L1 score for the edited file.
2636                        if !is_error {
2637                            let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
2638                            if !path.is_empty() {
2639                                self.vein.bump_heat(path);
2640                                self.l1_context = self.vein.l1_context();
2641                            }
2642                        }
2643                    }
2644
2645                    if tool_name == "verify_build" {
2646                        self.record_session_verification(
2647                            !is_error
2648                                && (final_output.contains("BUILD OK")
2649                                    || final_output.contains("BUILD SUCCESS")
2650                                    || final_output.contains("BUILD OKAY")),
2651                            if is_error {
2652                                "Explicit verify_build failed."
2653                            } else {
2654                                "Explicit verify_build passed."
2655                            },
2656                        );
2657                    }
2658
2659                    // Update Repeat Guard
2660                    let call_key = format!(
2661                        "{}:{}",
2662                        tool_name,
2663                        serde_json::to_string(&res.args).unwrap_or_default()
2664                    );
2665                    let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
2666                    *repeat_count += 1;
2667
2668                    // verify_build, git_commit, and git_push are exempt from the repeat guard;
2668                    // verify_build in particular is legitimately called multiple times in fix-verify loops.
2669                    let repeat_guard_exempt = matches!(
2670                        tool_name.as_str(),
2671                        "verify_build" | "git_commit" | "git_push"
2672                    );
2673                    if *repeat_count >= 3 && !repeat_guard_exempt {
2674                        loop_intervention = Some(format!(
2675                            "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
2676                             Do not call it again. Either answer directly from what you already know, \
2677                             use a different tool or approach, or ask the user for clarification.",
2678                            tool_name, *repeat_count
2679                        ));
2680                        let _ = tx
2681                            .send(InferenceEvent::Thought(format!(
2682                                "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
2683                                tool_name, *repeat_count
2684                            )))
2685                            .await;
2686                    }
2687
2688                    if is_error {
2689                        consecutive_errors += 1;
2690                    } else {
2691                        consecutive_errors = 0;
2692                    }
2693
2694                    if consecutive_errors >= 3 {
2695                        loop_intervention = Some(
2696                            "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
2697                             STOP all tool calls immediately. Analyze why your previous 3 calls failed \
2698                             (check for hallucinations or invalid arguments) and ask the user for \
2699                             clarification if you cannot proceed.".to_string()
2700                        );
2701                    }
2702
2703                    if consecutive_errors >= 4 {
2704                        self.emit_runtime_failure(
2705                            &tx,
2706                            RuntimeFailureClass::ToolLoop,
2707                            "Hard termination: too many consecutive tool errors.",
2708                        )
2709                        .await;
2710                        return Ok(());
2711                    }
2712
2713                    let _ = tx
2714                        .send(InferenceEvent::ToolCallResult {
2715                            id: call_id.clone(),
2716                            name: tool_name.clone(),
2717                            output: final_output.clone(),
2718                            is_error,
2719                        })
2720                        .await;
2721
2722                    // Cap output before history
2723                    let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
2724                        self.engine.current_context_length(),
2725                    );
2726                    let capped = if implement_current_plan {
2727                        cap_output(&final_output, 1200)
2728                    } else if tool_name == "map_project"
2729                        && self.workflow_mode == WorkflowMode::Architect
2730                    {
2731                        cap_output(&final_output, 2500)
2732                    } else if tool_name == "map_project" {
2733                        cap_output(&final_output, 3500)
2734                    } else if compact_ctx
2735                        && (tool_name == "read_file" || tool_name == "inspect_lines")
2736                    {
2737                        // Compact context: cap file reads tightly and add a navigation hint on truncation.
2738                        let limit = 3000usize;
2739                        if final_output.len() > limit {
2740                            let total_lines = final_output.lines().count();
2741                            let mut split_at = limit;
2742                            while !final_output.is_char_boundary(split_at) && split_at > 0 {
2743                                split_at -= 1;
2744                            }
2745                            format!(
2746                                "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.]",
2747                                &final_output[..split_at],
2748                                total_lines,
2749                                total_lines.saturating_sub(150),
2750                            )
2751                        } else {
2752                            final_output.clone()
2753                        }
2754                    } else {
2755                        cap_output(&final_output, 8000)
2756                    };
2757                    self.history.push(ChatMessage::tool_result_for_model(
2758                        &call_id,
2759                        &tool_name,
2760                        &capped,
2761                        &self.engine.current_model(),
2762                    ));
2763
2764                    if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
2765                    {
2766                        overview_runtime_trace =
2767                            Some(summarize_runtime_trace_output(&final_output));
2768                    } else if architecture_overview_mode && !is_error && tool_name == "map_project"
2769                    {
2770                        overview_project_map = Some(summarize_project_map_output(&final_output));
2771                    }
2772
2773                    if !architecture_overview_mode
2774                        && !is_error
2775                        && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
2776                            || (toolchain_mode && tool_name == "describe_toolchain"))
2777                    {
2778                        authoritative_tool_output = Some(final_output.clone());
2779                    } else if !architecture_overview_mode
2780                        && !is_error
2781                        && tool_name == "map_project"
2782                        && project_map_mode
2783                        && authoritative_tool_output.is_none()
2784                    {
2785                        authoritative_tool_output =
2786                            Some(summarize_project_map_output(&final_output));
2787                    }
2788
2789                    if !is_error && tool_name == "read_file" {
2790                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2791                            let normalized = normalize_workspace_path(path);
2792                            let read_offset =
2793                                res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
2794                            successful_read_targets.insert(normalized.clone());
2795                            successful_read_regions.insert((normalized.clone(), read_offset));
2796                        }
2797                    }
2798
2799                    if !is_error && tool_name == "grep_files" {
2800                        if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2801                            let normalized = normalize_workspace_path(path);
2802                            if final_output.starts_with("No matches for ") {
2803                                no_match_grep_targets.insert(normalized);
2804                            } else if grep_output_is_high_fanout(&final_output) {
2805                                broad_grep_targets.insert(normalized);
2806                            } else {
2807                                successful_grep_targets.insert(normalized);
2808                            }
2809                        }
2810                    }
2811
2812                    if is_error
2813                        && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
2814                        && (final_output.contains("search string not found")
2815                            || final_output.contains("search string is too short")
2816                            || final_output.contains("search string matched"))
2817                    {
2818                        if let Some(target) = action_target_path(&tool_name, &res.args) {
2819                            let guidance = if final_output.contains("matched") {
2820                                format!(
2821                                    "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
2822                                    tool_name, target
2823                                )
2824                            } else {
2825                                format!(
2826                                    "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
2827                                    tool_name, target
2828                                )
2829                            };
2830                            loop_intervention = Some(guidance);
2831                            *repeat_count = 0;
2832                        }
2833                    }
2834
2835                    if res.blocked_by_policy
2836                        && is_mcp_workspace_read_tool(&tool_name)
2837                        && recoverable_policy_intervention.is_none()
2838                    {
2839                        recoverable_policy_intervention = Some(
2840                            "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
2841                        );
2842                        recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
2843                        recoverable_policy_checkpoint = Some((
2844                            OperatorCheckpointState::BlockedPolicy,
2845                            "MCP workspace read blocked; rerouting to built-in file tools."
2846                                .to_string(),
2847                        ));
2848                    } else if res.blocked_by_policy
2849                        && implement_current_plan
2850                        && tool_name == "map_project"
2851                        && recoverable_policy_intervention.is_none()
2852                    {
2853                        recoverable_policy_intervention = Some(
2854                            "STOP. `map_project` is blocked during plan execution. Read your planned target files directly, then edit.".to_string(),
2855                        );
2856                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2857                        recoverable_policy_checkpoint = Some((
2858                            OperatorCheckpointState::BlockedPolicy,
2859                            "`map_project` blocked for current-plan execution.".to_string(),
2860                        ));
2861                    } else if res.blocked_by_policy
2862                        && implement_current_plan
2863                        && is_current_plan_irrelevant_tool(&tool_name)
2864                        && recoverable_policy_intervention.is_none()
2865                    {
2866                        recoverable_policy_intervention = Some(format!(
2867                            "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
2868                            tool_name
2869                        ));
2870                        recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2871                        recoverable_policy_checkpoint = Some((
2872                            OperatorCheckpointState::BlockedPolicy,
2873                            format!(
2874                                "Current-plan execution blocked unrelated tool `{}`.",
2875                                tool_name
2876                            ),
2877                        ));
2878                    } else if res.blocked_by_policy
2879                        && implement_current_plan
2880                        && final_output.contains("requires recent file evidence")
2881                        && recoverable_policy_intervention.is_none()
2882                    {
2883                        let target = action_target_path(&tool_name, &res.args)
2884                            .unwrap_or_else(|| "the target file".to_string());
2885                        recoverable_policy_intervention = Some(format!(
2886                            "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
2887                        ));
2888                        recoverable_policy_recipe =
2889                            Some(RecoveryScenario::RecentFileEvidenceMissing);
2890                        recoverable_policy_checkpoint = Some((
2891                            OperatorCheckpointState::BlockedRecentFileEvidence,
2892                            format!("Edit blocked on `{target}`; recent file evidence missing."),
2893                        ));
2894                    } else if res.blocked_by_policy
2895                        && implement_current_plan
2896                        && final_output.contains("requires an exact local line window first")
2897                        && recoverable_policy_intervention.is_none()
2898                    {
2899                        let target = action_target_path(&tool_name, &res.args)
2900                            .unwrap_or_else(|| "the target file".to_string());
2901                        recoverable_policy_intervention = Some(format!(
2902                            "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
2903                        ));
2904                        recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
2905                        recoverable_policy_checkpoint = Some((
2906                            OperatorCheckpointState::BlockedExactLineWindow,
2907                            format!("Edit blocked on `{target}`; exact line window required."),
2908                        ));
2909                    } else if res.blocked_by_policy && blocked_policy_output.is_none() {
2910                        blocked_policy_output = Some(final_output.clone());
2911                    }
2912
2913                    if *repeat_count >= 5 {
2914                        let _ = tx.send(InferenceEvent::Done).await;
2915                        return Ok(());
2916                    }
2917
2918                    if implement_current_plan
2919                        && !implementation_started
2920                        && !is_error
2921                        && is_non_mutating_plan_step_tool(&tool_name)
2922                    {
2923                        non_mutating_plan_steps += 1;
2924                    }
2925                }
2926
2927                if let Some(intervention) = recoverable_policy_intervention {
2928                    if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
2929                        self.emit_operator_checkpoint(&tx, state, summary).await;
2930                    }
2931                    if let Some(scenario) = recoverable_policy_recipe.take() {
2932                        let recipe = plan_recovery(scenario, &self.recovery_context);
2933                        self.emit_recovery_recipe_summary(
2934                            &tx,
2935                            recipe.recipe.scenario.label(),
2936                            compact_recovery_plan_summary(&recipe),
2937                        )
2938                        .await;
2939                    }
2940                    loop_intervention = Some(intervention);
2941                    let _ = tx
2942                        .send(InferenceEvent::Thought(
2943                            "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
2944                                .into(),
2945                        ))
2946                        .await;
2947                    continue;
2948                }
2949
2950                if architecture_overview_mode {
2951                    match (
2952                        overview_project_map.as_deref(),
2953                        overview_runtime_trace.as_deref(),
2954                    ) {
2955                        (Some(project_map), Some(runtime_trace)) => {
2956                            let response =
2957                                build_architecture_overview_answer(project_map, runtime_trace);
2958                            self.history.push(ChatMessage::assistant_text(&response));
2959                            self.transcript.log_agent(&response);
2960
2961                            for chunk in chunk_text(&response, 8) {
2962                                if !chunk.is_empty() {
2963                                    let _ = tx.send(InferenceEvent::Token(chunk)).await;
2964                                }
2965                            }
2966
2967                            let _ = tx.send(InferenceEvent::Done).await;
2968                            break;
2969                        }
2970                        (Some(_), None) => {
2971                            loop_intervention = Some(
2972                                "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
2973                                    .to_string(),
2974                            );
2975                            continue;
2976                        }
2977                        (None, Some(_)) => {
2978                            loop_intervention = Some(
2979                                "Good. You now have the grounded runtime/control-flow trace. Next, call `map_project` once to capture entrypoints and core owner files. Keep it compact and do not call broad file-read tools in this architecture overview."
2980                                    .to_string(),
2981                            );
2982                            continue;
2983                        }
2984                        (None, None) => {}
2985                    }
2986                }
2987
2988                if implement_current_plan
2989                    && !implementation_started
2990                    && non_mutating_plan_steps >= non_mutating_plan_hard_cap
2991                {
2992                    let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
2993                    self.history.push(ChatMessage::assistant_text(&msg));
2994                    self.transcript.log_agent(&msg);
2995
2996                    for chunk in chunk_text(&msg, 8) {
2997                        if !chunk.is_empty() {
2998                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
2999                        }
3000                    }
3001
3002                    let _ = tx.send(InferenceEvent::Done).await;
3003                    break;
3004                }
3005
3006                if let Some(blocked_output) = blocked_policy_output {
3007                    self.emit_operator_checkpoint(
3008                        &tx,
3009                        OperatorCheckpointState::BlockedPolicy,
3010                        "A blocked tool path was surfaced directly to the operator.",
3011                    )
3012                    .await;
3013                    self.history
3014                        .push(ChatMessage::assistant_text(&blocked_output));
3015                    self.transcript.log_agent(&blocked_output);
3016
3017                    for chunk in chunk_text(&blocked_output, 8) {
3018                        if !chunk.is_empty() {
3019                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3020                        }
3021                    }
3022
3023                    let _ = tx.send(InferenceEvent::Done).await;
3024                    break;
3025                }
3026
3027                if let Some(tool_output) = authoritative_tool_output {
3028                    self.history.push(ChatMessage::assistant_text(&tool_output));
3029                    self.transcript.log_agent(&tool_output);
3030
3031                    for chunk in chunk_text(&tool_output, 8) {
3032                        if !chunk.is_empty() {
3033                            let _ = tx.send(InferenceEvent::Token(chunk)).await;
3034                        }
3035                    }
3036
3037                    let _ = tx.send(InferenceEvent::Done).await;
3038                    break;
3039                }
3040
3041                if implement_current_plan && !implementation_started {
3042                    let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3043                    if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3044                        loop_intervention = Some(format!(
3045                            "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3046                            base
3047                        ));
3048                    } else {
3049                        loop_intervention = Some(base.to_string());
3050                    }
3051                } else if self.workflow_mode == WorkflowMode::Architect {
3052                    loop_intervention = Some(
3053                        format!(
3054                            "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3055                            architect_handoff_contract()
3056                        ),
3057                    );
3058                }
3059
3060                // 4. Auto-Verification Loop (The Perfect Bake)
3061                if mutation_occurred && !yolo {
3062                    let _ = tx
3063                        .send(InferenceEvent::Thought(
3064                            "Self-Verification: Running 'cargo check' to ensure build integrity..."
3065                                .into(),
3066                        ))
3067                        .await;
3068                    let verify_res = self.auto_verify_build().await;
3069                    let verify_ok = verify_res.contains("BUILD SUCCESS");
3070                    self.record_verify_build_result(verify_ok, &verify_res)
3071                        .await;
3072                    self.record_session_verification(
3073                        verify_ok,
3074                        if verify_ok {
3075                            "Automatic build verification passed."
3076                        } else {
3077                            "Automatic build verification failed."
3078                        },
3079                    );
3080                    self.history.push(ChatMessage::system(&format!(
3081                        "\n# SYSTEM VERIFICATION\n{verify_res}"
3082                    )));
3083                    let _ = tx
3084                        .send(InferenceEvent::Thought(
3085                            "Verification turn injected into history.".into(),
3086                        ))
3087                        .await;
3088                }
3089
3090                // Continue loop – the model will respond to the results.
3091                continue;
3092            } else if let Some(response_text) = text {
3093                if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3094                    if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3095                        let cleaned = build_session_reset_semantics_answer();
3096                        self.history.push(ChatMessage::assistant_text(&cleaned));
3097                        self.transcript.log_agent(&cleaned);
3098                        for chunk in chunk_text(&cleaned, 8) {
3099                            if !chunk.is_empty() {
3100                                let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3101                            }
3102                        }
3103                        let _ = tx.send(InferenceEvent::Done).await;
3104                        break;
3105                    }
3106
3107                    let warning = format_runtime_failure(
3108                        RuntimeFailureClass::ContextWindow,
3109                        "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3110                    );
3111                    self.history.push(ChatMessage::assistant_text(&warning));
3112                    self.transcript.log_agent(&warning);
3113                    let _ = tx
3114                        .send(InferenceEvent::Thought(
3115                            "Length recovery: model hit the context ceiling before completing the answer."
3116                                .into(),
3117                        ))
3118                        .await;
3119                    for chunk in chunk_text(&warning, 8) {
3120                        if !chunk.is_empty() {
3121                            let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3122                        }
3123                    }
3124                    let _ = tx.send(InferenceEvent::Done).await;
3125                    break;
3126                }
3127
3128                if response_text.contains("<|tool_call")
3129                    || response_text.contains("[END_TOOL_REQUEST]")
3130                    || response_text.contains("<|tool_response")
3131                    || response_text.contains("<tool_response|>")
3132                {
3133                    loop_intervention = Some(
3134                        "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3135                    );
3136                    continue;
3137                }
3138
3139                // 1. Process and route the reasoning block to SPECULAR.
3140                if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3141                {
3142                    let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3143                    // Persist for history audit (stripped from next turn by Volatile Reasoning rule).
3144                    // This will be summarized in the next turn's system prompt.
3145                    self.reasoning_history = Some(thought);
3146                }
3147
3148                // 2. Process and stream the final answer to the chat interface.
3149                let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3150
3151                if implement_current_plan && !implementation_started {
3152                    loop_intervention = Some(
3153                        "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3154                    );
3155                    continue;
3156                }
3157
3158                // [Hardened Interface] Strictly respect the stripper.
3159                // If it's empty, we stay silent in the chat area (reasoning is in SPECULAR).
3160                if cleaned.is_empty() {
3161                    let _ = tx.send(InferenceEvent::Done).await;
3162                    break;
3163                }
3164
3165                self.persist_architect_handoff(&cleaned);
3166                self.history.push(ChatMessage::assistant_text(&cleaned));
3167                self.transcript.log_agent(&cleaned);
3168
3169                // Send in smooth chunks for that professional UI feel.
3170                for chunk in chunk_text(&cleaned, 8) {
3171                    if !chunk.is_empty() {
3172                        let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3173                    }
3174                }
3175
3176                let _ = tx.send(InferenceEvent::Done).await;
3177                break;
3178            } else {
3179                let detail = "Model returned an empty response.";
3180                let class = classify_runtime_failure(detail);
3181                if should_retry_runtime_failure(class) {
3182                    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3183                        if let RecoveryDecision::Attempt(plan) =
3184                            attempt_recovery(scenario, &mut self.recovery_context)
3185                        {
3186                            self.transcript.log_system(
3187                                "Automatic provider recovery triggered: model returned an empty response.",
3188                            );
3189                            self.emit_recovery_recipe_summary(
3190                                &tx,
3191                                plan.recipe.scenario.label(),
3192                                compact_recovery_plan_summary(&plan),
3193                            )
3194                            .await;
3195                            let _ = tx
3196                                .send(InferenceEvent::ProviderStatus {
3197                                    state: ProviderRuntimeState::Recovering,
3198                                    summary: compact_runtime_recovery_summary(class).into(),
3199                                })
3200                                .await;
3201                            self.emit_operator_checkpoint(
3202                                &tx,
3203                                OperatorCheckpointState::RecoveringProvider,
3204                                compact_runtime_recovery_summary(class),
3205                            )
3206                            .await;
3207                            continue;
3208                        }
3209                    }
3210                }
3211
3212                self.emit_runtime_failure(&tx, class, detail).await;
3213                break;
3214            }
3215        }
3216
3217        self.trim_history(80);
3218        self.refresh_session_memory();
3219        self.save_session();
3220        self.emit_compaction_pressure(&tx).await;
3221        Ok(())
3222    }
3223
3224    async fn emit_runtime_failure(
3225        &mut self,
3226        tx: &mpsc::Sender<InferenceEvent>,
3227        class: RuntimeFailureClass,
3228        detail: &str,
3229    ) {
3230        if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3231            let decision = preview_recovery_decision(scenario, &self.recovery_context);
3232            self.emit_recovery_recipe_summary(
3233                tx,
3234                scenario.label(),
3235                compact_recovery_decision_summary(&decision),
3236            )
3237            .await;
3238            let needs_refresh = match &decision {
3239                RecoveryDecision::Attempt(plan) => plan
3240                    .recipe
3241                    .steps
3242                    .contains(&RecoveryStep::RefreshRuntimeProfile),
3243                RecoveryDecision::Escalate { recipe, .. } => {
3244                    recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3245                }
3246            };
3247            if needs_refresh {
3248                if let Some((model_id, context_length, changed)) = self
3249                    .refresh_runtime_profile_and_report(tx, "context_window_failure")
3250                    .await
3251                {
3252                    let note = if changed {
3253                        format!(
3254                            "Runtime refresh after context-window failure: model {} | CTX {}",
3255                            model_id, context_length
3256                        )
3257                    } else {
3258                        format!(
3259                            "Runtime refresh after context-window failure confirms model {} | CTX {}",
3260                            model_id, context_length
3261                        )
3262                    };
3263                    let _ = tx.send(InferenceEvent::Thought(note)).await;
3264                }
3265            }
3266        }
3267        if let Some(state) = provider_state_for_runtime_failure(class) {
3268            let _ = tx
3269                .send(InferenceEvent::ProviderStatus {
3270                    state,
3271                    summary: compact_runtime_failure_summary(class).into(),
3272                })
3273                .await;
3274        }
3275        if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3276            self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3277                .await;
3278        }
3279        let formatted = format_runtime_failure(class, detail);
3280        self.history.push(ChatMessage::system(&format!(
3281            "# RUNTIME FAILURE\n{}",
3282            formatted
3283        )));
3284        self.transcript.log_system(&formatted);
3285        let _ = tx.send(InferenceEvent::Error(formatted)).await;
3286        let _ = tx.send(InferenceEvent::Done).await;
3287    }
3288
3289    /// [Task Analyzer] Run 'cargo check' and return a concise summary for the model.
3290    async fn auto_verify_build(&self) -> String {
3291        match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3292            Ok(out) => {
3293                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3294                    + &cap_output(&out, 2000)
3295            }
3296            Err(e) => format!(
3297                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3298                cap_output(&e, 2000)
3299            ),
3300        }
3301    }
3302
3303    /// Triggers an LLM call to summarize old messages if history exceeds the VRAM character limit.
3304    /// Triggers the Deterministic Smart Compaction algorithm to shrink history while preserving context.
3305    /// Triggers the Recursive Context Compactor.
3306    async fn compact_history_if_needed(
3307        &mut self,
3308        tx: &mpsc::Sender<InferenceEvent>,
3309        anchor_index: Option<usize>,
3310    ) -> Result<bool, String> {
3311        let vram_ratio = self.gpu_state.ratio();
3312        let context_length = self.engine.current_context_length();
3313        let config = CompactionConfig::adaptive(context_length, vram_ratio);
3314
3315        if !compaction::should_compact(&self.history, context_length, vram_ratio) {
3316            return Ok(false);
3317        }
3318
3319        let _ = tx
3320            .send(InferenceEvent::Thought(format!(
3321                "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
3322                context_length / 1000,
3323                vram_ratio * 100.0,
3324                config.max_estimated_tokens / 1000,
3325            )))
3326            .await;
3327
3328        let result = compaction::compact_history(
3329            &self.history,
3330            self.running_summary.as_deref(),
3331            config,
3332            anchor_index,
3333        );
3334
3335        let removed_message_count = self.history.len().saturating_sub(result.messages.len());
3336        self.history = result.messages;
3337        self.running_summary = result.summary;
3338
3339        // Layer 6: Memory Synthesis (Task Context Persistence)
3340        let previous_memory = self.session_memory.clone();
3341        self.session_memory = compaction::extract_memory(&self.history);
3342        self.session_memory
3343            .inherit_runtime_ledger_from(&previous_memory);
3344        self.session_memory.record_compaction(
3345            removed_message_count,
3346            format!(
3347                "Compacted history around active task '{}' and preserved {} working-set file(s).",
3348                self.session_memory.current_task,
3349                self.session_memory.working_set.len()
3350            ),
3351        );
3352        self.emit_compaction_pressure(tx).await;
3353
3354        // Jinja alignment: preserved slice may start with assistant/tool messages.
3355        // Strip any leading non-user messages so the first non-system message is always user.
3356        let first_non_sys = self
3357            .history
3358            .iter()
3359            .position(|m| m.role != "system")
3360            .unwrap_or(self.history.len());
3361        if first_non_sys < self.history.len() {
3362            if let Some(user_offset) = self.history[first_non_sys..]
3363                .iter()
3364                .position(|m| m.role == "user")
3365            {
3366                if user_offset > 0 {
3367                    self.history
3368                        .drain(first_non_sys..first_non_sys + user_offset);
3369                }
3370            }
3371        }
3372
3373        let _ = tx
3374            .send(InferenceEvent::Thought(format!(
3375                "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
3376                self.session_memory.current_task,
3377                self.session_memory.working_set.len()
3378            )))
3379            .await;
3380        let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
3381        self.emit_recovery_recipe_summary(
3382            tx,
3383            recipe.recipe.scenario.label(),
3384            compact_recovery_plan_summary(&recipe),
3385        )
3386        .await;
3387        self.emit_operator_checkpoint(
3388            tx,
3389            OperatorCheckpointState::HistoryCompacted,
3390            format!(
3391                "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
3392                self.session_memory.current_task,
3393                self.session_memory.working_set.len()
3394            ),
3395        )
3396        .await;
3397
3398        Ok(true)
3399    }
3400
3401    /// Query The Vein for context relevant to the user's message.
3402    /// Runs hybrid BM25 + semantic search (semantic requires embedding model in LM Studio).
3403    /// Returns a formatted system message string, or None if nothing useful found.
3404    fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3405        // Skip trivial / very short inputs.
3406        if query.trim().split_whitespace().count() < 3 {
3407            return None;
3408        }
3409
3410        let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3411        if results.is_empty() {
3412            return None;
3413        }
3414
3415        let semantic_active = self.vein.has_any_embeddings();
3416        let header = if semantic_active {
3417            "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3418             Use this to answer without needing extra read_file calls where possible.\n\n"
3419        } else {
3420            "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3421             Use this to answer without needing extra read_file calls where possible.\n\n"
3422        };
3423
3424        let mut ctx = String::from(header);
3425        let mut paths: Vec<String> = Vec::new();
3426
3427        let mut total = 0usize;
3428        const MAX_CTX_CHARS: usize = 1_500;
3429
3430        for r in results {
3431            if total >= MAX_CTX_CHARS {
3432                break;
3433            }
3434            let snippet = if r.content.len() > 500 {
3435                format!("{}...", &r.content[..500])
3436            } else {
3437                r.content.clone()
3438            };
3439            ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3440            total += snippet.len() + r.path.len() + 10;
3441            if !paths.contains(&r.path) {
3442                paths.push(r.path);
3443            }
3444        }
3445
3446        Some((ctx, paths))
3447    }
3448
3449    /// Returns the conversation history (WITHOUT the system prompt) for the context window.
3450    /// This ensures we don't have redundant system blocks and prevents Jinja crashes.
3451    fn context_window_slice(&self) -> Vec<ChatMessage> {
3452        let mut result = Vec::new();
3453
3454        // Skip index 0 (the raw system message) and any stray system messages in history.
3455        if self.history.len() > 1 {
3456            for m in &self.history[1..] {
3457                if m.role == "system" {
3458                    continue;
3459                }
3460
3461                let mut sanitized = m.clone();
3462                // DEEP SANITIZE: LM Studio Jinja templates for Qwen crash on truly empty content.
3463                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3464                    sanitized.content = MessageContent::Text(" ".into());
3465                }
3466                result.push(sanitized);
3467            }
3468        }
3469
3470        // Jinja Guard: The first message after the system prompt MUST be 'user'.
3471        // If not (e.g. because of compaction), we insert a tiny anchor.
3472        if !result.is_empty() && result[0].role != "user" {
3473            result.insert(0, ChatMessage::user("Continuing previous context..."));
3474        }
3475
3476        result
3477    }
3478
3479    fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
3480        let mut result = Vec::new();
3481
3482        if self.history.len() > 1 {
3483            let start = start_idx.max(1).min(self.history.len());
3484            for m in &self.history[start..] {
3485                if m.role == "system" {
3486                    continue;
3487                }
3488
3489                let mut sanitized = m.clone();
3490                if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3491                    sanitized.content = MessageContent::Text(" ".into());
3492                }
3493                result.push(sanitized);
3494            }
3495        }
3496
3497        if !result.is_empty() && result[0].role != "user" {
3498            result.insert(0, ChatMessage::user("Continuing current plan execution..."));
3499        }
3500
3501        result
3502    }
3503
3504    /// Drop old turns from the middle of history.
3505    fn trim_history(&mut self, max_messages: usize) {
3506        if self.history.len() <= max_messages {
3507            return;
3508        }
3509        // Always keep [0] (system prompt).
3510        let excess = self.history.len() - max_messages;
3511        self.history.drain(1..=excess);
3512    }
3513
3514    /// P1: Attempt to fix malformed JSON tool arguments by asking the model to re-output them.
3515    async fn repair_tool_args(
3516        &self,
3517        tool_name: &str,
3518        bad_json: &str,
3519        tx: &mpsc::Sender<InferenceEvent>,
3520    ) -> Result<Value, String> {
3521        let _ = tx
3522            .send(InferenceEvent::Thought(format!(
3523                "Attempting to repair malformed JSON for '{}'...",
3524                tool_name
3525            )))
3526            .await;
3527
3528        let prompt = format!(
3529            "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
3530            tool_name, bad_json
3531        );
3532
3533        let messages = vec![
3534            ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
3535            ChatMessage::user(&prompt),
3536        ];
3537
3538        // Use fast model for speed if available.
3539        let (text, _, _, _) = self
3540            .engine
3541            .call_with_tools(&messages, &[], self.fast_model.as_deref())
3542            .await
3543            .map_err(|e| e.to_string())?;
3544
3545        let cleaned = text
3546            .unwrap_or_default()
3547            .trim()
3548            .trim_start_matches("```json")
3549            .trim_start_matches("```")
3550            .trim_end_matches("```")
3551            .trim()
3552            .to_string();
3553
3554        serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
3555    }
3556
3557    /// P2: Run a fast validation step after file writes to check for subtle logic errors.
3558    async fn run_critic_check(
3559        &self,
3560        path: &str,
3561        content: &str,
3562        tx: &mpsc::Sender<InferenceEvent>,
3563    ) -> Option<String> {
3564        // Only run for source code files.
3565        let ext = std::path::Path::new(path)
3566            .extension()
3567            .and_then(|e| e.to_str())
3568            .unwrap_or("");
3569        const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
3570        if !CRITIC_EXTS.contains(&ext) {
3571            return None;
3572        }
3573
3574        let _ = tx
3575            .send(InferenceEvent::Thought(format!(
3576                "CRITIC: Reviewing changes to '{}'...",
3577                path
3578            )))
3579            .await;
3580
3581        let truncated = cap_output(content, 4000);
3582
3583        let prompt = format!(
3584            "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
3585            path, ext, truncated
3586        );
3587
3588        let messages = vec![
3589            ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
3590            ChatMessage::user(&prompt)
3591        ];
3592
3593        let (text, _, _, _) = self
3594            .engine
3595            .call_with_tools(&messages, &[], self.fast_model.as_deref())
3596            .await
3597            .ok()?;
3598
3599        let critique = text?.trim().to_string();
3600        if critique.to_uppercase().contains("PASS") || critique.is_empty() {
3601            None
3602        } else {
3603            Some(critique)
3604        }
3605    }
3606}
3607
3608// ── Tool dispatcher ───────────────────────────────────────────────────────────
3609
3610pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
3611    dispatch_builtin_tool(name, args).await
3612}
3613
3614impl ConversationManager {
3615    /// Checks if a tool call is authorized given the current configuration and mode.
3616    fn check_authorization(
3617        &self,
3618        name: &str,
3619        args: &serde_json::Value,
3620        config: &crate::agent::config::HematiteConfig,
3621        yolo_flag: bool,
3622    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
3623        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
3624    }
3625
3626    /// Layer 4: Isolated tool execution logic. Does not mutate 'self' to allow parallelism.
3627    async fn process_tool_call(
3628        &self,
3629        call: ToolCallFn,
3630        config: crate::agent::config::HematiteConfig,
3631        yolo: bool,
3632        tx: mpsc::Sender<InferenceEvent>,
3633        real_id: String,
3634    ) -> ToolExecutionOutcome {
3635        let mut msg_results = Vec::new();
3636        let gemma4_model =
3637            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
3638        let normalized_arguments = if gemma4_model {
3639            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
3640        } else {
3641            call.arguments.clone()
3642        };
3643
3644        // 1. Argument Parsing & Repair
3645        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
3646            Ok(v) => v,
3647            Err(_) => {
3648                match self
3649                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
3650                    .await
3651                {
3652                    Ok(v) => v,
3653                    Err(e) => {
3654                        let _ = tx
3655                            .send(InferenceEvent::Thought(format!(
3656                                "JSON Repair failed: {}",
3657                                e
3658                            )))
3659                            .await;
3660                        Value::Object(Default::default())
3661                    }
3662                }
3663            }
3664        };
3665
3666        if call.name == "map_project" && self.workflow_mode == WorkflowMode::Architect {
3667            if let Some(obj) = args.as_object_mut() {
3668                obj.entry("include_symbols".to_string())
3669                    .or_insert(Value::Bool(false));
3670                obj.entry("max_depth".to_string())
3671                    .or_insert(Value::Number(2_u64.into()));
3672            }
3673        } else if call.name == "map_project" && self.workflow_mode.is_read_only() {
3674            if let Some(obj) = args.as_object_mut() {
3675                obj.entry("include_symbols".to_string())
3676                    .or_insert(Value::Bool(false));
3677                obj.entry("max_depth".to_string())
3678                    .or_insert(Value::Number(3_u64.into()));
3679            }
3680        }
3681
3682        let display = format_tool_display(&call.name, &args);
3683        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
3684        let auth = self.check_authorization(&call.name, &args, &config, yolo);
3685
3686        // 2. Permission Check
3687        let decision_result = match precondition_result {
3688            Err(e) => Err(e),
3689            Ok(_) => match auth {
3690                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
3691                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
3692                    reason,
3693                    source: _,
3694                } => {
3695                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
3696                    let _ = tx
3697                        .send(InferenceEvent::ApprovalRequired {
3698                            id: real_id.clone(),
3699                            name: call.name.clone(),
3700                            display: format!("{}\nWhy: {}", display, reason),
3701                            diff: None,
3702                            responder: approve_tx,
3703                        })
3704                        .await;
3705
3706                    match approve_rx.await {
3707                        Ok(true) => Ok(()),
3708                        _ => Err("Declined by user".into()),
3709                    }
3710                }
3711                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
3712                    reason, ..
3713                } => Err(reason),
3714            },
3715        };
3716        let blocked_by_policy =
3717            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
3718
3719        // 3. Execution (Local or MCP)
3720        let (output, is_error) = match decision_result {
3721            Err(e) => (format!("Error: {}", e), true),
3722            Ok(_) => {
3723                let _ = tx
3724                    .send(InferenceEvent::ToolCallStart {
3725                        id: real_id.clone(),
3726                        name: call.name.clone(),
3727                        args: display.clone(),
3728                    })
3729                    .await;
3730
3731                let result = if call.name.starts_with("lsp_") {
3732                    let lsp = self.lsp_manager.clone();
3733                    let path = args
3734                        .get("path")
3735                        .and_then(|v| v.as_str())
3736                        .unwrap_or("")
3737                        .to_string();
3738                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
3739                    let character =
3740                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
3741
3742                    match call.name.as_str() {
3743                        "lsp_definitions" => {
3744                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
3745                                .await
3746                        }
3747                        "lsp_references" => {
3748                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
3749                                .await
3750                        }
3751                        "lsp_hover" => {
3752                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
3753                        }
3754                        "lsp_search_symbol" => {
3755                            let query = args
3756                                .get("query")
3757                                .and_then(|v| v.as_str())
3758                                .unwrap_or_default()
3759                                .to_string();
3760                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
3761                        }
3762                        "lsp_rename_symbol" => {
3763                            let new_name = args
3764                                .get("new_name")
3765                                .and_then(|v| v.as_str())
3766                                .unwrap_or_default()
3767                                .to_string();
3768                            crate::tools::lsp_tools::lsp_rename_symbol(
3769                                lsp, path, line, character, new_name,
3770                            )
3771                            .await
3772                        }
3773                        "lsp_get_diagnostics" => {
3774                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
3775                        }
3776                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
3777                    }
3778                } else if call.name == "auto_pin_context" {
3779                    let pts = args.get("paths").and_then(|v| v.as_array());
3780                    let reason = args
3781                        .get("reason")
3782                        .and_then(|v| v.as_str())
3783                        .unwrap_or("uninformed scoping");
3784                    if let Some(arr) = pts {
3785                        let mut pinned = Vec::new();
3786                        {
3787                            let mut guard = self.pinned_files.lock().await;
3788                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; // 25MB Safety Valve
3789
3790                            for v in arr.iter().take(3) {
3791                                if let Some(p) = v.as_str() {
3792                                    if let Ok(meta) = std::fs::metadata(p) {
3793                                        if meta.len() > MAX_PINNED_SIZE {
3794                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
3795                                            continue;
3796                                        }
3797                                        if let Ok(content) = std::fs::read_to_string(p) {
3798                                            guard.insert(p.to_string(), content);
3799                                            pinned.push(p.to_string());
3800                                        }
3801                                    }
3802                                }
3803                            }
3804                        }
3805                        let msg = format!(
3806                            "Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
3807                            pinned.join(", "),
3808                            reason
3809                        );
3810                        let _ = tx
3811                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
3812                            .await;
3813                        Ok(msg)
3814                    } else {
3815                        Err("Missing 'paths' array for auto_pin_context.".to_string())
3816                    }
3817                } else if call.name == "list_pinned" {
3818                    let paths_msg = {
3819                        let pinned = self.pinned_files.lock().await;
3820                        if pinned.is_empty() {
3821                            "No files are currently pinned.".to_string()
3822                        } else {
3823                            let paths: Vec<_> = pinned.keys().cloned().collect();
3824                            format!(
3825                                "Currently pinned files in active memory:\n- {}",
3826                                paths.join("\n- ")
3827                            )
3828                        }
3829                    };
3830                    Ok(paths_msg)
3831                } else if call.name.starts_with("mcp__") {
3832                    let mut mcp = self.mcp_manager.lock().await;
3833                    match mcp.call_tool(&call.name, &args).await {
3834                        Ok(res) => Ok(res),
3835                        Err(e) => Err(e.to_string()),
3836                    }
3837                } else if call.name == "swarm" {
3838                    // ── Swarm Orchestration ──
3839                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
3840                    let max_workers = args
3841                        .get("max_workers")
3842                        .and_then(|v| v.as_u64())
3843                        .unwrap_or(3) as usize;
3844
3845                    let mut task_objs = Vec::new();
3846                    if let Value::Array(arr) = tasks_val {
3847                        for v in arr {
3848                            let id = v
3849                                .get("id")
3850                                .and_then(|x| x.as_str())
3851                                .unwrap_or("?")
3852                                .to_string();
3853                            let target = v
3854                                .get("target")
3855                                .and_then(|x| x.as_str())
3856                                .unwrap_or("?")
3857                                .to_string();
3858                            let instruction = v
3859                                .get("instruction")
3860                                .and_then(|x| x.as_str())
3861                                .unwrap_or("?")
3862                                .to_string();
3863                            task_objs.push(crate::agent::parser::WorkerTask {
3864                                id,
3865                                target,
3866                                instruction,
3867                            });
3868                        }
3869                    }
3870
3871                    if task_objs.is_empty() {
3872                        Err("No tasks provided for swarm.".to_string())
3873                    } else {
3874                        let (swarm_tx_internal, mut swarm_rx_internal) =
3875                            tokio::sync::mpsc::channel(32);
3876                        let tx_forwarder = tx.clone();
3877
3878                        // Bridge SwarmMessage -> InferenceEvent
3879                        tokio::spawn(async move {
3880                            while let Some(msg) = swarm_rx_internal.recv().await {
3881                                match msg {
3882                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
3883                                        let _ = tx_forwarder
3884                                            .send(InferenceEvent::Thought(format!(
3885                                                "Swarm [{}]: {}% complete",
3886                                                id, p
3887                                            )))
3888                                            .await;
3889                                    }
3890                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
3891                                        worker_id,
3892                                        file_path,
3893                                        before: _,
3894                                        after: _,
3895                                        tx,
3896                                    } => {
3897                                        let (approve_tx, approve_rx) =
3898                                            tokio::sync::oneshot::channel::<bool>();
3899                                        let display = format!(
3900                                            "Swarm worker [{}]: Integrated changes into {:?}",
3901                                            worker_id, file_path
3902                                        );
3903                                        let _ = tx_forwarder
3904                                            .send(InferenceEvent::ApprovalRequired {
3905                                                id: format!("swarm_{}", worker_id),
3906                                                name: "swarm_apply".to_string(),
3907                                                display,
3908                                                diff: None,
3909                                                responder: approve_tx,
3910                                            })
3911                                            .await;
3912                                        if let Ok(approved) = approve_rx.await {
3913                                            let response = if approved {
3914                                                crate::agent::swarm::ReviewResponse::Accept
3915                                            } else {
3916                                                crate::agent::swarm::ReviewResponse::Reject
3917                                            };
3918                                            let _ = tx.send(response);
3919                                        }
3920                                    }
3921                                    crate::agent::swarm::SwarmMessage::Done => {}
3922                                }
3923                            }
3924                        });
3925
3926                        let coordinator = self.swarm_coordinator.clone();
3927                        match coordinator
3928                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
3929                            .await
3930                        {
3931                            Ok(_) => Ok(
3932                                "Swarm execution completed. Check files for integration results."
3933                                    .to_string(),
3934                            ),
3935                            Err(e) => Err(format!("Swarm failure: {}", e)),
3936                        }
3937                    }
3938                } else if call.name == "vision_analyze" {
3939                    crate::tools::vision::vision_analyze(&self.engine, &args).await
3940                } else if matches!(
3941                    call.name.as_str(),
3942                    "edit_file" | "patch_hunk" | "multi_search_replace"
3943                ) && !yolo
3944                {
3945                    // ── Diff preview gate ─────────────────────────────────────
3946                    // Compute what the edit would look like before applying it.
3947                    // If we can build a diff, require user Y/N in the TUI.
3948                    let diff_result = match call.name.as_str() {
3949                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
3950                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
3951                        _ => crate::tools::file_ops::compute_msr_diff(&args),
3952                    };
3953                    match diff_result {
3954                        Ok(diff_text) => {
3955                            let path_label =
3956                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
3957                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
3958                            let _ = tx
3959                                .send(InferenceEvent::ApprovalRequired {
3960                                    id: real_id.clone(),
3961                                    name: call.name.clone(),
3962                                    display: format!("Edit preview: {}", path_label),
3963                                    diff: Some(diff_text),
3964                                    responder: appr_tx,
3965                                })
3966                                .await;
3967                            match appr_rx.await {
3968                                Ok(true) => dispatch_tool(&call.name, &args).await,
3969                                _ => Err("Edit declined by user.".into()),
3970                            }
3971                        }
3972                        // Diff computation failed (e.g. search string not found yet) —
3973                        // fall through and let the tool return its own error.
3974                        Err(_) => dispatch_tool(&call.name, &args).await,
3975                    }
3976                } else {
3977                    dispatch_tool(&call.name, &args).await
3978                };
3979
3980                match result {
3981                    Ok(o) => (o, false),
3982                    Err(e) => (format!("Error: {}", e), true),
3983                }
3984            }
3985        };
3986
3987        // ── Session Economics ────────────────────────────────────────────────
3988        {
3989            if let Ok(mut econ) = self.engine.economics.lock() {
3990                econ.record_tool(&call.name, !is_error);
3991            }
3992        }
3993
3994        if !is_error {
3995            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
3996                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
3997                    if call.name == "inspect_lines" {
3998                        self.record_line_inspection(path).await;
3999                    } else {
4000                        self.record_read_observation(path).await;
4001                    }
4002                }
4003            }
4004
4005            if call.name == "verify_build" {
4006                let ok = output.contains("BUILD OK")
4007                    || output.contains("BUILD SUCCESS")
4008                    || output.contains("BUILD OKAY");
4009                self.record_verify_build_result(ok, &output).await;
4010            }
4011
4012            if matches!(
4013                call.name.as_str(),
4014                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4015            ) || is_mcp_mutating_tool(&call.name)
4016            {
4017                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
4018                    .await;
4019            }
4020
4021            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
4022                msg_results.push(receipt);
4023            }
4024        }
4025
4026        // 4. Critic Check (Specular Tier 2)
4027        // Gated: Only run on code files with substantive content to avoid burning tokens
4028        // on trivial doc/config edits.
4029        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
4030            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
4031            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
4032            let ext = std::path::Path::new(path)
4033                .extension()
4034                .and_then(|e| e.to_str())
4035                .unwrap_or("");
4036            const SKIP_EXTS: &[&str] = &[
4037                "md",
4038                "toml",
4039                "json",
4040                "txt",
4041                "yml",
4042                "yaml",
4043                "cfg",
4044                "csv",
4045                "lock",
4046                "gitignore",
4047            ];
4048            let line_count = content.lines().count();
4049            if !path.is_empty()
4050                && !content.is_empty()
4051                && !SKIP_EXTS.contains(&ext)
4052                && line_count >= 50
4053            {
4054                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
4055                    msg_results.push(ChatMessage::system(&format!(
4056                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
4057                        path, critique
4058                    )));
4059                }
4060            }
4061        }
4062
4063        ToolExecutionOutcome {
4064            call_id: real_id,
4065            tool_name: call.name,
4066            args,
4067            output,
4068            is_error,
4069            blocked_by_policy,
4070            msg_results,
4071        }
4072    }
4073}
4074
/// The result of an isolated tool execution.
/// Used to bridge Parallel/Serial execution back to the main history.
struct ToolExecutionOutcome {
    /// Identifier of the tool call this outcome belongs to.
    call_id: String,
    /// Name of the tool that was invoked.
    tool_name: String,
    /// Arguments the tool was invoked with.
    args: Value,
    /// Text produced by the tool; dispatch failures are rendered as `Error: <message>`.
    output: String,
    /// True when the tool call failed.
    is_error: bool,
    /// NOTE(review): presumably true when policy refused the call before it ran — confirm
    /// against the code that sets it.
    blocked_by_policy: bool,
    /// Extra chat messages generated alongside the result (e.g. action receipts and
    /// critic reviews).
    msg_results: Vec<ChatMessage>,
}
4086
/// Reports whether `path` ends in a file extension commonly used for source code.
///
/// The extension is compared without regard to ASCII case. Paths that have no
/// extension (including dot-files such as `.rs`) are not considered code.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    std::path::Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
        .map_or(false, |found| {
            CODE_EXTENSIONS
                .iter()
                .any(|known| found.eq_ignore_ascii_case(known))
        })
}
4115
4116// ── Display helpers ───────────────────────────────────────────────────────────
4117
4118pub fn format_tool_display(name: &str, args: &Value) -> String {
4119    let get = |key: &str| {
4120        args.get(key)
4121            .and_then(|v| v.as_str())
4122            .unwrap_or("")
4123            .to_string()
4124    };
4125    match name {
4126        "shell" => format!("$ {}", get("command")),
4127        "map_project" => "map project architecture".to_string(),
4128        "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
4129        "describe_toolchain" => format!("describe toolchain {}", get("topic")),
4130        "inspect_host" => format!("inspect host {}", get("topic")),
4131        _ => format!("{} {:?}", name, args),
4132    }
4133}
4134
4135// ── Text utilities ────────────────────────────────────────────────────────────
4136
/// Heuristically detects shell commands that look like host inspection
/// (PATH dumps, tool version probes, port/listener queries, user-folder listings).
///
/// The command is lower-cased (ASCII only) and checked for any of a fixed set
/// of substrings; a single hit is enough.
fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const INSPECTION_MARKERS: &[&str] = &[
        "$env:path",
        "pathvariable",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "desktop",
        "downloads",
    ];

    let normalized = command.to_ascii_lowercase();
    for marker in INSPECTION_MARKERS {
        if normalized.contains(*marker) {
            return true;
        }
    }
    false
}
4172
4173// Moved strip_think_blocks to inference.rs
4174
/// Limits `text` to at most `max_bytes` bytes of content.
///
/// Text that already fits is returned unchanged. Otherwise the text is cut at
/// the largest UTF-8 character boundary not exceeding `max_bytes`, and a
/// trailer noting the cap is appended on a new line.
fn cap_output(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_owned();
    }

    // Walk downwards from the limit; offset 0 is always a boundary, so the
    // search cannot come up empty in practice.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0);

    let kept = &text[..cut];
    format!("{}\n... [output capped at {}B]", kept, max_bytes)
}
4191
/// Counters describing what `enforce_prompt_budget` changed while shrinking a prompt.
#[derive(Default)]
struct PromptBudgetStats {
    /// Tool results longer than 1200 bytes that were replaced with a compressed summary.
    summarized_tool_results: usize,
    /// Older tool results whose content was replaced with a fixed "omitted" placeholder.
    collapsed_tool_results: usize,
    /// User/assistant messages longer than 900 bytes that were replaced with a summary.
    trimmed_chat_messages: usize,
    /// Messages removed from the prompt entirely.
    dropped_messages: usize,
}
4199
/// Estimates the token footprint of `messages`.
///
/// Thin wrapper that delegates to
/// `crate::agent::inference::estimate_message_batch_tokens`.
fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
    crate::agent::inference::estimate_message_batch_tokens(messages)
}
4203
4204fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
4205    let budget = compaction::SummaryCompressionBudget {
4206        max_chars,
4207        max_lines: 3,
4208        max_line_chars: max_chars.clamp(80, 240),
4209    };
4210    let compressed = compaction::compress_summary(text, budget).summary;
4211    if compressed.is_empty() {
4212        String::new()
4213    } else {
4214        compressed
4215    }
4216}
4217
4218fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
4219    let tool_name = message.name.as_deref().unwrap_or("tool");
4220    let body = summarize_prompt_blob(message.content.as_str(), 320);
4221    format!(
4222        "[Prompt-budget summary of prior `{}` result]\n{}",
4223        tool_name, body
4224    )
4225}
4226
4227fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
4228    let role = message.role.as_str();
4229    let body = summarize_prompt_blob(message.content.as_str(), 240);
4230    format!(
4231        "[Prompt-budget summary of earlier {} message]\n{}",
4232        role, body
4233    )
4234}
4235
4236fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
4237    if messages.len() > 1 && messages[1].role != "user" {
4238        messages.insert(1, ChatMessage::user("Continuing previous context..."));
4239    }
4240}
4241
4242fn enforce_prompt_budget(
4243    prompt_msgs: &mut Vec<ChatMessage>,
4244    context_length: usize,
4245) -> Option<String> {
4246    let target_tokens = ((context_length as f64) * 0.68) as usize;
4247    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4248        return None;
4249    }
4250
4251    let mut stats = PromptBudgetStats::default();
4252
4253    // 1. Summarize the newest large tool outputs first.
4254    let mut tool_indices: Vec<usize> = prompt_msgs
4255        .iter()
4256        .enumerate()
4257        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4258        .collect();
4259    for idx in tool_indices.iter().rev().copied() {
4260        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4261            break;
4262        }
4263        let original = prompt_msgs[idx].content.as_str().to_string();
4264        if original.len() > 1200 {
4265            prompt_msgs[idx].content =
4266                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
4267            stats.summarized_tool_results += 1;
4268        }
4269    }
4270
4271    // 2. Collapse older tool results aggressively, keeping only the most recent two verbatim/summarized.
4272    tool_indices = prompt_msgs
4273        .iter()
4274        .enumerate()
4275        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4276        .collect();
4277    if tool_indices.len() > 2 {
4278        for idx in tool_indices
4279            .iter()
4280            .take(tool_indices.len().saturating_sub(2))
4281            .copied()
4282        {
4283            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4284                break;
4285            }
4286            prompt_msgs[idx].content = MessageContent::Text(
4287                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
4288            );
4289            stats.collapsed_tool_results += 1;
4290        }
4291    }
4292
4293    // 3. Trim older long chat messages, but preserve the final user request.
4294    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4295    for idx in 1..prompt_msgs.len() {
4296        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4297            break;
4298        }
4299        if Some(idx) == last_user_idx {
4300            continue;
4301        }
4302        let role = prompt_msgs[idx].role.as_str();
4303        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
4304            prompt_msgs[idx].content =
4305                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
4306            stats.trimmed_chat_messages += 1;
4307        }
4308    }
4309
4310    // 4. Drop the oldest non-system context until we fit, preserving the latest user request.
4311    let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4312    let mut idx = 1usize;
4313    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
4314        if Some(idx) == preserve_last_user_idx {
4315            idx += 1;
4316            if idx >= prompt_msgs.len() {
4317                break;
4318            }
4319            continue;
4320        }
4321        if idx >= prompt_msgs.len() {
4322            break;
4323        }
4324        prompt_msgs.remove(idx);
4325        stats.dropped_messages += 1;
4326    }
4327
4328    normalize_prompt_start(prompt_msgs);
4329
4330    let new_tokens = estimate_prompt_tokens(prompt_msgs);
4331    if stats.summarized_tool_results == 0
4332        && stats.collapsed_tool_results == 0
4333        && stats.trimmed_chat_messages == 0
4334        && stats.dropped_messages == 0
4335    {
4336        return None;
4337    }
4338
4339    Some(format!(
4340        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
4341        new_tokens,
4342        target_tokens,
4343        stats.summarized_tool_results,
4344        stats.collapsed_tool_results,
4345        stats.trimmed_chat_messages,
4346        stats.dropped_messages
4347    ))
4348}
4349
/// Returns true for short, direct tool-use requests that don't benefit from deep reasoning.
/// Used to skip the auto-/think prepend so the model calls the tool immediately
/// instead of spending thousands of tokens deliberating over a trivial task.
///
/// Two kinds of input qualify:
/// - anything mentioning `run_code` / `run code`, regardless of length;
/// - inputs shorter than 120 bytes that contain a compute-style keyword.
fn is_quick_tool_request(input: &str) -> bool {
    const SHORT_REQUEST_MAX_BYTES: usize = 120;
    // Short compute/test requests — "calculate X", "test this", "execute Y".
    const COMPUTE_KEYWORDS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];

    let lowered = input.to_lowercase();

    // Explicit run_code requests — sandbox calls need no reasoning warmup.
    if lowered.contains("run_code") || lowered.contains("run code") {
        return true;
    }

    input.len() < SHORT_REQUEST_MAX_BYTES
        && COMPUTE_KEYWORDS
            .iter()
            .any(|keyword| lowered.contains(*keyword))
}
4379
/// Split text into chunks of roughly `words_per_chunk` whitespace-separated tokens.
///
/// Every space or newline counts as one word break (consecutive breaks are each
/// counted). A chunk is closed right after the break that reaches
/// `words_per_chunk`, so chunks keep their trailing separator and concatenating
/// them reproduces `text`. Any leftover text becomes a final, shorter chunk.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    fn is_word_break(ch: char) -> bool {
        ch == ' ' || ch == '\n'
    }

    let mut chunks = Vec::new();
    let mut pending = String::new();
    let mut breaks_seen = 0usize;

    // Each piece is a run of text ending in exactly one break, except possibly the last.
    for piece in text.split_inclusive(is_word_break) {
        pending.push_str(piece);
        if piece.ends_with(is_word_break) {
            breaks_seen += 1;
            if breaks_seen >= words_per_chunk {
                chunks.push(std::mem::take(&mut pending));
                breaks_seen = 0;
            }
        }
    }

    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
4401
4402fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
4403    if call.name != "read_file" {
4404        return None;
4405    }
4406    let normalized_arguments =
4407        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
4408    let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
4409    let path = args.get("path").and_then(|v| v.as_str())?;
4410    Some(normalize_workspace_path(path))
4411}
4412
4413fn order_batch_reads_first(
4414    calls: Vec<crate::agent::inference::ToolCallResponse>,
4415) -> (
4416    Vec<crate::agent::inference::ToolCallResponse>,
4417    Option<String>,
4418) {
4419    let has_reads = calls.iter().any(|c| {
4420        matches!(
4421            c.function.name.as_str(),
4422            "read_file" | "inspect_lines" | "grep_files" | "list_files"
4423        )
4424    });
4425    let has_edits = calls.iter().any(|c| {
4426        matches!(
4427            c.function.name.as_str(),
4428            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4429        )
4430    });
4431    if has_reads && has_edits {
4432        let reads: Vec<_> = calls
4433            .into_iter()
4434            .filter(|c| {
4435                !matches!(
4436                    c.function.name.as_str(),
4437                    "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4438                )
4439            })
4440            .collect();
4441        let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
4442        (reads, note)
4443    } else {
4444        (calls, None)
4445    }
4446}
4447
/// Decides whether grep output describes a large, scattered result set.
///
/// Only the first line of `output` is examined. It counts as high fan-out when
/// its leading space-delimited token parses as a number of at least 12, or when
/// one of its `", "`-separated parts has the form `<n> hunk(s)` with `n` of at
/// least 8. Empty output, or a first line with neither number, is not high fan-out.
fn grep_output_is_high_fanout(output: &str) -> bool {
    const HUNK_THRESHOLD: usize = 8;
    const MATCH_THRESHOLD: usize = 12;

    let header = match output.lines().next() {
        Some(line) => line,
        None => return false,
    };

    let many_matches = header
        .split(' ')
        .next()
        .and_then(|token| token.parse::<usize>().ok())
        .map_or(false, |matches| matches >= MATCH_THRESHOLD);

    // The first part that both carries the suffix and parses wins.
    let many_hunks = header
        .split(", ")
        .filter_map(|part| part.strip_suffix(" hunk(s)")?.parse::<usize>().ok())
        .next()
        .map_or(false, |hunks| hunks >= HUNK_THRESHOLD);

    many_hunks || many_matches
}
4466
4467fn build_system_with_corrections(
4468    base: &str,
4469    hints: &[String],
4470    gpu: &Arc<GpuState>,
4471    git: &Arc<crate::agent::git_monitor::GitState>,
4472    config: &crate::agent::config::HematiteConfig,
4473) -> String {
4474    let mut system_msg = base.to_string();
4475
4476    // Inject Permission Mode.
4477    system_msg.push_str("\n\n# Permission Mode\n");
4478    let mode_label = match config.mode {
4479        crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
4480        crate::agent::config::PermissionMode::Developer => "DEVELOPER",
4481        crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
4482    };
4483    system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
4484
4485    if config.mode == crate::agent::config::PermissionMode::ReadOnly {
4486        system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
4487    } else {
4488        system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
4489    }
4490
4491    // Inject live hardware status.
4492    let (used, total) = gpu.read();
4493    if total > 0 {
4494        system_msg.push_str("\n\n# Terminal Hardware Context\n");
4495        system_msg.push_str(&format!(
4496            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
4497            gpu.gpu_name(),
4498            used as f64 / 1024.0,
4499            total as f64 / 1024.0,
4500            gpu.ratio() * 100.0
4501        ));
4502        system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
4503    }
4504
4505    // Inject Git Repository context.
4506    system_msg.push_str("\n\n# Git Repository Context\n");
4507    let git_status_label = git.label();
4508    let git_url = git.url();
4509    system_msg.push_str(&format!(
4510        "REMOTE STATUS: {} | URL: {}\n",
4511        git_status_label, git_url
4512    ));
4513
4514    // Live Snapshots (Status/Diff)
4515    let root = crate::tools::file_ops::workspace_root();
4516    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
4517        system_msg.push_str("\nGit status snapshot:\n");
4518        system_msg.push_str(&status_snapshot);
4519        system_msg.push_str("\n");
4520    }
4521
4522    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
4523        system_msg.push_str("\nGit diff snapshot:\n");
4524        system_msg.push_str(&diff_snapshot);
4525        system_msg.push_str("\n");
4526    }
4527
4528    if git_status_label == "NONE" {
4529        system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
4530    } else if git_status_label == "BEHIND" {
4531        system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
4532    }
4533
4534    // NOTE: Instruction files (CLAUDE.md, HEMATITE.md, etc.) are already injected
4535    // by InferenceEngine::build_system_prompt() via load_instruction_files().
4536    // Injecting them again here would double the token cost (~4K wasted per turn).
4537
4538    if hints.is_empty() {
4539        return system_msg;
4540    }
4541    system_msg.push_str("\n\n# Formatting Corrections\n");
4542    system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
4543    for hint in hints {
4544        system_msg.push_str(&format!("- {}\n", hint));
4545    }
4546    system_msg
4547}
4548
/// Chooses between the optional "think" and "fast" models from keywords in the request.
///
/// The input is lower-cased and scanned for substrings. Requests that mention
/// heavy-work verbs (refactor, rewrite, implement, create, fix, debug) go to
/// `think_model` when one is configured. Otherwise, requests that mention lookup
/// words (what, show, find, list, status) go to `fast_model` when one is
/// configured. Returns `None` when neither rule yields a configured model.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_CUES: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_CUES: &[&str] = &["what", "show", "find", "list", "status"];

    fn mentions_any(text: &str, cues: &[&str]) -> bool {
        cues.iter().any(|cue| text.contains(*cue))
    }

    let lowered = user_input.to_lowercase();

    // The think route wins over the fast route; a missing think model falls
    // through to the fast check instead of ending the search.
    think_model
        .filter(|_| mentions_any(&lowered, THINK_CUES))
        .or_else(|| fast_model.filter(|_| mentions_any(&lowered, FAST_CUES)))
}
4574
4575fn is_parallel_safe(name: &str) -> bool {
4576    let metadata = crate::agent::inference::tool_metadata_for_name(name);
4577    !metadata.mutates_workspace && !metadata.external_surface
4578}
4579
/// Decides whether a chat-mode query should consult the vein (memory/docs retrieval).
///
/// Always true in docs-only mode. Otherwise the query qualifies when, after
/// ASCII lower-casing, it contains one of a fixed set of history-related
/// phrases, or contains a date-like token: a run of exactly ten digits/dashes
/// whose fifth character is a dash (for example `2026-04-09`).
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const HISTORY_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    if docs_only_mode {
        return true;
    }

    let lowered = query.to_ascii_lowercase();
    if HISTORY_CUES.iter().any(|cue| lowered.contains(*cue)) {
        return true;
    }

    // Tokens contain only ASCII digits and dashes, so byte offsets equal char offsets;
    // the length check short-circuits before the index is touched.
    lowered
        .split(|ch: char| !ch.is_ascii_digit() && ch != '-')
        .any(|token| token.len() == 10 && token.as_bytes()[4] == b'-')
}
4606
4607#[cfg(test)]
4608mod tests {
4609    use super::*;
4610
4611    #[test]
4612    fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
4613        let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
4614        let class = classify_runtime_failure(detail);
4615        assert_eq!(class, RuntimeFailureClass::ContextWindow);
4616        assert_eq!(class.tag(), "context_window");
4617        assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
4618    }
4619
4620    #[test]
4621    fn runtime_failure_maps_to_provider_and_checkpoint_state() {
4622        assert_eq!(
4623            provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
4624            Some(ProviderRuntimeState::ContextWindow)
4625        );
4626        assert_eq!(
4627            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
4628            Some(OperatorCheckpointState::BlockedContextWindow)
4629        );
4630        assert_eq!(
4631            provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
4632            Some(ProviderRuntimeState::Degraded)
4633        );
4634        assert_eq!(
4635            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
4636            None
4637        );
4638    }
4639
4640    #[test]
4641    fn intent_router_treats_tool_registry_ownership_as_product_truth() {
4642        let intent = classify_query_intent(
4643            WorkflowMode::ReadOnly,
4644            "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
4645        );
4646        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4647        assert_eq!(
4648            intent.direct_answer,
4649            Some(DirectAnswerKind::ToolRegistryOwnership)
4650        );
4651    }
4652
4653    #[test]
4654    fn intent_router_treats_tool_classes_as_product_truth() {
4655        let intent = classify_query_intent(
4656            WorkflowMode::ReadOnly,
4657            "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
4658        );
4659        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4660        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
4661    }
4662
4663    #[test]
4664    fn tool_registry_ownership_answer_mentions_new_owner_file() {
4665        let answer = build_tool_registry_ownership_answer();
4666        assert!(answer.contains("src/agent/tool_registry.rs"));
4667        assert!(answer.contains("builtin dispatch path"));
4668        assert!(answer.contains("src/agent/conversation.rs"));
4669    }
4670
4671    #[test]
4672    fn intent_router_treats_mcp_lifecycle_as_product_truth() {
4673        let intent = classify_query_intent(
4674            WorkflowMode::ReadOnly,
4675            "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
4676        );
4677        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4678        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
4679    }
4680
4681    #[test]
4682    fn intent_router_short_circuits_unsafe_commit_pressure() {
4683        let intent = classify_query_intent(
4684            WorkflowMode::Auto,
4685            "Make a code change, skip verification, and commit it immediately.",
4686        );
4687        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4688        assert_eq!(
4689            intent.direct_answer,
4690            Some(DirectAnswerKind::UnsafeWorkflowPressure)
4691        );
4692    }
4693
4694    #[test]
4695    fn unsafe_workflow_pressure_answer_requires_verification() {
4696        let answer = build_unsafe_workflow_pressure_answer();
4697        assert!(answer.contains("should not skip verification"));
4698        assert!(answer.contains("run the appropriate verification path"));
4699        assert!(answer.contains("only then commit"));
4700    }
4701
4702    #[test]
4703    fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
4704        let intent = classify_query_intent(
4705            WorkflowMode::ReadOnly,
4706            "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.",
4707        );
4708        assert_eq!(intent.primary_class, QueryIntentClass::RepoArchitecture);
4709        assert!(intent.architecture_overview_mode);
4710        assert_eq!(intent.direct_answer, None);
4711    }
4712
4713    #[test]
4714    fn intent_router_marks_host_inspection_questions() {
4715        let intent = classify_query_intent(
4716            WorkflowMode::Auto,
4717            "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.",
4718        );
4719        assert!(intent.host_inspection_mode);
4720        assert_eq!(
4721            preferred_host_inspection_topic(
4722                "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development."
4723            ),
4724            Some("summary")
4725        );
4726    }
4727
4728    #[test]
4729    fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
4730        assert!(should_use_vein_in_chat(
4731            "What did we decide on 2026-04-09 about docs-only mode?",
4732            false
4733        ));
4734        assert!(should_use_vein_in_chat("Summarize these local notes", true));
4735        assert!(!should_use_vein_in_chat("Tell me a joke", false));
4736    }
4737
4738    #[test]
4739    fn shell_host_inspection_guard_matches_path_and_version_commands() {
4740        assert!(shell_looks_like_structured_host_inspection(
4741            "$env:PATH -split ';'"
4742        ));
4743        assert!(shell_looks_like_structured_host_inspection(
4744            "cargo --version"
4745        ));
4746        assert!(shell_looks_like_structured_host_inspection(
4747            "Get-NetTCPConnection -LocalPort 3000"
4748        ));
4749        assert!(shell_looks_like_structured_host_inspection(
4750            "netstat -ano | findstr :3000"
4751        ));
4752    }
4753
4754    #[test]
4755    fn intent_router_picks_ports_for_listening_port_questions() {
4756        assert_eq!(
4757            preferred_host_inspection_topic(
4758                "Show me what is listening on port 3000 and whether anything unexpected is exposed."
4759            ),
4760            Some("ports")
4761        );
4762    }
4763
4764    #[test]
4765    fn failing_path_parser_extracts_cargo_error_locations() {
4766        let output = r#"
4767BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:
4768
4769error[E0412]: cannot find type `Foo` in this scope
4770  --> src/agent/conversation.rs:42:12
4771   |
477242 |     field: Foo,
4773   |            ^^^ not found
4774
4775error[E0308]: mismatched types
4776  --> src/tools/file_ops.rs:100:5
4777   |
4778   = note: expected `String`, found `&str`
4779"#;
4780        let paths = parse_failing_paths_from_build_output(output);
4781        assert!(
4782            paths.iter().any(|p| p.contains("conversation.rs")),
4783            "should capture conversation.rs"
4784        );
4785        assert!(
4786            paths.iter().any(|p| p.contains("file_ops.rs")),
4787            "should capture file_ops.rs"
4788        );
4789        assert_eq!(paths.len(), 2, "no duplicates");
4790    }
4791
4792    #[test]
4793    fn failing_path_parser_ignores_macro_expansions() {
4794        let output = r#"
4795  --> <macro-expansion>:1:2
4796  --> src/real/file.rs:10:5
4797"#;
4798        let paths = parse_failing_paths_from_build_output(output);
4799        assert_eq!(paths.len(), 1);
4800        assert!(paths[0].contains("file.rs"));
4801    }
4802}