1use crate::agent::architecture_summary::{
2 build_architecture_overview_answer, prune_architecture_trace_batch,
3 prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4 summarize_project_map_output, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7 build_architect_session_reset_plan, build_authorization_policy_answer,
8 build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9 build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10 build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11 build_session_reset_semantics_answer, build_tool_classes_answer,
12 build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13 build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16 ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17 ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20 action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21 is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24 attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25 RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28 classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
29 preferred_host_inspection_topic, DirectAnswerKind, QueryIntentClass,
30};
31use crate::agent::tool_registry::dispatch_builtin_tool;
32use crate::agent::compaction::{self, CompactionConfig};
34use crate::ui::gpu_monitor::GpuState;
35
36use serde_json::Value;
37use std::sync::Arc;
38use tokio::sync::{mpsc, Mutex};
/// One user submission: the typed text plus any optional attachments.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
    /// Text entered for this turn.
    pub text: String,
    /// Document attached to the turn, if any.
    pub attached_document: Option<AttachedDocument>,
    /// Image attached to the turn, if any.
    pub attached_image: Option<AttachedImage>,
}
47
/// A document attached to a user turn.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name, shown in the `[Attached document: …]` marker.
    pub name: String,
    /// Full document text; it is inlined into the prompt ahead of the user's text.
    pub content: String,
}
53
/// An image attached to a user turn.
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name, shown in the `[Attached image: …]` marker.
    pub name: String,
    /// Location of the image — presumably a filesystem path; confirm against the caller.
    pub path: String,
}
59
60impl UserTurn {
61 pub fn text(text: impl Into<String>) -> Self {
62 Self {
63 text: text.into(),
64 attached_document: None,
65 attached_image: None,
66 }
67 }
68}
69
/// On-disk shape of the persisted session file, (de)serialized with serde.
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
    /// Compacted running summary carried between runs, if one exists.
    running_summary: Option<String>,
    /// Structured session memory. `#[serde(default)]` lets files that lack
    /// this field still deserialize.
    #[serde(default)]
    session_memory: crate::agent::compaction::SessionMemory,
}
76
/// Per-session bookkeeping about what has been observed and verified.
#[derive(Default)]
struct ActionGroundingState {
    /// Counter of grounded turns; incremented at the start of each one.
    turn_index: u64,
    /// Normalized path -> turn index at which the path was last observed.
    observed_paths: std::collections::HashMap<String, u64>,
    /// Normalized path -> turn index at which the path was last inspected.
    inspected_paths: std::collections::HashMap<String, u64>,
    /// Turn of the most recent build verification, if any.
    /// NOTE(review): the writer is outside this view — confirm semantics.
    last_verify_build_turn: Option<u64>,
    /// Presumably whether that last build verification succeeded — TODO confirm.
    last_verify_build_ok: bool,
    /// Presumably the files named by the last failed build — TODO confirm.
    last_failed_build_paths: Vec<String>,
    /// Presumably set when code was edited after the last verification — TODO confirm.
    code_changed_since_verify: bool,
}
87
/// Guard that holds a shared boolean flag; its `Drop` impl stores `false`
/// into the flag when the guard goes out of scope.
struct PlanExecutionGuard {
    /// Shared flag cleared on drop.
    flag: Arc<std::sync::atomic::AtomicBool>,
}
91
92impl Drop for PlanExecutionGuard {
93 fn drop(&mut self) {
94 self.flag.store(false, std::sync::atomic::Ordering::SeqCst);
95 }
96}
97
/// Workflow mode the conversation is operating in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    /// Default mode.
    #[default]
    Auto,
    /// Read-only mode (see `is_read_only`).
    Ask,
    /// Mode selected by the `/code` inline prefix.
    Code,
    /// Read-only, plan-first mode (see `is_read_only`).
    Architect,
    /// Read-only mode (see `is_read_only`).
    ReadOnly,
    /// Conversational mode (see `is_chat`).
    Chat,
}
110
111impl WorkflowMode {
112 fn label(self) -> &'static str {
113 match self {
114 WorkflowMode::Auto => "AUTO",
115 WorkflowMode::Ask => "ASK",
116 WorkflowMode::Code => "CODE",
117 WorkflowMode::Architect => "ARCHITECT",
118 WorkflowMode::ReadOnly => "READ-ONLY",
119 WorkflowMode::Chat => "CHAT",
120 }
121 }
122
123 fn is_read_only(self) -> bool {
124 matches!(
125 self,
126 WorkflowMode::Ask | WorkflowMode::Architect | WorkflowMode::ReadOnly
127 )
128 }
129
130 pub(crate) fn is_chat(self) -> bool {
131 matches!(self, WorkflowMode::Chat)
132 }
133}
134
135fn session_path() -> std::path::PathBuf {
136 crate::tools::file_ops::workspace_root()
137 .join(".hematite")
138 .join("session.json")
139}
140
141fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
142 let path = session_path();
143 if !path.exists() {
144 return (None, crate::agent::compaction::SessionMemory::default());
145 }
146 let Ok(data) = std::fs::read_to_string(&path) else {
147 return (None, crate::agent::compaction::SessionMemory::default());
148 };
149 let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
150 return (None, crate::agent::compaction::SessionMemory::default());
151 };
152 (saved.running_summary, saved.session_memory)
153}
154
155fn reset_task_files() {
156 let root = crate::tools::file_ops::workspace_root();
157 let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
158 let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
159 let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
160 let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
161 let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
162 let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
163}
164
165fn purge_persistent_memory() {
166 let root = crate::tools::file_ops::workspace_root();
167 let mem_dir = root.join(".hematite").join("memories");
168 if mem_dir.exists() {
169 let _ = std::fs::remove_dir_all(&mem_dir);
170 let _ = std::fs::create_dir_all(&mem_dir);
171 }
172
173 let log_dir = root.join(".hematite_logs");
174 if log_dir.exists() {
175 if let Ok(entries) = std::fs::read_dir(&log_dir) {
176 for entry in entries.flatten() {
177 let _ = std::fs::write(entry.path(), "");
178 }
179 }
180 }
181}
182
183fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
184 let mut out = prompt.trim().to_string();
185 if let Some(doc) = user_turn.attached_document.as_ref() {
186 out = format!(
187 "[Attached document: {}]\n\n{}\n\n---\n\n{}",
188 doc.name, doc.content, out
189 );
190 }
191 if let Some(image) = user_turn.attached_image.as_ref() {
192 out = if out.is_empty() {
193 format!("[Attached image: {}]", image.name)
194 } else {
195 format!("[Attached image: {}]\n\n{}", image.name, out)
196 };
197 }
198 out
199}
200
201fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
202 let mut prefixes = Vec::new();
203 if let Some(doc) = user_turn.attached_document.as_ref() {
204 prefixes.push(format!("[Attached document: {}]", doc.name));
205 }
206 if let Some(image) = user_turn.attached_image.as_ref() {
207 prefixes.push(format!("[Attached image: {}]", image.name));
208 }
209 if prefixes.is_empty() {
210 prompt.to_string()
211 } else if prompt.trim().is_empty() {
212 prefixes.join("\n")
213 } else {
214 format!("{}\n{}", prefixes.join("\n"), prompt)
215 }
216}
217
/// Coarse classification of a runtime failure, derived from its detail text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    /// The prompt did not fit the provider's context window.
    ContextWindow,
    /// The provider was unreachable, errored, or returned an unparseable response.
    ProviderDegraded,
    /// A tool call carried missing or invalid arguments.
    ToolArgMalformed,
    /// An action was blocked, denied, or declined.
    ToolPolicyBlocked,
    /// The same failing tool pattern kept repeating.
    ToolLoop,
    /// A build or verification step failed.
    VerificationFailed,
    /// The model returned an empty response.
    EmptyModelResponse,
    /// Nothing else matched.
    Unknown,
}
229
230impl RuntimeFailureClass {
231 fn tag(self) -> &'static str {
232 match self {
233 RuntimeFailureClass::ContextWindow => "context_window",
234 RuntimeFailureClass::ProviderDegraded => "provider_degraded",
235 RuntimeFailureClass::ToolArgMalformed => "tool_arg_malformed",
236 RuntimeFailureClass::ToolPolicyBlocked => "tool_policy_blocked",
237 RuntimeFailureClass::ToolLoop => "tool_loop",
238 RuntimeFailureClass::VerificationFailed => "verification_failed",
239 RuntimeFailureClass::EmptyModelResponse => "empty_model_response",
240 RuntimeFailureClass::Unknown => "unknown",
241 }
242 }
243
244 fn operator_guidance(self) -> &'static str {
245 match self {
246 RuntimeFailureClass::ContextWindow => {
247 "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying."
248 }
249 RuntimeFailureClass::ProviderDegraded => {
250 "Retry once automatically, then narrow the turn or restart LM Studio if it persists."
251 }
252 RuntimeFailureClass::ToolArgMalformed => {
253 "Retry with repaired or narrower tool arguments instead of repeating the same malformed call."
254 }
255 RuntimeFailureClass::ToolPolicyBlocked => {
256 "Stay inside the allowed workflow or switch modes before retrying."
257 }
258 RuntimeFailureClass::ToolLoop => {
259 "Stop repeating the same failing tool pattern and switch to a narrower recovery step."
260 }
261 RuntimeFailureClass::VerificationFailed => {
262 "Fix the build or test failure before treating the task as complete."
263 }
264 RuntimeFailureClass::EmptyModelResponse => {
265 "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing."
266 }
267 RuntimeFailureClass::Unknown => {
268 "Inspect the latest grounded tool results or provider status before retrying."
269 }
270 }
271 }
272}
273
274fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
275 let lower = detail.to_ascii_lowercase();
276 if lower.contains("context_window_blocked")
277 || lower.contains("context ceiling reached")
278 || lower.contains("exceeds the")
279 || ((lower.contains("n_keep") && lower.contains("n_ctx"))
280 || lower.contains("context length")
281 || lower.contains("keep from the initial prompt")
282 || lower.contains("prompt is greater than the context length"))
283 {
284 RuntimeFailureClass::ContextWindow
285 } else if lower.contains("empty response from model")
286 || lower.contains("model returned an empty response")
287 {
288 RuntimeFailureClass::EmptyModelResponse
289 } else if lower.contains("lm studio unreachable")
290 || lower.contains("lm studio error")
291 || lower.contains("request failed")
292 || lower.contains("response parse error")
293 || lower.contains("provider degraded")
294 {
295 RuntimeFailureClass::ProviderDegraded
296 } else if lower.contains("missing required argument")
297 || lower.contains("json repair failed")
298 || lower.contains("invalid pattern")
299 || lower.contains("invalid line range")
300 {
301 RuntimeFailureClass::ToolArgMalformed
302 } else if lower.contains("action blocked:")
303 || lower.contains("access denied")
304 || lower.contains("declined by user")
305 {
306 RuntimeFailureClass::ToolPolicyBlocked
307 } else if lower.contains("too many consecutive tool errors")
308 || lower.contains("repeated tool failures")
309 || lower.contains("stuck in a loop")
310 {
311 RuntimeFailureClass::ToolLoop
312 } else if lower.contains("build failed")
313 || lower.contains("verification failed")
314 || lower.contains("verify_build")
315 {
316 RuntimeFailureClass::VerificationFailed
317 } else {
318 RuntimeFailureClass::Unknown
319 }
320}
321
322fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
323 format!(
324 "[failure:{}] {} Detail: {}",
325 class.tag(),
326 class.operator_guidance(),
327 detail.trim()
328 )
329}
330
331fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
332 match class {
333 RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
334 RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
335 RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
336 _ => None,
337 }
338}
339
340fn checkpoint_state_for_runtime_failure(
341 class: RuntimeFailureClass,
342) -> Option<OperatorCheckpointState> {
343 match class {
344 RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
345 RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
346 RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
347 RuntimeFailureClass::VerificationFailed => {
348 Some(OperatorCheckpointState::BlockedVerification)
349 }
350 _ => None,
351 }
352}
353
354fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
355 match class {
356 RuntimeFailureClass::ProviderDegraded => {
357 "LM Studio degraded during the turn; retrying once before surfacing a failure."
358 }
359 RuntimeFailureClass::EmptyModelResponse => {
360 "The model returned an empty reply; retrying once before surfacing a failure."
361 }
362 _ => "Runtime recovery in progress.",
363 }
364}
365
366fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
367 match class {
368 RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
369 RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
370 RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
371 RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
372 _ => "Operator checkpoint updated.",
373 }
374}
375
376fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
377 match class {
378 RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
379 RuntimeFailureClass::ProviderDegraded => {
380 "LM Studio degraded and did not recover cleanly; operator action is now required."
381 }
382 RuntimeFailureClass::EmptyModelResponse => {
383 "LM Studio returned an empty reply after recovery; operator action is now required."
384 }
385 RuntimeFailureClass::ToolLoop => {
386 "Repeated failing tool pattern detected; Hematite stopped the loop."
387 }
388 _ => "Runtime failure surfaced to the operator.",
389 }
390}
391
392fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
393 matches!(
394 class,
395 RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
396 )
397}
398
399fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
400 match class {
401 RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
402 RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
403 RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
404 RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
405 RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
406 RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
407 RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
408 }
409}
410
411fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
412 format!(
413 "{} [{}]",
414 plan.recipe.scenario.label(),
415 plan.recipe.steps_summary()
416 )
417}
418
419fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
420 match decision {
421 RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
422 RecoveryDecision::Escalate {
423 recipe,
424 attempts_made,
425 ..
426 } => format!(
427 "{} escalated after {} / {} [{}]",
428 recipe.scenario.label(),
429 attempts_made,
430 recipe.max_attempts.max(1),
431 recipe.steps_summary()
432 ),
433 }
434}
435
436fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
439 let root = crate::tools::file_ops::workspace_root();
440 let mut paths: Vec<String> = output
441 .lines()
442 .filter_map(|line| {
443 let trimmed = line.trim_start();
444 let after_arrow = trimmed.strip_prefix("--> ")?;
446 let file_part = after_arrow.split(':').next()?;
447 if file_part.is_empty() || file_part.starts_with('<') {
448 return None;
449 }
450 let p = std::path::Path::new(file_part);
451 let resolved = if p.is_absolute() {
452 p.to_path_buf()
453 } else {
454 root.join(p)
455 };
456 Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
457 })
458 .collect();
459 paths.sort();
460 paths.dedup();
461 paths
462}
463
464fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
465 match mode {
466 WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
467 WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
468 WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
469 _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
470 }
471}
472
/// Fixed instruction block describing the output format for an architect
/// handoff: a short preamble followed by the exact section headings.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow` over a broad `map_project` scan.\n",
        "If you do use `map_project`, keep it minimal and scoped.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented."
    )
}
489
/// True when the input asks, case-insensitively, to "implement the current
/// plan" anywhere in its text.
fn is_current_plan_execution_request(user_input: &str) -> bool {
    // A substring test already covers the exact-match forms, with or without
    // the trailing period.
    user_input
        .trim()
        .to_ascii_lowercase()
        .contains("implement the current plan")
}
496
497fn is_plan_scoped_tool(name: &str) -> bool {
498 crate::agent::inference::tool_metadata_for_name(name).plan_scope
499}
500
501fn is_current_plan_irrelevant_tool(name: &str) -> bool {
502 !crate::agent::inference::tool_metadata_for_name(name).plan_scope
503}
504
505fn is_non_mutating_plan_step_tool(name: &str) -> bool {
506 let metadata = crate::agent::inference::tool_metadata_for_name(name);
507 metadata.plan_scope && !metadata.mutates_workspace
508}
509
510fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
511 let trimmed = user_input.trim();
512 for (prefix, mode) in [
513 ("/ask", WorkflowMode::Ask),
514 ("/code", WorkflowMode::Code),
515 ("/architect", WorkflowMode::Architect),
516 ("/read-only", WorkflowMode::ReadOnly),
517 ("/auto", WorkflowMode::Auto),
518 ] {
519 if let Some(rest) = trimmed.strip_prefix(prefix) {
520 let rest = rest.trim();
521 if !rest.is_empty() {
522 return Some((mode, rest));
523 }
524 }
525 }
526 None
527}
528
529pub fn get_tools() -> Vec<ToolDefinition> {
533 crate::agent::tool_registry::get_tools()
534}
535
/// Owns the state of one conversation: message history, inference engine,
/// tools, persisted session memory, and the supporting managers.
pub struct ConversationManager {
    /// Chat messages so far; `new` seeds it with the system prompt.
    pub history: Vec<ChatMessage>,
    /// Inference engine used for prompts, context length and runtime profile.
    pub engine: Arc<InferenceEngine>,
    /// Tool definitions, taken from `get_tools()` at construction.
    pub tools: Vec<ToolDefinition>,
    /// Shared MCP manager.
    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
    /// Prompt-style flag passed to `build_system_prompt`.
    pub professional: bool,
    /// Prompt-style flag passed to `build_system_prompt`.
    pub brief: bool,
    /// Prompt-style level passed to `build_system_prompt`.
    pub snark: u8,
    /// Prompt-style level passed to `build_system_prompt`.
    pub chaos: u8,
    /// Presumably the model id used for fast turns — TODO confirm.
    pub fast_model: Option<String>,
    /// Presumably the model id used for reasoning turns — TODO confirm.
    pub think_model: Option<String>,
    /// Starts empty; NOTE(review): writers are outside this view.
    pub correction_hints: Vec<String>,
    /// Compacted summary of earlier conversation, restored from the saved session.
    pub running_summary: Option<String>,
    /// GPU monitor state; its `ratio()` feeds the compaction config.
    pub gpu_state: Arc<GpuState>,
    /// Vein index/memory store, opened at `.hematite/vein.db` when possible.
    pub vein: crate::memory::vein::Vein,
    /// Transcript logger for user, agent and system lines.
    pub transcript: crate::agent::transcript::TranscriptLogger,
    /// Starts as `false`; presumably set to request cancellation — TODO confirm.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Shared git monitor state.
    pub git_state: Arc<crate::agent::git_monitor::GitState>,
    /// Starts as `None`; NOTE(review): meaning of `Some(true/false)` not shown here.
    pub think_mode: Option<bool>,
    /// Current workflow mode; starts as `Auto`.
    workflow_mode: WorkflowMode,
    /// Structured session memory, restored from the saved session.
    pub session_memory: crate::agent::compaction::SessionMemory,
    /// Shared swarm coordinator.
    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    /// Shared voice manager.
    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
    /// Personality text inserted into the chat-mode system prompt.
    pub soul_personality: String,
    /// Shared LSP manager, created for the workspace root.
    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
    /// Starts as `None`; NOTE(review): writers are outside this view.
    pub reasoning_history: Option<String>,
    /// Pinned files; presumably path -> content — TODO confirm.
    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
    /// Turn counter and per-path observation bookkeeping.
    action_grounding: Arc<Mutex<ActionGroundingState>>,
    /// Starts as `false`; presumably true while a plan is being executed — TODO confirm.
    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
    /// Recovery bookkeeping; starts from `RecoveryContext::default()`.
    recovery_context: RecoveryContext,
    /// Cached result of `vein.l1_context()`, refreshed after each index pass.
    pub l1_context: Option<String>,
}
589
590impl ConversationManager {
591 fn vein_docs_only_mode(&self) -> bool {
592 !crate::tools::file_ops::is_project_workspace()
593 }
594
595 fn refresh_vein_index(&mut self) -> usize {
596 let count = if self.vein_docs_only_mode() {
597 let root = crate::tools::file_ops::workspace_root();
598 tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
599 } else {
600 tokio::task::block_in_place(|| self.vein.index_project())
601 };
602 self.l1_context = self.vein.l1_context();
603 count
604 }
605
606 fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
607 let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
608 let workspace_mode = if self.vein_docs_only_mode() {
609 "docs-only (outside a project workspace)"
610 } else {
611 "project workspace"
612 };
613 let active_room = snapshot.active_room.as_deref().unwrap_or("none");
614 let mut out = format!(
615 "Vein Inspection\n\
616 Workspace mode: {workspace_mode}\n\
617 Indexed this pass: {indexed_this_pass}\n\
618 Indexed source files: {}\n\
619 Indexed docs: {}\n\
620 Indexed session exchanges: {}\n\
621 Embedded source/doc chunks: {}\n\
622 Embeddings available: {}\n\
623 Active room bias: {active_room}\n\
624 L1 hot-files block: {}\n",
625 snapshot.indexed_source_files,
626 snapshot.indexed_docs,
627 snapshot.indexed_session_exchanges,
628 snapshot.embedded_source_doc_chunks,
629 if snapshot.has_any_embeddings {
630 "yes"
631 } else {
632 "no"
633 },
634 if snapshot.l1_ready {
635 "ready"
636 } else {
637 "not built yet"
638 },
639 );
640
641 if snapshot.hot_files.is_empty() {
642 out.push_str("Hot files: none yet.\n");
643 return out;
644 }
645
646 out.push_str("\nHot files by room:\n");
647 let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
648 std::collections::BTreeMap::new();
649 for file in &snapshot.hot_files {
650 by_room.entry(file.room.as_str()).or_default().push(file);
651 }
652 for (room, files) in by_room {
653 out.push_str(&format!("[{}]\n", room));
654 for file in files {
655 out.push_str(&format!(
656 "- {} [{} edit{}]\n",
657 file.path,
658 file.heat,
659 if file.heat == 1 { "" } else { "s" }
660 ));
661 }
662 }
663
664 out
665 }
666
667 fn latest_user_prompt(&self) -> Option<&str> {
668 self.history
669 .iter()
670 .rev()
671 .find(|msg| msg.role == "user")
672 .map(|msg| msg.content.as_str())
673 }
674
675 async fn emit_direct_response(
676 &mut self,
677 tx: &mpsc::Sender<InferenceEvent>,
678 raw_user_input: &str,
679 effective_user_input: &str,
680 response: &str,
681 ) {
682 self.history.push(ChatMessage::user(effective_user_input));
683 self.history.push(ChatMessage::assistant_text(response));
684 self.transcript.log_user(raw_user_input);
685 self.transcript.log_agent(response);
686 for chunk in chunk_text(response, 8) {
687 if !chunk.is_empty() {
688 let _ = tx.send(InferenceEvent::Token(chunk)).await;
689 }
690 }
691 let _ = tx.send(InferenceEvent::Done).await;
692 self.trim_history(80);
693 self.refresh_session_memory();
694 self.save_session();
695 }
696
697 async fn emit_operator_checkpoint(
698 &mut self,
699 tx: &mpsc::Sender<InferenceEvent>,
700 state: OperatorCheckpointState,
701 summary: impl Into<String>,
702 ) {
703 let summary = summary.into();
704 self.session_memory
705 .record_checkpoint(state.label(), summary.clone());
706 let _ = tx
707 .send(InferenceEvent::OperatorCheckpoint { state, summary })
708 .await;
709 }
710
711 async fn emit_recovery_recipe_summary(
712 &mut self,
713 tx: &mpsc::Sender<InferenceEvent>,
714 state: impl Into<String>,
715 summary: impl Into<String>,
716 ) {
717 let state = state.into();
718 let summary = summary.into();
719 self.session_memory.record_recovery(state, summary.clone());
720 let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
721 }
722
723 async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
724 let _ = tx
725 .send(InferenceEvent::ProviderStatus {
726 state: ProviderRuntimeState::Live,
727 summary: String::new(),
728 })
729 .await;
730 self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
731 .await;
732 }
733
734 async fn emit_prompt_pressure_for_messages(
735 &self,
736 tx: &mpsc::Sender<InferenceEvent>,
737 messages: &[ChatMessage],
738 ) {
739 let context_length = self.engine.current_context_length();
740 let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
741 crate::agent::inference::estimate_prompt_pressure(
742 messages,
743 &self.tools,
744 context_length,
745 );
746 let _ = tx
747 .send(InferenceEvent::PromptPressure {
748 estimated_input_tokens,
749 reserved_output_tokens,
750 estimated_total_tokens,
751 context_length,
752 percent,
753 })
754 .await;
755 }
756
757 async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
758 let context_length = self.engine.current_context_length();
759 let _ = tx
760 .send(InferenceEvent::PromptPressure {
761 estimated_input_tokens: 0,
762 reserved_output_tokens: 0,
763 estimated_total_tokens: 0,
764 context_length,
765 percent: 0,
766 })
767 .await;
768 }
769
770 async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
771 let context_length = self.engine.current_context_length();
772 let vram_ratio = self.gpu_state.ratio();
773 let config = CompactionConfig::adaptive(context_length, vram_ratio);
774 let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
775 let percent = if config.max_estimated_tokens == 0 {
776 0
777 } else {
778 ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
779 };
780
781 let _ = tx
782 .send(InferenceEvent::CompactionPressure {
783 estimated_tokens,
784 threshold_tokens: config.max_estimated_tokens,
785 percent,
786 })
787 .await;
788 }
789
790 async fn refresh_runtime_profile_and_report(
791 &mut self,
792 tx: &mpsc::Sender<InferenceEvent>,
793 reason: &str,
794 ) -> Option<(String, usize, bool)> {
795 let refreshed = self.engine.refresh_runtime_profile().await;
796 if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
797 let _ = tx
798 .send(InferenceEvent::RuntimeProfile {
799 model_id: model_id.clone(),
800 context_length: *context_length,
801 })
802 .await;
803 self.transcript.log_system(&format!(
804 "Runtime profile refresh ({}): model={} ctx={} changed={}",
805 reason, model_id, context_length, changed
806 ));
807 }
808 refreshed
809 }
810
811 pub fn new(
812 engine: Arc<InferenceEngine>,
813 professional: bool,
814 brief: bool,
815 snark: u8,
816 chaos: u8,
817 soul_personality: String,
818 fast_model: Option<String>,
819 think_model: Option<String>,
820 gpu_state: Arc<GpuState>,
821 git_state: Arc<crate::agent::git_monitor::GitState>,
822 swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
823 voice_manager: Arc<crate::ui::voice::VoiceManager>,
824 ) -> Self {
825 let (saved_summary, saved_memory) = load_session_data();
826
827 let mcp_manager = Arc::new(tokio::sync::Mutex::new(
829 crate::agent::mcp_manager::McpManager::new(),
830 ));
831
832 let dynamic_instructions =
834 engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
835
836 let history = vec![ChatMessage::system(&dynamic_instructions)];
837
838 let vein_path = crate::tools::file_ops::workspace_root()
839 .join(".hematite")
840 .join("vein.db");
841 let vein_base_url = engine.base_url.clone();
842 let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
843 .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
844
845 Self {
846 history,
847 engine,
848 tools: get_tools(),
849 mcp_manager,
850 professional,
851 brief,
852 snark,
853 chaos,
854 fast_model,
855 think_model,
856 correction_hints: Vec::new(),
857 running_summary: saved_summary,
858 gpu_state,
859 vein,
860 transcript: crate::agent::transcript::TranscriptLogger::new(),
861 cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
862 git_state,
863 think_mode: None,
864 workflow_mode: WorkflowMode::Auto,
865 session_memory: saved_memory,
866 swarm_coordinator,
867 voice_manager,
868 soul_personality,
869 lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
870 crate::tools::file_ops::workspace_root(),
871 ))),
872 reasoning_history: None,
873 pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
874 action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
875 plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
876 recovery_context: RecoveryContext::default(),
877 l1_context: None,
878 }
879 }
880
881 pub fn initialize_vein(&mut self) -> usize {
884 self.refresh_vein_index()
885 }
886
887 fn save_session(&self) {
888 let path = session_path();
889 if let Some(parent) = path.parent() {
890 let _ = std::fs::create_dir_all(parent);
891 }
892 let saved = SavedSession {
893 running_summary: self.running_summary.clone(),
894 session_memory: self.session_memory.clone(),
895 };
896 if let Ok(json) = serde_json::to_string(&saved) {
897 let _ = std::fs::write(&path, json);
898 }
899 }
900
901 fn save_empty_session(&self) {
902 let path = session_path();
903 if let Some(parent) = path.parent() {
904 let _ = std::fs::create_dir_all(parent);
905 }
906 let saved = SavedSession {
907 running_summary: None,
908 session_memory: crate::agent::compaction::SessionMemory::default(),
909 };
910 if let Ok(json) = serde_json::to_string(&saved) {
911 let _ = std::fs::write(&path, json);
912 }
913 }
914
915 fn refresh_session_memory(&mut self) {
916 let current_plan = self.session_memory.current_plan.clone();
917 let previous_memory = self.session_memory.clone();
918 self.session_memory = compaction::extract_memory(&self.history);
919 self.session_memory.current_plan = current_plan;
920 self.session_memory
921 .inherit_runtime_ledger_from(&previous_memory);
922 }
923
924 fn build_chat_system_prompt(&self) -> String {
925 let species = &self.engine.species;
926 let personality = &self.soul_personality;
927 format!(
928 "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
929 {personality}\n\n\
930 This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
931 Rules:\n\
932 - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
933 - Answer directly. One paragraph is usually right.\n\
934 - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
935 - Don't narrate your reasoning or mention tool names unprompted.\n\
936 - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
937 - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
938 - If the user wants the full coding harness, they can type `/agent`.\n",
939 )
940 }
941
942 fn append_session_handoff(&self, system_msg: &mut String) {
943 let has_summary = self
944 .running_summary
945 .as_ref()
946 .map(|s| !s.trim().is_empty())
947 .unwrap_or(false);
948 let has_memory = self.session_memory.has_signal();
949
950 if !has_summary && !has_memory {
951 return;
952 }
953
954 system_msg.push_str(
955 "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
956 This is compact carry-over from earlier work on this machine.\n\
957 Use it only when it helps the current request.\n\
958 Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
959 );
960
961 if has_memory {
962 system_msg.push_str("\n## Active Task Memory\n");
963 system_msg.push_str(&self.session_memory.to_prompt());
964 }
965
966 if let Some(summary) = self.running_summary.as_deref() {
967 if !summary.trim().is_empty() {
968 system_msg.push_str("\n## Compacted Session Summary\n");
969 system_msg.push_str(summary);
970 system_msg.push('\n');
971 }
972 }
973 }
974
975 fn set_workflow_mode(&mut self, mode: WorkflowMode) {
976 self.workflow_mode = mode;
977 }
978
979 fn current_plan_summary(&self) -> Option<String> {
980 self.session_memory
981 .current_plan
982 .as_ref()
983 .filter(|plan| plan.has_signal())
984 .map(|plan| plan.summary_line())
985 }
986
987 fn current_plan_allowed_paths(&self) -> Vec<String> {
988 self.session_memory
989 .current_plan
990 .as_ref()
991 .map(|plan| {
992 plan.target_files
993 .iter()
994 .map(|path| normalize_workspace_path(path))
995 .collect()
996 })
997 .unwrap_or_default()
998 }
999
1000 fn persist_architect_handoff(&mut self, response: &str) {
1001 if self.workflow_mode != WorkflowMode::Architect {
1002 return;
1003 }
1004 let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1005 return;
1006 };
1007 let _ = crate::tools::plan::save_plan_handoff(&plan);
1008 self.session_memory.current_plan = Some(plan);
1009 }
1010
1011 async fn begin_grounded_turn(&self) -> u64 {
1012 let mut state = self.action_grounding.lock().await;
1013 state.turn_index += 1;
1014 state.turn_index
1015 }
1016
1017 async fn reset_action_grounding(&self) {
1018 let mut state = self.action_grounding.lock().await;
1019 *state = ActionGroundingState::default();
1020 }
1021
1022 async fn record_read_observation(&self, path: &str) {
1023 let normalized = normalize_workspace_path(path);
1024 let mut state = self.action_grounding.lock().await;
1025 let turn = state.turn_index;
1026 state.observed_paths.insert(normalized.clone(), turn);
1030 state.inspected_paths.insert(normalized, turn);
1031 }
1032
1033 async fn record_line_inspection(&self, path: &str) {
1034 let normalized = normalize_workspace_path(path);
1035 let mut state = self.action_grounding.lock().await;
1036 let turn = state.turn_index;
1037 state.observed_paths.insert(normalized.clone(), turn);
1038 state.inspected_paths.insert(normalized, turn);
1039 }
1040
1041 async fn record_verify_build_result(&self, ok: bool, output: &str) {
1042 let mut state = self.action_grounding.lock().await;
1043 let turn = state.turn_index;
1044 state.last_verify_build_turn = Some(turn);
1045 state.last_verify_build_ok = ok;
1046 if ok {
1047 state.code_changed_since_verify = false;
1048 state.last_failed_build_paths.clear();
1049 } else {
1050 state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1051 }
1052 }
1053
1054 fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
1055 self.session_memory.record_verification(ok, summary);
1056 }
1057
1058 async fn record_successful_mutation(&self, path: Option<&str>) {
1059 let mut state = self.action_grounding.lock().await;
1060 state.code_changed_since_verify = match path {
1061 Some(p) => is_code_like_path(p),
1062 None => true,
1063 };
1064 }
1065
1066 async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
1067 if self
1068 .plan_execution_active
1069 .load(std::sync::atomic::Ordering::SeqCst)
1070 {
1071 if is_current_plan_irrelevant_tool(name) {
1072 return Err(format!(
1073 "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
1074 name
1075 ));
1076 }
1077
1078 if name == "map_project" {
1079 return Err(
1080 "Action blocked: `map_project` is too broad for current-plan execution. Use the target files from the saved plan and inspect them directly with built-in workspace tools."
1081 .to_string(),
1082 );
1083 }
1084
1085 if is_plan_scoped_tool(name) {
1086 let allowed_paths = self.current_plan_allowed_paths();
1087 if !allowed_paths.is_empty() {
1088 let in_allowed = match name {
1089 "auto_pin_context" => args
1090 .get("paths")
1091 .and_then(|v| v.as_array())
1092 .map(|paths| {
1093 !paths.is_empty()
1094 && paths.iter().all(|v| {
1095 v.as_str()
1096 .map(normalize_workspace_path)
1097 .map(|p| allowed_paths.contains(&p))
1098 .unwrap_or(false)
1099 })
1100 })
1101 .unwrap_or(false),
1102 "grep_files" | "list_files" => args
1103 .get("path")
1104 .and_then(|v| v.as_str())
1105 .map(normalize_workspace_path)
1106 .map(|p| allowed_paths.contains(&p))
1107 .unwrap_or(false),
1108 _ => action_target_path(name, args)
1109 .map(|p| allowed_paths.contains(&p))
1110 .unwrap_or(false),
1111 };
1112
1113 if !in_allowed {
1114 let allowed = allowed_paths
1115 .iter()
1116 .map(|p| format!("`{}`", p))
1117 .collect::<Vec<_>>()
1118 .join(", ");
1119 return Err(format!(
1120 "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
1121 allowed
1122 ));
1123 }
1124 }
1125 }
1126
1127 if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
1128 if let Some(target) = action_target_path(name, args) {
1129 let state = self.action_grounding.lock().await;
1130 let recently_inspected = state
1131 .inspected_paths
1132 .get(&target)
1133 .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1134 .unwrap_or(false);
1135 drop(state);
1136 if !recently_inspected {
1137 return Err(format!(
1138 "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
1139 name, target
1140 ));
1141 }
1142 }
1143 }
1144 }
1145
1146 if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
1147 return Err(
1148 "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
1149 .to_string(),
1150 );
1151 }
1152
1153 if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
1154 if name == "shell" {
1155 let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1156 let risk = crate::tools::guard::classify_bash_risk(command);
1157 if !matches!(risk, crate::tools::RiskLevel::Safe) {
1158 return Err(format!(
1159 "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
1160 self.workflow_mode.label()
1161 ));
1162 }
1163 } else {
1164 return Err(format!(
1165 "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
1166 self.workflow_mode.label()
1167 ));
1168 }
1169 }
1170
1171 let normalized_target = action_target_path(name, args);
1172 if let Some(target) = normalized_target.as_deref() {
1173 if matches!(
1174 name,
1175 "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1176 ) {
1177 if let Some(prompt) = self.latest_user_prompt() {
1178 if docs_edit_without_explicit_request(prompt, target) {
1179 return Err(format!(
1180 "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
1181 target
1182 ));
1183 }
1184 }
1185 }
1186 let path_exists = std::path::Path::new(target).exists();
1187 if path_exists {
1188 let state = self.action_grounding.lock().await;
1189 let pinned = self.pinned_files.lock().await;
1190 let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
1191 drop(pinned);
1192
1193 let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
1198 let recently_inspected = state
1199 .inspected_paths
1200 .get(target)
1201 .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1202 .unwrap_or(false);
1203 let same_turn_read = state
1204 .observed_paths
1205 .get(target)
1206 .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
1207 .unwrap_or(false);
1208 let recent_observed = state
1209 .observed_paths
1210 .get(target)
1211 .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
1212 .unwrap_or(false);
1213
1214 if needs_exact_window {
1215 if !recently_inspected && !same_turn_read && !pinned_match {
1216 return Err(format!(
1217 "Action blocked: `{}` on '{}' requires a line-level inspection first. \
1218 Use `inspect_lines` on the target region to get the exact current text \
1219 (whitespace and indentation included), then retry the edit.",
1220 name, target
1221 ));
1222 }
1223 } else if !recent_observed && !pinned_match {
1224 return Err(format!(
1225 "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
1226 name, target
1227 ));
1228 }
1229 }
1230 }
1231
1232 if is_mcp_mutating_tool(name) {
1233 return Err(format!(
1234 "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
1235 name
1236 ));
1237 }
1238
1239 if is_mcp_workspace_read_tool(name) {
1240 return Err(format!(
1241 "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
1242 name
1243 ));
1244 }
1245
1246 if matches!(
1249 name,
1250 "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
1251 ) {
1252 if let Some(target) = normalized_target.as_deref() {
1253 let state = self.action_grounding.lock().await;
1254 if state.code_changed_since_verify
1255 && !state.last_verify_build_ok
1256 && !state.last_failed_build_paths.is_empty()
1257 && !state.last_failed_build_paths.iter().any(|p| p == target)
1258 {
1259 let files = state
1260 .last_failed_build_paths
1261 .iter()
1262 .map(|p| format!("`{}`", p))
1263 .collect::<Vec<_>>()
1264 .join(", ");
1265 return Err(format!(
1266 "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
1267 files
1268 ));
1269 }
1270 }
1271 }
1272
1273 if name == "git_commit" || name == "git_push" {
1274 let state = self.action_grounding.lock().await;
1275 if state.code_changed_since_verify && !state.last_verify_build_ok {
1276 return Err(format!(
1277 "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
1278 name
1279 ));
1280 }
1281 }
1282
1283 if name == "shell" {
1284 let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
1285 if let Some(prompt) = self.latest_user_prompt() {
1286 if let Some(topic) = preferred_host_inspection_topic(prompt) {
1287 if shell_looks_like_structured_host_inspection(command) {
1288 return Err(format!(
1289 "Action blocked: this is a host-inspection question. Prefer `inspect_host(topic: \"{}\")` instead of raw `shell` for PATH, toolchains, desktop, Downloads, listening ports, repo-doctor checks, or directory/disk summaries. Use `shell` only if `inspect_host` cannot answer the question directly.",
1290 topic
1291 ));
1292 }
1293 }
1294 }
1295 let reason = args
1296 .get("reason")
1297 .and_then(|v| v.as_str())
1298 .unwrap_or("")
1299 .trim();
1300 let risk = crate::tools::guard::classify_bash_risk(command);
1301 if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
1302 return Err(
1303 "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
1304 .to_string(),
1305 );
1306 }
1307 }
1308
1309 Ok(())
1310 }
1311
1312 fn build_action_receipt(
1313 &self,
1314 name: &str,
1315 args: &Value,
1316 output: &str,
1317 is_error: bool,
1318 ) -> Option<ChatMessage> {
1319 if is_error || !is_destructive_tool(name) {
1320 return None;
1321 }
1322
1323 let mut receipt = String::from("[ACTION RECEIPT]\n");
1324 receipt.push_str(&format!("- tool: {}\n", name));
1325 if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1326 receipt.push_str(&format!("- target: {}\n", path));
1327 }
1328 if name == "shell" {
1329 if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1330 receipt.push_str(&format!("- command: {}\n", command));
1331 }
1332 if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1333 if !reason.trim().is_empty() {
1334 receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1335 }
1336 }
1337 }
1338 let first_line = output.lines().next().unwrap_or(output).trim();
1339 receipt.push_str(&format!("- outcome: {}\n", first_line));
1340 Some(ChatMessage::system(&receipt))
1341 }
1342
1343 fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1344 self.tools
1345 .retain(|tool| !tool.function.name.starts_with("mcp__"));
1346 self.tools
1347 .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1348 tool_type: "function".into(),
1349 function: ToolFunction {
1350 name: tool.name.clone(),
1351 description: tool.description.clone().unwrap_or_default(),
1352 parameters: tool.input_schema.clone(),
1353 },
1354 metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1355 }));
1356 }
1357
1358 async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1359 let summary = {
1360 let mcp = self.mcp_manager.lock().await;
1361 mcp.runtime_report()
1362 };
1363 let _ = tx
1364 .send(InferenceEvent::McpStatus {
1365 state: summary.state,
1366 summary: summary.summary,
1367 })
1368 .await;
1369 }
1370
1371 async fn refresh_mcp_tools(
1372 &mut self,
1373 tx: &mpsc::Sender<InferenceEvent>,
1374 ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1375 let mcp_tools = {
1376 let mut mcp = self.mcp_manager.lock().await;
1377 match mcp.initialize_all().await {
1378 Ok(()) => mcp.discover_tools().await,
1379 Err(e) => {
1380 drop(mcp);
1381 self.replace_mcp_tool_definitions(&[]);
1382 self.emit_mcp_runtime_status(tx).await;
1383 return Err(e.into());
1384 }
1385 }
1386 };
1387
1388 self.replace_mcp_tool_definitions(&mcp_tools);
1389 self.emit_mcp_runtime_status(tx).await;
1390 Ok(mcp_tools)
1391 }
1392
1393 pub async fn initialize_mcp(
1395 &mut self,
1396 tx: &mpsc::Sender<InferenceEvent>,
1397 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1398 let _ = self.refresh_mcp_tools(tx).await?;
1399 Ok(())
1400 }
1401
1402 pub async fn run_turn(
1408 &mut self,
1409 user_turn: &UserTurn,
1410 tx: mpsc::Sender<InferenceEvent>,
1411 yolo: bool,
1412 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1413 let user_input = user_turn.text.as_str();
1414 if user_input.trim() == "/new" {
1416 self.history.clear();
1417 self.reasoning_history = None;
1418 self.session_memory.clear();
1419 self.running_summary = None;
1420 self.correction_hints.clear();
1421 self.pinned_files.lock().await.clear();
1422 self.reset_action_grounding().await;
1423 reset_task_files();
1424 let _ = std::fs::remove_file(session_path());
1425 self.save_empty_session();
1426 self.emit_compaction_pressure(&tx).await;
1427 self.emit_prompt_pressure_idle(&tx).await;
1428 for chunk in chunk_text(
1429 "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1430 8,
1431 ) {
1432 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1433 }
1434 let _ = tx.send(InferenceEvent::Done).await;
1435 return Ok(());
1436 }
1437
1438 if user_input.trim() == "/forget" {
1439 self.history.clear();
1440 self.reasoning_history = None;
1441 self.session_memory.clear();
1442 self.running_summary = None;
1443 self.correction_hints.clear();
1444 self.pinned_files.lock().await.clear();
1445 self.reset_action_grounding().await;
1446 reset_task_files();
1447 purge_persistent_memory();
1448 tokio::task::block_in_place(|| self.vein.reset());
1449 let _ = std::fs::remove_file(session_path());
1450 self.save_empty_session();
1451 self.emit_compaction_pressure(&tx).await;
1452 self.emit_prompt_pressure_idle(&tx).await;
1453 for chunk in chunk_text(
1454 "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1455 8,
1456 ) {
1457 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1458 }
1459 let _ = tx.send(InferenceEvent::Done).await;
1460 return Ok(());
1461 }
1462
1463 if user_input.trim() == "/vein-inspect" {
1464 let indexed = self.refresh_vein_index();
1465 let report = self.build_vein_inspection_report(indexed);
1466 let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1467 let _ = tx
1468 .send(InferenceEvent::VeinStatus {
1469 file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1470 embedded_count: snapshot.embedded_source_doc_chunks,
1471 docs_only: self.vein_docs_only_mode(),
1472 })
1473 .await;
1474 for chunk in chunk_text(&report, 8) {
1475 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1476 }
1477 let _ = tx.send(InferenceEvent::Done).await;
1478 return Ok(());
1479 }
1480
1481 if user_input.trim() == "/workspace-profile" {
1482 let root = crate::tools::file_ops::workspace_root();
1483 let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1484 let report = crate::agent::workspace_profile::profile_report(&root);
1485 for chunk in chunk_text(&report, 8) {
1486 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1487 }
1488 let _ = tx.send(InferenceEvent::Done).await;
1489 return Ok(());
1490 }
1491
1492 if user_input.trim() == "/vein-reset" {
1493 tokio::task::block_in_place(|| self.vein.reset());
1494 let _ = tx
1495 .send(InferenceEvent::VeinStatus {
1496 file_count: 0,
1497 embedded_count: 0,
1498 docs_only: self.vein_docs_only_mode(),
1499 })
1500 .await;
1501 for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1502 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1503 }
1504 let _ = tx.send(InferenceEvent::Done).await;
1505 return Ok(());
1506 }
1507
1508 let config = crate::agent::config::load_config();
1510 self.recovery_context.clear();
1511 let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1512 if !manual_runtime_refresh {
1513 if let Some((model_id, context_length, changed)) = self
1514 .refresh_runtime_profile_and_report(&tx, "turn_start")
1515 .await
1516 {
1517 if changed {
1518 let _ = tx
1519 .send(InferenceEvent::Thought(format!(
1520 "Runtime refresh: using model `{}` with CTX {} for this turn.",
1521 model_id, context_length
1522 )))
1523 .await;
1524 }
1525 }
1526 }
1527 self.emit_compaction_pressure(&tx).await;
1528 let current_model = self.engine.current_model();
1529 self.engine.set_gemma_native_formatting(
1530 crate::agent::config::effective_gemma_native_formatting(&config, ¤t_model),
1531 );
1532 let _turn_id = self.begin_grounded_turn().await;
1533 let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1534 let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1535 Ok(tools) => tools,
1536 Err(e) => {
1537 let _ = tx
1538 .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1539 .await;
1540 Vec::new()
1541 }
1542 };
1543
1544 let effective_fast = config
1546 .fast_model
1547 .clone()
1548 .or_else(|| self.fast_model.clone());
1549 let effective_think = config
1550 .think_model
1551 .clone()
1552 .or_else(|| self.think_model.clone());
1553
1554 if user_input.trim() == "/lsp" {
1556 let mut lsp = self.lsp_manager.lock().await;
1557 match lsp.start_servers().await {
1558 Ok(_) => {
1559 let _ = tx
1560 .send(InferenceEvent::MutedToken(
1561 "LSP: Servers Initialized OK.".to_string(),
1562 ))
1563 .await;
1564 }
1565 Err(e) => {
1566 let _ = tx
1567 .send(InferenceEvent::Error(format!(
1568 "LSP: Failed to start servers - {}",
1569 e
1570 )))
1571 .await;
1572 }
1573 }
1574 let _ = tx.send(InferenceEvent::Done).await;
1575 return Ok(());
1576 }
1577
1578 if user_input.trim() == "/runtime-refresh" {
1579 match self
1580 .refresh_runtime_profile_and_report(&tx, "manual_command")
1581 .await
1582 {
1583 Some((model_id, context_length, changed)) => {
1584 let msg = if changed {
1585 format!(
1586 "Runtime profile refreshed. Model: {} | CTX: {}",
1587 model_id, context_length
1588 )
1589 } else {
1590 format!(
1591 "Runtime profile unchanged. Model: {} | CTX: {}",
1592 model_id, context_length
1593 )
1594 };
1595 for chunk in chunk_text(&msg, 8) {
1596 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1597 }
1598 }
1599 None => {
1600 let _ = tx
1601 .send(InferenceEvent::Error(
1602 "Runtime refresh failed: LM Studio profile could not be read."
1603 .to_string(),
1604 ))
1605 .await;
1606 }
1607 }
1608 let _ = tx.send(InferenceEvent::Done).await;
1609 return Ok(());
1610 }
1611
1612 if user_input.trim() == "/ask" {
1613 self.set_workflow_mode(WorkflowMode::Ask);
1614 for chunk in chunk_text(
1615 "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1616 8,
1617 ) {
1618 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1619 }
1620 let _ = tx.send(InferenceEvent::Done).await;
1621 return Ok(());
1622 }
1623
1624 if user_input.trim() == "/code" {
1625 self.set_workflow_mode(WorkflowMode::Code);
1626 let mut message =
1627 "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1628 if let Some(plan) = self.current_plan_summary() {
1629 message.push_str(&format!(" Current plan: {plan}."));
1630 }
1631 for chunk in chunk_text(&message, 8) {
1632 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1633 }
1634 let _ = tx.send(InferenceEvent::Done).await;
1635 return Ok(());
1636 }
1637
1638 if user_input.trim() == "/architect" {
1639 self.set_workflow_mode(WorkflowMode::Architect);
1640 let mut message =
1641 "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement.".to_string();
1642 if let Some(plan) = self.current_plan_summary() {
1643 message.push_str(&format!(" Existing plan: {plan}."));
1644 }
1645 for chunk in chunk_text(&message, 8) {
1646 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1647 }
1648 let _ = tx.send(InferenceEvent::Done).await;
1649 return Ok(());
1650 }
1651
1652 if user_input.trim() == "/read-only" {
1653 self.set_workflow_mode(WorkflowMode::ReadOnly);
1654 for chunk in chunk_text(
1655 "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1656 8,
1657 ) {
1658 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1659 }
1660 let _ = tx.send(InferenceEvent::Done).await;
1661 return Ok(());
1662 }
1663
1664 if user_input.trim() == "/auto" {
1665 self.set_workflow_mode(WorkflowMode::Auto);
1666 for chunk in chunk_text(
1667 "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1668 8,
1669 ) {
1670 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1671 }
1672 let _ = tx.send(InferenceEvent::Done).await;
1673 return Ok(());
1674 }
1675
1676 if user_input.trim() == "/chat" {
1677 self.set_workflow_mode(WorkflowMode::Chat);
1678 let _ = tx.send(InferenceEvent::Done).await;
1679 return Ok(());
1680 }
1681
1682 if user_input.trim() == "/reroll" {
1683 let soul = crate::ui::hatch::generate_soul_random();
1684 self.snark = soul.snark;
1685 self.chaos = soul.chaos;
1686 self.soul_personality = soul.personality.clone();
1687 let species = soul.species.clone();
1692 if let Some(eng) = Arc::get_mut(&mut self.engine) {
1693 eng.species = species.clone();
1694 }
1695 let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1696 let _ = tx
1697 .send(InferenceEvent::SoulReroll {
1698 species: soul.species.clone(),
1699 rarity: soul.rarity.label().to_string(),
1700 shiny: soul.shiny,
1701 personality: soul.personality.clone(),
1702 })
1703 .await;
1704 for chunk in chunk_text(
1705 &format!(
1706 "A new companion awakens!\n[{}{}] {} — \"{}\"",
1707 soul.rarity.label(),
1708 shiny_tag,
1709 soul.species,
1710 soul.personality
1711 ),
1712 8,
1713 ) {
1714 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1715 }
1716 let _ = tx.send(InferenceEvent::Done).await;
1717 return Ok(());
1718 }
1719
1720 if user_input.trim() == "/agent" {
1721 self.set_workflow_mode(WorkflowMode::Auto);
1722 let _ = tx.send(InferenceEvent::Done).await;
1723 return Ok(());
1724 }
1725
1726 let mut effective_user_input = user_input.trim().to_string();
1727 if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1728 self.set_workflow_mode(mode);
1729 effective_user_input = rest.to_string();
1730 }
1731 let transcript_user_input = transcript_user_turn_text(user_turn, &effective_user_input);
1732 effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1733 let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1734 && is_current_plan_execution_request(&effective_user_input)
1735 && self
1736 .session_memory
1737 .current_plan
1738 .as_ref()
1739 .map(|plan| plan.has_signal())
1740 .unwrap_or(false);
1741 self.plan_execution_active
1742 .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1743 let _plan_execution_guard = PlanExecutionGuard {
1744 flag: self.plan_execution_active.clone(),
1745 };
1746 let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1747
1748 if let Some(answer_kind) = intent.direct_answer {
1750 match answer_kind {
1751 DirectAnswerKind::LanguageCapability => {
1752 let response = build_language_capability_answer();
1753 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1754 .await;
1755 return Ok(());
1756 }
1757 DirectAnswerKind::UnsafeWorkflowPressure => {
1758 let response = build_unsafe_workflow_pressure_answer();
1759 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1760 .await;
1761 return Ok(());
1762 }
1763 DirectAnswerKind::SessionMemory => {
1764 let response = build_session_memory_answer();
1765 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1766 .await;
1767 return Ok(());
1768 }
1769 DirectAnswerKind::RecoveryRecipes => {
1770 let response = build_recovery_recipes_answer();
1771 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1772 .await;
1773 return Ok(());
1774 }
1775 DirectAnswerKind::McpLifecycle => {
1776 let response = build_mcp_lifecycle_answer();
1777 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1778 .await;
1779 return Ok(());
1780 }
1781 DirectAnswerKind::AuthorizationPolicy => {
1782 let response = build_authorization_policy_answer();
1783 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1784 .await;
1785 return Ok(());
1786 }
1787 DirectAnswerKind::ToolClasses => {
1788 let response = build_tool_classes_answer();
1789 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1790 .await;
1791 return Ok(());
1792 }
1793 DirectAnswerKind::ToolRegistryOwnership => {
1794 let response = build_tool_registry_ownership_answer();
1795 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1796 .await;
1797 return Ok(());
1798 }
1799 DirectAnswerKind::SessionResetSemantics => {
1800 let response = build_session_reset_semantics_answer();
1801 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1802 .await;
1803 return Ok(());
1804 }
1805 DirectAnswerKind::ProductSurface => {
1806 let response = build_product_surface_answer();
1807 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1808 .await;
1809 return Ok(());
1810 }
1811 DirectAnswerKind::ReasoningSplit => {
1812 let response = build_reasoning_split_answer();
1813 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1814 .await;
1815 return Ok(());
1816 }
1817 DirectAnswerKind::Identity => {
1818 let response = build_identity_answer();
1819 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1820 .await;
1821 return Ok(());
1822 }
1823 DirectAnswerKind::WorkflowModes => {
1824 let response = build_workflow_modes_answer();
1825 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1826 .await;
1827 return Ok(());
1828 }
1829 DirectAnswerKind::GemmaNative => {
1830 let response = build_gemma_native_answer();
1831 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1832 .await;
1833 return Ok(());
1834 }
1835 DirectAnswerKind::GemmaNativeSettings => {
1836 let response = build_gemma_native_settings_answer();
1837 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1838 .await;
1839 return Ok(());
1840 }
1841 DirectAnswerKind::VerifyProfiles => {
1842 let response = build_verify_profiles_answer();
1843 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1844 .await;
1845 return Ok(());
1846 }
1847 DirectAnswerKind::Toolchain => {
1848 let lower = effective_user_input.to_lowercase();
1849 let topic = if (lower.contains("voice output") || lower.contains("voice"))
1850 && (lower.contains("lag")
1851 || lower.contains("behind visible text")
1852 || lower.contains("latency"))
1853 {
1854 "voice_latency_plan"
1855 } else {
1856 "all"
1857 };
1858 let response =
1859 crate::tools::toolchain::describe_toolchain(&serde_json::json!({
1860 "topic": topic,
1861 "question": effective_user_input,
1862 }))
1863 .await
1864 .unwrap_or_else(|e| format!("Error: {}", e));
1865 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1866 .await;
1867 return Ok(());
1868 }
1869 DirectAnswerKind::ArchitectSessionResetPlan => {
1870 let plan = build_architect_session_reset_plan();
1871 let response = plan.to_markdown();
1872 let _ = crate::tools::plan::save_plan_handoff(&plan);
1873 self.session_memory.current_plan = Some(plan);
1874 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1875 .await;
1876 return Ok(());
1877 }
1878 }
1879 }
1880
1881 if matches!(
1882 self.workflow_mode,
1883 WorkflowMode::Ask | WorkflowMode::ReadOnly
1884 ) && looks_like_mutation_request(&effective_user_input)
1885 {
1886 let response = build_mode_redirect_answer(self.workflow_mode);
1887 self.history.push(ChatMessage::user(&effective_user_input));
1888 self.history.push(ChatMessage::assistant_text(&response));
1889 self.transcript.log_user(&transcript_user_input);
1890 self.transcript.log_agent(&response);
1891 for chunk in chunk_text(&response, 8) {
1892 if !chunk.is_empty() {
1893 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1894 }
1895 }
1896 let _ = tx.send(InferenceEvent::Done).await;
1897 self.trim_history(80);
1898 self.refresh_session_memory();
1899 self.save_session();
1900 return Ok(());
1901 }
1902
1903 if user_input.trim() == "/think" {
1904 self.think_mode = Some(true);
1905 for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
1906 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1907 }
1908 let _ = tx.send(InferenceEvent::Done).await;
1909 return Ok(());
1910 }
1911 if user_input.trim() == "/no_think" {
1912 self.think_mode = Some(false);
1913 for chunk in chunk_text(
1914 "Think mode: OFF — fast mode enabled (no chain-of-thought).",
1915 8,
1916 ) {
1917 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1918 }
1919 let _ = tx.send(InferenceEvent::Done).await;
1920 return Ok(());
1921 }
1922
1923 if user_input.trim_start().starts_with("/pin ") {
1925 let path = user_input.trim_start()[5..].trim();
1926 match std::fs::read_to_string(path) {
1927 Ok(content) => {
1928 self.pinned_files
1929 .lock()
1930 .await
1931 .insert(path.to_string(), content);
1932 let msg = format!(
1933 "Pinned: {} — this file is now locked in model context.",
1934 path
1935 );
1936 for chunk in chunk_text(&msg, 8) {
1937 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1938 }
1939 }
1940 Err(e) => {
1941 let _ = tx
1942 .send(InferenceEvent::Error(format!(
1943 "Failed to pin {}: {}",
1944 path, e
1945 )))
1946 .await;
1947 }
1948 }
1949 let _ = tx.send(InferenceEvent::Done).await;
1950 return Ok(());
1951 }
1952
1953 if user_input.trim_start().starts_with("/unpin ") {
1955 let path = user_input.trim_start()[7..].trim();
1956 if self.pinned_files.lock().await.remove(path).is_some() {
1957 let msg = format!("Unpinned: {} — file removed from active context.", path);
1958 for chunk in chunk_text(&msg, 8) {
1959 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1960 }
1961 } else {
1962 let _ = tx
1963 .send(InferenceEvent::Error(format!(
1964 "File {} was not pinned.",
1965 path
1966 )))
1967 .await;
1968 }
1969 let _ = tx.send(InferenceEvent::Done).await;
1970 return Ok(());
1971 }
1972
1973 let tiny_context_mode = self.engine.current_context_length() <= 8_192;
1977 let mut base_prompt = self.engine.build_system_prompt(
1978 self.snark,
1979 self.chaos,
1980 self.brief,
1981 self.professional,
1982 &self.tools,
1983 self.reasoning_history.as_deref(),
1984 &mcp_tools,
1985 );
1986 if !tiny_context_mode {
1987 if let Some(hint) = &config.context_hint {
1988 if !hint.trim().is_empty() {
1989 base_prompt.push_str(&format!(
1990 "\n\n# Project Context (from .hematite/settings.json)\n{}",
1991 hint
1992 ));
1993 }
1994 }
1995 if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
1996 &crate::tools::file_ops::workspace_root(),
1997 ) {
1998 base_prompt.push_str(&format!("\n\n{}", profile_block));
1999 }
2000 if let Some(ref l1) = self.l1_context {
2002 base_prompt.push_str(&format!("\n\n{}", l1));
2003 }
2004 }
2005 let grounded_trace_mode = intent.grounded_trace_mode
2006 || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2007 let capability_mode =
2008 intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2009 let toolchain_mode =
2010 intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2011 let host_inspection_mode = intent.host_inspection_mode;
2012 let project_map_mode = intent.preserve_project_map_output
2013 || intent.primary_class == QueryIntentClass::RepoArchitecture;
2014 let architecture_overview_mode = intent.architecture_overview_mode;
2015 let capability_needs_repo = intent.capability_needs_repo;
2016 let mut system_msg = build_system_with_corrections(
2017 &base_prompt,
2018 &self.correction_hints,
2019 &self.gpu_state,
2020 &self.git_state,
2021 &config,
2022 );
2023 if tiny_context_mode {
2024 system_msg.push_str(
2025 "\n\n# TINY CONTEXT TURN MODE\n\
2026 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2027 );
2028 }
2029 if !tiny_context_mode && grounded_trace_mode {
2030 system_msg.push_str(
2031 "\n\n# GROUNDED TRACE MODE\n\
2032 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2033 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2034 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2035 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2036 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2037 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2038 Do not invent names such as synthetic channels or subsystems.\n\
2039 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2040 For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2041 );
2042 }
2043 if !tiny_context_mode && capability_mode {
2044 system_msg.push_str(
2045 "\n\n# CAPABILITY QUESTION MODE\n\
2046 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2047 Answer from stable Hematite capabilities and current runtime state.\n\
2048 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2049 Do NOT call repo-inspection tools like `map_project`, `read_file`, or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2050 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2051 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2052 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2053 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2054 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2055 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2056 Keep the answer short, plain, and ASCII-first.\n"
2057 );
2058 }
2059 if !tiny_context_mode && toolchain_mode {
2060 system_msg.push_str(
2061 "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2062 This turn is about Hematite's real built-in tools and how to choose them.\n\
2063 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2064 Use only real built-in tool names.\n\
2065 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2066 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2067 Be explicit about which tools are optional or conditional.\n"
2068 );
2069 }
2070 if !tiny_context_mode && host_inspection_mode {
2071 system_msg.push_str(
2072 "\n\n# HOST INSPECTION MODE\n\
2073 This turn is about the local machine and environment, not repository architecture.\n\
2074 Prefer `inspect_host` before raw `shell` for PATH analysis, installed developer tool versions, desktop item counts, Downloads summaries, listening ports, repo-doctor checks, and directory/disk-size reports.\n\
2075 Use the closest built-in topic first: `summary`, `toolchains`, `path`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
2076 Only use `shell` if the host question truly goes beyond `inspect_host`.\n"
2077 );
2078 }
2079 if !tiny_context_mode && project_map_mode {
2080 system_msg.push_str(
2081 "\n\n# PROJECT MAP DISCIPLINE MODE\n\
2082 For repository structure, entrypoint, owner-file, or architecture-map questions, prefer `map_project` first.\n\
2083 If `map_project` provides likely entrypoints and core owner files, preserve that grounded structure instead of rewriting it into broad prose.\n\
2084 Do not invent new entrypoints or owner files that are not present in the tool output.\n\
2085 Keep the final answer compact and architecture-first.\n"
2086 );
2087 }
2088 if !tiny_context_mode && architecture_overview_mode {
2089 system_msg.push_str(
2090 "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2091 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow and `map_project` for compact structure.\n\
2092 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2093 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2094 );
2095 }
2096
2097 system_msg.push_str(&format!(
2099 "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2100 self.workflow_mode.label()
2101 ));
2102 if tiny_context_mode {
2103 system_msg
2104 .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2105 } else {
2106 match self.workflow_mode {
2107 WorkflowMode::Auto => system_msg.push_str(
2108 "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2109 ),
2110 WorkflowMode::Ask => system_msg.push_str(
2111 "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2112 ),
2113 WorkflowMode::Code => system_msg.push_str(
2114 "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2115 ),
2116 WorkflowMode::Architect => system_msg.push_str(
2117 "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2118 ),
2119 WorkflowMode::ReadOnly => system_msg.push_str(
2120 "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2121 ),
2122 WorkflowMode::Chat => {} }
2124 }
2125 if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2126 system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2127 system_msg.push_str(architect_handoff_contract());
2128 system_msg.push('\n');
2129 }
2130 if !tiny_context_mode && implement_current_plan {
2131 system_msg.push_str(
2132 "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2133 The user explicitly asked you to implement the current saved plan.\n\
2134 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2135 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2136 Do not call `map_project` during current-plan execution.\n\
2137 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2138 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2139 );
2140 if let Some(plan) = self.session_memory.current_plan.as_ref() {
2141 if !plan.target_files.is_empty() {
2142 system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2143 for path in &plan.target_files {
2144 system_msg.push_str(&format!("- {}\n", path));
2145 }
2146 }
2147 }
2148 }
2149 if !tiny_context_mode {
2150 let pinned = self.pinned_files.lock().await;
2151 if !pinned.is_empty() {
2152 system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2153 system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
2154 for (path, content) in pinned.iter() {
2155 system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2156 }
2157 }
2158 }
2159 if !tiny_context_mode {
2160 self.append_session_handoff(&mut system_msg);
2161 }
2162 let system_msg = if self.workflow_mode.is_chat() {
2165 self.build_chat_system_prompt()
2166 } else {
2167 system_msg
2168 };
2169 if self.history.is_empty() || self.history[0].role != "system" {
2170 self.history.insert(0, ChatMessage::system(&system_msg));
2171 } else {
2172 self.history[0] = ChatMessage::system(&system_msg);
2173 }
2174
2175 self.cancel_token
2177 .store(false, std::sync::atomic::Ordering::SeqCst);
2178
2179 self.reasoning_history = None;
2182
2183 let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2184 let user_content = match self.think_mode {
2185 Some(true) => format!("/think\n{}", effective_user_input),
2186 Some(false) => format!("/no_think\n{}", effective_user_input),
2187 None if !is_gemma
2192 && !self.workflow_mode.is_chat()
2193 && !is_quick_tool_request(&effective_user_input) =>
2194 {
2195 format!("/think\n{}", effective_user_input)
2196 }
2197 None => effective_user_input.clone(),
2198 };
2199 if let Some(image) = user_turn.attached_image.as_ref() {
2200 let image_url =
2201 crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2202 .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2203 self.history
2204 .push(ChatMessage::user_with_image(&user_content, &image_url));
2205 } else {
2206 self.history.push(ChatMessage::user(&user_content));
2207 }
2208 self.transcript.log_user(&transcript_user_input);
2209
2210 let vein_docs_only = self.vein_docs_only_mode();
2214 let allow_vein_context = !self.workflow_mode.is_chat()
2215 || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2216 let (vein_context, vein_paths) = if allow_vein_context {
2217 self.refresh_vein_index();
2218 let _ = tx
2219 .send(InferenceEvent::VeinStatus {
2220 file_count: self.vein.file_count(),
2221 embedded_count: self.vein.embedded_chunk_count(),
2222 docs_only: vein_docs_only,
2223 })
2224 .await;
2225 match self.build_vein_context(&effective_user_input) {
2226 Some((ctx, paths)) => (Some(ctx), paths),
2227 None => (None, Vec::new()),
2228 }
2229 } else {
2230 (None, Vec::new())
2231 };
2232 if !vein_paths.is_empty() {
2233 let _ = tx
2234 .send(InferenceEvent::VeinContext { paths: vein_paths })
2235 .await;
2236 }
2237
2238 let routed_model = route_model(
2240 &effective_user_input,
2241 effective_fast.as_deref(),
2242 effective_think.as_deref(),
2243 )
2244 .map(|s| s.to_string());
2245
2246 let mut loop_intervention: Option<String> = None;
2247 let mut implementation_started = false;
2248 let mut non_mutating_plan_steps = 0usize;
2249 let non_mutating_plan_soft_cap = 5usize;
2250 let non_mutating_plan_hard_cap = 8usize;
2251 let mut overview_project_map: Option<String> = None;
2252 let mut overview_runtime_trace: Option<String> = None;
2253
2254 let max_iters = 25;
2256 let mut consecutive_errors = 0;
2257 let mut first_iter = true;
2258 let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2259 let _result_counts: std::collections::HashMap<String, usize> =
2261 std::collections::HashMap::new();
2262 let mut repeat_counts: std::collections::HashMap<String, usize> =
2264 std::collections::HashMap::new();
2265 let mut successful_read_targets: std::collections::HashSet<String> =
2266 std::collections::HashSet::new();
2267 let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2269 std::collections::HashSet::new();
2270 let mut successful_grep_targets: std::collections::HashSet<String> =
2271 std::collections::HashSet::new();
2272 let mut no_match_grep_targets: std::collections::HashSet<String> =
2273 std::collections::HashSet::new();
2274 let mut broad_grep_targets: std::collections::HashSet<String> =
2275 std::collections::HashSet::new();
2276
2277 let mut turn_anchor = self.history.len().saturating_sub(1);
2279
2280 for _iter in 0..max_iters {
2281 let mut mutation_occurred = false;
2282 if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2284 self.cancel_token
2285 .store(false, std::sync::atomic::Ordering::SeqCst);
2286 let _ = tx
2287 .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2288 .await;
2289 let _ = tx.send(InferenceEvent::Done).await;
2290 return Ok(());
2291 }
2292
2293 if self
2295 .compact_history_if_needed(&tx, Some(turn_anchor))
2296 .await?
2297 {
2298 turn_anchor = 2;
2301 }
2302
2303 let inject_vein = first_iter && !implement_current_plan;
2307 let messages = if implement_current_plan {
2308 first_iter = false;
2309 self.context_window_slice_from(turn_anchor)
2310 } else {
2311 first_iter = false;
2312 self.context_window_slice()
2313 };
2314
2315 let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2319 if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2322 let mut msgs = vec![self.history[0].clone()];
2323 msgs.push(ChatMessage::system(&intervention));
2324 msgs
2325 } else {
2326 let merged =
2327 format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2328 vec![ChatMessage::system(&merged)]
2329 }
2330 } else {
2331 vec![self.history[0].clone()]
2332 };
2333
2334 if inject_vein {
2338 if let Some(ref ctx) = vein_context.as_ref() {
2339 if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2340 prompt_msgs.push(ChatMessage::system(ctx));
2341 } else {
2342 let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2343 prompt_msgs[0] = ChatMessage::system(&merged);
2344 }
2345 }
2346 }
2347 prompt_msgs.extend(messages);
2348 if let Some(budget_note) =
2349 enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2350 {
2351 self.emit_operator_checkpoint(
2352 &tx,
2353 OperatorCheckpointState::BudgetReduced,
2354 budget_note,
2355 )
2356 .await;
2357 let recipe = plan_recovery(
2358 RecoveryScenario::PromptBudgetPressure,
2359 &self.recovery_context,
2360 );
2361 self.emit_recovery_recipe_summary(
2362 &tx,
2363 recipe.recipe.scenario.label(),
2364 compact_recovery_plan_summary(&recipe),
2365 )
2366 .await;
2367 }
2368 self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2369 .await;
2370
2371 let (mut text, mut tool_calls, usage, finish_reason) = match self
2372 .engine
2373 .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2374 .await
2375 {
2376 Ok(result) => result,
2377 Err(e) => {
2378 let class = classify_runtime_failure(&e);
2379 if should_retry_runtime_failure(class) {
2380 if self.recovery_context.consume_transient_retry() {
2381 let label = match class {
2382 RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2383 _ => "empty_model_response",
2384 };
2385 self.transcript.log_system(&format!(
2386 "Automatic provider recovery triggered: {}",
2387 e.trim()
2388 ));
2389 self.emit_recovery_recipe_summary(
2390 &tx,
2391 label,
2392 compact_runtime_recovery_summary(class),
2393 )
2394 .await;
2395 let _ = tx
2396 .send(InferenceEvent::ProviderStatus {
2397 state: ProviderRuntimeState::Recovering,
2398 summary: compact_runtime_recovery_summary(class).into(),
2399 })
2400 .await;
2401 self.emit_operator_checkpoint(
2402 &tx,
2403 OperatorCheckpointState::RecoveringProvider,
2404 compact_runtime_recovery_summary(class),
2405 )
2406 .await;
2407 continue;
2408 }
2409 }
2410
2411 self.emit_runtime_failure(&tx, class, &e).await;
2412 break;
2413 }
2414 };
2415 self.emit_provider_live(&tx).await;
2416
2417 if let Some(ref u) = usage {
2419 let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2420 }
2421
2422 if tool_calls
2425 .as_ref()
2426 .map(|calls| calls.is_empty())
2427 .unwrap_or(true)
2428 {
2429 if let Some(raw_text) = text.as_deref() {
2430 let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2431 if !native_calls.is_empty() {
2432 tool_calls = Some(native_calls);
2433 let stripped =
2434 crate::agent::inference::strip_native_tool_call_text(raw_text);
2435 text = if stripped.trim().is_empty() {
2436 None
2437 } else {
2438 Some(stripped)
2439 };
2440 }
2441 }
2442 }
2443
2444 let tool_calls = tool_calls.filter(|c| !c.is_empty());
2447 let near_context_ceiling = usage
2448 .as_ref()
2449 .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2450 .unwrap_or(false);
2451
2452 if let Some(calls) = tool_calls {
2453 let (calls, prune_trace_note) =
2454 prune_architecture_trace_batch(calls, architecture_overview_mode);
2455 if let Some(note) = prune_trace_note {
2456 let _ = tx.send(InferenceEvent::Thought(note)).await;
2457 }
2458
2459 let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2460 calls,
2461 self.workflow_mode.is_read_only(),
2462 architecture_overview_mode,
2463 );
2464 if let Some(note) = prune_bloat_note {
2465 let _ = tx.send(InferenceEvent::Thought(note)).await;
2466 }
2467
2468 let (calls, prune_note) = prune_authoritative_tool_batch(
2469 calls,
2470 grounded_trace_mode,
2471 &effective_user_input,
2472 );
2473 if let Some(note) = prune_note {
2474 let _ = tx.send(InferenceEvent::Thought(note)).await;
2475 }
2476
2477 let (calls, batch_note) = order_batch_reads_first(calls);
2478 if let Some(note) = batch_note {
2479 let _ = tx.send(InferenceEvent::Thought(note)).await;
2480 }
2481
2482 if let Some(repeated_path) = calls
2483 .iter()
2484 .filter(|c| {
2485 let parsed = serde_json::from_str::<Value>(
2486 &crate::agent::inference::normalize_tool_argument_string(
2487 &c.function.name,
2488 &c.function.arguments,
2489 ),
2490 )
2491 .ok();
2492 let offset = parsed
2493 .as_ref()
2494 .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2495 .unwrap_or(0);
2496 if offset < 200 {
2499 return true;
2500 }
2501 if let Some(path) = parsed
2502 .as_ref()
2503 .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2504 {
2505 let normalized = normalize_workspace_path(path);
2506 return successful_read_regions.contains(&(normalized, offset));
2507 }
2508 false
2509 })
2510 .filter_map(|c| repeated_read_target(&c.function))
2511 .find(|path| successful_read_targets.contains(path))
2512 {
2513 loop_intervention = Some(format!(
2514 "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2515 repeated_path
2516 ));
2517 let _ = tx
2518 .send(InferenceEvent::Thought(
2519 "Read discipline: preventing repeated full-file reads on the same path."
2520 .into(),
2521 ))
2522 .await;
2523 continue;
2524 }
2525
2526 if capability_mode
2527 && !capability_needs_repo
2528 && calls
2529 .iter()
2530 .all(|c| is_capability_probe_tool(&c.function.name))
2531 {
2532 loop_intervention = Some(
2533 "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2534 Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2535 Do not mention raw `mcp__*` names unless they are active and directly relevant."
2536 .to_string(),
2537 );
2538 let _ = tx
2539 .send(InferenceEvent::Thought(
2540 "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2541 .into(),
2542 ))
2543 .await;
2544 continue;
2545 }
2546
2547 let raw_content = text.as_deref().unwrap_or(" ");
2550
2551 if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2552 let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2553 self.reasoning_history = Some(thought);
2555 }
2556
2557 let stored_tool_call_content = if implement_current_plan {
2560 cap_output(raw_content, 1200)
2561 } else {
2562 raw_content.to_string()
2563 };
2564 self.history.push(ChatMessage::assistant_tool_calls(
2565 &stored_tool_call_content,
2566 calls.clone(),
2567 ));
2568
2569 let mut results = Vec::new();
2571
2572 let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = calls
2574 .clone()
2575 .into_iter()
2576 .partition(|c| is_parallel_safe(&c.function.name));
2577
2578 if !parallel_calls.is_empty() {
2580 let mut tasks = Vec::new();
2581 for call in parallel_calls {
2582 let tx_clone = tx.clone();
2583 let config_clone = config.clone();
2584 let call_with_id = call.clone();
2586 tasks.push(self.process_tool_call(
2587 call_with_id.function,
2588 config_clone,
2589 yolo,
2590 tx_clone,
2591 call_with_id.id,
2592 ));
2593 }
2594 results.extend(futures::future::join_all(tasks).await);
2596 }
2597
2598 for call in serial_calls {
2600 results.push(
2601 self.process_tool_call(
2602 call.function,
2603 config.clone(),
2604 yolo,
2605 tx.clone(),
2606 call.id,
2607 )
2608 .await,
2609 );
2610 }
2611
2612 let mut authoritative_tool_output: Option<String> = None;
2614 let mut blocked_policy_output: Option<String> = None;
2615 let mut recoverable_policy_intervention: Option<String> = None;
2616 let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
2617 let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
2618 None;
2619 for res in results {
2620 let call_id = res.call_id.clone();
2621 let tool_name = res.tool_name.clone();
2622 let final_output = res.output.clone();
2623 let is_error = res.is_error;
2624 for msg in res.msg_results {
2625 self.history.push(msg);
2626 }
2627
2628 if matches!(
2630 tool_name.as_str(),
2631 "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
2632 ) {
2633 mutation_occurred = true;
2634 implementation_started = true;
2635 if !is_error {
2637 let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
2638 if !path.is_empty() {
2639 self.vein.bump_heat(path);
2640 self.l1_context = self.vein.l1_context();
2641 }
2642 }
2643 }
2644
2645 if tool_name == "verify_build" {
2646 self.record_session_verification(
2647 !is_error
2648 && (final_output.contains("BUILD OK")
2649 || final_output.contains("BUILD SUCCESS")
2650 || final_output.contains("BUILD OKAY")),
2651 if is_error {
2652 "Explicit verify_build failed."
2653 } else {
2654 "Explicit verify_build passed."
2655 },
2656 );
2657 }
2658
2659 let call_key = format!(
2661 "{}:{}",
2662 tool_name,
2663 serde_json::to_string(&res.args).unwrap_or_default()
2664 );
2665 let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
2666 *repeat_count += 1;
2667
2668 let repeat_guard_exempt = matches!(
2670 tool_name.as_str(),
2671 "verify_build" | "git_commit" | "git_push"
2672 );
2673 if *repeat_count >= 3 && !repeat_guard_exempt {
2674 loop_intervention = Some(format!(
2675 "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
2676 Do not call it again. Either answer directly from what you already know, \
2677 use a different tool or approach, or ask the user for clarification.",
2678 tool_name, *repeat_count
2679 ));
2680 let _ = tx
2681 .send(InferenceEvent::Thought(format!(
2682 "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
2683 tool_name, *repeat_count
2684 )))
2685 .await;
2686 }
2687
2688 if is_error {
2689 consecutive_errors += 1;
2690 } else {
2691 consecutive_errors = 0;
2692 }
2693
2694 if consecutive_errors >= 3 {
2695 loop_intervention = Some(
2696 "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
2697 STOP all tool calls immediately. Analyze why your previous 3 calls failed \
2698 (check for hallucinations or invalid arguments) and ask the user for \
2699 clarification if you cannot proceed.".to_string()
2700 );
2701 }
2702
2703 if consecutive_errors >= 4 {
2704 self.emit_runtime_failure(
2705 &tx,
2706 RuntimeFailureClass::ToolLoop,
2707 "Hard termination: too many consecutive tool errors.",
2708 )
2709 .await;
2710 return Ok(());
2711 }
2712
2713 let _ = tx
2714 .send(InferenceEvent::ToolCallResult {
2715 id: call_id.clone(),
2716 name: tool_name.clone(),
2717 output: final_output.clone(),
2718 is_error,
2719 })
2720 .await;
2721
2722 let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
2724 self.engine.current_context_length(),
2725 );
2726 let capped = if implement_current_plan {
2727 cap_output(&final_output, 1200)
2728 } else if tool_name == "map_project"
2729 && self.workflow_mode == WorkflowMode::Architect
2730 {
2731 cap_output(&final_output, 2500)
2732 } else if tool_name == "map_project" {
2733 cap_output(&final_output, 3500)
2734 } else if compact_ctx
2735 && (tool_name == "read_file" || tool_name == "inspect_lines")
2736 {
2737 let limit = 3000usize;
2739 if final_output.len() > limit {
2740 let total_lines = final_output.lines().count();
2741 let mut split_at = limit;
2742 while !final_output.is_char_boundary(split_at) && split_at > 0 {
2743 split_at -= 1;
2744 }
2745 format!(
2746 "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.]",
2747 &final_output[..split_at],
2748 total_lines,
2749 total_lines.saturating_sub(150),
2750 )
2751 } else {
2752 final_output.clone()
2753 }
2754 } else {
2755 cap_output(&final_output, 8000)
2756 };
2757 self.history.push(ChatMessage::tool_result_for_model(
2758 &call_id,
2759 &tool_name,
2760 &capped,
2761 &self.engine.current_model(),
2762 ));
2763
2764 if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
2765 {
2766 overview_runtime_trace =
2767 Some(summarize_runtime_trace_output(&final_output));
2768 } else if architecture_overview_mode && !is_error && tool_name == "map_project"
2769 {
2770 overview_project_map = Some(summarize_project_map_output(&final_output));
2771 }
2772
2773 if !architecture_overview_mode
2774 && !is_error
2775 && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
2776 || (toolchain_mode && tool_name == "describe_toolchain"))
2777 {
2778 authoritative_tool_output = Some(final_output.clone());
2779 } else if !architecture_overview_mode
2780 && !is_error
2781 && tool_name == "map_project"
2782 && project_map_mode
2783 && authoritative_tool_output.is_none()
2784 {
2785 authoritative_tool_output =
2786 Some(summarize_project_map_output(&final_output));
2787 }
2788
2789 if !is_error && tool_name == "read_file" {
2790 if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2791 let normalized = normalize_workspace_path(path);
2792 let read_offset =
2793 res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
2794 successful_read_targets.insert(normalized.clone());
2795 successful_read_regions.insert((normalized.clone(), read_offset));
2796 }
2797 }
2798
2799 if !is_error && tool_name == "grep_files" {
2800 if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2801 let normalized = normalize_workspace_path(path);
2802 if final_output.starts_with("No matches for ") {
2803 no_match_grep_targets.insert(normalized);
2804 } else if grep_output_is_high_fanout(&final_output) {
2805 broad_grep_targets.insert(normalized);
2806 } else {
2807 successful_grep_targets.insert(normalized);
2808 }
2809 }
2810 }
2811
2812 if is_error
2813 && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
2814 && (final_output.contains("search string not found")
2815 || final_output.contains("search string is too short")
2816 || final_output.contains("search string matched"))
2817 {
2818 if let Some(target) = action_target_path(&tool_name, &res.args) {
2819 let guidance = if final_output.contains("matched") {
2820 format!(
2821 "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
2822 tool_name, target
2823 )
2824 } else {
2825 format!(
2826 "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
2827 tool_name, target
2828 )
2829 };
2830 loop_intervention = Some(guidance);
2831 *repeat_count = 0;
2832 }
2833 }
2834
2835 if res.blocked_by_policy
2836 && is_mcp_workspace_read_tool(&tool_name)
2837 && recoverable_policy_intervention.is_none()
2838 {
2839 recoverable_policy_intervention = Some(
2840 "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
2841 );
2842 recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
2843 recoverable_policy_checkpoint = Some((
2844 OperatorCheckpointState::BlockedPolicy,
2845 "MCP workspace read blocked; rerouting to built-in file tools."
2846 .to_string(),
2847 ));
2848 } else if res.blocked_by_policy
2849 && implement_current_plan
2850 && tool_name == "map_project"
2851 && recoverable_policy_intervention.is_none()
2852 {
2853 recoverable_policy_intervention = Some(
2854 "STOP. `map_project` is blocked during plan execution. Read your planned target files directly, then edit.".to_string(),
2855 );
2856 recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2857 recoverable_policy_checkpoint = Some((
2858 OperatorCheckpointState::BlockedPolicy,
2859 "`map_project` blocked for current-plan execution.".to_string(),
2860 ));
2861 } else if res.blocked_by_policy
2862 && implement_current_plan
2863 && is_current_plan_irrelevant_tool(&tool_name)
2864 && recoverable_policy_intervention.is_none()
2865 {
2866 recoverable_policy_intervention = Some(format!(
2867 "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
2868 tool_name
2869 ));
2870 recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2871 recoverable_policy_checkpoint = Some((
2872 OperatorCheckpointState::BlockedPolicy,
2873 format!(
2874 "Current-plan execution blocked unrelated tool `{}`.",
2875 tool_name
2876 ),
2877 ));
2878 } else if res.blocked_by_policy
2879 && implement_current_plan
2880 && final_output.contains("requires recent file evidence")
2881 && recoverable_policy_intervention.is_none()
2882 {
2883 let target = action_target_path(&tool_name, &res.args)
2884 .unwrap_or_else(|| "the target file".to_string());
2885 recoverable_policy_intervention = Some(format!(
2886 "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
2887 ));
2888 recoverable_policy_recipe =
2889 Some(RecoveryScenario::RecentFileEvidenceMissing);
2890 recoverable_policy_checkpoint = Some((
2891 OperatorCheckpointState::BlockedRecentFileEvidence,
2892 format!("Edit blocked on `{target}`; recent file evidence missing."),
2893 ));
2894 } else if res.blocked_by_policy
2895 && implement_current_plan
2896 && final_output.contains("requires an exact local line window first")
2897 && recoverable_policy_intervention.is_none()
2898 {
2899 let target = action_target_path(&tool_name, &res.args)
2900 .unwrap_or_else(|| "the target file".to_string());
2901 recoverable_policy_intervention = Some(format!(
2902 "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
2903 ));
2904 recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
2905 recoverable_policy_checkpoint = Some((
2906 OperatorCheckpointState::BlockedExactLineWindow,
2907 format!("Edit blocked on `{target}`; exact line window required."),
2908 ));
2909 } else if res.blocked_by_policy && blocked_policy_output.is_none() {
2910 blocked_policy_output = Some(final_output.clone());
2911 }
2912
2913 if *repeat_count >= 5 {
2914 let _ = tx.send(InferenceEvent::Done).await;
2915 return Ok(());
2916 }
2917
2918 if implement_current_plan
2919 && !implementation_started
2920 && !is_error
2921 && is_non_mutating_plan_step_tool(&tool_name)
2922 {
2923 non_mutating_plan_steps += 1;
2924 }
2925 }
2926
2927 if let Some(intervention) = recoverable_policy_intervention {
2928 if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
2929 self.emit_operator_checkpoint(&tx, state, summary).await;
2930 }
2931 if let Some(scenario) = recoverable_policy_recipe.take() {
2932 let recipe = plan_recovery(scenario, &self.recovery_context);
2933 self.emit_recovery_recipe_summary(
2934 &tx,
2935 recipe.recipe.scenario.label(),
2936 compact_recovery_plan_summary(&recipe),
2937 )
2938 .await;
2939 }
2940 loop_intervention = Some(intervention);
2941 let _ = tx
2942 .send(InferenceEvent::Thought(
2943 "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
2944 .into(),
2945 ))
2946 .await;
2947 continue;
2948 }
2949
2950 if architecture_overview_mode {
2951 match (
2952 overview_project_map.as_deref(),
2953 overview_runtime_trace.as_deref(),
2954 ) {
2955 (Some(project_map), Some(runtime_trace)) => {
2956 let response =
2957 build_architecture_overview_answer(project_map, runtime_trace);
2958 self.history.push(ChatMessage::assistant_text(&response));
2959 self.transcript.log_agent(&response);
2960
2961 for chunk in chunk_text(&response, 8) {
2962 if !chunk.is_empty() {
2963 let _ = tx.send(InferenceEvent::Token(chunk)).await;
2964 }
2965 }
2966
2967 let _ = tx.send(InferenceEvent::Done).await;
2968 break;
2969 }
2970 (Some(_), None) => {
2971 loop_intervention = Some(
2972 "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
2973 .to_string(),
2974 );
2975 continue;
2976 }
2977 (None, Some(_)) => {
2978 loop_intervention = Some(
2979 "Good. You now have the grounded runtime/control-flow trace. Next, call `map_project` once to capture entrypoints and core owner files. Keep it compact and do not call broad file-read tools in this architecture overview."
2980 .to_string(),
2981 );
2982 continue;
2983 }
2984 (None, None) => {}
2985 }
2986 }
2987
2988 if implement_current_plan
2989 && !implementation_started
2990 && non_mutating_plan_steps >= non_mutating_plan_hard_cap
2991 {
2992 let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
2993 self.history.push(ChatMessage::assistant_text(&msg));
2994 self.transcript.log_agent(&msg);
2995
2996 for chunk in chunk_text(&msg, 8) {
2997 if !chunk.is_empty() {
2998 let _ = tx.send(InferenceEvent::Token(chunk)).await;
2999 }
3000 }
3001
3002 let _ = tx.send(InferenceEvent::Done).await;
3003 break;
3004 }
3005
3006 if let Some(blocked_output) = blocked_policy_output {
3007 self.emit_operator_checkpoint(
3008 &tx,
3009 OperatorCheckpointState::BlockedPolicy,
3010 "A blocked tool path was surfaced directly to the operator.",
3011 )
3012 .await;
3013 self.history
3014 .push(ChatMessage::assistant_text(&blocked_output));
3015 self.transcript.log_agent(&blocked_output);
3016
3017 for chunk in chunk_text(&blocked_output, 8) {
3018 if !chunk.is_empty() {
3019 let _ = tx.send(InferenceEvent::Token(chunk)).await;
3020 }
3021 }
3022
3023 let _ = tx.send(InferenceEvent::Done).await;
3024 break;
3025 }
3026
3027 if let Some(tool_output) = authoritative_tool_output {
3028 self.history.push(ChatMessage::assistant_text(&tool_output));
3029 self.transcript.log_agent(&tool_output);
3030
3031 for chunk in chunk_text(&tool_output, 8) {
3032 if !chunk.is_empty() {
3033 let _ = tx.send(InferenceEvent::Token(chunk)).await;
3034 }
3035 }
3036
3037 let _ = tx.send(InferenceEvent::Done).await;
3038 break;
3039 }
3040
3041 if implement_current_plan && !implementation_started {
3042 let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3043 if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3044 loop_intervention = Some(format!(
3045 "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3046 base
3047 ));
3048 } else {
3049 loop_intervention = Some(base.to_string());
3050 }
3051 } else if self.workflow_mode == WorkflowMode::Architect {
3052 loop_intervention = Some(
3053 format!(
3054 "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3055 architect_handoff_contract()
3056 ),
3057 );
3058 }
3059
3060 if mutation_occurred && !yolo {
3062 let _ = tx
3063 .send(InferenceEvent::Thought(
3064 "Self-Verification: Running 'cargo check' to ensure build integrity..."
3065 .into(),
3066 ))
3067 .await;
3068 let verify_res = self.auto_verify_build().await;
3069 let verify_ok = verify_res.contains("BUILD SUCCESS");
3070 self.record_verify_build_result(verify_ok, &verify_res)
3071 .await;
3072 self.record_session_verification(
3073 verify_ok,
3074 if verify_ok {
3075 "Automatic build verification passed."
3076 } else {
3077 "Automatic build verification failed."
3078 },
3079 );
3080 self.history.push(ChatMessage::system(&format!(
3081 "\n# SYSTEM VERIFICATION\n{verify_res}"
3082 )));
3083 let _ = tx
3084 .send(InferenceEvent::Thought(
3085 "Verification turn injected into history.".into(),
3086 ))
3087 .await;
3088 }
3089
3090 continue;
3092 } else if let Some(response_text) = text {
3093 if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3094 if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3095 let cleaned = build_session_reset_semantics_answer();
3096 self.history.push(ChatMessage::assistant_text(&cleaned));
3097 self.transcript.log_agent(&cleaned);
3098 for chunk in chunk_text(&cleaned, 8) {
3099 if !chunk.is_empty() {
3100 let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3101 }
3102 }
3103 let _ = tx.send(InferenceEvent::Done).await;
3104 break;
3105 }
3106
3107 let warning = format_runtime_failure(
3108 RuntimeFailureClass::ContextWindow,
3109 "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3110 );
3111 self.history.push(ChatMessage::assistant_text(&warning));
3112 self.transcript.log_agent(&warning);
3113 let _ = tx
3114 .send(InferenceEvent::Thought(
3115 "Length recovery: model hit the context ceiling before completing the answer."
3116 .into(),
3117 ))
3118 .await;
3119 for chunk in chunk_text(&warning, 8) {
3120 if !chunk.is_empty() {
3121 let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3122 }
3123 }
3124 let _ = tx.send(InferenceEvent::Done).await;
3125 break;
3126 }
3127
3128 if response_text.contains("<|tool_call")
3129 || response_text.contains("[END_TOOL_REQUEST]")
3130 || response_text.contains("<|tool_response")
3131 || response_text.contains("<tool_response|>")
3132 {
3133 loop_intervention = Some(
3134 "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3135 );
3136 continue;
3137 }
3138
3139 if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3141 {
3142 let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3143 self.reasoning_history = Some(thought);
3146 }
3147
3148 let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3150
3151 if implement_current_plan && !implementation_started {
3152 loop_intervention = Some(
3153 "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3154 );
3155 continue;
3156 }
3157
3158 if cleaned.is_empty() {
3161 let _ = tx.send(InferenceEvent::Done).await;
3162 break;
3163 }
3164
3165 self.persist_architect_handoff(&cleaned);
3166 self.history.push(ChatMessage::assistant_text(&cleaned));
3167 self.transcript.log_agent(&cleaned);
3168
3169 for chunk in chunk_text(&cleaned, 8) {
3171 if !chunk.is_empty() {
3172 let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3173 }
3174 }
3175
3176 let _ = tx.send(InferenceEvent::Done).await;
3177 break;
3178 } else {
3179 let detail = "Model returned an empty response.";
3180 let class = classify_runtime_failure(detail);
3181 if should_retry_runtime_failure(class) {
3182 if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3183 if let RecoveryDecision::Attempt(plan) =
3184 attempt_recovery(scenario, &mut self.recovery_context)
3185 {
3186 self.transcript.log_system(
3187 "Automatic provider recovery triggered: model returned an empty response.",
3188 );
3189 self.emit_recovery_recipe_summary(
3190 &tx,
3191 plan.recipe.scenario.label(),
3192 compact_recovery_plan_summary(&plan),
3193 )
3194 .await;
3195 let _ = tx
3196 .send(InferenceEvent::ProviderStatus {
3197 state: ProviderRuntimeState::Recovering,
3198 summary: compact_runtime_recovery_summary(class).into(),
3199 })
3200 .await;
3201 self.emit_operator_checkpoint(
3202 &tx,
3203 OperatorCheckpointState::RecoveringProvider,
3204 compact_runtime_recovery_summary(class),
3205 )
3206 .await;
3207 continue;
3208 }
3209 }
3210 }
3211
3212 self.emit_runtime_failure(&tx, class, detail).await;
3213 break;
3214 }
3215 }
3216
3217 self.trim_history(80);
3218 self.refresh_session_memory();
3219 self.save_session();
3220 self.emit_compaction_pressure(&tx).await;
3221 Ok(())
3222 }
3223
3224 async fn emit_runtime_failure(
3225 &mut self,
3226 tx: &mpsc::Sender<InferenceEvent>,
3227 class: RuntimeFailureClass,
3228 detail: &str,
3229 ) {
3230 if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3231 let decision = preview_recovery_decision(scenario, &self.recovery_context);
3232 self.emit_recovery_recipe_summary(
3233 tx,
3234 scenario.label(),
3235 compact_recovery_decision_summary(&decision),
3236 )
3237 .await;
3238 let needs_refresh = match &decision {
3239 RecoveryDecision::Attempt(plan) => plan
3240 .recipe
3241 .steps
3242 .contains(&RecoveryStep::RefreshRuntimeProfile),
3243 RecoveryDecision::Escalate { recipe, .. } => {
3244 recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3245 }
3246 };
3247 if needs_refresh {
3248 if let Some((model_id, context_length, changed)) = self
3249 .refresh_runtime_profile_and_report(tx, "context_window_failure")
3250 .await
3251 {
3252 let note = if changed {
3253 format!(
3254 "Runtime refresh after context-window failure: model {} | CTX {}",
3255 model_id, context_length
3256 )
3257 } else {
3258 format!(
3259 "Runtime refresh after context-window failure confirms model {} | CTX {}",
3260 model_id, context_length
3261 )
3262 };
3263 let _ = tx.send(InferenceEvent::Thought(note)).await;
3264 }
3265 }
3266 }
3267 if let Some(state) = provider_state_for_runtime_failure(class) {
3268 let _ = tx
3269 .send(InferenceEvent::ProviderStatus {
3270 state,
3271 summary: compact_runtime_failure_summary(class).into(),
3272 })
3273 .await;
3274 }
3275 if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3276 self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3277 .await;
3278 }
3279 let formatted = format_runtime_failure(class, detail);
3280 self.history.push(ChatMessage::system(&format!(
3281 "# RUNTIME FAILURE\n{}",
3282 formatted
3283 )));
3284 self.transcript.log_system(&formatted);
3285 let _ = tx.send(InferenceEvent::Error(formatted)).await;
3286 let _ = tx.send(InferenceEvent::Done).await;
3287 }
3288
3289 async fn auto_verify_build(&self) -> String {
3291 match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3292 Ok(out) => {
3293 "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3294 + &cap_output(&out, 2000)
3295 }
3296 Err(e) => format!(
3297 "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3298 cap_output(&e, 2000)
3299 ),
3300 }
3301 }
3302
3303 async fn compact_history_if_needed(
3307 &mut self,
3308 tx: &mpsc::Sender<InferenceEvent>,
3309 anchor_index: Option<usize>,
3310 ) -> Result<bool, String> {
3311 let vram_ratio = self.gpu_state.ratio();
3312 let context_length = self.engine.current_context_length();
3313 let config = CompactionConfig::adaptive(context_length, vram_ratio);
3314
3315 if !compaction::should_compact(&self.history, context_length, vram_ratio) {
3316 return Ok(false);
3317 }
3318
3319 let _ = tx
3320 .send(InferenceEvent::Thought(format!(
3321 "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
3322 context_length / 1000,
3323 vram_ratio * 100.0,
3324 config.max_estimated_tokens / 1000,
3325 )))
3326 .await;
3327
3328 let result = compaction::compact_history(
3329 &self.history,
3330 self.running_summary.as_deref(),
3331 config,
3332 anchor_index,
3333 );
3334
3335 let removed_message_count = self.history.len().saturating_sub(result.messages.len());
3336 self.history = result.messages;
3337 self.running_summary = result.summary;
3338
3339 let previous_memory = self.session_memory.clone();
3341 self.session_memory = compaction::extract_memory(&self.history);
3342 self.session_memory
3343 .inherit_runtime_ledger_from(&previous_memory);
3344 self.session_memory.record_compaction(
3345 removed_message_count,
3346 format!(
3347 "Compacted history around active task '{}' and preserved {} working-set file(s).",
3348 self.session_memory.current_task,
3349 self.session_memory.working_set.len()
3350 ),
3351 );
3352 self.emit_compaction_pressure(tx).await;
3353
3354 let first_non_sys = self
3357 .history
3358 .iter()
3359 .position(|m| m.role != "system")
3360 .unwrap_or(self.history.len());
3361 if first_non_sys < self.history.len() {
3362 if let Some(user_offset) = self.history[first_non_sys..]
3363 .iter()
3364 .position(|m| m.role == "user")
3365 {
3366 if user_offset > 0 {
3367 self.history
3368 .drain(first_non_sys..first_non_sys + user_offset);
3369 }
3370 }
3371 }
3372
3373 let _ = tx
3374 .send(InferenceEvent::Thought(format!(
3375 "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
3376 self.session_memory.current_task,
3377 self.session_memory.working_set.len()
3378 )))
3379 .await;
3380 let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
3381 self.emit_recovery_recipe_summary(
3382 tx,
3383 recipe.recipe.scenario.label(),
3384 compact_recovery_plan_summary(&recipe),
3385 )
3386 .await;
3387 self.emit_operator_checkpoint(
3388 tx,
3389 OperatorCheckpointState::HistoryCompacted,
3390 format!(
3391 "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
3392 self.session_memory.current_task,
3393 self.session_memory.working_set.len()
3394 ),
3395 )
3396 .await;
3397
3398 Ok(true)
3399 }
3400
3401 fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3405 if query.trim().split_whitespace().count() < 3 {
3407 return None;
3408 }
3409
3410 let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3411 if results.is_empty() {
3412 return None;
3413 }
3414
3415 let semantic_active = self.vein.has_any_embeddings();
3416 let header = if semantic_active {
3417 "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3418 Use this to answer without needing extra read_file calls where possible.\n\n"
3419 } else {
3420 "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3421 Use this to answer without needing extra read_file calls where possible.\n\n"
3422 };
3423
3424 let mut ctx = String::from(header);
3425 let mut paths: Vec<String> = Vec::new();
3426
3427 let mut total = 0usize;
3428 const MAX_CTX_CHARS: usize = 1_500;
3429
3430 for r in results {
3431 if total >= MAX_CTX_CHARS {
3432 break;
3433 }
3434 let snippet = if r.content.len() > 500 {
3435 format!("{}...", &r.content[..500])
3436 } else {
3437 r.content.clone()
3438 };
3439 ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3440 total += snippet.len() + r.path.len() + 10;
3441 if !paths.contains(&r.path) {
3442 paths.push(r.path);
3443 }
3444 }
3445
3446 Some((ctx, paths))
3447 }
3448
3449 fn context_window_slice(&self) -> Vec<ChatMessage> {
3452 let mut result = Vec::new();
3453
3454 if self.history.len() > 1 {
3456 for m in &self.history[1..] {
3457 if m.role == "system" {
3458 continue;
3459 }
3460
3461 let mut sanitized = m.clone();
3462 if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3464 sanitized.content = MessageContent::Text(" ".into());
3465 }
3466 result.push(sanitized);
3467 }
3468 }
3469
3470 if !result.is_empty() && result[0].role != "user" {
3473 result.insert(0, ChatMessage::user("Continuing previous context..."));
3474 }
3475
3476 result
3477 }
3478
3479 fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
3480 let mut result = Vec::new();
3481
3482 if self.history.len() > 1 {
3483 let start = start_idx.max(1).min(self.history.len());
3484 for m in &self.history[start..] {
3485 if m.role == "system" {
3486 continue;
3487 }
3488
3489 let mut sanitized = m.clone();
3490 if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3491 sanitized.content = MessageContent::Text(" ".into());
3492 }
3493 result.push(sanitized);
3494 }
3495 }
3496
3497 if !result.is_empty() && result[0].role != "user" {
3498 result.insert(0, ChatMessage::user("Continuing current plan execution..."));
3499 }
3500
3501 result
3502 }
3503
3504 fn trim_history(&mut self, max_messages: usize) {
3506 if self.history.len() <= max_messages {
3507 return;
3508 }
3509 let excess = self.history.len() - max_messages;
3511 self.history.drain(1..=excess);
3512 }
3513
3514 async fn repair_tool_args(
3516 &self,
3517 tool_name: &str,
3518 bad_json: &str,
3519 tx: &mpsc::Sender<InferenceEvent>,
3520 ) -> Result<Value, String> {
3521 let _ = tx
3522 .send(InferenceEvent::Thought(format!(
3523 "Attempting to repair malformed JSON for '{}'...",
3524 tool_name
3525 )))
3526 .await;
3527
3528 let prompt = format!(
3529 "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
3530 tool_name, bad_json
3531 );
3532
3533 let messages = vec![
3534 ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
3535 ChatMessage::user(&prompt),
3536 ];
3537
3538 let (text, _, _, _) = self
3540 .engine
3541 .call_with_tools(&messages, &[], self.fast_model.as_deref())
3542 .await
3543 .map_err(|e| e.to_string())?;
3544
3545 let cleaned = text
3546 .unwrap_or_default()
3547 .trim()
3548 .trim_start_matches("```json")
3549 .trim_start_matches("```")
3550 .trim_end_matches("```")
3551 .trim()
3552 .to_string();
3553
3554 serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
3555 }
3556
3557 async fn run_critic_check(
3559 &self,
3560 path: &str,
3561 content: &str,
3562 tx: &mpsc::Sender<InferenceEvent>,
3563 ) -> Option<String> {
3564 let ext = std::path::Path::new(path)
3566 .extension()
3567 .and_then(|e| e.to_str())
3568 .unwrap_or("");
3569 const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
3570 if !CRITIC_EXTS.contains(&ext) {
3571 return None;
3572 }
3573
3574 let _ = tx
3575 .send(InferenceEvent::Thought(format!(
3576 "CRITIC: Reviewing changes to '{}'...",
3577 path
3578 )))
3579 .await;
3580
3581 let truncated = cap_output(content, 4000);
3582
3583 let prompt = format!(
3584 "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
3585 path, ext, truncated
3586 );
3587
3588 let messages = vec![
3589 ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
3590 ChatMessage::user(&prompt)
3591 ];
3592
3593 let (text, _, _, _) = self
3594 .engine
3595 .call_with_tools(&messages, &[], self.fast_model.as_deref())
3596 .await
3597 .ok()?;
3598
3599 let critique = text?.trim().to_string();
3600 if critique.to_uppercase().contains("PASS") || critique.is_empty() {
3601 None
3602 } else {
3603 Some(critique)
3604 }
3605 }
3606}
3607
/// Public entry point for executing a tool by name.
///
/// Forwards `name` and `args` unchanged to `dispatch_builtin_tool` and returns
/// its result: the tool's output text on success, or an error message.
pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
    dispatch_builtin_tool(name, args).await
}
3613
3614impl ConversationManager {
    /// Decides whether the tool call `name` with `args` may run.
    ///
    /// Thin wrapper: forwards every argument to
    /// `permission_enforcer::authorize_tool_call` and returns its decision.
    /// No state on `self` is consulted.
    fn check_authorization(
        &self,
        name: &str,
        args: &serde_json::Value,
        config: &crate::agent::config::HematiteConfig,
        yolo_flag: bool,
    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
    }
3625
3626 async fn process_tool_call(
3628 &self,
3629 call: ToolCallFn,
3630 config: crate::agent::config::HematiteConfig,
3631 yolo: bool,
3632 tx: mpsc::Sender<InferenceEvent>,
3633 real_id: String,
3634 ) -> ToolExecutionOutcome {
3635 let mut msg_results = Vec::new();
3636 let gemma4_model =
3637 crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
3638 let normalized_arguments = if gemma4_model {
3639 crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
3640 } else {
3641 call.arguments.clone()
3642 };
3643
3644 let mut args: Value = match serde_json::from_str(&normalized_arguments) {
3646 Ok(v) => v,
3647 Err(_) => {
3648 match self
3649 .repair_tool_args(&call.name, &normalized_arguments, &tx)
3650 .await
3651 {
3652 Ok(v) => v,
3653 Err(e) => {
3654 let _ = tx
3655 .send(InferenceEvent::Thought(format!(
3656 "JSON Repair failed: {}",
3657 e
3658 )))
3659 .await;
3660 Value::Object(Default::default())
3661 }
3662 }
3663 }
3664 };
3665
3666 if call.name == "map_project" && self.workflow_mode == WorkflowMode::Architect {
3667 if let Some(obj) = args.as_object_mut() {
3668 obj.entry("include_symbols".to_string())
3669 .or_insert(Value::Bool(false));
3670 obj.entry("max_depth".to_string())
3671 .or_insert(Value::Number(2_u64.into()));
3672 }
3673 } else if call.name == "map_project" && self.workflow_mode.is_read_only() {
3674 if let Some(obj) = args.as_object_mut() {
3675 obj.entry("include_symbols".to_string())
3676 .or_insert(Value::Bool(false));
3677 obj.entry("max_depth".to_string())
3678 .or_insert(Value::Number(3_u64.into()));
3679 }
3680 }
3681
3682 let display = format_tool_display(&call.name, &args);
3683 let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
3684 let auth = self.check_authorization(&call.name, &args, &config, yolo);
3685
3686 let decision_result = match precondition_result {
3688 Err(e) => Err(e),
3689 Ok(_) => match auth {
3690 crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
3691 crate::agent::permission_enforcer::AuthorizationDecision::Ask {
3692 reason,
3693 source: _,
3694 } => {
3695 let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
3696 let _ = tx
3697 .send(InferenceEvent::ApprovalRequired {
3698 id: real_id.clone(),
3699 name: call.name.clone(),
3700 display: format!("{}\nWhy: {}", display, reason),
3701 diff: None,
3702 responder: approve_tx,
3703 })
3704 .await;
3705
3706 match approve_rx.await {
3707 Ok(true) => Ok(()),
3708 _ => Err("Declined by user".into()),
3709 }
3710 }
3711 crate::agent::permission_enforcer::AuthorizationDecision::Deny {
3712 reason, ..
3713 } => Err(reason),
3714 },
3715 };
3716 let blocked_by_policy =
3717 matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
3718
3719 let (output, is_error) = match decision_result {
3721 Err(e) => (format!("Error: {}", e), true),
3722 Ok(_) => {
3723 let _ = tx
3724 .send(InferenceEvent::ToolCallStart {
3725 id: real_id.clone(),
3726 name: call.name.clone(),
3727 args: display.clone(),
3728 })
3729 .await;
3730
3731 let result = if call.name.starts_with("lsp_") {
3732 let lsp = self.lsp_manager.clone();
3733 let path = args
3734 .get("path")
3735 .and_then(|v| v.as_str())
3736 .unwrap_or("")
3737 .to_string();
3738 let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
3739 let character =
3740 args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
3741
3742 match call.name.as_str() {
3743 "lsp_definitions" => {
3744 crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
3745 .await
3746 }
3747 "lsp_references" => {
3748 crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
3749 .await
3750 }
3751 "lsp_hover" => {
3752 crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
3753 }
3754 "lsp_search_symbol" => {
3755 let query = args
3756 .get("query")
3757 .and_then(|v| v.as_str())
3758 .unwrap_or_default()
3759 .to_string();
3760 crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
3761 }
3762 "lsp_rename_symbol" => {
3763 let new_name = args
3764 .get("new_name")
3765 .and_then(|v| v.as_str())
3766 .unwrap_or_default()
3767 .to_string();
3768 crate::tools::lsp_tools::lsp_rename_symbol(
3769 lsp, path, line, character, new_name,
3770 )
3771 .await
3772 }
3773 "lsp_get_diagnostics" => {
3774 crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
3775 }
3776 _ => Err(format!("Unknown LSP tool: {}", call.name)),
3777 }
3778 } else if call.name == "auto_pin_context" {
3779 let pts = args.get("paths").and_then(|v| v.as_array());
3780 let reason = args
3781 .get("reason")
3782 .and_then(|v| v.as_str())
3783 .unwrap_or("uninformed scoping");
3784 if let Some(arr) = pts {
3785 let mut pinned = Vec::new();
3786 {
3787 let mut guard = self.pinned_files.lock().await;
3788 const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024; for v in arr.iter().take(3) {
3791 if let Some(p) = v.as_str() {
3792 if let Ok(meta) = std::fs::metadata(p) {
3793 if meta.len() > MAX_PINNED_SIZE {
3794 let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
3795 continue;
3796 }
3797 if let Ok(content) = std::fs::read_to_string(p) {
3798 guard.insert(p.to_string(), content);
3799 pinned.push(p.to_string());
3800 }
3801 }
3802 }
3803 }
3804 }
3805 let msg = format!(
3806 "Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
3807 pinned.join(", "),
3808 reason
3809 );
3810 let _ = tx
3811 .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
3812 .await;
3813 Ok(msg)
3814 } else {
3815 Err("Missing 'paths' array for auto_pin_context.".to_string())
3816 }
3817 } else if call.name == "list_pinned" {
3818 let paths_msg = {
3819 let pinned = self.pinned_files.lock().await;
3820 if pinned.is_empty() {
3821 "No files are currently pinned.".to_string()
3822 } else {
3823 let paths: Vec<_> = pinned.keys().cloned().collect();
3824 format!(
3825 "Currently pinned files in active memory:\n- {}",
3826 paths.join("\n- ")
3827 )
3828 }
3829 };
3830 Ok(paths_msg)
3831 } else if call.name.starts_with("mcp__") {
3832 let mut mcp = self.mcp_manager.lock().await;
3833 match mcp.call_tool(&call.name, &args).await {
3834 Ok(res) => Ok(res),
3835 Err(e) => Err(e.to_string()),
3836 }
3837 } else if call.name == "swarm" {
3838 let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
3840 let max_workers = args
3841 .get("max_workers")
3842 .and_then(|v| v.as_u64())
3843 .unwrap_or(3) as usize;
3844
3845 let mut task_objs = Vec::new();
3846 if let Value::Array(arr) = tasks_val {
3847 for v in arr {
3848 let id = v
3849 .get("id")
3850 .and_then(|x| x.as_str())
3851 .unwrap_or("?")
3852 .to_string();
3853 let target = v
3854 .get("target")
3855 .and_then(|x| x.as_str())
3856 .unwrap_or("?")
3857 .to_string();
3858 let instruction = v
3859 .get("instruction")
3860 .and_then(|x| x.as_str())
3861 .unwrap_or("?")
3862 .to_string();
3863 task_objs.push(crate::agent::parser::WorkerTask {
3864 id,
3865 target,
3866 instruction,
3867 });
3868 }
3869 }
3870
3871 if task_objs.is_empty() {
3872 Err("No tasks provided for swarm.".to_string())
3873 } else {
3874 let (swarm_tx_internal, mut swarm_rx_internal) =
3875 tokio::sync::mpsc::channel(32);
3876 let tx_forwarder = tx.clone();
3877
3878 tokio::spawn(async move {
3880 while let Some(msg) = swarm_rx_internal.recv().await {
3881 match msg {
3882 crate::agent::swarm::SwarmMessage::Progress(id, p) => {
3883 let _ = tx_forwarder
3884 .send(InferenceEvent::Thought(format!(
3885 "Swarm [{}]: {}% complete",
3886 id, p
3887 )))
3888 .await;
3889 }
3890 crate::agent::swarm::SwarmMessage::ReviewRequest {
3891 worker_id,
3892 file_path,
3893 before: _,
3894 after: _,
3895 tx,
3896 } => {
3897 let (approve_tx, approve_rx) =
3898 tokio::sync::oneshot::channel::<bool>();
3899 let display = format!(
3900 "Swarm worker [{}]: Integrated changes into {:?}",
3901 worker_id, file_path
3902 );
3903 let _ = tx_forwarder
3904 .send(InferenceEvent::ApprovalRequired {
3905 id: format!("swarm_{}", worker_id),
3906 name: "swarm_apply".to_string(),
3907 display,
3908 diff: None,
3909 responder: approve_tx,
3910 })
3911 .await;
3912 if let Ok(approved) = approve_rx.await {
3913 let response = if approved {
3914 crate::agent::swarm::ReviewResponse::Accept
3915 } else {
3916 crate::agent::swarm::ReviewResponse::Reject
3917 };
3918 let _ = tx.send(response);
3919 }
3920 }
3921 crate::agent::swarm::SwarmMessage::Done => {}
3922 }
3923 }
3924 });
3925
3926 let coordinator = self.swarm_coordinator.clone();
3927 match coordinator
3928 .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
3929 .await
3930 {
3931 Ok(_) => Ok(
3932 "Swarm execution completed. Check files for integration results."
3933 .to_string(),
3934 ),
3935 Err(e) => Err(format!("Swarm failure: {}", e)),
3936 }
3937 }
3938 } else if call.name == "vision_analyze" {
3939 crate::tools::vision::vision_analyze(&self.engine, &args).await
3940 } else if matches!(
3941 call.name.as_str(),
3942 "edit_file" | "patch_hunk" | "multi_search_replace"
3943 ) && !yolo
3944 {
3945 let diff_result = match call.name.as_str() {
3949 "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
3950 "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
3951 _ => crate::tools::file_ops::compute_msr_diff(&args),
3952 };
3953 match diff_result {
3954 Ok(diff_text) => {
3955 let path_label =
3956 args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
3957 let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
3958 let _ = tx
3959 .send(InferenceEvent::ApprovalRequired {
3960 id: real_id.clone(),
3961 name: call.name.clone(),
3962 display: format!("Edit preview: {}", path_label),
3963 diff: Some(diff_text),
3964 responder: appr_tx,
3965 })
3966 .await;
3967 match appr_rx.await {
3968 Ok(true) => dispatch_tool(&call.name, &args).await,
3969 _ => Err("Edit declined by user.".into()),
3970 }
3971 }
3972 Err(_) => dispatch_tool(&call.name, &args).await,
3975 }
3976 } else {
3977 dispatch_tool(&call.name, &args).await
3978 };
3979
3980 match result {
3981 Ok(o) => (o, false),
3982 Err(e) => (format!("Error: {}", e), true),
3983 }
3984 }
3985 };
3986
3987 {
3989 if let Ok(mut econ) = self.engine.economics.lock() {
3990 econ.record_tool(&call.name, !is_error);
3991 }
3992 }
3993
3994 if !is_error {
3995 if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
3996 if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
3997 if call.name == "inspect_lines" {
3998 self.record_line_inspection(path).await;
3999 } else {
4000 self.record_read_observation(path).await;
4001 }
4002 }
4003 }
4004
4005 if call.name == "verify_build" {
4006 let ok = output.contains("BUILD OK")
4007 || output.contains("BUILD SUCCESS")
4008 || output.contains("BUILD OKAY");
4009 self.record_verify_build_result(ok, &output).await;
4010 }
4011
4012 if matches!(
4013 call.name.as_str(),
4014 "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4015 ) || is_mcp_mutating_tool(&call.name)
4016 {
4017 self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
4018 .await;
4019 }
4020
4021 if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
4022 msg_results.push(receipt);
4023 }
4024 }
4025
4026 if !is_error && (call.name == "edit_file" || call.name == "write_file") {
4030 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
4031 let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
4032 let ext = std::path::Path::new(path)
4033 .extension()
4034 .and_then(|e| e.to_str())
4035 .unwrap_or("");
4036 const SKIP_EXTS: &[&str] = &[
4037 "md",
4038 "toml",
4039 "json",
4040 "txt",
4041 "yml",
4042 "yaml",
4043 "cfg",
4044 "csv",
4045 "lock",
4046 "gitignore",
4047 ];
4048 let line_count = content.lines().count();
4049 if !path.is_empty()
4050 && !content.is_empty()
4051 && !SKIP_EXTS.contains(&ext)
4052 && line_count >= 50
4053 {
4054 if let Some(critique) = self.run_critic_check(path, content, &tx).await {
4055 msg_results.push(ChatMessage::system(&format!(
4056 "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
4057 path, critique
4058 )));
4059 }
4060 }
4061 }
4062
4063 ToolExecutionOutcome {
4064 call_id: real_id,
4065 tool_name: call.name,
4066 args,
4067 output,
4068 is_error,
4069 blocked_by_policy,
4070 msg_results,
4071 }
4072 }
4073}
4074
/// Everything recorded about one executed tool call, as assembled at the end
/// of the tool-execution method that precedes this struct.
struct ToolExecutionOutcome {
    /// Identifier of the tool call this outcome belongs to.
    call_id: String,
    /// Name of the tool that was invoked.
    tool_name: String,
    /// JSON arguments the tool was invoked with.
    args: Value,
    /// Text produced by the call; failures are rendered as `Error: <detail>`.
    output: String,
    /// `true` when the call failed.
    is_error: bool,
    /// NOTE(review): presumably set when policy refused the call before it
    /// ran; the assignment is outside this view, so confirm against it.
    blocked_by_policy: bool,
    /// Extra chat messages generated alongside the call, such as action
    /// receipts and critic reviews.
    msg_results: Vec<ChatMessage>,
}
4086
/// Reports whether `path` ends in a file extension that belongs to a known
/// programming language.
///
/// The extension is compared without regard to ASCII case. Paths with no
/// extension (or a non-UTF-8 extension) are never considered code-like.
fn is_code_like_path(path: &str) -> bool {
    // All entries are lowercase ASCII so a case-insensitive comparison is
    // equivalent to lowercasing the candidate first.
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    let Some(candidate) = std::path::Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
    else {
        return false;
    };

    CODE_EXTENSIONS
        .iter()
        .any(|known| candidate.eq_ignore_ascii_case(known))
}
4115
4116pub fn format_tool_display(name: &str, args: &Value) -> String {
4119 let get = |key: &str| {
4120 args.get(key)
4121 .and_then(|v| v.as_str())
4122 .unwrap_or("")
4123 .to_string()
4124 };
4125 match name {
4126 "shell" => format!("$ {}", get("command")),
4127 "map_project" => "map project architecture".to_string(),
4128 "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
4129 "describe_toolchain" => format!("describe toolchain {}", get("topic")),
4130 "inspect_host" => format!("inspect host {}", get("topic")),
4131 _ => format!("{} {:?}", name, args),
4132 }
4133}
4134
/// Heuristically detects shell commands that inspect the host machine (PATH
/// contents, tool versions, listening ports, well-known user folders).
///
/// The command is lowercased (ASCII only) and checked for any of a fixed set
/// of substrings; a single hit is enough.
fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const INSPECTION_MARKERS: &[&str] = &[
        // PATH and directory listing
        "$env:path",
        "pathvariable",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        // Toolchain version probes
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        // Ports and sockets
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        // Well-known user folders
        "desktop",
        "downloads",
    ];

    let normalized = command.to_ascii_lowercase();
    for marker in INSPECTION_MARKERS {
        if normalized.contains(*marker) {
            return true;
        }
    }
    false
}
4172
/// Limits `text` to at most `max_bytes` bytes of content.
///
/// Text that already fits is returned unchanged. Otherwise the text is cut at
/// the nearest UTF-8 character boundary at or below `max_bytes` and a trailer
/// noting the cap is appended on a new line.
fn cap_output(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_string();
    }

    // Walk backwards from the limit to the closest valid boundary; offset 0
    // is always a boundary, so the search cannot fail.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0);
    let kept = &text[..cut];

    format!("{}\n... [output capped at {}B]", kept, max_bytes)
}
4191
/// Counters describing what `enforce_prompt_budget` did to a prompt while
/// shrinking it; all start at zero via `Default`.
#[derive(Default)]
struct PromptBudgetStats {
    /// Large tool results that were replaced by a compressed summary.
    summarized_tool_results: usize,
    /// Older tool results that were replaced by a fixed placeholder line.
    collapsed_tool_results: usize,
    /// Long user/assistant messages that were replaced by a summary.
    trimmed_chat_messages: usize,
    /// Messages removed from the prompt entirely.
    dropped_messages: usize,
}
4199
/// Estimates the token count of `messages` by delegating to
/// `crate::agent::inference::estimate_message_batch_tokens`.
fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
    crate::agent::inference::estimate_message_batch_tokens(messages)
}
4203
4204fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
4205 let budget = compaction::SummaryCompressionBudget {
4206 max_chars,
4207 max_lines: 3,
4208 max_line_chars: max_chars.clamp(80, 240),
4209 };
4210 let compressed = compaction::compress_summary(text, budget).summary;
4211 if compressed.is_empty() {
4212 String::new()
4213 } else {
4214 compressed
4215 }
4216}
4217
4218fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
4219 let tool_name = message.name.as_deref().unwrap_or("tool");
4220 let body = summarize_prompt_blob(message.content.as_str(), 320);
4221 format!(
4222 "[Prompt-budget summary of prior `{}` result]\n{}",
4223 tool_name, body
4224 )
4225}
4226
4227fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
4228 let role = message.role.as_str();
4229 let body = summarize_prompt_blob(message.content.as_str(), 240);
4230 format!(
4231 "[Prompt-budget summary of earlier {} message]\n{}",
4232 role, body
4233 )
4234}
4235
4236fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
4237 if messages.len() > 1 && messages[1].role != "user" {
4238 messages.insert(1, ChatMessage::user("Continuing previous context..."));
4239 }
4240}
4241
4242fn enforce_prompt_budget(
4243 prompt_msgs: &mut Vec<ChatMessage>,
4244 context_length: usize,
4245) -> Option<String> {
4246 let target_tokens = ((context_length as f64) * 0.68) as usize;
4247 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4248 return None;
4249 }
4250
4251 let mut stats = PromptBudgetStats::default();
4252
4253 let mut tool_indices: Vec<usize> = prompt_msgs
4255 .iter()
4256 .enumerate()
4257 .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4258 .collect();
4259 for idx in tool_indices.iter().rev().copied() {
4260 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4261 break;
4262 }
4263 let original = prompt_msgs[idx].content.as_str().to_string();
4264 if original.len() > 1200 {
4265 prompt_msgs[idx].content =
4266 MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
4267 stats.summarized_tool_results += 1;
4268 }
4269 }
4270
4271 tool_indices = prompt_msgs
4273 .iter()
4274 .enumerate()
4275 .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4276 .collect();
4277 if tool_indices.len() > 2 {
4278 for idx in tool_indices
4279 .iter()
4280 .take(tool_indices.len().saturating_sub(2))
4281 .copied()
4282 {
4283 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4284 break;
4285 }
4286 prompt_msgs[idx].content = MessageContent::Text(
4287 "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
4288 );
4289 stats.collapsed_tool_results += 1;
4290 }
4291 }
4292
4293 let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4295 for idx in 1..prompt_msgs.len() {
4296 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4297 break;
4298 }
4299 if Some(idx) == last_user_idx {
4300 continue;
4301 }
4302 let role = prompt_msgs[idx].role.as_str();
4303 if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
4304 prompt_msgs[idx].content =
4305 MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
4306 stats.trimmed_chat_messages += 1;
4307 }
4308 }
4309
4310 let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4312 let mut idx = 1usize;
4313 while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
4314 if Some(idx) == preserve_last_user_idx {
4315 idx += 1;
4316 if idx >= prompt_msgs.len() {
4317 break;
4318 }
4319 continue;
4320 }
4321 if idx >= prompt_msgs.len() {
4322 break;
4323 }
4324 prompt_msgs.remove(idx);
4325 stats.dropped_messages += 1;
4326 }
4327
4328 normalize_prompt_start(prompt_msgs);
4329
4330 let new_tokens = estimate_prompt_tokens(prompt_msgs);
4331 if stats.summarized_tool_results == 0
4332 && stats.collapsed_tool_results == 0
4333 && stats.trimmed_chat_messages == 0
4334 && stats.dropped_messages == 0
4335 {
4336 return None;
4337 }
4338
4339 Some(format!(
4340 "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
4341 new_tokens,
4342 target_tokens,
4343 stats.summarized_tool_results,
4344 stats.collapsed_tool_results,
4345 stats.trimmed_chat_messages,
4346 stats.dropped_messages
4347 ))
4348}
4349
/// Decides whether `input` looks like a request that can be answered with a
/// quick tool invocation.
///
/// Any mention of `run_code` / `run code` qualifies regardless of length.
/// Otherwise the input must be shorter than 120 bytes and contain one of a
/// fixed set of computation-style phrases. Matching is case-insensitive.
fn is_quick_tool_request(input: &str) -> bool {
    const SHORT_INPUT_BYTES: usize = 120;
    const COMPUTE_HINTS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];

    let lowered = input.to_lowercase();
    let names_run_code = lowered.contains("run_code") || lowered.contains("run code");

    names_run_code
        || (input.len() < SHORT_INPUT_BYTES
            && COMPUTE_HINTS.iter().any(|hint| lowered.contains(*hint)))
}
4379
/// Splits `text` into consecutive chunks that each end after
/// `words_per_chunk` word boundaries.
///
/// A boundary is a space or a newline, and it stays attached to the end of
/// the chunk it closes. Concatenating the returned chunks reproduces `text`.
/// Any trailing remainder becomes a final, shorter chunk.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buffer = String::new();
    let mut boundaries_seen = 0usize;

    for symbol in text.chars() {
        buffer.push(symbol);
        if !matches!(symbol, ' ' | '\n') {
            continue;
        }
        boundaries_seen += 1;
        if boundaries_seen >= words_per_chunk {
            // Hand the finished chunk over and start again with an empty buffer.
            pieces.push(std::mem::take(&mut buffer));
            boundaries_seen = 0;
        }
    }

    if !buffer.is_empty() {
        pieces.push(buffer);
    }
    pieces
}
4401
4402fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
4403 if call.name != "read_file" {
4404 return None;
4405 }
4406 let normalized_arguments =
4407 crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
4408 let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
4409 let path = args.get("path").and_then(|v| v.as_str())?;
4410 Some(normalize_workspace_path(path))
4411}
4412
4413fn order_batch_reads_first(
4414 calls: Vec<crate::agent::inference::ToolCallResponse>,
4415) -> (
4416 Vec<crate::agent::inference::ToolCallResponse>,
4417 Option<String>,
4418) {
4419 let has_reads = calls.iter().any(|c| {
4420 matches!(
4421 c.function.name.as_str(),
4422 "read_file" | "inspect_lines" | "grep_files" | "list_files"
4423 )
4424 });
4425 let has_edits = calls.iter().any(|c| {
4426 matches!(
4427 c.function.name.as_str(),
4428 "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4429 )
4430 });
4431 if has_reads && has_edits {
4432 let reads: Vec<_> = calls
4433 .into_iter()
4434 .filter(|c| {
4435 !matches!(
4436 c.function.name.as_str(),
4437 "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4438 )
4439 })
4440 .collect();
4441 let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
4442 (reads, note)
4443 } else {
4444 (calls, None)
4445 }
4446}
4447
/// Inspects the first line of grep output and reports whether the search
/// fanned out widely.
///
/// The first line is expected to start with a match count and to contain a
/// comma-separated `<n> hunk(s)` segment. The result is `true` when there are
/// at least 8 hunks or at least 12 matches; counts that cannot be parsed are
/// treated as zero, and empty output is never high-fanout.
fn grep_output_is_high_fanout(output: &str) -> bool {
    let header = match output.lines().next() {
        Some(line) => line,
        None => return false,
    };

    let hunks = header
        .split(", ")
        .filter_map(|segment| segment.strip_suffix(" hunk(s)"))
        .find_map(|digits| digits.parse::<usize>().ok())
        .unwrap_or(0);

    // The leading token is everything before the first space, or the whole
    // line when it contains no space.
    let leading_token = header
        .split_once(' ')
        .map_or(header, |(first, _rest)| first);
    let matches_found = leading_token.parse::<usize>().unwrap_or(0);

    matches_found >= 12 || hunks >= 8
}
4466
4467fn build_system_with_corrections(
4468 base: &str,
4469 hints: &[String],
4470 gpu: &Arc<GpuState>,
4471 git: &Arc<crate::agent::git_monitor::GitState>,
4472 config: &crate::agent::config::HematiteConfig,
4473) -> String {
4474 let mut system_msg = base.to_string();
4475
4476 system_msg.push_str("\n\n# Permission Mode\n");
4478 let mode_label = match config.mode {
4479 crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
4480 crate::agent::config::PermissionMode::Developer => "DEVELOPER",
4481 crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
4482 };
4483 system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
4484
4485 if config.mode == crate::agent::config::PermissionMode::ReadOnly {
4486 system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
4487 } else {
4488 system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
4489 }
4490
4491 let (used, total) = gpu.read();
4493 if total > 0 {
4494 system_msg.push_str("\n\n# Terminal Hardware Context\n");
4495 system_msg.push_str(&format!(
4496 "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
4497 gpu.gpu_name(),
4498 used as f64 / 1024.0,
4499 total as f64 / 1024.0,
4500 gpu.ratio() * 100.0
4501 ));
4502 system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
4503 }
4504
4505 system_msg.push_str("\n\n# Git Repository Context\n");
4507 let git_status_label = git.label();
4508 let git_url = git.url();
4509 system_msg.push_str(&format!(
4510 "REMOTE STATUS: {} | URL: {}\n",
4511 git_status_label, git_url
4512 ));
4513
4514 let root = crate::tools::file_ops::workspace_root();
4516 if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
4517 system_msg.push_str("\nGit status snapshot:\n");
4518 system_msg.push_str(&status_snapshot);
4519 system_msg.push_str("\n");
4520 }
4521
4522 if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
4523 system_msg.push_str("\nGit diff snapshot:\n");
4524 system_msg.push_str(&diff_snapshot);
4525 system_msg.push_str("\n");
4526 }
4527
4528 if git_status_label == "NONE" {
4529 system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
4530 } else if git_status_label == "BEHIND" {
4531 system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
4532 }
4533
4534 if hints.is_empty() {
4539 return system_msg;
4540 }
4541 system_msg.push_str("\n\n# Formatting Corrections\n");
4542 system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
4543 for hint in hints {
4544 system_msg.push_str(&format!("- {}\n", hint));
4545 }
4546 system_msg
4547}
4548
/// Chooses between the configured "think" and "fast" models based on
/// keywords in the user's input (matched case-insensitively).
///
/// Heavy-work keywords select `think_model` when one is configured. Failing
/// that, lookup-style keywords select `fast_model` when one is configured.
/// Returns `None` when neither rule applies.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_CUES: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_CUES: &[&str] = &["what", "show", "find", "list", "status"];

    let lowered = user_input.to_lowercase();
    let mentions_any = |cues: &[&str]| cues.iter().any(|cue| lowered.contains(*cue));

    if think_model.is_some() && mentions_any(THINK_CUES) {
        think_model
    } else if fast_model.is_some() && mentions_any(FAST_CUES) {
        fast_model
    } else {
        None
    }
}
4574
4575fn is_parallel_safe(name: &str) -> bool {
4576 let metadata = crate::agent::inference::tool_metadata_for_name(name);
4577 !metadata.mutates_workspace && !metadata.external_surface
4578}
4579
/// Decides whether a chat-mode query should consult the vein.
///
/// Always `true` in docs-only mode. Otherwise the query (ASCII-lowercased)
/// must either contain one of a fixed set of history-related phrases or
/// include a date-shaped token: a run of ten digits/hyphens whose fifth
/// character is a hyphen, such as `2026-04-09`.
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const HISTORY_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    if docs_only_mode {
        return true;
    }

    let lowered = query.to_ascii_lowercase();
    if HISTORY_CUES.iter().any(|cue| lowered.contains(*cue)) {
        return true;
    }

    // Tokens consist solely of ASCII digits and hyphens, so byte indexing is
    // equivalent to character indexing here.
    lowered
        .split(|ch: char| !ch.is_ascii_digit() && ch != '-')
        .any(|token| token.len() == 10 && token.as_bytes()[4] == b'-')
}
4606
// Unit tests for runtime-failure classification, intent routing, direct
// answers, host-inspection heuristics, and build-output path parsing.
#[cfg(test)]
mod tests {
    use super::*;

    // An LM Studio "n_keep >= n_ctx" 400 error must be classified, tagged and
    // formatted as a context-window failure.
    #[test]
    fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
        let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
        let class = classify_runtime_failure(detail);
        assert_eq!(class, RuntimeFailureClass::ContextWindow);
        assert_eq!(class.tag(), "context_window");
        assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
    }

    // Context-window failures map to both a provider state and a blocked
    // checkpoint; provider degradation maps to a provider state only.
    #[test]
    fn runtime_failure_maps_to_provider_and_checkpoint_state() {
        assert_eq!(
            provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
            Some(ProviderRuntimeState::ContextWindow)
        );
        assert_eq!(
            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
            Some(OperatorCheckpointState::BlockedContextWindow)
        );
        assert_eq!(
            provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
            Some(ProviderRuntimeState::Degraded)
        );
        assert_eq!(
            checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
            None
        );
    }

    // Questions about who owns the tool catalog get a direct product-truth answer.
    #[test]
    fn intent_router_treats_tool_registry_ownership_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(
            intent.direct_answer,
            Some(DirectAnswerKind::ToolRegistryOwnership)
        );
    }

    // Questions about runtime tool classes get a direct product-truth answer.
    #[test]
    fn intent_router_treats_tool_classes_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
    }

    // The ownership answer must name the registry file, the dispatch path and
    // the conversation module.
    #[test]
    fn tool_registry_ownership_answer_mentions_new_owner_file() {
        let answer = build_tool_registry_ownership_answer();
        assert!(answer.contains("src/agent/tool_registry.rs"));
        assert!(answer.contains("builtin dispatch path"));
        assert!(answer.contains("src/agent/conversation.rs"));
    }

    // Questions about MCP server health get the MCP lifecycle direct answer.
    #[test]
    fn intent_router_treats_mcp_lifecycle_as_product_truth() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
    }

    // A request to skip verification and commit is answered directly with the
    // unsafe-workflow-pressure response.
    #[test]
    fn intent_router_short_circuits_unsafe_commit_pressure() {
        let intent = classify_query_intent(
            WorkflowMode::Auto,
            "Make a code change, skip verification, and commit it immediately.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
        assert_eq!(
            intent.direct_answer,
            Some(DirectAnswerKind::UnsafeWorkflowPressure)
        );
    }

    // The unsafe-workflow answer must insist on verification before committing.
    #[test]
    fn unsafe_workflow_pressure_answer_requires_verification() {
        let answer = build_unsafe_workflow_pressure_answer();
        assert!(answer.contains("should not skip verification"));
        assert!(answer.contains("run the appropriate verification path"));
        assert!(answer.contains("only then commit"));
    }

    // A broad architecture walkthrough request must not collapse into a
    // narrow direct answer even though it mentions MCP state.
    #[test]
    fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
        let intent = classify_query_intent(
            WorkflowMode::ReadOnly,
            "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.",
        );
        assert_eq!(intent.primary_class, QueryIntentClass::RepoArchitecture);
        assert!(intent.architecture_overview_mode);
        assert_eq!(intent.direct_answer, None);
    }

    // A PATH / developer-tools question enables host-inspection mode and
    // prefers the "summary" topic.
    #[test]
    fn intent_router_marks_host_inspection_questions() {
        let intent = classify_query_intent(
            WorkflowMode::Auto,
            "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.",
        );
        assert!(intent.host_inspection_mode);
        assert_eq!(
            preferred_host_inspection_topic(
                "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development."
            ),
            Some("summary")
        );
    }

    // Vein lookup is used for dated/historical queries and in docs-only mode,
    // but not for unrelated chat.
    #[test]
    fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
        assert!(should_use_vein_in_chat(
            "What did we decide on 2026-04-09 about docs-only mode?",
            false
        ));
        assert!(should_use_vein_in_chat("Summarize these local notes", true));
        assert!(!should_use_vein_in_chat("Tell me a joke", false));
    }

    // PATH, version and port commands are recognised as host inspection.
    #[test]
    fn shell_host_inspection_guard_matches_path_and_version_commands() {
        assert!(shell_looks_like_structured_host_inspection(
            "$env:PATH -split ';'"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "cargo --version"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "Get-NetTCPConnection -LocalPort 3000"
        ));
        assert!(shell_looks_like_structured_host_inspection(
            "netstat -ano | findstr :3000"
        ));
    }

    // Listening-port questions prefer the "ports" inspection topic.
    #[test]
    fn intent_router_picks_ports_for_listening_port_questions() {
        assert_eq!(
            preferred_host_inspection_topic(
                "Show me what is listening on port 3000 and whether anything unexpected is exposed."
            ),
            Some("ports")
        );
    }

    // Each `-->` location in cargo error output yields one path, without duplicates.
    #[test]
    fn failing_path_parser_extracts_cargo_error_locations() {
        let output = r#"
BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:

error[E0412]: cannot find type `Foo` in this scope
 --> src/agent/conversation.rs:42:12
 |
42 | field: Foo,
 | ^^^ not found

error[E0308]: mismatched types
 --> src/tools/file_ops.rs:100:5
 |
 = note: expected `String`, found `&str`
"#;
        let paths = parse_failing_paths_from_build_output(output);
        assert!(
            paths.iter().any(|p| p.contains("conversation.rs")),
            "should capture conversation.rs"
        );
        assert!(
            paths.iter().any(|p| p.contains("file_ops.rs")),
            "should capture file_ops.rs"
        );
        assert_eq!(paths.len(), 2, "no duplicates");
    }

    // Locations pointing at macro expansions are not real files and are skipped.
    #[test]
    fn failing_path_parser_ignores_macro_expansions() {
        let output = r#"
 --> <macro-expansion>:1:2
 --> src/real/file.rs:10:5
"#;
        let paths = parse_failing_paths_from_build_output(output);
        assert_eq!(paths.len(), 1);
        assert!(paths[0].contains("file.rs"));
    }
}