1use crate::agent::architecture_summary::{
2 build_architecture_overview_answer, prune_architecture_trace_batch,
3 prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
4 summarize_project_map_output, summarize_runtime_trace_output,
5};
6use crate::agent::direct_answers::{
7 build_about_answer, build_architect_session_reset_plan, build_authorization_policy_answer,
8 build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
9 build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
10 build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
11 build_session_reset_semantics_answer, build_tool_classes_answer,
12 build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
13 build_verify_profiles_answer, build_workflow_modes_answer,
14};
15use crate::agent::inference::{
16 ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
17 ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
18};
19use crate::agent::policy::{
20 action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
21 is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
22};
23use crate::agent::recovery_recipes::{
24 attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
25 RecoveryPlan, RecoveryScenario, RecoveryStep,
26};
27use crate::agent::routing::{
28 classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
29 preferred_host_inspection_topic, DirectAnswerKind, QueryIntentClass,
30};
31use crate::agent::tool_registry::dispatch_builtin_tool;
32use crate::agent::compaction::{self, CompactionConfig};
34use crate::ui::gpu_monitor::GpuState;
35
36use serde_json::Value;
37use std::sync::Arc;
38use tokio::sync::{mpsc, Mutex};
/// One user submission: the typed prompt plus any optional attachments.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
    /// Prompt text as entered by the user.
    pub text: String,
    /// Document attached to this turn, if any.
    pub attached_document: Option<AttachedDocument>,
    /// Image attached to this turn, if any.
    pub attached_image: Option<AttachedImage>,
}

/// A named document whose full text content accompanies a turn.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
    /// Display name of the document.
    pub name: String,
    /// Complete textual content of the document.
    pub content: String,
}

/// A named image referenced by path.
#[derive(Clone, Debug)]
pub struct AttachedImage {
    /// Display name of the image.
    pub name: String,
    /// Location of the image file.
    pub path: String,
}

impl UserTurn {
    /// Creates a text-only turn; both attachment slots are left empty.
    pub fn text(text: impl Into<String>) -> Self {
        Self {
            text: text.into(),
            ..Self::default()
        }
    }
}
69
/// Serialized shape of the persisted session file (written and read as JSON).
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
    /// Compacted summary of earlier conversation, when one exists.
    running_summary: Option<String>,
    /// Structured session memory; `#[serde(default)]` lets a file that lacks
    /// this field still deserialize, falling back to the default memory.
    #[serde(default)]
    session_memory: crate::agent::compaction::SessionMemory,
}
76
/// Bookkeeping about what has been read and verified, keyed by turn.
#[derive(Default)]
struct ActionGroundingState {
    /// Counter of grounded turns; incremented each time a grounded turn begins.
    turn_index: u64,
    /// Normalized workspace path -> turn index at which the path was last observed.
    observed_paths: std::collections::HashMap<String, u64>,
    /// Normalized workspace path -> turn index at which the path was last inspected.
    inspected_paths: std::collections::HashMap<String, u64>,
    /// NOTE(review): presumably the turn of the most recent build verification — confirm at the write site.
    last_verify_build_turn: Option<u64>,
    /// NOTE(review): presumably whether that most recent verification succeeded — confirm.
    last_verify_build_ok: bool,
    /// NOTE(review): presumably paths reported by the last failed build — confirm.
    last_failed_build_paths: Vec<String>,
    /// NOTE(review): presumably set when code changed after the last verification — confirm.
    code_changed_since_verify: bool,
}
87
/// Scope guard over a shared boolean flag: the flag is cleared when the guard
/// is dropped, whichever way the owning scope exits.
struct PlanExecutionGuard {
    /// Shared flag that is reset to `false` on drop.
    flag: Arc<std::sync::atomic::AtomicBool>,
}

impl Drop for PlanExecutionGuard {
    /// Resets the guarded flag to `false`.
    fn drop(&mut self) {
        use std::sync::atomic::Ordering::SeqCst;
        self.flag.store(false, SeqCst);
    }
}
97
/// Operating mode selected for the conversation; `Auto` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
    #[default]
    Auto,
    Ask,
    Code,
    Architect,
    ReadOnly,
    Chat,
}

impl WorkflowMode {
    /// Upper-case display label for the mode.
    fn label(self) -> &'static str {
        match self {
            Self::Auto => "AUTO",
            Self::Ask => "ASK",
            Self::Code => "CODE",
            Self::Architect => "ARCHITECT",
            Self::ReadOnly => "READ-ONLY",
            Self::Chat => "CHAT",
        }
    }

    /// `true` for the modes that must not mutate anything: ask, architect and read-only.
    fn is_read_only(self) -> bool {
        match self {
            Self::Ask | Self::Architect | Self::ReadOnly => true,
            Self::Auto | Self::Code | Self::Chat => false,
        }
    }

    /// `true` only for the conversational chat mode.
    pub(crate) fn is_chat(self) -> bool {
        self == Self::Chat
    }
}
134
135fn session_path() -> std::path::PathBuf {
136 crate::tools::file_ops::workspace_root()
137 .join(".hematite")
138 .join("session.json")
139}
140
141fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
142 let path = session_path();
143 if !path.exists() {
144 return (None, crate::agent::compaction::SessionMemory::default());
145 }
146 let Ok(data) = std::fs::read_to_string(&path) else {
147 return (None, crate::agent::compaction::SessionMemory::default());
148 };
149 let Ok(saved) = serde_json::from_str::<SavedSession>(&data) else {
150 return (None, crate::agent::compaction::SessionMemory::default());
151 };
152 (saved.running_summary, saved.session_memory)
153}
154
155fn reset_task_files() {
156 let root = crate::tools::file_ops::workspace_root();
157 let _ = std::fs::remove_file(root.join(".hematite").join("TASK.md"));
158 let _ = std::fs::remove_file(root.join(".hematite").join("PLAN.md"));
159 let _ = std::fs::remove_file(root.join(".hematite").join("WALKTHROUGH.md"));
160 let _ = std::fs::remove_file(root.join(".github").join("WALKTHROUGH.md"));
161 let _ = std::fs::write(root.join(".hematite").join("TASK.md"), "");
162 let _ = std::fs::write(root.join(".hematite").join("PLAN.md"), "");
163}
164
165fn purge_persistent_memory() {
166 let root = crate::tools::file_ops::workspace_root();
167 let mem_dir = root.join(".hematite").join("memories");
168 if mem_dir.exists() {
169 let _ = std::fs::remove_dir_all(&mem_dir);
170 let _ = std::fs::create_dir_all(&mem_dir);
171 }
172
173 let log_dir = root.join(".hematite_logs");
174 if log_dir.exists() {
175 if let Ok(entries) = std::fs::read_dir(&log_dir) {
176 for entry in entries.flatten() {
177 let _ = std::fs::write(entry.path(), "");
178 }
179 }
180 }
181}
182
183fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
184 let mut out = prompt.trim().to_string();
185 if let Some(doc) = user_turn.attached_document.as_ref() {
186 out = format!(
187 "[Attached document: {}]\n\n{}\n\n---\n\n{}",
188 doc.name, doc.content, out
189 );
190 }
191 if let Some(image) = user_turn.attached_image.as_ref() {
192 out = if out.is_empty() {
193 format!("[Attached image: {}]", image.name)
194 } else {
195 format!("[Attached image: {}]\n\n{}", image.name, out)
196 };
197 }
198 out
199}
200
201fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
202 let mut prefixes = Vec::new();
203 if let Some(doc) = user_turn.attached_document.as_ref() {
204 prefixes.push(format!("[Attached document: {}]", doc.name));
205 }
206 if let Some(image) = user_turn.attached_image.as_ref() {
207 prefixes.push(format!("[Attached image: {}]", image.name));
208 }
209 if prefixes.is_empty() {
210 prompt.to_string()
211 } else if prompt.trim().is_empty() {
212 prefixes.join("\n")
213 } else {
214 format!("{}\n{}", prefixes.join("\n"), prompt)
215 }
216}
217
/// Coarse classification of a runtime failure surfaced during a turn.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
    ContextWindow,
    ProviderDegraded,
    ToolArgMalformed,
    ToolPolicyBlocked,
    ToolLoop,
    VerificationFailed,
    EmptyModelResponse,
    Unknown,
}

impl RuntimeFailureClass {
    /// Stable snake_case tag for the class, used in formatted failure messages.
    fn tag(self) -> &'static str {
        match self {
            Self::ContextWindow => "context_window",
            Self::ProviderDegraded => "provider_degraded",
            Self::ToolArgMalformed => "tool_arg_malformed",
            Self::ToolPolicyBlocked => "tool_policy_blocked",
            Self::ToolLoop => "tool_loop",
            Self::VerificationFailed => "verification_failed",
            Self::EmptyModelResponse => "empty_model_response",
            Self::Unknown => "unknown",
        }
    }

    /// Human-readable advice telling the operator how to react to the class.
    fn operator_guidance(self) -> &'static str {
        match self {
            Self::ContextWindow => "Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying.",
            Self::ProviderDegraded => "Retry once automatically, then narrow the turn or restart LM Studio if it persists.",
            Self::ToolArgMalformed => "Retry with repaired or narrower tool arguments instead of repeating the same malformed call.",
            Self::ToolPolicyBlocked => "Stay inside the allowed workflow or switch modes before retrying.",
            Self::ToolLoop => "Stop repeating the same failing tool pattern and switch to a narrower recovery step.",
            Self::VerificationFailed => "Fix the build or test failure before treating the task as complete.",
            Self::EmptyModelResponse => "Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing.",
            Self::Unknown => "Inspect the latest grounded tool results or provider status before retrying.",
        }
    }
}
273
274fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
275 let lower = detail.to_ascii_lowercase();
276 if lower.contains("context_window_blocked")
277 || lower.contains("context ceiling reached")
278 || lower.contains("exceeds the")
279 || ((lower.contains("n_keep") && lower.contains("n_ctx"))
280 || lower.contains("context length")
281 || lower.contains("keep from the initial prompt")
282 || lower.contains("prompt is greater than the context length"))
283 {
284 RuntimeFailureClass::ContextWindow
285 } else if lower.contains("empty response from model")
286 || lower.contains("model returned an empty response")
287 {
288 RuntimeFailureClass::EmptyModelResponse
289 } else if lower.contains("lm studio unreachable")
290 || lower.contains("lm studio error")
291 || lower.contains("request failed")
292 || lower.contains("response parse error")
293 || lower.contains("provider degraded")
294 {
295 RuntimeFailureClass::ProviderDegraded
296 } else if lower.contains("missing required argument")
297 || lower.contains("json repair failed")
298 || lower.contains("invalid pattern")
299 || lower.contains("invalid line range")
300 {
301 RuntimeFailureClass::ToolArgMalformed
302 } else if lower.contains("action blocked:")
303 || lower.contains("access denied")
304 || lower.contains("declined by user")
305 {
306 RuntimeFailureClass::ToolPolicyBlocked
307 } else if lower.contains("too many consecutive tool errors")
308 || lower.contains("repeated tool failures")
309 || lower.contains("stuck in a loop")
310 {
311 RuntimeFailureClass::ToolLoop
312 } else if lower.contains("build failed")
313 || lower.contains("verification failed")
314 || lower.contains("verify_build")
315 {
316 RuntimeFailureClass::VerificationFailed
317 } else {
318 RuntimeFailureClass::Unknown
319 }
320}
321
322fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
323 format!(
324 "[failure:{}] {} Detail: {}",
325 class.tag(),
326 class.operator_guidance(),
327 detail.trim()
328 )
329}
330
331fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
332 match class {
333 RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
334 RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
335 RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
336 _ => None,
337 }
338}
339
340fn checkpoint_state_for_runtime_failure(
341 class: RuntimeFailureClass,
342) -> Option<OperatorCheckpointState> {
343 match class {
344 RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
345 RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
346 RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
347 RuntimeFailureClass::VerificationFailed => {
348 Some(OperatorCheckpointState::BlockedVerification)
349 }
350 _ => None,
351 }
352}
353
354fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
355 match class {
356 RuntimeFailureClass::ProviderDegraded => {
357 "LM Studio degraded during the turn; retrying once before surfacing a failure."
358 }
359 RuntimeFailureClass::EmptyModelResponse => {
360 "The model returned an empty reply; retrying once before surfacing a failure."
361 }
362 _ => "Runtime recovery in progress.",
363 }
364}
365
366fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
367 match class {
368 RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
369 RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
370 RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
371 RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
372 _ => "Operator checkpoint updated.",
373 }
374}
375
376fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
377 match class {
378 RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
379 RuntimeFailureClass::ProviderDegraded => {
380 "LM Studio degraded and did not recover cleanly; operator action is now required."
381 }
382 RuntimeFailureClass::EmptyModelResponse => {
383 "LM Studio returned an empty reply after recovery; operator action is now required."
384 }
385 RuntimeFailureClass::ToolLoop => {
386 "Repeated failing tool pattern detected; Hematite stopped the loop."
387 }
388 _ => "Runtime failure surfaced to the operator.",
389 }
390}
391
392fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
393 matches!(
394 class,
395 RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
396 )
397}
398
399fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
400 match class {
401 RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
402 RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
403 RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
404 RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
405 RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
406 RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
407 RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
408 }
409}
410
411fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
412 format!(
413 "{} [{}]",
414 plan.recipe.scenario.label(),
415 plan.recipe.steps_summary()
416 )
417}
418
419fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
420 match decision {
421 RecoveryDecision::Attempt(plan) => compact_recovery_plan_summary(plan),
422 RecoveryDecision::Escalate {
423 recipe,
424 attempts_made,
425 ..
426 } => format!(
427 "{} escalated after {} / {} [{}]",
428 recipe.scenario.label(),
429 attempts_made,
430 recipe.max_attempts.max(1),
431 recipe.steps_summary()
432 ),
433 }
434}
435
436fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
439 let root = crate::tools::file_ops::workspace_root();
440 let mut paths: Vec<String> = output
441 .lines()
442 .filter_map(|line| {
443 let trimmed = line.trim_start();
444 let after_arrow = trimmed.strip_prefix("--> ")?;
446 let file_part = after_arrow.split(':').next()?;
447 if file_part.is_empty() || file_part.starts_with('<') {
448 return None;
449 }
450 let p = std::path::Path::new(file_part);
451 let resolved = if p.is_absolute() {
452 p.to_path_buf()
453 } else {
454 root.join(p)
455 };
456 Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
457 })
458 .collect();
459 paths.sort();
460 paths.dedup();
461 paths
462}
463
464fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
465 match mode {
466 WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.".to_string(),
467 WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.".to_string(),
468 WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.".to_string(),
469 _ => "Switch to `/code` or `/auto` to allow implementation.".to_string(),
470 }
471}
472
/// Prompt fragment that pins the output format of an architect handoff: the
/// behavioral rules followed by the six exact section headings.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow` over a broad `map_project` scan.\n",
        "If you do use `map_project`, keep it minimal and scoped.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented.",
    )
}
489
/// Whether the input asks to execute the saved plan, i.e. contains the phrase
/// "implement the current plan" in any ASCII casing.
fn is_current_plan_execution_request(user_input: &str) -> bool {
    // A substring test also covers the exact-match forms, with or without the trailing period.
    user_input
        .to_ascii_lowercase()
        .contains("implement the current plan")
}
496
497fn is_plan_scoped_tool(name: &str) -> bool {
498 crate::agent::inference::tool_metadata_for_name(name).plan_scope
499}
500
501fn is_current_plan_irrelevant_tool(name: &str) -> bool {
502 !crate::agent::inference::tool_metadata_for_name(name).plan_scope
503}
504
505fn is_non_mutating_plan_step_tool(name: &str) -> bool {
506 let metadata = crate::agent::inference::tool_metadata_for_name(name);
507 metadata.plan_scope && !metadata.mutates_workspace
508}
509
510fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
511 let trimmed = user_input.trim();
512 for (prefix, mode) in [
513 ("/ask", WorkflowMode::Ask),
514 ("/code", WorkflowMode::Code),
515 ("/architect", WorkflowMode::Architect),
516 ("/read-only", WorkflowMode::ReadOnly),
517 ("/auto", WorkflowMode::Auto),
518 ] {
519 if let Some(rest) = trimmed.strip_prefix(prefix) {
520 let rest = rest.trim();
521 if !rest.is_empty() {
522 return Some((mode, rest));
523 }
524 }
525 }
526 None
527}
528
529pub fn get_tools() -> Vec<ToolDefinition> {
533 crate::agent::tool_registry::get_tools()
534}
535
/// Owns the state of one conversation: message history, model engine handle,
/// tool definitions, persisted session memory and the supporting services.
pub struct ConversationManager {
    /// Chat messages exchanged so far; seeded with the system prompt at construction.
    pub history: Vec<ChatMessage>,
    /// Shared inference engine used for prompts, context length and runtime profile.
    pub engine: Arc<InferenceEngine>,
    /// Tool definitions exposed to the model.
    pub tools: Vec<ToolDefinition>,
    /// Shared MCP manager.
    pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
    /// Prompt-style flag forwarded to the system prompt builder.
    pub professional: bool,
    /// Prompt-style flag forwarded to the system prompt builder.
    pub brief: bool,
    /// Prompt-style level forwarded to the system prompt builder.
    pub snark: u8,
    /// Prompt-style level forwarded to the system prompt builder.
    pub chaos: u8,
    /// NOTE(review): presumably the model id preferred for fast turns — confirm.
    pub fast_model: Option<String>,
    /// NOTE(review): presumably the model id preferred for reasoning turns — confirm.
    pub think_model: Option<String>,
    /// Starts empty. NOTE(review): presumably accumulated corrective hints — confirm.
    pub correction_hints: Vec<String>,
    /// Compacted summary of earlier conversation; restored from and saved to the session file.
    pub running_summary: Option<String>,
    /// Shared GPU state; its ratio feeds the adaptive compaction configuration.
    pub gpu_state: Arc<GpuState>,
    /// Vein index; opened at `.hematite/vein.db` with an in-memory fallback.
    pub vein: crate::memory::vein::Vein,
    /// Transcript logger for user, agent and system lines.
    pub transcript: crate::agent::transcript::TranscriptLogger,
    /// Shared flag, initially `false`. NOTE(review): presumably set to request cancellation — confirm.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
    /// Shared git state.
    pub git_state: Arc<crate::agent::git_monitor::GitState>,
    /// Starts as `None`. NOTE(review): presumably an explicit thinking-mode override — confirm.
    pub think_mode: Option<bool>,
    /// Active workflow mode; starts as `Auto`.
    workflow_mode: WorkflowMode,
    /// Structured session memory; restored from and saved to the session file.
    pub session_memory: crate::agent::compaction::SessionMemory,
    /// Shared swarm coordinator.
    pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    /// Shared voice manager.
    pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
    /// Personality text interpolated into the chat-mode system prompt.
    pub soul_personality: String,
    /// Shared LSP manager created for the workspace root.
    pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
    /// Starts as `None`. NOTE(review): presumably retained reasoning output — confirm.
    pub reasoning_history: Option<String>,
    /// Shared string-to-string map of pinned files. NOTE(review): presumably path -> content — confirm.
    pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
    /// Shared per-turn record of observed and inspected paths and verification status.
    action_grounding: Arc<Mutex<ActionGroundingState>>,
    /// Shared flag, initially `false`. NOTE(review): presumably true while a plan is executing — confirm.
    plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
    /// Recovery context; starts at its default value.
    recovery_context: RecoveryContext,
    /// L1 context obtained from the vein; refreshed after each indexing pass.
    pub l1_context: Option<String>,
}
589
590impl ConversationManager {
591 fn vein_docs_only_mode(&self) -> bool {
592 !crate::tools::file_ops::is_project_workspace()
593 }
594
595 fn refresh_vein_index(&mut self) -> usize {
596 let count = if self.vein_docs_only_mode() {
597 let root = crate::tools::file_ops::workspace_root();
598 tokio::task::block_in_place(|| self.vein.index_workspace_artifacts(&root))
599 } else {
600 tokio::task::block_in_place(|| self.vein.index_project())
601 };
602 self.l1_context = self.vein.l1_context();
603 count
604 }
605
606 fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
607 let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
608 let workspace_mode = if self.vein_docs_only_mode() {
609 "docs-only (outside a project workspace)"
610 } else {
611 "project workspace"
612 };
613 let active_room = snapshot.active_room.as_deref().unwrap_or("none");
614 let mut out = format!(
615 "Vein Inspection\n\
616 Workspace mode: {workspace_mode}\n\
617 Indexed this pass: {indexed_this_pass}\n\
618 Indexed source files: {}\n\
619 Indexed docs: {}\n\
620 Indexed session exchanges: {}\n\
621 Embedded source/doc chunks: {}\n\
622 Embeddings available: {}\n\
623 Active room bias: {active_room}\n\
624 L1 hot-files block: {}\n",
625 snapshot.indexed_source_files,
626 snapshot.indexed_docs,
627 snapshot.indexed_session_exchanges,
628 snapshot.embedded_source_doc_chunks,
629 if snapshot.has_any_embeddings {
630 "yes"
631 } else {
632 "no"
633 },
634 if snapshot.l1_ready {
635 "ready"
636 } else {
637 "not built yet"
638 },
639 );
640
641 if snapshot.hot_files.is_empty() {
642 out.push_str("Hot files: none yet.\n");
643 return out;
644 }
645
646 out.push_str("\nHot files by room:\n");
647 let mut by_room: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
648 std::collections::BTreeMap::new();
649 for file in &snapshot.hot_files {
650 by_room.entry(file.room.as_str()).or_default().push(file);
651 }
652 for (room, files) in by_room {
653 out.push_str(&format!("[{}]\n", room));
654 for file in files {
655 out.push_str(&format!(
656 "- {} [{} edit{}]\n",
657 file.path,
658 file.heat,
659 if file.heat == 1 { "" } else { "s" }
660 ));
661 }
662 }
663
664 out
665 }
666
667 fn latest_user_prompt(&self) -> Option<&str> {
668 self.history
669 .iter()
670 .rev()
671 .find(|msg| msg.role == "user")
672 .map(|msg| msg.content.as_str())
673 }
674
675 async fn emit_direct_response(
676 &mut self,
677 tx: &mpsc::Sender<InferenceEvent>,
678 raw_user_input: &str,
679 effective_user_input: &str,
680 response: &str,
681 ) {
682 self.history.push(ChatMessage::user(effective_user_input));
683 self.history.push(ChatMessage::assistant_text(response));
684 self.transcript.log_user(raw_user_input);
685 self.transcript.log_agent(response);
686 for chunk in chunk_text(response, 8) {
687 if !chunk.is_empty() {
688 let _ = tx.send(InferenceEvent::Token(chunk)).await;
689 }
690 }
691 let _ = tx.send(InferenceEvent::Done).await;
692 self.trim_history(80);
693 self.refresh_session_memory();
694 self.save_session();
695 }
696
697 async fn emit_operator_checkpoint(
698 &mut self,
699 tx: &mpsc::Sender<InferenceEvent>,
700 state: OperatorCheckpointState,
701 summary: impl Into<String>,
702 ) {
703 let summary = summary.into();
704 self.session_memory
705 .record_checkpoint(state.label(), summary.clone());
706 let _ = tx
707 .send(InferenceEvent::OperatorCheckpoint { state, summary })
708 .await;
709 }
710
711 async fn emit_recovery_recipe_summary(
712 &mut self,
713 tx: &mpsc::Sender<InferenceEvent>,
714 state: impl Into<String>,
715 summary: impl Into<String>,
716 ) {
717 let state = state.into();
718 let summary = summary.into();
719 self.session_memory.record_recovery(state, summary.clone());
720 let _ = tx.send(InferenceEvent::RecoveryRecipe { summary }).await;
721 }
722
723 async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
724 let _ = tx
725 .send(InferenceEvent::ProviderStatus {
726 state: ProviderRuntimeState::Live,
727 summary: String::new(),
728 })
729 .await;
730 self.emit_operator_checkpoint(tx, OperatorCheckpointState::Idle, "")
731 .await;
732 }
733
734 async fn emit_prompt_pressure_for_messages(
735 &self,
736 tx: &mpsc::Sender<InferenceEvent>,
737 messages: &[ChatMessage],
738 ) {
739 let context_length = self.engine.current_context_length();
740 let (estimated_input_tokens, reserved_output_tokens, estimated_total_tokens, percent) =
741 crate::agent::inference::estimate_prompt_pressure(
742 messages,
743 &self.tools,
744 context_length,
745 );
746 let _ = tx
747 .send(InferenceEvent::PromptPressure {
748 estimated_input_tokens,
749 reserved_output_tokens,
750 estimated_total_tokens,
751 context_length,
752 percent,
753 })
754 .await;
755 }
756
757 async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
758 let context_length = self.engine.current_context_length();
759 let _ = tx
760 .send(InferenceEvent::PromptPressure {
761 estimated_input_tokens: 0,
762 reserved_output_tokens: 0,
763 estimated_total_tokens: 0,
764 context_length,
765 percent: 0,
766 })
767 .await;
768 }
769
770 async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
771 let context_length = self.engine.current_context_length();
772 let vram_ratio = self.gpu_state.ratio();
773 let config = CompactionConfig::adaptive(context_length, vram_ratio);
774 let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
775 let percent = if config.max_estimated_tokens == 0 {
776 0
777 } else {
778 ((estimated_tokens.saturating_mul(100)) / config.max_estimated_tokens).min(100) as u8
779 };
780
781 let _ = tx
782 .send(InferenceEvent::CompactionPressure {
783 estimated_tokens,
784 threshold_tokens: config.max_estimated_tokens,
785 percent,
786 })
787 .await;
788 }
789
790 async fn refresh_runtime_profile_and_report(
791 &mut self,
792 tx: &mpsc::Sender<InferenceEvent>,
793 reason: &str,
794 ) -> Option<(String, usize, bool)> {
795 let refreshed = self.engine.refresh_runtime_profile().await;
796 if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
797 let _ = tx
798 .send(InferenceEvent::RuntimeProfile {
799 model_id: model_id.clone(),
800 context_length: *context_length,
801 })
802 .await;
803 self.transcript.log_system(&format!(
804 "Runtime profile refresh ({}): model={} ctx={} changed={}",
805 reason, model_id, context_length, changed
806 ));
807 }
808 refreshed
809 }
810
811 pub fn new(
812 engine: Arc<InferenceEngine>,
813 professional: bool,
814 brief: bool,
815 snark: u8,
816 chaos: u8,
817 soul_personality: String,
818 fast_model: Option<String>,
819 think_model: Option<String>,
820 gpu_state: Arc<GpuState>,
821 git_state: Arc<crate::agent::git_monitor::GitState>,
822 swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
823 voice_manager: Arc<crate::ui::voice::VoiceManager>,
824 ) -> Self {
825 let (saved_summary, saved_memory) = load_session_data();
826
827 let mcp_manager = Arc::new(tokio::sync::Mutex::new(
829 crate::agent::mcp_manager::McpManager::new(),
830 ));
831
832 let dynamic_instructions =
834 engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
835
836 let history = vec![ChatMessage::system(&dynamic_instructions)];
837
838 let vein_path = crate::tools::file_ops::workspace_root()
839 .join(".hematite")
840 .join("vein.db");
841 let vein_base_url = engine.base_url.clone();
842 let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
843 .unwrap_or_else(|_| crate::memory::vein::Vein::new(":memory:", vein_base_url).unwrap());
844
845 Self {
846 history,
847 engine,
848 tools: get_tools(),
849 mcp_manager,
850 professional,
851 brief,
852 snark,
853 chaos,
854 fast_model,
855 think_model,
856 correction_hints: Vec::new(),
857 running_summary: saved_summary,
858 gpu_state,
859 vein,
860 transcript: crate::agent::transcript::TranscriptLogger::new(),
861 cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
862 git_state,
863 think_mode: None,
864 workflow_mode: WorkflowMode::Auto,
865 session_memory: saved_memory,
866 swarm_coordinator,
867 voice_manager,
868 soul_personality,
869 lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
870 crate::tools::file_ops::workspace_root(),
871 ))),
872 reasoning_history: None,
873 pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
874 action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
875 plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
876 recovery_context: RecoveryContext::default(),
877 l1_context: None,
878 }
879 }
880
881 pub fn initialize_vein(&mut self) -> usize {
884 self.refresh_vein_index()
885 }
886
887 fn save_session(&self) {
888 let path = session_path();
889 if let Some(parent) = path.parent() {
890 let _ = std::fs::create_dir_all(parent);
891 }
892 let saved = SavedSession {
893 running_summary: self.running_summary.clone(),
894 session_memory: self.session_memory.clone(),
895 };
896 if let Ok(json) = serde_json::to_string(&saved) {
897 let _ = std::fs::write(&path, json);
898 }
899 }
900
901 fn save_empty_session(&self) {
902 let path = session_path();
903 if let Some(parent) = path.parent() {
904 let _ = std::fs::create_dir_all(parent);
905 }
906 let saved = SavedSession {
907 running_summary: None,
908 session_memory: crate::agent::compaction::SessionMemory::default(),
909 };
910 if let Ok(json) = serde_json::to_string(&saved) {
911 let _ = std::fs::write(&path, json);
912 }
913 }
914
915 fn refresh_session_memory(&mut self) {
916 let current_plan = self.session_memory.current_plan.clone();
917 let previous_memory = self.session_memory.clone();
918 self.session_memory = compaction::extract_memory(&self.history);
919 self.session_memory.current_plan = current_plan;
920 self.session_memory
921 .inherit_runtime_ledger_from(&previous_memory);
922 }
923
    /// Builds the system prompt used in CHAT mode.
    ///
    /// The engine's species name and the configured personality text are
    /// interpolated into a fixed set of conversational rules.
    fn build_chat_system_prompt(&self) -> String {
        let species = &self.engine.species;
        let personality = &self.soul_personality;
        // `{species}` and `{personality}` are captured from the locals above.
        format!(
            "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
             {personality}\n\n\
             This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
             Rules:\n\
             - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
             - Answer directly. One paragraph is usually right.\n\
             - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
             - Don't narrate your reasoning or mention tool names unprompted.\n\
             - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
             - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
             - If the user wants the full coding harness, they can type `/agent`.\n",
        )
    }
941
942 fn append_session_handoff(&self, system_msg: &mut String) {
943 let has_summary = self
944 .running_summary
945 .as_ref()
946 .map(|s| !s.trim().is_empty())
947 .unwrap_or(false);
948 let has_memory = self.session_memory.has_signal();
949
950 if !has_summary && !has_memory {
951 return;
952 }
953
954 system_msg.push_str(
955 "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
956 This is compact carry-over from earlier work on this machine.\n\
957 Use it only when it helps the current request.\n\
958 Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
959 );
960
961 if has_memory {
962 system_msg.push_str("\n## Active Task Memory\n");
963 system_msg.push_str(&self.session_memory.to_prompt());
964 }
965
966 if let Some(summary) = self.running_summary.as_deref() {
967 if !summary.trim().is_empty() {
968 system_msg.push_str("\n## Compacted Session Summary\n");
969 system_msg.push_str(summary);
970 system_msg.push('\n');
971 }
972 }
973 }
974
/// Switch the active workflow mode to `mode`.
fn set_workflow_mode(&mut self, mode: WorkflowMode) {
    self.workflow_mode = mode;
}
978
979 fn current_plan_summary(&self) -> Option<String> {
980 self.session_memory
981 .current_plan
982 .as_ref()
983 .filter(|plan| plan.has_signal())
984 .map(|plan| plan.summary_line())
985 }
986
987 fn current_plan_allowed_paths(&self) -> Vec<String> {
988 self.session_memory
989 .current_plan
990 .as_ref()
991 .map(|plan| {
992 plan.target_files
993 .iter()
994 .map(|path| normalize_workspace_path(path))
995 .collect()
996 })
997 .unwrap_or_default()
998 }
999
1000 fn persist_architect_handoff(&mut self, response: &str) {
1001 if self.workflow_mode != WorkflowMode::Architect {
1002 return;
1003 }
1004 let Some(plan) = crate::tools::plan::parse_plan_handoff(response) else {
1005 return;
1006 };
1007 let _ = crate::tools::plan::save_plan_handoff(&plan);
1008 self.session_memory.current_plan = Some(plan);
1009 }
1010
1011 async fn begin_grounded_turn(&self) -> u64 {
1012 let mut state = self.action_grounding.lock().await;
1013 state.turn_index += 1;
1014 state.turn_index
1015 }
1016
1017 async fn reset_action_grounding(&self) {
1018 let mut state = self.action_grounding.lock().await;
1019 *state = ActionGroundingState::default();
1020 }
1021
1022 async fn record_read_observation(&self, path: &str) {
1023 let normalized = normalize_workspace_path(path);
1024 let mut state = self.action_grounding.lock().await;
1025 let turn = state.turn_index;
1026 state.observed_paths.insert(normalized.clone(), turn);
1030 state.inspected_paths.insert(normalized, turn);
1031 }
1032
1033 async fn record_line_inspection(&self, path: &str) {
1034 let normalized = normalize_workspace_path(path);
1035 let mut state = self.action_grounding.lock().await;
1036 let turn = state.turn_index;
1037 state.observed_paths.insert(normalized.clone(), turn);
1038 state.inspected_paths.insert(normalized, turn);
1039 }
1040
1041 async fn record_verify_build_result(&self, ok: bool, output: &str) {
1042 let mut state = self.action_grounding.lock().await;
1043 let turn = state.turn_index;
1044 state.last_verify_build_turn = Some(turn);
1045 state.last_verify_build_ok = ok;
1046 if ok {
1047 state.code_changed_since_verify = false;
1048 state.last_failed_build_paths.clear();
1049 } else {
1050 state.last_failed_build_paths = parse_failing_paths_from_build_output(output);
1051 }
1052 }
1053
/// Forward a verification outcome (`ok` plus a summary) to the session memory.
fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
    self.session_memory.record_verification(ok, summary);
}
1057
1058 async fn record_successful_mutation(&self, path: Option<&str>) {
1059 let mut state = self.action_grounding.lock().await;
1060 state.code_changed_since_verify = match path {
1061 Some(p) => is_code_like_path(p),
1062 None => true,
1063 };
1064 }
1065
/// Decide whether the tool call `name(args)` may run right now.
///
/// Returns `Ok(())` when every gate passes, otherwise `Err` carrying an
/// "Action blocked: …" explanation from the FIRST gate that fails. The
/// gates run in this order, so their order determines which message is
/// reported:
///
/// 1. current-plan execution restrictions (only while `plan_execution_active`),
/// 2. read-only workflow restrictions,
/// 3. docs-edit guard and recent-evidence requirements for existing targets,
/// 4. external MCP mutation / filesystem-read tools,
/// 5. broken-build focus for file mutations,
/// 6. verification requirement for `git_commit` / `git_push`,
/// 7. `shell`-specific checks (host-inspection redirect, `reason` for risky commands).
async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
    // ── Gate 1: restrictions that apply only during current-plan execution ──
    if self
        .plan_execution_active
        .load(std::sync::atomic::Ordering::SeqCst)
    {
        if is_current_plan_irrelevant_tool(name) {
            return Err(format!(
                "Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
                name
            ));
        }

        if name == "map_project" {
            return Err(
                "Action blocked: `map_project` is too broad for current-plan execution. Use the target files from the saved plan and inspect them directly with built-in workspace tools."
                    .to_string(),
            );
        }

        if is_plan_scoped_tool(name) {
            let allowed_paths = self.current_plan_allowed_paths();
            // An empty allow-list means the plan names no target files; no path lock is applied.
            if !allowed_paths.is_empty() {
                let in_allowed = match name {
                    // Every entry of a non-empty `paths` array must be an allowed string path.
                    "auto_pin_context" => args
                        .get("paths")
                        .and_then(|v| v.as_array())
                        .map(|paths| {
                            !paths.is_empty()
                                && paths.iter().all(|v| {
                                    v.as_str()
                                        .map(normalize_workspace_path)
                                        .map(|p| allowed_paths.contains(&p))
                                        .unwrap_or(false)
                                })
                        })
                        .unwrap_or(false),
                    // These tools carry their scope in `path`; a missing `path` is rejected.
                    "grep_files" | "list_files" => args
                        .get("path")
                        .and_then(|v| v.as_str())
                        .map(normalize_workspace_path)
                        .map(|p| allowed_paths.contains(&p))
                        .unwrap_or(false),
                    // Everything else: use the tool's resolved target path, if it has one.
                    _ => action_target_path(name, args)
                        .map(|p| allowed_paths.contains(&p))
                        .unwrap_or(false),
                };

                if !in_allowed {
                    let allowed = allowed_paths
                        .iter()
                        .map(|p| format!("`{}`", p))
                        .collect::<Vec<_>>()
                        .join(", ");
                    return Err(format!(
                        "Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
                        allowed
                    ));
                }
            }
        }

        // In-place edits during plan execution need an inspection recorded within the
        // last 3 turns. Only `inspected_paths` is consulted here: pinned files and
        // `observed_paths` do not satisfy this gate.
        if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
            if let Some(target) = action_target_path(name, args) {
                let state = self.action_grounding.lock().await;
                let recently_inspected = state
                    .inspected_paths
                    .get(&target)
                    .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                    .unwrap_or(false);
                // Release the lock before building the error message.
                drop(state);
                if !recently_inspected {
                    return Err(format!(
                        "Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
                        name, target
                    ));
                }
            }
        }
    }

    // ── Gate 2: read-only workflow modes ──
    if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
        return Err(
            "Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
                .to_string(),
        );
    }

    if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
        if name == "shell" {
            // `shell` stays available in read-only modes for commands classified as Safe.
            let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
            let risk = crate::tools::guard::classify_bash_risk(command);
            if !matches!(risk, crate::tools::RiskLevel::Safe) {
                return Err(format!(
                    "Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
                    self.workflow_mode.label()
                ));
            }
        } else {
            return Err(format!(
                "Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
                self.workflow_mode.label()
            ));
        }
    }

    // ── Gate 3: checks keyed on the tool's resolved target path ──
    let normalized_target = action_target_path(name, args);
    if let Some(target) = normalized_target.as_deref() {
        if matches!(
            name,
            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
        ) {
            // Docs guard: judged against the latest user prompt; skipped when there is none.
            if let Some(prompt) = self.latest_user_prompt() {
                if docs_edit_without_explicit_request(prompt, target) {
                    return Err(format!(
                        "Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
                        target
                    ));
                }
            }
        }
        // Evidence is only demanded for targets that already exist on disk.
        let path_exists = std::path::Path::new(target).exists();
        if path_exists {
            let state = self.action_grounding.lock().await;
            let pinned = self.pinned_files.lock().await;
            // Pinned keys are normalized before comparison with the (normalized) target.
            let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
            drop(pinned);

            // Only these two tools need line-level evidence; other targeted tools fall
            // through to the looser "recent file evidence" rule below.
            let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
            // Inspected within the last 3 turns (current turn included).
            let recently_inspected = state
                .inspected_paths
                .get(target)
                .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                .unwrap_or(false);
            // Observed during the current turn.
            let same_turn_read = state
                .observed_paths
                .get(target)
                .map(|turn| state.turn_index.saturating_sub(*turn) == 0)
                .unwrap_or(false);
            // Observed within the last 3 turns (current turn included).
            let recent_observed = state
                .observed_paths
                .get(target)
                .map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
                .unwrap_or(false);

            if needs_exact_window {
                // Any one of: recent inspection, same-turn read, or a pinned file suffices.
                if !recently_inspected && !same_turn_read && !pinned_match {
                    return Err(format!(
                        "Action blocked: `{}` on '{}' requires a line-level inspection first. \
                         Use `inspect_lines` on the target region to get the exact current text \
                         (whitespace and indentation included), then retry the edit.",
                        name, target
                    ));
                }
            } else if !recent_observed && !pinned_match {
                return Err(format!(
                    "Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
                    name, target
                ));
            }
        }
    }

    // ── Gate 4: external MCP tools that duplicate built-in workspace tools ──
    if is_mcp_mutating_tool(name) {
        return Err(format!(
            "Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
            name
        ));
    }

    if is_mcp_workspace_read_tool(name) {
        return Err(format!(
            "Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
            name
        ));
    }

    // ── Gate 5: while the build is broken, only the failing files may be edited ──
    if matches!(
        name,
        "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
    ) {
        if let Some(target) = normalized_target.as_deref() {
            let state = self.action_grounding.lock().await;
            // Applies only when code changed, the last verify failed, failing paths are
            // known, and the target is not one of them.
            if state.code_changed_since_verify
                && !state.last_verify_build_ok
                && !state.last_failed_build_paths.is_empty()
                && !state.last_failed_build_paths.iter().any(|p| p == target)
            {
                let files = state
                    .last_failed_build_paths
                    .iter()
                    .map(|p| format!("`{}`", p))
                    .collect::<Vec<_>>()
                    .join(", ");
                return Err(format!(
                    "Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
                    files
                ));
            }
        }
    }

    // ── Gate 6: commits and pushes need a clean verification ──
    if name == "git_commit" || name == "git_push" {
        let state = self.action_grounding.lock().await;
        // NOTE(review): this blocks only when the most recent verify FAILED. A code edit
        // made after a passing verify is not blocked here unless `last_verify_build_ok`
        // is reset somewhere outside this function — confirm that matches the message.
        if state.code_changed_since_verify && !state.last_verify_build_ok {
            return Err(format!(
                "Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
                name
            ));
        }
    }

    // ── Gate 7: `shell`-specific checks ──
    if name == "shell" {
        let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
        // Redirect to `inspect_host` when the prompt maps to a host-inspection topic AND
        // the command itself looks like a structured host inspection.
        if let Some(prompt) = self.latest_user_prompt() {
            if let Some(topic) = preferred_host_inspection_topic(prompt) {
                if shell_looks_like_structured_host_inspection(command) {
                    return Err(format!(
                        "Action blocked: this is a host-inspection question. Prefer `inspect_host(topic: \"{}\")` instead of raw `shell` for PATH, toolchains, environment/package-manager health, network state, service state, running processes, desktop, Downloads, listening ports, repo-doctor checks, or directory/disk summaries. Use `shell` only if `inspect_host` cannot answer the question directly.",
                        topic
                    ));
                }
            }
        }
        // A missing or whitespace-only `reason` counts as empty.
        let reason = args
            .get("reason")
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .trim();
        let risk = crate::tools::guard::classify_bash_risk(command);
        if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
            return Err(
                "Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
                    .to_string(),
            );
        }
    }

    Ok(())
}
1311
1312 fn build_action_receipt(
1313 &self,
1314 name: &str,
1315 args: &Value,
1316 output: &str,
1317 is_error: bool,
1318 ) -> Option<ChatMessage> {
1319 if is_error || !is_destructive_tool(name) {
1320 return None;
1321 }
1322
1323 let mut receipt = String::from("[ACTION RECEIPT]\n");
1324 receipt.push_str(&format!("- tool: {}\n", name));
1325 if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
1326 receipt.push_str(&format!("- target: {}\n", path));
1327 }
1328 if name == "shell" {
1329 if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
1330 receipt.push_str(&format!("- command: {}\n", command));
1331 }
1332 if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
1333 if !reason.trim().is_empty() {
1334 receipt.push_str(&format!("- reason: {}\n", reason.trim()));
1335 }
1336 }
1337 }
1338 let first_line = output.lines().next().unwrap_or(output).trim();
1339 receipt.push_str(&format!("- outcome: {}\n", first_line));
1340 Some(ChatMessage::system(&receipt))
1341 }
1342
1343 fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
1344 self.tools
1345 .retain(|tool| !tool.function.name.starts_with("mcp__"));
1346 self.tools
1347 .extend(mcp_tools.iter().map(|tool| ToolDefinition {
1348 tool_type: "function".into(),
1349 function: ToolFunction {
1350 name: tool.name.clone(),
1351 description: tool.description.clone().unwrap_or_default(),
1352 parameters: tool.input_schema.clone(),
1353 },
1354 metadata: crate::agent::inference::tool_metadata_for_name(&tool.name),
1355 }));
1356 }
1357
1358 async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
1359 let summary = {
1360 let mcp = self.mcp_manager.lock().await;
1361 mcp.runtime_report()
1362 };
1363 let _ = tx
1364 .send(InferenceEvent::McpStatus {
1365 state: summary.state,
1366 summary: summary.summary,
1367 })
1368 .await;
1369 }
1370
1371 async fn refresh_mcp_tools(
1372 &mut self,
1373 tx: &mpsc::Sender<InferenceEvent>,
1374 ) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
1375 let mcp_tools = {
1376 let mut mcp = self.mcp_manager.lock().await;
1377 match mcp.initialize_all().await {
1378 Ok(()) => mcp.discover_tools().await,
1379 Err(e) => {
1380 drop(mcp);
1381 self.replace_mcp_tool_definitions(&[]);
1382 self.emit_mcp_runtime_status(tx).await;
1383 return Err(e.into());
1384 }
1385 }
1386 };
1387
1388 self.replace_mcp_tool_definitions(&mcp_tools);
1389 self.emit_mcp_runtime_status(tx).await;
1390 Ok(mcp_tools)
1391 }
1392
1393 pub async fn initialize_mcp(
1395 &mut self,
1396 tx: &mpsc::Sender<InferenceEvent>,
1397 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1398 let _ = self.refresh_mcp_tools(tx).await?;
1399 Ok(())
1400 }
1401
1402 pub async fn run_turn(
1408 &mut self,
1409 user_turn: &UserTurn,
1410 tx: mpsc::Sender<InferenceEvent>,
1411 yolo: bool,
1412 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
1413 let user_input = user_turn.text.as_str();
1414 if user_input.trim() == "/new" {
1416 self.history.clear();
1417 self.reasoning_history = None;
1418 self.session_memory.clear();
1419 self.running_summary = None;
1420 self.correction_hints.clear();
1421 self.pinned_files.lock().await.clear();
1422 self.reset_action_grounding().await;
1423 reset_task_files();
1424 let _ = std::fs::remove_file(session_path());
1425 self.save_empty_session();
1426 self.emit_compaction_pressure(&tx).await;
1427 self.emit_prompt_pressure_idle(&tx).await;
1428 for chunk in chunk_text(
1429 "Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
1430 8,
1431 ) {
1432 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1433 }
1434 let _ = tx.send(InferenceEvent::Done).await;
1435 return Ok(());
1436 }
1437
1438 if user_input.trim() == "/forget" {
1439 self.history.clear();
1440 self.reasoning_history = None;
1441 self.session_memory.clear();
1442 self.running_summary = None;
1443 self.correction_hints.clear();
1444 self.pinned_files.lock().await.clear();
1445 self.reset_action_grounding().await;
1446 reset_task_files();
1447 purge_persistent_memory();
1448 tokio::task::block_in_place(|| self.vein.reset());
1449 let _ = std::fs::remove_file(session_path());
1450 self.save_empty_session();
1451 self.emit_compaction_pressure(&tx).await;
1452 self.emit_prompt_pressure_idle(&tx).await;
1453 for chunk in chunk_text(
1454 "Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
1455 8,
1456 ) {
1457 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1458 }
1459 let _ = tx.send(InferenceEvent::Done).await;
1460 return Ok(());
1461 }
1462
1463 if user_input.trim() == "/vein-inspect" {
1464 let indexed = self.refresh_vein_index();
1465 let report = self.build_vein_inspection_report(indexed);
1466 let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
1467 let _ = tx
1468 .send(InferenceEvent::VeinStatus {
1469 file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
1470 embedded_count: snapshot.embedded_source_doc_chunks,
1471 docs_only: self.vein_docs_only_mode(),
1472 })
1473 .await;
1474 for chunk in chunk_text(&report, 8) {
1475 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1476 }
1477 let _ = tx.send(InferenceEvent::Done).await;
1478 return Ok(());
1479 }
1480
1481 if user_input.trim() == "/workspace-profile" {
1482 let root = crate::tools::file_ops::workspace_root();
1483 let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
1484 let report = crate::agent::workspace_profile::profile_report(&root);
1485 for chunk in chunk_text(&report, 8) {
1486 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1487 }
1488 let _ = tx.send(InferenceEvent::Done).await;
1489 return Ok(());
1490 }
1491
1492 if user_input.trim() == "/vein-reset" {
1493 tokio::task::block_in_place(|| self.vein.reset());
1494 let _ = tx
1495 .send(InferenceEvent::VeinStatus {
1496 file_count: 0,
1497 embedded_count: 0,
1498 docs_only: self.vein_docs_only_mode(),
1499 })
1500 .await;
1501 for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
1502 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1503 }
1504 let _ = tx.send(InferenceEvent::Done).await;
1505 return Ok(());
1506 }
1507
1508 let config = crate::agent::config::load_config();
1510 self.recovery_context.clear();
1511 let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
1512 if !manual_runtime_refresh {
1513 if let Some((model_id, context_length, changed)) = self
1514 .refresh_runtime_profile_and_report(&tx, "turn_start")
1515 .await
1516 {
1517 if changed {
1518 let _ = tx
1519 .send(InferenceEvent::Thought(format!(
1520 "Runtime refresh: using model `{}` with CTX {} for this turn.",
1521 model_id, context_length
1522 )))
1523 .await;
1524 }
1525 }
1526 }
1527 self.emit_compaction_pressure(&tx).await;
1528 let current_model = self.engine.current_model();
1529 self.engine.set_gemma_native_formatting(
1530 crate::agent::config::effective_gemma_native_formatting(&config, ¤t_model),
1531 );
1532 let _turn_id = self.begin_grounded_turn().await;
1533 let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
1534 let mcp_tools = match self.refresh_mcp_tools(&tx).await {
1535 Ok(tools) => tools,
1536 Err(e) => {
1537 let _ = tx
1538 .send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
1539 .await;
1540 Vec::new()
1541 }
1542 };
1543
1544 let effective_fast = config
1546 .fast_model
1547 .clone()
1548 .or_else(|| self.fast_model.clone());
1549 let effective_think = config
1550 .think_model
1551 .clone()
1552 .or_else(|| self.think_model.clone());
1553
1554 if user_input.trim() == "/lsp" {
1556 let mut lsp = self.lsp_manager.lock().await;
1557 match lsp.start_servers().await {
1558 Ok(_) => {
1559 let _ = tx
1560 .send(InferenceEvent::MutedToken(
1561 "LSP: Servers Initialized OK.".to_string(),
1562 ))
1563 .await;
1564 }
1565 Err(e) => {
1566 let _ = tx
1567 .send(InferenceEvent::Error(format!(
1568 "LSP: Failed to start servers - {}",
1569 e
1570 )))
1571 .await;
1572 }
1573 }
1574 let _ = tx.send(InferenceEvent::Done).await;
1575 return Ok(());
1576 }
1577
1578 if user_input.trim() == "/runtime-refresh" {
1579 match self
1580 .refresh_runtime_profile_and_report(&tx, "manual_command")
1581 .await
1582 {
1583 Some((model_id, context_length, changed)) => {
1584 let msg = if changed {
1585 format!(
1586 "Runtime profile refreshed. Model: {} | CTX: {}",
1587 model_id, context_length
1588 )
1589 } else {
1590 format!(
1591 "Runtime profile unchanged. Model: {} | CTX: {}",
1592 model_id, context_length
1593 )
1594 };
1595 for chunk in chunk_text(&msg, 8) {
1596 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1597 }
1598 }
1599 None => {
1600 let _ = tx
1601 .send(InferenceEvent::Error(
1602 "Runtime refresh failed: LM Studio profile could not be read."
1603 .to_string(),
1604 ))
1605 .await;
1606 }
1607 }
1608 let _ = tx.send(InferenceEvent::Done).await;
1609 return Ok(());
1610 }
1611
1612 if user_input.trim() == "/ask" {
1613 self.set_workflow_mode(WorkflowMode::Ask);
1614 for chunk in chunk_text(
1615 "Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
1616 8,
1617 ) {
1618 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1619 }
1620 let _ = tx.send(InferenceEvent::Done).await;
1621 return Ok(());
1622 }
1623
1624 if user_input.trim() == "/code" {
1625 self.set_workflow_mode(WorkflowMode::Code);
1626 let mut message =
1627 "Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
1628 if let Some(plan) = self.current_plan_summary() {
1629 message.push_str(&format!(" Current plan: {plan}."));
1630 }
1631 for chunk in chunk_text(&message, 8) {
1632 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1633 }
1634 let _ = tx.send(InferenceEvent::Done).await;
1635 return Ok(());
1636 }
1637
1638 if user_input.trim() == "/architect" {
1639 self.set_workflow_mode(WorkflowMode::Architect);
1640 let mut message =
1641 "Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement.".to_string();
1642 if let Some(plan) = self.current_plan_summary() {
1643 message.push_str(&format!(" Existing plan: {plan}."));
1644 }
1645 for chunk in chunk_text(&message, 8) {
1646 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1647 }
1648 let _ = tx.send(InferenceEvent::Done).await;
1649 return Ok(());
1650 }
1651
1652 if user_input.trim() == "/read-only" {
1653 self.set_workflow_mode(WorkflowMode::ReadOnly);
1654 for chunk in chunk_text(
1655 "Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
1656 8,
1657 ) {
1658 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1659 }
1660 let _ = tx.send(InferenceEvent::Done).await;
1661 return Ok(());
1662 }
1663
1664 if user_input.trim() == "/auto" {
1665 self.set_workflow_mode(WorkflowMode::Auto);
1666 for chunk in chunk_text(
1667 "Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
1668 8,
1669 ) {
1670 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1671 }
1672 let _ = tx.send(InferenceEvent::Done).await;
1673 return Ok(());
1674 }
1675
1676 if user_input.trim() == "/chat" {
1677 self.set_workflow_mode(WorkflowMode::Chat);
1678 let _ = tx.send(InferenceEvent::Done).await;
1679 return Ok(());
1680 }
1681
1682 if user_input.trim() == "/reroll" {
1683 let soul = crate::ui::hatch::generate_soul_random();
1684 self.snark = soul.snark;
1685 self.chaos = soul.chaos;
1686 self.soul_personality = soul.personality.clone();
1687 let species = soul.species.clone();
1692 if let Some(eng) = Arc::get_mut(&mut self.engine) {
1693 eng.species = species.clone();
1694 }
1695 let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
1696 let _ = tx
1697 .send(InferenceEvent::SoulReroll {
1698 species: soul.species.clone(),
1699 rarity: soul.rarity.label().to_string(),
1700 shiny: soul.shiny,
1701 personality: soul.personality.clone(),
1702 })
1703 .await;
1704 for chunk in chunk_text(
1705 &format!(
1706 "A new companion awakens!\n[{}{}] {} — \"{}\"",
1707 soul.rarity.label(),
1708 shiny_tag,
1709 soul.species,
1710 soul.personality
1711 ),
1712 8,
1713 ) {
1714 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1715 }
1716 let _ = tx.send(InferenceEvent::Done).await;
1717 return Ok(());
1718 }
1719
1720 if user_input.trim() == "/agent" {
1721 self.set_workflow_mode(WorkflowMode::Auto);
1722 let _ = tx.send(InferenceEvent::Done).await;
1723 return Ok(());
1724 }
1725
1726 let mut effective_user_input = user_input.trim().to_string();
1727 if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
1728 self.set_workflow_mode(mode);
1729 effective_user_input = rest.to_string();
1730 }
1731 let transcript_user_input = transcript_user_turn_text(user_turn, &effective_user_input);
1732 effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
1733 let implement_current_plan = self.workflow_mode == WorkflowMode::Code
1734 && is_current_plan_execution_request(&effective_user_input)
1735 && self
1736 .session_memory
1737 .current_plan
1738 .as_ref()
1739 .map(|plan| plan.has_signal())
1740 .unwrap_or(false);
1741 self.plan_execution_active
1742 .store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
1743 let _plan_execution_guard = PlanExecutionGuard {
1744 flag: self.plan_execution_active.clone(),
1745 };
1746 let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
1747
1748 if let Some(answer_kind) = intent.direct_answer {
1750 match answer_kind {
1751 DirectAnswerKind::About => {
1752 let response = build_about_answer();
1753 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1754 .await;
1755 return Ok(());
1756 }
1757 DirectAnswerKind::LanguageCapability => {
1758 let response = build_language_capability_answer();
1759 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1760 .await;
1761 return Ok(());
1762 }
1763 DirectAnswerKind::UnsafeWorkflowPressure => {
1764 let response = build_unsafe_workflow_pressure_answer();
1765 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1766 .await;
1767 return Ok(());
1768 }
1769 DirectAnswerKind::SessionMemory => {
1770 let response = build_session_memory_answer();
1771 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1772 .await;
1773 return Ok(());
1774 }
1775 DirectAnswerKind::RecoveryRecipes => {
1776 let response = build_recovery_recipes_answer();
1777 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1778 .await;
1779 return Ok(());
1780 }
1781 DirectAnswerKind::McpLifecycle => {
1782 let response = build_mcp_lifecycle_answer();
1783 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1784 .await;
1785 return Ok(());
1786 }
1787 DirectAnswerKind::AuthorizationPolicy => {
1788 let response = build_authorization_policy_answer();
1789 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1790 .await;
1791 return Ok(());
1792 }
1793 DirectAnswerKind::ToolClasses => {
1794 let response = build_tool_classes_answer();
1795 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1796 .await;
1797 return Ok(());
1798 }
1799 DirectAnswerKind::ToolRegistryOwnership => {
1800 let response = build_tool_registry_ownership_answer();
1801 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1802 .await;
1803 return Ok(());
1804 }
1805 DirectAnswerKind::SessionResetSemantics => {
1806 let response = build_session_reset_semantics_answer();
1807 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1808 .await;
1809 return Ok(());
1810 }
1811 DirectAnswerKind::ProductSurface => {
1812 let response = build_product_surface_answer();
1813 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1814 .await;
1815 return Ok(());
1816 }
1817 DirectAnswerKind::ReasoningSplit => {
1818 let response = build_reasoning_split_answer();
1819 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1820 .await;
1821 return Ok(());
1822 }
1823 DirectAnswerKind::Identity => {
1824 let response = build_identity_answer();
1825 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1826 .await;
1827 return Ok(());
1828 }
1829 DirectAnswerKind::WorkflowModes => {
1830 let response = build_workflow_modes_answer();
1831 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1832 .await;
1833 return Ok(());
1834 }
1835 DirectAnswerKind::GemmaNative => {
1836 let response = build_gemma_native_answer();
1837 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1838 .await;
1839 return Ok(());
1840 }
1841 DirectAnswerKind::GemmaNativeSettings => {
1842 let response = build_gemma_native_settings_answer();
1843 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1844 .await;
1845 return Ok(());
1846 }
1847 DirectAnswerKind::VerifyProfiles => {
1848 let response = build_verify_profiles_answer();
1849 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1850 .await;
1851 return Ok(());
1852 }
1853 DirectAnswerKind::Toolchain => {
1854 let lower = effective_user_input.to_lowercase();
1855 let topic = if (lower.contains("voice output") || lower.contains("voice"))
1856 && (lower.contains("lag")
1857 || lower.contains("behind visible text")
1858 || lower.contains("latency"))
1859 {
1860 "voice_latency_plan"
1861 } else {
1862 "all"
1863 };
1864 let response =
1865 crate::tools::toolchain::describe_toolchain(&serde_json::json!({
1866 "topic": topic,
1867 "question": effective_user_input,
1868 }))
1869 .await
1870 .unwrap_or_else(|e| format!("Error: {}", e));
1871 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1872 .await;
1873 return Ok(());
1874 }
1875 DirectAnswerKind::ArchitectSessionResetPlan => {
1876 let plan = build_architect_session_reset_plan();
1877 let response = plan.to_markdown();
1878 let _ = crate::tools::plan::save_plan_handoff(&plan);
1879 self.session_memory.current_plan = Some(plan);
1880 self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
1881 .await;
1882 return Ok(());
1883 }
1884 }
1885 }
1886
1887 if matches!(
1888 self.workflow_mode,
1889 WorkflowMode::Ask | WorkflowMode::ReadOnly
1890 ) && looks_like_mutation_request(&effective_user_input)
1891 {
1892 let response = build_mode_redirect_answer(self.workflow_mode);
1893 self.history.push(ChatMessage::user(&effective_user_input));
1894 self.history.push(ChatMessage::assistant_text(&response));
1895 self.transcript.log_user(&transcript_user_input);
1896 self.transcript.log_agent(&response);
1897 for chunk in chunk_text(&response, 8) {
1898 if !chunk.is_empty() {
1899 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1900 }
1901 }
1902 let _ = tx.send(InferenceEvent::Done).await;
1903 self.trim_history(80);
1904 self.refresh_session_memory();
1905 self.save_session();
1906 return Ok(());
1907 }
1908
1909 if user_input.trim() == "/think" {
1910 self.think_mode = Some(true);
1911 for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
1912 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1913 }
1914 let _ = tx.send(InferenceEvent::Done).await;
1915 return Ok(());
1916 }
1917 if user_input.trim() == "/no_think" {
1918 self.think_mode = Some(false);
1919 for chunk in chunk_text(
1920 "Think mode: OFF — fast mode enabled (no chain-of-thought).",
1921 8,
1922 ) {
1923 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1924 }
1925 let _ = tx.send(InferenceEvent::Done).await;
1926 return Ok(());
1927 }
1928
1929 if user_input.trim_start().starts_with("/pin ") {
1931 let path = user_input.trim_start()[5..].trim();
1932 match std::fs::read_to_string(path) {
1933 Ok(content) => {
1934 self.pinned_files
1935 .lock()
1936 .await
1937 .insert(path.to_string(), content);
1938 let msg = format!(
1939 "Pinned: {} — this file is now locked in model context.",
1940 path
1941 );
1942 for chunk in chunk_text(&msg, 8) {
1943 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1944 }
1945 }
1946 Err(e) => {
1947 let _ = tx
1948 .send(InferenceEvent::Error(format!(
1949 "Failed to pin {}: {}",
1950 path, e
1951 )))
1952 .await;
1953 }
1954 }
1955 let _ = tx.send(InferenceEvent::Done).await;
1956 return Ok(());
1957 }
1958
1959 if user_input.trim_start().starts_with("/unpin ") {
1961 let path = user_input.trim_start()[7..].trim();
1962 if self.pinned_files.lock().await.remove(path).is_some() {
1963 let msg = format!("Unpinned: {} — file removed from active context.", path);
1964 for chunk in chunk_text(&msg, 8) {
1965 let _ = tx.send(InferenceEvent::Token(chunk)).await;
1966 }
1967 } else {
1968 let _ = tx
1969 .send(InferenceEvent::Error(format!(
1970 "File {} was not pinned.",
1971 path
1972 )))
1973 .await;
1974 }
1975 let _ = tx.send(InferenceEvent::Done).await;
1976 return Ok(());
1977 }
1978
1979 let tiny_context_mode = self.engine.current_context_length() <= 8_192;
1983 let mut base_prompt = self.engine.build_system_prompt(
1984 self.snark,
1985 self.chaos,
1986 self.brief,
1987 self.professional,
1988 &self.tools,
1989 self.reasoning_history.as_deref(),
1990 &mcp_tools,
1991 );
1992 if !tiny_context_mode {
1993 if let Some(hint) = &config.context_hint {
1994 if !hint.trim().is_empty() {
1995 base_prompt.push_str(&format!(
1996 "\n\n# Project Context (from .hematite/settings.json)\n{}",
1997 hint
1998 ));
1999 }
2000 }
2001 if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
2002 &crate::tools::file_ops::workspace_root(),
2003 ) {
2004 base_prompt.push_str(&format!("\n\n{}", profile_block));
2005 }
2006 if let Some(ref l1) = self.l1_context {
2008 base_prompt.push_str(&format!("\n\n{}", l1));
2009 }
2010 }
2011 let grounded_trace_mode = intent.grounded_trace_mode
2012 || intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
2013 let capability_mode =
2014 intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
2015 let toolchain_mode =
2016 intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
2017 let host_inspection_mode = intent.host_inspection_mode;
2018 let fix_plan_mode =
2019 preferred_host_inspection_topic(&effective_user_input) == Some("fix_plan");
2020 let project_map_mode = intent.preserve_project_map_output
2021 || intent.primary_class == QueryIntentClass::RepoArchitecture;
2022 let architecture_overview_mode = intent.architecture_overview_mode;
2023 let capability_needs_repo = intent.capability_needs_repo;
2024 let mut system_msg = build_system_with_corrections(
2025 &base_prompt,
2026 &self.correction_hints,
2027 &self.gpu_state,
2028 &self.git_state,
2029 &config,
2030 );
2031 if tiny_context_mode {
2032 system_msg.push_str(
2033 "\n\n# TINY CONTEXT TURN MODE\n\
2034 Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
2035 );
2036 }
2037 if !tiny_context_mode && grounded_trace_mode {
2038 system_msg.push_str(
2039 "\n\n# GROUNDED TRACE MODE\n\
2040 This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
2041 Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
2042 Use verified file paths, function names, structs, enums, channels, and event types only.\n\
2043 Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
2044 Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
2045 If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
2046 Do not invent names such as synthetic channels or subsystems.\n\
2047 If a detail is not verified from the code or tool output, say `uncertain`.\n\
2048 For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
2049 );
2050 }
2051 if !tiny_context_mode && capability_mode {
2052 system_msg.push_str(
2053 "\n\n# CAPABILITY QUESTION MODE\n\
2054 This is a product or capability question unless the user explicitly asks about repository implementation.\n\
2055 Answer from stable Hematite capabilities and current runtime state.\n\
2056 It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
2057 Do NOT call repo-inspection tools like `map_project`, `read_file`, or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
2058 Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
2059 Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
2060 If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
2061 Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
2062 For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
2063 Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
2064 Keep the answer short, plain, and ASCII-first.\n"
2065 );
2066 }
2067 if !tiny_context_mode && toolchain_mode {
2068 system_msg.push_str(
2069 "\n\n# TOOLCHAIN DISCIPLINE MODE\n\
2070 This turn is about Hematite's real built-in tools and how to choose them.\n\
2071 Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
2072 Use only real built-in tool names.\n\
2073 Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
2074 If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
2075 Be explicit about which tools are optional or conditional.\n"
2076 );
2077 }
2078 if !tiny_context_mode && host_inspection_mode {
2079 system_msg.push_str(
2080 "\n\n# HOST INSPECTION MODE\n\
2081 This turn is about the local machine and environment, not repository architecture.\n\
2082 Prefer `inspect_host` before raw `shell` for PATH analysis, installed developer tool versions, environment/package-manager health, grounded fix plans, network snapshots, service snapshots, process snapshots, desktop item counts, Downloads summaries, listening ports, repo-doctor checks, and directory/disk-size reports.\n\
2083 Use the closest built-in topic first: `summary`, `toolchains`, `path`, `env_doctor`, `fix_plan`, `network`, `services`, `processes`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
2084 If the user asks how to fix a common workstation problem such as `cargo not found`, `port 3000 already in use`, or `LM Studio not reachable`, use `fix_plan` first instead of `env_doctor`, `path`, or `ports`.\n\
2085 If `env_doctor` answers the question, stop there. Do not follow with `path` unless the user explicitly asks for raw PATH entries.\n\
2086 Only use `shell` if the host question truly goes beyond `inspect_host`.\n"
2087 );
2088 }
2089 if !tiny_context_mode && fix_plan_mode {
2090 system_msg.push_str(
2091 "\n\n# FIX PLAN MODE\n\
2092 This turn is a workstation remediation question, not just a diagnosis question.\n\
2093 Call `inspect_host` with `topic=fix_plan` first.\n\
2094 Do not start with `path`, `toolchains`, `env_doctor`, or `ports` unless the user explicitly asks for diagnosis details instead of a fix plan.\n\
2095 Keep the answer grounded, stepwise, and approval-aware.\n"
2096 );
2097 }
2098 if !tiny_context_mode && project_map_mode {
2099 system_msg.push_str(
2100 "\n\n# PROJECT MAP DISCIPLINE MODE\n\
2101 For repository structure, entrypoint, owner-file, or architecture-map questions, prefer `map_project` first.\n\
2102 If `map_project` provides likely entrypoints and core owner files, preserve that grounded structure instead of rewriting it into broad prose.\n\
2103 Do not invent new entrypoints or owner files that are not present in the tool output.\n\
2104 Keep the final answer compact and architecture-first.\n"
2105 );
2106 }
2107 if !tiny_context_mode && architecture_overview_mode {
2108 system_msg.push_str(
2109 "\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
2110 For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow and `map_project` for compact structure.\n\
2111 Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
2112 Preserve grounded tool output rather than restyling it into a larger answer.\n"
2113 );
2114 }
2115
2116 system_msg.push_str(&format!(
2118 "\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
2119 self.workflow_mode.label()
2120 ));
2121 if tiny_context_mode {
2122 system_msg
2123 .push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
2124 } else {
2125 match self.workflow_mode {
2126 WorkflowMode::Auto => system_msg.push_str(
2127 "AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
2128 ),
2129 WorkflowMode::Ask => system_msg.push_str(
2130 "ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
2131 ),
2132 WorkflowMode::Code => system_msg.push_str(
2133 "CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
2134 ),
2135 WorkflowMode::Architect => system_msg.push_str(
2136 "ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
2137 ),
2138 WorkflowMode::ReadOnly => system_msg.push_str(
2139 "READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
2140 ),
2141 WorkflowMode::Chat => {} }
2143 }
2144 if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
2145 system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
2146 system_msg.push_str(architect_handoff_contract());
2147 system_msg.push('\n');
2148 }
2149 if !tiny_context_mode && implement_current_plan {
2150 system_msg.push_str(
2151 "\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
2152 The user explicitly asked you to implement the current saved plan.\n\
2153 Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
2154 Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
2155 Do not call `map_project` during current-plan execution.\n\
2156 Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
2157 If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
2158 );
2159 if let Some(plan) = self.session_memory.current_plan.as_ref() {
2160 if !plan.target_files.is_empty() {
2161 system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
2162 for path in &plan.target_files {
2163 system_msg.push_str(&format!("- {}\n", path));
2164 }
2165 }
2166 }
2167 }
2168 if !tiny_context_mode {
2169 let pinned = self.pinned_files.lock().await;
2170 if !pinned.is_empty() {
2171 system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
2172 system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
2173 for (path, content) in pinned.iter() {
2174 system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
2175 }
2176 }
2177 }
2178 if !tiny_context_mode {
2179 self.append_session_handoff(&mut system_msg);
2180 }
2181 let system_msg = if self.workflow_mode.is_chat() {
2184 self.build_chat_system_prompt()
2185 } else {
2186 system_msg
2187 };
2188 if self.history.is_empty() || self.history[0].role != "system" {
2189 self.history.insert(0, ChatMessage::system(&system_msg));
2190 } else {
2191 self.history[0] = ChatMessage::system(&system_msg);
2192 }
2193
2194 self.cancel_token
2196 .store(false, std::sync::atomic::Ordering::SeqCst);
2197
2198 self.reasoning_history = None;
2201
2202 let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2203 let user_content = match self.think_mode {
2204 Some(true) => format!("/think\n{}", effective_user_input),
2205 Some(false) => format!("/no_think\n{}", effective_user_input),
2206 None if !is_gemma
2211 && !self.workflow_mode.is_chat()
2212 && !is_quick_tool_request(&effective_user_input) =>
2213 {
2214 format!("/think\n{}", effective_user_input)
2215 }
2216 None => effective_user_input.clone(),
2217 };
2218 if let Some(image) = user_turn.attached_image.as_ref() {
2219 let image_url =
2220 crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
2221 .map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
2222 self.history
2223 .push(ChatMessage::user_with_image(&user_content, &image_url));
2224 } else {
2225 self.history.push(ChatMessage::user(&user_content));
2226 }
2227 self.transcript.log_user(&transcript_user_input);
2228
2229 let vein_docs_only = self.vein_docs_only_mode();
2233 let allow_vein_context = !self.workflow_mode.is_chat()
2234 || should_use_vein_in_chat(&effective_user_input, vein_docs_only);
2235 let (vein_context, vein_paths) = if allow_vein_context {
2236 self.refresh_vein_index();
2237 let _ = tx
2238 .send(InferenceEvent::VeinStatus {
2239 file_count: self.vein.file_count(),
2240 embedded_count: self.vein.embedded_chunk_count(),
2241 docs_only: vein_docs_only,
2242 })
2243 .await;
2244 match self.build_vein_context(&effective_user_input) {
2245 Some((ctx, paths)) => (Some(ctx), paths),
2246 None => (None, Vec::new()),
2247 }
2248 } else {
2249 (None, Vec::new())
2250 };
2251 if !vein_paths.is_empty() {
2252 let _ = tx
2253 .send(InferenceEvent::VeinContext { paths: vein_paths })
2254 .await;
2255 }
2256
2257 let routed_model = route_model(
2259 &effective_user_input,
2260 effective_fast.as_deref(),
2261 effective_think.as_deref(),
2262 )
2263 .map(|s| s.to_string());
2264
2265 let mut loop_intervention: Option<String> = None;
2266 let mut implementation_started = false;
2267 let mut non_mutating_plan_steps = 0usize;
2268 let non_mutating_plan_soft_cap = 5usize;
2269 let non_mutating_plan_hard_cap = 8usize;
2270 let mut overview_project_map: Option<String> = None;
2271 let mut overview_runtime_trace: Option<String> = None;
2272
2273 let max_iters = 25;
2275 let mut consecutive_errors = 0;
2276 let mut first_iter = true;
2277 let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
2278 let _result_counts: std::collections::HashMap<String, usize> =
2280 std::collections::HashMap::new();
2281 let mut repeat_counts: std::collections::HashMap<String, usize> =
2283 std::collections::HashMap::new();
2284 let mut completed_tool_cache: std::collections::HashMap<String, CachedToolResult> =
2285 std::collections::HashMap::new();
2286 let mut successful_read_targets: std::collections::HashSet<String> =
2287 std::collections::HashSet::new();
2288 let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
2290 std::collections::HashSet::new();
2291 let mut successful_grep_targets: std::collections::HashSet<String> =
2292 std::collections::HashSet::new();
2293 let mut no_match_grep_targets: std::collections::HashSet<String> =
2294 std::collections::HashSet::new();
2295 let mut broad_grep_targets: std::collections::HashSet<String> =
2296 std::collections::HashSet::new();
2297
2298 let mut turn_anchor = self.history.len().saturating_sub(1);
2300
2301 for _iter in 0..max_iters {
2302 let mut mutation_occurred = false;
2303 if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
2305 self.cancel_token
2306 .store(false, std::sync::atomic::Ordering::SeqCst);
2307 let _ = tx
2308 .send(InferenceEvent::Thought("Turn cancelled by user.".into()))
2309 .await;
2310 let _ = tx.send(InferenceEvent::Done).await;
2311 return Ok(());
2312 }
2313
2314 if self
2316 .compact_history_if_needed(&tx, Some(turn_anchor))
2317 .await?
2318 {
2319 turn_anchor = 2;
2322 }
2323
2324 let inject_vein = first_iter && !implement_current_plan;
2328 let messages = if implement_current_plan {
2329 first_iter = false;
2330 self.context_window_slice_from(turn_anchor)
2331 } else {
2332 first_iter = false;
2333 self.context_window_slice()
2334 };
2335
2336 let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
2340 if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2343 let mut msgs = vec![self.history[0].clone()];
2344 msgs.push(ChatMessage::system(&intervention));
2345 msgs
2346 } else {
2347 let merged =
2348 format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
2349 vec![ChatMessage::system(&merged)]
2350 }
2351 } else {
2352 vec![self.history[0].clone()]
2353 };
2354
2355 if inject_vein {
2359 if let Some(ref ctx) = vein_context.as_ref() {
2360 if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
2361 prompt_msgs.push(ChatMessage::system(ctx));
2362 } else {
2363 let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
2364 prompt_msgs[0] = ChatMessage::system(&merged);
2365 }
2366 }
2367 }
2368 prompt_msgs.extend(messages);
2369 if let Some(budget_note) =
2370 enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
2371 {
2372 self.emit_operator_checkpoint(
2373 &tx,
2374 OperatorCheckpointState::BudgetReduced,
2375 budget_note,
2376 )
2377 .await;
2378 let recipe = plan_recovery(
2379 RecoveryScenario::PromptBudgetPressure,
2380 &self.recovery_context,
2381 );
2382 self.emit_recovery_recipe_summary(
2383 &tx,
2384 recipe.recipe.scenario.label(),
2385 compact_recovery_plan_summary(&recipe),
2386 )
2387 .await;
2388 }
2389 self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
2390 .await;
2391
2392 let (mut text, mut tool_calls, usage, finish_reason) = match self
2393 .engine
2394 .call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
2395 .await
2396 {
2397 Ok(result) => result,
2398 Err(e) => {
2399 let class = classify_runtime_failure(&e);
2400 if should_retry_runtime_failure(class) {
2401 if self.recovery_context.consume_transient_retry() {
2402 let label = match class {
2403 RuntimeFailureClass::ProviderDegraded => "provider_degraded",
2404 _ => "empty_model_response",
2405 };
2406 self.transcript.log_system(&format!(
2407 "Automatic provider recovery triggered: {}",
2408 e.trim()
2409 ));
2410 self.emit_recovery_recipe_summary(
2411 &tx,
2412 label,
2413 compact_runtime_recovery_summary(class),
2414 )
2415 .await;
2416 let _ = tx
2417 .send(InferenceEvent::ProviderStatus {
2418 state: ProviderRuntimeState::Recovering,
2419 summary: compact_runtime_recovery_summary(class).into(),
2420 })
2421 .await;
2422 self.emit_operator_checkpoint(
2423 &tx,
2424 OperatorCheckpointState::RecoveringProvider,
2425 compact_runtime_recovery_summary(class),
2426 )
2427 .await;
2428 continue;
2429 }
2430 }
2431
2432 self.emit_runtime_failure(&tx, class, &e).await;
2433 break;
2434 }
2435 };
2436 self.emit_provider_live(&tx).await;
2437
2438 if let Some(ref u) = usage {
2440 let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
2441 }
2442
2443 if tool_calls
2446 .as_ref()
2447 .map(|calls| calls.is_empty())
2448 .unwrap_or(true)
2449 {
2450 if let Some(raw_text) = text.as_deref() {
2451 let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
2452 if !native_calls.is_empty() {
2453 tool_calls = Some(native_calls);
2454 let stripped =
2455 crate::agent::inference::strip_native_tool_call_text(raw_text);
2456 text = if stripped.trim().is_empty() {
2457 None
2458 } else {
2459 Some(stripped)
2460 };
2461 }
2462 }
2463 }
2464
2465 let tool_calls = tool_calls.filter(|c| !c.is_empty());
2468 let near_context_ceiling = usage
2469 .as_ref()
2470 .map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
2471 .unwrap_or(false);
2472
2473 if let Some(calls) = tool_calls {
2474 let (calls, prune_trace_note) =
2475 prune_architecture_trace_batch(calls, architecture_overview_mode);
2476 if let Some(note) = prune_trace_note {
2477 let _ = tx.send(InferenceEvent::Thought(note)).await;
2478 }
2479
2480 let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
2481 calls,
2482 self.workflow_mode.is_read_only(),
2483 architecture_overview_mode,
2484 );
2485 if let Some(note) = prune_bloat_note {
2486 let _ = tx.send(InferenceEvent::Thought(note)).await;
2487 }
2488
2489 let (calls, prune_note) = prune_authoritative_tool_batch(
2490 calls,
2491 grounded_trace_mode,
2492 &effective_user_input,
2493 );
2494 if let Some(note) = prune_note {
2495 let _ = tx.send(InferenceEvent::Thought(note)).await;
2496 }
2497
2498 let (calls, batch_note) = order_batch_reads_first(calls);
2499 if let Some(note) = batch_note {
2500 let _ = tx.send(InferenceEvent::Thought(note)).await;
2501 }
2502
2503 if let Some(repeated_path) = calls
2504 .iter()
2505 .filter(|c| {
2506 let parsed = serde_json::from_str::<Value>(
2507 &crate::agent::inference::normalize_tool_argument_string(
2508 &c.function.name,
2509 &c.function.arguments,
2510 ),
2511 )
2512 .ok();
2513 let offset = parsed
2514 .as_ref()
2515 .and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
2516 .unwrap_or(0);
2517 if offset < 200 {
2520 return true;
2521 }
2522 if let Some(path) = parsed
2523 .as_ref()
2524 .and_then(|args| args.get("path").and_then(|v| v.as_str()))
2525 {
2526 let normalized = normalize_workspace_path(path);
2527 return successful_read_regions.contains(&(normalized, offset));
2528 }
2529 false
2530 })
2531 .filter_map(|c| repeated_read_target(&c.function))
2532 .find(|path| successful_read_targets.contains(path))
2533 {
2534 loop_intervention = Some(format!(
2535 "STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
2536 repeated_path
2537 ));
2538 let _ = tx
2539 .send(InferenceEvent::Thought(
2540 "Read discipline: preventing repeated full-file reads on the same path."
2541 .into(),
2542 ))
2543 .await;
2544 continue;
2545 }
2546
2547 if capability_mode
2548 && !capability_needs_repo
2549 && calls
2550 .iter()
2551 .all(|c| is_capability_probe_tool(&c.function.name))
2552 {
2553 loop_intervention = Some(
2554 "STOP. This is a stable capability question. Do not inspect the repository or call tools. \
2555 Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
2556 Do not mention raw `mcp__*` names unless they are active and directly relevant."
2557 .to_string(),
2558 );
2559 let _ = tx
2560 .send(InferenceEvent::Thought(
2561 "Capability mode: skipping unnecessary repo-inspection tools and answering directly."
2562 .into(),
2563 ))
2564 .await;
2565 continue;
2566 }
2567
2568 let raw_content = text.as_deref().unwrap_or(" ");
2571
2572 if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
2573 let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
2574 self.reasoning_history = Some(thought);
2576 }
2577
2578 let stored_tool_call_content = if implement_current_plan {
2581 cap_output(raw_content, 1200)
2582 } else {
2583 raw_content.to_string()
2584 };
2585 self.history.push(ChatMessage::assistant_tool_calls(
2586 &stored_tool_call_content,
2587 calls.clone(),
2588 ));
2589
2590 let mut results = Vec::new();
2592 let gemma4_model =
2593 crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
2594 let latest_user_prompt = self.latest_user_prompt();
2595 let mut seen_call_keys = std::collections::HashSet::new();
2596 let mut deduped_calls = Vec::new();
2597 for call in calls.clone() {
2598 let (normalized_name, normalized_args) = normalized_tool_call_for_execution(
2599 &call.function.name,
2600 &call.function.arguments,
2601 gemma4_model,
2602 latest_user_prompt,
2603 );
2604 let key = canonical_tool_call_key(&normalized_name, &normalized_args);
2605 if seen_call_keys.insert(key) {
2606 let repeat_guard_exempt = matches!(
2607 normalized_name.as_str(),
2608 "verify_build" | "git_commit" | "git_push"
2609 );
2610 if !repeat_guard_exempt {
2611 if let Some(cached) = completed_tool_cache
2612 .get(&canonical_tool_call_key(&normalized_name, &normalized_args))
2613 {
2614 let _ = tx
2615 .send(InferenceEvent::Thought(
2616 "Cached tool result reused: identical built-in invocation already completed earlier in this turn."
2617 .to_string(),
2618 ))
2619 .await;
2620 loop_intervention = Some(format!(
2621 "STOP. You already called `{}` with identical arguments earlier in this turn and already have that result in conversation history. Do not call it again. Use the existing result to answer or choose a different next step.",
2622 cached.tool_name
2623 ));
2624 continue;
2625 }
2626 }
2627 deduped_calls.push(call);
2628 } else {
2629 let _ = tx
2630 .send(InferenceEvent::Thought(
2631 "Duplicate tool call skipped: identical built-in invocation already ran this turn."
2632 .to_string(),
2633 ))
2634 .await;
2635 }
2636 }
2637
2638 let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = deduped_calls
2640 .into_iter()
2641 .partition(|c| is_parallel_safe(&c.function.name));
2642
2643 if !parallel_calls.is_empty() {
2645 let mut tasks = Vec::new();
2646 for call in parallel_calls {
2647 let tx_clone = tx.clone();
2648 let config_clone = config.clone();
2649 let call_with_id = call.clone();
2651 tasks.push(self.process_tool_call(
2652 call_with_id.function,
2653 config_clone,
2654 yolo,
2655 tx_clone,
2656 call_with_id.id,
2657 ));
2658 }
2659 results.extend(futures::future::join_all(tasks).await);
2661 }
2662
2663 for call in serial_calls {
2665 results.push(
2666 self.process_tool_call(
2667 call.function,
2668 config.clone(),
2669 yolo,
2670 tx.clone(),
2671 call.id,
2672 )
2673 .await,
2674 );
2675 }
2676
2677 let mut authoritative_tool_output: Option<String> = None;
2679 let mut blocked_policy_output: Option<String> = None;
2680 let mut recoverable_policy_intervention: Option<String> = None;
2681 let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
2682 let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
2683 None;
2684 for res in results {
2685 let call_id = res.call_id.clone();
2686 let tool_name = res.tool_name.clone();
2687 let final_output = res.output.clone();
2688 let is_error = res.is_error;
2689 for msg in res.msg_results {
2690 self.history.push(msg);
2691 }
2692
2693 if matches!(
2695 tool_name.as_str(),
2696 "patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
2697 ) {
2698 mutation_occurred = true;
2699 implementation_started = true;
2700 if !is_error {
2702 let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
2703 if !path.is_empty() {
2704 self.vein.bump_heat(path);
2705 self.l1_context = self.vein.l1_context();
2706 }
2707 }
2708 }
2709
2710 if tool_name == "verify_build" {
2711 self.record_session_verification(
2712 !is_error
2713 && (final_output.contains("BUILD OK")
2714 || final_output.contains("BUILD SUCCESS")
2715 || final_output.contains("BUILD OKAY")),
2716 if is_error {
2717 "Explicit verify_build failed."
2718 } else {
2719 "Explicit verify_build passed."
2720 },
2721 );
2722 }
2723
2724 let call_key = format!(
2726 "{}:{}",
2727 tool_name,
2728 serde_json::to_string(&res.args).unwrap_or_default()
2729 );
2730 let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
2731 *repeat_count += 1;
2732
2733 let repeat_guard_exempt = matches!(
2735 tool_name.as_str(),
2736 "verify_build" | "git_commit" | "git_push"
2737 );
2738 if *repeat_count >= 3 && !repeat_guard_exempt {
2739 loop_intervention = Some(format!(
2740 "STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
2741 Do not call it again. Either answer directly from what you already know, \
2742 use a different tool or approach, or ask the user for clarification.",
2743 tool_name, *repeat_count
2744 ));
2745 let _ = tx
2746 .send(InferenceEvent::Thought(format!(
2747 "Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
2748 tool_name, *repeat_count
2749 )))
2750 .await;
2751 }
2752
2753 if is_error {
2754 consecutive_errors += 1;
2755 } else {
2756 consecutive_errors = 0;
2757 }
2758
2759 if consecutive_errors >= 3 {
2760 loop_intervention = Some(
2761 "CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
2762 STOP all tool calls immediately. Analyze why your previous 3 calls failed \
2763 (check for hallucinations or invalid arguments) and ask the user for \
2764 clarification if you cannot proceed.".to_string()
2765 );
2766 }
2767
2768 if consecutive_errors >= 4 {
2769 self.emit_runtime_failure(
2770 &tx,
2771 RuntimeFailureClass::ToolLoop,
2772 "Hard termination: too many consecutive tool errors.",
2773 )
2774 .await;
2775 return Ok(());
2776 }
2777
2778 let _ = tx
2779 .send(InferenceEvent::ToolCallResult {
2780 id: call_id.clone(),
2781 name: tool_name.clone(),
2782 output: final_output.clone(),
2783 is_error,
2784 })
2785 .await;
2786
2787 let repeat_guard_exempt = matches!(
2788 tool_name.as_str(),
2789 "verify_build" | "git_commit" | "git_push"
2790 );
2791 if !repeat_guard_exempt {
2792 completed_tool_cache.insert(
2793 canonical_tool_call_key(&tool_name, &res.args),
2794 CachedToolResult {
2795 tool_name: tool_name.clone(),
2796 },
2797 );
2798 }
2799
2800 let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
2802 self.engine.current_context_length(),
2803 );
2804 let capped = if implement_current_plan {
2805 cap_output(&final_output, 1200)
2806 } else if tool_name == "map_project"
2807 && self.workflow_mode == WorkflowMode::Architect
2808 {
2809 cap_output(&final_output, 2500)
2810 } else if tool_name == "map_project" {
2811 cap_output(&final_output, 3500)
2812 } else if compact_ctx
2813 && (tool_name == "read_file" || tool_name == "inspect_lines")
2814 {
2815 let limit = 3000usize;
2817 if final_output.len() > limit {
2818 let total_lines = final_output.lines().count();
2819 let mut split_at = limit;
2820 while !final_output.is_char_boundary(split_at) && split_at > 0 {
2821 split_at -= 1;
2822 }
2823 format!(
2824 "{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.]",
2825 &final_output[..split_at],
2826 total_lines,
2827 total_lines.saturating_sub(150),
2828 )
2829 } else {
2830 final_output.clone()
2831 }
2832 } else {
2833 cap_output(&final_output, 8000)
2834 };
2835 self.history.push(ChatMessage::tool_result_for_model(
2836 &call_id,
2837 &tool_name,
2838 &capped,
2839 &self.engine.current_model(),
2840 ));
2841
2842 if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
2843 {
2844 overview_runtime_trace =
2845 Some(summarize_runtime_trace_output(&final_output));
2846 } else if architecture_overview_mode && !is_error && tool_name == "map_project"
2847 {
2848 overview_project_map = Some(summarize_project_map_output(&final_output));
2849 }
2850
2851 if !architecture_overview_mode
2852 && !is_error
2853 && ((grounded_trace_mode && tool_name == "trace_runtime_flow")
2854 || (toolchain_mode && tool_name == "describe_toolchain"))
2855 {
2856 authoritative_tool_output = Some(final_output.clone());
2857 } else if !architecture_overview_mode
2858 && !is_error
2859 && tool_name == "map_project"
2860 && project_map_mode
2861 && authoritative_tool_output.is_none()
2862 {
2863 authoritative_tool_output =
2864 Some(summarize_project_map_output(&final_output));
2865 }
2866
2867 if !is_error && tool_name == "read_file" {
2868 if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2869 let normalized = normalize_workspace_path(path);
2870 let read_offset =
2871 res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
2872 successful_read_targets.insert(normalized.clone());
2873 successful_read_regions.insert((normalized.clone(), read_offset));
2874 }
2875 }
2876
2877 if !is_error && tool_name == "grep_files" {
2878 if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
2879 let normalized = normalize_workspace_path(path);
2880 if final_output.starts_with("No matches for ") {
2881 no_match_grep_targets.insert(normalized);
2882 } else if grep_output_is_high_fanout(&final_output) {
2883 broad_grep_targets.insert(normalized);
2884 } else {
2885 successful_grep_targets.insert(normalized);
2886 }
2887 }
2888 }
2889
2890 if is_error
2891 && matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
2892 && (final_output.contains("search string not found")
2893 || final_output.contains("search string is too short")
2894 || final_output.contains("search string matched"))
2895 {
2896 if let Some(target) = action_target_path(&tool_name, &res.args) {
2897 let guidance = if final_output.contains("matched") {
2898 format!(
2899 "STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
2900 tool_name, target
2901 )
2902 } else {
2903 format!(
2904 "STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
2905 tool_name, target
2906 )
2907 };
2908 loop_intervention = Some(guidance);
2909 *repeat_count = 0;
2910 }
2911 }
2912
2913 if res.blocked_by_policy
2914 && is_mcp_workspace_read_tool(&tool_name)
2915 && recoverable_policy_intervention.is_none()
2916 {
2917 recoverable_policy_intervention = Some(
2918 "STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
2919 );
2920 recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
2921 recoverable_policy_checkpoint = Some((
2922 OperatorCheckpointState::BlockedPolicy,
2923 "MCP workspace read blocked; rerouting to built-in file tools."
2924 .to_string(),
2925 ));
2926 } else if res.blocked_by_policy
2927 && implement_current_plan
2928 && tool_name == "map_project"
2929 && recoverable_policy_intervention.is_none()
2930 {
2931 recoverable_policy_intervention = Some(
2932 "STOP. `map_project` is blocked during plan execution. Read your planned target files directly, then edit.".to_string(),
2933 );
2934 recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2935 recoverable_policy_checkpoint = Some((
2936 OperatorCheckpointState::BlockedPolicy,
2937 "`map_project` blocked for current-plan execution.".to_string(),
2938 ));
2939 } else if res.blocked_by_policy
2940 && implement_current_plan
2941 && is_current_plan_irrelevant_tool(&tool_name)
2942 && recoverable_policy_intervention.is_none()
2943 {
2944 recoverable_policy_intervention = Some(format!(
2945 "STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
2946 tool_name
2947 ));
2948 recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
2949 recoverable_policy_checkpoint = Some((
2950 OperatorCheckpointState::BlockedPolicy,
2951 format!(
2952 "Current-plan execution blocked unrelated tool `{}`.",
2953 tool_name
2954 ),
2955 ));
2956 } else if res.blocked_by_policy
2957 && implement_current_plan
2958 && final_output.contains("requires recent file evidence")
2959 && recoverable_policy_intervention.is_none()
2960 {
2961 let target = action_target_path(&tool_name, &res.args)
2962 .unwrap_or_else(|| "the target file".to_string());
2963 recoverable_policy_intervention = Some(format!(
2964 "STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
2965 ));
2966 recoverable_policy_recipe =
2967 Some(RecoveryScenario::RecentFileEvidenceMissing);
2968 recoverable_policy_checkpoint = Some((
2969 OperatorCheckpointState::BlockedRecentFileEvidence,
2970 format!("Edit blocked on `{target}`; recent file evidence missing."),
2971 ));
2972 } else if res.blocked_by_policy
2973 && implement_current_plan
2974 && final_output.contains("requires an exact local line window first")
2975 && recoverable_policy_intervention.is_none()
2976 {
2977 let target = action_target_path(&tool_name, &res.args)
2978 .unwrap_or_else(|| "the target file".to_string());
2979 recoverable_policy_intervention = Some(format!(
2980 "STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
2981 ));
2982 recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
2983 recoverable_policy_checkpoint = Some((
2984 OperatorCheckpointState::BlockedExactLineWindow,
2985 format!("Edit blocked on `{target}`; exact line window required."),
2986 ));
2987 } else if res.blocked_by_policy && blocked_policy_output.is_none() {
2988 blocked_policy_output = Some(final_output.clone());
2989 }
2990
2991 if *repeat_count >= 5 {
2992 let _ = tx.send(InferenceEvent::Done).await;
2993 return Ok(());
2994 }
2995
2996 if implement_current_plan
2997 && !implementation_started
2998 && !is_error
2999 && is_non_mutating_plan_step_tool(&tool_name)
3000 {
3001 non_mutating_plan_steps += 1;
3002 }
3003 }
3004
3005 if let Some(intervention) = recoverable_policy_intervention {
3006 if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
3007 self.emit_operator_checkpoint(&tx, state, summary).await;
3008 }
3009 if let Some(scenario) = recoverable_policy_recipe.take() {
3010 let recipe = plan_recovery(scenario, &self.recovery_context);
3011 self.emit_recovery_recipe_summary(
3012 &tx,
3013 recipe.recipe.scenario.label(),
3014 compact_recovery_plan_summary(&recipe),
3015 )
3016 .await;
3017 }
3018 loop_intervention = Some(intervention);
3019 let _ = tx
3020 .send(InferenceEvent::Thought(
3021 "Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
3022 .into(),
3023 ))
3024 .await;
3025 continue;
3026 }
3027
3028 if architecture_overview_mode {
3029 match (
3030 overview_project_map.as_deref(),
3031 overview_runtime_trace.as_deref(),
3032 ) {
3033 (Some(project_map), Some(runtime_trace)) => {
3034 let response =
3035 build_architecture_overview_answer(project_map, runtime_trace);
3036 self.history.push(ChatMessage::assistant_text(&response));
3037 self.transcript.log_agent(&response);
3038
3039 for chunk in chunk_text(&response, 8) {
3040 if !chunk.is_empty() {
3041 let _ = tx.send(InferenceEvent::Token(chunk)).await;
3042 }
3043 }
3044
3045 let _ = tx.send(InferenceEvent::Done).await;
3046 break;
3047 }
3048 (Some(_), None) => {
3049 loop_intervention = Some(
3050 "Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
3051 .to_string(),
3052 );
3053 continue;
3054 }
3055 (None, Some(_)) => {
3056 loop_intervention = Some(
3057 "Good. You now have the grounded runtime/control-flow trace. Next, call `map_project` once to capture entrypoints and core owner files. Keep it compact and do not call broad file-read tools in this architecture overview."
3058 .to_string(),
3059 );
3060 continue;
3061 }
3062 (None, None) => {}
3063 }
3064 }
3065
3066 if implement_current_plan
3067 && !implementation_started
3068 && non_mutating_plan_steps >= non_mutating_plan_hard_cap
3069 {
3070 let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
3071 self.history.push(ChatMessage::assistant_text(&msg));
3072 self.transcript.log_agent(&msg);
3073
3074 for chunk in chunk_text(&msg, 8) {
3075 if !chunk.is_empty() {
3076 let _ = tx.send(InferenceEvent::Token(chunk)).await;
3077 }
3078 }
3079
3080 let _ = tx.send(InferenceEvent::Done).await;
3081 break;
3082 }
3083
3084 if let Some(blocked_output) = blocked_policy_output {
3085 self.emit_operator_checkpoint(
3086 &tx,
3087 OperatorCheckpointState::BlockedPolicy,
3088 "A blocked tool path was surfaced directly to the operator.",
3089 )
3090 .await;
3091 self.history
3092 .push(ChatMessage::assistant_text(&blocked_output));
3093 self.transcript.log_agent(&blocked_output);
3094
3095 for chunk in chunk_text(&blocked_output, 8) {
3096 if !chunk.is_empty() {
3097 let _ = tx.send(InferenceEvent::Token(chunk)).await;
3098 }
3099 }
3100
3101 let _ = tx.send(InferenceEvent::Done).await;
3102 break;
3103 }
3104
3105 if let Some(tool_output) = authoritative_tool_output {
3106 self.history.push(ChatMessage::assistant_text(&tool_output));
3107 self.transcript.log_agent(&tool_output);
3108
3109 for chunk in chunk_text(&tool_output, 8) {
3110 if !chunk.is_empty() {
3111 let _ = tx.send(InferenceEvent::Token(chunk)).await;
3112 }
3113 }
3114
3115 let _ = tx.send(InferenceEvent::Done).await;
3116 break;
3117 }
3118
3119 if implement_current_plan && !implementation_started {
3120 let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
3121 if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
3122 loop_intervention = Some(format!(
3123 "{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
3124 base
3125 ));
3126 } else {
3127 loop_intervention = Some(base.to_string());
3128 }
3129 } else if self.workflow_mode == WorkflowMode::Architect {
3130 loop_intervention = Some(
3131 format!(
3132 "STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
3133 architect_handoff_contract()
3134 ),
3135 );
3136 }
3137
3138 if mutation_occurred && !yolo {
3140 let _ = tx
3141 .send(InferenceEvent::Thought(
3142 "Self-Verification: Running 'cargo check' to ensure build integrity..."
3143 .into(),
3144 ))
3145 .await;
3146 let verify_res = self.auto_verify_build().await;
3147 let verify_ok = verify_res.contains("BUILD SUCCESS");
3148 self.record_verify_build_result(verify_ok, &verify_res)
3149 .await;
3150 self.record_session_verification(
3151 verify_ok,
3152 if verify_ok {
3153 "Automatic build verification passed."
3154 } else {
3155 "Automatic build verification failed."
3156 },
3157 );
3158 self.history.push(ChatMessage::system(&format!(
3159 "\n# SYSTEM VERIFICATION\n{verify_res}"
3160 )));
3161 let _ = tx
3162 .send(InferenceEvent::Thought(
3163 "Verification turn injected into history.".into(),
3164 ))
3165 .await;
3166 }
3167
3168 continue;
3170 } else if let Some(response_text) = text {
3171 if finish_reason.as_deref() == Some("length") && near_context_ceiling {
3172 if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
3173 let cleaned = build_session_reset_semantics_answer();
3174 self.history.push(ChatMessage::assistant_text(&cleaned));
3175 self.transcript.log_agent(&cleaned);
3176 for chunk in chunk_text(&cleaned, 8) {
3177 if !chunk.is_empty() {
3178 let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3179 }
3180 }
3181 let _ = tx.send(InferenceEvent::Done).await;
3182 break;
3183 }
3184
3185 let warning = format_runtime_failure(
3186 RuntimeFailureClass::ContextWindow,
3187 "Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
3188 );
3189 self.history.push(ChatMessage::assistant_text(&warning));
3190 self.transcript.log_agent(&warning);
3191 let _ = tx
3192 .send(InferenceEvent::Thought(
3193 "Length recovery: model hit the context ceiling before completing the answer."
3194 .into(),
3195 ))
3196 .await;
3197 for chunk in chunk_text(&warning, 8) {
3198 if !chunk.is_empty() {
3199 let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3200 }
3201 }
3202 let _ = tx.send(InferenceEvent::Done).await;
3203 break;
3204 }
3205
3206 if response_text.contains("<|tool_call")
3207 || response_text.contains("[END_TOOL_REQUEST]")
3208 || response_text.contains("<|tool_response")
3209 || response_text.contains("<tool_response|>")
3210 {
3211 loop_intervention = Some(
3212 "Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
3213 );
3214 continue;
3215 }
3216
3217 if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
3219 {
3220 let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
3221 self.reasoning_history = Some(thought);
3224 }
3225
3226 let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
3228
3229 if implement_current_plan && !implementation_started {
3230 loop_intervention = Some(
3231 "Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
3232 );
3233 continue;
3234 }
3235
3236 if cleaned.is_empty() {
3239 let _ = tx.send(InferenceEvent::Done).await;
3240 break;
3241 }
3242
3243 self.persist_architect_handoff(&cleaned);
3244 self.history.push(ChatMessage::assistant_text(&cleaned));
3245 self.transcript.log_agent(&cleaned);
3246
3247 for chunk in chunk_text(&cleaned, 8) {
3249 if !chunk.is_empty() {
3250 let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
3251 }
3252 }
3253
3254 let _ = tx.send(InferenceEvent::Done).await;
3255 break;
3256 } else {
3257 let detail = "Model returned an empty response.";
3258 let class = classify_runtime_failure(detail);
3259 if should_retry_runtime_failure(class) {
3260 if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3261 if let RecoveryDecision::Attempt(plan) =
3262 attempt_recovery(scenario, &mut self.recovery_context)
3263 {
3264 self.transcript.log_system(
3265 "Automatic provider recovery triggered: model returned an empty response.",
3266 );
3267 self.emit_recovery_recipe_summary(
3268 &tx,
3269 plan.recipe.scenario.label(),
3270 compact_recovery_plan_summary(&plan),
3271 )
3272 .await;
3273 let _ = tx
3274 .send(InferenceEvent::ProviderStatus {
3275 state: ProviderRuntimeState::Recovering,
3276 summary: compact_runtime_recovery_summary(class).into(),
3277 })
3278 .await;
3279 self.emit_operator_checkpoint(
3280 &tx,
3281 OperatorCheckpointState::RecoveringProvider,
3282 compact_runtime_recovery_summary(class),
3283 )
3284 .await;
3285 continue;
3286 }
3287 }
3288 }
3289
3290 self.emit_runtime_failure(&tx, class, detail).await;
3291 break;
3292 }
3293 }
3294
3295 self.trim_history(80);
3296 self.refresh_session_memory();
3297 self.save_session();
3298 self.emit_compaction_pressure(&tx).await;
3299 Ok(())
3300 }
3301
3302 async fn emit_runtime_failure(
3303 &mut self,
3304 tx: &mpsc::Sender<InferenceEvent>,
3305 class: RuntimeFailureClass,
3306 detail: &str,
3307 ) {
3308 if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
3309 let decision = preview_recovery_decision(scenario, &self.recovery_context);
3310 self.emit_recovery_recipe_summary(
3311 tx,
3312 scenario.label(),
3313 compact_recovery_decision_summary(&decision),
3314 )
3315 .await;
3316 let needs_refresh = match &decision {
3317 RecoveryDecision::Attempt(plan) => plan
3318 .recipe
3319 .steps
3320 .contains(&RecoveryStep::RefreshRuntimeProfile),
3321 RecoveryDecision::Escalate { recipe, .. } => {
3322 recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
3323 }
3324 };
3325 if needs_refresh {
3326 if let Some((model_id, context_length, changed)) = self
3327 .refresh_runtime_profile_and_report(tx, "context_window_failure")
3328 .await
3329 {
3330 let note = if changed {
3331 format!(
3332 "Runtime refresh after context-window failure: model {} | CTX {}",
3333 model_id, context_length
3334 )
3335 } else {
3336 format!(
3337 "Runtime refresh after context-window failure confirms model {} | CTX {}",
3338 model_id, context_length
3339 )
3340 };
3341 let _ = tx.send(InferenceEvent::Thought(note)).await;
3342 }
3343 }
3344 }
3345 if let Some(state) = provider_state_for_runtime_failure(class) {
3346 let _ = tx
3347 .send(InferenceEvent::ProviderStatus {
3348 state,
3349 summary: compact_runtime_failure_summary(class).into(),
3350 })
3351 .await;
3352 }
3353 if let Some(state) = checkpoint_state_for_runtime_failure(class) {
3354 self.emit_operator_checkpoint(tx, state, checkpoint_summary_for_runtime_failure(class))
3355 .await;
3356 }
3357 let formatted = format_runtime_failure(class, detail);
3358 self.history.push(ChatMessage::system(&format!(
3359 "# RUNTIME FAILURE\n{}",
3360 formatted
3361 )));
3362 self.transcript.log_system(&formatted);
3363 let _ = tx.send(InferenceEvent::Error(formatted)).await;
3364 let _ = tx.send(InferenceEvent::Done).await;
3365 }
3366
3367 async fn auto_verify_build(&self) -> String {
3369 match crate::tools::verify_build::execute(&serde_json::json!({ "action": "build" })).await {
3370 Ok(out) => {
3371 "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string()
3372 + &cap_output(&out, 2000)
3373 }
3374 Err(e) => format!(
3375 "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
3376 cap_output(&e, 2000)
3377 ),
3378 }
3379 }
3380
3381 async fn compact_history_if_needed(
3385 &mut self,
3386 tx: &mpsc::Sender<InferenceEvent>,
3387 anchor_index: Option<usize>,
3388 ) -> Result<bool, String> {
3389 let vram_ratio = self.gpu_state.ratio();
3390 let context_length = self.engine.current_context_length();
3391 let config = CompactionConfig::adaptive(context_length, vram_ratio);
3392
3393 if !compaction::should_compact(&self.history, context_length, vram_ratio) {
3394 return Ok(false);
3395 }
3396
3397 let _ = tx
3398 .send(InferenceEvent::Thought(format!(
3399 "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
3400 context_length / 1000,
3401 vram_ratio * 100.0,
3402 config.max_estimated_tokens / 1000,
3403 )))
3404 .await;
3405
3406 let result = compaction::compact_history(
3407 &self.history,
3408 self.running_summary.as_deref(),
3409 config,
3410 anchor_index,
3411 );
3412
3413 let removed_message_count = self.history.len().saturating_sub(result.messages.len());
3414 self.history = result.messages;
3415 self.running_summary = result.summary;
3416
3417 let previous_memory = self.session_memory.clone();
3419 self.session_memory = compaction::extract_memory(&self.history);
3420 self.session_memory
3421 .inherit_runtime_ledger_from(&previous_memory);
3422 self.session_memory.record_compaction(
3423 removed_message_count,
3424 format!(
3425 "Compacted history around active task '{}' and preserved {} working-set file(s).",
3426 self.session_memory.current_task,
3427 self.session_memory.working_set.len()
3428 ),
3429 );
3430 self.emit_compaction_pressure(tx).await;
3431
3432 let first_non_sys = self
3435 .history
3436 .iter()
3437 .position(|m| m.role != "system")
3438 .unwrap_or(self.history.len());
3439 if first_non_sys < self.history.len() {
3440 if let Some(user_offset) = self.history[first_non_sys..]
3441 .iter()
3442 .position(|m| m.role == "user")
3443 {
3444 if user_offset > 0 {
3445 self.history
3446 .drain(first_non_sys..first_non_sys + user_offset);
3447 }
3448 }
3449 }
3450
3451 let _ = tx
3452 .send(InferenceEvent::Thought(format!(
3453 "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
3454 self.session_memory.current_task,
3455 self.session_memory.working_set.len()
3456 )))
3457 .await;
3458 let recipe = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
3459 self.emit_recovery_recipe_summary(
3460 tx,
3461 recipe.recipe.scenario.label(),
3462 compact_recovery_plan_summary(&recipe),
3463 )
3464 .await;
3465 self.emit_operator_checkpoint(
3466 tx,
3467 OperatorCheckpointState::HistoryCompacted,
3468 format!(
3469 "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
3470 self.session_memory.current_task,
3471 self.session_memory.working_set.len()
3472 ),
3473 )
3474 .await;
3475
3476 Ok(true)
3477 }
3478
3479 fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
3483 if query.trim().split_whitespace().count() < 3 {
3485 return None;
3486 }
3487
3488 let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
3489 if results.is_empty() {
3490 return None;
3491 }
3492
3493 let semantic_active = self.vein.has_any_embeddings();
3494 let header = if semantic_active {
3495 "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
3496 Use this to answer without needing extra read_file calls where possible.\n\n"
3497 } else {
3498 "# Relevant context from The Vein (BM25 keyword retrieval)\n\
3499 Use this to answer without needing extra read_file calls where possible.\n\n"
3500 };
3501
3502 let mut ctx = String::from(header);
3503 let mut paths: Vec<String> = Vec::new();
3504
3505 let mut total = 0usize;
3506 const MAX_CTX_CHARS: usize = 1_500;
3507
3508 for r in results {
3509 if total >= MAX_CTX_CHARS {
3510 break;
3511 }
3512 let snippet = if r.content.len() > 500 {
3513 format!("{}...", &r.content[..500])
3514 } else {
3515 r.content.clone()
3516 };
3517 ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
3518 total += snippet.len() + r.path.len() + 10;
3519 if !paths.contains(&r.path) {
3520 paths.push(r.path);
3521 }
3522 }
3523
3524 Some((ctx, paths))
3525 }
3526
3527 fn context_window_slice(&self) -> Vec<ChatMessage> {
3530 let mut result = Vec::new();
3531
3532 if self.history.len() > 1 {
3534 for m in &self.history[1..] {
3535 if m.role == "system" {
3536 continue;
3537 }
3538
3539 let mut sanitized = m.clone();
3540 if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3542 sanitized.content = MessageContent::Text(" ".into());
3543 }
3544 result.push(sanitized);
3545 }
3546 }
3547
3548 if !result.is_empty() && result[0].role != "user" {
3551 result.insert(0, ChatMessage::user("Continuing previous context..."));
3552 }
3553
3554 result
3555 }
3556
3557 fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
3558 let mut result = Vec::new();
3559
3560 if self.history.len() > 1 {
3561 let start = start_idx.max(1).min(self.history.len());
3562 for m in &self.history[start..] {
3563 if m.role == "system" {
3564 continue;
3565 }
3566
3567 let mut sanitized = m.clone();
3568 if (m.role == "assistant" || m.role == "tool") && m.content.as_str().is_empty() {
3569 sanitized.content = MessageContent::Text(" ".into());
3570 }
3571 result.push(sanitized);
3572 }
3573 }
3574
3575 if !result.is_empty() && result[0].role != "user" {
3576 result.insert(0, ChatMessage::user("Continuing current plan execution..."));
3577 }
3578
3579 result
3580 }
3581
3582 fn trim_history(&mut self, max_messages: usize) {
3584 if self.history.len() <= max_messages {
3585 return;
3586 }
3587 let excess = self.history.len() - max_messages;
3589 self.history.drain(1..=excess);
3590 }
3591
3592 async fn repair_tool_args(
3594 &self,
3595 tool_name: &str,
3596 bad_json: &str,
3597 tx: &mpsc::Sender<InferenceEvent>,
3598 ) -> Result<Value, String> {
3599 let _ = tx
3600 .send(InferenceEvent::Thought(format!(
3601 "Attempting to repair malformed JSON for '{}'...",
3602 tool_name
3603 )))
3604 .await;
3605
3606 let prompt = format!(
3607 "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
3608 tool_name, bad_json
3609 );
3610
3611 let messages = vec![
3612 ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
3613 ChatMessage::user(&prompt),
3614 ];
3615
3616 let (text, _, _, _) = self
3618 .engine
3619 .call_with_tools(&messages, &[], self.fast_model.as_deref())
3620 .await
3621 .map_err(|e| e.to_string())?;
3622
3623 let cleaned = text
3624 .unwrap_or_default()
3625 .trim()
3626 .trim_start_matches("```json")
3627 .trim_start_matches("```")
3628 .trim_end_matches("```")
3629 .trim()
3630 .to_string();
3631
3632 serde_json::from_str(&cleaned).map_err(|e| format!("Repair failed: {}", e))
3633 }
3634
3635 async fn run_critic_check(
3637 &self,
3638 path: &str,
3639 content: &str,
3640 tx: &mpsc::Sender<InferenceEvent>,
3641 ) -> Option<String> {
3642 let ext = std::path::Path::new(path)
3644 .extension()
3645 .and_then(|e| e.to_str())
3646 .unwrap_or("");
3647 const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
3648 if !CRITIC_EXTS.contains(&ext) {
3649 return None;
3650 }
3651
3652 let _ = tx
3653 .send(InferenceEvent::Thought(format!(
3654 "CRITIC: Reviewing changes to '{}'...",
3655 path
3656 )))
3657 .await;
3658
3659 let truncated = cap_output(content, 4000);
3660
3661 let prompt = format!(
3662 "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
3663 path, ext, truncated
3664 );
3665
3666 let messages = vec![
3667 ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
3668 ChatMessage::user(&prompt)
3669 ];
3670
3671 let (text, _, _, _) = self
3672 .engine
3673 .call_with_tools(&messages, &[], self.fast_model.as_deref())
3674 .await
3675 .ok()?;
3676
3677 let critique = text?.trim().to_string();
3678 if critique.to_uppercase().contains("PASS") || critique.is_empty() {
3679 None
3680 } else {
3681 Some(critique)
3682 }
3683 }
3684}
3685
3686pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
3689 dispatch_builtin_tool(name, args).await
3690}
3691
/// Cleans up a user prompt for use as a `fix_plan` issue description.
///
/// Surrounding whitespace is trimmed, a leading `/think` or `/no_think`
/// prefix is removed, and the remainder is trimmed again. Returns `None`
/// when nothing is left.
fn normalize_fix_plan_issue_text(text: &str) -> Option<String> {
    let body = text.trim();

    // `/think` is tried first; `/no_think` only when that prefix is absent.
    let without_directive = match body.strip_prefix("/think") {
        Some(rest) => rest,
        None => body.strip_prefix("/no_think").unwrap_or(body),
    };

    let issue = without_directive.trim();
    if issue.is_empty() {
        None
    } else {
        Some(issue.to_owned())
    }
}
3703
3704fn fill_missing_fix_plan_issue(tool_name: &str, args: &mut Value, fallback_issue: Option<&str>) {
3705 if tool_name != "inspect_host" {
3706 return;
3707 }
3708
3709 let Some(topic) = args.get("topic").and_then(|v| v.as_str()) else {
3710 return;
3711 };
3712 if topic != "fix_plan" {
3713 return;
3714 }
3715
3716 let issue_missing = args
3717 .get("issue")
3718 .and_then(|v| v.as_str())
3719 .map(str::trim)
3720 .is_none_or(|value| value.is_empty());
3721 if !issue_missing {
3722 return;
3723 }
3724
3725 let Some(fallback_issue) = fallback_issue.and_then(normalize_fix_plan_issue_text) else {
3726 return;
3727 };
3728
3729 let Value::Object(map) = args else {
3730 return;
3731 };
3732 map.insert(
3733 "issue".to_string(),
3734 Value::String(fallback_issue.to_string()),
3735 );
3736}
3737
3738fn should_rewrite_shell_to_fix_plan(
3739 tool_name: &str,
3740 args: &Value,
3741 latest_user_prompt: Option<&str>,
3742) -> bool {
3743 if tool_name != "shell" {
3744 return false;
3745 }
3746 let Some(prompt) = latest_user_prompt else {
3747 return false;
3748 };
3749 if preferred_host_inspection_topic(prompt) != Some("fix_plan") {
3750 return false;
3751 }
3752 let command = args
3753 .get("command")
3754 .and_then(|value| value.as_str())
3755 .unwrap_or("");
3756 shell_looks_like_structured_host_inspection(command)
3757}
3758
3759fn rewrite_host_tool_call(
3760 tool_name: &mut String,
3761 args: &mut Value,
3762 latest_user_prompt: Option<&str>,
3763) {
3764 if should_rewrite_shell_to_fix_plan(tool_name, args, latest_user_prompt) {
3765 *tool_name = "inspect_host".to_string();
3766 *args = serde_json::json!({
3767 "topic": "fix_plan"
3768 });
3769 }
3770 fill_missing_fix_plan_issue(tool_name, args, latest_user_prompt);
3771}
3772
3773fn canonical_tool_call_key(tool_name: &str, args: &Value) -> String {
3774 format!(
3775 "{}:{}",
3776 tool_name,
3777 serde_json::to_string(args).unwrap_or_default()
3778 )
3779}
3780
3781fn normalized_tool_call_for_execution(
3782 tool_name: &str,
3783 raw_arguments: &str,
3784 gemma4_model: bool,
3785 latest_user_prompt: Option<&str>,
3786) -> (String, Value) {
3787 let normalized_arguments = if gemma4_model {
3788 crate::agent::inference::normalize_tool_argument_string(tool_name, raw_arguments)
3789 } else {
3790 raw_arguments.to_string()
3791 };
3792 let mut normalized_name = tool_name.to_string();
3793 let mut args = serde_json::from_str::<Value>(&normalized_arguments)
3794 .unwrap_or(Value::Object(Default::default()));
3795 rewrite_host_tool_call(&mut normalized_name, &mut args, latest_user_prompt);
3796 (normalized_name, args)
3797}
3798
/// Test-only helper: the canonical key of a tool call after it has gone
/// through the same normalization as execution.
#[cfg(test)]
fn normalized_tool_call_key_for_dedupe(
    tool_name: &str,
    raw_arguments: &str,
    gemma4_model: bool,
    latest_user_prompt: Option<&str>,
) -> String {
    let (effective_name, effective_args) = normalized_tool_call_for_execution(
        tool_name,
        raw_arguments,
        gemma4_model,
        latest_user_prompt,
    );

    canonical_tool_call_key(&effective_name, &effective_args)
}
3814
impl ConversationManager {
    /// Asks the permission enforcer whether the named tool call may run.
    ///
    /// Thin wrapper that forwards all arguments unchanged to
    /// `permission_enforcer::authorize_tool_call`.
    fn check_authorization(
        &self,
        name: &str,
        args: &serde_json::Value,
        config: &crate::agent::config::HematiteConfig,
        yolo_flag: bool,
    ) -> crate::agent::permission_enforcer::AuthorizationDecision {
        crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
    }

    /// Runs one tool call end to end and returns a `ToolExecutionOutcome`.
    ///
    /// Steps, in order:
    /// 1. Normalize and parse the argument string (with a repair attempt on
    ///    invalid JSON), then apply `rewrite_host_tool_call`.
    /// 2. Fill in `map_project` defaults for architect / read-only workflows.
    /// 3. Check action preconditions and authorization, prompting the user
    ///    through `tx` when the decision is `Ask`.
    /// 4. Dispatch the tool by name and collect its output.
    /// 5. Record economics and, on success, read/verify/mutation bookkeeping
    ///    plus an optional action receipt and critic review.
    ///
    /// `real_id` is used as the event id and as the `call_id` of the outcome.
    /// Failures are reported in the outcome (`is_error`, `output` prefixed
    /// with `"Error: "`); this method does not return a `Result`.
    async fn process_tool_call(
        &self,
        mut call: ToolCallFn,
        config: crate::agent::config::HematiteConfig,
        yolo: bool,
        tx: mpsc::Sender<InferenceEvent>,
        real_id: String,
    ) -> ToolExecutionOutcome {
        let mut msg_results = Vec::new();
        // Only Gemma 4 models get their raw argument string normalized first.
        let gemma4_model =
            crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
        let normalized_arguments = if gemma4_model {
            crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
        } else {
            call.arguments.clone()
        };

        // Parse the arguments; on invalid JSON try `repair_tool_args`, and if
        // that also fails report it as a Thought and continue with `{}`.
        let mut args: Value = match serde_json::from_str(&normalized_arguments) {
            Ok(v) => v,
            Err(_) => {
                match self
                    .repair_tool_args(&call.name, &normalized_arguments, &tx)
                    .await
                {
                    Ok(v) => v,
                    Err(e) => {
                        let _ = tx
                            .send(InferenceEvent::Thought(format!(
                                "JSON Repair failed: {}",
                                e
                            )))
                            .await;
                        Value::Object(Default::default())
                    }
                }
            }
        };
        // The most recent user message in history feeds the host-tool rewrite.
        let last_user_prompt = self
            .history
            .iter()
            .rev()
            .find(|message| message.role == "user")
            .map(|message| message.content.as_str());
        rewrite_host_tool_call(&mut call.name, &mut args, last_user_prompt);

        // `map_project` defaults: symbols off, depth 2 in Architect mode and
        // depth 3 in other read-only modes. Values already present are kept.
        if call.name == "map_project" && self.workflow_mode == WorkflowMode::Architect {
            if let Some(obj) = args.as_object_mut() {
                obj.entry("include_symbols".to_string())
                    .or_insert(Value::Bool(false));
                obj.entry("max_depth".to_string())
                    .or_insert(Value::Number(2_u64.into()));
            }
        } else if call.name == "map_project" && self.workflow_mode.is_read_only() {
            if let Some(obj) = args.as_object_mut() {
                obj.entry("include_symbols".to_string())
                    .or_insert(Value::Bool(false));
                obj.entry("max_depth".to_string())
                    .or_insert(Value::Number(3_u64.into()));
            }
        }

        let display = format_tool_display(&call.name, &args);
        let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
        let auth = self.check_authorization(&call.name, &args, &config, yolo);

        // A precondition failure wins over the authorization decision; the
        // authorization result is only consulted when preconditions pass.
        let decision_result = match precondition_result {
            Err(e) => Err(e),
            Ok(_) => match auth {
                crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
                crate::agent::permission_enforcer::AuthorizationDecision::Ask {
                    reason,
                    source: _,
                } => {
                    // Ask the UI for approval and wait for the answer.
                    let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
                    let _ = tx
                        .send(InferenceEvent::ApprovalRequired {
                            id: real_id.clone(),
                            name: call.name.clone(),
                            display: format!("{}\nWhy: {}", display, reason),
                            diff: None,
                            responder: approve_tx,
                        })
                        .await;

                    // Anything other than an explicit `true` (including a
                    // dropped responder) counts as a decline.
                    match approve_rx.await {
                        Ok(true) => Ok(()),
                        _ => Err("Declined by user".into()),
                    }
                }
                crate::agent::permission_enforcer::AuthorizationDecision::Deny {
                    reason, ..
                } => Err(reason),
            },
        };
        // Flagged when the rejection message starts with "Action blocked:".
        let blocked_by_policy =
            matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));

        let (output, is_error) = match decision_result {
            Err(e) => (format!("Error: {}", e), true),
            Ok(_) => {
                let _ = tx
                    .send(InferenceEvent::ToolCallStart {
                        id: real_id.clone(),
                        name: call.name.clone(),
                        args: display.clone(),
                    })
                    .await;

                // Dispatch by tool name; the final `else` hands everything
                // not special-cased here to `dispatch_tool`.
                let result = if call.name.starts_with("lsp_") {
                    // LSP tools share path/line/character; missing values
                    // default to "" and 0.
                    let lsp = self.lsp_manager.clone();
                    let path = args
                        .get("path")
                        .and_then(|v| v.as_str())
                        .unwrap_or("")
                        .to_string();
                    let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
                    let character =
                        args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;

                    match call.name.as_str() {
                        "lsp_definitions" => {
                            crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
                                .await
                        }
                        "lsp_references" => {
                            crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
                                .await
                        }
                        "lsp_hover" => {
                            crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
                        }
                        "lsp_search_symbol" => {
                            let query = args
                                .get("query")
                                .and_then(|v| v.as_str())
                                .unwrap_or_default()
                                .to_string();
                            crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
                        }
                        "lsp_rename_symbol" => {
                            let new_name = args
                                .get("new_name")
                                .and_then(|v| v.as_str())
                                .unwrap_or_default()
                                .to_string();
                            crate::tools::lsp_tools::lsp_rename_symbol(
                                lsp, path, line, character, new_name,
                            )
                            .await
                        }
                        "lsp_get_diagnostics" => {
                            crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
                        }
                        _ => Err(format!("Unknown LSP tool: {}", call.name)),
                    }
                } else if call.name == "auto_pin_context" {
                    let pts = args.get("paths").and_then(|v| v.as_array());
                    let reason = args
                        .get("reason")
                        .and_then(|v| v.as_str())
                        .unwrap_or("uninformed scoping");
                    if let Some(arr) = pts {
                        let mut pinned = Vec::new();
                        {
                            // The pinned-files lock is held for the whole loop,
                            // including the file reads and the Thought send.
                            let mut guard = self.pinned_files.lock().await;
                            const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024;
                            // Only the first three paths are considered.
                            for v in arr.iter().take(3) {
                                if let Some(p) = v.as_str() {
                                    // Paths whose metadata cannot be read are
                                    // skipped silently.
                                    if let Ok(meta) = std::fs::metadata(p) {
                                        if meta.len() > MAX_PINNED_SIZE {
                                            let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
                                            continue;
                                        }
                                        // Files that fail to read as UTF-8 text
                                        // are skipped as well.
                                        if let Ok(content) = std::fs::read_to_string(p) {
                                            guard.insert(p.to_string(), content);
                                            pinned.push(p.to_string());
                                        }
                                    }
                                }
                            }
                        }
                        let msg = format!(
                            "Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
                            pinned.join(", "),
                            reason
                        );
                        let _ = tx
                            .send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
                            .await;
                        Ok(msg)
                    } else {
                        Err("Missing 'paths' array for auto_pin_context.".to_string())
                    }
                } else if call.name == "list_pinned" {
                    let paths_msg = {
                        let pinned = self.pinned_files.lock().await;
                        if pinned.is_empty() {
                            "No files are currently pinned.".to_string()
                        } else {
                            let paths: Vec<_> = pinned.keys().cloned().collect();
                            format!(
                                "Currently pinned files in active memory:\n- {}",
                                paths.join("\n- ")
                            )
                        }
                    };
                    Ok(paths_msg)
                } else if call.name.starts_with("mcp__") {
                    // Tools prefixed `mcp__` go to the MCP manager; its error
                    // is converted to a string.
                    let mut mcp = self.mcp_manager.lock().await;
                    match mcp.call_tool(&call.name, &args).await {
                        Ok(res) => Ok(res),
                        Err(e) => Err(e.to_string()),
                    }
                } else if call.name == "swarm" {
                    let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
                    let max_workers = args
                        .get("max_workers")
                        .and_then(|v| v.as_u64())
                        .unwrap_or(3) as usize;

                    // Build worker tasks; any missing string field becomes "?".
                    let mut task_objs = Vec::new();
                    if let Value::Array(arr) = tasks_val {
                        for v in arr {
                            let id = v
                                .get("id")
                                .and_then(|x| x.as_str())
                                .unwrap_or("?")
                                .to_string();
                            let target = v
                                .get("target")
                                .and_then(|x| x.as_str())
                                .unwrap_or("?")
                                .to_string();
                            let instruction = v
                                .get("instruction")
                                .and_then(|x| x.as_str())
                                .unwrap_or("?")
                                .to_string();
                            task_objs.push(crate::agent::parser::WorkerTask {
                                id,
                                target,
                                instruction,
                            });
                        }
                    }

                    if task_objs.is_empty() {
                        Err("No tasks provided for swarm.".to_string())
                    } else {
                        let (swarm_tx_internal, mut swarm_rx_internal) =
                            tokio::sync::mpsc::channel(32);
                        let tx_forwarder = tx.clone();

                        // Detached task that translates swarm messages into
                        // UI events until the internal channel closes.
                        tokio::spawn(async move {
                            while let Some(msg) = swarm_rx_internal.recv().await {
                                match msg {
                                    crate::agent::swarm::SwarmMessage::Progress(id, p) => {
                                        let _ = tx_forwarder
                                            .send(InferenceEvent::Thought(format!(
                                                "Swarm [{}]: {}% complete",
                                                id, p
                                            )))
                                            .await;
                                    }
                                    // Note: `tx` here is the field carried by
                                    // the message (shadowing the outer sender);
                                    // the review response is sent back on it.
                                    crate::agent::swarm::SwarmMessage::ReviewRequest {
                                        worker_id,
                                        file_path,
                                        before: _,
                                        after: _,
                                        tx,
                                    } => {
                                        let (approve_tx, approve_rx) =
                                            tokio::sync::oneshot::channel::<bool>();
                                        let display = format!(
                                            "Swarm worker [{}]: Integrated changes into {:?}",
                                            worker_id, file_path
                                        );
                                        let _ = tx_forwarder
                                            .send(InferenceEvent::ApprovalRequired {
                                                id: format!("swarm_{}", worker_id),
                                                name: "swarm_apply".to_string(),
                                                display,
                                                diff: None,
                                                responder: approve_tx,
                                            })
                                            .await;
                                        // If the approval channel is dropped, no
                                        // response is sent at all.
                                        if let Ok(approved) = approve_rx.await {
                                            let response = if approved {
                                                crate::agent::swarm::ReviewResponse::Accept
                                            } else {
                                                crate::agent::swarm::ReviewResponse::Reject
                                            };
                                            let _ = tx.send(response);
                                        }
                                    }
                                    crate::agent::swarm::SwarmMessage::Done => {}
                                }
                            }
                        });

                        let coordinator = self.swarm_coordinator.clone();
                        match coordinator
                            .dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
                            .await
                        {
                            Ok(_) => Ok(
                                "Swarm execution completed. Check files for integration results."
                                    .to_string(),
                            ),
                            Err(e) => Err(format!("Swarm failure: {}", e)),
                        }
                    }
                } else if call.name == "vision_analyze" {
                    crate::tools::vision::vision_analyze(&self.engine, &args).await
                } else if matches!(
                    call.name.as_str(),
                    "edit_file" | "patch_hunk" | "multi_search_replace"
                ) && !yolo
                {
                    // Outside yolo mode, edits are previewed as a diff and need
                    // explicit approval before being dispatched.
                    let diff_result = match call.name.as_str() {
                        "edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
                        "patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
                        _ => crate::tools::file_ops::compute_msr_diff(&args),
                    };
                    match diff_result {
                        Ok(diff_text) => {
                            let path_label =
                                args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
                            let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
                            let _ = tx
                                .send(InferenceEvent::ApprovalRequired {
                                    id: real_id.clone(),
                                    name: call.name.clone(),
                                    display: format!("Edit preview: {}", path_label),
                                    diff: Some(diff_text),
                                    responder: appr_tx,
                                })
                                .await;
                            match appr_rx.await {
                                Ok(true) => dispatch_tool(&call.name, &args).await,
                                _ => Err("Edit declined by user.".into()),
                            }
                        }
                        // When no diff can be computed the edit is dispatched
                        // directly, without a preview prompt.
                        Err(_) => dispatch_tool(&call.name, &args).await,
                    }
                } else {
                    dispatch_tool(&call.name, &args).await
                };

                match result {
                    Ok(o) => (o, false),
                    Err(e) => (format!("Error: {}", e), true),
                }
            }
        };

        {
            // Record the call's success/failure; skipped if `lock()` fails.
            if let Ok(mut econ) = self.engine.economics.lock() {
                econ.record_tool(&call.name, !is_error);
            }
        }

        // Bookkeeping that only applies to successful calls.
        if !is_error {
            if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
                if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
                    if call.name == "inspect_lines" {
                        self.record_line_inspection(path).await;
                    } else {
                        self.record_read_observation(path).await;
                    }
                }
            }

            if call.name == "verify_build" {
                // Success is detected from marker text in the tool output.
                let ok = output.contains("BUILD OK")
                    || output.contains("BUILD SUCCESS")
                    || output.contains("BUILD OKAY");
                self.record_verify_build_result(ok, &output).await;
            }

            if matches!(
                call.name.as_str(),
                "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
            ) || is_mcp_mutating_tool(&call.name)
            {
                self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
                    .await;
            }

            if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
                msg_results.push(receipt);
            }
        }

        // Critic review: only for successful `edit_file` / `write_file` calls
        // that carry a non-empty "content" argument of at least 50 lines, on a
        // path whose extension is not in `SKIP_EXTS`.
        if !is_error && (call.name == "edit_file" || call.name == "write_file") {
            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
            let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
            let ext = std::path::Path::new(path)
                .extension()
                .and_then(|e| e.to_str())
                .unwrap_or("");
            const SKIP_EXTS: &[&str] = &[
                "md",
                "toml",
                "json",
                "txt",
                "yml",
                "yaml",
                "cfg",
                "csv",
                "lock",
                "gitignore",
            ];
            let line_count = content.lines().count();
            if !path.is_empty()
                && !content.is_empty()
                && !SKIP_EXTS.contains(&ext)
                && line_count >= 50
            {
                if let Some(critique) = self.run_critic_check(path, content, &tx).await {
                    msg_results.push(ChatMessage::system(&format!(
                        "[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
                        path, critique
                    )));
                }
            }
        }

        ToolExecutionOutcome {
            call_id: real_id,
            tool_name: call.name,
            args,
            output,
            is_error,
            blocked_by_policy,
            msg_results,
        }
    }
}
4282
/// Result of one tool call, as produced by
/// `ConversationManager::process_tool_call`.
struct ToolExecutionOutcome {
    /// Identifier of the call (the `real_id` passed to `process_tool_call`).
    call_id: String,
    /// Tool name after any rewriting applied before execution.
    tool_name: String,
    /// Parsed (and possibly rewritten or defaulted) JSON arguments.
    args: Value,
    /// Tool output, or an `"Error: ..."` message when the call failed.
    output: String,
    /// True when the call was rejected or the tool returned an error.
    is_error: bool,
    /// True when the rejection message started with `"Action blocked:"`.
    blocked_by_policy: bool,
    /// Extra chat messages produced alongside the output (action receipt,
    /// critic review).
    msg_results: Vec<ChatMessage>,
}
4294
/// Cached record of a tool result; currently it stores only the tool name.
// NOTE(review): the code that creates and reads this cache is not visible
// here; confirm what the cache is keyed on.
#[derive(Clone)]
struct CachedToolResult {
    /// Name of the tool that produced the cached result.
    tool_name: String,
}
4299
/// Returns `true` when `path` has a file extension belonging to a known
/// source-code language. The comparison ignores ASCII case; paths without an
/// extension are never code-like.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];

    let Some(raw_ext) = std::path::Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
    else {
        return false;
    };

    let normalized = raw_ext.to_ascii_lowercase();
    CODE_EXTENSIONS.contains(&normalized.as_str())
}
4328
4329pub fn format_tool_display(name: &str, args: &Value) -> String {
4332 let get = |key: &str| {
4333 args.get(key)
4334 .and_then(|v| v.as_str())
4335 .unwrap_or("")
4336 .to_string()
4337 };
4338 match name {
4339 "shell" => format!("$ {}", get("command")),
4340 "map_project" => "map project architecture".to_string(),
4341 "trace_runtime_flow" => format!("trace runtime {}", get("topic")),
4342 "describe_toolchain" => format!("describe toolchain {}", get("topic")),
4343 "inspect_host" => format!("inspect host {}", get("topic")),
4344 _ => format!("{} {:?}", name, args),
4345 }
4346}
4347
/// Heuristic: does this shell command look like a host-inspection query
/// (PATH/tool versions, ports, network, services, processes, user folders)?
///
/// The command is lowercased (ASCII) and checked for any of a fixed list of
/// substrings.
fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const INSPECTION_MARKERS: &[&str] = &[
        "$env:path",
        "pathvariable",
        "pip --version",
        "pipx --version",
        "winget --version",
        "choco",
        "scoop",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "tasklist",
        "ipconfig",
        "get-netipconfiguration",
        "get-netadapter",
        "route print",
        "ifconfig",
        "ip addr",
        "ip route",
        "resolv.conf",
        "get-service",
        "sc query",
        "systemctl",
        "service --status-all",
        "get-process",
        "working set",
        "ps -eo",
        "ps aux",
        "desktop",
        "downloads",
    ];

    let lowered = command.to_ascii_lowercase();
    for marker in INSPECTION_MARKERS {
        if lowered.contains(*marker) {
            return true;
        }
    }
    false
}
4407
/// Truncates `text` to at most `max_bytes` bytes and appends a
/// `"... [output capped at <max_bytes>B]"` marker.
///
/// Text that already fits is returned unchanged. The cut point is moved
/// backwards to the nearest UTF-8 character boundary so the slice never
/// splits a multi-byte character.
fn cap_output(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_owned();
    }

    // Index 0 is always a char boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&candidate| text.is_char_boundary(candidate))
        .unwrap_or(0);

    format!(
        "{}\n... [output capped at {}B]",
        &text[..cut],
        max_bytes
    )
}
4426
/// Counters for the reductions `enforce_prompt_budget` applied to a prompt.
#[derive(Default)]
struct PromptBudgetStats {
    /// Tool results longer than 1200 bytes that were replaced by a summary.
    summarized_tool_results: usize,
    /// Older tool results replaced by a fixed "omitted" placeholder.
    collapsed_tool_results: usize,
    /// User/assistant messages longer than 900 bytes that were summarized.
    trimmed_chat_messages: usize,
    /// Messages removed from the prompt entirely.
    dropped_messages: usize,
}
4434
4435fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
4436 crate::agent::inference::estimate_message_batch_tokens(messages)
4437}
4438
4439fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
4440 let budget = compaction::SummaryCompressionBudget {
4441 max_chars,
4442 max_lines: 3,
4443 max_line_chars: max_chars.clamp(80, 240),
4444 };
4445 let compressed = compaction::compress_summary(text, budget).summary;
4446 if compressed.is_empty() {
4447 String::new()
4448 } else {
4449 compressed
4450 }
4451}
4452
4453fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
4454 let tool_name = message.name.as_deref().unwrap_or("tool");
4455 let body = summarize_prompt_blob(message.content.as_str(), 320);
4456 format!(
4457 "[Prompt-budget summary of prior `{}` result]\n{}",
4458 tool_name, body
4459 )
4460}
4461
4462fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
4463 let role = message.role.as_str();
4464 let body = summarize_prompt_blob(message.content.as_str(), 240);
4465 format!(
4466 "[Prompt-budget summary of earlier {} message]\n{}",
4467 role, body
4468 )
4469}
4470
4471fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
4472 if messages.len() > 1 && messages[1].role != "user" {
4473 messages.insert(1, ChatMessage::user("Continuing previous context..."));
4474 }
4475}
4476
4477fn enforce_prompt_budget(
4478 prompt_msgs: &mut Vec<ChatMessage>,
4479 context_length: usize,
4480) -> Option<String> {
4481 let target_tokens = ((context_length as f64) * 0.68) as usize;
4482 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4483 return None;
4484 }
4485
4486 let mut stats = PromptBudgetStats::default();
4487
4488 let mut tool_indices: Vec<usize> = prompt_msgs
4490 .iter()
4491 .enumerate()
4492 .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4493 .collect();
4494 for idx in tool_indices.iter().rev().copied() {
4495 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4496 break;
4497 }
4498 let original = prompt_msgs[idx].content.as_str().to_string();
4499 if original.len() > 1200 {
4500 prompt_msgs[idx].content =
4501 MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
4502 stats.summarized_tool_results += 1;
4503 }
4504 }
4505
4506 tool_indices = prompt_msgs
4508 .iter()
4509 .enumerate()
4510 .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
4511 .collect();
4512 if tool_indices.len() > 2 {
4513 for idx in tool_indices
4514 .iter()
4515 .take(tool_indices.len().saturating_sub(2))
4516 .copied()
4517 {
4518 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4519 break;
4520 }
4521 prompt_msgs[idx].content = MessageContent::Text(
4522 "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
4523 );
4524 stats.collapsed_tool_results += 1;
4525 }
4526 }
4527
4528 let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4530 for idx in 1..prompt_msgs.len() {
4531 if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
4532 break;
4533 }
4534 if Some(idx) == last_user_idx {
4535 continue;
4536 }
4537 let role = prompt_msgs[idx].role.as_str();
4538 if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
4539 prompt_msgs[idx].content =
4540 MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
4541 stats.trimmed_chat_messages += 1;
4542 }
4543 }
4544
4545 let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
4547 let mut idx = 1usize;
4548 while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
4549 if Some(idx) == preserve_last_user_idx {
4550 idx += 1;
4551 if idx >= prompt_msgs.len() {
4552 break;
4553 }
4554 continue;
4555 }
4556 if idx >= prompt_msgs.len() {
4557 break;
4558 }
4559 prompt_msgs.remove(idx);
4560 stats.dropped_messages += 1;
4561 }
4562
4563 normalize_prompt_start(prompt_msgs);
4564
4565 let new_tokens = estimate_prompt_tokens(prompt_msgs);
4566 if stats.summarized_tool_results == 0
4567 && stats.collapsed_tool_results == 0
4568 && stats.trimmed_chat_messages == 0
4569 && stats.dropped_messages == 0
4570 {
4571 return None;
4572 }
4573
4574 Some(format!(
4575 "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
4576 new_tokens,
4577 target_tokens,
4578 stats.summarized_tool_results,
4579 stats.collapsed_tool_results,
4580 stats.trimmed_chat_messages,
4581 stats.dropped_messages
4582 ))
4583}
4584
/// Heuristic for requests that can be answered with a quick tool run.
///
/// True when the input mentions `run_code` / `run code` (any case), or when
/// it is shorter than 120 bytes and contains one of the compute-style
/// keywords below.
fn is_quick_tool_request(input: &str) -> bool {
    const COMPUTE_KEYWORDS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];

    let lowered = input.to_lowercase();

    let mentions_run_code = lowered.contains("run_code") || lowered.contains("run code");
    let short_compute_ask = input.len() < 120
        && COMPUTE_KEYWORDS
            .iter()
            .any(|keyword| lowered.contains(*keyword));

    mentions_run_code || short_compute_ask
}
4614
/// Splits `text` into chunks of roughly `words_per_chunk` words.
///
/// A "word" ends at every space or newline; each separator counts, so runs
/// of separators count as several words. Separators stay attached to the
/// preceding text, and concatenating the chunks reproduces `text`.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    let is_separator = |ch: char| ch == ' ' || ch == '\n';

    let mut chunks = Vec::new();
    let mut pending = String::new();
    let mut words_in_pending = 0;

    // Each piece ends with exactly one separator, except possibly the last.
    for piece in text.split_inclusive(is_separator) {
        pending.push_str(piece);
        if piece.ends_with(is_separator) {
            words_in_pending += 1;
            if words_in_pending >= words_per_chunk {
                chunks.push(std::mem::take(&mut pending));
                words_in_pending = 0;
            }
        }
    }

    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
4636
4637fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
4638 if call.name != "read_file" {
4639 return None;
4640 }
4641 let normalized_arguments =
4642 crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
4643 let args: Value = serde_json::from_str(&normalized_arguments).ok()?;
4644 let path = args.get("path").and_then(|v| v.as_str())?;
4645 Some(normalize_workspace_path(path))
4646}
4647
4648fn order_batch_reads_first(
4649 calls: Vec<crate::agent::inference::ToolCallResponse>,
4650) -> (
4651 Vec<crate::agent::inference::ToolCallResponse>,
4652 Option<String>,
4653) {
4654 let has_reads = calls.iter().any(|c| {
4655 matches!(
4656 c.function.name.as_str(),
4657 "read_file" | "inspect_lines" | "grep_files" | "list_files"
4658 )
4659 });
4660 let has_edits = calls.iter().any(|c| {
4661 matches!(
4662 c.function.name.as_str(),
4663 "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4664 )
4665 });
4666 if has_reads && has_edits {
4667 let reads: Vec<_> = calls
4668 .into_iter()
4669 .filter(|c| {
4670 !matches!(
4671 c.function.name.as_str(),
4672 "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
4673 )
4674 })
4675 .collect();
4676 let note = Some("Batch ordering: deferring edits until reads complete.".to_string());
4677 (reads, note)
4678 } else {
4679 (calls, None)
4680 }
4681}
4682
/// Decides from the first line of grep output whether the search fanned out
/// widely.
///
/// The first line is treated as a summary: its leading space-delimited token
/// is read as the match count, and the first `", "`-separated part of the
/// form `"<n> hunk(s)"` as the hunk count. Values that are missing or fail to
/// parse count as zero. High fan-out means at least 12 matches or 8 hunks.
fn grep_output_is_high_fanout(output: &str) -> bool {
    let summary = match output.lines().next() {
        Some(first_line) => first_line,
        None => return false,
    };

    let matches_reported = summary
        .split(' ')
        .next()
        .and_then(|token| token.parse::<usize>().ok())
        .unwrap_or(0);
    if matches_reported >= 12 {
        return true;
    }

    let hunks_reported = summary
        .split(", ")
        .filter_map(|part| part.strip_suffix(" hunk(s)"))
        .find_map(|count| count.parse::<usize>().ok())
        .unwrap_or(0);
    hunks_reported >= 8
}
4701
4702fn build_system_with_corrections(
4703 base: &str,
4704 hints: &[String],
4705 gpu: &Arc<GpuState>,
4706 git: &Arc<crate::agent::git_monitor::GitState>,
4707 config: &crate::agent::config::HematiteConfig,
4708) -> String {
4709 let mut system_msg = base.to_string();
4710
4711 system_msg.push_str("\n\n# Permission Mode\n");
4713 let mode_label = match config.mode {
4714 crate::agent::config::PermissionMode::ReadOnly => "READ-ONLY",
4715 crate::agent::config::PermissionMode::Developer => "DEVELOPER",
4716 crate::agent::config::PermissionMode::SystemAdmin => "SYSTEM-ADMIN (UNRESTRICTED)",
4717 };
4718 system_msg.push_str(&format!("CURRENT MODE: {}\n", mode_label));
4719
4720 if config.mode == crate::agent::config::PermissionMode::ReadOnly {
4721 system_msg.push_str("PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n");
4722 } else {
4723 system_msg.push_str("PERMISSION: You have authority to modify code and execute tests with user oversight.\n");
4724 }
4725
4726 let (used, total) = gpu.read();
4728 if total > 0 {
4729 system_msg.push_str("\n\n# Terminal Hardware Context\n");
4730 system_msg.push_str(&format!(
4731 "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
4732 gpu.gpu_name(),
4733 used as f64 / 1024.0,
4734 total as f64 / 1024.0,
4735 gpu.ratio() * 100.0
4736 ));
4737 system_msg.push_str("Use this awareness to manage your context window responsibly.\n");
4738 }
4739
4740 system_msg.push_str("\n\n# Git Repository Context\n");
4742 let git_status_label = git.label();
4743 let git_url = git.url();
4744 system_msg.push_str(&format!(
4745 "REMOTE STATUS: {} | URL: {}\n",
4746 git_status_label, git_url
4747 ));
4748
4749 let root = crate::tools::file_ops::workspace_root();
4751 if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
4752 system_msg.push_str("\nGit status snapshot:\n");
4753 system_msg.push_str(&status_snapshot);
4754 system_msg.push_str("\n");
4755 }
4756
4757 if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
4758 system_msg.push_str("\nGit diff snapshot:\n");
4759 system_msg.push_str(&diff_snapshot);
4760 system_msg.push_str("\n");
4761 }
4762
4763 if git_status_label == "NONE" {
4764 system_msg.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
4765 } else if git_status_label == "BEHIND" {
4766 system_msg.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
4767 }
4768
4769 if hints.is_empty() {
4774 return system_msg;
4775 }
4776 system_msg.push_str("\n\n# Formatting Corrections\n");
4777 system_msg.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
4778 for hint in hints {
4779 system_msg.push_str(&format!("- {}\n", hint));
4780 }
4781 system_msg
4782}
4783
/// Picks a model for the request based on keywords in the user input.
///
/// "Think" keywords (refactor, rewrite, implement, create, fix, debug) select
/// `think_model` when one is configured; otherwise "fast" keywords (what,
/// show, find, list, status) select `fast_model` when one is configured.
/// Matching is case-insensitive substring search. Returns `None` when neither
/// rule applies.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_CUES: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_CUES: &[&str] = &["what", "show", "find", "list", "status"];

    let lowered = user_input.to_lowercase();
    let mentions_any = |cues: &[&str]| cues.iter().any(|cue| lowered.contains(*cue));

    // Think cues take priority, but only if a think model actually exists.
    if mentions_any(THINK_CUES) {
        if let Some(model) = think_model {
            return Some(model);
        }
    }

    if mentions_any(FAST_CUES) {
        fast_model
    } else {
        None
    }
}
4809
4810fn is_parallel_safe(name: &str) -> bool {
4811 let metadata = crate::agent::inference::tool_metadata_for_name(name);
4812 !metadata.mutates_workspace && !metadata.external_surface
4813}
4814
/// Decides whether a chat-mode query should consult the vein.
///
/// Returns `true` when:
/// * `docs_only_mode` is set, or
/// * the query contains one of the recall phrases below (ASCII
///   case-insensitive), or
/// * the query contains a standalone `YYYY-MM-DD` date.
///
/// The date check requires the full digit/dash shape. A looser test (length
/// 10 with a dash at index 4) would also accept runs such as `----------` or
/// `1234-56789`, which are not dates.
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const RECALL_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];

    /// True for exactly ten bytes shaped like `dddd-dd-dd`.
    fn looks_like_iso_date(token: &str) -> bool {
        let bytes = token.as_bytes();
        bytes.len() == 10
            && bytes.iter().enumerate().all(|(pos, byte)| {
                if pos == 4 || pos == 7 {
                    *byte == b'-'
                } else {
                    byte.is_ascii_digit()
                }
            })
    }

    if docs_only_mode {
        return true;
    }

    let lower = query.to_ascii_lowercase();
    if RECALL_CUES.iter().any(|cue| lower.contains(*cue)) {
        return true;
    }

    // Tokens are maximal runs of ASCII digits and dashes.
    lower
        .split(|ch: char| !(ch.is_ascii_digit() || ch == '-'))
        .any(looks_like_iso_date)
}
4841
4842#[cfg(test)]
4843mod tests {
4844 use super::*;
4845
4846 #[test]
4847 fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
4848 let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
4849 let class = classify_runtime_failure(detail);
4850 assert_eq!(class, RuntimeFailureClass::ContextWindow);
4851 assert_eq!(class.tag(), "context_window");
4852 assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
4853 }
4854
4855 #[test]
4856 fn runtime_failure_maps_to_provider_and_checkpoint_state() {
4857 assert_eq!(
4858 provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
4859 Some(ProviderRuntimeState::ContextWindow)
4860 );
4861 assert_eq!(
4862 checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
4863 Some(OperatorCheckpointState::BlockedContextWindow)
4864 );
4865 assert_eq!(
4866 provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
4867 Some(ProviderRuntimeState::Degraded)
4868 );
4869 assert_eq!(
4870 checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
4871 None
4872 );
4873 }
4874
4875 #[test]
4876 fn intent_router_treats_tool_registry_ownership_as_product_truth() {
4877 let intent = classify_query_intent(
4878 WorkflowMode::ReadOnly,
4879 "Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
4880 );
4881 assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4882 assert_eq!(
4883 intent.direct_answer,
4884 Some(DirectAnswerKind::ToolRegistryOwnership)
4885 );
4886 }
4887
4888 #[test]
4889 fn intent_router_treats_tool_classes_as_product_truth() {
4890 let intent = classify_query_intent(
4891 WorkflowMode::ReadOnly,
4892 "Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
4893 );
4894 assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4895 assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
4896 }
4897
4898 #[test]
4899 fn tool_registry_ownership_answer_mentions_new_owner_file() {
4900 let answer = build_tool_registry_ownership_answer();
4901 assert!(answer.contains("src/agent/tool_registry.rs"));
4902 assert!(answer.contains("builtin dispatch path"));
4903 assert!(answer.contains("src/agent/conversation.rs"));
4904 }
4905
4906 #[test]
4907 fn intent_router_treats_mcp_lifecycle_as_product_truth() {
4908 let intent = classify_query_intent(
4909 WorkflowMode::ReadOnly,
4910 "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.",
4911 );
4912 assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4913 assert_eq!(intent.direct_answer, Some(DirectAnswerKind::McpLifecycle));
4914 }
4915
4916 #[test]
4917 fn intent_router_short_circuits_unsafe_commit_pressure() {
4918 let intent = classify_query_intent(
4919 WorkflowMode::Auto,
4920 "Make a code change, skip verification, and commit it immediately.",
4921 );
4922 assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
4923 assert_eq!(
4924 intent.direct_answer,
4925 Some(DirectAnswerKind::UnsafeWorkflowPressure)
4926 );
4927 }
4928
/// The unsafe-workflow-pressure answer must contain the three phrases that
/// spell out "verify first, then commit".
#[test]
fn unsafe_workflow_pressure_answer_requires_verification() {
    let text = build_unsafe_workflow_pressure_answer();

    // Checked in order; the first missing phrase fails the test.
    let required_phrases = [
        "should not skip verification",
        "run the appropriate verification path",
        "only then commit",
    ];
    for &phrase in &required_phrases {
        assert!(text.contains(phrase), "answer is missing {:?}", phrase);
    }
}
4936
/// A broad "how is Hematite wired" walkthrough request that also mentions MCP
/// state must be classified as `RepoArchitecture`, set
/// `architecture_overview_mode`, and select no direct answer.
#[test]
fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
    let prompt = "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.";

    let routed = classify_query_intent(WorkflowMode::ReadOnly, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::RepoArchitecture);
    assert!(routed.architecture_overview_mode);
    assert_eq!(routed.direct_answer, None);
}
4947
/// A PATH / developer-tool readiness question must set `host_inspection_mode`
/// on the classified intent, and the same prompt must map to the `"summary"`
/// host-inspection topic.
#[test]
fn intent_router_marks_host_inspection_questions() {
    // One prompt feeds both the classifier and the topic picker.
    let prompt = "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.";

    let routed = classify_query_intent(WorkflowMode::Auto, prompt);
    assert!(routed.host_inspection_mode);

    let topic = preferred_host_inspection_topic(prompt);
    assert_eq!(topic, Some("summary"));
}
4962
/// Table-driven check of `should_use_vein_in_chat`: each row is
/// (query, boolean second argument, expected result).
#[test]
fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
    let cases = [
        (
            "What did we decide on 2026-04-09 about docs-only mode?",
            false,
            true,
        ),
        ("Summarize these local notes", true, true),
        ("Tell me a joke", false, false),
    ];

    for &(query, flag, expected) in &cases {
        assert_eq!(
            should_use_vein_in_chat(query, flag),
            expected,
            "unexpected vein decision for {:?}",
            query
        );
    }
}
4972
/// Every listed shell command must be recognised by
/// `shell_looks_like_structured_host_inspection`.
#[test]
fn shell_host_inspection_guard_matches_path_and_version_commands() {
    // Checked in order; the first unrecognised command fails the test.
    let commands = [
        "$env:PATH -split ';'",
        "cargo --version",
        "Get-NetTCPConnection -LocalPort 3000",
        "netstat -ano | findstr :3000",
        "Get-Process | Sort-Object WS -Descending",
        "ipconfig /all",
        "Get-Service",
        "winget --version",
    ];

    for &command in &commands {
        assert!(
            shell_looks_like_structured_host_inspection(command),
            "command not recognised as host inspection: {:?}",
            command
        );
    }
}
4996
/// A "what is listening on port N" question must map to the `"ports"`
/// host-inspection topic.
#[test]
fn intent_router_picks_ports_for_listening_port_questions() {
    let question =
        "Show me what is listening on port 3000 and whether anything unexpected is exposed.";

    let topic = preferred_host_inspection_topic(question);

    assert_eq!(topic, Some("ports"));
}
5006
/// A question about which processes use the most RAM must map to the
/// `"processes"` host-inspection topic.
#[test]
fn intent_router_picks_processes_for_host_process_questions() {
    let question = "Show me what processes are using the most RAM right now.";

    let topic = preferred_host_inspection_topic(question);

    assert_eq!(topic, Some("processes"));
}
5016
/// A question about adapters, IP addresses, gateways and DNS servers must map
/// to the `"network"` host-inspection topic.
#[test]
fn intent_router_picks_network_for_adapter_questions() {
    let question =
        "Show me my active network adapters, IP addresses, gateways, and DNS servers.";

    let topic = preferred_host_inspection_topic(question);

    assert_eq!(topic, Some("network"));
}
5026
/// A question about running services and startup types must map to the
/// `"services"` host-inspection topic.
#[test]
fn intent_router_picks_services_for_service_questions() {
    let question =
        "Show me the running services and startup types that matter for a normal dev machine.";

    let topic = preferred_host_inspection_topic(question);

    assert_eq!(topic, Some("services"));
}
5036
/// An "environment doctor" request about PATH and package managers must map to
/// the `"env_doctor"` host-inspection topic.
#[test]
fn intent_router_picks_env_doctor_for_package_manager_questions() {
    let question = "Run an environment doctor on this machine and tell me whether my PATH and package managers look sane.";

    let topic = preferred_host_inspection_topic(question);

    assert_eq!(topic, Some("env_doctor"));
}
5046
/// "How do I fix ..." remediation questions must map to the `"fix_plan"`
/// host-inspection topic.
#[test]
fn intent_router_picks_fix_plan_for_host_remediation_questions() {
    // Checked in order; the first prompt that maps elsewhere fails the test.
    let remediation_prompts = [
        "How do I fix cargo not found on this machine?",
        "How do I fix Hematite when LM Studio is not reachable on localhost:1234?",
    ];

    for &prompt in &remediation_prompts {
        assert_eq!(preferred_host_inspection_topic(prompt), Some("fix_plan"));
    }
}
5060
/// When an `inspect_host` call with topic `fix_plan` has no `issue` argument,
/// `fill_missing_fix_plan_issue` must fill it from the latest user prompt; the
/// expected value here is the prompt without its leading `/think` line.
#[test]
fn fill_missing_fix_plan_issue_backfills_last_user_prompt() {
    let mut call_args = serde_json::json!({ "topic": "fix_plan" });
    let latest_prompt = "/think\nHow do I fix cargo not found on this machine?";

    fill_missing_fix_plan_issue("inspect_host", &mut call_args, Some(latest_prompt));

    let backfilled_issue = call_args
        .get("issue")
        .and_then(serde_json::Value::as_str);
    assert_eq!(
        backfilled_issue,
        Some("How do I fix cargo not found on this machine?")
    );
}
5078
/// A `shell` call running `where cargo` in response to a "how do I fix"
/// prompt must be flagged by `should_rewrite_shell_to_fix_plan`.
#[test]
fn shell_fix_question_rewrites_to_fix_plan() {
    let probe_args = serde_json::json!({ "command": "where cargo" });
    let latest_prompt = Some("How do I fix cargo not found on this machine?");

    let should_rewrite = should_rewrite_shell_to_fix_plan("shell", &probe_args, latest_prompt);

    assert!(should_rewrite);
}
5091
/// For a "how do I fix" prompt, the dedupe key of a `shell` probe
/// (`where cargo`) must equal the dedupe key of an `inspect_host` call with
/// topic `fix_plan`.
#[test]
fn fix_plan_dedupe_key_matches_rewritten_shell_probe() {
    let prompt = Some("How do I fix cargo not found on this machine?");

    // Left operand (the shell probe) is evaluated before the right one.
    assert_eq!(
        normalized_tool_call_key_for_dedupe(
            "shell",
            r#"{"command":"where cargo"}"#,
            false,
            prompt,
        ),
        normalized_tool_call_key_for_dedupe(
            "inspect_host",
            r#"{"topic":"fix_plan"}"#,
            false,
            prompt,
        )
    );
}
5110
/// Feeds a two-error build log to `parse_failing_paths_from_build_output` and
/// expects exactly two paths: one containing `conversation.rs` and one
/// containing `file_ops.rs`.
#[test]
fn failing_path_parser_extracts_cargo_error_locations() {
    let build_log = r#"
BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:

error[E0412]: cannot find type `Foo` in this scope
 --> src/agent/conversation.rs:42:12
 |
42 | field: Foo,
 | ^^^ not found

error[E0308]: mismatched types
 --> src/tools/file_ops.rs:100:5
 |
 = note: expected `String`, found `&str`
"#;

    let failing = parse_failing_paths_from_build_output(build_log);
    let mentions = |file_name: &str| failing.iter().any(|path| path.contains(file_name));

    assert!(mentions("conversation.rs"), "should capture conversation.rs");
    assert!(mentions("file_ops.rs"), "should capture file_ops.rs");
    assert_eq!(failing.len(), 2, "no duplicates");
}
5138
/// A `<macro-expansion>` location must not be reported: of the two `-->` lines
/// in the log, only the one pointing at `file.rs` may be returned.
#[test]
fn failing_path_parser_ignores_macro_expansions() {
    let build_log = r#"
 --> <macro-expansion>:1:2
 --> src/real/file.rs:10:5
"#;

    let failing = parse_failing_paths_from_build_output(build_log);

    assert_eq!(failing.len(), 1);
    let sole_path = &failing[0];
    assert!(sole_path.contains("file.rs"));
}
5149}