use crate::agent::architecture_summary::{
build_architecture_overview_answer, prune_architecture_trace_batch,
prune_authoritative_tool_batch, prune_read_only_context_bloat_batch,
summarize_project_map_output, summarize_runtime_trace_output,
};
use crate::agent::direct_answers::{
build_architect_session_reset_plan, build_authorization_policy_answer,
build_gemma_native_answer, build_gemma_native_settings_answer, build_identity_answer,
build_language_capability_answer, build_mcp_lifecycle_answer, build_product_surface_answer,
build_reasoning_split_answer, build_recovery_recipes_answer, build_session_memory_answer,
build_session_reset_semantics_answer, build_tool_classes_answer,
build_tool_registry_ownership_answer, build_unsafe_workflow_pressure_answer,
build_verify_profiles_answer, build_workflow_modes_answer,
};
use crate::agent::inference::{
ChatMessage, InferenceEngine, InferenceEvent, MessageContent, OperatorCheckpointState,
ProviderRuntimeState, ToolCallFn, ToolDefinition, ToolFunction,
};
use crate::agent::policy::{
action_target_path, docs_edit_without_explicit_request, is_destructive_tool,
is_mcp_mutating_tool, is_mcp_workspace_read_tool, normalize_workspace_path,
};
use crate::agent::recovery_recipes::{
attempt_recovery, plan_recovery, preview_recovery_decision, RecoveryContext, RecoveryDecision,
RecoveryPlan, RecoveryScenario, RecoveryStep,
};
use crate::agent::routing::{
classify_query_intent, is_capability_probe_tool, looks_like_mutation_request,
preferred_host_inspection_topic, DirectAnswerKind, QueryIntentClass,
};
use crate::agent::tool_registry::dispatch_builtin_tool;
use crate::agent::compaction::{self, CompactionConfig};
use crate::ui::gpu_monitor::GpuState;
use serde_json::Value;
use std::sync::Arc;
use tokio::sync::{mpsc, Mutex};
/// A single user submission: prompt text plus at most one attached document
/// and at most one attached image.
#[derive(Clone, Debug, Default)]
pub struct UserTurn {
/// Prompt text for this turn.
pub text: String,
/// Document attached to the turn, if any.
pub attached_document: Option<AttachedDocument>,
/// Image attached to the turn, if any.
pub attached_image: Option<AttachedImage>,
}
/// A named document attached to a user turn.
#[derive(Clone, Debug)]
pub struct AttachedDocument {
/// Display name, used in the `[Attached document: ...]` marker.
pub name: String,
/// Document text; `apply_turn_attachments` inlines it ahead of the prompt.
pub content: String,
}
/// A named image attached to a user turn.
#[derive(Clone, Debug)]
pub struct AttachedImage {
/// Display name, used in the `[Attached image: ...]` marker.
pub name: String,
/// Location of the image. NOTE(review): presumably a filesystem path —
/// confirm against the code that consumes it.
pub path: String,
}
impl UserTurn {
    /// Creates a turn that carries only prompt text, with no document or
    /// image attached.
    pub fn text(text: impl Into<String>) -> Self {
        Self {
            text: text.into(),
            ..Self::default()
        }
    }
}
/// Serialized shape of the persisted session file (see `session_path`): the
/// compacted running summary plus the structured session memory.
#[derive(serde::Serialize, serde::Deserialize)]
struct SavedSession {
/// Running summary carried across sessions, if one exists.
running_summary: Option<String>,
/// Structured session memory; `#[serde(default)]` lets files that lack this
/// field still deserialize.
#[serde(default)]
session_memory: crate::agent::compaction::SessionMemory,
}
/// Bookkeeping for action grounding: which paths were read or inspected on
/// which turn, and the outcome of the most recent build verification.
#[derive(Default)]
struct ActionGroundingState {
/// Turn counter; `begin_grounded_turn` increments it.
turn_index: u64,
/// Normalized path -> turn index at which it was last recorded as observed.
observed_paths: std::collections::HashMap<String, u64>,
/// Normalized path -> turn index at which it was last recorded as inspected.
inspected_paths: std::collections::HashMap<String, u64>,
/// Turn index of the most recent recorded build verification, if any.
last_verify_build_turn: Option<u64>,
/// Whether that most recent verification succeeded.
last_verify_build_ok: bool,
/// Paths parsed from the output of the most recent failed verification;
/// cleared when a verification succeeds.
last_failed_build_paths: Vec<String>,
/// Written by `record_successful_mutation`; reset to false when a
/// verification succeeds.
code_changed_since_verify: bool,
}
/// Scope guard around a shared boolean flag; its `Drop` impl stores `false`
/// into the flag when the guard is dropped.
struct PlanExecutionGuard {
/// Shared flag that is cleared on drop.
flag: Arc<std::sync::atomic::AtomicBool>,
}
impl Drop for PlanExecutionGuard {
    /// Clears the shared flag so it reads `false` once the guard is gone.
    fn drop(&mut self) {
        use std::sync::atomic::Ordering;

        self.flag.store(false, Ordering::SeqCst);
    }
}
/// Workflow mode selected for the conversation. `Auto` is the default.
/// `Ask`, `Architect`, and `ReadOnly` are the modes `is_read_only` reports as
/// read-only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) enum WorkflowMode {
/// Default mode (label `AUTO`).
#[default]
Auto,
/// Read-only question/inspection mode (label `ASK`).
Ask,
/// Implementation mode (label `CODE`).
Code,
/// Plan-first, read-only mode (label `ARCHITECT`).
Architect,
/// Hard no-mutation mode (label `READ-ONLY`).
ReadOnly,
/// Conversational mode (label `CHAT`).
Chat,
}
impl WorkflowMode {
fn label(self) -> &'static str {
match self {
WorkflowMode::Auto => "AUTO",
WorkflowMode::Ask => "ASK",
WorkflowMode::Code => "CODE",
WorkflowMode::Architect => "ARCHITECT",
WorkflowMode::ReadOnly => "READ-ONLY",
WorkflowMode::Chat => "CHAT",
}
}
fn is_read_only(self) -> bool {
matches!(
self,
WorkflowMode::Ask | WorkflowMode::Architect | WorkflowMode::ReadOnly
)
}
pub(crate) fn is_chat(self) -> bool {
matches!(self, WorkflowMode::Chat)
}
}
/// Location of the persisted session file:
/// `<workspace root>/.hematite/session.json`.
fn session_path() -> std::path::PathBuf {
    let hematite_dir = crate::tools::file_ops::workspace_root().join(".hematite");
    hematite_dir.join("session.json")
}
/// Loads the persisted running summary and session memory.
///
/// Any failure — missing file, unreadable file, or JSON that does not parse —
/// yields `(None, SessionMemory::default())`.
fn load_session_data() -> (Option<String>, crate::agent::compaction::SessionMemory) {
    // A missing file surfaces as a read error, which maps to the same default.
    let restored = std::fs::read_to_string(session_path())
        .ok()
        .and_then(|raw| serde_json::from_str::<SavedSession>(&raw).ok());

    match restored {
        Some(saved) => (saved.running_summary, saved.session_memory),
        None => (None, crate::agent::compaction::SessionMemory::default()),
    }
}
/// Best-effort reset of the task artifacts under the workspace root.
///
/// Removes `.hematite/TASK.md`, `.hematite/PLAN.md`, `.hematite/WALKTHROUGH.md`
/// and `.github/WALKTHROUGH.md`, then recreates `TASK.md` and `PLAN.md` as
/// empty files. Every I/O error is ignored.
fn reset_task_files() {
    let root = crate::tools::file_ops::workspace_root();
    let hematite_dir = root.join(".hematite");

    let stale_files = [
        hematite_dir.join("TASK.md"),
        hematite_dir.join("PLAN.md"),
        hematite_dir.join("WALKTHROUGH.md"),
        root.join(".github").join("WALKTHROUGH.md"),
    ];
    for stale in &stale_files {
        let _ = std::fs::remove_file(stale);
    }

    for name in ["TASK.md", "PLAN.md"] {
        let _ = std::fs::write(hematite_dir.join(name), "");
    }
}
/// Best-effort purge of persisted memory and logs under the workspace root.
///
/// If `.hematite/memories` exists it is deleted and recreated empty. If
/// `.hematite_logs` exists, an empty payload is written to every entry in it.
/// Every I/O error is ignored.
fn purge_persistent_memory() {
    let root = crate::tools::file_ops::workspace_root();

    let mem_dir = root.join(".hematite").join("memories");
    if mem_dir.exists() {
        let _ = std::fs::remove_dir_all(&mem_dir);
        let _ = std::fs::create_dir_all(&mem_dir);
    }

    let log_dir = root.join(".hematite_logs");
    if !log_dir.exists() {
        return;
    }
    let Ok(entries) = std::fs::read_dir(&log_dir) else {
        return;
    };
    entries.flatten().for_each(|entry| {
        let _ = std::fs::write(entry.path(), "");
    });
}
/// Builds the prompt text sent to the model for a turn.
///
/// The prompt is trimmed. An attached document is inlined ahead of it (name
/// marker, content, `---` separator), and an attached image adds an
/// `[Attached image: ...]` marker in front of everything else.
fn apply_turn_attachments(user_turn: &UserTurn, prompt: &str) -> String {
    let body = match &user_turn.attached_document {
        Some(doc) => format!(
            "[Attached document: {}]\n\n{}\n\n---\n\n{}",
            doc.name,
            doc.content,
            prompt.trim()
        ),
        None => prompt.trim().to_string(),
    };

    match &user_turn.attached_image {
        None => body,
        Some(image) if body.is_empty() => format!("[Attached image: {}]", image.name),
        Some(image) => format!("[Attached image: {}]\n\n{}", image.name, body),
    }
}
/// Builds the transcript form of a user turn: attachment markers (names only,
/// no document content) on their own lines, followed by the prompt.
///
/// With no attachments the prompt is returned unchanged. With attachments and
/// a blank prompt only the markers are returned.
fn transcript_user_turn_text(user_turn: &UserTurn, prompt: &str) -> String {
    let doc_tag = user_turn
        .attached_document
        .as_ref()
        .map(|doc| format!("[Attached document: {}]", doc.name));
    let image_tag = user_turn
        .attached_image
        .as_ref()
        .map(|image| format!("[Attached image: {}]", image.name));
    let tags: Vec<String> = doc_tag.into_iter().chain(image_tag).collect();

    match (tags.is_empty(), prompt.trim().is_empty()) {
        (true, _) => prompt.to_string(),
        (false, true) => tags.join("\n"),
        (false, false) => format!("{}\n{}", tags.join("\n"), prompt),
    }
}
/// Coarse classification of a runtime failure, derived from the failure text
/// by `classify_runtime_failure`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeFailureClass {
/// The prompt or session hit the model's context limit.
ContextWindow,
/// The provider was unreachable, errored, or returned an unparseable response.
ProviderDegraded,
/// A tool call had missing or invalid arguments.
ToolArgMalformed,
/// A tool call was blocked, denied, or declined.
ToolPolicyBlocked,
/// Repeated or looping tool failures.
ToolLoop,
/// A build or verification step failed.
VerificationFailed,
/// The model returned an empty response.
EmptyModelResponse,
/// None of the known failure patterns matched.
Unknown,
}
impl RuntimeFailureClass {
fn tag(self) -> &'static str {
match self {
RuntimeFailureClass::ContextWindow => "context_window",
RuntimeFailureClass::ProviderDegraded => "provider_degraded",
RuntimeFailureClass::ToolArgMalformed => "tool_arg_malformed",
RuntimeFailureClass::ToolPolicyBlocked => "tool_policy_blocked",
RuntimeFailureClass::ToolLoop => "tool_loop",
RuntimeFailureClass::VerificationFailed => "verification_failed",
RuntimeFailureClass::EmptyModelResponse => "empty_model_response",
RuntimeFailureClass::Unknown => "unknown",
}
}
fn operator_guidance(self) -> &'static str {
match self {
RuntimeFailureClass::ContextWindow => {
"Narrow the request, compact the session, or preserve grounded tool output instead of restyling it. If LM Studio reports a smaller live n_ctx than Hematite expected, reload or re-detect the model budget before retrying."
}
RuntimeFailureClass::ProviderDegraded => {
"Retry once automatically, then narrow the turn or restart LM Studio if it persists."
}
RuntimeFailureClass::ToolArgMalformed => {
"Retry with repaired or narrower tool arguments instead of repeating the same malformed call."
}
RuntimeFailureClass::ToolPolicyBlocked => {
"Stay inside the allowed workflow or switch modes before retrying."
}
RuntimeFailureClass::ToolLoop => {
"Stop repeating the same failing tool pattern and switch to a narrower recovery step."
}
RuntimeFailureClass::VerificationFailed => {
"Fix the build or test failure before treating the task as complete."
}
RuntimeFailureClass::EmptyModelResponse => {
"Retry once automatically, then narrow the turn or restart LM Studio if the model keeps returning nothing."
}
RuntimeFailureClass::Unknown => {
"Inspect the latest grounded tool results or provider status before retrying."
}
}
}
}
/// Classifies a failure message by case-insensitive substring matching.
///
/// The groups are checked in a fixed order and the first match wins:
/// context window, empty model response, provider degraded, malformed tool
/// arguments, policy block, tool loop, verification failure. Anything else is
/// `Unknown`.
fn classify_runtime_failure(detail: &str) -> RuntimeFailureClass {
    let lower = detail.to_ascii_lowercase();
    let has_any = |needles: &[&str]| needles.iter().any(|needle| lower.contains(*needle));

    // `n_keep` only signals a context overflow when `n_ctx` is mentioned too.
    let context_overflow = has_any(&[
        "context_window_blocked",
        "context ceiling reached",
        "exceeds the",
        "context length",
        "keep from the initial prompt",
        "prompt is greater than the context length",
    ]) || (lower.contains("n_keep") && lower.contains("n_ctx"));
    if context_overflow {
        return RuntimeFailureClass::ContextWindow;
    }

    if has_any(&[
        "empty response from model",
        "model returned an empty response",
    ]) {
        return RuntimeFailureClass::EmptyModelResponse;
    }

    if has_any(&[
        "lm studio unreachable",
        "lm studio error",
        "request failed",
        "response parse error",
        "provider degraded",
    ]) {
        return RuntimeFailureClass::ProviderDegraded;
    }

    if has_any(&[
        "missing required argument",
        "json repair failed",
        "invalid pattern",
        "invalid line range",
    ]) {
        return RuntimeFailureClass::ToolArgMalformed;
    }

    if has_any(&["action blocked:", "access denied", "declined by user"]) {
        return RuntimeFailureClass::ToolPolicyBlocked;
    }

    if has_any(&[
        "too many consecutive tool errors",
        "repeated tool failures",
        "stuck in a loop",
    ]) {
        return RuntimeFailureClass::ToolLoop;
    }

    if has_any(&["build failed", "verification failed", "verify_build"]) {
        return RuntimeFailureClass::VerificationFailed;
    }

    RuntimeFailureClass::Unknown
}
/// Renders a failure as `[failure:<tag>] <guidance> Detail: <detail>`, with
/// the detail text trimmed.
fn format_runtime_failure(class: RuntimeFailureClass, detail: &str) -> String {
    let tag = class.tag();
    let guidance = class.operator_guidance();
    let detail = detail.trim();
    format!("[failure:{tag}] {guidance} Detail: {detail}")
}
fn provider_state_for_runtime_failure(class: RuntimeFailureClass) -> Option<ProviderRuntimeState> {
match class {
RuntimeFailureClass::ContextWindow => Some(ProviderRuntimeState::ContextWindow),
RuntimeFailureClass::ProviderDegraded => Some(ProviderRuntimeState::Degraded),
RuntimeFailureClass::EmptyModelResponse => Some(ProviderRuntimeState::EmptyResponse),
_ => None,
}
}
fn checkpoint_state_for_runtime_failure(
class: RuntimeFailureClass,
) -> Option<OperatorCheckpointState> {
match class {
RuntimeFailureClass::ContextWindow => Some(OperatorCheckpointState::BlockedContextWindow),
RuntimeFailureClass::ToolPolicyBlocked => Some(OperatorCheckpointState::BlockedPolicy),
RuntimeFailureClass::ToolLoop => Some(OperatorCheckpointState::BlockedToolLoop),
RuntimeFailureClass::VerificationFailed => {
Some(OperatorCheckpointState::BlockedVerification)
}
_ => None,
}
}
fn compact_runtime_recovery_summary(class: RuntimeFailureClass) -> &'static str {
match class {
RuntimeFailureClass::ProviderDegraded => {
"LM Studio degraded during the turn; retrying once before surfacing a failure."
}
RuntimeFailureClass::EmptyModelResponse => {
"The model returned an empty reply; retrying once before surfacing a failure."
}
_ => "Runtime recovery in progress.",
}
}
fn checkpoint_summary_for_runtime_failure(class: RuntimeFailureClass) -> &'static str {
match class {
RuntimeFailureClass::ContextWindow => "Provider context ceiling confirmed.",
RuntimeFailureClass::ToolPolicyBlocked => "Policy blocked the current action.",
RuntimeFailureClass::ToolLoop => "Repeated failing tool pattern stopped.",
RuntimeFailureClass::VerificationFailed => "Verification failed; fix before continuing.",
_ => "Operator checkpoint updated.",
}
}
fn compact_runtime_failure_summary(class: RuntimeFailureClass) -> &'static str {
match class {
RuntimeFailureClass::ContextWindow => "LM context ceiling hit.",
RuntimeFailureClass::ProviderDegraded => {
"LM Studio degraded and did not recover cleanly; operator action is now required."
}
RuntimeFailureClass::EmptyModelResponse => {
"LM Studio returned an empty reply after recovery; operator action is now required."
}
RuntimeFailureClass::ToolLoop => {
"Repeated failing tool pattern detected; Hematite stopped the loop."
}
_ => "Runtime failure surfaced to the operator.",
}
}
fn should_retry_runtime_failure(class: RuntimeFailureClass) -> bool {
matches!(
class,
RuntimeFailureClass::ProviderDegraded | RuntimeFailureClass::EmptyModelResponse
)
}
fn recovery_scenario_for_runtime_failure(class: RuntimeFailureClass) -> Option<RecoveryScenario> {
match class {
RuntimeFailureClass::ContextWindow => Some(RecoveryScenario::ContextWindow),
RuntimeFailureClass::ProviderDegraded => Some(RecoveryScenario::ProviderDegraded),
RuntimeFailureClass::EmptyModelResponse => Some(RecoveryScenario::EmptyModelResponse),
RuntimeFailureClass::ToolPolicyBlocked => Some(RecoveryScenario::McpWorkspaceReadBlocked),
RuntimeFailureClass::ToolLoop => Some(RecoveryScenario::ToolLoop),
RuntimeFailureClass::VerificationFailed => Some(RecoveryScenario::VerificationFailed),
RuntimeFailureClass::ToolArgMalformed | RuntimeFailureClass::Unknown => None,
}
}
/// One-line summary of a recovery plan: `<scenario label> [<steps summary>]`.
fn compact_recovery_plan_summary(plan: &RecoveryPlan) -> String {
    let recipe = &plan.recipe;
    let label = recipe.scenario.label();
    let steps = recipe.steps_summary();
    format!("{} [{}]", label, steps)
}
/// One-line summary of a recovery decision.
///
/// An attempt renders as `<scenario label> [<steps summary>]`. An escalation
/// also reports attempts made against the recipe's maximum, with the maximum
/// shown as at least 1.
fn compact_recovery_decision_summary(decision: &RecoveryDecision) -> String {
    match decision {
        RecoveryDecision::Attempt(plan) => format!(
            "{} [{}]",
            plan.recipe.scenario.label(),
            plan.recipe.steps_summary()
        ),
        RecoveryDecision::Escalate {
            recipe,
            attempts_made,
            ..
        } => {
            let label = recipe.scenario.label();
            let attempt_budget = recipe.max_attempts.max(1);
            let steps = recipe.steps_summary();
            format!(
                "{} escalated after {} / {} [{}]",
                label, attempts_made, attempt_budget, steps
            )
        }
    }
}
/// Extracts the files named in `--> path:line:col` location lines of build
/// output.
///
/// Relative paths are resolved against the workspace root. Each result has
/// backslashes replaced with `/` and is lowercased; the list is sorted and
/// de-duplicated. Locations whose file part is empty or starts with `<`
/// (for example `<macro>`) are skipped.
fn parse_failing_paths_from_build_output(output: &str) -> Vec<String> {
    /// Returns the file portion of a `path:line:col` location.
    ///
    /// A leading Windows drive prefix (`C:\` or `C:/`) is kept as part of the
    /// path, so its colon is not mistaken for the line separator.
    fn location_file_part(location: &str) -> &str {
        let bytes = location.as_bytes();
        let has_drive_prefix = bytes.len() >= 3
            && bytes[0].is_ascii_alphabetic()
            && bytes[1] == b':'
            && matches!(bytes[2], b'\\' | b'/');
        // The first two bytes are ASCII when the prefix matches, so slicing at
        // offset 2 always lands on a char boundary.
        let search_from = if has_drive_prefix { 2 } else { 0 };
        match location[search_from..].find(':') {
            Some(offset) => &location[..search_from + offset],
            None => location,
        }
    }

    let root = crate::tools::file_ops::workspace_root();
    let mut paths: Vec<String> = output
        .lines()
        .filter_map(|line| {
            let location = line.trim_start().strip_prefix("--> ")?;
            let file_part = location_file_part(location);
            if file_part.is_empty() || file_part.starts_with('<') {
                return None;
            }
            let candidate = std::path::Path::new(file_part);
            let resolved = if candidate.is_absolute() {
                candidate.to_path_buf()
            } else {
                root.join(candidate)
            };
            Some(resolved.to_string_lossy().replace('\\', "/").to_lowercase())
        })
        .collect();
    paths.sort_unstable();
    paths.dedup();
    paths
}
/// Message explaining why a mutation request was redirected in the given
/// workflow mode, and which mode to switch to.
fn build_mode_redirect_answer(mode: WorkflowMode) -> String {
    let message: &str = match mode {
        WorkflowMode::Ask => "Workflow mode ASK is read-only. I can inspect the code, explain what should change, or review the target area, but I will not modify files here. Switch to `/code` to implement the change, or `/auto` to let Hematite choose.",
        WorkflowMode::Architect => "Workflow mode ARCHITECT is plan-first. I can inspect the code and design the implementation approach, but I will not mutate files until you explicitly switch to `/code` or ask me to implement.",
        WorkflowMode::ReadOnly => "Workflow mode READ-ONLY is a hard no-mutation mode. I can analyze, inspect, and explain, but I will not edit files, run mutating shell commands, or commit changes. Switch to `/code` or `/auto` if you want implementation.",
        WorkflowMode::Auto | WorkflowMode::Code | WorkflowMode::Chat => {
            "Switch to `/code` or `/auto` to allow implementation."
        }
    };
    String::from(message)
}
/// Fixed instruction block describing the output format for ARCHITECT
/// handoffs: a compact answer under six exact headings.
fn architect_handoff_contract() -> &'static str {
    concat!(
        "ARCHITECT OUTPUT CONTRACT:\n",
        "Use a compact implementation handoff, not a process narrative.\n",
        "Do not say \"the first step\" or describe what you are about to do.\n",
        "After one or two read-only inspection tools at most, stop and answer.\n",
        "For runtime wiring, reset behavior, or control-flow questions, prefer `trace_runtime_flow` over a broad `map_project` scan.\n",
        "If you do use `map_project`, keep it minimal and scoped.\n",
        "Use these exact ASCII headings and keep each section short:\n",
        "# Goal\n",
        "# Target Files\n",
        "# Ordered Steps\n",
        "# Verification\n",
        "# Risks\n",
        "# Open Questions\n",
        "Keep the whole handoff concise and implementation-oriented.",
    )
}
/// Returns true when the input asks to implement the saved plan, i.e. it
/// contains the phrase "implement the current plan" (ASCII case-insensitive).
fn is_current_plan_execution_request(user_input: &str) -> bool {
    // A substring test already covers the exact-match forms, with or without
    // the trailing period, so no separate equality checks are needed.
    user_input
        .to_ascii_lowercase()
        .contains("implement the current plan")
}
/// Returns the `plan_scope` flag from the metadata registered for the named tool.
fn is_plan_scoped_tool(name: &str) -> bool {
crate::agent::inference::tool_metadata_for_name(name).plan_scope
}
/// Returns true when the named tool's metadata does NOT have `plan_scope` set
/// (the exact negation of `is_plan_scoped_tool`).
fn is_current_plan_irrelevant_tool(name: &str) -> bool {
!crate::agent::inference::tool_metadata_for_name(name).plan_scope
}
/// Returns true when the named tool is plan-scoped and its metadata says it
/// does not mutate the workspace.
fn is_non_mutating_plan_step_tool(name: &str) -> bool {
    let tool = crate::agent::inference::tool_metadata_for_name(name);
    if !tool.plan_scope {
        return false;
    }
    !tool.mutates_workspace
}
/// Parses an inline workflow prefix such as `/ask <prompt>`.
///
/// Recognized prefixes are `/ask`, `/code`, `/architect`, `/read-only`, and
/// `/auto`. The prefix must be followed by whitespace and a non-empty prompt;
/// the returned prompt is trimmed. Returns `None` for a bare prefix, an
/// unknown prefix, or input where the prefix is only the start of a longer
/// word (for example `/asking`).
fn parse_inline_workflow_prompt(user_input: &str) -> Option<(WorkflowMode, &str)> {
    const INLINE_MODES: [(&str, WorkflowMode); 5] = [
        ("/ask", WorkflowMode::Ask),
        ("/code", WorkflowMode::Code),
        ("/architect", WorkflowMode::Architect),
        ("/read-only", WorkflowMode::ReadOnly),
        ("/auto", WorkflowMode::Auto),
    ];

    let trimmed = user_input.trim();
    for (prefix, mode) in INLINE_MODES {
        let Some(rest) = trimmed.strip_prefix(prefix) else {
            continue;
        };
        // Require a separator so `/asking ...` or `/architecture ...` is not
        // mistaken for `/ask` or `/architect` with a mangled prompt.
        if !rest.starts_with(char::is_whitespace) {
            continue;
        }
        let rest = rest.trim();
        if !rest.is_empty() {
            return Some((mode, rest));
        }
    }
    None
}
/// Returns the tool definitions from the tool registry (thin wrapper over
/// `tool_registry::get_tools`).
pub fn get_tools() -> Vec<ToolDefinition> {
crate::agent::tool_registry::get_tools()
}
/// Per-conversation agent state: message history, the inference engine handle,
/// tool definitions, and the session/runtime bookkeeping that the methods of
/// this type operate on.
pub struct ConversationManager {
/// Chat messages for the conversation; `new` seeds it with the system prompt.
pub history: Vec<ChatMessage>,
/// Shared inference engine.
pub engine: Arc<InferenceEngine>,
/// Tool definitions; `new` fills this from `get_tools()`.
pub tools: Vec<ToolDefinition>,
/// Shared MCP manager.
pub mcp_manager: Arc<Mutex<crate::agent::mcp_manager::McpManager>>,
// The next four settings are forwarded to `engine.build_system_prompt` in `new`.
pub professional: bool,
pub brief: bool,
pub snark: u8,
pub chaos: u8,
/// Optional model identifier supplied at construction.
/// NOTE(review): presumably the model used for fast turns — confirm.
pub fast_model: Option<String>,
/// Optional model identifier supplied at construction.
/// NOTE(review): presumably the model used for thinking turns — confirm.
pub think_model: Option<String>,
/// Starts empty in `new`.
pub correction_hints: Vec<String>,
/// Compacted summary of earlier work; restored from the session file in
/// `new` and written back by `save_session`.
pub running_summary: Option<String>,
/// Shared GPU state; `emit_compaction_pressure` reads its `ratio()`.
pub gpu_state: Arc<GpuState>,
/// Vein index, backed by `.hematite/vein.db` with an in-memory fallback.
pub vein: crate::memory::vein::Vein,
/// Transcript logger for user, agent, and system lines.
pub transcript: crate::agent::transcript::TranscriptLogger,
/// Shared boolean flag, initialised to `false`.
/// NOTE(review): presumably set to request cancellation — confirm.
pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
/// Shared git state.
pub git_state: Arc<crate::agent::git_monitor::GitState>,
/// Starts as `None` in `new`.
pub think_mode: Option<bool>,
/// Active workflow mode; starts as `Auto` and is changed via `set_workflow_mode`.
workflow_mode: WorkflowMode,
/// Structured session memory; restored from the session file in `new`.
pub session_memory: crate::agent::compaction::SessionMemory,
/// Shared swarm coordinator.
pub swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
/// Shared voice manager.
pub voice_manager: Arc<crate::ui::voice::VoiceManager>,
/// Personality text inserted into the chat system prompt.
pub soul_personality: String,
/// Shared LSP manager, created for the workspace root.
pub lsp_manager: Arc<Mutex<crate::agent::lsp::manager::LspManager>>,
/// Starts as `None` in `new`.
pub reasoning_history: Option<String>,
/// Shared string-to-string map, starts empty.
/// NOTE(review): presumably path -> pinned content — confirm.
pub pinned_files: Arc<Mutex<std::collections::HashMap<String, String>>>,
/// Grounding bookkeeping (turn counter, inspected paths, verification state).
action_grounding: Arc<Mutex<ActionGroundingState>>,
/// True while current-plan execution is active; `validate_action_preconditions`
/// reads it to restrict tool use.
plan_execution_active: Arc<std::sync::atomic::AtomicBool>,
/// Recovery bookkeeping, default-initialised in `new`.
recovery_context: RecoveryContext,
/// Cached result of `vein.l1_context()`, refreshed by `refresh_vein_index`.
pub l1_context: Option<String>,
}
impl ConversationManager {
/// Returns true when the current workspace is not a project workspace, in
/// which case the vein indexes workspace artifacts instead of the project.
fn vein_docs_only_mode(&self) -> bool {
!crate::tools::file_ops::is_project_workspace()
}
/// Re-indexes the vein and refreshes the cached L1 context.
///
/// In docs-only mode the workspace artifacts are indexed, otherwise the
/// project is. Indexing runs inside `tokio::task::block_in_place`. Returns the
/// count reported by the indexer.
fn refresh_vein_index(&mut self) -> usize {
    let docs_only = self.vein_docs_only_mode();
    let indexed = tokio::task::block_in_place(|| {
        if docs_only {
            let root = crate::tools::file_ops::workspace_root();
            self.vein.index_workspace_artifacts(&root)
        } else {
            self.vein.index_project()
        }
    });
    self.l1_context = self.vein.l1_context();
    indexed
}
/// Renders a plain-text report of the vein index state.
///
/// The report lists the workspace mode, index counts, embedding availability,
/// the active room, and L1 readiness, followed by the hot files grouped by
/// room in sorted room order.
fn build_vein_inspection_report(&self, indexed_this_pass: usize) -> String {
    let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(8));
    let workspace_mode = match self.vein_docs_only_mode() {
        true => "docs-only (outside a project workspace)",
        false => "project workspace",
    };
    let active_room = snapshot.active_room.as_deref().unwrap_or("none");
    let embeddings_label = if snapshot.has_any_embeddings {
        "yes"
    } else {
        "no"
    };
    let l1_label = if snapshot.l1_ready {
        "ready"
    } else {
        "not built yet"
    };

    let mut report = format!(
        "Vein Inspection\n\
         Workspace mode: {workspace_mode}\n\
         Indexed this pass: {indexed_this_pass}\n\
         Indexed source files: {}\n\
         Indexed docs: {}\n\
         Indexed session exchanges: {}\n\
         Embedded source/doc chunks: {}\n\
         Embeddings available: {}\n\
         Active room bias: {active_room}\n\
         L1 hot-files block: {}\n",
        snapshot.indexed_source_files,
        snapshot.indexed_docs,
        snapshot.indexed_session_exchanges,
        snapshot.embedded_source_doc_chunks,
        embeddings_label,
        l1_label,
    );

    if snapshot.hot_files.is_empty() {
        report.push_str("Hot files: none yet.\n");
        return report;
    }

    report.push_str("\nHot files by room:\n");
    // A BTreeMap keeps the rooms in sorted order for stable output.
    let mut rooms: std::collections::BTreeMap<&str, Vec<&crate::memory::vein::VeinHotFile>> =
        std::collections::BTreeMap::new();
    for hot in snapshot.hot_files.iter() {
        rooms.entry(hot.room.as_str()).or_default().push(hot);
    }
    for (room, hot_files) in &rooms {
        report.push_str(&format!("[{}]\n", room));
        for hot in hot_files {
            let plural = if hot.heat == 1 { "" } else { "s" };
            report.push_str(&format!(
                "- {} [{} edit{}]\n",
                hot.path, hot.heat, plural
            ));
        }
    }
    report
}
/// Returns the text of the most recent message whose role is `"user"`, if any.
fn latest_user_prompt(&self) -> Option<&str> {
    let newest_user_message = self.history.iter().rfind(|msg| msg.role == "user")?;
    Some(newest_user_message.content.as_str())
}
/// Answers a turn with a prepared response, without calling the model.
///
/// Records the exchange in history and the transcript, streams the response
/// as `Token` events followed by `Done`, then trims history to 80 messages,
/// refreshes session memory, and saves the session. Send errors are ignored.
async fn emit_direct_response(
    &mut self,
    tx: &mpsc::Sender<InferenceEvent>,
    raw_user_input: &str,
    effective_user_input: &str,
    response: &str,
) {
    // History keeps the effective prompt; the transcript keeps the raw input.
    self.history.push(ChatMessage::user(effective_user_input));
    self.history.push(ChatMessage::assistant_text(response));
    self.transcript.log_user(raw_user_input);
    self.transcript.log_agent(response);

    let pieces = chunk_text(response, 8);
    for piece in pieces.into_iter().filter(|piece| !piece.is_empty()) {
        let _ = tx.send(InferenceEvent::Token(piece)).await;
    }
    let _ = tx.send(InferenceEvent::Done).await;

    self.trim_history(80);
    self.refresh_session_memory();
    self.save_session();
}
/// Records an operator checkpoint in session memory and sends it as an
/// `OperatorCheckpoint` event. Send errors are ignored.
async fn emit_operator_checkpoint(
    &mut self,
    tx: &mpsc::Sender<InferenceEvent>,
    state: OperatorCheckpointState,
    summary: impl Into<String>,
) {
    let summary: String = summary.into();
    let recorded_summary = summary.clone();
    self.session_memory
        .record_checkpoint(state.label(), recorded_summary);

    let event = InferenceEvent::OperatorCheckpoint { state, summary };
    let _ = tx.send(event).await;
}
/// Records a recovery-recipe summary in session memory and sends it as a
/// `RecoveryRecipe` event. Send errors are ignored.
async fn emit_recovery_recipe_summary(
    &mut self,
    tx: &mpsc::Sender<InferenceEvent>,
    state: impl Into<String>,
    summary: impl Into<String>,
) {
    let state: String = state.into();
    let summary: String = summary.into();
    let recorded_summary = summary.clone();
    self.session_memory.record_recovery(state, recorded_summary);

    let event = InferenceEvent::RecoveryRecipe { summary };
    let _ = tx.send(event).await;
}
/// Reports the provider as live (with an empty summary) and resets the
/// operator checkpoint to `Idle`.
async fn emit_provider_live(&mut self, tx: &mpsc::Sender<InferenceEvent>) {
    let live = InferenceEvent::ProviderStatus {
        state: ProviderRuntimeState::Live,
        summary: String::new(),
    };
    let _ = tx.send(live).await;

    let idle = OperatorCheckpointState::Idle;
    self.emit_operator_checkpoint(tx, idle, "").await;
}
/// Estimates prompt pressure for `messages` against the engine's current
/// context length and sends it as a `PromptPressure` event.
async fn emit_prompt_pressure_for_messages(
    &self,
    tx: &mpsc::Sender<InferenceEvent>,
    messages: &[ChatMessage],
) {
    let context_length = self.engine.current_context_length();
    // The estimate is (input tokens, reserved output tokens, total tokens, percent).
    let pressure = crate::agent::inference::estimate_prompt_pressure(
        messages,
        &self.tools,
        context_length,
    );
    let event = InferenceEvent::PromptPressure {
        estimated_input_tokens: pressure.0,
        reserved_output_tokens: pressure.1,
        estimated_total_tokens: pressure.2,
        context_length,
        percent: pressure.3,
    };
    let _ = tx.send(event).await;
}
/// Sends a `PromptPressure` event with all token figures zeroed, carrying
/// only the engine's current context length.
async fn emit_prompt_pressure_idle(&self, tx: &mpsc::Sender<InferenceEvent>) {
    let idle = InferenceEvent::PromptPressure {
        estimated_input_tokens: 0,
        reserved_output_tokens: 0,
        estimated_total_tokens: 0,
        context_length: self.engine.current_context_length(),
        percent: 0,
    };
    let _ = tx.send(idle).await;
}
/// Sends a `CompactionPressure` event: the estimated compactable tokens in
/// history against the adaptive compaction threshold, as a percentage capped
/// at 100 (0 when the threshold is 0).
async fn emit_compaction_pressure(&self, tx: &mpsc::Sender<InferenceEvent>) {
    let context_length = self.engine.current_context_length();
    let vram_ratio = self.gpu_state.ratio();
    let config = CompactionConfig::adaptive(context_length, vram_ratio);
    let estimated_tokens = compaction::estimate_compactable_tokens(&self.history);
    let threshold_tokens = config.max_estimated_tokens;

    // `checked_div` yields `None` for a zero threshold, which reports as 0%.
    let percent = estimated_tokens
        .saturating_mul(100)
        .checked_div(threshold_tokens)
        .map_or(0, |ratio| ratio.min(100) as u8);

    let event = InferenceEvent::CompactionPressure {
        estimated_tokens,
        threshold_tokens,
        percent,
    };
    let _ = tx.send(event).await;
}
async fn refresh_runtime_profile_and_report(
&mut self,
tx: &mpsc::Sender<InferenceEvent>,
reason: &str,
) -> Option<(String, usize, bool)> {
let refreshed = self.engine.refresh_runtime_profile().await;
if let Some((model_id, context_length, changed)) = refreshed.as_ref() {
let _ = tx
.send(InferenceEvent::RuntimeProfile {
model_id: model_id.clone(),
context_length: *context_length,
})
.await;
self.transcript.log_system(&format!(
"Runtime profile refresh ({}): model={} ctx={} changed={}",
reason, model_id, context_length, changed
));
}
refreshed
}
/// Builds a conversation manager.
///
/// Restores the running summary and session memory from the session file,
/// seeds history with the system prompt built by the engine, and opens the
/// vein database at `<workspace root>/.hematite/vein.db`, falling back to an
/// in-memory database if that fails. The workflow mode starts as `Auto`.
///
/// # Panics
///
/// Panics if neither the on-disk vein database nor the in-memory fallback can
/// be opened.
pub fn new(
    engine: Arc<InferenceEngine>,
    professional: bool,
    brief: bool,
    snark: u8,
    chaos: u8,
    soul_personality: String,
    fast_model: Option<String>,
    think_model: Option<String>,
    gpu_state: Arc<GpuState>,
    git_state: Arc<crate::agent::git_monitor::GitState>,
    swarm_coordinator: Arc<crate::agent::swarm::SwarmCoordinator>,
    voice_manager: Arc<crate::ui::voice::VoiceManager>,
) -> Self {
    let (saved_summary, saved_memory) = load_session_data();
    let mcp_manager = Arc::new(tokio::sync::Mutex::new(
        crate::agent::mcp_manager::McpManager::new(),
    ));
    let dynamic_instructions =
        engine.build_system_prompt(snark, chaos, brief, professional, &[], None, &[]);
    let history = vec![ChatMessage::system(&dynamic_instructions)];
    let vein_path = crate::tools::file_ops::workspace_root()
        .join(".hematite")
        .join("vein.db");
    let vein_base_url = engine.base_url.clone();
    // Prefer the on-disk index; if it cannot be opened, use an in-memory one.
    let vein = crate::memory::vein::Vein::new(&vein_path, vein_base_url.clone())
        .unwrap_or_else(|_| {
            crate::memory::vein::Vein::new(":memory:", vein_base_url)
                .expect("in-memory vein database should always open")
        });
    Self {
        history,
        engine,
        tools: get_tools(),
        mcp_manager,
        professional,
        brief,
        snark,
        chaos,
        fast_model,
        think_model,
        correction_hints: Vec::new(),
        running_summary: saved_summary,
        gpu_state,
        vein,
        transcript: crate::agent::transcript::TranscriptLogger::new(),
        cancel_token: Arc::new(std::sync::atomic::AtomicBool::new(false)),
        git_state,
        think_mode: None,
        workflow_mode: WorkflowMode::Auto,
        session_memory: saved_memory,
        swarm_coordinator,
        voice_manager,
        soul_personality,
        lsp_manager: Arc::new(Mutex::new(crate::agent::lsp::manager::LspManager::new(
            crate::tools::file_ops::workspace_root(),
        ))),
        reasoning_history: None,
        pinned_files: Arc::new(Mutex::new(std::collections::HashMap::new())),
        action_grounding: Arc::new(Mutex::new(ActionGroundingState::default())),
        plan_execution_active: Arc::new(std::sync::atomic::AtomicBool::new(false)),
        recovery_context: RecoveryContext::default(),
        l1_context: None,
    }
}
/// Public entry point for the initial vein indexing pass; delegates to
/// `refresh_vein_index` and returns its count.
pub fn initialize_vein(&mut self) -> usize {
self.refresh_vein_index()
}
fn save_session(&self) {
let path = session_path();
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let saved = SavedSession {
running_summary: self.running_summary.clone(),
session_memory: self.session_memory.clone(),
};
if let Ok(json) = serde_json::to_string(&saved) {
let _ = std::fs::write(&path, json);
}
}
fn save_empty_session(&self) {
let path = session_path();
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let saved = SavedSession {
running_summary: None,
session_memory: crate::agent::compaction::SessionMemory::default(),
};
if let Ok(json) = serde_json::to_string(&saved) {
let _ = std::fs::write(&path, json);
}
}
/// Rebuilds session memory from the current history while keeping the
/// current plan and the runtime ledger from the previous memory.
fn refresh_session_memory(&mut self) {
    let rebuilt = compaction::extract_memory(&self.history);
    // Take the previous memory by move instead of cloning all of it; only the
    // plan needs to be copied into the rebuilt memory.
    let previous = std::mem::replace(&mut self.session_memory, rebuilt);
    self.session_memory.current_plan = previous.current_plan.clone();
    self.session_memory.inherit_runtime_ledger_from(&previous);
}
/// Builds the system prompt for CHAT mode from the engine's species name and
/// the configured personality text.
fn build_chat_system_prompt(&self) -> String {
    format!(
        "You are {species}, a local AI companion running entirely on the user's GPU — no cloud, no subscriptions, no phoning home.\n\
         {personality}\n\n\
         This is CHAT mode — a clean conversational surface. Behave like a sharp friend who happens to know everything about code, not like an agent following a workflow.\n\n\
         Rules:\n\
         - Talk like a person. Skip the bullet-point breakdowns unless the topic genuinely needs structure.\n\
         - Answer directly. One paragraph is usually right.\n\
         - Don't call tools unless the user explicitly asks you to look at a file or run something.\n\
         - Don't narrate your reasoning or mention tool names unprompted.\n\
         - You can discuss code, debug ideas, explain concepts, help plan, or just talk.\n\
         - If the user clearly wants you to edit or build something, do it — but lead with conversation, not scaffolding.\n\
         - If the user wants the full coding harness, they can type `/agent`.\n",
        species = self.engine.species,
        personality = self.soul_personality,
    )
}
/// Appends the session handoff section to a system prompt.
///
/// Nothing is appended when there is neither a non-blank running summary nor
/// any signal in session memory. Otherwise the section header is followed by
/// the task memory (if it has signal) and the compacted summary (if non-blank).
fn append_session_handoff(&self, system_msg: &mut String) {
    let summary = self
        .running_summary
        .as_deref()
        .filter(|text| !text.trim().is_empty());
    let has_memory = self.session_memory.has_signal();
    if summary.is_none() && !has_memory {
        return;
    }

    system_msg.push_str(
        "\n\n# LIGHTWEIGHT SESSION HANDOFF\n\
         This is compact carry-over from earlier work on this machine.\n\
         Use it only when it helps the current request.\n\
         Prefer current repository state, pinned files, and fresh tool results over stale session memory.\n",
    );

    if has_memory {
        system_msg.push_str("\n## Active Task Memory\n");
        system_msg.push_str(&self.session_memory.to_prompt());
    }

    if let Some(summary) = summary {
        system_msg.push_str("\n## Compacted Session Summary\n");
        system_msg.push_str(summary);
        system_msg.push('\n');
    }
}
/// Replaces the active workflow mode.
fn set_workflow_mode(&mut self, mode: WorkflowMode) {
self.workflow_mode = mode;
}
/// Returns the summary line of the saved plan, or `None` when there is no
/// plan or the plan reports no signal.
fn current_plan_summary(&self) -> Option<String> {
    match self.session_memory.current_plan.as_ref() {
        Some(plan) if plan.has_signal() => Some(plan.summary_line()),
        _ => None,
    }
}
/// Returns the saved plan's target files as normalized workspace paths, or an
/// empty list when there is no saved plan.
fn current_plan_allowed_paths(&self) -> Vec<String> {
    let Some(plan) = self.session_memory.current_plan.as_ref() else {
        return Vec::new();
    };

    let mut allowed = Vec::new();
    for target in plan.target_files.iter() {
        allowed.push(normalize_workspace_path(target));
    }
    allowed
}
/// In ARCHITECT mode, parses a plan handoff out of the response, saves it
/// (best-effort, the save result is ignored), and stores it as the current
/// plan. Does nothing in other modes or when no plan can be parsed.
fn persist_architect_handoff(&mut self, response: &str) {
    if self.workflow_mode == WorkflowMode::Architect {
        if let Some(plan) = crate::tools::plan::parse_plan_handoff(response) {
            let _ = crate::tools::plan::save_plan_handoff(&plan);
            self.session_memory.current_plan = Some(plan);
        }
    }
}
/// Advances the grounding turn counter by one and returns the new value.
async fn begin_grounded_turn(&self) -> u64 {
    let mut grounding = self.action_grounding.lock().await;
    let next_turn = grounding.turn_index + 1;
    grounding.turn_index = next_turn;
    next_turn
}
/// Resets all action-grounding state back to its defaults.
async fn reset_action_grounding(&self) {
    *self.action_grounding.lock().await = ActionGroundingState::default();
}
/// Records that `path` was read during the current turn. The normalized path
/// is marked as both observed and inspected.
async fn record_read_observation(&self, path: &str) {
    let key = normalize_workspace_path(path);
    let mut grounding = self.action_grounding.lock().await;
    let current_turn = grounding.turn_index;
    grounding.observed_paths.insert(key.clone(), current_turn);
    grounding.inspected_paths.insert(key, current_turn);
}
async fn record_line_inspection(&self, path: &str) {
let normalized = normalize_workspace_path(path);
let mut state = self.action_grounding.lock().await;
let turn = state.turn_index;
state.observed_paths.insert(normalized.clone(), turn);
state.inspected_paths.insert(normalized, turn);
}
/// Records the outcome of a build verification for the current turn.
///
/// On success the "code changed since verify" flag and the failed-path list
/// are cleared. On failure the failed-path list is replaced with the paths
/// parsed from `output`.
async fn record_verify_build_result(&self, ok: bool, output: &str) {
    let mut grounding = self.action_grounding.lock().await;
    let current_turn = grounding.turn_index;
    grounding.last_verify_build_turn = Some(current_turn);
    grounding.last_verify_build_ok = ok;

    if !ok {
        grounding.last_failed_build_paths = parse_failing_paths_from_build_output(output);
        return;
    }
    grounding.code_changed_since_verify = false;
    grounding.last_failed_build_paths.clear();
}
/// Forwards a verification outcome and its summary to session memory.
fn record_session_verification(&mut self, ok: bool, summary: impl Into<String>) {
self.session_memory.record_verification(ok, summary);
}
/// Records a successful workspace mutation for build-verification tracking.
///
/// A mutation with no known path, or one that touches a code-like path, marks
/// code as changed since the last verification. A mutation of a non-code path
/// leaves the flag untouched, so an earlier unverified code edit is not
/// forgotten when, say, a docs file is edited afterwards.
async fn record_successful_mutation(&self, path: Option<&str>) {
    let touches_code = match path {
        Some(p) => is_code_like_path(p),
        None => true,
    };
    if touches_code {
        let mut state = self.action_grounding.lock().await;
        state.code_changed_since_verify = true;
    }
}
/// Gate a tool call before it runs.
///
/// * `name` - the tool being invoked.
/// * `args` - the tool's JSON arguments.
///
/// Returns `Ok(())` when no rule rejects the call, otherwise `Err` carrying an
/// "Action blocked: ..." message for the first rule that rejects it. Rules are
/// evaluated in this order: current-plan execution limits, read-only workflow
/// limits, docs-edit and recent-evidence requirements for path targets, MCP
/// mutation / workspace-read tools, broken-build edit restrictions,
/// commit/push verification, and `shell`-specific checks.
async fn validate_action_preconditions(&self, name: &str, args: &Value) -> Result<(), String> {
// Extra restrictions that apply only while the saved plan is being executed.
if self
.plan_execution_active
.load(std::sync::atomic::Ordering::SeqCst)
{
if is_current_plan_irrelevant_tool(name) {
return Err(format!(
"Action blocked: `{}` is not part of current-plan execution. Stay on the saved target files, use built-in workspace file tools only, and either make a concrete edit or surface one specific blocker.",
name
));
}
if name == "map_project" {
return Err(
"Action blocked: `map_project` is too broad for current-plan execution. Use the target files from the saved plan and inspect them directly with built-in workspace tools."
.to_string(),
);
}
// Plan-scoped tools must target one of the plan's allowed paths.
// The check is skipped entirely when the allow-list is empty.
if is_plan_scoped_tool(name) {
let allowed_paths = self.current_plan_allowed_paths();
if !allowed_paths.is_empty() {
let in_allowed = match name {
// "paths" must be a non-empty array whose entries are all allowed paths.
"auto_pin_context" => args
.get("paths")
.and_then(|v| v.as_array())
.map(|paths| {
!paths.is_empty()
&& paths.iter().all(|v| {
v.as_str()
.map(normalize_workspace_path)
.map(|p| allowed_paths.contains(&p))
.unwrap_or(false)
})
})
.unwrap_or(false),
// The normalized "path" argument must itself be an allowed path.
"grep_files" | "list_files" => args
.get("path")
.and_then(|v| v.as_str())
.map(normalize_workspace_path)
.map(|p| allowed_paths.contains(&p))
.unwrap_or(false),
// Any other tool: a missing target path counts as not allowed.
_ => action_target_path(name, args)
.map(|p| allowed_paths.contains(&p))
.unwrap_or(false),
};
if !in_allowed {
let allowed = allowed_paths
.iter()
.map(|p| format!("`{}`", p))
.collect::<Vec<_>>()
.join(", ");
return Err(format!(
"Action blocked: current-plan execution is locked to the saved target files. Use a path-scoped built-in tool on one of these files only: {}.",
allowed
));
}
}
}
// During plan execution these edit tools need an inspection of the target
// recorded no more than 3 turns ago.
if matches!(name, "edit_file" | "multi_search_replace" | "patch_hunk") {
if let Some(target) = action_target_path(name, args) {
let state = self.action_grounding.lock().await;
let recently_inspected = state
.inspected_paths
.get(&target)
.map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
.unwrap_or(false);
// The grounding lock is released before the result is acted on.
drop(state);
if !recently_inspected {
return Err(format!(
"Action blocked: `{}` on '{}' requires an exact local line window first during current-plan execution. Use `inspect_lines` on that file around the intended edit region, then retry the mutation.",
name, target
));
}
}
}
}
// Read-only workflows: pinning more files into context is not allowed.
if self.workflow_mode.is_read_only() && name == "auto_pin_context" {
return Err(
"Action blocked: `auto_pin_context` is disabled in read-only workflows. Use the grounded file evidence you already have, or narrow with `inspect_lines` instead of pinning more files into active context."
.to_string(),
);
}
// Read-only workflows: destructive tools are rejected, except `shell`
// commands whose risk is classified as `Safe`.
if self.workflow_mode.is_read_only() && is_destructive_tool(name) {
if name == "shell" {
let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
let risk = crate::tools::guard::classify_bash_risk(command);
if !matches!(risk, crate::tools::RiskLevel::Safe) {
return Err(format!(
"Action blocked: workflow mode `{}` is read-only for risky or mutating operations. Switch to `/code` or `/auto` before making changes.",
self.workflow_mode.label()
));
}
} else {
return Err(format!(
"Action blocked: workflow mode `{}` is read-only. Use `/code` to implement changes or `/auto` to leave mode selection to Hematite.",
self.workflow_mode.label()
));
}
}
// Checks that apply to any tool for which a target path can be derived.
let normalized_target = action_target_path(name, args);
if let Some(target) = normalized_target.as_deref() {
// File-writing tools may not touch docs files unless the latest user prompt
// asked for it (as decided by `docs_edit_without_explicit_request`).
if matches!(
name,
"write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
) {
if let Some(prompt) = self.latest_user_prompt() {
if docs_edit_without_explicit_request(prompt, target) {
return Err(format!(
"Action blocked: '{}' is a docs file but the current request did not explicitly ask for documentation changes. Finish the code task first. If docs need updating, the user will ask.",
target
));
}
}
}
// Evidence is only required when the target already exists on disk.
// NOTE(review): a relative `target` is resolved against the process working
// directory here - confirm that matches the workspace root.
let path_exists = std::path::Path::new(target).exists();
if path_exists {
let state = self.action_grounding.lock().await;
let pinned = self.pinned_files.lock().await;
// A pinned file counts as evidence; pinned keys are normalized before comparing.
let pinned_match = pinned.keys().any(|p| normalize_workspace_path(p) == target);
drop(pinned);
// `edit_file` / `multi_search_replace` take the stricter branch below;
// every other tool with a target path takes the "recent observation" branch.
let needs_exact_window = matches!(name, "edit_file" | "multi_search_replace");
// Inspected within the last 3 turns.
let recently_inspected = state
.inspected_paths
.get(target)
.map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
.unwrap_or(false);
// Observed during the current turn.
let same_turn_read = state
.observed_paths
.get(target)
.map(|turn| state.turn_index.saturating_sub(*turn) == 0)
.unwrap_or(false);
// Observed within the last 3 turns.
let recent_observed = state
.observed_paths
.get(target)
.map(|turn| state.turn_index.saturating_sub(*turn) <= 3)
.unwrap_or(false);
if needs_exact_window {
if !recently_inspected && !same_turn_read && !pinned_match {
return Err(format!(
"Action blocked: `{}` on '{}' requires a line-level inspection first. \
Use `inspect_lines` on the target region to get the exact current text \
(whitespace and indentation included), then retry the edit.",
name, target
));
}
} else if !recent_observed && !pinned_match {
return Err(format!(
"Action blocked: `{}` on '{}' requires recent file evidence. Use `read_file` or `inspect_lines` on that path first, or pin the file into active context.",
name, target
));
}
}
}
// External MCP mutation tools are always rejected at this point.
if is_mcp_mutating_tool(name) {
return Err(format!(
"Action blocked: `{}` is an external MCP mutation tool. For workspace file edits, prefer Hematite's built-in edit path (`read_file`/`inspect_lines` plus `patch_hunk`, `edit_file`, or `multi_search_replace`) unless the user explicitly requires MCP for that action.",
name
));
}
// External MCP workspace-read tools are likewise rejected.
if is_mcp_workspace_read_tool(name) {
return Err(format!(
"Action blocked: `{}` is an external MCP filesystem read tool. For local workspace inspection, prefer Hematite's built-in read path (`read_file`, `inspect_lines`, `list_files`, or `grep_files`) unless the user explicitly requires MCP for that action.",
name
));
}
// When code changed since verification, the last verify failed, and the failing
// files are known, file-writing tools may only target one of those files.
if matches!(
name,
"write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
) {
if let Some(target) = normalized_target.as_deref() {
let state = self.action_grounding.lock().await;
if state.code_changed_since_verify
&& !state.last_verify_build_ok
&& !state.last_failed_build_paths.is_empty()
&& !state.last_failed_build_paths.iter().any(|p| p == target)
{
let files = state
.last_failed_build_paths
.iter()
.map(|p| format!("`{}`", p))
.collect::<Vec<_>>()
.join(", ");
return Err(format!(
"Action blocked: the build is broken. Fix the errors in {} before editing other files. Run `verify_build` to confirm the fix, then continue.",
files
));
}
}
}
// Commits and pushes are rejected only when code changed since verification AND
// the last recorded verify result was not ok.
// NOTE(review): code changed after a verify that passed is not blocked by this
// condition - confirm that is intended given the error message.
if name == "git_commit" || name == "git_push" {
let state = self.action_grounding.lock().await;
if state.code_changed_since_verify && !state.last_verify_build_ok {
return Err(format!(
"Action blocked: `{}` requires a successful `verify_build` after the latest code edits. Run verification first so Hematite has proof that the tree is build-clean.",
name
));
}
}
// `shell`-specific checks.
if name == "shell" {
let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
// If the latest prompt maps to a host-inspection topic and the command looks
// like a structured host inspection, redirect to `inspect_host`.
if let Some(prompt) = self.latest_user_prompt() {
if let Some(topic) = preferred_host_inspection_topic(prompt) {
if shell_looks_like_structured_host_inspection(command) {
return Err(format!(
"Action blocked: this is a host-inspection question. Prefer `inspect_host(topic: \"{}\")` instead of raw `shell` for PATH, toolchains, desktop, Downloads, listening ports, repo-doctor checks, or directory/disk summaries. Use `shell` only if `inspect_host` cannot answer the question directly.",
topic
));
}
}
}
// A missing or whitespace-only "reason" is treated as empty.
let reason = args
.get("reason")
.and_then(|v| v.as_str())
.unwrap_or("")
.trim();
// Commands not classified as `Safe` must carry a non-empty reason.
let risk = crate::tools::guard::classify_bash_risk(command);
if !matches!(risk, crate::tools::RiskLevel::Safe) && reason.is_empty() {
return Err(
"Action blocked: risky `shell` calls require a concrete `reason` argument that explains what is being verified or changed."
.to_string(),
);
}
}
Ok(())
}
fn build_action_receipt(
&self,
name: &str,
args: &Value,
output: &str,
is_error: bool,
) -> Option<ChatMessage> {
if is_error || !is_destructive_tool(name) {
return None;
}
let mut receipt = String::from("[ACTION RECEIPT]\n");
receipt.push_str(&format!("- tool: {}\n", name));
if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
receipt.push_str(&format!("- target: {}\n", path));
}
if name == "shell" {
if let Some(command) = args.get("command").and_then(|v| v.as_str()) {
receipt.push_str(&format!("- command: {}\n", command));
}
if let Some(reason) = args.get("reason").and_then(|v| v.as_str()) {
if !reason.trim().is_empty() {
receipt.push_str(&format!("- reason: {}\n", reason.trim()));
}
}
}
let first_line = output.lines().next().unwrap_or(output).trim();
receipt.push_str(&format!("- outcome: {}\n", first_line));
Some(ChatMessage::system(&receipt))
}
/// Replace every registered `mcp__*` tool definition with ones built from `mcp_tools`.
///
/// Definitions whose function name starts with `mcp__` are removed, then one
/// definition per entry of `mcp_tools` is appended in order. A missing
/// description becomes the empty string.
fn replace_mcp_tool_definitions(&mut self, mcp_tools: &[crate::agent::mcp::McpTool]) {
    self.tools
        .retain(|existing| !existing.function.name.starts_with("mcp__"));
    for mcp_tool in mcp_tools {
        let function = ToolFunction {
            name: mcp_tool.name.clone(),
            description: mcp_tool.description.clone().unwrap_or_default(),
            parameters: mcp_tool.input_schema.clone(),
        };
        self.tools.push(ToolDefinition {
            tool_type: "function".into(),
            function,
            metadata: crate::agent::inference::tool_metadata_for_name(&mcp_tool.name),
        });
    }
}
/// Send the MCP manager's current runtime report to `tx` as an `McpStatus` event.
///
/// The manager lock is released before the event is sent. A failed send is ignored.
async fn emit_mcp_runtime_status(&self, tx: &mpsc::Sender<InferenceEvent>) {
    // The guard is a temporary of this statement, so it is gone before `send`.
    let report = self.mcp_manager.lock().await.runtime_report();
    let event = InferenceEvent::McpStatus {
        state: report.state,
        summary: report.summary,
    };
    let _ = tx.send(event).await;
}
/// Re-initialize the MCP servers, rediscover their tools, and sync the tool registry.
///
/// On success the discovered tools replace the registered `mcp__*` definitions
/// and are returned. If initialization fails, the `mcp__*` definitions are
/// cleared and the error is returned. In both cases an MCP status event is
/// emitted on `tx` after the registry has been updated.
async fn refresh_mcp_tools(
    &mut self,
    tx: &mpsc::Sender<InferenceEvent>,
) -> Result<Vec<crate::agent::mcp::McpTool>, Box<dyn std::error::Error + Send + Sync>> {
    // Hold the manager lock only for initialization and discovery.
    let outcome = {
        let mut manager = self.mcp_manager.lock().await;
        match manager.initialize_all().await {
            Ok(()) => Ok(manager.discover_tools().await),
            Err(err) => Err(err),
        }
    };
    // A failed initialization is treated as "no tools" for the registry update.
    let (discovered, failure) = match outcome {
        Ok(tools) => (tools, None),
        Err(err) => (Vec::new(), Some(err)),
    };
    self.replace_mcp_tool_definitions(&discovered);
    self.emit_mcp_runtime_status(tx).await;
    match failure {
        Some(err) => Err(err.into()),
        None => Ok(discovered),
    }
}
/// Initialize MCP by running one tool refresh; the discovered tool list is discarded.
///
/// # Errors
///
/// Returns whatever error `refresh_mcp_tools` reports.
pub async fn initialize_mcp(
    &mut self,
    tx: &mpsc::Sender<InferenceEvent>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    self.refresh_mcp_tools(tx).await.map(|_discovered| ())
}
pub async fn run_turn(
&mut self,
user_turn: &UserTurn,
tx: mpsc::Sender<InferenceEvent>,
yolo: bool,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let user_input = user_turn.text.as_str();
if user_input.trim() == "/new" {
self.history.clear();
self.reasoning_history = None;
self.session_memory.clear();
self.running_summary = None;
self.correction_hints.clear();
self.pinned_files.lock().await.clear();
self.reset_action_grounding().await;
reset_task_files();
let _ = std::fs::remove_file(session_path());
self.save_empty_session();
self.emit_compaction_pressure(&tx).await;
self.emit_prompt_pressure_idle(&tx).await;
for chunk in chunk_text(
"Fresh task context started. Chat history, pins, and task files cleared. Saved memory remains available.",
8,
) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/forget" {
self.history.clear();
self.reasoning_history = None;
self.session_memory.clear();
self.running_summary = None;
self.correction_hints.clear();
self.pinned_files.lock().await.clear();
self.reset_action_grounding().await;
reset_task_files();
purge_persistent_memory();
tokio::task::block_in_place(|| self.vein.reset());
let _ = std::fs::remove_file(session_path());
self.save_empty_session();
self.emit_compaction_pressure(&tx).await;
self.emit_prompt_pressure_idle(&tx).await;
for chunk in chunk_text(
"Hard forget complete. Chat history, saved memory, task files, and the Vein index were purged.",
8,
) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/vein-inspect" {
let indexed = self.refresh_vein_index();
let report = self.build_vein_inspection_report(indexed);
let snapshot = tokio::task::block_in_place(|| self.vein.inspect_snapshot(1));
let _ = tx
.send(InferenceEvent::VeinStatus {
file_count: snapshot.indexed_source_files + snapshot.indexed_docs,
embedded_count: snapshot.embedded_source_doc_chunks,
docs_only: self.vein_docs_only_mode(),
})
.await;
for chunk in chunk_text(&report, 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/workspace-profile" {
let root = crate::tools::file_ops::workspace_root();
let _ = crate::agent::workspace_profile::ensure_workspace_profile(&root);
let report = crate::agent::workspace_profile::profile_report(&root);
for chunk in chunk_text(&report, 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/vein-reset" {
tokio::task::block_in_place(|| self.vein.reset());
let _ = tx
.send(InferenceEvent::VeinStatus {
file_count: 0,
embedded_count: 0,
docs_only: self.vein_docs_only_mode(),
})
.await;
for chunk in chunk_text("Vein index cleared. Will rebuild on the next turn.", 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
let config = crate::agent::config::load_config();
self.recovery_context.clear();
let manual_runtime_refresh = user_input.trim() == "/runtime-refresh";
if !manual_runtime_refresh {
if let Some((model_id, context_length, changed)) = self
.refresh_runtime_profile_and_report(&tx, "turn_start")
.await
{
if changed {
let _ = tx
.send(InferenceEvent::Thought(format!(
"Runtime refresh: using model `{}` with CTX {} for this turn.",
model_id, context_length
)))
.await;
}
}
}
self.emit_compaction_pressure(&tx).await;
let current_model = self.engine.current_model();
self.engine.set_gemma_native_formatting(
crate::agent::config::effective_gemma_native_formatting(&config, ¤t_model),
);
let _turn_id = self.begin_grounded_turn().await;
let _hook_runner = crate::agent::hooks::HookRunner::new(config.hooks.clone());
let mcp_tools = match self.refresh_mcp_tools(&tx).await {
Ok(tools) => tools,
Err(e) => {
let _ = tx
.send(InferenceEvent::Error(format!("MCP refresh failed: {}", e)))
.await;
Vec::new()
}
};
let effective_fast = config
.fast_model
.clone()
.or_else(|| self.fast_model.clone());
let effective_think = config
.think_model
.clone()
.or_else(|| self.think_model.clone());
if user_input.trim() == "/lsp" {
let mut lsp = self.lsp_manager.lock().await;
match lsp.start_servers().await {
Ok(_) => {
let _ = tx
.send(InferenceEvent::MutedToken(
"LSP: Servers Initialized OK.".to_string(),
))
.await;
}
Err(e) => {
let _ = tx
.send(InferenceEvent::Error(format!(
"LSP: Failed to start servers - {}",
e
)))
.await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/runtime-refresh" {
match self
.refresh_runtime_profile_and_report(&tx, "manual_command")
.await
{
Some((model_id, context_length, changed)) => {
let msg = if changed {
format!(
"Runtime profile refreshed. Model: {} | CTX: {}",
model_id, context_length
)
} else {
format!(
"Runtime profile unchanged. Model: {} | CTX: {}",
model_id, context_length
)
};
for chunk in chunk_text(&msg, 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
}
None => {
let _ = tx
.send(InferenceEvent::Error(
"Runtime refresh failed: LM Studio profile could not be read."
.to_string(),
))
.await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/ask" {
self.set_workflow_mode(WorkflowMode::Ask);
for chunk in chunk_text(
"Workflow mode: ASK. Stay read-only, explain, inspect, and answer without making changes.",
8,
) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/code" {
self.set_workflow_mode(WorkflowMode::Code);
let mut message =
"Workflow mode: CODE. Make changes when needed, but keep proof-before-action and verification discipline.".to_string();
if let Some(plan) = self.current_plan_summary() {
message.push_str(&format!(" Current plan: {plan}."));
}
for chunk in chunk_text(&message, 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/architect" {
self.set_workflow_mode(WorkflowMode::Architect);
let mut message =
"Workflow mode: ARCHITECT. Plan, inspect, and shape the approach first. Do not mutate code unless the user explicitly asks to implement.".to_string();
if let Some(plan) = self.current_plan_summary() {
message.push_str(&format!(" Existing plan: {plan}."));
}
for chunk in chunk_text(&message, 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/read-only" {
self.set_workflow_mode(WorkflowMode::ReadOnly);
for chunk in chunk_text(
"Workflow mode: READ-ONLY. Analysis only. Do not modify files, run mutating shell commands, or commit changes.",
8,
) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/auto" {
self.set_workflow_mode(WorkflowMode::Auto);
for chunk in chunk_text(
"Workflow mode: AUTO. Hematite will choose the narrowest effective path for the request.",
8,
) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/chat" {
self.set_workflow_mode(WorkflowMode::Chat);
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/reroll" {
let soul = crate::ui::hatch::generate_soul_random();
self.snark = soul.snark;
self.chaos = soul.chaos;
self.soul_personality = soul.personality.clone();
let species = soul.species.clone();
if let Some(eng) = Arc::get_mut(&mut self.engine) {
eng.species = species.clone();
}
let shiny_tag = if soul.shiny { " 🌟 SHINY" } else { "" };
let _ = tx
.send(InferenceEvent::SoulReroll {
species: soul.species.clone(),
rarity: soul.rarity.label().to_string(),
shiny: soul.shiny,
personality: soul.personality.clone(),
})
.await;
for chunk in chunk_text(
&format!(
"A new companion awakens!\n[{}{}] {} — \"{}\"",
soul.rarity.label(),
shiny_tag,
soul.species,
soul.personality
),
8,
) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/agent" {
self.set_workflow_mode(WorkflowMode::Auto);
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
let mut effective_user_input = user_input.trim().to_string();
if let Some((mode, rest)) = parse_inline_workflow_prompt(user_input) {
self.set_workflow_mode(mode);
effective_user_input = rest.to_string();
}
let transcript_user_input = transcript_user_turn_text(user_turn, &effective_user_input);
effective_user_input = apply_turn_attachments(user_turn, &effective_user_input);
let implement_current_plan = self.workflow_mode == WorkflowMode::Code
&& is_current_plan_execution_request(&effective_user_input)
&& self
.session_memory
.current_plan
.as_ref()
.map(|plan| plan.has_signal())
.unwrap_or(false);
self.plan_execution_active
.store(implement_current_plan, std::sync::atomic::Ordering::SeqCst);
let _plan_execution_guard = PlanExecutionGuard {
flag: self.plan_execution_active.clone(),
};
let intent = classify_query_intent(self.workflow_mode, &effective_user_input);
if let Some(answer_kind) = intent.direct_answer {
match answer_kind {
DirectAnswerKind::LanguageCapability => {
let response = build_language_capability_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::UnsafeWorkflowPressure => {
let response = build_unsafe_workflow_pressure_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::SessionMemory => {
let response = build_session_memory_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::RecoveryRecipes => {
let response = build_recovery_recipes_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::McpLifecycle => {
let response = build_mcp_lifecycle_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::AuthorizationPolicy => {
let response = build_authorization_policy_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::ToolClasses => {
let response = build_tool_classes_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::ToolRegistryOwnership => {
let response = build_tool_registry_ownership_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::SessionResetSemantics => {
let response = build_session_reset_semantics_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::ProductSurface => {
let response = build_product_surface_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::ReasoningSplit => {
let response = build_reasoning_split_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::Identity => {
let response = build_identity_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::WorkflowModes => {
let response = build_workflow_modes_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::GemmaNative => {
let response = build_gemma_native_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::GemmaNativeSettings => {
let response = build_gemma_native_settings_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::VerifyProfiles => {
let response = build_verify_profiles_answer();
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::Toolchain => {
let lower = effective_user_input.to_lowercase();
let topic = if (lower.contains("voice output") || lower.contains("voice"))
&& (lower.contains("lag")
|| lower.contains("behind visible text")
|| lower.contains("latency"))
{
"voice_latency_plan"
} else {
"all"
};
let response =
crate::tools::toolchain::describe_toolchain(&serde_json::json!({
"topic": topic,
"question": effective_user_input,
}))
.await
.unwrap_or_else(|e| format!("Error: {}", e));
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
DirectAnswerKind::ArchitectSessionResetPlan => {
let plan = build_architect_session_reset_plan();
let response = plan.to_markdown();
let _ = crate::tools::plan::save_plan_handoff(&plan);
self.session_memory.current_plan = Some(plan);
self.emit_direct_response(&tx, user_input, &effective_user_input, &response)
.await;
return Ok(());
}
}
}
if matches!(
self.workflow_mode,
WorkflowMode::Ask | WorkflowMode::ReadOnly
) && looks_like_mutation_request(&effective_user_input)
{
let response = build_mode_redirect_answer(self.workflow_mode);
self.history.push(ChatMessage::user(&effective_user_input));
self.history.push(ChatMessage::assistant_text(&response));
self.transcript.log_user(&transcript_user_input);
self.transcript.log_agent(&response);
for chunk in chunk_text(&response, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
self.trim_history(80);
self.refresh_session_memory();
self.save_session();
return Ok(());
}
if user_input.trim() == "/think" {
self.think_mode = Some(true);
for chunk in chunk_text("Think mode: ON — full chain-of-thought enabled.", 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim() == "/no_think" {
self.think_mode = Some(false);
for chunk in chunk_text(
"Think mode: OFF — fast mode enabled (no chain-of-thought).",
8,
) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim_start().starts_with("/pin ") {
let path = user_input.trim_start()[5..].trim();
match std::fs::read_to_string(path) {
Ok(content) => {
self.pinned_files
.lock()
.await
.insert(path.to_string(), content);
let msg = format!(
"Pinned: {} — this file is now locked in model context.",
path
);
for chunk in chunk_text(&msg, 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
}
Err(e) => {
let _ = tx
.send(InferenceEvent::Error(format!(
"Failed to pin {}: {}",
path, e
)))
.await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if user_input.trim_start().starts_with("/unpin ") {
let path = user_input.trim_start()[7..].trim();
if self.pinned_files.lock().await.remove(path).is_some() {
let msg = format!("Unpinned: {} — file removed from active context.", path);
for chunk in chunk_text(&msg, 8) {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
} else {
let _ = tx
.send(InferenceEvent::Error(format!(
"File {} was not pinned.",
path
)))
.await;
}
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
let tiny_context_mode = self.engine.current_context_length() <= 8_192;
let mut base_prompt = self.engine.build_system_prompt(
self.snark,
self.chaos,
self.brief,
self.professional,
&self.tools,
self.reasoning_history.as_deref(),
&mcp_tools,
);
if !tiny_context_mode {
if let Some(hint) = &config.context_hint {
if !hint.trim().is_empty() {
base_prompt.push_str(&format!(
"\n\n# Project Context (from .hematite/settings.json)\n{}",
hint
));
}
}
if let Some(profile_block) = crate::agent::workspace_profile::profile_prompt_block(
&crate::tools::file_ops::workspace_root(),
) {
base_prompt.push_str(&format!("\n\n{}", profile_block));
}
if let Some(ref l1) = self.l1_context {
base_prompt.push_str(&format!("\n\n{}", l1));
}
}
let grounded_trace_mode = intent.grounded_trace_mode
|| intent.primary_class == QueryIntentClass::RuntimeDiagnosis;
let capability_mode =
intent.capability_mode || intent.primary_class == QueryIntentClass::Capability;
let toolchain_mode =
intent.toolchain_mode || intent.primary_class == QueryIntentClass::Toolchain;
let host_inspection_mode = intent.host_inspection_mode;
let project_map_mode = intent.preserve_project_map_output
|| intent.primary_class == QueryIntentClass::RepoArchitecture;
let architecture_overview_mode = intent.architecture_overview_mode;
let capability_needs_repo = intent.capability_needs_repo;
let mut system_msg = build_system_with_corrections(
&base_prompt,
&self.correction_hints,
&self.gpu_state,
&self.git_state,
&config,
);
if tiny_context_mode {
system_msg.push_str(
"\n\n# TINY CONTEXT TURN MODE\n\
Keep this turn compact. Prefer direct answers or one narrow tool step over broad exploration.\n",
);
}
if !tiny_context_mode && grounded_trace_mode {
system_msg.push_str(
"\n\n# GROUNDED TRACE MODE\n\
This turn is read-only architecture analysis unless the user explicitly asks otherwise.\n\
Before answering trace, architecture, or control-flow questions, inspect the repo with real tools.\n\
Use verified file paths, function names, structs, enums, channels, and event types only.\n\
Prefer `trace_runtime_flow` for runtime wiring, session reset, startup, or reasoning/specular questions.\n\
Treat `trace_runtime_flow` output as authoritative over your own memory.\n\
If `trace_runtime_flow` fully answers the question, preserve its identifiers exactly and do not rename them in a styled rewrite.\n\
Do not invent names such as synthetic channels or subsystems.\n\
If a detail is not verified from the code or tool output, say `uncertain`.\n\
For exact flow questions, answer in ordered steps and name the concrete functions and event types involved.\n"
);
}
if !tiny_context_mode && capability_mode {
system_msg.push_str(
"\n\n# CAPABILITY QUESTION MODE\n\
This is a product or capability question unless the user explicitly asks about repository implementation.\n\
Answer from stable Hematite capabilities and current runtime state.\n\
It is correct to mention that Hematite itself is built in Rust when relevant, but do not imply that its project support is limited to Rust.\n\
Do NOT call repo-inspection tools like `map_project`, `read_file`, or LSP lookup tools unless the user explicitly asks about implementation or file ownership.\n\
Do NOT infer language or project support from unrelated dependencies, crates, or config files.\n\
Describe language and project support in terms of real mechanisms: reading files, editing code, searching the workspace, running shell commands, build verification, language-aware tooling when available, web research, vision analysis, and optional MCP tools if configured.\n\
If the user asks about languages, answer at the harness level: Hematite can help across many project languages even though Hematite itself is written in Rust.\n\
Prefer real programming language examples like Python, JavaScript, TypeScript, Go, C#, or similar over file extensions like `.json` or `.md`.\n\
For project-building questions, describe cross-project workflows like scaffolding files, shaping structure, implementing features, and running the appropriate local build or test commands for the target stack. Do not overclaim certainty.\n\
Never mention raw `mcp__*` tool names unless those tools are active this turn and directly relevant.\n\
Keep the answer short, plain, and ASCII-first.\n"
);
}
if !tiny_context_mode && toolchain_mode {
system_msg.push_str(
"\n\n# TOOLCHAIN DISCIPLINE MODE\n\
This turn is about Hematite's real built-in tools and how to choose them.\n\
Prefer `describe_toolchain` before you try to summarize tool capabilities or propose a read-only investigation plan from memory.\n\
Use only real built-in tool names.\n\
Do not invent helper tools, MCP tool names, synthetic symbols, or example function names.\n\
If `describe_toolchain` fully answers the question, preserve its output exactly instead of restyling it.\n\
Be explicit about which tools are optional or conditional.\n"
);
}
if !tiny_context_mode && host_inspection_mode {
system_msg.push_str(
"\n\n# HOST INSPECTION MODE\n\
This turn is about the local machine and environment, not repository architecture.\n\
Prefer `inspect_host` before raw `shell` for PATH analysis, installed developer tool versions, desktop item counts, Downloads summaries, listening ports, repo-doctor checks, and directory/disk-size reports.\n\
Use the closest built-in topic first: `summary`, `toolchains`, `path`, `desktop`, `downloads`, `ports`, `repo_doctor`, `directory`, or `disk`.\n\
Only use `shell` if the host question truly goes beyond `inspect_host`.\n"
);
}
if !tiny_context_mode && project_map_mode {
system_msg.push_str(
"\n\n# PROJECT MAP DISCIPLINE MODE\n\
For repository structure, entrypoint, owner-file, or architecture-map questions, prefer `map_project` first.\n\
If `map_project` provides likely entrypoints and core owner files, preserve that grounded structure instead of rewriting it into broad prose.\n\
Do not invent new entrypoints or owner files that are not present in the tool output.\n\
Keep the final answer compact and architecture-first.\n"
);
}
if !tiny_context_mode && architecture_overview_mode {
system_msg.push_str(
"\n\n# ARCHITECTURE OVERVIEW DISCIPLINE MODE\n\
For broad runtime or architecture walkthroughs, prefer authoritative tools first: `trace_runtime_flow` for control flow and `map_project` for compact structure.\n\
Do not call `auto_pin_context` or `list_pinned` in read-only analysis. Avoid broad `read_file` calls unless the user explicitly asks for implementation detail in one named file.\n\
Preserve grounded tool output rather than restyling it into a larger answer.\n"
);
}
system_msg.push_str(&format!(
"\n\n# WORKFLOW MODE\nCURRENT WORKFLOW: {}\n",
self.workflow_mode.label()
));
if tiny_context_mode {
system_msg
.push_str("Use the narrowest safe behavior for this mode. Keep the turn short.\n");
} else {
match self.workflow_mode {
WorkflowMode::Auto => system_msg.push_str(
"AUTO means choose the narrowest effective path for the request. Answer directly when stable product logic exists. Inspect before editing. Mutate only when the user is clearly asking for implementation.\n",
),
WorkflowMode::Ask => system_msg.push_str(
"ASK means analysis only. Stay read-only, inspect the repo, explain findings, and do not make changes unless the user explicitly switches modes.\n",
),
WorkflowMode::Code => system_msg.push_str(
"CODE means implementation is allowed when needed. Keep proof-before-action, verification, and edit precision discipline. If an active plan handoff exists in session memory or `.hematite/PLAN.md`, treat it as the implementation brief unless the user explicitly overrides it. For ordinary workspace inspection during implementation, use built-in read/edit tools first and do not reach for `mcp__filesystem__*` unless the user explicitly requires MCP.\n",
),
WorkflowMode::Architect => system_msg.push_str(
"ARCHITECT means plan first. Inspect, reason, and produce a concrete implementation approach before editing. Do not mutate code unless the user explicitly asks to implement. When you produce an implementation handoff, use these exact ASCII headings so Hematite can persist the plan: `# Goal`, `# Target Files`, `# Ordered Steps`, `# Verification`, `# Risks`, `# Open Questions`.\n",
),
WorkflowMode::ReadOnly => system_msg.push_str(
"READ-ONLY means analysis only. Do not modify files, run mutating shell commands, or commit changes.\n",
),
WorkflowMode::Chat => {} }
}
if !tiny_context_mode && self.workflow_mode == WorkflowMode::Architect {
system_msg.push_str("\n\n# ARCHITECT HANDOFF CONTRACT\n");
system_msg.push_str(architect_handoff_contract());
system_msg.push('\n');
}
if !tiny_context_mode && implement_current_plan {
system_msg.push_str(
"\n\n# CURRENT PLAN EXECUTION CONTRACT\n\
The user explicitly asked you to implement the current saved plan.\n\
Do not restate the plan, do not provide preliminary contracts, and do not stop at analysis.\n\
Use the saved plan as the brief, gather only the minimum built-in file evidence you need, then start editing the target files.\n\
Do not call `map_project` during current-plan execution.\n\
Every file inspection or edit call must be path-scoped to one of the saved target files.\n\
If a built-in workspace read tool gives you enough context, your next step should be mutation or a concrete blocking question, not another summary.\n",
);
if let Some(plan) = self.session_memory.current_plan.as_ref() {
if !plan.target_files.is_empty() {
system_msg.push_str("\n# CURRENT PLAN TARGET FILES\n");
for path in &plan.target_files {
system_msg.push_str(&format!("- {}\n", path));
}
}
}
}
if !tiny_context_mode {
let pinned = self.pinned_files.lock().await;
if !pinned.is_empty() {
system_msg.push_str("\n\n# ACTIVE CONTEXT (PINNED FILES)\n");
system_msg.push_str("The following files are locked in your active memory for high-fidelity reference.\n\n");
for (path, content) in pinned.iter() {
system_msg.push_str(&format!("## FILE: {}\n```\n{}\n```\n\n", path, content));
}
}
}
if !tiny_context_mode {
self.append_session_handoff(&mut system_msg);
}
let system_msg = if self.workflow_mode.is_chat() {
self.build_chat_system_prompt()
} else {
system_msg
};
if self.history.is_empty() || self.history[0].role != "system" {
self.history.insert(0, ChatMessage::system(&system_msg));
} else {
self.history[0] = ChatMessage::system(&system_msg);
}
self.cancel_token
.store(false, std::sync::atomic::Ordering::SeqCst);
self.reasoning_history = None;
let is_gemma = crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
let user_content = match self.think_mode {
Some(true) => format!("/think\n{}", effective_user_input),
Some(false) => format!("/no_think\n{}", effective_user_input),
None if !is_gemma
&& !self.workflow_mode.is_chat()
&& !is_quick_tool_request(&effective_user_input) =>
{
format!("/think\n{}", effective_user_input)
}
None => effective_user_input.clone(),
};
if let Some(image) = user_turn.attached_image.as_ref() {
let image_url =
crate::tools::vision::encode_image_as_data_url(std::path::Path::new(&image.path))
.map_err(|e| format!("Image attachment failed for {}: {}", image.name, e))?;
self.history
.push(ChatMessage::user_with_image(&user_content, &image_url));
} else {
self.history.push(ChatMessage::user(&user_content));
}
self.transcript.log_user(&transcript_user_input);
let vein_docs_only = self.vein_docs_only_mode();
let allow_vein_context = !self.workflow_mode.is_chat()
|| should_use_vein_in_chat(&effective_user_input, vein_docs_only);
let (vein_context, vein_paths) = if allow_vein_context {
self.refresh_vein_index();
let _ = tx
.send(InferenceEvent::VeinStatus {
file_count: self.vein.file_count(),
embedded_count: self.vein.embedded_chunk_count(),
docs_only: vein_docs_only,
})
.await;
match self.build_vein_context(&effective_user_input) {
Some((ctx, paths)) => (Some(ctx), paths),
None => (None, Vec::new()),
}
} else {
(None, Vec::new())
};
if !vein_paths.is_empty() {
let _ = tx
.send(InferenceEvent::VeinContext { paths: vein_paths })
.await;
}
let routed_model = route_model(
&effective_user_input,
effective_fast.as_deref(),
effective_think.as_deref(),
)
.map(|s| s.to_string());
let mut loop_intervention: Option<String> = None;
let mut implementation_started = false;
let mut non_mutating_plan_steps = 0usize;
let non_mutating_plan_soft_cap = 5usize;
let non_mutating_plan_hard_cap = 8usize;
let mut overview_project_map: Option<String> = None;
let mut overview_runtime_trace: Option<String> = None;
let max_iters = 25;
let mut consecutive_errors = 0;
let mut first_iter = true;
let _called_this_turn: std::collections::HashSet<String> = std::collections::HashSet::new();
let _result_counts: std::collections::HashMap<String, usize> =
std::collections::HashMap::new();
let mut repeat_counts: std::collections::HashMap<String, usize> =
std::collections::HashMap::new();
let mut successful_read_targets: std::collections::HashSet<String> =
std::collections::HashSet::new();
let mut successful_read_regions: std::collections::HashSet<(String, u64)> =
std::collections::HashSet::new();
let mut successful_grep_targets: std::collections::HashSet<String> =
std::collections::HashSet::new();
let mut no_match_grep_targets: std::collections::HashSet<String> =
std::collections::HashSet::new();
let mut broad_grep_targets: std::collections::HashSet<String> =
std::collections::HashSet::new();
let mut turn_anchor = self.history.len().saturating_sub(1);
for _iter in 0..max_iters {
let mut mutation_occurred = false;
if self.cancel_token.load(std::sync::atomic::Ordering::SeqCst) {
self.cancel_token
.store(false, std::sync::atomic::Ordering::SeqCst);
let _ = tx
.send(InferenceEvent::Thought("Turn cancelled by user.".into()))
.await;
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if self
.compact_history_if_needed(&tx, Some(turn_anchor))
.await?
{
turn_anchor = 2;
}
let inject_vein = first_iter && !implement_current_plan;
let messages = if implement_current_plan {
first_iter = false;
self.context_window_slice_from(turn_anchor)
} else {
first_iter = false;
self.context_window_slice()
};
let mut prompt_msgs = if let Some(intervention) = loop_intervention.take() {
if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
let mut msgs = vec![self.history[0].clone()];
msgs.push(ChatMessage::system(&intervention));
msgs
} else {
let merged =
format!("{}\n\n{}", self.history[0].content.as_str(), intervention);
vec![ChatMessage::system(&merged)]
}
} else {
vec![self.history[0].clone()]
};
if inject_vein {
if let Some(ref ctx) = vein_context.as_ref() {
if crate::agent::inference::is_gemma4_model_name(&self.engine.current_model()) {
prompt_msgs.push(ChatMessage::system(ctx));
} else {
let merged = format!("{}\n\n{}", prompt_msgs[0].content.as_str(), ctx);
prompt_msgs[0] = ChatMessage::system(&merged);
}
}
}
prompt_msgs.extend(messages);
if let Some(budget_note) =
enforce_prompt_budget(&mut prompt_msgs, self.engine.current_context_length())
{
self.emit_operator_checkpoint(
&tx,
OperatorCheckpointState::BudgetReduced,
budget_note,
)
.await;
let recipe = plan_recovery(
RecoveryScenario::PromptBudgetPressure,
&self.recovery_context,
);
self.emit_recovery_recipe_summary(
&tx,
recipe.recipe.scenario.label(),
compact_recovery_plan_summary(&recipe),
)
.await;
}
self.emit_prompt_pressure_for_messages(&tx, &prompt_msgs)
.await;
let (mut text, mut tool_calls, usage, finish_reason) = match self
.engine
.call_with_tools(&prompt_msgs, &self.tools, routed_model.as_deref())
.await
{
Ok(result) => result,
Err(e) => {
let class = classify_runtime_failure(&e);
if should_retry_runtime_failure(class) {
if self.recovery_context.consume_transient_retry() {
let label = match class {
RuntimeFailureClass::ProviderDegraded => "provider_degraded",
_ => "empty_model_response",
};
self.transcript.log_system(&format!(
"Automatic provider recovery triggered: {}",
e.trim()
));
self.emit_recovery_recipe_summary(
&tx,
label,
compact_runtime_recovery_summary(class),
)
.await;
let _ = tx
.send(InferenceEvent::ProviderStatus {
state: ProviderRuntimeState::Recovering,
summary: compact_runtime_recovery_summary(class).into(),
})
.await;
self.emit_operator_checkpoint(
&tx,
OperatorCheckpointState::RecoveringProvider,
compact_runtime_recovery_summary(class),
)
.await;
continue;
}
}
self.emit_runtime_failure(&tx, class, &e).await;
break;
}
};
self.emit_provider_live(&tx).await;
if let Some(ref u) = usage {
let _ = tx.send(InferenceEvent::UsageUpdate(u.clone())).await;
}
if tool_calls
.as_ref()
.map(|calls| calls.is_empty())
.unwrap_or(true)
{
if let Some(raw_text) = text.as_deref() {
let native_calls = crate::agent::inference::extract_native_tool_calls(raw_text);
if !native_calls.is_empty() {
tool_calls = Some(native_calls);
let stripped =
crate::agent::inference::strip_native_tool_call_text(raw_text);
text = if stripped.trim().is_empty() {
None
} else {
Some(stripped)
};
}
}
}
let tool_calls = tool_calls.filter(|c| !c.is_empty());
let near_context_ceiling = usage
.as_ref()
.map(|u| u.prompt_tokens >= (self.engine.current_context_length() * 82 / 100))
.unwrap_or(false);
if let Some(calls) = tool_calls {
let (calls, prune_trace_note) =
prune_architecture_trace_batch(calls, architecture_overview_mode);
if let Some(note) = prune_trace_note {
let _ = tx.send(InferenceEvent::Thought(note)).await;
}
let (calls, prune_bloat_note) = prune_read_only_context_bloat_batch(
calls,
self.workflow_mode.is_read_only(),
architecture_overview_mode,
);
if let Some(note) = prune_bloat_note {
let _ = tx.send(InferenceEvent::Thought(note)).await;
}
let (calls, prune_note) = prune_authoritative_tool_batch(
calls,
grounded_trace_mode,
&effective_user_input,
);
if let Some(note) = prune_note {
let _ = tx.send(InferenceEvent::Thought(note)).await;
}
let (calls, batch_note) = order_batch_reads_first(calls);
if let Some(note) = batch_note {
let _ = tx.send(InferenceEvent::Thought(note)).await;
}
if let Some(repeated_path) = calls
.iter()
.filter(|c| {
let parsed = serde_json::from_str::<Value>(
&crate::agent::inference::normalize_tool_argument_string(
&c.function.name,
&c.function.arguments,
),
)
.ok();
let offset = parsed
.as_ref()
.and_then(|args| args.get("offset").and_then(|v| v.as_u64()))
.unwrap_or(0);
if offset < 200 {
return true;
}
if let Some(path) = parsed
.as_ref()
.and_then(|args| args.get("path").and_then(|v| v.as_str()))
{
let normalized = normalize_workspace_path(path);
return successful_read_regions.contains(&(normalized, offset));
}
false
})
.filter_map(|c| repeated_read_target(&c.function))
.find(|path| successful_read_targets.contains(path))
{
loop_intervention = Some(format!(
"STOP. Already read `{}` this turn. Use `inspect_lines` on the relevant window or a specific `grep_files`, then continue.",
repeated_path
));
let _ = tx
.send(InferenceEvent::Thought(
"Read discipline: preventing repeated full-file reads on the same path."
.into(),
))
.await;
continue;
}
if capability_mode
&& !capability_needs_repo
&& calls
.iter()
.all(|c| is_capability_probe_tool(&c.function.name))
{
loop_intervention = Some(
"STOP. This is a stable capability question. Do not inspect the repository or call tools. \
Answer directly from verified Hematite capabilities, current runtime state, and the documented product boundary. \
Do not mention raw `mcp__*` names unless they are active and directly relevant."
.to_string(),
);
let _ = tx
.send(InferenceEvent::Thought(
"Capability mode: skipping unnecessary repo-inspection tools and answering directly."
.into(),
))
.await;
continue;
}
let raw_content = text.as_deref().unwrap_or(" ");
if let Some(thought) = crate::agent::inference::extract_think_block(raw_content) {
let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
self.reasoning_history = Some(thought);
}
let stored_tool_call_content = if implement_current_plan {
cap_output(raw_content, 1200)
} else {
raw_content.to_string()
};
self.history.push(ChatMessage::assistant_tool_calls(
&stored_tool_call_content,
calls.clone(),
));
let mut results = Vec::new();
let (parallel_calls, serial_calls): (Vec<_>, Vec<_>) = calls
.clone()
.into_iter()
.partition(|c| is_parallel_safe(&c.function.name));
if !parallel_calls.is_empty() {
let mut tasks = Vec::new();
for call in parallel_calls {
let tx_clone = tx.clone();
let config_clone = config.clone();
let call_with_id = call.clone();
tasks.push(self.process_tool_call(
call_with_id.function,
config_clone,
yolo,
tx_clone,
call_with_id.id,
));
}
results.extend(futures::future::join_all(tasks).await);
}
for call in serial_calls {
results.push(
self.process_tool_call(
call.function,
config.clone(),
yolo,
tx.clone(),
call.id,
)
.await,
);
}
let mut authoritative_tool_output: Option<String> = None;
let mut blocked_policy_output: Option<String> = None;
let mut recoverable_policy_intervention: Option<String> = None;
let mut recoverable_policy_recipe: Option<RecoveryScenario> = None;
let mut recoverable_policy_checkpoint: Option<(OperatorCheckpointState, String)> =
None;
for res in results {
let call_id = res.call_id.clone();
let tool_name = res.tool_name.clone();
let final_output = res.output.clone();
let is_error = res.is_error;
for msg in res.msg_results {
self.history.push(msg);
}
if matches!(
tool_name.as_str(),
"patch_hunk" | "write_file" | "edit_file" | "multi_search_replace"
) {
mutation_occurred = true;
implementation_started = true;
if !is_error {
let path = res.args.get("path").and_then(|v| v.as_str()).unwrap_or("");
if !path.is_empty() {
self.vein.bump_heat(path);
self.l1_context = self.vein.l1_context();
}
}
}
if tool_name == "verify_build" {
self.record_session_verification(
!is_error
&& (final_output.contains("BUILD OK")
|| final_output.contains("BUILD SUCCESS")
|| final_output.contains("BUILD OKAY")),
if is_error {
"Explicit verify_build failed."
} else {
"Explicit verify_build passed."
},
);
}
let call_key = format!(
"{}:{}",
tool_name,
serde_json::to_string(&res.args).unwrap_or_default()
);
let repeat_count = repeat_counts.entry(call_key.clone()).or_insert(0);
*repeat_count += 1;
let repeat_guard_exempt = matches!(
tool_name.as_str(),
"verify_build" | "git_commit" | "git_push"
);
if *repeat_count >= 3 && !repeat_guard_exempt {
loop_intervention = Some(format!(
"STOP. You have called `{}` with identical arguments {} times and keep getting the same result. \
Do not call it again. Either answer directly from what you already know, \
use a different tool or approach, or ask the user for clarification.",
tool_name, *repeat_count
));
let _ = tx
.send(InferenceEvent::Thought(format!(
"Repeat guard: `{}` called {} times with same args — injecting stop intervention.",
tool_name, *repeat_count
)))
.await;
}
if is_error {
consecutive_errors += 1;
} else {
consecutive_errors = 0;
}
if consecutive_errors >= 3 {
loop_intervention = Some(
"CRITICAL: Repeated tool failures detected. You are likely stuck in a loop. \
STOP all tool calls immediately. Analyze why your previous 3 calls failed \
(check for hallucinations or invalid arguments) and ask the user for \
clarification if you cannot proceed.".to_string()
);
}
if consecutive_errors >= 4 {
self.emit_runtime_failure(
&tx,
RuntimeFailureClass::ToolLoop,
"Hard termination: too many consecutive tool errors.",
)
.await;
return Ok(());
}
let _ = tx
.send(InferenceEvent::ToolCallResult {
id: call_id.clone(),
name: tool_name.clone(),
output: final_output.clone(),
is_error,
})
.await;
let compact_ctx = crate::agent::inference::is_compact_context_window_pub(
self.engine.current_context_length(),
);
let capped = if implement_current_plan {
cap_output(&final_output, 1200)
} else if tool_name == "map_project"
&& self.workflow_mode == WorkflowMode::Architect
{
cap_output(&final_output, 2500)
} else if tool_name == "map_project" {
cap_output(&final_output, 3500)
} else if compact_ctx
&& (tool_name == "read_file" || tool_name == "inspect_lines")
{
let limit = 3000usize;
if final_output.len() > limit {
let total_lines = final_output.lines().count();
let mut split_at = limit;
while !final_output.is_char_boundary(split_at) && split_at > 0 {
split_at -= 1;
}
format!(
"{}\n... [file truncated — {} total lines. Use `inspect_lines` with start_line near {} to reach the end of the file.]",
&final_output[..split_at],
total_lines,
total_lines.saturating_sub(150),
)
} else {
final_output.clone()
}
} else {
cap_output(&final_output, 8000)
};
self.history.push(ChatMessage::tool_result_for_model(
&call_id,
&tool_name,
&capped,
&self.engine.current_model(),
));
if architecture_overview_mode && !is_error && tool_name == "trace_runtime_flow"
{
overview_runtime_trace =
Some(summarize_runtime_trace_output(&final_output));
} else if architecture_overview_mode && !is_error && tool_name == "map_project"
{
overview_project_map = Some(summarize_project_map_output(&final_output));
}
if !architecture_overview_mode
&& !is_error
&& ((grounded_trace_mode && tool_name == "trace_runtime_flow")
|| (toolchain_mode && tool_name == "describe_toolchain"))
{
authoritative_tool_output = Some(final_output.clone());
} else if !architecture_overview_mode
&& !is_error
&& tool_name == "map_project"
&& project_map_mode
&& authoritative_tool_output.is_none()
{
authoritative_tool_output =
Some(summarize_project_map_output(&final_output));
}
if !is_error && tool_name == "read_file" {
if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
let normalized = normalize_workspace_path(path);
let read_offset =
res.args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0);
successful_read_targets.insert(normalized.clone());
successful_read_regions.insert((normalized.clone(), read_offset));
}
}
if !is_error && tool_name == "grep_files" {
if let Some(path) = res.args.get("path").and_then(|v| v.as_str()) {
let normalized = normalize_workspace_path(path);
if final_output.starts_with("No matches for ") {
no_match_grep_targets.insert(normalized);
} else if grep_output_is_high_fanout(&final_output) {
broad_grep_targets.insert(normalized);
} else {
successful_grep_targets.insert(normalized);
}
}
}
if is_error
&& matches!(tool_name.as_str(), "edit_file" | "multi_search_replace")
&& (final_output.contains("search string not found")
|| final_output.contains("search string is too short")
|| final_output.contains("search string matched"))
{
if let Some(target) = action_target_path(&tool_name, &res.args) {
let guidance = if final_output.contains("matched") {
format!(
"STOP. `{}` on `{}` — search string matched multiple times. Use `inspect_lines` on the exact region to get a unique anchor, then retry.",
tool_name, target
)
} else {
format!(
"STOP. `{}` on `{}` — search string did not match. Use `inspect_lines` on the target region to get the exact current text (check whitespace and indentation), then retry.",
tool_name, target
)
};
loop_intervention = Some(guidance);
*repeat_count = 0;
}
}
if res.blocked_by_policy
&& is_mcp_workspace_read_tool(&tool_name)
&& recoverable_policy_intervention.is_none()
{
recoverable_policy_intervention = Some(
"STOP. MCP filesystem reads are blocked. Use `read_file` or `inspect_lines` instead.".to_string(),
);
recoverable_policy_recipe = Some(RecoveryScenario::McpWorkspaceReadBlocked);
recoverable_policy_checkpoint = Some((
OperatorCheckpointState::BlockedPolicy,
"MCP workspace read blocked; rerouting to built-in file tools."
.to_string(),
));
} else if res.blocked_by_policy
&& implement_current_plan
&& tool_name == "map_project"
&& recoverable_policy_intervention.is_none()
{
recoverable_policy_intervention = Some(
"STOP. `map_project` is blocked during plan execution. Read your planned target files directly, then edit.".to_string(),
);
recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
recoverable_policy_checkpoint = Some((
OperatorCheckpointState::BlockedPolicy,
"`map_project` blocked for current-plan execution.".to_string(),
));
} else if res.blocked_by_policy
&& implement_current_plan
&& is_current_plan_irrelevant_tool(&tool_name)
&& recoverable_policy_intervention.is_none()
{
recoverable_policy_intervention = Some(format!(
"STOP. `{}` is not a planned target. Use `inspect_lines` on a planned file, then edit.",
tool_name
));
recoverable_policy_recipe = Some(RecoveryScenario::CurrentPlanScopeBlocked);
recoverable_policy_checkpoint = Some((
OperatorCheckpointState::BlockedPolicy,
format!(
"Current-plan execution blocked unrelated tool `{}`.",
tool_name
),
));
} else if res.blocked_by_policy
&& implement_current_plan
&& final_output.contains("requires recent file evidence")
&& recoverable_policy_intervention.is_none()
{
let target = action_target_path(&tool_name, &res.args)
.unwrap_or_else(|| "the target file".to_string());
recoverable_policy_intervention = Some(format!(
"STOP. Edit blocked — `{target}` has no recent read. Use `inspect_lines` or `read_file` on it first, then retry."
));
recoverable_policy_recipe =
Some(RecoveryScenario::RecentFileEvidenceMissing);
recoverable_policy_checkpoint = Some((
OperatorCheckpointState::BlockedRecentFileEvidence,
format!("Edit blocked on `{target}`; recent file evidence missing."),
));
} else if res.blocked_by_policy
&& implement_current_plan
&& final_output.contains("requires an exact local line window first")
&& recoverable_policy_intervention.is_none()
{
let target = action_target_path(&tool_name, &res.args)
.unwrap_or_else(|| "the target file".to_string());
recoverable_policy_intervention = Some(format!(
"STOP. Edit blocked — `{target}` needs an inspected window. Use `inspect_lines` around the edit region, then retry."
));
recoverable_policy_recipe = Some(RecoveryScenario::ExactLineWindowRequired);
recoverable_policy_checkpoint = Some((
OperatorCheckpointState::BlockedExactLineWindow,
format!("Edit blocked on `{target}`; exact line window required."),
));
} else if res.blocked_by_policy && blocked_policy_output.is_none() {
blocked_policy_output = Some(final_output.clone());
}
if *repeat_count >= 5 {
let _ = tx.send(InferenceEvent::Done).await;
return Ok(());
}
if implement_current_plan
&& !implementation_started
&& !is_error
&& is_non_mutating_plan_step_tool(&tool_name)
{
non_mutating_plan_steps += 1;
}
}
if let Some(intervention) = recoverable_policy_intervention {
if let Some((state, summary)) = recoverable_policy_checkpoint.take() {
self.emit_operator_checkpoint(&tx, state, summary).await;
}
if let Some(scenario) = recoverable_policy_recipe.take() {
let recipe = plan_recovery(scenario, &self.recovery_context);
self.emit_recovery_recipe_summary(
&tx,
recipe.recipe.scenario.label(),
compact_recovery_plan_summary(&recipe),
)
.await;
}
loop_intervention = Some(intervention);
let _ = tx
.send(InferenceEvent::Thought(
"Policy recovery: rerouting blocked MCP filesystem inspection to built-in workspace tools."
.into(),
))
.await;
continue;
}
if architecture_overview_mode {
match (
overview_project_map.as_deref(),
overview_runtime_trace.as_deref(),
) {
(Some(project_map), Some(runtime_trace)) => {
let response =
build_architecture_overview_answer(project_map, runtime_trace);
self.history.push(ChatMessage::assistant_text(&response));
self.transcript.log_agent(&response);
for chunk in chunk_text(&response, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
break;
}
(Some(_), None) => {
loop_intervention = Some(
"Good. You now have the grounded repository structure. Next, call `trace_runtime_flow` for the runtime/control-flow half of the architecture overview. Prefer topic `user_turn` for the main execution path, or `runtime_subsystems` if that is more direct. Do not call `read_file`, `auto_pin_context`, or LSP tools here."
.to_string(),
);
continue;
}
(None, Some(_)) => {
loop_intervention = Some(
"Good. You now have the grounded runtime/control-flow trace. Next, call `map_project` once to capture entrypoints and core owner files. Keep it compact and do not call broad file-read tools in this architecture overview."
.to_string(),
);
continue;
}
(None, None) => {}
}
}
if implement_current_plan
&& !implementation_started
&& non_mutating_plan_steps >= non_mutating_plan_hard_cap
{
let msg = "Current-plan execution stalled: too many non-mutating inspection steps without a concrete edit. Stay on the saved target files, narrow with `inspect_lines`, and then mutate, or ask one specific blocking question instead of continuing broad exploration.".to_string();
self.history.push(ChatMessage::assistant_text(&msg));
self.transcript.log_agent(&msg);
for chunk in chunk_text(&msg, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
break;
}
if let Some(blocked_output) = blocked_policy_output {
self.emit_operator_checkpoint(
&tx,
OperatorCheckpointState::BlockedPolicy,
"A blocked tool path was surfaced directly to the operator.",
)
.await;
self.history
.push(ChatMessage::assistant_text(&blocked_output));
self.transcript.log_agent(&blocked_output);
for chunk in chunk_text(&blocked_output, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
break;
}
if let Some(tool_output) = authoritative_tool_output {
self.history.push(ChatMessage::assistant_text(&tool_output));
self.transcript.log_agent(&tool_output);
for chunk in chunk_text(&tool_output, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk)).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
break;
}
if implement_current_plan && !implementation_started {
let base = "STOP analyzing. The current plan already defines the task. Use the built-in file evidence you now have and begin implementing the plan in the target files. Do not output preliminary findings or restate contracts.";
if non_mutating_plan_steps >= non_mutating_plan_soft_cap {
loop_intervention = Some(format!(
"{} You are close to the non-mutation cap. Use `inspect_lines` on one saved target file, then make the edit now.",
base
));
} else {
loop_intervention = Some(base.to_string());
}
} else if self.workflow_mode == WorkflowMode::Architect {
loop_intervention = Some(
format!(
"STOP exploring. You have enough evidence for a plan-first answer.\n{}\nUse the tool results already in history. Do not narrate your process. Do not call more tools unless a missing file path makes the handoff impossible.",
architect_handoff_contract()
),
);
}
if mutation_occurred && !yolo {
let _ = tx
.send(InferenceEvent::Thought(
"Self-Verification: Running 'cargo check' to ensure build integrity..."
.into(),
))
.await;
let verify_res = self.auto_verify_build().await;
let verify_ok = verify_res.contains("BUILD SUCCESS");
self.record_verify_build_result(verify_ok, &verify_res)
.await;
self.record_session_verification(
verify_ok,
if verify_ok {
"Automatic build verification passed."
} else {
"Automatic build verification failed."
},
);
self.history.push(ChatMessage::system(&format!(
"\n# SYSTEM VERIFICATION\n{verify_res}"
)));
let _ = tx
.send(InferenceEvent::Thought(
"Verification turn injected into history.".into(),
))
.await;
}
continue;
} else if let Some(response_text) = text {
if finish_reason.as_deref() == Some("length") && near_context_ceiling {
if intent.direct_answer == Some(DirectAnswerKind::SessionResetSemantics) {
let cleaned = build_session_reset_semantics_answer();
self.history.push(ChatMessage::assistant_text(&cleaned));
self.transcript.log_agent(&cleaned);
for chunk in chunk_text(&cleaned, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
break;
}
let warning = format_runtime_failure(
RuntimeFailureClass::ContextWindow,
"Context ceiling reached before the model completed the answer. Hematite trimmed what it could, but this turn still ran out of room. Retry with a narrower inspection step like `grep_files` or `inspect_lines`, or ask for a smaller scoped answer.",
);
self.history.push(ChatMessage::assistant_text(&warning));
self.transcript.log_agent(&warning);
let _ = tx
.send(InferenceEvent::Thought(
"Length recovery: model hit the context ceiling before completing the answer."
.into(),
))
.await;
for chunk in chunk_text(&warning, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
break;
}
if response_text.contains("<|tool_call")
|| response_text.contains("[END_TOOL_REQUEST]")
|| response_text.contains("<|tool_response")
|| response_text.contains("<tool_response|>")
{
loop_intervention = Some(
"Your previous response leaked raw native tool transcript markup instead of a valid tool invocation or final answer. Retry immediately. If you need a tool, emit a valid tool call only. If you do not need a tool, answer in plain text with no `<|tool_call>`, `<|tool_response>`, or `[END_TOOL_REQUEST]` markup.".to_string(),
);
continue;
}
if let Some(thought) = crate::agent::inference::extract_think_block(&response_text)
{
let _ = tx.send(InferenceEvent::Thought(thought.clone())).await;
self.reasoning_history = Some(thought);
}
let cleaned = crate::agent::inference::strip_think_blocks(&response_text);
if implement_current_plan && !implementation_started {
loop_intervention = Some(
"Do not stop at analysis. Implement the current saved plan now using built-in workspace tools and the target files already named in the plan. Only answer without edits if you have a concrete blocking question.".to_string(),
);
continue;
}
if cleaned.is_empty() {
let _ = tx.send(InferenceEvent::Done).await;
break;
}
self.persist_architect_handoff(&cleaned);
self.history.push(ChatMessage::assistant_text(&cleaned));
self.transcript.log_agent(&cleaned);
for chunk in chunk_text(&cleaned, 8) {
if !chunk.is_empty() {
let _ = tx.send(InferenceEvent::Token(chunk.clone())).await;
}
}
let _ = tx.send(InferenceEvent::Done).await;
break;
} else {
let detail = "Model returned an empty response.";
let class = classify_runtime_failure(detail);
if should_retry_runtime_failure(class) {
if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
if let RecoveryDecision::Attempt(plan) =
attempt_recovery(scenario, &mut self.recovery_context)
{
self.transcript.log_system(
"Automatic provider recovery triggered: model returned an empty response.",
);
self.emit_recovery_recipe_summary(
&tx,
plan.recipe.scenario.label(),
compact_recovery_plan_summary(&plan),
)
.await;
let _ = tx
.send(InferenceEvent::ProviderStatus {
state: ProviderRuntimeState::Recovering,
summary: compact_runtime_recovery_summary(class).into(),
})
.await;
self.emit_operator_checkpoint(
&tx,
OperatorCheckpointState::RecoveringProvider,
compact_runtime_recovery_summary(class),
)
.await;
continue;
}
}
}
self.emit_runtime_failure(&tx, class, detail).await;
break;
}
}
self.trim_history(80);
self.refresh_session_memory();
self.save_session();
self.emit_compaction_pressure(&tx).await;
Ok(())
}
/// Surfaces a runtime failure to the operator and records it for the session.
///
/// In order, this:
/// 1. Looks up a recovery scenario for `class`. When one exists, previews the
///    recovery decision, emits its recipe summary, and, if the recipe lists
///    `RecoveryStep::RefreshRuntimeProfile`, refreshes the runtime profile and
///    reports the resulting model / context length as a `Thought`.
/// 2. Emits a `ProviderStatus` event and an operator checkpoint when `class`
///    maps to one.
/// 3. Pushes the formatted failure into history as a system message, logs it
///    to the transcript, and sends `Error` followed by `Done` on `tx`.
///
/// Results of `tx.send(..)` are discarded (`let _ =`).
async fn emit_runtime_failure(
    &mut self,
    tx: &mpsc::Sender<InferenceEvent>,
    class: RuntimeFailureClass,
    detail: &str,
) {
    if let Some(scenario) = recovery_scenario_for_runtime_failure(class) {
        let decision = preview_recovery_decision(scenario, &self.recovery_context);
        let scenario_label = scenario.label();
        let decision_summary = compact_recovery_decision_summary(&decision);
        self.emit_recovery_recipe_summary(tx, scenario_label, decision_summary)
            .await;

        // Both decision variants carry a recipe; only its step list matters here.
        let refresh_requested = match &decision {
            RecoveryDecision::Attempt(plan) => plan
                .recipe
                .steps
                .contains(&RecoveryStep::RefreshRuntimeProfile),
            RecoveryDecision::Escalate { recipe, .. } => {
                recipe.steps.contains(&RecoveryStep::RefreshRuntimeProfile)
            }
        };

        if refresh_requested {
            let refreshed = self
                .refresh_runtime_profile_and_report(tx, "context_window_failure")
                .await;
            if let Some((model_id, context_length, changed)) = refreshed {
                // The wording differs depending on whether the refresh changed anything.
                let note = if !changed {
                    format!(
                        "Runtime refresh after context-window failure confirms model {} | CTX {}",
                        model_id, context_length
                    )
                } else {
                    format!(
                        "Runtime refresh after context-window failure: model {} | CTX {}",
                        model_id, context_length
                    )
                };
                let _ = tx.send(InferenceEvent::Thought(note)).await;
            }
        }
    }

    if let Some(state) = provider_state_for_runtime_failure(class) {
        let status_event = InferenceEvent::ProviderStatus {
            state,
            summary: compact_runtime_failure_summary(class).into(),
        };
        let _ = tx.send(status_event).await;
    }

    if let Some(state) = checkpoint_state_for_runtime_failure(class) {
        let checkpoint_summary = checkpoint_summary_for_runtime_failure(class);
        self.emit_operator_checkpoint(tx, state, checkpoint_summary)
            .await;
    }

    // Record the failure in history and the transcript before notifying the UI.
    let formatted = format_runtime_failure(class, detail);
    let history_entry = format!("# RUNTIME FAILURE\n{}", formatted);
    self.history.push(ChatMessage::system(&history_entry));
    self.transcript.log_system(&formatted);

    let _ = tx.send(InferenceEvent::Error(formatted)).await;
    let _ = tx.send(InferenceEvent::Done).await;
}
/// Runs the `verify_build` tool with `{"action": "build"}` and renders the
/// outcome as a report string.
///
/// On success the report starts with a `BUILD SUCCESS` banner; on failure it
/// starts with a `BUILD FAILURE` banner. In both cases the tool output is
/// passed through `cap_output(.., 2000)` before being appended.
async fn auto_verify_build(&self) -> String {
    let request = serde_json::json!({ "action": "build" });
    let outcome = crate::tools::verify_build::execute(&request).await;

    match outcome {
        Ok(out) => {
            let mut report =
                "BUILD SUCCESS: Your changes are architecturally sound.\n\n".to_string();
            report.push_str(&cap_output(&out, 2000));
            report
        }
        Err(e) => {
            let capped_errors = cap_output(&e, 2000);
            format!(
                "BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:\n\n{}",
                capped_errors
            )
        }
    }
}
/// Compacts the conversation history when `compaction::should_compact` says
/// the current history exceeds what the context length / VRAM ratio allow.
///
/// Returns `Ok(false)` without touching any state when no compaction is
/// needed, and `Ok(true)` after a compaction pass. This body never produces
/// an `Err`.
///
/// A compaction pass:
/// - replaces `self.history` and `self.running_summary` with the compacted
///   result (passing `anchor_index` through to `compaction::compact_history`),
/// - rebuilds `self.session_memory` from the new history, inheriting the
///   runtime ledger from the previous memory and recording the compaction,
/// - drops any non-system messages that precede the first `user` message,
/// - emits progress thoughts, a recovery recipe summary, and a
///   `HistoryCompacted` operator checkpoint on `tx`.
async fn compact_history_if_needed(
    &mut self,
    tx: &mpsc::Sender<InferenceEvent>,
    anchor_index: Option<usize>,
) -> Result<bool, String> {
    let vram_ratio = self.gpu_state.ratio();
    let context_length = self.engine.current_context_length();
    let config = CompactionConfig::adaptive(context_length, vram_ratio);

    let compaction_due = compaction::should_compact(&self.history, context_length, vram_ratio);
    if !compaction_due {
        return Ok(false);
    }

    let announcement = format!(
        "Compaction: ctx={}k vram={:.0}% threshold={}k tokens — chaining summary...",
        context_length / 1000,
        vram_ratio * 100.0,
        config.max_estimated_tokens / 1000,
    );
    let _ = tx.send(InferenceEvent::Thought(announcement)).await;

    let messages_before = self.history.len();
    let result = compaction::compact_history(
        &self.history,
        self.running_summary.as_deref(),
        config,
        anchor_index,
    );
    let removed_message_count = messages_before.saturating_sub(result.messages.len());
    self.history = result.messages;
    self.running_summary = result.summary;

    // Rebuild session memory from the compacted history, carrying the
    // runtime ledger over from the memory we are about to replace.
    let previous_memory = self.session_memory.clone();
    self.session_memory = compaction::extract_memory(&self.history);
    self.session_memory
        .inherit_runtime_ledger_from(&previous_memory);
    let compaction_note = format!(
        "Compacted history around active task '{}' and preserved {} working-set file(s).",
        self.session_memory.current_task,
        self.session_memory.working_set.len()
    );
    self.session_memory
        .record_compaction(removed_message_count, compaction_note);
    self.emit_compaction_pressure(tx).await;

    // Remove any non-system messages sitting between the system prefix and
    // the first user message. Nothing is removed when no user message exists.
    if let Some(first_non_sys) = self.history.iter().position(|m| m.role != "system") {
        let leading_gap = self
            .history
            .iter()
            .skip(first_non_sys)
            .position(|m| m.role == "user")
            .filter(|&gap| gap > 0);
        if let Some(gap) = leading_gap {
            self.history.drain(first_non_sys..first_non_sys + gap);
        }
    }

    let synthesis_note = format!(
        "Memory Synthesis: Extracted context for task: '{}'. Working set: {} files.",
        self.session_memory.current_task,
        self.session_memory.working_set.len()
    );
    let _ = tx.send(InferenceEvent::Thought(synthesis_note)).await;

    let pressure_plan = plan_recovery(RecoveryScenario::HistoryPressure, &self.recovery_context);
    self.emit_recovery_recipe_summary(
        tx,
        pressure_plan.recipe.scenario.label(),
        compact_recovery_plan_summary(&pressure_plan),
    )
    .await;

    let checkpoint_summary = format!(
        "History compacted into a recursive summary; active task '{}' with {} working-set file(s) carried forward.",
        self.session_memory.current_task,
        self.session_memory.working_set.len()
    );
    self.emit_operator_checkpoint(
        tx,
        OperatorCheckpointState::HistoryCompacted,
        checkpoint_summary,
    )
    .await;

    Ok(true)
}
/// Builds a retrieval-context block for `query` from The Vein.
///
/// Returns `None` when the query has fewer than three whitespace-separated
/// words, when the search fails, or when it yields no results. Otherwise
/// returns the rendered context text together with the de-duplicated list of
/// result paths, in first-seen order.
///
/// Each result contributes at most 500 bytes of content (followed by `...`
/// when truncated), and no further results are added once roughly
/// `MAX_CTX_CHARS` bytes have been accumulated.
fn build_vein_context(&self, query: &str) -> Option<(String, Vec<String>)> {
    // `split_whitespace` already ignores leading and trailing whitespace.
    if query.split_whitespace().count() < 3 {
        return None;
    }
    let results = tokio::task::block_in_place(|| self.vein.search_context(query, 4)).ok()?;
    if results.is_empty() {
        return None;
    }
    let semantic_active = self.vein.has_any_embeddings();
    let header = if semantic_active {
        "# Relevant context from The Vein (hybrid BM25 + semantic retrieval)\n\
         Use this to answer without needing extra read_file calls where possible.\n\n"
    } else {
        "# Relevant context from The Vein (BM25 keyword retrieval)\n\
         Use this to answer without needing extra read_file calls where possible.\n\n"
    };
    let mut ctx = String::from(header);
    let mut paths: Vec<String> = Vec::new();
    let mut total = 0usize;
    const MAX_CTX_CHARS: usize = 1_500;
    const MAX_SNIPPET_BYTES: usize = 500;
    for r in results {
        if total >= MAX_CTX_CHARS {
            break;
        }
        let snippet = if r.content.len() > MAX_SNIPPET_BYTES {
            // Slicing at a fixed byte offset panics when that offset lands inside
            // a multi-byte UTF-8 character, so back up to the nearest boundary.
            // Offset 0 is always a boundary, which bounds the loop.
            let mut cut = MAX_SNIPPET_BYTES;
            while !r.content.is_char_boundary(cut) {
                cut -= 1;
            }
            format!("{}...", &r.content[..cut])
        } else {
            r.content.clone()
        };
        ctx.push_str(&format!("--- {} ---\n{}\n\n", r.path, snippet));
        // The constant approximates the separator characters added around each entry.
        total += snippet.len() + r.path.len() + 10;
        if !paths.contains(&r.path) {
            paths.push(r.path);
        }
    }
    Some((ctx, paths))
}
/// Returns a copy of the history suitable for sending as conversation turns.
///
/// The message at index 0 and every `system` message are left out. Assistant
/// and tool messages with empty content get a single-space placeholder. If the
/// resulting list is non-empty and does not start with a `user` message, a
/// synthetic user message is inserted at the front.
fn context_window_slice(&self) -> Vec<ChatMessage> {
    let mut window: Vec<ChatMessage> = self
        .history
        .iter()
        .skip(1)
        .filter(|msg| msg.role != "system")
        .map(|msg| {
            let blank_reply = (msg.role == "assistant" || msg.role == "tool")
                && msg.content.as_str().is_empty();
            let mut copy = msg.clone();
            if blank_reply {
                copy.content = MessageContent::Text(" ".into());
            }
            copy
        })
        .collect();
    let starts_with_user = window.first().map_or(true, |first| first.role == "user");
    if !starts_with_user {
        window.insert(0, ChatMessage::user("Continuing previous context..."));
    }
    window
}
/// Like `context_window_slice`, but starts at `start_idx` in the history.
///
/// `start_idx` is raised to at least 1, so the message at index 0 is never
/// included; a start beyond the end yields an empty list. `system` messages
/// are skipped, empty assistant/tool content becomes a single space, and a
/// synthetic user message is inserted at the front when the first kept
/// message is not from the user.
fn context_window_slice_from(&self, start_idx: usize) -> Vec<ChatMessage> {
    let first_kept = start_idx.max(1);
    let mut window: Vec<ChatMessage> = self
        .history
        .iter()
        .skip(first_kept)
        .filter(|msg| msg.role != "system")
        .map(|msg| {
            let blank_reply = (msg.role == "assistant" || msg.role == "tool")
                && msg.content.as_str().is_empty();
            let mut copy = msg.clone();
            if blank_reply {
                copy.content = MessageContent::Text(" ".into());
            }
            copy
        })
        .collect();
    let starts_with_user = window.first().map_or(true, |first| first.role == "user");
    if !starts_with_user {
        window.insert(0, ChatMessage::user("Continuing current plan execution..."));
    }
    window
}
/// Shrinks the history to at most `max_messages` entries by removing the
/// oldest messages after index 0.
///
/// The message at index 0 is always kept, so the effective minimum size is
/// one: a `max_messages` of 0 is treated as 1. Without that floor the drain
/// range would extend past the end of the vector and panic.
fn trim_history(&mut self, max_messages: usize) {
    let keep = max_messages.max(1);
    if self.history.len() <= keep {
        return;
    }
    // `excess` is at most `len - 1`, so `1..=excess` stays in bounds.
    let excess = self.history.len() - keep;
    self.history.drain(1..=excess);
}
/// Asks the model to repair tool-call arguments that failed to parse as JSON.
///
/// Emits a thought event on `tx`, sends a repair prompt containing `bad_json`
/// to the engine (using `self.fast_model` when set), strips Markdown code
/// fences from the reply, and parses what is left.
///
/// # Errors
/// Returns the engine error as a string when the call fails, or
/// `"Repair failed: …"` when the reply is still not valid JSON.
async fn repair_tool_args(
    &self,
    tool_name: &str,
    bad_json: &str,
    tx: &mpsc::Sender<InferenceEvent>,
) -> Result<Value, String> {
    let notice = format!(
        "Attempting to repair malformed JSON for '{}'...",
        tool_name
    );
    let _ = tx.send(InferenceEvent::Thought(notice)).await;

    let prompt = format!(
        "The following JSON for tool '{}' is malformed and failed to parse:\n\n```json\n{}\n```\n\nOutput ONLY the corrected JSON string that fixes the syntax error (e.g. missing commas, unescaped quotes). Do NOT include markdown blocks or any other text.",
        tool_name, bad_json
    );
    let messages = vec![
        ChatMessage::system("You are a JSON repair tool. Output ONLY pure JSON."),
        ChatMessage::user(&prompt),
    ];

    let reply = self
        .engine
        .call_with_tools(&messages, &[], self.fast_model.as_deref())
        .await;
    let (text, _, _, _) = reply.map_err(|e| e.to_string())?;

    // Remove any code fences the model added despite the instructions.
    let raw = text.unwrap_or_default();
    let unfenced = raw
        .trim()
        .trim_start_matches("```json")
        .trim_start_matches("```")
        .trim_end_matches("```")
        .trim()
        .to_string();

    match serde_json::from_str(&unfenced) {
        Ok(value) => Ok(value),
        Err(e) => Err(format!("Repair failed: {}", e)),
    }
}
/// Runs a quick model-based review of freshly written file content.
///
/// Only files whose extension is in `CRITIC_EXTS` are reviewed. The content is
/// capped at 4000 bytes before being sent to the engine (using
/// `self.fast_model` when set).
///
/// Returns `Some(critique)` when the model reports issues. Returns `None` when
/// the extension is not reviewed, the engine call fails or returns no text,
/// the reply is empty, or the reply contains the standalone word `PASS`
/// (case-insensitive).
async fn run_critic_check(
    &self,
    path: &str,
    content: &str,
    tx: &mpsc::Sender<InferenceEvent>,
) -> Option<String> {
    let ext = std::path::Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");
    const CRITIC_EXTS: &[&str] = &["rs", "js", "ts", "py", "go", "c", "cpp"];
    if !CRITIC_EXTS.contains(&ext) {
        return None;
    }
    let _ = tx
        .send(InferenceEvent::Thought(format!(
            "CRITIC: Reviewing changes to '{}'...",
            path
        )))
        .await;
    let truncated = cap_output(content, 4000);
    let prompt = format!(
        "You are a Senior Security and Code Quality auditor. Review this file content for '{}' and identify any critical logic errors, security vulnerabilities, or missing error handling. Be extremely concise. If the code looks good, output 'PASS'.\n\n```{}\n{}\n```",
        path, ext, truncated
    );
    let messages = vec![
        ChatMessage::system("You are a technical critic. Identify ONLY critical issues. Output 'PASS' if none found."),
        ChatMessage::user(&prompt),
    ];
    let (text, _, _, _) = self
        .engine
        .call_with_tools(&messages, &[], self.fast_model.as_deref())
        .await
        .ok()?;
    let critique = text?.trim().to_string();
    // Match PASS as a whole word. A plain substring test would also match
    // "password", "bypass" or "passed" and silently discard real findings.
    let signals_pass = critique
        .split(|c: char| !c.is_alphanumeric())
        .any(|word| word.eq_ignore_ascii_case("PASS"));
    if critique.is_empty() || signals_pass {
        None
    } else {
        Some(critique)
    }
}
}
/// Dispatches a tool call by delegating to `dispatch_builtin_tool`.
///
/// The delegate's result is returned unchanged: `Ok` carries the tool output
/// text and `Err` carries an error message.
pub async fn dispatch_tool(name: &str, args: &Value) -> Result<String, String> {
dispatch_builtin_tool(name, args).await
}
impl ConversationManager {
/// Asks the permission enforcer whether the tool call `name(args)` is
/// allowed, needs user approval, or is denied under `config` and `yolo_flag`.
///
/// Thin wrapper over `permission_enforcer::authorize_tool_call`; it does not
/// read any state from `self`.
fn check_authorization(
&self,
name: &str,
args: &serde_json::Value,
config: &crate::agent::config::HematiteConfig,
yolo_flag: bool,
) -> crate::agent::permission_enforcer::AuthorizationDecision {
crate::agent::permission_enforcer::authorize_tool_call(name, args, config, yolo_flag)
}
/// Executes one tool call end to end and reports the result.
///
/// Steps, in order:
/// 1. Parse the call's JSON arguments, attempting a model-based repair when
///    parsing fails and falling back to an empty object when repair fails.
/// 2. Fill in `map_project` defaults for Architect / read-only workflow modes.
/// 3. Validate action preconditions and authorization, asking the user through
///    `tx` when the decision is `Ask`.
/// 4. Run the tool: LSP tools, pinning tools, MCP tools, `swarm`,
///    `vision_analyze`, previewed edits, or the builtin dispatcher.
/// 5. Record tool economics and, on success, read/verify/mutation observations
///    plus an optional action receipt.
/// 6. For large successful `edit_file` / `write_file` calls that carry a
///    `content` argument, run the critic and attach its review.
///
/// Failures are not returned as `Err`; they are reported through
/// `ToolExecutionOutcome::is_error` with an `"Error: …"` output string.
async fn process_tool_call(
&self,
call: ToolCallFn,
config: crate::agent::config::HematiteConfig,
yolo: bool,
tx: mpsc::Sender<InferenceEvent>,
real_id: String,
) -> ToolExecutionOutcome {
let mut msg_results = Vec::new();
// Argument strings are normalized only when the current model name is
// recognized as a Gemma 4 model.
let gemma4_model =
crate::agent::inference::is_gemma4_model_name(&self.engine.current_model());
let normalized_arguments = if gemma4_model {
crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments)
} else {
call.arguments.clone()
};
// Parse the arguments; on failure try a repair, and if that also fails
// continue with an empty JSON object rather than aborting the call.
let mut args: Value = match serde_json::from_str(&normalized_arguments) {
Ok(v) => v,
Err(_) => {
match self
.repair_tool_args(&call.name, &normalized_arguments, &tx)
.await
{
Ok(v) => v,
Err(e) => {
let _ = tx
.send(InferenceEvent::Thought(format!(
"JSON Repair failed: {}",
e
)))
.await;
Value::Object(Default::default())
}
}
}
};
// `map_project` defaults: only keys the caller did not supply are filled in.
// Architect mode defaults to depth 2, other read-only modes to depth 3.
if call.name == "map_project" && self.workflow_mode == WorkflowMode::Architect {
if let Some(obj) = args.as_object_mut() {
obj.entry("include_symbols".to_string())
.or_insert(Value::Bool(false));
obj.entry("max_depth".to_string())
.or_insert(Value::Number(2_u64.into()));
}
} else if call.name == "map_project" && self.workflow_mode.is_read_only() {
if let Some(obj) = args.as_object_mut() {
obj.entry("include_symbols".to_string())
.or_insert(Value::Bool(false));
obj.entry("max_depth".to_string())
.or_insert(Value::Number(3_u64.into()));
}
}
let display = format_tool_display(&call.name, &args);
// Both checks are evaluated up front; a precondition failure takes priority
// over the authorization decision below.
let precondition_result = self.validate_action_preconditions(&call.name, &args).await;
let auth = self.check_authorization(&call.name, &args, &config, yolo);
let decision_result = match precondition_result {
Err(e) => Err(e),
Ok(_) => match auth {
crate::agent::permission_enforcer::AuthorizationDecision::Allow { .. } => Ok(()),
crate::agent::permission_enforcer::AuthorizationDecision::Ask {
reason,
source: _,
} => {
// Ask the user and wait for the reply. Anything other than an
// explicit `true` (including a dropped responder) counts as a decline.
let (approve_tx, approve_rx) = tokio::sync::oneshot::channel::<bool>();
let _ = tx
.send(InferenceEvent::ApprovalRequired {
id: real_id.clone(),
name: call.name.clone(),
display: format!("{}\nWhy: {}", display, reason),
diff: None,
responder: approve_tx,
})
.await;
match approve_rx.await {
Ok(true) => Ok(()),
_ => Err("Declined by user".into()),
}
}
crate::agent::permission_enforcer::AuthorizationDecision::Deny {
reason, ..
} => Err(reason),
},
};
// Policy blocks are recognized by the error message prefix.
let blocked_by_policy =
matches!(&decision_result, Err(e) if e.starts_with("Action blocked:"));
let (output, is_error) = match decision_result {
Err(e) => (format!("Error: {}", e), true),
Ok(_) => {
// The start event is only emitted for calls that are actually executed.
let _ = tx
.send(InferenceEvent::ToolCallStart {
id: real_id.clone(),
name: call.name.clone(),
args: display.clone(),
})
.await;
let result = if call.name.starts_with("lsp_") {
// LSP tools: missing `path` defaults to "", missing `line` /
// `character` default to 0.
let lsp = self.lsp_manager.clone();
let path = args
.get("path")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let line = args.get("line").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
let character =
args.get("character").and_then(|v| v.as_u64()).unwrap_or(0) as u32;
match call.name.as_str() {
"lsp_definitions" => {
crate::tools::lsp_tools::lsp_definitions(lsp, path, line, character)
.await
}
"lsp_references" => {
crate::tools::lsp_tools::lsp_references(lsp, path, line, character)
.await
}
"lsp_hover" => {
crate::tools::lsp_tools::lsp_hover(lsp, path, line, character).await
}
"lsp_search_symbol" => {
let query = args
.get("query")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
crate::tools::lsp_tools::lsp_search_symbol(lsp, query).await
}
"lsp_rename_symbol" => {
let new_name = args
.get("new_name")
.and_then(|v| v.as_str())
.unwrap_or_default()
.to_string();
crate::tools::lsp_tools::lsp_rename_symbol(
lsp, path, line, character, new_name,
)
.await
}
"lsp_get_diagnostics" => {
crate::tools::lsp_tools::lsp_get_diagnostics(lsp, path).await
}
_ => Err(format!("Unknown LSP tool: {}", call.name)),
}
} else if call.name == "auto_pin_context" {
// Pins up to three files from `paths` into `self.pinned_files`.
let pts = args.get("paths").and_then(|v| v.as_array());
let reason = args
.get("reason")
.and_then(|v| v.as_str())
.unwrap_or("uninformed scoping");
if let Some(arr) = pts {
let mut pinned = Vec::new();
{
// The pinned-files lock is held for the whole loop, including the
// awaited guard notification below.
let mut guard = self.pinned_files.lock().await;
const MAX_PINNED_SIZE: u64 = 25 * 1024 * 1024;
for v in arr.iter().take(3) {
if let Some(p) = v.as_str() {
// Files whose metadata or contents cannot be read are skipped silently.
if let Ok(meta) = std::fs::metadata(p) {
if meta.len() > MAX_PINNED_SIZE {
let _ = tx.send(InferenceEvent::Thought(format!("[GUARD] Skipping {} - size ({} bytes) exceeds VRAM safety limit (25MB).", p, meta.len()))).await;
continue;
}
if let Ok(content) = std::fs::read_to_string(p) {
guard.insert(p.to_string(), content);
pinned.push(p.to_string());
}
}
}
}
}
let msg = format!(
"Autonomous Scoping: Locked {} in high-fidelity memory. Reason: {}",
pinned.join(", "),
reason
);
let _ = tx
.send(InferenceEvent::Thought(format!("[AUTO-PIN] {}", msg)))
.await;
Ok(msg)
} else {
Err("Missing 'paths' array for auto_pin_context.".to_string())
}
} else if call.name == "list_pinned" {
// Lists the keys of the pinned-files map; the lock is released before returning.
let paths_msg = {
let pinned = self.pinned_files.lock().await;
if pinned.is_empty() {
"No files are currently pinned.".to_string()
} else {
let paths: Vec<_> = pinned.keys().cloned().collect();
format!(
"Currently pinned files in active memory:\n- {}",
paths.join("\n- ")
)
}
};
Ok(paths_msg)
} else if call.name.starts_with("mcp__") {
// Tools with the `mcp__` prefix are forwarded to the MCP manager.
let mut mcp = self.mcp_manager.lock().await;
match mcp.call_tool(&call.name, &args).await {
Ok(res) => Ok(res),
Err(e) => Err(e.to_string()),
}
} else if call.name == "swarm" {
// Swarm: build worker tasks from `tasks`; missing fields become "?".
// `max_workers` defaults to 3.
let tasks_val = args.get("tasks").cloned().unwrap_or(Value::Array(vec![]));
let max_workers = args
.get("max_workers")
.and_then(|v| v.as_u64())
.unwrap_or(3) as usize;
let mut task_objs = Vec::new();
if let Value::Array(arr) = tasks_val {
for v in arr {
let id = v
.get("id")
.and_then(|x| x.as_str())
.unwrap_or("?")
.to_string();
let target = v
.get("target")
.and_then(|x| x.as_str())
.unwrap_or("?")
.to_string();
let instruction = v
.get("instruction")
.and_then(|x| x.as_str())
.unwrap_or("?")
.to_string();
task_objs.push(crate::agent::parser::WorkerTask {
id,
target,
instruction,
});
}
}
if task_objs.is_empty() {
Err("No tasks provided for swarm.".to_string())
} else {
let (swarm_tx_internal, mut swarm_rx_internal) =
tokio::sync::mpsc::channel(32);
let tx_forwarder = tx.clone();
// Background task that relays swarm messages to the UI channel. It ends
// when the internal channel closes (all senders dropped).
tokio::spawn(async move {
while let Some(msg) = swarm_rx_internal.recv().await {
match msg {
crate::agent::swarm::SwarmMessage::Progress(id, p) => {
let _ = tx_forwarder
.send(InferenceEvent::Thought(format!(
"Swarm [{}]: {}% complete",
id, p
)))
.await;
}
crate::agent::swarm::SwarmMessage::ReviewRequest {
worker_id,
file_path,
before: _,
after: _,
tx,
} => {
// Note: this `tx` is the worker's reply sender and shadows the outer
// event channel inside this arm.
let (approve_tx, approve_rx) =
tokio::sync::oneshot::channel::<bool>();
let display = format!(
"Swarm worker [{}]: Integrated changes into {:?}",
worker_id, file_path
);
let _ = tx_forwarder
.send(InferenceEvent::ApprovalRequired {
id: format!("swarm_{}", worker_id),
name: "swarm_apply".to_string(),
display,
diff: None,
responder: approve_tx,
})
.await;
// If the approval channel is dropped without an answer, no response is
// sent back to the worker.
if let Ok(approved) = approve_rx.await {
let response = if approved {
crate::agent::swarm::ReviewResponse::Accept
} else {
crate::agent::swarm::ReviewResponse::Reject
};
let _ = tx.send(response);
}
}
crate::agent::swarm::SwarmMessage::Done => {}
}
}
});
let coordinator = self.swarm_coordinator.clone();
match coordinator
.dispatch_swarm(task_objs, swarm_tx_internal, max_workers)
.await
{
Ok(_) => Ok(
"Swarm execution completed. Check files for integration results."
.to_string(),
),
Err(e) => Err(format!("Swarm failure: {}", e)),
}
}
} else if call.name == "vision_analyze" {
crate::tools::vision::vision_analyze(&self.engine, &args).await
} else if matches!(
call.name.as_str(),
"edit_file" | "patch_hunk" | "multi_search_replace"
) && !yolo
{
// Edit tools outside yolo mode: show a diff preview and require approval
// before applying the edit.
let diff_result = match call.name.as_str() {
"edit_file" => crate::tools::file_ops::compute_edit_file_diff(&args),
"patch_hunk" => crate::tools::file_ops::compute_patch_hunk_diff(&args),
_ => crate::tools::file_ops::compute_msr_diff(&args),
};
match diff_result {
Ok(diff_text) => {
let path_label =
args.get("path").and_then(|v| v.as_str()).unwrap_or("file");
let (appr_tx, appr_rx) = tokio::sync::oneshot::channel::<bool>();
let _ = tx
.send(InferenceEvent::ApprovalRequired {
id: real_id.clone(),
name: call.name.clone(),
display: format!("Edit preview: {}", path_label),
diff: Some(diff_text),
responder: appr_tx,
})
.await;
match appr_rx.await {
Ok(true) => dispatch_tool(&call.name, &args).await,
_ => Err("Edit declined by user.".into()),
}
}
// NOTE(review): when the preview cannot be computed the edit is dispatched
// without the preview approval step — confirm this is intended.
Err(_) => dispatch_tool(&call.name, &args).await,
}
} else {
dispatch_tool(&call.name, &args).await
};
match result {
Ok(o) => (o, false),
Err(e) => (format!("Error: {}", e), true),
}
}
};
// Record the call's success or failure in the engine's economics tracker.
// A lock error is ignored.
{
if let Ok(mut econ) = self.engine.economics.lock() {
econ.record_tool(&call.name, !is_error);
}
}
if !is_error {
if matches!(call.name.as_str(), "read_file" | "inspect_lines") {
if let Some(path) = args.get("path").and_then(|v| v.as_str()) {
if call.name == "inspect_lines" {
self.record_line_inspection(path).await;
} else {
self.record_read_observation(path).await;
}
}
}
// A build counts as passing when the output contains one of these markers.
if call.name == "verify_build" {
let ok = output.contains("BUILD OK")
|| output.contains("BUILD SUCCESS")
|| output.contains("BUILD OKAY");
self.record_verify_build_result(ok, &output).await;
}
if matches!(
call.name.as_str(),
"write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
) || is_mcp_mutating_tool(&call.name)
{
self.record_successful_mutation(action_target_path(&call.name, &args).as_deref())
.await;
}
if let Some(receipt) = self.build_action_receipt(&call.name, &args, &output, is_error) {
msg_results.push(receipt);
}
}
// Critic pass: only for successful `edit_file` / `write_file` calls with a
// non-empty `path` and `content` argument, an extension outside `SKIP_EXTS`,
// and at least 50 lines of content.
if !is_error && (call.name == "edit_file" || call.name == "write_file") {
let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
let ext = std::path::Path::new(path)
.extension()
.and_then(|e| e.to_str())
.unwrap_or("");
const SKIP_EXTS: &[&str] = &[
"md",
"toml",
"json",
"txt",
"yml",
"yaml",
"cfg",
"csv",
"lock",
"gitignore",
];
let line_count = content.lines().count();
if !path.is_empty()
&& !content.is_empty()
&& !SKIP_EXTS.contains(&ext)
&& line_count >= 50
{
if let Some(critique) = self.run_critic_check(path, content, &tx).await {
msg_results.push(ChatMessage::system(&format!(
"[CRITIC REVIEW OF {}]\nIssues found:\n\n{}",
path, critique
)));
}
}
}
ToolExecutionOutcome {
call_id: real_id,
tool_name: call.name,
args,
output,
is_error,
blocked_by_policy,
msg_results,
}
}
}
/// Result of running a single tool call through `process_tool_call`.
struct ToolExecutionOutcome {
// Identifier of the tool call this outcome belongs to.
call_id: String,
// Name of the tool that was invoked.
tool_name: String,
// Parsed (and possibly defaulted) JSON arguments the tool ran with.
args: Value,
// Tool output on success, or an "Error: …" message on failure.
output: String,
// True when the call was rejected or the tool returned an error.
is_error: bool,
// True when the rejection message started with "Action blocked:".
blocked_by_policy: bool,
// Extra messages produced alongside the output (action receipt, critic review).
msg_results: Vec<ChatMessage>,
}
/// Reports whether `path` has a file extension belonging to a known
/// programming language. The comparison ignores ASCII case; paths without an
/// extension are never code-like.
fn is_code_like_path(path: &str) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "js", "ts", "tsx", "jsx", "py", "go", "java", "c", "cpp", "cc", "h", "hpp", "cs",
        "swift", "kt", "kts", "rb", "php",
    ];
    match std::path::Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
    {
        Some(ext) => CODE_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
/// Produces the short, human-readable label shown for a tool call.
///
/// A handful of tools get a tailored label built from one string argument
/// (missing or non-string arguments render as empty text); every other tool
/// is shown as its name followed by the debug rendering of `args`.
pub fn format_tool_display(name: &str, args: &Value) -> String {
    let text_arg = |key: &str| -> String {
        args.get(key)
            .and_then(|value| value.as_str())
            .map(|text| text.to_string())
            .unwrap_or_default()
    };
    match name {
        "map_project" => String::from("map project architecture"),
        "shell" => format!("$ {}", text_arg("command")),
        "inspect_host" => format!("inspect host {}", text_arg("topic")),
        "describe_toolchain" => format!("describe toolchain {}", text_arg("topic")),
        "trace_runtime_flow" => format!("trace runtime {}", text_arg("topic")),
        other => format!("{} {:?}", other, args),
    }
}
/// Heuristic: does this shell command look like a host-inspection query
/// (PATH lookups, tool version checks, port/socket listings, well-known user
/// folders)? Matching is a case-insensitive substring search over a fixed
/// list of markers.
fn shell_looks_like_structured_host_inspection(command: &str) -> bool {
    const INSPECTION_MARKERS: &[&str] = &[
        "$env:path",
        "pathvariable",
        "get-childitem",
        "gci ",
        "where.exe",
        "where ",
        "cargo --version",
        "rustc --version",
        "git --version",
        "node --version",
        "npm --version",
        "pnpm --version",
        "python --version",
        "python3 --version",
        "deno --version",
        "go version",
        "dotnet --version",
        "uv --version",
        "netstat",
        "findstr",
        "get-nettcpconnection",
        "tcpconnection",
        "listening",
        "ss -",
        "ss ",
        "lsof",
        "desktop",
        "downloads",
    ];
    let lowered = command.to_ascii_lowercase();
    for marker in INSPECTION_MARKERS {
        if lowered.contains(*marker) {
            return true;
        }
    }
    false
}
/// Limits `text` to at most `max_bytes` bytes of content.
///
/// Text that already fits is returned unchanged. Longer text is cut at the
/// last UTF-8 character boundary at or before `max_bytes` and followed by a
/// `... [output capped at NB]` marker line.
fn cap_output(text: &str, max_bytes: usize) -> String {
    if text.len() <= max_bytes {
        return text.to_owned();
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&pos| text.is_char_boundary(pos))
        .unwrap_or(0);
    format!(
        "{}\n... [output capped at {}B]",
        &text[..cut],
        max_bytes
    )
}
/// Counters describing what `enforce_prompt_budget` did to shrink a prompt.
#[derive(Default)]
struct PromptBudgetStats {
// Large tool results replaced by a short summary.
summarized_tool_results: usize,
// Older tool results replaced by a fixed "omitted" placeholder.
collapsed_tool_results: usize,
// Long user/assistant messages replaced by a short summary.
trimmed_chat_messages: usize,
// Messages removed from the prompt entirely.
dropped_messages: usize,
}
/// Estimates the token count of `messages` by delegating to
/// `inference::estimate_message_batch_tokens`.
fn estimate_prompt_tokens(messages: &[ChatMessage]) -> usize {
crate::agent::inference::estimate_message_batch_tokens(messages)
}
/// Compresses `text` into a summary of at most `max_chars` characters and
/// three lines, using `compaction::compress_summary`.
///
/// The per-line limit is `max_chars` clamped to the range 80..=240. An empty
/// compression result yields an empty string.
fn summarize_prompt_blob(text: &str, max_chars: usize) -> String {
    let per_line_cap = max_chars.clamp(80, 240);
    compaction::compress_summary(
        text,
        compaction::SummaryCompressionBudget {
            max_chars,
            max_lines: 3,
            max_line_chars: per_line_cap,
        },
    )
    .summary
}
/// Builds the replacement text for a tool result that is being shortened to
/// save prompt budget: a header naming the tool (or `tool` when the message
/// has no name) followed by a summary of at most 320 characters.
fn summarize_tool_message_for_budget(message: &ChatMessage) -> String {
    let label = match message.name.as_deref() {
        Some(name) => name,
        None => "tool",
    };
    let digest = summarize_prompt_blob(message.content.as_str(), 320);
    format!(
        "[Prompt-budget summary of prior `{}` result]\n{}",
        label, digest
    )
}
/// Builds the replacement text for a chat message that is being shortened to
/// save prompt budget: a header naming the message role followed by a summary
/// of at most 240 characters.
fn summarize_chat_message_for_budget(message: &ChatMessage) -> String {
    let digest = summarize_prompt_blob(message.content.as_str(), 240);
    let speaker = message.role.as_str();
    format!(
        "[Prompt-budget summary of earlier {} message]\n{}",
        speaker, digest
    )
}
/// Ensures the message at index 1 (the first one after the leading message)
/// is from the user, inserting a synthetic user message there when it is not.
/// Prompts with fewer than two messages are left untouched.
fn normalize_prompt_start(messages: &mut Vec<ChatMessage>) {
    let needs_user_anchor = messages
        .get(1)
        .map_or(false, |second| second.role != "user");
    if needs_user_anchor {
        messages.insert(1, ChatMessage::user("Continuing previous context..."));
    }
}
/// Shrinks `prompt_msgs` in place until its estimated token count fits within
/// 68% of `context_length`, escalating through four stages and stopping as
/// soon as the target is met:
///
/// 1. Summarize tool results longer than 1200 bytes, newest first.
/// 2. Replace all but the two most recent tool results with a placeholder.
/// 3. Summarize user/assistant messages longer than 900 bytes, except the
///    latest user message.
/// 4. Drop messages from index 1 onward, never dropping the latest user
///    message and never going below two messages.
///
/// The message at index 0 is never modified or removed. After trimming,
/// `normalize_prompt_start` is applied.
///
/// Returns `None` when the prompt already fits or when no stage changed
/// anything; otherwise returns a one-line report of what was done.
fn enforce_prompt_budget(
    prompt_msgs: &mut Vec<ChatMessage>,
    context_length: usize,
) -> Option<String> {
    let target_tokens = ((context_length as f64) * 0.68) as usize;
    if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
        return None;
    }
    let mut stats = PromptBudgetStats::default();

    // Stages 1 and 2 only rewrite message content, so the positions of tool
    // messages stay valid across both.
    let tool_indices: Vec<usize> = prompt_msgs
        .iter()
        .enumerate()
        .filter_map(|(idx, msg)| (msg.role == "tool").then_some(idx))
        .collect();

    // Stage 1: summarize large tool results, newest first.
    for idx in tool_indices.iter().rev().copied() {
        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
            break;
        }
        if prompt_msgs[idx].content.as_str().len() > 1200 {
            prompt_msgs[idx].content =
                MessageContent::Text(summarize_tool_message_for_budget(&prompt_msgs[idx]));
            stats.summarized_tool_results += 1;
        }
    }

    // Stage 2: collapse older tool results, keeping the two most recent intact.
    if tool_indices.len() > 2 {
        for idx in tool_indices
            .iter()
            .take(tool_indices.len().saturating_sub(2))
            .copied()
        {
            if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
                break;
            }
            prompt_msgs[idx].content = MessageContent::Text(
                "[Earlier tool output omitted to stay within the prompt budget.]".to_string(),
            );
            stats.collapsed_tool_results += 1;
        }
    }

    // Stage 3: summarize long chat messages other than the latest user turn.
    let last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
    for idx in 1..prompt_msgs.len() {
        if estimate_prompt_tokens(prompt_msgs) <= target_tokens {
            break;
        }
        if Some(idx) == last_user_idx {
            continue;
        }
        let role = prompt_msgs[idx].role.as_str();
        if matches!(role, "user" | "assistant") && prompt_msgs[idx].content.as_str().len() > 900 {
            prompt_msgs[idx].content =
                MessageContent::Text(summarize_chat_message_for_budget(&prompt_msgs[idx]));
            stats.trimmed_chat_messages += 1;
        }
    }

    // Stage 4: drop the oldest messages after index 0.
    let mut idx = 1usize;
    while estimate_prompt_tokens(prompt_msgs) > target_tokens && prompt_msgs.len() > 2 {
        if idx >= prompt_msgs.len() {
            break;
        }
        // Every removal shifts later messages down by one, so the position of
        // the latest user message must be looked up again on each pass. A
        // position captured once before the loop goes stale after the first
        // removal and lets the latest user message itself be dropped.
        let preserve_last_user_idx = prompt_msgs.iter().rposition(|m| m.role == "user");
        if Some(idx) == preserve_last_user_idx {
            idx += 1;
            continue;
        }
        prompt_msgs.remove(idx);
        stats.dropped_messages += 1;
    }

    normalize_prompt_start(prompt_msgs);
    let new_tokens = estimate_prompt_tokens(prompt_msgs);
    if stats.summarized_tool_results == 0
        && stats.collapsed_tool_results == 0
        && stats.trimmed_chat_messages == 0
        && stats.dropped_messages == 0
    {
        return None;
    }
    Some(format!(
        "Prompt Budget Guard: trimmed prompt to about {} tokens (target {}). Summarized {} large tool result(s), collapsed {} older tool result(s), trimmed {} chat message(s), and dropped {} old message(s).",
        new_tokens,
        target_tokens,
        stats.summarized_tool_results,
        stats.collapsed_tool_results,
        stats.trimmed_chat_messages,
        stats.dropped_messages
    ))
}
/// Heuristic for requests that can be answered with a single quick tool run.
///
/// True when the input mentions `run_code` / `run code` (any length), or when
/// it is shorter than 120 bytes and contains one of the compute-style
/// keywords. Matching is case-insensitive.
fn is_quick_tool_request(input: &str) -> bool {
    const RUN_CODE_MARKERS: &[&str] = &["run_code", "run code"];
    const COMPUTE_KEYWORDS: &[&str] = &[
        "calculate",
        "compute",
        "execute",
        "run this",
        "test this",
        "what is ",
        "how much",
        "how many",
        "convert ",
        "print ",
    ];
    let lowered = input.to_lowercase();
    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| lowered.contains(*needle));
    mentions_any(RUN_CODE_MARKERS) || (input.len() < 120 && mentions_any(COMPUTE_KEYWORDS))
}
/// Splits `text` into consecutive chunks that each end after
/// `words_per_chunk` separators (spaces or newlines).
///
/// Separators stay attached to the chunk they terminate, so concatenating the
/// chunks reproduces `text`. Any trailing text after the last full chunk is
/// returned as a final, shorter chunk.
fn chunk_text(text: &str, words_per_chunk: usize) -> Vec<String> {
    let is_separator = |ch: char| ch == ' ' || ch == '\n';
    let mut chunks = Vec::new();
    let mut pending = String::new();
    let mut separators_seen = 0usize;
    // Each piece is a run of text ending in exactly one separator, except
    // possibly the last piece.
    for piece in text.split_inclusive(is_separator) {
        pending.push_str(piece);
        if !piece.ends_with(is_separator) {
            continue;
        }
        separators_seen += 1;
        if separators_seen >= words_per_chunk {
            chunks.push(std::mem::take(&mut pending));
            separators_seen = 0;
        }
    }
    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
/// Extracts the normalized workspace path a `read_file` call targets.
///
/// Returns `None` for any other tool, when the arguments are not valid JSON
/// after normalization, or when there is no string `path` argument.
fn repeated_read_target(call: &crate::agent::inference::ToolCallFn) -> Option<String> {
    match call.name.as_str() {
        "read_file" => {}
        _ => return None,
    }
    let normalized =
        crate::agent::inference::normalize_tool_argument_string(&call.name, &call.arguments);
    let parsed = serde_json::from_str::<Value>(&normalized).ok()?;
    parsed
        .get("path")
        .and_then(|value| value.as_str())
        .map(|path| normalize_workspace_path(path))
}
/// Keeps edits from running in the same batch as reads.
///
/// When the batch contains at least one read-style call and at least one
/// edit-style call, the edit-style calls are removed from the returned batch
/// and a note explaining the deferral is returned alongside it. Otherwise the
/// batch is returned unchanged with no note.
fn order_batch_reads_first(
    calls: Vec<crate::agent::inference::ToolCallResponse>,
) -> (
    Vec<crate::agent::inference::ToolCallResponse>,
    Option<String>,
) {
    fn is_read(name: &str) -> bool {
        matches!(
            name,
            "read_file" | "inspect_lines" | "grep_files" | "list_files"
        )
    }
    fn is_edit(name: &str) -> bool {
        matches!(
            name,
            "write_file" | "edit_file" | "patch_hunk" | "multi_search_replace"
        )
    }

    let mut saw_read = false;
    let mut saw_edit = false;
    for call in &calls {
        let name = call.function.name.as_str();
        saw_read |= is_read(name);
        saw_edit |= is_edit(name);
    }
    if !(saw_read && saw_edit) {
        return (calls, None);
    }

    let kept: Vec<_> = calls
        .into_iter()
        .filter(|call| !is_edit(call.function.name.as_str()))
        .collect();
    (
        kept,
        Some("Batch ordering: deferring edits until reads complete.".to_string()),
    )
}
/// Decides from the first line of grep output whether the search fanned out
/// widely: at least 12 matches (the leading number on the line) or at least
/// 8 hunks (a `N hunk(s)` segment in the comma-separated summary).
///
/// Numbers that are missing or fail to parse count as zero.
fn grep_output_is_high_fanout(output: &str) -> bool {
    let summary = match output.lines().next() {
        Some(first_line) => first_line,
        None => return false,
    };
    let matches_reported = summary
        .split(' ')
        .next()
        .and_then(|token| token.parse::<usize>().ok())
        .unwrap_or(0);
    let hunks_reported = summary
        .split(", ")
        .filter_map(|segment| segment.strip_suffix(" hunk(s)")?.parse::<usize>().ok())
        .next()
        .unwrap_or(0);
    matches_reported >= 12 || hunks_reported >= 8
}
/// Assembles the full system prompt by appending runtime context to `base`.
///
/// Sections are appended in this order: permission mode, GPU/VRAM status
/// (only when the reported total is non-zero), git repository context
/// (remote status, optional status and diff snapshots, onboarding/sync
/// hints), and finally a list of formatting corrections when `hints` is
/// non-empty.
fn build_system_with_corrections(
    base: &str,
    hints: &[String],
    gpu: &Arc<GpuState>,
    git: &Arc<crate::agent::git_monitor::GitState>,
    config: &crate::agent::config::HematiteConfig,
) -> String {
    const READ_ONLY_NOTICE: &str = "PERMISSION: You are restricted to READ-ONLY access. Do NOT attempt to use write_file, edit_file, or shell for any modification. Focus entirely on analysis, indexing, and reporting.\n";
    const WRITE_NOTICE: &str = "PERMISSION: You have authority to modify code and execute tests with user oversight.\n";

    let mut prompt = String::from(base);

    // Permission mode: label plus the matching permission notice.
    let (mode_label, permission_notice) = match config.mode {
        crate::agent::config::PermissionMode::ReadOnly => ("READ-ONLY", READ_ONLY_NOTICE),
        crate::agent::config::PermissionMode::Developer => ("DEVELOPER", WRITE_NOTICE),
        crate::agent::config::PermissionMode::SystemAdmin => {
            ("SYSTEM-ADMIN (UNRESTRICTED)", WRITE_NOTICE)
        }
    };
    prompt.push_str("\n\n# Permission Mode\n");
    prompt.push_str(&format!("CURRENT MODE: {}\n", mode_label));
    prompt.push_str(permission_notice);

    // Hardware context is only included when a GPU total was reported.
    let (used, total) = gpu.read();
    if total > 0 {
        prompt.push_str("\n\n# Terminal Hardware Context\n");
        let hardware_line = format!(
            "HOST GPU: {} | VRAM: {:.1}GB / {:.1}GB ({:.0}% used)\n",
            gpu.gpu_name(),
            used as f64 / 1024.0,
            total as f64 / 1024.0,
            gpu.ratio() * 100.0
        );
        prompt.push_str(&hardware_line);
        prompt.push_str("Use this awareness to manage your context window responsibly.\n");
    }

    // Git context: remote status line, then optional snapshots.
    prompt.push_str("\n\n# Git Repository Context\n");
    let git_status_label = git.label();
    let git_url = git.url();
    prompt.push_str(&format!(
        "REMOTE STATUS: {} | URL: {}\n",
        git_status_label, git_url
    ));
    let root = crate::tools::file_ops::workspace_root();
    if let Some(status_snapshot) = crate::agent::git_context::read_git_status(&root) {
        prompt.push_str("\nGit status snapshot:\n");
        prompt.push_str(&status_snapshot);
        prompt.push('\n');
    }
    if let Some(diff_snapshot) = crate::agent::git_context::read_git_diff(&root, 2000) {
        prompt.push_str("\nGit diff snapshot:\n");
        prompt.push_str(&diff_snapshot);
        prompt.push('\n');
    }
    if git_status_label == "NONE" {
        prompt.push_str("\nONBOARDING: You noticed no remote is configured. Offer to help the user set up a remote (e.g. GitHub) if they haven't already.\n");
    } else if git_status_label == "BEHIND" {
        prompt.push_str("\nSYNC: Local is behind remote. Suggest a pull if appropriate.\n");
    }

    // Formatting corrections are appended only when there are hints to show.
    if !hints.is_empty() {
        prompt.push_str("\n\n# Formatting Corrections\n");
        prompt.push_str("You previously failed formatting checks on these files. Ensure your whitespace/indentation perfectly matches the original file exactly on your next attempt:\n");
        prompt.extend(hints.iter().map(|hint| format!("- {}\n", hint)));
    }
    prompt
}
/// Picks a model override based on keywords in the user's input.
///
/// Inputs that mention heavy work (refactor, rewrite, implement, create, fix,
/// debug) go to `think_model` when one is configured. Otherwise inputs that
/// mention lookups (what, show, find, list, status) go to `fast_model` when
/// one is configured. Returns `None` when neither rule applies. Matching is
/// a case-insensitive substring search.
fn route_model<'a>(
    user_input: &str,
    fast_model: Option<&'a str>,
    think_model: Option<&'a str>,
) -> Option<&'a str> {
    const THINK_CUES: &[&str] = &["refactor", "rewrite", "implement", "create", "fix", "debug"];
    const FAST_CUES: &[&str] = &["what", "show", "find", "list", "status"];

    let lowered = user_input.to_lowercase();
    let mentions = |cues: &[&str]| cues.iter().any(|cue| lowered.contains(*cue));

    if think_model.is_some() && mentions(THINK_CUES) {
        think_model
    } else if fast_model.is_some() && mentions(FAST_CUES) {
        fast_model
    } else {
        None
    }
}
/// Returns true when the tool's metadata says it neither mutates the
/// workspace nor touches an external surface.
fn is_parallel_safe(name: &str) -> bool {
let metadata = crate::agent::inference::tool_metadata_for_name(name);
!metadata.mutates_workspace && !metadata.external_surface
}
/// Decides whether a chat query should pull context from The Vein.
///
/// Always true in docs-only mode. Otherwise true when the query contains one
/// of the recall cues (case-insensitive), or contains a token made only of
/// digits and dashes that is ten characters long with a dash in the fifth
/// position (the shape of a `YYYY-MM-DD` date).
fn should_use_vein_in_chat(query: &str, docs_only_mode: bool) -> bool {
    const RECALL_CUES: &[&str] = &[
        "what did we decide",
        "why did we decide",
        "what did we say",
        "what did we do",
        "earlier today",
        "yesterday",
        "last week",
        "last month",
        "earlier",
        "remember",
        "session",
        "import",
    ];
    if docs_only_mode {
        return true;
    }
    let lowered = query.to_ascii_lowercase();
    if RECALL_CUES.iter().any(|cue| lowered.contains(*cue)) {
        return true;
    }
    // Tokens contain only ASCII digits and '-', so byte offsets equal
    // character offsets here.
    lowered
        .split(|ch: char| !(ch.is_ascii_digit() || ch == '-'))
        .any(|token| token.len() == 10 && token.as_bytes().get(4) == Some(&b'-'))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn classifies_lm_studio_context_budget_mismatch_as_context_window() {
let detail = r#"LM Studio error 400 Bad Request: {"error":"The number of tokens to keep from the initial prompt is greater than the context length (n_keep: 28768>= n_ctx: 4096). Try to load the model with a larger context length, or provide a shorter input."}"#;
let class = classify_runtime_failure(detail);
assert_eq!(class, RuntimeFailureClass::ContextWindow);
assert_eq!(class.tag(), "context_window");
assert!(format_runtime_failure(class, detail).contains("[failure:context_window]"));
}
#[test]
fn runtime_failure_maps_to_provider_and_checkpoint_state() {
assert_eq!(
provider_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
Some(ProviderRuntimeState::ContextWindow)
);
assert_eq!(
checkpoint_state_for_runtime_failure(RuntimeFailureClass::ContextWindow),
Some(OperatorCheckpointState::BlockedContextWindow)
);
assert_eq!(
provider_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
Some(ProviderRuntimeState::Degraded)
);
assert_eq!(
checkpoint_state_for_runtime_failure(RuntimeFailureClass::ProviderDegraded),
None
);
}
#[test]
fn intent_router_treats_tool_registry_ownership_as_product_truth() {
let intent = classify_query_intent(
WorkflowMode::ReadOnly,
"Read-only mode. Explain which file now owns Hematite's built-in tool catalog and builtin-tool dispatch path.",
);
assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
assert_eq!(
intent.direct_answer,
Some(DirectAnswerKind::ToolRegistryOwnership)
);
}
#[test]
fn intent_router_treats_tool_classes_as_product_truth() {
let intent = classify_query_intent(
WorkflowMode::ReadOnly,
"Read-only mode. Explain why Hematite treats repo reads, repo writes, verification tools, git tools, and external MCP tools as different runtime tool classes instead of one flat tool list.",
);
assert_eq!(intent.primary_class, QueryIntentClass::ProductTruth);
assert_eq!(intent.direct_answer, Some(DirectAnswerKind::ToolClasses));
}
#[test]
fn tool_registry_ownership_answer_mentions_new_owner_file() {
let answer = build_tool_registry_ownership_answer();
assert!(answer.contains("src/agent/tool_registry.rs"));
assert!(answer.contains("builtin dispatch path"));
assert!(answer.contains("src/agent/conversation.rs"));
}
/// A read-only question about MCP server health as runtime state must be
/// classified as `ProductTruth` with the `McpLifecycle` direct answer.
#[test]
fn intent_router_treats_mcp_lifecycle_as_product_truth() {
    let prompt =
        "Read-only mode. Explain how Hematite should treat MCP server health as runtime state.";
    let routed = classify_query_intent(WorkflowMode::ReadOnly, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::ProductTruth);
    assert_eq!(routed.direct_answer, Some(DirectAnswerKind::McpLifecycle));
}
/// In `Auto` mode, a request to change code, skip verification, and commit
/// immediately must be routed to the `UnsafeWorkflowPressure` direct answer.
#[test]
fn intent_router_short_circuits_unsafe_commit_pressure() {
    let prompt = "Make a code change, skip verification, and commit it immediately.";
    let routed = classify_query_intent(WorkflowMode::Auto, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::ProductTruth);
    assert_eq!(
        routed.direct_answer,
        Some(DirectAnswerKind::UnsafeWorkflowPressure)
    );
}
/// The canned unsafe-workflow answer must contain the three phrases that insist
/// on verification before committing.
#[test]
fn unsafe_workflow_pressure_answer_requires_verification() {
    let answer = build_unsafe_workflow_pressure_answer();

    // Checked in the same order as the individual assertions they replace.
    let required_phrases = [
        "should not skip verification",
        "run the appropriate verification path",
        "only then commit",
    ];
    for &phrase in &required_phrases {
        assert!(
            answer.contains(phrase),
            "unsafe-workflow answer is missing phrase: {}",
            phrase
        );
    }
}
/// A broad "how is Hematite wired" walkthrough request that also mentions MCP
/// state must be classified as `RepoArchitecture` in architecture-overview mode,
/// with no direct answer selected.
#[test]
fn intent_router_prefers_architecture_walkthrough_over_narrow_mcp_answer() {
    let prompt = "I want to understand how Hematite is wired without any guessing. Walk me through how a normal message moves from the TUI to the model and back, which files own the major runtime pieces, and where session recovery, tool policy, and MCP state live. Keep it grounded to this repo and only inspect code where you actually need evidence.";
    let routed = classify_query_intent(WorkflowMode::ReadOnly, prompt);

    assert_eq!(routed.primary_class, QueryIntentClass::RepoArchitecture);
    assert!(routed.architecture_overview_mode);
    assert_eq!(routed.direct_answer, None);
}
/// A PATH / developer-tool readiness question must set `host_inspection_mode`,
/// and the preferred host-inspection topic for the same prompt must be
/// `"summary"`.
#[test]
fn intent_router_marks_host_inspection_questions() {
    // The same prompt feeds both the classifier and the topic picker.
    let prompt = "Inspect my PATH, tell me which developer tools you detect with versions, point out any duplicate or missing PATH entries, then summarize whether this machine looks ready for local development.";

    let routed = classify_query_intent(WorkflowMode::Auto, prompt);
    assert!(routed.host_inspection_mode);

    let topic = preferred_host_inspection_topic(prompt);
    assert_eq!(topic, Some("summary"));
}
/// Exercises `should_use_vein_in_chat` with three prompts: a dated "what did we
/// decide" question and a notes-summary request (flag set) must return `true`,
/// while a plain joke request must return `false`.
#[test]
fn chat_mode_uses_vein_for_historical_or_docs_only_queries() {
    // (prompt, second argument, expected result).
    // NOTE(review): the second argument is presumably a docs-only flag — confirm
    // against the function signature.
    let cases = [
        (
            "What did we decide on 2026-04-09 about docs-only mode?",
            false,
            true,
        ),
        ("Summarize these local notes", true, true),
        ("Tell me a joke", false, false),
    ];

    for &(prompt, flag, expected) in &cases {
        assert_eq!(
            should_use_vein_in_chat(prompt, flag),
            expected,
            "unexpected vein decision for prompt: {}",
            prompt
        );
    }
}
/// Each listed shell command (PATH split, tool version query, and two
/// port-lookup commands) must be recognised by
/// `shell_looks_like_structured_host_inspection`.
#[test]
fn shell_host_inspection_guard_matches_path_and_version_commands() {
    let commands = [
        "$env:PATH -split ';'",
        "cargo --version",
        "Get-NetTCPConnection -LocalPort 3000",
        "netstat -ano | findstr :3000",
    ];

    for &command in &commands {
        assert!(
            shell_looks_like_structured_host_inspection(command),
            "command was not recognised as host inspection: {}",
            command
        );
    }
}
/// A question about what is listening on a port must select the `"ports"`
/// host-inspection topic.
#[test]
fn intent_router_picks_ports_for_listening_port_questions() {
    let prompt =
        "Show me what is listening on port 3000 and whether anything unexpected is exposed.";
    let topic = preferred_host_inspection_topic(prompt);

    assert_eq!(topic, Some("ports"));
}
/// Feeds a cargo-style build failure containing two `-->` locations into
/// `parse_failing_paths_from_build_output` and expects exactly two paths back,
/// one for each referenced source file.
#[test]
fn failing_path_parser_extracts_cargo_error_locations() {
    // The raw string content is part of the fixture; keep it byte-for-byte.
    let build_log = r#"
BUILD FAILURE: The build is currently broken. FIX THESE ERRORS IMMEDIATELY:
error[E0412]: cannot find type `Foo` in this scope
--> src/agent/conversation.rs:42:12
|
42 | field: Foo,
| ^^^ not found
error[E0308]: mismatched types
--> src/tools/file_ops.rs:100:5
|
= note: expected `String`, found `&str`
"#;
    let failing = parse_failing_paths_from_build_output(build_log);

    // True when any returned path contains the given file name.
    let captured = |needle: &str| failing.iter().any(|path| path.contains(needle));

    assert!(captured("conversation.rs"), "should capture conversation.rs");
    assert!(captured("file_ops.rs"), "should capture file_ops.rs");
    assert_eq!(failing.len(), 2, "no duplicates");
}
/// Given one `<macro-expansion>` location and one real file location, the parser
/// must return a single path, and that path must refer to `file.rs`.
#[test]
fn failing_path_parser_ignores_macro_expansions() {
    // The raw string content is part of the fixture; keep it byte-for-byte.
    let build_log = r#"
--> <macro-expansion>:1:2
--> src/real/file.rs:10:5
"#;
    let failing = parse_failing_paths_from_build_output(build_log);

    assert_eq!(failing.len(), 1);
    // Indexing is safe only because the length assertion above has passed.
    let only_path = &failing[0];
    assert!(only_path.contains("file.rs"));
}
}