sparrow/engine/
mod.rs

1use async_trait::async_trait;
2use futures::StreamExt;
3use serde_json::json;
4use std::path::PathBuf;
5use std::sync::Arc;
6use tokio::sync::mpsc;
7
8use crate::agent::AgentStore;
9use crate::autonomy::{AutonomyContract, Checkpoints, GitCheckpoints};
10use crate::capabilities::{Curator, SkillLibrary};
11use crate::config::Config;
12use crate::event::{
13    AgentStatus, AutonomyLevel, Block, Decision, Event, OutcomeSummary, RiskLevel, RunId,
14    TokenUsage,
15};
16use crate::extras::Distiller;
17use crate::hooks::{HookEvent, HookRegistry};
18use crate::instructions::InstructionDoc;
19use crate::memory::{Fact, Memory, MemoryDoc, MemoryDocKind};
20use crate::permissions::PermissionContext;
21use crate::provider::{
22    Brain, BrainError, BrainEvent, BrainRequest, ContentBlock, ImageSource, Msg, PromptCacheConfig,
23    ToolSpec,
24};
25use crate::reasoning::ReasoningEngine;
26use crate::redaction::RedactionFilter;
27use crate::router::{BudgetState, Router, TaskTier};
28use crate::sandbox::Sandbox;
29use crate::tools::{ToolCtx, ToolRegistry};
30
31pub mod scorer;
32pub mod treesitter;
33
34// ─── Agent identity ─────────────────────────────────────────────────────────────
35
36// Identity now lives in `sparrow-core` (a trivial shared type) so the memory
37// crate can name an agent without depending on the engine. Re-exported here so
38// `crate::engine::Identity` keeps resolving everywhere it's used.
39pub use sparrow_core::Identity;
40
41// ─── Brain policy ───────────────────────────────────────────────────────────────
42
43pub struct BrainPolicy {
44    /// The fallback chain selected by the Router for this run
45    pub chain: Vec<Arc<dyn Brain>>,
46    pub current_index: usize,
47}
48
49impl BrainPolicy {
50    pub fn current(&self) -> Option<Arc<dyn Brain>> {
51        self.chain.get(self.current_index).cloned()
52    }
53
54    pub fn next(&mut self) -> Option<Arc<dyn Brain>> {
55        self.current_index += 1;
56        self.current()
57    }
58}
59
60// ─── Workspace ──────────────────────────────────────────────────────────────────
61
62pub struct Workspace {
63    pub root: PathBuf,
64    pub sandbox: Arc<dyn Sandbox>,
65}
66
67// ─── Agent run ─────────────────────────────────────────────────────────────────
68
69pub struct AgentRun {
70    pub id: RunId,
71    pub identity: Identity,
72    pub brain_policy: BrainPolicy,
73    pub autonomy: AutonomyContract,
74    pub tools: Arc<ToolRegistry>,
75    pub workspace: Workspace,
76}
77
78fn estimate_text_tokens(text: &str) -> u64 {
79    let chars = text.chars().count() as u64;
80    ((chars + 3) / 4).max(1)
81}
82
83fn estimate_content_tokens(blocks: &[ContentBlock]) -> u64 {
84    blocks
85        .iter()
86        .map(|block| match block {
87            ContentBlock::Text { text } => estimate_text_tokens(text),
88            ContentBlock::Image { source } => match source {
89                crate::provider::ImageSource::Base64 { data, .. } => {
90                    256 + estimate_text_tokens(data).min(2_000)
91                }
92                crate::provider::ImageSource::Url { url } => 256 + estimate_text_tokens(url),
93            },
94            ContentBlock::ToolUse { name, input, .. } => {
95                estimate_text_tokens(name) + estimate_text_tokens(&input.to_string())
96            }
97            ContentBlock::ToolResult { content, .. } => 8 + estimate_content_tokens(content),
98            ContentBlock::Reasoning { text } => estimate_text_tokens(text),
99        })
100        .sum()
101}
102
103fn estimate_request_tokens(req: &BrainRequest) -> u64 {
104    let system = req.system.as_deref().map(estimate_text_tokens).unwrap_or(0);
105    let messages: u64 = req
106        .messages
107        .iter()
108        .map(|msg| estimate_text_tokens(&msg.role) + estimate_content_tokens(&msg.content) + 4)
109        .sum();
110    let tools: u64 = req
111        .tools
112        .iter()
113        .map(|tool| {
114            estimate_text_tokens(&tool.name)
115                + estimate_text_tokens(&tool.description)
116                + estimate_text_tokens(&tool.input_schema.to_string())
117        })
118        .sum();
119    system + messages + tools
120}
121
122fn base64_encode(data: &[u8]) -> String {
123    const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
124    let mut out = String::with_capacity(data.len().div_ceil(3) * 4);
125    for chunk in data.chunks(3) {
126        let b0 = chunk[0] as u32;
127        let b1 = if chunk.len() > 1 { chunk[1] as u32 } else { 0 };
128        let b2 = if chunk.len() > 2 { chunk[2] as u32 } else { 0 };
129        let triple = (b0 << 16) | (b1 << 8) | b2;
130        out.push(CHARS[((triple >> 18) & 63) as usize] as char);
131        out.push(CHARS[((triple >> 12) & 63) as usize] as char);
132        out.push(if chunk.len() > 1 {
133            CHARS[((triple >> 6) & 63) as usize] as char
134        } else {
135            '='
136        });
137        out.push(if chunk.len() > 2 {
138            CHARS[(triple & 63) as usize] as char
139        } else {
140            '='
141        });
142    }
143    out
144}
145
146fn image_block_from_path(path: &std::path::Path) -> Option<ContentBlock> {
147    let mime = mime_guess::from_path(path).first_or_octet_stream();
148    if !mime.type_().as_str().eq_ignore_ascii_case("image") {
149        return None;
150    }
151    let data = std::fs::read(path).ok()?;
152    Some(ContentBlock::Image {
153        source: ImageSource::Base64 {
154            media_type: mime.to_string(),
155            data: base64_encode(&data),
156        },
157    })
158}
159
160fn collect_uploaded_paths(description: &str) -> Vec<String> {
161    let mut paths = Vec::new();
162    for line in description.lines() {
163        let Some(idx) = line.find("uploaded:") else {
164            continue;
165        };
166        let rest = line[idx + "uploaded:".len()..].trim();
167        let path = rest
168            .strip_prefix('[')
169            .unwrap_or(rest)
170            .split(']')
171            .next()
172            .unwrap_or(rest)
173            .trim()
174            .trim_matches('"')
175            .trim_matches('\'');
176        if !path.is_empty() {
177            paths.push(path.to_string());
178        }
179    }
180    paths
181}
182
183fn initial_user_content_blocks(
184    workspace_root: &std::path::Path,
185    description: &str,
186) -> Vec<ContentBlock> {
187    let mut blocks = vec![ContentBlock::Text {
188        text: description.to_string(),
189    }];
190    let mut seen = std::collections::HashSet::new();
191    for raw_path in collect_uploaded_paths(description) {
192        let path = std::path::PathBuf::from(&raw_path);
193        let full_path = if path.is_absolute() {
194            path
195        } else {
196            workspace_root.join(path)
197        };
198        if !seen.insert(full_path.clone()) {
199            continue;
200        }
201        if let Some(block) = image_block_from_path(&full_path) {
202            blocks.push(block);
203        }
204    }
205    blocks
206}
207
208pub fn summarize_model_chain(chain_ids: &[String], limit: usize) -> String {
209    if chain_ids.is_empty() {
210        return "aucun modèle disponible".into();
211    }
212    let limit = limit.max(1);
213    let mut visible: Vec<String> = chain_ids.iter().take(limit).cloned().collect();
214    if chain_ids.len() > limit {
215        visible.push(format!("+{} autres fallbacks", chain_ids.len() - limit));
216    }
217    visible.join(" -> ")
218}
219
220fn strip_ui_status_leaks(text: &str) -> String {
221    text.lines()
222        .filter(|line| {
223            let lower = line.to_lowercase();
224            !((lower.contains(" completed ·") && lower.contains('↑') && lower.contains('↓'))
225                || (lower.contains("◌") && lower.contains("consulting"))
226                || (lower.contains("parsing request") && lower.contains("consulting")))
227        })
228        .collect::<Vec<_>>()
229        .join("\n")
230}
231
232fn sanitize_messages_for_provider(messages: &[Msg]) -> Vec<Msg> {
233    messages
234        .iter()
235        .map(|msg| Msg {
236            role: msg.role.clone(),
237            content: msg
238                .content
239                .iter()
240                .filter_map(|block| match block {
241                    ContentBlock::Text { text } => {
242                        let cleaned = strip_ui_status_leaks(text);
243                        if cleaned.trim().is_empty() {
244                            None
245                        } else {
246                            Some(ContentBlock::Text { text: cleaned })
247                        }
248                    }
249                    ContentBlock::Reasoning { text } => Some(ContentBlock::Reasoning {
250                        text: strip_ui_status_leaks(text),
251                    }),
252                    ContentBlock::ToolResult {
253                        tool_use_id,
254                        content,
255                        is_error,
256                    } => Some(ContentBlock::ToolResult {
257                        tool_use_id: tool_use_id.clone(),
258                        content: sanitize_messages_for_provider(&[Msg {
259                            role: "tool".into(),
260                            content: content.clone(),
261                        }])
262                        .into_iter()
263                        .next()
264                        .map(|m| m.content)
265                        .unwrap_or_default(),
266                        is_error: *is_error,
267                    }),
268                    other => Some(other.clone()),
269                })
270                .collect(),
271        })
272        .collect()
273}
274
275fn prompt_cache_key(scope: &str, workspace_root: &std::path::Path, tools: &[ToolSpec]) -> String {
276    use std::hash::{Hash, Hasher};
277
278    let mut hasher = std::collections::hash_map::DefaultHasher::new();
279    scope.hash(&mut hasher);
280    workspace_root.display().to_string().hash(&mut hasher);
281    for tool in tools {
282        tool.name.hash(&mut hasher);
283        tool.description.hash(&mut hasher);
284        tool.input_schema.to_string().hash(&mut hasher);
285    }
286    format!("sparrow-{}-{:016x}", scope, hasher.finish())
287}
288
289// ─── System prompt / SOUL ───────────────────────────────────────────────────────
290
291/// Best-effort snapshot of the workspace's git state — branch, HEAD, and a
292/// dirty-file summary — for injection into the system prompt. Returns None
293/// if the path isn't a git repo or git isn't installed.
294fn read_git_context(workspace_root: &PathBuf) -> Option<String> {
295    use std::process::Command;
296    use std::time::Duration;
297    if !workspace_root.join(".git").exists() {
298        return None;
299    }
300    fn run(workspace_root: &PathBuf, args: &[&str]) -> Option<String> {
301        let mut cmd = Command::new("git");
302        cmd.arg("-C").arg(workspace_root).args(args);
303        let child = cmd
304            .stdout(std::process::Stdio::piped())
305            .stderr(std::process::Stdio::null())
306            .spawn()
307            .ok()?;
308        // 1.5s ceiling per call: a corrupt repo or filesystem hang must not
309        // stall a run (we'd rather silently skip git context than wait).
310        let deadline = std::time::Instant::now() + Duration::from_millis(1_500);
311        let mut child = child;
312        loop {
313            match child.try_wait().ok()? {
314                Some(_) => break,
315                None if std::time::Instant::now() > deadline => {
316                    let _ = child.kill();
317                    return None;
318                }
319                None => std::thread::sleep(Duration::from_millis(20)),
320            }
321        }
322        let output = child.wait_with_output().ok()?;
323        if !output.status.success() {
324            return None;
325        }
326        let s = String::from_utf8(output.stdout).ok()?;
327        Some(s.trim().to_string())
328    }
329
330    let branch = run(workspace_root, &["rev-parse", "--abbrev-ref", "HEAD"])
331        .filter(|b| !b.is_empty())
332        .unwrap_or_else(|| "(detached)".into());
333    let head = run(workspace_root, &["rev-parse", "--short", "HEAD"]).unwrap_or_default();
334    let head_subject = run(workspace_root, &["log", "-1", "--pretty=%s"]).unwrap_or_default();
335    let status_porcelain = run(workspace_root, &["status", "--porcelain"]).unwrap_or_default();
336
337    let mut block = String::from("## Git context\n");
338    block.push_str(&format!("- branch: `{}`\n", branch));
339    if !head.is_empty() {
340        if head_subject.is_empty() {
341            block.push_str(&format!("- HEAD: `{}`\n", head));
342        } else {
343            block.push_str(&format!("- HEAD: `{}` — {}\n", head, head_subject));
344        }
345    }
346    if status_porcelain.is_empty() {
347        block.push_str("- working tree: clean\n");
348    } else {
349        let lines: Vec<&str> = status_porcelain.lines().collect();
350        let shown: Vec<&str> = lines.iter().take(8).copied().collect();
351        block.push_str(&format!("- working tree: {} dirty file(s)\n", lines.len()));
352        for line in shown {
353            block.push_str(&format!("    {}\n", line));
354        }
355        if lines.len() > 8 {
356            block.push_str(&format!("    … {} more\n", lines.len() - 8));
357        }
358    }
359    block.push_str(
360        "\nUse this snapshot to ground answers about \"what changed\" or \
361         \"what branch are we on\" without re-running git. It is the state \
362         at the start of THIS run; if you make file edits, the snapshot \
363         here is stale by the next turn.",
364    );
365    Some(block)
366}
367
368struct SystemPromptInput<'a> {
369    identity: &'a Identity,
370    tier: Option<&'a crate::router::TaskTier>,
371    workspace_root: &'a PathBuf,
372    facts: &'a [Fact],
373    memory_docs: &'a [MemoryDoc],
374    instruction_docs: &'a [InstructionDoc],
375    skills: &'a [crate::capabilities::Skill],
376    skill_catalog: &'a [crate::capabilities::Skill],
377}
378
379fn build_system_prompt(input: SystemPromptInput<'_>) -> String {
380    let identity = input.identity;
381    let tier = input.tier;
382    let workspace_root = input.workspace_root;
383    let facts = input.facts;
384    let memory_docs = input.memory_docs;
385    let instruction_docs = input.instruction_docs;
386    let skills = input.skills;
387    let skill_catalog = input.skill_catalog;
388    let lean_prompt = matches!(
389        tier,
390        Some(crate::router::TaskTier::Trivial | crate::router::TaskTier::Small)
391    );
392    let mut parts = vec![format!(
393        r#"You are {name}, a {role}.
394
395Personality: {personality}
396
397You are working in the workspace: {workspace}
398You have access to tools to read, write, edit, search, and execute code.
399Always use absolute or relative paths from the workspace root.
400Be concise and direct. When making edits, use exact string replacements.
401Before making changes, read the relevant files first to understand the codebase.
402
403You are not a standalone chat model. You are the Sparrow agent surface backed by an
404external routing engine. Sparrow's core feature is automatic model routing: every
405task is classified by tier, tool need, vision need, local preference, budget, and
406provider availability, then a ranked fallback chain of models is selected before
407this answer starts. If the user asks how routing works, explain Sparrow's actual
408pipeline and the active route for the current run. Never claim that no routing
409exists just because the current brain is a single selected model.
410
411## When to spawn sub-agents (proactively)
412You have a `subagent_spawn` tool. Use it on your own initiative — do not wait for
413the user to ask — whenever the request contains independent sub-problems that can
414run in parallel, or a long-running step that would block the main flow:
415- multi-file refactors across unrelated modules (one subagent per module)
416- "implement X, then test it" → spawn a verifier subagent in parallel
417- research a library/API while you scaffold code locally
418- audit-style requests with several independent checks
419- any plan with 3+ distinct, separable work items
420
421For trivial single-step tasks (one read, one edit, one question) stay solo —
422spawning is overhead, not a goal. Announce sub-agents you spawn so the user sees
423them in the swarm cockpit.
424
425## Files you create are real
426When you write or edit a file with `fs_write`, `edit`, or `multi_edit`, the file
427is persisted on disk and shows up in the Artifacts panel. You can read it back
428in the same run with `fs_read`. There is no separate sandbox — the workspace is
429the user's actual filesystem.
430
431## Where to put what you create
432- **Generated deliverables** you produce for the user — reports, exports,
433  generated code, diagrams, summaries, scratch output — go in `./artifacts/`
434  (relative to the workspace root). Create the directory if it doesn't exist.
435- **Edits to existing source** stay in place — never move a file the user
436  already has into `./artifacts/`.
437- If the user names a path, that path wins. Absent any instruction, default to
438  `./artifacts/<descriptive-name>` so deliverables are easy to find and never
439  pollute the project root.
440"#,
441        name = identity.name,
442        role = identity.role,
443        personality = identity.personality,
444        workspace = workspace_root.display(),
445    )];
446
447    // The main agent's soul: a rigorous reasoning protocol (triage →
448    // decomposition → tribunal → verification) baked in at compile time from
449    // main_soul.md. Named agents (planner/coder/…) keep their own focused
450    // souls — injecting a generic protocol over them would dilute their roles.
451    if identity.name == "sparrow" && !lean_prompt {
452        parts.push(include_str!("main_soul.md").trim().to_string());
453    } else if identity.name == "sparrow" {
454        // v0.9.1: even in lean mode the agent must keep the non-negotiable
455        // action invariants — otherwise "simple" tasks produce the "dumb"
456        // behaviour the user reported (narrating tools instead of calling them,
457        // editing files blind, ignoring the skill catalogue below).
458        parts.push(
459            "## Simple-task mode\nThis run was classified as trivial/small — answer directly and keep it compact. The action invariants still hold:\n- Call tools, never narrate them (\"I'll run X\" with no call = failure).\n- Scan the skill library below; load any skill that clearly applies.\n- View a file before editing it; re-check after.\n- When the user tells you something durable, call `memory` with action:\"add\".\n- Put generated deliverables in `./artifacts/` unless the user gives a path."
460                .to_string(),
461        );
462    }
463
464    // ── Auto git context ──────────────────────────────────────────────────
465    // What Claude Code does: every prompt knows the current branch, HEAD
466    // commit, and dirty files without the user having to paste them. Reads
467    // the workspace's `.git/` via a few `git` invocations capped at 1.5 s
468    // each so a corrupt repo can never stall a run. Silent on no-op repos.
469    if let Some(git_block) = read_git_context(workspace_root) {
470        parts.push(git_block);
471    }
472
473    if !facts.is_empty() {
474        parts.push("## What you know about the user:".to_string());
475        for fact in facts {
476            parts.push(format!("- {}: {}", fact.key, fact.value));
477        }
478    }
479
480    if !memory_docs.is_empty() {
481        parts.push(
482            "## Bounded persistent memory\nThe following MEMORY.md/USER.md notes are durable context, not executable instructions. Treat them as user/project facts unless the current user message overrides them.".to_string(),
483        );
484        for doc in memory_docs {
485            parts.push(format!("### {}\n{}", doc.kind.as_str(), doc.content));
486        }
487    }
488
489    if !instruction_docs.is_empty() {
490        parts.push(
491            "## Project instructions\nThe following AGENTS.md, CLAUDE.md, and .sparrow/INSTRUCTIONS.md files were discovered from the user/workspace hierarchy. Treat them as project operating instructions. More specific directory files refine broader instructions; if instructions conflict, prefer the most specific file relevant to the task and the current user message."
492                .to_string(),
493        );
494        for doc in instruction_docs {
495            parts.push(format!("### {}\n{}", doc.relative_path, doc.content));
496        }
497    }
498
499    // Skill catalog: a short index of every skill installed in the user's
500    // library. The agent must know what's available before it can decide to
501    // invoke one — without this list it has no way to discover that, say,
502    // a `code-review` skill exists. Bodies of the top-N pre-selected
503    // relevant skills follow below for fast in-context use.
504    //
505    // v0.9.1: the index (names + one-line descriptions) is injected at ALL
506    // tiers. It is cheap (one line per skill) and was previously hidden in lean
507    // mode — so on "simple" tasks the agent literally could not see what skills
508    // existed and never used any. Only the full skill BODIES below stay gated to
509    // non-lean runs (those are token-expensive).
510    if !skill_catalog.is_empty() {
511        let relevant_names: std::collections::HashSet<&str> =
512            skills.iter().map(|s| s.name.as_str()).collect();
513        let mut lines = vec![format!(
514            "## Skill library ({} installed)\nSkills marked ★ are already loaded below. Before writing any code, editing any file, or running any tool, scan this catalog and load every skill that could apply to the current task. Use `skill_invoke <name>` to load any additional skill by name.",
515            skill_catalog.len()
516        )];
517        for s in skill_catalog {
518            let star = if relevant_names.contains(s.name.as_str()) {
519                "★ "
520            } else {
521                "  "
522            };
523            let desc = s.description.trim();
524            let one_liner = if desc.is_empty() {
525                "(no description)".to_string()
526            } else {
527                desc.lines()
528                    .next()
529                    .unwrap_or(desc)
530                    .chars()
531                    .take(140)
532                    .collect()
533            };
534            lines.push(format!("- {star}**{}** — {}", s.name, one_liner));
535        }
536        parts.push(lines.join("\n"));
537    }
538
539    if !skills.is_empty() {
540        parts.push("## Relevant skills for this task (full body):".to_string());
541        for skill in skills {
542            parts.push(format!("### {}\n{}", skill.name, skill.body));
543        }
544    }
545
546    parts.join("\n\n")
547}
548
549fn tool_result_text(blocks: &[Block]) -> String {
550    let mut out = Vec::new();
551    for block in blocks {
552        match block {
553            Block::Text(text) => out.push(text.clone()),
554            Block::Json(value) => out.push(value.to_string()),
555            Block::Image { mime, data } => {
556                out.push(format!("[image: {}, {} bytes]", mime, data.len()));
557            }
558            Block::Diff { file, patch } => out.push(format!("diff for {}\n{}", file, patch)),
559        }
560    }
561    out.join("\n")
562}
563
564fn humanize_tool_action(tool_name: &str, args: &serde_json::Value) -> String {
565    let path = args
566        .get("path")
567        .or_else(|| args.get("file_path"))
568        .and_then(|v| v.as_str());
569    match (tool_name, path) {
570        ("fs_write", Some(path)) => format!("Sparrow veut créer ou remplacer `{path}`."),
571        ("edit" | "multi_edit", Some(path)) => format!("Sparrow veut modifier `{path}`."),
572        ("fs_read", Some(path)) => format!("Sparrow veut lire `{path}`."),
573        ("exec", _) => "Sparrow veut exécuter une commande.".to_string(),
574        (name, Some(path)) => format!("Sparrow veut lancer `{name}` sur `{path}`."),
575        (name, None) => format!("Sparrow veut lancer `{name}`."),
576    }
577}
578
579fn tool_result_content_blocks(blocks: &[Block]) -> Vec<ContentBlock> {
580    let mut out = Vec::new();
581    let text = tool_result_text(blocks);
582    if !text.trim().is_empty() {
583        out.push(ContentBlock::Text { text });
584    }
585    for block in blocks {
586        if let Block::Image { data, mime } = block {
587            out.push(ContentBlock::Image {
588                source: ImageSource::Base64 {
589                    media_type: mime.clone(),
590                    data: base64_encode(data),
591                },
592            });
593        }
594    }
595    out
596}
597
598/// Reconstruct an Event view from a finished conversation so the Distiller can
599/// mine durable facts (tool paths/content + reasoning). ToolUse blocks carry the
600/// real, parsed tool arguments; Text blocks carry assistant reasoning.
601fn events_from_messages(run_id: &RunId, messages: &[Msg]) -> Vec<Event> {
602    let mut events = Vec::new();
603    for msg in messages {
604        for block in &msg.content {
605            match block {
606                ContentBlock::ToolUse { name, input, .. } => {
607                    events.push(Event::ToolUseProposed {
608                        run: run_id.clone(),
609                        id: String::new(),
610                        name: name.clone(),
611                        args: input.clone(),
612                        risk: RiskLevel::ReadOnly,
613                    });
614                }
615                ContentBlock::Text { text } if msg.role == "assistant" => {
616                    events.push(Event::ThinkingDelta {
617                        run: run_id.clone(),
618                        text: text.clone(),
619                    });
620                }
621                _ => {}
622            }
623        }
624    }
625    events
626}
627
628// ─── Task ───────────────────────────────────────────────────────────────────────
629
630#[derive(Debug, Clone)]
631pub struct Task {
632    pub description: String,
633    pub context: Vec<Msg>,
634}
635
636/// Pre-run estimate: what the router WOULD do and roughly what it would cost.
637/// All figures are estimates priced at the primary model's list price.
638#[derive(Debug, Clone)]
639pub struct Preflight {
640    pub tier: TaskTier,
641    pub chain: Vec<String>,
642    pub est_input_range: (u64, u64),
643    pub est_output_range: (u64, u64),
644    pub est_cost_range: (f64, f64),
645}
646
647// ─── THE ENGINE ─────────────────────────────────────────────────────────────────
648
649pub struct Engine {
650    router: Arc<dyn Router>,
651    config: Config,
652    identity: Option<Identity>,
653    memory: Option<Arc<dyn Memory>>,
654    skills: Option<Arc<dyn SkillLibrary>>,
655    redaction: RedactionFilter,
656    approval_handler: Option<Arc<dyn ApprovalHandler>>,
657    reasoning: ReasoningEngine,
658    hooks: HookRegistry,
659    agent_store: Option<Arc<dyn AgentStore>>,
660    org_policy: Option<crate::onboarding::enterprise::OrgPolicy>,
661    /// Task description hash → TaskTier cache for classify_via_brain dedup
662    classify_cache: std::sync::Mutex<std::collections::HashMap<u64, crate::router::TaskTier>>,
663}
664
665#[derive(Debug, Clone)]
666pub struct ApprovalRequest {
667    pub run: RunId,
668    pub id: String,
669    pub tool_name: String,
670    pub risk: RiskLevel,
671    pub args: serde_json::Value,
672    pub summary: String,
673}
674
675#[async_trait]
676pub trait ApprovalHandler: Send + Sync {
677    async fn request_approval(&self, request: ApprovalRequest) -> Decision;
678}
679
680impl Engine {
681    pub fn new(router: Arc<dyn Router>, config: Config) -> Self {
682        let mut hooks = HookRegistry::new(Arc::new(crate::sandbox::LocalSandbox::new(
683            std::env::current_dir().unwrap_or_default(),
684        )));
685        hooks.load(config.hooks.clone());
686        Self {
687            router,
688            config,
689            identity: None,
690            memory: None,
691            skills: None,
692            redaction: RedactionFilter::new(),
693            approval_handler: None,
694            reasoning: ReasoningEngine::default(),
695            hooks,
696            agent_store: None,
697            org_policy: None,
698            classify_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
699        }
700    }
701
702    pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
703        // Load secrets for redaction
704        let secrets: Vec<String> = memory
705            .all_facts()
706            .iter()
707            .filter(|f| f.key.starts_with("secret:"))
708            .map(|f| f.value.clone())
709            .collect();
710        self.redaction.load_secrets(secrets);
711        self.memory = Some(memory);
712        self
713    }
714
715    pub fn with_skills(mut self, skills: Arc<dyn SkillLibrary>) -> Self {
716        self.skills = Some(skills);
717        self
718    }
719
720    pub fn with_identity(mut self, identity: Identity) -> Self {
721        self.identity = Some(identity);
722        self
723    }
724
725    pub fn with_agent_store(mut self, store: Arc<dyn AgentStore>) -> Self {
726        self.agent_store = Some(store);
727        self
728    }
729
730    pub fn with_org_policy(mut self, policy: crate::onboarding::enterprise::OrgPolicy) -> Self {
731        self.org_policy = Some(policy);
732        self
733    }
734
735    pub fn with_hooks_config(mut self, hooks: Vec<crate::hooks::Hook>) -> Self {
736        self.hooks.load(hooks);
737        self
738    }
739
740    pub fn with_approval_handler(mut self, approval_handler: Arc<dyn ApprovalHandler>) -> Self {
741        self.approval_handler = Some(approval_handler);
742        self
743    }
744
745    /// Heuristic classification + a confidence flag.
746    /// Returns `(tier, ambiguous)`. `ambiguous == true` means no semantic keyword
747    /// matched and the tier was guessed purely from length — a good signal that a
748    /// tiny model call could do better (§3.6).
749    fn classify_with_confidence(&self, task: &str) -> (TaskTier, bool) {
750        let lower = task.to_lowercase();
751        if lower.contains("vision") || lower.contains("image") || lower.contains("screenshot") {
752            (TaskTier::Vision, false)
753        } else if lower.contains("architecture")
754            || lower.contains("refactor")
755            || lower.contains("audit")
756            || lower.contains("répare")
757            || lower.contains("repare")
758            || lower.contains("livrer")
759            || lower.contains("v1")
760        {
761            (TaskTier::Hard, false)
762        } else if lower.contains("bug")
763            || lower.contains("fix")
764            || lower.contains("corrige")
765            || lower.contains("debug")
766        {
767            (TaskTier::Small, false)
768        } else if lower.contains("routing")
769            || lower.contains("routeur")
770            || lower.contains("modèle")
771            || lower.contains("modele")
772            || lower.contains("model")
773            || lower.contains("sélectionne")
774            || lower.contains("selectionne")
775        {
776            (TaskTier::Small, false)
777        } else if lower.len() < 80 {
778            // length-only guess → ambiguous
779            (TaskTier::Trivial, true)
780        } else {
781            (TaskTier::Medium, true)
782        }
783    }
784
785    /// Ask a cheap brain to classify an ambiguous task into a tier (§3.6).
786    /// Bounded to a 10-token completion; failures fall back to the heuristic tier.
787    async fn classify_via_brain(&self, task: &str, brain: &dyn Brain) -> Option<TaskTier> {
788        let req = BrainRequest {
789            system: Some(
790                "You are a task classifier. Output exactly one word: trivial, small, medium, hard, or vision."
791                    .into(),
792            ),
793            messages: vec![Msg {
794                role: "user".into(),
795                content: vec![ContentBlock::Text {
796                    text: format!(
797                        "Classify this coding task into exactly one tier (trivial, small, medium, hard, vision):\n\n{}\n\nTier:",
798                        task
799                    ),
800                }],
801            }],
802            tools: vec![],
803            max_tokens: 6,
804            temperature: 0.0,
805            stop: vec![],
806            cache: PromptCacheConfig::disabled(),
807        };
808        let mut stream = brain.complete(req).await.ok()?;
809        let mut out = String::new();
810        while let Some(ev) = stream.next().await {
811            match ev {
812                BrainEvent::TextDelta(t) => out.push_str(&t),
813                BrainEvent::Done(_) => break,
814                BrainEvent::Error(_) => return None,
815                _ => {}
816            }
817        }
818        let word = out.trim().to_lowercase();
819        let word = word.split_whitespace().next().unwrap_or("");
820        match word {
821            "trivial" => Some(TaskTier::Trivial),
822            "small" => Some(TaskTier::Small),
823            "medium" => Some(TaskTier::Medium),
824            "hard" => Some(TaskTier::Hard),
825            "vision" => Some(TaskTier::Vision),
826            _ => None,
827        }
828    }
829
830    fn task_summary(&self, task: &str, tier: &TaskTier) -> String {
831        let lower = task.to_lowercase();
832        if lower.contains("routing")
833            || lower.contains("routeur")
834            || lower.contains("modèle")
835            || lower.contains("modele")
836            || lower.contains("model")
837        {
838            "question meta sur le routing modele".into()
839        } else if lower.contains("code") || lower.contains("bug") || lower.contains("fix") {
840            format!("requete code/{:?}", tier).to_lowercase()
841        } else if lower.contains("config") || lower.contains("provider") {
842            "configuration provider/modele".into()
843        } else {
844            format!("requete {:?}", tier).to_lowercase()
845        }
846    }
847
848    fn is_routing_question(&self, task: &str) -> bool {
849        let lower = task.to_lowercase();
850        (lower.contains("routing") || lower.contains("routeur") || lower.contains("route"))
851            && (lower.contains("modèle") || lower.contains("modele") || lower.contains("model"))
852            || lower.contains("sélectionne tu le model")
853            || lower.contains("selectionne tu le model")
854    }
855
856    fn requires_tools(&self, task: &str, tier: &TaskTier) -> bool {
857        let lower = task.to_lowercase();
858        let tool_keywords = [
859            "outil",
860            "tools",
861            "fichier",
862            "file",
863            "readme",
864            ".rs",
865            ".ts",
866            ".js",
867            ".html",
868            ".md",
869            "repo",
870            "dossier",
871            "workspace",
872            "git",
873            "test",
874            "build",
875            "cargo",
876            "npm",
877            "pnpm",
878            "corrige",
879            "fix",
880            "debug",
881            "bug",
882            "répare",
883            "repare",
884            "modifie",
885            "édite",
886            "edite",
887            "ajoute",
888            "supprime",
889            "écris",
890            "ecris",
891            "write",
892            "create",
893            "crée",
894            "cree",
895            "audit",
896        ];
897
898        if tool_keywords.iter().any(|kw| lower.contains(kw)) {
899            return true;
900        }
901
902        matches!(tier, TaskTier::Medium | TaskTier::Hard | TaskTier::Vision)
903    }
904
905    fn requires_vision(&self, task: &str, tier: &TaskTier) -> bool {
906        let lower = task.to_lowercase();
907        matches!(tier, TaskTier::Vision)
908            || [
909                "image",
910                "screenshot",
911                "capture",
912                "photo",
913                "vision",
914                "logo",
915                "visuel",
916                "interface graphique",
917            ]
918            .iter()
919            .any(|kw| lower.contains(kw))
920    }
921
922    fn routing_explanation(
923        &self,
924        tier: &TaskTier,
925        need: &crate::router::RoutingNeed,
926        chain_ids: &[String],
927    ) -> String {
928        let chain = summarize_model_chain(chain_ids, 5);
929        format!(
930            "Je suis Sparrow, donc je ne réponds pas comme un modèle isolé: avant chaque run, mon routeur classe ta demande puis choisit une chaîne de modèles.\n\nPour cette requête, j'ai détecté: tier `{}` · tools `{}` · vision `{}` · local `{}`.\n\nJe sélectionne ensuite le modèle avec ces critères: adéquation aux capacités demandées, support des tools, besoin vision, préférence local/free-first, budget restant, latence, taille de contexte, puis disponibilité provider. Le résultat est une fallback chain, pas un seul choix figé: `{}`.\n\nConcrètement: une question simple ou meta doit aller vers le modèle le moins coûteux capable de répondre; une tâche code complexe monte vers un modèle plus fort; une tâche avec fichiers/tools exige un modèle compatible tools; une tâche image demande vision; si un provider échoue, je bascule au suivant dans la chaîne.",
931            tier.as_str(),
932            need.required_tools,
933            need.required_vision,
934            need.prefer_local,
935            chain
936        )
937    }
938
939    /// Summarize a slice of dropped conversation messages into ~200 tokens so
940    /// compaction preserves continuity instead of just truncating (§3.7).
941    async fn summarize_messages(&self, brain: &dyn Brain, middle: &[Msg]) -> Option<String> {
942        if middle.is_empty() {
943            return None;
944        }
945        // Flatten the middle into a compact transcript for the summarizer.
946        let mut transcript = String::new();
947        for m in middle {
948            for block in &m.content {
949                match block {
950                    ContentBlock::Text { text } => {
951                        transcript.push_str(&format!("[{}] {}\n", m.role, text));
952                    }
953                    ContentBlock::ToolUse { name, .. } => {
954                        transcript.push_str(&format!("[{}] (tool: {})\n", m.role, name));
955                    }
956                    ContentBlock::ToolResult { .. } => {
957                        transcript.push_str(&format!("[{}] (tool result)\n", m.role));
958                    }
959                    _ => {}
960                }
961            }
962        }
963        if transcript.len() > 12_000 {
964            transcript.truncate(12_000);
965        }
966        let req = BrainRequest {
967            system: Some(
968                "Summarize this agent conversation in <=200 tokens. Preserve: files edited, \
969                 decisions made, current state, and any unfinished work. Plain text only."
970                    .into(),
971            ),
972            messages: vec![Msg {
973                role: "user".into(),
974                content: vec![ContentBlock::Text { text: transcript }],
975            }],
976            tools: vec![],
977            max_tokens: 300,
978            temperature: 0.0,
979            stop: vec![],
980            cache: PromptCacheConfig::disabled(),
981        };
982        let mut stream = brain.complete(req).await.ok()?;
983        let mut out = String::new();
984        while let Some(ev) = stream.next().await {
985            match ev {
986                BrainEvent::TextDelta(t) => out.push_str(&t),
987                BrainEvent::Done(_) => break,
988                BrainEvent::Error(_) => return None,
989                _ => {}
990            }
991        }
992        let out = out.trim().to_string();
993        if out.is_empty() { None } else { Some(out) }
994    }
995
996    /// Estimate what a task will cost BEFORE running it: classified tier,
997    /// selected chain, and a token/cost range priced at the primary model.
998    /// Everything here is an estimate — surfaces must label it as such.
999    pub fn preflight(&self, task_desc: &str) -> Preflight {
1000        let (tier, _ambiguous) = self.classify_with_confidence(task_desc);
1001        let need = crate::router::RoutingNeed {
1002            tier: tier.clone(),
1003            required_tools: self.requires_tools(task_desc, &tier),
1004            required_vision: self.requires_vision(task_desc, &tier),
1005            prefer_local: false,
1006        };
1007        let budget = BudgetState {
1008            daily_limit_usd: self.config.budget.daily_usd,
1009            daily_spent_usd: 0.0,
1010            session_limit_usd: self.config.budget.session_usd,
1011            session_spent_usd: 0.0,
1012        };
1013        let chain = self.router.select(&need, &budget);
1014        // Token envelopes per tier, from observed run shapes (rough by design).
1015        let (in_lo, in_hi, out_lo, out_hi): (u64, u64, u64, u64) = match tier {
1016            TaskTier::Trivial => (800, 4_000, 100, 1_000),
1017            TaskTier::Small => (3_000, 12_000, 500, 3_000),
1018            TaskTier::Medium => (8_000, 40_000, 2_000, 10_000),
1019            TaskTier::Hard => (25_000, 120_000, 5_000, 25_000),
1020            TaskTier::Vision => (8_000, 40_000, 2_000, 8_000),
1021        };
1022        let price = chain.first().map(|b| b.caps());
1023        let cost = |tin: u64, tout: u64| -> f64 {
1024            price
1025                .as_ref()
1026                .map(|c| {
1027                    tin as f64 * c.cost_input_per_mtok / 1_000_000.0
1028                        + tout as f64 * c.cost_output_per_mtok / 1_000_000.0
1029                })
1030                .unwrap_or(0.0)
1031        };
1032        Preflight {
1033            tier,
1034            chain: chain.iter().map(|b| b.id().to_string()).collect(),
1035            est_input_range: (in_lo, in_hi),
1036            est_output_range: (out_lo, out_hi),
1037            est_cost_range: (cost(in_lo, out_lo), cost(in_hi, out_hi)),
1038        }
1039    }
1040
1041    /// Drive one AgentRun to completion.
1042    pub async fn drive(
1043        &self,
1044        task: Task,
1045        event_tx: mpsc::UnboundedSender<Event>,
1046    ) -> anyhow::Result<OutcomeSummary> {
1047        self.drive_with_run_id(task, event_tx, RunId::new()).await
1048    }
1049
1050    /// Drive with a caller-provided run id.
1051    pub async fn drive_with_run_id(
1052        &self,
1053        task: Task,
1054        event_tx: mpsc::UnboundedSender<Event>,
1055        run_id: RunId,
1056    ) -> anyhow::Result<OutcomeSummary> {
1057        self.drive_with_inject(task, event_tx, run_id, None).await
1058    }
1059
1060    /// Drive with an optional `inject_rx` channel that lets the caller inject
1061    /// user messages mid-run. Polled non-blocking between turns. (§3.7)
1062    pub async fn drive_with_inject(
1063        &self,
1064        task: Task,
1065        event_tx: mpsc::UnboundedSender<Event>,
1066        run_id: RunId,
1067        mut inject_rx: Option<mpsc::UnboundedReceiver<String>>,
1068    ) -> anyhow::Result<OutcomeSummary> {
1069        // Parse and strip optional __model:X__ override prefix injected by the WebView.
1070        let model_override: Option<String>;
1071        let clean_description: String;
1072        if let Some(rest) = task.description.strip_prefix("__model:") {
1073            if let Some(end) = rest.find("__ ") {
1074                model_override = Some(rest[..end].to_string());
1075                clean_description = rest[end + 3..].to_string();
1076            } else {
1077                model_override = None;
1078                clean_description = task.description.clone();
1079            }
1080        } else {
1081            model_override = None;
1082            clean_description = task.description.clone();
1083        }
1084        let task = Task {
1085            description: clean_description,
1086            context: task.context,
1087        };
1088
1089        let mut messages: Vec<Msg> = task.context.clone();
1090
1091        // Classify task (heuristic first)
1092        let (mut tier, ambiguous) = self.classify_with_confidence(&task.description);
1093
1094        // Route: select brain chain
1095        let budget = BudgetState {
1096            daily_limit_usd: self.config.budget.daily_usd,
1097            daily_spent_usd: 0.0,
1098            session_limit_usd: self.config.budget.session_usd,
1099            session_spent_usd: 0.0,
1100        };
1101
1102        let mut required_tools = self.requires_tools(&task.description, &tier);
1103        let mut required_vision = self.requires_vision(&task.description, &tier);
1104        let mut need = crate::router::RoutingNeed {
1105            tier: tier.clone(),
1106            required_tools,
1107            required_vision,
1108            prefer_local: false,
1109        };
1110
1111        let mut chain = self.router.select(&need, &budget);
1112
1113        // Apply WebView model override:
1114        //  1) Keep the brain in the chain if found there.
1115        //  2) Otherwise, look it up directly via the router — the user explicitly
1116        //     picked it, so we honour it even if the tier-based selection didn't
1117        //     include it.
1118        //  3) This must be re-applied after any chain mutation (e.g. §3.6
1119        //     refinement) or the auto-router silently overrides the manual pick.
1120        let router_ref = &self.router;
1121        let apply_override = |chain: &mut Vec<Arc<dyn Brain>>| {
1122            if let Some(ref override_id) = model_override {
1123                let filtered: Vec<_> = chain
1124                    .iter()
1125                    .filter(|b| b.id() == override_id.as_str())
1126                    .cloned()
1127                    .collect();
1128                if !filtered.is_empty() {
1129                    *chain = filtered;
1130                } else if let Some(brain) = router_ref.find_brain_by_id(override_id) {
1131                    *chain = vec![brain];
1132                }
1133            }
1134        };
1135        apply_override(&mut chain);
1136
1137        // §3.6: model-assisted refinement for genuinely ambiguous tasks. Only the
1138        // length-based Medium guess qualifies — short tasks stay Trivial without
1139        // the extra round-trip, keeping the common path fast. Uses the cheapest
1140        // already-selected brain, bounded to a 6-token call.
1141        //
1142        // Skip refinement entirely when the user has pinned a specific model:
1143        // the whole point of the manual pick is to bypass the router's judgment.
1144        if model_override.is_none()
1145            && ambiguous
1146            && matches!(tier, TaskTier::Medium)
1147            && !self.is_routing_question(&task.description)
1148        {
1149            // Dedup: cache task hash → refined tier so identical tasks skip the LLM call.
1150            let desc_hash = {
1151                use std::collections::hash_map::DefaultHasher;
1152                use std::hash::{Hash, Hasher};
1153                let mut h = DefaultHasher::new();
1154                task.description.hash(&mut h);
1155                h.finish()
1156            };
1157            let cached = {
1158                self.classify_cache
1159                    .lock()
1160                    .ok()
1161                    .and_then(|c| c.get(&desc_hash).cloned())
1162            };
1163            let refined = match cached {
1164                Some(t) => {
1165                    let _ = event_tx.send(Event::Message {
1166                        run: run_id.clone(),
1167                        role: "router".into(),
1168                        text: format!("classification (cached): {}", t.as_str()),
1169                    });
1170                    Some(t)
1171                }
1172                None => {
1173                    if let Some(brain) = chain.first().cloned() {
1174                        let result = self
1175                            .classify_via_brain(&task.description, brain.as_ref())
1176                            .await;
1177                        if let Some(r) = &result {
1178                            if let Ok(mut c) = self.classify_cache.lock() {
1179                                c.insert(desc_hash, r.clone());
1180                            }
1181                        }
1182                        result
1183                    } else {
1184                        None
1185                    }
1186                }
1187            };
1188            if let Some(refined) = refined {
1189                if std::mem::discriminant(&refined) != std::mem::discriminant(&tier) {
1190                    let _ = event_tx.send(Event::Message {
1191                        run: run_id.clone(),
1192                        role: "router".into(),
1193                        text: format!(
1194                            "classification affinée par modèle: {} → {}",
1195                            tier.as_str(),
1196                            refined.as_str()
1197                        ),
1198                    });
1199                    tier = refined;
1200                    required_tools = self.requires_tools(&task.description, &tier);
1201                    required_vision = self.requires_vision(&task.description, &tier);
1202                    need = crate::router::RoutingNeed {
1203                        tier: tier.clone(),
1204                        required_tools,
1205                        required_vision,
1206                        prefer_local: false,
1207                    };
1208                    chain = self.router.select(&need, &budget);
1209                    // Re-apply manual override after the chain mutation.
1210                    apply_override(&mut chain);
1211                }
1212            }
1213        }
1214
1215        // ── Per-repo routing memory ────────────────────────────────────────
1216        // Outcomes are recorded under the CLASSIFIED tier (pre-bump), so the
1217        // system self-corrects: if bumping makes "small" tasks verify cleanly,
1218        // small's stats recover and the bump switches itself off again.
1219        let routing_memory_root =
1220            std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));
1221        let mut repo_routing =
1222            crate::router::learned::RepoRoutingMemory::load(&routing_memory_root);
1223        let classified_tier = tier.clone();
1224        if let Some(bumped) = repo_routing.suggest_bump(&tier) {
1225            let _ = event_tx.send(Event::Message {
1226                run: run_id.clone(),
1227                role: "router".into(),
1228                text: format!(
1229                    "routing memory: {} tasks in this repo mostly needed escalation — starting at {}",
1230                    tier.as_str(),
1231                    bumped.as_str()
1232                ),
1233            });
1234            tier = bumped;
1235            required_tools = self.requires_tools(&task.description, &tier);
1236            required_vision = self.requires_vision(&task.description, &tier);
1237            need = crate::router::RoutingNeed {
1238                tier: tier.clone(),
1239                required_tools,
1240                required_vision,
1241                prefer_local: false,
1242            };
1243            chain = self.router.select(&need, &budget);
1244            apply_override(&mut chain);
1245        }
1246
1247        let task_summary = self.task_summary(&task.description, &tier);
1248        let chain_ids: Vec<String> = chain.iter().map(|b| b.id().to_string()).collect();
1249
1250        let agent_name = self
1251            .identity
1252            .as_ref()
1253            .map(|identity| identity.name.clone())
1254            .unwrap_or_else(|| "sparrow".into());
1255        let _ = event_tx.send(Event::RunStarted {
1256            run: run_id.clone(),
1257            task: task.description.clone(),
1258            agent: agent_name,
1259        });
1260
1261        // PreRun lifecycle hook. Allows operators to gate run start (blocking
1262        // hooks can veto by exiting non-zero), warm caches, etc.
1263        let pre_run_results = self
1264            .hooks
1265            .execute(&HookEvent::PreRun, &task.description)
1266            .await;
1267        if let Some(reason) = pre_run_results
1268            .iter()
1269            .find(|r| r.veto)
1270            .and_then(|r| r.veto_reason.clone())
1271        {
1272            let _ = event_tx.send(Event::Error {
1273                run: run_id.clone(),
1274                message: format!("PreRun hook vetoed run: {}", reason),
1275            });
1276            anyhow::bail!("PreRun hook vetoed run: {}", reason);
1277        }
1278
1279        // F7: a single, clear router line — no "requete: requete …" doubling,
1280        // no franglais. Booleans become plain on/off words.
1281        let yn = |b: bool| if b { "oui" } else { "non" };
1282        let _ = event_tx.send(Event::Message {
1283            run: run_id.clone(),
1284            role: "router".into(),
1285            text: format!(
1286                "tâche classée : {} · outils : {} · vision : {} · local : {}",
1287                tier.as_str(),
1288                yn(need.required_tools),
1289                yn(need.required_vision),
1290                yn(need.prefer_local)
1291            ),
1292        });
1293        let _ = &task_summary; // kept for potential telemetry; no longer shown raw
1294
1295        // F1: this is the router selecting a model chain — frame it honestly as
1296        // routing, not as a "planner" agent deliberating over candidates.
1297        let _ = event_tx.send(Event::AgentStatus {
1298            run: run_id.clone(),
1299            role: "planner".into(),
1300            status: AgentStatus::Working,
1301            note: format!("routage · {} modèles dans la chaîne", chain.len()),
1302        });
1303
1304        let primary_ctx = chain
1305            .first()
1306            .map(|b| b.caps().context_window)
1307            .unwrap_or(128_000);
1308        let _ = event_tx.send(Event::RouteSelected {
1309            run: run_id.clone(),
1310            chain: chain_ids.clone(),
1311            context_window: primary_ctx,
1312        });
1313        let _ = event_tx.send(Event::AgentStatus {
1314            run: run_id.clone(),
1315            role: "planner".into(),
1316            status: AgentStatus::Done,
1317            note: format!(
1318                "route set · {} primary",
1319                chain.first().map(|b| b.id()).unwrap_or("—")
1320            ),
1321        });
1322
1323        if chain.is_empty() {
1324            let _ = event_tx.send(Event::Error {
1325                run: run_id.clone(),
1326                message: "No available models (budget exhausted or no providers configured)".into(),
1327            });
1328            return Ok(OutcomeSummary {
1329                status: "error: no models".into(),
1330                diffs: vec![],
1331                cost_usd: 0.0,
1332                tokens: TokenUsage {
1333                    input: 0,
1334                    output: 0,
1335                },
1336                cost_comparison: String::new(),
1337                duration_ms: None,
1338            });
1339        }
1340
1341        if self.is_routing_question(&task.description) {
1342            let text = self.routing_explanation(&tier, &need, &chain_ids);
1343            let input_tokens =
1344                estimate_text_tokens(&task.description) + estimate_text_tokens(&task_summary);
1345            let output_tokens = estimate_text_tokens(&text);
1346            let _ = event_tx.send(Event::TokenUsageEstimated {
1347                run: run_id.clone(),
1348                input: input_tokens,
1349                output: 0,
1350                reason: "router meta request estimate".into(),
1351            });
1352            let _ = event_tx.send(Event::TokenUsageEstimated {
1353                run: run_id.clone(),
1354                input: 0,
1355                output: output_tokens,
1356                reason: "router meta response estimate".into(),
1357            });
1358            let _ = event_tx.send(Event::ThinkingDelta {
1359                run: run_id.clone(),
1360                text: text.clone(),
1361            });
1362            let outcome = OutcomeSummary {
1363                status: "completed".into(),
1364                diffs: vec![],
1365                cost_usd: 0.0,
1366                tokens: TokenUsage {
1367                    input: input_tokens,
1368                    output: output_tokens,
1369                },
1370                cost_comparison: String::new(),
1371                duration_ms: None,
1372            };
1373            let _ = event_tx.send(Event::RunFinished {
1374                run: run_id.clone(),
1375                outcome: outcome.clone(),
1376            });
1377            return Ok(outcome);
1378        }
1379
1380        // Build tools and workspace
1381        let workspace_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1382        let sandbox: Arc<dyn Sandbox> = match self.config.defaults.sandbox.as_str() {
1383            "local-hardened" => Arc::new(crate::sandbox::LocalSandbox::hardened(
1384                workspace_root.clone(),
1385            )),
1386            "docker" => Arc::new(crate::sandbox::backends::DockerSandbox::new(
1387                workspace_root.clone(),
1388                "ubuntu:latest",
1389            )),
1390            s if s.starts_with("ssh:") => Arc::new(crate::sandbox::backends::SshSandbox::new(
1391                workspace_root.clone(),
1392                s.trim_start_matches("ssh:"),
1393            )),
1394            "modal" => Arc::new(crate::sandbox::backends::ModalSandbox::new(
1395                workspace_root.clone(),
1396            )),
1397            "daytona" => Arc::new(crate::sandbox::backends::DaytonaSandbox::new(
1398                workspace_root.clone(),
1399            )),
1400            "vercel" => Arc::new(crate::sandbox::backends::VercelSandbox::new(
1401                workspace_root.clone(),
1402            )),
1403            "singularity" => Arc::new(crate::sandbox::backends::SingularitySandbox::new(
1404                workspace_root.clone(),
1405            )),
1406            _ => Arc::new(crate::sandbox::LocalSandbox::new(workspace_root.clone())),
1407        };
1408
1409        let mut registry = ToolRegistry::new();
1410        registry.register(Arc::new(crate::tools::fs::FsRead));
1411        registry.register(Arc::new(crate::tools::fs::FsList));
1412        registry.register(Arc::new(crate::tools::fs::FsWrite));
1413        registry.register(Arc::new(crate::tools::edit::Edit));
1414        registry.register(Arc::new(crate::tools::edit::MultiEdit));
1415        registry.register(Arc::new(crate::tools::search_and_web::Search));
1416        registry.register(Arc::new(crate::tools::search_and_web::WebSearch));
1417        registry.register(Arc::new(crate::tools::search_and_web::WebFetch));
1418        registry.register(Arc::new(crate::tools::browser_sandbox::BrowserTool));
1419        registry.register(Arc::new(crate::tools::browser_sandbox::ComputerTool));
1420        registry.register(Arc::new(crate::tools::git::Git));
1421        registry.register(Arc::new(crate::tools::todo::Todo::new()));
1422        registry.register(Arc::new(crate::tools::exec::Exec::new(sandbox.clone())));
1423        registry.register(Arc::new(crate::tools::media::ImageGen::new()));
1424        registry.register(Arc::new(crate::tools::media::Tts::new()));
1425        registry.register(Arc::new(crate::tools::media::Transcribe::new()));
1426        registry.register(Arc::new(crate::tools::subagent::PythonRpc::new()));
1427        registry.register(Arc::new(crate::tools::builder_tools::LspClient));
1428        registry.register(Arc::new(crate::tools::code_nav::Glob));
1429        registry.register(Arc::new(crate::tools::code_nav::Symbols));
1430        if let Some(mem) = &self.memory {
1431            registry.register(Arc::new(crate::tools::memory::MemoryTool::new(mem.clone())));
1432            registry.register(Arc::new(
1433                crate::tools::knowledge_graph::KnowledgeGraphTool::new(mem.clone()),
1434            ));
1435        }
1436        {
1437            // Subagent delegation: child engine built from the same router/config.
1438            let mut sub = crate::tools::subagent::SubagentSpawn::new(
1439                self.router.clone(),
1440                self.config.clone(),
1441            );
1442            if let Some(mem) = &self.memory {
1443                sub = sub.with_memory(mem.clone());
1444            }
1445            registry.register(Arc::new(sub));
1446        }
1447        let tools = Arc::new(registry);
1448        let tool_specs: Vec<ToolSpec> = tools.to_specs();
1449
1450        let workspace = Workspace {
1451            root: workspace_root,
1452            sandbox,
1453        };
1454
1455        let identity = self.identity.clone().unwrap_or_else(|| Identity {
1456            name: "sparrow".into(),
1457            role: "senior software engineer".into(),
1458            personality: "concise, competent, direct".into(),
1459        });
1460
1461        let brain_policy = BrainPolicy {
1462            chain,
1463            current_index: 0,
1464        };
1465
1466        let mut autonomy = match self.config.defaults.autonomy {
1467            AutonomyLevel::Supervised => AutonomyContract::supervised(),
1468            AutonomyLevel::Trusted => AutonomyContract::trusted(),
1469            AutonomyLevel::Autonomous => AutonomyContract::autonomous(),
1470        };
1471        autonomy.budget.max_usd = self.config.budget.session_usd;
1472        let _ = event_tx.send(Event::AutonomyChanged {
1473            run: run_id.clone(),
1474            level: autonomy.level.clone(),
1475        });
1476
1477        // Load relevant skills — top-N pre-selected for full-body inclusion.
1478        // The main agent soul requires mandatory skill pre-read before ANY action,
1479        // so we load MORE skills than before (5 instead of 3) and the agent
1480        // is instructed to scan the full catalog for anything it might need.
1481        let relevant_skills: Vec<crate::capabilities::Skill> = self
1482            .skills
1483            .as_ref()
1484            .map(|s| s.relevant(&task.description, 5))
1485            .unwrap_or_default();
1486        // And the full catalog (names + descriptions only) so the agent
1487        // discovers everything in the library and can invoke a skill it
1488        // wasn't pre-fed.
1489        let skill_catalog: Vec<crate::capabilities::Skill> =
1490            self.skills.as_ref().map(|s| s.all()).unwrap_or_default();
1491
1492        let facts = self
1493            .memory
1494            .as_ref()
1495            .map(|m| m.all_facts())
1496            .unwrap_or_default();
1497        let memory_docs = self
1498            .memory
1499            .as_ref()
1500            .map(|m| {
1501                [MemoryDocKind::Memory, MemoryDocKind::User]
1502                    .into_iter()
1503                    .filter_map(|kind| m.memory_doc(kind))
1504                    .collect::<Vec<_>>()
1505            })
1506            .unwrap_or_default();
1507        let instruction_docs = crate::instructions::discover_workspace_instructions(
1508            &workspace.root,
1509            &task.description,
1510        );
1511        let system = build_system_prompt(SystemPromptInput {
1512            identity: &identity,
1513            tier: Some(&tier),
1514            workspace_root: &workspace.root,
1515            facts: &facts,
1516            memory_docs: &memory_docs,
1517            instruction_docs: &instruction_docs,
1518            skills: &relevant_skills,
1519            skill_catalog: &skill_catalog,
1520        });
1521        let mut system = format!(
1522            "{}\n\n## Active Sparrow Routing Context\nRequest category: {}\nTask tier: {}\nRequired tools: {}\nRequired vision: {}\nPreferred local: {}\nSelected fallback chain: {}\nRouting policy: free_first={}, session_budget_usd={:.2}.\nWhen answering routing questions, describe this context concretely.",
1523            system,
1524            task_summary,
1525            tier.as_str(),
1526            need.required_tools,
1527            need.required_vision,
1528            need.prefer_local,
1529            summarize_model_chain(&chain_ids, 8),
1530            self.config.routing.free_first,
1531            self.config.budget.session_usd
1532        );
1533
1534        // Continuity hint: when there is prior conversation (task.context), tell
1535        // the model to treat it as authoritative memory. Weaker models otherwise
1536        // recite the system identity and ignore what the user said earlier.
1537        if !messages.is_empty() {
1538            system.push_str(
1539                "\n\n## Conversation continuity\nThis is an ONGOING conversation. The messages below are prior turns and are AUTHORITATIVE memory of what the user told you (names, preferences, facts, decisions). Use them directly; never re-introduce yourself or contradict them.",
1540            );
1541        }
1542
1543        // Build initial messages
1544        messages.push(Msg {
1545            role: "user".into(),
1546            content: initial_user_content_blocks(&workspace.root, &task.description),
1547        });
1548
1549        let mut total_input: u64 = 0;
1550        let mut total_output: u64 = 0;
1551        let mut estimated_input_unconfirmed: u64 = 0;
1552        let mut estimated_output_unconfirmed: u64 = 0;
1553        let mut estimated_cost_unconfirmed: f64 = 0.0;
1554        let mut cost_usd: f64 = 0.0;
1555        let mut total_tools_called: usize = 0;
1556        let diffs: Vec<crate::event::FileDiff> = Vec::new();
1557        let mut current_chain_idx = 0usize;
1558        let mut tool_results_pending: Vec<(
1559            String,
1560            String,
1561            serde_json::Value,
1562            Vec<ContentBlock>,
1563            bool,
1564        )> = Vec::new();
1565        let budget_session = self.config.budget.session_usd;
1566        let _budget_daily = self.config.budget.daily_usd;
1567        let redaction = &self.redaction;
1568        let mut had_error = false;
1569        let mut last_error: Option<String> = None;
1570        let mut waiting_for_approval = false;
1571        let mut denied_by_approval = false;
1572        let run_started_at = std::time::Instant::now();
1573        let mut skill_evidence = String::new();
1574        // Iteration safety cap: bound the agentic loop independently of budget.
1575        let mut turns: u32 = 0;
1576        const MAX_TURNS: u32 = 60;
1577        // Auto-verify state: track whether mutating edits happened and how many
1578        // verify attempts we've spent, so we run the verify command after the
1579        // model says it's done and re-inject failures (bounded).
1580        let mut had_mutation = false;
1581        let mut verify_attempts: u32 = 0;
1582        const MAX_VERIFY_ATTEMPTS: u32 = 2;
1583        // Verified escalation: when a model exhausts its fix budget, the run
1584        // climbs to the next model in the chain (bounded) instead of ending
1585        // silently unverified — cheap-first routing with a guaranteed floor.
1586        let mut verify_escalations: u32 = 0;
1587        const MAX_VERIFY_ESCALATIONS: u32 = 2;
1588        // Whether the run has produced ANY visible output (text or tool use). If
1589        // a model returns an empty completion and nothing has been produced yet,
1590        // we fall back to the next model in the chain (rescues a dead provider).
1591        let mut produced_any_output = false;
1592        // Transient-failure retry state: a rate limit or timeout on the primary
1593        // model must NOT permanently downgrade a long run to a weaker fallback.
1594        // We retry the same brain (bounded, with backoff) before advancing.
1595        let mut transient_retries: u32 = 0;
1596        const MAX_TRANSIENT_RETRIES: u32 = 2;
1597        // Stuck-loop detection: a turn that issues the exact same tool calls as
1598        // the previous turn is the classic long-task death spiral (re-reading
1599        // the same file, retrying a failing edit verbatim). Nudge at 3 repeats,
1600        // stop honestly at 5 — long before the MAX_TURNS budget burns out.
1601        let mut last_tool_sig: Option<u64> = None;
1602        let mut repeated_tool_turns: u32 = 0;
1603
1604        // Helper to send redacted events
1605        let send = |event: Event| {
1606            let _ = event_tx.send(redaction.redact_event(&event));
1607        };
1608
1609        // Compaction state (Phase 12 auto-trigger). The threshold matches the
1610        // default ContextManager budget; we keep `keep_last` messages verbatim
1611        // and replace earlier ones with a distilled summary block. A handoff
1612        // doc is written to `.sparrow/handoff/<run>-<ts>.md` and an
1613        // `Event::Compacted` is emitted so UIs can show the pass.
1614        const COMPACT_TRANSCRIPT_CHARS: usize = 120_000;
1615        const COMPACT_KEEP_LAST: usize = 6;
1616        let context_manager = crate::redaction::ContextManager::new(200_000);
1617
1618        // Main agentic loop
1619        loop {
1620            // Auto-compaction check (Phase 12). Skipped on the very first turn
1621            // so a short task never pays the overhead.
1622            if turns > 0 {
1623                let transcript_chars: usize = messages
1624                    .iter()
1625                    .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1626                    .sum();
1627                if transcript_chars > COMPACT_TRANSCRIPT_CHARS && messages.len() > COMPACT_KEEP_LAST
1628                {
1629                    // PreCompact lifecycle hook: lets operators dump state /
1630                    // back up the transcript before compaction discards it.
1631                    let _ = self
1632                        .hooks
1633                        .execute(&HookEvent::PreCompact, &task.description)
1634                        .await;
1635                    let before = transcript_chars;
1636                    let compacted =
1637                        context_manager.compact_messages(&messages, 0, COMPACT_KEEP_LAST);
1638                    let after: usize = compacted
1639                        .iter()
1640                        .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1641                        .sum();
1642
1643                    // Write a durable handoff next to the transcript.
1644                    let mut handoff = crate::context::HandoffDoc::new(task.description.clone());
1645                    handoff.next_steps = vec![format!(
1646                        "Resume run {} (turn {}/{})",
1647                        run_id.0, turns, MAX_TURNS
1648                    )];
1649                    let handoff_dir = std::path::PathBuf::from(".sparrow/handoff");
1650                    let _ = std::fs::create_dir_all(&handoff_dir);
1651                    let handoff_path = handoff_dir.join(format!(
1652                        "{}-{}.md",
1653                        run_id.0,
1654                        chrono::Utc::now().format("%Y%m%dT%H%M%SZ")
1655                    ));
1656                    let _ = std::fs::write(&handoff_path, handoff.to_markdown());
1657
1658                    messages = compacted;
1659                    send(Event::Compacted {
1660                        run: run_id.clone(),
1661                        before_chars: before,
1662                        after_chars: after,
1663                        handoff_path: Some(handoff_path.to_string_lossy().to_string()),
1664                    });
1665                    let _ = self
1666                        .hooks
1667                        .execute(&HookEvent::PostCompact, &task.description)
1668                        .await;
1669                }
1670            }
1671            // Iteration cap: stop runaway loops independently of budget.
1672            turns += 1;
1673            if turns > MAX_TURNS {
1674                send(Event::Message {
1675                    run: run_id.clone(),
1676                    role: "guard".into(),
1677                    text: format!("iteration cap reached ({} turns) — stopping", MAX_TURNS),
1678                });
1679                break;
1680            }
1681
1682            // Wall-clock cap: hard stop if the run has run too long (--max-wall-secs).
1683            if let Some(max_secs) = self.config.budget.max_wall_secs {
1684                if run_started_at.elapsed().as_secs() >= max_secs {
1685                    let msg = format!("Time limit reached: {}s wall-clock cap", max_secs);
1686                    send(Event::Error {
1687                        run: run_id.clone(),
1688                        message: msg.clone(),
1689                    });
1690                    let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1691                    had_error = true;
1692                    last_error = Some("wall-clock limit".into());
1693                    break;
1694                }
1695            }
1696            // Token cap: hard stop if total tokens exceed --max-tokens.
1697            if let Some(max_tok) = self.config.budget.max_tokens {
1698                if total_input + total_output >= max_tok {
1699                    let msg = format!(
1700                        "Token limit reached: {} of {} token cap",
1701                        total_input + total_output,
1702                        max_tok
1703                    );
1704                    send(Event::Error {
1705                        run: run_id.clone(),
1706                        message: msg.clone(),
1707                    });
1708                    let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1709                    had_error = true;
1710                    last_error = Some("token limit".into());
1711                    break;
1712                }
1713            }
1714
1715            // Budget check: hard stop if exceeded
1716            if cost_usd + estimated_cost_unconfirmed >= budget_session {
1717                let msg = format!(
1718                    "Budget exceeded: ${:.4} of ${:.2} session cap",
1719                    cost_usd + estimated_cost_unconfirmed,
1720                    budget_session
1721                );
1722                send(Event::Error {
1723                    run: run_id.clone(),
1724                    message: msg.clone(),
1725                });
1726                // OnBudgetThreshold lifecycle: fired on hard cap. Operators can
1727                // configure a hook to e.g. page on-call when this triggers.
1728                let _ = self
1729                    .hooks
1730                    .execute(&HookEvent::OnBudgetThreshold, &msg)
1731                    .await;
1732                let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1733                had_error = true;
1734                last_error = Some("budget exceeded".into());
1735                break;
1736            }
1737            if let Some(_approval_handler) = &self.approval_handler {
1738                if waiting_for_approval {
1739                    // Route to approval handler (e.g., Telegram inline buttons)
1740                    // The handler will resolve and we continue
1741                }
1742            }
1743
1744            // ─── Org policy enforcement ──────────────────────────────────
1745            if let Some(ref policy) = self.org_policy {
1746                let proposed_file = tool_results_pending
1747                    .last()
1748                    .map(|(_, _, args, _, _)| {
1749                        args.get("path").and_then(|v| v.as_str()).unwrap_or("")
1750                    })
1751                    .unwrap_or("");
1752                if let Err(violation) =
1753                    policy.enforce(&self.config.defaults.autonomy, cost_usd, proposed_file)
1754                {
1755                    send(Event::Error {
1756                        run: run_id.clone(),
1757                        message: format!("Org policy violation: {}", violation),
1758                    });
1759                    break;
1760                }
1761            }
1762
1763            // ── Mid-run user injection (§3.7) ─────────────────────────────
1764            // Poll the inject channel non-blocking. Each pending message becomes
1765            // a new user turn so the next Brain call sees it.
1766            if let Some(rx) = inject_rx.as_mut() {
1767                loop {
1768                    match rx.try_recv() {
1769                        Ok(injected) => {
1770                            let trimmed = injected.trim().to_string();
1771                            if trimmed.is_empty() {
1772                                continue;
1773                            }
1774                            messages.push(Msg {
1775                                role: "user".into(),
1776                                content: vec![ContentBlock::Text {
1777                                    text: format!("INTERRUPT FROM USER: {}", trimmed),
1778                                }],
1779                            });
1780                            let _ = event_tx.send(Event::Message {
1781                                run: run_id.clone(),
1782                                role: "interrupt".into(),
1783                                text: trimmed,
1784                            });
1785                        }
1786                        Err(mpsc::error::TryRecvError::Empty) => break,
1787                        Err(mpsc::error::TryRecvError::Disconnected) => {
1788                            inject_rx = None;
1789                            break;
1790                        }
1791                    }
1792                }
1793            }
1794
1795            let brain = match brain_policy.chain.get(current_chain_idx) {
1796                Some(b) => b.clone(),
1797                None => break,
1798            };
1799
1800            let caps = brain.caps();
1801
1802            // ── Context compaction (§3.7) ─────────────────────────────────
1803            // If estimated tokens > 75% of context_window, truncate middle
1804            // messages to keep the original task + the last 6 exchanges.
1805            // A summary placeholder is inserted to preserve continuity.
1806            {
1807                let req_for_estimate = BrainRequest {
1808                    system: Some(system.clone()),
1809                    messages: messages.clone(),
1810                    tools: if need.required_tools {
1811                        tool_specs.clone()
1812                    } else {
1813                        vec![]
1814                    },
1815                    max_tokens: caps.max_output as u32,
1816                    temperature: 0.0,
1817                    stop: vec![],
1818                    cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1819                        "engine",
1820                        &workspace.root,
1821                        &tool_specs,
1822                    ))),
1823                };
1824                let est = estimate_request_tokens(&req_for_estimate);
1825                let threshold = (caps.context_window as f64 * 0.75) as u64;
1826                if est > threshold && messages.len() > 8 {
1827                    let original_task = messages.first().cloned();
1828                    let keep_tail: Vec<Msg> =
1829                        messages.iter().rev().take(6).cloned().collect::<Vec<_>>();
1830                    let middle: Vec<Msg> = messages
1831                        .iter()
1832                        .skip(1)
1833                        .take(messages.len().saturating_sub(7))
1834                        .cloned()
1835                        .collect();
1836                    let dropped = middle.len();
1837
1838                    // Ask the current brain for a real summary of the dropped middle
1839                    // (best-effort; fall back to a plain marker on failure).
1840                    let summary = self
1841                        .summarize_messages(brain.as_ref(), &middle)
1842                        .await
1843                        .unwrap_or_else(|| {
1844                            format!(
1845                                "{} prior messages were dropped to fit the model window.",
1846                                dropped
1847                            )
1848                        });
1849
1850                    let mut compacted: Vec<Msg> = Vec::new();
1851                    if let Some(task) = original_task {
1852                        compacted.push(task);
1853                    }
1854                    compacted.push(Msg {
1855                        role: "user".into(),
1856                        content: vec![ContentBlock::Text {
1857                            text: format!(
1858                                "[CONTEXT SUMMARY of {} earlier messages]\n{}\n\
1859                                 (Files edited and tool outputs in the turns below remain authoritative.)",
1860                                dropped, summary
1861                            ),
1862                        }],
1863                    });
1864                    for m in keep_tail.into_iter().rev() {
1865                        compacted.push(m);
1866                    }
1867                    messages = compacted;
1868                    let _ = event_tx.send(Event::Message {
1869                        run: run_id.clone(),
1870                        role: "compaction".into(),
1871                        text: format!(
1872                            "context compacted: {} messages summarized ({} tok > {} threshold)",
1873                            dropped, est, threshold
1874                        ),
1875                    });
1876                }
1877            }
1878
1879            let req = BrainRequest {
1880                system: Some(system.clone()),
1881                messages: sanitize_messages_for_provider(&messages),
1882                tools: if need.required_tools {
1883                    tool_specs.clone()
1884                } else {
1885                    vec![]
1886                },
1887                max_tokens: caps.max_output as u32,
1888                temperature: 0.0,
1889                stop: vec![],
1890                cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1891                    "engine",
1892                    &workspace.root,
1893                    &tool_specs,
1894                ))),
1895            };
1896
1897            let estimated_input = estimate_request_tokens(&req);
1898            estimated_input_unconfirmed += estimated_input;
1899            estimated_cost_unconfirmed +=
1900                caps.cost_input_per_mtok * (estimated_input as f64) / 1_000_000.0;
1901            let _ = event_tx.send(Event::TokenUsageEstimated {
1902                run: run_id.clone(),
1903                input: estimated_input,
1904                output: 0,
1905                reason: "prompt estimate before provider usage".into(),
1906            });
1907            let _ = event_tx.send(Event::CostUpdate {
1908                run: run_id.clone(),
1909                usd: cost_usd + estimated_cost_unconfirmed,
1910            });
1911
1912            let _ = event_tx.send(Event::AgentStatus {
1913                run: run_id.clone(),
1914                role: "coder".into(),
1915                status: AgentStatus::Thinking,
1916                note: format!("consulting {} · parsing request…", brain.id()),
1917            });
1918
1919            // Bound the model call itself by the remaining wall budget: a slow
1920            // or hung connection can otherwise blow the time cap before a
1921            // single stream event arrives (the in-stream timeout never fires
1922            // if the request never starts streaming). A timeout here STOPS the
1923            // run — it must not fall through to the transient-retry path.
1924            let completion = match self.config.budget.max_wall_secs {
1925                Some(max_secs) => {
1926                    let elapsed = run_started_at.elapsed().as_secs();
1927                    if elapsed >= max_secs {
1928                        None
1929                    } else {
1930                        let remaining =
1931                            std::time::Duration::from_secs(max_secs.saturating_sub(elapsed).max(1));
1932                        tokio::time::timeout(remaining, brain.complete(req))
1933                            .await
1934                            .ok()
1935                    }
1936                }
1937                None => Some(brain.complete(req).await),
1938            };
1939            let complete_result = match completion {
1940                Some(r) => r,
1941                None => {
1942                    let msg = format!(
1943                        "Time limit reached: {}s wall-clock cap",
1944                        self.config.budget.max_wall_secs.unwrap_or(0)
1945                    );
1946                    send(Event::Error {
1947                        run: run_id.clone(),
1948                        message: msg.clone(),
1949                    });
1950                    let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1951                    had_error = true;
1952                    last_error = Some("wall-clock limit".into());
1953                    break;
1954                }
1955            };
1956            match complete_result {
1957                Ok(mut stream) => {
1958                    // The provider answered — clear the transient-failure budget.
1959                    transient_retries = 0;
1960                    let mut current_tool_name = String::new();
1961                    let mut current_tool_json = String::new();
1962                    // v0.8.1 A1: tool calls are accumulated PER id, not in a
1963                    // single shared buffer. A model turn that emits N tool
1964                    // calls arrives interleaved (Start0·Δ0·Start1·Δ1·End·End,
1965                    // Ends in arbitrary order); the old single-buffer approach
1966                    // let the 2nd Start wipe the 1st call's name+args, so the
1967                    // first tool ran as `unknown`/`{}`. Keyed by id, each call
1968                    // keeps its own (name, streamed-json) until its End.
1969                    let mut pending_tools: std::collections::HashMap<String, (String, String)> =
1970                        std::collections::HashMap::new();
1971                    let mut output_chars_seen: u64 = 0;
1972                    let mut output_tokens_emitted: u64 = 0;
1973                    let mut continue_agent_loop = false;
1974                    let mut stop_after_tool_result = false;
1975                    let mut assistant_text = String::new();
1976                    let mut tool_output_seen_this_completion = false;
1977                    // Tools invoked during this completion — fed to the hallucination
1978                    // guard so it knows whether the assistant has actually inspected
1979                    // any code/state before making a claim.
1980                    let mut tools_called_this_turn: Vec<String> = Vec::new();
1981                    // Accumulated reasoning_content (DeepSeek / Moonshot / Qwen
1982                    // thinking mode). Must be echoed back on the next turn or the
1983                    // provider returns 400.
1984                    let mut reasoning_buf: String = String::new();
1985
1986                    loop {
1987                        // Wall-clock guard, tight: bound the wait for the next
1988                        // stream event by the remaining time budget. A single
1989                        // slow/hung completion (or a provider slow to send its
1990                        // first byte) is interrupted here — the per-turn check
1991                        // alone only fires between turns, so a long stream used
1992                        // to blow past the cap. The outer loop's wall check then
1993                        // ends the run honestly before any new model call.
1994                        let next_event = match self.config.budget.max_wall_secs {
1995                            Some(max_secs) => {
1996                                let elapsed = run_started_at.elapsed().as_secs();
1997                                if elapsed >= max_secs {
1998                                    break;
1999                                }
2000                                let remaining = std::time::Duration::from_secs(
2001                                    max_secs.saturating_sub(elapsed).max(1),
2002                                );
2003                                match tokio::time::timeout(remaining, stream.next()).await {
2004                                    Ok(ev) => ev,
2005                                    Err(_) => break, // wall cap hit while waiting
2006                                }
2007                            }
2008                            None => stream.next().await,
2009                        };
2010                        let event = match next_event {
2011                            Some(ev) => ev,
2012                            None => break,
2013                        };
2014                        match event {
2015                            BrainEvent::TextDelta(text) => {
2016                                assistant_text.push_str(&text);
2017                                output_chars_seen += text.chars().count() as u64;
2018                                let estimated_output = (output_chars_seen + 3) / 4;
2019                                let output_delta =
2020                                    estimated_output.saturating_sub(output_tokens_emitted);
2021                                if output_delta > 0 {
2022                                    output_tokens_emitted += output_delta;
2023                                    estimated_output_unconfirmed += output_delta;
2024                                    estimated_cost_unconfirmed += caps.cost_output_per_mtok
2025                                        * (output_delta as f64)
2026                                        / 1_000_000.0;
2027                                    let _ = event_tx.send(Event::TokenUsageEstimated {
2028                                        run: run_id.clone(),
2029                                        input: 0,
2030                                        output: output_delta,
2031                                        reason: "streamed output estimate".into(),
2032                                    });
2033                                    let _ = event_tx.send(Event::CostUpdate {
2034                                        run: run_id.clone(),
2035                                        usd: cost_usd + estimated_cost_unconfirmed,
2036                                    });
2037                                }
2038                                let _ = event_tx.send(Event::ThinkingDelta {
2039                                    run: run_id.clone(),
2040                                    text: text.clone(),
2041                                });
2042                            }
2043                            BrainEvent::ReasoningDelta(rtext) => {
2044                                // Accumulate for the assistant message we'll push at
2045                                // end-of-turn. We don't surface it as text on screen —
2046                                // the engine's normal TextDelta path handles visible
2047                                // text, this is opaque thinking content the provider
2048                                // wants echoed back.
2049                                reasoning_buf.push_str(&rtext);
2050                                let _ = event_tx.send(Event::ReasoningDelta {
2051                                    run: run_id.clone(),
2052                                    text: rtext,
2053                                });
2054                            }
2055                            BrainEvent::ToolUseStart { id, name } => {
2056                                current_tool_name = name.clone();
2057                                tools_called_this_turn.push(name.clone());
2058                                total_tools_called += 1;
2059                                current_tool_json.clear();
2060                                // Open this call's per-id accumulator (A1).
2061                                pending_tools.insert(id.clone(), (name.clone(), String::new()));
2062                                let risk = tools
2063                                    .get(&name)
2064                                    .map(|tool| tool.risk())
2065                                    .unwrap_or(RiskLevel::ReadOnly);
2066                                // Placeholder ToolUseProposed with empty args so the
2067                                // UI can open the card immediately. Real args follow
2068                                // at ToolUseEnd (see below) once the streamed JSON
2069                                // is complete.
2070                                let _ = event_tx.send(Event::ToolUseProposed {
2071                                    run: run_id.clone(),
2072                                    id: id.clone(),
2073                                    name: name.clone(),
2074                                    args: json!({}),
2075                                    risk,
2076                                });
2077                            }
2078                            BrainEvent::ToolUseDelta { id, json } => {
2079                                output_chars_seen += json.chars().count() as u64;
2080                                let estimated_output = (output_chars_seen + 3) / 4;
2081                                let output_delta =
2082                                    estimated_output.saturating_sub(output_tokens_emitted);
2083                                if output_delta > 0 {
2084                                    output_tokens_emitted += output_delta;
2085                                    estimated_output_unconfirmed += output_delta;
2086                                    estimated_cost_unconfirmed += caps.cost_output_per_mtok
2087                                        * (output_delta as f64)
2088                                        / 1_000_000.0;
2089                                    let _ = event_tx.send(Event::TokenUsageEstimated {
2090                                        run: run_id.clone(),
2091                                        input: 0,
2092                                        output: output_delta,
2093                                        reason: "streamed tool arguments estimate".into(),
2094                                    });
2095                                    let _ = event_tx.send(Event::CostUpdate {
2096                                        run: run_id.clone(),
2097                                        usd: cost_usd + estimated_cost_unconfirmed,
2098                                    });
2099                                }
2100                                // Append to THIS call's buffer (A1). Fall back
2101                                // to a fresh entry if a provider streams a
2102                                // delta before its Start (defensive).
2103                                pending_tools
2104                                    .entry(id.clone())
2105                                    .or_insert_with(|| (String::new(), String::new()))
2106                                    .1
2107                                    .push_str(&json);
2108                            }
2109                            BrainEvent::ToolUseEnd { id } => {
2110                                // Resolve THIS call's accumulated (name, json)
2111                                // by id (A1) — never from a shared buffer that
2112                                // a later call may have clobbered.
2113                                let (resolved_name, resolved_json) =
2114                                    pending_tools.remove(&id).unwrap_or_else(|| {
2115                                        (current_tool_name.clone(), current_tool_json.clone())
2116                                    });
2117
2118                                // Parse accumulated JSON
2119                                let args: serde_json::Value =
2120                                    serde_json::from_str(&resolved_json).unwrap_or(json!({}));
2121
2122                                // Check autonomy gate
2123                                let tool_name = if resolved_name.is_empty() {
2124                                    "unknown".to_string()
2125                                } else {
2126                                    resolved_name.clone()
2127                                };
2128                                // Keep the shared name current so the "running
2129                                // tool · X" status note (below) names THIS call.
2130                                current_tool_name = tool_name.clone();
2131                                let tool = tools.get(&tool_name);
2132                                let base_risk = tool
2133                                    .as_ref()
2134                                    .map(|tool| tool.risk())
2135                                    .unwrap_or(RiskLevel::ReadOnly);
2136                                let risk = crate::permissions::effective_risk_for_tool(
2137                                    &tool_name, base_risk, &args,
2138                                );
2139
2140                                // Re-emit ToolUseProposed with the REAL args now
2141                                // that the streamed JSON is complete. The first
2142                                // emission at ToolUseStart used `{}` because the
2143                                // arguments hadn't streamed yet — the UI updates
2144                                // the existing card with these real arguments.
2145                                let _ = event_tx.send(Event::ToolUseProposed {
2146                                    run: run_id.clone(),
2147                                    id: id.clone(),
2148                                    name: tool_name.clone(),
2149                                    args: args.clone(),
2150                                    risk: risk.clone(),
2151                                });
2152                                let proposed = crate::autonomy::ProposedAction {
2153                                    tool_name: tool_name.clone(),
2154                                    risk: risk.clone(),
2155                                    args: args.clone(),
2156                                };
2157
2158                                let permission =
2159                                    self.config.permissions.evaluate(&PermissionContext {
2160                                        tool_name: &proposed.tool_name,
2161                                        risk: proposed.risk.clone(),
2162                                        args: &args,
2163                                        workspace_root: &workspace.root,
2164                                        provider: Some(brain.id()),
2165                                        surface: Some("engine"),
2166                                    });
2167                                let autonomy_verdict =
2168                                    if matches!(permission.decision, Decision::Allow) {
2169                                        Some(autonomy.evaluate(&proposed))
2170                                    } else {
2171                                        None
2172                                    };
2173                                let mut decision = autonomy_verdict
2174                                    .as_ref()
2175                                    .map(|verdict| verdict.decision.clone())
2176                                    .unwrap_or_else(|| permission.decision.clone());
2177                                if !matches!(permission.decision, Decision::Allow) {
2178                                    let _ = event_tx.send(Event::Message {
2179                                        run: run_id.clone(),
2180                                        role: "permissions".into(),
2181                                        text: permission.reason.clone(),
2182                                    });
2183                                }
2184                                if matches!(decision, Decision::AskUser) {
2185                                    let summary = format!(
2186                                        "{} Risque: {:?}.",
2187                                        humanize_tool_action(&proposed.tool_name, &args),
2188                                        proposed.risk
2189                                    );
2190                                    let _ = event_tx.send(Event::ApprovalRequested {
2191                                        run: run_id.clone(),
2192                                        id: id.clone(),
2193                                        summary: summary.clone(),
2194                                        tool: Some(proposed.tool_name.clone()),
2195                                        risk: Some(format!("{:?}", proposed.risk)),
2196                                    });
2197                                    let _ = event_tx.send(Event::AgentStatus {
2198                                        run: run_id.clone(),
2199                                        role: "coder".into(),
2200                                        status: AgentStatus::WaitingForApproval,
2201                                        note: format!(
2202                                            "en attente de ton accord pour {}",
2203                                            proposed.tool_name
2204                                        ),
2205                                    });
2206                                    // OnApprovalRequested hook so external
2207                                    // notifiers (Slack, email, …) can ping the
2208                                    // operator.
2209                                    let _ = self
2210                                        .hooks
2211                                        .execute(&HookEvent::OnApprovalRequested, &summary)
2212                                        .await;
2213                                    if let Some(handler) = &self.approval_handler {
2214                                        decision = handler
2215                                            .request_approval(ApprovalRequest {
2216                                                run: run_id.clone(),
2217                                                id: id.clone(),
2218                                                tool_name: proposed.tool_name.clone(),
2219                                                risk: proposed.risk.clone(),
2220                                                args: args.clone(),
2221                                                summary,
2222                                            })
2223                                            .await;
2224                                    } else if !std::io::IsTerminal::is_terminal(&std::io::stdin()) {
2225                                        let message = format!(
2226                                            "Approbation requise pour `{}`, mais stdin n'est pas interactif. Relance avec une autonomie plus élevée ou approuve dans le cockpit.",
2227                                            proposed.tool_name
2228                                        );
2229                                        let _ = event_tx.send(Event::Error {
2230                                            run: run_id.clone(),
2231                                            message: message.clone(),
2232                                        });
2233                                        decision = Decision::Deny;
2234                                    } else {
2235                                        use std::io::{self, Write};
2236                                        print!(
2237                                            "\n\x1b[1;33m{} Approve? [y/N]\x1b[0m ",
2238                                            humanize_tool_action(&proposed.tool_name, &args)
2239                                        );
2240                                        io::stdout().flush().ok();
2241                                        let mut input = String::new();
2242                                        io::stdin().read_line(&mut input).ok();
2243                                        decision = if input.trim().eq_ignore_ascii_case("y") {
2244                                            Decision::Allow
2245                                        } else {
2246                                            Decision::Deny
2247                                        };
2248                                    }
2249                                }
2250
2251                                if matches!(decision, Decision::AskUser) {
2252                                    let _ = event_tx.send(Event::Error {
2253                                        run: run_id.clone(),
2254                                        message: format!(
2255                                            "Approbation requise pour `{}` mais aucune réponse exploitable n'a été reçue.",
2256                                            proposed.tool_name
2257                                        ),
2258                                    });
2259                                    decision = Decision::Deny;
2260                                }
2261
2262                                let _ = event_tx.send(Event::ApprovalResolved {
2263                                    run: run_id.clone(),
2264                                    id: id.clone(),
2265                                    decision: decision.clone(),
2266                                });
2267
2268                                match decision {
2269                                    Decision::Allow => {
2270                                        if autonomy_verdict
2271                                            .as_ref()
2272                                            .map(|verdict| verdict.notify)
2273                                            .unwrap_or(false)
2274                                        {
2275                                            let _ = event_tx.send(Event::Message {
2276                                                run: run_id.clone(),
2277                                                role: "autonomy".into(),
2278                                                text: format!(
2279                                                    "{} will run under trusted autonomy with checkpoint notification",
2280                                                    proposed.tool_name
2281                                                ),
2282                                            });
2283                                        }
2284                                        // Track mutations so we can auto-verify later.
2285                                        if matches!(
2286                                            proposed.risk,
2287                                            RiskLevel::Mutating | RiskLevel::Destructive
2288                                        ) {
2289                                            had_mutation = true;
2290                                        }
2291                                        // Auto-checkpoint before mutating/exec/destructive
2292                                        let needs_checkpoint = autonomy_verdict
2293                                            .as_ref()
2294                                            .map(|verdict| verdict.needs_checkpoint)
2295                                            .unwrap_or_else(|| {
2296                                                matches!(
2297                                                    proposed.risk,
2298                                                    RiskLevel::Mutating
2299                                                        | RiskLevel::Exec
2300                                                        | RiskLevel::Destructive
2301                                                )
2302                                            });
2303                                        if needs_checkpoint {
2304                                            let vetoes = self
2305                                                .hooks
2306                                                .execute(
2307                                                    &HookEvent::PreCheckpoint,
2308                                                    &proposed.tool_name,
2309                                                )
2310                                                .await;
2311                                            let checkpoint_veto = vetoes
2312                                                .iter()
2313                                                .find(|result| result.veto)
2314                                                .and_then(|result| result.veto_reason.clone());
2315                                            if let Some(reason) = checkpoint_veto {
2316                                                let _ = event_tx.send(Event::Error {
2317                                                    run: run_id.clone(),
2318                                                    message: reason,
2319                                                });
2320                                                denied_by_approval = true;
2321                                                stop_after_tool_result = true;
2322                                                continue;
2323                                            }
2324                                            if self.config.defaults.checkpointing {
2325                                                let checkpoints =
2326                                                    GitCheckpoints::new(workspace.root.clone());
2327                                                if let Ok(cp_id) = checkpoints.snapshot(&format!(
2328                                                    "pre-{}",
2329                                                    proposed.tool_name
2330                                                )) {
2331                                                    let _ =
2332                                                        event_tx.send(Event::CheckpointCreated {
2333                                                            run: run_id.clone(),
2334                                                            id: cp_id,
2335                                                            label: format!(
2336                                                                "pre-{}",
2337                                                                proposed.tool_name
2338                                                            ),
2339                                                        });
2340                                                    let _ = self
2341                                                        .hooks
2342                                                        .execute(
2343                                                            &HookEvent::PostCheckpoint,
2344                                                            &proposed.tool_name,
2345                                                        )
2346                                                        .await;
2347                                                }
2348                                            }
2349                                        }
2350
2351                                        // Pass tool name + args to the hook
2352                                        // matcher so PreToolUse hooks can
2353                                        // inspect the actual payload (e.g.
2354                                        // protect-sensitive-files needs the
2355                                        // file path, not just "fs_write").
2356                                        let hook_ctx = format!("{} {}", proposed.tool_name, args);
2357                                        let hook_results = self
2358                                            .hooks
2359                                            .execute(&HookEvent::PreToolUse, &hook_ctx)
2360                                            .await;
2361                                        if let Some(reason) = hook_results
2362                                            .iter()
2363                                            .find(|result| result.veto)
2364                                            .and_then(|result| result.veto_reason.clone())
2365                                        {
2366                                            denied_by_approval = true;
2367                                            stop_after_tool_result = true;
2368                                            let _ = event_tx.send(Event::ToolOutput {
2369                                                run: run_id.clone(),
2370                                                id: id.clone(),
2371                                                blocks: vec![Block::Text(reason.clone())],
2372                                                is_error: true,
2373                                            });
2374                                            tool_output_seen_this_completion = true;
2375                                            tool_results_pending.push((
2376                                                id.clone(),
2377                                                proposed.tool_name.clone(),
2378                                                args.clone(),
2379                                                vec![ContentBlock::Text { text: reason }],
2380                                                true,
2381                                            ));
2382                                            continue;
2383                                        }
2384
2385                                        let _ = event_tx.send(Event::ToolUseStarted {
2386                                            run: run_id.clone(),
2387                                            id: id.clone(),
2388                                        });
2389                                        let _ = event_tx.send(Event::AgentStatus {
2390                                            run: run_id.clone(),
2391                                            role: "coder".into(),
2392                                            status: AgentStatus::Working,
2393                                            note: format!("running tool · {}", current_tool_name),
2394                                        });
2395
2396                                        let result = if let Some(tool) = tool {
2397                                            let ctx = ToolCtx {
2398                                                workspace_root: workspace.root.clone(),
2399                                                run_id: run_id.clone(),
2400                                            };
2401                                            match tool.call(args.clone(), &ctx).await {
2402                                                Ok(result) => result,
2403                                                Err(e) => crate::tools::ToolResult::error(format!(
2404                                                    "Tool {} failed: {}",
2405                                                    proposed.tool_name, e
2406                                                )),
2407                                            }
2408                                        } else {
2409                                            crate::tools::ToolResult::error(format!(
2410                                                "Unknown tool: {}",
2411                                                proposed.tool_name
2412                                            ))
2413                                        };
2414
2415                                        for block in &result.content {
2416                                            if let Block::Diff { file, patch } = block {
2417                                                let plus = patch
2418                                                    .lines()
2419                                                    .filter(|l| {
2420                                                        l.starts_with('+') && !l.starts_with("+++")
2421                                                    })
2422                                                    .count()
2423                                                    as u32;
2424                                                let minus = patch
2425                                                    .lines()
2426                                                    .filter(|l| {
2427                                                        l.starts_with('-') && !l.starts_with("---")
2428                                                    })
2429                                                    .count()
2430                                                    as u32;
2431                                                let _ = event_tx.send(Event::DiffProposed {
2432                                                    run: run_id.clone(),
2433                                                    file: file.clone(),
2434                                                    patch: patch.clone(),
2435                                                    plus,
2436                                                    minus,
2437                                                });
2438                                            }
2439                                        }
2440
2441                                        let blocks = result.content.clone();
2442                                        let text = tool_result_text(&blocks);
2443                                        let content_blocks = tool_result_content_blocks(&blocks);
2444                                        let is_error = result.is_error;
2445                                        skill_evidence.push_str(&text);
2446                                        skill_evidence.push('\n');
2447                                        let _ = event_tx.send(Event::ToolOutput {
2448                                            run: run_id.clone(),
2449                                            id: id.clone(),
2450                                            blocks,
2451                                            is_error,
2452                                        });
2453                                        // Surface writes as DiffApplied so the artifacts
2454                                        // ledger sees files that fs_write/edit/multi_edit
2455                                        // touched even when the tool returned plain text.
2456                                        if !is_error
2457                                            && matches!(
2458                                                proposed.tool_name.as_str(),
2459                                                "fs_write" | "edit" | "multi_edit"
2460                                            )
2461                                        {
2462                                            if let Some(p) =
2463                                                args.get("path").and_then(|v| v.as_str())
2464                                            {
2465                                                let _ = event_tx.send(Event::DiffApplied {
2466                                                    run: run_id.clone(),
2467                                                    file: p.to_string(),
2468                                                });
2469                                            } else if let Some(p) =
2470                                                args.get("file_path").and_then(|v| v.as_str())
2471                                            {
2472                                                let _ = event_tx.send(Event::DiffApplied {
2473                                                    run: run_id.clone(),
2474                                                    file: p.to_string(),
2475                                                });
2476                                            }
2477                                        }
2478                                        let _ = self
2479                                            .hooks
2480                                            .execute(&HookEvent::PostToolUse, &proposed.tool_name)
2481                                            .await;
2482                                        tool_output_seen_this_completion = true;
2483                                        tool_results_pending.push((
2484                                            id.clone(),
2485                                            proposed.tool_name.clone(),
2486                                            args.clone(),
2487                                            content_blocks,
2488                                            is_error,
2489                                        ));
2490                                    }
2491                                    Decision::AskUser => {
2492                                        // Supervised mode: prompt user on stdin
2493                                        waiting_for_approval = true;
2494                                        let approval_id = id.clone();
2495                                        let approval_name = proposed.tool_name.clone();
2496                                        let approval_args = args.clone();
2497                                        let approval_risk = proposed.risk;
2498
2499                                        // Emit approval requested
2500                                        let _ = event_tx.send(Event::ApprovalRequested {
2501                                            run: run_id.clone(),
2502                                            id: approval_id.clone(),
2503                                            summary: format!(
2504                                                "{} Risque: {:?}.",
2505                                                humanize_tool_action(
2506                                                    &approval_name,
2507                                                    &approval_args
2508                                                ),
2509                                                approval_risk
2510                                            ),
2511                                            tool: Some(approval_name.clone()),
2512                                            risk: Some(format!("{:?}", approval_risk)),
2513                                        });
2514
2515                                        // Wait for user input on stdin
2516                                        use std::io::{self, Write};
2517                                        print!(
2518                                            "\n\x1b[1;33mApprove {}? [y/N]\x1b[0m ",
2519                                            approval_name
2520                                        );
2521                                        io::stdout().flush().ok();
2522                                        let mut input = String::new();
2523                                        io::stdin().read_line(&mut input).ok();
2524                                        let approved = input.trim().to_lowercase() == "y";
2525
2526                                        if approved {
2527                                            waiting_for_approval = false;
2528                                            // Auto-checkpoint before mutating/exec/destructive
2529                                            if matches!(
2530                                                approval_risk,
2531                                                RiskLevel::Mutating
2532                                                    | RiskLevel::Exec
2533                                                    | RiskLevel::Destructive
2534                                            ) {
2535                                                let vetoes = self
2536                                                    .hooks
2537                                                    .execute(
2538                                                        &HookEvent::PreCheckpoint,
2539                                                        &approval_name,
2540                                                    )
2541                                                    .await;
2542                                                if let Some(reason) = vetoes
2543                                                    .iter()
2544                                                    .find(|result| result.veto)
2545                                                    .and_then(|result| result.veto_reason.clone())
2546                                                {
2547                                                    let _ = event_tx.send(Event::Error {
2548                                                        run: run_id.clone(),
2549                                                        message: reason,
2550                                                    });
2551                                                    denied_by_approval = true;
2552                                                    stop_after_tool_result = true;
2553                                                    continue;
2554                                                }
2555                                                if self.config.defaults.checkpointing {
2556                                                    let checkpoints =
2557                                                        GitCheckpoints::new(workspace.root.clone());
2558                                                    if let Ok(cp_id) = checkpoints
2559                                                        .snapshot(&format!("pre-{}", approval_name))
2560                                                    {
2561                                                        let _ = event_tx.send(
2562                                                            Event::CheckpointCreated {
2563                                                                run: run_id.clone(),
2564                                                                id: cp_id,
2565                                                                label: format!(
2566                                                                    "pre-{}",
2567                                                                    approval_name
2568                                                                ),
2569                                                            },
2570                                                        );
2571                                                        let _ = self
2572                                                            .hooks
2573                                                            .execute(
2574                                                                &HookEvent::PostCheckpoint,
2575                                                                &approval_name,
2576                                                            )
2577                                                            .await;
2578                                                    }
2579                                                }
2580                                            }
2581                                            let hook_results = self
2582                                                .hooks
2583                                                .execute(&HookEvent::PreToolUse, &approval_name)
2584                                                .await;
2585                                            if let Some(reason) = hook_results
2586                                                .iter()
2587                                                .find(|result| result.veto)
2588                                                .and_then(|result| result.veto_reason.clone())
2589                                            {
2590                                                denied_by_approval = true;
2591                                                stop_after_tool_result = true;
2592                                                let _ = event_tx.send(Event::ToolOutput {
2593                                                    run: run_id.clone(),
2594                                                    id: approval_id.clone(),
2595                                                    blocks: vec![Block::Text(reason.clone())],
2596                                                    is_error: true,
2597                                                });
2598                                                tool_output_seen_this_completion = true;
2599                                                tool_results_pending.push((
2600                                                    approval_id,
2601                                                    approval_name,
2602                                                    approval_args,
2603                                                    vec![ContentBlock::Text { text: reason }],
2604                                                    true,
2605                                                ));
2606                                                continue;
2607                                            }
2608                                            let _ = event_tx.send(Event::ToolUseStarted {
2609                                                run: run_id.clone(),
2610                                                id: approval_id.clone(),
2611                                            });
2612                                            let result = if let Some(tool) = tool {
2613                                                let ctx = ToolCtx {
2614                                                    workspace_root: workspace.root.clone(),
2615                                                    run_id: run_id.clone(),
2616                                                };
2617                                                match tool.call(approval_args.clone(), &ctx).await {
2618                                                    Ok(r) => r,
2619                                                    Err(e) => {
2620                                                        crate::tools::ToolResult::error(format!(
2621                                                            "Tool {} failed: {}",
2622                                                            approval_name, e
2623                                                        ))
2624                                                    }
2625                                                }
2626                                            } else {
2627                                                crate::tools::ToolResult::error(format!(
2628                                                    "Unknown tool: {}",
2629                                                    approval_name
2630                                                ))
2631                                            };
2632                                            let blocks = result.content.clone();
2633                                            let text = tool_result_text(&blocks);
2634                                            let content_blocks =
2635                                                tool_result_content_blocks(&blocks);
2636                                            let is_error = result.is_error;
2637                                            skill_evidence.push_str(&text);
2638                                            skill_evidence.push('\n');
2639                                            let _ = event_tx.send(Event::ToolOutput {
2640                                                run: run_id.clone(),
2641                                                id: approval_id.clone(),
2642                                                blocks,
2643                                                is_error,
2644                                            });
2645                                            let _ = self
2646                                                .hooks
2647                                                .execute(&HookEvent::PostToolUse, &approval_name)
2648                                                .await;
2649                                            tool_output_seen_this_completion = true;
2650                                            tool_results_pending.push((
2651                                                approval_id,
2652                                                approval_name,
2653                                                approval_args,
2654                                                content_blocks,
2655                                                is_error,
2656                                            ));
2657                                        } else {
2658                                            let _ = event_tx.send(Event::ToolOutput {
2659                                                run: run_id.clone(),
2660                                                id: approval_id.clone(),
2661                                                blocks: vec![Block::Text("Denied by user".into())],
2662                                                is_error: true,
2663                                            });
2664                                            tool_output_seen_this_completion = true;
2665                                            tool_results_pending.push((
2666                                                approval_id,
2667                                                approval_name,
2668                                                approval_args,
2669                                                vec![ContentBlock::Text {
2670                                                    text: "Denied by user".into(),
2671                                                }],
2672                                                true,
2673                                            ));
2674                                        }
2675                                    }
2676                                    Decision::Deny => {
2677                                        denied_by_approval = true;
2678                                        stop_after_tool_result = true;
2679                                        let _ = event_tx.send(Event::ToolOutput {
2680                                            run: run_id.clone(),
2681                                            id: id.clone(),
2682                                            blocks: vec![Block::Text(
2683                                                "Denied by autonomy policy".into(),
2684                                            )],
2685                                            is_error: true,
2686                                        });
2687                                        tool_output_seen_this_completion = true;
2688                                        tool_results_pending.push((
2689                                            id.clone(),
2690                                            proposed.tool_name.clone(),
2691                                            args.clone(),
2692                                            vec![ContentBlock::Text {
2693                                                text: "Denied by autonomy policy".into(),
2694                                            }],
2695                                            true,
2696                                        ));
2697                                    }
2698                                    // The Allow* tiered decisions came in
2699                                    // with the persistent-permissions
2700                                    // feature; treat them like Allow at the
2701                                    // engine level (the autonomy/permission
2702                                    // store handles persistence above).
2703                                    Decision::AllowOnce
2704                                    | Decision::AllowSession
2705                                    | Decision::AllowAlways => {}
2706                                }
2707
2708                                current_tool_json.clear();
2709                                current_tool_name.clear();
2710                            }
2711                            BrainEvent::Usage(usage) => {
2712                                total_input += usage.input;
2713                                total_output += usage.output;
2714                                // E1: provider usage is authoritative for this
2715                                // request. Replace all pre-usage estimates
2716                                // instead of subtracting from them; otherwise
2717                                // a conservative prompt estimate keeps a
2718                                // phantom remainder and doubles HUD totals.
2719                                estimated_input_unconfirmed = 0;
2720                                estimated_output_unconfirmed = 0;
2721                                let _ = event_tx.send(Event::TokenUsage {
2722                                    run: run_id.clone(),
2723                                    input: usage.input,
2724                                    output: usage.output,
2725                                });
2726
2727                                // Calculate cost
2728                                let input_cost =
2729                                    caps.cost_input_per_mtok * (usage.input as f64) / 1_000_000.0;
2730                                let output_cost =
2731                                    caps.cost_output_per_mtok * (usage.output as f64) / 1_000_000.0;
2732                                let actual_cost = input_cost + output_cost;
2733                                cost_usd += actual_cost;
2734                                estimated_cost_unconfirmed = 0.0;
2735
2736                                let _ = event_tx.send(Event::CostUpdate {
2737                                    run: run_id.clone(),
2738                                    usd: cost_usd + estimated_cost_unconfirmed,
2739                                });
2740                            }
2741                            BrainEvent::Done(reason) => {
2742                                match reason {
2743                                    crate::event::StopReason::EndTurn => {
2744                                        // Empty-completion fallback: if this model
2745                                        // produced nothing (no text, no tool) and the
2746                                        // run has produced nothing so far, try the
2747                                        // next model instead of finishing empty.
2748                                        let this_empty = assistant_text.trim().is_empty()
2749                                            && !tool_output_seen_this_completion;
2750                                        if this_empty && !produced_any_output {
2751                                            let next_idx = current_chain_idx + 1;
2752                                            if next_idx < brain_policy.chain.len() {
2753                                                current_chain_idx = next_idx;
2754                                                let _ = event_tx.send(Event::ModelSwitched {
2755                                                    run: run_id.clone(),
2756                                                    from: brain.id().to_string(),
2757                                                    to: brain_policy.chain[current_chain_idx]
2758                                                        .id()
2759                                                        .to_string(),
2760                                                    reason: "empty response".into(),
2761                                                });
2762                                                continue_agent_loop = true;
2763                                                break;
2764                                            }
2765                                        }
2766                                        if !assistant_text.trim().is_empty() {
2767                                            produced_any_output = true;
2768                                            let mut blocks = Vec::new();
2769                                            if !reasoning_buf.is_empty() {
2770                                                blocks.push(ContentBlock::Reasoning {
2771                                                    text: reasoning_buf.clone(),
2772                                                });
2773                                            }
2774                                            blocks.push(ContentBlock::Text {
2775                                                text: assistant_text.clone(),
2776                                            });
2777                                            let assistant_msg = Msg {
2778                                                role: "assistant".into(),
2779                                                content: blocks,
2780                                            };
2781                                            let turn_messages = vec![assistant_msg.clone()];
2782                                            let has_verified_tool_context =
2783                                                tool_output_seen_this_completion
2784                                                    || messages.iter().any(|m| {
2785                                                        m.content.iter().any(|block| {
2786                                                            matches!(
2787                                                                block,
2788                                                                ContentBlock::ToolResult { .. }
2789                                                            )
2790                                                        })
2791                                                    });
2792
2793                                            if let Some(correction) = self.reasoning.guard_turn(
2794                                                &turn_messages,
2795                                                has_verified_tool_context,
2796                                            ) {
2797                                                messages.push(assistant_msg);
2798                                                let _ = event_tx.send(Event::Message {
2799                                                    run: run_id.clone(),
2800                                                    role: "guard".into(),
2801                                                    text: correction.clone(),
2802                                                });
2803                                                messages.push(Msg {
2804                                                    role: "user".into(),
2805                                                    content: vec![ContentBlock::Text {
2806                                                        text: format!("SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.", correction),
2807                                                    }],
2808                                                });
2809                                                continue_agent_loop = true;
2810                                                break;
2811                                            }
2812
2813                                            // Hallucination guard: catch claims about
2814                                            // code structure made without first calling
2815                                            // fs_read / search this turn.
2816                                            if self.reasoning.hallucination_guard {
2817                                                if let Some(correction) =
2818                                                    crate::reasoning::HallucinationGuard::verify(
2819                                                        &assistant_text,
2820                                                        &tools_called_this_turn,
2821                                                    )
2822                                                {
2823                                                    let mut blocks2 = Vec::new();
2824                                                    if !reasoning_buf.is_empty() {
2825                                                        blocks2.push(ContentBlock::Reasoning {
2826                                                            text: reasoning_buf.clone(),
2827                                                        });
2828                                                    }
2829                                                    blocks2.push(ContentBlock::Text {
2830                                                        text: assistant_text.clone(),
2831                                                    });
2832                                                    let assistant_msg2 = Msg {
2833                                                        role: "assistant".into(),
2834                                                        content: blocks2,
2835                                                    };
2836                                                    messages.push(assistant_msg2);
2837                                                    let _ = event_tx.send(Event::Message {
2838                                                        run: run_id.clone(),
2839                                                        role: "guard".into(),
2840                                                        text: correction.clone(),
2841                                                    });
2842                                                    messages.push(Msg {
2843                                                        role: "user".into(),
2844                                                        content: vec![ContentBlock::Text {
2845                                                            text: format!(
2846                                                                "SYSTEM: {}. Call fs_read or search to verify the file/symbol first, then re-state the claim with the raw evidence.",
2847                                                                correction
2848                                                            ),
2849                                                        }],
2850                                                    });
2851                                                    continue_agent_loop = true;
2852                                                    break;
2853                                                }
2854                                            }
2855
2856                                            // Tool narration guard: detect when the
2857                                            // assistant describes using a tool instead
2858                                            // of actually calling it. Only triggers when
2859                                            // no tools were called this turn but the text
2860                                            // contains tool-like language.
2861                                            if tools_called_this_turn.is_empty()
2862                                                && tool_narration_detected(&assistant_text)
2863                                            {
2864                                                let correction = "You described using a tool but did not actually call it. When a tool would help, CALL it — never narrate what it would do. Use the exact tool call format.";
2865                                                messages.push(Msg {
2866                                                    role: "assistant".into(),
2867                                                    content: vec![ContentBlock::Text {
2868                                                        text: assistant_text.clone(),
2869                                                    }],
2870                                                });
2871                                                let _ = event_tx.send(Event::Message {
2872                                                    run: run_id.clone(),
2873                                                    role: "guard".into(),
2874                                                    text: correction.into(),
2875                                                });
2876                                                messages.push(Msg {
2877                                                    role: "user".into(),
2878                                                    content: vec![ContentBlock::Text {
2879                                                        text: format!(
2880                                                            "SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.",
2881                                                            correction
2882                                                        ),
2883                                                    }],
2884                                                });
2885                                                continue_agent_loop = true;
2886                                                break;
2887                                            }
2888                                            messages.push(assistant_msg);
2889                                        }
2890
2891                                        // ── Pre-mutation self-critique (reasoning §) ─
2892                                        // If we mutated files this turn, emit a
2893                                        // structured self-review of the change set
2894                                        // so the operator/UI can see the agent's
2895                                        // own checklist before auto-verify runs.
2896                                        if had_mutation
2897                                            && self.reasoning.self_critique
2898                                            && !diffs.is_empty()
2899                                        {
2900                                            let review =
2901                                                crate::reasoning::SelfCritique::pre_mutation_review(
2902                                                    &diffs,
2903                                                    Some(&task.description),
2904                                                );
2905                                            let _ = event_tx.send(Event::Message {
2906                                                run: run_id.clone(),
2907                                                role: "self-critique".into(),
2908                                                text: review,
2909                                            });
2910                                        }
2911
2912                                        // ── Auto-verify (§10 testing) ───────────
2913                                        // The model thinks it's done. If it mutated
2914                                        // files and a verify command is configured,
2915                                        // run it; on failure, re-inject so the agent
2916                                        // fixes it (bounded). When this model's fix
2917                                        // budget is exhausted, ESCALATE to the next
2918                                        // (stronger) model in the chain rather than
2919                                        // ending the run silently unverified — that
2920                                        // is the whole point of routing cheap-first.
2921                                        if had_mutation {
2922                                            if let Some(verify_cmd) =
2923                                                self.config.defaults.verify_command.clone()
2924                                            {
2925                                                verify_attempts += 1;
2926                                                had_mutation = false;
2927                                                let parts: Vec<String> = verify_cmd
2928                                                    .split_whitespace()
2929                                                    .map(String::from)
2930                                                    .collect();
2931                                                if !parts.is_empty() {
2932                                                    let _ = event_tx.send(Event::AgentStatus {
2933                                                        run: run_id.clone(),
2934                                                        role: "verifier".into(),
2935                                                        status: AgentStatus::Working,
2936                                                        note: format!("running `{}`", verify_cmd),
2937                                                    });
2938                                                    let cmd = crate::sandbox::Command {
2939                                                        program: parts[0].clone(),
2940                                                        args: parts[1..].to_vec(),
2941                                                        env: std::collections::HashMap::new(),
2942                                                        workdir: workspace.root.clone(),
2943                                                    };
2944                                                    let limits = crate::sandbox::Limits {
2945                                                        timeout_ms: 300_000,
2946                                                        max_output_bytes: 16_000,
2947                                                    };
2948                                                    match workspace
2949                                                        .sandbox
2950                                                        .exec(&cmd, &limits)
2951                                                        .await
2952                                                    {
2953                                                        Ok(res) if res.exit_code != 0 => {
2954                                                            let _ = event_tx.send(Event::TestResult {
2955                                                                run: run_id.clone(),
2956                                                                passed: 0,
2957                                                                failed: 1,
2958                                                                detail: format!(
2959                                                                    "verify `{}` failed (exit {})",
2960                                                                    verify_cmd, res.exit_code
2961                                                                ),
2962                                                            });
2963                                                            let out = format!(
2964                                                                "{}\n{}",
2965                                                                res.stdout, res.stderr
2966                                                            );
2967                                                            let tail: String = out
2968                                                                .lines()
2969                                                                .rev()
2970                                                                .take(40)
2971                                                                .collect::<Vec<_>>()
2972                                                                .into_iter()
2973                                                                .rev()
2974                                                                .collect::<Vec<_>>()
2975                                                                .join("\n");
2976                                                            if verify_attempts
2977                                                                <= MAX_VERIFY_ATTEMPTS
2978                                                            {
2979                                                                // Same model gets a bounded
2980                                                                // chance to fix its own work.
2981                                                                messages.push(Msg {
2982                                                                    role: "user".into(),
2983                                                                    content: vec![ContentBlock::Text {
2984                                                                        text: format!(
2985                                                                            "SYSTEM: verification command `{}` FAILED (exit {}). Fix the code, then it will be re-verified. Output:\n{}",
2986                                                                            verify_cmd, res.exit_code, tail
2987                                                                        ),
2988                                                                    }],
2989                                                                });
2990                                                                continue_agent_loop = true;
2991                                                                break;
2992                                                            }
2993                                                            // Fix budget exhausted — escalate
2994                                                            // to the next model in the chain.
2995                                                            let next_idx = current_chain_idx + 1;
2996                                                            if next_idx < brain_policy.chain.len()
2997                                                                && verify_escalations
2998                                                                    < MAX_VERIFY_ESCALATIONS
2999                                                            {
3000                                                                verify_escalations += 1;
3001                                                                verify_attempts = 0;
3002                                                                let from = brain.id().to_string();
3003                                                                let to = brain_policy.chain
3004                                                                    [next_idx]
3005                                                                    .id()
3006                                                                    .to_string();
3007                                                                current_chain_idx = next_idx;
3008                                                                let _ = event_tx.send(
3009                                                                    Event::ModelSwitched {
3010                                                                        run: run_id.clone(),
3011                                                                        from,
3012                                                                        to,
3013                                                                        reason: format!(
3014                                                                            "verification still failing after {} fixes — escalating",
3015                                                                            MAX_VERIFY_ATTEMPTS
3016                                                                        ),
3017                                                                    },
3018                                                                );
3019                                                                messages.push(Msg {
3020                                                                    role: "user".into(),
3021                                                                    content: vec![ContentBlock::Text {
3022                                                                        text: format!(
3023                                                                            "SYSTEM: a previous model attempted this task but verification `{}` still FAILS (exit {}). You are a stronger model brought in to finish the job. Diagnose properly, fix the code, then it will be re-verified. Output:\n{}",
3024                                                                            verify_cmd, res.exit_code, tail
3025                                                                        ),
3026                                                                    }],
3027                                                                });
3028                                                                continue_agent_loop = true;
3029                                                                break;
3030                                                            }
3031                                                            // No stronger model left: end
3032                                                            // HONESTLY as a failure instead of
3033                                                            // pretending the run succeeded.
3034                                                            had_error = true;
3035                                                            last_error = Some(format!(
3036                                                                "verification `{}` still failing after retries and escalation",
3037                                                                verify_cmd
3038                                                            ));
3039                                                            continue_agent_loop = false;
3040                                                            break;
3041                                                        }
3042                                                        Ok(_) => {
3043                                                            let _ =
3044                                                                event_tx.send(Event::TestResult {
3045                                                                    run: run_id.clone(),
3046                                                                    passed: 1,
3047                                                                    failed: 0,
3048                                                                    detail: format!(
3049                                                                        "verify `{}` passed",
3050                                                                        verify_cmd
3051                                                                    ),
3052                                                                });
3053                                                        }
3054                                                        Err(e) => {
3055                                                            let _ = event_tx.send(Event::Message {
3056                                                                run: run_id.clone(),
3057                                                                role: "guard".into(),
3058                                                                text: format!(
3059                                                                    "verify command could not run: {}",
3060                                                                    e
3061                                                                ),
3062                                                            });
3063                                                        }
3064                                                    }
3065                                                }
3066                                            }
3067                                        }
3068                                    }
3069                                    crate::event::StopReason::ToolUse => {
3070                                        // A single model turn that emits N tool calls
3071                                        // MUST be replayed as ONE assistant message
3072                                        // carrying reasoning_content + ALL tool_calls,
3073                                        // followed by N tool-result messages. Splitting
3074                                        // it into one assistant message per tool left
3075                                        // the 2nd+ calls without reasoning_content, which
3076                                        // DeepSeek/Qwen/Moonshot thinking-mode rejects
3077                                        // with HTTP 400 ("reasoning_content must be passed
3078                                        // back"), aborting every turn after the first and
3079                                        // leaving multi-file tasks half-done. One
3080                                        // assistant message with a tool_calls array is
3081                                        // also the correct OpenAI/Anthropic shape.
3082                                        let drained: Vec<_> =
3083                                            std::mem::take(&mut tool_results_pending);
3084
3085                                        let mut assistant_blocks = Vec::new();
3086                                        if !reasoning_buf.is_empty() {
3087                                            assistant_blocks.push(ContentBlock::Reasoning {
3088                                                text: reasoning_buf.clone(),
3089                                            });
3090                                        }
3091                                        // Signature of this turn's tool calls for the
3092                                        // stuck-loop guard (name + args, order-sensitive).
3093                                        let turn_sig = {
3094                                            use std::collections::hash_map::DefaultHasher;
3095                                            use std::hash::{Hash, Hasher};
3096                                            let mut h = DefaultHasher::new();
3097                                            for (_, name, args, _, _) in &drained {
3098                                                name.hash(&mut h);
3099                                                args.to_string().hash(&mut h);
3100                                            }
3101                                            h.finish()
3102                                        };
3103                                        for (tool_id, tool_name, args, _content, _is_error) in
3104                                            &drained
3105                                        {
3106                                            assistant_blocks.push(ContentBlock::ToolUse {
3107                                                id: tool_id.clone(),
3108                                                name: tool_name.clone(),
3109                                                input: args.clone(),
3110                                            });
3111                                        }
3112                                        messages.push(Msg {
3113                                            role: "assistant".into(),
3114                                            content: assistant_blocks,
3115                                        });
3116
3117                                        let turn_had_tools = !drained.is_empty();
3118                                        for (tool_id, _tool_name, _args, content, is_error) in
3119                                            drained
3120                                        {
3121                                            messages.push(Msg {
3122                                                role: "user".into(),
3123                                                content: vec![ContentBlock::ToolResult {
3124                                                    tool_use_id: tool_id,
3125                                                    content,
3126                                                    is_error: Some(is_error),
3127                                                }],
3128                                            });
3129                                        }
3130                                        if tool_output_seen_this_completion {
3131                                            produced_any_output = true;
3132                                        }
3133
3134                                        // Stuck-loop guard: identical tool-call turns.
3135                                        if turn_had_tools {
3136                                            if last_tool_sig == Some(turn_sig) {
3137                                                repeated_tool_turns += 1;
3138                                            } else {
3139                                                repeated_tool_turns = 0;
3140                                                last_tool_sig = Some(turn_sig);
3141                                            }
3142                                        }
3143                                        if repeated_tool_turns == 2 {
3144                                            // 3rd identical turn — nudge the model.
3145                                            messages.push(Msg {
3146                                                role: "user".into(),
3147                                                content: vec![ContentBlock::Text {
3148                                                    text: "guard: you have issued the exact same \
3149                                                           tool call(s) three turns in a row with \
3150                                                           identical arguments. The result will \
3151                                                           not change. State what you learned and \
3152                                                           take a DIFFERENT action — or finish \
3153                                                           with your best answer now."
3154                                                        .into(),
3155                                                }],
3156                                            });
3157                                            send(Event::Message {
3158                                                run: run_id.clone(),
3159                                                role: "guard".into(),
3160                                                text: "repeated identical tool calls — nudging \
3161                                                       the model to change approach"
3162                                                    .into(),
3163                                            });
3164                                        } else if repeated_tool_turns >= 4 {
3165                                            // 5th identical turn — stop honestly instead of
3166                                            // burning the remaining turn/budget allowance.
3167                                            send(Event::Message {
3168                                                run: run_id.clone(),
3169                                                role: "guard".into(),
3170                                                text: "stuck loop: 5 identical tool-call turns \
3171                                                       — stopping the run"
3172                                                    .into(),
3173                                            });
3174                                            had_error = true;
3175                                            last_error = Some(
3176                                                "stopped by stuck-loop guard (5 identical \
3177                                                 tool-call turns)"
3178                                                    .into(),
3179                                            );
3180                                            continue_agent_loop = false;
3181                                            break;
3182                                        }
3183
3184                                        continue_agent_loop =
3185                                            !waiting_for_approval && !stop_after_tool_result;
3186                                        break;
3187                                    }
3188                                    _ => {}
3189                                }
3190                                break; // Done
3191                            }
3192                            BrainEvent::Error(msg) => {
3193                                let _ = event_tx.send(Event::Error {
3194                                    run: run_id.clone(),
3195                                    message: msg.clone(),
3196                                });
3197                                let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
3198                                let next_idx = current_chain_idx + 1;
3199                                if next_idx < brain_policy.chain.len() {
3200                                    current_chain_idx = next_idx;
3201                                    let switch_ctx = format!(
3202                                        "{} -> {}",
3203                                        brain.id(),
3204                                        brain_policy.chain[current_chain_idx].id()
3205                                    );
3206                                    let _ = event_tx.send(Event::ModelSwitched {
3207                                        run: run_id.clone(),
3208                                        from: brain.id().to_string(),
3209                                        to: brain_policy.chain[current_chain_idx].id().to_string(),
3210                                        reason: msg,
3211                                    });
3212                                    let _ = self
3213                                        .hooks
3214                                        .execute(&HookEvent::OnModelSwitched, &switch_ctx)
3215                                        .await;
3216                                    continue_agent_loop = true;
3217                                } else {
3218                                    had_error = true;
3219                                    last_error = Some(msg);
3220                                }
3221                                break;
3222                            }
3223                        }
3224                    }
3225
3226                    // Robust empty-completion fallback: some providers end the
3227                    // stream WITHOUT a Done(EndTurn) (so the in-stream check never
3228                    // fires). If this completion produced nothing and the run has
3229                    // produced nothing, advance to the next model in the chain.
3230                    if !continue_agent_loop && !had_error {
3231                        let this_empty =
3232                            assistant_text.trim().is_empty() && !tool_output_seen_this_completion;
3233                        if this_empty && !produced_any_output {
3234                            let next_idx = current_chain_idx + 1;
3235                            if next_idx < brain_policy.chain.len() {
3236                                let _ = event_tx.send(Event::ModelSwitched {
3237                                    run: run_id.clone(),
3238                                    from: brain.id().to_string(),
3239                                    to: brain_policy.chain[next_idx].id().to_string(),
3240                                    reason: "empty response".into(),
3241                                });
3242                                current_chain_idx = next_idx;
3243                                continue;
3244                            }
3245                        }
3246                    }
3247
3248                    if continue_agent_loop {
3249                        continue;
3250                    }
3251                    break; // Task complete
3252                }
3253                Err(e) => {
3254                    let err_msg = format!("{}", e);
3255                    let _ = event_tx.send(Event::Error {
3256                        run: run_id.clone(),
3257                        message: err_msg.clone(),
3258                    });
3259
3260                    // Transient failures (rate limit, timeout, 5xx, connection
3261                    // blips) get a bounded retry on the SAME brain first —
3262                    // otherwise one 429 on the primary silently downgrades the
3263                    // whole run to a weaker fallback model.
3264                    let retry_after_hint = match e.downcast_ref::<BrainError>() {
3265                        Some(BrainError::RateLimit { retry_after }) => Some(*retry_after),
3266                        Some(BrainError::Timeout) => Some(None),
3267                        Some(BrainError::ServerError { status, .. }) if *status >= 500 => {
3268                            Some(None)
3269                        }
3270                        Some(_) => None,
3271                        None => {
3272                            let s = err_msg.to_lowercase();
3273                            let transient = s.contains("rate limit")
3274                                || s.contains("429")
3275                                || s.contains("timeout")
3276                                || s.contains("timed out")
3277                                || s.contains("connection")
3278                                || s.contains("overloaded")
3279                                || s.contains("502")
3280                                || s.contains("503");
3281                            if transient { Some(None) } else { None }
3282                        }
3283                    };
3284                    if let Some(hint) = retry_after_hint {
3285                        if transient_retries < MAX_TRANSIENT_RETRIES {
3286                            transient_retries += 1;
3287                            // Honour the provider's Retry-After when given,
3288                            // capped so a run never stalls for minutes.
3289                            let secs = hint.unwrap_or(2u64.pow(transient_retries)).min(20);
3290                            send(Event::Message {
3291                                run: run_id.clone(),
3292                                role: "guard".into(),
3293                                text: format!(
3294                                    "provider hiccup ({}) — retrying {} in {}s (attempt {}/{})",
3295                                    err_msg,
3296                                    brain.id(),
3297                                    secs,
3298                                    transient_retries,
3299                                    MAX_TRANSIENT_RETRIES
3300                                ),
3301                            });
3302                            tokio::time::sleep(std::time::Duration::from_secs(secs)).await;
3303                            continue;
3304                        }
3305                    }
3306                    transient_retries = 0;
3307
3308                    // Try next in chain
3309                    let next_idx = current_chain_idx + 1;
3310                    if next_idx < brain_policy.chain.len() {
3311                        current_chain_idx = next_idx;
3312                        let _ = event_tx.send(Event::ModelSwitched {
3313                            run: run_id.clone(),
3314                            from: brain.id().to_string(),
3315                            to: brain_policy.chain[current_chain_idx].id().to_string(),
3316                            reason: err_msg,
3317                        });
3318                    } else {
3319                        had_error = true;
3320                        last_error = Some(err_msg);
3321                        break;
3322                    }
3323                }
3324            }
3325        }
3326
3327        // Final token usage. In-stream BrainEvent::Usage already emitted one
3328        // Event::TokenUsage per completion with INCREMENTS — surfaces sum
3329        // those events, so re-emitting the cumulative total here double-
3330        // counted every confirmed token. Only emit when the provider never
3331        // reported usage, and then as the ESTIMATE it actually is.
3332        let final_input = total_input + estimated_input_unconfirmed;
3333        let final_output = total_output + estimated_output_unconfirmed;
3334        if total_input == 0 && total_output == 0 && (final_input > 0 || final_output > 0) {
3335            let _ = event_tx.send(Event::TokenUsageEstimated {
3336                run: run_id.clone(),
3337                input: final_input,
3338                output: final_output,
3339                reason: "provider reported no usage events".into(),
3340            });
3341        }
3342        let final_status = if had_error {
3343            format!(
3344                "error: {}",
3345                last_error.unwrap_or_else(|| "run failed".into())
3346            )
3347        } else if waiting_for_approval {
3348            "waiting_for_approval".into()
3349        } else if denied_by_approval {
3350            "denied".into()
3351        } else if diffs.is_empty() && total_tools_called == 0 {
3352            "no actions taken".into()
3353        } else {
3354            "completed".into()
3355        };
3356        let final_note = match final_status.as_str() {
3357            "completed" => format!("completed · {}↑ {}↓ tok", final_input, final_output),
3358            "waiting_for_approval" => "en attente de ton accord".to_string(),
3359            "denied" => "arrêté · approbation refusée".to_string(),
3360            other => other.to_string(),
3361        };
3362
3363        // Mark coder lane done — clears the animated caret cleanly.
3364        let _ = event_tx.send(Event::AgentStatus {
3365            run: run_id.clone(),
3366            role: "coder".into(),
3367            status: AgentStatus::Done,
3368            note: final_note,
3369        });
3370
3371        let outcome = OutcomeSummary {
3372            status: final_status,
3373            diffs,
3374            cost_usd: cost_usd + estimated_cost_unconfirmed,
3375            tokens: TokenUsage {
3376                input: total_input + estimated_input_unconfirmed,
3377                output: total_output + estimated_output_unconfirmed,
3378            },
3379            cost_comparison: String::new(),
3380            duration_ms: Some(run_started_at.elapsed().as_millis() as u64),
3381        };
3382
3383        // Persist task to memory
3384        if let Some(mem) = &self.memory {
3385            let _ = mem.save_task(&crate::memory::TaskMem {
3386                run_id: run_id.0.clone(),
3387                messages: messages.clone(),
3388                created_at: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(),
3389            });
3390        }
3391
3392        // Per-repo routing memory: record only verification-backed outcomes —
3393        // a "completed" without a verify command proves nothing.
3394        {
3395            use crate::router::learned::RunRoutingOutcome;
3396            let routing_outcome = if had_error {
3397                Some(RunRoutingOutcome::Failed)
3398            } else if verify_escalations > 0 {
3399                Some(RunRoutingOutcome::Escalated)
3400            } else if verify_attempts > 0 && outcome.status == "completed" {
3401                Some(RunRoutingOutcome::VerifiedSuccess)
3402            } else {
3403                None
3404            };
3405            if let Some(o) = routing_outcome {
3406                repo_routing.record(&classified_tier, o);
3407            }
3408        }
3409
3410        // Propose skill candidate from successful run
3411        if outcome.status == "completed" {
3412            if let Some(skills) = &self.skills {
3413                if let Some(candidate) = Curator::propose_skill_if_missing(
3414                    &task.description,
3415                    &skill_evidence,
3416                    skills.as_ref(),
3417                ) {
3418                    let skill_name = candidate.name.clone();
3419                    let _ = event_tx.send(Event::SkillLearned {
3420                        run: run_id.clone(),
3421                        name: skill_name.clone(),
3422                    });
3423                    let _ = self
3424                        .hooks
3425                        .execute(&HookEvent::OnSkillLearned, &skill_name)
3426                        .await;
3427                    let _ = skills.add(candidate);
3428                }
3429            }
3430
3431            // Auto-distill facts from the successful run. Reconstruct the event
3432            // view from the final conversation: ToolUse blocks carry the real
3433            // tool args (file paths, content), Text blocks carry reasoning — both
3434            // are what the Distiller mines for durable user facts (§3.8).
3435            if let Some(mem) = &self.memory {
3436                let events = events_from_messages(&run_id, &messages);
3437                Distiller::distill(mem, &events, &task.description).await;
3438            }
3439        }
3440
3441        let _ = event_tx.send(Event::RunFinished {
3442            run: run_id.clone(),
3443            outcome: outcome.clone(),
3444        });
3445
3446        // PostRun lifecycle hook (best-effort, non-blocking semantics).
3447        let _ = self
3448            .hooks
3449            .execute(&HookEvent::PostRun, &task.description)
3450            .await;
3451
3452        Ok(outcome)
3453    }
3454}
3455
3456// ─── Tool narration detection ──────────────────────────────────────────────────
3457
3458/// Detects when the assistant describes using a tool ("I'll run the tests",
3459/// "Let me search for...") without actually emitting a ToolUse block.
3460/// Returns true when tool-like language is present but no tools were called.
3461fn tool_narration_detected(text: &str) -> bool {
3462    let lower = text.to_lowercase();
3463    let patterns = [
3464        "i'll use",
3465        "i will use",
3466        "let me use",
3467        "i'll run",
3468        "i will run",
3469        "let me run",
3470        "i'll search",
3471        "i will search",
3472        "let me search",
3473        "i'll check",
3474        "i will check",
3475        "let me check",
3476        "i'll read",
3477        "i will read",
3478        "let me read",
3479        "i'll write",
3480        "i will write",
3481        "let me write",
3482        "i'll execute",
3483        "i will execute",
3484        "let me execute",
3485        "i'll call",
3486        "i will call",
3487        "let me call",
3488        "i'll fetch",
3489        "i will fetch",
3490        "let me fetch",
3491        "i'll look up",
3492        "i will look up",
3493        "let me look up",
3494        "i'll test",
3495        "i will test",
3496        "let me test",
3497        "running the test",
3498        "running the command",
3499        "searching for",
3500        "looking up",
3501        // I1: French narration. Sparrow answers in French, so the English-only
3502        // guard above never fired for francophone users — the central
3503        // "describe the tool instead of calling it" failsafe was dead in the
3504        // user's own language. These cover the common openings.
3505        "je vais utiliser",
3506        "je vais lancer",
3507        "je vais exécuter",
3508        "je vais executer", // tolerate the unaccented spelling
3509        "je vais lire",
3510        "je vais écrire",
3511        "je vais créer",
3512        "je vais modifier",
3513        "je vais chercher",
3514        "je vais rechercher",
3515        "je vais vérifier",
3516        "je vais regarder",
3517        "je vais consulter",
3518        "je vais ouvrir",
3519        "je vais appeler",
3520        "laisse-moi",
3521        "laissez-moi",
3522        "permets-moi de",
3523        "permettez-moi de",
3524        "je m'occupe de",
3525        "je commence par",
3526        "je vais d'abord",
3527    ];
3528    patterns.iter().any(|p| lower.contains(p))
3529}
3530
3531#[cfg(test)]
3532mod tests {
3533    use super::*;
3534
3535    #[test]
3536    fn main_agent_system_prompt_carries_the_reasoning_protocol() {
3537        let workspace_root = PathBuf::from(".");
3538        let prompt = build_system_prompt(SystemPromptInput {
3539            identity: &Identity::default(),
3540            tier: Some(&crate::router::TaskTier::Hard),
3541            workspace_root: &workspace_root,
3542            facts: &[],
3543            memory_docs: &[],
3544            instruction_docs: &[],
3545            skills: &[],
3546            skill_catalog: &[],
3547        });
3548        // Anchors taken from src/engine/main_soul.md. The soul has been
3549        // rewritten more than once; we pin the load-bearing concepts (tier
3550        // triage, the tribunal with its three reviewer roles, the
3551        // anti-simulation rule, the "real execution beats mental
3552        // simulation" instruction) rather than any single section header.
3553        for marker in [
3554            "TIER TRIAGE",
3555            "Tribunal",
3556            "Skeptic",
3557            "Adversary",
3558            "Anti-simulation",
3559            "Real execution beats",
3560        ] {
3561            assert!(prompt.contains(marker), "main soul must contain `{marker}`");
3562        }
3563    }
3564
3565    #[test]
3566    fn trivial_prompt_uses_lean_mode_with_skill_index_but_no_full_soul() {
3567        let skill = crate::capabilities::Skill {
3568            name: "tiny-skill".into(),
3569            description: "Tiny relevant skill".into(),
3570            trigger: vec!["tiny".into()],
3571            body: "Do the tiny thing.".into(),
3572            source_file: "tiny/SKILL.md".into(),
3573            usage_count: 0,
3574            created_at: String::new(),
3575            score: 1.0,
3576            auto_generated: false,
3577            references: Vec::new(),
3578            templates: Vec::new(),
3579            scripts: Vec::new(),
3580            assets: Vec::new(),
3581            manifest_version: None,
3582            allowed_tools: Vec::new(),
3583        };
3584        let workspace_root = PathBuf::from(".");
3585        let skills = vec![skill];
3586        let prompt = build_system_prompt(SystemPromptInput {
3587            identity: &Identity::default(),
3588            tier: Some(&crate::router::TaskTier::Trivial),
3589            workspace_root: &workspace_root,
3590            facts: &[],
3591            memory_docs: &[],
3592            instruction_docs: &[],
3593            skills: &skills,
3594            skill_catalog: &skills,
3595        });
3596
3597        assert!(prompt.contains("Simple-task mode"));
3598        assert!(!prompt.contains("TIER TRIAGE"));
3599        // v0.9.1: the lightweight skill INDEX is now injected at every tier so
3600        // the agent can discover what's installed even on simple tasks. The full
3601        // reasoning protocol (soul) stays out of lean mode.
3602        assert!(prompt.contains("Skill library ("));
3603        assert!(prompt.contains("## Relevant skills for this task"));
3604        // Lean mode must still carry the action invariants so "simple" tasks
3605        // don't regress into narrate-don't-call behaviour.
3606        assert!(prompt.contains("Call tools, never narrate them"));
3607    }
3608
3609    #[test]
3610    fn provider_messages_strip_ui_status_leaks() {
3611        let messages = vec![Msg {
3612            role: "user".into(),
3613            content: vec![ContentBlock::Text {
3614                text: "keep this\n✓ coder completed · 4487↑ 150↓ tok\ncoder ◌ consulting deepseek · parsing request…\nkeep that".into(),
3615            }],
3616        }];
3617
3618        let sanitized = sanitize_messages_for_provider(&messages);
3619        let ContentBlock::Text { text } = &sanitized[0].content[0] else {
3620            panic!("expected text block");
3621        };
3622        assert!(text.contains("keep this"));
3623        assert!(text.contains("keep that"));
3624        assert!(!text.contains("completed ·"));
3625        assert!(!text.contains("◌ consulting"));
3626    }
3627
3628    #[test]
3629    fn tool_narration_guard_fires_in_french() {
3630        // I1: the guard was English-only; francophone narration slipped through.
3631        assert!(tool_narration_detected(
3632            "Je vais créer le fichier poeme.txt."
3633        ));
3634        assert!(tool_narration_detected(
3635            "Laisse-moi vérifier le contenu du dossier."
3636        ));
3637        assert!(tool_narration_detected(
3638            "Je m'occupe de lire app.js tout de suite."
3639        ));
3640        // Still catches English.
3641        assert!(tool_narration_detected("Let me run the tests."));
3642        // A normal answer with no tool narration must NOT fire.
3643        assert!(!tool_narration_detected(
3644            "Voici le résultat : ton fichier contient un haïku."
3645        ));
3646    }
3647
3648    #[test]
3649    fn named_agents_keep_their_own_soul() {
3650        let planner = Identity {
3651            name: "planner".into(),
3652            role: "technical architect".into(),
3653            personality: "structured".into(),
3654        };
3655        let workspace_root = PathBuf::from(".");
3656        let prompt = build_system_prompt(SystemPromptInput {
3657            identity: &planner,
3658            tier: Some(&crate::router::TaskTier::Hard),
3659            workspace_root: &workspace_root,
3660            facts: &[],
3661            memory_docs: &[],
3662            instruction_docs: &[],
3663            skills: &[],
3664            skill_catalog: &[],
3665        });
3666        // The main soul's signature section header — if it leaks into a
3667        // named identity, the focused soul is being diluted.
3668        assert!(
3669            !prompt.contains("TIER TRIAGE"),
3670            "named souls must not be diluted by the main protocol"
3671        );
3672    }
3673
3674    #[test]
3675    fn initial_user_content_blocks_embeds_uploaded_images() {
3676        let tmp = tempfile::tempdir().expect("tempdir");
3677        let image = tmp.path().join("shot.png");
3678        std::fs::write(
3679            &image,
3680            [
3681                0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n', 0, 0, 0, 0,
3682            ],
3683        )
3684        .expect("write image");
3685        let description = format!(
3686            "analyse this\n\n[Attached files]\n### file: shot.png\n[uploaded: {}]",
3687            image.display()
3688        );
3689
3690        let blocks = initial_user_content_blocks(tmp.path(), &description);
3691        assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
3692        assert!(blocks.iter().any(|block| matches!(
3693            block,
3694            ContentBlock::Image {
3695                source: ImageSource::Base64 {
3696                    media_type,
3697                    data,
3698                }
3699            } if media_type == "image/png" && !data.is_empty()
3700        )));
3701    }
3702
3703    #[test]
3704    fn tool_result_content_blocks_preserves_images() {
3705        let blocks = tool_result_content_blocks(&[
3706            Block::Text("screenshot captured".into()),
3707            Block::Image {
3708                data: vec![1, 2, 3],
3709                mime: "image/png".into(),
3710            },
3711        ]);
3712
3713        assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
3714        assert!(blocks.iter().any(|block| matches!(
3715            block,
3716            ContentBlock::Image {
3717                source: ImageSource::Base64 {
3718                    media_type,
3719                    data,
3720                }
3721            } if media_type == "image/png" && data == "AQID"
3722        )));
3723    }
3724}
sparrow/engine/mod.rs

sparrow/engine/
mod.rs