sparrow/engine/
mod.rs

1use async_trait::async_trait;
2use futures::StreamExt;
3use serde_json::json;
4use std::path::PathBuf;
5use std::sync::Arc;
6use tokio::sync::mpsc;
7
8use crate::agent::AgentStore;
9use crate::autonomy::{AutonomyContract, Checkpoints, GitCheckpoints};
10use crate::capabilities::{Curator, SkillLibrary};
11use crate::config::Config;
12use crate::event::{
13    AgentStatus, AutonomyLevel, Block, Decision, Event, OutcomeSummary, RiskLevel, RunId,
14    TokenUsage,
15};
16use crate::extras::Distiller;
17use crate::hooks::{HookEvent, HookRegistry};
18use crate::instructions::InstructionDoc;
19use crate::memory::{Fact, Memory, MemoryDoc, MemoryDocKind};
20use crate::permissions::PermissionContext;
21use crate::provider::{
22    Brain, BrainError, BrainEvent, BrainRequest, ContentBlock, ImageSource, Msg, PromptCacheConfig,
23    ToolSpec,
24};
25use crate::reasoning::ReasoningEngine;
26use crate::redaction::RedactionFilter;
27use crate::router::{BudgetState, Router, TaskTier};
28use crate::sandbox::Sandbox;
29use crate::tools::{ToolCtx, ToolRegistry};
30
31pub mod scorer;
32pub mod treesitter;
33
34// ─── Agent identity ─────────────────────────────────────────────────────────────
35
36#[derive(Debug, Clone)]
37pub struct Identity {
38    pub name: String,
39    pub role: String,
40    pub personality: String,
41}
42
43impl Default for Identity {
44    fn default() -> Self {
45        Self {
46            name: "sparrow".into(),
47            role: "software engineer".into(),
48            personality: "concise, competent, helpful".into(),
49        }
50    }
51}
52
53// ─── Brain policy ───────────────────────────────────────────────────────────────
54
55pub struct BrainPolicy {
56    /// The fallback chain selected by the Router for this run
57    pub chain: Vec<Arc<dyn Brain>>,
58    pub current_index: usize,
59}
60
61impl BrainPolicy {
62    pub fn current(&self) -> Option<Arc<dyn Brain>> {
63        self.chain.get(self.current_index).cloned()
64    }
65
66    pub fn next(&mut self) -> Option<Arc<dyn Brain>> {
67        self.current_index += 1;
68        self.current()
69    }
70}
71
72// ─── Workspace ──────────────────────────────────────────────────────────────────
73
74pub struct Workspace {
75    pub root: PathBuf,
76    pub sandbox: Arc<dyn Sandbox>,
77}
78
79// ─── Agent run ─────────────────────────────────────────────────────────────────
80
81pub struct AgentRun {
82    pub id: RunId,
83    pub identity: Identity,
84    pub brain_policy: BrainPolicy,
85    pub autonomy: AutonomyContract,
86    pub tools: Arc<ToolRegistry>,
87    pub workspace: Workspace,
88}
89
90fn estimate_text_tokens(text: &str) -> u64 {
91    let chars = text.chars().count() as u64;
92    ((chars + 3) / 4).max(1)
93}
94
95fn estimate_content_tokens(blocks: &[ContentBlock]) -> u64 {
96    blocks
97        .iter()
98        .map(|block| match block {
99            ContentBlock::Text { text } => estimate_text_tokens(text),
100            ContentBlock::Image { source } => match source {
101                crate::provider::ImageSource::Base64 { data, .. } => {
102                    256 + estimate_text_tokens(data).min(2_000)
103                }
104                crate::provider::ImageSource::Url { url } => 256 + estimate_text_tokens(url),
105            },
106            ContentBlock::ToolUse { name, input, .. } => {
107                estimate_text_tokens(name) + estimate_text_tokens(&input.to_string())
108            }
109            ContentBlock::ToolResult { content, .. } => 8 + estimate_content_tokens(content),
110            ContentBlock::Reasoning { text } => estimate_text_tokens(text),
111        })
112        .sum()
113}
114
115fn estimate_request_tokens(req: &BrainRequest) -> u64 {
116    let system = req.system.as_deref().map(estimate_text_tokens).unwrap_or(0);
117    let messages: u64 = req
118        .messages
119        .iter()
120        .map(|msg| estimate_text_tokens(&msg.role) + estimate_content_tokens(&msg.content) + 4)
121        .sum();
122    let tools: u64 = req
123        .tools
124        .iter()
125        .map(|tool| {
126            estimate_text_tokens(&tool.name)
127                + estimate_text_tokens(&tool.description)
128                + estimate_text_tokens(&tool.input_schema.to_string())
129        })
130        .sum();
131    system + messages + tools
132}
133
134fn base64_encode(data: &[u8]) -> String {
135    const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
136    let mut out = String::with_capacity(data.len().div_ceil(3) * 4);
137    for chunk in data.chunks(3) {
138        let b0 = chunk[0] as u32;
139        let b1 = if chunk.len() > 1 { chunk[1] as u32 } else { 0 };
140        let b2 = if chunk.len() > 2 { chunk[2] as u32 } else { 0 };
141        let triple = (b0 << 16) | (b1 << 8) | b2;
142        out.push(CHARS[((triple >> 18) & 63) as usize] as char);
143        out.push(CHARS[((triple >> 12) & 63) as usize] as char);
144        out.push(if chunk.len() > 1 {
145            CHARS[((triple >> 6) & 63) as usize] as char
146        } else {
147            '='
148        });
149        out.push(if chunk.len() > 2 {
150            CHARS[(triple & 63) as usize] as char
151        } else {
152            '='
153        });
154    }
155    out
156}
157
158fn image_block_from_path(path: &std::path::Path) -> Option<ContentBlock> {
159    let mime = mime_guess::from_path(path).first_or_octet_stream();
160    if !mime.type_().as_str().eq_ignore_ascii_case("image") {
161        return None;
162    }
163    let data = std::fs::read(path).ok()?;
164    Some(ContentBlock::Image {
165        source: ImageSource::Base64 {
166            media_type: mime.to_string(),
167            data: base64_encode(&data),
168        },
169    })
170}
171
172fn collect_uploaded_paths(description: &str) -> Vec<String> {
173    let mut paths = Vec::new();
174    for line in description.lines() {
175        let Some(idx) = line.find("uploaded:") else {
176            continue;
177        };
178        let rest = line[idx + "uploaded:".len()..].trim();
179        let path = rest
180            .strip_prefix('[')
181            .unwrap_or(rest)
182            .split(']')
183            .next()
184            .unwrap_or(rest)
185            .trim()
186            .trim_matches('"')
187            .trim_matches('\'');
188        if !path.is_empty() {
189            paths.push(path.to_string());
190        }
191    }
192    paths
193}
194
195fn initial_user_content_blocks(
196    workspace_root: &std::path::Path,
197    description: &str,
198) -> Vec<ContentBlock> {
199    let mut blocks = vec![ContentBlock::Text {
200        text: description.to_string(),
201    }];
202    let mut seen = std::collections::HashSet::new();
203    for raw_path in collect_uploaded_paths(description) {
204        let path = std::path::PathBuf::from(&raw_path);
205        let full_path = if path.is_absolute() {
206            path
207        } else {
208            workspace_root.join(path)
209        };
210        if !seen.insert(full_path.clone()) {
211            continue;
212        }
213        if let Some(block) = image_block_from_path(&full_path) {
214            blocks.push(block);
215        }
216    }
217    blocks
218}
219
220pub fn summarize_model_chain(chain_ids: &[String], limit: usize) -> String {
221    if chain_ids.is_empty() {
222        return "aucun modèle disponible".into();
223    }
224    let limit = limit.max(1);
225    let mut visible: Vec<String> = chain_ids.iter().take(limit).cloned().collect();
226    if chain_ids.len() > limit {
227        visible.push(format!("+{} autres fallbacks", chain_ids.len() - limit));
228    }
229    visible.join(" -> ")
230}
231
232fn strip_ui_status_leaks(text: &str) -> String {
233    text.lines()
234        .filter(|line| {
235            let lower = line.to_lowercase();
236            !((lower.contains(" completed ·") && lower.contains('↑') && lower.contains('↓'))
237                || (lower.contains("◌") && lower.contains("consulting"))
238                || (lower.contains("parsing request") && lower.contains("consulting")))
239        })
240        .collect::<Vec<_>>()
241        .join("\n")
242}
243
244fn sanitize_messages_for_provider(messages: &[Msg]) -> Vec<Msg> {
245    messages
246        .iter()
247        .map(|msg| Msg {
248            role: msg.role.clone(),
249            content: msg
250                .content
251                .iter()
252                .filter_map(|block| match block {
253                    ContentBlock::Text { text } => {
254                        let cleaned = strip_ui_status_leaks(text);
255                        if cleaned.trim().is_empty() {
256                            None
257                        } else {
258                            Some(ContentBlock::Text { text: cleaned })
259                        }
260                    }
261                    ContentBlock::Reasoning { text } => Some(ContentBlock::Reasoning {
262                        text: strip_ui_status_leaks(text),
263                    }),
264                    ContentBlock::ToolResult {
265                        tool_use_id,
266                        content,
267                        is_error,
268                    } => Some(ContentBlock::ToolResult {
269                        tool_use_id: tool_use_id.clone(),
270                        content: sanitize_messages_for_provider(&[Msg {
271                            role: "tool".into(),
272                            content: content.clone(),
273                        }])
274                        .into_iter()
275                        .next()
276                        .map(|m| m.content)
277                        .unwrap_or_default(),
278                        is_error: *is_error,
279                    }),
280                    other => Some(other.clone()),
281                })
282                .collect(),
283        })
284        .collect()
285}
286
287fn prompt_cache_key(scope: &str, workspace_root: &std::path::Path, tools: &[ToolSpec]) -> String {
288    use std::hash::{Hash, Hasher};
289
290    let mut hasher = std::collections::hash_map::DefaultHasher::new();
291    scope.hash(&mut hasher);
292    workspace_root.display().to_string().hash(&mut hasher);
293    for tool in tools {
294        tool.name.hash(&mut hasher);
295        tool.description.hash(&mut hasher);
296        tool.input_schema.to_string().hash(&mut hasher);
297    }
298    format!("sparrow-{}-{:016x}", scope, hasher.finish())
299}
300
301// ─── System prompt / SOUL ───────────────────────────────────────────────────────
302
303/// Best-effort snapshot of the workspace's git state — branch, HEAD, and a
304/// dirty-file summary — for injection into the system prompt. Returns None
305/// if the path isn't a git repo or git isn't installed.
306fn read_git_context(workspace_root: &PathBuf) -> Option<String> {
307    use std::process::Command;
308    use std::time::Duration;
309    if !workspace_root.join(".git").exists() {
310        return None;
311    }
312    fn run(workspace_root: &PathBuf, args: &[&str]) -> Option<String> {
313        let mut cmd = Command::new("git");
314        cmd.arg("-C").arg(workspace_root).args(args);
315        let child = cmd
316            .stdout(std::process::Stdio::piped())
317            .stderr(std::process::Stdio::null())
318            .spawn()
319            .ok()?;
320        // 1.5s ceiling per call: a corrupt repo or filesystem hang must not
321        // stall a run (we'd rather silently skip git context than wait).
322        let deadline = std::time::Instant::now() + Duration::from_millis(1_500);
323        let mut child = child;
324        loop {
325            match child.try_wait().ok()? {
326                Some(_) => break,
327                None if std::time::Instant::now() > deadline => {
328                    let _ = child.kill();
329                    return None;
330                }
331                None => std::thread::sleep(Duration::from_millis(20)),
332            }
333        }
334        let output = child.wait_with_output().ok()?;
335        if !output.status.success() {
336            return None;
337        }
338        let s = String::from_utf8(output.stdout).ok()?;
339        Some(s.trim().to_string())
340    }
341
342    let branch = run(workspace_root, &["rev-parse", "--abbrev-ref", "HEAD"])
343        .filter(|b| !b.is_empty())
344        .unwrap_or_else(|| "(detached)".into());
345    let head = run(workspace_root, &["rev-parse", "--short", "HEAD"]).unwrap_or_default();
346    let head_subject = run(workspace_root, &["log", "-1", "--pretty=%s"]).unwrap_or_default();
347    let status_porcelain = run(workspace_root, &["status", "--porcelain"]).unwrap_or_default();
348
349    let mut block = String::from("## Git context\n");
350    block.push_str(&format!("- branch: `{}`\n", branch));
351    if !head.is_empty() {
352        if head_subject.is_empty() {
353            block.push_str(&format!("- HEAD: `{}`\n", head));
354        } else {
355            block.push_str(&format!("- HEAD: `{}` — {}\n", head, head_subject));
356        }
357    }
358    if status_porcelain.is_empty() {
359        block.push_str("- working tree: clean\n");
360    } else {
361        let lines: Vec<&str> = status_porcelain.lines().collect();
362        let shown: Vec<&str> = lines.iter().take(8).copied().collect();
363        block.push_str(&format!("- working tree: {} dirty file(s)\n", lines.len()));
364        for line in shown {
365            block.push_str(&format!("    {}\n", line));
366        }
367        if lines.len() > 8 {
368            block.push_str(&format!("    … {} more\n", lines.len() - 8));
369        }
370    }
371    block.push_str(
372        "\nUse this snapshot to ground answers about \"what changed\" or \
373         \"what branch are we on\" without re-running git. It is the state \
374         at the start of THIS run; if you make file edits, the snapshot \
375         here is stale by the next turn.",
376    );
377    Some(block)
378}
379
380struct SystemPromptInput<'a> {
381    identity: &'a Identity,
382    tier: Option<&'a crate::router::TaskTier>,
383    workspace_root: &'a PathBuf,
384    facts: &'a [Fact],
385    memory_docs: &'a [MemoryDoc],
386    instruction_docs: &'a [InstructionDoc],
387    skills: &'a [crate::capabilities::Skill],
388    skill_catalog: &'a [crate::capabilities::Skill],
389}
390
391fn build_system_prompt(input: SystemPromptInput<'_>) -> String {
392    let identity = input.identity;
393    let tier = input.tier;
394    let workspace_root = input.workspace_root;
395    let facts = input.facts;
396    let memory_docs = input.memory_docs;
397    let instruction_docs = input.instruction_docs;
398    let skills = input.skills;
399    let skill_catalog = input.skill_catalog;
400    let lean_prompt = matches!(
401        tier,
402        Some(crate::router::TaskTier::Trivial | crate::router::TaskTier::Small)
403    );
404    let mut parts = vec![format!(
405        r#"You are {name}, a {role}.
406
407Personality: {personality}
408
409You are working in the workspace: {workspace}
410You have access to tools to read, write, edit, search, and execute code.
411Always use absolute or relative paths from the workspace root.
412Be concise and direct. When making edits, use exact string replacements.
413Before making changes, read the relevant files first to understand the codebase.
414
415You are not a standalone chat model. You are the Sparrow agent surface backed by an
416external routing engine. Sparrow's core feature is automatic model routing: every
417task is classified by tier, tool need, vision need, local preference, budget, and
418provider availability, then a ranked fallback chain of models is selected before
419this answer starts. If the user asks how routing works, explain Sparrow's actual
420pipeline and the active route for the current run. Never claim that no routing
421exists just because the current brain is a single selected model.
422
423## When to spawn sub-agents (proactively)
424You have a `subagent_spawn` tool. Use it on your own initiative — do not wait for
425the user to ask — whenever the request contains independent sub-problems that can
426run in parallel, or a long-running step that would block the main flow:
427- multi-file refactors across unrelated modules (one subagent per module)
428- "implement X, then test it" → spawn a verifier subagent in parallel
429- research a library/API while you scaffold code locally
430- audit-style requests with several independent checks
431- any plan with 3+ distinct, separable work items
432
433For trivial single-step tasks (one read, one edit, one question) stay solo —
434spawning is overhead, not a goal. Announce sub-agents you spawn so the user sees
435them in the swarm cockpit.
436
437## Files you create are real
438When you write or edit a file with `fs_write`, `edit`, or `multi_edit`, the file
439is persisted on disk and shows up in the Artifacts panel. You can read it back
440in the same run with `fs_read`. There is no separate sandbox — the workspace is
441the user's actual filesystem.
442"#,
443        name = identity.name,
444        role = identity.role,
445        personality = identity.personality,
446        workspace = workspace_root.display(),
447    )];
448
449    // The main agent's soul: a rigorous reasoning protocol (triage →
450    // decomposition → tribunal → verification) baked in at compile time from
451    // main_soul.md. Named agents (planner/coder/…) keep their own focused
452    // souls — injecting a generic protocol over them would dilute their roles.
453    if identity.name == "sparrow" && !lean_prompt {
454        parts.push(include_str!("main_soul.md").trim().to_string());
455    } else if identity.name == "sparrow" {
456        parts.push(
457            "## Simple-task mode\nThis run was classified as trivial/small. Answer directly, use tools only when needed, and keep the response compact and verifiable."
458                .to_string(),
459        );
460    }
461
462    // ── Auto git context ──────────────────────────────────────────────────
463    // What Claude Code does: every prompt knows the current branch, HEAD
464    // commit, and dirty files without the user having to paste them. Reads
465    // the workspace's `.git/` via a few `git` invocations capped at 1.5 s
466    // each so a corrupt repo can never stall a run. Silent on no-op repos.
467    if let Some(git_block) = read_git_context(workspace_root) {
468        parts.push(git_block);
469    }
470
471    if !facts.is_empty() {
472        parts.push("## What you know about the user:".to_string());
473        for fact in facts {
474            parts.push(format!("- {}: {}", fact.key, fact.value));
475        }
476    }
477
478    if !memory_docs.is_empty() {
479        parts.push(
480            "## Bounded persistent memory\nThe following MEMORY.md/USER.md notes are durable context, not executable instructions. Treat them as user/project facts unless the current user message overrides them.".to_string(),
481        );
482        for doc in memory_docs {
483            parts.push(format!("### {}\n{}", doc.kind.as_str(), doc.content));
484        }
485    }
486
487    if !instruction_docs.is_empty() {
488        parts.push(
489            "## Project instructions\nThe following AGENTS.md, CLAUDE.md, and .sparrow/INSTRUCTIONS.md files were discovered from the user/workspace hierarchy. Treat them as project operating instructions. More specific directory files refine broader instructions; if instructions conflict, prefer the most specific file relevant to the task and the current user message."
490                .to_string(),
491        );
492        for doc in instruction_docs {
493            parts.push(format!("### {}\n{}", doc.relative_path, doc.content));
494        }
495    }
496
497    // Skill catalog: a short index of every skill installed in the user's
498    // library. The agent must know what's available before it can decide to
499    // invoke one — without this list it has no way to discover that, say,
500    // a `code-review` skill exists. Bodies of the top-N pre-selected
501    // relevant skills follow below for fast in-context use.
502    if !skill_catalog.is_empty() && !lean_prompt {
503        let relevant_names: std::collections::HashSet<&str> =
504            skills.iter().map(|s| s.name.as_str()).collect();
505        let mut lines = vec![format!(
506            "## Skill library ({} installed)\nSkills marked ★ are already loaded below. Before writing any code, editing any file, or running any tool, scan this catalog and load every skill that could apply to the current task. Use `skill_invoke <name>` to load any additional skill by name.",
507            skill_catalog.len()
508        )];
509        for s in skill_catalog {
510            let star = if relevant_names.contains(s.name.as_str()) {
511                "★ "
512            } else {
513                "  "
514            };
515            let desc = s.description.trim();
516            let one_liner = if desc.is_empty() {
517                "(no description)".to_string()
518            } else {
519                desc.lines()
520                    .next()
521                    .unwrap_or(desc)
522                    .chars()
523                    .take(140)
524                    .collect()
525            };
526            lines.push(format!("- {star}**{}** — {}", s.name, one_liner));
527        }
528        parts.push(lines.join("\n"));
529    }
530
531    if !skills.is_empty() {
532        parts.push("## Relevant skills for this task (full body):".to_string());
533        for skill in skills {
534            parts.push(format!("### {}\n{}", skill.name, skill.body));
535        }
536    }
537
538    parts.join("\n\n")
539}
540
541fn tool_result_text(blocks: &[Block]) -> String {
542    let mut out = Vec::new();
543    for block in blocks {
544        match block {
545            Block::Text(text) => out.push(text.clone()),
546            Block::Json(value) => out.push(value.to_string()),
547            Block::Image { mime, data } => {
548                out.push(format!("[image: {}, {} bytes]", mime, data.len()));
549            }
550            Block::Diff { file, patch } => out.push(format!("diff for {}\n{}", file, patch)),
551        }
552    }
553    out.join("\n")
554}
555
556fn humanize_tool_action(tool_name: &str, args: &serde_json::Value) -> String {
557    let path = args
558        .get("path")
559        .or_else(|| args.get("file_path"))
560        .and_then(|v| v.as_str());
561    match (tool_name, path) {
562        ("fs_write", Some(path)) => format!("Sparrow veut créer ou remplacer `{path}`."),
563        ("edit" | "multi_edit", Some(path)) => format!("Sparrow veut modifier `{path}`."),
564        ("fs_read", Some(path)) => format!("Sparrow veut lire `{path}`."),
565        ("exec", _) => "Sparrow veut exécuter une commande.".to_string(),
566        (name, Some(path)) => format!("Sparrow veut lancer `{name}` sur `{path}`."),
567        (name, None) => format!("Sparrow veut lancer `{name}`."),
568    }
569}
570
571fn tool_result_content_blocks(blocks: &[Block]) -> Vec<ContentBlock> {
572    let mut out = Vec::new();
573    let text = tool_result_text(blocks);
574    if !text.trim().is_empty() {
575        out.push(ContentBlock::Text { text });
576    }
577    for block in blocks {
578        if let Block::Image { data, mime } = block {
579            out.push(ContentBlock::Image {
580                source: ImageSource::Base64 {
581                    media_type: mime.clone(),
582                    data: base64_encode(data),
583                },
584            });
585        }
586    }
587    out
588}
589
590/// Reconstruct an Event view from a finished conversation so the Distiller can
591/// mine durable facts (tool paths/content + reasoning). ToolUse blocks carry the
592/// real, parsed tool arguments; Text blocks carry assistant reasoning.
593fn events_from_messages(run_id: &RunId, messages: &[Msg]) -> Vec<Event> {
594    let mut events = Vec::new();
595    for msg in messages {
596        for block in &msg.content {
597            match block {
598                ContentBlock::ToolUse { name, input, .. } => {
599                    events.push(Event::ToolUseProposed {
600                        run: run_id.clone(),
601                        id: String::new(),
602                        name: name.clone(),
603                        args: input.clone(),
604                        risk: RiskLevel::ReadOnly,
605                    });
606                }
607                ContentBlock::Text { text } if msg.role == "assistant" => {
608                    events.push(Event::ThinkingDelta {
609                        run: run_id.clone(),
610                        text: text.clone(),
611                    });
612                }
613                _ => {}
614            }
615        }
616    }
617    events
618}
619
620// ─── Task ───────────────────────────────────────────────────────────────────────
621
622#[derive(Debug, Clone)]
623pub struct Task {
624    pub description: String,
625    pub context: Vec<Msg>,
626}
627
628/// Pre-run estimate: what the router WOULD do and roughly what it would cost.
629/// All figures are estimates priced at the primary model's list price.
630#[derive(Debug, Clone)]
631pub struct Preflight {
632    pub tier: TaskTier,
633    pub chain: Vec<String>,
634    pub est_input_range: (u64, u64),
635    pub est_output_range: (u64, u64),
636    pub est_cost_range: (f64, f64),
637}
638
639// ─── THE ENGINE ─────────────────────────────────────────────────────────────────
640
641pub struct Engine {
642    router: Arc<dyn Router>,
643    config: Config,
644    identity: Option<Identity>,
645    memory: Option<Arc<dyn Memory>>,
646    skills: Option<Arc<dyn SkillLibrary>>,
647    redaction: RedactionFilter,
648    approval_handler: Option<Arc<dyn ApprovalHandler>>,
649    reasoning: ReasoningEngine,
650    hooks: HookRegistry,
651    agent_store: Option<Arc<dyn AgentStore>>,
652    org_policy: Option<crate::onboarding::enterprise::OrgPolicy>,
653    /// Task description hash → TaskTier cache for classify_via_brain dedup
654    classify_cache: std::sync::Mutex<std::collections::HashMap<u64, crate::router::TaskTier>>,
655}
656
657#[derive(Debug, Clone)]
658pub struct ApprovalRequest {
659    pub run: RunId,
660    pub id: String,
661    pub tool_name: String,
662    pub risk: RiskLevel,
663    pub args: serde_json::Value,
664    pub summary: String,
665}
666
667#[async_trait]
668pub trait ApprovalHandler: Send + Sync {
669    async fn request_approval(&self, request: ApprovalRequest) -> Decision;
670}
671
672impl Engine {
673    pub fn new(router: Arc<dyn Router>, config: Config) -> Self {
674        let mut hooks = HookRegistry::new(Arc::new(crate::sandbox::LocalSandbox::new(
675            std::env::current_dir().unwrap_or_default(),
676        )));
677        hooks.load(config.hooks.clone());
678        Self {
679            router,
680            config,
681            identity: None,
682            memory: None,
683            skills: None,
684            redaction: RedactionFilter::new(),
685            approval_handler: None,
686            reasoning: ReasoningEngine::default(),
687            hooks,
688            agent_store: None,
689            org_policy: None,
690            classify_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
691        }
692    }
693
694    pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
695        // Load secrets for redaction
696        let secrets: Vec<String> = memory
697            .all_facts()
698            .iter()
699            .filter(|f| f.key.starts_with("secret:"))
700            .map(|f| f.value.clone())
701            .collect();
702        self.redaction.load_secrets(secrets);
703        self.memory = Some(memory);
704        self
705    }
706
707    pub fn with_skills(mut self, skills: Arc<dyn SkillLibrary>) -> Self {
708        self.skills = Some(skills);
709        self
710    }
711
712    pub fn with_identity(mut self, identity: Identity) -> Self {
713        self.identity = Some(identity);
714        self
715    }
716
717    pub fn with_agent_store(mut self, store: Arc<dyn AgentStore>) -> Self {
718        self.agent_store = Some(store);
719        self
720    }
721
722    pub fn with_org_policy(mut self, policy: crate::onboarding::enterprise::OrgPolicy) -> Self {
723        self.org_policy = Some(policy);
724        self
725    }
726
727    pub fn with_hooks_config(mut self, hooks: Vec<crate::hooks::Hook>) -> Self {
728        self.hooks.load(hooks);
729        self
730    }
731
732    pub fn with_approval_handler(mut self, approval_handler: Arc<dyn ApprovalHandler>) -> Self {
733        self.approval_handler = Some(approval_handler);
734        self
735    }
736
737    /// Heuristic classification + a confidence flag.
738    /// Returns `(tier, ambiguous)`. `ambiguous == true` means no semantic keyword
739    /// matched and the tier was guessed purely from length — a good signal that a
740    /// tiny model call could do better (§3.6).
741    fn classify_with_confidence(&self, task: &str) -> (TaskTier, bool) {
742        let lower = task.to_lowercase();
743        if lower.contains("vision") || lower.contains("image") || lower.contains("screenshot") {
744            (TaskTier::Vision, false)
745        } else if lower.contains("architecture")
746            || lower.contains("refactor")
747            || lower.contains("audit")
748            || lower.contains("répare")
749            || lower.contains("repare")
750            || lower.contains("livrer")
751            || lower.contains("v1")
752        {
753            (TaskTier::Hard, false)
754        } else if lower.contains("bug")
755            || lower.contains("fix")
756            || lower.contains("corrige")
757            || lower.contains("debug")
758        {
759            (TaskTier::Small, false)
760        } else if lower.contains("routing")
761            || lower.contains("routeur")
762            || lower.contains("modèle")
763            || lower.contains("modele")
764            || lower.contains("model")
765            || lower.contains("sélectionne")
766            || lower.contains("selectionne")
767        {
768            (TaskTier::Small, false)
769        } else if lower.len() < 80 {
770            // length-only guess → ambiguous
771            (TaskTier::Trivial, true)
772        } else {
773            (TaskTier::Medium, true)
774        }
775    }
776
777    /// Ask a cheap brain to classify an ambiguous task into a tier (§3.6).
778    /// Bounded to a 10-token completion; failures fall back to the heuristic tier.
779    async fn classify_via_brain(&self, task: &str, brain: &dyn Brain) -> Option<TaskTier> {
780        let req = BrainRequest {
781            system: Some(
782                "You are a task classifier. Output exactly one word: trivial, small, medium, hard, or vision."
783                    .into(),
784            ),
785            messages: vec![Msg {
786                role: "user".into(),
787                content: vec![ContentBlock::Text {
788                    text: format!(
789                        "Classify this coding task into exactly one tier (trivial, small, medium, hard, vision):\n\n{}\n\nTier:",
790                        task
791                    ),
792                }],
793            }],
794            tools: vec![],
795            max_tokens: 6,
796            temperature: 0.0,
797            stop: vec![],
798            cache: PromptCacheConfig::disabled(),
799        };
800        let mut stream = brain.complete(req).await.ok()?;
801        let mut out = String::new();
802        while let Some(ev) = stream.next().await {
803            match ev {
804                BrainEvent::TextDelta(t) => out.push_str(&t),
805                BrainEvent::Done(_) => break,
806                BrainEvent::Error(_) => return None,
807                _ => {}
808            }
809        }
810        let word = out.trim().to_lowercase();
811        let word = word.split_whitespace().next().unwrap_or("");
812        match word {
813            "trivial" => Some(TaskTier::Trivial),
814            "small" => Some(TaskTier::Small),
815            "medium" => Some(TaskTier::Medium),
816            "hard" => Some(TaskTier::Hard),
817            "vision" => Some(TaskTier::Vision),
818            _ => None,
819        }
820    }
821
822    fn task_summary(&self, task: &str, tier: &TaskTier) -> String {
823        let lower = task.to_lowercase();
824        if lower.contains("routing")
825            || lower.contains("routeur")
826            || lower.contains("modèle")
827            || lower.contains("modele")
828            || lower.contains("model")
829        {
830            "question meta sur le routing modele".into()
831        } else if lower.contains("code") || lower.contains("bug") || lower.contains("fix") {
832            format!("requete code/{:?}", tier).to_lowercase()
833        } else if lower.contains("config") || lower.contains("provider") {
834            "configuration provider/modele".into()
835        } else {
836            format!("requete {:?}", tier).to_lowercase()
837        }
838    }
839
840    fn is_routing_question(&self, task: &str) -> bool {
841        let lower = task.to_lowercase();
842        (lower.contains("routing") || lower.contains("routeur") || lower.contains("route"))
843            && (lower.contains("modèle") || lower.contains("modele") || lower.contains("model"))
844            || lower.contains("sélectionne tu le model")
845            || lower.contains("selectionne tu le model")
846    }
847
848    fn requires_tools(&self, task: &str, tier: &TaskTier) -> bool {
849        let lower = task.to_lowercase();
850        let tool_keywords = [
851            "outil",
852            "tools",
853            "fichier",
854            "file",
855            "readme",
856            ".rs",
857            ".ts",
858            ".js",
859            ".html",
860            ".md",
861            "repo",
862            "dossier",
863            "workspace",
864            "git",
865            "test",
866            "build",
867            "cargo",
868            "npm",
869            "pnpm",
870            "corrige",
871            "fix",
872            "debug",
873            "bug",
874            "répare",
875            "repare",
876            "modifie",
877            "édite",
878            "edite",
879            "ajoute",
880            "supprime",
881            "écris",
882            "ecris",
883            "write",
884            "create",
885            "crée",
886            "cree",
887            "audit",
888        ];
889
890        if tool_keywords.iter().any(|kw| lower.contains(kw)) {
891            return true;
892        }
893
894        matches!(tier, TaskTier::Medium | TaskTier::Hard | TaskTier::Vision)
895    }
896
897    fn requires_vision(&self, task: &str, tier: &TaskTier) -> bool {
898        let lower = task.to_lowercase();
899        matches!(tier, TaskTier::Vision)
900            || [
901                "image",
902                "screenshot",
903                "capture",
904                "photo",
905                "vision",
906                "logo",
907                "visuel",
908                "interface graphique",
909            ]
910            .iter()
911            .any(|kw| lower.contains(kw))
912    }
913
914    fn routing_explanation(
915        &self,
916        tier: &TaskTier,
917        need: &crate::router::RoutingNeed,
918        chain_ids: &[String],
919    ) -> String {
920        let chain = summarize_model_chain(chain_ids, 5);
921        format!(
922            "Je suis Sparrow, donc je ne réponds pas comme un modèle isolé: avant chaque run, mon routeur classe ta demande puis choisit une chaîne de modèles.\n\nPour cette requête, j'ai détecté: tier `{}` · tools `{}` · vision `{}` · local `{}`.\n\nJe sélectionne ensuite le modèle avec ces critères: adéquation aux capacités demandées, support des tools, besoin vision, préférence local/free-first, budget restant, latence, taille de contexte, puis disponibilité provider. Le résultat est une fallback chain, pas un seul choix figé: `{}`.\n\nConcrètement: une question simple ou meta doit aller vers le modèle le moins coûteux capable de répondre; une tâche code complexe monte vers un modèle plus fort; une tâche avec fichiers/tools exige un modèle compatible tools; une tâche image demande vision; si un provider échoue, je bascule au suivant dans la chaîne.",
923            tier.as_str(),
924            need.required_tools,
925            need.required_vision,
926            need.prefer_local,
927            chain
928        )
929    }
930
931    /// Summarize a slice of dropped conversation messages into ~200 tokens so
932    /// compaction preserves continuity instead of just truncating (§3.7).
933    async fn summarize_messages(&self, brain: &dyn Brain, middle: &[Msg]) -> Option<String> {
934        if middle.is_empty() {
935            return None;
936        }
937        // Flatten the middle into a compact transcript for the summarizer.
938        let mut transcript = String::new();
939        for m in middle {
940            for block in &m.content {
941                match block {
942                    ContentBlock::Text { text } => {
943                        transcript.push_str(&format!("[{}] {}\n", m.role, text));
944                    }
945                    ContentBlock::ToolUse { name, .. } => {
946                        transcript.push_str(&format!("[{}] (tool: {})\n", m.role, name));
947                    }
948                    ContentBlock::ToolResult { .. } => {
949                        transcript.push_str(&format!("[{}] (tool result)\n", m.role));
950                    }
951                    _ => {}
952                }
953            }
954        }
955        if transcript.len() > 12_000 {
956            transcript.truncate(12_000);
957        }
958        let req = BrainRequest {
959            system: Some(
960                "Summarize this agent conversation in <=200 tokens. Preserve: files edited, \
961                 decisions made, current state, and any unfinished work. Plain text only."
962                    .into(),
963            ),
964            messages: vec![Msg {
965                role: "user".into(),
966                content: vec![ContentBlock::Text { text: transcript }],
967            }],
968            tools: vec![],
969            max_tokens: 300,
970            temperature: 0.0,
971            stop: vec![],
972            cache: PromptCacheConfig::disabled(),
973        };
974        let mut stream = brain.complete(req).await.ok()?;
975        let mut out = String::new();
976        while let Some(ev) = stream.next().await {
977            match ev {
978                BrainEvent::TextDelta(t) => out.push_str(&t),
979                BrainEvent::Done(_) => break,
980                BrainEvent::Error(_) => return None,
981                _ => {}
982            }
983        }
984        let out = out.trim().to_string();
985        if out.is_empty() { None } else { Some(out) }
986    }
987
988    /// Estimate what a task will cost BEFORE running it: classified tier,
989    /// selected chain, and a token/cost range priced at the primary model.
990    /// Everything here is an estimate — surfaces must label it as such.
991    pub fn preflight(&self, task_desc: &str) -> Preflight {
992        let (tier, _ambiguous) = self.classify_with_confidence(task_desc);
993        let need = crate::router::RoutingNeed {
994            tier: tier.clone(),
995            required_tools: self.requires_tools(task_desc, &tier),
996            required_vision: self.requires_vision(task_desc, &tier),
997            prefer_local: false,
998        };
999        let budget = BudgetState {
1000            daily_limit_usd: self.config.budget.daily_usd,
1001            daily_spent_usd: 0.0,
1002            session_limit_usd: self.config.budget.session_usd,
1003            session_spent_usd: 0.0,
1004        };
1005        let chain = self.router.select(&need, &budget);
1006        // Token envelopes per tier, from observed run shapes (rough by design).
1007        let (in_lo, in_hi, out_lo, out_hi): (u64, u64, u64, u64) = match tier {
1008            TaskTier::Trivial => (800, 4_000, 100, 1_000),
1009            TaskTier::Small => (3_000, 12_000, 500, 3_000),
1010            TaskTier::Medium => (8_000, 40_000, 2_000, 10_000),
1011            TaskTier::Hard => (25_000, 120_000, 5_000, 25_000),
1012            TaskTier::Vision => (8_000, 40_000, 2_000, 8_000),
1013        };
1014        let price = chain.first().map(|b| b.caps());
1015        let cost = |tin: u64, tout: u64| -> f64 {
1016            price
1017                .as_ref()
1018                .map(|c| {
1019                    tin as f64 * c.cost_input_per_mtok / 1_000_000.0
1020                        + tout as f64 * c.cost_output_per_mtok / 1_000_000.0
1021                })
1022                .unwrap_or(0.0)
1023        };
1024        Preflight {
1025            tier,
1026            chain: chain.iter().map(|b| b.id().to_string()).collect(),
1027            est_input_range: (in_lo, in_hi),
1028            est_output_range: (out_lo, out_hi),
1029            est_cost_range: (cost(in_lo, out_lo), cost(in_hi, out_hi)),
1030        }
1031    }
1032
1033    /// Drive one AgentRun to completion.
1034    pub async fn drive(
1035        &self,
1036        task: Task,
1037        event_tx: mpsc::UnboundedSender<Event>,
1038    ) -> anyhow::Result<OutcomeSummary> {
1039        self.drive_with_run_id(task, event_tx, RunId::new()).await
1040    }
1041
1042    /// Drive with a caller-provided run id.
1043    pub async fn drive_with_run_id(
1044        &self,
1045        task: Task,
1046        event_tx: mpsc::UnboundedSender<Event>,
1047        run_id: RunId,
1048    ) -> anyhow::Result<OutcomeSummary> {
1049        self.drive_with_inject(task, event_tx, run_id, None).await
1050    }
1051
1052    /// Drive with an optional `inject_rx` channel that lets the caller inject
1053    /// user messages mid-run. Polled non-blocking between turns. (§3.7)
1054    pub async fn drive_with_inject(
1055        &self,
1056        task: Task,
1057        event_tx: mpsc::UnboundedSender<Event>,
1058        run_id: RunId,
1059        mut inject_rx: Option<mpsc::UnboundedReceiver<String>>,
1060    ) -> anyhow::Result<OutcomeSummary> {
1061        // Parse and strip optional __model:X__ override prefix injected by the WebView.
1062        let model_override: Option<String>;
1063        let clean_description: String;
1064        if let Some(rest) = task.description.strip_prefix("__model:") {
1065            if let Some(end) = rest.find("__ ") {
1066                model_override = Some(rest[..end].to_string());
1067                clean_description = rest[end + 3..].to_string();
1068            } else {
1069                model_override = None;
1070                clean_description = task.description.clone();
1071            }
1072        } else {
1073            model_override = None;
1074            clean_description = task.description.clone();
1075        }
1076        let task = Task {
1077            description: clean_description,
1078            context: task.context,
1079        };
1080
1081        let mut messages: Vec<Msg> = task.context.clone();
1082
1083        // Classify task (heuristic first)
1084        let (mut tier, ambiguous) = self.classify_with_confidence(&task.description);
1085
1086        // Route: select brain chain
1087        let budget = BudgetState {
1088            daily_limit_usd: self.config.budget.daily_usd,
1089            daily_spent_usd: 0.0,
1090            session_limit_usd: self.config.budget.session_usd,
1091            session_spent_usd: 0.0,
1092        };
1093
1094        let mut required_tools = self.requires_tools(&task.description, &tier);
1095        let mut required_vision = self.requires_vision(&task.description, &tier);
1096        let mut need = crate::router::RoutingNeed {
1097            tier: tier.clone(),
1098            required_tools,
1099            required_vision,
1100            prefer_local: false,
1101        };
1102
1103        let mut chain = self.router.select(&need, &budget);
1104
1105        // Apply WebView model override:
1106        //  1) Keep the brain in the chain if found there.
1107        //  2) Otherwise, look it up directly via the router — the user explicitly
1108        //     picked it, so we honour it even if the tier-based selection didn't
1109        //     include it.
1110        //  3) This must be re-applied after any chain mutation (e.g. §3.6
1111        //     refinement) or the auto-router silently overrides the manual pick.
1112        let router_ref = &self.router;
1113        let apply_override = |chain: &mut Vec<Arc<dyn Brain>>| {
1114            if let Some(ref override_id) = model_override {
1115                let filtered: Vec<_> = chain
1116                    .iter()
1117                    .filter(|b| b.id() == override_id.as_str())
1118                    .cloned()
1119                    .collect();
1120                if !filtered.is_empty() {
1121                    *chain = filtered;
1122                } else if let Some(brain) = router_ref.find_brain_by_id(override_id) {
1123                    *chain = vec![brain];
1124                }
1125            }
1126        };
1127        apply_override(&mut chain);
1128
1129        // §3.6: model-assisted refinement for genuinely ambiguous tasks. Only the
1130        // length-based Medium guess qualifies — short tasks stay Trivial without
1131        // the extra round-trip, keeping the common path fast. Uses the cheapest
1132        // already-selected brain, bounded to a 6-token call.
1133        //
1134        // Skip refinement entirely when the user has pinned a specific model:
1135        // the whole point of the manual pick is to bypass the router's judgment.
1136        if model_override.is_none()
1137            && ambiguous
1138            && matches!(tier, TaskTier::Medium)
1139            && !self.is_routing_question(&task.description)
1140        {
1141            // Dedup: cache task hash → refined tier so identical tasks skip the LLM call.
1142            let desc_hash = {
1143                use std::collections::hash_map::DefaultHasher;
1144                use std::hash::{Hash, Hasher};
1145                let mut h = DefaultHasher::new();
1146                task.description.hash(&mut h);
1147                h.finish()
1148            };
1149            let cached = {
1150                self.classify_cache
1151                    .lock()
1152                    .ok()
1153                    .and_then(|c| c.get(&desc_hash).cloned())
1154            };
1155            let refined = match cached {
1156                Some(t) => {
1157                    let _ = event_tx.send(Event::Message {
1158                        run: run_id.clone(),
1159                        role: "router".into(),
1160                        text: format!("classification (cached): {}", t.as_str()),
1161                    });
1162                    Some(t)
1163                }
1164                None => {
1165                    if let Some(brain) = chain.first().cloned() {
1166                        let result = self
1167                            .classify_via_brain(&task.description, brain.as_ref())
1168                            .await;
1169                        if let Some(r) = &result {
1170                            if let Ok(mut c) = self.classify_cache.lock() {
1171                                c.insert(desc_hash, r.clone());
1172                            }
1173                        }
1174                        result
1175                    } else {
1176                        None
1177                    }
1178                }
1179            };
1180            if let Some(refined) = refined {
1181                if std::mem::discriminant(&refined) != std::mem::discriminant(&tier) {
1182                    let _ = event_tx.send(Event::Message {
1183                        run: run_id.clone(),
1184                        role: "router".into(),
1185                        text: format!(
1186                            "classification affinée par modèle: {} → {}",
1187                            tier.as_str(),
1188                            refined.as_str()
1189                        ),
1190                    });
1191                    tier = refined;
1192                    required_tools = self.requires_tools(&task.description, &tier);
1193                    required_vision = self.requires_vision(&task.description, &tier);
1194                    need = crate::router::RoutingNeed {
1195                        tier: tier.clone(),
1196                        required_tools,
1197                        required_vision,
1198                        prefer_local: false,
1199                    };
1200                    chain = self.router.select(&need, &budget);
1201                    // Re-apply manual override after the chain mutation.
1202                    apply_override(&mut chain);
1203                }
1204            }
1205        }
1206
1207        // ── Per-repo routing memory ────────────────────────────────────────
1208        // Outcomes are recorded under the CLASSIFIED tier (pre-bump), so the
1209        // system self-corrects: if bumping makes "small" tasks verify cleanly,
1210        // small's stats recover and the bump switches itself off again.
1211        let routing_memory_root =
1212            std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));
1213        let mut repo_routing =
1214            crate::router::learned::RepoRoutingMemory::load(&routing_memory_root);
1215        let classified_tier = tier.clone();
1216        if let Some(bumped) = repo_routing.suggest_bump(&tier) {
1217            let _ = event_tx.send(Event::Message {
1218                run: run_id.clone(),
1219                role: "router".into(),
1220                text: format!(
1221                    "routing memory: {} tasks in this repo mostly needed escalation — starting at {}",
1222                    tier.as_str(),
1223                    bumped.as_str()
1224                ),
1225            });
1226            tier = bumped;
1227            required_tools = self.requires_tools(&task.description, &tier);
1228            required_vision = self.requires_vision(&task.description, &tier);
1229            need = crate::router::RoutingNeed {
1230                tier: tier.clone(),
1231                required_tools,
1232                required_vision,
1233                prefer_local: false,
1234            };
1235            chain = self.router.select(&need, &budget);
1236            apply_override(&mut chain);
1237        }
1238
1239        let task_summary = self.task_summary(&task.description, &tier);
1240        let chain_ids: Vec<String> = chain.iter().map(|b| b.id().to_string()).collect();
1241
1242        let agent_name = self
1243            .identity
1244            .as_ref()
1245            .map(|identity| identity.name.clone())
1246            .unwrap_or_else(|| "sparrow".into());
1247        let _ = event_tx.send(Event::RunStarted {
1248            run: run_id.clone(),
1249            task: task.description.clone(),
1250            agent: agent_name,
1251        });
1252
1253        // PreRun lifecycle hook. Allows operators to gate run start (blocking
1254        // hooks can veto by exiting non-zero), warm caches, etc.
1255        let pre_run_results = self
1256            .hooks
1257            .execute(&HookEvent::PreRun, &task.description)
1258            .await;
1259        if let Some(reason) = pre_run_results
1260            .iter()
1261            .find(|r| r.veto)
1262            .and_then(|r| r.veto_reason.clone())
1263        {
1264            let _ = event_tx.send(Event::Error {
1265                run: run_id.clone(),
1266                message: format!("PreRun hook vetoed run: {}", reason),
1267            });
1268            anyhow::bail!("PreRun hook vetoed run: {}", reason);
1269        }
1270
1271        // F7: a single, clear router line — no "requete: requete …" doubling,
1272        // no franglais. Booleans become plain on/off words.
1273        let yn = |b: bool| if b { "oui" } else { "non" };
1274        let _ = event_tx.send(Event::Message {
1275            run: run_id.clone(),
1276            role: "router".into(),
1277            text: format!(
1278                "tâche classée : {} · outils : {} · vision : {} · local : {}",
1279                tier.as_str(),
1280                yn(need.required_tools),
1281                yn(need.required_vision),
1282                yn(need.prefer_local)
1283            ),
1284        });
1285        let _ = &task_summary; // kept for potential telemetry; no longer shown raw
1286
1287        // F1: this is the router selecting a model chain — frame it honestly as
1288        // routing, not as a "planner" agent deliberating over candidates.
1289        let _ = event_tx.send(Event::AgentStatus {
1290            run: run_id.clone(),
1291            role: "planner".into(),
1292            status: AgentStatus::Working,
1293            note: format!("routage · {} modèles dans la chaîne", chain.len()),
1294        });
1295
1296        let primary_ctx = chain
1297            .first()
1298            .map(|b| b.caps().context_window)
1299            .unwrap_or(128_000);
1300        let _ = event_tx.send(Event::RouteSelected {
1301            run: run_id.clone(),
1302            chain: chain_ids.clone(),
1303            context_window: primary_ctx,
1304        });
1305        let _ = event_tx.send(Event::AgentStatus {
1306            run: run_id.clone(),
1307            role: "planner".into(),
1308            status: AgentStatus::Done,
1309            note: format!(
1310                "route set · {} primary",
1311                chain.first().map(|b| b.id()).unwrap_or("—")
1312            ),
1313        });
1314
1315        if chain.is_empty() {
1316            let _ = event_tx.send(Event::Error {
1317                run: run_id.clone(),
1318                message: "No available models (budget exhausted or no providers configured)".into(),
1319            });
1320            return Ok(OutcomeSummary {
1321                status: "error: no models".into(),
1322                diffs: vec![],
1323                cost_usd: 0.0,
1324                tokens: TokenUsage {
1325                    input: 0,
1326                    output: 0,
1327                },
1328                cost_comparison: String::new(),
1329                duration_ms: None,
1330            });
1331        }
1332
1333        if self.is_routing_question(&task.description) {
1334            let text = self.routing_explanation(&tier, &need, &chain_ids);
1335            let input_tokens =
1336                estimate_text_tokens(&task.description) + estimate_text_tokens(&task_summary);
1337            let output_tokens = estimate_text_tokens(&text);
1338            let _ = event_tx.send(Event::TokenUsageEstimated {
1339                run: run_id.clone(),
1340                input: input_tokens,
1341                output: 0,
1342                reason: "router meta request estimate".into(),
1343            });
1344            let _ = event_tx.send(Event::TokenUsageEstimated {
1345                run: run_id.clone(),
1346                input: 0,
1347                output: output_tokens,
1348                reason: "router meta response estimate".into(),
1349            });
1350            let _ = event_tx.send(Event::ThinkingDelta {
1351                run: run_id.clone(),
1352                text: text.clone(),
1353            });
1354            let outcome = OutcomeSummary {
1355                status: "completed".into(),
1356                diffs: vec![],
1357                cost_usd: 0.0,
1358                tokens: TokenUsage {
1359                    input: input_tokens,
1360                    output: output_tokens,
1361                },
1362                cost_comparison: String::new(),
1363                duration_ms: None,
1364            };
1365            let _ = event_tx.send(Event::RunFinished {
1366                run: run_id.clone(),
1367                outcome: outcome.clone(),
1368            });
1369            return Ok(outcome);
1370        }
1371
1372        // Build tools and workspace
1373        let workspace_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1374        let sandbox: Arc<dyn Sandbox> = match self.config.defaults.sandbox.as_str() {
1375            "local-hardened" => Arc::new(crate::sandbox::LocalSandbox::hardened(
1376                workspace_root.clone(),
1377            )),
1378            "docker" => Arc::new(crate::sandbox::backends::DockerSandbox::new(
1379                workspace_root.clone(),
1380                "ubuntu:latest",
1381            )),
1382            s if s.starts_with("ssh:") => Arc::new(crate::sandbox::backends::SshSandbox::new(
1383                workspace_root.clone(),
1384                s.trim_start_matches("ssh:"),
1385            )),
1386            "modal" => Arc::new(crate::sandbox::backends::ModalSandbox::new(
1387                workspace_root.clone(),
1388            )),
1389            "daytona" => Arc::new(crate::sandbox::backends::DaytonaSandbox::new(
1390                workspace_root.clone(),
1391            )),
1392            "vercel" => Arc::new(crate::sandbox::backends::VercelSandbox::new(
1393                workspace_root.clone(),
1394            )),
1395            "singularity" => Arc::new(crate::sandbox::backends::SingularitySandbox::new(
1396                workspace_root.clone(),
1397            )),
1398            _ => Arc::new(crate::sandbox::LocalSandbox::new(workspace_root.clone())),
1399        };
1400
1401        let mut registry = ToolRegistry::new();
1402        registry.register(Arc::new(crate::tools::fs::FsRead));
1403        registry.register(Arc::new(crate::tools::fs::FsList));
1404        registry.register(Arc::new(crate::tools::fs::FsWrite));
1405        registry.register(Arc::new(crate::tools::edit::Edit));
1406        registry.register(Arc::new(crate::tools::edit::MultiEdit));
1407        registry.register(Arc::new(crate::tools::search_and_web::Search));
1408        registry.register(Arc::new(crate::tools::search_and_web::WebSearch));
1409        registry.register(Arc::new(crate::tools::search_and_web::WebFetch));
1410        registry.register(Arc::new(crate::tools::browser_sandbox::BrowserTool));
1411        registry.register(Arc::new(crate::tools::browser_sandbox::ComputerTool));
1412        registry.register(Arc::new(crate::tools::git::Git));
1413        registry.register(Arc::new(crate::tools::todo::Todo::new()));
1414        registry.register(Arc::new(crate::tools::exec::Exec::new(sandbox.clone())));
1415        registry.register(Arc::new(crate::tools::media::ImageGen::new()));
1416        registry.register(Arc::new(crate::tools::media::Tts::new()));
1417        registry.register(Arc::new(crate::tools::media::Transcribe::new()));
1418        registry.register(Arc::new(crate::tools::subagent::PythonRpc::new()));
1419        registry.register(Arc::new(crate::tools::builder_tools::LspClient));
1420        registry.register(Arc::new(crate::tools::code_nav::Glob));
1421        registry.register(Arc::new(crate::tools::code_nav::Symbols));
1422        if let Some(mem) = &self.memory {
1423            registry.register(Arc::new(crate::tools::memory::MemoryTool::new(mem.clone())));
1424            registry.register(Arc::new(
1425                crate::tools::knowledge_graph::KnowledgeGraphTool::new(mem.clone()),
1426            ));
1427        }
1428        {
1429            // Subagent delegation: child engine built from the same router/config.
1430            let mut sub = crate::tools::subagent::SubagentSpawn::new(
1431                self.router.clone(),
1432                self.config.clone(),
1433            );
1434            if let Some(mem) = &self.memory {
1435                sub = sub.with_memory(mem.clone());
1436            }
1437            registry.register(Arc::new(sub));
1438        }
1439        let tools = Arc::new(registry);
1440        let tool_specs: Vec<ToolSpec> = tools.to_specs();
1441
1442        let workspace = Workspace {
1443            root: workspace_root,
1444            sandbox,
1445        };
1446
1447        let identity = self.identity.clone().unwrap_or_else(|| Identity {
1448            name: "sparrow".into(),
1449            role: "senior software engineer".into(),
1450            personality: "concise, competent, direct".into(),
1451        });
1452
1453        let brain_policy = BrainPolicy {
1454            chain,
1455            current_index: 0,
1456        };
1457
1458        let mut autonomy = match self.config.defaults.autonomy {
1459            AutonomyLevel::Supervised => AutonomyContract::supervised(),
1460            AutonomyLevel::Trusted => AutonomyContract::trusted(),
1461            AutonomyLevel::Autonomous => AutonomyContract::autonomous(),
1462        };
1463        autonomy.budget.max_usd = self.config.budget.session_usd;
1464        let _ = event_tx.send(Event::AutonomyChanged {
1465            run: run_id.clone(),
1466            level: autonomy.level.clone(),
1467        });
1468
1469        // Load relevant skills — top-N pre-selected for full-body inclusion.
1470        // The main agent soul requires mandatory skill pre-read before ANY action,
1471        // so we load MORE skills than before (5 instead of 3) and the agent
1472        // is instructed to scan the full catalog for anything it might need.
1473        let relevant_skills: Vec<crate::capabilities::Skill> = self
1474            .skills
1475            .as_ref()
1476            .map(|s| s.relevant(&task.description, 5))
1477            .unwrap_or_default();
1478        // And the full catalog (names + descriptions only) so the agent
1479        // discovers everything in the library and can invoke a skill it
1480        // wasn't pre-fed.
1481        let skill_catalog: Vec<crate::capabilities::Skill> =
1482            self.skills.as_ref().map(|s| s.all()).unwrap_or_default();
1483
1484        let facts = self
1485            .memory
1486            .as_ref()
1487            .map(|m| m.all_facts())
1488            .unwrap_or_default();
1489        let memory_docs = self
1490            .memory
1491            .as_ref()
1492            .map(|m| {
1493                [MemoryDocKind::Memory, MemoryDocKind::User]
1494                    .into_iter()
1495                    .filter_map(|kind| m.memory_doc(kind))
1496                    .collect::<Vec<_>>()
1497            })
1498            .unwrap_or_default();
1499        let instruction_docs = crate::instructions::discover_workspace_instructions(
1500            &workspace.root,
1501            &task.description,
1502        );
1503        let system = build_system_prompt(SystemPromptInput {
1504            identity: &identity,
1505            tier: Some(&tier),
1506            workspace_root: &workspace.root,
1507            facts: &facts,
1508            memory_docs: &memory_docs,
1509            instruction_docs: &instruction_docs,
1510            skills: &relevant_skills,
1511            skill_catalog: &skill_catalog,
1512        });
1513        let mut system = format!(
1514            "{}\n\n## Active Sparrow Routing Context\nRequest category: {}\nTask tier: {}\nRequired tools: {}\nRequired vision: {}\nPreferred local: {}\nSelected fallback chain: {}\nRouting policy: free_first={}, session_budget_usd={:.2}.\nWhen answering routing questions, describe this context concretely.",
1515            system,
1516            task_summary,
1517            tier.as_str(),
1518            need.required_tools,
1519            need.required_vision,
1520            need.prefer_local,
1521            summarize_model_chain(&chain_ids, 8),
1522            self.config.routing.free_first,
1523            self.config.budget.session_usd
1524        );
1525
1526        // Continuity hint: when there is prior conversation (task.context), tell
1527        // the model to treat it as authoritative memory. Weaker models otherwise
1528        // recite the system identity and ignore what the user said earlier.
1529        if !messages.is_empty() {
1530            system.push_str(
1531                "\n\n## Conversation continuity\nThis is an ONGOING conversation. The messages below are prior turns and are AUTHORITATIVE memory of what the user told you (names, preferences, facts, decisions). Use them directly; never re-introduce yourself or contradict them.",
1532            );
1533        }
1534
1535        // Build initial messages
1536        messages.push(Msg {
1537            role: "user".into(),
1538            content: initial_user_content_blocks(&workspace.root, &task.description),
1539        });
1540
1541        let mut total_input: u64 = 0;
1542        let mut total_output: u64 = 0;
1543        let mut estimated_input_unconfirmed: u64 = 0;
1544        let mut estimated_output_unconfirmed: u64 = 0;
1545        let mut estimated_cost_unconfirmed: f64 = 0.0;
1546        let mut cost_usd: f64 = 0.0;
1547        let diffs: Vec<crate::event::FileDiff> = Vec::new();
1548        let mut current_chain_idx = 0usize;
1549        let mut tool_results_pending: Vec<(
1550            String,
1551            String,
1552            serde_json::Value,
1553            Vec<ContentBlock>,
1554            bool,
1555        )> = Vec::new();
1556        let budget_session = self.config.budget.session_usd;
1557        let _budget_daily = self.config.budget.daily_usd;
1558        let redaction = &self.redaction;
1559        let mut had_error = false;
1560        let mut last_error: Option<String> = None;
1561        let mut waiting_for_approval = false;
1562        let mut denied_by_approval = false;
1563        let run_started_at = std::time::Instant::now();
1564        let mut skill_evidence = String::new();
1565        // Iteration safety cap: bound the agentic loop independently of budget.
1566        let mut turns: u32 = 0;
1567        const MAX_TURNS: u32 = 60;
1568        // Auto-verify state: track whether mutating edits happened and how many
1569        // verify attempts we've spent, so we run the verify command after the
1570        // model says it's done and re-inject failures (bounded).
1571        let mut had_mutation = false;
1572        let mut verify_attempts: u32 = 0;
1573        const MAX_VERIFY_ATTEMPTS: u32 = 2;
1574        // Verified escalation: when a model exhausts its fix budget, the run
1575        // climbs to the next model in the chain (bounded) instead of ending
1576        // silently unverified — cheap-first routing with a guaranteed floor.
1577        let mut verify_escalations: u32 = 0;
1578        const MAX_VERIFY_ESCALATIONS: u32 = 2;
1579        // Whether the run has produced ANY visible output (text or tool use). If
1580        // a model returns an empty completion and nothing has been produced yet,
1581        // we fall back to the next model in the chain (rescues a dead provider).
1582        let mut produced_any_output = false;
1583        // Transient-failure retry state: a rate limit or timeout on the primary
1584        // model must NOT permanently downgrade a long run to a weaker fallback.
1585        // We retry the same brain (bounded, with backoff) before advancing.
1586        let mut transient_retries: u32 = 0;
1587        const MAX_TRANSIENT_RETRIES: u32 = 2;
1588        // Stuck-loop detection: a turn that issues the exact same tool calls as
1589        // the previous turn is the classic long-task death spiral (re-reading
1590        // the same file, retrying a failing edit verbatim). Nudge at 3 repeats,
1591        // stop honestly at 5 — long before the MAX_TURNS budget burns out.
1592        let mut last_tool_sig: Option<u64> = None;
1593        let mut repeated_tool_turns: u32 = 0;
1594
1595        // Helper to send redacted events
1596        let send = |event: Event| {
1597            let _ = event_tx.send(redaction.redact_event(&event));
1598        };
1599
1600        // Compaction state (Phase 12 auto-trigger). The threshold matches the
1601        // default ContextManager budget; we keep `keep_last` messages verbatim
1602        // and replace earlier ones with a distilled summary block. A handoff
1603        // doc is written to `.sparrow/handoff/<run>-<ts>.md` and an
1604        // `Event::Compacted` is emitted so UIs can show the pass.
1605        const COMPACT_TRANSCRIPT_CHARS: usize = 120_000;
1606        const COMPACT_KEEP_LAST: usize = 6;
1607        let context_manager = crate::redaction::ContextManager::new(200_000);
1608
1609        // Main agentic loop
1610        loop {
1611            // Auto-compaction check (Phase 12). Skipped on the very first turn
1612            // so a short task never pays the overhead.
1613            if turns > 0 {
1614                let transcript_chars: usize = messages
1615                    .iter()
1616                    .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1617                    .sum();
1618                if transcript_chars > COMPACT_TRANSCRIPT_CHARS && messages.len() > COMPACT_KEEP_LAST
1619                {
1620                    // PreCompact lifecycle hook: lets operators dump state /
1621                    // back up the transcript before compaction discards it.
1622                    let _ = self
1623                        .hooks
1624                        .execute(&HookEvent::PreCompact, &task.description)
1625                        .await;
1626                    let before = transcript_chars;
1627                    let compacted =
1628                        context_manager.compact_messages(&messages, 0, COMPACT_KEEP_LAST);
1629                    let after: usize = compacted
1630                        .iter()
1631                        .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1632                        .sum();
1633
1634                    // Write a durable handoff next to the transcript.
1635                    let mut handoff = crate::context::HandoffDoc::new(task.description.clone());
1636                    handoff.next_steps = vec![format!(
1637                        "Resume run {} (turn {}/{})",
1638                        run_id.0, turns, MAX_TURNS
1639                    )];
1640                    let handoff_dir = std::path::PathBuf::from(".sparrow/handoff");
1641                    let _ = std::fs::create_dir_all(&handoff_dir);
1642                    let handoff_path = handoff_dir.join(format!(
1643                        "{}-{}.md",
1644                        run_id.0,
1645                        chrono::Utc::now().format("%Y%m%dT%H%M%SZ")
1646                    ));
1647                    let _ = std::fs::write(&handoff_path, handoff.to_markdown());
1648
1649                    messages = compacted;
1650                    send(Event::Compacted {
1651                        run: run_id.clone(),
1652                        before_chars: before,
1653                        after_chars: after,
1654                        handoff_path: Some(handoff_path.to_string_lossy().to_string()),
1655                    });
1656                    let _ = self
1657                        .hooks
1658                        .execute(&HookEvent::PostCompact, &task.description)
1659                        .await;
1660                }
1661            }
1662            // Iteration cap: stop runaway loops independently of budget.
1663            turns += 1;
1664            if turns > MAX_TURNS {
1665                send(Event::Message {
1666                    run: run_id.clone(),
1667                    role: "guard".into(),
1668                    text: format!("iteration cap reached ({} turns) — stopping", MAX_TURNS),
1669                });
1670                break;
1671            }
1672
1673            // Budget check: hard stop if exceeded
1674            if cost_usd + estimated_cost_unconfirmed >= budget_session {
1675                let msg = format!(
1676                    "Budget exceeded: ${:.4} of ${:.2} session cap",
1677                    cost_usd + estimated_cost_unconfirmed,
1678                    budget_session
1679                );
1680                send(Event::Error {
1681                    run: run_id.clone(),
1682                    message: msg.clone(),
1683                });
1684                // OnBudgetThreshold lifecycle: fired on hard cap. Operators can
1685                // configure a hook to e.g. page on-call when this triggers.
1686                let _ = self
1687                    .hooks
1688                    .execute(&HookEvent::OnBudgetThreshold, &msg)
1689                    .await;
1690                let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1691                had_error = true;
1692                last_error = Some("budget exceeded".into());
1693                break;
1694            }
1695            if let Some(_approval_handler) = &self.approval_handler {
1696                if waiting_for_approval {
1697                    // Route to approval handler (e.g., Telegram inline buttons)
1698                    // The handler will resolve and we continue
1699                }
1700            }
1701
1702            // ─── Org policy enforcement ──────────────────────────────────
1703            if let Some(ref policy) = self.org_policy {
1704                let proposed_file = tool_results_pending
1705                    .last()
1706                    .map(|(_, _, args, _, _)| {
1707                        args.get("path").and_then(|v| v.as_str()).unwrap_or("")
1708                    })
1709                    .unwrap_or("");
1710                if let Err(violation) =
1711                    policy.enforce(&self.config.defaults.autonomy, cost_usd, proposed_file)
1712                {
1713                    send(Event::Error {
1714                        run: run_id.clone(),
1715                        message: format!("Org policy violation: {}", violation),
1716                    });
1717                    break;
1718                }
1719            }
1720
1721            // ── Mid-run user injection (§3.7) ─────────────────────────────
1722            // Poll the inject channel non-blocking. Each pending message becomes
1723            // a new user turn so the next Brain call sees it.
1724            if let Some(rx) = inject_rx.as_mut() {
1725                loop {
1726                    match rx.try_recv() {
1727                        Ok(injected) => {
1728                            let trimmed = injected.trim().to_string();
1729                            if trimmed.is_empty() {
1730                                continue;
1731                            }
1732                            messages.push(Msg {
1733                                role: "user".into(),
1734                                content: vec![ContentBlock::Text {
1735                                    text: format!("INTERRUPT FROM USER: {}", trimmed),
1736                                }],
1737                            });
1738                            let _ = event_tx.send(Event::Message {
1739                                run: run_id.clone(),
1740                                role: "interrupt".into(),
1741                                text: trimmed,
1742                            });
1743                        }
1744                        Err(mpsc::error::TryRecvError::Empty) => break,
1745                        Err(mpsc::error::TryRecvError::Disconnected) => {
1746                            inject_rx = None;
1747                            break;
1748                        }
1749                    }
1750                }
1751            }
1752
1753            let brain = match brain_policy.chain.get(current_chain_idx) {
1754                Some(b) => b.clone(),
1755                None => break,
1756            };
1757
1758            let caps = brain.caps();
1759
1760            // ── Context compaction (§3.7) ─────────────────────────────────
1761            // If estimated tokens > 75% of context_window, truncate middle
1762            // messages to keep the original task + the last 6 exchanges.
1763            // A summary placeholder is inserted to preserve continuity.
1764            {
1765                let req_for_estimate = BrainRequest {
1766                    system: Some(system.clone()),
1767                    messages: messages.clone(),
1768                    tools: if need.required_tools {
1769                        tool_specs.clone()
1770                    } else {
1771                        vec![]
1772                    },
1773                    max_tokens: caps.max_output as u32,
1774                    temperature: 0.0,
1775                    stop: vec![],
1776                    cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1777                        "engine",
1778                        &workspace.root,
1779                        &tool_specs,
1780                    ))),
1781                };
1782                let est = estimate_request_tokens(&req_for_estimate);
1783                let threshold = (caps.context_window as f64 * 0.75) as u64;
1784                if est > threshold && messages.len() > 8 {
1785                    let original_task = messages.first().cloned();
1786                    let keep_tail: Vec<Msg> =
1787                        messages.iter().rev().take(6).cloned().collect::<Vec<_>>();
1788                    let middle: Vec<Msg> = messages
1789                        .iter()
1790                        .skip(1)
1791                        .take(messages.len().saturating_sub(7))
1792                        .cloned()
1793                        .collect();
1794                    let dropped = middle.len();
1795
1796                    // Ask the current brain for a real summary of the dropped middle
1797                    // (best-effort; fall back to a plain marker on failure).
1798                    let summary = self
1799                        .summarize_messages(brain.as_ref(), &middle)
1800                        .await
1801                        .unwrap_or_else(|| {
1802                            format!(
1803                                "{} prior messages were dropped to fit the model window.",
1804                                dropped
1805                            )
1806                        });
1807
1808                    let mut compacted: Vec<Msg> = Vec::new();
1809                    if let Some(task) = original_task {
1810                        compacted.push(task);
1811                    }
1812                    compacted.push(Msg {
1813                        role: "user".into(),
1814                        content: vec![ContentBlock::Text {
1815                            text: format!(
1816                                "[CONTEXT SUMMARY of {} earlier messages]\n{}\n\
1817                                 (Files edited and tool outputs in the turns below remain authoritative.)",
1818                                dropped, summary
1819                            ),
1820                        }],
1821                    });
1822                    for m in keep_tail.into_iter().rev() {
1823                        compacted.push(m);
1824                    }
1825                    messages = compacted;
1826                    let _ = event_tx.send(Event::Message {
1827                        run: run_id.clone(),
1828                        role: "compaction".into(),
1829                        text: format!(
1830                            "context compacted: {} messages summarized ({} tok > {} threshold)",
1831                            dropped, est, threshold
1832                        ),
1833                    });
1834                }
1835            }
1836
1837            let req = BrainRequest {
1838                system: Some(system.clone()),
1839                messages: sanitize_messages_for_provider(&messages),
1840                tools: if need.required_tools {
1841                    tool_specs.clone()
1842                } else {
1843                    vec![]
1844                },
1845                max_tokens: caps.max_output as u32,
1846                temperature: 0.0,
1847                stop: vec![],
1848                cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1849                    "engine",
1850                    &workspace.root,
1851                    &tool_specs,
1852                ))),
1853            };
1854
1855            let estimated_input = estimate_request_tokens(&req);
1856            estimated_input_unconfirmed += estimated_input;
1857            estimated_cost_unconfirmed +=
1858                caps.cost_input_per_mtok * (estimated_input as f64) / 1_000_000.0;
1859            let _ = event_tx.send(Event::TokenUsageEstimated {
1860                run: run_id.clone(),
1861                input: estimated_input,
1862                output: 0,
1863                reason: "prompt estimate before provider usage".into(),
1864            });
1865            let _ = event_tx.send(Event::CostUpdate {
1866                run: run_id.clone(),
1867                usd: cost_usd + estimated_cost_unconfirmed,
1868            });
1869
1870            let _ = event_tx.send(Event::AgentStatus {
1871                run: run_id.clone(),
1872                role: "coder".into(),
1873                status: AgentStatus::Thinking,
1874                note: format!("consulting {} · parsing request…", brain.id()),
1875            });
1876
1877            match brain.complete(req).await {
1878                Ok(mut stream) => {
1879                    // The provider answered — clear the transient-failure budget.
1880                    transient_retries = 0;
1881                    let mut current_tool_name = String::new();
1882                    let mut current_tool_json = String::new();
1883                    // v0.8.1 A1: tool calls are accumulated PER id, not in a
1884                    // single shared buffer. A model turn that emits N tool
1885                    // calls arrives interleaved (Start0·Δ0·Start1·Δ1·End·End,
1886                    // Ends in arbitrary order); the old single-buffer approach
1887                    // let the 2nd Start wipe the 1st call's name+args, so the
1888                    // first tool ran as `unknown`/`{}`. Keyed by id, each call
1889                    // keeps its own (name, streamed-json) until its End.
1890                    let mut pending_tools: std::collections::HashMap<String, (String, String)> =
1891                        std::collections::HashMap::new();
1892                    let mut output_chars_seen: u64 = 0;
1893                    let mut output_tokens_emitted: u64 = 0;
1894                    let mut continue_agent_loop = false;
1895                    let mut stop_after_tool_result = false;
1896                    let mut assistant_text = String::new();
1897                    let mut tool_output_seen_this_completion = false;
1898                    // Tools invoked during this completion — fed to the hallucination
1899                    // guard so it knows whether the assistant has actually inspected
1900                    // any code/state before making a claim.
1901                    let mut tools_called_this_turn: Vec<String> = Vec::new();
1902                    // Accumulated reasoning_content (DeepSeek / Moonshot / Qwen
1903                    // thinking mode). Must be echoed back on the next turn or the
1904                    // provider returns 400.
1905                    let mut reasoning_buf: String = String::new();
1906
1907                    while let Some(event) = stream.next().await {
1908                        match event {
1909                            BrainEvent::TextDelta(text) => {
1910                                assistant_text.push_str(&text);
1911                                output_chars_seen += text.chars().count() as u64;
1912                                let estimated_output = (output_chars_seen + 3) / 4;
1913                                let output_delta =
1914                                    estimated_output.saturating_sub(output_tokens_emitted);
1915                                if output_delta > 0 {
1916                                    output_tokens_emitted += output_delta;
1917                                    estimated_output_unconfirmed += output_delta;
1918                                    estimated_cost_unconfirmed += caps.cost_output_per_mtok
1919                                        * (output_delta as f64)
1920                                        / 1_000_000.0;
1921                                    let _ = event_tx.send(Event::TokenUsageEstimated {
1922                                        run: run_id.clone(),
1923                                        input: 0,
1924                                        output: output_delta,
1925                                        reason: "streamed output estimate".into(),
1926                                    });
1927                                    let _ = event_tx.send(Event::CostUpdate {
1928                                        run: run_id.clone(),
1929                                        usd: cost_usd + estimated_cost_unconfirmed,
1930                                    });
1931                                }
1932                                let _ = event_tx.send(Event::ThinkingDelta {
1933                                    run: run_id.clone(),
1934                                    text: text.clone(),
1935                                });
1936                            }
1937                            BrainEvent::ReasoningDelta(rtext) => {
1938                                // Accumulate for the assistant message we'll push at
1939                                // end-of-turn. We don't surface it as text on screen —
1940                                // the engine's normal TextDelta path handles visible
1941                                // text, this is opaque thinking content the provider
1942                                // wants echoed back.
1943                                reasoning_buf.push_str(&rtext);
1944                                let _ = event_tx.send(Event::ReasoningDelta {
1945                                    run: run_id.clone(),
1946                                    text: rtext,
1947                                });
1948                            }
1949                            BrainEvent::ToolUseStart { id, name } => {
1950                                current_tool_name = name.clone();
1951                                tools_called_this_turn.push(name.clone());
1952                                current_tool_json.clear();
1953                                // Open this call's per-id accumulator (A1).
1954                                pending_tools.insert(id.clone(), (name.clone(), String::new()));
1955                                let risk = tools
1956                                    .get(&name)
1957                                    .map(|tool| tool.risk())
1958                                    .unwrap_or(RiskLevel::ReadOnly);
1959                                // Placeholder ToolUseProposed with empty args so the
1960                                // UI can open the card immediately. Real args follow
1961                                // at ToolUseEnd (see below) once the streamed JSON
1962                                // is complete.
1963                                let _ = event_tx.send(Event::ToolUseProposed {
1964                                    run: run_id.clone(),
1965                                    id: id.clone(),
1966                                    name: name.clone(),
1967                                    args: json!({}),
1968                                    risk,
1969                                });
1970                            }
1971                            BrainEvent::ToolUseDelta { id, json } => {
1972                                output_chars_seen += json.chars().count() as u64;
1973                                let estimated_output = (output_chars_seen + 3) / 4;
1974                                let output_delta =
1975                                    estimated_output.saturating_sub(output_tokens_emitted);
1976                                if output_delta > 0 {
1977                                    output_tokens_emitted += output_delta;
1978                                    estimated_output_unconfirmed += output_delta;
1979                                    estimated_cost_unconfirmed += caps.cost_output_per_mtok
1980                                        * (output_delta as f64)
1981                                        / 1_000_000.0;
1982                                    let _ = event_tx.send(Event::TokenUsageEstimated {
1983                                        run: run_id.clone(),
1984                                        input: 0,
1985                                        output: output_delta,
1986                                        reason: "streamed tool arguments estimate".into(),
1987                                    });
1988                                    let _ = event_tx.send(Event::CostUpdate {
1989                                        run: run_id.clone(),
1990                                        usd: cost_usd + estimated_cost_unconfirmed,
1991                                    });
1992                                }
1993                                // Append to THIS call's buffer (A1). Fall back
1994                                // to a fresh entry if a provider streams a
1995                                // delta before its Start (defensive).
1996                                pending_tools
1997                                    .entry(id.clone())
1998                                    .or_insert_with(|| (String::new(), String::new()))
1999                                    .1
2000                                    .push_str(&json);
2001                            }
2002                            BrainEvent::ToolUseEnd { id } => {
2003                                // Resolve THIS call's accumulated (name, json)
2004                                // by id (A1) — never from a shared buffer that
2005                                // a later call may have clobbered.
2006                                let (resolved_name, resolved_json) =
2007                                    pending_tools.remove(&id).unwrap_or_else(|| {
2008                                        (current_tool_name.clone(), current_tool_json.clone())
2009                                    });
2010
2011                                // Parse accumulated JSON
2012                                let args: serde_json::Value =
2013                                    serde_json::from_str(&resolved_json).unwrap_or(json!({}));
2014
2015                                // Check autonomy gate
2016                                let tool_name = if resolved_name.is_empty() {
2017                                    "unknown".to_string()
2018                                } else {
2019                                    resolved_name.clone()
2020                                };
2021                                // Keep the shared name current so the "running
2022                                // tool · X" status note (below) names THIS call.
2023                                current_tool_name = tool_name.clone();
2024                                let tool = tools.get(&tool_name);
2025                                let risk = tool
2026                                    .as_ref()
2027                                    .map(|tool| tool.risk())
2028                                    .unwrap_or(RiskLevel::ReadOnly);
2029
2030                                // Re-emit ToolUseProposed with the REAL args now
2031                                // that the streamed JSON is complete. The first
2032                                // emission at ToolUseStart used `{}` because the
2033                                // arguments hadn't streamed yet — the UI updates
2034                                // the existing card with these real arguments.
2035                                let _ = event_tx.send(Event::ToolUseProposed {
2036                                    run: run_id.clone(),
2037                                    id: id.clone(),
2038                                    name: tool_name.clone(),
2039                                    args: args.clone(),
2040                                    risk: risk.clone(),
2041                                });
2042                                let proposed = crate::autonomy::ProposedAction {
2043                                    tool_name: tool_name.clone(),
2044                                    risk: risk.clone(),
2045                                    args: args.clone(),
2046                                };
2047
2048                                let permission =
2049                                    self.config.permissions.evaluate(&PermissionContext {
2050                                        tool_name: &proposed.tool_name,
2051                                        risk: proposed.risk.clone(),
2052                                        args: &args,
2053                                        workspace_root: &workspace.root,
2054                                        provider: Some(brain.id()),
2055                                        surface: Some("engine"),
2056                                    });
2057                                let autonomy_verdict =
2058                                    if matches!(permission.decision, Decision::Allow) {
2059                                        Some(autonomy.evaluate(&proposed))
2060                                    } else {
2061                                        None
2062                                    };
2063                                let mut decision = autonomy_verdict
2064                                    .as_ref()
2065                                    .map(|verdict| verdict.decision.clone())
2066                                    .unwrap_or_else(|| permission.decision.clone());
2067                                if !matches!(permission.decision, Decision::Allow) {
2068                                    let _ = event_tx.send(Event::Message {
2069                                        run: run_id.clone(),
2070                                        role: "permissions".into(),
2071                                        text: permission.reason.clone(),
2072                                    });
2073                                }
2074                                if matches!(decision, Decision::AskUser) {
2075                                    let summary = format!(
2076                                        "{} Risque: {:?}.",
2077                                        humanize_tool_action(&proposed.tool_name, &args),
2078                                        proposed.risk
2079                                    );
2080                                    let _ = event_tx.send(Event::ApprovalRequested {
2081                                        run: run_id.clone(),
2082                                        id: id.clone(),
2083                                        summary: summary.clone(),
2084                                        tool: Some(proposed.tool_name.clone()),
2085                                        risk: Some(format!("{:?}", proposed.risk)),
2086                                    });
2087                                    let _ = event_tx.send(Event::AgentStatus {
2088                                        run: run_id.clone(),
2089                                        role: "coder".into(),
2090                                        status: AgentStatus::WaitingForApproval,
2091                                        note: format!(
2092                                            "en attente de ton accord pour {}",
2093                                            proposed.tool_name
2094                                        ),
2095                                    });
2096                                    // OnApprovalRequested hook so external
2097                                    // notifiers (Slack, email, …) can ping the
2098                                    // operator.
2099                                    let _ = self
2100                                        .hooks
2101                                        .execute(&HookEvent::OnApprovalRequested, &summary)
2102                                        .await;
2103                                    if let Some(handler) = &self.approval_handler {
2104                                        decision = handler
2105                                            .request_approval(ApprovalRequest {
2106                                                run: run_id.clone(),
2107                                                id: id.clone(),
2108                                                tool_name: proposed.tool_name.clone(),
2109                                                risk: proposed.risk.clone(),
2110                                                args: args.clone(),
2111                                                summary,
2112                                            })
2113                                            .await;
2114                                    } else if !std::io::IsTerminal::is_terminal(&std::io::stdin()) {
2115                                        let message = format!(
2116                                            "Approbation requise pour `{}`, mais stdin n'est pas interactif. Relance avec une autonomie plus élevée ou approuve dans le cockpit.",
2117                                            proposed.tool_name
2118                                        );
2119                                        let _ = event_tx.send(Event::Error {
2120                                            run: run_id.clone(),
2121                                            message: message.clone(),
2122                                        });
2123                                        decision = Decision::Deny;
2124                                    } else {
2125                                        use std::io::{self, Write};
2126                                        print!(
2127                                            "\n\x1b[1;33m{} Approve? [y/N]\x1b[0m ",
2128                                            humanize_tool_action(&proposed.tool_name, &args)
2129                                        );
2130                                        io::stdout().flush().ok();
2131                                        let mut input = String::new();
2132                                        io::stdin().read_line(&mut input).ok();
2133                                        decision = if input.trim().eq_ignore_ascii_case("y") {
2134                                            Decision::Allow
2135                                        } else {
2136                                            Decision::Deny
2137                                        };
2138                                    }
2139                                }
2140
2141                                if matches!(decision, Decision::AskUser) {
2142                                    let _ = event_tx.send(Event::Error {
2143                                        run: run_id.clone(),
2144                                        message: format!(
2145                                            "Approbation requise pour `{}` mais aucune réponse exploitable n'a été reçue.",
2146                                            proposed.tool_name
2147                                        ),
2148                                    });
2149                                    decision = Decision::Deny;
2150                                }
2151
2152                                let _ = event_tx.send(Event::ApprovalResolved {
2153                                    run: run_id.clone(),
2154                                    id: id.clone(),
2155                                    decision: decision.clone(),
2156                                });
2157
2158                                match decision {
2159                                    Decision::Allow => {
2160                                        if autonomy_verdict
2161                                            .as_ref()
2162                                            .map(|verdict| verdict.notify)
2163                                            .unwrap_or(false)
2164                                        {
2165                                            let _ = event_tx.send(Event::Message {
2166                                                run: run_id.clone(),
2167                                                role: "autonomy".into(),
2168                                                text: format!(
2169                                                    "{} will run under trusted autonomy with checkpoint notification",
2170                                                    proposed.tool_name
2171                                                ),
2172                                            });
2173                                        }
2174                                        // Track mutations so we can auto-verify later.
2175                                        if matches!(
2176                                            proposed.risk,
2177                                            RiskLevel::Mutating | RiskLevel::Destructive
2178                                        ) {
2179                                            had_mutation = true;
2180                                        }
2181                                        // Auto-checkpoint before mutating/exec/destructive
2182                                        let needs_checkpoint = autonomy_verdict
2183                                            .as_ref()
2184                                            .map(|verdict| verdict.needs_checkpoint)
2185                                            .unwrap_or_else(|| {
2186                                                matches!(
2187                                                    proposed.risk,
2188                                                    RiskLevel::Mutating
2189                                                        | RiskLevel::Exec
2190                                                        | RiskLevel::Destructive
2191                                                )
2192                                            });
2193                                        if needs_checkpoint {
2194                                            let vetoes = self
2195                                                .hooks
2196                                                .execute(
2197                                                    &HookEvent::PreCheckpoint,
2198                                                    &proposed.tool_name,
2199                                                )
2200                                                .await;
2201                                            let checkpoint_veto = vetoes
2202                                                .iter()
2203                                                .find(|result| result.veto)
2204                                                .and_then(|result| result.veto_reason.clone());
2205                                            if let Some(reason) = checkpoint_veto {
2206                                                let _ = event_tx.send(Event::Error {
2207                                                    run: run_id.clone(),
2208                                                    message: reason,
2209                                                });
2210                                                denied_by_approval = true;
2211                                                stop_after_tool_result = true;
2212                                                continue;
2213                                            }
2214                                            let checkpoints =
2215                                                GitCheckpoints::new(workspace.root.clone());
2216                                            if let Ok(cp_id) = checkpoints
2217                                                .snapshot(&format!("pre-{}", proposed.tool_name))
2218                                            {
2219                                                let _ = event_tx.send(Event::CheckpointCreated {
2220                                                    run: run_id.clone(),
2221                                                    id: cp_id,
2222                                                    label: format!("pre-{}", proposed.tool_name),
2223                                                });
2224                                                let _ = self
2225                                                    .hooks
2226                                                    .execute(
2227                                                        &HookEvent::PostCheckpoint,
2228                                                        &proposed.tool_name,
2229                                                    )
2230                                                    .await;
2231                                            }
2232                                        }
2233
2234                                        // Pass tool name + args to the hook
2235                                        // matcher so PreToolUse hooks can
2236                                        // inspect the actual payload (e.g.
2237                                        // protect-sensitive-files needs the
2238                                        // file path, not just "fs_write").
2239                                        let hook_ctx = format!("{} {}", proposed.tool_name, args);
2240                                        let hook_results = self
2241                                            .hooks
2242                                            .execute(&HookEvent::PreToolUse, &hook_ctx)
2243                                            .await;
2244                                        if let Some(reason) = hook_results
2245                                            .iter()
2246                                            .find(|result| result.veto)
2247                                            .and_then(|result| result.veto_reason.clone())
2248                                        {
2249                                            denied_by_approval = true;
2250                                            stop_after_tool_result = true;
2251                                            let _ = event_tx.send(Event::ToolOutput {
2252                                                run: run_id.clone(),
2253                                                id: id.clone(),
2254                                                blocks: vec![Block::Text(reason.clone())],
2255                                            });
2256                                            tool_output_seen_this_completion = true;
2257                                            tool_results_pending.push((
2258                                                id.clone(),
2259                                                proposed.tool_name.clone(),
2260                                                args.clone(),
2261                                                vec![ContentBlock::Text { text: reason }],
2262                                                true,
2263                                            ));
2264                                            continue;
2265                                        }
2266
2267                                        let _ = event_tx.send(Event::ToolUseStarted {
2268                                            run: run_id.clone(),
2269                                            id: id.clone(),
2270                                        });
2271                                        let _ = event_tx.send(Event::AgentStatus {
2272                                            run: run_id.clone(),
2273                                            role: "coder".into(),
2274                                            status: AgentStatus::Working,
2275                                            note: format!("running tool · {}", current_tool_name),
2276                                        });
2277
2278                                        let result = if let Some(tool) = tool {
2279                                            let ctx = ToolCtx {
2280                                                workspace_root: workspace.root.clone(),
2281                                                run_id: run_id.clone(),
2282                                            };
2283                                            match tool.call(args.clone(), &ctx).await {
2284                                                Ok(result) => result,
2285                                                Err(e) => crate::tools::ToolResult::error(format!(
2286                                                    "Tool {} failed: {}",
2287                                                    proposed.tool_name, e
2288                                                )),
2289                                            }
2290                                        } else {
2291                                            crate::tools::ToolResult::error(format!(
2292                                                "Unknown tool: {}",
2293                                                proposed.tool_name
2294                                            ))
2295                                        };
2296
2297                                        for block in &result.content {
2298                                            if let Block::Diff { file, patch } = block {
2299                                                let plus = patch
2300                                                    .lines()
2301                                                    .filter(|l| {
2302                                                        l.starts_with('+') && !l.starts_with("+++")
2303                                                    })
2304                                                    .count()
2305                                                    as u32;
2306                                                let minus = patch
2307                                                    .lines()
2308                                                    .filter(|l| {
2309                                                        l.starts_with('-') && !l.starts_with("---")
2310                                                    })
2311                                                    .count()
2312                                                    as u32;
2313                                                let _ = event_tx.send(Event::DiffProposed {
2314                                                    run: run_id.clone(),
2315                                                    file: file.clone(),
2316                                                    patch: patch.clone(),
2317                                                    plus,
2318                                                    minus,
2319                                                });
2320                                            }
2321                                        }
2322
2323                                        let blocks = result.content.clone();
2324                                        let text = tool_result_text(&blocks);
2325                                        let content_blocks = tool_result_content_blocks(&blocks);
2326                                        let is_error = result.is_error;
2327                                        skill_evidence.push_str(&text);
2328                                        skill_evidence.push('\n');
2329                                        let _ = event_tx.send(Event::ToolOutput {
2330                                            run: run_id.clone(),
2331                                            id: id.clone(),
2332                                            blocks,
2333                                        });
2334                                        // Surface writes as DiffApplied so the artifacts
2335                                        // ledger sees files that fs_write/edit/multi_edit
2336                                        // touched even when the tool returned plain text.
2337                                        if !is_error
2338                                            && matches!(
2339                                                proposed.tool_name.as_str(),
2340                                                "fs_write" | "edit" | "multi_edit"
2341                                            )
2342                                        {
2343                                            if let Some(p) =
2344                                                args.get("path").and_then(|v| v.as_str())
2345                                            {
2346                                                let _ = event_tx.send(Event::DiffApplied {
2347                                                    run: run_id.clone(),
2348                                                    file: p.to_string(),
2349                                                });
2350                                            } else if let Some(p) =
2351                                                args.get("file_path").and_then(|v| v.as_str())
2352                                            {
2353                                                let _ = event_tx.send(Event::DiffApplied {
2354                                                    run: run_id.clone(),
2355                                                    file: p.to_string(),
2356                                                });
2357                                            }
2358                                        }
2359                                        let _ = self
2360                                            .hooks
2361                                            .execute(&HookEvent::PostToolUse, &proposed.tool_name)
2362                                            .await;
2363                                        tool_output_seen_this_completion = true;
2364                                        tool_results_pending.push((
2365                                            id.clone(),
2366                                            proposed.tool_name.clone(),
2367                                            args.clone(),
2368                                            content_blocks,
2369                                            is_error,
2370                                        ));
2371                                    }
2372                                    Decision::AskUser => {
2373                                        // Supervised mode: prompt user on stdin
2374                                        waiting_for_approval = true;
2375                                        let approval_id = id.clone();
2376                                        let approval_name = proposed.tool_name.clone();
2377                                        let approval_args = args.clone();
2378                                        let approval_risk = proposed.risk;
2379
2380                                        // Emit approval requested
2381                                        let _ = event_tx.send(Event::ApprovalRequested {
2382                                            run: run_id.clone(),
2383                                            id: approval_id.clone(),
2384                                            summary: format!(
2385                                                "{} Risque: {:?}.",
2386                                                humanize_tool_action(
2387                                                    &approval_name,
2388                                                    &approval_args
2389                                                ),
2390                                                approval_risk
2391                                            ),
2392                                            tool: Some(approval_name.clone()),
2393                                            risk: Some(format!("{:?}", approval_risk)),
2394                                        });
2395
2396                                        // Wait for user input on stdin
2397                                        use std::io::{self, Write};
2398                                        print!(
2399                                            "\n\x1b[1;33mApprove {}? [y/N]\x1b[0m ",
2400                                            approval_name
2401                                        );
2402                                        io::stdout().flush().ok();
2403                                        let mut input = String::new();
2404                                        io::stdin().read_line(&mut input).ok();
2405                                        let approved = input.trim().to_lowercase() == "y";
2406
2407                                        if approved {
2408                                            waiting_for_approval = false;
2409                                            // Auto-checkpoint before mutating/exec/destructive
2410                                            if matches!(
2411                                                approval_risk,
2412                                                RiskLevel::Mutating
2413                                                    | RiskLevel::Exec
2414                                                    | RiskLevel::Destructive
2415                                            ) {
2416                                                let vetoes = self
2417                                                    .hooks
2418                                                    .execute(
2419                                                        &HookEvent::PreCheckpoint,
2420                                                        &approval_name,
2421                                                    )
2422                                                    .await;
2423                                                if let Some(reason) = vetoes
2424                                                    .iter()
2425                                                    .find(|result| result.veto)
2426                                                    .and_then(|result| result.veto_reason.clone())
2427                                                {
2428                                                    let _ = event_tx.send(Event::Error {
2429                                                        run: run_id.clone(),
2430                                                        message: reason,
2431                                                    });
2432                                                    denied_by_approval = true;
2433                                                    stop_after_tool_result = true;
2434                                                    continue;
2435                                                }
2436                                                let checkpoints =
2437                                                    GitCheckpoints::new(workspace.root.clone());
2438                                                if let Ok(cp_id) = checkpoints
2439                                                    .snapshot(&format!("pre-{}", approval_name))
2440                                                {
2441                                                    let _ =
2442                                                        event_tx.send(Event::CheckpointCreated {
2443                                                            run: run_id.clone(),
2444                                                            id: cp_id,
2445                                                            label: format!("pre-{}", approval_name),
2446                                                        });
2447                                                    let _ = self
2448                                                        .hooks
2449                                                        .execute(
2450                                                            &HookEvent::PostCheckpoint,
2451                                                            &approval_name,
2452                                                        )
2453                                                        .await;
2454                                                }
2455                                            }
2456                                            let hook_results = self
2457                                                .hooks
2458                                                .execute(&HookEvent::PreToolUse, &approval_name)
2459                                                .await;
2460                                            if let Some(reason) = hook_results
2461                                                .iter()
2462                                                .find(|result| result.veto)
2463                                                .and_then(|result| result.veto_reason.clone())
2464                                            {
2465                                                denied_by_approval = true;
2466                                                stop_after_tool_result = true;
2467                                                let _ = event_tx.send(Event::ToolOutput {
2468                                                    run: run_id.clone(),
2469                                                    id: approval_id.clone(),
2470                                                    blocks: vec![Block::Text(reason.clone())],
2471                                                });
2472                                                tool_output_seen_this_completion = true;
2473                                                tool_results_pending.push((
2474                                                    approval_id,
2475                                                    approval_name,
2476                                                    approval_args,
2477                                                    vec![ContentBlock::Text { text: reason }],
2478                                                    true,
2479                                                ));
2480                                                continue;
2481                                            }
2482                                            let _ = event_tx.send(Event::ToolUseStarted {
2483                                                run: run_id.clone(),
2484                                                id: approval_id.clone(),
2485                                            });
2486                                            let result = if let Some(tool) = tool {
2487                                                let ctx = ToolCtx {
2488                                                    workspace_root: workspace.root.clone(),
2489                                                    run_id: run_id.clone(),
2490                                                };
2491                                                match tool.call(approval_args.clone(), &ctx).await {
2492                                                    Ok(r) => r,
2493                                                    Err(e) => {
2494                                                        crate::tools::ToolResult::error(format!(
2495                                                            "Tool {} failed: {}",
2496                                                            approval_name, e
2497                                                        ))
2498                                                    }
2499                                                }
2500                                            } else {
2501                                                crate::tools::ToolResult::error(format!(
2502                                                    "Unknown tool: {}",
2503                                                    approval_name
2504                                                ))
2505                                            };
2506                                            let blocks = result.content.clone();
2507                                            let text = tool_result_text(&blocks);
2508                                            let content_blocks =
2509                                                tool_result_content_blocks(&blocks);
2510                                            let is_error = result.is_error;
2511                                            skill_evidence.push_str(&text);
2512                                            skill_evidence.push('\n');
2513                                            let _ = event_tx.send(Event::ToolOutput {
2514                                                run: run_id.clone(),
2515                                                id: approval_id.clone(),
2516                                                blocks,
2517                                            });
2518                                            let _ = self
2519                                                .hooks
2520                                                .execute(&HookEvent::PostToolUse, &approval_name)
2521                                                .await;
2522                                            tool_output_seen_this_completion = true;
2523                                            tool_results_pending.push((
2524                                                approval_id,
2525                                                approval_name,
2526                                                approval_args,
2527                                                content_blocks,
2528                                                is_error,
2529                                            ));
2530                                        } else {
2531                                            let _ = event_tx.send(Event::ToolOutput {
2532                                                run: run_id.clone(),
2533                                                id: approval_id.clone(),
2534                                                blocks: vec![Block::Text("Denied by user".into())],
2535                                            });
2536                                            tool_output_seen_this_completion = true;
2537                                            tool_results_pending.push((
2538                                                approval_id,
2539                                                approval_name,
2540                                                approval_args,
2541                                                vec![ContentBlock::Text {
2542                                                    text: "Denied by user".into(),
2543                                                }],
2544                                                true,
2545                                            ));
2546                                        }
2547                                    }
2548                                    Decision::Deny => {
2549                                        denied_by_approval = true;
2550                                        stop_after_tool_result = true;
2551                                        let _ = event_tx.send(Event::ToolOutput {
2552                                            run: run_id.clone(),
2553                                            id: id.clone(),
2554                                            blocks: vec![Block::Text(
2555                                                "Denied by autonomy policy".into(),
2556                                            )],
2557                                        });
2558                                        tool_output_seen_this_completion = true;
2559                                        tool_results_pending.push((
2560                                            id.clone(),
2561                                            proposed.tool_name.clone(),
2562                                            args.clone(),
2563                                            vec![ContentBlock::Text {
2564                                                text: "Denied by autonomy policy".into(),
2565                                            }],
2566                                            true,
2567                                        ));
2568                                    }
2569                                    // The Allow* tiered decisions came in
2570                                    // with the persistent-permissions
2571                                    // feature; treat them like Allow at the
2572                                    // engine level (the autonomy/permission
2573                                    // store handles persistence above).
2574                                    Decision::AllowOnce
2575                                    | Decision::AllowSession
2576                                    | Decision::AllowAlways => {}
2577                                }
2578
2579                                current_tool_json.clear();
2580                                current_tool_name.clear();
2581                            }
2582                            BrainEvent::Usage(usage) => {
2583                                total_input += usage.input;
2584                                total_output += usage.output;
2585                                // E1: provider usage is authoritative for this
2586                                // request. Replace all pre-usage estimates
2587                                // instead of subtracting from them; otherwise
2588                                // a conservative prompt estimate keeps a
2589                                // phantom remainder and doubles HUD totals.
2590                                estimated_input_unconfirmed = 0;
2591                                estimated_output_unconfirmed = 0;
2592                                let _ = event_tx.send(Event::TokenUsage {
2593                                    run: run_id.clone(),
2594                                    input: usage.input,
2595                                    output: usage.output,
2596                                });
2597
2598                                // Calculate cost
2599                                let input_cost =
2600                                    caps.cost_input_per_mtok * (usage.input as f64) / 1_000_000.0;
2601                                let output_cost =
2602                                    caps.cost_output_per_mtok * (usage.output as f64) / 1_000_000.0;
2603                                let actual_cost = input_cost + output_cost;
2604                                cost_usd += actual_cost;
2605                                estimated_cost_unconfirmed = 0.0;
2606
2607                                let _ = event_tx.send(Event::CostUpdate {
2608                                    run: run_id.clone(),
2609                                    usd: cost_usd + estimated_cost_unconfirmed,
2610                                });
2611                            }
2612                            BrainEvent::Done(reason) => {
2613                                match reason {
2614                                    crate::event::StopReason::EndTurn => {
2615                                        // Empty-completion fallback: if this model
2616                                        // produced nothing (no text, no tool) and the
2617                                        // run has produced nothing so far, try the
2618                                        // next model instead of finishing empty.
2619                                        let this_empty = assistant_text.trim().is_empty()
2620                                            && !tool_output_seen_this_completion;
2621                                        if this_empty && !produced_any_output {
2622                                            let next_idx = current_chain_idx + 1;
2623                                            if next_idx < brain_policy.chain.len() {
2624                                                current_chain_idx = next_idx;
2625                                                let _ = event_tx.send(Event::ModelSwitched {
2626                                                    run: run_id.clone(),
2627                                                    from: brain.id().to_string(),
2628                                                    to: brain_policy.chain[current_chain_idx]
2629                                                        .id()
2630                                                        .to_string(),
2631                                                    reason: "empty response".into(),
2632                                                });
2633                                                continue_agent_loop = true;
2634                                                break;
2635                                            }
2636                                        }
2637                                        if !assistant_text.trim().is_empty() {
2638                                            produced_any_output = true;
2639                                            let mut blocks = Vec::new();
2640                                            if !reasoning_buf.is_empty() {
2641                                                blocks.push(ContentBlock::Reasoning {
2642                                                    text: reasoning_buf.clone(),
2643                                                });
2644                                            }
2645                                            blocks.push(ContentBlock::Text {
2646                                                text: assistant_text.clone(),
2647                                            });
2648                                            let assistant_msg = Msg {
2649                                                role: "assistant".into(),
2650                                                content: blocks,
2651                                            };
2652                                            let turn_messages = vec![assistant_msg.clone()];
2653                                            let has_verified_tool_context =
2654                                                tool_output_seen_this_completion
2655                                                    || messages.iter().any(|m| {
2656                                                        m.content.iter().any(|block| {
2657                                                            matches!(
2658                                                                block,
2659                                                                ContentBlock::ToolResult { .. }
2660                                                            )
2661                                                        })
2662                                                    });
2663
2664                                            if let Some(correction) = self.reasoning.guard_turn(
2665                                                &turn_messages,
2666                                                has_verified_tool_context,
2667                                            ) {
2668                                                messages.push(assistant_msg);
2669                                                let _ = event_tx.send(Event::Message {
2670                                                    run: run_id.clone(),
2671                                                    role: "guard".into(),
2672                                                    text: correction.clone(),
2673                                                });
2674                                                messages.push(Msg {
2675                                                    role: "user".into(),
2676                                                    content: vec![ContentBlock::Text {
2677                                                        text: format!("SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.", correction),
2678                                                    }],
2679                                                });
2680                                                continue_agent_loop = true;
2681                                                break;
2682                                            }
2683
2684                                            // Hallucination guard: catch claims about
2685                                            // code structure made without first calling
2686                                            // fs_read / search this turn.
2687                                            if self.reasoning.hallucination_guard {
2688                                                if let Some(correction) =
2689                                                    crate::reasoning::HallucinationGuard::verify(
2690                                                        &assistant_text,
2691                                                        &tools_called_this_turn,
2692                                                    )
2693                                                {
2694                                                    let mut blocks2 = Vec::new();
2695                                                    if !reasoning_buf.is_empty() {
2696                                                        blocks2.push(ContentBlock::Reasoning {
2697                                                            text: reasoning_buf.clone(),
2698                                                        });
2699                                                    }
2700                                                    blocks2.push(ContentBlock::Text {
2701                                                        text: assistant_text.clone(),
2702                                                    });
2703                                                    let assistant_msg2 = Msg {
2704                                                        role: "assistant".into(),
2705                                                        content: blocks2,
2706                                                    };
2707                                                    messages.push(assistant_msg2);
2708                                                    let _ = event_tx.send(Event::Message {
2709                                                        run: run_id.clone(),
2710                                                        role: "guard".into(),
2711                                                        text: correction.clone(),
2712                                                    });
2713                                                    messages.push(Msg {
2714                                                        role: "user".into(),
2715                                                        content: vec![ContentBlock::Text {
2716                                                            text: format!(
2717                                                                "SYSTEM: {}. Call fs_read or search to verify the file/symbol first, then re-state the claim with the raw evidence.",
2718                                                                correction
2719                                                            ),
2720                                                        }],
2721                                                    });
2722                                                    continue_agent_loop = true;
2723                                                    break;
2724                                                }
2725                                            }
2726
2727                                            // Tool narration guard: detect when the
2728                                            // assistant describes using a tool instead
2729                                            // of actually calling it. Only triggers when
2730                                            // no tools were called this turn but the text
2731                                            // contains tool-like language.
2732                                            if tools_called_this_turn.is_empty()
2733                                                && tool_narration_detected(&assistant_text)
2734                                            {
2735                                                let correction = "You described using a tool but did not actually call it. When a tool would help, CALL it — never narrate what it would do. Use the exact tool call format.";
2736                                                messages.push(Msg {
2737                                                    role: "assistant".into(),
2738                                                    content: vec![ContentBlock::Text {
2739                                                        text: assistant_text.clone(),
2740                                                    }],
2741                                                });
2742                                                let _ = event_tx.send(Event::Message {
2743                                                    run: run_id.clone(),
2744                                                    role: "guard".into(),
2745                                                    text: correction.into(),
2746                                                });
2747                                                messages.push(Msg {
2748                                                    role: "user".into(),
2749                                                    content: vec![ContentBlock::Text {
2750                                                        text: format!(
2751                                                            "SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.",
2752                                                            correction
2753                                                        ),
2754                                                    }],
2755                                                });
2756                                                continue_agent_loop = true;
2757                                                break;
2758                                            }
2759                                            messages.push(assistant_msg);
2760                                        }
2761
2762                                        // ── Pre-mutation self-critique (reasoning §) ─
2763                                        // If we mutated files this turn, emit a
2764                                        // structured self-review of the change set
2765                                        // so the operator/UI can see the agent's
2766                                        // own checklist before auto-verify runs.
2767                                        if had_mutation
2768                                            && self.reasoning.self_critique
2769                                            && !diffs.is_empty()
2770                                        {
2771                                            let review =
2772                                                crate::reasoning::SelfCritique::pre_mutation_review(
2773                                                    &diffs,
2774                                                    Some(&task.description),
2775                                                );
2776                                            let _ = event_tx.send(Event::Message {
2777                                                run: run_id.clone(),
2778                                                role: "self-critique".into(),
2779                                                text: review,
2780                                            });
2781                                        }
2782
2783                                        // ── Auto-verify (§10 testing) ───────────
2784                                        // The model thinks it's done. If it mutated
2785                                        // files and a verify command is configured,
2786                                        // run it; on failure, re-inject so the agent
2787                                        // fixes it (bounded). When this model's fix
2788                                        // budget is exhausted, ESCALATE to the next
2789                                        // (stronger) model in the chain rather than
2790                                        // ending the run silently unverified — that
2791                                        // is the whole point of routing cheap-first.
2792                                        if had_mutation {
2793                                            if let Some(verify_cmd) =
2794                                                self.config.defaults.verify_command.clone()
2795                                            {
2796                                                verify_attempts += 1;
2797                                                had_mutation = false;
2798                                                let parts: Vec<String> = verify_cmd
2799                                                    .split_whitespace()
2800                                                    .map(String::from)
2801                                                    .collect();
2802                                                if !parts.is_empty() {
2803                                                    let _ = event_tx.send(Event::AgentStatus {
2804                                                        run: run_id.clone(),
2805                                                        role: "verifier".into(),
2806                                                        status: AgentStatus::Working,
2807                                                        note: format!("running `{}`", verify_cmd),
2808                                                    });
2809                                                    let cmd = crate::sandbox::Command {
2810                                                        program: parts[0].clone(),
2811                                                        args: parts[1..].to_vec(),
2812                                                        env: std::collections::HashMap::new(),
2813                                                        workdir: workspace.root.clone(),
2814                                                    };
2815                                                    let limits = crate::sandbox::Limits {
2816                                                        timeout_ms: 300_000,
2817                                                        max_output_bytes: 16_000,
2818                                                    };
2819                                                    match workspace
2820                                                        .sandbox
2821                                                        .exec(&cmd, &limits)
2822                                                        .await
2823                                                    {
2824                                                        Ok(res) if res.exit_code != 0 => {
2825                                                            let _ = event_tx.send(Event::TestResult {
2826                                                                run: run_id.clone(),
2827                                                                passed: 0,
2828                                                                failed: 1,
2829                                                                detail: format!(
2830                                                                    "verify `{}` failed (exit {})",
2831                                                                    verify_cmd, res.exit_code
2832                                                                ),
2833                                                            });
2834                                                            let out = format!(
2835                                                                "{}\n{}",
2836                                                                res.stdout, res.stderr
2837                                                            );
2838                                                            let tail: String = out
2839                                                                .lines()
2840                                                                .rev()
2841                                                                .take(40)
2842                                                                .collect::<Vec<_>>()
2843                                                                .into_iter()
2844                                                                .rev()
2845                                                                .collect::<Vec<_>>()
2846                                                                .join("\n");
2847                                                            if verify_attempts
2848                                                                <= MAX_VERIFY_ATTEMPTS
2849                                                            {
2850                                                                // Same model gets a bounded
2851                                                                // chance to fix its own work.
2852                                                                messages.push(Msg {
2853                                                                    role: "user".into(),
2854                                                                    content: vec![ContentBlock::Text {
2855                                                                        text: format!(
2856                                                                            "SYSTEM: verification command `{}` FAILED (exit {}). Fix the code, then it will be re-verified. Output:\n{}",
2857                                                                            verify_cmd, res.exit_code, tail
2858                                                                        ),
2859                                                                    }],
2860                                                                });
2861                                                                continue_agent_loop = true;
2862                                                                break;
2863                                                            }
2864                                                            // Fix budget exhausted — escalate
2865                                                            // to the next model in the chain.
2866                                                            let next_idx = current_chain_idx + 1;
2867                                                            if next_idx < brain_policy.chain.len()
2868                                                                && verify_escalations
2869                                                                    < MAX_VERIFY_ESCALATIONS
2870                                                            {
2871                                                                verify_escalations += 1;
2872                                                                verify_attempts = 0;
2873                                                                let from = brain.id().to_string();
2874                                                                let to = brain_policy.chain
2875                                                                    [next_idx]
2876                                                                    .id()
2877                                                                    .to_string();
2878                                                                current_chain_idx = next_idx;
2879                                                                let _ = event_tx.send(
2880                                                                    Event::ModelSwitched {
2881                                                                        run: run_id.clone(),
2882                                                                        from,
2883                                                                        to,
2884                                                                        reason: format!(
2885                                                                            "verification still failing after {} fixes — escalating",
2886                                                                            MAX_VERIFY_ATTEMPTS
2887                                                                        ),
2888                                                                    },
2889                                                                );
2890                                                                messages.push(Msg {
2891                                                                    role: "user".into(),
2892                                                                    content: vec![ContentBlock::Text {
2893                                                                        text: format!(
2894                                                                            "SYSTEM: a previous model attempted this task but verification `{}` still FAILS (exit {}). You are a stronger model brought in to finish the job. Diagnose properly, fix the code, then it will be re-verified. Output:\n{}",
2895                                                                            verify_cmd, res.exit_code, tail
2896                                                                        ),
2897                                                                    }],
2898                                                                });
2899                                                                continue_agent_loop = true;
2900                                                                break;
2901                                                            }
2902                                                            // No stronger model left: end
2903                                                            // HONESTLY as a failure instead of
2904                                                            // pretending the run succeeded.
2905                                                            had_error = true;
2906                                                            last_error = Some(format!(
2907                                                                "verification `{}` still failing after retries and escalation",
2908                                                                verify_cmd
2909                                                            ));
2910                                                            continue_agent_loop = false;
2911                                                            break;
2912                                                        }
2913                                                        Ok(_) => {
2914                                                            let _ =
2915                                                                event_tx.send(Event::TestResult {
2916                                                                    run: run_id.clone(),
2917                                                                    passed: 1,
2918                                                                    failed: 0,
2919                                                                    detail: format!(
2920                                                                        "verify `{}` passed",
2921                                                                        verify_cmd
2922                                                                    ),
2923                                                                });
2924                                                        }
2925                                                        Err(e) => {
2926                                                            let _ = event_tx.send(Event::Message {
2927                                                                run: run_id.clone(),
2928                                                                role: "guard".into(),
2929                                                                text: format!(
2930                                                                    "verify command could not run: {}",
2931                                                                    e
2932                                                                ),
2933                                                            });
2934                                                        }
2935                                                    }
2936                                                }
2937                                            }
2938                                        }
2939                                    }
2940                                    crate::event::StopReason::ToolUse => {
2941                                        // A single model turn that emits N tool calls
2942                                        // MUST be replayed as ONE assistant message
2943                                        // carrying reasoning_content + ALL tool_calls,
2944                                        // followed by N tool-result messages. Splitting
2945                                        // it into one assistant message per tool left
2946                                        // the 2nd+ calls without reasoning_content, which
2947                                        // DeepSeek/Qwen/Moonshot thinking-mode rejects
2948                                        // with HTTP 400 ("reasoning_content must be passed
2949                                        // back"), aborting every turn after the first and
2950                                        // leaving multi-file tasks half-done. One
2951                                        // assistant message with a tool_calls array is
2952                                        // also the correct OpenAI/Anthropic shape.
2953                                        let drained: Vec<_> =
2954                                            std::mem::take(&mut tool_results_pending);
2955
2956                                        let mut assistant_blocks = Vec::new();
2957                                        if !reasoning_buf.is_empty() {
2958                                            assistant_blocks.push(ContentBlock::Reasoning {
2959                                                text: reasoning_buf.clone(),
2960                                            });
2961                                        }
2962                                        // Signature of this turn's tool calls for the
2963                                        // stuck-loop guard (name + args, order-sensitive).
2964                                        let turn_sig = {
2965                                            use std::collections::hash_map::DefaultHasher;
2966                                            use std::hash::{Hash, Hasher};
2967                                            let mut h = DefaultHasher::new();
2968                                            for (_, name, args, _, _) in &drained {
2969                                                name.hash(&mut h);
2970                                                args.to_string().hash(&mut h);
2971                                            }
2972                                            h.finish()
2973                                        };
2974                                        for (tool_id, tool_name, args, _content, _is_error) in
2975                                            &drained
2976                                        {
2977                                            assistant_blocks.push(ContentBlock::ToolUse {
2978                                                id: tool_id.clone(),
2979                                                name: tool_name.clone(),
2980                                                input: args.clone(),
2981                                            });
2982                                        }
2983                                        messages.push(Msg {
2984                                            role: "assistant".into(),
2985                                            content: assistant_blocks,
2986                                        });
2987
2988                                        let turn_had_tools = !drained.is_empty();
2989                                        for (tool_id, _tool_name, _args, content, is_error) in
2990                                            drained
2991                                        {
2992                                            messages.push(Msg {
2993                                                role: "user".into(),
2994                                                content: vec![ContentBlock::ToolResult {
2995                                                    tool_use_id: tool_id,
2996                                                    content,
2997                                                    is_error: Some(is_error),
2998                                                }],
2999                                            });
3000                                        }
3001                                        if tool_output_seen_this_completion {
3002                                            produced_any_output = true;
3003                                        }
3004
3005                                        // Stuck-loop guard: identical tool-call turns.
3006                                        if turn_had_tools {
3007                                            if last_tool_sig == Some(turn_sig) {
3008                                                repeated_tool_turns += 1;
3009                                            } else {
3010                                                repeated_tool_turns = 0;
3011                                                last_tool_sig = Some(turn_sig);
3012                                            }
3013                                        }
3014                                        if repeated_tool_turns == 2 {
3015                                            // 3rd identical turn — nudge the model.
3016                                            messages.push(Msg {
3017                                                role: "user".into(),
3018                                                content: vec![ContentBlock::Text {
3019                                                    text: "guard: you have issued the exact same \
3020                                                           tool call(s) three turns in a row with \
3021                                                           identical arguments. The result will \
3022                                                           not change. State what you learned and \
3023                                                           take a DIFFERENT action — or finish \
3024                                                           with your best answer now."
3025                                                        .into(),
3026                                                }],
3027                                            });
3028                                            send(Event::Message {
3029                                                run: run_id.clone(),
3030                                                role: "guard".into(),
3031                                                text: "repeated identical tool calls — nudging \
3032                                                       the model to change approach"
3033                                                    .into(),
3034                                            });
3035                                        } else if repeated_tool_turns >= 4 {
3036                                            // 5th identical turn — stop honestly instead of
3037                                            // burning the remaining turn/budget allowance.
3038                                            send(Event::Message {
3039                                                run: run_id.clone(),
3040                                                role: "guard".into(),
3041                                                text: "stuck loop: 5 identical tool-call turns \
3042                                                       — stopping the run"
3043                                                    .into(),
3044                                            });
3045                                            had_error = true;
3046                                            last_error = Some(
3047                                                "stopped by stuck-loop guard (5 identical \
3048                                                 tool-call turns)"
3049                                                    .into(),
3050                                            );
3051                                            continue_agent_loop = false;
3052                                            break;
3053                                        }
3054
3055                                        continue_agent_loop =
3056                                            !waiting_for_approval && !stop_after_tool_result;
3057                                        break;
3058                                    }
3059                                    _ => {}
3060                                }
3061                                break; // Done
3062                            }
3063                            BrainEvent::Error(msg) => {
3064                                let _ = event_tx.send(Event::Error {
3065                                    run: run_id.clone(),
3066                                    message: msg.clone(),
3067                                });
3068                                let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
3069                                let next_idx = current_chain_idx + 1;
3070                                if next_idx < brain_policy.chain.len() {
3071                                    current_chain_idx = next_idx;
3072                                    let switch_ctx = format!(
3073                                        "{} -> {}",
3074                                        brain.id(),
3075                                        brain_policy.chain[current_chain_idx].id()
3076                                    );
3077                                    let _ = event_tx.send(Event::ModelSwitched {
3078                                        run: run_id.clone(),
3079                                        from: brain.id().to_string(),
3080                                        to: brain_policy.chain[current_chain_idx].id().to_string(),
3081                                        reason: msg,
3082                                    });
3083                                    let _ = self
3084                                        .hooks
3085                                        .execute(&HookEvent::OnModelSwitched, &switch_ctx)
3086                                        .await;
3087                                    continue_agent_loop = true;
3088                                } else {
3089                                    had_error = true;
3090                                    last_error = Some(msg);
3091                                }
3092                                break;
3093                            }
3094                        }
3095                    }
3096
3097                    // Robust empty-completion fallback: some providers end the
3098                    // stream WITHOUT a Done(EndTurn) (so the in-stream check never
3099                    // fires). If this completion produced nothing and the run has
3100                    // produced nothing, advance to the next model in the chain.
3101                    if !continue_agent_loop && !had_error {
3102                        let this_empty =
3103                            assistant_text.trim().is_empty() && !tool_output_seen_this_completion;
3104                        if this_empty && !produced_any_output {
3105                            let next_idx = current_chain_idx + 1;
3106                            if next_idx < brain_policy.chain.len() {
3107                                let _ = event_tx.send(Event::ModelSwitched {
3108                                    run: run_id.clone(),
3109                                    from: brain.id().to_string(),
3110                                    to: brain_policy.chain[next_idx].id().to_string(),
3111                                    reason: "empty response".into(),
3112                                });
3113                                current_chain_idx = next_idx;
3114                                continue;
3115                            }
3116                        }
3117                    }
3118
3119                    if continue_agent_loop {
3120                        continue;
3121                    }
3122                    break; // Task complete
3123                }
3124                Err(e) => {
3125                    let err_msg = format!("{}", e);
3126                    let _ = event_tx.send(Event::Error {
3127                        run: run_id.clone(),
3128                        message: err_msg.clone(),
3129                    });
3130
3131                    // Transient failures (rate limit, timeout, 5xx, connection
3132                    // blips) get a bounded retry on the SAME brain first —
3133                    // otherwise one 429 on the primary silently downgrades the
3134                    // whole run to a weaker fallback model.
3135                    let retry_after_hint = match e.downcast_ref::<BrainError>() {
3136                        Some(BrainError::RateLimit { retry_after }) => Some(*retry_after),
3137                        Some(BrainError::Timeout) => Some(None),
3138                        Some(BrainError::ServerError { status, .. }) if *status >= 500 => {
3139                            Some(None)
3140                        }
3141                        Some(_) => None,
3142                        None => {
3143                            let s = err_msg.to_lowercase();
3144                            let transient = s.contains("rate limit")
3145                                || s.contains("429")
3146                                || s.contains("timeout")
3147                                || s.contains("timed out")
3148                                || s.contains("connection")
3149                                || s.contains("overloaded")
3150                                || s.contains("502")
3151                                || s.contains("503");
3152                            if transient { Some(None) } else { None }
3153                        }
3154                    };
3155                    if let Some(hint) = retry_after_hint {
3156                        if transient_retries < MAX_TRANSIENT_RETRIES {
3157                            transient_retries += 1;
3158                            // Honour the provider's Retry-After when given,
3159                            // capped so a run never stalls for minutes.
3160                            let secs = hint.unwrap_or(2u64.pow(transient_retries)).min(20);
3161                            send(Event::Message {
3162                                run: run_id.clone(),
3163                                role: "guard".into(),
3164                                text: format!(
3165                                    "provider hiccup ({}) — retrying {} in {}s (attempt {}/{})",
3166                                    err_msg,
3167                                    brain.id(),
3168                                    secs,
3169                                    transient_retries,
3170                                    MAX_TRANSIENT_RETRIES
3171                                ),
3172                            });
3173                            tokio::time::sleep(std::time::Duration::from_secs(secs)).await;
3174                            continue;
3175                        }
3176                    }
3177                    transient_retries = 0;
3178
3179                    // Try next in chain
3180                    let next_idx = current_chain_idx + 1;
3181                    if next_idx < brain_policy.chain.len() {
3182                        current_chain_idx = next_idx;
3183                        let _ = event_tx.send(Event::ModelSwitched {
3184                            run: run_id.clone(),
3185                            from: brain.id().to_string(),
3186                            to: brain_policy.chain[current_chain_idx].id().to_string(),
3187                            reason: err_msg,
3188                        });
3189                    } else {
3190                        had_error = true;
3191                        last_error = Some(err_msg);
3192                        break;
3193                    }
3194                }
3195            }
3196        }
3197
3198        // Final token usage. In-stream BrainEvent::Usage already emitted one
3199        // Event::TokenUsage per completion with INCREMENTS — surfaces sum
3200        // those events, so re-emitting the cumulative total here double-
3201        // counted every confirmed token. Only emit when the provider never
3202        // reported usage, and then as the ESTIMATE it actually is.
3203        let final_input = total_input + estimated_input_unconfirmed;
3204        let final_output = total_output + estimated_output_unconfirmed;
3205        if total_input == 0 && total_output == 0 && (final_input > 0 || final_output > 0) {
3206            let _ = event_tx.send(Event::TokenUsageEstimated {
3207                run: run_id.clone(),
3208                input: final_input,
3209                output: final_output,
3210                reason: "provider reported no usage events".into(),
3211            });
3212        }
3213        let final_status = if had_error {
3214            format!(
3215                "error: {}",
3216                last_error.unwrap_or_else(|| "run failed".into())
3217            )
3218        } else if waiting_for_approval {
3219            "waiting_for_approval".into()
3220        } else if denied_by_approval {
3221            "denied".into()
3222        } else {
3223            "completed".into()
3224        };
3225        let final_note = match final_status.as_str() {
3226            "completed" => format!("completed · {}↑ {}↓ tok", final_input, final_output),
3227            "waiting_for_approval" => "en attente de ton accord".to_string(),
3228            "denied" => "arrêté · approbation refusée".to_string(),
3229            other => other.to_string(),
3230        };
3231
3232        // Mark coder lane done — clears the animated caret cleanly.
3233        let _ = event_tx.send(Event::AgentStatus {
3234            run: run_id.clone(),
3235            role: "coder".into(),
3236            status: AgentStatus::Done,
3237            note: final_note,
3238        });
3239
3240        let outcome = OutcomeSummary {
3241            status: final_status,
3242            diffs,
3243            cost_usd: cost_usd + estimated_cost_unconfirmed,
3244            tokens: TokenUsage {
3245                input: total_input + estimated_input_unconfirmed,
3246                output: total_output + estimated_output_unconfirmed,
3247            },
3248            cost_comparison: String::new(),
3249            duration_ms: Some(run_started_at.elapsed().as_millis() as u64),
3250        };
3251
3252        // Persist task to memory
3253        if let Some(mem) = &self.memory {
3254            let _ = mem.save_task(&crate::memory::TaskMem {
3255                run_id: run_id.0.clone(),
3256                messages: messages.clone(),
3257                created_at: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(),
3258            });
3259        }
3260
3261        // Per-repo routing memory: record only verification-backed outcomes —
3262        // a "completed" without a verify command proves nothing.
3263        {
3264            use crate::router::learned::RunRoutingOutcome;
3265            let routing_outcome = if had_error {
3266                Some(RunRoutingOutcome::Failed)
3267            } else if verify_escalations > 0 {
3268                Some(RunRoutingOutcome::Escalated)
3269            } else if verify_attempts > 0 && outcome.status == "completed" {
3270                Some(RunRoutingOutcome::VerifiedSuccess)
3271            } else {
3272                None
3273            };
3274            if let Some(o) = routing_outcome {
3275                repo_routing.record(&classified_tier, o);
3276            }
3277        }
3278
3279        // Propose skill candidate from successful run
3280        if outcome.status == "completed" {
3281            if let Some(skills) = &self.skills {
3282                if let Some(candidate) = Curator::propose_skill_if_missing(
3283                    &task.description,
3284                    &skill_evidence,
3285                    skills.as_ref(),
3286                ) {
3287                    let skill_name = candidate.name.clone();
3288                    let _ = event_tx.send(Event::SkillLearned {
3289                        run: run_id.clone(),
3290                        name: skill_name.clone(),
3291                    });
3292                    let _ = self
3293                        .hooks
3294                        .execute(&HookEvent::OnSkillLearned, &skill_name)
3295                        .await;
3296                    let _ = skills.add(candidate);
3297                }
3298            }
3299
3300            // Auto-distill facts from the successful run. Reconstruct the event
3301            // view from the final conversation: ToolUse blocks carry the real
3302            // tool args (file paths, content), Text blocks carry reasoning — both
3303            // are what the Distiller mines for durable user facts (§3.8).
3304            if let Some(mem) = &self.memory {
3305                let events = events_from_messages(&run_id, &messages);
3306                Distiller::distill(mem, &events, &task.description).await;
3307            }
3308        }
3309
3310        let _ = event_tx.send(Event::RunFinished {
3311            run: run_id.clone(),
3312            outcome: outcome.clone(),
3313        });
3314
3315        // PostRun lifecycle hook (best-effort, non-blocking semantics).
3316        let _ = self
3317            .hooks
3318            .execute(&HookEvent::PostRun, &task.description)
3319            .await;
3320
3321        Ok(outcome)
3322    }
3323}
3324
3325// ─── Tool narration detection ──────────────────────────────────────────────────
3326
3327/// Detects when the assistant describes using a tool ("I'll run the tests",
3328/// "Let me search for...") without actually emitting a ToolUse block.
3329/// Returns true when tool-like language is present but no tools were called.
3330fn tool_narration_detected(text: &str) -> bool {
3331    let lower = text.to_lowercase();
3332    let patterns = [
3333        "i'll use",
3334        "i will use",
3335        "let me use",
3336        "i'll run",
3337        "i will run",
3338        "let me run",
3339        "i'll search",
3340        "i will search",
3341        "let me search",
3342        "i'll check",
3343        "i will check",
3344        "let me check",
3345        "i'll read",
3346        "i will read",
3347        "let me read",
3348        "i'll write",
3349        "i will write",
3350        "let me write",
3351        "i'll execute",
3352        "i will execute",
3353        "let me execute",
3354        "i'll call",
3355        "i will call",
3356        "let me call",
3357        "i'll fetch",
3358        "i will fetch",
3359        "let me fetch",
3360        "i'll look up",
3361        "i will look up",
3362        "let me look up",
3363        "i'll test",
3364        "i will test",
3365        "let me test",
3366        "running the test",
3367        "running the command",
3368        "searching for",
3369        "looking up",
3370        // I1: French narration. Sparrow answers in French, so the English-only
3371        // guard above never fired for francophone users — the central
3372        // "describe the tool instead of calling it" failsafe was dead in the
3373        // user's own language. These cover the common openings.
3374        "je vais utiliser",
3375        "je vais lancer",
3376        "je vais exécuter",
3377        "je vais executer", // tolerate the unaccented spelling
3378        "je vais lire",
3379        "je vais écrire",
3380        "je vais créer",
3381        "je vais modifier",
3382        "je vais chercher",
3383        "je vais rechercher",
3384        "je vais vérifier",
3385        "je vais regarder",
3386        "je vais consulter",
3387        "je vais ouvrir",
3388        "je vais appeler",
3389        "laisse-moi",
3390        "laissez-moi",
3391        "permets-moi de",
3392        "permettez-moi de",
3393        "je m'occupe de",
3394        "je commence par",
3395        "je vais d'abord",
3396    ];
3397    patterns.iter().any(|p| lower.contains(p))
3398}
3399
3400#[cfg(test)]
3401mod tests {
3402    use super::*;
3403
3404    #[test]
3405    fn main_agent_system_prompt_carries_the_reasoning_protocol() {
3406        let workspace_root = PathBuf::from(".");
3407        let prompt = build_system_prompt(SystemPromptInput {
3408            identity: &Identity::default(),
3409            tier: Some(&crate::router::TaskTier::Hard),
3410            workspace_root: &workspace_root,
3411            facts: &[],
3412            memory_docs: &[],
3413            instruction_docs: &[],
3414            skills: &[],
3415            skill_catalog: &[],
3416        });
3417        // Anchors taken from src/engine/main_soul.md. The soul has been
3418        // rewritten more than once; we pin the load-bearing concepts (tier
3419        // triage, the tribunal with its three reviewer roles, the
3420        // anti-simulation rule, the "real execution beats mental
3421        // simulation" instruction) rather than any single section header.
3422        for marker in [
3423            "TIER TRIAGE",
3424            "Tribunal",
3425            "Skeptic",
3426            "Adversary",
3427            "Anti-simulation",
3428            "Real execution beats",
3429        ] {
3430            assert!(prompt.contains(marker), "main soul must contain `{marker}`");
3431        }
3432    }
3433
3434    #[test]
3435    fn trivial_prompt_uses_lean_mode_without_main_soul_or_full_skill_catalog() {
3436        let skill = crate::capabilities::Skill {
3437            name: "tiny-skill".into(),
3438            description: "Tiny relevant skill".into(),
3439            trigger: vec!["tiny".into()],
3440            body: "Do the tiny thing.".into(),
3441            source_file: "tiny/SKILL.md".into(),
3442            usage_count: 0,
3443            created_at: String::new(),
3444            score: 1.0,
3445            auto_generated: false,
3446            references: Vec::new(),
3447            templates: Vec::new(),
3448            scripts: Vec::new(),
3449            assets: Vec::new(),
3450        };
3451        let workspace_root = PathBuf::from(".");
3452        let skills = vec![skill];
3453        let prompt = build_system_prompt(SystemPromptInput {
3454            identity: &Identity::default(),
3455            tier: Some(&crate::router::TaskTier::Trivial),
3456            workspace_root: &workspace_root,
3457            facts: &[],
3458            memory_docs: &[],
3459            instruction_docs: &[],
3460            skills: &skills,
3461            skill_catalog: &skills,
3462        });
3463
3464        assert!(prompt.contains("Simple-task mode"));
3465        assert!(!prompt.contains("TIER TRIAGE"));
3466        assert!(!prompt.contains("Skill library ("));
3467        assert!(prompt.contains("## Relevant skills for this task"));
3468    }
3469
3470    #[test]
3471    fn provider_messages_strip_ui_status_leaks() {
3472        let messages = vec![Msg {
3473            role: "user".into(),
3474            content: vec![ContentBlock::Text {
3475                text: "keep this\n✓ coder completed · 4487↑ 150↓ tok\ncoder ◌ consulting deepseek · parsing request…\nkeep that".into(),
3476            }],
3477        }];
3478
3479        let sanitized = sanitize_messages_for_provider(&messages);
3480        let ContentBlock::Text { text } = &sanitized[0].content[0] else {
3481            panic!("expected text block");
3482        };
3483        assert!(text.contains("keep this"));
3484        assert!(text.contains("keep that"));
3485        assert!(!text.contains("completed ·"));
3486        assert!(!text.contains("◌ consulting"));
3487    }
3488
3489    #[test]
3490    fn tool_narration_guard_fires_in_french() {
3491        // I1: the guard was English-only; francophone narration slipped through.
3492        assert!(tool_narration_detected(
3493            "Je vais créer le fichier poeme.txt."
3494        ));
3495        assert!(tool_narration_detected(
3496            "Laisse-moi vérifier le contenu du dossier."
3497        ));
3498        assert!(tool_narration_detected(
3499            "Je m'occupe de lire app.js tout de suite."
3500        ));
3501        // Still catches English.
3502        assert!(tool_narration_detected("Let me run the tests."));
3503        // A normal answer with no tool narration must NOT fire.
3504        assert!(!tool_narration_detected(
3505            "Voici le résultat : ton fichier contient un haïku."
3506        ));
3507    }
3508
3509    #[test]
3510    fn named_agents_keep_their_own_soul() {
3511        let planner = Identity {
3512            name: "planner".into(),
3513            role: "technical architect".into(),
3514            personality: "structured".into(),
3515        };
3516        let workspace_root = PathBuf::from(".");
3517        let prompt = build_system_prompt(SystemPromptInput {
3518            identity: &planner,
3519            tier: Some(&crate::router::TaskTier::Hard),
3520            workspace_root: &workspace_root,
3521            facts: &[],
3522            memory_docs: &[],
3523            instruction_docs: &[],
3524            skills: &[],
3525            skill_catalog: &[],
3526        });
3527        // The main soul's signature section header — if it leaks into a
3528        // named identity, the focused soul is being diluted.
3529        assert!(
3530            !prompt.contains("TIER TRIAGE"),
3531            "named souls must not be diluted by the main protocol"
3532        );
3533    }
3534
3535    #[test]
3536    fn initial_user_content_blocks_embeds_uploaded_images() {
3537        let tmp = tempfile::tempdir().expect("tempdir");
3538        let image = tmp.path().join("shot.png");
3539        std::fs::write(
3540            &image,
3541            [
3542                0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n', 0, 0, 0, 0,
3543            ],
3544        )
3545        .expect("write image");
3546        let description = format!(
3547            "analyse this\n\n[Attached files]\n### file: shot.png\n[uploaded: {}]",
3548            image.display()
3549        );
3550
3551        let blocks = initial_user_content_blocks(tmp.path(), &description);
3552        assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
3553        assert!(blocks.iter().any(|block| matches!(
3554            block,
3555            ContentBlock::Image {
3556                source: ImageSource::Base64 {
3557                    media_type,
3558                    data,
3559                }
3560            } if media_type == "image/png" && !data.is_empty()
3561        )));
3562    }
3563
3564    #[test]
3565    fn tool_result_content_blocks_preserves_images() {
3566        let blocks = tool_result_content_blocks(&[
3567            Block::Text("screenshot captured".into()),
3568            Block::Image {
3569                data: vec![1, 2, 3],
3570                mime: "image/png".into(),
3571            },
3572        ]);
3573
3574        assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
3575        assert!(blocks.iter().any(|block| matches!(
3576            block,
3577            ContentBlock::Image {
3578                source: ImageSource::Base64 {
3579                    media_type,
3580                    data,
3581                }
3582            } if media_type == "image/png" && data == "AQID"
3583        )));
3584    }
3585}
sparrow/engine/mod.rs

sparrow/engine/
mod.rs