Skip to main content

sparrow/engine/
mod.rs

1use async_trait::async_trait;
2use futures::StreamExt;
3use serde_json::json;
4use std::path::PathBuf;
5use std::sync::Arc;
6use tokio::sync::mpsc;
7
8use crate::agent::AgentStore;
9use crate::autonomy::{AutonomyContract, Checkpoints, GitCheckpoints};
10use crate::capabilities::{Curator, SkillLibrary};
11use crate::config::Config;
12use crate::event::{
13    AgentStatus, AutonomyLevel, Block, Decision, Event, OutcomeSummary, RiskLevel, RunId,
14    TokenUsage,
15};
16use crate::extras::Distiller;
17use crate::hooks::{HookEvent, HookRegistry};
18use crate::instructions::InstructionDoc;
19use crate::memory::{Fact, Memory, MemoryDoc, MemoryDocKind};
20use crate::permissions::PermissionContext;
21use crate::provider::{
22    Brain, BrainEvent, BrainRequest, ContentBlock, ImageSource, Msg, PromptCacheConfig, ToolSpec,
23};
24use crate::reasoning::ReasoningEngine;
25use crate::redaction::RedactionFilter;
26use crate::router::{BudgetState, Router, TaskTier};
27use crate::sandbox::Sandbox;
28use crate::tools::{ToolCtx, ToolRegistry};
29
30pub mod scorer;
31pub mod treesitter;
32
33// ─── Agent identity ─────────────────────────────────────────────────────────────
34
/// Persona that is interpolated into the agent's system prompt.
#[derive(Debug, Clone)]
pub struct Identity {
    /// Name used in the prompt's opening "You are {name}" sentence.
    pub name: String,
    /// Role used in the prompt's opening "a {role}" clause.
    pub role: String,
    /// Free-form tone description placed after "Personality:" in the prompt.
    pub personality: String,
}
41
42impl Default for Identity {
43    fn default() -> Self {
44        Self {
45            name: "sparrow".into(),
46            role: "software engineer".into(),
47            personality: "concise, competent, helpful".into(),
48        }
49    }
50}
51
52// ─── Brain policy ───────────────────────────────────────────────────────────────
53
/// An ordered list of candidate brains together with a cursor into it.
pub struct BrainPolicy {
    /// The fallback chain selected by the Router for this run
    pub chain: Vec<Arc<dyn Brain>>,
    /// Position in `chain` of the brain currently in use.
    pub current_index: usize,
}
59
60impl BrainPolicy {
61    pub fn current(&self) -> Option<Arc<dyn Brain>> {
62        self.chain.get(self.current_index).cloned()
63    }
64
65    pub fn next(&mut self) -> Option<Arc<dyn Brain>> {
66        self.current_index += 1;
67        self.current()
68    }
69}
70
71// ─── Workspace ──────────────────────────────────────────────────────────────────
72
/// A workspace directory paired with a sandbox handle.
pub struct Workspace {
    /// Root path of the workspace.
    pub root: PathBuf,
    /// Shared sandbox implementation associated with this workspace.
    // NOTE(review): presumably the sandbox tools execute in — confirm against callers.
    pub sandbox: Arc<dyn Sandbox>,
}
77
78// ─── Agent run ─────────────────────────────────────────────────────────────────
79
/// Bundle of everything that describes a single agent run.
pub struct AgentRun {
    /// Identifier of this run.
    pub id: RunId,
    /// Persona the agent presents.
    pub identity: Identity,
    /// Fallback chain of brains and the cursor into it.
    pub brain_policy: BrainPolicy,
    /// Autonomy contract attached to the run.
    pub autonomy: AutonomyContract,
    /// Shared registry of tools available to the run.
    pub tools: Arc<ToolRegistry>,
    /// Workspace root and sandbox the run is bound to.
    pub workspace: Workspace,
}
88
/// Rough token estimate for `text`: one token per four characters, rounded
/// up, and never less than one (an empty string still counts as one token).
fn estimate_text_tokens(text: &str) -> u64 {
    let char_count = text.chars().count() as u64;
    let quarters = char_count.div_ceil(4);
    std::cmp::max(1, quarters)
}
93
94fn estimate_content_tokens(blocks: &[ContentBlock]) -> u64 {
95    blocks
96        .iter()
97        .map(|block| match block {
98            ContentBlock::Text { text } => estimate_text_tokens(text),
99            ContentBlock::Image { source } => match source {
100                crate::provider::ImageSource::Base64 { data, .. } => {
101                    256 + estimate_text_tokens(data).min(2_000)
102                }
103                crate::provider::ImageSource::Url { url } => 256 + estimate_text_tokens(url),
104            },
105            ContentBlock::ToolUse { name, input, .. } => {
106                estimate_text_tokens(name) + estimate_text_tokens(&input.to_string())
107            }
108            ContentBlock::ToolResult { content, .. } => 8 + estimate_content_tokens(content),
109            ContentBlock::Reasoning { text } => estimate_text_tokens(text),
110        })
111        .sum()
112}
113
114fn estimate_request_tokens(req: &BrainRequest) -> u64 {
115    let system = req.system.as_deref().map(estimate_text_tokens).unwrap_or(0);
116    let messages: u64 = req
117        .messages
118        .iter()
119        .map(|msg| estimate_text_tokens(&msg.role) + estimate_content_tokens(&msg.content) + 4)
120        .sum();
121    let tools: u64 = req
122        .tools
123        .iter()
124        .map(|tool| {
125            estimate_text_tokens(&tool.name)
126                + estimate_text_tokens(&tool.description)
127                + estimate_text_tokens(&tool.input_schema.to_string())
128        })
129        .sum();
130    system + messages + tools
131}
132
/// Encodes `data` as standard (RFC 4648) base64 with `=` padding.
fn base64_encode(data: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    // Picks the 6-bit group that starts `shift` bits from the right of `group`.
    let sextet = |group: u32, shift: u32| ALPHABET[((group >> shift) & 0x3f) as usize] as char;

    let mut encoded = String::with_capacity(data.len().div_ceil(3) * 4);
    for chunk in data.chunks(3) {
        // Pack up to three bytes into the top 24 bits; missing bytes are zero.
        let group = match chunk {
            &[a] => u32::from(a) << 16,
            &[a, b] => (u32::from(a) << 16) | (u32::from(b) << 8),
            &[a, b, c] => (u32::from(a) << 16) | (u32::from(b) << 8) | u32::from(c),
            _ => unreachable!("chunks(3) yields between one and three bytes"),
        };
        encoded.push(sextet(group, 18));
        encoded.push(sextet(group, 12));
        encoded.push(if chunk.len() > 1 { sextet(group, 6) } else { '=' });
        encoded.push(if chunk.len() > 2 { sextet(group, 0) } else { '=' });
    }
    encoded
}
156
157fn image_block_from_path(path: &std::path::Path) -> Option<ContentBlock> {
158    let mime = mime_guess::from_path(path).first_or_octet_stream();
159    if !mime.type_().as_str().eq_ignore_ascii_case("image") {
160        return None;
161    }
162    let data = std::fs::read(path).ok()?;
163    Some(ContentBlock::Image {
164        source: ImageSource::Base64 {
165            media_type: mime.to_string(),
166            data: base64_encode(&data),
167        },
168    })
169}
170
/// Extracts the path that follows each `uploaded:` marker in `description`.
///
/// Only the first marker on a line is considered. The path may be wrapped in
/// `[...]` (anything after the closing bracket is ignored) and in single or
/// double quotes. Lines whose path ends up empty are skipped.
fn collect_uploaded_paths(description: &str) -> Vec<String> {
    const MARKER: &str = "uploaded:";

    description
        .lines()
        .filter_map(|line| {
            let (_, after_marker) = line.split_once(MARKER)?;
            let after_marker = after_marker.trim();
            let unbracketed = after_marker.strip_prefix('[').unwrap_or(after_marker);
            let up_to_bracket = match unbracketed.find(']') {
                Some(end) => &unbracketed[..end],
                None => unbracketed,
            };
            let candidate = up_to_bracket
                .trim()
                .trim_matches('"')
                .trim_matches('\'');
            if candidate.is_empty() {
                None
            } else {
                Some(candidate.to_owned())
            }
        })
        .collect()
}
193
194fn initial_user_content_blocks(
195    workspace_root: &std::path::Path,
196    description: &str,
197) -> Vec<ContentBlock> {
198    let mut blocks = vec![ContentBlock::Text {
199        text: description.to_string(),
200    }];
201    let mut seen = std::collections::HashSet::new();
202    for raw_path in collect_uploaded_paths(description) {
203        let path = std::path::PathBuf::from(&raw_path);
204        let full_path = if path.is_absolute() {
205            path
206        } else {
207            workspace_root.join(path)
208        };
209        if !seen.insert(full_path.clone()) {
210            continue;
211        }
212        if let Some(block) = image_block_from_path(&full_path) {
213            blocks.push(block);
214        }
215    }
216    blocks
217}
218
/// Renders a fallback chain as `a -> b -> c`, showing at most `limit` ids
/// (a `limit` of zero is treated as one) and collapsing the remainder into a
/// trailing "+N autres fallbacks" entry. An empty chain yields a fixed
/// "no model available" message.
pub fn summarize_model_chain(chain_ids: &[String], limit: usize) -> String {
    let shown = limit.max(1).min(chain_ids.len());
    let (head, hidden) = chain_ids.split_at(shown);
    if head.is_empty() {
        return String::from("aucun modèle disponible");
    }

    let mut summary = head.join(" -> ");
    if !hidden.is_empty() {
        summary.push_str(" -> ");
        summary.push_str(&format!("+{} autres fallbacks", hidden.len()));
    }
    summary
}
230
231fn prompt_cache_key(scope: &str, workspace_root: &std::path::Path, tools: &[ToolSpec]) -> String {
232    use std::hash::{Hash, Hasher};
233
234    let mut hasher = std::collections::hash_map::DefaultHasher::new();
235    scope.hash(&mut hasher);
236    workspace_root.display().to_string().hash(&mut hasher);
237    for tool in tools {
238        tool.name.hash(&mut hasher);
239        tool.description.hash(&mut hasher);
240        tool.input_schema.to_string().hash(&mut hasher);
241    }
242    format!("sparrow-{}-{:016x}", scope, hasher.finish())
243}
244
245// ─── System prompt / SOUL ───────────────────────────────────────────────────────
246
247fn build_system_prompt(
248    identity: &Identity,
249    workspace_root: &PathBuf,
250    facts: &[Fact],
251    memory_docs: &[MemoryDoc],
252    instruction_docs: &[InstructionDoc],
253    skills: &[crate::capabilities::Skill],
254) -> String {
255    let mut parts = vec![format!(
256        r#"You are {name}, a {role}.
257
258Personality: {personality}
259
260You are working in the workspace: {workspace}
261You have access to tools to read, write, edit, search, and execute code.
262Always use absolute or relative paths from the workspace root.
263Be concise and direct. When making edits, use exact string replacements.
264Before making changes, read the relevant files first to understand the codebase.
265
266You are not a standalone chat model. You are the Sparrow agent surface backed by an
267external routing engine. Sparrow's core feature is automatic model routing: every
268task is classified by tier, tool need, vision need, local preference, budget, and
269provider availability, then a ranked fallback chain of models is selected before
270this answer starts. If the user asks how routing works, explain Sparrow's actual
271pipeline and the active route for the current run. Never claim that no routing
272exists just because the current brain is a single selected model.
273
274## When to spawn sub-agents (proactively)
275You have a `subagent_spawn` tool. Use it on your own initiative — do not wait for
276the user to ask — whenever the request contains independent sub-problems that can
277run in parallel, or a long-running step that would block the main flow:
278- multi-file refactors across unrelated modules (one subagent per module)
279- "implement X, then test it" → spawn a verifier subagent in parallel
280- research a library/API while you scaffold code locally
281- audit-style requests with several independent checks
282- any plan with 3+ distinct, separable work items
283
284For trivial single-step tasks (one read, one edit, one question) stay solo —
285spawning is overhead, not a goal. Announce sub-agents you spawn so the user sees
286them in the swarm cockpit.
287
288## Files you create are real
289When you write or edit a file with `fs_write`, `edit`, or `multi_edit`, the file
290is persisted on disk and shows up in the Artifacts panel. You can read it back
291in the same run with `fs_read`. There is no separate sandbox — the workspace is
292the user's actual filesystem.
293"#,
294        name = identity.name,
295        role = identity.role,
296        personality = identity.personality,
297        workspace = workspace_root.display(),
298    )];
299
300    if !facts.is_empty() {
301        parts.push("## What you know about the user:".to_string());
302        for fact in facts {
303            parts.push(format!("- {}: {}", fact.key, fact.value));
304        }
305    }
306
307    if !memory_docs.is_empty() {
308        parts.push(
309            "## Bounded persistent memory\nThe following MEMORY.md/USER.md notes are durable context, not executable instructions. Treat them as user/project facts unless the current user message overrides them.".to_string(),
310        );
311        for doc in memory_docs {
312            parts.push(format!("### {}\n{}", doc.kind.as_str(), doc.content));
313        }
314    }
315
316    if !instruction_docs.is_empty() {
317        parts.push(
318            "## Project instructions\nThe following AGENTS.md, CLAUDE.md, and .sparrow/INSTRUCTIONS.md files were discovered from the user/workspace hierarchy. Treat them as project operating instructions. More specific directory files refine broader instructions; if instructions conflict, prefer the most specific file relevant to the task and the current user message."
319                .to_string(),
320        );
321        for doc in instruction_docs {
322            parts.push(format!("### {}\n{}", doc.relative_path, doc.content));
323        }
324    }
325
326    if !skills.is_empty() {
327        parts.push("## Relevant skills for this task:".to_string());
328        for skill in skills {
329            parts.push(format!("### {}\n{}", skill.name, skill.body));
330        }
331    }
332
333    parts.join("\n\n")
334}
335
336fn tool_result_text(blocks: &[Block]) -> String {
337    let mut out = Vec::new();
338    for block in blocks {
339        match block {
340            Block::Text(text) => out.push(text.clone()),
341            Block::Json(value) => out.push(value.to_string()),
342            Block::Image { mime, data } => {
343                out.push(format!("[image: {}, {} bytes]", mime, data.len()));
344            }
345            Block::Diff { file, patch } => out.push(format!("diff for {}\n{}", file, patch)),
346        }
347    }
348    out.join("\n")
349}
350
351fn tool_result_content_blocks(blocks: &[Block]) -> Vec<ContentBlock> {
352    let mut out = Vec::new();
353    let text = tool_result_text(blocks);
354    if !text.trim().is_empty() {
355        out.push(ContentBlock::Text { text });
356    }
357    for block in blocks {
358        if let Block::Image { data, mime } = block {
359            out.push(ContentBlock::Image {
360                source: ImageSource::Base64 {
361                    media_type: mime.clone(),
362                    data: base64_encode(data),
363                },
364            });
365        }
366    }
367    out
368}
369
370/// Reconstruct an Event view from a finished conversation so the Distiller can
371/// mine durable facts (tool paths/content + reasoning). ToolUse blocks carry the
372/// real, parsed tool arguments; Text blocks carry assistant reasoning.
373fn events_from_messages(run_id: &RunId, messages: &[Msg]) -> Vec<Event> {
374    let mut events = Vec::new();
375    for msg in messages {
376        for block in &msg.content {
377            match block {
378                ContentBlock::ToolUse { name, input, .. } => {
379                    events.push(Event::ToolUseProposed {
380                        run: run_id.clone(),
381                        id: String::new(),
382                        name: name.clone(),
383                        args: input.clone(),
384                        risk: RiskLevel::ReadOnly,
385                    });
386                }
387                ContentBlock::Text { text } if msg.role == "assistant" => {
388                    events.push(Event::ThinkingDelta {
389                        run: run_id.clone(),
390                        text: text.clone(),
391                    });
392                }
393                _ => {}
394            }
395        }
396    }
397    events
398}
399
400// ─── Task ───────────────────────────────────────────────────────────────────────
401
/// A unit of work handed to the engine.
#[derive(Debug, Clone)]
pub struct Task {
    /// The task text; it is classified, scanned for `uploaded:` markers and may
    /// start with a `__model:<id>__ ` override prefix.
    pub description: String,
    /// Prior conversation messages used to seed the run's message list.
    pub context: Vec<Msg>,
}
407
408// ─── THE ENGINE ─────────────────────────────────────────────────────────────────
409
/// The agent engine: routing, configuration and the optional collaborators
/// attached through the `with_*` builder methods.
pub struct Engine {
    /// Router used to select the brain fallback chain for a task.
    router: Arc<dyn Router>,
    /// Engine configuration (budget limits, hooks, ...).
    config: Config,
    /// Optional persona, set via `with_identity`.
    identity: Option<Identity>,
    /// Optional memory backend, set via `with_memory`.
    memory: Option<Arc<dyn Memory>>,
    /// Optional skill library, set via `with_skills`.
    skills: Option<Arc<dyn SkillLibrary>>,
    /// Redaction filter; `with_memory` loads it with the values of all facts
    /// whose key starts with `secret:`.
    redaction: RedactionFilter,
    /// Optional approval callback, set via `with_approval_handler`.
    approval_handler: Option<Arc<dyn ApprovalHandler>>,
    /// Reasoning engine, created with its default settings.
    reasoning: ReasoningEngine,
    /// Hook registry, loaded from `config.hooks` in `new` and again by
    /// `with_hooks_config`.
    hooks: HookRegistry,
    /// Optional agent store, set via `with_agent_store`.
    agent_store: Option<Arc<dyn AgentStore>>,
    /// Optional enterprise organization policy, set via `with_org_policy`.
    org_policy: Option<crate::onboarding::enterprise::OrgPolicy>,
    /// Task description hash → TaskTier cache for classify_via_brain dedup
    classify_cache: std::sync::Mutex<std::collections::HashMap<u64, crate::router::TaskTier>>,
}
425
/// Payload handed to an `ApprovalHandler` when a decision is needed.
#[derive(Debug, Clone)]
pub struct ApprovalRequest {
    /// Run the request belongs to.
    pub run: RunId,
    /// Identifier of this request.
    // NOTE(review): presumably the tool-use id — confirm against the caller.
    pub id: String,
    /// Name of the tool awaiting approval.
    pub tool_name: String,
    /// Risk level attached to the tool call.
    pub risk: RiskLevel,
    /// Arguments of the tool call as JSON.
    pub args: serde_json::Value,
    /// Short description of the request.
    pub summary: String,
}
435
/// Asynchronous callback that turns an `ApprovalRequest` into a `Decision`.
///
/// Implementations must be `Send + Sync` so they can be shared behind an `Arc`.
#[async_trait]
pub trait ApprovalHandler: Send + Sync {
    /// Resolves `request` to a decision.
    async fn request_approval(&self, request: ApprovalRequest) -> Decision;
}
440
441impl Engine {
442    pub fn new(router: Arc<dyn Router>, config: Config) -> Self {
443        let mut hooks = HookRegistry::new(Arc::new(crate::sandbox::LocalSandbox::new(
444            std::env::current_dir().unwrap_or_default(),
445        )));
446        hooks.load(config.hooks.clone());
447        Self {
448            router,
449            config,
450            identity: None,
451            memory: None,
452            skills: None,
453            redaction: RedactionFilter::new(),
454            approval_handler: None,
455            reasoning: ReasoningEngine::default(),
456            hooks,
457            agent_store: None,
458            org_policy: None,
459            classify_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
460        }
461    }
462
463    pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
464        // Load secrets for redaction
465        let secrets: Vec<String> = memory
466            .all_facts()
467            .iter()
468            .filter(|f| f.key.starts_with("secret:"))
469            .map(|f| f.value.clone())
470            .collect();
471        self.redaction.load_secrets(secrets);
472        self.memory = Some(memory);
473        self
474    }
475
476    pub fn with_skills(mut self, skills: Arc<dyn SkillLibrary>) -> Self {
477        self.skills = Some(skills);
478        self
479    }
480
481    pub fn with_identity(mut self, identity: Identity) -> Self {
482        self.identity = Some(identity);
483        self
484    }
485
486    pub fn with_agent_store(mut self, store: Arc<dyn AgentStore>) -> Self {
487        self.agent_store = Some(store);
488        self
489    }
490
491    pub fn with_org_policy(mut self, policy: crate::onboarding::enterprise::OrgPolicy) -> Self {
492        self.org_policy = Some(policy);
493        self
494    }
495
496    pub fn with_hooks_config(mut self, hooks: Vec<crate::hooks::Hook>) -> Self {
497        self.hooks.load(hooks);
498        self
499    }
500
501    pub fn with_approval_handler(mut self, approval_handler: Arc<dyn ApprovalHandler>) -> Self {
502        self.approval_handler = Some(approval_handler);
503        self
504    }
505
506    /// Heuristic classification + a confidence flag.
507    /// Returns `(tier, ambiguous)`. `ambiguous == true` means no semantic keyword
508    /// matched and the tier was guessed purely from length — a good signal that a
509    /// tiny model call could do better (§3.6).
510    fn classify_with_confidence(&self, task: &str) -> (TaskTier, bool) {
511        let lower = task.to_lowercase();
512        if lower.contains("vision") || lower.contains("image") || lower.contains("screenshot") {
513            (TaskTier::Vision, false)
514        } else if lower.contains("architecture")
515            || lower.contains("refactor")
516            || lower.contains("audit")
517            || lower.contains("répare")
518            || lower.contains("repare")
519            || lower.contains("livrer")
520            || lower.contains("v1")
521        {
522            (TaskTier::Hard, false)
523        } else if lower.contains("bug")
524            || lower.contains("fix")
525            || lower.contains("corrige")
526            || lower.contains("debug")
527        {
528            (TaskTier::Small, false)
529        } else if lower.contains("routing")
530            || lower.contains("routeur")
531            || lower.contains("modèle")
532            || lower.contains("modele")
533            || lower.contains("model")
534            || lower.contains("sélectionne")
535            || lower.contains("selectionne")
536        {
537            (TaskTier::Small, false)
538        } else if lower.len() < 80 {
539            // length-only guess → ambiguous
540            (TaskTier::Trivial, true)
541        } else {
542            (TaskTier::Medium, true)
543        }
544    }
545
546    /// Ask a cheap brain to classify an ambiguous task into a tier (§3.6).
547    /// Bounded to a 10-token completion; failures fall back to the heuristic tier.
548    async fn classify_via_brain(&self, task: &str, brain: &dyn Brain) -> Option<TaskTier> {
549        let req = BrainRequest {
550            system: Some(
551                "You are a task classifier. Output exactly one word: trivial, small, medium, hard, or vision."
552                    .into(),
553            ),
554            messages: vec![Msg {
555                role: "user".into(),
556                content: vec![ContentBlock::Text {
557                    text: format!(
558                        "Classify this coding task into exactly one tier (trivial, small, medium, hard, vision):\n\n{}\n\nTier:",
559                        task
560                    ),
561                }],
562            }],
563            tools: vec![],
564            max_tokens: 6,
565            temperature: 0.0,
566            stop: vec![],
567            cache: PromptCacheConfig::disabled(),
568        };
569        let mut stream = brain.complete(req).await.ok()?;
570        let mut out = String::new();
571        while let Some(ev) = stream.next().await {
572            match ev {
573                BrainEvent::TextDelta(t) => out.push_str(&t),
574                BrainEvent::Done(_) => break,
575                BrainEvent::Error(_) => return None,
576                _ => {}
577            }
578        }
579        let word = out.trim().to_lowercase();
580        let word = word.split_whitespace().next().unwrap_or("");
581        match word {
582            "trivial" => Some(TaskTier::Trivial),
583            "small" => Some(TaskTier::Small),
584            "medium" => Some(TaskTier::Medium),
585            "hard" => Some(TaskTier::Hard),
586            "vision" => Some(TaskTier::Vision),
587            _ => None,
588        }
589    }
590
591    fn task_summary(&self, task: &str, tier: &TaskTier) -> String {
592        let lower = task.to_lowercase();
593        if lower.contains("routing")
594            || lower.contains("routeur")
595            || lower.contains("modèle")
596            || lower.contains("modele")
597            || lower.contains("model")
598        {
599            "question meta sur le routing modele".into()
600        } else if lower.contains("code") || lower.contains("bug") || lower.contains("fix") {
601            format!("requete code/{:?}", tier).to_lowercase()
602        } else if lower.contains("config") || lower.contains("provider") {
603            "configuration provider/modele".into()
604        } else {
605            format!("requete {:?}", tier).to_lowercase()
606        }
607    }
608
609    fn is_routing_question(&self, task: &str) -> bool {
610        let lower = task.to_lowercase();
611        (lower.contains("routing") || lower.contains("routeur") || lower.contains("route"))
612            && (lower.contains("modèle") || lower.contains("modele") || lower.contains("model"))
613            || lower.contains("sélectionne tu le model")
614            || lower.contains("selectionne tu le model")
615    }
616
617    fn requires_tools(&self, task: &str, tier: &TaskTier) -> bool {
618        let lower = task.to_lowercase();
619        let tool_keywords = [
620            "outil",
621            "tools",
622            "fichier",
623            "file",
624            "readme",
625            ".rs",
626            ".ts",
627            ".js",
628            ".html",
629            ".md",
630            "repo",
631            "dossier",
632            "workspace",
633            "git",
634            "test",
635            "build",
636            "cargo",
637            "npm",
638            "pnpm",
639            "corrige",
640            "fix",
641            "debug",
642            "bug",
643            "répare",
644            "repare",
645            "modifie",
646            "édite",
647            "edite",
648            "ajoute",
649            "supprime",
650            "écris",
651            "ecris",
652            "write",
653            "create",
654            "crée",
655            "cree",
656            "audit",
657        ];
658
659        if tool_keywords.iter().any(|kw| lower.contains(kw)) {
660            return true;
661        }
662
663        matches!(tier, TaskTier::Medium | TaskTier::Hard | TaskTier::Vision)
664    }
665
666    fn requires_vision(&self, task: &str, tier: &TaskTier) -> bool {
667        let lower = task.to_lowercase();
668        matches!(tier, TaskTier::Vision)
669            || [
670                "image",
671                "screenshot",
672                "capture",
673                "photo",
674                "vision",
675                "logo",
676                "visuel",
677                "interface graphique",
678            ]
679            .iter()
680            .any(|kw| lower.contains(kw))
681    }
682
683    fn routing_explanation(
684        &self,
685        tier: &TaskTier,
686        need: &crate::router::RoutingNeed,
687        chain_ids: &[String],
688    ) -> String {
689        let chain = summarize_model_chain(chain_ids, 5);
690        format!(
691            "Je suis Sparrow, donc je ne réponds pas comme un modèle isolé: avant chaque run, mon routeur classe ta demande puis choisit une chaîne de modèles.\n\nPour cette requête, j'ai détecté: tier `{}` · tools `{}` · vision `{}` · local `{}`.\n\nJe sélectionne ensuite le modèle avec ces critères: adéquation aux capacités demandées, support des tools, besoin vision, préférence local/free-first, budget restant, latence, taille de contexte, puis disponibilité provider. Le résultat est une fallback chain, pas un seul choix figé: `{}`.\n\nConcrètement: une question simple ou meta doit aller vers le modèle le moins coûteux capable de répondre; une tâche code complexe monte vers un modèle plus fort; une tâche avec fichiers/tools exige un modèle compatible tools; une tâche image demande vision; si un provider échoue, je bascule au suivant dans la chaîne.",
692            tier.as_str(),
693            need.required_tools,
694            need.required_vision,
695            need.prefer_local,
696            chain
697        )
698    }
699
700    /// Summarize a slice of dropped conversation messages into ~200 tokens so
701    /// compaction preserves continuity instead of just truncating (§3.7).
702    async fn summarize_messages(&self, brain: &dyn Brain, middle: &[Msg]) -> Option<String> {
703        if middle.is_empty() {
704            return None;
705        }
706        // Flatten the middle into a compact transcript for the summarizer.
707        let mut transcript = String::new();
708        for m in middle {
709            for block in &m.content {
710                match block {
711                    ContentBlock::Text { text } => {
712                        transcript.push_str(&format!("[{}] {}\n", m.role, text));
713                    }
714                    ContentBlock::ToolUse { name, .. } => {
715                        transcript.push_str(&format!("[{}] (tool: {})\n", m.role, name));
716                    }
717                    ContentBlock::ToolResult { .. } => {
718                        transcript.push_str(&format!("[{}] (tool result)\n", m.role));
719                    }
720                    _ => {}
721                }
722            }
723        }
724        if transcript.len() > 12_000 {
725            transcript.truncate(12_000);
726        }
727        let req = BrainRequest {
728            system: Some(
729                "Summarize this agent conversation in <=200 tokens. Preserve: files edited, \
730                 decisions made, current state, and any unfinished work. Plain text only."
731                    .into(),
732            ),
733            messages: vec![Msg {
734                role: "user".into(),
735                content: vec![ContentBlock::Text { text: transcript }],
736            }],
737            tools: vec![],
738            max_tokens: 300,
739            temperature: 0.0,
740            stop: vec![],
741            cache: PromptCacheConfig::disabled(),
742        };
743        let mut stream = brain.complete(req).await.ok()?;
744        let mut out = String::new();
745        while let Some(ev) = stream.next().await {
746            match ev {
747                BrainEvent::TextDelta(t) => out.push_str(&t),
748                BrainEvent::Done(_) => break,
749                BrainEvent::Error(_) => return None,
750                _ => {}
751            }
752        }
753        let out = out.trim().to_string();
754        if out.is_empty() { None } else { Some(out) }
755    }
756
757    /// Drive one AgentRun to completion.
758    pub async fn drive(
759        &self,
760        task: Task,
761        event_tx: mpsc::UnboundedSender<Event>,
762    ) -> anyhow::Result<OutcomeSummary> {
763        self.drive_with_run_id(task, event_tx, RunId::new()).await
764    }
765
766    /// Drive with a caller-provided run id.
767    pub async fn drive_with_run_id(
768        &self,
769        task: Task,
770        event_tx: mpsc::UnboundedSender<Event>,
771        run_id: RunId,
772    ) -> anyhow::Result<OutcomeSummary> {
773        self.drive_with_inject(task, event_tx, run_id, None).await
774    }
775
776    /// Drive with an optional `inject_rx` channel that lets the caller inject
777    /// user messages mid-run. Polled non-blocking between turns. (§3.7)
778    pub async fn drive_with_inject(
779        &self,
780        task: Task,
781        event_tx: mpsc::UnboundedSender<Event>,
782        run_id: RunId,
783        mut inject_rx: Option<mpsc::UnboundedReceiver<String>>,
784    ) -> anyhow::Result<OutcomeSummary> {
785        // Parse and strip optional __model:X__ override prefix injected by the WebView.
786        let model_override: Option<String>;
787        let clean_description: String;
788        if let Some(rest) = task.description.strip_prefix("__model:") {
789            if let Some(end) = rest.find("__ ") {
790                model_override = Some(rest[..end].to_string());
791                clean_description = rest[end + 3..].to_string();
792            } else {
793                model_override = None;
794                clean_description = task.description.clone();
795            }
796        } else {
797            model_override = None;
798            clean_description = task.description.clone();
799        }
800        let task = Task {
801            description: clean_description,
802            context: task.context,
803        };
804
805        let mut messages: Vec<Msg> = task.context.clone();
806
807        // Classify task (heuristic first)
808        let (mut tier, ambiguous) = self.classify_with_confidence(&task.description);
809
810        // Route: select brain chain
811        let budget = BudgetState {
812            daily_limit_usd: self.config.budget.daily_usd,
813            daily_spent_usd: 0.0,
814            session_limit_usd: self.config.budget.session_usd,
815            session_spent_usd: 0.0,
816        };
817
818        let mut required_tools = self.requires_tools(&task.description, &tier);
819        let mut required_vision = self.requires_vision(&task.description, &tier);
820        let mut need = crate::router::RoutingNeed {
821            tier: tier.clone(),
822            required_tools,
823            required_vision,
824            prefer_local: false,
825        };
826
827        let mut chain = self.router.select(&need, &budget);
828
829        // Apply WebView model override:
830        //  1) Keep the brain in the chain if found there.
831        //  2) Otherwise, look it up directly via the router — the user explicitly
832        //     picked it, so we honour it even if the tier-based selection didn't
833        //     include it.
834        //  3) This must be re-applied after any chain mutation (e.g. §3.6
835        //     refinement) or the auto-router silently overrides the manual pick.
836        let router_ref = &self.router;
837        let apply_override = |chain: &mut Vec<Arc<dyn Brain>>| {
838            if let Some(ref override_id) = model_override {
839                let filtered: Vec<_> = chain
840                    .iter()
841                    .filter(|b| b.id() == override_id.as_str())
842                    .cloned()
843                    .collect();
844                if !filtered.is_empty() {
845                    *chain = filtered;
846                } else if let Some(brain) = router_ref.find_brain_by_id(override_id) {
847                    *chain = vec![brain];
848                }
849            }
850        };
851        apply_override(&mut chain);
852
853        // §3.6: model-assisted refinement for genuinely ambiguous tasks. Only the
854        // length-based Medium guess qualifies — short tasks stay Trivial without
855        // the extra round-trip, keeping the common path fast. Uses the cheapest
856        // already-selected brain, bounded to a 6-token call.
857        //
858        // Skip refinement entirely when the user has pinned a specific model:
859        // the whole point of the manual pick is to bypass the router's judgment.
860        if model_override.is_none()
861            && ambiguous
862            && matches!(tier, TaskTier::Medium)
863            && !self.is_routing_question(&task.description)
864        {
865            // Dedup: cache task hash → refined tier so identical tasks skip the LLM call.
866            let desc_hash = {
867                use std::collections::hash_map::DefaultHasher;
868                use std::hash::{Hash, Hasher};
869                let mut h = DefaultHasher::new();
870                task.description.hash(&mut h);
871                h.finish()
872            };
873            let cached = {
874                self.classify_cache
875                    .lock()
876                    .ok()
877                    .and_then(|c| c.get(&desc_hash).cloned())
878            };
879            let refined = match cached {
880                Some(t) => {
881                    let _ = event_tx.send(Event::Message {
882                        run: run_id.clone(),
883                        role: "router".into(),
884                        text: format!("classification (cached): {}", t.as_str()),
885                    });
886                    Some(t)
887                }
888                None => {
889                    if let Some(brain) = chain.first().cloned() {
890                        let result = self
891                            .classify_via_brain(&task.description, brain.as_ref())
892                            .await;
893                        if let Some(r) = &result {
894                            if let Ok(mut c) = self.classify_cache.lock() {
895                                c.insert(desc_hash, r.clone());
896                            }
897                        }
898                        result
899                    } else {
900                        None
901                    }
902                }
903            };
904            if let Some(refined) = refined {
905                if std::mem::discriminant(&refined) != std::mem::discriminant(&tier) {
906                    let _ = event_tx.send(Event::Message {
907                        run: run_id.clone(),
908                        role: "router".into(),
909                        text: format!(
910                            "classification affinée par modèle: {} → {}",
911                            tier.as_str(),
912                            refined.as_str()
913                        ),
914                    });
915                    tier = refined;
916                    required_tools = self.requires_tools(&task.description, &tier);
917                    required_vision = self.requires_vision(&task.description, &tier);
918                    need = crate::router::RoutingNeed {
919                        tier: tier.clone(),
920                        required_tools,
921                        required_vision,
922                        prefer_local: false,
923                    };
924                    chain = self.router.select(&need, &budget);
925                    // Re-apply manual override after the chain mutation.
926                    apply_override(&mut chain);
927                }
928            }
929        }
930
931        let task_summary = self.task_summary(&task.description, &tier);
932        let chain_ids: Vec<String> = chain.iter().map(|b| b.id().to_string()).collect();
933
934        let agent_name = self
935            .identity
936            .as_ref()
937            .map(|identity| identity.name.clone())
938            .unwrap_or_else(|| "sparrow".into());
939        let _ = event_tx.send(Event::RunStarted {
940            run: run_id.clone(),
941            task: task.description.clone(),
942            agent: agent_name,
943        });
944
945        // PreRun lifecycle hook. Allows operators to gate run start (blocking
946        // hooks can veto by exiting non-zero), warm caches, etc.
947        let pre_run_results = self
948            .hooks
949            .execute(&HookEvent::PreRun, &task.description)
950            .await;
951        if let Some(reason) = pre_run_results
952            .iter()
953            .find(|r| r.veto)
954            .and_then(|r| r.veto_reason.clone())
955        {
956            let _ = event_tx.send(Event::Error {
957                run: run_id.clone(),
958                message: format!("PreRun hook vetoed run: {}", reason),
959            });
960            anyhow::bail!("PreRun hook vetoed run: {}", reason);
961        }
962
963        let _ = event_tx.send(Event::Message {
964            run: run_id.clone(),
965            role: "router".into(),
966            text: format!(
967                "requete: {} · tier: {} · tools: {} · vision: {} · local: {}",
968                task_summary,
969                tier.as_str(),
970                need.required_tools,
971                need.required_vision,
972                need.prefer_local
973            ),
974        });
975
976        let _ = event_tx.send(Event::AgentStatus {
977            run: run_id.clone(),
978            role: "planner".into(),
979            status: AgentStatus::Working,
980            note: format!("analyzing request · {} candidates", chain.len()),
981        });
982
983        let primary_ctx = chain
984            .first()
985            .map(|b| b.caps().context_window)
986            .unwrap_or(128_000);
987        let _ = event_tx.send(Event::RouteSelected {
988            run: run_id.clone(),
989            chain: chain_ids.clone(),
990            context_window: primary_ctx,
991        });
992        let _ = event_tx.send(Event::AgentStatus {
993            run: run_id.clone(),
994            role: "planner".into(),
995            status: AgentStatus::Done,
996            note: format!(
997                "route set · {} primary",
998                chain.first().map(|b| b.id()).unwrap_or("—")
999            ),
1000        });
1001
1002        if chain.is_empty() {
1003            let _ = event_tx.send(Event::Error {
1004                run: run_id.clone(),
1005                message: "No available models (budget exhausted or no providers configured)".into(),
1006            });
1007            return Ok(OutcomeSummary {
1008                status: "error: no models".into(),
1009                diffs: vec![],
1010                cost_usd: 0.0,
1011                tokens: TokenUsage {
1012                    input: 0,
1013                    output: 0,
1014                },
1015            });
1016        }
1017
1018        if self.is_routing_question(&task.description) {
1019            let text = self.routing_explanation(&tier, &need, &chain_ids);
1020            let input_tokens =
1021                estimate_text_tokens(&task.description) + estimate_text_tokens(&task_summary);
1022            let output_tokens = estimate_text_tokens(&text);
1023            let _ = event_tx.send(Event::TokenUsageEstimated {
1024                run: run_id.clone(),
1025                input: input_tokens,
1026                output: 0,
1027                reason: "router meta request estimate".into(),
1028            });
1029            let _ = event_tx.send(Event::TokenUsageEstimated {
1030                run: run_id.clone(),
1031                input: 0,
1032                output: output_tokens,
1033                reason: "router meta response estimate".into(),
1034            });
1035            let _ = event_tx.send(Event::ThinkingDelta {
1036                run: run_id.clone(),
1037                text: text.clone(),
1038            });
1039            let outcome = OutcomeSummary {
1040                status: "completed".into(),
1041                diffs: vec![],
1042                cost_usd: 0.0,
1043                tokens: TokenUsage {
1044                    input: input_tokens,
1045                    output: output_tokens,
1046                },
1047            };
1048            let _ = event_tx.send(Event::RunFinished {
1049                run: run_id.clone(),
1050                outcome: outcome.clone(),
1051            });
1052            return Ok(outcome);
1053        }
1054
1055        // Build tools and workspace
1056        let workspace_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1057        let sandbox: Arc<dyn Sandbox> = match self.config.defaults.sandbox.as_str() {
1058            "local-hardened" => Arc::new(crate::sandbox::LocalSandbox::hardened(
1059                workspace_root.clone(),
1060            )),
1061            "docker" => Arc::new(crate::sandbox::backends::DockerSandbox::new(
1062                workspace_root.clone(),
1063                "ubuntu:latest",
1064            )),
1065            s if s.starts_with("ssh:") => Arc::new(crate::sandbox::backends::SshSandbox::new(
1066                workspace_root.clone(),
1067                s.trim_start_matches("ssh:"),
1068            )),
1069            "modal" => Arc::new(crate::sandbox::backends::ModalSandbox::new(
1070                workspace_root.clone(),
1071            )),
1072            "daytona" => Arc::new(crate::sandbox::backends::DaytonaSandbox::new(
1073                workspace_root.clone(),
1074            )),
1075            "vercel" => Arc::new(crate::sandbox::backends::VercelSandbox::new(
1076                workspace_root.clone(),
1077            )),
1078            "singularity" => Arc::new(crate::sandbox::backends::SingularitySandbox::new(
1079                workspace_root.clone(),
1080            )),
1081            _ => Arc::new(crate::sandbox::LocalSandbox::new(workspace_root.clone())),
1082        };
1083
1084        let mut registry = ToolRegistry::new();
1085        registry.register(Arc::new(crate::tools::fs::FsRead));
1086        registry.register(Arc::new(crate::tools::fs::FsList));
1087        registry.register(Arc::new(crate::tools::fs::FsWrite));
1088        registry.register(Arc::new(crate::tools::edit::Edit));
1089        registry.register(Arc::new(crate::tools::edit::MultiEdit));
1090        registry.register(Arc::new(crate::tools::search_and_web::Search));
1091        registry.register(Arc::new(crate::tools::search_and_web::WebSearch));
1092        registry.register(Arc::new(crate::tools::search_and_web::WebFetch));
1093        registry.register(Arc::new(crate::tools::browser_sandbox::BrowserTool));
1094        registry.register(Arc::new(crate::tools::browser_sandbox::ComputerTool));
1095        registry.register(Arc::new(crate::tools::git::Git));
1096        registry.register(Arc::new(crate::tools::todo::Todo::new()));
1097        registry.register(Arc::new(crate::tools::exec::Exec::new(sandbox.clone())));
1098        registry.register(Arc::new(crate::tools::media::ImageGen::new()));
1099        registry.register(Arc::new(crate::tools::media::Tts::new()));
1100        registry.register(Arc::new(crate::tools::media::Transcribe::new()));
1101        registry.register(Arc::new(crate::tools::subagent::PythonRpc::new()));
1102        registry.register(Arc::new(crate::tools::builder_tools::LspClient));
1103        registry.register(Arc::new(crate::tools::code_nav::Glob));
1104        registry.register(Arc::new(crate::tools::code_nav::Symbols));
1105        if let Some(mem) = &self.memory {
1106            registry.register(Arc::new(crate::tools::memory::MemoryTool::new(mem.clone())));
1107            registry.register(Arc::new(
1108                crate::tools::knowledge_graph::KnowledgeGraphTool::new(mem.clone()),
1109            ));
1110        }
1111        {
1112            // Subagent delegation: child engine built from the same router/config.
1113            let mut sub = crate::tools::subagent::SubagentSpawn::new(
1114                self.router.clone(),
1115                self.config.clone(),
1116            );
1117            if let Some(mem) = &self.memory {
1118                sub = sub.with_memory(mem.clone());
1119            }
1120            registry.register(Arc::new(sub));
1121        }
1122        let tools = Arc::new(registry);
1123        let tool_specs: Vec<ToolSpec> = tools.to_specs();
1124
1125        let workspace = Workspace {
1126            root: workspace_root,
1127            sandbox,
1128        };
1129
1130        let identity = self.identity.clone().unwrap_or_else(|| Identity {
1131            name: "sparrow".into(),
1132            role: "senior software engineer".into(),
1133            personality: "concise, competent, direct".into(),
1134        });
1135
1136        let brain_policy = BrainPolicy {
1137            chain,
1138            current_index: 0,
1139        };
1140
1141        let mut autonomy = match self.config.defaults.autonomy {
1142            AutonomyLevel::Supervised => AutonomyContract::supervised(),
1143            AutonomyLevel::Trusted => AutonomyContract::trusted(),
1144            AutonomyLevel::Autonomous => AutonomyContract::autonomous(),
1145        };
1146        autonomy.budget.max_usd = self.config.budget.session_usd;
1147        let _ = event_tx.send(Event::AutonomyChanged {
1148            run: run_id.clone(),
1149            level: autonomy.level.clone(),
1150        });
1151
1152        // Load relevant skills
1153        let relevant_skills: Vec<crate::capabilities::Skill> = self
1154            .skills
1155            .as_ref()
1156            .map(|s| s.relevant(&task.description, 3))
1157            .unwrap_or_default();
1158
1159        let system = build_system_prompt(
1160            &identity,
1161            &workspace.root,
1162            &self
1163                .memory
1164                .as_ref()
1165                .map(|m| m.all_facts())
1166                .unwrap_or_default(),
1167            &self
1168                .memory
1169                .as_ref()
1170                .map(|m| {
1171                    [MemoryDocKind::Memory, MemoryDocKind::User]
1172                        .into_iter()
1173                        .filter_map(|kind| m.memory_doc(kind))
1174                        .collect::<Vec<_>>()
1175                })
1176                .unwrap_or_default(),
1177            &crate::instructions::discover_workspace_instructions(
1178                &workspace.root,
1179                &task.description,
1180            ),
1181            &relevant_skills,
1182        );
1183        let mut system = format!(
1184            "{}\n\n## Active Sparrow Routing Context\nRequest category: {}\nTask tier: {}\nRequired tools: {}\nRequired vision: {}\nPreferred local: {}\nSelected fallback chain: {}\nRouting policy: free_first={}, session_budget_usd={:.2}.\nWhen answering routing questions, describe this context concretely.",
1185            system,
1186            task_summary,
1187            tier.as_str(),
1188            need.required_tools,
1189            need.required_vision,
1190            need.prefer_local,
1191            summarize_model_chain(&chain_ids, 8),
1192            self.config.routing.free_first,
1193            self.config.budget.session_usd
1194        );
1195
1196        // Continuity hint: when there is prior conversation (task.context), tell
1197        // the model to treat it as authoritative memory. Weaker models otherwise
1198        // recite the system identity and ignore what the user said earlier.
1199        if !messages.is_empty() {
1200            system.push_str(
1201                "\n\n## Conversation continuity\nThis is an ONGOING conversation. The messages below are prior turns and are AUTHORITATIVE memory of what the user told you (names, preferences, facts, decisions). Use them directly; never re-introduce yourself or contradict them.",
1202            );
1203        }
1204
1205        // Build initial messages
1206        messages.push(Msg {
1207            role: "user".into(),
1208            content: initial_user_content_blocks(&workspace.root, &task.description),
1209        });
1210
1211        let mut total_input: u64 = 0;
1212        let mut total_output: u64 = 0;
1213        let mut estimated_input_unconfirmed: u64 = 0;
1214        let mut estimated_output_unconfirmed: u64 = 0;
1215        let mut estimated_cost_unconfirmed: f64 = 0.0;
1216        let mut cost_usd: f64 = 0.0;
1217        let diffs: Vec<crate::event::FileDiff> = Vec::new();
1218        let mut current_chain_idx = 0usize;
1219        let mut tool_results_pending: Vec<(
1220            String,
1221            String,
1222            serde_json::Value,
1223            Vec<ContentBlock>,
1224            bool,
1225        )> = Vec::new();
1226        let budget_session = self.config.budget.session_usd;
1227        let _budget_daily = self.config.budget.daily_usd;
1228        let redaction = &self.redaction;
1229        let mut had_error = false;
1230        let mut last_error: Option<String> = None;
1231        let mut waiting_for_approval = false;
1232        let mut denied_by_approval = false;
1233        let mut skill_evidence = String::new();
1234        // Iteration safety cap: bound the agentic loop independently of budget.
1235        let mut turns: u32 = 0;
1236        const MAX_TURNS: u32 = 60;
1237        // Auto-verify state: track whether mutating edits happened and how many
1238        // verify attempts we've spent, so we run the verify command after the
1239        // model says it's done and re-inject failures (bounded).
1240        let mut had_mutation = false;
1241        let mut verify_attempts: u32 = 0;
1242        const MAX_VERIFY_ATTEMPTS: u32 = 2;
1243        // Whether the run has produced ANY visible output (text or tool use). If
1244        // a model returns an empty completion and nothing has been produced yet,
1245        // we fall back to the next model in the chain (rescues a dead provider).
1246        let mut produced_any_output = false;
1247
1248        // Helper to send redacted events
1249        let send = |event: Event| {
1250            let _ = event_tx.send(redaction.redact_event(&event));
1251        };
1252
1253        // Compaction state (Phase 12 auto-trigger). The threshold matches the
1254        // default ContextManager budget; we keep `keep_last` messages verbatim
1255        // and replace earlier ones with a distilled summary block. A handoff
1256        // doc is written to `.sparrow/handoff/<run>-<ts>.md` and an
1257        // `Event::Compacted` is emitted so UIs can show the pass.
1258        const COMPACT_TRANSCRIPT_CHARS: usize = 120_000;
1259        const COMPACT_KEEP_LAST: usize = 6;
1260        let context_manager = crate::redaction::ContextManager::new(200_000);
1261
1262        // Main agentic loop
1263        loop {
1264            // Auto-compaction check (Phase 12). Skipped on the very first turn
1265            // so a short task never pays the overhead.
1266            if turns > 0 {
1267                let transcript_chars: usize = messages
1268                    .iter()
1269                    .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1270                    .sum();
1271                if transcript_chars > COMPACT_TRANSCRIPT_CHARS && messages.len() > COMPACT_KEEP_LAST
1272                {
1273                    // PreCompact lifecycle hook: lets operators dump state /
1274                    // back up the transcript before compaction discards it.
1275                    let _ = self
1276                        .hooks
1277                        .execute(&HookEvent::PreCompact, &task.description)
1278                        .await;
1279                    let before = transcript_chars;
1280                    let compacted =
1281                        context_manager.compact_messages(&messages, 0, COMPACT_KEEP_LAST);
1282                    let after: usize = compacted
1283                        .iter()
1284                        .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1285                        .sum();
1286
1287                    // Write a durable handoff next to the transcript.
1288                    let mut handoff = crate::context::HandoffDoc::new(task.description.clone());
1289                    handoff.next_steps = vec![format!(
1290                        "Resume run {} (turn {}/{})",
1291                        run_id.0, turns, MAX_TURNS
1292                    )];
1293                    let handoff_dir = std::path::PathBuf::from(".sparrow/handoff");
1294                    let _ = std::fs::create_dir_all(&handoff_dir);
1295                    let handoff_path = handoff_dir.join(format!(
1296                        "{}-{}.md",
1297                        run_id.0,
1298                        chrono::Utc::now().format("%Y%m%dT%H%M%SZ")
1299                    ));
1300                    let _ = std::fs::write(&handoff_path, handoff.to_markdown());
1301
1302                    messages = compacted;
1303                    send(Event::Compacted {
1304                        run: run_id.clone(),
1305                        before_chars: before,
1306                        after_chars: after,
1307                        handoff_path: Some(handoff_path.to_string_lossy().to_string()),
1308                    });
1309                    let _ = self
1310                        .hooks
1311                        .execute(&HookEvent::PostCompact, &task.description)
1312                        .await;
1313                }
1314            }
1315            // Iteration cap: stop runaway loops independently of budget.
1316            turns += 1;
1317            if turns > MAX_TURNS {
1318                send(Event::Message {
1319                    run: run_id.clone(),
1320                    role: "guard".into(),
1321                    text: format!("iteration cap reached ({} turns) — stopping", MAX_TURNS),
1322                });
1323                break;
1324            }
1325
1326            // Budget check: hard stop if exceeded
1327            if cost_usd + estimated_cost_unconfirmed >= budget_session {
1328                let msg = format!(
1329                    "Budget exceeded: ${:.4} of ${:.2} session cap",
1330                    cost_usd + estimated_cost_unconfirmed,
1331                    budget_session
1332                );
1333                send(Event::Error {
1334                    run: run_id.clone(),
1335                    message: msg.clone(),
1336                });
1337                // OnBudgetThreshold lifecycle: fired on hard cap. Operators can
1338                // configure a hook to e.g. page on-call when this triggers.
1339                let _ = self
1340                    .hooks
1341                    .execute(&HookEvent::OnBudgetThreshold, &msg)
1342                    .await;
1343                let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1344                had_error = true;
1345                last_error = Some("budget exceeded".into());
1346                break;
1347            }
1348            if let Some(_approval_handler) = &self.approval_handler {
1349                if waiting_for_approval {
1350                    // Route to approval handler (e.g., Telegram inline buttons)
1351                    // The handler will resolve and we continue
1352                }
1353            }
1354
1355            // ─── Org policy enforcement ──────────────────────────────────
1356            if let Some(ref policy) = self.org_policy {
1357                let proposed_file = tool_results_pending
1358                    .last()
1359                    .map(|(_, _, args, _, _)| {
1360                        args.get("path").and_then(|v| v.as_str()).unwrap_or("")
1361                    })
1362                    .unwrap_or("");
1363                if let Err(violation) =
1364                    policy.enforce(&self.config.defaults.autonomy, cost_usd, proposed_file)
1365                {
1366                    send(Event::Error {
1367                        run: run_id.clone(),
1368                        message: format!("Org policy violation: {}", violation),
1369                    });
1370                    break;
1371                }
1372            }
1373
1374            // ── Mid-run user injection (§3.7) ─────────────────────────────
1375            // Poll the inject channel non-blocking. Each pending message becomes
1376            // a new user turn so the next Brain call sees it.
1377            if let Some(rx) = inject_rx.as_mut() {
1378                loop {
1379                    match rx.try_recv() {
1380                        Ok(injected) => {
1381                            let trimmed = injected.trim().to_string();
1382                            if trimmed.is_empty() {
1383                                continue;
1384                            }
1385                            messages.push(Msg {
1386                                role: "user".into(),
1387                                content: vec![ContentBlock::Text {
1388                                    text: format!("INTERRUPT FROM USER: {}", trimmed),
1389                                }],
1390                            });
1391                            let _ = event_tx.send(Event::Message {
1392                                run: run_id.clone(),
1393                                role: "interrupt".into(),
1394                                text: trimmed,
1395                            });
1396                        }
1397                        Err(mpsc::error::TryRecvError::Empty) => break,
1398                        Err(mpsc::error::TryRecvError::Disconnected) => {
1399                            inject_rx = None;
1400                            break;
1401                        }
1402                    }
1403                }
1404            }
1405
1406            let brain = match brain_policy.chain.get(current_chain_idx) {
1407                Some(b) => b.clone(),
1408                None => break,
1409            };
1410
1411            let caps = brain.caps();
1412
1413            // ── Context compaction (§3.7) ─────────────────────────────────
1414            // If estimated tokens > 75% of context_window, truncate middle
1415            // messages to keep the original task + the last 6 exchanges.
1416            // A summary placeholder is inserted to preserve continuity.
1417            {
1418                let req_for_estimate = BrainRequest {
1419                    system: Some(system.clone()),
1420                    messages: messages.clone(),
1421                    tools: if need.required_tools {
1422                        tool_specs.clone()
1423                    } else {
1424                        vec![]
1425                    },
1426                    max_tokens: caps.max_output as u32,
1427                    temperature: 0.0,
1428                    stop: vec![],
1429                    cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1430                        "engine",
1431                        &workspace.root,
1432                        &tool_specs,
1433                    ))),
1434                };
1435                let est = estimate_request_tokens(&req_for_estimate);
1436                let threshold = (caps.context_window as f64 * 0.75) as u64;
1437                if est > threshold && messages.len() > 8 {
1438                    let original_task = messages.first().cloned();
1439                    let keep_tail: Vec<Msg> =
1440                        messages.iter().rev().take(6).cloned().collect::<Vec<_>>();
1441                    let middle: Vec<Msg> = messages
1442                        .iter()
1443                        .skip(1)
1444                        .take(messages.len().saturating_sub(7))
1445                        .cloned()
1446                        .collect();
1447                    let dropped = middle.len();
1448
1449                    // Ask the current brain for a real summary of the dropped middle
1450                    // (best-effort; fall back to a plain marker on failure).
1451                    let summary = self
1452                        .summarize_messages(brain.as_ref(), &middle)
1453                        .await
1454                        .unwrap_or_else(|| {
1455                            format!(
1456                                "{} prior messages were dropped to fit the model window.",
1457                                dropped
1458                            )
1459                        });
1460
1461                    let mut compacted: Vec<Msg> = Vec::new();
1462                    if let Some(task) = original_task {
1463                        compacted.push(task);
1464                    }
1465                    compacted.push(Msg {
1466                        role: "user".into(),
1467                        content: vec![ContentBlock::Text {
1468                            text: format!(
1469                                "[CONTEXT SUMMARY of {} earlier messages]\n{}\n\
1470                                 (Files edited and tool outputs in the turns below remain authoritative.)",
1471                                dropped, summary
1472                            ),
1473                        }],
1474                    });
1475                    for m in keep_tail.into_iter().rev() {
1476                        compacted.push(m);
1477                    }
1478                    messages = compacted;
1479                    let _ = event_tx.send(Event::Message {
1480                        run: run_id.clone(),
1481                        role: "compaction".into(),
1482                        text: format!(
1483                            "context compacted: {} messages summarized ({} tok > {} threshold)",
1484                            dropped, est, threshold
1485                        ),
1486                    });
1487                }
1488            }
1489
1490            let req = BrainRequest {
1491                system: Some(system.clone()),
1492                messages: messages.clone(),
1493                tools: if need.required_tools {
1494                    tool_specs.clone()
1495                } else {
1496                    vec![]
1497                },
1498                max_tokens: caps.max_output as u32,
1499                temperature: 0.0,
1500                stop: vec![],
1501                cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1502                    "engine",
1503                    &workspace.root,
1504                    &tool_specs,
1505                ))),
1506            };
1507
1508            let estimated_input = estimate_request_tokens(&req);
1509            estimated_input_unconfirmed += estimated_input;
1510            estimated_cost_unconfirmed +=
1511                caps.cost_input_per_mtok * (estimated_input as f64) / 1_000_000.0;
1512            let _ = event_tx.send(Event::TokenUsageEstimated {
1513                run: run_id.clone(),
1514                input: estimated_input,
1515                output: 0,
1516                reason: "prompt estimate before provider usage".into(),
1517            });
1518            let _ = event_tx.send(Event::CostUpdate {
1519                run: run_id.clone(),
1520                usd: cost_usd + estimated_cost_unconfirmed,
1521            });
1522
1523            let _ = event_tx.send(Event::AgentStatus {
1524                run: run_id.clone(),
1525                role: "coder".into(),
1526                status: AgentStatus::Thinking,
1527                note: format!("consulting {} · parsing request…", brain.id()),
1528            });
1529
1530            match brain.complete(req).await {
1531                Ok(mut stream) => {
1532                    let mut current_tool_name = String::new();
1533                    let mut current_tool_json = String::new();
1534                    let mut output_chars_seen: u64 = 0;
1535                    let mut output_tokens_emitted: u64 = 0;
1536                    let mut continue_agent_loop = false;
1537                    let mut stop_after_tool_result = false;
1538                    let mut assistant_text = String::new();
1539                    let mut tool_output_seen_this_completion = false;
1540                    // Tools invoked during this completion — fed to the hallucination
1541                    // guard so it knows whether the assistant has actually inspected
1542                    // any code/state before making a claim.
1543                    let mut tools_called_this_turn: Vec<String> = Vec::new();
1544                    // Accumulated reasoning_content (DeepSeek / Moonshot / Qwen
1545                    // thinking mode). Must be echoed back on the next turn or the
1546                    // provider returns 400.
1547                    let mut reasoning_buf: String = String::new();
1548
1549                    while let Some(event) = stream.next().await {
1550                        match event {
1551                            BrainEvent::TextDelta(text) => {
1552                                assistant_text.push_str(&text);
1553                                output_chars_seen += text.chars().count() as u64;
1554                                let estimated_output = (output_chars_seen + 3) / 4;
1555                                let output_delta =
1556                                    estimated_output.saturating_sub(output_tokens_emitted);
1557                                if output_delta > 0 {
1558                                    output_tokens_emitted += output_delta;
1559                                    estimated_output_unconfirmed += output_delta;
1560                                    estimated_cost_unconfirmed += caps.cost_output_per_mtok
1561                                        * (output_delta as f64)
1562                                        / 1_000_000.0;
1563                                    let _ = event_tx.send(Event::TokenUsageEstimated {
1564                                        run: run_id.clone(),
1565                                        input: 0,
1566                                        output: output_delta,
1567                                        reason: "streamed output estimate".into(),
1568                                    });
1569                                    let _ = event_tx.send(Event::CostUpdate {
1570                                        run: run_id.clone(),
1571                                        usd: cost_usd + estimated_cost_unconfirmed,
1572                                    });
1573                                }
1574                                let _ = event_tx.send(Event::ThinkingDelta {
1575                                    run: run_id.clone(),
1576                                    text: text.clone(),
1577                                });
1578                            }
1579                            BrainEvent::ReasoningDelta(rtext) => {
1580                                // Accumulate for the assistant message we'll push at
1581                                // end-of-turn. We don't surface it as text on screen —
1582                                // the engine's normal TextDelta path handles visible
1583                                // text, this is opaque thinking content the provider
1584                                // wants echoed back.
1585                                reasoning_buf.push_str(&rtext);
1586                                let _ = event_tx.send(Event::ReasoningDelta {
1587                                    run: run_id.clone(),
1588                                    text: rtext,
1589                                });
1590                            }
1591                            BrainEvent::ToolUseStart { id, name } => {
1592                                current_tool_name = name.clone();
1593                                tools_called_this_turn.push(name.clone());
1594                                current_tool_json.clear();
1595                                let risk = tools
1596                                    .get(&name)
1597                                    .map(|tool| tool.risk())
1598                                    .unwrap_or(RiskLevel::ReadOnly);
1599                                // Placeholder ToolUseProposed with empty args so the
1600                                // UI can open the card immediately. Real args follow
1601                                // at ToolUseEnd (see below) once the streamed JSON
1602                                // is complete.
1603                                let _ = event_tx.send(Event::ToolUseProposed {
1604                                    run: run_id.clone(),
1605                                    id: id.clone(),
1606                                    name: name.clone(),
1607                                    args: json!({}),
1608                                    risk,
1609                                });
1610                            }
1611                            BrainEvent::ToolUseDelta { id, json } => {
1612                                let _ = id;
1613                                current_tool_json.push_str(&json);
1614                            }
1615                            BrainEvent::ToolUseEnd { id } => {
1616                                // Parse accumulated JSON
1617                                let args: serde_json::Value =
1618                                    serde_json::from_str(&current_tool_json).unwrap_or(json!({}));
1619
1620                                // Check autonomy gate
1621                                let tool_name = if current_tool_name.is_empty() {
1622                                    "unknown".to_string()
1623                                } else {
1624                                    current_tool_name.clone()
1625                                };
1626                                let tool = tools.get(&tool_name);
1627                                let risk = tool
1628                                    .as_ref()
1629                                    .map(|tool| tool.risk())
1630                                    .unwrap_or(RiskLevel::ReadOnly);
1631
1632                                // Re-emit ToolUseProposed with the REAL args now
1633                                // that the streamed JSON is complete. The first
1634                                // emission at ToolUseStart used `{}` because the
1635                                // arguments hadn't streamed yet — the UI updates
1636                                // the existing card with these real arguments.
1637                                let _ = event_tx.send(Event::ToolUseProposed {
1638                                    run: run_id.clone(),
1639                                    id: id.clone(),
1640                                    name: tool_name.clone(),
1641                                    args: args.clone(),
1642                                    risk: risk.clone(),
1643                                });
1644                                let proposed = crate::autonomy::ProposedAction {
1645                                    tool_name: tool_name.clone(),
1646                                    risk: risk.clone(),
1647                                    args: args.clone(),
1648                                };
1649
1650                                let permission =
1651                                    self.config.permissions.evaluate(&PermissionContext {
1652                                        tool_name: &proposed.tool_name,
1653                                        risk: proposed.risk.clone(),
1654                                        args: &args,
1655                                        workspace_root: &workspace.root,
1656                                        provider: Some(brain.id()),
1657                                        surface: Some("engine"),
1658                                    });
1659                                let autonomy_verdict =
1660                                    if matches!(permission.decision, Decision::Allow) {
1661                                        Some(autonomy.evaluate(&proposed))
1662                                    } else {
1663                                        None
1664                                    };
1665                                let mut decision = autonomy_verdict
1666                                    .as_ref()
1667                                    .map(|verdict| verdict.decision.clone())
1668                                    .unwrap_or_else(|| permission.decision.clone());
1669                                if !matches!(permission.decision, Decision::Allow) {
1670                                    let _ = event_tx.send(Event::Message {
1671                                        run: run_id.clone(),
1672                                        role: "permissions".into(),
1673                                        text: permission.reason.clone(),
1674                                    });
1675                                }
1676                                if matches!(decision, Decision::AskUser) {
1677                                    let summary = format!(
1678                                        "{}. Approve {} with args: {}",
1679                                        permission.reason, proposed.tool_name, args
1680                                    );
1681                                    let _ = event_tx.send(Event::ApprovalRequested {
1682                                        run: run_id.clone(),
1683                                        id: id.clone(),
1684                                        summary: summary.clone(),
1685                                        tool: Some(proposed.tool_name.clone()),
1686                                        risk: Some(format!("{:?}", proposed.risk)),
1687                                    });
1688                                    // OnApprovalRequested hook so external
1689                                    // notifiers (Slack, email, …) can ping the
1690                                    // operator.
1691                                    let _ = self
1692                                        .hooks
1693                                        .execute(&HookEvent::OnApprovalRequested, &summary)
1694                                        .await;
1695                                    if let Some(handler) = &self.approval_handler {
1696                                        decision = handler
1697                                            .request_approval(ApprovalRequest {
1698                                                run: run_id.clone(),
1699                                                id: id.clone(),
1700                                                tool_name: proposed.tool_name.clone(),
1701                                                risk: proposed.risk.clone(),
1702                                                args: args.clone(),
1703                                                summary,
1704                                            })
1705                                            .await;
1706                                    }
1707                                }
1708
1709                                let _ = event_tx.send(Event::ApprovalResolved {
1710                                    run: run_id.clone(),
1711                                    id: id.clone(),
1712                                    decision: decision.clone(),
1713                                });
1714
1715                                match decision {
1716                                    Decision::Allow => {
1717                                        if autonomy_verdict
1718                                            .as_ref()
1719                                            .map(|verdict| verdict.notify)
1720                                            .unwrap_or(false)
1721                                        {
1722                                            let _ = event_tx.send(Event::Message {
1723                                                run: run_id.clone(),
1724                                                role: "autonomy".into(),
1725                                                text: format!(
1726                                                    "{} will run under trusted autonomy with checkpoint notification",
1727                                                    proposed.tool_name
1728                                                ),
1729                                            });
1730                                        }
1731                                        // Track mutations so we can auto-verify later.
1732                                        if matches!(
1733                                            proposed.risk,
1734                                            RiskLevel::Mutating | RiskLevel::Destructive
1735                                        ) {
1736                                            had_mutation = true;
1737                                        }
1738                                        // Auto-checkpoint before mutating/exec/destructive
1739                                        let needs_checkpoint = autonomy_verdict
1740                                            .as_ref()
1741                                            .map(|verdict| verdict.needs_checkpoint)
1742                                            .unwrap_or_else(|| {
1743                                                matches!(
1744                                                    proposed.risk,
1745                                                    RiskLevel::Mutating
1746                                                        | RiskLevel::Exec
1747                                                        | RiskLevel::Destructive
1748                                                )
1749                                            });
1750                                        if needs_checkpoint {
1751                                            let vetoes = self
1752                                                .hooks
1753                                                .execute(
1754                                                    &HookEvent::PreCheckpoint,
1755                                                    &proposed.tool_name,
1756                                                )
1757                                                .await;
1758                                            let checkpoint_veto = vetoes
1759                                                .iter()
1760                                                .find(|result| result.veto)
1761                                                .and_then(|result| result.veto_reason.clone());
1762                                            if let Some(reason) = checkpoint_veto {
1763                                                let _ = event_tx.send(Event::Error {
1764                                                    run: run_id.clone(),
1765                                                    message: reason,
1766                                                });
1767                                                denied_by_approval = true;
1768                                                stop_after_tool_result = true;
1769                                                continue;
1770                                            }
1771                                            let checkpoints =
1772                                                GitCheckpoints::new(workspace.root.clone());
1773                                            if let Ok(cp_id) = checkpoints
1774                                                .snapshot(&format!("pre-{}", proposed.tool_name))
1775                                            {
1776                                                let _ = event_tx.send(Event::CheckpointCreated {
1777                                                    run: run_id.clone(),
1778                                                    id: cp_id,
1779                                                    label: format!("pre-{}", proposed.tool_name),
1780                                                });
1781                                                let _ = self
1782                                                    .hooks
1783                                                    .execute(
1784                                                        &HookEvent::PostCheckpoint,
1785                                                        &proposed.tool_name,
1786                                                    )
1787                                                    .await;
1788                                            }
1789                                        }
1790
1791                                        let hook_results = self
1792                                            .hooks
1793                                            .execute(&HookEvent::PreToolUse, &proposed.tool_name)
1794                                            .await;
1795                                        if let Some(reason) = hook_results
1796                                            .iter()
1797                                            .find(|result| result.veto)
1798                                            .and_then(|result| result.veto_reason.clone())
1799                                        {
1800                                            denied_by_approval = true;
1801                                            stop_after_tool_result = true;
1802                                            let _ = event_tx.send(Event::ToolOutput {
1803                                                run: run_id.clone(),
1804                                                id: id.clone(),
1805                                                blocks: vec![Block::Text(reason.clone())],
1806                                            });
1807                                            tool_output_seen_this_completion = true;
1808                                            tool_results_pending.push((
1809                                                id.clone(),
1810                                                proposed.tool_name.clone(),
1811                                                args.clone(),
1812                                                vec![ContentBlock::Text { text: reason }],
1813                                                true,
1814                                            ));
1815                                            continue;
1816                                        }
1817
1818                                        let _ = event_tx.send(Event::ToolUseStarted {
1819                                            run: run_id.clone(),
1820                                            id: id.clone(),
1821                                        });
1822                                        let _ = event_tx.send(Event::AgentStatus {
1823                                            run: run_id.clone(),
1824                                            role: "coder".into(),
1825                                            status: AgentStatus::Working,
1826                                            note: format!("running tool · {}", current_tool_name),
1827                                        });
1828
1829                                        let result = if let Some(tool) = tool {
1830                                            let ctx = ToolCtx {
1831                                                workspace_root: workspace.root.clone(),
1832                                                run_id: run_id.clone(),
1833                                            };
1834                                            match tool.call(args.clone(), &ctx).await {
1835                                                Ok(result) => result,
1836                                                Err(e) => crate::tools::ToolResult::error(format!(
1837                                                    "Tool {} failed: {}",
1838                                                    proposed.tool_name, e
1839                                                )),
1840                                            }
1841                                        } else {
1842                                            crate::tools::ToolResult::error(format!(
1843                                                "Unknown tool: {}",
1844                                                proposed.tool_name
1845                                            ))
1846                                        };
1847
1848                                        for block in &result.content {
1849                                            if let Block::Diff { file, patch } = block {
1850                                                let plus = patch
1851                                                    .lines()
1852                                                    .filter(|l| {
1853                                                        l.starts_with('+') && !l.starts_with("+++")
1854                                                    })
1855                                                    .count()
1856                                                    as u32;
1857                                                let minus = patch
1858                                                    .lines()
1859                                                    .filter(|l| {
1860                                                        l.starts_with('-') && !l.starts_with("---")
1861                                                    })
1862                                                    .count()
1863                                                    as u32;
1864                                                let _ = event_tx.send(Event::DiffProposed {
1865                                                    run: run_id.clone(),
1866                                                    file: file.clone(),
1867                                                    patch: patch.clone(),
1868                                                    plus,
1869                                                    minus,
1870                                                });
1871                                            }
1872                                        }
1873
1874                                        let blocks = result.content.clone();
1875                                        let text = tool_result_text(&blocks);
1876                                        let content_blocks = tool_result_content_blocks(&blocks);
1877                                        let is_error = result.is_error;
1878                                        skill_evidence.push_str(&text);
1879                                        skill_evidence.push('\n');
1880                                        let _ = event_tx.send(Event::ToolOutput {
1881                                            run: run_id.clone(),
1882                                            id: id.clone(),
1883                                            blocks,
1884                                        });
1885                                        // Surface writes as DiffApplied so the artifacts
1886                                        // ledger sees files that fs_write/edit/multi_edit
1887                                        // touched even when the tool returned plain text.
1888                                        if !is_error
1889                                            && matches!(
1890                                                proposed.tool_name.as_str(),
1891                                                "fs_write" | "edit" | "multi_edit"
1892                                            )
1893                                        {
1894                                            if let Some(p) = args.get("path").and_then(|v| v.as_str())
1895                                            {
1896                                                let _ = event_tx.send(Event::DiffApplied {
1897                                                    run: run_id.clone(),
1898                                                    file: p.to_string(),
1899                                                });
1900                                            } else if let Some(p) = args
1901                                                .get("file_path")
1902                                                .and_then(|v| v.as_str())
1903                                            {
1904                                                let _ = event_tx.send(Event::DiffApplied {
1905                                                    run: run_id.clone(),
1906                                                    file: p.to_string(),
1907                                                });
1908                                            }
1909                                        }
1910                                        let _ = self
1911                                            .hooks
1912                                            .execute(&HookEvent::PostToolUse, &proposed.tool_name)
1913                                            .await;
1914                                        tool_output_seen_this_completion = true;
1915                                        tool_results_pending.push((
1916                                            id.clone(),
1917                                            proposed.tool_name.clone(),
1918                                            args.clone(),
1919                                            content_blocks,
1920                                            is_error,
1921                                        ));
1922                                    }
1923                                    Decision::AskUser => {
1924                                        // Supervised mode: prompt user on stdin
1925                                        waiting_for_approval = true;
1926                                        let approval_id = id.clone();
1927                                        let approval_name = proposed.tool_name.clone();
1928                                        let approval_args = args.clone();
1929                                        let approval_risk = proposed.risk;
1930
1931                                        // Emit approval requested
1932                                        let _ = event_tx.send(Event::ApprovalRequested {
1933                                            run: run_id.clone(),
1934                                            id: approval_id.clone(),
1935                                            summary: format!(
1936                                                "{} tool '{}' with args: {}",
1937                                                format!("{:?}", approval_risk),
1938                                                approval_name,
1939                                                approval_args
1940                                            ),
1941                                            tool: Some(approval_name.clone()),
1942                                            risk: Some(format!("{:?}", approval_risk)),
1943                                        });
1944
1945                                        // Wait for user input on stdin
1946                                        use std::io::{self, Write};
1947                                        print!(
1948                                            "\n\x1b[1;33mApprove {}? [y/N]\x1b[0m ",
1949                                            approval_name
1950                                        );
1951                                        io::stdout().flush().ok();
1952                                        let mut input = String::new();
1953                                        io::stdin().read_line(&mut input).ok();
1954                                        let approved = input.trim().to_lowercase() == "y";
1955
1956                                        if approved {
1957                                            waiting_for_approval = false;
1958                                            // Auto-checkpoint before mutating/exec/destructive
1959                                            if matches!(
1960                                                approval_risk,
1961                                                RiskLevel::Mutating
1962                                                    | RiskLevel::Exec
1963                                                    | RiskLevel::Destructive
1964                                            ) {
1965                                                let vetoes = self
1966                                                    .hooks
1967                                                    .execute(
1968                                                        &HookEvent::PreCheckpoint,
1969                                                        &approval_name,
1970                                                    )
1971                                                    .await;
1972                                                if let Some(reason) = vetoes
1973                                                    .iter()
1974                                                    .find(|result| result.veto)
1975                                                    .and_then(|result| result.veto_reason.clone())
1976                                                {
1977                                                    let _ = event_tx.send(Event::Error {
1978                                                        run: run_id.clone(),
1979                                                        message: reason,
1980                                                    });
1981                                                    denied_by_approval = true;
1982                                                    stop_after_tool_result = true;
1983                                                    continue;
1984                                                }
1985                                                let checkpoints =
1986                                                    GitCheckpoints::new(workspace.root.clone());
1987                                                if let Ok(cp_id) = checkpoints
1988                                                    .snapshot(&format!("pre-{}", approval_name))
1989                                                {
1990                                                    let _ =
1991                                                        event_tx.send(Event::CheckpointCreated {
1992                                                            run: run_id.clone(),
1993                                                            id: cp_id,
1994                                                            label: format!("pre-{}", approval_name),
1995                                                        });
1996                                                    let _ = self
1997                                                        .hooks
1998                                                        .execute(
1999                                                            &HookEvent::PostCheckpoint,
2000                                                            &approval_name,
2001                                                        )
2002                                                        .await;
2003                                                }
2004                                            }
2005                                            let hook_results = self
2006                                                .hooks
2007                                                .execute(&HookEvent::PreToolUse, &approval_name)
2008                                                .await;
2009                                            if let Some(reason) = hook_results
2010                                                .iter()
2011                                                .find(|result| result.veto)
2012                                                .and_then(|result| result.veto_reason.clone())
2013                                            {
2014                                                denied_by_approval = true;
2015                                                stop_after_tool_result = true;
2016                                                let _ = event_tx.send(Event::ToolOutput {
2017                                                    run: run_id.clone(),
2018                                                    id: approval_id.clone(),
2019                                                    blocks: vec![Block::Text(reason.clone())],
2020                                                });
2021                                                tool_output_seen_this_completion = true;
2022                                                tool_results_pending.push((
2023                                                    approval_id,
2024                                                    approval_name,
2025                                                    approval_args,
2026                                                    vec![ContentBlock::Text { text: reason }],
2027                                                    true,
2028                                                ));
2029                                                continue;
2030                                            }
2031                                            let _ = event_tx.send(Event::ToolUseStarted {
2032                                                run: run_id.clone(),
2033                                                id: approval_id.clone(),
2034                                            });
2035                                            let result = if let Some(tool) = tool {
2036                                                let ctx = ToolCtx {
2037                                                    workspace_root: workspace.root.clone(),
2038                                                    run_id: run_id.clone(),
2039                                                };
2040                                                match tool.call(approval_args.clone(), &ctx).await {
2041                                                    Ok(r) => r,
2042                                                    Err(e) => {
2043                                                        crate::tools::ToolResult::error(format!(
2044                                                            "Tool {} failed: {}",
2045                                                            approval_name, e
2046                                                        ))
2047                                                    }
2048                                                }
2049                                            } else {
2050                                                crate::tools::ToolResult::error(format!(
2051                                                    "Unknown tool: {}",
2052                                                    approval_name
2053                                                ))
2054                                            };
2055                                            let blocks = result.content.clone();
2056                                            let text = tool_result_text(&blocks);
2057                                            let content_blocks =
2058                                                tool_result_content_blocks(&blocks);
2059                                            let is_error = result.is_error;
2060                                            skill_evidence.push_str(&text);
2061                                            skill_evidence.push('\n');
2062                                            let _ = event_tx.send(Event::ToolOutput {
2063                                                run: run_id.clone(),
2064                                                id: approval_id.clone(),
2065                                                blocks,
2066                                            });
2067                                            let _ = self
2068                                                .hooks
2069                                                .execute(&HookEvent::PostToolUse, &approval_name)
2070                                                .await;
2071                                            tool_output_seen_this_completion = true;
2072                                            tool_results_pending.push((
2073                                                approval_id,
2074                                                approval_name,
2075                                                approval_args,
2076                                                content_blocks,
2077                                                is_error,
2078                                            ));
2079                                        } else {
2080                                            let _ = event_tx.send(Event::ToolOutput {
2081                                                run: run_id.clone(),
2082                                                id: approval_id.clone(),
2083                                                blocks: vec![Block::Text("Denied by user".into())],
2084                                            });
2085                                            tool_output_seen_this_completion = true;
2086                                            tool_results_pending.push((
2087                                                approval_id,
2088                                                approval_name,
2089                                                approval_args,
2090                                                vec![ContentBlock::Text {
2091                                                    text: "Denied by user".into(),
2092                                                }],
2093                                                true,
2094                                            ));
2095                                        }
2096                                    }
2097                                    Decision::Deny => {
2098                                        denied_by_approval = true;
2099                                        stop_after_tool_result = true;
2100                                        let _ = event_tx.send(Event::ToolOutput {
2101                                            run: run_id.clone(),
2102                                            id: id.clone(),
2103                                            blocks: vec![Block::Text(
2104                                                "Denied by autonomy policy".into(),
2105                                            )],
2106                                        });
2107                                        tool_output_seen_this_completion = true;
2108                                        tool_results_pending.push((
2109                                            id.clone(),
2110                                            proposed.tool_name.clone(),
2111                                            args.clone(),
2112                                            vec![ContentBlock::Text {
2113                                                text: "Denied by autonomy policy".into(),
2114                                            }],
2115                                            true,
2116                                        ));
2117                                    }
2118                                }
2119
2120                                current_tool_json.clear();
2121                                current_tool_name.clear();
2122                            }
2123                            BrainEvent::Usage(usage) => {
2124                                total_input += usage.input;
2125                                total_output += usage.output;
2126                                estimated_input_unconfirmed =
2127                                    estimated_input_unconfirmed.saturating_sub(usage.input);
2128                                estimated_output_unconfirmed =
2129                                    estimated_output_unconfirmed.saturating_sub(usage.output);
2130                                let _ = event_tx.send(Event::TokenUsage {
2131                                    run: run_id.clone(),
2132                                    input: usage.input,
2133                                    output: usage.output,
2134                                });
2135
2136                                // Calculate cost
2137                                let input_cost =
2138                                    caps.cost_input_per_mtok * (usage.input as f64) / 1_000_000.0;
2139                                let output_cost =
2140                                    caps.cost_output_per_mtok * (usage.output as f64) / 1_000_000.0;
2141                                let actual_cost = input_cost + output_cost;
2142                                cost_usd += actual_cost;
2143                                estimated_cost_unconfirmed =
2144                                    (estimated_cost_unconfirmed - actual_cost).max(0.0);
2145
2146                                let _ = event_tx.send(Event::CostUpdate {
2147                                    run: run_id.clone(),
2148                                    usd: cost_usd + estimated_cost_unconfirmed,
2149                                });
2150                            }
2151                            BrainEvent::Done(reason) => {
2152                                match reason {
2153                                    crate::event::StopReason::EndTurn => {
2154                                        // Empty-completion fallback: if this model
2155                                        // produced nothing (no text, no tool) and the
2156                                        // run has produced nothing so far, try the
2157                                        // next model instead of finishing empty.
2158                                        let this_empty = assistant_text.trim().is_empty()
2159                                            && !tool_output_seen_this_completion;
2160                                        if this_empty && !produced_any_output {
2161                                            let next_idx = current_chain_idx + 1;
2162                                            if next_idx < brain_policy.chain.len() {
2163                                                current_chain_idx = next_idx;
2164                                                let _ = event_tx.send(Event::ModelSwitched {
2165                                                    run: run_id.clone(),
2166                                                    from: brain.id().to_string(),
2167                                                    to: brain_policy.chain[current_chain_idx]
2168                                                        .id()
2169                                                        .to_string(),
2170                                                    reason: "empty response".into(),
2171                                                });
2172                                                continue_agent_loop = true;
2173                                                break;
2174                                            }
2175                                        }
2176                                        if !assistant_text.trim().is_empty() {
2177                                            produced_any_output = true;
2178                                            let mut blocks = Vec::new();
2179                                            if !reasoning_buf.is_empty() {
2180                                                blocks.push(ContentBlock::Reasoning {
2181                                                    text: reasoning_buf.clone(),
2182                                                });
2183                                            }
2184                                            blocks.push(ContentBlock::Text {
2185                                                text: assistant_text.clone(),
2186                                            });
2187                                            let assistant_msg = Msg {
2188                                                role: "assistant".into(),
2189                                                content: blocks,
2190                                            };
2191                                            let turn_messages = vec![assistant_msg.clone()];
2192                                            let has_verified_tool_context =
2193                                                tool_output_seen_this_completion
2194                                                    || messages.iter().any(|m| {
2195                                                        m.content.iter().any(|block| {
2196                                                            matches!(
2197                                                                block,
2198                                                                ContentBlock::ToolResult { .. }
2199                                                            )
2200                                                        })
2201                                                    });
2202
2203                                            if let Some(correction) = self.reasoning.guard_turn(
2204                                                &turn_messages,
2205                                                has_verified_tool_context,
2206                                            ) {
2207                                                messages.push(assistant_msg);
2208                                                let _ = event_tx.send(Event::Message {
2209                                                    run: run_id.clone(),
2210                                                    role: "guard".into(),
2211                                                    text: correction.clone(),
2212                                                });
2213                                                messages.push(Msg {
2214                                                    role: "user".into(),
2215                                                    content: vec![ContentBlock::Text {
2216                                                        text: format!("SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.", correction),
2217                                                    }],
2218                                                });
2219                                                continue_agent_loop = true;
2220                                                break;
2221                                            }
2222
2223                                            // Hallucination guard: catch claims about
2224                                            // code structure made without first calling
2225                                            // fs_read / search this turn.
2226                                            if self.reasoning.hallucination_guard {
2227                                                if let Some(correction) =
2228                                                    crate::reasoning::HallucinationGuard::verify(
2229                                                        &assistant_text,
2230                                                        &tools_called_this_turn,
2231                                                    )
2232                                                {
2233                                                    let mut blocks2 = Vec::new();
2234                                                    if !reasoning_buf.is_empty() {
2235                                                        blocks2.push(ContentBlock::Reasoning {
2236                                                            text: reasoning_buf.clone(),
2237                                                        });
2238                                                    }
2239                                                    blocks2.push(ContentBlock::Text {
2240                                                        text: assistant_text.clone(),
2241                                                    });
2242                                                    let assistant_msg2 = Msg {
2243                                                        role: "assistant".into(),
2244                                                        content: blocks2,
2245                                                    };
2246                                                    messages.push(assistant_msg2);
2247                                                    let _ = event_tx.send(Event::Message {
2248                                                        run: run_id.clone(),
2249                                                        role: "guard".into(),
2250                                                        text: correction.clone(),
2251                                                    });
2252                                                    messages.push(Msg {
2253                                                        role: "user".into(),
2254                                                        content: vec![ContentBlock::Text {
2255                                                            text: format!("SYSTEM: {}. Call fs_read or search to verify the file/symbol first, then re-state the claim with the raw evidence.", correction),
2256                                                        }],
2257                                                    });
2258                                                    continue_agent_loop = true;
2259                                                    break;
2260                                                }
2261                                            }
2262
2263                                            skill_evidence.push_str(&assistant_text);
2264                                            skill_evidence.push('\n');
2265                                            messages.push(assistant_msg);
2266                                        }
2267
2268                                        // ── Pre-mutation self-critique (reasoning §) ─
2269                                        // If we mutated files this turn, emit a
2270                                        // structured self-review of the change set
2271                                        // so the operator/UI can see the agent's
2272                                        // own checklist before auto-verify runs.
2273                                        if had_mutation
2274                                            && self.reasoning.self_critique
2275                                            && !diffs.is_empty()
2276                                        {
2277                                            let review =
2278                                                crate::reasoning::SelfCritique::pre_mutation_review(
2279                                                    &diffs,
2280                                                    Some(&task.description),
2281                                                );
2282                                            let _ = event_tx.send(Event::Message {
2283                                                run: run_id.clone(),
2284                                                role: "self-critique".into(),
2285                                                text: review,
2286                                            });
2287                                        }
2288
2289                                        // ── Auto-verify (§10 testing) ───────────
2290                                        // The model thinks it's done. If it mutated
2291                                        // files and a verify command is configured,
2292                                        // run it; on failure, re-inject so the agent
2293                                        // fixes it (bounded retries).
2294                                        if had_mutation && verify_attempts < MAX_VERIFY_ATTEMPTS {
2295                                            if let Some(verify_cmd) =
2296                                                self.config.defaults.verify_command.clone()
2297                                            {
2298                                                verify_attempts += 1;
2299                                                had_mutation = false;
2300                                                let parts: Vec<String> = verify_cmd
2301                                                    .split_whitespace()
2302                                                    .map(String::from)
2303                                                    .collect();
2304                                                if !parts.is_empty() {
2305                                                    let _ = event_tx.send(Event::AgentStatus {
2306                                                        run: run_id.clone(),
2307                                                        role: "verifier".into(),
2308                                                        status: AgentStatus::Working,
2309                                                        note: format!("running `{}`", verify_cmd),
2310                                                    });
2311                                                    let cmd = crate::sandbox::Command {
2312                                                        program: parts[0].clone(),
2313                                                        args: parts[1..].to_vec(),
2314                                                        env: std::collections::HashMap::new(),
2315                                                        workdir: workspace.root.clone(),
2316                                                    };
2317                                                    let limits = crate::sandbox::Limits {
2318                                                        timeout_ms: 300_000,
2319                                                        max_output_bytes: 16_000,
2320                                                    };
2321                                                    match workspace
2322                                                        .sandbox
2323                                                        .exec(&cmd, &limits)
2324                                                        .await
2325                                                    {
2326                                                        Ok(res) if res.exit_code != 0 => {
2327                                                            let _ = event_tx.send(Event::TestResult {
2328                                                                run: run_id.clone(),
2329                                                                passed: 0,
2330                                                                failed: 1,
2331                                                                detail: format!(
2332                                                                    "verify `{}` failed (exit {})",
2333                                                                    verify_cmd, res.exit_code
2334                                                                ),
2335                                                            });
2336                                                            let out = format!(
2337                                                                "{}\n{}",
2338                                                                res.stdout, res.stderr
2339                                                            );
2340                                                            let tail: String = out
2341                                                                .lines()
2342                                                                .rev()
2343                                                                .take(40)
2344                                                                .collect::<Vec<_>>()
2345                                                                .into_iter()
2346                                                                .rev()
2347                                                                .collect::<Vec<_>>()
2348                                                                .join("\n");
2349                                                            messages.push(Msg {
2350                                                                role: "user".into(),
2351                                                                content: vec![ContentBlock::Text {
2352                                                                    text: format!(
2353                                                                        "SYSTEM: verification command `{}` FAILED (exit {}). Fix the code, then it will be re-verified. Output:\n{}",
2354                                                                        verify_cmd, res.exit_code, tail
2355                                                                    ),
2356                                                                }],
2357                                                            });
2358                                                            continue_agent_loop = true;
2359                                                            break;
2360                                                        }
2361                                                        Ok(_) => {
2362                                                            let _ =
2363                                                                event_tx.send(Event::TestResult {
2364                                                                    run: run_id.clone(),
2365                                                                    passed: 1,
2366                                                                    failed: 0,
2367                                                                    detail: format!(
2368                                                                        "verify `{}` passed",
2369                                                                        verify_cmd
2370                                                                    ),
2371                                                                });
2372                                                        }
2373                                                        Err(e) => {
2374                                                            let _ = event_tx.send(Event::Message {
2375                                                                run: run_id.clone(),
2376                                                                role: "guard".into(),
2377                                                                text: format!(
2378                                                                    "verify command could not run: {}",
2379                                                                    e
2380                                                                ),
2381                                                            });
2382                                                        }
2383                                                    }
2384                                                }
2385                                            }
2386                                        }
2387                                    }
2388                                    crate::event::StopReason::ToolUse => {
2389                                        // A single model turn that emits N tool calls
2390                                        // MUST be replayed as ONE assistant message
2391                                        // carrying reasoning_content + ALL tool_calls,
2392                                        // followed by N tool-result messages. Splitting
2393                                        // it into one assistant message per tool left
2394                                        // the 2nd+ calls without reasoning_content, which
2395                                        // DeepSeek/Qwen/Moonshot thinking-mode rejects
2396                                        // with HTTP 400 ("reasoning_content must be passed
2397                                        // back"), aborting every turn after the first and
2398                                        // leaving multi-file tasks half-done. One
2399                                        // assistant message with a tool_calls array is
2400                                        // also the correct OpenAI/Anthropic shape.
2401                                        let drained: Vec<_> =
2402                                            std::mem::take(&mut tool_results_pending);
2403
2404                                        let mut assistant_blocks = Vec::new();
2405                                        if !reasoning_buf.is_empty() {
2406                                            assistant_blocks.push(ContentBlock::Reasoning {
2407                                                text: reasoning_buf.clone(),
2408                                            });
2409                                        }
2410                                        for (tool_id, tool_name, args, _content, _is_error) in
2411                                            &drained
2412                                        {
2413                                            assistant_blocks.push(ContentBlock::ToolUse {
2414                                                id: tool_id.clone(),
2415                                                name: tool_name.clone(),
2416                                                input: args.clone(),
2417                                            });
2418                                        }
2419                                        messages.push(Msg {
2420                                            role: "assistant".into(),
2421                                            content: assistant_blocks,
2422                                        });
2423
2424                                        for (tool_id, _tool_name, _args, content, is_error) in
2425                                            drained
2426                                        {
2427                                            messages.push(Msg {
2428                                                role: "user".into(),
2429                                                content: vec![ContentBlock::ToolResult {
2430                                                    tool_use_id: tool_id,
2431                                                    content,
2432                                                    is_error: Some(is_error),
2433                                                }],
2434                                            });
2435                                        }
2436                                        if tool_output_seen_this_completion {
2437                                            produced_any_output = true;
2438                                        }
2439                                        continue_agent_loop =
2440                                            !waiting_for_approval && !stop_after_tool_result;
2441                                        break;
2442                                    }
2443                                    _ => {}
2444                                }
2445                                break; // Done
2446                            }
2447                            BrainEvent::Error(msg) => {
2448                                let _ = event_tx.send(Event::Error {
2449                                    run: run_id.clone(),
2450                                    message: msg.clone(),
2451                                });
2452                                let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
2453                                let next_idx = current_chain_idx + 1;
2454                                if next_idx < brain_policy.chain.len() {
2455                                    current_chain_idx = next_idx;
2456                                    let switch_ctx = format!(
2457                                        "{} -> {}",
2458                                        brain.id(),
2459                                        brain_policy.chain[current_chain_idx].id()
2460                                    );
2461                                    let _ = event_tx.send(Event::ModelSwitched {
2462                                        run: run_id.clone(),
2463                                        from: brain.id().to_string(),
2464                                        to: brain_policy.chain[current_chain_idx].id().to_string(),
2465                                        reason: msg,
2466                                    });
2467                                    let _ = self
2468                                        .hooks
2469                                        .execute(&HookEvent::OnModelSwitched, &switch_ctx)
2470                                        .await;
2471                                    continue_agent_loop = true;
2472                                } else {
2473                                    had_error = true;
2474                                    last_error = Some(msg);
2475                                }
2476                                break;
2477                            }
2478                        }
2479                    }
2480
2481                    // Robust empty-completion fallback: some providers end the
2482                    // stream WITHOUT a Done(EndTurn) (so the in-stream check never
2483                    // fires). If this completion produced nothing and the run has
2484                    // produced nothing, advance to the next model in the chain.
2485                    if !continue_agent_loop && !had_error {
2486                        let this_empty =
2487                            assistant_text.trim().is_empty() && !tool_output_seen_this_completion;
2488                        if this_empty && !produced_any_output {
2489                            let next_idx = current_chain_idx + 1;
2490                            if next_idx < brain_policy.chain.len() {
2491                                let _ = event_tx.send(Event::ModelSwitched {
2492                                    run: run_id.clone(),
2493                                    from: brain.id().to_string(),
2494                                    to: brain_policy.chain[next_idx].id().to_string(),
2495                                    reason: "empty response".into(),
2496                                });
2497                                current_chain_idx = next_idx;
2498                                continue;
2499                            }
2500                        }
2501                    }
2502
2503                    if continue_agent_loop {
2504                        continue;
2505                    }
2506                    break; // Task complete
2507                }
2508                Err(e) => {
2509                    let err_msg = format!("{}", e);
2510                    let _ = event_tx.send(Event::Error {
2511                        run: run_id.clone(),
2512                        message: err_msg.clone(),
2513                    });
2514
2515                    // Try next in chain
2516                    let next_idx = current_chain_idx + 1;
2517                    if next_idx < brain_policy.chain.len() {
2518                        current_chain_idx = next_idx;
2519                        let _ = event_tx.send(Event::ModelSwitched {
2520                            run: run_id.clone(),
2521                            from: brain.id().to_string(),
2522                            to: brain_policy.chain[current_chain_idx].id().to_string(),
2523                            reason: err_msg,
2524                        });
2525                    } else {
2526                        had_error = true;
2527                        last_error = Some(err_msg);
2528                        break;
2529                    }
2530                }
2531            }
2532        }
2533
2534        // Emit final confirmed token usage — fallback to estimates if provider omitted usage events.
2535        let final_input = if total_input > 0 {
2536            total_input
2537        } else {
2538            total_input + estimated_input_unconfirmed
2539        };
2540        let final_output = if total_output > 0 {
2541            total_output
2542        } else {
2543            total_output + estimated_output_unconfirmed
2544        };
2545        let _ = event_tx.send(Event::TokenUsage {
2546            run: run_id.clone(),
2547            input: final_input,
2548            output: final_output,
2549        });
2550        // Mark coder lane done — clears the animated caret cleanly.
2551        let _ = event_tx.send(Event::AgentStatus {
2552            run: run_id.clone(),
2553            role: "coder".into(),
2554            status: AgentStatus::Done,
2555            note: format!("completed · {}↑ {}↓ tok", final_input, final_output),
2556        });
2557
2558        let outcome = OutcomeSummary {
2559            status: if had_error {
2560                format!(
2561                    "error: {}",
2562                    last_error.unwrap_or_else(|| "run failed".into())
2563                )
2564            } else if waiting_for_approval {
2565                "waiting_for_approval".into()
2566            } else if denied_by_approval {
2567                "denied".into()
2568            } else {
2569                "completed".into()
2570            },
2571            diffs,
2572            cost_usd: cost_usd + estimated_cost_unconfirmed,
2573            tokens: TokenUsage {
2574                input: total_input + estimated_input_unconfirmed,
2575                output: total_output + estimated_output_unconfirmed,
2576            },
2577        };
2578
2579        // Persist task to memory
2580        if let Some(mem) = &self.memory {
2581            let _ = mem.save_task(&crate::memory::TaskMem {
2582                run_id: run_id.0.clone(),
2583                messages: messages.clone(),
2584                created_at: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(),
2585            });
2586        }
2587
2588        // Propose skill candidate from successful run
2589        if outcome.status == "completed" {
2590            if let Some(skills) = &self.skills {
2591                if let Some(candidate) = Curator::propose_skill_if_missing(
2592                    &task.description,
2593                    &skill_evidence,
2594                    skills.as_ref(),
2595                ) {
2596                    let skill_name = candidate.name.clone();
2597                    let _ = event_tx.send(Event::SkillLearned {
2598                        run: run_id.clone(),
2599                        name: skill_name.clone(),
2600                    });
2601                    let _ = self
2602                        .hooks
2603                        .execute(&HookEvent::OnSkillLearned, &skill_name)
2604                        .await;
2605                    let _ = skills.add(candidate);
2606                }
2607            }
2608
2609            // Auto-distill facts from the successful run. Reconstruct the event
2610            // view from the final conversation: ToolUse blocks carry the real
2611            // tool args (file paths, content), Text blocks carry reasoning — both
2612            // are what the Distiller mines for durable user facts (§3.8).
2613            if let Some(mem) = &self.memory {
2614                let events = events_from_messages(&run_id, &messages);
2615                Distiller::distill(mem, &events, &task.description).await;
2616            }
2617        }
2618
2619        let _ = event_tx.send(Event::RunFinished {
2620            run: run_id.clone(),
2621            outcome: outcome.clone(),
2622        });
2623
2624        // PostRun lifecycle hook (best-effort, non-blocking semantics).
2625        let _ = self
2626            .hooks
2627            .execute(&HookEvent::PostRun, &task.description)
2628            .await;
2629
2630        Ok(outcome)
2631    }
2632}
2633
#[cfg(test)]
mod tests {
    use super::*;

    /// A description carrying an `[uploaded: <path>]` marker for a PNG on disk
    /// must produce a leading text block and a non-empty base64 `image/png`
    /// image block.
    #[test]
    fn initial_user_content_blocks_embeds_uploaded_images() {
        // The eight-byte PNG signature padded with four zero bytes.
        let png_stub: [u8; 12] = [
            0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n', 0, 0, 0, 0,
        ];

        let scratch = tempfile::tempdir().expect("tempdir");
        let png_path = scratch.path().join("shot.png");
        std::fs::write(&png_path, png_stub).expect("write image");

        let description = format!(
            "analyse this\n\n[Attached files]\n### file: shot.png\n[uploaded: {}]",
            png_path.display()
        );
        let blocks = initial_user_content_blocks(scratch.path(), &description);

        // True for a base64 image block tagged as PNG that carries a payload.
        let is_nonempty_png = |candidate: &ContentBlock| {
            matches!(
                candidate,
                ContentBlock::Image {
                    source: ImageSource::Base64 { media_type, data },
                } if media_type == "image/png" && !data.is_empty()
            )
        };

        assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
        assert!(blocks.iter().any(is_nonempty_png));
    }

    /// Tool output mixing text and raw image bytes must keep the text block
    /// first and carry the image through as a base64 `image/png` block.
    #[test]
    fn tool_result_content_blocks_preserves_images() {
        let tool_output = [
            Block::Text("screenshot captured".into()),
            Block::Image {
                data: vec![1, 2, 3],
                mime: "image/png".into(),
            },
        ];
        let blocks = tool_result_content_blocks(&tool_output);

        // "AQID" is the standard base64 encoding of the bytes [1, 2, 3].
        let is_expected_png = |candidate: &ContentBlock| {
            matches!(
                candidate,
                ContentBlock::Image {
                    source: ImageSource::Base64 { media_type, data },
                } if media_type == "image/png" && data == "AQID"
            )
        };

        assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
        assert!(blocks.iter().any(is_expected_png));
    }
}