1use async_trait::async_trait;
2use futures::StreamExt;
3use serde_json::json;
4use std::path::PathBuf;
5use std::sync::Arc;
6use tokio::sync::mpsc;
7
8use crate::agent::AgentStore;
9use crate::autonomy::{AutonomyContract, Checkpoints, GitCheckpoints};
10use crate::capabilities::{Curator, SkillLibrary};
11use crate::config::Config;
12use crate::event::{
13 AgentStatus, AutonomyLevel, Block, Decision, Event, OutcomeSummary, RiskLevel, RunId,
14 TokenUsage,
15};
16use crate::extras::Distiller;
17use crate::hooks::{HookEvent, HookRegistry};
18use crate::instructions::InstructionDoc;
19use crate::memory::{Fact, Memory, MemoryDoc, MemoryDocKind};
20use crate::permissions::PermissionContext;
21use crate::provider::{
22 Brain, BrainEvent, BrainRequest, ContentBlock, ImageSource, Msg, PromptCacheConfig, ToolSpec,
23};
24use crate::reasoning::ReasoningEngine;
25use crate::redaction::RedactionFilter;
26use crate::router::{BudgetState, Router, TaskTier};
27use crate::sandbox::Sandbox;
28use crate::tools::{ToolCtx, ToolRegistry};
29
30pub mod scorer;
31pub mod treesitter;
32
33#[derive(Debug, Clone)]
36pub struct Identity {
37 pub name: String,
38 pub role: String,
39 pub personality: String,
40}
41
42impl Default for Identity {
43 fn default() -> Self {
44 Self {
45 name: "sparrow".into(),
46 role: "software engineer".into(),
47 personality: "concise, competent, helpful".into(),
48 }
49 }
50}
51
52pub struct BrainPolicy {
55 pub chain: Vec<Arc<dyn Brain>>,
57 pub current_index: usize,
58}
59
60impl BrainPolicy {
61 pub fn current(&self) -> Option<Arc<dyn Brain>> {
62 self.chain.get(self.current_index).cloned()
63 }
64
65 pub fn next(&mut self) -> Option<Arc<dyn Brain>> {
66 self.current_index += 1;
67 self.current()
68 }
69}
70
71pub struct Workspace {
74 pub root: PathBuf,
75 pub sandbox: Arc<dyn Sandbox>,
76}
77
78pub struct AgentRun {
81 pub id: RunId,
82 pub identity: Identity,
83 pub brain_policy: BrainPolicy,
84 pub autonomy: AutonomyContract,
85 pub tools: Arc<ToolRegistry>,
86 pub workspace: Workspace,
87}
88
89fn estimate_text_tokens(text: &str) -> u64 {
90 let chars = text.chars().count() as u64;
91 ((chars + 3) / 4).max(1)
92}
93
94fn estimate_content_tokens(blocks: &[ContentBlock]) -> u64 {
95 blocks
96 .iter()
97 .map(|block| match block {
98 ContentBlock::Text { text } => estimate_text_tokens(text),
99 ContentBlock::Image { source } => match source {
100 crate::provider::ImageSource::Base64 { data, .. } => {
101 256 + estimate_text_tokens(data).min(2_000)
102 }
103 crate::provider::ImageSource::Url { url } => 256 + estimate_text_tokens(url),
104 },
105 ContentBlock::ToolUse { name, input, .. } => {
106 estimate_text_tokens(name) + estimate_text_tokens(&input.to_string())
107 }
108 ContentBlock::ToolResult { content, .. } => 8 + estimate_content_tokens(content),
109 ContentBlock::Reasoning { text } => estimate_text_tokens(text),
110 })
111 .sum()
112}
113
114fn estimate_request_tokens(req: &BrainRequest) -> u64 {
115 let system = req.system.as_deref().map(estimate_text_tokens).unwrap_or(0);
116 let messages: u64 = req
117 .messages
118 .iter()
119 .map(|msg| estimate_text_tokens(&msg.role) + estimate_content_tokens(&msg.content) + 4)
120 .sum();
121 let tools: u64 = req
122 .tools
123 .iter()
124 .map(|tool| {
125 estimate_text_tokens(&tool.name)
126 + estimate_text_tokens(&tool.description)
127 + estimate_text_tokens(&tool.input_schema.to_string())
128 })
129 .sum();
130 system + messages + tools
131}
132
133fn base64_encode(data: &[u8]) -> String {
134 const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
135 let mut out = String::with_capacity(data.len().div_ceil(3) * 4);
136 for chunk in data.chunks(3) {
137 let b0 = chunk[0] as u32;
138 let b1 = if chunk.len() > 1 { chunk[1] as u32 } else { 0 };
139 let b2 = if chunk.len() > 2 { chunk[2] as u32 } else { 0 };
140 let triple = (b0 << 16) | (b1 << 8) | b2;
141 out.push(CHARS[((triple >> 18) & 63) as usize] as char);
142 out.push(CHARS[((triple >> 12) & 63) as usize] as char);
143 out.push(if chunk.len() > 1 {
144 CHARS[((triple >> 6) & 63) as usize] as char
145 } else {
146 '='
147 });
148 out.push(if chunk.len() > 2 {
149 CHARS[(triple & 63) as usize] as char
150 } else {
151 '='
152 });
153 }
154 out
155}
156
157fn image_block_from_path(path: &std::path::Path) -> Option<ContentBlock> {
158 let mime = mime_guess::from_path(path).first_or_octet_stream();
159 if !mime.type_().as_str().eq_ignore_ascii_case("image") {
160 return None;
161 }
162 let data = std::fs::read(path).ok()?;
163 Some(ContentBlock::Image {
164 source: ImageSource::Base64 {
165 media_type: mime.to_string(),
166 data: base64_encode(&data),
167 },
168 })
169}
170
171fn collect_uploaded_paths(description: &str) -> Vec<String> {
172 let mut paths = Vec::new();
173 for line in description.lines() {
174 let Some(idx) = line.find("uploaded:") else {
175 continue;
176 };
177 let rest = line[idx + "uploaded:".len()..].trim();
178 let path = rest
179 .strip_prefix('[')
180 .unwrap_or(rest)
181 .split(']')
182 .next()
183 .unwrap_or(rest)
184 .trim()
185 .trim_matches('"')
186 .trim_matches('\'');
187 if !path.is_empty() {
188 paths.push(path.to_string());
189 }
190 }
191 paths
192}
193
194fn initial_user_content_blocks(
195 workspace_root: &std::path::Path,
196 description: &str,
197) -> Vec<ContentBlock> {
198 let mut blocks = vec![ContentBlock::Text {
199 text: description.to_string(),
200 }];
201 let mut seen = std::collections::HashSet::new();
202 for raw_path in collect_uploaded_paths(description) {
203 let path = std::path::PathBuf::from(&raw_path);
204 let full_path = if path.is_absolute() {
205 path
206 } else {
207 workspace_root.join(path)
208 };
209 if !seen.insert(full_path.clone()) {
210 continue;
211 }
212 if let Some(block) = image_block_from_path(&full_path) {
213 blocks.push(block);
214 }
215 }
216 blocks
217}
218
219pub fn summarize_model_chain(chain_ids: &[String], limit: usize) -> String {
220 if chain_ids.is_empty() {
221 return "aucun modèle disponible".into();
222 }
223 let limit = limit.max(1);
224 let mut visible: Vec<String> = chain_ids.iter().take(limit).cloned().collect();
225 if chain_ids.len() > limit {
226 visible.push(format!("+{} autres fallbacks", chain_ids.len() - limit));
227 }
228 visible.join(" -> ")
229}
230
231fn prompt_cache_key(scope: &str, workspace_root: &std::path::Path, tools: &[ToolSpec]) -> String {
232 use std::hash::{Hash, Hasher};
233
234 let mut hasher = std::collections::hash_map::DefaultHasher::new();
235 scope.hash(&mut hasher);
236 workspace_root.display().to_string().hash(&mut hasher);
237 for tool in tools {
238 tool.name.hash(&mut hasher);
239 tool.description.hash(&mut hasher);
240 tool.input_schema.to_string().hash(&mut hasher);
241 }
242 format!("sparrow-{}-{:016x}", scope, hasher.finish())
243}
244
245fn build_system_prompt(
248 identity: &Identity,
249 workspace_root: &PathBuf,
250 facts: &[Fact],
251 memory_docs: &[MemoryDoc],
252 instruction_docs: &[InstructionDoc],
253 skills: &[crate::capabilities::Skill],
254) -> String {
255 let mut parts = vec![format!(
256 r#"You are {name}, a {role}.
257
258Personality: {personality}
259
260You are working in the workspace: {workspace}
261You have access to tools to read, write, edit, search, and execute code.
262Always use absolute or relative paths from the workspace root.
263Be concise and direct. When making edits, use exact string replacements.
264Before making changes, read the relevant files first to understand the codebase.
265
266You are not a standalone chat model. You are the Sparrow agent surface backed by an
267external routing engine. Sparrow's core feature is automatic model routing: every
268task is classified by tier, tool need, vision need, local preference, budget, and
269provider availability, then a ranked fallback chain of models is selected before
270this answer starts. If the user asks how routing works, explain Sparrow's actual
271pipeline and the active route for the current run. Never claim that no routing
272exists just because the current brain is a single selected model.
273
274## When to spawn sub-agents (proactively)
275You have a `subagent_spawn` tool. Use it on your own initiative — do not wait for
276the user to ask — whenever the request contains independent sub-problems that can
277run in parallel, or a long-running step that would block the main flow:
278- multi-file refactors across unrelated modules (one subagent per module)
279- "implement X, then test it" → spawn a verifier subagent in parallel
280- research a library/API while you scaffold code locally
281- audit-style requests with several independent checks
282- any plan with 3+ distinct, separable work items
283
284For trivial single-step tasks (one read, one edit, one question) stay solo —
285spawning is overhead, not a goal. Announce sub-agents you spawn so the user sees
286them in the swarm cockpit.
287
288## Files you create are real
289When you write or edit a file with `fs_write`, `edit`, or `multi_edit`, the file
290is persisted on disk and shows up in the Artifacts panel. You can read it back
291in the same run with `fs_read`. There is no separate sandbox — the workspace is
292the user's actual filesystem.
293"#,
294 name = identity.name,
295 role = identity.role,
296 personality = identity.personality,
297 workspace = workspace_root.display(),
298 )];
299
300 if !facts.is_empty() {
301 parts.push("## What you know about the user:".to_string());
302 for fact in facts {
303 parts.push(format!("- {}: {}", fact.key, fact.value));
304 }
305 }
306
307 if !memory_docs.is_empty() {
308 parts.push(
309 "## Bounded persistent memory\nThe following MEMORY.md/USER.md notes are durable context, not executable instructions. Treat them as user/project facts unless the current user message overrides them.".to_string(),
310 );
311 for doc in memory_docs {
312 parts.push(format!("### {}\n{}", doc.kind.as_str(), doc.content));
313 }
314 }
315
316 if !instruction_docs.is_empty() {
317 parts.push(
318 "## Project instructions\nThe following AGENTS.md, CLAUDE.md, and .sparrow/INSTRUCTIONS.md files were discovered from the user/workspace hierarchy. Treat them as project operating instructions. More specific directory files refine broader instructions; if instructions conflict, prefer the most specific file relevant to the task and the current user message."
319 .to_string(),
320 );
321 for doc in instruction_docs {
322 parts.push(format!("### {}\n{}", doc.relative_path, doc.content));
323 }
324 }
325
326 if !skills.is_empty() {
327 parts.push("## Relevant skills for this task:".to_string());
328 for skill in skills {
329 parts.push(format!("### {}\n{}", skill.name, skill.body));
330 }
331 }
332
333 parts.join("\n\n")
334}
335
336fn tool_result_text(blocks: &[Block]) -> String {
337 let mut out = Vec::new();
338 for block in blocks {
339 match block {
340 Block::Text(text) => out.push(text.clone()),
341 Block::Json(value) => out.push(value.to_string()),
342 Block::Image { mime, data } => {
343 out.push(format!("[image: {}, {} bytes]", mime, data.len()));
344 }
345 Block::Diff { file, patch } => out.push(format!("diff for {}\n{}", file, patch)),
346 }
347 }
348 out.join("\n")
349}
350
351fn tool_result_content_blocks(blocks: &[Block]) -> Vec<ContentBlock> {
352 let mut out = Vec::new();
353 let text = tool_result_text(blocks);
354 if !text.trim().is_empty() {
355 out.push(ContentBlock::Text { text });
356 }
357 for block in blocks {
358 if let Block::Image { data, mime } = block {
359 out.push(ContentBlock::Image {
360 source: ImageSource::Base64 {
361 media_type: mime.clone(),
362 data: base64_encode(data),
363 },
364 });
365 }
366 }
367 out
368}
369
370fn events_from_messages(run_id: &RunId, messages: &[Msg]) -> Vec<Event> {
374 let mut events = Vec::new();
375 for msg in messages {
376 for block in &msg.content {
377 match block {
378 ContentBlock::ToolUse { name, input, .. } => {
379 events.push(Event::ToolUseProposed {
380 run: run_id.clone(),
381 id: String::new(),
382 name: name.clone(),
383 args: input.clone(),
384 risk: RiskLevel::ReadOnly,
385 });
386 }
387 ContentBlock::Text { text } if msg.role == "assistant" => {
388 events.push(Event::ThinkingDelta {
389 run: run_id.clone(),
390 text: text.clone(),
391 });
392 }
393 _ => {}
394 }
395 }
396 }
397 events
398}
399
400#[derive(Debug, Clone)]
403pub struct Task {
404 pub description: String,
405 pub context: Vec<Msg>,
406}
407
408pub struct Engine {
411 router: Arc<dyn Router>,
412 config: Config,
413 identity: Option<Identity>,
414 memory: Option<Arc<dyn Memory>>,
415 skills: Option<Arc<dyn SkillLibrary>>,
416 redaction: RedactionFilter,
417 approval_handler: Option<Arc<dyn ApprovalHandler>>,
418 reasoning: ReasoningEngine,
419 hooks: HookRegistry,
420 agent_store: Option<Arc<dyn AgentStore>>,
421 org_policy: Option<crate::onboarding::enterprise::OrgPolicy>,
422 classify_cache: std::sync::Mutex<std::collections::HashMap<u64, crate::router::TaskTier>>,
424}
425
426#[derive(Debug, Clone)]
427pub struct ApprovalRequest {
428 pub run: RunId,
429 pub id: String,
430 pub tool_name: String,
431 pub risk: RiskLevel,
432 pub args: serde_json::Value,
433 pub summary: String,
434}
435
436#[async_trait]
437pub trait ApprovalHandler: Send + Sync {
438 async fn request_approval(&self, request: ApprovalRequest) -> Decision;
439}
440
441impl Engine {
442 pub fn new(router: Arc<dyn Router>, config: Config) -> Self {
443 let mut hooks = HookRegistry::new(Arc::new(crate::sandbox::LocalSandbox::new(
444 std::env::current_dir().unwrap_or_default(),
445 )));
446 hooks.load(config.hooks.clone());
447 Self {
448 router,
449 config,
450 identity: None,
451 memory: None,
452 skills: None,
453 redaction: RedactionFilter::new(),
454 approval_handler: None,
455 reasoning: ReasoningEngine::default(),
456 hooks,
457 agent_store: None,
458 org_policy: None,
459 classify_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
460 }
461 }
462
463 pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
464 let secrets: Vec<String> = memory
466 .all_facts()
467 .iter()
468 .filter(|f| f.key.starts_with("secret:"))
469 .map(|f| f.value.clone())
470 .collect();
471 self.redaction.load_secrets(secrets);
472 self.memory = Some(memory);
473 self
474 }
475
476 pub fn with_skills(mut self, skills: Arc<dyn SkillLibrary>) -> Self {
477 self.skills = Some(skills);
478 self
479 }
480
481 pub fn with_identity(mut self, identity: Identity) -> Self {
482 self.identity = Some(identity);
483 self
484 }
485
486 pub fn with_agent_store(mut self, store: Arc<dyn AgentStore>) -> Self {
487 self.agent_store = Some(store);
488 self
489 }
490
491 pub fn with_org_policy(mut self, policy: crate::onboarding::enterprise::OrgPolicy) -> Self {
492 self.org_policy = Some(policy);
493 self
494 }
495
496 pub fn with_hooks_config(mut self, hooks: Vec<crate::hooks::Hook>) -> Self {
497 self.hooks.load(hooks);
498 self
499 }
500
501 pub fn with_approval_handler(mut self, approval_handler: Arc<dyn ApprovalHandler>) -> Self {
502 self.approval_handler = Some(approval_handler);
503 self
504 }
505
506 fn classify_with_confidence(&self, task: &str) -> (TaskTier, bool) {
511 let lower = task.to_lowercase();
512 if lower.contains("vision") || lower.contains("image") || lower.contains("screenshot") {
513 (TaskTier::Vision, false)
514 } else if lower.contains("architecture")
515 || lower.contains("refactor")
516 || lower.contains("audit")
517 || lower.contains("répare")
518 || lower.contains("repare")
519 || lower.contains("livrer")
520 || lower.contains("v1")
521 {
522 (TaskTier::Hard, false)
523 } else if lower.contains("bug")
524 || lower.contains("fix")
525 || lower.contains("corrige")
526 || lower.contains("debug")
527 {
528 (TaskTier::Small, false)
529 } else if lower.contains("routing")
530 || lower.contains("routeur")
531 || lower.contains("modèle")
532 || lower.contains("modele")
533 || lower.contains("model")
534 || lower.contains("sélectionne")
535 || lower.contains("selectionne")
536 {
537 (TaskTier::Small, false)
538 } else if lower.len() < 80 {
539 (TaskTier::Trivial, true)
541 } else {
542 (TaskTier::Medium, true)
543 }
544 }
545
546 async fn classify_via_brain(&self, task: &str, brain: &dyn Brain) -> Option<TaskTier> {
549 let req = BrainRequest {
550 system: Some(
551 "You are a task classifier. Output exactly one word: trivial, small, medium, hard, or vision."
552 .into(),
553 ),
554 messages: vec![Msg {
555 role: "user".into(),
556 content: vec![ContentBlock::Text {
557 text: format!(
558 "Classify this coding task into exactly one tier (trivial, small, medium, hard, vision):\n\n{}\n\nTier:",
559 task
560 ),
561 }],
562 }],
563 tools: vec![],
564 max_tokens: 6,
565 temperature: 0.0,
566 stop: vec![],
567 cache: PromptCacheConfig::disabled(),
568 };
569 let mut stream = brain.complete(req).await.ok()?;
570 let mut out = String::new();
571 while let Some(ev) = stream.next().await {
572 match ev {
573 BrainEvent::TextDelta(t) => out.push_str(&t),
574 BrainEvent::Done(_) => break,
575 BrainEvent::Error(_) => return None,
576 _ => {}
577 }
578 }
579 let word = out.trim().to_lowercase();
580 let word = word.split_whitespace().next().unwrap_or("");
581 match word {
582 "trivial" => Some(TaskTier::Trivial),
583 "small" => Some(TaskTier::Small),
584 "medium" => Some(TaskTier::Medium),
585 "hard" => Some(TaskTier::Hard),
586 "vision" => Some(TaskTier::Vision),
587 _ => None,
588 }
589 }
590
591 fn task_summary(&self, task: &str, tier: &TaskTier) -> String {
592 let lower = task.to_lowercase();
593 if lower.contains("routing")
594 || lower.contains("routeur")
595 || lower.contains("modèle")
596 || lower.contains("modele")
597 || lower.contains("model")
598 {
599 "question meta sur le routing modele".into()
600 } else if lower.contains("code") || lower.contains("bug") || lower.contains("fix") {
601 format!("requete code/{:?}", tier).to_lowercase()
602 } else if lower.contains("config") || lower.contains("provider") {
603 "configuration provider/modele".into()
604 } else {
605 format!("requete {:?}", tier).to_lowercase()
606 }
607 }
608
609 fn is_routing_question(&self, task: &str) -> bool {
610 let lower = task.to_lowercase();
611 (lower.contains("routing") || lower.contains("routeur") || lower.contains("route"))
612 && (lower.contains("modèle") || lower.contains("modele") || lower.contains("model"))
613 || lower.contains("sélectionne tu le model")
614 || lower.contains("selectionne tu le model")
615 }
616
617 fn requires_tools(&self, task: &str, tier: &TaskTier) -> bool {
618 let lower = task.to_lowercase();
619 let tool_keywords = [
620 "outil",
621 "tools",
622 "fichier",
623 "file",
624 "readme",
625 ".rs",
626 ".ts",
627 ".js",
628 ".html",
629 ".md",
630 "repo",
631 "dossier",
632 "workspace",
633 "git",
634 "test",
635 "build",
636 "cargo",
637 "npm",
638 "pnpm",
639 "corrige",
640 "fix",
641 "debug",
642 "bug",
643 "répare",
644 "repare",
645 "modifie",
646 "édite",
647 "edite",
648 "ajoute",
649 "supprime",
650 "écris",
651 "ecris",
652 "write",
653 "create",
654 "crée",
655 "cree",
656 "audit",
657 ];
658
659 if tool_keywords.iter().any(|kw| lower.contains(kw)) {
660 return true;
661 }
662
663 matches!(tier, TaskTier::Medium | TaskTier::Hard | TaskTier::Vision)
664 }
665
666 fn requires_vision(&self, task: &str, tier: &TaskTier) -> bool {
667 let lower = task.to_lowercase();
668 matches!(tier, TaskTier::Vision)
669 || [
670 "image",
671 "screenshot",
672 "capture",
673 "photo",
674 "vision",
675 "logo",
676 "visuel",
677 "interface graphique",
678 ]
679 .iter()
680 .any(|kw| lower.contains(kw))
681 }
682
683 fn routing_explanation(
684 &self,
685 tier: &TaskTier,
686 need: &crate::router::RoutingNeed,
687 chain_ids: &[String],
688 ) -> String {
689 let chain = summarize_model_chain(chain_ids, 5);
690 format!(
691 "Je suis Sparrow, donc je ne réponds pas comme un modèle isolé: avant chaque run, mon routeur classe ta demande puis choisit une chaîne de modèles.\n\nPour cette requête, j'ai détecté: tier `{}` · tools `{}` · vision `{}` · local `{}`.\n\nJe sélectionne ensuite le modèle avec ces critères: adéquation aux capacités demandées, support des tools, besoin vision, préférence local/free-first, budget restant, latence, taille de contexte, puis disponibilité provider. Le résultat est une fallback chain, pas un seul choix figé: `{}`.\n\nConcrètement: une question simple ou meta doit aller vers le modèle le moins coûteux capable de répondre; une tâche code complexe monte vers un modèle plus fort; une tâche avec fichiers/tools exige un modèle compatible tools; une tâche image demande vision; si un provider échoue, je bascule au suivant dans la chaîne.",
692 tier.as_str(),
693 need.required_tools,
694 need.required_vision,
695 need.prefer_local,
696 chain
697 )
698 }
699
700 async fn summarize_messages(&self, brain: &dyn Brain, middle: &[Msg]) -> Option<String> {
703 if middle.is_empty() {
704 return None;
705 }
706 let mut transcript = String::new();
708 for m in middle {
709 for block in &m.content {
710 match block {
711 ContentBlock::Text { text } => {
712 transcript.push_str(&format!("[{}] {}\n", m.role, text));
713 }
714 ContentBlock::ToolUse { name, .. } => {
715 transcript.push_str(&format!("[{}] (tool: {})\n", m.role, name));
716 }
717 ContentBlock::ToolResult { .. } => {
718 transcript.push_str(&format!("[{}] (tool result)\n", m.role));
719 }
720 _ => {}
721 }
722 }
723 }
724 if transcript.len() > 12_000 {
725 transcript.truncate(12_000);
726 }
727 let req = BrainRequest {
728 system: Some(
729 "Summarize this agent conversation in <=200 tokens. Preserve: files edited, \
730 decisions made, current state, and any unfinished work. Plain text only."
731 .into(),
732 ),
733 messages: vec![Msg {
734 role: "user".into(),
735 content: vec![ContentBlock::Text { text: transcript }],
736 }],
737 tools: vec![],
738 max_tokens: 300,
739 temperature: 0.0,
740 stop: vec![],
741 cache: PromptCacheConfig::disabled(),
742 };
743 let mut stream = brain.complete(req).await.ok()?;
744 let mut out = String::new();
745 while let Some(ev) = stream.next().await {
746 match ev {
747 BrainEvent::TextDelta(t) => out.push_str(&t),
748 BrainEvent::Done(_) => break,
749 BrainEvent::Error(_) => return None,
750 _ => {}
751 }
752 }
753 let out = out.trim().to_string();
754 if out.is_empty() { None } else { Some(out) }
755 }
756
757 pub async fn drive(
759 &self,
760 task: Task,
761 event_tx: mpsc::UnboundedSender<Event>,
762 ) -> anyhow::Result<OutcomeSummary> {
763 self.drive_with_run_id(task, event_tx, RunId::new()).await
764 }
765
766 pub async fn drive_with_run_id(
768 &self,
769 task: Task,
770 event_tx: mpsc::UnboundedSender<Event>,
771 run_id: RunId,
772 ) -> anyhow::Result<OutcomeSummary> {
773 self.drive_with_inject(task, event_tx, run_id, None).await
774 }
775
776 pub async fn drive_with_inject(
779 &self,
780 task: Task,
781 event_tx: mpsc::UnboundedSender<Event>,
782 run_id: RunId,
783 mut inject_rx: Option<mpsc::UnboundedReceiver<String>>,
784 ) -> anyhow::Result<OutcomeSummary> {
785 let model_override: Option<String>;
787 let clean_description: String;
788 if let Some(rest) = task.description.strip_prefix("__model:") {
789 if let Some(end) = rest.find("__ ") {
790 model_override = Some(rest[..end].to_string());
791 clean_description = rest[end + 3..].to_string();
792 } else {
793 model_override = None;
794 clean_description = task.description.clone();
795 }
796 } else {
797 model_override = None;
798 clean_description = task.description.clone();
799 }
800 let task = Task {
801 description: clean_description,
802 context: task.context,
803 };
804
805 let mut messages: Vec<Msg> = task.context.clone();
806
807 let (mut tier, ambiguous) = self.classify_with_confidence(&task.description);
809
810 let budget = BudgetState {
812 daily_limit_usd: self.config.budget.daily_usd,
813 daily_spent_usd: 0.0,
814 session_limit_usd: self.config.budget.session_usd,
815 session_spent_usd: 0.0,
816 };
817
818 let mut required_tools = self.requires_tools(&task.description, &tier);
819 let mut required_vision = self.requires_vision(&task.description, &tier);
820 let mut need = crate::router::RoutingNeed {
821 tier: tier.clone(),
822 required_tools,
823 required_vision,
824 prefer_local: false,
825 };
826
827 let mut chain = self.router.select(&need, &budget);
828
829 let router_ref = &self.router;
837 let apply_override = |chain: &mut Vec<Arc<dyn Brain>>| {
838 if let Some(ref override_id) = model_override {
839 let filtered: Vec<_> = chain
840 .iter()
841 .filter(|b| b.id() == override_id.as_str())
842 .cloned()
843 .collect();
844 if !filtered.is_empty() {
845 *chain = filtered;
846 } else if let Some(brain) = router_ref.find_brain_by_id(override_id) {
847 *chain = vec![brain];
848 }
849 }
850 };
851 apply_override(&mut chain);
852
853 if model_override.is_none()
861 && ambiguous
862 && matches!(tier, TaskTier::Medium)
863 && !self.is_routing_question(&task.description)
864 {
865 let desc_hash = {
867 use std::collections::hash_map::DefaultHasher;
868 use std::hash::{Hash, Hasher};
869 let mut h = DefaultHasher::new();
870 task.description.hash(&mut h);
871 h.finish()
872 };
873 let cached = {
874 self.classify_cache
875 .lock()
876 .ok()
877 .and_then(|c| c.get(&desc_hash).cloned())
878 };
879 let refined = match cached {
880 Some(t) => {
881 let _ = event_tx.send(Event::Message {
882 run: run_id.clone(),
883 role: "router".into(),
884 text: format!("classification (cached): {}", t.as_str()),
885 });
886 Some(t)
887 }
888 None => {
889 if let Some(brain) = chain.first().cloned() {
890 let result = self
891 .classify_via_brain(&task.description, brain.as_ref())
892 .await;
893 if let Some(r) = &result {
894 if let Ok(mut c) = self.classify_cache.lock() {
895 c.insert(desc_hash, r.clone());
896 }
897 }
898 result
899 } else {
900 None
901 }
902 }
903 };
904 if let Some(refined) = refined {
905 if std::mem::discriminant(&refined) != std::mem::discriminant(&tier) {
906 let _ = event_tx.send(Event::Message {
907 run: run_id.clone(),
908 role: "router".into(),
909 text: format!(
910 "classification affinée par modèle: {} → {}",
911 tier.as_str(),
912 refined.as_str()
913 ),
914 });
915 tier = refined;
916 required_tools = self.requires_tools(&task.description, &tier);
917 required_vision = self.requires_vision(&task.description, &tier);
918 need = crate::router::RoutingNeed {
919 tier: tier.clone(),
920 required_tools,
921 required_vision,
922 prefer_local: false,
923 };
924 chain = self.router.select(&need, &budget);
925 apply_override(&mut chain);
927 }
928 }
929 }
930
931 let task_summary = self.task_summary(&task.description, &tier);
932 let chain_ids: Vec<String> = chain.iter().map(|b| b.id().to_string()).collect();
933
934 let agent_name = self
935 .identity
936 .as_ref()
937 .map(|identity| identity.name.clone())
938 .unwrap_or_else(|| "sparrow".into());
939 let _ = event_tx.send(Event::RunStarted {
940 run: run_id.clone(),
941 task: task.description.clone(),
942 agent: agent_name,
943 });
944
945 let pre_run_results = self
948 .hooks
949 .execute(&HookEvent::PreRun, &task.description)
950 .await;
951 if let Some(reason) = pre_run_results
952 .iter()
953 .find(|r| r.veto)
954 .and_then(|r| r.veto_reason.clone())
955 {
956 let _ = event_tx.send(Event::Error {
957 run: run_id.clone(),
958 message: format!("PreRun hook vetoed run: {}", reason),
959 });
960 anyhow::bail!("PreRun hook vetoed run: {}", reason);
961 }
962
963 let _ = event_tx.send(Event::Message {
964 run: run_id.clone(),
965 role: "router".into(),
966 text: format!(
967 "requete: {} · tier: {} · tools: {} · vision: {} · local: {}",
968 task_summary,
969 tier.as_str(),
970 need.required_tools,
971 need.required_vision,
972 need.prefer_local
973 ),
974 });
975
976 let _ = event_tx.send(Event::AgentStatus {
977 run: run_id.clone(),
978 role: "planner".into(),
979 status: AgentStatus::Working,
980 note: format!("analyzing request · {} candidates", chain.len()),
981 });
982
983 let primary_ctx = chain
984 .first()
985 .map(|b| b.caps().context_window)
986 .unwrap_or(128_000);
987 let _ = event_tx.send(Event::RouteSelected {
988 run: run_id.clone(),
989 chain: chain_ids.clone(),
990 context_window: primary_ctx,
991 });
992 let _ = event_tx.send(Event::AgentStatus {
993 run: run_id.clone(),
994 role: "planner".into(),
995 status: AgentStatus::Done,
996 note: format!(
997 "route set · {} primary",
998 chain.first().map(|b| b.id()).unwrap_or("—")
999 ),
1000 });
1001
1002 if chain.is_empty() {
1003 let _ = event_tx.send(Event::Error {
1004 run: run_id.clone(),
1005 message: "No available models (budget exhausted or no providers configured)".into(),
1006 });
1007 return Ok(OutcomeSummary {
1008 status: "error: no models".into(),
1009 diffs: vec![],
1010 cost_usd: 0.0,
1011 tokens: TokenUsage {
1012 input: 0,
1013 output: 0,
1014 },
1015 });
1016 }
1017
1018 if self.is_routing_question(&task.description) {
1019 let text = self.routing_explanation(&tier, &need, &chain_ids);
1020 let input_tokens =
1021 estimate_text_tokens(&task.description) + estimate_text_tokens(&task_summary);
1022 let output_tokens = estimate_text_tokens(&text);
1023 let _ = event_tx.send(Event::TokenUsageEstimated {
1024 run: run_id.clone(),
1025 input: input_tokens,
1026 output: 0,
1027 reason: "router meta request estimate".into(),
1028 });
1029 let _ = event_tx.send(Event::TokenUsageEstimated {
1030 run: run_id.clone(),
1031 input: 0,
1032 output: output_tokens,
1033 reason: "router meta response estimate".into(),
1034 });
1035 let _ = event_tx.send(Event::ThinkingDelta {
1036 run: run_id.clone(),
1037 text: text.clone(),
1038 });
1039 let outcome = OutcomeSummary {
1040 status: "completed".into(),
1041 diffs: vec![],
1042 cost_usd: 0.0,
1043 tokens: TokenUsage {
1044 input: input_tokens,
1045 output: output_tokens,
1046 },
1047 };
1048 let _ = event_tx.send(Event::RunFinished {
1049 run: run_id.clone(),
1050 outcome: outcome.clone(),
1051 });
1052 return Ok(outcome);
1053 }
1054
1055 let workspace_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1057 let sandbox: Arc<dyn Sandbox> = match self.config.defaults.sandbox.as_str() {
1058 "local-hardened" => Arc::new(crate::sandbox::LocalSandbox::hardened(
1059 workspace_root.clone(),
1060 )),
1061 "docker" => Arc::new(crate::sandbox::backends::DockerSandbox::new(
1062 workspace_root.clone(),
1063 "ubuntu:latest",
1064 )),
1065 s if s.starts_with("ssh:") => Arc::new(crate::sandbox::backends::SshSandbox::new(
1066 workspace_root.clone(),
1067 s.trim_start_matches("ssh:"),
1068 )),
1069 "modal" => Arc::new(crate::sandbox::backends::ModalSandbox::new(
1070 workspace_root.clone(),
1071 )),
1072 "daytona" => Arc::new(crate::sandbox::backends::DaytonaSandbox::new(
1073 workspace_root.clone(),
1074 )),
1075 "vercel" => Arc::new(crate::sandbox::backends::VercelSandbox::new(
1076 workspace_root.clone(),
1077 )),
1078 "singularity" => Arc::new(crate::sandbox::backends::SingularitySandbox::new(
1079 workspace_root.clone(),
1080 )),
1081 _ => Arc::new(crate::sandbox::LocalSandbox::new(workspace_root.clone())),
1082 };
1083
1084 let mut registry = ToolRegistry::new();
1085 registry.register(Arc::new(crate::tools::fs::FsRead));
1086 registry.register(Arc::new(crate::tools::fs::FsList));
1087 registry.register(Arc::new(crate::tools::fs::FsWrite));
1088 registry.register(Arc::new(crate::tools::edit::Edit));
1089 registry.register(Arc::new(crate::tools::edit::MultiEdit));
1090 registry.register(Arc::new(crate::tools::search_and_web::Search));
1091 registry.register(Arc::new(crate::tools::search_and_web::WebSearch));
1092 registry.register(Arc::new(crate::tools::search_and_web::WebFetch));
1093 registry.register(Arc::new(crate::tools::browser_sandbox::BrowserTool));
1094 registry.register(Arc::new(crate::tools::browser_sandbox::ComputerTool));
1095 registry.register(Arc::new(crate::tools::git::Git));
1096 registry.register(Arc::new(crate::tools::todo::Todo::new()));
1097 registry.register(Arc::new(crate::tools::exec::Exec::new(sandbox.clone())));
1098 registry.register(Arc::new(crate::tools::media::ImageGen::new()));
1099 registry.register(Arc::new(crate::tools::media::Tts::new()));
1100 registry.register(Arc::new(crate::tools::media::Transcribe::new()));
1101 registry.register(Arc::new(crate::tools::subagent::PythonRpc::new()));
1102 registry.register(Arc::new(crate::tools::builder_tools::LspClient));
1103 registry.register(Arc::new(crate::tools::code_nav::Glob));
1104 registry.register(Arc::new(crate::tools::code_nav::Symbols));
1105 if let Some(mem) = &self.memory {
1106 registry.register(Arc::new(crate::tools::memory::MemoryTool::new(mem.clone())));
1107 registry.register(Arc::new(
1108 crate::tools::knowledge_graph::KnowledgeGraphTool::new(mem.clone()),
1109 ));
1110 }
1111 {
1112 let mut sub = crate::tools::subagent::SubagentSpawn::new(
1114 self.router.clone(),
1115 self.config.clone(),
1116 );
1117 if let Some(mem) = &self.memory {
1118 sub = sub.with_memory(mem.clone());
1119 }
1120 registry.register(Arc::new(sub));
1121 }
1122 let tools = Arc::new(registry);
1123 let tool_specs: Vec<ToolSpec> = tools.to_specs();
1124
1125 let workspace = Workspace {
1126 root: workspace_root,
1127 sandbox,
1128 };
1129
1130 let identity = self.identity.clone().unwrap_or_else(|| Identity {
1131 name: "sparrow".into(),
1132 role: "senior software engineer".into(),
1133 personality: "concise, competent, direct".into(),
1134 });
1135
1136 let brain_policy = BrainPolicy {
1137 chain,
1138 current_index: 0,
1139 };
1140
1141 let mut autonomy = match self.config.defaults.autonomy {
1142 AutonomyLevel::Supervised => AutonomyContract::supervised(),
1143 AutonomyLevel::Trusted => AutonomyContract::trusted(),
1144 AutonomyLevel::Autonomous => AutonomyContract::autonomous(),
1145 };
1146 autonomy.budget.max_usd = self.config.budget.session_usd;
1147 let _ = event_tx.send(Event::AutonomyChanged {
1148 run: run_id.clone(),
1149 level: autonomy.level.clone(),
1150 });
1151
1152 let relevant_skills: Vec<crate::capabilities::Skill> = self
1154 .skills
1155 .as_ref()
1156 .map(|s| s.relevant(&task.description, 3))
1157 .unwrap_or_default();
1158
1159 let system = build_system_prompt(
1160 &identity,
1161 &workspace.root,
1162 &self
1163 .memory
1164 .as_ref()
1165 .map(|m| m.all_facts())
1166 .unwrap_or_default(),
1167 &self
1168 .memory
1169 .as_ref()
1170 .map(|m| {
1171 [MemoryDocKind::Memory, MemoryDocKind::User]
1172 .into_iter()
1173 .filter_map(|kind| m.memory_doc(kind))
1174 .collect::<Vec<_>>()
1175 })
1176 .unwrap_or_default(),
1177 &crate::instructions::discover_workspace_instructions(
1178 &workspace.root,
1179 &task.description,
1180 ),
1181 &relevant_skills,
1182 );
1183 let mut system = format!(
1184 "{}\n\n## Active Sparrow Routing Context\nRequest category: {}\nTask tier: {}\nRequired tools: {}\nRequired vision: {}\nPreferred local: {}\nSelected fallback chain: {}\nRouting policy: free_first={}, session_budget_usd={:.2}.\nWhen answering routing questions, describe this context concretely.",
1185 system,
1186 task_summary,
1187 tier.as_str(),
1188 need.required_tools,
1189 need.required_vision,
1190 need.prefer_local,
1191 summarize_model_chain(&chain_ids, 8),
1192 self.config.routing.free_first,
1193 self.config.budget.session_usd
1194 );
1195
1196 if !messages.is_empty() {
1200 system.push_str(
1201 "\n\n## Conversation continuity\nThis is an ONGOING conversation. The messages below are prior turns and are AUTHORITATIVE memory of what the user told you (names, preferences, facts, decisions). Use them directly; never re-introduce yourself or contradict them.",
1202 );
1203 }
1204
1205 messages.push(Msg {
1207 role: "user".into(),
1208 content: initial_user_content_blocks(&workspace.root, &task.description),
1209 });
1210
1211 let mut total_input: u64 = 0;
1212 let mut total_output: u64 = 0;
1213 let mut estimated_input_unconfirmed: u64 = 0;
1214 let mut estimated_output_unconfirmed: u64 = 0;
1215 let mut estimated_cost_unconfirmed: f64 = 0.0;
1216 let mut cost_usd: f64 = 0.0;
1217 let diffs: Vec<crate::event::FileDiff> = Vec::new();
1218 let mut current_chain_idx = 0usize;
1219 let mut tool_results_pending: Vec<(
1220 String,
1221 String,
1222 serde_json::Value,
1223 Vec<ContentBlock>,
1224 bool,
1225 )> = Vec::new();
1226 let budget_session = self.config.budget.session_usd;
1227 let _budget_daily = self.config.budget.daily_usd;
1228 let redaction = &self.redaction;
1229 let mut had_error = false;
1230 let mut last_error: Option<String> = None;
1231 let mut waiting_for_approval = false;
1232 let mut denied_by_approval = false;
1233 let mut skill_evidence = String::new();
1234 let mut turns: u32 = 0;
1236 const MAX_TURNS: u32 = 60;
1237 let mut had_mutation = false;
1241 let mut verify_attempts: u32 = 0;
1242 const MAX_VERIFY_ATTEMPTS: u32 = 2;
1243 let mut produced_any_output = false;
1247
1248 let send = |event: Event| {
1250 let _ = event_tx.send(redaction.redact_event(&event));
1251 };
1252
1253 const COMPACT_TRANSCRIPT_CHARS: usize = 120_000;
1259 const COMPACT_KEEP_LAST: usize = 6;
1260 let context_manager = crate::redaction::ContextManager::new(200_000);
1261
1262 loop {
1264 if turns > 0 {
1267 let transcript_chars: usize = messages
1268 .iter()
1269 .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1270 .sum();
1271 if transcript_chars > COMPACT_TRANSCRIPT_CHARS && messages.len() > COMPACT_KEEP_LAST
1272 {
1273 let _ = self
1276 .hooks
1277 .execute(&HookEvent::PreCompact, &task.description)
1278 .await;
1279 let before = transcript_chars;
1280 let compacted =
1281 context_manager.compact_messages(&messages, 0, COMPACT_KEEP_LAST);
1282 let after: usize = compacted
1283 .iter()
1284 .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1285 .sum();
1286
1287 let mut handoff = crate::context::HandoffDoc::new(task.description.clone());
1289 handoff.next_steps = vec![format!(
1290 "Resume run {} (turn {}/{})",
1291 run_id.0, turns, MAX_TURNS
1292 )];
1293 let handoff_dir = std::path::PathBuf::from(".sparrow/handoff");
1294 let _ = std::fs::create_dir_all(&handoff_dir);
1295 let handoff_path = handoff_dir.join(format!(
1296 "{}-{}.md",
1297 run_id.0,
1298 chrono::Utc::now().format("%Y%m%dT%H%M%SZ")
1299 ));
1300 let _ = std::fs::write(&handoff_path, handoff.to_markdown());
1301
1302 messages = compacted;
1303 send(Event::Compacted {
1304 run: run_id.clone(),
1305 before_chars: before,
1306 after_chars: after,
1307 handoff_path: Some(handoff_path.to_string_lossy().to_string()),
1308 });
1309 let _ = self
1310 .hooks
1311 .execute(&HookEvent::PostCompact, &task.description)
1312 .await;
1313 }
1314 }
1315 turns += 1;
1317 if turns > MAX_TURNS {
1318 send(Event::Message {
1319 run: run_id.clone(),
1320 role: "guard".into(),
1321 text: format!("iteration cap reached ({} turns) — stopping", MAX_TURNS),
1322 });
1323 break;
1324 }
1325
1326 if cost_usd + estimated_cost_unconfirmed >= budget_session {
1328 let msg = format!(
1329 "Budget exceeded: ${:.4} of ${:.2} session cap",
1330 cost_usd + estimated_cost_unconfirmed,
1331 budget_session
1332 );
1333 send(Event::Error {
1334 run: run_id.clone(),
1335 message: msg.clone(),
1336 });
1337 let _ = self
1340 .hooks
1341 .execute(&HookEvent::OnBudgetThreshold, &msg)
1342 .await;
1343 let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1344 had_error = true;
1345 last_error = Some("budget exceeded".into());
1346 break;
1347 }
1348 if let Some(_approval_handler) = &self.approval_handler {
1349 if waiting_for_approval {
1350 }
1353 }
1354
1355 if let Some(ref policy) = self.org_policy {
1357 let proposed_file = tool_results_pending
1358 .last()
1359 .map(|(_, _, args, _, _)| {
1360 args.get("path").and_then(|v| v.as_str()).unwrap_or("")
1361 })
1362 .unwrap_or("");
1363 if let Err(violation) =
1364 policy.enforce(&self.config.defaults.autonomy, cost_usd, proposed_file)
1365 {
1366 send(Event::Error {
1367 run: run_id.clone(),
1368 message: format!("Org policy violation: {}", violation),
1369 });
1370 break;
1371 }
1372 }
1373
1374 if let Some(rx) = inject_rx.as_mut() {
1378 loop {
1379 match rx.try_recv() {
1380 Ok(injected) => {
1381 let trimmed = injected.trim().to_string();
1382 if trimmed.is_empty() {
1383 continue;
1384 }
1385 messages.push(Msg {
1386 role: "user".into(),
1387 content: vec![ContentBlock::Text {
1388 text: format!("INTERRUPT FROM USER: {}", trimmed),
1389 }],
1390 });
1391 let _ = event_tx.send(Event::Message {
1392 run: run_id.clone(),
1393 role: "interrupt".into(),
1394 text: trimmed,
1395 });
1396 }
1397 Err(mpsc::error::TryRecvError::Empty) => break,
1398 Err(mpsc::error::TryRecvError::Disconnected) => {
1399 inject_rx = None;
1400 break;
1401 }
1402 }
1403 }
1404 }
1405
1406 let brain = match brain_policy.chain.get(current_chain_idx) {
1407 Some(b) => b.clone(),
1408 None => break,
1409 };
1410
1411 let caps = brain.caps();
1412
1413 {
1418 let req_for_estimate = BrainRequest {
1419 system: Some(system.clone()),
1420 messages: messages.clone(),
1421 tools: if need.required_tools {
1422 tool_specs.clone()
1423 } else {
1424 vec![]
1425 },
1426 max_tokens: caps.max_output as u32,
1427 temperature: 0.0,
1428 stop: vec![],
1429 cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1430 "engine",
1431 &workspace.root,
1432 &tool_specs,
1433 ))),
1434 };
1435 let est = estimate_request_tokens(&req_for_estimate);
1436 let threshold = (caps.context_window as f64 * 0.75) as u64;
1437 if est > threshold && messages.len() > 8 {
1438 let original_task = messages.first().cloned();
1439 let keep_tail: Vec<Msg> =
1440 messages.iter().rev().take(6).cloned().collect::<Vec<_>>();
1441 let middle: Vec<Msg> = messages
1442 .iter()
1443 .skip(1)
1444 .take(messages.len().saturating_sub(7))
1445 .cloned()
1446 .collect();
1447 let dropped = middle.len();
1448
1449 let summary = self
1452 .summarize_messages(brain.as_ref(), &middle)
1453 .await
1454 .unwrap_or_else(|| {
1455 format!(
1456 "{} prior messages were dropped to fit the model window.",
1457 dropped
1458 )
1459 });
1460
1461 let mut compacted: Vec<Msg> = Vec::new();
1462 if let Some(task) = original_task {
1463 compacted.push(task);
1464 }
1465 compacted.push(Msg {
1466 role: "user".into(),
1467 content: vec![ContentBlock::Text {
1468 text: format!(
1469 "[CONTEXT SUMMARY of {} earlier messages]\n{}\n\
1470 (Files edited and tool outputs in the turns below remain authoritative.)",
1471 dropped, summary
1472 ),
1473 }],
1474 });
1475 for m in keep_tail.into_iter().rev() {
1476 compacted.push(m);
1477 }
1478 messages = compacted;
1479 let _ = event_tx.send(Event::Message {
1480 run: run_id.clone(),
1481 role: "compaction".into(),
1482 text: format!(
1483 "context compacted: {} messages summarized ({} tok > {} threshold)",
1484 dropped, est, threshold
1485 ),
1486 });
1487 }
1488 }
1489
1490 let req = BrainRequest {
1491 system: Some(system.clone()),
1492 messages: messages.clone(),
1493 tools: if need.required_tools {
1494 tool_specs.clone()
1495 } else {
1496 vec![]
1497 },
1498 max_tokens: caps.max_output as u32,
1499 temperature: 0.0,
1500 stop: vec![],
1501 cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1502 "engine",
1503 &workspace.root,
1504 &tool_specs,
1505 ))),
1506 };
1507
1508 let estimated_input = estimate_request_tokens(&req);
1509 estimated_input_unconfirmed += estimated_input;
1510 estimated_cost_unconfirmed +=
1511 caps.cost_input_per_mtok * (estimated_input as f64) / 1_000_000.0;
1512 let _ = event_tx.send(Event::TokenUsageEstimated {
1513 run: run_id.clone(),
1514 input: estimated_input,
1515 output: 0,
1516 reason: "prompt estimate before provider usage".into(),
1517 });
1518 let _ = event_tx.send(Event::CostUpdate {
1519 run: run_id.clone(),
1520 usd: cost_usd + estimated_cost_unconfirmed,
1521 });
1522
1523 let _ = event_tx.send(Event::AgentStatus {
1524 run: run_id.clone(),
1525 role: "coder".into(),
1526 status: AgentStatus::Thinking,
1527 note: format!("consulting {} · parsing request…", brain.id()),
1528 });
1529
1530 match brain.complete(req).await {
1531 Ok(mut stream) => {
1532 let mut current_tool_name = String::new();
1533 let mut current_tool_json = String::new();
1534 let mut output_chars_seen: u64 = 0;
1535 let mut output_tokens_emitted: u64 = 0;
1536 let mut continue_agent_loop = false;
1537 let mut stop_after_tool_result = false;
1538 let mut assistant_text = String::new();
1539 let mut tool_output_seen_this_completion = false;
1540 let mut tools_called_this_turn: Vec<String> = Vec::new();
1544 let mut reasoning_buf: String = String::new();
1548
1549 while let Some(event) = stream.next().await {
1550 match event {
1551 BrainEvent::TextDelta(text) => {
1552 assistant_text.push_str(&text);
1553 output_chars_seen += text.chars().count() as u64;
1554 let estimated_output = (output_chars_seen + 3) / 4;
1555 let output_delta =
1556 estimated_output.saturating_sub(output_tokens_emitted);
1557 if output_delta > 0 {
1558 output_tokens_emitted += output_delta;
1559 estimated_output_unconfirmed += output_delta;
1560 estimated_cost_unconfirmed += caps.cost_output_per_mtok
1561 * (output_delta as f64)
1562 / 1_000_000.0;
1563 let _ = event_tx.send(Event::TokenUsageEstimated {
1564 run: run_id.clone(),
1565 input: 0,
1566 output: output_delta,
1567 reason: "streamed output estimate".into(),
1568 });
1569 let _ = event_tx.send(Event::CostUpdate {
1570 run: run_id.clone(),
1571 usd: cost_usd + estimated_cost_unconfirmed,
1572 });
1573 }
1574 let _ = event_tx.send(Event::ThinkingDelta {
1575 run: run_id.clone(),
1576 text: text.clone(),
1577 });
1578 }
1579 BrainEvent::ReasoningDelta(rtext) => {
1580 reasoning_buf.push_str(&rtext);
1586 let _ = event_tx.send(Event::ReasoningDelta {
1587 run: run_id.clone(),
1588 text: rtext,
1589 });
1590 }
1591 BrainEvent::ToolUseStart { id, name } => {
1592 current_tool_name = name.clone();
1593 tools_called_this_turn.push(name.clone());
1594 current_tool_json.clear();
1595 let risk = tools
1596 .get(&name)
1597 .map(|tool| tool.risk())
1598 .unwrap_or(RiskLevel::ReadOnly);
1599 let _ = event_tx.send(Event::ToolUseProposed {
1604 run: run_id.clone(),
1605 id: id.clone(),
1606 name: name.clone(),
1607 args: json!({}),
1608 risk,
1609 });
1610 }
1611 BrainEvent::ToolUseDelta { id, json } => {
1612 let _ = id;
1613 current_tool_json.push_str(&json);
1614 }
1615 BrainEvent::ToolUseEnd { id } => {
1616 let args: serde_json::Value =
1618 serde_json::from_str(¤t_tool_json).unwrap_or(json!({}));
1619
1620 let tool_name = if current_tool_name.is_empty() {
1622 "unknown".to_string()
1623 } else {
1624 current_tool_name.clone()
1625 };
1626 let tool = tools.get(&tool_name);
1627 let risk = tool
1628 .as_ref()
1629 .map(|tool| tool.risk())
1630 .unwrap_or(RiskLevel::ReadOnly);
1631
1632 let _ = event_tx.send(Event::ToolUseProposed {
1638 run: run_id.clone(),
1639 id: id.clone(),
1640 name: tool_name.clone(),
1641 args: args.clone(),
1642 risk: risk.clone(),
1643 });
1644 let proposed = crate::autonomy::ProposedAction {
1645 tool_name: tool_name.clone(),
1646 risk: risk.clone(),
1647 args: args.clone(),
1648 };
1649
1650 let permission =
1651 self.config.permissions.evaluate(&PermissionContext {
1652 tool_name: &proposed.tool_name,
1653 risk: proposed.risk.clone(),
1654 args: &args,
1655 workspace_root: &workspace.root,
1656 provider: Some(brain.id()),
1657 surface: Some("engine"),
1658 });
1659 let autonomy_verdict =
1660 if matches!(permission.decision, Decision::Allow) {
1661 Some(autonomy.evaluate(&proposed))
1662 } else {
1663 None
1664 };
1665 let mut decision = autonomy_verdict
1666 .as_ref()
1667 .map(|verdict| verdict.decision.clone())
1668 .unwrap_or_else(|| permission.decision.clone());
1669 if !matches!(permission.decision, Decision::Allow) {
1670 let _ = event_tx.send(Event::Message {
1671 run: run_id.clone(),
1672 role: "permissions".into(),
1673 text: permission.reason.clone(),
1674 });
1675 }
1676 if matches!(decision, Decision::AskUser) {
1677 let summary = format!(
1678 "{}. Approve {} with args: {}",
1679 permission.reason, proposed.tool_name, args
1680 );
1681 let _ = event_tx.send(Event::ApprovalRequested {
1682 run: run_id.clone(),
1683 id: id.clone(),
1684 summary: summary.clone(),
1685 tool: Some(proposed.tool_name.clone()),
1686 risk: Some(format!("{:?}", proposed.risk)),
1687 });
1688 let _ = self
1692 .hooks
1693 .execute(&HookEvent::OnApprovalRequested, &summary)
1694 .await;
1695 if let Some(handler) = &self.approval_handler {
1696 decision = handler
1697 .request_approval(ApprovalRequest {
1698 run: run_id.clone(),
1699 id: id.clone(),
1700 tool_name: proposed.tool_name.clone(),
1701 risk: proposed.risk.clone(),
1702 args: args.clone(),
1703 summary,
1704 })
1705 .await;
1706 }
1707 }
1708
1709 let _ = event_tx.send(Event::ApprovalResolved {
1710 run: run_id.clone(),
1711 id: id.clone(),
1712 decision: decision.clone(),
1713 });
1714
1715 match decision {
1716 Decision::Allow => {
1717 if autonomy_verdict
1718 .as_ref()
1719 .map(|verdict| verdict.notify)
1720 .unwrap_or(false)
1721 {
1722 let _ = event_tx.send(Event::Message {
1723 run: run_id.clone(),
1724 role: "autonomy".into(),
1725 text: format!(
1726 "{} will run under trusted autonomy with checkpoint notification",
1727 proposed.tool_name
1728 ),
1729 });
1730 }
1731 if matches!(
1733 proposed.risk,
1734 RiskLevel::Mutating | RiskLevel::Destructive
1735 ) {
1736 had_mutation = true;
1737 }
1738 let needs_checkpoint = autonomy_verdict
1740 .as_ref()
1741 .map(|verdict| verdict.needs_checkpoint)
1742 .unwrap_or_else(|| {
1743 matches!(
1744 proposed.risk,
1745 RiskLevel::Mutating
1746 | RiskLevel::Exec
1747 | RiskLevel::Destructive
1748 )
1749 });
1750 if needs_checkpoint {
1751 let vetoes = self
1752 .hooks
1753 .execute(
1754 &HookEvent::PreCheckpoint,
1755 &proposed.tool_name,
1756 )
1757 .await;
1758 let checkpoint_veto = vetoes
1759 .iter()
1760 .find(|result| result.veto)
1761 .and_then(|result| result.veto_reason.clone());
1762 if let Some(reason) = checkpoint_veto {
1763 let _ = event_tx.send(Event::Error {
1764 run: run_id.clone(),
1765 message: reason,
1766 });
1767 denied_by_approval = true;
1768 stop_after_tool_result = true;
1769 continue;
1770 }
1771 let checkpoints =
1772 GitCheckpoints::new(workspace.root.clone());
1773 if let Ok(cp_id) = checkpoints
1774 .snapshot(&format!("pre-{}", proposed.tool_name))
1775 {
1776 let _ = event_tx.send(Event::CheckpointCreated {
1777 run: run_id.clone(),
1778 id: cp_id,
1779 label: format!("pre-{}", proposed.tool_name),
1780 });
1781 let _ = self
1782 .hooks
1783 .execute(
1784 &HookEvent::PostCheckpoint,
1785 &proposed.tool_name,
1786 )
1787 .await;
1788 }
1789 }
1790
1791 let hook_results = self
1792 .hooks
1793 .execute(&HookEvent::PreToolUse, &proposed.tool_name)
1794 .await;
1795 if let Some(reason) = hook_results
1796 .iter()
1797 .find(|result| result.veto)
1798 .and_then(|result| result.veto_reason.clone())
1799 {
1800 denied_by_approval = true;
1801 stop_after_tool_result = true;
1802 let _ = event_tx.send(Event::ToolOutput {
1803 run: run_id.clone(),
1804 id: id.clone(),
1805 blocks: vec![Block::Text(reason.clone())],
1806 });
1807 tool_output_seen_this_completion = true;
1808 tool_results_pending.push((
1809 id.clone(),
1810 proposed.tool_name.clone(),
1811 args.clone(),
1812 vec![ContentBlock::Text { text: reason }],
1813 true,
1814 ));
1815 continue;
1816 }
1817
1818 let _ = event_tx.send(Event::ToolUseStarted {
1819 run: run_id.clone(),
1820 id: id.clone(),
1821 });
1822 let _ = event_tx.send(Event::AgentStatus {
1823 run: run_id.clone(),
1824 role: "coder".into(),
1825 status: AgentStatus::Working,
1826 note: format!("running tool · {}", current_tool_name),
1827 });
1828
1829 let result = if let Some(tool) = tool {
1830 let ctx = ToolCtx {
1831 workspace_root: workspace.root.clone(),
1832 run_id: run_id.clone(),
1833 };
1834 match tool.call(args.clone(), &ctx).await {
1835 Ok(result) => result,
1836 Err(e) => crate::tools::ToolResult::error(format!(
1837 "Tool {} failed: {}",
1838 proposed.tool_name, e
1839 )),
1840 }
1841 } else {
1842 crate::tools::ToolResult::error(format!(
1843 "Unknown tool: {}",
1844 proposed.tool_name
1845 ))
1846 };
1847
1848 for block in &result.content {
1849 if let Block::Diff { file, patch } = block {
1850 let plus = patch
1851 .lines()
1852 .filter(|l| {
1853 l.starts_with('+') && !l.starts_with("+++")
1854 })
1855 .count()
1856 as u32;
1857 let minus = patch
1858 .lines()
1859 .filter(|l| {
1860 l.starts_with('-') && !l.starts_with("---")
1861 })
1862 .count()
1863 as u32;
1864 let _ = event_tx.send(Event::DiffProposed {
1865 run: run_id.clone(),
1866 file: file.clone(),
1867 patch: patch.clone(),
1868 plus,
1869 minus,
1870 });
1871 }
1872 }
1873
1874 let blocks = result.content.clone();
1875 let text = tool_result_text(&blocks);
1876 let content_blocks = tool_result_content_blocks(&blocks);
1877 let is_error = result.is_error;
1878 skill_evidence.push_str(&text);
1879 skill_evidence.push('\n');
1880 let _ = event_tx.send(Event::ToolOutput {
1881 run: run_id.clone(),
1882 id: id.clone(),
1883 blocks,
1884 });
1885 if !is_error
1889 && matches!(
1890 proposed.tool_name.as_str(),
1891 "fs_write" | "edit" | "multi_edit"
1892 )
1893 {
1894 if let Some(p) = args.get("path").and_then(|v| v.as_str())
1895 {
1896 let _ = event_tx.send(Event::DiffApplied {
1897 run: run_id.clone(),
1898 file: p.to_string(),
1899 });
1900 } else if let Some(p) = args
1901 .get("file_path")
1902 .and_then(|v| v.as_str())
1903 {
1904 let _ = event_tx.send(Event::DiffApplied {
1905 run: run_id.clone(),
1906 file: p.to_string(),
1907 });
1908 }
1909 }
1910 let _ = self
1911 .hooks
1912 .execute(&HookEvent::PostToolUse, &proposed.tool_name)
1913 .await;
1914 tool_output_seen_this_completion = true;
1915 tool_results_pending.push((
1916 id.clone(),
1917 proposed.tool_name.clone(),
1918 args.clone(),
1919 content_blocks,
1920 is_error,
1921 ));
1922 }
1923 Decision::AskUser => {
1924 waiting_for_approval = true;
1926 let approval_id = id.clone();
1927 let approval_name = proposed.tool_name.clone();
1928 let approval_args = args.clone();
1929 let approval_risk = proposed.risk;
1930
1931 let _ = event_tx.send(Event::ApprovalRequested {
1933 run: run_id.clone(),
1934 id: approval_id.clone(),
1935 summary: format!(
1936 "{} tool '{}' with args: {}",
1937 format!("{:?}", approval_risk),
1938 approval_name,
1939 approval_args
1940 ),
1941 tool: Some(approval_name.clone()),
1942 risk: Some(format!("{:?}", approval_risk)),
1943 });
1944
1945 use std::io::{self, Write};
1947 print!(
1948 "\n\x1b[1;33mApprove {}? [y/N]\x1b[0m ",
1949 approval_name
1950 );
1951 io::stdout().flush().ok();
1952 let mut input = String::new();
1953 io::stdin().read_line(&mut input).ok();
1954 let approved = input.trim().to_lowercase() == "y";
1955
1956 if approved {
1957 waiting_for_approval = false;
1958 if matches!(
1960 approval_risk,
1961 RiskLevel::Mutating
1962 | RiskLevel::Exec
1963 | RiskLevel::Destructive
1964 ) {
1965 let vetoes = self
1966 .hooks
1967 .execute(
1968 &HookEvent::PreCheckpoint,
1969 &approval_name,
1970 )
1971 .await;
1972 if let Some(reason) = vetoes
1973 .iter()
1974 .find(|result| result.veto)
1975 .and_then(|result| result.veto_reason.clone())
1976 {
1977 let _ = event_tx.send(Event::Error {
1978 run: run_id.clone(),
1979 message: reason,
1980 });
1981 denied_by_approval = true;
1982 stop_after_tool_result = true;
1983 continue;
1984 }
1985 let checkpoints =
1986 GitCheckpoints::new(workspace.root.clone());
1987 if let Ok(cp_id) = checkpoints
1988 .snapshot(&format!("pre-{}", approval_name))
1989 {
1990 let _ =
1991 event_tx.send(Event::CheckpointCreated {
1992 run: run_id.clone(),
1993 id: cp_id,
1994 label: format!("pre-{}", approval_name),
1995 });
1996 let _ = self
1997 .hooks
1998 .execute(
1999 &HookEvent::PostCheckpoint,
2000 &approval_name,
2001 )
2002 .await;
2003 }
2004 }
2005 let hook_results = self
2006 .hooks
2007 .execute(&HookEvent::PreToolUse, &approval_name)
2008 .await;
2009 if let Some(reason) = hook_results
2010 .iter()
2011 .find(|result| result.veto)
2012 .and_then(|result| result.veto_reason.clone())
2013 {
2014 denied_by_approval = true;
2015 stop_after_tool_result = true;
2016 let _ = event_tx.send(Event::ToolOutput {
2017 run: run_id.clone(),
2018 id: approval_id.clone(),
2019 blocks: vec![Block::Text(reason.clone())],
2020 });
2021 tool_output_seen_this_completion = true;
2022 tool_results_pending.push((
2023 approval_id,
2024 approval_name,
2025 approval_args,
2026 vec![ContentBlock::Text { text: reason }],
2027 true,
2028 ));
2029 continue;
2030 }
2031 let _ = event_tx.send(Event::ToolUseStarted {
2032 run: run_id.clone(),
2033 id: approval_id.clone(),
2034 });
2035 let result = if let Some(tool) = tool {
2036 let ctx = ToolCtx {
2037 workspace_root: workspace.root.clone(),
2038 run_id: run_id.clone(),
2039 };
2040 match tool.call(approval_args.clone(), &ctx).await {
2041 Ok(r) => r,
2042 Err(e) => {
2043 crate::tools::ToolResult::error(format!(
2044 "Tool {} failed: {}",
2045 approval_name, e
2046 ))
2047 }
2048 }
2049 } else {
2050 crate::tools::ToolResult::error(format!(
2051 "Unknown tool: {}",
2052 approval_name
2053 ))
2054 };
2055 let blocks = result.content.clone();
2056 let text = tool_result_text(&blocks);
2057 let content_blocks =
2058 tool_result_content_blocks(&blocks);
2059 let is_error = result.is_error;
2060 skill_evidence.push_str(&text);
2061 skill_evidence.push('\n');
2062 let _ = event_tx.send(Event::ToolOutput {
2063 run: run_id.clone(),
2064 id: approval_id.clone(),
2065 blocks,
2066 });
2067 let _ = self
2068 .hooks
2069 .execute(&HookEvent::PostToolUse, &approval_name)
2070 .await;
2071 tool_output_seen_this_completion = true;
2072 tool_results_pending.push((
2073 approval_id,
2074 approval_name,
2075 approval_args,
2076 content_blocks,
2077 is_error,
2078 ));
2079 } else {
2080 let _ = event_tx.send(Event::ToolOutput {
2081 run: run_id.clone(),
2082 id: approval_id.clone(),
2083 blocks: vec![Block::Text("Denied by user".into())],
2084 });
2085 tool_output_seen_this_completion = true;
2086 tool_results_pending.push((
2087 approval_id,
2088 approval_name,
2089 approval_args,
2090 vec![ContentBlock::Text {
2091 text: "Denied by user".into(),
2092 }],
2093 true,
2094 ));
2095 }
2096 }
2097 Decision::Deny => {
2098 denied_by_approval = true;
2099 stop_after_tool_result = true;
2100 let _ = event_tx.send(Event::ToolOutput {
2101 run: run_id.clone(),
2102 id: id.clone(),
2103 blocks: vec![Block::Text(
2104 "Denied by autonomy policy".into(),
2105 )],
2106 });
2107 tool_output_seen_this_completion = true;
2108 tool_results_pending.push((
2109 id.clone(),
2110 proposed.tool_name.clone(),
2111 args.clone(),
2112 vec![ContentBlock::Text {
2113 text: "Denied by autonomy policy".into(),
2114 }],
2115 true,
2116 ));
2117 }
2118 }
2119
2120 current_tool_json.clear();
2121 current_tool_name.clear();
2122 }
2123 BrainEvent::Usage(usage) => {
2124 total_input += usage.input;
2125 total_output += usage.output;
2126 estimated_input_unconfirmed =
2127 estimated_input_unconfirmed.saturating_sub(usage.input);
2128 estimated_output_unconfirmed =
2129 estimated_output_unconfirmed.saturating_sub(usage.output);
2130 let _ = event_tx.send(Event::TokenUsage {
2131 run: run_id.clone(),
2132 input: usage.input,
2133 output: usage.output,
2134 });
2135
2136 let input_cost =
2138 caps.cost_input_per_mtok * (usage.input as f64) / 1_000_000.0;
2139 let output_cost =
2140 caps.cost_output_per_mtok * (usage.output as f64) / 1_000_000.0;
2141 let actual_cost = input_cost + output_cost;
2142 cost_usd += actual_cost;
2143 estimated_cost_unconfirmed =
2144 (estimated_cost_unconfirmed - actual_cost).max(0.0);
2145
2146 let _ = event_tx.send(Event::CostUpdate {
2147 run: run_id.clone(),
2148 usd: cost_usd + estimated_cost_unconfirmed,
2149 });
2150 }
2151 BrainEvent::Done(reason) => {
2152 match reason {
2153 crate::event::StopReason::EndTurn => {
2154 let this_empty = assistant_text.trim().is_empty()
2159 && !tool_output_seen_this_completion;
2160 if this_empty && !produced_any_output {
2161 let next_idx = current_chain_idx + 1;
2162 if next_idx < brain_policy.chain.len() {
2163 current_chain_idx = next_idx;
2164 let _ = event_tx.send(Event::ModelSwitched {
2165 run: run_id.clone(),
2166 from: brain.id().to_string(),
2167 to: brain_policy.chain[current_chain_idx]
2168 .id()
2169 .to_string(),
2170 reason: "empty response".into(),
2171 });
2172 continue_agent_loop = true;
2173 break;
2174 }
2175 }
2176 if !assistant_text.trim().is_empty() {
2177 produced_any_output = true;
2178 let mut blocks = Vec::new();
2179 if !reasoning_buf.is_empty() {
2180 blocks.push(ContentBlock::Reasoning {
2181 text: reasoning_buf.clone(),
2182 });
2183 }
2184 blocks.push(ContentBlock::Text {
2185 text: assistant_text.clone(),
2186 });
2187 let assistant_msg = Msg {
2188 role: "assistant".into(),
2189 content: blocks,
2190 };
2191 let turn_messages = vec![assistant_msg.clone()];
2192 let has_verified_tool_context =
2193 tool_output_seen_this_completion
2194 || messages.iter().any(|m| {
2195 m.content.iter().any(|block| {
2196 matches!(
2197 block,
2198 ContentBlock::ToolResult { .. }
2199 )
2200 })
2201 });
2202
2203 if let Some(correction) = self.reasoning.guard_turn(
2204 &turn_messages,
2205 has_verified_tool_context,
2206 ) {
2207 messages.push(assistant_msg);
2208 let _ = event_tx.send(Event::Message {
2209 run: run_id.clone(),
2210 role: "guard".into(),
2211 text: correction.clone(),
2212 });
2213 messages.push(Msg {
2214 role: "user".into(),
2215 content: vec![ContentBlock::Text {
2216 text: format!("SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.", correction),
2217 }],
2218 });
2219 continue_agent_loop = true;
2220 break;
2221 }
2222
2223 if self.reasoning.hallucination_guard {
2227 if let Some(correction) =
2228 crate::reasoning::HallucinationGuard::verify(
2229 &assistant_text,
2230 &tools_called_this_turn,
2231 )
2232 {
2233 let mut blocks2 = Vec::new();
2234 if !reasoning_buf.is_empty() {
2235 blocks2.push(ContentBlock::Reasoning {
2236 text: reasoning_buf.clone(),
2237 });
2238 }
2239 blocks2.push(ContentBlock::Text {
2240 text: assistant_text.clone(),
2241 });
2242 let assistant_msg2 = Msg {
2243 role: "assistant".into(),
2244 content: blocks2,
2245 };
2246 messages.push(assistant_msg2);
2247 let _ = event_tx.send(Event::Message {
2248 run: run_id.clone(),
2249 role: "guard".into(),
2250 text: correction.clone(),
2251 });
2252 messages.push(Msg {
2253 role: "user".into(),
2254 content: vec![ContentBlock::Text {
2255 text: format!("SYSTEM: {}. Call fs_read or search to verify the file/symbol first, then re-state the claim with the raw evidence.", correction),
2256 }],
2257 });
2258 continue_agent_loop = true;
2259 break;
2260 }
2261 }
2262
2263 skill_evidence.push_str(&assistant_text);
2264 skill_evidence.push('\n');
2265 messages.push(assistant_msg);
2266 }
2267
2268 if had_mutation
2274 && self.reasoning.self_critique
2275 && !diffs.is_empty()
2276 {
2277 let review =
2278 crate::reasoning::SelfCritique::pre_mutation_review(
2279 &diffs,
2280 Some(&task.description),
2281 );
2282 let _ = event_tx.send(Event::Message {
2283 run: run_id.clone(),
2284 role: "self-critique".into(),
2285 text: review,
2286 });
2287 }
2288
2289 if had_mutation && verify_attempts < MAX_VERIFY_ATTEMPTS {
2295 if let Some(verify_cmd) =
2296 self.config.defaults.verify_command.clone()
2297 {
2298 verify_attempts += 1;
2299 had_mutation = false;
2300 let parts: Vec<String> = verify_cmd
2301 .split_whitespace()
2302 .map(String::from)
2303 .collect();
2304 if !parts.is_empty() {
2305 let _ = event_tx.send(Event::AgentStatus {
2306 run: run_id.clone(),
2307 role: "verifier".into(),
2308 status: AgentStatus::Working,
2309 note: format!("running `{}`", verify_cmd),
2310 });
2311 let cmd = crate::sandbox::Command {
2312 program: parts[0].clone(),
2313 args: parts[1..].to_vec(),
2314 env: std::collections::HashMap::new(),
2315 workdir: workspace.root.clone(),
2316 };
2317 let limits = crate::sandbox::Limits {
2318 timeout_ms: 300_000,
2319 max_output_bytes: 16_000,
2320 };
2321 match workspace
2322 .sandbox
2323 .exec(&cmd, &limits)
2324 .await
2325 {
2326 Ok(res) if res.exit_code != 0 => {
2327 let _ = event_tx.send(Event::TestResult {
2328 run: run_id.clone(),
2329 passed: 0,
2330 failed: 1,
2331 detail: format!(
2332 "verify `{}` failed (exit {})",
2333 verify_cmd, res.exit_code
2334 ),
2335 });
2336 let out = format!(
2337 "{}\n{}",
2338 res.stdout, res.stderr
2339 );
2340 let tail: String = out
2341 .lines()
2342 .rev()
2343 .take(40)
2344 .collect::<Vec<_>>()
2345 .into_iter()
2346 .rev()
2347 .collect::<Vec<_>>()
2348 .join("\n");
2349 messages.push(Msg {
2350 role: "user".into(),
2351 content: vec![ContentBlock::Text {
2352 text: format!(
2353 "SYSTEM: verification command `{}` FAILED (exit {}). Fix the code, then it will be re-verified. Output:\n{}",
2354 verify_cmd, res.exit_code, tail
2355 ),
2356 }],
2357 });
2358 continue_agent_loop = true;
2359 break;
2360 }
2361 Ok(_) => {
2362 let _ =
2363 event_tx.send(Event::TestResult {
2364 run: run_id.clone(),
2365 passed: 1,
2366 failed: 0,
2367 detail: format!(
2368 "verify `{}` passed",
2369 verify_cmd
2370 ),
2371 });
2372 }
2373 Err(e) => {
2374 let _ = event_tx.send(Event::Message {
2375 run: run_id.clone(),
2376 role: "guard".into(),
2377 text: format!(
2378 "verify command could not run: {}",
2379 e
2380 ),
2381 });
2382 }
2383 }
2384 }
2385 }
2386 }
2387 }
2388 crate::event::StopReason::ToolUse => {
2389 let drained: Vec<_> =
2402 std::mem::take(&mut tool_results_pending);
2403
2404 let mut assistant_blocks = Vec::new();
2405 if !reasoning_buf.is_empty() {
2406 assistant_blocks.push(ContentBlock::Reasoning {
2407 text: reasoning_buf.clone(),
2408 });
2409 }
2410 for (tool_id, tool_name, args, _content, _is_error) in
2411 &drained
2412 {
2413 assistant_blocks.push(ContentBlock::ToolUse {
2414 id: tool_id.clone(),
2415 name: tool_name.clone(),
2416 input: args.clone(),
2417 });
2418 }
2419 messages.push(Msg {
2420 role: "assistant".into(),
2421 content: assistant_blocks,
2422 });
2423
2424 for (tool_id, _tool_name, _args, content, is_error) in
2425 drained
2426 {
2427 messages.push(Msg {
2428 role: "user".into(),
2429 content: vec![ContentBlock::ToolResult {
2430 tool_use_id: tool_id,
2431 content,
2432 is_error: Some(is_error),
2433 }],
2434 });
2435 }
2436 if tool_output_seen_this_completion {
2437 produced_any_output = true;
2438 }
2439 continue_agent_loop =
2440 !waiting_for_approval && !stop_after_tool_result;
2441 break;
2442 }
2443 _ => {}
2444 }
2445 break; }
2447 BrainEvent::Error(msg) => {
2448 let _ = event_tx.send(Event::Error {
2449 run: run_id.clone(),
2450 message: msg.clone(),
2451 });
2452 let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
2453 let next_idx = current_chain_idx + 1;
2454 if next_idx < brain_policy.chain.len() {
2455 current_chain_idx = next_idx;
2456 let switch_ctx = format!(
2457 "{} -> {}",
2458 brain.id(),
2459 brain_policy.chain[current_chain_idx].id()
2460 );
2461 let _ = event_tx.send(Event::ModelSwitched {
2462 run: run_id.clone(),
2463 from: brain.id().to_string(),
2464 to: brain_policy.chain[current_chain_idx].id().to_string(),
2465 reason: msg,
2466 });
2467 let _ = self
2468 .hooks
2469 .execute(&HookEvent::OnModelSwitched, &switch_ctx)
2470 .await;
2471 continue_agent_loop = true;
2472 } else {
2473 had_error = true;
2474 last_error = Some(msg);
2475 }
2476 break;
2477 }
2478 }
2479 }
2480
2481 if !continue_agent_loop && !had_error {
2486 let this_empty =
2487 assistant_text.trim().is_empty() && !tool_output_seen_this_completion;
2488 if this_empty && !produced_any_output {
2489 let next_idx = current_chain_idx + 1;
2490 if next_idx < brain_policy.chain.len() {
2491 let _ = event_tx.send(Event::ModelSwitched {
2492 run: run_id.clone(),
2493 from: brain.id().to_string(),
2494 to: brain_policy.chain[next_idx].id().to_string(),
2495 reason: "empty response".into(),
2496 });
2497 current_chain_idx = next_idx;
2498 continue;
2499 }
2500 }
2501 }
2502
2503 if continue_agent_loop {
2504 continue;
2505 }
2506 break; }
2508 Err(e) => {
2509 let err_msg = format!("{}", e);
2510 let _ = event_tx.send(Event::Error {
2511 run: run_id.clone(),
2512 message: err_msg.clone(),
2513 });
2514
2515 let next_idx = current_chain_idx + 1;
2517 if next_idx < brain_policy.chain.len() {
2518 current_chain_idx = next_idx;
2519 let _ = event_tx.send(Event::ModelSwitched {
2520 run: run_id.clone(),
2521 from: brain.id().to_string(),
2522 to: brain_policy.chain[current_chain_idx].id().to_string(),
2523 reason: err_msg,
2524 });
2525 } else {
2526 had_error = true;
2527 last_error = Some(err_msg);
2528 break;
2529 }
2530 }
2531 }
2532 }
2533
2534 let final_input = if total_input > 0 {
2536 total_input
2537 } else {
2538 total_input + estimated_input_unconfirmed
2539 };
2540 let final_output = if total_output > 0 {
2541 total_output
2542 } else {
2543 total_output + estimated_output_unconfirmed
2544 };
2545 let _ = event_tx.send(Event::TokenUsage {
2546 run: run_id.clone(),
2547 input: final_input,
2548 output: final_output,
2549 });
2550 let _ = event_tx.send(Event::AgentStatus {
2552 run: run_id.clone(),
2553 role: "coder".into(),
2554 status: AgentStatus::Done,
2555 note: format!("completed · {}↑ {}↓ tok", final_input, final_output),
2556 });
2557
2558 let outcome = OutcomeSummary {
2559 status: if had_error {
2560 format!(
2561 "error: {}",
2562 last_error.unwrap_or_else(|| "run failed".into())
2563 )
2564 } else if waiting_for_approval {
2565 "waiting_for_approval".into()
2566 } else if denied_by_approval {
2567 "denied".into()
2568 } else {
2569 "completed".into()
2570 },
2571 diffs,
2572 cost_usd: cost_usd + estimated_cost_unconfirmed,
2573 tokens: TokenUsage {
2574 input: total_input + estimated_input_unconfirmed,
2575 output: total_output + estimated_output_unconfirmed,
2576 },
2577 };
2578
2579 if let Some(mem) = &self.memory {
2581 let _ = mem.save_task(&crate::memory::TaskMem {
2582 run_id: run_id.0.clone(),
2583 messages: messages.clone(),
2584 created_at: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(),
2585 });
2586 }
2587
2588 if outcome.status == "completed" {
2590 if let Some(skills) = &self.skills {
2591 if let Some(candidate) = Curator::propose_skill_if_missing(
2592 &task.description,
2593 &skill_evidence,
2594 skills.as_ref(),
2595 ) {
2596 let skill_name = candidate.name.clone();
2597 let _ = event_tx.send(Event::SkillLearned {
2598 run: run_id.clone(),
2599 name: skill_name.clone(),
2600 });
2601 let _ = self
2602 .hooks
2603 .execute(&HookEvent::OnSkillLearned, &skill_name)
2604 .await;
2605 let _ = skills.add(candidate);
2606 }
2607 }
2608
2609 if let Some(mem) = &self.memory {
2614 let events = events_from_messages(&run_id, &messages);
2615 Distiller::distill(mem, &events, &task.description).await;
2616 }
2617 }
2618
2619 let _ = event_tx.send(Event::RunFinished {
2620 run: run_id.clone(),
2621 outcome: outcome.clone(),
2622 });
2623
2624 let _ = self
2626 .hooks
2627 .execute(&HookEvent::PostRun, &task.description)
2628 .await;
2629
2630 Ok(outcome)
2631 }
2632}
2633
2634#[cfg(test)]
2635mod tests {
2636 use super::*;
2637
2638 #[test]
2639 fn initial_user_content_blocks_embeds_uploaded_images() {
2640 let tmp = tempfile::tempdir().expect("tempdir");
2641 let image = tmp.path().join("shot.png");
2642 std::fs::write(
2643 &image,
2644 [
2645 0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n', 0, 0, 0, 0,
2646 ],
2647 )
2648 .expect("write image");
2649 let description = format!(
2650 "analyse this\n\n[Attached files]\n### file: shot.png\n[uploaded: {}]",
2651 image.display()
2652 );
2653
2654 let blocks = initial_user_content_blocks(tmp.path(), &description);
2655 assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
2656 assert!(blocks.iter().any(|block| matches!(
2657 block,
2658 ContentBlock::Image {
2659 source: ImageSource::Base64 {
2660 media_type,
2661 data,
2662 }
2663 } if media_type == "image/png" && !data.is_empty()
2664 )));
2665 }
2666
2667 #[test]
2668 fn tool_result_content_blocks_preserves_images() {
2669 let blocks = tool_result_content_blocks(&[
2670 Block::Text("screenshot captured".into()),
2671 Block::Image {
2672 data: vec![1, 2, 3],
2673 mime: "image/png".into(),
2674 },
2675 ]);
2676
2677 assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
2678 assert!(blocks.iter().any(|block| matches!(
2679 block,
2680 ContentBlock::Image {
2681 source: ImageSource::Base64 {
2682 media_type,
2683 data,
2684 }
2685 } if media_type == "image/png" && data == "AQID"
2686 )));
2687 }
2688}