1use async_trait::async_trait;
2use futures::StreamExt;
3use serde_json::json;
4use std::path::PathBuf;
5use std::sync::Arc;
6use tokio::sync::mpsc;
7
8use crate::agent::AgentStore;
9use crate::autonomy::{AutonomyContract, Checkpoints, GitCheckpoints};
10use crate::capabilities::{Curator, SkillLibrary};
11use crate::config::Config;
12use crate::event::{
13 AgentStatus, AutonomyLevel, Block, Decision, Event, OutcomeSummary, RiskLevel, RunId,
14 TokenUsage,
15};
16use crate::extras::Distiller;
17use crate::hooks::{HookEvent, HookRegistry};
18use crate::instructions::InstructionDoc;
19use crate::memory::{Fact, Memory, MemoryDoc, MemoryDocKind};
20use crate::permissions::PermissionContext;
21use crate::provider::{
22 Brain, BrainEvent, BrainRequest, ContentBlock, ImageSource, Msg, PromptCacheConfig, ToolSpec,
23};
24use crate::reasoning::ReasoningEngine;
25use crate::redaction::RedactionFilter;
26use crate::router::{BudgetState, Router, TaskTier};
27use crate::sandbox::Sandbox;
28use crate::tools::{ToolCtx, ToolRegistry};
29
30pub mod scorer;
31pub mod treesitter;
32
/// Persona of the agent: the name, role, and personality strings that
/// `build_system_prompt` interpolates into the opening of the system prompt.
#[derive(Clone, Debug)]
pub struct Identity {
    /// Name the agent introduces itself with.
    pub name: String,
    /// Role description, e.g. "software engineer".
    pub role: String,
    /// Short description of the tone the agent should adopt.
    pub personality: String,
}
41
42impl Default for Identity {
43 fn default() -> Self {
44 Self {
45 name: "sparrow".into(),
46 role: "software engineer".into(),
47 personality: "concise, competent, helpful".into(),
48 }
49 }
50}
51
52pub struct BrainPolicy {
55 pub chain: Vec<Arc<dyn Brain>>,
57 pub current_index: usize,
58}
59
60impl BrainPolicy {
61 pub fn current(&self) -> Option<Arc<dyn Brain>> {
62 self.chain.get(self.current_index).cloned()
63 }
64
65 pub fn next(&mut self) -> Option<Arc<dyn Brain>> {
66 self.current_index += 1;
67 self.current()
68 }
69}
70
71pub struct Workspace {
74 pub root: PathBuf,
75 pub sandbox: Arc<dyn Sandbox>,
76}
77
78pub struct AgentRun {
81 pub id: RunId,
82 pub identity: Identity,
83 pub brain_policy: BrainPolicy,
84 pub autonomy: AutonomyContract,
85 pub tools: Arc<ToolRegistry>,
86 pub workspace: Workspace,
87}
88
/// Rough token estimate for a piece of text: one token per four characters
/// (Unicode scalar values, not bytes), rounded up, and never less than one,
/// even for an empty string.
fn estimate_text_tokens(text: &str) -> u64 {
    let char_count = text.chars().count() as u64;
    char_count.div_ceil(4).max(1)
}
93
94fn estimate_content_tokens(blocks: &[ContentBlock]) -> u64 {
95 blocks
96 .iter()
97 .map(|block| match block {
98 ContentBlock::Text { text } => estimate_text_tokens(text),
99 ContentBlock::Image { source } => match source {
100 crate::provider::ImageSource::Base64 { data, .. } => {
101 256 + estimate_text_tokens(data).min(2_000)
102 }
103 crate::provider::ImageSource::Url { url } => 256 + estimate_text_tokens(url),
104 },
105 ContentBlock::ToolUse { name, input, .. } => {
106 estimate_text_tokens(name) + estimate_text_tokens(&input.to_string())
107 }
108 ContentBlock::ToolResult { content, .. } => 8 + estimate_content_tokens(content),
109 ContentBlock::Reasoning { text } => estimate_text_tokens(text),
110 })
111 .sum()
112}
113
114fn estimate_request_tokens(req: &BrainRequest) -> u64 {
115 let system = req.system.as_deref().map(estimate_text_tokens).unwrap_or(0);
116 let messages: u64 = req
117 .messages
118 .iter()
119 .map(|msg| estimate_text_tokens(&msg.role) + estimate_content_tokens(&msg.content) + 4)
120 .sum();
121 let tools: u64 = req
122 .tools
123 .iter()
124 .map(|tool| {
125 estimate_text_tokens(&tool.name)
126 + estimate_text_tokens(&tool.description)
127 + estimate_text_tokens(&tool.input_schema.to_string())
128 })
129 .sum();
130 system + messages + tools
131}
132
/// Encodes `data` as standard base64 (alphabet `A-Za-z0-9+/`) with `=`
/// padding, so the output length is always a multiple of four.
fn base64_encode(data: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    // Maps the low six bits of `bits` to its alphabet character.
    let sextet = |bits: u32| ALPHABET[(bits & 0x3f) as usize] as char;

    let mut encoded = String::with_capacity(data.len().div_ceil(3) * 4);
    for group in data.chunks(3) {
        // Pack up to three bytes into the top 24 bits; missing bytes stay zero.
        let packed = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (i, &byte)| acc | (u32::from(byte) << (16 - 8 * i)));

        encoded.push(sextet(packed >> 18));
        encoded.push(sextet(packed >> 12));
        encoded.push(if group.len() > 1 {
            sextet(packed >> 6)
        } else {
            '='
        });
        encoded.push(if group.len() > 2 { sextet(packed) } else { '=' });
    }
    encoded
}
156
157fn image_block_from_path(path: &std::path::Path) -> Option<ContentBlock> {
158 let mime = mime_guess::from_path(path).first_or_octet_stream();
159 if !mime.type_().as_str().eq_ignore_ascii_case("image") {
160 return None;
161 }
162 let data = std::fs::read(path).ok()?;
163 Some(ContentBlock::Image {
164 source: ImageSource::Base64 {
165 media_type: mime.to_string(),
166 data: base64_encode(&data),
167 },
168 })
169}
170
/// Extracts the paths announced by `uploaded:` markers in `description`.
///
/// Each line contributes at most one path: the text after the first
/// `uploaded:` on that line, with an optional leading `[` removed, cut at the
/// first `]`, then stripped of surrounding whitespace and quotes. Lines
/// without a marker, or whose path ends up empty, are ignored.
fn collect_uploaded_paths(description: &str) -> Vec<String> {
    const MARKER: &str = "uploaded:";

    description
        .lines()
        .filter_map(|line| {
            let (_, after_marker) = line.split_once(MARKER)?;
            let after_marker = after_marker.trim();
            let unbracketed = after_marker.strip_prefix('[').unwrap_or(after_marker);
            let candidate = unbracketed
                .split(']')
                .next()
                .unwrap_or(after_marker)
                .trim()
                .trim_matches('"')
                .trim_matches('\'');
            (!candidate.is_empty()).then(|| candidate.to_string())
        })
        .collect()
}
193
194fn initial_user_content_blocks(
195 workspace_root: &std::path::Path,
196 description: &str,
197) -> Vec<ContentBlock> {
198 let mut blocks = vec![ContentBlock::Text {
199 text: description.to_string(),
200 }];
201 let mut seen = std::collections::HashSet::new();
202 for raw_path in collect_uploaded_paths(description) {
203 let path = std::path::PathBuf::from(&raw_path);
204 let full_path = if path.is_absolute() {
205 path
206 } else {
207 workspace_root.join(path)
208 };
209 if !seen.insert(full_path.clone()) {
210 continue;
211 }
212 if let Some(block) = image_block_from_path(&full_path) {
213 blocks.push(block);
214 }
215 }
216 blocks
217}
218
/// Renders a fallback chain as `a -> b -> c`, showing at most `limit` ids
/// (a limit of zero is treated as one) and appending a count of the hidden
/// ones. An empty chain yields a fixed "no model available" message.
pub fn summarize_model_chain(chain_ids: &[String], limit: usize) -> String {
    if chain_ids.is_empty() {
        return "aucun modèle disponible".into();
    }

    let shown = limit.max(1);
    let hidden = chain_ids.len().saturating_sub(shown);
    let head = chain_ids
        .iter()
        .take(shown)
        .map(String::as_str)
        .collect::<Vec<_>>()
        .join(" -> ");

    if hidden == 0 {
        head
    } else {
        format!("{} -> +{} autres fallbacks", head, hidden)
    }
}
230
231fn prompt_cache_key(scope: &str, workspace_root: &std::path::Path, tools: &[ToolSpec]) -> String {
232 use std::hash::{Hash, Hasher};
233
234 let mut hasher = std::collections::hash_map::DefaultHasher::new();
235 scope.hash(&mut hasher);
236 workspace_root.display().to_string().hash(&mut hasher);
237 for tool in tools {
238 tool.name.hash(&mut hasher);
239 tool.description.hash(&mut hasher);
240 tool.input_schema.to_string().hash(&mut hasher);
241 }
242 format!("sparrow-{}-{:016x}", scope, hasher.finish())
243}
244
245fn build_system_prompt(
248 identity: &Identity,
249 workspace_root: &PathBuf,
250 facts: &[Fact],
251 memory_docs: &[MemoryDoc],
252 instruction_docs: &[InstructionDoc],
253 skills: &[crate::capabilities::Skill],
254) -> String {
255 let mut parts = vec![format!(
256 r#"You are {name}, a {role}.
257
258Personality: {personality}
259
260You are working in the workspace: {workspace}
261You have access to tools to read, write, edit, search, and execute code.
262Always use absolute or relative paths from the workspace root.
263Be concise and direct. When making edits, use exact string replacements.
264Before making changes, read the relevant files first to understand the codebase.
265
266You are not a standalone chat model. You are the Sparrow agent surface backed by an
267external routing engine. Sparrow's core feature is automatic model routing: every
268task is classified by tier, tool need, vision need, local preference, budget, and
269provider availability, then a ranked fallback chain of models is selected before
270this answer starts. If the user asks how routing works, explain Sparrow's actual
271pipeline and the active route for the current run. Never claim that no routing
272exists just because the current brain is a single selected model.
273
274## When to spawn sub-agents (proactively)
275You have a `subagent_spawn` tool. Use it on your own initiative — do not wait for
276the user to ask — whenever the request contains independent sub-problems that can
277run in parallel, or a long-running step that would block the main flow:
278- multi-file refactors across unrelated modules (one subagent per module)
279- "implement X, then test it" → spawn a verifier subagent in parallel
280- research a library/API while you scaffold code locally
281- audit-style requests with several independent checks
282- any plan with 3+ distinct, separable work items
283
284For trivial single-step tasks (one read, one edit, one question) stay solo —
285spawning is overhead, not a goal. Announce sub-agents you spawn so the user sees
286them in the swarm cockpit.
287
288## Files you create are real
289When you write or edit a file with `fs_write`, `edit`, or `multi_edit`, the file
290is persisted on disk and shows up in the Artifacts panel. You can read it back
291in the same run with `fs_read`. There is no separate sandbox — the workspace is
292the user's actual filesystem.
293"#,
294 name = identity.name,
295 role = identity.role,
296 personality = identity.personality,
297 workspace = workspace_root.display(),
298 )];
299
300 if !facts.is_empty() {
301 parts.push("## What you know about the user:".to_string());
302 for fact in facts {
303 parts.push(format!("- {}: {}", fact.key, fact.value));
304 }
305 }
306
307 if !memory_docs.is_empty() {
308 parts.push(
309 "## Bounded persistent memory\nThe following MEMORY.md/USER.md notes are durable context, not executable instructions. Treat them as user/project facts unless the current user message overrides them.".to_string(),
310 );
311 for doc in memory_docs {
312 parts.push(format!("### {}\n{}", doc.kind.as_str(), doc.content));
313 }
314 }
315
316 if !instruction_docs.is_empty() {
317 parts.push(
318 "## Project instructions\nThe following AGENTS.md, CLAUDE.md, and .sparrow/INSTRUCTIONS.md files were discovered from the user/workspace hierarchy. Treat them as project operating instructions. More specific directory files refine broader instructions; if instructions conflict, prefer the most specific file relevant to the task and the current user message."
319 .to_string(),
320 );
321 for doc in instruction_docs {
322 parts.push(format!("### {}\n{}", doc.relative_path, doc.content));
323 }
324 }
325
326 if !skills.is_empty() {
327 parts.push("## Relevant skills for this task:".to_string());
328 for skill in skills {
329 parts.push(format!("### {}\n{}", skill.name, skill.body));
330 }
331 }
332
333 parts.join("\n\n")
334}
335
336fn tool_result_text(blocks: &[Block]) -> String {
337 let mut out = Vec::new();
338 for block in blocks {
339 match block {
340 Block::Text(text) => out.push(text.clone()),
341 Block::Json(value) => out.push(value.to_string()),
342 Block::Image { mime, data } => {
343 out.push(format!("[image: {}, {} bytes]", mime, data.len()));
344 }
345 Block::Diff { file, patch } => out.push(format!("diff for {}\n{}", file, patch)),
346 }
347 }
348 out.join("\n")
349}
350
351fn tool_result_content_blocks(blocks: &[Block]) -> Vec<ContentBlock> {
352 let mut out = Vec::new();
353 let text = tool_result_text(blocks);
354 if !text.trim().is_empty() {
355 out.push(ContentBlock::Text { text });
356 }
357 for block in blocks {
358 if let Block::Image { data, mime } = block {
359 out.push(ContentBlock::Image {
360 source: ImageSource::Base64 {
361 media_type: mime.clone(),
362 data: base64_encode(data),
363 },
364 });
365 }
366 }
367 out
368}
369
370fn events_from_messages(run_id: &RunId, messages: &[Msg]) -> Vec<Event> {
374 let mut events = Vec::new();
375 for msg in messages {
376 for block in &msg.content {
377 match block {
378 ContentBlock::ToolUse { name, input, .. } => {
379 events.push(Event::ToolUseProposed {
380 run: run_id.clone(),
381 id: String::new(),
382 name: name.clone(),
383 args: input.clone(),
384 risk: RiskLevel::ReadOnly,
385 });
386 }
387 ContentBlock::Text { text } if msg.role == "assistant" => {
388 events.push(Event::ThinkingDelta {
389 run: run_id.clone(),
390 text: text.clone(),
391 });
392 }
393 _ => {}
394 }
395 }
396 }
397 events
398}
399
400#[derive(Debug, Clone)]
403pub struct Task {
404 pub description: String,
405 pub context: Vec<Msg>,
406}
407
408pub struct Engine {
411 router: Arc<dyn Router>,
412 config: Config,
413 identity: Option<Identity>,
414 memory: Option<Arc<dyn Memory>>,
415 skills: Option<Arc<dyn SkillLibrary>>,
416 redaction: RedactionFilter,
417 approval_handler: Option<Arc<dyn ApprovalHandler>>,
418 reasoning: ReasoningEngine,
419 hooks: HookRegistry,
420 agent_store: Option<Arc<dyn AgentStore>>,
421 org_policy: Option<crate::onboarding::enterprise::OrgPolicy>,
422 classify_cache: std::sync::Mutex<std::collections::HashMap<u64, crate::router::TaskTier>>,
424}
425
426#[derive(Debug, Clone)]
427pub struct ApprovalRequest {
428 pub run: RunId,
429 pub id: String,
430 pub tool_name: String,
431 pub risk: RiskLevel,
432 pub args: serde_json::Value,
433 pub summary: String,
434}
435
436#[async_trait]
437pub trait ApprovalHandler: Send + Sync {
438 async fn request_approval(&self, request: ApprovalRequest) -> Decision;
439}
440
441impl Engine {
442 pub fn new(router: Arc<dyn Router>, config: Config) -> Self {
443 let mut hooks = HookRegistry::new(Arc::new(crate::sandbox::LocalSandbox::new(
444 std::env::current_dir().unwrap_or_default(),
445 )));
446 hooks.load(config.hooks.clone());
447 Self {
448 router,
449 config,
450 identity: None,
451 memory: None,
452 skills: None,
453 redaction: RedactionFilter::new(),
454 approval_handler: None,
455 reasoning: ReasoningEngine::default(),
456 hooks,
457 agent_store: None,
458 org_policy: None,
459 classify_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
460 }
461 }
462
463 pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
464 let secrets: Vec<String> = memory
466 .all_facts()
467 .iter()
468 .filter(|f| f.key.starts_with("secret:"))
469 .map(|f| f.value.clone())
470 .collect();
471 self.redaction.load_secrets(secrets);
472 self.memory = Some(memory);
473 self
474 }
475
476 pub fn with_skills(mut self, skills: Arc<dyn SkillLibrary>) -> Self {
477 self.skills = Some(skills);
478 self
479 }
480
481 pub fn with_identity(mut self, identity: Identity) -> Self {
482 self.identity = Some(identity);
483 self
484 }
485
486 pub fn with_agent_store(mut self, store: Arc<dyn AgentStore>) -> Self {
487 self.agent_store = Some(store);
488 self
489 }
490
491 pub fn with_org_policy(mut self, policy: crate::onboarding::enterprise::OrgPolicy) -> Self {
492 self.org_policy = Some(policy);
493 self
494 }
495
496 pub fn with_hooks_config(mut self, hooks: Vec<crate::hooks::Hook>) -> Self {
497 self.hooks.load(hooks);
498 self
499 }
500
501 pub fn with_approval_handler(mut self, approval_handler: Arc<dyn ApprovalHandler>) -> Self {
502 self.approval_handler = Some(approval_handler);
503 self
504 }
505
506 fn classify_with_confidence(&self, task: &str) -> (TaskTier, bool) {
511 let lower = task.to_lowercase();
512 if lower.contains("vision") || lower.contains("image") || lower.contains("screenshot") {
513 (TaskTier::Vision, false)
514 } else if lower.contains("architecture")
515 || lower.contains("refactor")
516 || lower.contains("audit")
517 || lower.contains("répare")
518 || lower.contains("repare")
519 || lower.contains("livrer")
520 || lower.contains("v1")
521 {
522 (TaskTier::Hard, false)
523 } else if lower.contains("bug")
524 || lower.contains("fix")
525 || lower.contains("corrige")
526 || lower.contains("debug")
527 {
528 (TaskTier::Small, false)
529 } else if lower.contains("routing")
530 || lower.contains("routeur")
531 || lower.contains("modèle")
532 || lower.contains("modele")
533 || lower.contains("model")
534 || lower.contains("sélectionne")
535 || lower.contains("selectionne")
536 {
537 (TaskTier::Small, false)
538 } else if lower.len() < 80 {
539 (TaskTier::Trivial, true)
541 } else {
542 (TaskTier::Medium, true)
543 }
544 }
545
546 async fn classify_via_brain(&self, task: &str, brain: &dyn Brain) -> Option<TaskTier> {
549 let req = BrainRequest {
550 system: Some(
551 "You are a task classifier. Output exactly one word: trivial, small, medium, hard, or vision."
552 .into(),
553 ),
554 messages: vec![Msg {
555 role: "user".into(),
556 content: vec![ContentBlock::Text {
557 text: format!(
558 "Classify this coding task into exactly one tier (trivial, small, medium, hard, vision):\n\n{}\n\nTier:",
559 task
560 ),
561 }],
562 }],
563 tools: vec![],
564 max_tokens: 6,
565 temperature: 0.0,
566 stop: vec![],
567 cache: PromptCacheConfig::disabled(),
568 };
569 let mut stream = brain.complete(req).await.ok()?;
570 let mut out = String::new();
571 while let Some(ev) = stream.next().await {
572 match ev {
573 BrainEvent::TextDelta(t) => out.push_str(&t),
574 BrainEvent::Done(_) => break,
575 BrainEvent::Error(_) => return None,
576 _ => {}
577 }
578 }
579 let word = out.trim().to_lowercase();
580 let word = word.split_whitespace().next().unwrap_or("");
581 match word {
582 "trivial" => Some(TaskTier::Trivial),
583 "small" => Some(TaskTier::Small),
584 "medium" => Some(TaskTier::Medium),
585 "hard" => Some(TaskTier::Hard),
586 "vision" => Some(TaskTier::Vision),
587 _ => None,
588 }
589 }
590
591 fn task_summary(&self, task: &str, tier: &TaskTier) -> String {
592 let lower = task.to_lowercase();
593 if lower.contains("routing")
594 || lower.contains("routeur")
595 || lower.contains("modèle")
596 || lower.contains("modele")
597 || lower.contains("model")
598 {
599 "question meta sur le routing modele".into()
600 } else if lower.contains("code") || lower.contains("bug") || lower.contains("fix") {
601 format!("requete code/{:?}", tier).to_lowercase()
602 } else if lower.contains("config") || lower.contains("provider") {
603 "configuration provider/modele".into()
604 } else {
605 format!("requete {:?}", tier).to_lowercase()
606 }
607 }
608
609 fn is_routing_question(&self, task: &str) -> bool {
610 let lower = task.to_lowercase();
611 (lower.contains("routing") || lower.contains("routeur") || lower.contains("route"))
612 && (lower.contains("modèle") || lower.contains("modele") || lower.contains("model"))
613 || lower.contains("sélectionne tu le model")
614 || lower.contains("selectionne tu le model")
615 }
616
617 fn requires_tools(&self, task: &str, tier: &TaskTier) -> bool {
618 let lower = task.to_lowercase();
619 let tool_keywords = [
620 "outil",
621 "tools",
622 "fichier",
623 "file",
624 "readme",
625 ".rs",
626 ".ts",
627 ".js",
628 ".html",
629 ".md",
630 "repo",
631 "dossier",
632 "workspace",
633 "git",
634 "test",
635 "build",
636 "cargo",
637 "npm",
638 "pnpm",
639 "corrige",
640 "fix",
641 "debug",
642 "bug",
643 "répare",
644 "repare",
645 "modifie",
646 "édite",
647 "edite",
648 "ajoute",
649 "supprime",
650 "écris",
651 "ecris",
652 "write",
653 "create",
654 "crée",
655 "cree",
656 "audit",
657 ];
658
659 if tool_keywords.iter().any(|kw| lower.contains(kw)) {
660 return true;
661 }
662
663 matches!(tier, TaskTier::Medium | TaskTier::Hard | TaskTier::Vision)
664 }
665
666 fn requires_vision(&self, task: &str, tier: &TaskTier) -> bool {
667 let lower = task.to_lowercase();
668 matches!(tier, TaskTier::Vision)
669 || [
670 "image",
671 "screenshot",
672 "capture",
673 "photo",
674 "vision",
675 "logo",
676 "visuel",
677 "interface graphique",
678 ]
679 .iter()
680 .any(|kw| lower.contains(kw))
681 }
682
683 fn routing_explanation(
684 &self,
685 tier: &TaskTier,
686 need: &crate::router::RoutingNeed,
687 chain_ids: &[String],
688 ) -> String {
689 let chain = summarize_model_chain(chain_ids, 5);
690 format!(
691 "Je suis Sparrow, donc je ne réponds pas comme un modèle isolé: avant chaque run, mon routeur classe ta demande puis choisit une chaîne de modèles.\n\nPour cette requête, j'ai détecté: tier `{}` · tools `{}` · vision `{}` · local `{}`.\n\nJe sélectionne ensuite le modèle avec ces critères: adéquation aux capacités demandées, support des tools, besoin vision, préférence local/free-first, budget restant, latence, taille de contexte, puis disponibilité provider. Le résultat est une fallback chain, pas un seul choix figé: `{}`.\n\nConcrètement: une question simple ou meta doit aller vers le modèle le moins coûteux capable de répondre; une tâche code complexe monte vers un modèle plus fort; une tâche avec fichiers/tools exige un modèle compatible tools; une tâche image demande vision; si un provider échoue, je bascule au suivant dans la chaîne.",
692 tier.as_str(),
693 need.required_tools,
694 need.required_vision,
695 need.prefer_local,
696 chain
697 )
698 }
699
700 async fn summarize_messages(&self, brain: &dyn Brain, middle: &[Msg]) -> Option<String> {
703 if middle.is_empty() {
704 return None;
705 }
706 let mut transcript = String::new();
708 for m in middle {
709 for block in &m.content {
710 match block {
711 ContentBlock::Text { text } => {
712 transcript.push_str(&format!("[{}] {}\n", m.role, text));
713 }
714 ContentBlock::ToolUse { name, .. } => {
715 transcript.push_str(&format!("[{}] (tool: {})\n", m.role, name));
716 }
717 ContentBlock::ToolResult { .. } => {
718 transcript.push_str(&format!("[{}] (tool result)\n", m.role));
719 }
720 _ => {}
721 }
722 }
723 }
724 if transcript.len() > 12_000 {
725 transcript.truncate(12_000);
726 }
727 let req = BrainRequest {
728 system: Some(
729 "Summarize this agent conversation in <=200 tokens. Preserve: files edited, \
730 decisions made, current state, and any unfinished work. Plain text only."
731 .into(),
732 ),
733 messages: vec![Msg {
734 role: "user".into(),
735 content: vec![ContentBlock::Text { text: transcript }],
736 }],
737 tools: vec![],
738 max_tokens: 300,
739 temperature: 0.0,
740 stop: vec![],
741 cache: PromptCacheConfig::disabled(),
742 };
743 let mut stream = brain.complete(req).await.ok()?;
744 let mut out = String::new();
745 while let Some(ev) = stream.next().await {
746 match ev {
747 BrainEvent::TextDelta(t) => out.push_str(&t),
748 BrainEvent::Done(_) => break,
749 BrainEvent::Error(_) => return None,
750 _ => {}
751 }
752 }
753 let out = out.trim().to_string();
754 if out.is_empty() { None } else { Some(out) }
755 }
756
757 pub async fn drive(
759 &self,
760 task: Task,
761 event_tx: mpsc::UnboundedSender<Event>,
762 ) -> anyhow::Result<OutcomeSummary> {
763 self.drive_with_run_id(task, event_tx, RunId::new()).await
764 }
765
766 pub async fn drive_with_run_id(
768 &self,
769 task: Task,
770 event_tx: mpsc::UnboundedSender<Event>,
771 run_id: RunId,
772 ) -> anyhow::Result<OutcomeSummary> {
773 self.drive_with_inject(task, event_tx, run_id, None).await
774 }
775
776 pub async fn drive_with_inject(
779 &self,
780 task: Task,
781 event_tx: mpsc::UnboundedSender<Event>,
782 run_id: RunId,
783 mut inject_rx: Option<mpsc::UnboundedReceiver<String>>,
784 ) -> anyhow::Result<OutcomeSummary> {
785 let model_override: Option<String>;
787 let clean_description: String;
788 if let Some(rest) = task.description.strip_prefix("__model:") {
789 if let Some(end) = rest.find("__ ") {
790 model_override = Some(rest[..end].to_string());
791 clean_description = rest[end + 3..].to_string();
792 } else {
793 model_override = None;
794 clean_description = task.description.clone();
795 }
796 } else {
797 model_override = None;
798 clean_description = task.description.clone();
799 }
800 let task = Task {
801 description: clean_description,
802 context: task.context,
803 };
804
805 let mut messages: Vec<Msg> = task.context.clone();
806
807 let (mut tier, ambiguous) = self.classify_with_confidence(&task.description);
809
810 let budget = BudgetState {
812 daily_limit_usd: self.config.budget.daily_usd,
813 daily_spent_usd: 0.0,
814 session_limit_usd: self.config.budget.session_usd,
815 session_spent_usd: 0.0,
816 };
817
818 let mut required_tools = self.requires_tools(&task.description, &tier);
819 let mut required_vision = self.requires_vision(&task.description, &tier);
820 let mut need = crate::router::RoutingNeed {
821 tier: tier.clone(),
822 required_tools,
823 required_vision,
824 prefer_local: false,
825 };
826
827 let mut chain = self.router.select(&need, &budget);
828
829 let router_ref = &self.router;
837 let apply_override = |chain: &mut Vec<Arc<dyn Brain>>| {
838 if let Some(ref override_id) = model_override {
839 let filtered: Vec<_> = chain
840 .iter()
841 .filter(|b| b.id() == override_id.as_str())
842 .cloned()
843 .collect();
844 if !filtered.is_empty() {
845 *chain = filtered;
846 } else if let Some(brain) = router_ref.find_brain_by_id(override_id) {
847 *chain = vec![brain];
848 }
849 }
850 };
851 apply_override(&mut chain);
852
853 if model_override.is_none()
861 && ambiguous
862 && matches!(tier, TaskTier::Medium)
863 && !self.is_routing_question(&task.description)
864 {
865 let desc_hash = {
867 use std::collections::hash_map::DefaultHasher;
868 use std::hash::{Hash, Hasher};
869 let mut h = DefaultHasher::new();
870 task.description.hash(&mut h);
871 h.finish()
872 };
873 let cached = {
874 self.classify_cache
875 .lock()
876 .ok()
877 .and_then(|c| c.get(&desc_hash).cloned())
878 };
879 let refined = match cached {
880 Some(t) => {
881 let _ = event_tx.send(Event::Message {
882 run: run_id.clone(),
883 role: "router".into(),
884 text: format!("classification (cached): {}", t.as_str()),
885 });
886 Some(t)
887 }
888 None => {
889 if let Some(brain) = chain.first().cloned() {
890 let result = self
891 .classify_via_brain(&task.description, brain.as_ref())
892 .await;
893 if let Some(r) = &result {
894 if let Ok(mut c) = self.classify_cache.lock() {
895 c.insert(desc_hash, r.clone());
896 }
897 }
898 result
899 } else {
900 None
901 }
902 }
903 };
904 if let Some(refined) = refined {
905 if std::mem::discriminant(&refined) != std::mem::discriminant(&tier) {
906 let _ = event_tx.send(Event::Message {
907 run: run_id.clone(),
908 role: "router".into(),
909 text: format!(
910 "classification affinée par modèle: {} → {}",
911 tier.as_str(),
912 refined.as_str()
913 ),
914 });
915 tier = refined;
916 required_tools = self.requires_tools(&task.description, &tier);
917 required_vision = self.requires_vision(&task.description, &tier);
918 need = crate::router::RoutingNeed {
919 tier: tier.clone(),
920 required_tools,
921 required_vision,
922 prefer_local: false,
923 };
924 chain = self.router.select(&need, &budget);
925 apply_override(&mut chain);
927 }
928 }
929 }
930
931 let task_summary = self.task_summary(&task.description, &tier);
932 let chain_ids: Vec<String> = chain.iter().map(|b| b.id().to_string()).collect();
933
934 let agent_name = self
935 .identity
936 .as_ref()
937 .map(|identity| identity.name.clone())
938 .unwrap_or_else(|| "sparrow".into());
939 let _ = event_tx.send(Event::RunStarted {
940 run: run_id.clone(),
941 task: task.description.clone(),
942 agent: agent_name,
943 });
944
945 let pre_run_results = self
948 .hooks
949 .execute(&HookEvent::PreRun, &task.description)
950 .await;
951 if let Some(reason) = pre_run_results
952 .iter()
953 .find(|r| r.veto)
954 .and_then(|r| r.veto_reason.clone())
955 {
956 let _ = event_tx.send(Event::Error {
957 run: run_id.clone(),
958 message: format!("PreRun hook vetoed run: {}", reason),
959 });
960 anyhow::bail!("PreRun hook vetoed run: {}", reason);
961 }
962
963 let _ = event_tx.send(Event::Message {
964 run: run_id.clone(),
965 role: "router".into(),
966 text: format!(
967 "requete: {} · tier: {} · tools: {} · vision: {} · local: {}",
968 task_summary,
969 tier.as_str(),
970 need.required_tools,
971 need.required_vision,
972 need.prefer_local
973 ),
974 });
975
976 let _ = event_tx.send(Event::AgentStatus {
977 run: run_id.clone(),
978 role: "planner".into(),
979 status: AgentStatus::Working,
980 note: format!("analyzing request · {} candidates", chain.len()),
981 });
982
983 let primary_ctx = chain
984 .first()
985 .map(|b| b.caps().context_window)
986 .unwrap_or(128_000);
987 let _ = event_tx.send(Event::RouteSelected {
988 run: run_id.clone(),
989 chain: chain_ids.clone(),
990 context_window: primary_ctx,
991 });
992 let _ = event_tx.send(Event::AgentStatus {
993 run: run_id.clone(),
994 role: "planner".into(),
995 status: AgentStatus::Done,
996 note: format!(
997 "route set · {} primary",
998 chain.first().map(|b| b.id()).unwrap_or("—")
999 ),
1000 });
1001
1002 if chain.is_empty() {
1003 let _ = event_tx.send(Event::Error {
1004 run: run_id.clone(),
1005 message: "No available models (budget exhausted or no providers configured)".into(),
1006 });
1007 return Ok(OutcomeSummary {
1008 status: "error: no models".into(),
1009 diffs: vec![],
1010 cost_usd: 0.0,
1011 tokens: TokenUsage {
1012 input: 0,
1013 output: 0,
1014 },
1015 });
1016 }
1017
1018 if self.is_routing_question(&task.description) {
1019 let text = self.routing_explanation(&tier, &need, &chain_ids);
1020 let input_tokens =
1021 estimate_text_tokens(&task.description) + estimate_text_tokens(&task_summary);
1022 let output_tokens = estimate_text_tokens(&text);
1023 let _ = event_tx.send(Event::TokenUsageEstimated {
1024 run: run_id.clone(),
1025 input: input_tokens,
1026 output: 0,
1027 reason: "router meta request estimate".into(),
1028 });
1029 let _ = event_tx.send(Event::TokenUsageEstimated {
1030 run: run_id.clone(),
1031 input: 0,
1032 output: output_tokens,
1033 reason: "router meta response estimate".into(),
1034 });
1035 let _ = event_tx.send(Event::ThinkingDelta {
1036 run: run_id.clone(),
1037 text: text.clone(),
1038 });
1039 let outcome = OutcomeSummary {
1040 status: "completed".into(),
1041 diffs: vec![],
1042 cost_usd: 0.0,
1043 tokens: TokenUsage {
1044 input: input_tokens,
1045 output: output_tokens,
1046 },
1047 };
1048 let _ = event_tx.send(Event::RunFinished {
1049 run: run_id.clone(),
1050 outcome: outcome.clone(),
1051 });
1052 return Ok(outcome);
1053 }
1054
1055 let workspace_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1057 let sandbox: Arc<dyn Sandbox> = match self.config.defaults.sandbox.as_str() {
1058 "local-hardened" => Arc::new(crate::sandbox::LocalSandbox::hardened(
1059 workspace_root.clone(),
1060 )),
1061 "docker" => Arc::new(crate::sandbox::backends::DockerSandbox::new(
1062 workspace_root.clone(),
1063 "ubuntu:latest",
1064 )),
1065 s if s.starts_with("ssh:") => Arc::new(crate::sandbox::backends::SshSandbox::new(
1066 workspace_root.clone(),
1067 s.trim_start_matches("ssh:"),
1068 )),
1069 "modal" => Arc::new(crate::sandbox::backends::ModalSandbox::new(
1070 workspace_root.clone(),
1071 )),
1072 "daytona" => Arc::new(crate::sandbox::backends::DaytonaSandbox::new(
1073 workspace_root.clone(),
1074 )),
1075 "vercel" => Arc::new(crate::sandbox::backends::VercelSandbox::new(
1076 workspace_root.clone(),
1077 )),
1078 "singularity" => Arc::new(crate::sandbox::backends::SingularitySandbox::new(
1079 workspace_root.clone(),
1080 )),
1081 _ => Arc::new(crate::sandbox::LocalSandbox::new(workspace_root.clone())),
1082 };
1083
1084 let mut registry = ToolRegistry::new();
1085 registry.register(Arc::new(crate::tools::fs::FsRead));
1086 registry.register(Arc::new(crate::tools::fs::FsList));
1087 registry.register(Arc::new(crate::tools::fs::FsWrite));
1088 registry.register(Arc::new(crate::tools::edit::Edit));
1089 registry.register(Arc::new(crate::tools::edit::MultiEdit));
1090 registry.register(Arc::new(crate::tools::search_and_web::Search));
1091 registry.register(Arc::new(crate::tools::search_and_web::WebSearch));
1092 registry.register(Arc::new(crate::tools::search_and_web::WebFetch));
1093 registry.register(Arc::new(crate::tools::browser_sandbox::BrowserTool));
1094 registry.register(Arc::new(crate::tools::browser_sandbox::ComputerTool));
1095 registry.register(Arc::new(crate::tools::git::Git));
1096 registry.register(Arc::new(crate::tools::todo::Todo::new()));
1097 registry.register(Arc::new(crate::tools::exec::Exec::new(sandbox.clone())));
1098 registry.register(Arc::new(crate::tools::media::ImageGen::new()));
1099 registry.register(Arc::new(crate::tools::media::Tts::new()));
1100 registry.register(Arc::new(crate::tools::media::Transcribe::new()));
1101 registry.register(Arc::new(crate::tools::subagent::PythonRpc::new()));
1102 registry.register(Arc::new(crate::tools::builder_tools::LspClient));
1103 registry.register(Arc::new(crate::tools::code_nav::Glob));
1104 registry.register(Arc::new(crate::tools::code_nav::Symbols));
1105 if let Some(mem) = &self.memory {
1106 registry.register(Arc::new(crate::tools::memory::MemoryTool::new(mem.clone())));
1107 registry.register(Arc::new(
1108 crate::tools::knowledge_graph::KnowledgeGraphTool::new(mem.clone()),
1109 ));
1110 }
1111 {
1112 let mut sub = crate::tools::subagent::SubagentSpawn::new(
1114 self.router.clone(),
1115 self.config.clone(),
1116 );
1117 if let Some(mem) = &self.memory {
1118 sub = sub.with_memory(mem.clone());
1119 }
1120 registry.register(Arc::new(sub));
1121 }
1122 let tools = Arc::new(registry);
1123 let tool_specs: Vec<ToolSpec> = tools.to_specs();
1124
1125 let workspace = Workspace {
1126 root: workspace_root,
1127 sandbox,
1128 };
1129
1130 let identity = self.identity.clone().unwrap_or_else(|| Identity {
1131 name: "sparrow".into(),
1132 role: "senior software engineer".into(),
1133 personality: "concise, competent, direct".into(),
1134 });
1135
1136 let brain_policy = BrainPolicy {
1137 chain,
1138 current_index: 0,
1139 };
1140
1141 let mut autonomy = match self.config.defaults.autonomy {
1142 AutonomyLevel::Supervised => AutonomyContract::supervised(),
1143 AutonomyLevel::Trusted => AutonomyContract::trusted(),
1144 AutonomyLevel::Autonomous => AutonomyContract::autonomous(),
1145 };
1146 autonomy.budget.max_usd = self.config.budget.session_usd;
1147 let _ = event_tx.send(Event::AutonomyChanged {
1148 run: run_id.clone(),
1149 level: autonomy.level.clone(),
1150 });
1151
1152 let relevant_skills: Vec<crate::capabilities::Skill> = self
1154 .skills
1155 .as_ref()
1156 .map(|s| s.relevant(&task.description, 3))
1157 .unwrap_or_default();
1158
1159 let system = build_system_prompt(
1160 &identity,
1161 &workspace.root,
1162 &self
1163 .memory
1164 .as_ref()
1165 .map(|m| m.all_facts())
1166 .unwrap_or_default(),
1167 &self
1168 .memory
1169 .as_ref()
1170 .map(|m| {
1171 [MemoryDocKind::Memory, MemoryDocKind::User]
1172 .into_iter()
1173 .filter_map(|kind| m.memory_doc(kind))
1174 .collect::<Vec<_>>()
1175 })
1176 .unwrap_or_default(),
1177 &crate::instructions::discover_workspace_instructions(
1178 &workspace.root,
1179 &task.description,
1180 ),
1181 &relevant_skills,
1182 );
1183 let mut system = format!(
1184 "{}\n\n## Active Sparrow Routing Context\nRequest category: {}\nTask tier: {}\nRequired tools: {}\nRequired vision: {}\nPreferred local: {}\nSelected fallback chain: {}\nRouting policy: free_first={}, session_budget_usd={:.2}.\nWhen answering routing questions, describe this context concretely.",
1185 system,
1186 task_summary,
1187 tier.as_str(),
1188 need.required_tools,
1189 need.required_vision,
1190 need.prefer_local,
1191 summarize_model_chain(&chain_ids, 8),
1192 self.config.routing.free_first,
1193 self.config.budget.session_usd
1194 );
1195
1196 if !messages.is_empty() {
1200 system.push_str(
1201 "\n\n## Conversation continuity\nThis is an ONGOING conversation. The messages below are prior turns and are AUTHORITATIVE memory of what the user told you (names, preferences, facts, decisions). Use them directly; never re-introduce yourself or contradict them.",
1202 );
1203 }
1204
1205 messages.push(Msg {
1207 role: "user".into(),
1208 content: initial_user_content_blocks(&workspace.root, &task.description),
1209 });
1210
1211 let mut total_input: u64 = 0;
1212 let mut total_output: u64 = 0;
1213 let mut estimated_input_unconfirmed: u64 = 0;
1214 let mut estimated_output_unconfirmed: u64 = 0;
1215 let mut estimated_cost_unconfirmed: f64 = 0.0;
1216 let mut cost_usd: f64 = 0.0;
1217 let diffs: Vec<crate::event::FileDiff> = Vec::new();
1218 let mut current_chain_idx = 0usize;
1219 let mut tool_results_pending: Vec<(
1220 String,
1221 String,
1222 serde_json::Value,
1223 Vec<ContentBlock>,
1224 bool,
1225 )> = Vec::new();
1226 let budget_session = self.config.budget.session_usd;
1227 let _budget_daily = self.config.budget.daily_usd;
1228 let redaction = &self.redaction;
1229 let mut had_error = false;
1230 let mut last_error: Option<String> = None;
1231 let mut waiting_for_approval = false;
1232 let mut denied_by_approval = false;
1233 let mut skill_evidence = String::new();
1234 let mut turns: u32 = 0;
1236 const MAX_TURNS: u32 = 60;
1237 let mut had_mutation = false;
1241 let mut verify_attempts: u32 = 0;
1242 const MAX_VERIFY_ATTEMPTS: u32 = 2;
1243 let mut produced_any_output = false;
1247
1248 let send = |event: Event| {
1250 let _ = event_tx.send(redaction.redact_event(&event));
1251 };
1252
1253 const COMPACT_TRANSCRIPT_CHARS: usize = 120_000;
1259 const COMPACT_KEEP_LAST: usize = 6;
1260 let context_manager = crate::redaction::ContextManager::new(200_000);
1261
1262 loop {
1264 if turns > 0 {
1267 let transcript_chars: usize = messages
1268 .iter()
1269 .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1270 .sum();
1271 if transcript_chars > COMPACT_TRANSCRIPT_CHARS && messages.len() > COMPACT_KEEP_LAST
1272 {
1273 let _ = self
1276 .hooks
1277 .execute(&HookEvent::PreCompact, &task.description)
1278 .await;
1279 let before = transcript_chars;
1280 let compacted =
1281 context_manager.compact_messages(&messages, 0, COMPACT_KEEP_LAST);
1282 let after: usize = compacted
1283 .iter()
1284 .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1285 .sum();
1286
1287 let mut handoff = crate::context::HandoffDoc::new(task.description.clone());
1289 handoff.next_steps = vec![format!(
1290 "Resume run {} (turn {}/{})",
1291 run_id.0, turns, MAX_TURNS
1292 )];
1293 let handoff_dir = std::path::PathBuf::from(".sparrow/handoff");
1294 let _ = std::fs::create_dir_all(&handoff_dir);
1295 let handoff_path = handoff_dir.join(format!(
1296 "{}-{}.md",
1297 run_id.0,
1298 chrono::Utc::now().format("%Y%m%dT%H%M%SZ")
1299 ));
1300 let _ = std::fs::write(&handoff_path, handoff.to_markdown());
1301
1302 messages = compacted;
1303 send(Event::Compacted {
1304 run: run_id.clone(),
1305 before_chars: before,
1306 after_chars: after,
1307 handoff_path: Some(handoff_path.to_string_lossy().to_string()),
1308 });
1309 let _ = self
1310 .hooks
1311 .execute(&HookEvent::PostCompact, &task.description)
1312 .await;
1313 }
1314 }
1315 turns += 1;
1317 if turns > MAX_TURNS {
1318 send(Event::Message {
1319 run: run_id.clone(),
1320 role: "guard".into(),
1321 text: format!("iteration cap reached ({} turns) — stopping", MAX_TURNS),
1322 });
1323 break;
1324 }
1325
1326 if cost_usd + estimated_cost_unconfirmed >= budget_session {
1328 let msg = format!(
1329 "Budget exceeded: ${:.4} of ${:.2} session cap",
1330 cost_usd + estimated_cost_unconfirmed,
1331 budget_session
1332 );
1333 send(Event::Error {
1334 run: run_id.clone(),
1335 message: msg.clone(),
1336 });
1337 let _ = self
1340 .hooks
1341 .execute(&HookEvent::OnBudgetThreshold, &msg)
1342 .await;
1343 let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1344 had_error = true;
1345 last_error = Some("budget exceeded".into());
1346 break;
1347 }
1348 if let Some(_approval_handler) = &self.approval_handler {
1349 if waiting_for_approval {
1350 }
1353 }
1354
1355 if let Some(ref policy) = self.org_policy {
1357 let proposed_file = tool_results_pending
1358 .last()
1359 .map(|(_, _, args, _, _)| {
1360 args.get("path").and_then(|v| v.as_str()).unwrap_or("")
1361 })
1362 .unwrap_or("");
1363 if let Err(violation) =
1364 policy.enforce(&self.config.defaults.autonomy, cost_usd, proposed_file)
1365 {
1366 send(Event::Error {
1367 run: run_id.clone(),
1368 message: format!("Org policy violation: {}", violation),
1369 });
1370 break;
1371 }
1372 }
1373
1374 if let Some(rx) = inject_rx.as_mut() {
1378 loop {
1379 match rx.try_recv() {
1380 Ok(injected) => {
1381 let trimmed = injected.trim().to_string();
1382 if trimmed.is_empty() {
1383 continue;
1384 }
1385 messages.push(Msg {
1386 role: "user".into(),
1387 content: vec![ContentBlock::Text {
1388 text: format!("INTERRUPT FROM USER: {}", trimmed),
1389 }],
1390 });
1391 let _ = event_tx.send(Event::Message {
1392 run: run_id.clone(),
1393 role: "interrupt".into(),
1394 text: trimmed,
1395 });
1396 }
1397 Err(mpsc::error::TryRecvError::Empty) => break,
1398 Err(mpsc::error::TryRecvError::Disconnected) => {
1399 inject_rx = None;
1400 break;
1401 }
1402 }
1403 }
1404 }
1405
1406 let brain = match brain_policy.chain.get(current_chain_idx) {
1407 Some(b) => b.clone(),
1408 None => break,
1409 };
1410
1411 let caps = brain.caps();
1412
1413 {
1418 let req_for_estimate = BrainRequest {
1419 system: Some(system.clone()),
1420 messages: messages.clone(),
1421 tools: if need.required_tools {
1422 tool_specs.clone()
1423 } else {
1424 vec![]
1425 },
1426 max_tokens: caps.max_output as u32,
1427 temperature: 0.0,
1428 stop: vec![],
1429 cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1430 "engine",
1431 &workspace.root,
1432 &tool_specs,
1433 ))),
1434 };
1435 let est = estimate_request_tokens(&req_for_estimate);
1436 let threshold = (caps.context_window as f64 * 0.75) as u64;
1437 if est > threshold && messages.len() > 8 {
1438 let original_task = messages.first().cloned();
1439 let keep_tail: Vec<Msg> =
1440 messages.iter().rev().take(6).cloned().collect::<Vec<_>>();
1441 let middle: Vec<Msg> = messages
1442 .iter()
1443 .skip(1)
1444 .take(messages.len().saturating_sub(7))
1445 .cloned()
1446 .collect();
1447 let dropped = middle.len();
1448
1449 let summary = self
1452 .summarize_messages(brain.as_ref(), &middle)
1453 .await
1454 .unwrap_or_else(|| {
1455 format!(
1456 "{} prior messages were dropped to fit the model window.",
1457 dropped
1458 )
1459 });
1460
1461 let mut compacted: Vec<Msg> = Vec::new();
1462 if let Some(task) = original_task {
1463 compacted.push(task);
1464 }
1465 compacted.push(Msg {
1466 role: "user".into(),
1467 content: vec![ContentBlock::Text {
1468 text: format!(
1469 "[CONTEXT SUMMARY of {} earlier messages]\n{}\n\
1470 (Files edited and tool outputs in the turns below remain authoritative.)",
1471 dropped, summary
1472 ),
1473 }],
1474 });
1475 for m in keep_tail.into_iter().rev() {
1476 compacted.push(m);
1477 }
1478 messages = compacted;
1479 let _ = event_tx.send(Event::Message {
1480 run: run_id.clone(),
1481 role: "compaction".into(),
1482 text: format!(
1483 "context compacted: {} messages summarized ({} tok > {} threshold)",
1484 dropped, est, threshold
1485 ),
1486 });
1487 }
1488 }
1489
1490 let req = BrainRequest {
1491 system: Some(system.clone()),
1492 messages: messages.clone(),
1493 tools: if need.required_tools {
1494 tool_specs.clone()
1495 } else {
1496 vec![]
1497 },
1498 max_tokens: caps.max_output as u32,
1499 temperature: 0.0,
1500 stop: vec![],
1501 cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1502 "engine",
1503 &workspace.root,
1504 &tool_specs,
1505 ))),
1506 };
1507
1508 let estimated_input = estimate_request_tokens(&req);
1509 estimated_input_unconfirmed += estimated_input;
1510 estimated_cost_unconfirmed +=
1511 caps.cost_input_per_mtok * (estimated_input as f64) / 1_000_000.0;
1512 let _ = event_tx.send(Event::TokenUsageEstimated {
1513 run: run_id.clone(),
1514 input: estimated_input,
1515 output: 0,
1516 reason: "prompt estimate before provider usage".into(),
1517 });
1518 let _ = event_tx.send(Event::CostUpdate {
1519 run: run_id.clone(),
1520 usd: cost_usd + estimated_cost_unconfirmed,
1521 });
1522
1523 let _ = event_tx.send(Event::AgentStatus {
1524 run: run_id.clone(),
1525 role: "coder".into(),
1526 status: AgentStatus::Thinking,
1527 note: format!("consulting {} · parsing request…", brain.id()),
1528 });
1529
1530 match brain.complete(req).await {
1531 Ok(mut stream) => {
1532 let mut current_tool_name = String::new();
1533 let mut current_tool_json = String::new();
1534 let mut output_chars_seen: u64 = 0;
1535 let mut output_tokens_emitted: u64 = 0;
1536 let mut continue_agent_loop = false;
1537 let mut stop_after_tool_result = false;
1538 let mut assistant_text = String::new();
1539 let mut tool_output_seen_this_completion = false;
1540 let mut tools_called_this_turn: Vec<String> = Vec::new();
1544 let mut reasoning_buf: String = String::new();
1548
1549 while let Some(event) = stream.next().await {
1550 match event {
1551 BrainEvent::TextDelta(text) => {
1552 assistant_text.push_str(&text);
1553 output_chars_seen += text.chars().count() as u64;
1554 let estimated_output = (output_chars_seen + 3) / 4;
1555 let output_delta =
1556 estimated_output.saturating_sub(output_tokens_emitted);
1557 if output_delta > 0 {
1558 output_tokens_emitted += output_delta;
1559 estimated_output_unconfirmed += output_delta;
1560 estimated_cost_unconfirmed += caps.cost_output_per_mtok
1561 * (output_delta as f64)
1562 / 1_000_000.0;
1563 let _ = event_tx.send(Event::TokenUsageEstimated {
1564 run: run_id.clone(),
1565 input: 0,
1566 output: output_delta,
1567 reason: "streamed output estimate".into(),
1568 });
1569 let _ = event_tx.send(Event::CostUpdate {
1570 run: run_id.clone(),
1571 usd: cost_usd + estimated_cost_unconfirmed,
1572 });
1573 }
1574 let _ = event_tx.send(Event::ThinkingDelta {
1575 run: run_id.clone(),
1576 text: text.clone(),
1577 });
1578 }
1579 BrainEvent::ReasoningDelta(rtext) => {
1580 reasoning_buf.push_str(&rtext);
1586 let _ = event_tx.send(Event::ReasoningDelta {
1587 run: run_id.clone(),
1588 text: rtext,
1589 });
1590 }
1591 BrainEvent::ToolUseStart { id, name } => {
1592 current_tool_name = name.clone();
1593 tools_called_this_turn.push(name.clone());
1594 current_tool_json.clear();
1595 let risk = tools
1596 .get(&name)
1597 .map(|tool| tool.risk())
1598 .unwrap_or(RiskLevel::ReadOnly);
1599 let _ = event_tx.send(Event::ToolUseProposed {
1604 run: run_id.clone(),
1605 id: id.clone(),
1606 name: name.clone(),
1607 args: json!({}),
1608 risk,
1609 });
1610 }
1611 BrainEvent::ToolUseDelta { id, json } => {
1612 let _ = id;
1613 current_tool_json.push_str(&json);
1614 }
1615 BrainEvent::ToolUseEnd { id } => {
1616 let args: serde_json::Value =
1618 serde_json::from_str(¤t_tool_json).unwrap_or(json!({}));
1619
1620 let tool_name = if current_tool_name.is_empty() {
1622 "unknown".to_string()
1623 } else {
1624 current_tool_name.clone()
1625 };
1626 let tool = tools.get(&tool_name);
1627 let risk = tool
1628 .as_ref()
1629 .map(|tool| tool.risk())
1630 .unwrap_or(RiskLevel::ReadOnly);
1631
1632 let _ = event_tx.send(Event::ToolUseProposed {
1638 run: run_id.clone(),
1639 id: id.clone(),
1640 name: tool_name.clone(),
1641 args: args.clone(),
1642 risk: risk.clone(),
1643 });
1644 let proposed = crate::autonomy::ProposedAction {
1645 tool_name: tool_name.clone(),
1646 risk: risk.clone(),
1647 args: args.clone(),
1648 };
1649
1650 let permission =
1651 self.config.permissions.evaluate(&PermissionContext {
1652 tool_name: &proposed.tool_name,
1653 risk: proposed.risk.clone(),
1654 args: &args,
1655 workspace_root: &workspace.root,
1656 provider: Some(brain.id()),
1657 surface: Some("engine"),
1658 });
1659 let autonomy_verdict =
1660 if matches!(permission.decision, Decision::Allow) {
1661 Some(autonomy.evaluate(&proposed))
1662 } else {
1663 None
1664 };
1665 let mut decision = autonomy_verdict
1666 .as_ref()
1667 .map(|verdict| verdict.decision.clone())
1668 .unwrap_or_else(|| permission.decision.clone());
1669 if !matches!(permission.decision, Decision::Allow) {
1670 let _ = event_tx.send(Event::Message {
1671 run: run_id.clone(),
1672 role: "permissions".into(),
1673 text: permission.reason.clone(),
1674 });
1675 }
1676 if matches!(decision, Decision::AskUser) {
1677 let summary = format!(
1678 "{}. Approve {} with args: {}",
1679 permission.reason, proposed.tool_name, args
1680 );
1681 let _ = event_tx.send(Event::ApprovalRequested {
1682 run: run_id.clone(),
1683 id: id.clone(),
1684 summary: summary.clone(),
1685 });
1686 let _ = self
1690 .hooks
1691 .execute(&HookEvent::OnApprovalRequested, &summary)
1692 .await;
1693 if let Some(handler) = &self.approval_handler {
1694 decision = handler
1695 .request_approval(ApprovalRequest {
1696 run: run_id.clone(),
1697 id: id.clone(),
1698 tool_name: proposed.tool_name.clone(),
1699 risk: proposed.risk.clone(),
1700 args: args.clone(),
1701 summary,
1702 })
1703 .await;
1704 }
1705 }
1706
1707 let _ = event_tx.send(Event::ApprovalResolved {
1708 run: run_id.clone(),
1709 id: id.clone(),
1710 decision: decision.clone(),
1711 });
1712
1713 match decision {
1714 Decision::Allow => {
1715 if autonomy_verdict
1716 .as_ref()
1717 .map(|verdict| verdict.notify)
1718 .unwrap_or(false)
1719 {
1720 let _ = event_tx.send(Event::Message {
1721 run: run_id.clone(),
1722 role: "autonomy".into(),
1723 text: format!(
1724 "{} will run under trusted autonomy with checkpoint notification",
1725 proposed.tool_name
1726 ),
1727 });
1728 }
1729 if matches!(
1731 proposed.risk,
1732 RiskLevel::Mutating | RiskLevel::Destructive
1733 ) {
1734 had_mutation = true;
1735 }
1736 let needs_checkpoint = autonomy_verdict
1738 .as_ref()
1739 .map(|verdict| verdict.needs_checkpoint)
1740 .unwrap_or_else(|| {
1741 matches!(
1742 proposed.risk,
1743 RiskLevel::Mutating
1744 | RiskLevel::Exec
1745 | RiskLevel::Destructive
1746 )
1747 });
1748 if needs_checkpoint {
1749 let vetoes = self
1750 .hooks
1751 .execute(
1752 &HookEvent::PreCheckpoint,
1753 &proposed.tool_name,
1754 )
1755 .await;
1756 let checkpoint_veto = vetoes
1757 .iter()
1758 .find(|result| result.veto)
1759 .and_then(|result| result.veto_reason.clone());
1760 if let Some(reason) = checkpoint_veto {
1761 let _ = event_tx.send(Event::Error {
1762 run: run_id.clone(),
1763 message: reason,
1764 });
1765 denied_by_approval = true;
1766 stop_after_tool_result = true;
1767 continue;
1768 }
1769 let checkpoints =
1770 GitCheckpoints::new(workspace.root.clone());
1771 if let Ok(cp_id) = checkpoints
1772 .snapshot(&format!("pre-{}", proposed.tool_name))
1773 {
1774 let _ = event_tx.send(Event::CheckpointCreated {
1775 run: run_id.clone(),
1776 id: cp_id,
1777 label: format!("pre-{}", proposed.tool_name),
1778 });
1779 let _ = self
1780 .hooks
1781 .execute(
1782 &HookEvent::PostCheckpoint,
1783 &proposed.tool_name,
1784 )
1785 .await;
1786 }
1787 }
1788
1789 let hook_results = self
1790 .hooks
1791 .execute(&HookEvent::PreToolUse, &proposed.tool_name)
1792 .await;
1793 if let Some(reason) = hook_results
1794 .iter()
1795 .find(|result| result.veto)
1796 .and_then(|result| result.veto_reason.clone())
1797 {
1798 denied_by_approval = true;
1799 stop_after_tool_result = true;
1800 let _ = event_tx.send(Event::ToolOutput {
1801 run: run_id.clone(),
1802 id: id.clone(),
1803 blocks: vec![Block::Text(reason.clone())],
1804 });
1805 tool_output_seen_this_completion = true;
1806 tool_results_pending.push((
1807 id.clone(),
1808 proposed.tool_name.clone(),
1809 args.clone(),
1810 vec![ContentBlock::Text { text: reason }],
1811 true,
1812 ));
1813 continue;
1814 }
1815
1816 let _ = event_tx.send(Event::ToolUseStarted {
1817 run: run_id.clone(),
1818 id: id.clone(),
1819 });
1820 let _ = event_tx.send(Event::AgentStatus {
1821 run: run_id.clone(),
1822 role: "coder".into(),
1823 status: AgentStatus::Working,
1824 note: format!("running tool · {}", current_tool_name),
1825 });
1826
1827 let result = if let Some(tool) = tool {
1828 let ctx = ToolCtx {
1829 workspace_root: workspace.root.clone(),
1830 run_id: run_id.clone(),
1831 };
1832 match tool.call(args.clone(), &ctx).await {
1833 Ok(result) => result,
1834 Err(e) => crate::tools::ToolResult::error(format!(
1835 "Tool {} failed: {}",
1836 proposed.tool_name, e
1837 )),
1838 }
1839 } else {
1840 crate::tools::ToolResult::error(format!(
1841 "Unknown tool: {}",
1842 proposed.tool_name
1843 ))
1844 };
1845
1846 for block in &result.content {
1847 if let Block::Diff { file, patch } = block {
1848 let plus = patch
1849 .lines()
1850 .filter(|l| {
1851 l.starts_with('+') && !l.starts_with("+++")
1852 })
1853 .count()
1854 as u32;
1855 let minus = patch
1856 .lines()
1857 .filter(|l| {
1858 l.starts_with('-') && !l.starts_with("---")
1859 })
1860 .count()
1861 as u32;
1862 let _ = event_tx.send(Event::DiffProposed {
1863 run: run_id.clone(),
1864 file: file.clone(),
1865 patch: patch.clone(),
1866 plus,
1867 minus,
1868 });
1869 }
1870 }
1871
1872 let blocks = result.content.clone();
1873 let text = tool_result_text(&blocks);
1874 let content_blocks = tool_result_content_blocks(&blocks);
1875 let is_error = result.is_error;
1876 skill_evidence.push_str(&text);
1877 skill_evidence.push('\n');
1878 let _ = event_tx.send(Event::ToolOutput {
1879 run: run_id.clone(),
1880 id: id.clone(),
1881 blocks,
1882 });
1883 if !is_error
1887 && matches!(
1888 proposed.tool_name.as_str(),
1889 "fs_write" | "edit" | "multi_edit"
1890 )
1891 {
1892 if let Some(p) = args.get("path").and_then(|v| v.as_str())
1893 {
1894 let _ = event_tx.send(Event::DiffApplied {
1895 run: run_id.clone(),
1896 file: p.to_string(),
1897 });
1898 } else if let Some(p) = args
1899 .get("file_path")
1900 .and_then(|v| v.as_str())
1901 {
1902 let _ = event_tx.send(Event::DiffApplied {
1903 run: run_id.clone(),
1904 file: p.to_string(),
1905 });
1906 }
1907 }
1908 let _ = self
1909 .hooks
1910 .execute(&HookEvent::PostToolUse, &proposed.tool_name)
1911 .await;
1912 tool_output_seen_this_completion = true;
1913 tool_results_pending.push((
1914 id.clone(),
1915 proposed.tool_name.clone(),
1916 args.clone(),
1917 content_blocks,
1918 is_error,
1919 ));
1920 }
1921 Decision::AskUser => {
1922 waiting_for_approval = true;
1924 let approval_id = id.clone();
1925 let approval_name = proposed.tool_name.clone();
1926 let approval_args = args.clone();
1927 let approval_risk = proposed.risk;
1928
1929 let _ = event_tx.send(Event::ApprovalRequested {
1931 run: run_id.clone(),
1932 id: approval_id.clone(),
1933 summary: format!(
1934 "{} tool '{}' with args: {}",
1935 format!("{:?}", approval_risk),
1936 approval_name,
1937 approval_args
1938 ),
1939 });
1940
1941 use std::io::{self, Write};
1943 print!(
1944 "\n\x1b[1;33mApprove {}? [y/N]\x1b[0m ",
1945 approval_name
1946 );
1947 io::stdout().flush().ok();
1948 let mut input = String::new();
1949 io::stdin().read_line(&mut input).ok();
1950 let approved = input.trim().to_lowercase() == "y";
1951
1952 if approved {
1953 waiting_for_approval = false;
1954 if matches!(
1956 approval_risk,
1957 RiskLevel::Mutating
1958 | RiskLevel::Exec
1959 | RiskLevel::Destructive
1960 ) {
1961 let vetoes = self
1962 .hooks
1963 .execute(
1964 &HookEvent::PreCheckpoint,
1965 &approval_name,
1966 )
1967 .await;
1968 if let Some(reason) = vetoes
1969 .iter()
1970 .find(|result| result.veto)
1971 .and_then(|result| result.veto_reason.clone())
1972 {
1973 let _ = event_tx.send(Event::Error {
1974 run: run_id.clone(),
1975 message: reason,
1976 });
1977 denied_by_approval = true;
1978 stop_after_tool_result = true;
1979 continue;
1980 }
1981 let checkpoints =
1982 GitCheckpoints::new(workspace.root.clone());
1983 if let Ok(cp_id) = checkpoints
1984 .snapshot(&format!("pre-{}", approval_name))
1985 {
1986 let _ =
1987 event_tx.send(Event::CheckpointCreated {
1988 run: run_id.clone(),
1989 id: cp_id,
1990 label: format!("pre-{}", approval_name),
1991 });
1992 let _ = self
1993 .hooks
1994 .execute(
1995 &HookEvent::PostCheckpoint,
1996 &approval_name,
1997 )
1998 .await;
1999 }
2000 }
2001 let hook_results = self
2002 .hooks
2003 .execute(&HookEvent::PreToolUse, &approval_name)
2004 .await;
2005 if let Some(reason) = hook_results
2006 .iter()
2007 .find(|result| result.veto)
2008 .and_then(|result| result.veto_reason.clone())
2009 {
2010 denied_by_approval = true;
2011 stop_after_tool_result = true;
2012 let _ = event_tx.send(Event::ToolOutput {
2013 run: run_id.clone(),
2014 id: approval_id.clone(),
2015 blocks: vec![Block::Text(reason.clone())],
2016 });
2017 tool_output_seen_this_completion = true;
2018 tool_results_pending.push((
2019 approval_id,
2020 approval_name,
2021 approval_args,
2022 vec![ContentBlock::Text { text: reason }],
2023 true,
2024 ));
2025 continue;
2026 }
2027 let _ = event_tx.send(Event::ToolUseStarted {
2028 run: run_id.clone(),
2029 id: approval_id.clone(),
2030 });
2031 let result = if let Some(tool) = tool {
2032 let ctx = ToolCtx {
2033 workspace_root: workspace.root.clone(),
2034 run_id: run_id.clone(),
2035 };
2036 match tool.call(approval_args.clone(), &ctx).await {
2037 Ok(r) => r,
2038 Err(e) => {
2039 crate::tools::ToolResult::error(format!(
2040 "Tool {} failed: {}",
2041 approval_name, e
2042 ))
2043 }
2044 }
2045 } else {
2046 crate::tools::ToolResult::error(format!(
2047 "Unknown tool: {}",
2048 approval_name
2049 ))
2050 };
2051 let blocks = result.content.clone();
2052 let text = tool_result_text(&blocks);
2053 let content_blocks =
2054 tool_result_content_blocks(&blocks);
2055 let is_error = result.is_error;
2056 skill_evidence.push_str(&text);
2057 skill_evidence.push('\n');
2058 let _ = event_tx.send(Event::ToolOutput {
2059 run: run_id.clone(),
2060 id: approval_id.clone(),
2061 blocks,
2062 });
2063 let _ = self
2064 .hooks
2065 .execute(&HookEvent::PostToolUse, &approval_name)
2066 .await;
2067 tool_output_seen_this_completion = true;
2068 tool_results_pending.push((
2069 approval_id,
2070 approval_name,
2071 approval_args,
2072 content_blocks,
2073 is_error,
2074 ));
2075 } else {
2076 let _ = event_tx.send(Event::ToolOutput {
2077 run: run_id.clone(),
2078 id: approval_id.clone(),
2079 blocks: vec![Block::Text("Denied by user".into())],
2080 });
2081 tool_output_seen_this_completion = true;
2082 tool_results_pending.push((
2083 approval_id,
2084 approval_name,
2085 approval_args,
2086 vec![ContentBlock::Text {
2087 text: "Denied by user".into(),
2088 }],
2089 true,
2090 ));
2091 }
2092 }
2093 Decision::Deny => {
2094 denied_by_approval = true;
2095 stop_after_tool_result = true;
2096 let _ = event_tx.send(Event::ToolOutput {
2097 run: run_id.clone(),
2098 id: id.clone(),
2099 blocks: vec![Block::Text(
2100 "Denied by autonomy policy".into(),
2101 )],
2102 });
2103 tool_output_seen_this_completion = true;
2104 tool_results_pending.push((
2105 id.clone(),
2106 proposed.tool_name.clone(),
2107 args.clone(),
2108 vec![ContentBlock::Text {
2109 text: "Denied by autonomy policy".into(),
2110 }],
2111 true,
2112 ));
2113 }
2114 }
2115
2116 current_tool_json.clear();
2117 current_tool_name.clear();
2118 }
2119 BrainEvent::Usage(usage) => {
2120 total_input += usage.input;
2121 total_output += usage.output;
2122 estimated_input_unconfirmed =
2123 estimated_input_unconfirmed.saturating_sub(usage.input);
2124 estimated_output_unconfirmed =
2125 estimated_output_unconfirmed.saturating_sub(usage.output);
2126 let _ = event_tx.send(Event::TokenUsage {
2127 run: run_id.clone(),
2128 input: usage.input,
2129 output: usage.output,
2130 });
2131
2132 let input_cost =
2134 caps.cost_input_per_mtok * (usage.input as f64) / 1_000_000.0;
2135 let output_cost =
2136 caps.cost_output_per_mtok * (usage.output as f64) / 1_000_000.0;
2137 let actual_cost = input_cost + output_cost;
2138 cost_usd += actual_cost;
2139 estimated_cost_unconfirmed =
2140 (estimated_cost_unconfirmed - actual_cost).max(0.0);
2141
2142 let _ = event_tx.send(Event::CostUpdate {
2143 run: run_id.clone(),
2144 usd: cost_usd + estimated_cost_unconfirmed,
2145 });
2146 }
2147 BrainEvent::Done(reason) => {
2148 match reason {
2149 crate::event::StopReason::EndTurn => {
2150 let this_empty = assistant_text.trim().is_empty()
2155 && !tool_output_seen_this_completion;
2156 if this_empty && !produced_any_output {
2157 let next_idx = current_chain_idx + 1;
2158 if next_idx < brain_policy.chain.len() {
2159 current_chain_idx = next_idx;
2160 let _ = event_tx.send(Event::ModelSwitched {
2161 run: run_id.clone(),
2162 from: brain.id().to_string(),
2163 to: brain_policy.chain[current_chain_idx]
2164 .id()
2165 .to_string(),
2166 reason: "empty response".into(),
2167 });
2168 continue_agent_loop = true;
2169 break;
2170 }
2171 }
2172 if !assistant_text.trim().is_empty() {
2173 produced_any_output = true;
2174 let mut blocks = Vec::new();
2175 if !reasoning_buf.is_empty() {
2176 blocks.push(ContentBlock::Reasoning {
2177 text: reasoning_buf.clone(),
2178 });
2179 }
2180 blocks.push(ContentBlock::Text {
2181 text: assistant_text.clone(),
2182 });
2183 let assistant_msg = Msg {
2184 role: "assistant".into(),
2185 content: blocks,
2186 };
2187 let turn_messages = vec![assistant_msg.clone()];
2188 let has_verified_tool_context =
2189 tool_output_seen_this_completion
2190 || messages.iter().any(|m| {
2191 m.content.iter().any(|block| {
2192 matches!(
2193 block,
2194 ContentBlock::ToolResult { .. }
2195 )
2196 })
2197 });
2198
2199 if let Some(correction) = self.reasoning.guard_turn(
2200 &turn_messages,
2201 has_verified_tool_context,
2202 ) {
2203 messages.push(assistant_msg);
2204 let _ = event_tx.send(Event::Message {
2205 run: run_id.clone(),
2206 role: "guard".into(),
2207 text: correction.clone(),
2208 });
2209 messages.push(Msg {
2210 role: "user".into(),
2211 content: vec![ContentBlock::Text {
2212 text: format!("SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.", correction),
2213 }],
2214 });
2215 continue_agent_loop = true;
2216 break;
2217 }
2218
2219 if self.reasoning.hallucination_guard {
2223 if let Some(correction) =
2224 crate::reasoning::HallucinationGuard::verify(
2225 &assistant_text,
2226 &tools_called_this_turn,
2227 )
2228 {
2229 let mut blocks2 = Vec::new();
2230 if !reasoning_buf.is_empty() {
2231 blocks2.push(ContentBlock::Reasoning {
2232 text: reasoning_buf.clone(),
2233 });
2234 }
2235 blocks2.push(ContentBlock::Text {
2236 text: assistant_text.clone(),
2237 });
2238 let assistant_msg2 = Msg {
2239 role: "assistant".into(),
2240 content: blocks2,
2241 };
2242 messages.push(assistant_msg2);
2243 let _ = event_tx.send(Event::Message {
2244 run: run_id.clone(),
2245 role: "guard".into(),
2246 text: correction.clone(),
2247 });
2248 messages.push(Msg {
2249 role: "user".into(),
2250 content: vec![ContentBlock::Text {
2251 text: format!("SYSTEM: {}. Call fs_read or search to verify the file/symbol first, then re-state the claim with the raw evidence.", correction),
2252 }],
2253 });
2254 continue_agent_loop = true;
2255 break;
2256 }
2257 }
2258
2259 skill_evidence.push_str(&assistant_text);
2260 skill_evidence.push('\n');
2261 messages.push(assistant_msg);
2262 }
2263
2264 if had_mutation
2270 && self.reasoning.self_critique
2271 && !diffs.is_empty()
2272 {
2273 let review =
2274 crate::reasoning::SelfCritique::pre_mutation_review(
2275 &diffs,
2276 Some(&task.description),
2277 );
2278 let _ = event_tx.send(Event::Message {
2279 run: run_id.clone(),
2280 role: "self-critique".into(),
2281 text: review,
2282 });
2283 }
2284
2285 if had_mutation && verify_attempts < MAX_VERIFY_ATTEMPTS {
2291 if let Some(verify_cmd) =
2292 self.config.defaults.verify_command.clone()
2293 {
2294 verify_attempts += 1;
2295 had_mutation = false;
2296 let parts: Vec<String> = verify_cmd
2297 .split_whitespace()
2298 .map(String::from)
2299 .collect();
2300 if !parts.is_empty() {
2301 let _ = event_tx.send(Event::AgentStatus {
2302 run: run_id.clone(),
2303 role: "verifier".into(),
2304 status: AgentStatus::Working,
2305 note: format!("running `{}`", verify_cmd),
2306 });
2307 let cmd = crate::sandbox::Command {
2308 program: parts[0].clone(),
2309 args: parts[1..].to_vec(),
2310 env: std::collections::HashMap::new(),
2311 workdir: workspace.root.clone(),
2312 };
2313 let limits = crate::sandbox::Limits {
2314 timeout_ms: 300_000,
2315 max_output_bytes: 16_000,
2316 };
2317 match workspace
2318 .sandbox
2319 .exec(&cmd, &limits)
2320 .await
2321 {
2322 Ok(res) if res.exit_code != 0 => {
2323 let _ = event_tx.send(Event::TestResult {
2324 run: run_id.clone(),
2325 passed: 0,
2326 failed: 1,
2327 detail: format!(
2328 "verify `{}` failed (exit {})",
2329 verify_cmd, res.exit_code
2330 ),
2331 });
2332 let out = format!(
2333 "{}\n{}",
2334 res.stdout, res.stderr
2335 );
2336 let tail: String = out
2337 .lines()
2338 .rev()
2339 .take(40)
2340 .collect::<Vec<_>>()
2341 .into_iter()
2342 .rev()
2343 .collect::<Vec<_>>()
2344 .join("\n");
2345 messages.push(Msg {
2346 role: "user".into(),
2347 content: vec![ContentBlock::Text {
2348 text: format!(
2349 "SYSTEM: verification command `{}` FAILED (exit {}). Fix the code, then it will be re-verified. Output:\n{}",
2350 verify_cmd, res.exit_code, tail
2351 ),
2352 }],
2353 });
2354 continue_agent_loop = true;
2355 break;
2356 }
2357 Ok(_) => {
2358 let _ =
2359 event_tx.send(Event::TestResult {
2360 run: run_id.clone(),
2361 passed: 1,
2362 failed: 0,
2363 detail: format!(
2364 "verify `{}` passed",
2365 verify_cmd
2366 ),
2367 });
2368 }
2369 Err(e) => {
2370 let _ = event_tx.send(Event::Message {
2371 run: run_id.clone(),
2372 role: "guard".into(),
2373 text: format!(
2374 "verify command could not run: {}",
2375 e
2376 ),
2377 });
2378 }
2379 }
2380 }
2381 }
2382 }
2383 }
2384 crate::event::StopReason::ToolUse => {
2385 let drained: Vec<_> =
2398 std::mem::take(&mut tool_results_pending);
2399
2400 let mut assistant_blocks = Vec::new();
2401 if !reasoning_buf.is_empty() {
2402 assistant_blocks.push(ContentBlock::Reasoning {
2403 text: reasoning_buf.clone(),
2404 });
2405 }
2406 for (tool_id, tool_name, args, _content, _is_error) in
2407 &drained
2408 {
2409 assistant_blocks.push(ContentBlock::ToolUse {
2410 id: tool_id.clone(),
2411 name: tool_name.clone(),
2412 input: args.clone(),
2413 });
2414 }
2415 messages.push(Msg {
2416 role: "assistant".into(),
2417 content: assistant_blocks,
2418 });
2419
2420 for (tool_id, _tool_name, _args, content, is_error) in
2421 drained
2422 {
2423 messages.push(Msg {
2424 role: "user".into(),
2425 content: vec![ContentBlock::ToolResult {
2426 tool_use_id: tool_id,
2427 content,
2428 is_error: Some(is_error),
2429 }],
2430 });
2431 }
2432 if tool_output_seen_this_completion {
2433 produced_any_output = true;
2434 }
2435 continue_agent_loop =
2436 !waiting_for_approval && !stop_after_tool_result;
2437 break;
2438 }
2439 _ => {}
2440 }
2441 break; }
2443 BrainEvent::Error(msg) => {
2444 let _ = event_tx.send(Event::Error {
2445 run: run_id.clone(),
2446 message: msg.clone(),
2447 });
2448 let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
2449 let next_idx = current_chain_idx + 1;
2450 if next_idx < brain_policy.chain.len() {
2451 current_chain_idx = next_idx;
2452 let switch_ctx = format!(
2453 "{} -> {}",
2454 brain.id(),
2455 brain_policy.chain[current_chain_idx].id()
2456 );
2457 let _ = event_tx.send(Event::ModelSwitched {
2458 run: run_id.clone(),
2459 from: brain.id().to_string(),
2460 to: brain_policy.chain[current_chain_idx].id().to_string(),
2461 reason: msg,
2462 });
2463 let _ = self
2464 .hooks
2465 .execute(&HookEvent::OnModelSwitched, &switch_ctx)
2466 .await;
2467 continue_agent_loop = true;
2468 } else {
2469 had_error = true;
2470 last_error = Some(msg);
2471 }
2472 break;
2473 }
2474 }
2475 }
2476
2477 if !continue_agent_loop && !had_error {
2482 let this_empty =
2483 assistant_text.trim().is_empty() && !tool_output_seen_this_completion;
2484 if this_empty && !produced_any_output {
2485 let next_idx = current_chain_idx + 1;
2486 if next_idx < brain_policy.chain.len() {
2487 let _ = event_tx.send(Event::ModelSwitched {
2488 run: run_id.clone(),
2489 from: brain.id().to_string(),
2490 to: brain_policy.chain[next_idx].id().to_string(),
2491 reason: "empty response".into(),
2492 });
2493 current_chain_idx = next_idx;
2494 continue;
2495 }
2496 }
2497 }
2498
2499 if continue_agent_loop {
2500 continue;
2501 }
2502 break; }
2504 Err(e) => {
2505 let err_msg = format!("{}", e);
2506 let _ = event_tx.send(Event::Error {
2507 run: run_id.clone(),
2508 message: err_msg.clone(),
2509 });
2510
2511 let next_idx = current_chain_idx + 1;
2513 if next_idx < brain_policy.chain.len() {
2514 current_chain_idx = next_idx;
2515 let _ = event_tx.send(Event::ModelSwitched {
2516 run: run_id.clone(),
2517 from: brain.id().to_string(),
2518 to: brain_policy.chain[current_chain_idx].id().to_string(),
2519 reason: err_msg,
2520 });
2521 } else {
2522 had_error = true;
2523 last_error = Some(err_msg);
2524 break;
2525 }
2526 }
2527 }
2528 }
2529
2530 let final_input = if total_input > 0 {
2532 total_input
2533 } else {
2534 total_input + estimated_input_unconfirmed
2535 };
2536 let final_output = if total_output > 0 {
2537 total_output
2538 } else {
2539 total_output + estimated_output_unconfirmed
2540 };
2541 let _ = event_tx.send(Event::TokenUsage {
2542 run: run_id.clone(),
2543 input: final_input,
2544 output: final_output,
2545 });
2546 let _ = event_tx.send(Event::AgentStatus {
2548 run: run_id.clone(),
2549 role: "coder".into(),
2550 status: AgentStatus::Done,
2551 note: format!("completed · {}↑ {}↓ tok", final_input, final_output),
2552 });
2553
2554 let outcome = OutcomeSummary {
2555 status: if had_error {
2556 format!(
2557 "error: {}",
2558 last_error.unwrap_or_else(|| "run failed".into())
2559 )
2560 } else if waiting_for_approval {
2561 "waiting_for_approval".into()
2562 } else if denied_by_approval {
2563 "denied".into()
2564 } else {
2565 "completed".into()
2566 },
2567 diffs,
2568 cost_usd: cost_usd + estimated_cost_unconfirmed,
2569 tokens: TokenUsage {
2570 input: total_input + estimated_input_unconfirmed,
2571 output: total_output + estimated_output_unconfirmed,
2572 },
2573 };
2574
2575 if let Some(mem) = &self.memory {
2577 let _ = mem.save_task(&crate::memory::TaskMem {
2578 run_id: run_id.0.clone(),
2579 messages: messages.clone(),
2580 created_at: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(),
2581 });
2582 }
2583
2584 if outcome.status == "completed" {
2586 if let Some(skills) = &self.skills {
2587 if let Some(candidate) = Curator::propose_skill_if_missing(
2588 &task.description,
2589 &skill_evidence,
2590 skills.as_ref(),
2591 ) {
2592 let skill_name = candidate.name.clone();
2593 let _ = event_tx.send(Event::SkillLearned {
2594 run: run_id.clone(),
2595 name: skill_name.clone(),
2596 });
2597 let _ = self
2598 .hooks
2599 .execute(&HookEvent::OnSkillLearned, &skill_name)
2600 .await;
2601 let _ = skills.add(candidate);
2602 }
2603 }
2604
2605 if let Some(mem) = &self.memory {
2610 let events = events_from_messages(&run_id, &messages);
2611 Distiller::distill(mem, &events, &task.description).await;
2612 }
2613 }
2614
2615 let _ = event_tx.send(Event::RunFinished {
2616 run: run_id.clone(),
2617 outcome: outcome.clone(),
2618 });
2619
2620 let _ = self
2622 .hooks
2623 .execute(&HookEvent::PostRun, &task.description)
2624 .await;
2625
2626 Ok(outcome)
2627 }
2628}
2629
#[cfg(test)]
mod tests {
    use super::*;

    /// A task description carrying an `[uploaded: <path>]` line for a PNG on
    /// disk must produce a leading text block and at least one non-empty
    /// base64 `image/png` block.
    #[test]
    fn initial_user_content_blocks_embeds_uploaded_images() {
        // The eight-byte PNG signature followed by four zero bytes.
        let png_stub: [u8; 12] = [
            0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n', 0, 0, 0, 0,
        ];

        let workspace = tempfile::tempdir().expect("tempdir");
        let shot_path = workspace.path().join("shot.png");
        std::fs::write(&shot_path, png_stub).expect("write image");

        let description = format!(
            "analyse this\n\n[Attached files]\n### file: shot.png\n[uploaded: {}]",
            shot_path.display()
        );
        let blocks = initial_user_content_blocks(workspace.path(), &description);

        // The prompt text has to come first.
        let leads_with_text = matches!(blocks.first(), Some(ContentBlock::Text { .. }));
        assert!(leads_with_text);

        // Somewhere after it, the uploaded file must show up as an inline PNG.
        let has_png = blocks.iter().any(|candidate| match candidate {
            ContentBlock::Image {
                source: ImageSource::Base64 { media_type, data },
            } => media_type == "image/png" && !data.is_empty(),
            _ => false,
        });
        assert!(has_png);
    }

    /// Tool output that mixes text with an image must convert to a leading
    /// text block plus an image block whose payload is the base64 encoding of
    /// the raw bytes (`[1, 2, 3]` encodes to `"AQID"`).
    #[test]
    fn tool_result_content_blocks_preserves_images() {
        let tool_output = [
            Block::Text("screenshot captured".into()),
            Block::Image {
                data: vec![1, 2, 3],
                mime: "image/png".into(),
            },
        ];
        let blocks = tool_result_content_blocks(&tool_output);

        let leads_with_text = matches!(blocks.first(), Some(ContentBlock::Text { .. }));
        assert!(leads_with_text);

        let has_encoded_png = blocks.iter().any(|candidate| match candidate {
            ContentBlock::Image {
                source: ImageSource::Base64 { media_type, data },
            } => media_type == "image/png" && data == "AQID",
            _ => false,
        });
        assert!(has_encoded_png);
    }
}