1use async_trait::async_trait;
2use futures::StreamExt;
3use serde_json::json;
4use std::path::PathBuf;
5use std::sync::Arc;
6use tokio::sync::mpsc;
7
8use crate::agent::AgentStore;
9use crate::autonomy::{AutonomyContract, Checkpoints, GitCheckpoints};
10use crate::capabilities::{Curator, SkillLibrary};
11use crate::config::Config;
12use crate::event::{
13 AgentStatus, AutonomyLevel, Block, Decision, Event, OutcomeSummary, RiskLevel, RunId,
14 TokenUsage,
15};
16use crate::extras::Distiller;
17use crate::hooks::{HookEvent, HookRegistry};
18use crate::instructions::InstructionDoc;
19use crate::memory::{Fact, Memory, MemoryDoc, MemoryDocKind};
20use crate::permissions::PermissionContext;
21use crate::provider::{
22 Brain, BrainError, BrainEvent, BrainRequest, ContentBlock, ImageSource, Msg, PromptCacheConfig,
23 ToolSpec,
24};
25use crate::reasoning::ReasoningEngine;
26use crate::redaction::RedactionFilter;
27use crate::router::{BudgetState, Router, TaskTier};
28use crate::sandbox::Sandbox;
29use crate::tools::{ToolCtx, ToolRegistry};
30
31pub mod scorer;
32pub mod treesitter;
33
34#[derive(Debug, Clone)]
37pub struct Identity {
38 pub name: String,
39 pub role: String,
40 pub personality: String,
41}
42
43impl Default for Identity {
44 fn default() -> Self {
45 Self {
46 name: "sparrow".into(),
47 role: "software engineer".into(),
48 personality: "concise, competent, helpful".into(),
49 }
50 }
51}
52
53pub struct BrainPolicy {
56 pub chain: Vec<Arc<dyn Brain>>,
58 pub current_index: usize,
59}
60
61impl BrainPolicy {
62 pub fn current(&self) -> Option<Arc<dyn Brain>> {
63 self.chain.get(self.current_index).cloned()
64 }
65
66 pub fn next(&mut self) -> Option<Arc<dyn Brain>> {
67 self.current_index += 1;
68 self.current()
69 }
70}
71
72pub struct Workspace {
75 pub root: PathBuf,
76 pub sandbox: Arc<dyn Sandbox>,
77}
78
79pub struct AgentRun {
82 pub id: RunId,
83 pub identity: Identity,
84 pub brain_policy: BrainPolicy,
85 pub autonomy: AutonomyContract,
86 pub tools: Arc<ToolRegistry>,
87 pub workspace: Workspace,
88}
89
90fn estimate_text_tokens(text: &str) -> u64 {
91 let chars = text.chars().count() as u64;
92 ((chars + 3) / 4).max(1)
93}
94
95fn estimate_content_tokens(blocks: &[ContentBlock]) -> u64 {
96 blocks
97 .iter()
98 .map(|block| match block {
99 ContentBlock::Text { text } => estimate_text_tokens(text),
100 ContentBlock::Image { source } => match source {
101 crate::provider::ImageSource::Base64 { data, .. } => {
102 256 + estimate_text_tokens(data).min(2_000)
103 }
104 crate::provider::ImageSource::Url { url } => 256 + estimate_text_tokens(url),
105 },
106 ContentBlock::ToolUse { name, input, .. } => {
107 estimate_text_tokens(name) + estimate_text_tokens(&input.to_string())
108 }
109 ContentBlock::ToolResult { content, .. } => 8 + estimate_content_tokens(content),
110 ContentBlock::Reasoning { text } => estimate_text_tokens(text),
111 })
112 .sum()
113}
114
115fn estimate_request_tokens(req: &BrainRequest) -> u64 {
116 let system = req.system.as_deref().map(estimate_text_tokens).unwrap_or(0);
117 let messages: u64 = req
118 .messages
119 .iter()
120 .map(|msg| estimate_text_tokens(&msg.role) + estimate_content_tokens(&msg.content) + 4)
121 .sum();
122 let tools: u64 = req
123 .tools
124 .iter()
125 .map(|tool| {
126 estimate_text_tokens(&tool.name)
127 + estimate_text_tokens(&tool.description)
128 + estimate_text_tokens(&tool.input_schema.to_string())
129 })
130 .sum();
131 system + messages + tools
132}
133
134fn base64_encode(data: &[u8]) -> String {
135 const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
136 let mut out = String::with_capacity(data.len().div_ceil(3) * 4);
137 for chunk in data.chunks(3) {
138 let b0 = chunk[0] as u32;
139 let b1 = if chunk.len() > 1 { chunk[1] as u32 } else { 0 };
140 let b2 = if chunk.len() > 2 { chunk[2] as u32 } else { 0 };
141 let triple = (b0 << 16) | (b1 << 8) | b2;
142 out.push(CHARS[((triple >> 18) & 63) as usize] as char);
143 out.push(CHARS[((triple >> 12) & 63) as usize] as char);
144 out.push(if chunk.len() > 1 {
145 CHARS[((triple >> 6) & 63) as usize] as char
146 } else {
147 '='
148 });
149 out.push(if chunk.len() > 2 {
150 CHARS[(triple & 63) as usize] as char
151 } else {
152 '='
153 });
154 }
155 out
156}
157
158fn image_block_from_path(path: &std::path::Path) -> Option<ContentBlock> {
159 let mime = mime_guess::from_path(path).first_or_octet_stream();
160 if !mime.type_().as_str().eq_ignore_ascii_case("image") {
161 return None;
162 }
163 let data = std::fs::read(path).ok()?;
164 Some(ContentBlock::Image {
165 source: ImageSource::Base64 {
166 media_type: mime.to_string(),
167 data: base64_encode(&data),
168 },
169 })
170}
171
172fn collect_uploaded_paths(description: &str) -> Vec<String> {
173 let mut paths = Vec::new();
174 for line in description.lines() {
175 let Some(idx) = line.find("uploaded:") else {
176 continue;
177 };
178 let rest = line[idx + "uploaded:".len()..].trim();
179 let path = rest
180 .strip_prefix('[')
181 .unwrap_or(rest)
182 .split(']')
183 .next()
184 .unwrap_or(rest)
185 .trim()
186 .trim_matches('"')
187 .trim_matches('\'');
188 if !path.is_empty() {
189 paths.push(path.to_string());
190 }
191 }
192 paths
193}
194
195fn initial_user_content_blocks(
196 workspace_root: &std::path::Path,
197 description: &str,
198) -> Vec<ContentBlock> {
199 let mut blocks = vec![ContentBlock::Text {
200 text: description.to_string(),
201 }];
202 let mut seen = std::collections::HashSet::new();
203 for raw_path in collect_uploaded_paths(description) {
204 let path = std::path::PathBuf::from(&raw_path);
205 let full_path = if path.is_absolute() {
206 path
207 } else {
208 workspace_root.join(path)
209 };
210 if !seen.insert(full_path.clone()) {
211 continue;
212 }
213 if let Some(block) = image_block_from_path(&full_path) {
214 blocks.push(block);
215 }
216 }
217 blocks
218}
219
220pub fn summarize_model_chain(chain_ids: &[String], limit: usize) -> String {
221 if chain_ids.is_empty() {
222 return "aucun modèle disponible".into();
223 }
224 let limit = limit.max(1);
225 let mut visible: Vec<String> = chain_ids.iter().take(limit).cloned().collect();
226 if chain_ids.len() > limit {
227 visible.push(format!("+{} autres fallbacks", chain_ids.len() - limit));
228 }
229 visible.join(" -> ")
230}
231
232fn prompt_cache_key(scope: &str, workspace_root: &std::path::Path, tools: &[ToolSpec]) -> String {
233 use std::hash::{Hash, Hasher};
234
235 let mut hasher = std::collections::hash_map::DefaultHasher::new();
236 scope.hash(&mut hasher);
237 workspace_root.display().to_string().hash(&mut hasher);
238 for tool in tools {
239 tool.name.hash(&mut hasher);
240 tool.description.hash(&mut hasher);
241 tool.input_schema.to_string().hash(&mut hasher);
242 }
243 format!("sparrow-{}-{:016x}", scope, hasher.finish())
244}
245
246fn read_git_context(workspace_root: &PathBuf) -> Option<String> {
252 use std::process::Command;
253 use std::time::Duration;
254 if !workspace_root.join(".git").exists() {
255 return None;
256 }
257 fn run(workspace_root: &PathBuf, args: &[&str]) -> Option<String> {
258 let mut cmd = Command::new("git");
259 cmd.arg("-C").arg(workspace_root).args(args);
260 let child = cmd
261 .stdout(std::process::Stdio::piped())
262 .stderr(std::process::Stdio::null())
263 .spawn()
264 .ok()?;
265 let deadline = std::time::Instant::now() + Duration::from_millis(1_500);
268 let mut child = child;
269 loop {
270 match child.try_wait().ok()? {
271 Some(_) => break,
272 None if std::time::Instant::now() > deadline => {
273 let _ = child.kill();
274 return None;
275 }
276 None => std::thread::sleep(Duration::from_millis(20)),
277 }
278 }
279 let output = child.wait_with_output().ok()?;
280 if !output.status.success() {
281 return None;
282 }
283 let s = String::from_utf8(output.stdout).ok()?;
284 Some(s.trim().to_string())
285 }
286
287 let branch = run(workspace_root, &["rev-parse", "--abbrev-ref", "HEAD"])
288 .filter(|b| !b.is_empty())
289 .unwrap_or_else(|| "(detached)".into());
290 let head = run(workspace_root, &["rev-parse", "--short", "HEAD"]).unwrap_or_default();
291 let head_subject = run(workspace_root, &["log", "-1", "--pretty=%s"]).unwrap_or_default();
292 let status_porcelain = run(workspace_root, &["status", "--porcelain"]).unwrap_or_default();
293
294 let mut block = String::from("## Git context\n");
295 block.push_str(&format!("- branch: `{}`\n", branch));
296 if !head.is_empty() {
297 if head_subject.is_empty() {
298 block.push_str(&format!("- HEAD: `{}`\n", head));
299 } else {
300 block.push_str(&format!("- HEAD: `{}` — {}\n", head, head_subject));
301 }
302 }
303 if status_porcelain.is_empty() {
304 block.push_str("- working tree: clean\n");
305 } else {
306 let lines: Vec<&str> = status_porcelain.lines().collect();
307 let shown: Vec<&str> = lines.iter().take(8).copied().collect();
308 block.push_str(&format!("- working tree: {} dirty file(s)\n", lines.len()));
309 for line in shown {
310 block.push_str(&format!(" {}\n", line));
311 }
312 if lines.len() > 8 {
313 block.push_str(&format!(" … {} more\n", lines.len() - 8));
314 }
315 }
316 block.push_str(
317 "\nUse this snapshot to ground answers about \"what changed\" or \
318 \"what branch are we on\" without re-running git. It is the state \
319 at the start of THIS run; if you make file edits, the snapshot \
320 here is stale by the next turn.",
321 );
322 Some(block)
323}
324
325fn build_system_prompt(
326 identity: &Identity,
327 workspace_root: &PathBuf,
328 facts: &[Fact],
329 memory_docs: &[MemoryDoc],
330 instruction_docs: &[InstructionDoc],
331 skills: &[crate::capabilities::Skill],
332 skill_catalog: &[crate::capabilities::Skill],
333) -> String {
334 let mut parts = vec![format!(
335 r#"You are {name}, a {role}.
336
337Personality: {personality}
338
339You are working in the workspace: {workspace}
340You have access to tools to read, write, edit, search, and execute code.
341Always use absolute or relative paths from the workspace root.
342Be concise and direct. When making edits, use exact string replacements.
343Before making changes, read the relevant files first to understand the codebase.
344
345You are not a standalone chat model. You are the Sparrow agent surface backed by an
346external routing engine. Sparrow's core feature is automatic model routing: every
347task is classified by tier, tool need, vision need, local preference, budget, and
348provider availability, then a ranked fallback chain of models is selected before
349this answer starts. If the user asks how routing works, explain Sparrow's actual
350pipeline and the active route for the current run. Never claim that no routing
351exists just because the current brain is a single selected model.
352
353## When to spawn sub-agents (proactively)
354You have a `subagent_spawn` tool. Use it on your own initiative — do not wait for
355the user to ask — whenever the request contains independent sub-problems that can
356run in parallel, or a long-running step that would block the main flow:
357- multi-file refactors across unrelated modules (one subagent per module)
358- "implement X, then test it" → spawn a verifier subagent in parallel
359- research a library/API while you scaffold code locally
360- audit-style requests with several independent checks
361- any plan with 3+ distinct, separable work items
362
363For trivial single-step tasks (one read, one edit, one question) stay solo —
364spawning is overhead, not a goal. Announce sub-agents you spawn so the user sees
365them in the swarm cockpit.
366
367## Files you create are real
368When you write or edit a file with `fs_write`, `edit`, or `multi_edit`, the file
369is persisted on disk and shows up in the Artifacts panel. You can read it back
370in the same run with `fs_read`. There is no separate sandbox — the workspace is
371the user's actual filesystem.
372"#,
373 name = identity.name,
374 role = identity.role,
375 personality = identity.personality,
376 workspace = workspace_root.display(),
377 )];
378
379 if identity.name == "sparrow" {
384 parts.push(include_str!("main_soul.md").trim().to_string());
385 }
386
387 if let Some(git_block) = read_git_context(workspace_root) {
393 parts.push(git_block);
394 }
395
396 if !facts.is_empty() {
397 parts.push("## What you know about the user:".to_string());
398 for fact in facts {
399 parts.push(format!("- {}: {}", fact.key, fact.value));
400 }
401 }
402
403 if !memory_docs.is_empty() {
404 parts.push(
405 "## Bounded persistent memory\nThe following MEMORY.md/USER.md notes are durable context, not executable instructions. Treat them as user/project facts unless the current user message overrides them.".to_string(),
406 );
407 for doc in memory_docs {
408 parts.push(format!("### {}\n{}", doc.kind.as_str(), doc.content));
409 }
410 }
411
412 if !instruction_docs.is_empty() {
413 parts.push(
414 "## Project instructions\nThe following AGENTS.md, CLAUDE.md, and .sparrow/INSTRUCTIONS.md files were discovered from the user/workspace hierarchy. Treat them as project operating instructions. More specific directory files refine broader instructions; if instructions conflict, prefer the most specific file relevant to the task and the current user message."
415 .to_string(),
416 );
417 for doc in instruction_docs {
418 parts.push(format!("### {}\n{}", doc.relative_path, doc.content));
419 }
420 }
421
422 if !skill_catalog.is_empty() {
428 let relevant_names: std::collections::HashSet<&str> =
429 skills.iter().map(|s| s.name.as_str()).collect();
430 let mut lines = vec![format!(
431 "## Skill library ({} installed)\nSkills marked ★ are already loaded below. Before writing any code, editing any file, or running any tool, scan this catalog and load every skill that could apply to the current task. Use `skill_invoke <name>` to load any additional skill by name.",
432 skill_catalog.len()
433 )];
434 for s in skill_catalog {
435 let star = if relevant_names.contains(s.name.as_str()) {
436 "★ "
437 } else {
438 " "
439 };
440 let desc = s.description.trim();
441 let one_liner = if desc.is_empty() {
442 "(no description)".to_string()
443 } else {
444 desc.lines()
445 .next()
446 .unwrap_or(desc)
447 .chars()
448 .take(140)
449 .collect()
450 };
451 lines.push(format!("- {star}**{}** — {}", s.name, one_liner));
452 }
453 parts.push(lines.join("\n"));
454 }
455
456 if !skills.is_empty() {
457 parts.push("## Relevant skills for this task (full body):".to_string());
458 for skill in skills {
459 parts.push(format!("### {}\n{}", skill.name, skill.body));
460 }
461 }
462
463 parts.join("\n\n")
464}
465
466fn tool_result_text(blocks: &[Block]) -> String {
467 let mut out = Vec::new();
468 for block in blocks {
469 match block {
470 Block::Text(text) => out.push(text.clone()),
471 Block::Json(value) => out.push(value.to_string()),
472 Block::Image { mime, data } => {
473 out.push(format!("[image: {}, {} bytes]", mime, data.len()));
474 }
475 Block::Diff { file, patch } => out.push(format!("diff for {}\n{}", file, patch)),
476 }
477 }
478 out.join("\n")
479}
480
481fn tool_result_content_blocks(blocks: &[Block]) -> Vec<ContentBlock> {
482 let mut out = Vec::new();
483 let text = tool_result_text(blocks);
484 if !text.trim().is_empty() {
485 out.push(ContentBlock::Text { text });
486 }
487 for block in blocks {
488 if let Block::Image { data, mime } = block {
489 out.push(ContentBlock::Image {
490 source: ImageSource::Base64 {
491 media_type: mime.clone(),
492 data: base64_encode(data),
493 },
494 });
495 }
496 }
497 out
498}
499
500fn events_from_messages(run_id: &RunId, messages: &[Msg]) -> Vec<Event> {
504 let mut events = Vec::new();
505 for msg in messages {
506 for block in &msg.content {
507 match block {
508 ContentBlock::ToolUse { name, input, .. } => {
509 events.push(Event::ToolUseProposed {
510 run: run_id.clone(),
511 id: String::new(),
512 name: name.clone(),
513 args: input.clone(),
514 risk: RiskLevel::ReadOnly,
515 });
516 }
517 ContentBlock::Text { text } if msg.role == "assistant" => {
518 events.push(Event::ThinkingDelta {
519 run: run_id.clone(),
520 text: text.clone(),
521 });
522 }
523 _ => {}
524 }
525 }
526 }
527 events
528}
529
530#[derive(Debug, Clone)]
533pub struct Task {
534 pub description: String,
535 pub context: Vec<Msg>,
536}
537
538#[derive(Debug, Clone)]
541pub struct Preflight {
542 pub tier: TaskTier,
543 pub chain: Vec<String>,
544 pub est_input_range: (u64, u64),
545 pub est_output_range: (u64, u64),
546 pub est_cost_range: (f64, f64),
547}
548
549pub struct Engine {
552 router: Arc<dyn Router>,
553 config: Config,
554 identity: Option<Identity>,
555 memory: Option<Arc<dyn Memory>>,
556 skills: Option<Arc<dyn SkillLibrary>>,
557 redaction: RedactionFilter,
558 approval_handler: Option<Arc<dyn ApprovalHandler>>,
559 reasoning: ReasoningEngine,
560 hooks: HookRegistry,
561 agent_store: Option<Arc<dyn AgentStore>>,
562 org_policy: Option<crate::onboarding::enterprise::OrgPolicy>,
563 classify_cache: std::sync::Mutex<std::collections::HashMap<u64, crate::router::TaskTier>>,
565}
566
567#[derive(Debug, Clone)]
568pub struct ApprovalRequest {
569 pub run: RunId,
570 pub id: String,
571 pub tool_name: String,
572 pub risk: RiskLevel,
573 pub args: serde_json::Value,
574 pub summary: String,
575}
576
577#[async_trait]
578pub trait ApprovalHandler: Send + Sync {
579 async fn request_approval(&self, request: ApprovalRequest) -> Decision;
580}
581
582impl Engine {
583 pub fn new(router: Arc<dyn Router>, config: Config) -> Self {
584 let mut hooks = HookRegistry::new(Arc::new(crate::sandbox::LocalSandbox::new(
585 std::env::current_dir().unwrap_or_default(),
586 )));
587 hooks.load(config.hooks.clone());
588 Self {
589 router,
590 config,
591 identity: None,
592 memory: None,
593 skills: None,
594 redaction: RedactionFilter::new(),
595 approval_handler: None,
596 reasoning: ReasoningEngine::default(),
597 hooks,
598 agent_store: None,
599 org_policy: None,
600 classify_cache: std::sync::Mutex::new(std::collections::HashMap::new()),
601 }
602 }
603
604 pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
605 let secrets: Vec<String> = memory
607 .all_facts()
608 .iter()
609 .filter(|f| f.key.starts_with("secret:"))
610 .map(|f| f.value.clone())
611 .collect();
612 self.redaction.load_secrets(secrets);
613 self.memory = Some(memory);
614 self
615 }
616
617 pub fn with_skills(mut self, skills: Arc<dyn SkillLibrary>) -> Self {
618 self.skills = Some(skills);
619 self
620 }
621
622 pub fn with_identity(mut self, identity: Identity) -> Self {
623 self.identity = Some(identity);
624 self
625 }
626
627 pub fn with_agent_store(mut self, store: Arc<dyn AgentStore>) -> Self {
628 self.agent_store = Some(store);
629 self
630 }
631
632 pub fn with_org_policy(mut self, policy: crate::onboarding::enterprise::OrgPolicy) -> Self {
633 self.org_policy = Some(policy);
634 self
635 }
636
637 pub fn with_hooks_config(mut self, hooks: Vec<crate::hooks::Hook>) -> Self {
638 self.hooks.load(hooks);
639 self
640 }
641
642 pub fn with_approval_handler(mut self, approval_handler: Arc<dyn ApprovalHandler>) -> Self {
643 self.approval_handler = Some(approval_handler);
644 self
645 }
646
647 fn classify_with_confidence(&self, task: &str) -> (TaskTier, bool) {
652 let lower = task.to_lowercase();
653 if lower.contains("vision") || lower.contains("image") || lower.contains("screenshot") {
654 (TaskTier::Vision, false)
655 } else if lower.contains("architecture")
656 || lower.contains("refactor")
657 || lower.contains("audit")
658 || lower.contains("répare")
659 || lower.contains("repare")
660 || lower.contains("livrer")
661 || lower.contains("v1")
662 {
663 (TaskTier::Hard, false)
664 } else if lower.contains("bug")
665 || lower.contains("fix")
666 || lower.contains("corrige")
667 || lower.contains("debug")
668 {
669 (TaskTier::Small, false)
670 } else if lower.contains("routing")
671 || lower.contains("routeur")
672 || lower.contains("modèle")
673 || lower.contains("modele")
674 || lower.contains("model")
675 || lower.contains("sélectionne")
676 || lower.contains("selectionne")
677 {
678 (TaskTier::Small, false)
679 } else if lower.len() < 80 {
680 (TaskTier::Trivial, true)
682 } else {
683 (TaskTier::Medium, true)
684 }
685 }
686
687 async fn classify_via_brain(&self, task: &str, brain: &dyn Brain) -> Option<TaskTier> {
690 let req = BrainRequest {
691 system: Some(
692 "You are a task classifier. Output exactly one word: trivial, small, medium, hard, or vision."
693 .into(),
694 ),
695 messages: vec![Msg {
696 role: "user".into(),
697 content: vec![ContentBlock::Text {
698 text: format!(
699 "Classify this coding task into exactly one tier (trivial, small, medium, hard, vision):\n\n{}\n\nTier:",
700 task
701 ),
702 }],
703 }],
704 tools: vec![],
705 max_tokens: 6,
706 temperature: 0.0,
707 stop: vec![],
708 cache: PromptCacheConfig::disabled(),
709 };
710 let mut stream = brain.complete(req).await.ok()?;
711 let mut out = String::new();
712 while let Some(ev) = stream.next().await {
713 match ev {
714 BrainEvent::TextDelta(t) => out.push_str(&t),
715 BrainEvent::Done(_) => break,
716 BrainEvent::Error(_) => return None,
717 _ => {}
718 }
719 }
720 let word = out.trim().to_lowercase();
721 let word = word.split_whitespace().next().unwrap_or("");
722 match word {
723 "trivial" => Some(TaskTier::Trivial),
724 "small" => Some(TaskTier::Small),
725 "medium" => Some(TaskTier::Medium),
726 "hard" => Some(TaskTier::Hard),
727 "vision" => Some(TaskTier::Vision),
728 _ => None,
729 }
730 }
731
732 fn task_summary(&self, task: &str, tier: &TaskTier) -> String {
733 let lower = task.to_lowercase();
734 if lower.contains("routing")
735 || lower.contains("routeur")
736 || lower.contains("modèle")
737 || lower.contains("modele")
738 || lower.contains("model")
739 {
740 "question meta sur le routing modele".into()
741 } else if lower.contains("code") || lower.contains("bug") || lower.contains("fix") {
742 format!("requete code/{:?}", tier).to_lowercase()
743 } else if lower.contains("config") || lower.contains("provider") {
744 "configuration provider/modele".into()
745 } else {
746 format!("requete {:?}", tier).to_lowercase()
747 }
748 }
749
750 fn is_routing_question(&self, task: &str) -> bool {
751 let lower = task.to_lowercase();
752 (lower.contains("routing") || lower.contains("routeur") || lower.contains("route"))
753 && (lower.contains("modèle") || lower.contains("modele") || lower.contains("model"))
754 || lower.contains("sélectionne tu le model")
755 || lower.contains("selectionne tu le model")
756 }
757
758 fn requires_tools(&self, task: &str, tier: &TaskTier) -> bool {
759 let lower = task.to_lowercase();
760 let tool_keywords = [
761 "outil",
762 "tools",
763 "fichier",
764 "file",
765 "readme",
766 ".rs",
767 ".ts",
768 ".js",
769 ".html",
770 ".md",
771 "repo",
772 "dossier",
773 "workspace",
774 "git",
775 "test",
776 "build",
777 "cargo",
778 "npm",
779 "pnpm",
780 "corrige",
781 "fix",
782 "debug",
783 "bug",
784 "répare",
785 "repare",
786 "modifie",
787 "édite",
788 "edite",
789 "ajoute",
790 "supprime",
791 "écris",
792 "ecris",
793 "write",
794 "create",
795 "crée",
796 "cree",
797 "audit",
798 ];
799
800 if tool_keywords.iter().any(|kw| lower.contains(kw)) {
801 return true;
802 }
803
804 matches!(tier, TaskTier::Medium | TaskTier::Hard | TaskTier::Vision)
805 }
806
807 fn requires_vision(&self, task: &str, tier: &TaskTier) -> bool {
808 let lower = task.to_lowercase();
809 matches!(tier, TaskTier::Vision)
810 || [
811 "image",
812 "screenshot",
813 "capture",
814 "photo",
815 "vision",
816 "logo",
817 "visuel",
818 "interface graphique",
819 ]
820 .iter()
821 .any(|kw| lower.contains(kw))
822 }
823
824 fn routing_explanation(
825 &self,
826 tier: &TaskTier,
827 need: &crate::router::RoutingNeed,
828 chain_ids: &[String],
829 ) -> String {
830 let chain = summarize_model_chain(chain_ids, 5);
831 format!(
832 "Je suis Sparrow, donc je ne réponds pas comme un modèle isolé: avant chaque run, mon routeur classe ta demande puis choisit une chaîne de modèles.\n\nPour cette requête, j'ai détecté: tier `{}` · tools `{}` · vision `{}` · local `{}`.\n\nJe sélectionne ensuite le modèle avec ces critères: adéquation aux capacités demandées, support des tools, besoin vision, préférence local/free-first, budget restant, latence, taille de contexte, puis disponibilité provider. Le résultat est une fallback chain, pas un seul choix figé: `{}`.\n\nConcrètement: une question simple ou meta doit aller vers le modèle le moins coûteux capable de répondre; une tâche code complexe monte vers un modèle plus fort; une tâche avec fichiers/tools exige un modèle compatible tools; une tâche image demande vision; si un provider échoue, je bascule au suivant dans la chaîne.",
833 tier.as_str(),
834 need.required_tools,
835 need.required_vision,
836 need.prefer_local,
837 chain
838 )
839 }
840
841 async fn summarize_messages(&self, brain: &dyn Brain, middle: &[Msg]) -> Option<String> {
844 if middle.is_empty() {
845 return None;
846 }
847 let mut transcript = String::new();
849 for m in middle {
850 for block in &m.content {
851 match block {
852 ContentBlock::Text { text } => {
853 transcript.push_str(&format!("[{}] {}\n", m.role, text));
854 }
855 ContentBlock::ToolUse { name, .. } => {
856 transcript.push_str(&format!("[{}] (tool: {})\n", m.role, name));
857 }
858 ContentBlock::ToolResult { .. } => {
859 transcript.push_str(&format!("[{}] (tool result)\n", m.role));
860 }
861 _ => {}
862 }
863 }
864 }
865 if transcript.len() > 12_000 {
866 transcript.truncate(12_000);
867 }
868 let req = BrainRequest {
869 system: Some(
870 "Summarize this agent conversation in <=200 tokens. Preserve: files edited, \
871 decisions made, current state, and any unfinished work. Plain text only."
872 .into(),
873 ),
874 messages: vec![Msg {
875 role: "user".into(),
876 content: vec![ContentBlock::Text { text: transcript }],
877 }],
878 tools: vec![],
879 max_tokens: 300,
880 temperature: 0.0,
881 stop: vec![],
882 cache: PromptCacheConfig::disabled(),
883 };
884 let mut stream = brain.complete(req).await.ok()?;
885 let mut out = String::new();
886 while let Some(ev) = stream.next().await {
887 match ev {
888 BrainEvent::TextDelta(t) => out.push_str(&t),
889 BrainEvent::Done(_) => break,
890 BrainEvent::Error(_) => return None,
891 _ => {}
892 }
893 }
894 let out = out.trim().to_string();
895 if out.is_empty() { None } else { Some(out) }
896 }
897
898 pub fn preflight(&self, task_desc: &str) -> Preflight {
902 let (tier, _ambiguous) = self.classify_with_confidence(task_desc);
903 let need = crate::router::RoutingNeed {
904 tier: tier.clone(),
905 required_tools: self.requires_tools(task_desc, &tier),
906 required_vision: self.requires_vision(task_desc, &tier),
907 prefer_local: false,
908 };
909 let budget = BudgetState {
910 daily_limit_usd: self.config.budget.daily_usd,
911 daily_spent_usd: 0.0,
912 session_limit_usd: self.config.budget.session_usd,
913 session_spent_usd: 0.0,
914 };
915 let chain = self.router.select(&need, &budget);
916 let (in_lo, in_hi, out_lo, out_hi): (u64, u64, u64, u64) = match tier {
918 TaskTier::Trivial => (800, 4_000, 100, 1_000),
919 TaskTier::Small => (3_000, 12_000, 500, 3_000),
920 TaskTier::Medium => (8_000, 40_000, 2_000, 10_000),
921 TaskTier::Hard => (25_000, 120_000, 5_000, 25_000),
922 TaskTier::Vision => (8_000, 40_000, 2_000, 8_000),
923 };
924 let price = chain.first().map(|b| b.caps());
925 let cost = |tin: u64, tout: u64| -> f64 {
926 price
927 .as_ref()
928 .map(|c| {
929 tin as f64 * c.cost_input_per_mtok / 1_000_000.0
930 + tout as f64 * c.cost_output_per_mtok / 1_000_000.0
931 })
932 .unwrap_or(0.0)
933 };
934 Preflight {
935 tier,
936 chain: chain.iter().map(|b| b.id().to_string()).collect(),
937 est_input_range: (in_lo, in_hi),
938 est_output_range: (out_lo, out_hi),
939 est_cost_range: (cost(in_lo, out_lo), cost(in_hi, out_hi)),
940 }
941 }
942
943 pub async fn drive(
945 &self,
946 task: Task,
947 event_tx: mpsc::UnboundedSender<Event>,
948 ) -> anyhow::Result<OutcomeSummary> {
949 self.drive_with_run_id(task, event_tx, RunId::new()).await
950 }
951
952 pub async fn drive_with_run_id(
954 &self,
955 task: Task,
956 event_tx: mpsc::UnboundedSender<Event>,
957 run_id: RunId,
958 ) -> anyhow::Result<OutcomeSummary> {
959 self.drive_with_inject(task, event_tx, run_id, None).await
960 }
961
962 pub async fn drive_with_inject(
965 &self,
966 task: Task,
967 event_tx: mpsc::UnboundedSender<Event>,
968 run_id: RunId,
969 mut inject_rx: Option<mpsc::UnboundedReceiver<String>>,
970 ) -> anyhow::Result<OutcomeSummary> {
971 let model_override: Option<String>;
973 let clean_description: String;
974 if let Some(rest) = task.description.strip_prefix("__model:") {
975 if let Some(end) = rest.find("__ ") {
976 model_override = Some(rest[..end].to_string());
977 clean_description = rest[end + 3..].to_string();
978 } else {
979 model_override = None;
980 clean_description = task.description.clone();
981 }
982 } else {
983 model_override = None;
984 clean_description = task.description.clone();
985 }
986 let task = Task {
987 description: clean_description,
988 context: task.context,
989 };
990
991 let mut messages: Vec<Msg> = task.context.clone();
992
993 let (mut tier, ambiguous) = self.classify_with_confidence(&task.description);
995
996 let budget = BudgetState {
998 daily_limit_usd: self.config.budget.daily_usd,
999 daily_spent_usd: 0.0,
1000 session_limit_usd: self.config.budget.session_usd,
1001 session_spent_usd: 0.0,
1002 };
1003
1004 let mut required_tools = self.requires_tools(&task.description, &tier);
1005 let mut required_vision = self.requires_vision(&task.description, &tier);
1006 let mut need = crate::router::RoutingNeed {
1007 tier: tier.clone(),
1008 required_tools,
1009 required_vision,
1010 prefer_local: false,
1011 };
1012
1013 let mut chain = self.router.select(&need, &budget);
1014
1015 let router_ref = &self.router;
1023 let apply_override = |chain: &mut Vec<Arc<dyn Brain>>| {
1024 if let Some(ref override_id) = model_override {
1025 let filtered: Vec<_> = chain
1026 .iter()
1027 .filter(|b| b.id() == override_id.as_str())
1028 .cloned()
1029 .collect();
1030 if !filtered.is_empty() {
1031 *chain = filtered;
1032 } else if let Some(brain) = router_ref.find_brain_by_id(override_id) {
1033 *chain = vec![brain];
1034 }
1035 }
1036 };
1037 apply_override(&mut chain);
1038
1039 if model_override.is_none()
1047 && ambiguous
1048 && matches!(tier, TaskTier::Medium)
1049 && !self.is_routing_question(&task.description)
1050 {
1051 let desc_hash = {
1053 use std::collections::hash_map::DefaultHasher;
1054 use std::hash::{Hash, Hasher};
1055 let mut h = DefaultHasher::new();
1056 task.description.hash(&mut h);
1057 h.finish()
1058 };
1059 let cached = {
1060 self.classify_cache
1061 .lock()
1062 .ok()
1063 .and_then(|c| c.get(&desc_hash).cloned())
1064 };
1065 let refined = match cached {
1066 Some(t) => {
1067 let _ = event_tx.send(Event::Message {
1068 run: run_id.clone(),
1069 role: "router".into(),
1070 text: format!("classification (cached): {}", t.as_str()),
1071 });
1072 Some(t)
1073 }
1074 None => {
1075 if let Some(brain) = chain.first().cloned() {
1076 let result = self
1077 .classify_via_brain(&task.description, brain.as_ref())
1078 .await;
1079 if let Some(r) = &result {
1080 if let Ok(mut c) = self.classify_cache.lock() {
1081 c.insert(desc_hash, r.clone());
1082 }
1083 }
1084 result
1085 } else {
1086 None
1087 }
1088 }
1089 };
1090 if let Some(refined) = refined {
1091 if std::mem::discriminant(&refined) != std::mem::discriminant(&tier) {
1092 let _ = event_tx.send(Event::Message {
1093 run: run_id.clone(),
1094 role: "router".into(),
1095 text: format!(
1096 "classification affinée par modèle: {} → {}",
1097 tier.as_str(),
1098 refined.as_str()
1099 ),
1100 });
1101 tier = refined;
1102 required_tools = self.requires_tools(&task.description, &tier);
1103 required_vision = self.requires_vision(&task.description, &tier);
1104 need = crate::router::RoutingNeed {
1105 tier: tier.clone(),
1106 required_tools,
1107 required_vision,
1108 prefer_local: false,
1109 };
1110 chain = self.router.select(&need, &budget);
1111 apply_override(&mut chain);
1113 }
1114 }
1115 }
1116
1117 let routing_memory_root =
1122 std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));
1123 let mut repo_routing =
1124 crate::router::learned::RepoRoutingMemory::load(&routing_memory_root);
1125 let classified_tier = tier.clone();
1126 if let Some(bumped) = repo_routing.suggest_bump(&tier) {
1127 let _ = event_tx.send(Event::Message {
1128 run: run_id.clone(),
1129 role: "router".into(),
1130 text: format!(
1131 "routing memory: {} tasks in this repo mostly needed escalation — starting at {}",
1132 tier.as_str(),
1133 bumped.as_str()
1134 ),
1135 });
1136 tier = bumped;
1137 required_tools = self.requires_tools(&task.description, &tier);
1138 required_vision = self.requires_vision(&task.description, &tier);
1139 need = crate::router::RoutingNeed {
1140 tier: tier.clone(),
1141 required_tools,
1142 required_vision,
1143 prefer_local: false,
1144 };
1145 chain = self.router.select(&need, &budget);
1146 apply_override(&mut chain);
1147 }
1148
1149 let task_summary = self.task_summary(&task.description, &tier);
1150 let chain_ids: Vec<String> = chain.iter().map(|b| b.id().to_string()).collect();
1151
1152 let agent_name = self
1153 .identity
1154 .as_ref()
1155 .map(|identity| identity.name.clone())
1156 .unwrap_or_else(|| "sparrow".into());
1157 let _ = event_tx.send(Event::RunStarted {
1158 run: run_id.clone(),
1159 task: task.description.clone(),
1160 agent: agent_name,
1161 });
1162
1163 let pre_run_results = self
1166 .hooks
1167 .execute(&HookEvent::PreRun, &task.description)
1168 .await;
1169 if let Some(reason) = pre_run_results
1170 .iter()
1171 .find(|r| r.veto)
1172 .and_then(|r| r.veto_reason.clone())
1173 {
1174 let _ = event_tx.send(Event::Error {
1175 run: run_id.clone(),
1176 message: format!("PreRun hook vetoed run: {}", reason),
1177 });
1178 anyhow::bail!("PreRun hook vetoed run: {}", reason);
1179 }
1180
1181 let _ = event_tx.send(Event::Message {
1182 run: run_id.clone(),
1183 role: "router".into(),
1184 text: format!(
1185 "requete: {} · tier: {} · tools: {} · vision: {} · local: {}",
1186 task_summary,
1187 tier.as_str(),
1188 need.required_tools,
1189 need.required_vision,
1190 need.prefer_local
1191 ),
1192 });
1193
1194 let _ = event_tx.send(Event::AgentStatus {
1195 run: run_id.clone(),
1196 role: "planner".into(),
1197 status: AgentStatus::Working,
1198 note: format!("analyzing request · {} candidates", chain.len()),
1199 });
1200
1201 let primary_ctx = chain
1202 .first()
1203 .map(|b| b.caps().context_window)
1204 .unwrap_or(128_000);
1205 let _ = event_tx.send(Event::RouteSelected {
1206 run: run_id.clone(),
1207 chain: chain_ids.clone(),
1208 context_window: primary_ctx,
1209 });
1210 let _ = event_tx.send(Event::AgentStatus {
1211 run: run_id.clone(),
1212 role: "planner".into(),
1213 status: AgentStatus::Done,
1214 note: format!(
1215 "route set · {} primary",
1216 chain.first().map(|b| b.id()).unwrap_or("—")
1217 ),
1218 });
1219
1220 if chain.is_empty() {
1221 let _ = event_tx.send(Event::Error {
1222 run: run_id.clone(),
1223 message: "No available models (budget exhausted or no providers configured)".into(),
1224 });
1225 return Ok(OutcomeSummary {
1226 status: "error: no models".into(),
1227 diffs: vec![],
1228 cost_usd: 0.0,
1229 tokens: TokenUsage {
1230 input: 0,
1231 output: 0,
1232 },
1233 cost_comparison: String::new(),
1234 });
1235 }
1236
1237 if self.is_routing_question(&task.description) {
1238 let text = self.routing_explanation(&tier, &need, &chain_ids);
1239 let input_tokens =
1240 estimate_text_tokens(&task.description) + estimate_text_tokens(&task_summary);
1241 let output_tokens = estimate_text_tokens(&text);
1242 let _ = event_tx.send(Event::TokenUsageEstimated {
1243 run: run_id.clone(),
1244 input: input_tokens,
1245 output: 0,
1246 reason: "router meta request estimate".into(),
1247 });
1248 let _ = event_tx.send(Event::TokenUsageEstimated {
1249 run: run_id.clone(),
1250 input: 0,
1251 output: output_tokens,
1252 reason: "router meta response estimate".into(),
1253 });
1254 let _ = event_tx.send(Event::ThinkingDelta {
1255 run: run_id.clone(),
1256 text: text.clone(),
1257 });
1258 let outcome = OutcomeSummary {
1259 status: "completed".into(),
1260 diffs: vec![],
1261 cost_usd: 0.0,
1262 tokens: TokenUsage {
1263 input: input_tokens,
1264 output: output_tokens,
1265 },
1266 cost_comparison: String::new(),
1267 };
1268 let _ = event_tx.send(Event::RunFinished {
1269 run: run_id.clone(),
1270 outcome: outcome.clone(),
1271 });
1272 return Ok(outcome);
1273 }
1274
1275 let workspace_root = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
1277 let sandbox: Arc<dyn Sandbox> = match self.config.defaults.sandbox.as_str() {
1278 "local-hardened" => Arc::new(crate::sandbox::LocalSandbox::hardened(
1279 workspace_root.clone(),
1280 )),
1281 "docker" => Arc::new(crate::sandbox::backends::DockerSandbox::new(
1282 workspace_root.clone(),
1283 "ubuntu:latest",
1284 )),
1285 s if s.starts_with("ssh:") => Arc::new(crate::sandbox::backends::SshSandbox::new(
1286 workspace_root.clone(),
1287 s.trim_start_matches("ssh:"),
1288 )),
1289 "modal" => Arc::new(crate::sandbox::backends::ModalSandbox::new(
1290 workspace_root.clone(),
1291 )),
1292 "daytona" => Arc::new(crate::sandbox::backends::DaytonaSandbox::new(
1293 workspace_root.clone(),
1294 )),
1295 "vercel" => Arc::new(crate::sandbox::backends::VercelSandbox::new(
1296 workspace_root.clone(),
1297 )),
1298 "singularity" => Arc::new(crate::sandbox::backends::SingularitySandbox::new(
1299 workspace_root.clone(),
1300 )),
1301 _ => Arc::new(crate::sandbox::LocalSandbox::new(workspace_root.clone())),
1302 };
1303
1304 let mut registry = ToolRegistry::new();
1305 registry.register(Arc::new(crate::tools::fs::FsRead));
1306 registry.register(Arc::new(crate::tools::fs::FsList));
1307 registry.register(Arc::new(crate::tools::fs::FsWrite));
1308 registry.register(Arc::new(crate::tools::edit::Edit));
1309 registry.register(Arc::new(crate::tools::edit::MultiEdit));
1310 registry.register(Arc::new(crate::tools::search_and_web::Search));
1311 registry.register(Arc::new(crate::tools::search_and_web::WebSearch));
1312 registry.register(Arc::new(crate::tools::search_and_web::WebFetch));
1313 registry.register(Arc::new(crate::tools::browser_sandbox::BrowserTool));
1314 registry.register(Arc::new(crate::tools::browser_sandbox::ComputerTool));
1315 registry.register(Arc::new(crate::tools::git::Git));
1316 registry.register(Arc::new(crate::tools::todo::Todo::new()));
1317 registry.register(Arc::new(crate::tools::exec::Exec::new(sandbox.clone())));
1318 registry.register(Arc::new(crate::tools::media::ImageGen::new()));
1319 registry.register(Arc::new(crate::tools::media::Tts::new()));
1320 registry.register(Arc::new(crate::tools::media::Transcribe::new()));
1321 registry.register(Arc::new(crate::tools::subagent::PythonRpc::new()));
1322 registry.register(Arc::new(crate::tools::builder_tools::LspClient));
1323 registry.register(Arc::new(crate::tools::code_nav::Glob));
1324 registry.register(Arc::new(crate::tools::code_nav::Symbols));
1325 if let Some(mem) = &self.memory {
1326 registry.register(Arc::new(crate::tools::memory::MemoryTool::new(mem.clone())));
1327 registry.register(Arc::new(
1328 crate::tools::knowledge_graph::KnowledgeGraphTool::new(mem.clone()),
1329 ));
1330 }
1331 {
1332 let mut sub = crate::tools::subagent::SubagentSpawn::new(
1334 self.router.clone(),
1335 self.config.clone(),
1336 );
1337 if let Some(mem) = &self.memory {
1338 sub = sub.with_memory(mem.clone());
1339 }
1340 registry.register(Arc::new(sub));
1341 }
1342 let tools = Arc::new(registry);
1343 let tool_specs: Vec<ToolSpec> = tools.to_specs();
1344
1345 let workspace = Workspace {
1346 root: workspace_root,
1347 sandbox,
1348 };
1349
1350 let identity = self.identity.clone().unwrap_or_else(|| Identity {
1351 name: "sparrow".into(),
1352 role: "senior software engineer".into(),
1353 personality: "concise, competent, direct".into(),
1354 });
1355
1356 let brain_policy = BrainPolicy {
1357 chain,
1358 current_index: 0,
1359 };
1360
1361 let mut autonomy = match self.config.defaults.autonomy {
1362 AutonomyLevel::Supervised => AutonomyContract::supervised(),
1363 AutonomyLevel::Trusted => AutonomyContract::trusted(),
1364 AutonomyLevel::Autonomous => AutonomyContract::autonomous(),
1365 };
1366 autonomy.budget.max_usd = self.config.budget.session_usd;
1367 let _ = event_tx.send(Event::AutonomyChanged {
1368 run: run_id.clone(),
1369 level: autonomy.level.clone(),
1370 });
1371
1372 let relevant_skills: Vec<crate::capabilities::Skill> = self
1377 .skills
1378 .as_ref()
1379 .map(|s| s.relevant(&task.description, 5))
1380 .unwrap_or_default();
1381 let skill_catalog: Vec<crate::capabilities::Skill> =
1385 self.skills.as_ref().map(|s| s.all()).unwrap_or_default();
1386
1387 let system = build_system_prompt(
1388 &identity,
1389 &workspace.root,
1390 &self
1391 .memory
1392 .as_ref()
1393 .map(|m| m.all_facts())
1394 .unwrap_or_default(),
1395 &self
1396 .memory
1397 .as_ref()
1398 .map(|m| {
1399 [MemoryDocKind::Memory, MemoryDocKind::User]
1400 .into_iter()
1401 .filter_map(|kind| m.memory_doc(kind))
1402 .collect::<Vec<_>>()
1403 })
1404 .unwrap_or_default(),
1405 &crate::instructions::discover_workspace_instructions(
1406 &workspace.root,
1407 &task.description,
1408 ),
1409 &relevant_skills,
1410 &skill_catalog,
1411 );
1412 let mut system = format!(
1413 "{}\n\n## Active Sparrow Routing Context\nRequest category: {}\nTask tier: {}\nRequired tools: {}\nRequired vision: {}\nPreferred local: {}\nSelected fallback chain: {}\nRouting policy: free_first={}, session_budget_usd={:.2}.\nWhen answering routing questions, describe this context concretely.",
1414 system,
1415 task_summary,
1416 tier.as_str(),
1417 need.required_tools,
1418 need.required_vision,
1419 need.prefer_local,
1420 summarize_model_chain(&chain_ids, 8),
1421 self.config.routing.free_first,
1422 self.config.budget.session_usd
1423 );
1424
1425 if !messages.is_empty() {
1429 system.push_str(
1430 "\n\n## Conversation continuity\nThis is an ONGOING conversation. The messages below are prior turns and are AUTHORITATIVE memory of what the user told you (names, preferences, facts, decisions). Use them directly; never re-introduce yourself or contradict them.",
1431 );
1432 }
1433
1434 messages.push(Msg {
1436 role: "user".into(),
1437 content: initial_user_content_blocks(&workspace.root, &task.description),
1438 });
1439
1440 let mut total_input: u64 = 0;
1441 let mut total_output: u64 = 0;
1442 let mut estimated_input_unconfirmed: u64 = 0;
1443 let mut estimated_output_unconfirmed: u64 = 0;
1444 let mut estimated_cost_unconfirmed: f64 = 0.0;
1445 let mut cost_usd: f64 = 0.0;
1446 let diffs: Vec<crate::event::FileDiff> = Vec::new();
1447 let mut current_chain_idx = 0usize;
1448 let mut tool_results_pending: Vec<(
1449 String,
1450 String,
1451 serde_json::Value,
1452 Vec<ContentBlock>,
1453 bool,
1454 )> = Vec::new();
1455 let budget_session = self.config.budget.session_usd;
1456 let _budget_daily = self.config.budget.daily_usd;
1457 let redaction = &self.redaction;
1458 let mut had_error = false;
1459 let mut last_error: Option<String> = None;
1460 let mut waiting_for_approval = false;
1461 let mut denied_by_approval = false;
1462 let mut skill_evidence = String::new();
1463 let mut turns: u32 = 0;
1465 const MAX_TURNS: u32 = 60;
1466 let mut had_mutation = false;
1470 let mut verify_attempts: u32 = 0;
1471 const MAX_VERIFY_ATTEMPTS: u32 = 2;
1472 let mut verify_escalations: u32 = 0;
1476 const MAX_VERIFY_ESCALATIONS: u32 = 2;
1477 let mut produced_any_output = false;
1481 let mut transient_retries: u32 = 0;
1485 const MAX_TRANSIENT_RETRIES: u32 = 2;
1486 let mut last_tool_sig: Option<u64> = None;
1491 let mut repeated_tool_turns: u32 = 0;
1492
1493 let send = |event: Event| {
1495 let _ = event_tx.send(redaction.redact_event(&event));
1496 };
1497
1498 const COMPACT_TRANSCRIPT_CHARS: usize = 120_000;
1504 const COMPACT_KEEP_LAST: usize = 6;
1505 let context_manager = crate::redaction::ContextManager::new(200_000);
1506
1507 loop {
1509 if turns > 0 {
1512 let transcript_chars: usize = messages
1513 .iter()
1514 .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1515 .sum();
1516 if transcript_chars > COMPACT_TRANSCRIPT_CHARS && messages.len() > COMPACT_KEEP_LAST
1517 {
1518 let _ = self
1521 .hooks
1522 .execute(&HookEvent::PreCompact, &task.description)
1523 .await;
1524 let before = transcript_chars;
1525 let compacted =
1526 context_manager.compact_messages(&messages, 0, COMPACT_KEEP_LAST);
1527 let after: usize = compacted
1528 .iter()
1529 .map(|m| serde_json::to_string(m).map(|s| s.len()).unwrap_or(0))
1530 .sum();
1531
1532 let mut handoff = crate::context::HandoffDoc::new(task.description.clone());
1534 handoff.next_steps = vec![format!(
1535 "Resume run {} (turn {}/{})",
1536 run_id.0, turns, MAX_TURNS
1537 )];
1538 let handoff_dir = std::path::PathBuf::from(".sparrow/handoff");
1539 let _ = std::fs::create_dir_all(&handoff_dir);
1540 let handoff_path = handoff_dir.join(format!(
1541 "{}-{}.md",
1542 run_id.0,
1543 chrono::Utc::now().format("%Y%m%dT%H%M%SZ")
1544 ));
1545 let _ = std::fs::write(&handoff_path, handoff.to_markdown());
1546
1547 messages = compacted;
1548 send(Event::Compacted {
1549 run: run_id.clone(),
1550 before_chars: before,
1551 after_chars: after,
1552 handoff_path: Some(handoff_path.to_string_lossy().to_string()),
1553 });
1554 let _ = self
1555 .hooks
1556 .execute(&HookEvent::PostCompact, &task.description)
1557 .await;
1558 }
1559 }
1560 turns += 1;
1562 if turns > MAX_TURNS {
1563 send(Event::Message {
1564 run: run_id.clone(),
1565 role: "guard".into(),
1566 text: format!("iteration cap reached ({} turns) — stopping", MAX_TURNS),
1567 });
1568 break;
1569 }
1570
1571 if cost_usd + estimated_cost_unconfirmed >= budget_session {
1573 let msg = format!(
1574 "Budget exceeded: ${:.4} of ${:.2} session cap",
1575 cost_usd + estimated_cost_unconfirmed,
1576 budget_session
1577 );
1578 send(Event::Error {
1579 run: run_id.clone(),
1580 message: msg.clone(),
1581 });
1582 let _ = self
1585 .hooks
1586 .execute(&HookEvent::OnBudgetThreshold, &msg)
1587 .await;
1588 let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
1589 had_error = true;
1590 last_error = Some("budget exceeded".into());
1591 break;
1592 }
1593 if let Some(_approval_handler) = &self.approval_handler {
1594 if waiting_for_approval {
1595 }
1598 }
1599
1600 if let Some(ref policy) = self.org_policy {
1602 let proposed_file = tool_results_pending
1603 .last()
1604 .map(|(_, _, args, _, _)| {
1605 args.get("path").and_then(|v| v.as_str()).unwrap_or("")
1606 })
1607 .unwrap_or("");
1608 if let Err(violation) =
1609 policy.enforce(&self.config.defaults.autonomy, cost_usd, proposed_file)
1610 {
1611 send(Event::Error {
1612 run: run_id.clone(),
1613 message: format!("Org policy violation: {}", violation),
1614 });
1615 break;
1616 }
1617 }
1618
1619 if let Some(rx) = inject_rx.as_mut() {
1623 loop {
1624 match rx.try_recv() {
1625 Ok(injected) => {
1626 let trimmed = injected.trim().to_string();
1627 if trimmed.is_empty() {
1628 continue;
1629 }
1630 messages.push(Msg {
1631 role: "user".into(),
1632 content: vec![ContentBlock::Text {
1633 text: format!("INTERRUPT FROM USER: {}", trimmed),
1634 }],
1635 });
1636 let _ = event_tx.send(Event::Message {
1637 run: run_id.clone(),
1638 role: "interrupt".into(),
1639 text: trimmed,
1640 });
1641 }
1642 Err(mpsc::error::TryRecvError::Empty) => break,
1643 Err(mpsc::error::TryRecvError::Disconnected) => {
1644 inject_rx = None;
1645 break;
1646 }
1647 }
1648 }
1649 }
1650
1651 let brain = match brain_policy.chain.get(current_chain_idx) {
1652 Some(b) => b.clone(),
1653 None => break,
1654 };
1655
1656 let caps = brain.caps();
1657
1658 {
1663 let req_for_estimate = BrainRequest {
1664 system: Some(system.clone()),
1665 messages: messages.clone(),
1666 tools: if need.required_tools {
1667 tool_specs.clone()
1668 } else {
1669 vec![]
1670 },
1671 max_tokens: caps.max_output as u32,
1672 temperature: 0.0,
1673 stop: vec![],
1674 cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1675 "engine",
1676 &workspace.root,
1677 &tool_specs,
1678 ))),
1679 };
1680 let est = estimate_request_tokens(&req_for_estimate);
1681 let threshold = (caps.context_window as f64 * 0.75) as u64;
1682 if est > threshold && messages.len() > 8 {
1683 let original_task = messages.first().cloned();
1684 let keep_tail: Vec<Msg> =
1685 messages.iter().rev().take(6).cloned().collect::<Vec<_>>();
1686 let middle: Vec<Msg> = messages
1687 .iter()
1688 .skip(1)
1689 .take(messages.len().saturating_sub(7))
1690 .cloned()
1691 .collect();
1692 let dropped = middle.len();
1693
1694 let summary = self
1697 .summarize_messages(brain.as_ref(), &middle)
1698 .await
1699 .unwrap_or_else(|| {
1700 format!(
1701 "{} prior messages were dropped to fit the model window.",
1702 dropped
1703 )
1704 });
1705
1706 let mut compacted: Vec<Msg> = Vec::new();
1707 if let Some(task) = original_task {
1708 compacted.push(task);
1709 }
1710 compacted.push(Msg {
1711 role: "user".into(),
1712 content: vec![ContentBlock::Text {
1713 text: format!(
1714 "[CONTEXT SUMMARY of {} earlier messages]\n{}\n\
1715 (Files edited and tool outputs in the turns below remain authoritative.)",
1716 dropped, summary
1717 ),
1718 }],
1719 });
1720 for m in keep_tail.into_iter().rev() {
1721 compacted.push(m);
1722 }
1723 messages = compacted;
1724 let _ = event_tx.send(Event::Message {
1725 run: run_id.clone(),
1726 role: "compaction".into(),
1727 text: format!(
1728 "context compacted: {} messages summarized ({} tok > {} threshold)",
1729 dropped, est, threshold
1730 ),
1731 });
1732 }
1733 }
1734
1735 let req = BrainRequest {
1736 system: Some(system.clone()),
1737 messages: messages.clone(),
1738 tools: if need.required_tools {
1739 tool_specs.clone()
1740 } else {
1741 vec![]
1742 },
1743 max_tokens: caps.max_output as u32,
1744 temperature: 0.0,
1745 stop: vec![],
1746 cache: PromptCacheConfig::enabled(Some(prompt_cache_key(
1747 "engine",
1748 &workspace.root,
1749 &tool_specs,
1750 ))),
1751 };
1752
1753 let estimated_input = estimate_request_tokens(&req);
1754 estimated_input_unconfirmed += estimated_input;
1755 estimated_cost_unconfirmed +=
1756 caps.cost_input_per_mtok * (estimated_input as f64) / 1_000_000.0;
1757 let _ = event_tx.send(Event::TokenUsageEstimated {
1758 run: run_id.clone(),
1759 input: estimated_input,
1760 output: 0,
1761 reason: "prompt estimate before provider usage".into(),
1762 });
1763 let _ = event_tx.send(Event::CostUpdate {
1764 run: run_id.clone(),
1765 usd: cost_usd + estimated_cost_unconfirmed,
1766 });
1767
1768 let _ = event_tx.send(Event::AgentStatus {
1769 run: run_id.clone(),
1770 role: "coder".into(),
1771 status: AgentStatus::Thinking,
1772 note: format!("consulting {} · parsing request…", brain.id()),
1773 });
1774
1775 match brain.complete(req).await {
1776 Ok(mut stream) => {
1777 transient_retries = 0;
1779 let mut current_tool_name = String::new();
1780 let mut current_tool_json = String::new();
1781 let mut output_chars_seen: u64 = 0;
1782 let mut output_tokens_emitted: u64 = 0;
1783 let mut continue_agent_loop = false;
1784 let mut stop_after_tool_result = false;
1785 let mut assistant_text = String::new();
1786 let mut tool_output_seen_this_completion = false;
1787 let mut tools_called_this_turn: Vec<String> = Vec::new();
1791 let mut reasoning_buf: String = String::new();
1795
1796 while let Some(event) = stream.next().await {
1797 match event {
1798 BrainEvent::TextDelta(text) => {
1799 assistant_text.push_str(&text);
1800 output_chars_seen += text.chars().count() as u64;
1801 let estimated_output = (output_chars_seen + 3) / 4;
1802 let output_delta =
1803 estimated_output.saturating_sub(output_tokens_emitted);
1804 if output_delta > 0 {
1805 output_tokens_emitted += output_delta;
1806 estimated_output_unconfirmed += output_delta;
1807 estimated_cost_unconfirmed += caps.cost_output_per_mtok
1808 * (output_delta as f64)
1809 / 1_000_000.0;
1810 let _ = event_tx.send(Event::TokenUsageEstimated {
1811 run: run_id.clone(),
1812 input: 0,
1813 output: output_delta,
1814 reason: "streamed output estimate".into(),
1815 });
1816 let _ = event_tx.send(Event::CostUpdate {
1817 run: run_id.clone(),
1818 usd: cost_usd + estimated_cost_unconfirmed,
1819 });
1820 }
1821 let _ = event_tx.send(Event::ThinkingDelta {
1822 run: run_id.clone(),
1823 text: text.clone(),
1824 });
1825 }
1826 BrainEvent::ReasoningDelta(rtext) => {
1827 reasoning_buf.push_str(&rtext);
1833 let _ = event_tx.send(Event::ReasoningDelta {
1834 run: run_id.clone(),
1835 text: rtext,
1836 });
1837 }
1838 BrainEvent::ToolUseStart { id, name } => {
1839 current_tool_name = name.clone();
1840 tools_called_this_turn.push(name.clone());
1841 current_tool_json.clear();
1842 let risk = tools
1843 .get(&name)
1844 .map(|tool| tool.risk())
1845 .unwrap_or(RiskLevel::ReadOnly);
1846 let _ = event_tx.send(Event::ToolUseProposed {
1851 run: run_id.clone(),
1852 id: id.clone(),
1853 name: name.clone(),
1854 args: json!({}),
1855 risk,
1856 });
1857 }
1858 BrainEvent::ToolUseDelta { id, json } => {
1859 let _ = id;
1860 current_tool_json.push_str(&json);
1861 }
1862 BrainEvent::ToolUseEnd { id } => {
1863 let args: serde_json::Value =
1865 serde_json::from_str(¤t_tool_json).unwrap_or(json!({}));
1866
1867 let tool_name = if current_tool_name.is_empty() {
1869 "unknown".to_string()
1870 } else {
1871 current_tool_name.clone()
1872 };
1873 let tool = tools.get(&tool_name);
1874 let risk = tool
1875 .as_ref()
1876 .map(|tool| tool.risk())
1877 .unwrap_or(RiskLevel::ReadOnly);
1878
1879 let _ = event_tx.send(Event::ToolUseProposed {
1885 run: run_id.clone(),
1886 id: id.clone(),
1887 name: tool_name.clone(),
1888 args: args.clone(),
1889 risk: risk.clone(),
1890 });
1891 let proposed = crate::autonomy::ProposedAction {
1892 tool_name: tool_name.clone(),
1893 risk: risk.clone(),
1894 args: args.clone(),
1895 };
1896
1897 let permission =
1898 self.config.permissions.evaluate(&PermissionContext {
1899 tool_name: &proposed.tool_name,
1900 risk: proposed.risk.clone(),
1901 args: &args,
1902 workspace_root: &workspace.root,
1903 provider: Some(brain.id()),
1904 surface: Some("engine"),
1905 });
1906 let autonomy_verdict =
1907 if matches!(permission.decision, Decision::Allow) {
1908 Some(autonomy.evaluate(&proposed))
1909 } else {
1910 None
1911 };
1912 let mut decision = autonomy_verdict
1913 .as_ref()
1914 .map(|verdict| verdict.decision.clone())
1915 .unwrap_or_else(|| permission.decision.clone());
1916 if !matches!(permission.decision, Decision::Allow) {
1917 let _ = event_tx.send(Event::Message {
1918 run: run_id.clone(),
1919 role: "permissions".into(),
1920 text: permission.reason.clone(),
1921 });
1922 }
1923 if matches!(decision, Decision::AskUser) {
1924 let summary = format!(
1925 "{}. Approve {} with args: {}",
1926 permission.reason, proposed.tool_name, args
1927 );
1928 let _ = event_tx.send(Event::ApprovalRequested {
1929 run: run_id.clone(),
1930 id: id.clone(),
1931 summary: summary.clone(),
1932 tool: Some(proposed.tool_name.clone()),
1933 risk: Some(format!("{:?}", proposed.risk)),
1934 });
1935 let _ = self
1939 .hooks
1940 .execute(&HookEvent::OnApprovalRequested, &summary)
1941 .await;
1942 if let Some(handler) = &self.approval_handler {
1943 decision = handler
1944 .request_approval(ApprovalRequest {
1945 run: run_id.clone(),
1946 id: id.clone(),
1947 tool_name: proposed.tool_name.clone(),
1948 risk: proposed.risk.clone(),
1949 args: args.clone(),
1950 summary,
1951 })
1952 .await;
1953 }
1954 }
1955
1956 let _ = event_tx.send(Event::ApprovalResolved {
1957 run: run_id.clone(),
1958 id: id.clone(),
1959 decision: decision.clone(),
1960 });
1961
1962 match decision {
1963 Decision::Allow => {
1964 if autonomy_verdict
1965 .as_ref()
1966 .map(|verdict| verdict.notify)
1967 .unwrap_or(false)
1968 {
1969 let _ = event_tx.send(Event::Message {
1970 run: run_id.clone(),
1971 role: "autonomy".into(),
1972 text: format!(
1973 "{} will run under trusted autonomy with checkpoint notification",
1974 proposed.tool_name
1975 ),
1976 });
1977 }
1978 if matches!(
1980 proposed.risk,
1981 RiskLevel::Mutating | RiskLevel::Destructive
1982 ) {
1983 had_mutation = true;
1984 }
1985 let needs_checkpoint = autonomy_verdict
1987 .as_ref()
1988 .map(|verdict| verdict.needs_checkpoint)
1989 .unwrap_or_else(|| {
1990 matches!(
1991 proposed.risk,
1992 RiskLevel::Mutating
1993 | RiskLevel::Exec
1994 | RiskLevel::Destructive
1995 )
1996 });
1997 if needs_checkpoint {
1998 let vetoes = self
1999 .hooks
2000 .execute(
2001 &HookEvent::PreCheckpoint,
2002 &proposed.tool_name,
2003 )
2004 .await;
2005 let checkpoint_veto = vetoes
2006 .iter()
2007 .find(|result| result.veto)
2008 .and_then(|result| result.veto_reason.clone());
2009 if let Some(reason) = checkpoint_veto {
2010 let _ = event_tx.send(Event::Error {
2011 run: run_id.clone(),
2012 message: reason,
2013 });
2014 denied_by_approval = true;
2015 stop_after_tool_result = true;
2016 continue;
2017 }
2018 let checkpoints =
2019 GitCheckpoints::new(workspace.root.clone());
2020 if let Ok(cp_id) = checkpoints
2021 .snapshot(&format!("pre-{}", proposed.tool_name))
2022 {
2023 let _ = event_tx.send(Event::CheckpointCreated {
2024 run: run_id.clone(),
2025 id: cp_id,
2026 label: format!("pre-{}", proposed.tool_name),
2027 });
2028 let _ = self
2029 .hooks
2030 .execute(
2031 &HookEvent::PostCheckpoint,
2032 &proposed.tool_name,
2033 )
2034 .await;
2035 }
2036 }
2037
2038 let hook_ctx = format!("{} {}", proposed.tool_name, args);
2044 let hook_results = self
2045 .hooks
2046 .execute(&HookEvent::PreToolUse, &hook_ctx)
2047 .await;
2048 if let Some(reason) = hook_results
2049 .iter()
2050 .find(|result| result.veto)
2051 .and_then(|result| result.veto_reason.clone())
2052 {
2053 denied_by_approval = true;
2054 stop_after_tool_result = true;
2055 let _ = event_tx.send(Event::ToolOutput {
2056 run: run_id.clone(),
2057 id: id.clone(),
2058 blocks: vec![Block::Text(reason.clone())],
2059 });
2060 tool_output_seen_this_completion = true;
2061 tool_results_pending.push((
2062 id.clone(),
2063 proposed.tool_name.clone(),
2064 args.clone(),
2065 vec![ContentBlock::Text { text: reason }],
2066 true,
2067 ));
2068 continue;
2069 }
2070
2071 let _ = event_tx.send(Event::ToolUseStarted {
2072 run: run_id.clone(),
2073 id: id.clone(),
2074 });
2075 let _ = event_tx.send(Event::AgentStatus {
2076 run: run_id.clone(),
2077 role: "coder".into(),
2078 status: AgentStatus::Working,
2079 note: format!("running tool · {}", current_tool_name),
2080 });
2081
2082 let result = if let Some(tool) = tool {
2083 let ctx = ToolCtx {
2084 workspace_root: workspace.root.clone(),
2085 run_id: run_id.clone(),
2086 };
2087 match tool.call(args.clone(), &ctx).await {
2088 Ok(result) => result,
2089 Err(e) => crate::tools::ToolResult::error(format!(
2090 "Tool {} failed: {}",
2091 proposed.tool_name, e
2092 )),
2093 }
2094 } else {
2095 crate::tools::ToolResult::error(format!(
2096 "Unknown tool: {}",
2097 proposed.tool_name
2098 ))
2099 };
2100
2101 for block in &result.content {
2102 if let Block::Diff { file, patch } = block {
2103 let plus = patch
2104 .lines()
2105 .filter(|l| {
2106 l.starts_with('+') && !l.starts_with("+++")
2107 })
2108 .count()
2109 as u32;
2110 let minus = patch
2111 .lines()
2112 .filter(|l| {
2113 l.starts_with('-') && !l.starts_with("---")
2114 })
2115 .count()
2116 as u32;
2117 let _ = event_tx.send(Event::DiffProposed {
2118 run: run_id.clone(),
2119 file: file.clone(),
2120 patch: patch.clone(),
2121 plus,
2122 minus,
2123 });
2124 }
2125 }
2126
2127 let blocks = result.content.clone();
2128 let text = tool_result_text(&blocks);
2129 let content_blocks = tool_result_content_blocks(&blocks);
2130 let is_error = result.is_error;
2131 skill_evidence.push_str(&text);
2132 skill_evidence.push('\n');
2133 let _ = event_tx.send(Event::ToolOutput {
2134 run: run_id.clone(),
2135 id: id.clone(),
2136 blocks,
2137 });
2138 if !is_error
2142 && matches!(
2143 proposed.tool_name.as_str(),
2144 "fs_write" | "edit" | "multi_edit"
2145 )
2146 {
2147 if let Some(p) =
2148 args.get("path").and_then(|v| v.as_str())
2149 {
2150 let _ = event_tx.send(Event::DiffApplied {
2151 run: run_id.clone(),
2152 file: p.to_string(),
2153 });
2154 } else if let Some(p) =
2155 args.get("file_path").and_then(|v| v.as_str())
2156 {
2157 let _ = event_tx.send(Event::DiffApplied {
2158 run: run_id.clone(),
2159 file: p.to_string(),
2160 });
2161 }
2162 }
2163 let _ = self
2164 .hooks
2165 .execute(&HookEvent::PostToolUse, &proposed.tool_name)
2166 .await;
2167 tool_output_seen_this_completion = true;
2168 tool_results_pending.push((
2169 id.clone(),
2170 proposed.tool_name.clone(),
2171 args.clone(),
2172 content_blocks,
2173 is_error,
2174 ));
2175 }
2176 Decision::AskUser => {
2177 waiting_for_approval = true;
2179 let approval_id = id.clone();
2180 let approval_name = proposed.tool_name.clone();
2181 let approval_args = args.clone();
2182 let approval_risk = proposed.risk;
2183
2184 let _ = event_tx.send(Event::ApprovalRequested {
2186 run: run_id.clone(),
2187 id: approval_id.clone(),
2188 summary: format!(
2189 "{} tool '{}' with args: {}",
2190 format!("{:?}", approval_risk),
2191 approval_name,
2192 approval_args
2193 ),
2194 tool: Some(approval_name.clone()),
2195 risk: Some(format!("{:?}", approval_risk)),
2196 });
2197
2198 use std::io::{self, Write};
2200 print!(
2201 "\n\x1b[1;33mApprove {}? [y/N]\x1b[0m ",
2202 approval_name
2203 );
2204 io::stdout().flush().ok();
2205 let mut input = String::new();
2206 io::stdin().read_line(&mut input).ok();
2207 let approved = input.trim().to_lowercase() == "y";
2208
2209 if approved {
2210 waiting_for_approval = false;
2211 if matches!(
2213 approval_risk,
2214 RiskLevel::Mutating
2215 | RiskLevel::Exec
2216 | RiskLevel::Destructive
2217 ) {
2218 let vetoes = self
2219 .hooks
2220 .execute(
2221 &HookEvent::PreCheckpoint,
2222 &approval_name,
2223 )
2224 .await;
2225 if let Some(reason) = vetoes
2226 .iter()
2227 .find(|result| result.veto)
2228 .and_then(|result| result.veto_reason.clone())
2229 {
2230 let _ = event_tx.send(Event::Error {
2231 run: run_id.clone(),
2232 message: reason,
2233 });
2234 denied_by_approval = true;
2235 stop_after_tool_result = true;
2236 continue;
2237 }
2238 let checkpoints =
2239 GitCheckpoints::new(workspace.root.clone());
2240 if let Ok(cp_id) = checkpoints
2241 .snapshot(&format!("pre-{}", approval_name))
2242 {
2243 let _ =
2244 event_tx.send(Event::CheckpointCreated {
2245 run: run_id.clone(),
2246 id: cp_id,
2247 label: format!("pre-{}", approval_name),
2248 });
2249 let _ = self
2250 .hooks
2251 .execute(
2252 &HookEvent::PostCheckpoint,
2253 &approval_name,
2254 )
2255 .await;
2256 }
2257 }
2258 let hook_results = self
2259 .hooks
2260 .execute(&HookEvent::PreToolUse, &approval_name)
2261 .await;
2262 if let Some(reason) = hook_results
2263 .iter()
2264 .find(|result| result.veto)
2265 .and_then(|result| result.veto_reason.clone())
2266 {
2267 denied_by_approval = true;
2268 stop_after_tool_result = true;
2269 let _ = event_tx.send(Event::ToolOutput {
2270 run: run_id.clone(),
2271 id: approval_id.clone(),
2272 blocks: vec![Block::Text(reason.clone())],
2273 });
2274 tool_output_seen_this_completion = true;
2275 tool_results_pending.push((
2276 approval_id,
2277 approval_name,
2278 approval_args,
2279 vec![ContentBlock::Text { text: reason }],
2280 true,
2281 ));
2282 continue;
2283 }
2284 let _ = event_tx.send(Event::ToolUseStarted {
2285 run: run_id.clone(),
2286 id: approval_id.clone(),
2287 });
2288 let result = if let Some(tool) = tool {
2289 let ctx = ToolCtx {
2290 workspace_root: workspace.root.clone(),
2291 run_id: run_id.clone(),
2292 };
2293 match tool.call(approval_args.clone(), &ctx).await {
2294 Ok(r) => r,
2295 Err(e) => {
2296 crate::tools::ToolResult::error(format!(
2297 "Tool {} failed: {}",
2298 approval_name, e
2299 ))
2300 }
2301 }
2302 } else {
2303 crate::tools::ToolResult::error(format!(
2304 "Unknown tool: {}",
2305 approval_name
2306 ))
2307 };
2308 let blocks = result.content.clone();
2309 let text = tool_result_text(&blocks);
2310 let content_blocks =
2311 tool_result_content_blocks(&blocks);
2312 let is_error = result.is_error;
2313 skill_evidence.push_str(&text);
2314 skill_evidence.push('\n');
2315 let _ = event_tx.send(Event::ToolOutput {
2316 run: run_id.clone(),
2317 id: approval_id.clone(),
2318 blocks,
2319 });
2320 let _ = self
2321 .hooks
2322 .execute(&HookEvent::PostToolUse, &approval_name)
2323 .await;
2324 tool_output_seen_this_completion = true;
2325 tool_results_pending.push((
2326 approval_id,
2327 approval_name,
2328 approval_args,
2329 content_blocks,
2330 is_error,
2331 ));
2332 } else {
2333 let _ = event_tx.send(Event::ToolOutput {
2334 run: run_id.clone(),
2335 id: approval_id.clone(),
2336 blocks: vec![Block::Text("Denied by user".into())],
2337 });
2338 tool_output_seen_this_completion = true;
2339 tool_results_pending.push((
2340 approval_id,
2341 approval_name,
2342 approval_args,
2343 vec![ContentBlock::Text {
2344 text: "Denied by user".into(),
2345 }],
2346 true,
2347 ));
2348 }
2349 }
2350 Decision::Deny => {
2351 denied_by_approval = true;
2352 stop_after_tool_result = true;
2353 let _ = event_tx.send(Event::ToolOutput {
2354 run: run_id.clone(),
2355 id: id.clone(),
2356 blocks: vec![Block::Text(
2357 "Denied by autonomy policy".into(),
2358 )],
2359 });
2360 tool_output_seen_this_completion = true;
2361 tool_results_pending.push((
2362 id.clone(),
2363 proposed.tool_name.clone(),
2364 args.clone(),
2365 vec![ContentBlock::Text {
2366 text: "Denied by autonomy policy".into(),
2367 }],
2368 true,
2369 ));
2370 }
2371 Decision::AllowOnce
2377 | Decision::AllowSession
2378 | Decision::AllowAlways => {}
2379 }
2380
2381 current_tool_json.clear();
2382 current_tool_name.clear();
2383 }
2384 BrainEvent::Usage(usage) => {
2385 total_input += usage.input;
2386 total_output += usage.output;
2387 estimated_input_unconfirmed =
2388 estimated_input_unconfirmed.saturating_sub(usage.input);
2389 estimated_output_unconfirmed =
2390 estimated_output_unconfirmed.saturating_sub(usage.output);
2391 let _ = event_tx.send(Event::TokenUsage {
2392 run: run_id.clone(),
2393 input: usage.input,
2394 output: usage.output,
2395 });
2396
2397 let input_cost =
2399 caps.cost_input_per_mtok * (usage.input as f64) / 1_000_000.0;
2400 let output_cost =
2401 caps.cost_output_per_mtok * (usage.output as f64) / 1_000_000.0;
2402 let actual_cost = input_cost + output_cost;
2403 cost_usd += actual_cost;
2404 estimated_cost_unconfirmed =
2405 (estimated_cost_unconfirmed - actual_cost).max(0.0);
2406
2407 let _ = event_tx.send(Event::CostUpdate {
2408 run: run_id.clone(),
2409 usd: cost_usd + estimated_cost_unconfirmed,
2410 });
2411 }
2412 BrainEvent::Done(reason) => {
2413 match reason {
2414 crate::event::StopReason::EndTurn => {
2415 let this_empty = assistant_text.trim().is_empty()
2420 && !tool_output_seen_this_completion;
2421 if this_empty && !produced_any_output {
2422 let next_idx = current_chain_idx + 1;
2423 if next_idx < brain_policy.chain.len() {
2424 current_chain_idx = next_idx;
2425 let _ = event_tx.send(Event::ModelSwitched {
2426 run: run_id.clone(),
2427 from: brain.id().to_string(),
2428 to: brain_policy.chain[current_chain_idx]
2429 .id()
2430 .to_string(),
2431 reason: "empty response".into(),
2432 });
2433 continue_agent_loop = true;
2434 break;
2435 }
2436 }
2437 if !assistant_text.trim().is_empty() {
2438 produced_any_output = true;
2439 let mut blocks = Vec::new();
2440 if !reasoning_buf.is_empty() {
2441 blocks.push(ContentBlock::Reasoning {
2442 text: reasoning_buf.clone(),
2443 });
2444 }
2445 blocks.push(ContentBlock::Text {
2446 text: assistant_text.clone(),
2447 });
2448 let assistant_msg = Msg {
2449 role: "assistant".into(),
2450 content: blocks,
2451 };
2452 let turn_messages = vec![assistant_msg.clone()];
2453 let has_verified_tool_context =
2454 tool_output_seen_this_completion
2455 || messages.iter().any(|m| {
2456 m.content.iter().any(|block| {
2457 matches!(
2458 block,
2459 ContentBlock::ToolResult { .. }
2460 )
2461 })
2462 });
2463
2464 if let Some(correction) = self.reasoning.guard_turn(
2465 &turn_messages,
2466 has_verified_tool_context,
2467 ) {
2468 messages.push(assistant_msg);
2469 let _ = event_tx.send(Event::Message {
2470 run: run_id.clone(),
2471 role: "guard".into(),
2472 text: correction.clone(),
2473 });
2474 messages.push(Msg {
2475 role: "user".into(),
2476 content: vec![ContentBlock::Text {
2477 text: format!("SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.", correction),
2478 }],
2479 });
2480 continue_agent_loop = true;
2481 break;
2482 }
2483
2484 if self.reasoning.hallucination_guard {
2488 if let Some(correction) =
2489 crate::reasoning::HallucinationGuard::verify(
2490 &assistant_text,
2491 &tools_called_this_turn,
2492 )
2493 {
2494 let mut blocks2 = Vec::new();
2495 if !reasoning_buf.is_empty() {
2496 blocks2.push(ContentBlock::Reasoning {
2497 text: reasoning_buf.clone(),
2498 });
2499 }
2500 blocks2.push(ContentBlock::Text {
2501 text: assistant_text.clone(),
2502 });
2503 let assistant_msg2 = Msg {
2504 role: "assistant".into(),
2505 content: blocks2,
2506 };
2507 messages.push(assistant_msg2);
2508 let _ = event_tx.send(Event::Message {
2509 run: run_id.clone(),
2510 role: "guard".into(),
2511 text: correction.clone(),
2512 });
2513 messages.push(Msg {
2514 role: "user".into(),
2515 content: vec![ContentBlock::Text {
2516 text: format!(
2517 "SYSTEM: {}. Call fs_read or search to verify the file/symbol first, then re-state the claim with the raw evidence.",
2518 correction
2519 ),
2520 }],
2521 });
2522 continue_agent_loop = true;
2523 break;
2524 }
2525 }
2526
2527 if tools_called_this_turn.is_empty()
2533 && tool_narration_detected(&assistant_text)
2534 {
2535 let correction = "You described using a tool but did not actually call it. When a tool would help, CALL it — never narrate what it would do. Use the exact tool call format.";
2536 messages.push(Msg {
2537 role: "assistant".into(),
2538 content: vec![ContentBlock::Text {
2539 text: assistant_text.clone(),
2540 }],
2541 });
2542 let _ = event_tx.send(Event::Message {
2543 run: run_id.clone(),
2544 role: "guard".into(),
2545 text: correction.into(),
2546 });
2547 messages.push(Msg {
2548 role: "user".into(),
2549 content: vec![ContentBlock::Text {
2550 text: format!(
2551 "SYSTEM: {}. Execute the relevant tool first, then report the actual raw result.",
2552 correction
2553 ),
2554 }],
2555 });
2556 continue_agent_loop = true;
2557 break;
2558 }
2559 messages.push(assistant_msg);
2560 }
2561
2562 if had_mutation
2568 && self.reasoning.self_critique
2569 && !diffs.is_empty()
2570 {
2571 let review =
2572 crate::reasoning::SelfCritique::pre_mutation_review(
2573 &diffs,
2574 Some(&task.description),
2575 );
2576 let _ = event_tx.send(Event::Message {
2577 run: run_id.clone(),
2578 role: "self-critique".into(),
2579 text: review,
2580 });
2581 }
2582
2583 if had_mutation {
2593 if let Some(verify_cmd) =
2594 self.config.defaults.verify_command.clone()
2595 {
2596 verify_attempts += 1;
2597 had_mutation = false;
2598 let parts: Vec<String> = verify_cmd
2599 .split_whitespace()
2600 .map(String::from)
2601 .collect();
2602 if !parts.is_empty() {
2603 let _ = event_tx.send(Event::AgentStatus {
2604 run: run_id.clone(),
2605 role: "verifier".into(),
2606 status: AgentStatus::Working,
2607 note: format!("running `{}`", verify_cmd),
2608 });
2609 let cmd = crate::sandbox::Command {
2610 program: parts[0].clone(),
2611 args: parts[1..].to_vec(),
2612 env: std::collections::HashMap::new(),
2613 workdir: workspace.root.clone(),
2614 };
2615 let limits = crate::sandbox::Limits {
2616 timeout_ms: 300_000,
2617 max_output_bytes: 16_000,
2618 };
2619 match workspace
2620 .sandbox
2621 .exec(&cmd, &limits)
2622 .await
2623 {
2624 Ok(res) if res.exit_code != 0 => {
2625 let _ = event_tx.send(Event::TestResult {
2626 run: run_id.clone(),
2627 passed: 0,
2628 failed: 1,
2629 detail: format!(
2630 "verify `{}` failed (exit {})",
2631 verify_cmd, res.exit_code
2632 ),
2633 });
2634 let out = format!(
2635 "{}\n{}",
2636 res.stdout, res.stderr
2637 );
2638 let tail: String = out
2639 .lines()
2640 .rev()
2641 .take(40)
2642 .collect::<Vec<_>>()
2643 .into_iter()
2644 .rev()
2645 .collect::<Vec<_>>()
2646 .join("\n");
2647 if verify_attempts
2648 <= MAX_VERIFY_ATTEMPTS
2649 {
2650 messages.push(Msg {
2653 role: "user".into(),
2654 content: vec![ContentBlock::Text {
2655 text: format!(
2656 "SYSTEM: verification command `{}` FAILED (exit {}). Fix the code, then it will be re-verified. Output:\n{}",
2657 verify_cmd, res.exit_code, tail
2658 ),
2659 }],
2660 });
2661 continue_agent_loop = true;
2662 break;
2663 }
2664 let next_idx = current_chain_idx + 1;
2667 if next_idx < brain_policy.chain.len()
2668 && verify_escalations
2669 < MAX_VERIFY_ESCALATIONS
2670 {
2671 verify_escalations += 1;
2672 verify_attempts = 0;
2673 let from = brain.id().to_string();
2674 let to = brain_policy.chain
2675 [next_idx]
2676 .id()
2677 .to_string();
2678 current_chain_idx = next_idx;
2679 let _ = event_tx.send(
2680 Event::ModelSwitched {
2681 run: run_id.clone(),
2682 from,
2683 to,
2684 reason: format!(
2685 "verification still failing after {} fixes — escalating",
2686 MAX_VERIFY_ATTEMPTS
2687 ),
2688 },
2689 );
2690 messages.push(Msg {
2691 role: "user".into(),
2692 content: vec![ContentBlock::Text {
2693 text: format!(
2694 "SYSTEM: a previous model attempted this task but verification `{}` still FAILS (exit {}). You are a stronger model brought in to finish the job. Diagnose properly, fix the code, then it will be re-verified. Output:\n{}",
2695 verify_cmd, res.exit_code, tail
2696 ),
2697 }],
2698 });
2699 continue_agent_loop = true;
2700 break;
2701 }
2702 had_error = true;
2706 last_error = Some(format!(
2707 "verification `{}` still failing after retries and escalation",
2708 verify_cmd
2709 ));
2710 continue_agent_loop = false;
2711 break;
2712 }
2713 Ok(_) => {
2714 let _ =
2715 event_tx.send(Event::TestResult {
2716 run: run_id.clone(),
2717 passed: 1,
2718 failed: 0,
2719 detail: format!(
2720 "verify `{}` passed",
2721 verify_cmd
2722 ),
2723 });
2724 }
2725 Err(e) => {
2726 let _ = event_tx.send(Event::Message {
2727 run: run_id.clone(),
2728 role: "guard".into(),
2729 text: format!(
2730 "verify command could not run: {}",
2731 e
2732 ),
2733 });
2734 }
2735 }
2736 }
2737 }
2738 }
2739 }
2740 crate::event::StopReason::ToolUse => {
2741 let drained: Vec<_> =
2754 std::mem::take(&mut tool_results_pending);
2755
2756 let mut assistant_blocks = Vec::new();
2757 if !reasoning_buf.is_empty() {
2758 assistant_blocks.push(ContentBlock::Reasoning {
2759 text: reasoning_buf.clone(),
2760 });
2761 }
2762 let turn_sig = {
2765 use std::collections::hash_map::DefaultHasher;
2766 use std::hash::{Hash, Hasher};
2767 let mut h = DefaultHasher::new();
2768 for (_, name, args, _, _) in &drained {
2769 name.hash(&mut h);
2770 args.to_string().hash(&mut h);
2771 }
2772 h.finish()
2773 };
2774 for (tool_id, tool_name, args, _content, _is_error) in
2775 &drained
2776 {
2777 assistant_blocks.push(ContentBlock::ToolUse {
2778 id: tool_id.clone(),
2779 name: tool_name.clone(),
2780 input: args.clone(),
2781 });
2782 }
2783 messages.push(Msg {
2784 role: "assistant".into(),
2785 content: assistant_blocks,
2786 });
2787
2788 let turn_had_tools = !drained.is_empty();
2789 for (tool_id, _tool_name, _args, content, is_error) in
2790 drained
2791 {
2792 messages.push(Msg {
2793 role: "user".into(),
2794 content: vec![ContentBlock::ToolResult {
2795 tool_use_id: tool_id,
2796 content,
2797 is_error: Some(is_error),
2798 }],
2799 });
2800 }
2801 if tool_output_seen_this_completion {
2802 produced_any_output = true;
2803 }
2804
2805 if turn_had_tools {
2807 if last_tool_sig == Some(turn_sig) {
2808 repeated_tool_turns += 1;
2809 } else {
2810 repeated_tool_turns = 0;
2811 last_tool_sig = Some(turn_sig);
2812 }
2813 }
2814 if repeated_tool_turns == 2 {
2815 messages.push(Msg {
2817 role: "user".into(),
2818 content: vec![ContentBlock::Text {
2819 text: "guard: you have issued the exact same \
2820 tool call(s) three turns in a row with \
2821 identical arguments. The result will \
2822 not change. State what you learned and \
2823 take a DIFFERENT action — or finish \
2824 with your best answer now."
2825 .into(),
2826 }],
2827 });
2828 send(Event::Message {
2829 run: run_id.clone(),
2830 role: "guard".into(),
2831 text: "repeated identical tool calls — nudging \
2832 the model to change approach"
2833 .into(),
2834 });
2835 } else if repeated_tool_turns >= 4 {
2836 send(Event::Message {
2839 run: run_id.clone(),
2840 role: "guard".into(),
2841 text: "stuck loop: 5 identical tool-call turns \
2842 — stopping the run"
2843 .into(),
2844 });
2845 had_error = true;
2846 last_error = Some(
2847 "stopped by stuck-loop guard (5 identical \
2848 tool-call turns)"
2849 .into(),
2850 );
2851 continue_agent_loop = false;
2852 break;
2853 }
2854
2855 continue_agent_loop =
2856 !waiting_for_approval && !stop_after_tool_result;
2857 break;
2858 }
2859 _ => {}
2860 }
2861 break; }
2863 BrainEvent::Error(msg) => {
2864 let _ = event_tx.send(Event::Error {
2865 run: run_id.clone(),
2866 message: msg.clone(),
2867 });
2868 let _ = self.hooks.execute(&HookEvent::OnError, &msg).await;
2869 let next_idx = current_chain_idx + 1;
2870 if next_idx < brain_policy.chain.len() {
2871 current_chain_idx = next_idx;
2872 let switch_ctx = format!(
2873 "{} -> {}",
2874 brain.id(),
2875 brain_policy.chain[current_chain_idx].id()
2876 );
2877 let _ = event_tx.send(Event::ModelSwitched {
2878 run: run_id.clone(),
2879 from: brain.id().to_string(),
2880 to: brain_policy.chain[current_chain_idx].id().to_string(),
2881 reason: msg,
2882 });
2883 let _ = self
2884 .hooks
2885 .execute(&HookEvent::OnModelSwitched, &switch_ctx)
2886 .await;
2887 continue_agent_loop = true;
2888 } else {
2889 had_error = true;
2890 last_error = Some(msg);
2891 }
2892 break;
2893 }
2894 }
2895 }
2896
2897 if !continue_agent_loop && !had_error {
2902 let this_empty =
2903 assistant_text.trim().is_empty() && !tool_output_seen_this_completion;
2904 if this_empty && !produced_any_output {
2905 let next_idx = current_chain_idx + 1;
2906 if next_idx < brain_policy.chain.len() {
2907 let _ = event_tx.send(Event::ModelSwitched {
2908 run: run_id.clone(),
2909 from: brain.id().to_string(),
2910 to: brain_policy.chain[next_idx].id().to_string(),
2911 reason: "empty response".into(),
2912 });
2913 current_chain_idx = next_idx;
2914 continue;
2915 }
2916 }
2917 }
2918
2919 if continue_agent_loop {
2920 continue;
2921 }
2922 break; }
2924 Err(e) => {
2925 let err_msg = format!("{}", e);
2926 let _ = event_tx.send(Event::Error {
2927 run: run_id.clone(),
2928 message: err_msg.clone(),
2929 });
2930
2931 let retry_after_hint = match e.downcast_ref::<BrainError>() {
2936 Some(BrainError::RateLimit { retry_after }) => Some(*retry_after),
2937 Some(BrainError::Timeout) => Some(None),
2938 Some(BrainError::ServerError { status, .. }) if *status >= 500 => {
2939 Some(None)
2940 }
2941 Some(_) => None,
2942 None => {
2943 let s = err_msg.to_lowercase();
2944 let transient = s.contains("rate limit")
2945 || s.contains("429")
2946 || s.contains("timeout")
2947 || s.contains("timed out")
2948 || s.contains("connection")
2949 || s.contains("overloaded")
2950 || s.contains("502")
2951 || s.contains("503");
2952 if transient { Some(None) } else { None }
2953 }
2954 };
2955 if let Some(hint) = retry_after_hint {
2956 if transient_retries < MAX_TRANSIENT_RETRIES {
2957 transient_retries += 1;
2958 let secs = hint.unwrap_or(2u64.pow(transient_retries)).min(20);
2961 send(Event::Message {
2962 run: run_id.clone(),
2963 role: "guard".into(),
2964 text: format!(
2965 "provider hiccup ({}) — retrying {} in {}s (attempt {}/{})",
2966 err_msg,
2967 brain.id(),
2968 secs,
2969 transient_retries,
2970 MAX_TRANSIENT_RETRIES
2971 ),
2972 });
2973 tokio::time::sleep(std::time::Duration::from_secs(secs)).await;
2974 continue;
2975 }
2976 }
2977 transient_retries = 0;
2978
2979 let next_idx = current_chain_idx + 1;
2981 if next_idx < brain_policy.chain.len() {
2982 current_chain_idx = next_idx;
2983 let _ = event_tx.send(Event::ModelSwitched {
2984 run: run_id.clone(),
2985 from: brain.id().to_string(),
2986 to: brain_policy.chain[current_chain_idx].id().to_string(),
2987 reason: err_msg,
2988 });
2989 } else {
2990 had_error = true;
2991 last_error = Some(err_msg);
2992 break;
2993 }
2994 }
2995 }
2996 }
2997
2998 let final_input = total_input + estimated_input_unconfirmed;
3004 let final_output = total_output + estimated_output_unconfirmed;
3005 if total_input == 0 && total_output == 0 && (final_input > 0 || final_output > 0) {
3006 let _ = event_tx.send(Event::TokenUsageEstimated {
3007 run: run_id.clone(),
3008 input: final_input,
3009 output: final_output,
3010 reason: "provider reported no usage events".into(),
3011 });
3012 }
3013 let _ = event_tx.send(Event::AgentStatus {
3015 run: run_id.clone(),
3016 role: "coder".into(),
3017 status: AgentStatus::Done,
3018 note: format!("completed · {}↑ {}↓ tok", final_input, final_output),
3019 });
3020
3021 let outcome = OutcomeSummary {
3022 status: if had_error {
3023 format!(
3024 "error: {}",
3025 last_error.unwrap_or_else(|| "run failed".into())
3026 )
3027 } else if waiting_for_approval {
3028 "waiting_for_approval".into()
3029 } else if denied_by_approval {
3030 "denied".into()
3031 } else {
3032 "completed".into()
3033 },
3034 diffs,
3035 cost_usd: cost_usd + estimated_cost_unconfirmed,
3036 tokens: TokenUsage {
3037 input: total_input + estimated_input_unconfirmed,
3038 output: total_output + estimated_output_unconfirmed,
3039 },
3040 cost_comparison: String::new(),
3041 };
3042
3043 if let Some(mem) = &self.memory {
3045 let _ = mem.save_task(&crate::memory::TaskMem {
3046 run_id: run_id.0.clone(),
3047 messages: messages.clone(),
3048 created_at: chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string(),
3049 });
3050 }
3051
3052 {
3055 use crate::router::learned::RunRoutingOutcome;
3056 let routing_outcome = if had_error {
3057 Some(RunRoutingOutcome::Failed)
3058 } else if verify_escalations > 0 {
3059 Some(RunRoutingOutcome::Escalated)
3060 } else if verify_attempts > 0 && outcome.status == "completed" {
3061 Some(RunRoutingOutcome::VerifiedSuccess)
3062 } else {
3063 None
3064 };
3065 if let Some(o) = routing_outcome {
3066 repo_routing.record(&classified_tier, o);
3067 }
3068 }
3069
3070 if outcome.status == "completed" {
3072 if let Some(skills) = &self.skills {
3073 if let Some(candidate) = Curator::propose_skill_if_missing(
3074 &task.description,
3075 &skill_evidence,
3076 skills.as_ref(),
3077 ) {
3078 let skill_name = candidate.name.clone();
3079 let _ = event_tx.send(Event::SkillLearned {
3080 run: run_id.clone(),
3081 name: skill_name.clone(),
3082 });
3083 let _ = self
3084 .hooks
3085 .execute(&HookEvent::OnSkillLearned, &skill_name)
3086 .await;
3087 let _ = skills.add(candidate);
3088 }
3089 }
3090
3091 if let Some(mem) = &self.memory {
3096 let events = events_from_messages(&run_id, &messages);
3097 Distiller::distill(mem, &events, &task.description).await;
3098 }
3099 }
3100
3101 let _ = event_tx.send(Event::RunFinished {
3102 run: run_id.clone(),
3103 outcome: outcome.clone(),
3104 });
3105
3106 let _ = self
3108 .hooks
3109 .execute(&HookEvent::PostRun, &task.description)
3110 .await;
3111
3112 Ok(outcome)
3113 }
3114}
3115
3116fn tool_narration_detected(text: &str) -> bool {
3122 let lower = text.to_lowercase();
3123 let patterns = [
3124 "i'll use",
3125 "i will use",
3126 "let me use",
3127 "i'll run",
3128 "i will run",
3129 "let me run",
3130 "i'll search",
3131 "i will search",
3132 "let me search",
3133 "i'll check",
3134 "i will check",
3135 "let me check",
3136 "i'll read",
3137 "i will read",
3138 "let me read",
3139 "i'll write",
3140 "i will write",
3141 "let me write",
3142 "i'll execute",
3143 "i will execute",
3144 "let me execute",
3145 "i'll call",
3146 "i will call",
3147 "let me call",
3148 "i'll fetch",
3149 "i will fetch",
3150 "let me fetch",
3151 "i'll look up",
3152 "i will look up",
3153 "let me look up",
3154 "i'll test",
3155 "i will test",
3156 "let me test",
3157 "running the test",
3158 "running the command",
3159 "searching for",
3160 "looking up",
3161 ];
3162 patterns.iter().any(|p| lower.contains(p))
3163}
3164
3165#[cfg(test)]
3166mod tests {
3167 use super::*;
3168
3169 #[test]
3170 fn main_agent_system_prompt_carries_the_reasoning_protocol() {
3171 let prompt = build_system_prompt(
3172 &Identity::default(),
3173 &PathBuf::from("."),
3174 &[],
3175 &[],
3176 &[],
3177 &[],
3178 &[],
3179 );
3180 for marker in [
3186 "TIER TRIAGE",
3187 "Tribunal",
3188 "Skeptic",
3189 "Adversary",
3190 "Anti-simulation",
3191 "Real execution beats",
3192 ] {
3193 assert!(prompt.contains(marker), "main soul must contain `{marker}`");
3194 }
3195 }
3196
3197 #[test]
3198 fn named_agents_keep_their_own_soul() {
3199 let planner = Identity {
3200 name: "planner".into(),
3201 role: "technical architect".into(),
3202 personality: "structured".into(),
3203 };
3204 let prompt = build_system_prompt(&planner, &PathBuf::from("."), &[], &[], &[], &[], &[]);
3205 assert!(
3208 !prompt.contains("TIER TRIAGE"),
3209 "named souls must not be diluted by the main protocol"
3210 );
3211 }
3212
3213 #[test]
3214 fn initial_user_content_blocks_embeds_uploaded_images() {
3215 let tmp = tempfile::tempdir().expect("tempdir");
3216 let image = tmp.path().join("shot.png");
3217 std::fs::write(
3218 &image,
3219 [
3220 0x89, b'P', b'N', b'G', b'\r', b'\n', 0x1a, b'\n', 0, 0, 0, 0,
3221 ],
3222 )
3223 .expect("write image");
3224 let description = format!(
3225 "analyse this\n\n[Attached files]\n### file: shot.png\n[uploaded: {}]",
3226 image.display()
3227 );
3228
3229 let blocks = initial_user_content_blocks(tmp.path(), &description);
3230 assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
3231 assert!(blocks.iter().any(|block| matches!(
3232 block,
3233 ContentBlock::Image {
3234 source: ImageSource::Base64 {
3235 media_type,
3236 data,
3237 }
3238 } if media_type == "image/png" && !data.is_empty()
3239 )));
3240 }
3241
3242 #[test]
3243 fn tool_result_content_blocks_preserves_images() {
3244 let blocks = tool_result_content_blocks(&[
3245 Block::Text("screenshot captured".into()),
3246 Block::Image {
3247 data: vec![1, 2, 3],
3248 mime: "image/png".into(),
3249 },
3250 ]);
3251
3252 assert!(matches!(blocks.first(), Some(ContentBlock::Text { .. })));
3253 assert!(blocks.iter().any(|block| matches!(
3254 block,
3255 ContentBlock::Image {
3256 source: ImageSource::Base64 {
3257 media_type,
3258 data,
3259 }
3260 } if media_type == "image/png" && data == "AQID"
3261 )));
3262 }
3263}