bamboo_agent/agent/loop_module/
todo_evaluation.rs1use crate::agent::core::tools::{FunctionSchema, ToolSchema};
5use crate::agent::core::{AgentEvent, Session, TodoItemStatus};
6use crate::agent::llm::LLMProvider;
7use serde_json::json;
8use std::sync::Arc;
9use tokio::sync::mpsc;
10
11use crate::agent::loop_module::todo_context::TodoLoopContext;
12
13#[derive(Debug, Clone)]
15pub struct TodoEvaluationResult {
16 pub needs_evaluation: bool,
18 pub updates: Vec<TodoItemUpdate>,
20 pub reasoning: String,
22}
23
24#[derive(Debug, Clone)]
25pub struct TodoItemUpdate {
26 pub item_id: String,
27 pub status: TodoItemStatus,
28 pub notes: Option<String>,
29}
30
31pub fn build_todo_evaluation_messages(
33 ctx: &TodoLoopContext,
34 _session: &Session,
35) -> Vec<crate::agent::core::Message> {
36 let mut messages = Vec::new();
37
38 let system_prompt = r#"You are a task progress evaluator. Your job is to evaluate whether tasks are complete based on the execution context.
40
41## Your Task
42Review the todo list and execution history, then decide if any tasks should be marked as completed or blocked.
43
44## Rules
451. Mark as "completed" if the task goal has been achieved
462. Mark as "blocked" if there are unresolvable issues
473. Keep as "in_progress" if more work is needed
484. Add brief notes explaining your decision
49
50## Available Actions
51- update_todo_item: Update the status of a todo item
52
53## Constraints
54- Only update items that are currently "in_progress"
55- You MUST call update_todo_item if a task is complete
56- Provide clear reasoning in notes
57"#;
58
59 messages.push(crate::agent::core::Message::system(system_prompt));
60
61 let todo_context = format!(
63 r#"
64## Current Todo List (Round {}/{})
65
66{}
67
68## Recent Tool Executions
69{}
70
71## Instructions
72Review each "in_progress" task above. For each task:
731. Check if the goal has been achieved based on tool execution results
742. If complete, call update_todo_item with status="completed" and brief notes
753. If blocked, call update_todo_item with status="blocked" and explain the issue
76
77Remember: You are NOT executing the task. You are only evaluating if existing work has completed it.
78"#,
79 ctx.current_round + 1,
80 ctx.max_rounds,
81 ctx.format_for_prompt(),
82 format_recent_tools(ctx, 5), );
84
85 messages.push(crate::agent::core::Message::user(todo_context));
86
87 messages
88}
89
90fn format_recent_tools(ctx: &TodoLoopContext, limit: usize) -> String {
92 let mut all_calls: Vec<(
93 String,
94 &crate::agent::loop_module::todo_context::ToolCallRecord,
95 )> = Vec::new();
96
97 for item in &ctx.items {
98 for call in &item.tool_calls {
99 all_calls.push((item.description.clone(), call));
100 }
101 }
102
103 all_calls.sort_by_key(|(_, call)| std::cmp::Reverse(call.timestamp));
105
106 let recent: Vec<_> = all_calls.into_iter().take(limit).collect();
107
108 if recent.is_empty() {
109 return "No tool executions yet.".to_string();
110 }
111
112 let mut output = String::new();
113 for (i, (task_desc, call)) in recent.iter().enumerate() {
114 output.push_str(&format!(
115 "{}. [{}] Tool: {} ({})\n Task: {}\n",
116 i + 1,
117 if call.success { "✓" } else { "✗" },
118 call.tool_name,
119 call.round + 1,
120 task_desc
121 ));
122 }
123
124 output
125}
126
127pub fn get_todo_evaluation_tools() -> Vec<ToolSchema> {
129 vec![ToolSchema {
130 schema_type: "function".to_string(),
131 function: FunctionSchema {
132 name: "update_todo_item".to_string(),
133 description: "Update the status of a todo item based on evaluation".to_string(),
134 parameters: json!({
135 "type": "object",
136 "properties": {
137 "item_id": {
138 "type": "string",
139 "description": "The ID of the todo item to update"
140 },
141 "status": {
142 "type": "string",
143 "enum": ["completed", "blocked"],
144 "description": "New status for the item"
145 },
146 "notes": {
147 "type": "string",
148 "description": "Brief explanation of why the status changed"
149 }
150 },
151 "required": ["item_id", "status"]
152 }),
153 },
154 }]
155}
156
157pub async fn evaluate_todo_progress(
159 ctx: &TodoLoopContext,
160 session: &Session,
161 llm: Arc<dyn LLMProvider>,
162 event_tx: &mpsc::Sender<AgentEvent>,
163 session_id: &str,
164 model: &str, ) -> Result<TodoEvaluationResult, crate::agent::core::AgentError> {
166 use crate::agent::loop_module::stream::handler::consume_llm_stream;
167
168 let in_progress_count = ctx
170 .items
171 .iter()
172 .filter(|item| matches!(item.status, TodoItemStatus::InProgress))
173 .count();
174
175 if in_progress_count == 0 {
176 return Ok(TodoEvaluationResult {
177 needs_evaluation: false,
178 updates: Vec::new(),
179 reasoning: "No in-progress tasks to evaluate".to_string(),
180 });
181 }
182
183 log::info!(
184 "[{}] Evaluating {} in-progress todo items",
185 session_id,
186 in_progress_count
187 );
188
189 let _ = event_tx
191 .send(AgentEvent::TodoEvaluationStarted {
192 session_id: session_id.to_string(),
193 items_count: in_progress_count,
194 })
195 .await;
196
197 let messages = build_todo_evaluation_messages(ctx, session);
199 let tools = get_todo_evaluation_tools();
200
201 log::debug!("[{}] Todo evaluation using model: {}", session_id, model);
203
204 match llm.chat_stream(&messages, &tools, Some(500), model).await {
206 Ok(stream) => {
207 let stream_output = consume_llm_stream(
209 stream,
210 event_tx,
211 &tokio_util::sync::CancellationToken::new(),
212 session_id,
213 )
214 .await
215 .map_err(|e| crate::agent::core::AgentError::LLM(e.to_string()))?;
216
217 log::info!(
218 "[{}] Todo evaluation completed: {} tokens, {} tool calls",
219 session_id,
220 stream_output.token_count,
221 stream_output.tool_calls.len()
222 );
223
224 let mut updates = Vec::new();
226 for tool_call in &stream_output.tool_calls {
227 if tool_call.function.name == "update_todo_item" {
228 if let Ok(args) =
229 serde_json::from_str::<serde_json::Value>(&tool_call.function.arguments)
230 {
231 if let (Some(item_id), Some(status_str)) =
232 (args["item_id"].as_str(), args["status"].as_str())
233 {
234 let status = match status_str {
235 "completed" => TodoItemStatus::Completed,
236 "blocked" => TodoItemStatus::Blocked,
237 _ => continue,
238 };
239
240 updates.push(TodoItemUpdate {
241 item_id: item_id.to_string(),
242 status,
243 notes: args["notes"].as_str().map(String::from),
244 });
245 }
246 }
247 }
248 }
249
250 let _ = event_tx
252 .send(AgentEvent::TodoEvaluationCompleted {
253 session_id: session_id.to_string(),
254 updates_count: updates.len(),
255 reasoning: stream_output.content.clone(),
256 })
257 .await;
258
259 Ok(TodoEvaluationResult {
260 needs_evaluation: true,
261 updates,
262 reasoning: stream_output.content,
263 })
264 }
265 Err(e) => {
266 log::warn!("[{}] Todo evaluation failed: {}", session_id, e);
267 Ok(TodoEvaluationResult {
268 needs_evaluation: false,
269 updates: Vec::new(),
270 reasoning: format!("Evaluation failed: {}", e),
271 })
272 }
273 }
274}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agent::core::todo::{TodoItem, TodoList};
    use crate::agent::loop_module::todo_context::{TodoLoopContext, TodoLoopItem};
    use chrono::Utc;

    /// Builds a context with a single in-progress item that has two
    /// successful tool calls recorded (`read_file` in round 0, `write_file`
    /// in round 1).
    fn create_test_context() -> TodoLoopContext {
        let mut session = crate::agent::core::Session::new("test", "test-model");
        let todo_list = TodoList {
            session_id: "test".to_string(),
            title: "Test Tasks".to_string(),
            items: vec![TodoItem {
                id: "1".to_string(),
                description: "Fix bug in authentication".to_string(),
                status: TodoItemStatus::InProgress,
                depends_on: Vec::new(),
                notes: String::new(),
            }],
            created_at: Utc::now(),
            updated_at: Utc::now(),
        };
        session.set_todo_list(todo_list);

        let mut ctx = TodoLoopContext::from_session(&session).unwrap();
        ctx.items = vec![TodoLoopItem {
            id: "1".to_string(),
            description: "Fix bug in authentication".to_string(),
            status: TodoItemStatus::InProgress,
            tool_calls: vec![
                crate::agent::loop_module::todo_context::ToolCallRecord {
                    round: 0,
                    tool_name: "read_file".to_string(),
                    success: true,
                    timestamp: Utc::now(),
                },
                crate::agent::loop_module::todo_context::ToolCallRecord {
                    round: 1,
                    tool_name: "write_file".to_string(),
                    success: true,
                    timestamp: Utc::now(),
                },
            ],
            started_at_round: Some(0),
            completed_at_round: None,
        }];

        ctx
    }

    #[test]
    fn test_build_evaluation_messages() {
        let ctx = create_test_context();
        let session = crate::agent::core::Session::new("test", "test-model");

        let messages = build_todo_evaluation_messages(&ctx, &session);

        assert_eq!(messages.len(), 2);
        assert!(messages[0].content.contains("task progress evaluator"));
        assert!(messages[1].content.contains("Fix bug in authentication"));
    }

    #[test]
    fn test_format_recent_tools() {
        let ctx = create_test_context();
        let output = format_recent_tools(&ctx, 5);

        assert!(output.contains("read_file"));
        assert!(output.contains("write_file"));
        assert!(output.contains("✓"));
        assert!(output.contains("Fix bug in authentication"));
    }

    #[test]
    fn test_format_recent_tools_without_history() {
        let mut ctx = create_test_context();
        ctx.items[0].tool_calls.clear();

        assert_eq!(format_recent_tools(&ctx, 5), "No tool executions yet.");
    }

    #[test]
    fn test_format_recent_tools_respects_limit() {
        let ctx = create_test_context();

        // Each entry spans two lines, so a limit of one yields two lines.
        // Which of the two calls is kept depends on their timestamps, so only
        // the count is asserted.
        let output = format_recent_tools(&ctx, 1);
        assert_eq!(output.lines().count(), 2);
        assert!(output.starts_with("1. "));

        assert_eq!(format_recent_tools(&ctx, 0), "No tool executions yet.");
    }

    #[test]
    fn test_needs_evaluation() {
        let mut ctx = create_test_context();

        assert!(ctx
            .items
            .iter()
            .any(|i| matches!(i.status, TodoItemStatus::InProgress)));

        ctx.items[0].status = TodoItemStatus::Completed;
        assert!(!ctx
            .items
            .iter()
            .any(|i| matches!(i.status, TodoItemStatus::InProgress)));
    }

    // The requirement that callers pass a model to `evaluate_todo_progress`
    // is enforced by its signature at compile time, so no runtime test is
    // needed for it. This test pins the tool contract offered to the LLM.
    #[test]
    fn test_evaluation_tool_schema() {
        let tools = get_todo_evaluation_tools();

        assert_eq!(tools.len(), 1);
        assert_eq!(tools[0].schema_type, "function");

        let function = &tools[0].function;
        assert_eq!(function.name, "update_todo_item");
        assert_eq!(
            function.parameters["required"],
            serde_json::json!(["item_id", "status"])
        );
        assert_eq!(
            function.parameters["properties"]["status"]["enum"],
            serde_json::json!(["completed", "blocked"])
        );
    }
}