bamboo_engine/runtime/managers/adapters/
llm_mini_loop.rs1use async_trait::async_trait;
2use bamboo_agent_core::tools::ToolCall;
3use bamboo_agent_core::{AgentError, Session};
4use bamboo_domain::{Message, Role};
5use bamboo_infrastructure::LLMProvider;
6
7use crate::runtime::managers::mini_loop::{MiniLoopDecision, MiniLoopExecutor};
8
9pub struct LLMMiniLoopExecutor {
14 provider: std::sync::Arc<dyn LLMProvider>,
15 model: String,
16}
17
18impl LLMMiniLoopExecutor {
19 pub fn new(provider: std::sync::Arc<dyn LLMProvider>, model: String) -> Self {
20 Self { provider, model }
21 }
22}
23
24#[async_trait]
25impl MiniLoopExecutor for LLMMiniLoopExecutor {
26 async fn decide(
27 &self,
28 _session: &Session,
29 prompt: &str,
30 context: &str,
31 ) -> Result<MiniLoopDecision, AgentError> {
32 let user_content = if context.is_empty() {
33 prompt.to_string()
34 } else {
35 format!("Context:\n{}\n\n{}", context, prompt)
36 };
37
38 let now = chrono::Utc::now();
39 let messages = vec![
40 Message {
41 id: String::new(),
42 role: Role::System,
43 content: "You are a task complexity classifier. Respond with exactly one word: simple, standard, or complex.".to_string(),
44 reasoning: None,
45 content_parts: None,
46 image_ocr: None,
47 phase: None,
48 tool_calls: None,
49 tool_call_id: None,
50 tool_success: None,
51 compressed: false,
52 compressed_by_event_id: None,
53 never_compress: false,
54 compression_level: 0,
55 created_at: now,
56 metadata: None,
57 },
58 Message {
59 id: String::new(),
60 role: Role::User,
61 content: user_content,
62 reasoning: None,
63 content_parts: None,
64 image_ocr: None,
65 phase: None,
66 tool_calls: None,
67 tool_call_id: None,
68 tool_success: None,
69 compressed: false,
70 compressed_by_event_id: None,
71 never_compress: false,
72 compression_level: 0,
73 created_at: now,
74 metadata: None,
75 },
76 ];
77
78 let options = bamboo_infrastructure::llm::provider::LLMRequestOptions {
79 session_id: None,
80 reasoning_effort: None,
81 parallel_tool_calls: None,
82 responses: None,
83 request_purpose: Some("mini_loop".to_string()),
84 };
85 let stream = self
86 .provider
87 .chat_stream_with_options(&messages, &[], Some(256), &self.model, Some(&options))
88 .await
89 .map_err(|e| AgentError::LLM(e.to_string()))?;
90
91 let output = crate::runtime::stream::handler::consume_llm_stream_silent(
92 stream,
93 &tokio_util::sync::CancellationToken::new(),
94 "mini-loop",
95 )
96 .await
97 .map_err(|e| AgentError::LLM(e.to_string()))?;
98
99 Ok(MiniLoopDecision {
100 answer: output.content.trim().to_string(),
101 prompt_tokens: 0,
102 completion_tokens: 0,
103 })
104 }
105
106 async fn evaluate_task(
107 &self,
108 _session: &Session,
109 _tool_calls: &[ToolCall],
110 _round: usize,
111 ) -> Result<MiniLoopDecision, AgentError> {
112 Ok(MiniLoopDecision {
115 answer: String::new(),
116 prompt_tokens: 0,
117 completion_tokens: 0,
118 })
119 }
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use bamboo_agent_core::tools::ToolSchema;
    use bamboo_domain::ReasoningEffort;
    use bamboo_infrastructure::llm::provider::LLMRequestOptions;
    use bamboo_infrastructure::{LLMChunk, LLMError, LLMProvider, LLMStream};
    use futures::stream;
    use std::sync::{Arc, Mutex};

    /// Provider double that records what the executor asked for and always
    /// streams back the single token `simple`.
    #[derive(Default)]
    struct CaptureProvider {
        /// `max_output_tokens` of every `chat_stream_with_options` call, in order.
        captured_max_tokens: Mutex<Vec<Option<u32>>>,
        /// Reasoning effort of every `chat_stream_with_options` call, in order.
        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
    }

    #[async_trait]
    impl LLMProvider for CaptureProvider {
        /// Ignores its arguments and yields `simple` followed by `Done`.
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            let chunks: Vec<Result<LLMChunk, LLMError>> = vec![
                Ok(LLMChunk::Token("simple".to_string())),
                Ok(LLMChunk::Done),
            ];
            Ok(Box::pin(stream::iter(chunks)))
        }

        /// Records the token cap and reasoning effort, then defers to
        /// `chat_stream`.
        async fn chat_stream_with_options(
            &self,
            messages: &[Message],
            tools: &[ToolSchema],
            max_output_tokens: Option<u32>,
            model: &str,
            options: Option<&LLMRequestOptions>,
        ) -> Result<LLMStream, LLMError> {
            let requested_effort = options.and_then(|opts| opts.reasoning_effort);

            // Each guard is a statement-scoped temporary, so no lock is held
            // across the `.await` below.
            self.captured_max_tokens
                .lock()
                .expect("lock should not be poisoned")
                .push(max_output_tokens);
            self.captured_reasoning
                .lock()
                .expect("lock should not be poisoned")
                .push(requested_effort);

            self.chat_stream(messages, tools, max_output_tokens, model)
                .await
        }
    }

    /// Runs one `decide` call against a fresh capturing provider and hands
    /// back both the provider and the decision.
    async fn decide_with_capture() -> (Arc<CaptureProvider>, MiniLoopDecision) {
        let provider = Arc::new(CaptureProvider::default());
        let executor = LLMMiniLoopExecutor::new(provider.clone(), "fast-model".to_string());
        let session = Session::new("test", "model");

        let decision = executor
            .decide(&session, "classify this task", "")
            .await
            .expect("decide should succeed");

        (provider, decision)
    }

    #[tokio::test]
    async fn mini_loop_sends_no_reasoning_effort() {
        let (provider, decision) = decide_with_capture().await;
        assert_eq!(decision.answer, "simple");

        let captured_reasoning = provider
            .captured_reasoning
            .lock()
            .expect("lock should not be poisoned");
        assert_eq!(
            captured_reasoning.as_slice(),
            [None],
            "mini_loop should not request reasoning to avoid thinking budget consuming output tokens"
        );
    }

    #[tokio::test]
    async fn mini_loop_max_tokens_accommodates_provider_default_reasoning() {
        let (provider, _decision) = decide_with_capture().await;

        let captured = provider
            .captured_max_tokens
            .lock()
            .expect("lock should not be poisoned");
        let max_tokens = captured[0].expect("max_output_tokens should be set");

        // The cap must leave headroom in case the provider applies reasoning
        // by default even though none was requested.
        assert!(
            max_tokens >= 256,
            "max_output_tokens ({}) should be at least 256 to accommodate potential provider default reasoning",
            max_tokens
        );
    }
}