bamboo_engine/runtime/managers/adapters/
llm_mini_loop.rs1use async_trait::async_trait;
2use bamboo_agent_core::tools::ToolCall;
3use bamboo_agent_core::{AgentError, Session};
4use bamboo_domain::{Message, Role};
5use bamboo_infrastructure::LLMProvider;
6
7use crate::runtime::managers::mini_loop::{MiniLoopDecision, MiniLoopExecutor};
8
9pub struct LLMMiniLoopExecutor {
14 provider: std::sync::Arc<dyn LLMProvider>,
15 model: String,
16}
17
18impl LLMMiniLoopExecutor {
19 pub fn new(provider: std::sync::Arc<dyn LLMProvider>, model: String) -> Self {
20 Self { provider, model }
21 }
22}
23
24#[async_trait]
25impl MiniLoopExecutor for LLMMiniLoopExecutor {
26 async fn decide(
27 &self,
28 _session: &Session,
29 prompt: &str,
30 context: &str,
31 ) -> Result<MiniLoopDecision, AgentError> {
32 let user_content = if context.is_empty() {
33 prompt.to_string()
34 } else {
35 format!("Context:\n{}\n\n{}", context, prompt)
36 };
37
38 let now = chrono::Utc::now();
39 let messages = vec![
40 Message {
41 id: String::new(),
42 role: Role::System,
43 content: "You are a task complexity classifier. Respond with exactly one word: simple, standard, or complex.".to_string(),
44 reasoning: None,
45 content_parts: None,
46 image_ocr: None,
47 phase: None,
48 tool_calls: None,
49 tool_call_id: None,
50 tool_success: None,
51 compressed: false,
52 compressed_by_event_id: None,
53 never_compress: false,
54 compression_level: 0,
55 created_at: now,
56 metadata: None,
57 },
58 Message {
59 id: String::new(),
60 role: Role::User,
61 content: user_content,
62 reasoning: None,
63 content_parts: None,
64 image_ocr: None,
65 phase: None,
66 tool_calls: None,
67 tool_call_id: None,
68 tool_success: None,
69 compressed: false,
70 compressed_by_event_id: None,
71 never_compress: false,
72 compression_level: 0,
73 created_at: now,
74 metadata: None,
75 },
76 ];
77
78 let options = bamboo_infrastructure::llm::provider::LLMRequestOptions {
79 session_id: None,
80 reasoning_effort: None,
81 parallel_tool_calls: None,
82 responses: None,
83 request_purpose: Some("mini_loop".to_string()),
84 cache: None,
85 };
86 let stream = self
87 .provider
88 .chat_stream_with_options(&messages, &[], Some(256), &self.model, Some(&options))
89 .await
90 .map_err(|e| AgentError::LLM(e.to_string()))?;
91
92 let output = crate::runtime::stream::handler::consume_llm_stream_silent(
93 stream,
94 &tokio_util::sync::CancellationToken::new(),
95 "mini-loop",
96 )
97 .await
98 .map_err(|e| AgentError::LLM(e.to_string()))?;
99
100 Ok(MiniLoopDecision {
101 answer: output.content.trim().to_string(),
102 prompt_tokens: 0,
103 completion_tokens: 0,
104 })
105 }
106
107 async fn evaluate_task(
108 &self,
109 _session: &Session,
110 _tool_calls: &[ToolCall],
111 _round: usize,
112 ) -> Result<MiniLoopDecision, AgentError> {
113 Ok(MiniLoopDecision {
116 answer: String::new(),
117 prompt_tokens: 0,
118 completion_tokens: 0,
119 })
120 }
121}
122
#[cfg(test)]
mod tests {
    use super::*;
    use bamboo_agent_core::tools::ToolSchema;
    use bamboo_domain::ReasoningEffort;
    use bamboo_infrastructure::llm::provider::LLMRequestOptions;
    use bamboo_infrastructure::{LLMChunk, LLMError, LLMProvider, LLMStream};
    use futures::stream;
    use std::sync::{Arc, Mutex};

    /// Provider double that records the output-token cap and reasoning effort of
    /// each `chat_stream_with_options` call and always streams the token `simple`.
    #[derive(Default)]
    struct CaptureProvider {
        /// `max_output_tokens` argument of every recorded call, in call order.
        captured_max_tokens: Mutex<Vec<Option<u32>>>,
        /// `reasoning_effort` option of every recorded call, in call order.
        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
    }

    #[async_trait]
    impl LLMProvider for CaptureProvider {
        /// Streams one `simple` token followed by the end-of-stream marker.
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            let chunks: Vec<Result<LLMChunk, LLMError>> = vec![
                Ok(LLMChunk::Token("simple".to_string())),
                Ok(LLMChunk::Done),
            ];
            Ok(Box::pin(stream::iter(chunks)))
        }

        /// Records the request parameters, then defers to [`Self::chat_stream`].
        async fn chat_stream_with_options(
            &self,
            messages: &[Message],
            tools: &[ToolSchema],
            max_output_tokens: Option<u32>,
            model: &str,
            options: Option<&LLMRequestOptions>,
        ) -> Result<LLMStream, LLMError> {
            let requested_effort = options.and_then(|opts| opts.reasoning_effort);

            // Each guard is a statement-scoped temporary, so no lock is held across the await.
            self.captured_max_tokens
                .lock()
                .expect("lock should not be poisoned")
                .push(max_output_tokens);
            self.captured_reasoning
                .lock()
                .expect("lock should not be poisoned")
                .push(requested_effort);

            self.chat_stream(messages, tools, max_output_tokens, model)
                .await
        }
    }

    /// Runs one `decide` call against a fresh [`CaptureProvider`] and hands back
    /// the provider (for inspection) together with the decision.
    async fn decide_with_capture() -> (Arc<CaptureProvider>, MiniLoopDecision) {
        let provider = Arc::new(CaptureProvider::default());
        let executor = LLMMiniLoopExecutor::new(provider.clone(), "fast-model".to_string());
        let session = Session::new("test", "model");

        let decision = executor
            .decide(&session, "classify this task", "")
            .await
            .expect("decide should succeed");

        (provider, decision)
    }

    #[tokio::test]
    async fn mini_loop_sends_no_reasoning_effort() {
        let (provider, decision) = decide_with_capture().await;
        assert_eq!(decision.answer, "simple");

        let efforts = provider
            .captured_reasoning
            .lock()
            .expect("lock should not be poisoned");
        assert_eq!(
            efforts.as_slice(),
            [None],
            "mini_loop should not request reasoning to avoid thinking budget consuming output tokens"
        );
    }

    #[tokio::test]
    async fn mini_loop_max_tokens_accommodates_provider_default_reasoning() {
        let (provider, _decision) = decide_with_capture().await;

        let recorded = provider
            .captured_max_tokens
            .lock()
            .expect("lock should not be poisoned");
        let max_tokens = recorded[0].expect("max_output_tokens should be set");
        assert!(
            max_tokens >= 256,
            "max_output_tokens ({}) should be at least 256 to accommodate potential provider default reasoning",
            max_tokens
        );
    }
}