Skip to main content

bamboo_engine/runtime/managers/adapters/
llm_mini_loop.rs

1use async_trait::async_trait;
2use bamboo_agent_core::tools::ToolCall;
3use bamboo_agent_core::{AgentError, Session};
4use bamboo_domain::{Message, Role};
5use bamboo_infrastructure::LLMProvider;
6
7use crate::runtime::managers::mini_loop::{MiniLoopDecision, MiniLoopExecutor};
8
9/// Production `MiniLoopExecutor` backed by a real LLM provider.
10///
11/// Uses a fast/cheap model for lightweight decisions such as task complexity
12/// classification, compression decisions, retry classification, and routing.
13pub struct LLMMiniLoopExecutor {
14    provider: std::sync::Arc<dyn LLMProvider>,
15    model: String,
16}
17
18impl LLMMiniLoopExecutor {
19    pub fn new(provider: std::sync::Arc<dyn LLMProvider>, model: String) -> Self {
20        Self { provider, model }
21    }
22}
23
24#[async_trait]
25impl MiniLoopExecutor for LLMMiniLoopExecutor {
26    async fn decide(
27        &self,
28        _session: &Session,
29        prompt: &str,
30        context: &str,
31    ) -> Result<MiniLoopDecision, AgentError> {
32        let user_content = if context.is_empty() {
33            prompt.to_string()
34        } else {
35            format!("Context:\n{}\n\n{}", context, prompt)
36        };
37
38        let now = chrono::Utc::now();
39        let messages = vec![
40            Message {
41                id: String::new(),
42                role: Role::System,
43                content: "You are a task complexity classifier. Respond with exactly one word: simple, standard, or complex.".to_string(),
44                reasoning: None,
45                content_parts: None,
46                image_ocr: None,
47                phase: None,
48                tool_calls: None,
49                tool_call_id: None,
50                tool_success: None,
51                compressed: false,
52                compressed_by_event_id: None,
53                never_compress: false,
54                compression_level: 0,
55                created_at: now,
56                metadata: None,
57            },
58            Message {
59                id: String::new(),
60                role: Role::User,
61                content: user_content,
62                reasoning: None,
63                content_parts: None,
64                image_ocr: None,
65                phase: None,
66                tool_calls: None,
67                tool_call_id: None,
68                tool_success: None,
69                compressed: false,
70                compressed_by_event_id: None,
71                never_compress: false,
72                compression_level: 0,
73                created_at: now,
74                metadata: None,
75            },
76        ];
77
78        let options = bamboo_infrastructure::llm::provider::LLMRequestOptions {
79            session_id: None,
80            reasoning_effort: None,
81            parallel_tool_calls: None,
82            responses: None,
83            request_purpose: Some("mini_loop".to_string()),
84            cache: None,
85        };
86        let stream = self
87            .provider
88            .chat_stream_with_options(&messages, &[], Some(256), &self.model, Some(&options))
89            .await
90            .map_err(|e| AgentError::LLM(e.to_string()))?;
91
92        let output = crate::runtime::stream::handler::consume_llm_stream_silent(
93            stream,
94            &tokio_util::sync::CancellationToken::new(),
95            "mini-loop",
96        )
97        .await
98        .map_err(|e| AgentError::LLM(e.to_string()))?;
99
100        Ok(MiniLoopDecision {
101            answer: output.content.trim().to_string(),
102            prompt_tokens: 0,
103            completion_tokens: 0,
104        })
105    }
106
107    async fn evaluate_task(
108        &self,
109        _session: &Session,
110        _tool_calls: &[ToolCall],
111        _round: usize,
112    ) -> Result<MiniLoopDecision, AgentError> {
113        // Task evaluation is handled by the dedicated task_evaluation module.
114        // This implementation returns an empty decision as a fallback.
115        Ok(MiniLoopDecision {
116            answer: String::new(),
117            prompt_tokens: 0,
118            completion_tokens: 0,
119        })
120    }
121}
122
#[cfg(test)]
mod tests {
    use super::*;
    use bamboo_agent_core::tools::ToolSchema;
    use bamboo_domain::ReasoningEffort;
    use bamboo_infrastructure::llm::provider::LLMRequestOptions;
    use bamboo_infrastructure::{LLMChunk, LLMError, LLMProvider, LLMStream};
    use futures::stream;
    use std::sync::{Arc, Mutex};

    /// Test double that records the `max_output_tokens` and reasoning effort
    /// of each `chat_stream_with_options` call and always replies "simple".
    #[derive(Default)]
    struct CaptureProvider {
        captured_max_tokens: Mutex<Vec<Option<u32>>>,
        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
    }

    #[async_trait]
    impl LLMProvider for CaptureProvider {
        /// Canned reply: a single "simple" token followed by the done marker.
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            let chunks: Vec<Result<LLMChunk, LLMError>> = vec![
                Ok(LLMChunk::Token("simple".to_string())),
                Ok(LLMChunk::Done),
            ];
            Ok(Box::pin(stream::iter(chunks)))
        }

        /// Records the request parameters under test, then delegates to
        /// `chat_stream` for the canned reply.
        async fn chat_stream_with_options(
            &self,
            messages: &[Message],
            tools: &[ToolSchema],
            max_output_tokens: Option<u32>,
            model: &str,
            options: Option<&LLMRequestOptions>,
        ) -> Result<LLMStream, LLMError> {
            let requested_effort = options.and_then(|o| o.reasoning_effort);

            self.captured_max_tokens
                .lock()
                .expect("lock should not be poisoned")
                .push(max_output_tokens);
            self.captured_reasoning
                .lock()
                .expect("lock should not be poisoned")
                .push(requested_effort);

            self.chat_stream(messages, tools, max_output_tokens, model)
                .await
        }
    }

    /// Runs one `decide` call through an executor wired to `provider` and
    /// returns the resulting decision.
    async fn classify_with(provider: &Arc<CaptureProvider>) -> MiniLoopDecision {
        let shared: Arc<dyn LLMProvider> = provider.clone();
        let executor = LLMMiniLoopExecutor::new(shared, "fast-model".to_string());
        let session = Session::new("test", "model");

        executor
            .decide(&session, "classify this task", "")
            .await
            .expect("decide should succeed")
    }

    #[tokio::test]
    async fn mini_loop_sends_no_reasoning_effort() {
        let provider = Arc::new(CaptureProvider::default());

        let decision = classify_with(&provider).await;
        assert_eq!(decision.answer, "simple");

        let efforts = provider
            .captured_reasoning
            .lock()
            .expect("lock should not be poisoned");
        assert_eq!(
            efforts.as_slice(),
            [None],
            "mini_loop should not request reasoning to avoid thinking budget consuming output tokens"
        );
    }

    #[tokio::test]
    async fn mini_loop_max_tokens_accommodates_provider_default_reasoning() {
        let provider = Arc::new(CaptureProvider::default());

        let _ = classify_with(&provider).await;

        let recorded = provider
            .captured_max_tokens
            .lock()
            .expect("lock should not be poisoned");
        let max_tokens = recorded[0].expect("max_output_tokens should be set");
        // Even if the provider has a default_reasoning_effort of High (4096 thinking budget),
        // 256 tokens is enough for the one-word response while leaving room for thinking.
        assert!(
            max_tokens >= 256,
            "max_output_tokens ({}) should be at least 256 to accommodate potential provider default reasoning",
            max_tokens
        );
    }
}