Skip to main content

bamboo_engine/runtime/managers/adapters/
llm_mini_loop.rs

1use async_trait::async_trait;
2use bamboo_agent_core::tools::ToolCall;
3use bamboo_agent_core::{AgentError, Session};
4use bamboo_domain::{Message, Role};
5use bamboo_infrastructure::LLMProvider;
6
7use crate::runtime::managers::mini_loop::{MiniLoopDecision, MiniLoopExecutor};
8
9/// Production `MiniLoopExecutor` backed by a real LLM provider.
10///
11/// Uses a fast/cheap model for lightweight decisions such as task complexity
12/// classification, compression decisions, retry classification, and routing.
13pub struct LLMMiniLoopExecutor {
14    provider: std::sync::Arc<dyn LLMProvider>,
15    model: String,
16}
17
18impl LLMMiniLoopExecutor {
19    pub fn new(provider: std::sync::Arc<dyn LLMProvider>, model: String) -> Self {
20        Self { provider, model }
21    }
22}
23
24#[async_trait]
25impl MiniLoopExecutor for LLMMiniLoopExecutor {
26    async fn decide(
27        &self,
28        _session: &Session,
29        prompt: &str,
30        context: &str,
31    ) -> Result<MiniLoopDecision, AgentError> {
32        let user_content = if context.is_empty() {
33            prompt.to_string()
34        } else {
35            format!("Context:\n{}\n\n{}", context, prompt)
36        };
37
38        let now = chrono::Utc::now();
39        let messages = vec![
40            Message {
41                id: String::new(),
42                role: Role::System,
43                content: "You are a task complexity classifier. Respond with exactly one word: simple, standard, or complex.".to_string(),
44                reasoning: None,
45                content_parts: None,
46                image_ocr: None,
47                phase: None,
48                tool_calls: None,
49                tool_call_id: None,
50                tool_success: None,
51                compressed: false,
52                compressed_by_event_id: None,
53                never_compress: false,
54                compression_level: 0,
55                created_at: now,
56                metadata: None,
57            },
58            Message {
59                id: String::new(),
60                role: Role::User,
61                content: user_content,
62                reasoning: None,
63                content_parts: None,
64                image_ocr: None,
65                phase: None,
66                tool_calls: None,
67                tool_call_id: None,
68                tool_success: None,
69                compressed: false,
70                compressed_by_event_id: None,
71                never_compress: false,
72                compression_level: 0,
73                created_at: now,
74                metadata: None,
75            },
76        ];
77
78        let options = bamboo_infrastructure::llm::provider::LLMRequestOptions {
79            session_id: None,
80            reasoning_effort: None,
81            parallel_tool_calls: None,
82            responses: None,
83            request_purpose: Some("mini_loop".to_string()),
84        };
85        let stream = self
86            .provider
87            .chat_stream_with_options(&messages, &[], Some(256), &self.model, Some(&options))
88            .await
89            .map_err(|e| AgentError::LLM(e.to_string()))?;
90
91        let output = crate::runtime::stream::handler::consume_llm_stream_silent(
92            stream,
93            &tokio_util::sync::CancellationToken::new(),
94            "mini-loop",
95        )
96        .await
97        .map_err(|e| AgentError::LLM(e.to_string()))?;
98
99        Ok(MiniLoopDecision {
100            answer: output.content.trim().to_string(),
101            prompt_tokens: 0,
102            completion_tokens: 0,
103        })
104    }
105
106    async fn evaluate_task(
107        &self,
108        _session: &Session,
109        _tool_calls: &[ToolCall],
110        _round: usize,
111    ) -> Result<MiniLoopDecision, AgentError> {
112        // Task evaluation is handled by the dedicated task_evaluation module.
113        // This implementation returns an empty decision as a fallback.
114        Ok(MiniLoopDecision {
115            answer: String::new(),
116            prompt_tokens: 0,
117            completion_tokens: 0,
118        })
119    }
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use bamboo_agent_core::tools::ToolSchema;
    use bamboo_domain::ReasoningEffort;
    use bamboo_infrastructure::llm::provider::LLMRequestOptions;
    use bamboo_infrastructure::{LLMChunk, LLMError, LLMProvider, LLMStream};
    use futures::stream;
    use std::sync::{Arc, Mutex};

    /// Provider double that records the output-token limit and reasoning
    /// effort of each `chat_stream_with_options` call and always streams back
    /// the single token `simple`.
    #[derive(Default)]
    struct CaptureProvider {
        captured_max_tokens: Mutex<Vec<Option<u32>>>,
        captured_reasoning: Mutex<Vec<Option<ReasoningEffort>>>,
    }

    #[async_trait]
    impl LLMProvider for CaptureProvider {
        async fn chat_stream(
            &self,
            _messages: &[Message],
            _tools: &[ToolSchema],
            _max_output_tokens: Option<u32>,
            _model: &str,
        ) -> Result<LLMStream, LLMError> {
            // Canned reply: one token followed by the end-of-stream marker.
            let chunks: Vec<Result<LLMChunk, LLMError>> = vec![
                Ok(LLMChunk::Token("simple".to_string())),
                Ok(LLMChunk::Done),
            ];
            Ok(Box::pin(stream::iter(chunks)))
        }

        async fn chat_stream_with_options(
            &self,
            messages: &[Message],
            tools: &[ToolSchema],
            max_output_tokens: Option<u32>,
            model: &str,
            options: Option<&LLMRequestOptions>,
        ) -> Result<LLMStream, LLMError> {
            let reasoning_effort = options.and_then(|opts| opts.reasoning_effort);

            // Record what the caller asked for, then delegate to the canned stream.
            self.captured_max_tokens
                .lock()
                .expect("lock should not be poisoned")
                .push(max_output_tokens);
            self.captured_reasoning
                .lock()
                .expect("lock should not be poisoned")
                .push(reasoning_effort);

            self.chat_stream(messages, tools, max_output_tokens, model)
                .await
        }
    }

    /// Builds a capturing provider together with an executor wired to it.
    fn capture_setup() -> (Arc<CaptureProvider>, LLMMiniLoopExecutor) {
        let provider = Arc::new(CaptureProvider::default());
        let executor = LLMMiniLoopExecutor::new(provider.clone(), "fast-model".to_string());
        (provider, executor)
    }

    /// `decide` must leave the reasoning effort unset on its request.
    #[tokio::test]
    async fn mini_loop_sends_no_reasoning_effort() {
        let (provider, executor) = capture_setup();
        let session = Session::new("test", "model");

        let decision = executor
            .decide(&session, "classify this task", "")
            .await
            .expect("decide should succeed");
        assert_eq!(decision.answer, "simple");

        let seen_efforts = provider
            .captured_reasoning
            .lock()
            .expect("lock should not be poisoned");
        assert_eq!(
            seen_efforts.as_slice(),
            [None],
            "mini_loop should not request reasoning to avoid thinking budget consuming output tokens"
        );
    }

    /// `decide` must request an output budget of at least 256 tokens.
    #[tokio::test]
    async fn mini_loop_max_tokens_accommodates_provider_default_reasoning() {
        let (provider, executor) = capture_setup();
        let session = Session::new("test", "model");

        let _decision = executor
            .decide(&session, "classify this task", "")
            .await
            .expect("decide should succeed");

        let seen_limits = provider
            .captured_max_tokens
            .lock()
            .expect("lock should not be poisoned");
        let max_tokens = seen_limits[0].expect("max_output_tokens should be set");
        // Even if the provider has a default_reasoning_effort of High (4096 thinking budget),
        // 256 tokens is enough for the one-word response while leaving room for thinking.
        assert!(
            max_tokens >= 256,
            "max_output_tokens ({}) should be at least 256 to accommodate potential provider default reasoning",
            max_tokens
        );
    }
}