Skip to main content

brainwires_mdap/
microagent.rs

//! Microagent - Minimal Context Single-Step Agent
//!
//! Implements the paper's concept of maximal agentic decomposition (MAD),
//! where each agent handles exactly ONE step (m=1).
//!
//! The microagent is designed to:
//! - Execute a single minimal subtask
//! - Use minimal context to reduce error accumulation
//! - Produce consistent, structured outputs
//! - Be suitable for voting/consensus
//!
12use std::sync::Arc;
13
14use serde::{Deserialize, Serialize};
15
16use super::error::{MdapResult, MicroagentError};
17use super::red_flags::{OutputFormat, RedFlagConfig, StandardRedFlagValidator};
18use super::voting::{FirstToAheadByKVoter, ResponseMetadata, SampledResponse, VoteResult};
19
20/// A minimal subtask that can be executed by a microagent
21#[derive(Clone, Debug, Serialize, Deserialize)]
22pub struct Subtask {
23    /// Unique identifier for this subtask
24    pub id: String,
25    /// Human-readable description of what this subtask does
26    pub description: String,
27    /// Input state/context for this subtask
28    pub input_state: serde_json::Value,
29    /// Expected output format for validation
30    pub expected_output_format: Option<OutputFormat>,
31    /// IDs of subtasks this one depends on
32    pub depends_on: Vec<String>,
33    /// Complexity estimate (0.0-1.0) for cost estimation
34    pub complexity_estimate: f32,
35    /// Optional specific instructions for this subtask
36    pub instructions: Option<String>,
37}
38
39impl Subtask {
40    /// Create an atomic (non-decomposable) subtask
41    pub fn atomic(description: impl Into<String>) -> Self {
42        Self {
43            id: uuid::Uuid::new_v4().to_string(),
44            description: description.into(),
45            input_state: serde_json::Value::Null,
46            expected_output_format: None,
47            depends_on: Vec::new(),
48            complexity_estimate: 0.5,
49            instructions: None,
50        }
51    }
52
53    /// Create a subtask with full configuration
54    pub fn new(
55        id: impl Into<String>,
56        description: impl Into<String>,
57        input_state: serde_json::Value,
58    ) -> Self {
59        Self {
60            id: id.into(),
61            description: description.into(),
62            input_state,
63            expected_output_format: None,
64            depends_on: Vec::new(),
65            complexity_estimate: 0.5,
66            instructions: None,
67        }
68    }
69
70    /// Set expected output format
71    pub fn with_format(mut self, format: OutputFormat) -> Self {
72        self.expected_output_format = Some(format);
73        self
74    }
75
76    /// Add dependencies
77    pub fn depends_on(mut self, deps: Vec<String>) -> Self {
78        self.depends_on = deps;
79        self
80    }
81
82    /// Set complexity estimate
83    pub fn with_complexity(mut self, complexity: f32) -> Self {
84        self.complexity_estimate = complexity.clamp(0.0, 1.0);
85        self
86    }
87
88    /// Set specific instructions
89    pub fn with_instructions(mut self, instructions: impl Into<String>) -> Self {
90        self.instructions = Some(instructions.into());
91        self
92    }
93}
94
95/// Output from a subtask execution
96#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
97pub struct SubtaskOutput {
98    /// The subtask ID this output is for
99    pub subtask_id: String,
100    /// The output value
101    pub value: serde_json::Value,
102    /// Optional next state (for stateful subtasks)
103    pub next_state: Option<serde_json::Value>,
104}
105
106impl SubtaskOutput {
107    /// Create a new output
108    pub fn new(subtask_id: impl Into<String>, value: serde_json::Value) -> Self {
109        Self {
110            subtask_id: subtask_id.into(),
111            value,
112            next_state: None,
113        }
114    }
115
116    /// Create with next state
117    pub fn with_state(mut self, state: serde_json::Value) -> Self {
118        self.next_state = Some(state);
119        self
120    }
121}
122
123/// Configuration for microagent execution
124#[derive(Clone, Debug)]
125pub struct MicroagentConfig {
126    /// Maximum output tokens (strict limit for red-flagging, paper: ~750)
127    pub max_output_tokens: u32,
128    /// Sampling temperature (paper used low temp for consistency)
129    pub temperature: f32,
130    /// System prompt template
131    pub system_prompt_template: String,
132    /// Red-flag configuration
133    pub red_flag_config: RedFlagConfig,
134    /// Request timeout in milliseconds
135    pub timeout_ms: u64,
136}
137
138impl Default for MicroagentConfig {
139    fn default() -> Self {
140        Self {
141            max_output_tokens: 750,
142            temperature: 0.1, // Low temperature for consistency
143            system_prompt_template: MICROAGENT_SYSTEM_PROMPT.to_string(),
144            red_flag_config: RedFlagConfig::strict(),
145            timeout_ms: 30000,
146        }
147    }
148}
149
/// Default system prompt template for microagents.
///
/// Contains the placeholders `{subtask_description}` and `{output_format}`,
/// which are substituted when the system prompt is built.
const MICROAGENT_SYSTEM_PROMPT: &str = r#"You are a focused execution agent. Your job is to complete ONE specific subtask.

RULES:
1. Complete ONLY the specified subtask - nothing more, nothing less
2. Output ONLY the requested format - no explanations unless required
3. If you're unsure, output your best answer - do NOT hedge or explain uncertainty
4. Do NOT use phrases like "Wait,", "Actually,", "Let me reconsider" - just give the answer
5. Be concise and direct

Your subtask: {subtask_description}
Expected output format: {output_format}"#;
161
162/// A focused agent for executing a single minimal subtask
163///
164/// This implements the paper's concept of maximal agentic decomposition (MAD)
165/// where each agent handles exactly ONE step (m=1).
166pub struct Microagent<P> {
167    /// The provider for making LLM calls
168    provider: Arc<P>,
169    /// The subtask to execute
170    subtask: Subtask,
171    /// Configuration
172    config: MicroagentConfig,
173    /// Red-flag validator
174    red_flag_validator: StandardRedFlagValidator,
175}
176
177/// Trait for providers that can be used with microagents
178#[async_trait::async_trait]
179pub trait MicroagentProvider: Send + Sync {
180    /// Execute a chat completion
181    async fn chat(
182        &self,
183        system: &str,
184        user: &str,
185        temperature: f32,
186        max_tokens: u32,
187    ) -> MdapResult<MicroagentResponse>;
188
189    /// Get available tools for intent expression (not execution)
190    ///
191    /// These tools are described to the LLM so it can express intent to use them.
192    /// Actual execution happens after voting consensus, outside the microagent.
193    fn available_tools(&self) -> Vec<super::tool_intent::ToolSchema> {
194        vec![] // Default: no tools available
195    }
196
197    /// Check if this provider has tools available
198    fn has_tools(&self) -> bool {
199        !self.available_tools().is_empty()
200    }
201}
202
/// What a [`MicroagentProvider`] returns from a chat call.
#[derive(Clone, Debug)]
pub struct MicroagentResponse {
    /// Text of the response.
    pub text: String,
    /// Count of input tokens.
    pub input_tokens: u32,
    /// Count of output tokens.
    pub output_tokens: u32,
    /// Finish reason, when the provider reports one.
    pub finish_reason: Option<String>,
    /// Response time, in milliseconds.
    pub response_time_ms: u64,
}
217
218impl<P: MicroagentProvider + 'static> Microagent<P> {
219    /// Create a new microagent
220    pub fn new(provider: Arc<P>, subtask: Subtask, config: MicroagentConfig) -> Self {
221        let red_flag_validator = StandardRedFlagValidator::new(
222            config.red_flag_config.clone(),
223            subtask.expected_output_format.clone(),
224        );
225
226        Self {
227            provider,
228            subtask,
229            config,
230            red_flag_validator,
231        }
232    }
233
234    /// Create with default configuration
235    pub fn with_defaults(provider: Arc<P>, subtask: Subtask) -> Self {
236        Self::new(provider, subtask, MicroagentConfig::default())
237    }
238
239    /// Execute the subtask once (single sample for voting)
240    pub async fn execute_once(
241        &self,
242        input: &serde_json::Value,
243    ) -> MdapResult<SampledResponse<SubtaskOutput>> {
244        let system_prompt = self.build_system_prompt();
245        let user_prompt = self.build_user_prompt(input);
246
247        let start = std::time::Instant::now();
248
249        let response = self
250            .provider
251            .chat(
252                &system_prompt,
253                &user_prompt,
254                self.config.temperature,
255                self.config.max_output_tokens,
256            )
257            .await
258            .map_err(|e| MicroagentError::ProviderError(e.to_string()))?;
259
260        let elapsed = start.elapsed();
261
262        let metadata = ResponseMetadata {
263            token_count: response.output_tokens,
264            response_time_ms: elapsed.as_millis() as u64,
265            format_valid: true, // Will be validated by red-flag checker
266            finish_reason: response.finish_reason,
267            model: None,
268        };
269
270        let output = self.parse_output(&response.text)?;
271
272        // Extract confidence from response (CISC paper: arxiv:2502.06233v1)
273        let confidence = extract_response_confidence(&response.text, &metadata);
274
275        Ok(SampledResponse {
276            value: output,
277            metadata,
278            raw_response: response.text,
279            confidence,
280        })
281    }
282
283    /// Execute subtask with voting for error correction
284    pub async fn execute_with_voting(
285        &self,
286        input: &serde_json::Value,
287        voter: &FirstToAheadByKVoter,
288    ) -> MdapResult<VoteResult<SubtaskOutput>> {
289        let input = input.clone();
290        let provider = self.provider.clone();
291        let subtask = self.subtask.clone();
292        let config = self.config.clone();
293
294        // Create a closure that captures necessary state
295        voter
296            .vote(
297                || {
298                    let provider = provider.clone();
299                    let subtask = subtask.clone();
300                    let config = config.clone();
301                    let input = input.clone();
302
303                    async move {
304                        let agent = Microagent::new(provider, subtask, config);
305                        agent.execute_once(&input).await
306                    }
307                },
308                &self.red_flag_validator,
309                |output: &SubtaskOutput| {
310                    // Use the value as the key for voting
311                    serde_json::to_string(&output.value).unwrap_or_default()
312                },
313            )
314            .await
315    }
316
317    /// Build the system prompt
318    fn build_system_prompt(&self) -> String {
319        let format_desc = self
320            .subtask
321            .expected_output_format
322            .as_ref()
323            .map(|f| f.description())
324            .unwrap_or_else(|| "Plain text response".to_string());
325
326        self.config
327            .system_prompt_template
328            .replace("{subtask_description}", &self.subtask.description)
329            .replace("{output_format}", &format_desc)
330    }
331
332    /// Build the user prompt
333    fn build_user_prompt(&self, input: &serde_json::Value) -> String {
334        let mut prompt = String::new();
335
336        // Add specific instructions if provided
337        if let Some(ref instructions) = self.subtask.instructions {
338            prompt.push_str("Instructions:\n");
339            prompt.push_str(instructions);
340            prompt.push_str("\n\n");
341        }
342
343        // Add input state
344        prompt.push_str("Input:\n");
345        prompt.push_str(&serde_json::to_string_pretty(input).unwrap_or_default());
346        prompt.push_str("\n\n");
347
348        prompt.push_str("Provide your output:");
349
350        prompt
351    }
352
353    /// Parse the output from the response
354    fn parse_output(&self, response: &str) -> MdapResult<SubtaskOutput> {
355        let trimmed = response.trim();
356
357        // Try to parse as JSON first
358        let value = if let Ok(json) = serde_json::from_str::<serde_json::Value>(trimmed) {
359            json
360        } else {
361            // Fall back to string value
362            serde_json::Value::String(trimmed.to_string())
363        };
364
365        Ok(SubtaskOutput::new(self.subtask.id.clone(), value))
366    }
367
368    /// Get the subtask
369    pub fn subtask(&self) -> &Subtask {
370        &self.subtask
371    }
372
373    /// Get the configuration
374    pub fn config(&self) -> &MicroagentConfig {
375        &self.config
376    }
377}
378
379/// Builder for microagent configuration
380pub struct MicroagentConfigBuilder {
381    config: MicroagentConfig,
382}
383
384impl Default for MicroagentConfigBuilder {
385    fn default() -> Self {
386        Self::new()
387    }
388}
389
390impl MicroagentConfigBuilder {
391    /// Create a new builder with default configuration.
392    pub fn new() -> Self {
393        Self {
394            config: MicroagentConfig::default(),
395        }
396    }
397
398    /// Set the maximum output tokens.
399    pub fn max_output_tokens(mut self, tokens: u32) -> Self {
400        self.config.max_output_tokens = tokens;
401        self
402    }
403
404    /// Set the temperature for sampling (clamped to 0.0-2.0).
405    pub fn temperature(mut self, temp: f32) -> Self {
406        self.config.temperature = temp.clamp(0.0, 2.0);
407        self
408    }
409
410    /// Set the system prompt template.
411    pub fn system_prompt(mut self, prompt: impl Into<String>) -> Self {
412        self.config.system_prompt_template = prompt.into();
413        self
414    }
415
416    /// Set the red-flag validation configuration.
417    pub fn red_flag_config(mut self, config: RedFlagConfig) -> Self {
418        self.config.red_flag_config = config;
419        self
420    }
421
422    /// Set the execution timeout in milliseconds.
423    pub fn timeout_ms(mut self, timeout: u64) -> Self {
424        self.config.timeout_ms = timeout;
425        self
426    }
427
428    /// Build the microagent configuration.
429    pub fn build(self) -> MicroagentConfig {
430        self.config
431    }
432}
433
434/// Extract confidence from a microagent response (CISC paper: arxiv:2502.06233v1)
435///
436/// Analyzes the response text and metadata to determine confidence level.
437/// This replaces the hardcoded 0.75 with dynamic extraction based on:
438/// 1. Finish reason (completion status)
439/// 2. Response length (too short/long indicates issues)
440/// 3. Language patterns (hedging, self-correction, assertions)
441fn extract_response_confidence(text: &str, metadata: &ResponseMetadata) -> f64 {
442    let mut confidence = 0.75; // Start with baseline
443
444    // 1. Adjust based on finish reason
445    match metadata.finish_reason.as_deref() {
446        Some("stop") | Some("end_turn") => confidence += 0.10,
447        Some("length") | Some("max_tokens") => confidence -= 0.25, // Truncated
448        _ => {}
449    }
450
451    // 2. Adjust based on response length
452    let token_estimate = metadata.token_count as usize;
453    if token_estimate < 10 {
454        confidence -= 0.20; // Very short, possibly incomplete
455    } else if token_estimate > 700 {
456        confidence -= 0.15; // Near token limit, possibly verbose
457    }
458
459    // 3. Check for hedging/uncertainty patterns (reduces confidence)
460    let text_lower = text.to_lowercase();
461    let hedging_patterns = [
462        "i'm not sure",
463        "i think",
464        "possibly",
465        "might be",
466        "could be",
467        "probably",
468        "perhaps",
469        "maybe",
470        "unclear",
471        "i guess",
472    ];
473    let hedging_count = hedging_patterns
474        .iter()
475        .filter(|p| text_lower.contains(*p))
476        .count();
477    confidence -= (hedging_count as f64 * 0.08).min(0.30);
478
479    // 4. Check for self-correction patterns (reduces confidence more)
480    let self_correction_patterns = [
481        "wait,",
482        "actually,",
483        "let me reconsider",
484        "i made a mistake",
485        "correction:",
486        "i was wrong",
487        "on second thought",
488    ];
489    let correction_count = self_correction_patterns
490        .iter()
491        .filter(|p| text_lower.contains(*p))
492        .count();
493    confidence -= (correction_count as f64 * 0.15).min(0.30);
494
495    // 5. Check for confident assertion patterns (slight boost)
496    let confident_patterns = [
497        "the answer is",
498        "definitely",
499        "certainly",
500        "clearly",
501        "the solution is",
502        "this will work",
503    ];
504    let confident_count = confident_patterns
505        .iter()
506        .filter(|p| text_lower.contains(*p))
507        .count();
508    confidence += (confident_count as f64 * 0.05).min(0.10);
509
510    // 6. Check format validity from metadata
511    if !metadata.format_valid {
512        confidence -= 0.20;
513    }
514
515    confidence.clamp(0.1, 0.99)
516}
517
#[cfg(test)]
mod tests {
    use super::*;

    /// Provider stub that answers every chat call with the same canned text.
    struct MockProvider {
        response: String,
    }

    #[async_trait::async_trait]
    impl MicroagentProvider for MockProvider {
        async fn chat(
            &self,
            _system: &str,
            _user: &str,
            _temperature: f32,
            _max_tokens: u32,
        ) -> MdapResult<MicroagentResponse> {
            let text = self.response.clone();
            Ok(MicroagentResponse {
                text,
                input_tokens: 100,
                output_tokens: 50,
                finish_reason: Some(String::from("stop")),
                response_time_ms: 100,
            })
        }
    }

    /// Shared mock provider that always replies with `reply`.
    fn canned(reply: &str) -> Arc<MockProvider> {
        Arc::new(MockProvider {
            response: reply.to_string(),
        })
    }

    #[test]
    fn test_subtask_creation() {
        let task = Subtask::atomic("Calculate 2 + 2");

        assert_eq!(task.description, "Calculate 2 + 2");
        assert!(task.depends_on.is_empty());
    }

    #[test]
    fn test_subtask_builder() {
        let input = serde_json::json!({"a": 1, "b": 2});
        let task = Subtask::new("task_1", "Add numbers", input)
            .with_complexity(0.3)
            .with_format(OutputFormat::Json)
            .depends_on(vec!["task_0".to_string()]);

        assert_eq!(task.id, "task_1");
        assert_eq!(task.complexity_estimate, 0.3);
        assert_eq!(task.depends_on, vec!["task_0"]);
    }

    #[test]
    fn test_subtask_output() {
        let out = SubtaskOutput::new("task_1", serde_json::json!(42))
            .with_state(serde_json::json!({"done": true}));

        assert_eq!(out.subtask_id, "task_1");
        assert_eq!(out.value, serde_json::json!(42));
        assert!(out.next_state.is_some());
    }

    #[test]
    fn test_microagent_config_builder() {
        let built = MicroagentConfigBuilder::new()
            .max_output_tokens(500)
            .temperature(0.5)
            .timeout_ms(60000)
            .build();

        assert_eq!(built.max_output_tokens, 500);
        assert_eq!(built.temperature, 0.5);
        assert_eq!(built.timeout_ms, 60000);
    }

    #[tokio::test]
    async fn test_microagent_execute_once() {
        let agent = Microagent::with_defaults(canned("42"), Subtask::atomic("Calculate 2 + 2"));
        let input = serde_json::json!({"expression": "2 + 2"});

        let sample = agent.execute_once(&input).await.unwrap();

        // "42" is valid JSON, so it parses as a number
        assert_eq!(sample.value.value, serde_json::json!(42));
    }

    #[tokio::test]
    async fn test_microagent_parse_json() {
        let task = Subtask::atomic("Return JSON").with_format(OutputFormat::Json);
        let agent = Microagent::with_defaults(canned(r#"{"result": 42}"#), task);

        let sample = agent.execute_once(&serde_json::Value::Null).await.unwrap();

        assert!(sample.value.value.is_object());
        assert_eq!(sample.value.value["result"], 42);
    }

    #[test]
    fn test_system_prompt_generation() {
        let task = Subtask::atomic("Test task").with_format(OutputFormat::Json);
        let agent = Microagent::with_defaults(canned(""), task);

        let rendered = agent.build_system_prompt();

        assert!(rendered.contains("Test task"));
        assert!(rendered.contains("JSON"));
    }

    #[test]
    fn test_user_prompt_generation() {
        let task = Subtask::atomic("Test task").with_instructions("Be precise");
        let agent = Microagent::with_defaults(canned(""), task);

        let rendered = agent.build_user_prompt(&serde_json::json!({"x": 1}));

        assert!(rendered.contains("Be precise"));
        assert!(rendered.contains("\"x\": 1"));
    }
}