// punch_runtime/fighter_loop.rs

//! The core agent execution loop.
//!
//! `run_fighter_loop` is the heart of the Punch runtime. It orchestrates the
//! conversation between the user, the LLM, and the tools (moves), persisting
//! messages to the memory substrate and enforcing loop guards.
//!
//! ## Production features
//!
//! - **Context window management**: Tracks the estimated token count and trims
//!   messages when approaching the context limit.
//! - **Session repair**: Fixes orphaned tool results, empty messages,
//!   duplicate results, and missing results on startup and after errors.
//! - **Error recovery**: Handles empty responses, `MaxTokens` continuation,
//!   and per-tool timeouts.
//! - **Loop guard**: Graduated response (Allow → Warn → Block → CircuitBreak)
//!   with ping-pong detection and poll-tool relaxation.

use std::fmt::Write as _;
use std::sync::Arc;

use tracing::{debug, error, info, instrument, warn};

use punch_memory::{BoutId, MemorySubstrate};
use punch_types::{
    AgentCoordinator, FighterId, FighterManifest, Message, PolicyEngine, PunchError, PunchResult,
    Role, SandboxEnforcer, ShellBleedDetector, ToolCallResult, ToolDefinition,
};

use crate::context_budget::ContextBudget;
use crate::driver::{CompletionRequest, LlmDriver, StopReason, TokenUsage};
use crate::guard::{GuardConfig, LoopGuard, LoopGuardVerdict};
use crate::session_repair;
use crate::tool_executor::{self, ToolExecutionContext};

/// Maximum number of consecutive `MaxTokens` continuations before the loop
/// gives up and returns what it has.
///
/// The counter is reset whenever the model issues a tool call.
const MAX_CONTINUATION_LOOPS: usize = 5;

/// Per-tool timeout in seconds, used when
/// `FighterLoopParams::tool_timeout_secs` is `None`.
const DEFAULT_TOOL_TIMEOUT_SECS: u64 = 120;

40/// Parameters for the fighter loop.
41pub struct FighterLoopParams {
42    /// The fighter's manifest (identity, model config, system prompt, capabilities).
43    pub manifest: FighterManifest,
44    /// The user's message to process.
45    pub user_message: String,
46    /// The bout (session) ID.
47    pub bout_id: BoutId,
48    /// The fighter's unique ID.
49    pub fighter_id: FighterId,
50    /// Shared memory substrate for persistence.
51    pub memory: Arc<MemorySubstrate>,
52    /// The LLM driver to use for completions.
53    pub driver: Arc<dyn LlmDriver>,
54    /// Tools available for this fighter to use.
55    pub available_tools: Vec<ToolDefinition>,
56    /// Maximum loop iterations before forced termination (default: 50).
57    pub max_iterations: Option<usize>,
58    /// Context window size in tokens (default: 200K).
59    pub context_window: Option<usize>,
60    /// Per-tool timeout in seconds (default: 120).
61    pub tool_timeout_secs: Option<u64>,
62    /// Optional agent coordinator for inter-agent tools.
63    pub coordinator: Option<Arc<dyn AgentCoordinator>>,
64    /// Optional policy engine for approval-gated tool execution.
65    /// When present, the referee checks every move before the fighter can throw it.
66    pub approval_engine: Option<Arc<PolicyEngine>>,
67    /// Optional subprocess sandbox (containment ring) for shell and filesystem tools.
68    /// When present, commands are validated and environments are sanitized before execution.
69    pub sandbox: Option<Arc<SandboxEnforcer>>,
70}
71
72/// Result of a completed fighter loop run.
73#[derive(Debug, Clone)]
74pub struct FighterLoopResult {
75    /// The final text response from the fighter.
76    pub response: String,
77    /// Cumulative token usage across all LLM calls in this run.
78    pub usage: TokenUsage,
79    /// Number of loop iterations performed.
80    pub iterations: usize,
81    /// Number of individual tool calls executed.
82    pub tool_calls_made: usize,
83}
84
85/// Run the fighter loop: the core agent execution engine.
86///
87/// This function:
88/// 1. Loads message history from the bout and repairs it
89/// 2. Recalls relevant memories
90/// 3. Builds the system prompt with context
91/// 4. Applies context budget management before each LLM call
92/// 5. Calls the LLM with available tools
93/// 6. If the LLM requests tool use, executes tools and loops
94/// 7. Handles empty responses, MaxTokens continuation, and errors
95/// 8. Enforces loop guards against runaway iterations
96#[instrument(
97    skip(params),
98    fields(
99        fighter = %params.fighter_id,
100        bout = %params.bout_id,
101        fighter_name = %params.manifest.name,
102    )
103)]
104pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterLoopResult> {
105    let max_iterations = params.max_iterations.unwrap_or(50);
106    let context_window = params.context_window.unwrap_or(200_000);
107    let tool_timeout = params
108        .tool_timeout_secs
109        .unwrap_or(DEFAULT_TOOL_TIMEOUT_SECS);
110
111    let budget = ContextBudget::new(context_window);
112    let mut guard = LoopGuard::with_config(GuardConfig {
113        max_iterations,
114        ..Default::default()
115    });
116    let mut total_usage = TokenUsage::default();
117    let mut tool_calls_made: usize = 0;
118    let mut continuation_count: usize = 0;
119
120    // 1. Load message history and repair.
121    let mut messages = params.memory.load_messages(&params.bout_id).await?;
122    debug!(history_len = messages.len(), "loaded bout message history");
123
124    // Run session repair on loaded history.
125    let repair_stats = session_repair::repair_session(&mut messages);
126    if repair_stats.any_repairs() {
127        info!(repairs = %repair_stats, "repaired loaded message history");
128    }
129
130    // 2. Append the user's new message and persist it.
131    let user_msg = Message::new(Role::User, &params.user_message);
132    params
133        .memory
134        .save_message(&params.bout_id, &user_msg)
135        .await?;
136    messages.push(user_msg);
137
138    // 3. Recall relevant memories and build an enriched system prompt.
139    let system_prompt =
140        build_system_prompt(&params.manifest, &params.fighter_id, &params.memory).await;
141
142    // Build the tool execution context.
143    let tool_context = ToolExecutionContext {
144        working_dir: std::env::current_dir().unwrap_or_default(),
145        fighter_id: params.fighter_id,
146        memory: Arc::clone(&params.memory),
147        coordinator: params.coordinator.clone(),
148        approval_engine: params.approval_engine.clone(),
149        sandbox: params.sandbox.clone(),
150        bleed_detector: Some(Arc::new(ShellBleedDetector::new())),
151        browser_pool: None,
152    };
153
154    // 4. Main loop.
155    loop {
156        // --- Context Budget: check and trim before LLM call ---
157        if let Some(trim_action) = budget.check_trim_needed(&messages, &params.available_tools) {
158            budget.apply_trim(&mut messages, trim_action);
159
160            // Re-run session repair after trimming (may create orphans).
161            let post_trim_repair = session_repair::repair_session(&mut messages);
162            if post_trim_repair.any_repairs() {
163                debug!(repairs = %post_trim_repair, "repaired after context trim");
164            }
165        }
166
167        // Apply context guard (truncate oversized tool results).
168        budget.apply_context_guard(&mut messages);
169
170        // Build the completion request.
171        let request = CompletionRequest {
172            model: params.manifest.model.model.clone(),
173            messages: messages.clone(),
174            tools: params.available_tools.clone(),
175            max_tokens: params.manifest.model.max_tokens.unwrap_or(
176                // Reasoning models (Qwen, DeepSeek) use thinking tokens internally,
177                // so they need a much higher default to leave room for visible output.
178                // The thinking budget can easily consume 2000-4000 tokens alone.
179                match params.manifest.model.provider {
180                    punch_types::Provider::Ollama => 16384,
181                    _ => 4096,
182                }
183            ),
184            temperature: params.manifest.model.temperature,
185            system_prompt: Some(system_prompt.clone()),
186        };
187
188        // Call the LLM.
189        let completion = match params.driver.complete(request).await {
190            Ok(c) => c,
191            Err(e) => {
192                error!(error = %e, "LLM completion failed");
193                return Err(e);
194            }
195        };
196        total_usage.accumulate(&completion.usage);
197
198        debug!(
199            stop_reason = ?completion.stop_reason,
200            input_tokens = completion.usage.input_tokens,
201            output_tokens = completion.usage.output_tokens,
202            tool_calls = completion.message.tool_calls.len(),
203            "LLM completion received"
204        );
205
206        match completion.stop_reason {
207            StopReason::EndTurn => {
208                // --- Empty response handling ---
209                if completion.message.content.is_empty() && completion.message.tool_calls.is_empty()
210                {
211                    if guard.iterations() == 0 {
212                        // Empty response on iteration 0: one-shot retry.
213                        warn!("empty response on first iteration, retrying once");
214                        guard.record_iteration();
215                        continue;
216                    }
217
218                    // Empty response after tool use: insert fallback.
219                    let has_prior_tools = messages.iter().any(|m| m.role == Role::Tool);
220
221                    if has_prior_tools {
222                        warn!("empty response after tool use, inserting fallback");
223                        let fallback_msg = Message::new(
224                            Role::Assistant,
225                            "I completed the requested operations. The tool results above \
226                             contain the output.",
227                        );
228                        params
229                            .memory
230                            .save_message(&params.bout_id, &fallback_msg)
231                            .await?;
232                        messages.push(fallback_msg.clone());
233
234                        return Ok(FighterLoopResult {
235                            response: fallback_msg.content,
236                            usage: total_usage,
237                            iterations: guard.iterations(),
238                            tool_calls_made,
239                        });
240                    }
241                }
242
243                // The fighter is done. Save and return the response.
244                params
245                    .memory
246                    .save_message(&params.bout_id, &completion.message)
247                    .await?;
248                messages.push(completion.message.clone());
249
250                let response = completion.message.content.clone();
251
252                info!(
253                    iterations = guard.iterations(),
254                    tool_calls = tool_calls_made,
255                    total_tokens = total_usage.total(),
256                    "fighter loop complete"
257                );
258
259                // --- CREED EVOLUTION ---
260                // Update the creed with bout statistics after completion.
261                if let Ok(Some(mut creed)) = params.memory.load_creed_by_name(&params.manifest.name).await {
262                    creed.record_bout();
263                    creed.record_messages(guard.iterations() as u64 + 1); // +1 for user msg
264                    // Bind to current fighter instance
265                    creed.fighter_id = Some(params.fighter_id);
266                    if let Err(e) = params.memory.save_creed(&creed).await {
267                        warn!(error = %e, "failed to update creed after bout");
268                    } else {
269                        debug!(fighter = %params.manifest.name, bout_count = creed.bout_count, "creed evolved");
270                    }
271                }
272
273                return Ok(FighterLoopResult {
274                    response,
275                    usage: total_usage,
276                    iterations: guard.iterations(),
277                    tool_calls_made,
278                });
279            }
280
281            StopReason::MaxTokens => {
282                // --- MaxTokens continuation ---
283                params
284                    .memory
285                    .save_message(&params.bout_id, &completion.message)
286                    .await?;
287                messages.push(completion.message.clone());
288
289                continuation_count += 1;
290
291                if continuation_count > MAX_CONTINUATION_LOOPS {
292                    warn!(
293                        continuation_count = continuation_count,
294                        "max continuation loops exceeded, returning partial response"
295                    );
296                    return Ok(FighterLoopResult {
297                        response: completion.message.content,
298                        usage: total_usage,
299                        iterations: guard.iterations(),
300                        tool_calls_made,
301                    });
302                }
303
304                info!(
305                    continuation = continuation_count,
306                    max = MAX_CONTINUATION_LOOPS,
307                    "MaxTokens hit, appending continuation prompt"
308                );
309
310                // Append a user message asking to continue.
311                let continue_msg =
312                    Message::new(Role::User, "Please continue from where you left off.");
313                params
314                    .memory
315                    .save_message(&params.bout_id, &continue_msg)
316                    .await?;
317                messages.push(continue_msg);
318
319                guard.record_iteration();
320                continue;
321            }
322
323            StopReason::ToolUse => {
324                // Reset continuation count since we got a real tool use.
325                continuation_count = 0;
326
327                // Check the loop guard before executing tools.
328                let verdict = guard.record_tool_calls(&completion.message.tool_calls);
329                match verdict {
330                    LoopGuardVerdict::Break(reason) => {
331                        warn!(reason = %reason, "loop guard triggered");
332
333                        // Save the assistant message, then return with a guard message.
334                        params
335                            .memory
336                            .save_message(&params.bout_id, &completion.message)
337                            .await?;
338                        messages.push(completion.message.clone());
339
340                        let guard_response = format!(
341                            "{}\n\n[Loop terminated: {}]",
342                            completion.message.content, reason
343                        );
344
345                        return Ok(FighterLoopResult {
346                            response: guard_response,
347                            usage: total_usage,
348                            iterations: guard.iterations(),
349                            tool_calls_made,
350                        });
351                    }
352                    LoopGuardVerdict::Continue => {}
353                }
354
355                // Save the assistant message (with tool calls).
356                params
357                    .memory
358                    .save_message(&params.bout_id, &completion.message)
359                    .await?;
360                messages.push(completion.message.clone());
361
362                // Execute each tool call with per-tool timeout.
363                let mut tool_results = Vec::new();
364
365                for tc in &completion.message.tool_calls {
366                    debug!(tool = %tc.name, id = %tc.id, "executing tool call");
367
368                    // Check per-call guard verdict.
369                    let call_verdict = guard.evaluate_call(tc);
370                    if let crate::guard::GuardVerdict::Block(reason) = &call_verdict {
371                        warn!(tool = %tc.name, reason = %reason, "tool call blocked by guard");
372                        tool_results.push(ToolCallResult {
373                            id: tc.id.clone(),
374                            content: format!("Error: {}", reason),
375                            is_error: true,
376                        });
377                        tool_calls_made += 1;
378                        continue;
379                    }
380
381                    let result = tokio::time::timeout(
382                        std::time::Duration::from_secs(tool_timeout),
383                        tool_executor::execute_tool(
384                            &tc.name,
385                            &tc.input,
386                            &params.manifest.capabilities,
387                            &tool_context,
388                        ),
389                    )
390                    .await;
391
392                    let tool_call_result = match result {
393                        Ok(Ok(tool_result)) => {
394                            let content = if tool_result.success {
395                                tool_result.output.to_string()
396                            } else {
397                                tool_result
398                                    .error
399                                    .unwrap_or_else(|| "tool execution failed".to_string())
400                            };
401
402                            // Record outcome for future blocking.
403                            guard.record_outcome(tc, &content);
404
405                            // Truncate result if it exceeds the per-result cap.
406                            let cap = budget.per_result_cap().min(budget.single_result_max());
407                            let content = if content.len() > cap {
408                                debug!(
409                                    tool = %tc.name,
410                                    original_len = content.len(),
411                                    cap = cap,
412                                    "truncating tool result"
413                                );
414                                ContextBudget::truncate_result(&content, cap)
415                            } else {
416                                content
417                            };
418
419                            ToolCallResult {
420                                id: tc.id.clone(),
421                                content,
422                                is_error: !tool_result.success,
423                            }
424                        }
425                        Ok(Err(e)) => {
426                            error!(tool = %tc.name, error = %e, "tool execution error");
427                            ToolCallResult {
428                                id: tc.id.clone(),
429                                content: format!("Error: {}", e),
430                                is_error: true,
431                            }
432                        }
433                        Err(_) => {
434                            error!(
435                                tool = %tc.name,
436                                timeout_secs = tool_timeout,
437                                "tool execution timed out"
438                            );
439                            ToolCallResult {
440                                id: tc.id.clone(),
441                                content: format!(
442                                    "Error: tool '{}' timed out after {}s",
443                                    tc.name, tool_timeout
444                                ),
445                                is_error: true,
446                            }
447                        }
448                    };
449
450                    tool_results.push(tool_call_result);
451                    tool_calls_made += 1;
452                }
453
454                // Create and save the tool results message.
455                let tool_msg = Message {
456                    role: Role::Tool,
457                    content: String::new(),
458                    tool_calls: Vec::new(),
459                    tool_results,
460                    timestamp: chrono::Utc::now(),
461                };
462
463                params
464                    .memory
465                    .save_message(&params.bout_id, &tool_msg)
466                    .await?;
467                messages.push(tool_msg);
468
469                // Continue the loop -- call the LLM again with tool results.
470            }
471
472            StopReason::Error => {
473                error!("LLM returned error stop reason");
474                return Err(PunchError::Provider {
475                    provider: params.manifest.model.provider.to_string(),
476                    message: "model returned an error".to_string(),
477                });
478            }
479        }
480    }
481}
482
483/// Build an enriched system prompt by combining the fighter's base system
484/// prompt with recalled memories.
485async fn build_system_prompt(
486    manifest: &FighterManifest,
487    fighter_id: &FighterId,
488    memory: &MemorySubstrate,
489) -> String {
490    let mut prompt = manifest.system_prompt.clone();
491
492    // --- CREED INJECTION ---
493    // Load the fighter's creed (consciousness layer) if one exists.
494    // The creed is tied to fighter NAME so it persists across respawns.
495    match memory.load_creed_by_name(&manifest.name).await {
496        Ok(Some(creed)) => {
497            prompt.push_str("\n\n");
498            prompt.push_str(&creed.render());
499        }
500        Ok(None) => {
501            // No creed defined — fighter runs without consciousness layer.
502        }
503        Err(e) => {
504            warn!(error = %e, "failed to load creed for fighter");
505        }
506    }
507
508    // Try to recall recent/relevant memories.
509    match memory.recall_memories(fighter_id, "", 10).await {
510        Ok(memories) if !memories.is_empty() => {
511            prompt.push_str("\n\n## Recalled Memories\n");
512            for mem in &memories {
513                prompt.push_str(&format!(
514                    "- **{}**: {} (confidence: {:.0}%)\n",
515                    mem.key,
516                    mem.value,
517                    mem.confidence * 100.0
518                ));
519            }
520        }
521        Ok(_) => {
522            // No memories to inject.
523        }
524        Err(e) => {
525            warn!(error = %e, "failed to recall memories for system prompt");
526        }
527    }
528
529    prompt
530}