// punch_runtime/fighter_loop.rs
//! The core agent execution loop.
//!
//! `run_fighter_loop` is the heart of the Punch runtime. It orchestrates the
//! conversation between the user, the LLM, and the tools (moves), persisting
//! messages to the memory substrate and enforcing loop guards.
//!
//! ## Production features
//!
//! - **Context window management**: Tracks estimated token count and trims
//!   messages when approaching the context limit.
//! - **Session repair**: Fixes orphaned tool results, empty messages,
//!   duplicate results, and missing results on startup and after errors.
//! - **Error recovery**: Handles empty responses, MaxTokens continuation,
//!   and per-tool timeouts.
//! - **Loop guard**: Graduated response (Allow → Warn → Block → CircuitBreak)
//!   with ping-pong detection and poll-tool relaxation.
18use std::sync::Arc;
19
20use serde::Deserialize as SerdeDeserialize;
21use tracing::{debug, error, info, instrument, warn};
22
23use dashmap::DashMap;
24use punch_memory::{BoutId, MemorySubstrate};
25use punch_types::{
26    AgentCoordinator, Capability, ChannelNotifier, FighterId, FighterManifest, Message,
27    PolicyEngine, PunchError, PunchResult, Role, SandboxEnforcer, ShellBleedDetector,
28    ToolCallResult, ToolDefinition,
29};
30
31use punch_types::config::ModelRoutingConfig;
32
33use crate::mcp::McpClient;
34use crate::model_router::ModelRouter;
35
36use crate::context_budget::ContextBudget;
37use crate::driver::{CompletionRequest, LlmDriver, StopReason, TokenUsage};
38use crate::guard::{GuardConfig, LoopGuard, LoopGuardVerdict};
39use crate::session_repair;
40use crate::tool_executor::{self, ToolExecutionContext};
41
/// Maximum number of MaxTokens continuations before giving up and returning
/// the partial response accumulated so far.
const MAX_CONTINUATION_LOOPS: usize = 5;

/// Default per-tool timeout in seconds, used when
/// `FighterLoopParams::tool_timeout_secs` is `None`.
const DEFAULT_TOOL_TIMEOUT_SECS: u64 = 120;

/// Default max output tokens for the cheap model tier.
/// Also used as the output cap in eco mode, even when no routing is configured.
const DEFAULT_MAX_TOKENS_CHEAP: u32 = 1024;
/// Default max output tokens for the mid model tier.
const DEFAULT_MAX_TOKENS_MID: u32 = 2048;
/// Default max output tokens for the expensive tier (or when no routing is configured).
const DEFAULT_MAX_TOKENS_EXPENSIVE: u32 = 4096;
/// Default max output tokens for Ollama models (reasoning models need extra
/// headroom for internal thinking tokens); overrides the tier default whenever
/// the active provider is Ollama.
const DEFAULT_MAX_TOKENS_OLLAMA: u32 = 16384;
/// Minimum message count (including history) for a bout to be considered substantive
/// enough to warrant a post-bout reflection LLM call. A bout that made at least
/// one tool call is treated as substantive regardless of message count.
const REFLECTION_MIN_MESSAGES: usize = 6;
59
/// Parameters for the fighter loop.
///
/// Bundles everything `run_fighter_loop` needs: the fighter's identity and
/// manifest, the incoming user message, shared infrastructure (memory
/// substrate, LLM driver), and a set of optional runtime services
/// (coordinator, policy engine, sandbox, MCP clients, channel notifier)
/// that are threaded through to tool execution when present.
pub struct FighterLoopParams {
    /// The fighter's manifest (identity, model config, system prompt, capabilities).
    pub manifest: FighterManifest,
    /// The user's message to process.
    pub user_message: String,
    /// The bout (session) ID.
    pub bout_id: BoutId,
    /// The fighter's unique ID.
    pub fighter_id: FighterId,
    /// Shared memory substrate for persistence.
    pub memory: Arc<MemorySubstrate>,
    /// The LLM driver to use for completions. May be bypassed per-bout when
    /// model routing selects a tier-specific driver.
    pub driver: Arc<dyn LlmDriver>,
    /// Tools available for this fighter to use.
    /// When provided, bypasses dynamic tool selection (used by workflows, gorillas, tests).
    /// When empty, the fighter loop uses `ToolSelector` for context-aware tool loading.
    pub available_tools: Vec<ToolDefinition>,
    /// Pre-fetched MCP tools for this fighter (merged into tool list each turn).
    pub mcp_tools: Vec<ToolDefinition>,
    /// Maximum loop iterations before forced termination (default: 50).
    pub max_iterations: Option<usize>,
    /// Context window size in tokens (default: 200K).
    pub context_window: Option<usize>,
    /// Per-tool timeout in seconds (default: 120).
    pub tool_timeout_secs: Option<u64>,
    /// Optional agent coordinator for inter-agent tools.
    pub coordinator: Option<Arc<dyn AgentCoordinator>>,
    /// Optional policy engine for approval-gated tool execution.
    /// When present, the referee checks every move before the fighter can throw it.
    pub approval_engine: Option<Arc<PolicyEngine>>,
    /// Optional subprocess sandbox (containment ring) for shell and filesystem tools.
    /// When present, commands are validated and environments are sanitized before execution.
    pub sandbox: Option<Arc<SandboxEnforcer>>,
    /// Active MCP server clients shared across fighters.
    /// When present, MCP tools are available for dispatch.
    pub mcp_clients: Option<Arc<DashMap<String, Arc<McpClient>>>>,
    /// Smart model routing configuration. When enabled, the router selects
    /// cheap / mid / expensive models based on the user's message complexity.
    pub model_routing: Option<ModelRoutingConfig>,
    /// Optional channel notifier for proactive outbound messaging.
    /// When present, the `channel_notify` tool can send messages to
    /// connected channels (Telegram, Slack, Discord, etc.).
    pub channel_notifier: Option<Arc<dyn ChannelNotifier>>,
    /// Optional multimodal content parts (images) to attach to the user message.
    /// When present, the user message is sent with these parts for vision-capable models.
    // NOTE(review): the allow is presumably for the shared `user_` prefix with
    // `user_message` — confirm before removing.
    #[allow(clippy::struct_field_names)]
    pub user_content_parts: Vec<punch_types::ContentPart>,
    /// When true, the fighter operates in eco mode: forces cheap model tier,
    /// caps max_tokens to 1024, skips post-bout reflection, and uses compact creed.
    /// Activated when approaching budget limits.
    pub eco_mode: bool,
}
113
/// Result of a completed fighter loop run.
#[derive(Debug, Clone)]
pub struct FighterLoopResult {
    /// The final text response from the fighter. May carry a
    /// `[Loop terminated: ...]` suffix when the loop guard broke the run,
    /// or be a canned fallback after an empty post-tool-use response.
    pub response: String,
    /// Cumulative token usage across all LLM calls in this run
    /// (accumulated after every completion, including continuations).
    pub usage: TokenUsage,
    /// Number of loop iterations performed, as counted by the loop guard.
    pub iterations: usize,
    /// Number of individual tool calls executed. Includes calls blocked by
    /// the guard and calls that timed out, since both still produce an
    /// error `ToolCallResult`.
    pub tool_calls_made: usize,
}
126
127/// Run the fighter loop: the core agent execution engine.
128///
129/// This function:
130/// 1. Loads message history from the bout and repairs it
131/// 2. Recalls relevant memories
132/// 3. Builds the system prompt with context
133/// 4. Applies context budget management before each LLM call
134/// 5. Calls the LLM with available tools
135/// 6. If the LLM requests tool use, executes tools and loops
136/// 7. Handles empty responses, MaxTokens continuation, and errors
137/// 8. Enforces loop guards against runaway iterations
138#[instrument(
139    skip(params),
140    fields(
141        fighter = %params.fighter_id,
142        bout = %params.bout_id,
143        fighter_name = %params.manifest.name,
144    )
145)]
146pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterLoopResult> {
147    let max_iterations = params.max_iterations.unwrap_or(50);
148    let context_window = params.context_window.unwrap_or(200_000);
149    let tool_timeout = params
150        .tool_timeout_secs
151        .unwrap_or(DEFAULT_TOOL_TIMEOUT_SECS);
152
153    let budget = ContextBudget::new(context_window);
154    let mut guard = LoopGuard::with_config(GuardConfig {
155        max_iterations,
156        ..Default::default()
157    });
158    let mut total_usage = TokenUsage::default();
159    let mut tool_calls_made: usize = 0;
160    let mut continuation_count: usize = 0;
161    let mut tool_failure_nudge_sent = false;
162
163    // 1. Load message history and repair.
164    let mut messages = params.memory.load_messages(&params.bout_id).await?;
165    debug!(history_len = messages.len(), "loaded bout message history");
166
167    // Run session repair on loaded history.
168    let repair_stats = session_repair::repair_session(&mut messages);
169    if repair_stats.any_repairs() {
170        info!(repairs = %repair_stats, "repaired loaded message history");
171    }
172
173    // 2. Append the user's new message and persist it.
174    let user_msg = if params.user_content_parts.is_empty() {
175        Message::new(Role::User, &params.user_message)
176    } else {
177        Message::with_parts(Role::User, &params.user_message, params.user_content_parts)
178    };
179    params
180        .memory
181        .save_message(&params.bout_id, &user_msg)
182        .await?;
183    messages.push(user_msg);
184
185    // 2b. Model routing: check if we should use a tier-specific driver.
186    // In eco mode, force the cheap tier to minimize costs.
187    let mut routed_tier: Option<String> = None;
188    let mut routed_provider: Option<punch_types::Provider> = None;
189    let routed_driver: Option<Arc<dyn LlmDriver>> =
190        if params.eco_mode {
191            // Eco mode: try to use the cheap model if routing is configured.
192            params
193            .model_routing
194            .as_ref()
195            .and_then(|routing_config| {
196                let router = ModelRouter::new(routing_config.clone());
197                router.select_model(crate::model_router::ModelTier::Cheap).cloned()
198            })
199            .and_then(|model_config| match ModelRouter::create_tier_driver(&model_config) {
200                Ok(driver) => {
201                    info!(
202                        model = %model_config.model,
203                        "eco mode: forcing cheap tier to save costs"
204                    );
205                    routed_tier = Some("cheap".to_string());
206                    routed_provider = Some(model_config.provider);
207                    Some(driver)
208                }
209                Err(e) => {
210                    warn!(error = %e, "eco mode: failed to create cheap driver, using default");
211                    routed_tier = Some("cheap".to_string());
212                    None
213                }
214            })
215        } else {
216            params
217            .model_routing
218            .as_ref()
219            .and_then(|routing_config| {
220                let router = ModelRouter::new(routing_config.clone());
221                router.route_message_with_context(&params.user_message, &messages)
222            })
223            .and_then(
224                |(tier, model_config)| match ModelRouter::create_tier_driver(&model_config) {
225                    Ok(driver) => {
226                        info!(
227                            tier = %tier,
228                            model = %model_config.model,
229                            "model router: using tier-specific driver"
230                        );
231                        routed_tier = Some(tier.to_string());
232                        routed_provider = Some(model_config.provider);
233                        Some(driver)
234                    }
235                    Err(e) => {
236                        warn!(
237                            tier = %tier,
238                            error = %e,
239                            "model router: failed to create tier driver, falling back to default"
240                        );
241                        None
242                    }
243                },
244            )
245        };
246    let active_driver: &dyn LlmDriver = match &routed_driver {
247        Some(d) => d.as_ref(),
248        None => params.driver.as_ref(),
249    };
250
251    // Use compact creed rendering for cheap/mid tiers (and always in eco mode) to save tokens.
252    let use_compact_creed = params.eco_mode
253        || routed_tier
254            .as_deref()
255            .is_some_and(|t| t == "cheap" || t == "mid");
256
257    // 3. Recall relevant memories and build an enriched system prompt.
258    let system_prompt = build_system_prompt(
259        &params.manifest,
260        &params.fighter_id,
261        &params.memory,
262        use_compact_creed,
263    )
264    .await;
265
266    // Build the tool execution context.
267    let mut tool_context = ToolExecutionContext {
268        working_dir: std::env::current_dir().unwrap_or_default(),
269        fighter_id: params.fighter_id,
270        memory: Arc::clone(&params.memory),
271        coordinator: params.coordinator.clone(),
272        approval_engine: params.approval_engine.clone(),
273        sandbox: params.sandbox.clone(),
274        bleed_detector: Some(Arc::new(ShellBleedDetector::new())),
275        browser_pool: None,
276        plugin_registry: None,
277        mcp_clients: params.mcp_clients.clone(),
278        channel_notifier: params.channel_notifier.clone(),
279        automation_backend: None, // Initialized below if fighter has automation capabilities.
280    };
281
282    // Initialize automation backend if the fighter has any automation capability.
283    {
284        let has_automation = params.manifest.capabilities.iter().any(|c| {
285            matches!(
286                c,
287                Capability::SystemAutomation
288                    | Capability::UiAutomation(_)
289                    | Capability::AppIntegration(_)
290            )
291        });
292        if has_automation {
293            tool_context.automation_backend = Some(Arc::from(crate::automation::create_backend()));
294            debug!("automation backend initialized for fighter");
295        }
296    }
297
298    // --- Dynamic tool selection ---
299    // If available_tools is pre-populated (workflows, gorillas, tests), use it as-is.
300    // Otherwise, use ToolSelector for context-aware per-turn tool loading.
301    let use_dynamic_tools = params.available_tools.is_empty();
302    let mut tool_selector = if use_dynamic_tools {
303        Some(crate::tools::ToolSelector::new(
304            &params.manifest.capabilities,
305        ))
306    } else {
307        None
308    };
309
310    // Pre-build static tool list (avoids cloning per loop iteration for static path).
311    let static_tools: Option<Vec<ToolDefinition>> = if !use_dynamic_tools {
312        Some(params.available_tools)
313    } else {
314        None
315    };
316
317    let static_tool_count = static_tools.as_ref().map_or(0, |t| t.len());
318    info!(
319        dynamic_tools = use_dynamic_tools,
320        static_tool_count,
321        mcp_tool_count = params.mcp_tools.len(),
322        fighter = %params.manifest.name,
323        model = %params.manifest.model.model,
324        "fighter loop starting"
325    );
326
327    // 4. Main loop.
328    loop {
329        // --- Dynamic tool selection: pick tools for this turn ---
330        let turn_tools = if let Some(ref mut selector) = tool_selector {
331            let (mut selected, _changed) = selector.select_tools(&messages);
332            // Merge MCP tools (already capability-filtered in ring.rs).
333            selected.extend(params.mcp_tools.iter().cloned());
334            selected
335        } else {
336            // Static path: clone from pre-built list (CompletionRequest takes ownership).
337            static_tools
338                .as_ref()
339                .expect("static_tools set when not using dynamic selection")
340                .clone()
341        };
342
343        // --- Context Budget: check and trim before LLM call ---
344        if let Some(trim_action) = budget.check_trim_needed(&messages, &turn_tools) {
345            budget.apply_trim(&mut messages, trim_action);
346
347            // Re-run session repair after trimming (may create orphans).
348            let post_trim_repair = session_repair::repair_session(&mut messages);
349            if post_trim_repair.any_repairs() {
350                debug!(repairs = %post_trim_repair, "repaired after context trim");
351            }
352        }
353
354        // Apply context guard (truncate oversized tool results).
355        budget.apply_context_guard(&mut messages);
356
357        // Build the completion request.
358        let request = CompletionRequest {
359            model: params.manifest.model.model.clone(),
360            messages: messages.clone(),
361            tools: turn_tools,
362            max_tokens: params.manifest.model.max_tokens.unwrap_or_else(|| {
363                // Adaptive max_tokens: scale output budget by model tier.
364                // Cheap tier gets less headroom since greetings/simple answers
365                // don't need 4K output tokens. Expensive tier gets full budget.
366                // In eco mode, always cap to cheap-tier budget even when routing
367                // is not configured (prevents eco mode from being a no-op).
368                let tier_default = if params.eco_mode {
369                    DEFAULT_MAX_TOKENS_CHEAP
370                } else {
371                    match routed_tier.as_deref() {
372                        Some("cheap") => DEFAULT_MAX_TOKENS_CHEAP,
373                        Some("mid") => DEFAULT_MAX_TOKENS_MID,
374                        _ => DEFAULT_MAX_TOKENS_EXPENSIVE,
375                    }
376                };
377                // Reasoning models (Qwen, DeepSeek) use thinking tokens internally,
378                // so they need a much higher default to leave room for visible output.
379                // Use the routed provider if routing selected a tier, otherwise fall
380                // back to the base manifest provider.
381                let active_provider = routed_provider
382                    .clone()
383                    .unwrap_or_else(|| params.manifest.model.provider.clone());
384                match active_provider {
385                    punch_types::Provider::Ollama => DEFAULT_MAX_TOKENS_OLLAMA,
386                    _ => tier_default,
387                }
388            }),
389            temperature: params.manifest.model.temperature,
390            system_prompt: Some(system_prompt.clone()),
391        };
392
393        // Call the LLM (using routed driver if model routing selected one).
394        let completion = match active_driver.complete(request).await {
395            Ok(c) => c,
396            Err(e) => {
397                error!(error = %e, "LLM completion failed");
398                return Err(e);
399            }
400        };
401        total_usage.accumulate(&completion.usage);
402
403        debug!(
404            stop_reason = ?completion.stop_reason,
405            input_tokens = completion.usage.input_tokens,
406            output_tokens = completion.usage.output_tokens,
407            tool_calls = completion.message.tool_calls.len(),
408            "LLM completion received"
409        );
410
411        match completion.stop_reason {
412            StopReason::EndTurn => {
413                // --- Empty response handling ---
414                if completion.message.content.is_empty() && completion.message.tool_calls.is_empty()
415                {
416                    if guard.iterations() == 0 {
417                        // Empty response on iteration 0: one-shot retry.
418                        warn!("empty response on first iteration, retrying once");
419                        guard.record_iteration();
420                        continue;
421                    }
422
423                    // Empty response after tool use: insert fallback.
424                    let has_prior_tools = messages.iter().any(|m| m.role == Role::Tool);
425
426                    if has_prior_tools {
427                        warn!("empty response after tool use, inserting fallback");
428                        let fallback_msg = Message::new(
429                            Role::Assistant,
430                            "I completed the requested operations. The tool results above \
431                             contain the output.",
432                        );
433                        params
434                            .memory
435                            .save_message(&params.bout_id, &fallback_msg)
436                            .await?;
437                        messages.push(fallback_msg.clone());
438
439                        return Ok(FighterLoopResult {
440                            response: fallback_msg.content,
441                            usage: total_usage,
442                            iterations: guard.iterations(),
443                            tool_calls_made,
444                        });
445                    }
446                }
447
448                // The fighter is done. Save and return the response.
449                params
450                    .memory
451                    .save_message(&params.bout_id, &completion.message)
452                    .await?;
453                messages.push(completion.message.clone());
454
455                let response = completion.message.content.clone();
456
457                info!(
458                    iterations = guard.iterations(),
459                    tool_calls = tool_calls_made,
460                    total_tokens = total_usage.total(),
461                    "fighter loop complete"
462                );
463
464                // --- CREED EVOLUTION ---
465                // Update the creed with bout statistics after completion.
466                if let Ok(Some(mut creed)) = params
467                    .memory
468                    .load_creed_by_name(&params.manifest.name)
469                    .await
470                {
471                    creed.record_bout();
472                    creed.record_messages(guard.iterations() as u64 + 1); // +1 for user msg
473                    // Bind to current fighter instance
474                    creed.fighter_id = Some(params.fighter_id);
475
476                    // --- HEARTBEAT MARK ---
477                    // Mark due heartbeat tasks as checked now that the bout is complete.
478                    let due_indices: Vec<usize> = creed
479                        .heartbeat
480                        .iter()
481                        .enumerate()
482                        .filter(|(_, h)| {
483                            if !h.active {
484                                return false;
485                            }
486                            let now = chrono::Utc::now();
487                            match h.cadence.as_str() {
488                                "every_bout" => true,
489                                "on_wake" => h.last_checked.is_none(),
490                                "hourly" => h
491                                    .last_checked
492                                    .is_none_or(|t| (now - t) > chrono::Duration::hours(1)),
493                                "daily" => h
494                                    .last_checked
495                                    .is_none_or(|t| (now - t) > chrono::Duration::hours(24)),
496                                _ => false,
497                            }
498                        })
499                        .map(|(i, _)| i)
500                        .collect();
501                    for idx in due_indices {
502                        creed.mark_heartbeat_checked(idx);
503                    }
504
505                    if let Err(e) = params.memory.save_creed(&creed).await {
506                        warn!(error = %e, "failed to update creed after bout");
507                    } else {
508                        debug!(fighter = %params.manifest.name, bout_count = creed.bout_count, "creed evolved");
509                    }
510                }
511
512                // Conditional reflection: only reflect on substantive bouts.
513                // Skip reflection for simple exchanges (few messages and no tool use)
514                // to avoid wasting an LLM call on "hello" / "how are you?" bouts.
515                // Also skip entirely in eco mode to minimize token spend.
516                let is_substantive_bout = !params.eco_mode
517                    && (messages.len() >= REFLECTION_MIN_MESSAGES || tool_calls_made > 0);
518                if is_substantive_bout {
519                    let driver = Arc::clone(&params.driver);
520                    let memory = Arc::clone(&params.memory);
521                    let model = params.manifest.model.model.clone();
522                    let fighter_name = params.manifest.name.clone();
523                    let reflection_messages = messages.clone();
524                    tokio::spawn(async move {
525                        reflect_on_bout(driver, memory, model, fighter_name, reflection_messages)
526                            .await;
527                    });
528                } else {
529                    debug!(
530                        message_count = messages.len(),
531                        tool_calls = tool_calls_made,
532                        "skipping post-bout reflection (simple exchange)"
533                    );
534                }
535
536                return Ok(FighterLoopResult {
537                    response,
538                    usage: total_usage,
539                    iterations: guard.iterations(),
540                    tool_calls_made,
541                });
542            }
543
544            StopReason::MaxTokens => {
545                // --- MaxTokens continuation ---
546                params
547                    .memory
548                    .save_message(&params.bout_id, &completion.message)
549                    .await?;
550                messages.push(completion.message.clone());
551
552                continuation_count += 1;
553
554                if continuation_count > MAX_CONTINUATION_LOOPS {
555                    warn!(
556                        continuation_count = continuation_count,
557                        "max continuation loops exceeded, returning partial response"
558                    );
559                    return Ok(FighterLoopResult {
560                        response: completion.message.content,
561                        usage: total_usage,
562                        iterations: guard.iterations(),
563                        tool_calls_made,
564                    });
565                }
566
567                info!(
568                    continuation = continuation_count,
569                    max = MAX_CONTINUATION_LOOPS,
570                    "MaxTokens hit, appending continuation prompt"
571                );
572
573                // Append a user message asking to continue.
574                let continue_msg =
575                    Message::new(Role::User, "Please continue from where you left off.");
576                params
577                    .memory
578                    .save_message(&params.bout_id, &continue_msg)
579                    .await?;
580                messages.push(continue_msg);
581
582                guard.record_iteration();
583                continue;
584            }
585
586            StopReason::ToolUse => {
587                // Reset continuation count since we got a real tool use.
588                continuation_count = 0;
589
590                // Check the loop guard before executing tools.
591                let verdict = guard.record_tool_calls(&completion.message.tool_calls);
592                match verdict {
593                    LoopGuardVerdict::Break(reason) => {
594                        warn!(reason = %reason, "loop guard triggered");
595
596                        // Save the assistant message, then return with a guard message.
597                        params
598                            .memory
599                            .save_message(&params.bout_id, &completion.message)
600                            .await?;
601                        messages.push(completion.message.clone());
602
603                        let guard_response = format!(
604                            "{}\n\n[Loop terminated: {}]",
605                            completion.message.content, reason
606                        );
607
608                        return Ok(FighterLoopResult {
609                            response: guard_response,
610                            usage: total_usage,
611                            iterations: guard.iterations(),
612                            tool_calls_made,
613                        });
614                    }
615                    LoopGuardVerdict::Continue => {}
616                }
617
618                // Save the assistant message (with tool calls).
619                params
620                    .memory
621                    .save_message(&params.bout_id, &completion.message)
622                    .await?;
623                messages.push(completion.message.clone());
624
625                // Execute each tool call with per-tool timeout.
626                let mut tool_results = Vec::new();
627
628                for tc in &completion.message.tool_calls {
629                    debug!(tool = %tc.name, id = %tc.id, "executing tool call");
630
631                    // Check per-call guard verdict.
632                    let call_verdict = guard.evaluate_call(tc);
633                    if let crate::guard::GuardVerdict::Block(reason) = &call_verdict {
634                        warn!(tool = %tc.name, reason = %reason, "tool call blocked by guard");
635                        tool_results.push(ToolCallResult {
636                            id: tc.id.clone(),
637                            content: format!("Error: {}", reason),
638                            is_error: true,
639                            image: None,
640                        });
641                        tool_calls_made += 1;
642                        continue;
643                    }
644
645                    let result = tokio::time::timeout(
646                        std::time::Duration::from_secs(tool_timeout),
647                        tool_executor::execute_tool(
648                            &tc.name,
649                            &tc.input,
650                            &params.manifest.capabilities,
651                            &tool_context,
652                        ),
653                    )
654                    .await;
655
656                    let tool_call_result = match result {
657                        Ok(Ok(tool_result)) => {
658                            let content = if tool_result.success {
659                                tool_result.output.to_string()
660                            } else {
661                                tool_result
662                                    .error
663                                    .unwrap_or_else(|| "tool execution failed".to_string())
664                            };
665
666                            // Record outcome for future blocking.
667                            guard.record_outcome(tc, &content);
668
669                            // Truncate result if it exceeds the per-result cap.
670                            let cap = budget.per_result_cap().min(budget.single_result_max());
671                            let content = if content.len() > cap {
672                                debug!(
673                                    tool = %tc.name,
674                                    original_len = content.len(),
675                                    cap = cap,
676                                    "truncating tool result"
677                                );
678                                ContextBudget::truncate_result(&content, cap)
679                            } else {
680                                content
681                            };
682
683                            // Extract image from screenshot tool results.
684                            let image = if tool_result.success {
685                                tool_result
686                                    .output
687                                    .get("png_base64")
688                                    .and_then(|v| v.as_str())
689                                    .map(|b64| punch_types::ContentPart::Image {
690                                        media_type: "image/png".to_string(),
691                                        data: b64.to_string(),
692                                    })
693                            } else {
694                                None
695                            };
696
697                            ToolCallResult {
698                                id: tc.id.clone(),
699                                content,
700                                is_error: !tool_result.success,
701                                image,
702                            }
703                        }
704                        Ok(Err(e)) => {
705                            error!(tool = %tc.name, error = %e, "tool execution error");
706                            ToolCallResult {
707                                id: tc.id.clone(),
708                                content: format!("Error: {}", e),
709                                is_error: true,
710                                image: None,
711                            }
712                        }
713                        Err(_) => {
714                            error!(
715                                tool = %tc.name,
716                                timeout_secs = tool_timeout,
717                                "tool execution timed out"
718                            );
719                            ToolCallResult {
720                                id: tc.id.clone(),
721                                content: format!(
722                                    "Error: tool '{}' timed out after {}s",
723                                    tc.name, tool_timeout
724                                ),
725                                is_error: true,
726                                image: None,
727                            }
728                        }
729                    };
730
731                    tool_results.push(tool_call_result);
732                    tool_calls_made += 1;
733                }
734
735                // Check if any tool calls failed — inject persistence nudge.
736                let has_errors = tool_results.iter().any(|r| r.is_error);
737
738                // Create and save the tool results message.
739                let tool_msg = Message {
740                    role: Role::Tool,
741                    content: String::new(),
742                    tool_calls: Vec::new(),
743                    tool_results,
744                    timestamp: chrono::Utc::now(),
745                    content_parts: Vec::new(),
746                };
747
748                params
749                    .memory
750                    .save_message(&params.bout_id, &tool_msg)
751                    .await?;
752                messages.push(tool_msg);
753
754                // --- PERSISTENCE NUDGE ---
755                // When tools fail, inject an ephemeral system message forcing
756                // the LLM to try alternative approaches instead of giving up.
757                // Not persisted to memory — only affects the current LLM call.
758                if has_errors && !tool_failure_nudge_sent {
759                    tool_failure_nudge_sent = true;
760                    let nudge = Message::new(
761                        Role::System,
762                        "A tool failed. Do NOT give up or ask for permissions. \
763                         Try a completely different approach using shell_exec. \
764                         Use shell commands appropriate for the user's platform \
765                         (e.g. app launchers, screenshot utilities, database CLIs, \
766                         curl for APIs). You have full system access. Try now.",
767                    );
768                    // Ephemeral: push to messages for this LLM call but do NOT
769                    // persist to memory so it doesn't pollute conversation history.
770                    messages.push(nudge);
771                }
772
773                // Continue the loop -- call the LLM again with tool results.
774            }
775
776            StopReason::Error => {
777                error!("LLM returned error stop reason");
778                return Err(PunchError::Provider {
779                    provider: params.manifest.model.provider.to_string(),
780                    message: "model returned an error".to_string(),
781                });
782            }
783        }
784    }
785}
786
787/// Build an enriched system prompt by combining the fighter's base system
788/// prompt with recalled memories.
789async fn build_system_prompt(
790    manifest: &FighterManifest,
791    fighter_id: &FighterId,
792    memory: &MemorySubstrate,
793    compact_creed: bool,
794) -> String {
795    let mut prompt = manifest.system_prompt.clone();
796
797    // --- CREED INJECTION ---
798    // Load the fighter's creed (consciousness layer) if one exists.
799    // The creed is tied to fighter NAME so it persists across respawns.
800    // Use compact rendering for cheap/mid model tiers to save tokens.
801    match memory.load_creed_by_name(&manifest.name).await {
802        Ok(Some(creed)) => {
803            prompt.push_str("\n\n");
804            if compact_creed {
805                prompt.push_str(&creed.render_compact());
806            } else {
807                prompt.push_str(&creed.render());
808            }
809
810            // --- HEARTBEAT INJECTION ---
811            // Check for due heartbeat tasks and inject them into the prompt.
812            let due_tasks = creed.due_heartbeat_tasks();
813            if !due_tasks.is_empty() {
814                prompt.push_str("\n\n## HEARTBEAT — Due Tasks\n");
815                prompt.push_str(
816                    "The following proactive tasks are due. Address them briefly before responding to the user:\n",
817                );
818                for task in &due_tasks {
819                    prompt.push_str(&format!("- {}\n", task.task));
820                }
821            }
822        }
823        Ok(None) => {
824            // No creed defined — fighter runs without consciousness layer.
825        }
826        Err(e) => {
827            warn!(error = %e, "failed to load creed for fighter");
828        }
829    }
830
831    // --- SKILL INJECTION ---
832    // Load markdown-based skills from workspace, user, and bundled directories.
833    {
834        let workspace_skills = std::path::Path::new("./skills");
835        let user_skills = std::env::var("HOME")
836            .ok()
837            .map(|h| std::path::PathBuf::from(h).join(".punch").join("skills"));
838        // Bundled skills ship in the binary's directory
839        let bundled_skills = std::env::current_exe()
840            .ok()
841            .and_then(|p| p.parent().map(|d| d.join("skills")));
842
843        let skills = punch_skills::load_all_skills(
844            Some(workspace_skills),
845            user_skills.as_deref(),
846            bundled_skills.as_deref(),
847        );
848
849        if !skills.is_empty() {
850            prompt.push_str("\n\n");
851            prompt.push_str(&punch_skills::render_skills_prompt(&skills));
852        }
853    }
854
855    // Try to recall recent/relevant memories.
856    match memory.recall_memories(fighter_id, "", 10).await {
857        Ok(memories) if !memories.is_empty() => {
858            prompt.push_str("\n\n## Recalled Memories\n");
859            for mem in &memories {
860                prompt.push_str(&format!(
861                    "- **{}**: {} (confidence: {:.0}%)\n",
862                    mem.key,
863                    mem.value,
864                    mem.confidence * 100.0
865                ));
866            }
867        }
868        Ok(_) => {
869            // No memories to inject.
870        }
871        Err(e) => {
872            warn!(error = %e, "failed to recall memories for system prompt");
873        }
874    }
875
876    prompt
877}
878
/// A single learned behavior extracted from post-bout reflection.
///
/// Deserialized from the JSON object the model emits in `reflect_on_bout`;
/// the field names must match the JSON keys requested by the reflection prompt.
#[derive(Debug, SerdeDeserialize)]
struct ReflectionItem {
    /// Free-form description of the insight; entries with an empty
    /// observation are skipped by the caller.
    observation: String,
    /// Model-reported certainty for the observation. The caller clamps
    /// this into [0.0, 1.0] before learning it, so out-of-range values
    /// from the model are tolerated here.
    confidence: f64,
}
885
886/// Post-bout reflection output from the LLM.
887#[derive(Debug, SerdeDeserialize)]
888struct ReflectionOutput {
889    behaviors: Vec<ReflectionItem>,
890    #[serde(default)]
891    interaction_quality: Option<f64>,
892}
893
894/// Reflect on a completed bout to extract learned behaviors.
895///
896/// Makes a lightweight LLM call asking the model to extract insights from
897/// the conversation. Updates the creed with new learned behaviors and
898/// adjusts the user relationship trust based on interaction quality.
899async fn reflect_on_bout(
900    driver: Arc<dyn LlmDriver>,
901    memory: Arc<MemorySubstrate>,
902    model: String,
903    fighter_name: String,
904    messages: Vec<Message>,
905) {
906    // Only use the last 20 messages to keep the reflection call small
907    let recent: Vec<Message> = messages.into_iter().rev().take(20).rev().collect();
908
909    let reflection_prompt = r#"You just completed a conversation. Reflect on it and extract learned behaviors.
910
911Respond ONLY with valid JSON (no markdown fences, no commentary):
912{
913  "behaviors": [
914    {"observation": "what you learned", "confidence": 0.0-1.0}
915  ],
916  "interaction_quality": 0.0-1.0
917}
918
919Rules:
920- Extract 0-3 genuinely new insights about the user, effective patterns, or self-improvement notes
921- confidence: 0.5 = uncertain, 0.9 = very confident
922- interaction_quality: how productive/positive was this interaction (0.5 = neutral, 0.9 = great)
923- If nothing notable was learned, return: {"behaviors": [], "interaction_quality": 0.7}
924- DO NOT restate your directives or identity as learned behaviors"#;
925
926    let request = CompletionRequest {
927        model,
928        messages: recent,
929        tools: vec![],
930        max_tokens: 512,
931        temperature: Some(0.3),
932        system_prompt: Some(reflection_prompt.to_string()),
933    };
934
935    let response = match driver.complete(request).await {
936        Ok(resp) => resp,
937        Err(e) => {
938            debug!(error = %e, fighter = %fighter_name, "reflection LLM call failed (non-critical)");
939            return;
940        }
941    };
942
943    let content = response.message.content.trim().to_string();
944
945    // Try to parse JSON, stripping markdown fences if present
946    let json_str = if let Some(start) = content.find('{') {
947        if let Some(end) = content.rfind('}') {
948            &content[start..=end]
949        } else {
950            &content
951        }
952    } else {
953        &content
954    };
955
956    let output: ReflectionOutput = match serde_json::from_str(json_str) {
957        Ok(o) => o,
958        Err(e) => {
959            debug!(error = %e, fighter = %fighter_name, "failed to parse reflection JSON (non-critical)");
960            return;
961        }
962    };
963
964    // Load creed, apply changes, save
965    let mut creed = match memory.load_creed_by_name(&fighter_name).await {
966        Ok(Some(c)) => c,
967        _ => return,
968    };
969
970    // Apply confidence decay to existing behaviors
971    creed.decay_learned_behaviors(0.01, 0.3);
972
973    // Learn new behaviors
974    for item in &output.behaviors {
975        if !item.observation.is_empty() {
976            creed.learn(&item.observation, item.confidence.clamp(0.0, 1.0));
977        }
978    }
979
980    // Prune to max 20 behaviors
981    creed.prune_learned_behaviors(20);
982
983    // Update user relationship trust based on interaction quality
984    if let Some(quality) = output.interaction_quality {
985        let quality = quality.clamp(0.0, 1.0);
986        if let Some(rel) = creed
987            .relationships
988            .iter_mut()
989            .find(|r| r.entity_type == "user")
990        {
991            rel.trust = (rel.trust * 0.9 + quality * 0.1).clamp(0.0, 1.0);
992            rel.interaction_count += 1;
993        } else {
994            creed.relationships.push(punch_types::Relationship {
995                entity: "user".to_string(),
996                entity_type: "user".to_string(),
997                nature: "operator".to_string(),
998                trust: quality,
999                interaction_count: 1,
1000                notes: format!(
1001                    "First interaction: {}",
1002                    chrono::Utc::now().format("%Y-%m-%d %H:%M UTC")
1003                ),
1004            });
1005        }
1006    }
1007
1008    if let Err(e) = memory.save_creed(&creed).await {
1009        warn!(error = %e, fighter = %fighter_name, "failed to save creed after reflection");
1010    } else {
1011        info!(
1012            fighter = %fighter_name,
1013            new_behaviors = output.behaviors.len(),
1014            total_behaviors = creed.learned_behaviors.len(),
1015            "creed evolved via post-bout reflection"
1016        );
1017    }
1018}