Skip to main content

punch_kernel/
ring.rs

1//! **The Ring** — the central kernel and coordinator for the Punch system.
2//!
3//! The [`Ring`] owns every fighter and gorilla, wires them to the memory
4//! substrate, the LLM driver, the event bus, the scheduler, the background
5//! executor, the workflow engine, and the metering engine. All mutations
6//! go through the Ring so that invariants (quotas, capabilities, lifecycle
7//! events) are enforced in a single place.
8
9use std::sync::Arc;
10
11use async_trait::async_trait;
12use chrono;
13use dashmap::DashMap;
14use serde::{Deserialize, Serialize};
15use tokio::sync::Mutex;
16use tokio::sync::watch;
17use tokio::task::JoinHandle;
18use tracing::{info, instrument, warn};
19
20use punch_memory::{BoutId, MemorySubstrate};
21use punch_runtime::{FighterLoopParams, FighterLoopResult, LlmDriver, McpClient, run_fighter_loop};
22use punch_types::{
23    AgentCoordinator, AgentInfo, AgentMessageResult, CoordinationStrategy, FighterId,
24    FighterManifest, FighterStatus, GorillaId, GorillaManifest, GorillaMetrics, GorillaStatus,
25    PunchConfig, PunchError, PunchEvent, PunchResult, TenantId, TenantStatus, Troop, TroopId,
26};
27
28use punch_skills::{SkillMarketplace, builtin_skills};
29
30use crate::agent_messaging::MessageRouter;
31use crate::background::BackgroundExecutor;
32use crate::budget::{BudgetEnforcer, BudgetLimit};
33use crate::event_bus::EventBus;
34use crate::heartbeat_scheduler::HeartbeatScheduler;
35use crate::metering::MeteringEngine;
36use crate::metrics::{self, MetricsRegistry};
37use crate::scheduler::{QuotaConfig, Scheduler};
38use crate::swarm::SwarmCoordinator;
39use crate::tenant_registry::TenantRegistry;
40use crate::triggers::{Trigger, TriggerEngine, TriggerId, TriggerSummary};
41use crate::troop::TroopManager;
42use crate::workflow::{Workflow, WorkflowEngine, WorkflowId, WorkflowRunId};
43
44// ---------------------------------------------------------------------------
45// Entry types
46// ---------------------------------------------------------------------------
47
/// Everything the Ring tracks about a single fighter.
///
/// This is the value type stored in the Ring's fighter map. It is plain data:
/// it derives `Clone` and the serde traits, so a snapshot can be copied out of
/// the map and serialized without holding a map guard.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FighterEntry {
    /// The fighter's manifest (identity, model, capabilities, etc.).
    pub manifest: FighterManifest,
    /// Current operational status.
    pub status: FighterStatus,
    /// The active bout (conversation session) ID, if any.
    ///
    /// `None` until a bout has been created or restored for this fighter.
    pub current_bout: Option<BoutId>,
}
58
/// Everything the Ring tracks about a single gorilla.
///
/// [`JoinHandle`] is not `Clone`, so this type cannot derive `Clone` the way
/// [`FighterEntry`] does. The Ring stores each entry wrapped in a `Mutex`
/// (see the `gorillas` map on [`Ring`]); that outer lock — not a lock on the
/// `task_handle` field itself — is what provides the interior mutability
/// needed when starting / stopping background tasks.
pub struct GorillaEntry {
    /// The gorilla's manifest.
    pub manifest: GorillaManifest,
    /// Current operational status.
    pub status: GorillaStatus,
    /// Runtime metrics.
    pub metrics: GorillaMetrics,
    /// Handle to the background task (if running).
    ///
    /// Private: only code in this module can set or take the handle.
    task_handle: Option<JoinHandle<()>>,
}
73
74// Manual Debug impl because JoinHandle doesn't implement Debug in a useful way.
75impl std::fmt::Debug for GorillaEntry {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        f.debug_struct("GorillaEntry")
78            .field("manifest", &self.manifest)
79            .field("status", &self.status)
80            .field("metrics", &self.metrics)
81            .field("has_task", &self.task_handle.is_some())
82            .finish()
83    }
84}
85
86// ---------------------------------------------------------------------------
87// The Ring
88// ---------------------------------------------------------------------------
89
/// The Ring — the central coordinator for the Punch Agent Combat System.
///
/// Thread-safe by design: all collections use [`DashMap`] and all shared state
/// is behind `Arc`. Wrap the `Ring` itself in an `Arc` to share across tasks.
///
/// Every field is private; other modules reach the subsystems through the
/// accessor methods on `Ring`.
pub struct Ring {
    /// All active fighters, keyed by their unique ID.
    fighters: DashMap<FighterId, FighterEntry>,
    /// All registered gorillas, keyed by their unique ID.
    ///
    /// Each entry sits behind its own async `Mutex` because `GorillaEntry`
    /// owns a non-`Clone` task handle that must be mutated in place.
    gorillas: DashMap<GorillaId, Mutex<GorillaEntry>>,
    /// Shared memory substrate (SQLite persistence).
    memory: Arc<MemorySubstrate>,
    /// The LLM driver used for completions.
    driver: Arc<dyn LlmDriver>,
    /// System-wide event bus.
    event_bus: EventBus,
    /// Per-fighter quota scheduler.
    scheduler: Scheduler,
    /// Top-level Punch configuration.
    config: PunchConfig,
    /// Background executor for autonomous gorilla tasks.
    background: BackgroundExecutor,
    /// Proactive heartbeat scheduler for fighter agents.
    heartbeat_scheduler: HeartbeatScheduler,
    /// Optional channel notifier for proactive heartbeat notifications.
    ///
    /// Starts out as `None`; guarded by a std (blocking) `RwLock`, so guards
    /// should not be held across `.await` points.
    channel_notifier: Arc<std::sync::RwLock<Option<Arc<dyn punch_types::ChannelNotifier>>>>,
    /// Multi-step workflow engine.
    workflow_engine: WorkflowEngine,
    /// Cost tracking and metering engine.
    ///
    /// NOTE(review): the constructors build a *second* `MeteringEngine` for
    /// the budget enforcer, so this is not the instance the enforcer reads.
    metering: MeteringEngine,
    /// Budget enforcement layer (opt-in spending limits).
    budget_enforcer: Arc<BudgetEnforcer>,
    /// Event-driven trigger engine.
    trigger_engine: TriggerEngine,
    /// Troop manager for multi-agent coordination.
    troop_manager: TroopManager,
    /// Swarm coordinator for emergent behavior tasks.
    swarm_coordinator: SwarmCoordinator,
    /// Inter-agent message router.
    message_router: MessageRouter,
    /// Production observability metrics.
    metrics: Arc<MetricsRegistry>,
    /// Multi-tenant registry.
    tenant_registry: TenantRegistry,
    /// Skill marketplace for discovering and installing moves.
    marketplace: SkillMarketplace,
    /// Active MCP server clients, keyed by server name.
    mcp_clients: Arc<DashMap<String, Arc<McpClient>>>,
    /// Shutdown signal sender.
    shutdown_tx: watch::Sender<bool>,
    /// Shutdown signal receiver.
    ///
    /// Not read directly (hence the leading underscore); presumably retained
    /// so the watch channel always has a live receiver — TODO confirm.
    _shutdown_rx: watch::Receiver<bool>,
}
142
143/// Apply budget limits from `PunchConfig` to the `BudgetEnforcer`.
144///
145/// Converts daily and monthly USD limits into a single daily USD limit
146/// (using the more restrictive of the two) and sets it on the enforcer.
147fn apply_budget_config(config: &PunchConfig, enforcer: &BudgetEnforcer) {
148    if !config.budget.has_any_limit() {
149        return;
150    }
151    let daily_from_config = config.budget.daily_cost_limit_usd;
152    let daily_from_monthly = config.budget.monthly_cost_limit_usd.map(|m| m / 30.0);
153    let max_cost_per_day_usd = match (daily_from_config, daily_from_monthly) {
154        (Some(d), Some(m)) => Some(d.min(m)),
155        (Some(d), None) => Some(d),
156        (None, Some(m)) => Some(m),
157        (None, None) => None,
158    };
159    let limit = BudgetLimit {
160        warning_threshold_percent: config.budget.eco_mode_threshold_percent,
161        max_cost_per_day_usd,
162        ..Default::default()
163    };
164    enforcer.set_global_limit(limit);
165    info!("budget limits loaded from config");
166}
167
168impl Ring {
169    /// Create a new Ring.
170    ///
171    /// The caller provides the already-initialised memory substrate, LLM
172    /// driver, and configuration. The Ring will create its own event bus and
173    /// scheduler internally.
174    pub fn new(
175        config: PunchConfig,
176        memory: Arc<MemorySubstrate>,
177        driver: Arc<dyn LlmDriver>,
178    ) -> Self {
179        let (shutdown_tx, shutdown_rx) = watch::channel(false);
180        let background =
181            BackgroundExecutor::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
182        let heartbeat_scheduler =
183            HeartbeatScheduler::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
184        let metering = MeteringEngine::new(Arc::clone(&memory));
185        let metering_arc = Arc::new(MeteringEngine::new(Arc::clone(&memory)));
186        let budget_enforcer = Arc::new(BudgetEnforcer::new(Arc::clone(&metering_arc)));
187
188        // Apply config-based budget limits to the enforcer.
189        apply_budget_config(&config, &budget_enforcer);
190
191        let metrics_registry = Arc::new(MetricsRegistry::new());
192        metrics::register_default_metrics(&metrics_registry);
193
194        let marketplace = SkillMarketplace::new();
195        for listing in builtin_skills() {
196            marketplace.publish(listing);
197        }
198
199        let mcp_clients = Arc::new(DashMap::new());
200
201        Self {
202            fighters: DashMap::new(),
203            gorillas: DashMap::new(),
204            memory,
205            driver,
206            event_bus: EventBus::new(),
207            scheduler: Scheduler::new(QuotaConfig::default()),
208            config,
209            background,
210            heartbeat_scheduler,
211            channel_notifier: Arc::new(std::sync::RwLock::new(None)),
212            workflow_engine: WorkflowEngine::new(),
213            metering,
214            budget_enforcer,
215            trigger_engine: TriggerEngine::new(),
216            troop_manager: TroopManager::new(),
217            swarm_coordinator: SwarmCoordinator::new(),
218            message_router: MessageRouter::new(),
219            metrics: metrics_registry,
220            tenant_registry: TenantRegistry::new(),
221            marketplace,
222            mcp_clients,
223            shutdown_tx,
224            _shutdown_rx: shutdown_rx,
225        }
226    }
227
228    /// Create a new Ring with a custom quota configuration.
229    pub fn with_quota_config(
230        config: PunchConfig,
231        memory: Arc<MemorySubstrate>,
232        driver: Arc<dyn LlmDriver>,
233        quota_config: QuotaConfig,
234    ) -> Self {
235        let (shutdown_tx, shutdown_rx) = watch::channel(false);
236        let background =
237            BackgroundExecutor::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
238        let heartbeat_scheduler =
239            HeartbeatScheduler::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
240        let metering = MeteringEngine::new(Arc::clone(&memory));
241        let metering_arc = Arc::new(MeteringEngine::new(Arc::clone(&memory)));
242        let budget_enforcer = Arc::new(BudgetEnforcer::new(Arc::clone(&metering_arc)));
243
244        // Apply config-based budget limits.
245        apply_budget_config(&config, &budget_enforcer);
246
247        let metrics_registry = Arc::new(MetricsRegistry::new());
248        metrics::register_default_metrics(&metrics_registry);
249
250        let marketplace = SkillMarketplace::new();
251        for listing in builtin_skills() {
252            marketplace.publish(listing);
253        }
254
255        let mcp_clients = Arc::new(DashMap::new());
256
257        Self {
258            fighters: DashMap::new(),
259            gorillas: DashMap::new(),
260            memory,
261            driver,
262            event_bus: EventBus::new(),
263            scheduler: Scheduler::new(quota_config),
264            config,
265            background,
266            heartbeat_scheduler,
267            channel_notifier: Arc::new(std::sync::RwLock::new(None)),
268            workflow_engine: WorkflowEngine::new(),
269            metering,
270            budget_enforcer,
271            trigger_engine: TriggerEngine::new(),
272            troop_manager: TroopManager::new(),
273            swarm_coordinator: SwarmCoordinator::new(),
274            message_router: MessageRouter::new(),
275            metrics: metrics_registry,
276            tenant_registry: TenantRegistry::new(),
277            marketplace,
278            mcp_clients,
279            shutdown_tx,
280            _shutdown_rx: shutdown_rx,
281        }
282    }
283
284    // -- Accessors -----------------------------------------------------------
285
    /// Get a reference to the system-wide event bus owned by this Ring.
    ///
    /// The Ring publishes its own lifecycle events (e.g. fighter spawned,
    /// MCP server started) on this same bus.
    pub fn event_bus(&self) -> &EventBus {
        &self.event_bus
    }
290
    /// Get a reference to the per-fighter quota scheduler.
    ///
    /// The scheduler is built from `QuotaConfig::default()` by [`Ring::new`]
    /// or from the caller's config by [`Ring::with_quota_config`].
    pub fn scheduler(&self) -> &Scheduler {
        &self.scheduler
    }
295
    /// Get a reference to the shared memory substrate.
    ///
    /// Returns the `Arc` itself so callers that need an owned handle (for
    /// example to move into a spawned task) can `Arc::clone` it.
    pub fn memory(&self) -> &Arc<MemorySubstrate> {
        &self.memory
    }
300
    /// Get a reference to the top-level configuration the Ring was built with.
    pub fn config(&self) -> &PunchConfig {
        &self.config
    }
305
    /// Get a reference to the background executor used for autonomous
    /// gorilla tasks.
    pub fn background(&self) -> &BackgroundExecutor {
        &self.background
    }
310
    /// Get a reference to the multi-step workflow engine.
    pub fn workflow_engine(&self) -> &WorkflowEngine {
        &self.workflow_engine
    }
315
    /// Get a reference to the Ring's cost-tracking / metering engine.
    ///
    /// NOTE(review): the constructors hand the budget enforcer its own,
    /// separately constructed `MeteringEngine`; this accessor returns the
    /// Ring-owned instance, not the enforcer's.
    pub fn metering(&self) -> &MeteringEngine {
        &self.metering
    }
320
    /// Get a reference to the budget enforcer.
    ///
    /// Returns the `Arc` so callers can clone a shared handle. Limits from the
    /// configuration have already been applied to it at construction time.
    pub fn budget_enforcer(&self) -> &Arc<BudgetEnforcer> {
        &self.budget_enforcer
    }
325
    /// Get a reference to the event-driven trigger engine.
    ///
    /// The `register_trigger` / `remove_trigger` / `list_triggers` methods on
    /// the Ring forward to this same engine.
    pub fn trigger_engine(&self) -> &TriggerEngine {
        &self.trigger_engine
    }
330
    /// Get a reference to the observability metrics registry.
    ///
    /// Returns the `Arc` so callers can clone a shared handle. The default
    /// metrics are registered on it when the Ring is constructed.
    pub fn metrics(&self) -> &Arc<MetricsRegistry> {
        &self.metrics
    }
335
    /// Get a reference to the multi-tenant registry.
    ///
    /// The tenant-scoped Ring operations look tenants up in this registry.
    pub fn tenant_registry(&self) -> &TenantRegistry {
        &self.tenant_registry
    }
340
    /// Access the skill marketplace.
    ///
    /// The built-in skills are published into it when the Ring is constructed.
    pub fn marketplace(&self) -> &SkillMarketplace {
        &self.marketplace
    }
345
    /// Access the active MCP clients, keyed by server name.
    ///
    /// The map is empty until `spawn_mcp_servers` has run. Callers iterating
    /// it should avoid holding map guards across `.await` points.
    pub fn mcp_clients(&self) -> &Arc<DashMap<String, Arc<McpClient>>> {
        &self.mcp_clients
    }
350
351    // -- MCP server lifecycle ------------------------------------------------
352
353    /// Spawn and initialize all MCP servers defined in the configuration.
354    ///
355    /// Each server is started as a subprocess, initialized via JSON-RPC 2.0
356    /// handshake, and its tools are discovered. Servers that fail to start
357    /// are logged and skipped — they don't block the Ring from operating.
358    pub async fn spawn_mcp_servers(&self) {
359        for (name, server_config) in &self.config.mcp_servers {
360            info!(server = %name, command = %server_config.command, "spawning MCP server");
361
362            match McpClient::spawn(
363                name.clone(),
364                &server_config.command,
365                &server_config.args,
366                &server_config.env,
367            )
368            .await
369            {
370                Ok(client) => {
371                    if let Err(e) = client.initialize().await {
372                        warn!(server = %name, error = %e, "MCP server initialization failed, skipping");
373                        let _ = client.shutdown().await;
374                        continue;
375                    }
376
377                    match client.list_tools().await {
378                        Ok(tools) => {
379                            info!(
380                                server = %name,
381                                tool_count = tools.len(),
382                                "MCP server ready with {} tools",
383                                tools.len()
384                            );
385                        }
386                        Err(e) => {
387                            warn!(server = %name, error = %e, "failed to list MCP tools");
388                        }
389                    }
390
391                    self.mcp_clients.insert(name.clone(), Arc::new(client));
392
393                    self.event_bus.publish(PunchEvent::McpServerStarted {
394                        server_name: name.clone(),
395                    });
396                }
397                Err(e) => {
398                    warn!(server = %name, error = %e, "failed to spawn MCP server, skipping");
399                }
400            }
401        }
402    }
403
404    /// Shut down all active MCP servers gracefully.
405    pub async fn shutdown_mcp_servers(&self) {
406        for entry in self.mcp_clients.iter() {
407            let name = entry.key().clone();
408            info!(server = %name, "shutting down MCP server");
409            if let Err(e) = entry.value().shutdown().await {
410                warn!(server = %name, error = %e, "MCP server shutdown error");
411            }
412        }
413        self.mcp_clients.clear();
414    }
415
416    /// Collect tool definitions from all active MCP servers.
417    pub async fn mcp_tools(&self) -> Vec<punch_types::ToolDefinition> {
418        let mut tools = Vec::new();
419        for entry in self.mcp_clients.iter() {
420            match entry.value().list_tools().await {
421                Ok(server_tools) => tools.extend(server_tools),
422                Err(e) => {
423                    warn!(
424                        server = %entry.key(),
425                        error = %e,
426                        "failed to list tools from MCP server"
427                    );
428                }
429            }
430        }
431        tools
432    }
433
434    // -- Tenant-scoped operations --------------------------------------------
435
436    /// Spawn a fighter scoped to a tenant, enforcing quota limits.
437    ///
438    /// Returns an error if the tenant is suspended or the fighter quota is
439    /// exceeded.
440    #[instrument(skip(self, manifest), fields(fighter_name = %manifest.name))]
441    pub async fn spawn_fighter_for_tenant(
442        &self,
443        tenant_id: &TenantId,
444        mut manifest: FighterManifest,
445    ) -> PunchResult<FighterId> {
446        // Verify tenant exists and is active.
447        let tenant = self
448            .tenant_registry
449            .get_tenant(tenant_id)
450            .ok_or_else(|| PunchError::Tenant(format!("tenant {} not found", tenant_id)))?;
451
452        if tenant.status == TenantStatus::Suspended {
453            return Err(PunchError::Tenant(format!(
454                "tenant {} is suspended",
455                tenant_id
456            )));
457        }
458
459        // Check fighter quota.
460        let current_count = self
461            .fighters
462            .iter()
463            .filter(|e| e.value().manifest.tenant_id.as_ref() == Some(tenant_id))
464            .count();
465
466        if current_count >= tenant.quota.max_fighters {
467            return Err(PunchError::QuotaExceeded(format!(
468                "tenant {} has reached max fighters limit ({})",
469                tenant_id, tenant.quota.max_fighters
470            )));
471        }
472
473        // Stamp the manifest with the tenant ID.
474        manifest.tenant_id = Some(*tenant_id);
475        Ok(self.spawn_fighter(manifest).await)
476    }
477
478    /// List fighters that belong to a specific tenant.
479    pub fn list_fighters_for_tenant(
480        &self,
481        tenant_id: &TenantId,
482    ) -> Vec<(FighterId, FighterManifest, FighterStatus)> {
483        self.fighters
484            .iter()
485            .filter(|entry| entry.value().manifest.tenant_id.as_ref() == Some(tenant_id))
486            .map(|entry| {
487                let id = *entry.key();
488                let e = entry.value();
489                (id, e.manifest.clone(), e.status)
490            })
491            .collect()
492    }
493
494    /// Kill a fighter, validating that the caller tenant owns it.
495    ///
496    /// Returns an error if the fighter doesn't belong to the given tenant.
497    #[instrument(skip(self), fields(%fighter_id, %tenant_id))]
498    pub fn kill_fighter_for_tenant(
499        &self,
500        fighter_id: &FighterId,
501        tenant_id: &TenantId,
502    ) -> PunchResult<()> {
503        let entry = self
504            .fighters
505            .get(fighter_id)
506            .ok_or_else(|| PunchError::Fighter(format!("fighter {} not found", fighter_id)))?;
507
508        if entry.manifest.tenant_id.as_ref() != Some(tenant_id) {
509            return Err(PunchError::Auth(format!(
510                "fighter {} does not belong to tenant {}",
511                fighter_id, tenant_id
512            )));
513        }
514
515        drop(entry);
516        self.kill_fighter(fighter_id);
517        Ok(())
518    }
519
520    /// Check whether a tenant's tool access is allowed for the given tool name.
521    ///
522    /// Returns `true` if the tenant has no tool restrictions (empty list) or
523    /// the tool is in the allowed list.
524    pub fn check_tenant_tool_access(&self, tenant_id: &TenantId, tool_name: &str) -> bool {
525        match self.tenant_registry.get_tenant(tenant_id) {
526            Some(tenant) => {
527                tenant.quota.max_tools.is_empty()
528                    || tenant.quota.max_tools.iter().any(|t| t == tool_name)
529            }
530            None => false,
531        }
532    }
533
534    // -- Trigger operations --------------------------------------------------
535
    /// Register a trigger with the engine.
    ///
    /// Forwards to the trigger engine and returns the [`TriggerId`] it hands
    /// back, which can later be passed to `remove_trigger`.
    pub fn register_trigger(&self, trigger: Trigger) -> TriggerId {
        self.trigger_engine.register_trigger(trigger)
    }
540
    /// Remove a trigger by ID.
    ///
    /// Forwards to the trigger engine. Nothing is reported back to the
    /// caller: whatever the engine returns is discarded here.
    pub fn remove_trigger(&self, id: &TriggerId) {
        self.trigger_engine.remove_trigger(id);
    }
545
    /// List all triggers with summary information.
    ///
    /// Forwards to the trigger engine; each element pairs a trigger's ID with
    /// its [`TriggerSummary`].
    pub fn list_triggers(&self) -> Vec<(TriggerId, TriggerSummary)> {
        self.trigger_engine.list_triggers()
    }
550
551    // -- Fighter operations --------------------------------------------------
552
553    /// Spawn a new fighter from a manifest.
554    ///
555    /// Returns the newly-assigned [`FighterId`]. The fighter starts in
556    /// [`FighterStatus::Idle`] and is persisted to the memory substrate.
557    #[instrument(skip(self, manifest), fields(fighter_name = %manifest.name))]
558    pub async fn spawn_fighter(&self, manifest: FighterManifest) -> FighterId {
559        let id = FighterId::new();
560        let name = manifest.name.clone();
561
562        // Persist to the database first so FK constraints work.
563        if let Err(e) = self
564            .memory
565            .save_fighter(&id, &manifest, FighterStatus::Idle)
566            .await
567        {
568            warn!(error = %e, "failed to persist fighter to database (continuing in-memory only)");
569        }
570
571        let entry = FighterEntry {
572            manifest,
573            status: FighterStatus::Idle,
574            current_bout: None,
575        };
576
577        self.fighters.insert(id, entry);
578
579        // Record metrics.
580        self.metrics.counter_inc(metrics::FIGHTER_SPAWNS_TOTAL);
581        self.metrics
582            .gauge_set(metrics::ACTIVE_FIGHTERS, self.fighters.len() as i64);
583
584        self.event_bus.publish(PunchEvent::FighterSpawned {
585            fighter_id: id,
586            name: name.clone(),
587        });
588
589        info!(%id, name, "fighter spawned");
590
591        // --- Creed creation & binding ---
592        // Create a default creed if none exists, then bind it to this instance.
593        // This ensures every fighter has a consciousness layer from birth.
594        {
595            let memory = Arc::clone(&self.memory);
596            let creed_name = name.clone();
597            let fid = id;
598            // Clone the manifest for the creed before it's moved into the entry.
599            let manifest_for_creed = self.fighters.get(&id).map(|e| e.value().manifest.clone());
600            tokio::spawn(async move {
601                // Ensure a creed exists (create default if not), and keep
602                // the self-model in sync with the current manifest/config.
603                match memory.load_creed_by_name(&creed_name).await {
604                    Ok(None) if manifest_for_creed.is_some() => {
605                        let manifest = manifest_for_creed.as_ref().unwrap();
606                        let creed =
607                            punch_types::Creed::new(&creed_name).with_self_awareness(manifest);
608                        if let Err(e) = memory.save_creed(&creed).await {
609                            warn!(error = %e, fighter = %creed_name, "failed to create default creed");
610                        } else {
611                            info!(fighter = %creed_name, "default creed created on spawn");
612                        }
613                    }
614                    Ok(Some(mut creed)) if manifest_for_creed.is_some() => {
615                        // Update self-model if the config has changed (e.g. switched
616                        // from Ollama to Gemini). Preserves all other creed state.
617                        let manifest = manifest_for_creed.as_ref().unwrap();
618                        let new_model = manifest.model.model.clone();
619                        let new_provider = manifest.model.provider.to_string();
620                        if creed.self_model.model_name != new_model
621                            || creed.self_model.provider != new_provider
622                        {
623                            info!(
624                                fighter = %creed_name,
625                                old_model = %creed.self_model.model_name,
626                                new_model = %new_model,
627                                "updating creed self-model to match current config"
628                            );
629                            creed = creed.with_self_awareness(manifest);
630                            if let Err(e) = memory.save_creed(&creed).await {
631                                warn!(error = %e, fighter = %creed_name, "failed to update creed self-model");
632                            }
633                        }
634                    }
635                    Ok(_) => {}
636                    Err(e) => {
637                        warn!(error = %e, fighter = %creed_name, "failed to check creed on spawn");
638                    }
639                }
640                // Bind the creed to this fighter instance.
641                if let Err(e) = memory.bind_creed_to_fighter(&creed_name, &fid).await {
642                    warn!(error = %e, fighter = %creed_name, "failed to bind creed on spawn");
643                } else {
644                    info!(fighter = %creed_name, id = %fid, "creed bound to fighter on spawn");
645                }
646            });
647        }
648
649        // --- Start heartbeat monitoring ---
650        // The heartbeat loop tolerates the creed not existing yet (sleeps and retries).
651        if let Some(entry) = self.fighters.get(&id) {
652            let manifest = entry.value().manifest.clone();
653            drop(entry); // Release DashMap guard before async work
654            self.heartbeat_scheduler.start_monitoring(
655                crate::heartbeat_scheduler::HeartbeatStartConfig {
656                    fighter_id: id,
657                    fighter_name: name.clone(),
658                    manifest,
659                    memory: Arc::clone(&self.memory),
660                    driver: Arc::clone(&self.driver),
661                    event_bus: self.event_bus.clone(),
662                    channel_notifier: self.channel_notifier.read().ok().and_then(|g| g.clone()),
663                    model_routing: Some(self.config.model_routing.clone()),
664                    chat_id_hint: None, // populated from channel config
665                },
666            );
667        }
668
669        id
670    }
671
672    /// Create a default creed for a fighter if none exists.
673    /// The default creed includes self-awareness from the manifest.
674    pub async fn ensure_creed(&self, fighter_name: &str, manifest: &FighterManifest) {
675        match self.memory.load_creed_by_name(fighter_name).await {
676            Ok(Some(_)) => {
677                // Creed already exists.
678            }
679            Ok(None) => {
680                // Create a default creed with self-awareness.
681                let creed = punch_types::Creed::new(fighter_name).with_self_awareness(manifest);
682                if let Err(e) = self.memory.save_creed(&creed).await {
683                    warn!(error = %e, "failed to create default creed");
684                } else {
685                    info!(fighter = %fighter_name, "default creed created with self-awareness");
686                }
687            }
688            Err(e) => {
689                warn!(error = %e, "failed to check for existing creed");
690            }
691        }
692    }
693
694    /// Send a user message to a fighter and run the agent loop (without coordinator).
695    ///
696    /// Convenience wrapper around [`send_message_with_coordinator`] that passes
697    /// `None`, meaning the fighter will not have access to inter-agent tools.
698    #[instrument(skip(self, message), fields(%fighter_id))]
699    pub async fn send_message(
700        &self,
701        fighter_id: &FighterId,
702        message: String,
703    ) -> PunchResult<FighterLoopResult> {
704        self.send_message_with_coordinator(fighter_id, message, None, vec![])
705            .await
706    }
707
708    /// Send a user message to a fighter and run the agent loop.
709    ///
710    /// This creates (or reuses) a bout for the fighter, checks quotas, then
711    /// delegates to [`run_fighter_loop`]. Usage is recorded through the
712    /// metering engine after a successful completion.
713    ///
714    /// If `coordinator` is provided, the fighter can use inter-agent tools
715    /// (`agent_spawn`, `agent_message`, `agent_list`).
716    #[instrument(skip(self, message, coordinator, content_parts), fields(%fighter_id))]
717    pub async fn send_message_with_coordinator(
718        &self,
719        fighter_id: &FighterId,
720        message: String,
721        coordinator: Option<Arc<dyn AgentCoordinator>>,
722        content_parts: Vec<punch_types::ContentPart>,
723    ) -> PunchResult<FighterLoopResult> {
724        // Look up the fighter.
725        let mut entry = self
726            .fighters
727            .get_mut(fighter_id)
728            .ok_or_else(|| PunchError::Fighter(format!("fighter {} not found", fighter_id)))?;
729
730        // Check quota.
731        if !self.scheduler.check_quota(fighter_id) {
732            entry.status = FighterStatus::Resting;
733            return Err(PunchError::RateLimited {
734                provider: "scheduler".to_string(),
735                retry_after_ms: 60_000,
736            });
737        }
738
739        // Check budget enforcement (opt-in — only blocks if limits are configured).
740        // When approaching a limit (Warning), activate eco mode for this request.
741        let mut eco_mode = false;
742        match self.budget_enforcer.check_budget(fighter_id).await {
743            Ok(crate::budget::BudgetVerdict::Blocked {
744                reason,
745                retry_after_secs,
746            }) => {
747                entry.status = FighterStatus::Resting;
748                return Err(PunchError::RateLimited {
749                    provider: format!("budget: {}", reason),
750                    retry_after_ms: retry_after_secs * 1000,
751                });
752            }
753            Ok(crate::budget::BudgetVerdict::Warning { message, .. }) => {
754                info!(warning = %message, "budget warning — activating eco mode");
755                eco_mode = true;
756            }
757            Ok(crate::budget::BudgetVerdict::Allowed) => {}
758            Err(e) => {
759                warn!(error = %e, "budget check failed, allowing request");
760            }
761        }
762
763        // Ensure the fighter has an active bout.
764        let bout_id = match entry.current_bout {
765            Some(id) => id,
766            None => {
767                // Try to restore the most recent bout from the database first
768                // (handles daemon restart — the in-memory map was lost but the
769                // bout still exists on disk).
770                let id = match self.memory.latest_bout_for_fighter(fighter_id).await {
771                    Ok(Some(existing)) => existing,
772                    _ => {
773                        // No prior bout found — create a fresh one.
774                        let new_id =
775                            self.memory.create_bout(fighter_id).await.map_err(|e| {
776                                PunchError::Bout(format!("failed to create bout: {e}"))
777                            })?;
778
779                        self.event_bus.publish(PunchEvent::BoutStarted {
780                            bout_id: new_id.0,
781                            fighter_id: *fighter_id,
782                        });
783
784                        new_id
785                    }
786                };
787                entry.current_bout = Some(id);
788
789                id
790            }
791        };
792
793        // Mark as fighting and extract what we need before dropping the guard.
794        entry.status = FighterStatus::Fighting;
795        let manifest = entry.manifest.clone();
796        let fighter_name = manifest.name.clone();
797        drop(entry); // Release the DashMap guard before any async calls.
798
799        // Collect MCP tools separately (the fighter loop handles dynamic
800        // built-in tool selection via ToolSelector when available_tools is empty).
801        let mut mcp_tools = Vec::new();
802        let has_mcp_access = manifest
803            .capabilities
804            .iter()
805            .any(|c| matches!(c, punch_types::Capability::McpAccess(_)));
806        if has_mcp_access && !self.mcp_clients.is_empty() {
807            for mcp_entry in self.mcp_clients.iter() {
808                let server_name = mcp_entry.key();
809                let can_access = manifest.capabilities.iter().any(|c| {
810                    if let punch_types::Capability::McpAccess(pattern) = c {
811                        pattern == "*" || pattern == server_name
812                    } else {
813                        false
814                    }
815                });
816                if can_access {
817                    match mcp_entry.value().list_tools().await {
818                        Ok(tools) => mcp_tools.extend(tools),
819                        Err(e) => {
820                            warn!(
821                                server = %server_name,
822                                error = %e,
823                                "failed to list MCP tools for fighter"
824                            );
825                        }
826                    }
827                }
828            }
829        }
830
831        // Publish the incoming user message event.
832        self.event_bus.publish(PunchEvent::FighterMessage {
833            fighter_id: *fighter_id,
834            bout_id: bout_id.0,
835            role: "user".to_string(),
836            content_preview: truncate_preview(&message, 120),
837        });
838
839        // Run the fighter loop.
840        // available_tools is empty — the fighter loop uses ToolSelector for
841        // context-aware dynamic tool loading per turn.
842        let params = FighterLoopParams {
843            manifest: manifest.clone(),
844            user_message: message,
845            bout_id,
846            fighter_id: *fighter_id,
847            memory: Arc::clone(&self.memory),
848            driver: Arc::clone(&self.driver),
849            available_tools: Vec::new(),
850            mcp_tools,
851            max_iterations: None,
852            context_window: None,
853            tool_timeout_secs: None,
854            coordinator,
855            approval_engine: None,
856            sandbox: None,
857            mcp_clients: if self.mcp_clients.is_empty() {
858                None
859            } else {
860                Some(Arc::clone(&self.mcp_clients))
861            },
862            model_routing: if self.config.model_routing.enabled {
863                Some(self.config.model_routing.clone())
864            } else {
865                None
866            },
867            channel_notifier: None,
868            user_content_parts: content_parts,
869            eco_mode,
870        };
871
872        // Record message metric.
873        self.metrics.counter_inc(metrics::MESSAGES_TOTAL);
874
875        let result = run_fighter_loop(params).await;
876
877        // Update state based on the outcome.
878        if let Some(mut entry) = self.fighters.get_mut(fighter_id) {
879            match &result {
880                Ok(loop_result) => {
881                    entry.status = FighterStatus::Idle;
882                    self.scheduler
883                        .record_usage(fighter_id, loop_result.usage.total());
884
885                    // Record token usage metrics.
886                    self.metrics
887                        .counter_add(metrics::TOKENS_INPUT_TOTAL, loop_result.usage.input_tokens);
888                    self.metrics.counter_add(
889                        metrics::TOKENS_OUTPUT_TOTAL,
890                        loop_result.usage.output_tokens,
891                    );
892
893                    // Record usage through the metering engine.
894                    if let Err(e) = self
895                        .metering
896                        .record_usage(
897                            fighter_id,
898                            &manifest.model.model,
899                            loop_result.usage.input_tokens,
900                            loop_result.usage.output_tokens,
901                        )
902                        .await
903                    {
904                        warn!(error = %e, "failed to record metering usage");
905                    }
906
907                    // Publish response event.
908                    let preview = truncate_preview(&loop_result.response, 120);
909                    self.event_bus.publish(PunchEvent::FighterMessage {
910                        fighter_id: *fighter_id,
911                        bout_id: bout_id.0,
912                        role: "assistant".to_string(),
913                        content_preview: preview,
914                    });
915
916                    // Publish bout ended.
917                    self.event_bus.publish(PunchEvent::BoutEnded {
918                        bout_id: bout_id.0,
919                        fighter_id: *fighter_id,
920                        messages_exchanged: loop_result.usage.total(),
921                    });
922                }
923                Err(e) => {
924                    entry.status = FighterStatus::KnockedOut;
925                    self.metrics.counter_inc(metrics::ERRORS_TOTAL);
926
927                    // Publish error event.
928                    self.event_bus.publish(PunchEvent::Error {
929                        source: fighter_name.clone(),
930                        message: format!("{e}"),
931                    });
932                }
933            }
934        }
935
936        result
937    }
938
939    /// Access the heartbeat scheduler (e.g. for refresh after heartbeat config changes).
940    pub fn heartbeat_scheduler(&self) -> &HeartbeatScheduler {
941        &self.heartbeat_scheduler
942    }
943
944    /// Set the channel notifier for proactive heartbeat notifications.
945    ///
946    /// Should be called once during API server setup after the channel bridge
947    /// is constructed. Existing heartbeat monitors will pick up the notifier
948    /// on their next refresh cycle.
949    pub fn set_channel_notifier(&self, notifier: Arc<dyn punch_types::ChannelNotifier>) {
950        if let Ok(mut guard) = self.channel_notifier.write() {
951            *guard = Some(notifier);
952        }
953    }
954
955    /// Kill (remove) a fighter.
956    #[instrument(skip(self), fields(%fighter_id))]
957    pub fn kill_fighter(&self, fighter_id: &FighterId) {
958        // Stop heartbeat monitoring before removing.
959        self.heartbeat_scheduler.stop_monitoring(fighter_id);
960
961        if let Some((_, entry)) = self.fighters.remove(fighter_id) {
962            self.scheduler.remove_fighter(fighter_id);
963            self.metrics
964                .gauge_set(metrics::ACTIVE_FIGHTERS, self.fighters.len() as i64);
965
966            self.event_bus.publish(PunchEvent::Error {
967                source: "ring".to_string(),
968                message: format!("Fighter '{}' killed", entry.manifest.name),
969            });
970
971            info!(name = %entry.manifest.name, "fighter killed");
972        } else {
973            warn!("attempted to kill unknown fighter");
974        }
975    }
976
977    /// List all fighters with their current status.
978    pub fn list_fighters(&self) -> Vec<(FighterId, FighterManifest, FighterStatus)> {
979        self.fighters
980            .iter()
981            .map(|entry| {
982                let id = *entry.key();
983                let e = entry.value();
984                (id, e.manifest.clone(), e.status)
985            })
986            .collect()
987    }
988
989    /// Get a snapshot of a single fighter's entry.
990    pub fn get_fighter(&self, fighter_id: &FighterId) -> Option<FighterEntry> {
991        self.fighters.get(fighter_id).map(|e| e.value().clone())
992    }
993
994    // -- Gorilla operations --------------------------------------------------
995
996    /// Register a gorilla with the Ring.
997    ///
998    /// Returns the newly-assigned [`GorillaId`]. The gorilla starts in
999    /// [`GorillaStatus::Caged`].
1000    #[instrument(skip(self, manifest), fields(gorilla_name = %manifest.name))]
1001    pub fn register_gorilla(&self, manifest: GorillaManifest) -> GorillaId {
1002        let id = GorillaId::new();
1003        let name = manifest.name.clone();
1004
1005        let entry = GorillaEntry {
1006            manifest,
1007            status: GorillaStatus::Caged,
1008            metrics: GorillaMetrics::default(),
1009            task_handle: None,
1010        };
1011
1012        self.gorillas.insert(id, Mutex::new(entry));
1013        info!(%id, name, "gorilla registered");
1014        id
1015    }
1016
1017    /// Unleash (start) a gorilla's background task.
1018    ///
1019    /// This uses the [`BackgroundExecutor`] to spawn the gorilla's autonomous
1020    /// loop, which will run the fighter loop on the gorilla's schedule.
1021    #[instrument(skip(self), fields(%gorilla_id))]
1022    pub async fn unleash_gorilla(&self, gorilla_id: &GorillaId) -> PunchResult<()> {
1023        let entry_ref = self
1024            .gorillas
1025            .get(gorilla_id)
1026            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
1027
1028        let mut entry = entry_ref.value().lock().await;
1029
1030        if entry.status == GorillaStatus::Unleashed || entry.status == GorillaStatus::Rampaging {
1031            return Err(PunchError::Gorilla(format!(
1032                "gorilla {} is already active",
1033                gorilla_id
1034            )));
1035        }
1036
1037        let gorilla_id_owned = *gorilla_id;
1038        let name = entry.manifest.name.clone();
1039        let manifest = entry.manifest.clone();
1040
1041        // Start the gorilla via the background executor.
1042        self.background.start_gorilla(
1043            gorilla_id_owned,
1044            manifest,
1045            self.config.default_model.clone(),
1046            Arc::clone(&self.memory),
1047            Arc::clone(&self.driver),
1048        )?;
1049
1050        entry.status = GorillaStatus::Unleashed;
1051        drop(entry);
1052        drop(entry_ref);
1053
1054        // Record gorilla metrics.
1055        self.metrics.counter_inc(metrics::GORILLA_RUNS_TOTAL);
1056        self.metrics.gauge_inc(metrics::ACTIVE_GORILLAS);
1057
1058        self.event_bus.publish(PunchEvent::GorillaUnleashed {
1059            gorilla_id: gorilla_id_owned,
1060            name,
1061        });
1062
1063        Ok(())
1064    }
1065
1066    /// Cage (stop) a gorilla's background task.
1067    #[instrument(skip(self), fields(%gorilla_id))]
1068    pub async fn cage_gorilla(&self, gorilla_id: &GorillaId) -> PunchResult<()> {
1069        let entry_ref = self
1070            .gorillas
1071            .get(gorilla_id)
1072            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
1073
1074        let mut entry = entry_ref.value().lock().await;
1075
1076        // Stop via background executor.
1077        self.background.stop_gorilla(gorilla_id);
1078
1079        // Also abort any legacy task handle.
1080        if let Some(handle) = entry.task_handle.take() {
1081            handle.abort();
1082        }
1083
1084        let name = entry.manifest.name.clone();
1085        entry.status = GorillaStatus::Caged;
1086        drop(entry);
1087        drop(entry_ref);
1088
1089        self.metrics.gauge_dec(metrics::ACTIVE_GORILLAS);
1090
1091        self.event_bus.publish(PunchEvent::GorillaPaused {
1092            gorilla_id: *gorilla_id,
1093            reason: "manually caged".to_string(),
1094        });
1095
1096        info!(name, "gorilla caged");
1097        Ok(())
1098    }
1099
1100    /// List all gorillas with their current status and metrics.
1101    pub async fn list_gorillas(
1102        &self,
1103    ) -> Vec<(GorillaId, GorillaManifest, GorillaStatus, GorillaMetrics)> {
1104        let mut result = Vec::new();
1105
1106        for entry in self.gorillas.iter() {
1107            let id = *entry.key();
1108            let inner = entry.value().lock().await;
1109            result.push((
1110                id,
1111                inner.manifest.clone(),
1112                inner.status,
1113                inner.metrics.clone(),
1114            ));
1115        }
1116
1117        result
1118    }
1119
1120    /// Get a gorilla's manifest by ID.
1121    pub async fn get_gorilla_manifest(&self, gorilla_id: &GorillaId) -> Option<GorillaManifest> {
1122        let entry_ref = self.gorillas.get(gorilla_id)?;
1123        let entry = entry_ref.value().lock().await;
1124        Some(entry.manifest.clone())
1125    }
1126
1127    /// Find a gorilla ID by name (case-insensitive).
1128    pub async fn find_gorilla_by_name(&self, name: &str) -> Option<GorillaId> {
1129        for entry in self.gorillas.iter() {
1130            let inner = entry.value().lock().await;
1131            if inner.manifest.name.eq_ignore_ascii_case(name) {
1132                return Some(*entry.key());
1133            }
1134        }
1135        None
1136    }
1137
1138    /// Run a single autonomous tick for a gorilla (for testing/debugging).
1139    ///
1140    /// This executes the gorilla's autonomous prompt once, without starting
1141    /// the background scheduler. Useful for verifying configuration.
1142    #[instrument(skip(self), fields(%gorilla_id))]
1143    pub async fn run_gorilla_tick(
1144        &self,
1145        gorilla_id: &GorillaId,
1146    ) -> PunchResult<punch_runtime::FighterLoopResult> {
1147        let entry_ref = self
1148            .gorillas
1149            .get(gorilla_id)
1150            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
1151
1152        let entry = entry_ref.value().lock().await;
1153        let manifest = entry.manifest.clone();
1154        drop(entry);
1155        drop(entry_ref);
1156
1157        crate::background::run_gorilla_tick(
1158            *gorilla_id,
1159            &manifest,
1160            &self.config.default_model,
1161            &self.memory,
1162            &self.driver,
1163        )
1164        .await
1165    }
1166
1167    /// Get the LLM driver (useful for CLI commands that need to run ticks directly).
1168    pub fn driver(&self) -> &Arc<dyn LlmDriver> {
1169        &self.driver
1170    }
1171
1172    // -- Inter-agent communication -------------------------------------------
1173
1174    /// Send a message from one fighter to another.
1175    ///
1176    /// The source fighter's message becomes the target fighter's input,
1177    /// enriched with source context so the target knows who is speaking.
1178    /// The target processes it through its own fighter loop (with its own creed)
1179    /// and the response is returned.
1180    #[instrument(skip(self, message), fields(%source_id, %target_id))]
1181    pub async fn fighter_to_fighter(
1182        &self,
1183        source_id: &FighterId,
1184        target_id: &FighterId,
1185        message: String,
1186    ) -> PunchResult<FighterLoopResult> {
1187        // Get source fighter name for context.
1188        let source_name = self
1189            .fighters
1190            .get(source_id)
1191            .map(|entry| entry.value().manifest.name.clone())
1192            .ok_or_else(|| {
1193                PunchError::Fighter(format!("source fighter {} not found", source_id))
1194            })?;
1195
1196        // Verify target exists.
1197        if self.fighters.get(target_id).is_none() {
1198            return Err(PunchError::Fighter(format!(
1199                "target fighter {} not found",
1200                target_id
1201            )));
1202        }
1203
1204        // Wrap the message with source context so the target knows who is speaking.
1205        let enriched_message = format!(
1206            "[Message from fighter '{}' (id: {})]\n\n{}",
1207            source_name, source_id, message
1208        );
1209
1210        // Send to target through normal message flow (uses target's creed).
1211        self.send_message(target_id, enriched_message).await
1212    }
1213
1214    /// Find a fighter by name (case-insensitive).
1215    ///
1216    /// Returns the fighter ID and manifest if found.
1217    pub fn find_fighter_by_name_sync(&self, name: &str) -> Option<(FighterId, FighterManifest)> {
1218        self.fighters.iter().find_map(|entry| {
1219            if entry.value().manifest.name.eq_ignore_ascii_case(name) {
1220                Some((*entry.key(), entry.value().manifest.clone()))
1221            } else {
1222                None
1223            }
1224        })
1225    }
1226
1227    /// Update relationship tracking in both fighters' creeds after inter-agent
1228    /// communication.
1229    ///
1230    /// Loads both creeds, adds or updates the peer relationship entry
1231    /// (incrementing interaction_count), and saves them back.
1232    pub async fn update_fighter_relationships(&self, fighter_a_name: &str, fighter_b_name: &str) {
1233        let memory = Arc::clone(&self.memory);
1234        let a_name = fighter_a_name.to_string();
1235        let b_name = fighter_b_name.to_string();
1236
1237        // Update in a spawned task to avoid blocking the caller.
1238        tokio::spawn(async move {
1239            // Update A's creed with relationship to B.
1240            if let Ok(Some(mut creed_a)) = memory.load_creed_by_name(&a_name).await {
1241                update_relationship(&mut creed_a, &b_name, None);
1242                if let Err(e) = memory.save_creed(&creed_a).await {
1243                    warn!(error = %e, fighter = %a_name, "failed to save creed relationship update");
1244                }
1245            }
1246
1247            // Update B's creed with relationship to A.
1248            if let Ok(Some(mut creed_b)) = memory.load_creed_by_name(&b_name).await {
1249                update_relationship(&mut creed_b, &a_name, None);
1250                if let Err(e) = memory.save_creed(&creed_b).await {
1251                    warn!(error = %e, fighter = %b_name, "failed to save creed relationship update");
1252                }
1253            }
1254        });
1255    }
1256
1257    // -- Troop / Swarm / Messaging accessors ---------------------------------
1258
1259    /// Get a reference to the troop manager.
1260    pub fn troop_manager(&self) -> &TroopManager {
1261        &self.troop_manager
1262    }
1263
1264    /// Get a reference to the swarm coordinator.
1265    pub fn swarm_coordinator(&self) -> &SwarmCoordinator {
1266        &self.swarm_coordinator
1267    }
1268
1269    /// Get a reference to the message router.
1270    pub fn message_router(&self) -> &MessageRouter {
1271        &self.message_router
1272    }
1273
1274    // -- Troop operations ----------------------------------------------------
1275
1276    /// Form a new troop with a leader and initial members.
1277    #[instrument(skip(self, members), fields(troop_name = %name))]
1278    pub fn form_troop(
1279        &self,
1280        name: String,
1281        leader: FighterId,
1282        members: Vec<FighterId>,
1283        strategy: CoordinationStrategy,
1284    ) -> PunchResult<TroopId> {
1285        // Verify the leader exists.
1286        if self.fighters.get(&leader).is_none() {
1287            return Err(PunchError::Troop(format!(
1288                "leader fighter {} not found",
1289                leader
1290            )));
1291        }
1292
1293        // Verify all members exist.
1294        for member in &members {
1295            if self.fighters.get(member).is_none() {
1296                return Err(PunchError::Troop(format!(
1297                    "member fighter {} not found",
1298                    member
1299                )));
1300            }
1301        }
1302
1303        let member_count = members.len() + 1; // +1 for leader if not in list
1304        let troop_id = self
1305            .troop_manager
1306            .form_troop(name.clone(), leader, members, strategy);
1307
1308        self.event_bus.publish(PunchEvent::TroopFormed {
1309            troop_id,
1310            name,
1311            member_count,
1312        });
1313
1314        Ok(troop_id)
1315    }
1316
1317    /// Disband (dissolve) a troop.
1318    #[instrument(skip(self), fields(%troop_id))]
1319    pub fn disband_troop(&self, troop_id: &TroopId) -> PunchResult<()> {
1320        let name = self.troop_manager.disband_troop(troop_id)?;
1321
1322        self.event_bus.publish(PunchEvent::TroopDisbanded {
1323            troop_id: *troop_id,
1324            name,
1325        });
1326
1327        Ok(())
1328    }
1329
1330    /// Assign a task to a troop, returning the fighters that should handle it.
1331    pub fn assign_troop_task(
1332        &self,
1333        troop_id: &TroopId,
1334        task_description: &str,
1335    ) -> PunchResult<Vec<FighterId>> {
1336        self.troop_manager.assign_task(troop_id, task_description)
1337    }
1338
1339    /// Assign a task to a troop asynchronously, returning full results
1340    /// including responses from fighters.
1341    pub async fn assign_troop_task_async(
1342        &self,
1343        troop_id: &TroopId,
1344        task: &str,
1345    ) -> PunchResult<crate::troop::TaskAssignmentResult> {
1346        self.troop_manager.assign_task_async(troop_id, task).await
1347    }
1348
1349    /// Get the current status of a troop.
1350    pub fn get_troop_status(&self, troop_id: &TroopId) -> Option<Troop> {
1351        self.troop_manager.get_troop(troop_id)
1352    }
1353
1354    /// List all troops.
1355    pub fn list_troops(&self) -> Vec<Troop> {
1356        self.troop_manager.list_troops()
1357    }
1358
1359    /// Recruit a fighter into a troop.
1360    pub fn recruit_to_troop(&self, troop_id: &TroopId, fighter_id: FighterId) -> PunchResult<()> {
1361        // Verify the fighter exists.
1362        if self.fighters.get(&fighter_id).is_none() {
1363            return Err(PunchError::Troop(format!(
1364                "fighter {} not found",
1365                fighter_id
1366            )));
1367        }
1368        self.troop_manager.recruit(troop_id, fighter_id)
1369    }
1370
1371    /// Dismiss a fighter from a troop.
1372    pub fn dismiss_from_troop(
1373        &self,
1374        troop_id: &TroopId,
1375        fighter_id: &FighterId,
1376    ) -> PunchResult<()> {
1377        self.troop_manager.dismiss(troop_id, fighter_id)
1378    }
1379
1380    // -- Troop-aware fighter lifecycle ---------------------------------------
1381
1382    /// Kill a fighter, warning if they're in a troop.
1383    ///
1384    /// Unlike [`kill_fighter`], this checks troop membership and dismisses
1385    /// the fighter from all troops before killing them.
1386    #[instrument(skip(self), fields(%fighter_id))]
1387    pub fn kill_fighter_safe(&self, fighter_id: &FighterId) {
1388        // Dismiss from any troops first.
1389        let troop_ids = self.troop_manager.get_fighter_troops(fighter_id);
1390        for troop_id in troop_ids {
1391            if let Err(e) = self.troop_manager.dismiss(&troop_id, fighter_id) {
1392                warn!(
1393                    %troop_id,
1394                    %fighter_id,
1395                    error = %e,
1396                    "failed to dismiss fighter from troop before kill"
1397                );
1398            }
1399        }
1400        self.kill_fighter(fighter_id);
1401    }
1402
1403    // -- Workflow operations -------------------------------------------------
1404
1405    /// Register a workflow with the engine.
1406    pub fn register_workflow(&self, workflow: Workflow) -> WorkflowId {
1407        self.workflow_engine.register_workflow(workflow)
1408    }
1409
1410    /// Execute a workflow by ID with the given input.
1411    pub async fn execute_workflow(
1412        &self,
1413        workflow_id: &WorkflowId,
1414        input: String,
1415    ) -> PunchResult<WorkflowRunId> {
1416        self.workflow_engine
1417            .execute_workflow(
1418                workflow_id,
1419                input,
1420                Arc::clone(&self.memory),
1421                Arc::clone(&self.driver),
1422                &self.config.default_model,
1423            )
1424            .await
1425    }
1426
1427    // -- Shutdown ------------------------------------------------------------
1428
1429    /// Gracefully shut down the Ring, stopping all gorillas and background tasks.
1430    pub fn shutdown(&self) {
1431        info!("Ring shutdown initiated");
1432
1433        // Signal shutdown to all background tasks.
1434        let _ = self.shutdown_tx.send(true);
1435
1436        // Stop all gorilla tasks via the background executor.
1437        self.background.shutdown_all();
1438
1439        info!("Ring shutdown complete");
1440    }
1441}
1442
1443// ---------------------------------------------------------------------------
// Private helpers: preview truncation and relationship tracking
1445// ---------------------------------------------------------------------------
1446
/// Truncate a string to at most `max_len` bytes, respecting UTF-8 char boundaries.
///
/// Strings that already fit are returned unchanged. Longer strings are cut at
/// the last char boundary that leaves room for a trailing `"..."`, so the
/// result (ellipsis included) never exceeds `max_len` bytes. When `max_len` is
/// too small to hold the whole ellipsis, only its first `max_len` bytes are
/// returned.
fn truncate_preview(s: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    if s.len() <= max_len {
        return s.to_string();
    }
    if max_len < ELLIPSIS.len() {
        // Not even the ellipsis fits; stay within the limit regardless.
        return ELLIPSIS[..max_len].to_string();
    }

    // Bytes available for the original text once the ellipsis is accounted for.
    let budget = max_len - ELLIPSIS.len();
    // Walk back from the budget to the nearest char boundary. Index 0 is always
    // a boundary, and `budget < s.len()` holds because `s.len() > max_len`.
    let end = (0..=budget)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0);

    format!("{}{}", &s[..end], ELLIPSIS)
}
1461
1462/// Update or insert a peer relationship in a creed.
1463fn update_relationship(creed: &mut punch_types::Creed, peer_name: &str, trust_nudge: Option<f64>) {
1464    if let Some(rel) = creed
1465        .relationships
1466        .iter_mut()
1467        .find(|r| r.entity == peer_name && r.entity_type == "fighter")
1468    {
1469        rel.interaction_count += 1;
1470        if let Some(nudge) = trust_nudge {
1471            rel.trust = (rel.trust * 0.9 + nudge * 0.1).clamp(0.0, 1.0);
1472        }
1473        rel.notes = format!(
1474            "Last interaction: {}",
1475            chrono::Utc::now().format("%Y-%m-%d %H:%M UTC")
1476        );
1477    } else {
1478        creed.relationships.push(punch_types::Relationship {
1479            entity: peer_name.to_string(),
1480            entity_type: "fighter".to_string(),
1481            nature: "peer".to_string(),
1482            trust: trust_nudge.unwrap_or(0.5),
1483            interaction_count: 1,
1484            notes: format!(
1485                "First interaction: {}",
1486                chrono::Utc::now().format("%Y-%m-%d %H:%M UTC")
1487            ),
1488        });
1489    }
1490    creed.updated_at = chrono::Utc::now();
1491    creed.version += 1;
1492}
1493
1494// ---------------------------------------------------------------------------
1495// AgentCoordinator implementation
1496// ---------------------------------------------------------------------------
1497
1498#[async_trait]
1499impl AgentCoordinator for Ring {
1500    async fn spawn_fighter(&self, manifest: FighterManifest) -> PunchResult<FighterId> {
1501        Ok(Ring::spawn_fighter(self, manifest).await)
1502    }
1503
1504    async fn send_message_to_agent(
1505        &self,
1506        target: &FighterId,
1507        message: String,
1508    ) -> PunchResult<AgentMessageResult> {
1509        let result = self.send_message(target, message).await?;
1510        Ok(AgentMessageResult {
1511            response: result.response,
1512            tokens_used: result.usage.total(),
1513        })
1514    }
1515
1516    async fn find_fighter_by_name(&self, name: &str) -> PunchResult<Option<FighterId>> {
1517        let found = self.fighters.iter().find_map(|entry| {
1518            if entry.value().manifest.name.eq_ignore_ascii_case(name) {
1519                Some(*entry.key())
1520            } else {
1521                None
1522            }
1523        });
1524        Ok(found)
1525    }
1526
1527    async fn list_fighters(&self) -> PunchResult<Vec<AgentInfo>> {
1528        let fighters = self
1529            .fighters
1530            .iter()
1531            .map(|entry| AgentInfo {
1532                id: *entry.key(),
1533                name: entry.value().manifest.name.clone(),
1534                status: entry.value().status,
1535            })
1536            .collect();
1537        Ok(fighters)
1538    }
1539}
1540
1541// ---------------------------------------------------------------------------
1542// Compile-time Send + Sync assertion
1543// ---------------------------------------------------------------------------
1544
1545/// Compile-time assertion that `Ring` is `Send + Sync`.
1546const _: () = {
1547    fn _assert_send<T: Send>() {}
1548    fn _assert_sync<T: Sync>() {}
1549    fn _assert() {
1550        _assert_send::<Ring>();
1551        _assert_sync::<Ring>();
1552    }
1553};