Skip to main content

punch_kernel/
ring.rs

1//! **The Ring** — the central kernel and coordinator for the Punch system.
2//!
3//! The [`Ring`] owns every fighter and gorilla, wires them to the memory
4//! substrate, the LLM driver, the event bus, the scheduler, the background
5//! executor, the workflow engine, and the metering engine. All mutations
6//! go through the Ring so that invariants (quotas, capabilities, lifecycle
7//! events) are enforced in a single place.
8
9use std::sync::Arc;
10
11use async_trait::async_trait;
12use chrono;
13use dashmap::DashMap;
14use serde::{Deserialize, Serialize};
15use tokio::sync::Mutex;
16use tokio::sync::watch;
17use tokio::task::JoinHandle;
18use tracing::{info, instrument, warn};
19
20use punch_memory::{BoutId, MemorySubstrate};
21use punch_runtime::{
22    FighterLoopParams, FighterLoopResult, LlmDriver, run_fighter_loop, tools_for_capabilities,
23};
24use punch_types::{
25    AgentCoordinator, AgentInfo, AgentMessageResult, CoordinationStrategy, FighterId,
26    FighterManifest, FighterStatus, GorillaId, GorillaManifest, GorillaMetrics, GorillaStatus,
27    PunchConfig, PunchError, PunchEvent, PunchResult, TenantId, TenantStatus, Troop,
28    TroopId,
29};
30
31use crate::agent_messaging::MessageRouter;
32use crate::background::BackgroundExecutor;
33use crate::budget::BudgetEnforcer;
34use crate::event_bus::EventBus;
35use crate::metering::MeteringEngine;
36use crate::metrics::{self, MetricsRegistry};
37use crate::scheduler::{QuotaConfig, Scheduler};
38use crate::swarm::SwarmCoordinator;
39use crate::triggers::{Trigger, TriggerEngine, TriggerId, TriggerSummary};
40use crate::troop::TroopManager;
41use crate::tenant_registry::TenantRegistry;
42use crate::workflow::{Workflow, WorkflowEngine, WorkflowId, WorkflowRunId};
43
44// ---------------------------------------------------------------------------
45// Entry types
46// ---------------------------------------------------------------------------
47
48/// Everything the Ring tracks about a single fighter.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct FighterEntry {
51    /// The fighter's manifest (identity, model, capabilities, etc.).
52    pub manifest: FighterManifest,
53    /// Current operational status.
54    pub status: FighterStatus,
55    /// The active bout (conversation session) ID, if any.
56    pub current_bout: Option<BoutId>,
57}
58
59/// Everything the Ring tracks about a single gorilla.
60///
61/// The `task_handle` is behind a `Mutex` because [`JoinHandle`] is not `Clone`
62/// and we need interior mutability when starting / stopping background tasks.
63pub struct GorillaEntry {
64    /// The gorilla's manifest.
65    pub manifest: GorillaManifest,
66    /// Current operational status.
67    pub status: GorillaStatus,
68    /// Runtime metrics.
69    pub metrics: GorillaMetrics,
70    /// Handle to the background task (if running).
71    task_handle: Option<JoinHandle<()>>,
72}
73
74// Manual Debug impl because JoinHandle doesn't implement Debug in a useful way.
75impl std::fmt::Debug for GorillaEntry {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        f.debug_struct("GorillaEntry")
78            .field("manifest", &self.manifest)
79            .field("status", &self.status)
80            .field("metrics", &self.metrics)
81            .field("has_task", &self.task_handle.is_some())
82            .finish()
83    }
84}
85
86// ---------------------------------------------------------------------------
87// The Ring
88// ---------------------------------------------------------------------------
89
90/// The Ring — the central coordinator for the Punch Agent Combat System.
91///
92/// Thread-safe by design: all collections use [`DashMap`] and all shared state
93/// is behind `Arc`. Wrap the `Ring` itself in an `Arc` to share across tasks.
94pub struct Ring {
95    /// All active fighters, keyed by their unique ID.
96    fighters: DashMap<FighterId, FighterEntry>,
97    /// All registered gorillas, keyed by their unique ID.
98    gorillas: DashMap<GorillaId, Mutex<GorillaEntry>>,
99    /// Shared memory substrate (SQLite persistence).
100    memory: Arc<MemorySubstrate>,
101    /// The LLM driver used for completions.
102    driver: Arc<dyn LlmDriver>,
103    /// System-wide event bus.
104    event_bus: EventBus,
105    /// Per-fighter quota scheduler.
106    scheduler: Scheduler,
107    /// Top-level Punch configuration.
108    config: PunchConfig,
109    /// Background executor for autonomous gorilla tasks.
110    background: BackgroundExecutor,
111    /// Multi-step workflow engine.
112    workflow_engine: WorkflowEngine,
113    /// Cost tracking and metering engine.
114    metering: MeteringEngine,
115    /// Budget enforcement layer (opt-in spending limits).
116    budget_enforcer: Arc<BudgetEnforcer>,
117    /// Event-driven trigger engine.
118    trigger_engine: TriggerEngine,
119    /// Troop manager for multi-agent coordination.
120    troop_manager: TroopManager,
121    /// Swarm coordinator for emergent behavior tasks.
122    swarm_coordinator: SwarmCoordinator,
123    /// Inter-agent message router.
124    message_router: MessageRouter,
125    /// Production observability metrics.
126    metrics: Arc<MetricsRegistry>,
127    /// Multi-tenant registry.
128    tenant_registry: TenantRegistry,
129    /// Shutdown signal sender.
130    shutdown_tx: watch::Sender<bool>,
131    /// Shutdown signal receiver.
132    _shutdown_rx: watch::Receiver<bool>,
133}
134
135impl Ring {
136    /// Create a new Ring.
137    ///
138    /// The caller provides the already-initialised memory substrate, LLM
139    /// driver, and configuration. The Ring will create its own event bus and
140    /// scheduler internally.
141    pub fn new(
142        config: PunchConfig,
143        memory: Arc<MemorySubstrate>,
144        driver: Arc<dyn LlmDriver>,
145    ) -> Self {
146        let (shutdown_tx, shutdown_rx) = watch::channel(false);
147        let background =
148            BackgroundExecutor::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
149        let metering = MeteringEngine::new(Arc::clone(&memory));
150        let metering_arc = Arc::new(MeteringEngine::new(Arc::clone(&memory)));
151        let budget_enforcer = Arc::new(BudgetEnforcer::new(Arc::clone(&metering_arc)));
152        let metrics_registry = Arc::new(MetricsRegistry::new());
153        metrics::register_default_metrics(&metrics_registry);
154
155        Self {
156            fighters: DashMap::new(),
157            gorillas: DashMap::new(),
158            memory,
159            driver,
160            event_bus: EventBus::new(),
161            scheduler: Scheduler::new(QuotaConfig::default()),
162            config,
163            background,
164            workflow_engine: WorkflowEngine::new(),
165            metering,
166            budget_enforcer,
167            trigger_engine: TriggerEngine::new(),
168            troop_manager: TroopManager::new(),
169            swarm_coordinator: SwarmCoordinator::new(),
170            message_router: MessageRouter::new(),
171            metrics: metrics_registry,
172            tenant_registry: TenantRegistry::new(),
173            shutdown_tx,
174            _shutdown_rx: shutdown_rx,
175        }
176    }
177
178    /// Create a new Ring with a custom quota configuration.
179    pub fn with_quota_config(
180        config: PunchConfig,
181        memory: Arc<MemorySubstrate>,
182        driver: Arc<dyn LlmDriver>,
183        quota_config: QuotaConfig,
184    ) -> Self {
185        let (shutdown_tx, shutdown_rx) = watch::channel(false);
186        let background =
187            BackgroundExecutor::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
188        let metering = MeteringEngine::new(Arc::clone(&memory));
189        let metering_arc = Arc::new(MeteringEngine::new(Arc::clone(&memory)));
190        let budget_enforcer = Arc::new(BudgetEnforcer::new(Arc::clone(&metering_arc)));
191        let metrics_registry = Arc::new(MetricsRegistry::new());
192        metrics::register_default_metrics(&metrics_registry);
193
194        Self {
195            fighters: DashMap::new(),
196            gorillas: DashMap::new(),
197            memory,
198            driver,
199            event_bus: EventBus::new(),
200            scheduler: Scheduler::new(quota_config),
201            config,
202            background,
203            workflow_engine: WorkflowEngine::new(),
204            metering,
205            budget_enforcer,
206            trigger_engine: TriggerEngine::new(),
207            troop_manager: TroopManager::new(),
208            swarm_coordinator: SwarmCoordinator::new(),
209            message_router: MessageRouter::new(),
210            metrics: metrics_registry,
211            tenant_registry: TenantRegistry::new(),
212            shutdown_tx,
213            _shutdown_rx: shutdown_rx,
214        }
215    }
216
217    // -- Accessors -----------------------------------------------------------
218
219    /// Get a reference to the event bus.
220    pub fn event_bus(&self) -> &EventBus {
221        &self.event_bus
222    }
223
224    /// Get a reference to the scheduler.
225    pub fn scheduler(&self) -> &Scheduler {
226        &self.scheduler
227    }
228
229    /// Get a reference to the memory substrate.
230    pub fn memory(&self) -> &Arc<MemorySubstrate> {
231        &self.memory
232    }
233
234    /// Get a reference to the configuration.
235    pub fn config(&self) -> &PunchConfig {
236        &self.config
237    }
238
239    /// Get a reference to the background executor.
240    pub fn background(&self) -> &BackgroundExecutor {
241        &self.background
242    }
243
244    /// Get a reference to the workflow engine.
245    pub fn workflow_engine(&self) -> &WorkflowEngine {
246        &self.workflow_engine
247    }
248
249    /// Get a reference to the metering engine.
250    pub fn metering(&self) -> &MeteringEngine {
251        &self.metering
252    }
253
254    /// Get a reference to the budget enforcer.
255    pub fn budget_enforcer(&self) -> &Arc<BudgetEnforcer> {
256        &self.budget_enforcer
257    }
258
259    /// Get a reference to the trigger engine.
260    pub fn trigger_engine(&self) -> &TriggerEngine {
261        &self.trigger_engine
262    }
263
264    /// Get a reference to the metrics registry.
265    pub fn metrics(&self) -> &Arc<MetricsRegistry> {
266        &self.metrics
267    }
268
269    /// Get a reference to the tenant registry.
270    pub fn tenant_registry(&self) -> &TenantRegistry {
271        &self.tenant_registry
272    }
273
274    // -- Tenant-scoped operations --------------------------------------------
275
276    /// Spawn a fighter scoped to a tenant, enforcing quota limits.
277    ///
278    /// Returns an error if the tenant is suspended or the fighter quota is
279    /// exceeded.
280    #[instrument(skip(self, manifest), fields(fighter_name = %manifest.name))]
281    pub async fn spawn_fighter_for_tenant(
282        &self,
283        tenant_id: &TenantId,
284        mut manifest: FighterManifest,
285    ) -> PunchResult<FighterId> {
286        // Verify tenant exists and is active.
287        let tenant = self
288            .tenant_registry
289            .get_tenant(tenant_id)
290            .ok_or_else(|| PunchError::Tenant(format!("tenant {} not found", tenant_id)))?;
291
292        if tenant.status == TenantStatus::Suspended {
293            return Err(PunchError::Tenant(format!(
294                "tenant {} is suspended",
295                tenant_id
296            )));
297        }
298
299        // Check fighter quota.
300        let current_count = self
301            .fighters
302            .iter()
303            .filter(|e| e.value().manifest.tenant_id.as_ref() == Some(tenant_id))
304            .count();
305
306        if current_count >= tenant.quota.max_fighters {
307            return Err(PunchError::QuotaExceeded(format!(
308                "tenant {} has reached max fighters limit ({})",
309                tenant_id, tenant.quota.max_fighters
310            )));
311        }
312
313        // Stamp the manifest with the tenant ID.
314        manifest.tenant_id = Some(*tenant_id);
315        Ok(self.spawn_fighter(manifest).await)
316    }
317
318    /// List fighters that belong to a specific tenant.
319    pub fn list_fighters_for_tenant(
320        &self,
321        tenant_id: &TenantId,
322    ) -> Vec<(FighterId, FighterManifest, FighterStatus)> {
323        self.fighters
324            .iter()
325            .filter(|entry| entry.value().manifest.tenant_id.as_ref() == Some(tenant_id))
326            .map(|entry| {
327                let id = *entry.key();
328                let e = entry.value();
329                (id, e.manifest.clone(), e.status)
330            })
331            .collect()
332    }
333
334    /// Kill a fighter, validating that the caller tenant owns it.
335    ///
336    /// Returns an error if the fighter doesn't belong to the given tenant.
337    #[instrument(skip(self), fields(%fighter_id, %tenant_id))]
338    pub fn kill_fighter_for_tenant(
339        &self,
340        fighter_id: &FighterId,
341        tenant_id: &TenantId,
342    ) -> PunchResult<()> {
343        let entry = self.fighters.get(fighter_id).ok_or_else(|| {
344            PunchError::Fighter(format!("fighter {} not found", fighter_id))
345        })?;
346
347        if entry.manifest.tenant_id.as_ref() != Some(tenant_id) {
348            return Err(PunchError::Auth(format!(
349                "fighter {} does not belong to tenant {}",
350                fighter_id, tenant_id
351            )));
352        }
353
354        drop(entry);
355        self.kill_fighter(fighter_id);
356        Ok(())
357    }
358
359    /// Check whether a tenant's tool access is allowed for the given tool name.
360    ///
361    /// Returns `true` if the tenant has no tool restrictions (empty list) or
362    /// the tool is in the allowed list.
363    pub fn check_tenant_tool_access(&self, tenant_id: &TenantId, tool_name: &str) -> bool {
364        match self.tenant_registry.get_tenant(tenant_id) {
365            Some(tenant) => {
366                tenant.quota.max_tools.is_empty()
367                    || tenant.quota.max_tools.iter().any(|t| t == tool_name)
368            }
369            None => false,
370        }
371    }
372
373    // -- Trigger operations --------------------------------------------------
374
375    /// Register a trigger with the engine.
376    pub fn register_trigger(&self, trigger: Trigger) -> TriggerId {
377        self.trigger_engine.register_trigger(trigger)
378    }
379
380    /// Remove a trigger by ID.
381    pub fn remove_trigger(&self, id: &TriggerId) {
382        self.trigger_engine.remove_trigger(id);
383    }
384
385    /// List all triggers with summary information.
386    pub fn list_triggers(&self) -> Vec<(TriggerId, TriggerSummary)> {
387        self.trigger_engine.list_triggers()
388    }
389
390    // -- Fighter operations --------------------------------------------------
391
392    /// Spawn a new fighter from a manifest.
393    ///
394    /// Returns the newly-assigned [`FighterId`]. The fighter starts in
395    /// [`FighterStatus::Idle`] and is persisted to the memory substrate.
396    #[instrument(skip(self, manifest), fields(fighter_name = %manifest.name))]
397    pub async fn spawn_fighter(&self, manifest: FighterManifest) -> FighterId {
398        let id = FighterId::new();
399        let name = manifest.name.clone();
400
401        // Persist to the database first so FK constraints work.
402        if let Err(e) = self
403            .memory
404            .save_fighter(&id, &manifest, FighterStatus::Idle)
405            .await
406        {
407            warn!(error = %e, "failed to persist fighter to database (continuing in-memory only)");
408        }
409
410        let entry = FighterEntry {
411            manifest,
412            status: FighterStatus::Idle,
413            current_bout: None,
414        };
415
416        self.fighters.insert(id, entry);
417
418        // Record metrics.
419        self.metrics.counter_inc(metrics::FIGHTER_SPAWNS_TOTAL);
420        self.metrics
421            .gauge_set(metrics::ACTIVE_FIGHTERS, self.fighters.len() as i64);
422
423        self.event_bus.publish(PunchEvent::FighterSpawned {
424            fighter_id: id,
425            name: name.clone(),
426        });
427
428        info!(%id, name, "fighter spawned");
429
430        // --- Creed binding ---
431        // If a creed exists for this fighter name, bind it to the new instance.
432        // This ensures the creed persists across kill/respawn cycles.
433        {
434            let memory = Arc::clone(&self.memory);
435            let creed_name = name.clone();
436            let fid = id;
437            tokio::spawn(async move {
438                if let Ok(Some(_)) = memory.load_creed_by_name(&creed_name).await {
439                    if let Err(e) = memory.bind_creed_to_fighter(&creed_name, &fid).await {
440                        warn!(error = %e, fighter = %creed_name, "failed to bind creed on spawn");
441                    } else {
442                        info!(fighter = %creed_name, id = %fid, "creed bound to fighter on spawn");
443                    }
444                }
445            });
446        }
447
448        id
449    }
450
451    /// Create a default creed for a fighter if none exists.
452    /// The default creed includes self-awareness from the manifest.
453    pub async fn ensure_creed(&self, fighter_name: &str, manifest: &FighterManifest) {
454        match self.memory.load_creed_by_name(fighter_name).await {
455            Ok(Some(_)) => {
456                // Creed already exists.
457            }
458            Ok(None) => {
459                // Create a default creed with self-awareness.
460                let creed = punch_types::Creed::new(fighter_name)
461                    .with_self_awareness(manifest);
462                if let Err(e) = self.memory.save_creed(&creed).await {
463                    warn!(error = %e, "failed to create default creed");
464                } else {
465                    info!(fighter = %fighter_name, "default creed created with self-awareness");
466                }
467            }
468            Err(e) => {
469                warn!(error = %e, "failed to check for existing creed");
470            }
471        }
472    }
473
474    /// Send a user message to a fighter and run the agent loop (without coordinator).
475    ///
476    /// Convenience wrapper around [`send_message_with_coordinator`] that passes
477    /// `None`, meaning the fighter will not have access to inter-agent tools.
478    #[instrument(skip(self, message), fields(%fighter_id))]
479    pub async fn send_message(
480        &self,
481        fighter_id: &FighterId,
482        message: String,
483    ) -> PunchResult<FighterLoopResult> {
484        self.send_message_with_coordinator(fighter_id, message, None)
485            .await
486    }
487
488    /// Send a user message to a fighter and run the agent loop.
489    ///
490    /// This creates (or reuses) a bout for the fighter, checks quotas, then
491    /// delegates to [`run_fighter_loop`]. Usage is recorded through the
492    /// metering engine after a successful completion.
493    ///
494    /// If `coordinator` is provided, the fighter can use inter-agent tools
495    /// (`agent_spawn`, `agent_message`, `agent_list`).
496    #[instrument(skip(self, message, coordinator), fields(%fighter_id))]
497    pub async fn send_message_with_coordinator(
498        &self,
499        fighter_id: &FighterId,
500        message: String,
501        coordinator: Option<Arc<dyn AgentCoordinator>>,
502    ) -> PunchResult<FighterLoopResult> {
503        // Look up the fighter.
504        let mut entry = self
505            .fighters
506            .get_mut(fighter_id)
507            .ok_or_else(|| PunchError::Fighter(format!("fighter {} not found", fighter_id)))?;
508
509        // Check quota.
510        if !self.scheduler.check_quota(fighter_id) {
511            entry.status = FighterStatus::Resting;
512            return Err(PunchError::RateLimited {
513                provider: "scheduler".to_string(),
514                retry_after_ms: 60_000,
515            });
516        }
517
518        // Check budget enforcement (opt-in — only blocks if limits are configured).
519        match self.budget_enforcer.check_budget(fighter_id).await {
520            Ok(crate::budget::BudgetVerdict::Blocked {
521                reason,
522                retry_after_secs,
523            }) => {
524                entry.status = FighterStatus::Resting;
525                return Err(PunchError::RateLimited {
526                    provider: format!("budget: {}", reason),
527                    retry_after_ms: retry_after_secs * 1000,
528                });
529            }
530            Ok(crate::budget::BudgetVerdict::Warning { message, .. }) => {
531                info!(warning = %message, "budget warning for fighter");
532            }
533            Ok(crate::budget::BudgetVerdict::Allowed) => {}
534            Err(e) => {
535                warn!(error = %e, "budget check failed, allowing request");
536            }
537        }
538
539        // Ensure the fighter has an active bout.
540        let bout_id = match entry.current_bout {
541            Some(id) => id,
542            None => {
543                // Create the bout in the database.
544                let id = self
545                    .memory
546                    .create_bout(fighter_id)
547                    .await
548                    .map_err(|e| PunchError::Bout(format!("failed to create bout: {e}")))?;
549                entry.current_bout = Some(id);
550
551                self.event_bus.publish(PunchEvent::BoutStarted {
552                    bout_id: id.0,
553                    fighter_id: *fighter_id,
554                });
555
556                id
557            }
558        };
559
560        // Mark as fighting.
561        entry.status = FighterStatus::Fighting;
562        let manifest = entry.manifest.clone();
563        let available_tools = tools_for_capabilities(&manifest.capabilities);
564        drop(entry); // Release the DashMap guard before the async call.
565
566        // Run the fighter loop.
567        let params = FighterLoopParams {
568            manifest: manifest.clone(),
569            user_message: message,
570            bout_id,
571            fighter_id: *fighter_id,
572            memory: Arc::clone(&self.memory),
573            driver: Arc::clone(&self.driver),
574            available_tools,
575            max_iterations: None,
576            context_window: None,
577            tool_timeout_secs: None,
578            coordinator,
579            approval_engine: None,
580            sandbox: None,
581        };
582
583        // Record message metric.
584        self.metrics.counter_inc(metrics::MESSAGES_TOTAL);
585
586        let result = run_fighter_loop(params).await;
587
588        // Update state based on the outcome.
589        if let Some(mut entry) = self.fighters.get_mut(fighter_id) {
590            match &result {
591                Ok(loop_result) => {
592                    entry.status = FighterStatus::Idle;
593                    self.scheduler
594                        .record_usage(fighter_id, loop_result.usage.total());
595
596                    // Record token usage metrics.
597                    self.metrics.counter_add(
598                        metrics::TOKENS_INPUT_TOTAL,
599                        loop_result.usage.input_tokens,
600                    );
601                    self.metrics.counter_add(
602                        metrics::TOKENS_OUTPUT_TOTAL,
603                        loop_result.usage.output_tokens,
604                    );
605
606                    // Record usage through the metering engine.
607                    if let Err(e) = self
608                        .metering
609                        .record_usage(
610                            fighter_id,
611                            &manifest.model.model,
612                            loop_result.usage.input_tokens,
613                            loop_result.usage.output_tokens,
614                        )
615                        .await
616                    {
617                        warn!(error = %e, "failed to record metering usage");
618                    }
619                }
620                Err(_) => {
621                    entry.status = FighterStatus::KnockedOut;
622                    self.metrics.counter_inc(metrics::ERRORS_TOTAL);
623                }
624            }
625        }
626
627        result
628    }
629
630    /// Kill (remove) a fighter.
631    #[instrument(skip(self), fields(%fighter_id))]
632    pub fn kill_fighter(&self, fighter_id: &FighterId) {
633        if let Some((_, entry)) = self.fighters.remove(fighter_id) {
634            self.scheduler.remove_fighter(fighter_id);
635            self.metrics
636                .gauge_set(metrics::ACTIVE_FIGHTERS, self.fighters.len() as i64);
637            info!(name = %entry.manifest.name, "fighter killed");
638        } else {
639            warn!("attempted to kill unknown fighter");
640        }
641    }
642
643    /// List all fighters with their current status.
644    pub fn list_fighters(&self) -> Vec<(FighterId, FighterManifest, FighterStatus)> {
645        self.fighters
646            .iter()
647            .map(|entry| {
648                let id = *entry.key();
649                let e = entry.value();
650                (id, e.manifest.clone(), e.status)
651            })
652            .collect()
653    }
654
655    /// Get a snapshot of a single fighter's entry.
656    pub fn get_fighter(&self, fighter_id: &FighterId) -> Option<FighterEntry> {
657        self.fighters.get(fighter_id).map(|e| e.value().clone())
658    }
659
660    // -- Gorilla operations --------------------------------------------------
661
662    /// Register a gorilla with the Ring.
663    ///
664    /// Returns the newly-assigned [`GorillaId`]. The gorilla starts in
665    /// [`GorillaStatus::Caged`].
666    #[instrument(skip(self, manifest), fields(gorilla_name = %manifest.name))]
667    pub fn register_gorilla(&self, manifest: GorillaManifest) -> GorillaId {
668        let id = GorillaId::new();
669        let name = manifest.name.clone();
670
671        let entry = GorillaEntry {
672            manifest,
673            status: GorillaStatus::Caged,
674            metrics: GorillaMetrics::default(),
675            task_handle: None,
676        };
677
678        self.gorillas.insert(id, Mutex::new(entry));
679        info!(%id, name, "gorilla registered");
680        id
681    }
682
683    /// Unleash (start) a gorilla's background task.
684    ///
685    /// This uses the [`BackgroundExecutor`] to spawn the gorilla's autonomous
686    /// loop, which will run the fighter loop on the gorilla's schedule.
687    #[instrument(skip(self), fields(%gorilla_id))]
688    pub async fn unleash_gorilla(&self, gorilla_id: &GorillaId) -> PunchResult<()> {
689        let entry_ref = self
690            .gorillas
691            .get(gorilla_id)
692            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
693
694        let mut entry = entry_ref.value().lock().await;
695
696        if entry.status == GorillaStatus::Unleashed || entry.status == GorillaStatus::Rampaging {
697            return Err(PunchError::Gorilla(format!(
698                "gorilla {} is already active",
699                gorilla_id
700            )));
701        }
702
703        let gorilla_id_owned = *gorilla_id;
704        let name = entry.manifest.name.clone();
705        let manifest = entry.manifest.clone();
706
707        // Start the gorilla via the background executor.
708        self.background.start_gorilla(
709            gorilla_id_owned,
710            manifest,
711            self.config.default_model.clone(),
712            Arc::clone(&self.memory),
713            Arc::clone(&self.driver),
714        )?;
715
716        entry.status = GorillaStatus::Unleashed;
717        drop(entry);
718        drop(entry_ref);
719
720        // Record gorilla metrics.
721        self.metrics.counter_inc(metrics::GORILLA_RUNS_TOTAL);
722        self.metrics.gauge_inc(metrics::ACTIVE_GORILLAS);
723
724        self.event_bus.publish(PunchEvent::GorillaUnleashed {
725            gorilla_id: gorilla_id_owned,
726            name,
727        });
728
729        Ok(())
730    }
731
732    /// Cage (stop) a gorilla's background task.
733    #[instrument(skip(self), fields(%gorilla_id))]
734    pub async fn cage_gorilla(&self, gorilla_id: &GorillaId) -> PunchResult<()> {
735        let entry_ref = self
736            .gorillas
737            .get(gorilla_id)
738            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
739
740        let mut entry = entry_ref.value().lock().await;
741
742        // Stop via background executor.
743        self.background.stop_gorilla(gorilla_id);
744
745        // Also abort any legacy task handle.
746        if let Some(handle) = entry.task_handle.take() {
747            handle.abort();
748        }
749
750        let name = entry.manifest.name.clone();
751        entry.status = GorillaStatus::Caged;
752        drop(entry);
753        drop(entry_ref);
754
755        self.metrics.gauge_dec(metrics::ACTIVE_GORILLAS);
756
757        self.event_bus.publish(PunchEvent::GorillaPaused {
758            gorilla_id: *gorilla_id,
759            reason: "manually caged".to_string(),
760        });
761
762        info!(name, "gorilla caged");
763        Ok(())
764    }
765
766    /// List all gorillas with their current status.
767    pub async fn list_gorillas(&self) -> Vec<(GorillaId, GorillaManifest, GorillaStatus)> {
768        let mut result = Vec::new();
769
770        for entry in self.gorillas.iter() {
771            let id = *entry.key();
772            let inner = entry.value().lock().await;
773            result.push((id, inner.manifest.clone(), inner.status));
774        }
775
776        result
777    }
778
779    /// Get a gorilla's manifest by ID.
780    pub async fn get_gorilla_manifest(&self, gorilla_id: &GorillaId) -> Option<GorillaManifest> {
781        let entry_ref = self.gorillas.get(gorilla_id)?;
782        let entry = entry_ref.value().lock().await;
783        Some(entry.manifest.clone())
784    }
785
786    /// Find a gorilla ID by name (case-insensitive).
787    pub async fn find_gorilla_by_name(&self, name: &str) -> Option<GorillaId> {
788        for entry in self.gorillas.iter() {
789            let inner = entry.value().lock().await;
790            if inner.manifest.name.eq_ignore_ascii_case(name) {
791                return Some(*entry.key());
792            }
793        }
794        None
795    }
796
797    /// Run a single autonomous tick for a gorilla (for testing/debugging).
798    ///
799    /// This executes the gorilla's autonomous prompt once, without starting
800    /// the background scheduler. Useful for verifying configuration.
801    #[instrument(skip(self), fields(%gorilla_id))]
802    pub async fn run_gorilla_tick(
803        &self,
804        gorilla_id: &GorillaId,
805    ) -> PunchResult<punch_runtime::FighterLoopResult> {
806        let entry_ref = self
807            .gorillas
808            .get(gorilla_id)
809            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
810
811        let entry = entry_ref.value().lock().await;
812        let manifest = entry.manifest.clone();
813        drop(entry);
814        drop(entry_ref);
815
816        crate::background::run_gorilla_tick(
817            *gorilla_id,
818            &manifest,
819            &self.config.default_model,
820            &self.memory,
821            &self.driver,
822        )
823        .await
824    }
825
826    /// Get the LLM driver (useful for CLI commands that need to run ticks directly).
827    pub fn driver(&self) -> &Arc<dyn LlmDriver> {
828        &self.driver
829    }
830
831    // -- Inter-agent communication -------------------------------------------
832
833    /// Send a message from one fighter to another.
834    ///
835    /// The source fighter's message becomes the target fighter's input,
836    /// enriched with source context so the target knows who is speaking.
837    /// The target processes it through its own fighter loop (with its own creed)
838    /// and the response is returned.
839    #[instrument(skip(self, message), fields(%source_id, %target_id))]
840    pub async fn fighter_to_fighter(
841        &self,
842        source_id: &FighterId,
843        target_id: &FighterId,
844        message: String,
845    ) -> PunchResult<FighterLoopResult> {
846        // Get source fighter name for context.
847        let source_name = self
848            .fighters
849            .get(source_id)
850            .map(|entry| entry.value().manifest.name.clone())
851            .ok_or_else(|| {
852                PunchError::Fighter(format!("source fighter {} not found", source_id))
853            })?;
854
855        // Verify target exists.
856        if self.fighters.get(target_id).is_none() {
857            return Err(PunchError::Fighter(format!(
858                "target fighter {} not found",
859                target_id
860            )));
861        }
862
863        // Wrap the message with source context so the target knows who is speaking.
864        let enriched_message = format!(
865            "[Message from fighter '{}' (id: {})]\n\n{}",
866            source_name, source_id, message
867        );
868
869        // Send to target through normal message flow (uses target's creed).
870        self.send_message(target_id, enriched_message).await
871    }
872
873    /// Find a fighter by name (case-insensitive).
874    ///
875    /// Returns the fighter ID and manifest if found.
876    pub fn find_fighter_by_name_sync(&self, name: &str) -> Option<(FighterId, FighterManifest)> {
877        self.fighters.iter().find_map(|entry| {
878            if entry.value().manifest.name.eq_ignore_ascii_case(name) {
879                Some((*entry.key(), entry.value().manifest.clone()))
880            } else {
881                None
882            }
883        })
884    }
885
886    /// Update relationship tracking in both fighters' creeds after inter-agent
887    /// communication.
888    ///
889    /// Loads both creeds, adds or updates the peer relationship entry
890    /// (incrementing interaction_count), and saves them back.
891    pub async fn update_fighter_relationships(
892        &self,
893        fighter_a_name: &str,
894        fighter_b_name: &str,
895    ) {
896        let memory = Arc::clone(&self.memory);
897        let a_name = fighter_a_name.to_string();
898        let b_name = fighter_b_name.to_string();
899
900        // Update in a spawned task to avoid blocking the caller.
901        tokio::spawn(async move {
902            // Update A's creed with relationship to B.
903            if let Ok(Some(mut creed_a)) = memory.load_creed_by_name(&a_name).await {
904                update_relationship(&mut creed_a, &b_name);
905                if let Err(e) = memory.save_creed(&creed_a).await {
906                    warn!(error = %e, fighter = %a_name, "failed to save creed relationship update");
907                }
908            }
909
910            // Update B's creed with relationship to A.
911            if let Ok(Some(mut creed_b)) = memory.load_creed_by_name(&b_name).await {
912                update_relationship(&mut creed_b, &a_name);
913                if let Err(e) = memory.save_creed(&creed_b).await {
914                    warn!(error = %e, fighter = %b_name, "failed to save creed relationship update");
915                }
916            }
917        });
918    }
919
920    // -- Troop / Swarm / Messaging accessors ---------------------------------
921
922    /// Get a reference to the troop manager.
923    pub fn troop_manager(&self) -> &TroopManager {
924        &self.troop_manager
925    }
926
927    /// Get a reference to the swarm coordinator.
928    pub fn swarm_coordinator(&self) -> &SwarmCoordinator {
929        &self.swarm_coordinator
930    }
931
932    /// Get a reference to the message router.
933    pub fn message_router(&self) -> &MessageRouter {
934        &self.message_router
935    }
936
937    // -- Troop operations ----------------------------------------------------
938
939    /// Form a new troop with a leader and initial members.
940    #[instrument(skip(self, members), fields(troop_name = %name))]
941    pub fn form_troop(
942        &self,
943        name: String,
944        leader: FighterId,
945        members: Vec<FighterId>,
946        strategy: CoordinationStrategy,
947    ) -> PunchResult<TroopId> {
948        // Verify the leader exists.
949        if self.fighters.get(&leader).is_none() {
950            return Err(PunchError::Troop(format!(
951                "leader fighter {} not found",
952                leader
953            )));
954        }
955
956        // Verify all members exist.
957        for member in &members {
958            if self.fighters.get(member).is_none() {
959                return Err(PunchError::Troop(format!(
960                    "member fighter {} not found",
961                    member
962                )));
963            }
964        }
965
966        let member_count = members.len() + 1; // +1 for leader if not in list
967        let troop_id = self.troop_manager.form_troop(name.clone(), leader, members, strategy);
968
969        self.event_bus.publish(PunchEvent::TroopFormed {
970            troop_id,
971            name,
972            member_count,
973        });
974
975        Ok(troop_id)
976    }
977
978    /// Disband (dissolve) a troop.
979    #[instrument(skip(self), fields(%troop_id))]
980    pub fn disband_troop(&self, troop_id: &TroopId) -> PunchResult<()> {
981        let name = self.troop_manager.disband_troop(troop_id)?;
982
983        self.event_bus.publish(PunchEvent::TroopDisbanded {
984            troop_id: *troop_id,
985            name,
986        });
987
988        Ok(())
989    }
990
991    /// Assign a task to a troop, returning the fighters that should handle it.
992    pub fn assign_troop_task(
993        &self,
994        troop_id: &TroopId,
995        task_description: &str,
996    ) -> PunchResult<Vec<FighterId>> {
997        self.troop_manager.assign_task(troop_id, task_description)
998    }
999
1000    /// Get the current status of a troop.
1001    pub fn get_troop_status(&self, troop_id: &TroopId) -> Option<Troop> {
1002        self.troop_manager.get_troop(troop_id)
1003    }
1004
1005    /// List all troops.
1006    pub fn list_troops(&self) -> Vec<Troop> {
1007        self.troop_manager.list_troops()
1008    }
1009
1010    /// Recruit a fighter into a troop.
1011    pub fn recruit_to_troop(
1012        &self,
1013        troop_id: &TroopId,
1014        fighter_id: FighterId,
1015    ) -> PunchResult<()> {
1016        // Verify the fighter exists.
1017        if self.fighters.get(&fighter_id).is_none() {
1018            return Err(PunchError::Troop(format!(
1019                "fighter {} not found",
1020                fighter_id
1021            )));
1022        }
1023        self.troop_manager.recruit(troop_id, fighter_id)
1024    }
1025
1026    /// Dismiss a fighter from a troop.
1027    pub fn dismiss_from_troop(
1028        &self,
1029        troop_id: &TroopId,
1030        fighter_id: &FighterId,
1031    ) -> PunchResult<()> {
1032        self.troop_manager.dismiss(troop_id, fighter_id)
1033    }
1034
1035    // -- Troop-aware fighter lifecycle ---------------------------------------
1036
1037    /// Kill a fighter, warning if they're in a troop.
1038    ///
1039    /// Unlike [`kill_fighter`], this checks troop membership and dismisses
1040    /// the fighter from all troops before killing them.
1041    #[instrument(skip(self), fields(%fighter_id))]
1042    pub fn kill_fighter_safe(&self, fighter_id: &FighterId) {
1043        // Dismiss from any troops first.
1044        let troop_ids = self.troop_manager.get_fighter_troops(fighter_id);
1045        for troop_id in troop_ids {
1046            if let Err(e) = self.troop_manager.dismiss(&troop_id, fighter_id) {
1047                warn!(
1048                    %troop_id,
1049                    %fighter_id,
1050                    error = %e,
1051                    "failed to dismiss fighter from troop before kill"
1052                );
1053            }
1054        }
1055        self.kill_fighter(fighter_id);
1056    }
1057
1058    // -- Workflow operations -------------------------------------------------
1059
1060    /// Register a workflow with the engine.
1061    pub fn register_workflow(&self, workflow: Workflow) -> WorkflowId {
1062        self.workflow_engine.register_workflow(workflow)
1063    }
1064
1065    /// Execute a workflow by ID with the given input.
1066    pub async fn execute_workflow(
1067        &self,
1068        workflow_id: &WorkflowId,
1069        input: String,
1070    ) -> PunchResult<WorkflowRunId> {
1071        self.workflow_engine
1072            .execute_workflow(
1073                workflow_id,
1074                input,
1075                Arc::clone(&self.memory),
1076                Arc::clone(&self.driver),
1077                &self.config.default_model,
1078            )
1079            .await
1080    }
1081
1082    // -- Shutdown ------------------------------------------------------------
1083
1084    /// Gracefully shut down the Ring, stopping all gorillas and background tasks.
1085    pub fn shutdown(&self) {
1086        info!("Ring shutdown initiated");
1087
1088        // Signal shutdown to all background tasks.
1089        let _ = self.shutdown_tx.send(true);
1090
1091        // Stop all gorilla tasks via the background executor.
1092        self.background.shutdown_all();
1093
1094        info!("Ring shutdown complete");
1095    }
1096}
1097
1098// ---------------------------------------------------------------------------
1099// Relationship tracking helper
1100// ---------------------------------------------------------------------------
1101
1102/// Update or insert a peer relationship in a creed.
1103fn update_relationship(creed: &mut punch_types::Creed, peer_name: &str) {
1104    if let Some(rel) = creed
1105        .relationships
1106        .iter_mut()
1107        .find(|r| r.entity == peer_name && r.entity_type == "fighter")
1108    {
1109        rel.interaction_count += 1;
1110        rel.notes = format!(
1111            "Last interaction: {}",
1112            chrono::Utc::now().format("%Y-%m-%d %H:%M UTC")
1113        );
1114    } else {
1115        creed.relationships.push(punch_types::Relationship {
1116            entity: peer_name.to_string(),
1117            entity_type: "fighter".to_string(),
1118            nature: "peer".to_string(),
1119            trust: 0.5,
1120            interaction_count: 1,
1121            notes: format!(
1122                "First interaction: {}",
1123                chrono::Utc::now().format("%Y-%m-%d %H:%M UTC")
1124            ),
1125        });
1126    }
1127    creed.updated_at = chrono::Utc::now();
1128    creed.version += 1;
1129}
1130
1131// ---------------------------------------------------------------------------
1132// AgentCoordinator implementation
1133// ---------------------------------------------------------------------------
1134
1135#[async_trait]
1136impl AgentCoordinator for Ring {
1137    async fn spawn_fighter(&self, manifest: FighterManifest) -> PunchResult<FighterId> {
1138        Ok(Ring::spawn_fighter(self, manifest).await)
1139    }
1140
1141    async fn send_message_to_agent(
1142        &self,
1143        target: &FighterId,
1144        message: String,
1145    ) -> PunchResult<AgentMessageResult> {
1146        let result = self.send_message(target, message).await?;
1147        Ok(AgentMessageResult {
1148            response: result.response,
1149            tokens_used: result.usage.total(),
1150        })
1151    }
1152
1153    async fn find_fighter_by_name(&self, name: &str) -> PunchResult<Option<FighterId>> {
1154        let found = self.fighters.iter().find_map(|entry| {
1155            if entry.value().manifest.name.eq_ignore_ascii_case(name) {
1156                Some(*entry.key())
1157            } else {
1158                None
1159            }
1160        });
1161        Ok(found)
1162    }
1163
1164    async fn list_fighters(&self) -> PunchResult<Vec<AgentInfo>> {
1165        let fighters = self
1166            .fighters
1167            .iter()
1168            .map(|entry| AgentInfo {
1169                id: *entry.key(),
1170                name: entry.value().manifest.name.clone(),
1171                status: entry.value().status,
1172            })
1173            .collect();
1174        Ok(fighters)
1175    }
1176}
1177
1178// ---------------------------------------------------------------------------
1179// Compile-time Send + Sync assertion
1180// ---------------------------------------------------------------------------
1181
1182/// Compile-time assertion that `Ring` is `Send + Sync`.
1183const _: () = {
1184    fn _assert_send<T: Send>() {}
1185    fn _assert_sync<T: Sync>() {}
1186    fn _assert() {
1187        _assert_send::<Ring>();
1188        _assert_sync::<Ring>();
1189    }
1190};