Skip to main content

punch_kernel/
ring.rs

1//! **The Ring** — the central kernel and coordinator for the Punch system.
2//!
3//! The [`Ring`] owns every fighter and gorilla, wires them to the memory
4//! substrate, the LLM driver, the event bus, the scheduler, the background
5//! executor, the workflow engine, and the metering engine. All mutations
6//! go through the Ring so that invariants (quotas, capabilities, lifecycle
7//! events) are enforced in a single place.
8
9use std::sync::Arc;
10
11use async_trait::async_trait;
12use chrono;
13use dashmap::DashMap;
14use serde::{Deserialize, Serialize};
15use tokio::sync::Mutex;
16use tokio::sync::watch;
17use tokio::task::JoinHandle;
18use tracing::{info, instrument, warn};
19
20use punch_memory::{BoutId, MemorySubstrate};
21use punch_runtime::{
22    FighterLoopParams, FighterLoopResult, LlmDriver, run_fighter_loop, tools_for_capabilities,
23};
24use punch_types::{
25    AgentCoordinator, AgentInfo, AgentMessageResult, CoordinationStrategy, FighterId,
26    FighterManifest, FighterStatus, GorillaId, GorillaManifest, GorillaMetrics, GorillaStatus,
27    PunchConfig, PunchError, PunchEvent, PunchResult, TenantId, TenantStatus, Troop, TroopId,
28};
29
30use punch_skills::{SkillMarketplace, builtin_skills};
31
32use crate::agent_messaging::MessageRouter;
33use crate::background::BackgroundExecutor;
34use crate::budget::BudgetEnforcer;
35use crate::event_bus::EventBus;
36use crate::metering::MeteringEngine;
37use crate::metrics::{self, MetricsRegistry};
38use crate::scheduler::{QuotaConfig, Scheduler};
39use crate::swarm::SwarmCoordinator;
40use crate::tenant_registry::TenantRegistry;
41use crate::triggers::{Trigger, TriggerEngine, TriggerId, TriggerSummary};
42use crate::troop::TroopManager;
43use crate::workflow::{Workflow, WorkflowEngine, WorkflowId, WorkflowRunId};
44
45// ---------------------------------------------------------------------------
46// Entry types
47// ---------------------------------------------------------------------------
48
/// Everything the Ring tracks about a single fighter.
///
/// Entries live by value in the Ring's fighter map. `Ring::get_fighter`
/// hands out clones, so mutating a returned entry does not affect the Ring.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FighterEntry {
    /// The fighter's manifest (identity, model, capabilities, etc.).
    pub manifest: FighterManifest,
    /// Current operational status.
    ///
    /// A freshly spawned fighter starts as `FighterStatus::Idle`.
    pub status: FighterStatus,
    /// The active bout (conversation session) ID, if any.
    ///
    /// `None` at spawn time; set when the first message sent to the fighter
    /// creates a bout.
    pub current_bout: Option<BoutId>,
}
59
/// Everything the Ring tracks about a single gorilla.
///
/// The Ring stores each entry behind a `Mutex` (see the `gorillas` map on
/// [`Ring`]) because [`JoinHandle`] is not `Clone` and we need interior
/// mutability when starting / stopping background tasks. The `task_handle`
/// field itself is a plain `Option`; the lock wraps the whole entry.
pub struct GorillaEntry {
    /// The gorilla's manifest.
    pub manifest: GorillaManifest,
    /// Current operational status.
    pub status: GorillaStatus,
    /// Runtime metrics.
    pub metrics: GorillaMetrics,
    /// Handle to the background task (if running).
    ///
    /// Private: only the Ring creates or aborts it. Registration leaves it
    /// `None`, and caging aborts and clears any handle that is present.
    task_handle: Option<JoinHandle<()>>,
}
74
75// Manual Debug impl because JoinHandle doesn't implement Debug in a useful way.
76impl std::fmt::Debug for GorillaEntry {
77    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78        f.debug_struct("GorillaEntry")
79            .field("manifest", &self.manifest)
80            .field("status", &self.status)
81            .field("metrics", &self.metrics)
82            .field("has_task", &self.task_handle.is_some())
83            .finish()
84    }
85}
86
87// ---------------------------------------------------------------------------
88// The Ring
89// ---------------------------------------------------------------------------
90
/// The Ring — the central coordinator for the Punch Agent Combat System.
///
/// Thread-safe by design: all collections use [`DashMap`] and all shared state
/// is behind `Arc`. Wrap the `Ring` itself in an `Arc` to share across tasks.
///
/// All fields are private; read access goes through the accessor methods on
/// `impl Ring`.
pub struct Ring {
    /// All active fighters, keyed by their unique ID.
    fighters: DashMap<FighterId, FighterEntry>,
    /// All registered gorillas, keyed by their unique ID.
    ///
    /// Each entry sits behind an async `Mutex` so its status and task handle
    /// can be mutated through a shared map reference.
    gorillas: DashMap<GorillaId, Mutex<GorillaEntry>>,
    /// Shared memory substrate (SQLite persistence).
    memory: Arc<MemorySubstrate>,
    /// The LLM driver used for completions.
    driver: Arc<dyn LlmDriver>,
    /// System-wide event bus.
    event_bus: EventBus,
    /// Per-fighter quota scheduler.
    scheduler: Scheduler,
    /// Top-level Punch configuration.
    config: PunchConfig,
    /// Background executor for autonomous gorilla tasks.
    background: BackgroundExecutor,
    /// Multi-step workflow engine.
    workflow_engine: WorkflowEngine,
    /// Cost tracking and metering engine.
    ///
    /// NOTE(review): the constructors build a *second*, separate
    /// `MeteringEngine` for `budget_enforcer`; the two share only the
    /// memory substrate. Confirm that is intended.
    metering: MeteringEngine,
    /// Budget enforcement layer (opt-in spending limits).
    budget_enforcer: Arc<BudgetEnforcer>,
    /// Event-driven trigger engine.
    trigger_engine: TriggerEngine,
    /// Troop manager for multi-agent coordination.
    troop_manager: TroopManager,
    /// Swarm coordinator for emergent behavior tasks.
    swarm_coordinator: SwarmCoordinator,
    /// Inter-agent message router.
    message_router: MessageRouter,
    /// Production observability metrics.
    metrics: Arc<MetricsRegistry>,
    /// Multi-tenant registry.
    tenant_registry: TenantRegistry,
    /// Skill marketplace for discovering and installing moves.
    ///
    /// Pre-populated with the built-in skills at construction time.
    marketplace: SkillMarketplace,
    /// Shutdown signal sender.
    shutdown_tx: watch::Sender<bool>,
    /// Shutdown signal receiver.
    ///
    /// Not read in the code visible here; presumably retained so the
    /// shutdown channel always has at least one live receiver — confirm.
    _shutdown_rx: watch::Receiver<bool>,
}
137
138impl Ring {
139    /// Create a new Ring.
140    ///
141    /// The caller provides the already-initialised memory substrate, LLM
142    /// driver, and configuration. The Ring will create its own event bus and
143    /// scheduler internally.
144    pub fn new(
145        config: PunchConfig,
146        memory: Arc<MemorySubstrate>,
147        driver: Arc<dyn LlmDriver>,
148    ) -> Self {
149        let (shutdown_tx, shutdown_rx) = watch::channel(false);
150        let background =
151            BackgroundExecutor::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
152        let metering = MeteringEngine::new(Arc::clone(&memory));
153        let metering_arc = Arc::new(MeteringEngine::new(Arc::clone(&memory)));
154        let budget_enforcer = Arc::new(BudgetEnforcer::new(Arc::clone(&metering_arc)));
155        let metrics_registry = Arc::new(MetricsRegistry::new());
156        metrics::register_default_metrics(&metrics_registry);
157
158        let marketplace = SkillMarketplace::new();
159        for listing in builtin_skills() {
160            marketplace.publish(listing);
161        }
162
163        Self {
164            fighters: DashMap::new(),
165            gorillas: DashMap::new(),
166            memory,
167            driver,
168            event_bus: EventBus::new(),
169            scheduler: Scheduler::new(QuotaConfig::default()),
170            config,
171            background,
172            workflow_engine: WorkflowEngine::new(),
173            metering,
174            budget_enforcer,
175            trigger_engine: TriggerEngine::new(),
176            troop_manager: TroopManager::new(),
177            swarm_coordinator: SwarmCoordinator::new(),
178            message_router: MessageRouter::new(),
179            metrics: metrics_registry,
180            tenant_registry: TenantRegistry::new(),
181            marketplace,
182            shutdown_tx,
183            _shutdown_rx: shutdown_rx,
184        }
185    }
186
187    /// Create a new Ring with a custom quota configuration.
188    pub fn with_quota_config(
189        config: PunchConfig,
190        memory: Arc<MemorySubstrate>,
191        driver: Arc<dyn LlmDriver>,
192        quota_config: QuotaConfig,
193    ) -> Self {
194        let (shutdown_tx, shutdown_rx) = watch::channel(false);
195        let background =
196            BackgroundExecutor::with_shutdown(shutdown_tx.clone(), shutdown_rx.clone());
197        let metering = MeteringEngine::new(Arc::clone(&memory));
198        let metering_arc = Arc::new(MeteringEngine::new(Arc::clone(&memory)));
199        let budget_enforcer = Arc::new(BudgetEnforcer::new(Arc::clone(&metering_arc)));
200        let metrics_registry = Arc::new(MetricsRegistry::new());
201        metrics::register_default_metrics(&metrics_registry);
202
203        let marketplace = SkillMarketplace::new();
204        for listing in builtin_skills() {
205            marketplace.publish(listing);
206        }
207
208        Self {
209            fighters: DashMap::new(),
210            gorillas: DashMap::new(),
211            memory,
212            driver,
213            event_bus: EventBus::new(),
214            scheduler: Scheduler::new(quota_config),
215            config,
216            background,
217            workflow_engine: WorkflowEngine::new(),
218            metering,
219            budget_enforcer,
220            trigger_engine: TriggerEngine::new(),
221            troop_manager: TroopManager::new(),
222            swarm_coordinator: SwarmCoordinator::new(),
223            message_router: MessageRouter::new(),
224            metrics: metrics_registry,
225            tenant_registry: TenantRegistry::new(),
226            marketplace,
227            shutdown_tx,
228            _shutdown_rx: shutdown_rx,
229        }
230    }
231
232    // -- Accessors -----------------------------------------------------------
233
    /// Borrow the system-wide event bus.
    ///
    /// The Ring publishes its lifecycle events on this bus (fighter spawned,
    /// bout started/ended, gorilla unleashed/paused, errors).
    pub fn event_bus(&self) -> &EventBus {
        &self.event_bus
    }
238
    /// Borrow the per-fighter quota scheduler.
    ///
    /// The Ring consults it before running a fighter loop and records token
    /// usage on it afterwards.
    pub fn scheduler(&self) -> &Scheduler {
        &self.scheduler
    }
243
    /// Borrow the shared handle to the memory substrate.
    ///
    /// The `Arc` is returned by reference; use `Arc::clone` for an owned
    /// handle that can move into another task.
    pub fn memory(&self) -> &Arc<MemorySubstrate> {
        &self.memory
    }
248
    /// Borrow the Punch configuration this Ring was constructed with.
    pub fn config(&self) -> &PunchConfig {
        &self.config
    }
253
    /// Borrow the background executor.
    ///
    /// The Ring uses it to start and stop gorilla tasks.
    pub fn background(&self) -> &BackgroundExecutor {
        &self.background
    }
258
    /// Borrow the multi-step workflow engine.
    pub fn workflow_engine(&self) -> &WorkflowEngine {
        &self.workflow_engine
    }
263
    /// Borrow the Ring's metering engine.
    ///
    /// This is the engine the Ring records usage on after a successful
    /// fighter loop. It is a different instance from the one handed to the
    /// budget enforcer at construction time.
    pub fn metering(&self) -> &MeteringEngine {
        &self.metering
    }
268
    /// Borrow the shared handle to the budget enforcer.
    ///
    /// The `Arc` is returned by reference; use `Arc::clone` for an owned
    /// handle.
    pub fn budget_enforcer(&self) -> &Arc<BudgetEnforcer> {
        &self.budget_enforcer
    }
273
    /// Borrow the event-driven trigger engine.
    ///
    /// `register_trigger`, `remove_trigger` and `list_triggers` on the Ring
    /// delegate to this engine.
    pub fn trigger_engine(&self) -> &TriggerEngine {
        &self.trigger_engine
    }
278
    /// Borrow the shared handle to the metrics registry.
    ///
    /// The registry has the default metrics registered at construction time.
    /// Use `Arc::clone` for an owned handle.
    pub fn metrics(&self) -> &Arc<MetricsRegistry> {
        &self.metrics
    }
283
    /// Borrow the multi-tenant registry.
    ///
    /// The tenant-scoped Ring operations look tenants up here to check their
    /// status and quotas.
    pub fn tenant_registry(&self) -> &TenantRegistry {
        &self.tenant_registry
    }
288
    /// Borrow the skill marketplace.
    ///
    /// The built-in skills are published to it when the Ring is constructed.
    pub fn marketplace(&self) -> &SkillMarketplace {
        &self.marketplace
    }
293
294    // -- Tenant-scoped operations --------------------------------------------
295
296    /// Spawn a fighter scoped to a tenant, enforcing quota limits.
297    ///
298    /// Returns an error if the tenant is suspended or the fighter quota is
299    /// exceeded.
300    #[instrument(skip(self, manifest), fields(fighter_name = %manifest.name))]
301    pub async fn spawn_fighter_for_tenant(
302        &self,
303        tenant_id: &TenantId,
304        mut manifest: FighterManifest,
305    ) -> PunchResult<FighterId> {
306        // Verify tenant exists and is active.
307        let tenant = self
308            .tenant_registry
309            .get_tenant(tenant_id)
310            .ok_or_else(|| PunchError::Tenant(format!("tenant {} not found", tenant_id)))?;
311
312        if tenant.status == TenantStatus::Suspended {
313            return Err(PunchError::Tenant(format!(
314                "tenant {} is suspended",
315                tenant_id
316            )));
317        }
318
319        // Check fighter quota.
320        let current_count = self
321            .fighters
322            .iter()
323            .filter(|e| e.value().manifest.tenant_id.as_ref() == Some(tenant_id))
324            .count();
325
326        if current_count >= tenant.quota.max_fighters {
327            return Err(PunchError::QuotaExceeded(format!(
328                "tenant {} has reached max fighters limit ({})",
329                tenant_id, tenant.quota.max_fighters
330            )));
331        }
332
333        // Stamp the manifest with the tenant ID.
334        manifest.tenant_id = Some(*tenant_id);
335        Ok(self.spawn_fighter(manifest).await)
336    }
337
338    /// List fighters that belong to a specific tenant.
339    pub fn list_fighters_for_tenant(
340        &self,
341        tenant_id: &TenantId,
342    ) -> Vec<(FighterId, FighterManifest, FighterStatus)> {
343        self.fighters
344            .iter()
345            .filter(|entry| entry.value().manifest.tenant_id.as_ref() == Some(tenant_id))
346            .map(|entry| {
347                let id = *entry.key();
348                let e = entry.value();
349                (id, e.manifest.clone(), e.status)
350            })
351            .collect()
352    }
353
354    /// Kill a fighter, validating that the caller tenant owns it.
355    ///
356    /// Returns an error if the fighter doesn't belong to the given tenant.
357    #[instrument(skip(self), fields(%fighter_id, %tenant_id))]
358    pub fn kill_fighter_for_tenant(
359        &self,
360        fighter_id: &FighterId,
361        tenant_id: &TenantId,
362    ) -> PunchResult<()> {
363        let entry = self
364            .fighters
365            .get(fighter_id)
366            .ok_or_else(|| PunchError::Fighter(format!("fighter {} not found", fighter_id)))?;
367
368        if entry.manifest.tenant_id.as_ref() != Some(tenant_id) {
369            return Err(PunchError::Auth(format!(
370                "fighter {} does not belong to tenant {}",
371                fighter_id, tenant_id
372            )));
373        }
374
375        drop(entry);
376        self.kill_fighter(fighter_id);
377        Ok(())
378    }
379
380    /// Check whether a tenant's tool access is allowed for the given tool name.
381    ///
382    /// Returns `true` if the tenant has no tool restrictions (empty list) or
383    /// the tool is in the allowed list.
384    pub fn check_tenant_tool_access(&self, tenant_id: &TenantId, tool_name: &str) -> bool {
385        match self.tenant_registry.get_tenant(tenant_id) {
386            Some(tenant) => {
387                tenant.quota.max_tools.is_empty()
388                    || tenant.quota.max_tools.iter().any(|t| t == tool_name)
389            }
390            None => false,
391        }
392    }
393
394    // -- Trigger operations --------------------------------------------------
395
    /// Register a trigger with the engine.
    ///
    /// Delegates to the trigger engine and returns the [`TriggerId`] it
    /// assigns; keep it to remove the trigger later.
    pub fn register_trigger(&self, trigger: Trigger) -> TriggerId {
        self.trigger_engine.register_trigger(trigger)
    }
400
    /// Remove a trigger by ID.
    ///
    /// Delegates to the trigger engine. Nothing is reported back to the
    /// caller, so an unknown ID cannot be told apart from a removal here.
    pub fn remove_trigger(&self, id: &TriggerId) {
        self.trigger_engine.remove_trigger(id);
    }
405
    /// List all triggers with summary information.
    ///
    /// Delegates to the trigger engine and returns its `(id, summary)` pairs
    /// unchanged.
    pub fn list_triggers(&self) -> Vec<(TriggerId, TriggerSummary)> {
        self.trigger_engine.list_triggers()
    }
410
411    // -- Fighter operations --------------------------------------------------
412
413    /// Spawn a new fighter from a manifest.
414    ///
415    /// Returns the newly-assigned [`FighterId`]. The fighter starts in
416    /// [`FighterStatus::Idle`] and is persisted to the memory substrate.
417    #[instrument(skip(self, manifest), fields(fighter_name = %manifest.name))]
418    pub async fn spawn_fighter(&self, manifest: FighterManifest) -> FighterId {
419        let id = FighterId::new();
420        let name = manifest.name.clone();
421
422        // Persist to the database first so FK constraints work.
423        if let Err(e) = self
424            .memory
425            .save_fighter(&id, &manifest, FighterStatus::Idle)
426            .await
427        {
428            warn!(error = %e, "failed to persist fighter to database (continuing in-memory only)");
429        }
430
431        let entry = FighterEntry {
432            manifest,
433            status: FighterStatus::Idle,
434            current_bout: None,
435        };
436
437        self.fighters.insert(id, entry);
438
439        // Record metrics.
440        self.metrics.counter_inc(metrics::FIGHTER_SPAWNS_TOTAL);
441        self.metrics
442            .gauge_set(metrics::ACTIVE_FIGHTERS, self.fighters.len() as i64);
443
444        self.event_bus.publish(PunchEvent::FighterSpawned {
445            fighter_id: id,
446            name: name.clone(),
447        });
448
449        info!(%id, name, "fighter spawned");
450
451        // --- Creed creation & binding ---
452        // Create a default creed if none exists, then bind it to this instance.
453        // This ensures every fighter has a consciousness layer from birth.
454        {
455            let memory = Arc::clone(&self.memory);
456            let creed_name = name.clone();
457            let fid = id;
458            // Clone the manifest for the creed before it's moved into the entry.
459            let manifest_for_creed = self.fighters.get(&id).map(|e| e.value().manifest.clone());
460            tokio::spawn(async move {
461                // Ensure a creed exists (create default if not).
462                if let Ok(None) = memory.load_creed_by_name(&creed_name).await
463                    && let Some(manifest) = &manifest_for_creed
464                {
465                    let creed = punch_types::Creed::new(&creed_name).with_self_awareness(manifest);
466                    if let Err(e) = memory.save_creed(&creed).await {
467                        warn!(error = %e, fighter = %creed_name, "failed to create default creed");
468                    } else {
469                        info!(fighter = %creed_name, "default creed created on spawn");
470                    }
471                }
472                // Bind the creed to this fighter instance.
473                if let Err(e) = memory.bind_creed_to_fighter(&creed_name, &fid).await {
474                    warn!(error = %e, fighter = %creed_name, "failed to bind creed on spawn");
475                } else {
476                    info!(fighter = %creed_name, id = %fid, "creed bound to fighter on spawn");
477                }
478            });
479        }
480
481        id
482    }
483
484    /// Create a default creed for a fighter if none exists.
485    /// The default creed includes self-awareness from the manifest.
486    pub async fn ensure_creed(&self, fighter_name: &str, manifest: &FighterManifest) {
487        match self.memory.load_creed_by_name(fighter_name).await {
488            Ok(Some(_)) => {
489                // Creed already exists.
490            }
491            Ok(None) => {
492                // Create a default creed with self-awareness.
493                let creed = punch_types::Creed::new(fighter_name).with_self_awareness(manifest);
494                if let Err(e) = self.memory.save_creed(&creed).await {
495                    warn!(error = %e, "failed to create default creed");
496                } else {
497                    info!(fighter = %fighter_name, "default creed created with self-awareness");
498                }
499            }
500            Err(e) => {
501                warn!(error = %e, "failed to check for existing creed");
502            }
503        }
504    }
505
    /// Send a user message to a fighter and run the agent loop (without coordinator).
    ///
    /// Convenience wrapper around [`Self::send_message_with_coordinator`] that
    /// passes `None` as the coordinator, meaning the fighter will not have
    /// access to inter-agent tools.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Self::send_message_with_coordinator`] returns.
    #[instrument(skip(self, message), fields(%fighter_id))]
    pub async fn send_message(
        &self,
        fighter_id: &FighterId,
        message: String,
    ) -> PunchResult<FighterLoopResult> {
        self.send_message_with_coordinator(fighter_id, message, None)
            .await
    }
519
520    /// Send a user message to a fighter and run the agent loop.
521    ///
522    /// This creates (or reuses) a bout for the fighter, checks quotas, then
523    /// delegates to [`run_fighter_loop`]. Usage is recorded through the
524    /// metering engine after a successful completion.
525    ///
526    /// If `coordinator` is provided, the fighter can use inter-agent tools
527    /// (`agent_spawn`, `agent_message`, `agent_list`).
528    #[instrument(skip(self, message, coordinator), fields(%fighter_id))]
529    pub async fn send_message_with_coordinator(
530        &self,
531        fighter_id: &FighterId,
532        message: String,
533        coordinator: Option<Arc<dyn AgentCoordinator>>,
534    ) -> PunchResult<FighterLoopResult> {
535        // Look up the fighter.
536        let mut entry = self
537            .fighters
538            .get_mut(fighter_id)
539            .ok_or_else(|| PunchError::Fighter(format!("fighter {} not found", fighter_id)))?;
540
541        // Check quota.
542        if !self.scheduler.check_quota(fighter_id) {
543            entry.status = FighterStatus::Resting;
544            return Err(PunchError::RateLimited {
545                provider: "scheduler".to_string(),
546                retry_after_ms: 60_000,
547            });
548        }
549
550        // Check budget enforcement (opt-in — only blocks if limits are configured).
551        match self.budget_enforcer.check_budget(fighter_id).await {
552            Ok(crate::budget::BudgetVerdict::Blocked {
553                reason,
554                retry_after_secs,
555            }) => {
556                entry.status = FighterStatus::Resting;
557                return Err(PunchError::RateLimited {
558                    provider: format!("budget: {}", reason),
559                    retry_after_ms: retry_after_secs * 1000,
560                });
561            }
562            Ok(crate::budget::BudgetVerdict::Warning { message, .. }) => {
563                info!(warning = %message, "budget warning for fighter");
564            }
565            Ok(crate::budget::BudgetVerdict::Allowed) => {}
566            Err(e) => {
567                warn!(error = %e, "budget check failed, allowing request");
568            }
569        }
570
571        // Ensure the fighter has an active bout.
572        let bout_id = match entry.current_bout {
573            Some(id) => id,
574            None => {
575                // Create the bout in the database.
576                let id = self
577                    .memory
578                    .create_bout(fighter_id)
579                    .await
580                    .map_err(|e| PunchError::Bout(format!("failed to create bout: {e}")))?;
581                entry.current_bout = Some(id);
582
583                self.event_bus.publish(PunchEvent::BoutStarted {
584                    bout_id: id.0,
585                    fighter_id: *fighter_id,
586                });
587
588                id
589            }
590        };
591
592        // Mark as fighting and publish status change.
593        entry.status = FighterStatus::Fighting;
594        let manifest = entry.manifest.clone();
595        let fighter_name = manifest.name.clone();
596        let available_tools = tools_for_capabilities(&manifest.capabilities);
597        drop(entry); // Release the DashMap guard before the async call.
598
599        // Publish the incoming user message event.
600        self.event_bus.publish(PunchEvent::FighterMessage {
601            fighter_id: *fighter_id,
602            bout_id: bout_id.0,
603            role: "user".to_string(),
604            content_preview: truncate_preview(&message, 120),
605        });
606
607        // Run the fighter loop.
608        let params = FighterLoopParams {
609            manifest: manifest.clone(),
610            user_message: message,
611            bout_id,
612            fighter_id: *fighter_id,
613            memory: Arc::clone(&self.memory),
614            driver: Arc::clone(&self.driver),
615            available_tools,
616            max_iterations: None,
617            context_window: None,
618            tool_timeout_secs: None,
619            coordinator,
620            approval_engine: None,
621            sandbox: None,
622        };
623
624        // Record message metric.
625        self.metrics.counter_inc(metrics::MESSAGES_TOTAL);
626
627        let result = run_fighter_loop(params).await;
628
629        // Update state based on the outcome.
630        if let Some(mut entry) = self.fighters.get_mut(fighter_id) {
631            match &result {
632                Ok(loop_result) => {
633                    entry.status = FighterStatus::Idle;
634                    self.scheduler
635                        .record_usage(fighter_id, loop_result.usage.total());
636
637                    // Record token usage metrics.
638                    self.metrics
639                        .counter_add(metrics::TOKENS_INPUT_TOTAL, loop_result.usage.input_tokens);
640                    self.metrics.counter_add(
641                        metrics::TOKENS_OUTPUT_TOTAL,
642                        loop_result.usage.output_tokens,
643                    );
644
645                    // Record usage through the metering engine.
646                    if let Err(e) = self
647                        .metering
648                        .record_usage(
649                            fighter_id,
650                            &manifest.model.model,
651                            loop_result.usage.input_tokens,
652                            loop_result.usage.output_tokens,
653                        )
654                        .await
655                    {
656                        warn!(error = %e, "failed to record metering usage");
657                    }
658
659                    // Publish response event.
660                    let preview = truncate_preview(&loop_result.response, 120);
661                    self.event_bus.publish(PunchEvent::FighterMessage {
662                        fighter_id: *fighter_id,
663                        bout_id: bout_id.0,
664                        role: "assistant".to_string(),
665                        content_preview: preview,
666                    });
667
668                    // Publish bout ended.
669                    self.event_bus.publish(PunchEvent::BoutEnded {
670                        bout_id: bout_id.0,
671                        fighter_id: *fighter_id,
672                        messages_exchanged: loop_result.usage.total(),
673                    });
674                }
675                Err(e) => {
676                    entry.status = FighterStatus::KnockedOut;
677                    self.metrics.counter_inc(metrics::ERRORS_TOTAL);
678
679                    // Publish error event.
680                    self.event_bus.publish(PunchEvent::Error {
681                        source: fighter_name.clone(),
682                        message: format!("{e}"),
683                    });
684                }
685            }
686        }
687
688        result
689    }
690
691    /// Kill (remove) a fighter.
692    #[instrument(skip(self), fields(%fighter_id))]
693    pub fn kill_fighter(&self, fighter_id: &FighterId) {
694        if let Some((_, entry)) = self.fighters.remove(fighter_id) {
695            self.scheduler.remove_fighter(fighter_id);
696            self.metrics
697                .gauge_set(metrics::ACTIVE_FIGHTERS, self.fighters.len() as i64);
698
699            self.event_bus.publish(PunchEvent::Error {
700                source: "ring".to_string(),
701                message: format!("Fighter '{}' killed", entry.manifest.name),
702            });
703
704            info!(name = %entry.manifest.name, "fighter killed");
705        } else {
706            warn!("attempted to kill unknown fighter");
707        }
708    }
709
710    /// List all fighters with their current status.
711    pub fn list_fighters(&self) -> Vec<(FighterId, FighterManifest, FighterStatus)> {
712        self.fighters
713            .iter()
714            .map(|entry| {
715                let id = *entry.key();
716                let e = entry.value();
717                (id, e.manifest.clone(), e.status)
718            })
719            .collect()
720    }
721
722    /// Get a snapshot of a single fighter's entry.
723    pub fn get_fighter(&self, fighter_id: &FighterId) -> Option<FighterEntry> {
724        self.fighters.get(fighter_id).map(|e| e.value().clone())
725    }
726
727    // -- Gorilla operations --------------------------------------------------
728
729    /// Register a gorilla with the Ring.
730    ///
731    /// Returns the newly-assigned [`GorillaId`]. The gorilla starts in
732    /// [`GorillaStatus::Caged`].
733    #[instrument(skip(self, manifest), fields(gorilla_name = %manifest.name))]
734    pub fn register_gorilla(&self, manifest: GorillaManifest) -> GorillaId {
735        let id = GorillaId::new();
736        let name = manifest.name.clone();
737
738        let entry = GorillaEntry {
739            manifest,
740            status: GorillaStatus::Caged,
741            metrics: GorillaMetrics::default(),
742            task_handle: None,
743        };
744
745        self.gorillas.insert(id, Mutex::new(entry));
746        info!(%id, name, "gorilla registered");
747        id
748    }
749
750    /// Unleash (start) a gorilla's background task.
751    ///
752    /// This uses the [`BackgroundExecutor`] to spawn the gorilla's autonomous
753    /// loop, which will run the fighter loop on the gorilla's schedule.
754    #[instrument(skip(self), fields(%gorilla_id))]
755    pub async fn unleash_gorilla(&self, gorilla_id: &GorillaId) -> PunchResult<()> {
756        let entry_ref = self
757            .gorillas
758            .get(gorilla_id)
759            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
760
761        let mut entry = entry_ref.value().lock().await;
762
763        if entry.status == GorillaStatus::Unleashed || entry.status == GorillaStatus::Rampaging {
764            return Err(PunchError::Gorilla(format!(
765                "gorilla {} is already active",
766                gorilla_id
767            )));
768        }
769
770        let gorilla_id_owned = *gorilla_id;
771        let name = entry.manifest.name.clone();
772        let manifest = entry.manifest.clone();
773
774        // Start the gorilla via the background executor.
775        self.background.start_gorilla(
776            gorilla_id_owned,
777            manifest,
778            self.config.default_model.clone(),
779            Arc::clone(&self.memory),
780            Arc::clone(&self.driver),
781        )?;
782
783        entry.status = GorillaStatus::Unleashed;
784        drop(entry);
785        drop(entry_ref);
786
787        // Record gorilla metrics.
788        self.metrics.counter_inc(metrics::GORILLA_RUNS_TOTAL);
789        self.metrics.gauge_inc(metrics::ACTIVE_GORILLAS);
790
791        self.event_bus.publish(PunchEvent::GorillaUnleashed {
792            gorilla_id: gorilla_id_owned,
793            name,
794        });
795
796        Ok(())
797    }
798
799    /// Cage (stop) a gorilla's background task.
800    #[instrument(skip(self), fields(%gorilla_id))]
801    pub async fn cage_gorilla(&self, gorilla_id: &GorillaId) -> PunchResult<()> {
802        let entry_ref = self
803            .gorillas
804            .get(gorilla_id)
805            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
806
807        let mut entry = entry_ref.value().lock().await;
808
809        // Stop via background executor.
810        self.background.stop_gorilla(gorilla_id);
811
812        // Also abort any legacy task handle.
813        if let Some(handle) = entry.task_handle.take() {
814            handle.abort();
815        }
816
817        let name = entry.manifest.name.clone();
818        entry.status = GorillaStatus::Caged;
819        drop(entry);
820        drop(entry_ref);
821
822        self.metrics.gauge_dec(metrics::ACTIVE_GORILLAS);
823
824        self.event_bus.publish(PunchEvent::GorillaPaused {
825            gorilla_id: *gorilla_id,
826            reason: "manually caged".to_string(),
827        });
828
829        info!(name, "gorilla caged");
830        Ok(())
831    }
832
833    /// List all gorillas with their current status and metrics.
834    pub async fn list_gorillas(
835        &self,
836    ) -> Vec<(GorillaId, GorillaManifest, GorillaStatus, GorillaMetrics)> {
837        let mut result = Vec::new();
838
839        for entry in self.gorillas.iter() {
840            let id = *entry.key();
841            let inner = entry.value().lock().await;
842            result.push((
843                id,
844                inner.manifest.clone(),
845                inner.status,
846                inner.metrics.clone(),
847            ));
848        }
849
850        result
851    }
852
853    /// Get a gorilla's manifest by ID.
854    pub async fn get_gorilla_manifest(&self, gorilla_id: &GorillaId) -> Option<GorillaManifest> {
855        let entry_ref = self.gorillas.get(gorilla_id)?;
856        let entry = entry_ref.value().lock().await;
857        Some(entry.manifest.clone())
858    }
859
860    /// Find a gorilla ID by name (case-insensitive).
861    pub async fn find_gorilla_by_name(&self, name: &str) -> Option<GorillaId> {
862        for entry in self.gorillas.iter() {
863            let inner = entry.value().lock().await;
864            if inner.manifest.name.eq_ignore_ascii_case(name) {
865                return Some(*entry.key());
866            }
867        }
868        None
869    }
870
871    /// Run a single autonomous tick for a gorilla (for testing/debugging).
872    ///
873    /// This executes the gorilla's autonomous prompt once, without starting
874    /// the background scheduler. Useful for verifying configuration.
875    #[instrument(skip(self), fields(%gorilla_id))]
876    pub async fn run_gorilla_tick(
877        &self,
878        gorilla_id: &GorillaId,
879    ) -> PunchResult<punch_runtime::FighterLoopResult> {
880        let entry_ref = self
881            .gorillas
882            .get(gorilla_id)
883            .ok_or_else(|| PunchError::Gorilla(format!("gorilla {} not found", gorilla_id)))?;
884
885        let entry = entry_ref.value().lock().await;
886        let manifest = entry.manifest.clone();
887        drop(entry);
888        drop(entry_ref);
889
890        crate::background::run_gorilla_tick(
891            *gorilla_id,
892            &manifest,
893            &self.config.default_model,
894            &self.memory,
895            &self.driver,
896        )
897        .await
898    }
899
900    /// Get the LLM driver (useful for CLI commands that need to run ticks directly).
901    pub fn driver(&self) -> &Arc<dyn LlmDriver> {
902        &self.driver
903    }
904
905    // -- Inter-agent communication -------------------------------------------
906
907    /// Send a message from one fighter to another.
908    ///
909    /// The source fighter's message becomes the target fighter's input,
910    /// enriched with source context so the target knows who is speaking.
911    /// The target processes it through its own fighter loop (with its own creed)
912    /// and the response is returned.
913    #[instrument(skip(self, message), fields(%source_id, %target_id))]
914    pub async fn fighter_to_fighter(
915        &self,
916        source_id: &FighterId,
917        target_id: &FighterId,
918        message: String,
919    ) -> PunchResult<FighterLoopResult> {
920        // Get source fighter name for context.
921        let source_name = self
922            .fighters
923            .get(source_id)
924            .map(|entry| entry.value().manifest.name.clone())
925            .ok_or_else(|| {
926                PunchError::Fighter(format!("source fighter {} not found", source_id))
927            })?;
928
929        // Verify target exists.
930        if self.fighters.get(target_id).is_none() {
931            return Err(PunchError::Fighter(format!(
932                "target fighter {} not found",
933                target_id
934            )));
935        }
936
937        // Wrap the message with source context so the target knows who is speaking.
938        let enriched_message = format!(
939            "[Message from fighter '{}' (id: {})]\n\n{}",
940            source_name, source_id, message
941        );
942
943        // Send to target through normal message flow (uses target's creed).
944        self.send_message(target_id, enriched_message).await
945    }
946
947    /// Find a fighter by name (case-insensitive).
948    ///
949    /// Returns the fighter ID and manifest if found.
950    pub fn find_fighter_by_name_sync(&self, name: &str) -> Option<(FighterId, FighterManifest)> {
951        self.fighters.iter().find_map(|entry| {
952            if entry.value().manifest.name.eq_ignore_ascii_case(name) {
953                Some((*entry.key(), entry.value().manifest.clone()))
954            } else {
955                None
956            }
957        })
958    }
959
960    /// Update relationship tracking in both fighters' creeds after inter-agent
961    /// communication.
962    ///
963    /// Loads both creeds, adds or updates the peer relationship entry
964    /// (incrementing interaction_count), and saves them back.
965    pub async fn update_fighter_relationships(&self, fighter_a_name: &str, fighter_b_name: &str) {
966        let memory = Arc::clone(&self.memory);
967        let a_name = fighter_a_name.to_string();
968        let b_name = fighter_b_name.to_string();
969
970        // Update in a spawned task to avoid blocking the caller.
971        tokio::spawn(async move {
972            // Update A's creed with relationship to B.
973            if let Ok(Some(mut creed_a)) = memory.load_creed_by_name(&a_name).await {
974                update_relationship(&mut creed_a, &b_name, None);
975                if let Err(e) = memory.save_creed(&creed_a).await {
976                    warn!(error = %e, fighter = %a_name, "failed to save creed relationship update");
977                }
978            }
979
980            // Update B's creed with relationship to A.
981            if let Ok(Some(mut creed_b)) = memory.load_creed_by_name(&b_name).await {
982                update_relationship(&mut creed_b, &a_name, None);
983                if let Err(e) = memory.save_creed(&creed_b).await {
984                    warn!(error = %e, fighter = %b_name, "failed to save creed relationship update");
985                }
986            }
987        });
988    }
989
990    // -- Troop / Swarm / Messaging accessors ---------------------------------
991
992    /// Get a reference to the troop manager.
993    pub fn troop_manager(&self) -> &TroopManager {
994        &self.troop_manager
995    }
996
997    /// Get a reference to the swarm coordinator.
998    pub fn swarm_coordinator(&self) -> &SwarmCoordinator {
999        &self.swarm_coordinator
1000    }
1001
1002    /// Get a reference to the message router.
1003    pub fn message_router(&self) -> &MessageRouter {
1004        &self.message_router
1005    }
1006
1007    // -- Troop operations ----------------------------------------------------
1008
1009    /// Form a new troop with a leader and initial members.
1010    #[instrument(skip(self, members), fields(troop_name = %name))]
1011    pub fn form_troop(
1012        &self,
1013        name: String,
1014        leader: FighterId,
1015        members: Vec<FighterId>,
1016        strategy: CoordinationStrategy,
1017    ) -> PunchResult<TroopId> {
1018        // Verify the leader exists.
1019        if self.fighters.get(&leader).is_none() {
1020            return Err(PunchError::Troop(format!(
1021                "leader fighter {} not found",
1022                leader
1023            )));
1024        }
1025
1026        // Verify all members exist.
1027        for member in &members {
1028            if self.fighters.get(member).is_none() {
1029                return Err(PunchError::Troop(format!(
1030                    "member fighter {} not found",
1031                    member
1032                )));
1033            }
1034        }
1035
1036        let member_count = members.len() + 1; // +1 for leader if not in list
1037        let troop_id = self
1038            .troop_manager
1039            .form_troop(name.clone(), leader, members, strategy);
1040
1041        self.event_bus.publish(PunchEvent::TroopFormed {
1042            troop_id,
1043            name,
1044            member_count,
1045        });
1046
1047        Ok(troop_id)
1048    }
1049
1050    /// Disband (dissolve) a troop.
1051    #[instrument(skip(self), fields(%troop_id))]
1052    pub fn disband_troop(&self, troop_id: &TroopId) -> PunchResult<()> {
1053        let name = self.troop_manager.disband_troop(troop_id)?;
1054
1055        self.event_bus.publish(PunchEvent::TroopDisbanded {
1056            troop_id: *troop_id,
1057            name,
1058        });
1059
1060        Ok(())
1061    }
1062
1063    /// Assign a task to a troop, returning the fighters that should handle it.
1064    pub fn assign_troop_task(
1065        &self,
1066        troop_id: &TroopId,
1067        task_description: &str,
1068    ) -> PunchResult<Vec<FighterId>> {
1069        self.troop_manager.assign_task(troop_id, task_description)
1070    }
1071
1072    /// Assign a task to a troop asynchronously, returning full results
1073    /// including responses from fighters.
1074    pub async fn assign_troop_task_async(
1075        &self,
1076        troop_id: &TroopId,
1077        task: &str,
1078    ) -> PunchResult<crate::troop::TaskAssignmentResult> {
1079        self.troop_manager.assign_task_async(troop_id, task).await
1080    }
1081
1082    /// Get the current status of a troop.
1083    pub fn get_troop_status(&self, troop_id: &TroopId) -> Option<Troop> {
1084        self.troop_manager.get_troop(troop_id)
1085    }
1086
1087    /// List all troops.
1088    pub fn list_troops(&self) -> Vec<Troop> {
1089        self.troop_manager.list_troops()
1090    }
1091
1092    /// Recruit a fighter into a troop.
1093    pub fn recruit_to_troop(&self, troop_id: &TroopId, fighter_id: FighterId) -> PunchResult<()> {
1094        // Verify the fighter exists.
1095        if self.fighters.get(&fighter_id).is_none() {
1096            return Err(PunchError::Troop(format!(
1097                "fighter {} not found",
1098                fighter_id
1099            )));
1100        }
1101        self.troop_manager.recruit(troop_id, fighter_id)
1102    }
1103
1104    /// Dismiss a fighter from a troop.
1105    pub fn dismiss_from_troop(
1106        &self,
1107        troop_id: &TroopId,
1108        fighter_id: &FighterId,
1109    ) -> PunchResult<()> {
1110        self.troop_manager.dismiss(troop_id, fighter_id)
1111    }
1112
1113    // -- Troop-aware fighter lifecycle ---------------------------------------
1114
1115    /// Kill a fighter, warning if they're in a troop.
1116    ///
1117    /// Unlike [`kill_fighter`], this checks troop membership and dismisses
1118    /// the fighter from all troops before killing them.
1119    #[instrument(skip(self), fields(%fighter_id))]
1120    pub fn kill_fighter_safe(&self, fighter_id: &FighterId) {
1121        // Dismiss from any troops first.
1122        let troop_ids = self.troop_manager.get_fighter_troops(fighter_id);
1123        for troop_id in troop_ids {
1124            if let Err(e) = self.troop_manager.dismiss(&troop_id, fighter_id) {
1125                warn!(
1126                    %troop_id,
1127                    %fighter_id,
1128                    error = %e,
1129                    "failed to dismiss fighter from troop before kill"
1130                );
1131            }
1132        }
1133        self.kill_fighter(fighter_id);
1134    }
1135
1136    // -- Workflow operations -------------------------------------------------
1137
1138    /// Register a workflow with the engine.
1139    pub fn register_workflow(&self, workflow: Workflow) -> WorkflowId {
1140        self.workflow_engine.register_workflow(workflow)
1141    }
1142
1143    /// Execute a workflow by ID with the given input.
1144    pub async fn execute_workflow(
1145        &self,
1146        workflow_id: &WorkflowId,
1147        input: String,
1148    ) -> PunchResult<WorkflowRunId> {
1149        self.workflow_engine
1150            .execute_workflow(
1151                workflow_id,
1152                input,
1153                Arc::clone(&self.memory),
1154                Arc::clone(&self.driver),
1155                &self.config.default_model,
1156            )
1157            .await
1158    }
1159
1160    // -- Shutdown ------------------------------------------------------------
1161
1162    /// Gracefully shut down the Ring, stopping all gorillas and background tasks.
1163    pub fn shutdown(&self) {
1164        info!("Ring shutdown initiated");
1165
1166        // Signal shutdown to all background tasks.
1167        let _ = self.shutdown_tx.send(true);
1168
1169        // Stop all gorilla tasks via the background executor.
1170        self.background.shutdown_all();
1171
1172        info!("Ring shutdown complete");
1173    }
1174}
1175
1176// ---------------------------------------------------------------------------
1177// Relationship tracking helper
1178// ---------------------------------------------------------------------------
1179
/// Truncate a string to at most `max_len` bytes, respecting UTF-8 char
/// boundaries.
///
/// * If `s` already fits (`s.len() <= max_len`) it is returned unchanged.
/// * Otherwise the longest prefix of `s` that ends on a char boundary and
///   leaves room for a trailing `"..."` is kept, and the ellipsis is appended.
/// * When `max_len` is 3 or less there is no room for any text, so only the
///   first `max_len` dots of the ellipsis are returned (an empty string for
///   `max_len == 0`).
///
/// The returned string is therefore never longer than `max_len` bytes.
fn truncate_preview(s: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    if s.len() <= max_len {
        return s.to_string();
    }

    // No room for any text next to the ellipsis: emit as many dots as fit.
    if max_len <= ELLIPSIS.len() {
        return ELLIPSIS[..max_len].to_string();
    }

    // Bytes available for text once the ellipsis is accounted for. This is
    // strictly less than `s.len()` because `s.len() > max_len`.
    let budget = max_len - ELLIPSIS.len();

    // Walk back from the budget to the nearest char boundary so a multi-byte
    // character is never split. Index 0 is always a boundary.
    let end = (0..=budget)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);

    format!("{}{}", &s[..end], ELLIPSIS)
}
1194
1195/// Update or insert a peer relationship in a creed.
1196fn update_relationship(creed: &mut punch_types::Creed, peer_name: &str, trust_nudge: Option<f64>) {
1197    if let Some(rel) = creed
1198        .relationships
1199        .iter_mut()
1200        .find(|r| r.entity == peer_name && r.entity_type == "fighter")
1201    {
1202        rel.interaction_count += 1;
1203        if let Some(nudge) = trust_nudge {
1204            rel.trust = (rel.trust * 0.9 + nudge * 0.1).clamp(0.0, 1.0);
1205        }
1206        rel.notes = format!(
1207            "Last interaction: {}",
1208            chrono::Utc::now().format("%Y-%m-%d %H:%M UTC")
1209        );
1210    } else {
1211        creed.relationships.push(punch_types::Relationship {
1212            entity: peer_name.to_string(),
1213            entity_type: "fighter".to_string(),
1214            nature: "peer".to_string(),
1215            trust: trust_nudge.unwrap_or(0.5),
1216            interaction_count: 1,
1217            notes: format!(
1218                "First interaction: {}",
1219                chrono::Utc::now().format("%Y-%m-%d %H:%M UTC")
1220            ),
1221        });
1222    }
1223    creed.updated_at = chrono::Utc::now();
1224    creed.version += 1;
1225}
1226
1227// ---------------------------------------------------------------------------
1228// AgentCoordinator implementation
1229// ---------------------------------------------------------------------------
1230
1231#[async_trait]
1232impl AgentCoordinator for Ring {
1233    async fn spawn_fighter(&self, manifest: FighterManifest) -> PunchResult<FighterId> {
1234        Ok(Ring::spawn_fighter(self, manifest).await)
1235    }
1236
1237    async fn send_message_to_agent(
1238        &self,
1239        target: &FighterId,
1240        message: String,
1241    ) -> PunchResult<AgentMessageResult> {
1242        let result = self.send_message(target, message).await?;
1243        Ok(AgentMessageResult {
1244            response: result.response,
1245            tokens_used: result.usage.total(),
1246        })
1247    }
1248
1249    async fn find_fighter_by_name(&self, name: &str) -> PunchResult<Option<FighterId>> {
1250        let found = self.fighters.iter().find_map(|entry| {
1251            if entry.value().manifest.name.eq_ignore_ascii_case(name) {
1252                Some(*entry.key())
1253            } else {
1254                None
1255            }
1256        });
1257        Ok(found)
1258    }
1259
1260    async fn list_fighters(&self) -> PunchResult<Vec<AgentInfo>> {
1261        let fighters = self
1262            .fighters
1263            .iter()
1264            .map(|entry| AgentInfo {
1265                id: *entry.key(),
1266                name: entry.value().manifest.name.clone(),
1267                status: entry.value().status,
1268            })
1269            .collect();
1270        Ok(fighters)
1271    }
1272}
1273
1274// ---------------------------------------------------------------------------
1275// Compile-time Send + Sync assertion
1276// ---------------------------------------------------------------------------
1277
1278/// Compile-time assertion that `Ring` is `Send + Sync`.
1279const _: () = {
1280    fn _assert_send<T: Send>() {}
1281    fn _assert_sync<T: Sync>() {}
1282    fn _assert() {
1283        _assert_send::<Ring>();
1284        _assert_sync::<Ring>();
1285    }
1286};