Skip to main content

codetether_agent/telemetry/
swarm.rs

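//! Multi-agent swarm telemetry.
//!
//! Tracks the lifecycle of one swarm operation: start time, progress, and
//! final outcome. Emits structured tracing events when a swarm starts, when
//! it completes, and for each recorded latency sample; progress updates are
//! stored without emitting an event.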
5
6use chrono::{DateTime, Utc};
7use tokio::sync::Mutex;
8
9use super::metrics::TelemetryMetrics;
10
11/// Live collector for a running swarm. Methods are async because the
12/// underlying fields use [`tokio::sync::Mutex`] — swarm events are rare,
13/// so lock contention is not a concern.
14#[derive(Debug, Default)]
15pub struct SwarmTelemetryCollector {
16    task_id: Mutex<Option<String>>,
17    agent_count: Mutex<usize>,
18    completed: Mutex<usize>,
19    total: Mutex<usize>,
20    start_time: Mutex<Option<DateTime<Utc>>>,
21}
22
23impl SwarmTelemetryCollector {
24    /// Construct an empty collector.
25    pub fn new() -> Self {
26        Self::default()
27    }
28
29    /// Record swarm start. `_strategy` is logged downstream.
30    pub async fn start_swarm(&self, task_id: &str, agent_count: usize, _strategy: &str) {
31        *self.task_id.lock().await = Some(task_id.to_string());
32        *self.agent_count.lock().await = agent_count;
33        *self.start_time.lock().await = Some(Utc::now());
34        tracing::info!(task_id, agent_count, "Swarm started");
35    }
36
37    /// Update `(completed, total)` progress counters.
38    pub async fn record_progress(&self, completed: usize, total: usize) {
39        *self.completed.lock().await = completed;
40        *self.total.lock().await = total;
41    }
42
43    /// Log a named latency sample (e.g. per-stage timing).
44    pub async fn record_swarm_latency(&self, label: &str, duration: std::time::Duration) {
45        tracing::debug!(
46            label,
47            duration_ms = duration.as_millis(),
48            "Swarm latency recorded"
49        );
50    }
51
52    /// Finalize the swarm and produce a [`TelemetryMetrics`] summary.
53    pub async fn complete_swarm(&self, success: bool) -> TelemetryMetrics {
54        let duration = self
55            .start_time
56            .lock()
57            .await
58            .map(|s| (Utc::now() - s).num_milliseconds() as u64)
59            .unwrap_or(0);
60
61        let completed = *self.completed.lock().await;
62        let total = *self.total.lock().await;
63
64        tracing::info!(
65            success,
66            completed,
67            total,
68            duration_ms = duration,
69            "Swarm completed"
70        );
71
72        TelemetryMetrics {
73            tool_invocations: total as u64,
74            successful_operations: if success { completed as u64 } else { 0 },
75            failed_operations: if !success {
76                (total.saturating_sub(completed)) as u64
77            } else {
78                0
79            },
80            total_tokens: 0,
81            avg_latency_ms: duration as f64,
82        }
83    }
84}