Skip to main content

bamboo_engine/metrics/
events.rs

//! Metrics Event System
//!
//! This module defines the event types used for tracking agent performance and behavior.
//! The event system follows a unified architecture that supports both internal agent operations
//! and HTTP proxy forwarding.
//!
//! # Event Categories
//!
//! The system organizes metrics into three distinct categories:
//!
//! ## Chat Events (`ChatEvent`)
//! Events emitted by the agent's internal chat loop during conversation sessions.
//! These track the lifecycle of sessions, rounds, and tool invocations.
//!
//! ## Forward Events (`ForwardEvent`)
//! Events emitted when forwarding requests to upstream API providers (e.g., OpenAI, Anthropic).
//! These track HTTP proxy operations for external API calls.
//!
//! ## System Events (`SystemEvent`)
//! Operational events for monitoring the metrics system itself, including error tracking
//! and worker lifecycle management.
//!
//! # Architecture
//!
//! Chat and forward events carry common metadata (`EventMeta`) that includes:
//! - Unique event identifier (UUID v4)
//! - Precise timestamp of occurrence
//! - Optional trace ID for distributed request tracing
//!
//! System events do not carry an `EventMeta`; they hold only their own payload fields (if any).
//!
//! Events are designed to be:
//! - **Serializable**: All events implement `Serialize` and `Deserialize` for persistence
//! - **Immutable**: Once created, events should not be modified
//! - **Traceable**: Events that carry `EventMeta` can be linked via trace IDs across system
//!   boundaries
34
35use chrono::{DateTime, Utc};
36use serde::{Deserialize, Serialize};
37use uuid::Uuid;
38
39use crate::metrics::types::{ForwardStatus, RoundStatus, SessionStatus, TokenUsage};
40
/// Common identification and timing metadata for a metrics event.
///
/// Chat and forward event variants embed this structure in their `meta`
/// field so that every such event can be uniquely identified, ordered in
/// time, and optionally correlated with other events of the same request.
///
/// # Fields
///
/// - `event_id`: Unique identifier for this event (a UUID v4 rendered as a string)
/// - `occurred_at`: UTC timestamp when the event occurred
/// - `trace_id`: Optional identifier for correlating related events across services
///
/// # Example
///
/// ```rust,ignore
/// // Create event metadata without trace ID
/// let meta = EventMeta::new();
/// assert!(!meta.event_id.is_empty());
/// assert!(meta.trace_id.is_none());
///
/// // Create event metadata with trace ID for distributed tracing
/// let traced_meta = EventMeta::with_trace_id("req-abc-123");
/// assert_eq!(traced_meta.trace_id, Some("req-abc-123".to_string()));
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventMeta {
    /// Unique event ID (UUID v4, stored in its string form)
    pub event_id: String,
    /// When the event occurred (UTC)
    pub occurred_at: DateTime<Utc>,
    /// Optional trace ID for correlating request chains; `None` when the
    /// event is not tied to a traced request
    pub trace_id: Option<String>,
}
78
79impl EventMeta {
80    /// Creates new event metadata with a fresh UUID and current timestamp.
81    ///
82    /// The trace ID is set to `None`. Use this constructor when you don't need
83    /// to correlate this event with other events across service boundaries.
84    ///
85    /// # Example
86    ///
87    /// ```rust,ignore
88    /// use bamboo_agent::agent::metrics::events::EventMeta;
89    ///
90    /// let meta = EventMeta::new();
91    /// assert!(!meta.event_id.is_empty());
92    /// assert!(meta.trace_id.is_none());
93    /// ```
94    pub fn new() -> Self {
95        Self {
96            event_id: Uuid::new_v4().to_string(),
97            occurred_at: Utc::now(),
98            trace_id: None,
99        }
100    }
101
102    /// Creates new event metadata with a trace ID for distributed request tracing.
103    ///
104    /// Use this constructor when you need to correlate this event with other
105    /// events that are part of the same logical request or operation flow.
106    ///
107    /// # Arguments
108    ///
109    /// * `trace_id` - A unique identifier for the request chain (will be converted to String)
110    ///
111    /// # Example
112    ///
113    /// ```rust,ignore
114    /// use bamboo_agent::agent::metrics::events::EventMeta;
115    ///
116    /// let meta = EventMeta::with_trace_id("req-12345");
117    /// assert_eq!(meta.trace_id, Some("req-12345".to_string()));
118    /// ```
119    pub fn with_trace_id(trace_id: impl Into<String>) -> Self {
120        Self {
121            event_id: Uuid::new_v4().to_string(),
122            occurred_at: Utc::now(),
123            trace_id: Some(trace_id.into()),
124        }
125    }
126}
127
128impl Default for EventMeta {
129    fn default() -> Self {
130        Self::new()
131    }
132}
133
/// Unified metrics event enum encompassing all event categories.
///
/// This enum is the top-level container for every metrics event in the
/// system. It gives producers and consumers a single type that can be
/// serialized, deserialized, and processed uniformly.
///
/// # Variants
///
/// - `Chat`: Events from the internal agent chat loop
/// - `Forward`: Events from HTTP proxy operations to upstream APIs
/// - `System`: Operational events for system monitoring
///
/// # Serialization
///
/// All variants derive serde's `Serialize` and `Deserialize`, making them
/// suitable for storage, transmission over networks, and logging (e.g. as
/// JSON). No `#[serde(...)]` attributes are applied here, so serde's default
/// enum representation is used.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MetricsEvent {
    /// An event from the agent's internal chat loop.
    Chat(ChatEvent),
    /// An event from forwarding a request to an upstream API.
    Forward(ForwardEvent),
    /// An operational event about the metrics system itself.
    System(SystemEvent),
}
156
157// ============================================================================
158// Chat Events (Agent-internal usage)
159// ============================================================================
160
/// Events emitted by the agent loop during chat sessions.
///
/// These events track the complete lifecycle of chat interactions within
/// the agent, from session initialization to completion, including all
/// intermediate operations like rounds and tool calls. Every variant carries
/// an [`EventMeta`] in its `meta` field.
///
/// # Event Lifecycle
///
/// A typical chat session produces events in this order:
/// 1. `SessionStarted` - When a new conversation begins
/// 2. `RoundStarted` - When the agent begins processing a message
/// 3. `ToolCalled` (multiple) - For each tool invocation during the round
/// 4. `RoundCompleted` - When the agent finishes processing the message
/// 5. `MessageCountUpdated` - When the message count changes
/// 6. `SessionCompleted` - When the conversation ends
///
/// # Usage
///
/// These events are primarily used for:
/// - Performance monitoring (latency tracking)
/// - Resource usage analysis (token consumption)
/// - Behavior analytics (tool usage patterns)
/// - Error tracking and debugging
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ChatEvent {
    /// Emitted when a new chat session is initialized.
    ///
    /// This marks the beginning of a conversation between the user and agent.
    SessionStarted {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Unique identifier for this chat session
        session_id: String,
        /// The AI model being used for this session (e.g., "gpt-4", "claude-3")
        model: String,
    },

    /// Emitted when a chat session completes or terminates.
    ///
    /// This marks the end of a conversation, whether successful or due to an error.
    SessionCompleted {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Unique identifier for the chat session that completed
        session_id: String,
        /// Final status of the session. The possible values are defined by
        /// `SessionStatus` (described as completed, failed, or cancelled —
        /// verify against that type).
        status: SessionStatus,
    },

    /// Emitted when the agent starts processing a new message round.
    ///
    /// A round represents a single request-response cycle within a session.
    /// Each user message typically triggers one round.
    RoundStarted {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Unique identifier for this round
        round_id: String,
        /// Session this round belongs to
        session_id: String,
        /// The AI model being used for this round
        model: String,
    },

    /// Emitted when the agent completes processing a message round.
    ///
    /// Contains comprehensive metrics about the round including token usage,
    /// latency, and any errors that occurred.
    RoundCompleted {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Unique identifier for the round that completed; intended to match
        /// the `round_id` of the corresponding `RoundStarted` event
        round_id: String,
        /// Session this round belongs to
        session_id: String,
        /// Final status of the round. The possible values are defined by
        /// `RoundStatus` (described as success or failed — verify against
        /// that type).
        status: RoundStatus,
        /// Token consumption during this round
        usage: TokenUsage,
        /// Total time to process the round in milliseconds
        latency_ms: u64,
        /// Error message if the round failed, None on success
        error: Option<String>,
    },

    /// Emitted when the agent invokes a tool during a round.
    ///
    /// Tracks tool execution for understanding agent behavior and
    /// measuring tool performance.
    ToolCalled {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Unique identifier for this tool invocation
        tool_call_id: String,
        /// Round this tool call belongs to
        round_id: String,
        /// Session this tool call belongs to
        session_id: String,
        /// Name of the tool being invoked (e.g., "read_file", "execute_command")
        tool_name: String,
        /// Time taken to execute the tool in milliseconds
        latency_ms: u64,
        /// Whether the tool execution succeeded
        success: bool,
    },

    /// Emitted when the message count for a session is updated.
    ///
    /// This tracks the total number of messages exchanged in the session,
    /// including both user and assistant messages.
    MessageCountUpdated {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Session whose message count was updated
        session_id: String,
        /// New total message count for the session (an absolute total, not a delta)
        message_count: u32,
    },
}
280
281// ============================================================================
282// Forward Events (HTTP proxy)
283// ============================================================================
284
/// Events emitted when forwarding requests to upstream APIs.
///
/// These events track HTTP proxy operations when the system forwards
/// requests to external API providers like OpenAI, Anthropic, or other
/// LLM services. They enable monitoring of API usage, costs, and performance.
/// Every variant carries an [`EventMeta`] in its `meta` field.
///
/// # Event Lifecycle
///
/// A typical forward operation produces events in this order:
/// 1. `RequestStarted` - When a request is initiated to the upstream API
/// 2. `RequestCompleted` - When the response is received (or an error occurs)
///
/// The two events of one operation are linked by their shared `request_id`.
///
/// # Use Cases
///
/// These events support:
/// - API cost tracking via token consumption
/// - Performance monitoring (latency, success rates)
/// - Error rate analysis per endpoint
/// - Load balancing and capacity planning
/// - Compliance and audit logging
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ForwardEvent {
    /// Emitted when a request is initiated to an upstream API.
    ///
    /// This marks the beginning of a forwarded HTTP request to an
    /// external service provider.
    RequestStarted {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Unique identifier for this forwarded request
        request_id: String,
        /// Endpoint identifier (e.g., "openai.chat_completions" or "anthropic.messages")
        endpoint: String,
        /// The AI model being requested (e.g., "gpt-4", "claude-3-opus")
        model: String,
        /// Whether this is a streaming request (SSE) or regular request
        is_stream: bool,
    },

    /// Emitted when a forwarded request completes or fails.
    ///
    /// Contains comprehensive information about the request outcome,
    /// including HTTP status, token usage, latency, and any errors.
    RequestCompleted {
        /// Event metadata including unique ID and timestamp
        meta: EventMeta,
        /// Unique identifier for the request that completed
        request_id: String,
        /// HTTP status code returned by the upstream API.
        ///
        /// NOTE(review): the value recorded when no HTTP response was
        /// received at all (e.g. on a timeout) is not defined in this
        /// module — confirm with the code that emits this event.
        status_code: u16,
        /// Status classification. The possible values are defined by
        /// `ForwardStatus` (described as success, error, or timeout —
        /// verify against that type).
        status: ForwardStatus,
        /// Token usage if available from the API response, None otherwise
        usage: Option<TokenUsage>,
        /// Total time for the request in milliseconds
        latency_ms: u64,
        /// Error message if the request failed, None on success
        error: Option<String>,
    },
}
345
346// ============================================================================
347// System Events
348// ============================================================================
349
/// System-level events for operational metrics and monitoring.
///
/// These events track the health and operation of the metrics system itself,
/// providing visibility into system behavior, errors, and resource management.
/// They are primarily used for operational monitoring and debugging.
///
/// Unlike chat and forward events, these variants do not carry an
/// [`EventMeta`]: they have no event ID, timestamp, or trace ID of their own.
///
/// # Use Cases
///
/// - Monitoring system health and stability
/// - Tracking metrics collection reliability
/// - Identifying storage or processing issues
/// - Understanding worker lifecycle
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SystemEvent {
    /// Emitted when metrics events are dropped due to errors or resource constraints.
    ///
    /// This typically indicates system stress or configuration issues
    /// that should be investigated.
    MetricsDropped {
        /// Number of events that were dropped
        count: u64,
        /// Human-readable explanation of why the events were dropped
        reason: String,
    },

    /// Emitted when a storage operation fails.
    ///
    /// Tracks errors in persisting metrics to the storage backend,
    /// helping identify database issues or data quality problems.
    StorageError {
        /// Description of the storage error that occurred
        error: String,
        /// Type of metrics event that failed to be stored (free-form string;
        /// the naming scheme is chosen by the emitter)
        event_type: String,
    },

    /// Emitted when a metrics processing worker starts.
    ///
    /// Indicates that a new worker thread/task has begun processing
    /// metrics events from the event queue.
    WorkerStarted,

    /// Emitted when a metrics processing worker stops.
    ///
    /// Indicates that a worker has shut down, either gracefully or
    /// due to an error. Used to track worker lifecycle and system capacity.
    /// The variant carries no payload, so the reason for stopping is not
    /// recorded in the event itself.
    WorkerStopped,
}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `event` to JSON and parses it straight back, panicking
    /// if either direction fails.
    fn round_trip(event: &MetricsEvent) -> MetricsEvent {
        let encoded = serde_json::to_string(event).expect("serialize");
        serde_json::from_str(&encoded).expect("deserialize")
    }

    /// `new` must produce a non-empty event ID and no trace ID.
    #[test]
    fn test_event_meta_new() {
        let EventMeta {
            event_id, trace_id, ..
        } = EventMeta::new();

        assert!(!event_id.is_empty());
        assert!(trace_id.is_none());
    }

    /// `with_trace_id` must keep generating an event ID and store the trace ID.
    #[test]
    fn test_event_meta_with_trace_id() {
        let EventMeta {
            event_id, trace_id, ..
        } = EventMeta::with_trace_id("trace-123");

        assert!(!event_id.is_empty());
        assert_eq!(trace_id, Some(String::from("trace-123")));
    }

    /// A chat event must survive a JSON round trip with its payload intact.
    #[test]
    fn test_chat_event_serialization() {
        let original = MetricsEvent::Chat(ChatEvent::SessionStarted {
            meta: EventMeta::new(),
            session_id: String::from("session-123"),
            model: String::from("gpt-4"),
        });

        if let MetricsEvent::Chat(ChatEvent::SessionStarted {
            session_id, model, ..
        }) = round_trip(&original)
        {
            assert_eq!(session_id, "session-123");
            assert_eq!(model, "gpt-4");
        } else {
            panic!("Expected SessionStarted event");
        }
    }

    /// A forward event must survive a JSON round trip with its payload intact.
    #[test]
    fn test_forward_event_serialization() {
        let original = MetricsEvent::Forward(ForwardEvent::RequestStarted {
            meta: EventMeta::new(),
            request_id: String::from("req-456"),
            endpoint: String::from("openai.chat_completions"),
            model: String::from("gpt-5-mini"),
            is_stream: true,
        });

        if let MetricsEvent::Forward(ForwardEvent::RequestStarted {
            request_id,
            endpoint,
            model,
            is_stream,
            ..
        }) = round_trip(&original)
        {
            assert_eq!(request_id, "req-456");
            assert_eq!(endpoint, "openai.chat_completions");
            assert_eq!(model, "gpt-5-mini");
            assert!(is_stream);
        } else {
            panic!("Expected RequestStarted event");
        }
    }
}