bamboo_engine/metrics/events.rs
1//! Metrics Event System
2//!
3//! This module defines the event types used for tracking agent performance and behavior.
4//! The event system follows a unified architecture that supports both internal agent operations
5//! and HTTP proxy forwarding.
6//!
7//! # Event Categories
8//!
9//! The system organizes metrics into three distinct categories:
10//!
11//! ## Chat Events (`ChatEvent`)
12//! Events emitted by the agent's internal chat loop during conversation sessions.
13//! These track the lifecycle of sessions, rounds, and tool invocations.
14//!
15//! ## Forward Events (`ForwardEvent`)
16//! Events emitted when forwarding requests to upstream API providers (e.g., OpenAI, Anthropic).
17//! These track HTTP proxy operations for external API calls.
18//!
19//! ## System Events (`SystemEvent`)
20//! Operational events for monitoring the metrics system itself, including error tracking
21//! and worker lifecycle management.
22//!
23//! # Architecture
24//!
//! Chat and forward events share common metadata (`EventMeta`); system events carry none. The metadata includes:
26//! - Unique event identifier (UUID v4)
27//! - Precise timestamp of occurrence
28//! - Optional trace ID for distributed request tracing
29//!
30//! Events are designed to be:
31//! - **Serializable**: All events implement `Serialize` and `Deserialize` for persistence
32//! - **Immutable**: Once created, events should not be modified
33//! - **Traceable**: Events can be linked via trace IDs across system boundaries
34
35use chrono::{DateTime, Utc};
36use serde::{Deserialize, Serialize};
37use uuid::Uuid;
38
39use crate::metrics::types::{ForwardStatus, RoundStatus, SessionStatus, TokenUsage};
40
41/// Metadata attached to every metrics event.
42///
43/// This structure provides common identification and timing information
44/// that is shared across all event types in the metrics system.
45/// Metadata attached to every metrics event.
46///
47/// This structure provides common identification and timing information
48/// that is shared across all event types in the metrics system.
49///
50/// # Fields
51///
52/// - `event_id`: Unique identifier for this event (UUID v4 format)
53/// - `occurred_at`: UTC timestamp when the event occurred
54/// - `trace_id`: Optional identifier for correlating related events across services
55///
56/// # Example
57///
58/// ```rust,ignore
59///
60/// // Create event metadata without trace ID
61/// let meta = EventMeta::new();
62/// assert!(!meta.event_id.is_empty());
63/// assert!(meta.trace_id.is_none());
64///
65/// // Create event metadata with trace ID for distributed tracing
66/// let traced_meta = EventMeta::with_trace_id("req-abc-123");
67/// assert_eq!(traced_meta.trace_id, Some("req-abc-123".to_string()));
68/// ```
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct EventMeta {
71 /// Unique event ID (UUID v4)
72 pub event_id: String,
73 /// When the event occurred
74 pub occurred_at: DateTime<Utc>,
75 /// Optional trace ID for correlating request chains
76 pub trace_id: Option<String>,
77}
78
79impl EventMeta {
80 /// Creates new event metadata with a fresh UUID and current timestamp.
81 ///
82 /// The trace ID is set to `None`. Use this constructor when you don't need
83 /// to correlate this event with other events across service boundaries.
84 ///
85 /// # Example
86 ///
87 /// ```rust,ignore
88 /// use bamboo_agent::agent::metrics::events::EventMeta;
89 ///
90 /// let meta = EventMeta::new();
91 /// assert!(!meta.event_id.is_empty());
92 /// assert!(meta.trace_id.is_none());
93 /// ```
94 pub fn new() -> Self {
95 Self {
96 event_id: Uuid::new_v4().to_string(),
97 occurred_at: Utc::now(),
98 trace_id: None,
99 }
100 }
101
102 /// Creates new event metadata with a trace ID for distributed request tracing.
103 ///
104 /// Use this constructor when you need to correlate this event with other
105 /// events that are part of the same logical request or operation flow.
106 ///
107 /// # Arguments
108 ///
109 /// * `trace_id` - A unique identifier for the request chain (will be converted to String)
110 ///
111 /// # Example
112 ///
113 /// ```rust,ignore
114 /// use bamboo_agent::agent::metrics::events::EventMeta;
115 ///
116 /// let meta = EventMeta::with_trace_id("req-12345");
117 /// assert_eq!(meta.trace_id, Some("req-12345".to_string()));
118 /// ```
119 pub fn with_trace_id(trace_id: impl Into<String>) -> Self {
120 Self {
121 event_id: Uuid::new_v4().to_string(),
122 occurred_at: Utc::now(),
123 trace_id: Some(trace_id.into()),
124 }
125 }
126}
127
128impl Default for EventMeta {
129 fn default() -> Self {
130 Self::new()
131 }
132}
133
134/// Unified metrics event enum encompassing all event categories.
135///
136/// This enum serves as the top-level container for all metrics events
137/// in the system. It provides a single type that can be serialized,
138/// deserialized, and processed uniformly.
139///
140/// # Variants
141///
142/// - `Chat`: Events from the internal agent chat loop
143/// - `Forward`: Events from HTTP proxy operations to upstream APIs
144/// - `System`: Operational events for system monitoring
145///
146/// # Serialization
147///
148/// All variants are JSON-serializable via serde, making them suitable
149/// for storage, transmission over networks, and logging.
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub enum MetricsEvent {
152 Chat(ChatEvent),
153 Forward(ForwardEvent),
154 System(SystemEvent),
155}
156
157// ============================================================================
158// Chat Events (Agent-internal usage)
159// ============================================================================
160
161/// Events emitted by the agent loop during chat sessions.
162///
163/// These events track the complete lifecycle of chat interactions within
164/// the agent, from session initialization to completion, including all
165/// intermediate operations like rounds and tool calls.
166///
167/// # Event Lifecycle
168///
169/// A typical chat session produces events in this order:
170/// 1. `SessionStarted` - When a new conversation begins
171/// 2. `RoundStarted` - When the agent begins processing a message
172/// 3. `ToolCalled` (multiple) - For each tool invocation during the round
173/// 4. `RoundCompleted` - When the agent finishes processing the message
174/// 5. `MessageCountUpdated` - When the message count changes
175/// 6. `SessionCompleted` - When the conversation ends
176///
177/// # Usage
178///
179/// These events are primarily used for:
180/// - Performance monitoring (latency tracking)
181/// - Resource usage analysis (token consumption)
182/// - Behavior analytics (tool usage patterns)
183/// - Error tracking and debugging
184#[derive(Debug, Clone, Serialize, Deserialize)]
185pub enum ChatEvent {
186 /// Emitted when a new chat session is initialized.
187 ///
188 /// This marks the beginning of a conversation between the user and agent.
189 SessionStarted {
190 /// Event metadata including unique ID and timestamp
191 meta: EventMeta,
192 /// Unique identifier for this chat session
193 session_id: String,
194 /// The AI model being used for this session (e.g., "gpt-4", "claude-3")
195 model: String,
196 },
197
198 /// Emitted when a chat session completes or terminates.
199 ///
200 /// This marks the end of a conversation, whether successful or due to an error.
201 SessionCompleted {
202 /// Event metadata including unique ID and timestamp
203 meta: EventMeta,
204 /// Unique identifier for the chat session that completed
205 session_id: String,
206 /// Final status of the session (completed, failed, or cancelled)
207 status: SessionStatus,
208 },
209
210 /// Emitted when the agent starts processing a new message round.
211 ///
212 /// A round represents a single request-response cycle within a session.
213 /// Each user message typically triggers one round.
214 RoundStarted {
215 /// Event metadata including unique ID and timestamp
216 meta: EventMeta,
217 /// Unique identifier for this round
218 round_id: String,
219 /// Session this round belongs to
220 session_id: String,
221 /// The AI model being used for this round
222 model: String,
223 },
224
225 /// Emitted when the agent completes processing a message round.
226 ///
227 /// Contains comprehensive metrics about the round including token usage,
228 /// latency, and any errors that occurred.
229 RoundCompleted {
230 /// Event metadata including unique ID and timestamp
231 meta: EventMeta,
232 /// Unique identifier for the round that completed
233 round_id: String,
234 /// Session this round belongs to
235 session_id: String,
236 /// Final status of the round (success or failed)
237 status: RoundStatus,
238 /// Token consumption during this round
239 usage: TokenUsage,
240 /// Total time to process the round in milliseconds
241 latency_ms: u64,
242 /// Error message if the round failed, None on success
243 error: Option<String>,
244 },
245
246 /// Emitted when the agent invokes a tool during a round.
247 ///
248 /// Tracks tool execution for understanding agent behavior and
249 /// measuring tool performance.
250 ToolCalled {
251 /// Event metadata including unique ID and timestamp
252 meta: EventMeta,
253 /// Unique identifier for this tool invocation
254 tool_call_id: String,
255 /// Round this tool call belongs to
256 round_id: String,
257 /// Session this tool call belongs to
258 session_id: String,
259 /// Name of the tool being invoked (e.g., "read_file", "execute_command")
260 tool_name: String,
261 /// Time taken to execute the tool in milliseconds
262 latency_ms: u64,
263 /// Whether the tool execution succeeded
264 success: bool,
265 },
266
267 /// Emitted when the message count for a session is updated.
268 ///
269 /// This tracks the total number of messages exchanged in the session,
270 /// including both user and assistant messages.
271 MessageCountUpdated {
272 /// Event metadata including unique ID and timestamp
273 meta: EventMeta,
274 /// Session whose message count was updated
275 session_id: String,
276 /// New total message count for the session
277 message_count: u32,
278 },
279}
280
281// ============================================================================
282// Forward Events (HTTP proxy)
283// ============================================================================
284
285/// Events emitted when forwarding requests to upstream APIs.
286///
287/// These events track HTTP proxy operations when the system forwards
288/// requests to external API providers like OpenAI, Anthropic, or other
289/// LLM services. They enable monitoring of API usage, costs, and performance.
290///
291/// # Event Lifecycle
292///
293/// A typical forward operation produces events in this order:
294/// 1. `RequestStarted` - When a request is initiated to the upstream API
295/// 2. `RequestCompleted` - When the response is received (or an error occurs)
296///
297/// # Use Cases
298///
299/// These events support:
300/// - API cost tracking via token consumption
301/// - Performance monitoring (latency, success rates)
302/// - Error rate analysis per endpoint
303/// - Load balancing and capacity planning
304/// - Compliance and audit logging
305#[derive(Debug, Clone, Serialize, Deserialize)]
306pub enum ForwardEvent {
307 /// Emitted when a request is initiated to an upstream API.
308 ///
309 /// This marks the beginning of a forwarded HTTP request to an
310 /// external service provider.
311 RequestStarted {
312 /// Event metadata including unique ID and timestamp
313 meta: EventMeta,
314 /// Unique identifier for this forwarded request
315 request_id: String,
316 /// Endpoint identifier (e.g., "openai.chat_completions" or "anthropic.messages")
317 endpoint: String,
318 /// The AI model being requested (e.g., "gpt-4", "claude-3-opus")
319 model: String,
320 /// Whether this is a streaming request (SSE) or regular request
321 is_stream: bool,
322 },
323
324 /// Emitted when a forwarded request completes or fails.
325 ///
326 /// Contains comprehensive information about the request outcome,
327 /// including HTTP status, token usage, latency, and any errors.
328 RequestCompleted {
329 /// Event metadata including unique ID and timestamp
330 meta: EventMeta,
331 /// Unique identifier for the request that completed
332 request_id: String,
333 /// HTTP status code returned by the upstream API
334 status_code: u16,
335 /// Status classification (success, error, or timeout)
336 status: ForwardStatus,
337 /// Token usage if available from the API response
338 usage: Option<TokenUsage>,
339 /// Total time for the request in milliseconds
340 latency_ms: u64,
341 /// Error message if the request failed, None on success
342 error: Option<String>,
343 },
344}
345
346// ============================================================================
347// System Events
348// ============================================================================
349
350/// System-level events for operational metrics and monitoring.
351///
352/// These events track the health and operation of the metrics system itself,
353/// providing visibility into system behavior, errors, and resource management.
354/// They are primarily used for operational monitoring and debugging.
355///
356/// # Use Cases
357///
358/// - Monitoring system health and stability
359/// - Tracking metrics collection reliability
360/// - Identifying storage or processing issues
361/// - Understanding worker lifecycle
362#[derive(Debug, Clone, Serialize, Deserialize)]
363pub enum SystemEvent {
364 /// Emitted when metrics events are dropped due to errors or resource constraints.
365 ///
366 /// This typically indicates system stress or configuration issues
367 /// that should be investigated.
368 ///
369 /// # Fields
370 ///
371 /// - `count`: Number of events that were dropped
372 /// - `reason`: Human-readable explanation of why events were dropped
373 MetricsDropped {
374 /// Number of events that were dropped
375 count: u64,
376 /// Explanation of why the events were dropped
377 reason: String,
378 },
379
380 /// Emitted when a storage operation fails.
381 ///
382 /// Tracks errors in persisting metrics to the storage backend,
383 /// helping identify database issues or data quality problems.
384 ///
385 /// # Fields
386 ///
387 /// - `error`: Description of the storage error
388 /// - `event_type`: Type of event that failed to be stored
389 StorageError {
390 /// Description of the storage error that occurred
391 error: String,
392 /// Type of metrics event that failed to be stored
393 event_type: String,
394 },
395
396 /// Emitted when a metrics processing worker starts.
397 ///
398 /// Indicates that a new worker thread/task has begun processing
399 /// metrics events from the event queue.
400 WorkerStarted,
401
402 /// Emitted when a metrics processing worker stops.
403 ///
404 /// Indicates that a worker has shut down, either gracefully or
405 /// due to an error. Used to track worker lifecycle and system capacity.
406 WorkerStopped,
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `event` to JSON and parses it back, panicking if either step fails.
    fn json_round_trip(event: &MetricsEvent) -> MetricsEvent {
        let encoded = serde_json::to_string(event).expect("serialize");
        serde_json::from_str(&encoded).expect("deserialize")
    }

    /// `new()` must produce a non-empty event ID and no trace ID.
    #[test]
    fn test_event_meta_new() {
        let fresh = EventMeta::new();
        assert!(!fresh.event_id.is_empty());
        assert!(fresh.trace_id.is_none());
    }

    /// `with_trace_id()` must keep the supplied trace ID alongside a non-empty event ID.
    #[test]
    fn test_event_meta_with_trace_id() {
        let traced = EventMeta::with_trace_id("trace-123");
        assert!(!traced.event_id.is_empty());
        assert_eq!(traced.trace_id.as_deref(), Some("trace-123"));
    }

    /// A chat event must survive a JSON round trip with its payload intact.
    #[test]
    fn test_chat_event_serialization() {
        let original = MetricsEvent::Chat(ChatEvent::SessionStarted {
            meta: EventMeta::new(),
            session_id: "session-123".to_string(),
            model: "gpt-4".to_string(),
        });

        if let MetricsEvent::Chat(ChatEvent::SessionStarted {
            session_id, model, ..
        }) = json_round_trip(&original)
        {
            assert_eq!(session_id, "session-123");
            assert_eq!(model, "gpt-4");
        } else {
            panic!("Expected SessionStarted event");
        }
    }

    /// A forward event must survive a JSON round trip with its payload intact.
    #[test]
    fn test_forward_event_serialization() {
        let original = MetricsEvent::Forward(ForwardEvent::RequestStarted {
            meta: EventMeta::new(),
            request_id: "req-456".to_string(),
            endpoint: "openai.chat_completions".to_string(),
            model: "gpt-5-mini".to_string(),
            is_stream: true,
        });

        if let MetricsEvent::Forward(ForwardEvent::RequestStarted {
            request_id,
            endpoint,
            model,
            is_stream,
            ..
        }) = json_round_trip(&original)
        {
            assert_eq!(request_id, "req-456");
            assert_eq!(endpoint, "openai.chat_completions");
            assert_eq!(model, "gpt-5-mini");
            assert!(is_stream);
        } else {
            panic!("Expected RequestStarted event");
        }
    }
}
478}