tailtriage_core/events.rs
1use serde::{Deserialize, Serialize};
2
3use crate::CaptureMode;
4
/// Schema version stamped onto `Run` JSON artifacts.
///
/// [`Run::new`] copies this value into the `schema_version` field of every
/// run it creates.
pub const SCHEMA_VERSION: u64 = 1;
7
/// Logical request outcome categories exposed through the public API.
///
/// Every variant maps to a string label; see [`Outcome::as_str`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Outcome {
    /// The request finished successfully (label `"ok"`).
    Ok,
    /// The request finished with an error (label `"error"`).
    Error,
    /// The request ran past a timeout threshold (label `"timeout"`).
    Timeout,
    /// The request was cancelled before it completed (label `"cancelled"`).
    Cancelled,
    /// The request was rejected before normal execution (label `"rejected"`).
    Rejected,
    /// A custom outcome; the wrapped string is the caller-provided label.
    Other(String),
}

impl Outcome {
    /// Borrows the canonical string label for this outcome.
    ///
    /// Built-in variants yield a fixed lowercase literal, while
    /// [`Outcome::Other`] yields the caller-provided label unchanged.
    #[must_use]
    pub fn as_str(&self) -> &str {
        match self {
            Self::Other(label) => label.as_str(),
            Self::Ok => "ok",
            Self::Error => "error",
            Self::Timeout => "timeout",
            Self::Cancelled => "cancelled",
            Self::Rejected => "rejected",
        }
    }

    /// Consumes this outcome and returns its label as an owned `String`.
    ///
    /// The label held by [`Outcome::Other`] is moved out without copying;
    /// built-in variants allocate a copy of the label from [`Outcome::as_str`].
    #[must_use]
    pub fn into_string(self) -> String {
        match self {
            Self::Other(label) => label,
            // `as_str` matches exhaustively, so a newly added variant still
            // has to be given a label there before this compiles.
            builtin => builtin.as_str().to_owned(),
        }
    }
}
52
53/// A full output artifact for one tailtriage capture run.
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
55pub struct Run {
56 /// Run artifact schema version.
57 pub schema_version: u64,
58 /// Metadata for the capture session.
59 pub metadata: RunMetadata,
60 /// Request timing events.
61 pub requests: Vec<RequestEvent>,
62 /// Stage timing events.
63 pub stages: Vec<StageEvent>,
64 /// Queue wait timing events.
65 pub queues: Vec<QueueEvent>,
66 /// In-flight gauge changes over time.
67 pub inflight: Vec<InFlightSnapshot>,
68 /// Tokio runtime metrics snapshots.
69 pub runtime_snapshots: Vec<RuntimeSnapshot>,
70 /// Capture truncation summary for bounded collection.
71 #[serde(default)]
72 pub truncation: TruncationSummary,
73}
74
75impl Run {
76 /// Creates an empty run with the provided metadata.
77 #[must_use]
78 pub fn new(metadata: RunMetadata) -> Self {
79 Self {
80 schema_version: SCHEMA_VERSION,
81 metadata,
82 requests: Vec::new(),
83 stages: Vec::new(),
84 queues: Vec::new(),
85 inflight: Vec::new(),
86 runtime_snapshots: Vec::new(),
87 truncation: TruncationSummary::default(),
88 }
89 }
90}
91
92/// Per-section counters indicating dropped samples due to capture limits.
93#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
94pub struct TruncationSummary {
95 /// Number of request events dropped after `max_requests` was reached.
96 pub dropped_requests: u64,
97 /// Number of stage events dropped after `max_stages` was reached.
98 pub dropped_stages: u64,
99 /// Number of queue events dropped after `max_queues` was reached.
100 pub dropped_queues: u64,
101 /// Number of in-flight snapshots dropped after `max_inflight_snapshots` was reached.
102 pub dropped_inflight_snapshots: u64,
103 /// Number of runtime snapshots dropped after `max_runtime_snapshots` was reached.
104 pub dropped_runtime_snapshots: u64,
105}
106
107impl TruncationSummary {
108 /// Returns true when any capture section was truncated.
109 #[must_use]
110 pub const fn is_truncated(&self) -> bool {
111 self.dropped_requests > 0
112 || self.dropped_stages > 0
113 || self.dropped_queues > 0
114 || self.dropped_inflight_snapshots > 0
115 || self.dropped_runtime_snapshots > 0
116 }
117}
118
119/// Top-level metadata for one capture run.
120#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
121pub struct RunMetadata {
122 /// A unique identifier for the run.
123 pub run_id: String,
124 /// Service/application name.
125 pub service_name: String,
126 /// Optional service version.
127 pub service_version: Option<String>,
128 /// Timestamp (milliseconds since epoch UTC) when collection started.
129 pub started_at_unix_ms: u64,
130 /// Timestamp (milliseconds since epoch UTC) when collection ended.
131 pub finished_at_unix_ms: u64,
132 /// Capture mode, such as "light" or "investigation".
133 pub mode: CaptureMode,
134 /// Hostname if available.
135 pub host: Option<String>,
136 /// Process identifier if available.
137 pub pid: Option<u32>,
138 /// Lifecycle warnings generated during shutdown validation.
139 #[serde(default)]
140 pub lifecycle_warnings: Vec<String>,
141 /// Incomplete request summary captured at shutdown.
142 #[serde(default)]
143 pub unfinished_requests: UnfinishedRequests,
144}
145
146/// Summary of unfinished requests detected at shutdown.
147#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
148pub struct UnfinishedRequests {
149 /// Count of requests still pending when shutdown ran.
150 pub count: u64,
151 /// Small sample of unfinished requests for debugging.
152 pub sample: Vec<UnfinishedRequestSample>,
153}
154
155/// One unfinished request sample captured for lifecycle warnings.
156#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
157pub struct UnfinishedRequestSample {
158 /// Correlation ID for the unfinished request.
159 pub request_id: String,
160 /// Route or operation name associated with the unfinished request.
161 pub route: String,
162}
163
164/// Per-request timing and status.
165#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
166pub struct RequestEvent {
167 /// Correlation ID for the request.
168 pub request_id: String,
169 /// Route name, operation, or endpoint.
170 pub route: String,
171 /// Semantic request kind.
172 pub kind: Option<String>,
173 /// Request start timestamp (milliseconds since epoch UTC).
174 pub started_at_unix_ms: u64,
175 /// Request completion timestamp (milliseconds since epoch UTC).
176 pub finished_at_unix_ms: u64,
177 /// Total request latency in microseconds.
178 pub latency_us: u64,
179 /// Logical outcome such as "ok", "error", or "timeout".
180 pub outcome: String,
181}
182
183/// Timing record for one named stage.
184#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
185pub struct StageEvent {
186 /// Parent request ID.
187 pub request_id: String,
188 /// Stage identifier.
189 pub stage: String,
190 /// Stage start timestamp (milliseconds since epoch UTC).
191 pub started_at_unix_ms: u64,
192 /// Stage completion timestamp (milliseconds since epoch UTC).
193 pub finished_at_unix_ms: u64,
194 /// Stage latency in microseconds.
195 pub latency_us: u64,
196 /// Whether the stage completed successfully (`Result::is_ok()` for
197 /// `StageTimer::await_on`, always `true` for `StageTimer::await_value`).
198 pub success: bool,
199}
200
201/// Queue wait measurement for a request waiting on a queue/permit.
202#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
203pub struct QueueEvent {
204 /// Parent request ID.
205 pub request_id: String,
206 /// Queue identifier.
207 pub queue: String,
208 /// Queue wait start timestamp (milliseconds since epoch UTC).
209 pub waited_from_unix_ms: u64,
210 /// Queue wait end timestamp (milliseconds since epoch UTC).
211 pub waited_until_unix_ms: u64,
212 /// Total wait time in microseconds.
213 pub wait_us: u64,
214 /// Queue depth sample captured at wait start, if known.
215 pub depth_at_start: Option<u64>,
216}
217
218/// Point-in-time in-flight gauge reading.
219#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
220pub struct InFlightSnapshot {
221 /// Gauge name.
222 pub gauge: String,
223 /// Timestamp (milliseconds since epoch UTC).
224 pub at_unix_ms: u64,
225 /// Number of in-flight units.
226 pub count: u64,
227}
228
229/// Point-in-time runtime metrics sample.
230#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
231pub struct RuntimeSnapshot {
232 /// Timestamp (milliseconds since epoch UTC).
233 pub at_unix_ms: u64,
234 /// Number of alive tasks.
235 pub alive_tasks: Option<u64>,
236 /// Runtime global queue depth.
237 pub global_queue_depth: Option<u64>,
238 /// Aggregated runtime local queue depth across worker threads.
239 pub local_queue_depth: Option<u64>,
240 /// Runtime blocking pool queue depth.
241 pub blocking_queue_depth: Option<u64>,
242 /// Runtime remote schedule count.
243 pub remote_schedule_count: Option<u64>,
244}