Skip to main content

tailtriage_core/
events.rs

1use serde::{Deserialize, Serialize};
2
3use crate::CaptureMode;
4
/// Schema version stamped into `Run` JSON artifacts.
///
/// `Run::new` copies this value into `Run::schema_version`.
pub const SCHEMA_VERSION: u64 = 1;
7
/// Public-API classification of how a request ended.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Outcome {
    /// The request finished successfully.
    Ok,
    /// The request finished, but with an error.
    Error,
    /// The request ran past a timeout threshold.
    Timeout,
    /// The request was cancelled before it could finish.
    Cancelled,
    /// The request was rejected before normal execution.
    Rejected,
    /// A custom outcome label supplied by the caller.
    Other(String),
}

impl Outcome {
    /// Borrows the canonical label for this outcome.
    ///
    /// Built-in variants map to fixed lowercase labels; `Other` yields the
    /// wrapped string unchanged.
    #[must_use]
    pub fn as_str(&self) -> &str {
        match self {
            Self::Other(label) => label.as_str(),
            Self::Rejected => "rejected",
            Self::Cancelled => "cancelled",
            Self::Timeout => "timeout",
            Self::Error => "error",
            Self::Ok => "ok",
        }
    }

    /// Consumes the outcome and returns its label as an owned `String`.
    ///
    /// `Other` hands back its inner string without copying; every built-in
    /// variant allocates a copy of the label reported by [`Outcome::as_str`].
    #[must_use]
    pub fn into_string(self) -> String {
        match self {
            Self::Other(label) => label,
            // `as_str` matches exhaustively, so the label table lives in one
            // place and a new variant cannot be forgotten here.
            builtin => builtin.as_str().to_owned(),
        }
    }
}
52
53/// A full output artifact for one tailtriage capture run.
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
55pub struct Run {
56    /// Run artifact schema version.
57    pub schema_version: u64,
58    /// Metadata for the capture session.
59    pub metadata: RunMetadata,
60    /// Request timing events.
61    pub requests: Vec<RequestEvent>,
62    /// Stage timing events.
63    pub stages: Vec<StageEvent>,
64    /// Queue wait timing events.
65    pub queues: Vec<QueueEvent>,
66    /// In-flight gauge changes over time.
67    pub inflight: Vec<InFlightSnapshot>,
68    /// Tokio runtime metrics snapshots.
69    pub runtime_snapshots: Vec<RuntimeSnapshot>,
70    /// Capture truncation summary for bounded collection.
71    #[serde(default)]
72    pub truncation: TruncationSummary,
73}
74
75impl Run {
76    /// Creates an empty run with the provided metadata.
77    #[must_use]
78    pub fn new(metadata: RunMetadata) -> Self {
79        Self {
80            schema_version: SCHEMA_VERSION,
81            metadata,
82            requests: Vec::new(),
83            stages: Vec::new(),
84            queues: Vec::new(),
85            inflight: Vec::new(),
86            runtime_snapshots: Vec::new(),
87            truncation: TruncationSummary::default(),
88        }
89    }
90}
91
92/// Per-section counters indicating dropped samples due to capture limits.
93#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
94pub struct TruncationSummary {
95    /// Number of request events dropped after `max_requests` was reached.
96    pub dropped_requests: u64,
97    /// Number of stage events dropped after `max_stages` was reached.
98    pub dropped_stages: u64,
99    /// Number of queue events dropped after `max_queues` was reached.
100    pub dropped_queues: u64,
101    /// Number of in-flight snapshots dropped after `max_inflight_snapshots` was reached.
102    pub dropped_inflight_snapshots: u64,
103    /// Number of runtime snapshots dropped after `max_runtime_snapshots` was reached.
104    pub dropped_runtime_snapshots: u64,
105}
106
107impl TruncationSummary {
108    /// Returns true when any capture section was truncated.
109    #[must_use]
110    pub const fn is_truncated(&self) -> bool {
111        self.dropped_requests > 0
112            || self.dropped_stages > 0
113            || self.dropped_queues > 0
114            || self.dropped_inflight_snapshots > 0
115            || self.dropped_runtime_snapshots > 0
116    }
117}
118
119/// Top-level metadata for one capture run.
120#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
121pub struct RunMetadata {
122    /// A unique identifier for the run.
123    pub run_id: String,
124    /// Service/application name.
125    pub service_name: String,
126    /// Optional service version.
127    pub service_version: Option<String>,
128    /// Timestamp (milliseconds since epoch UTC) when collection started.
129    pub started_at_unix_ms: u64,
130    /// Timestamp (milliseconds since epoch UTC) when collection ended.
131    pub finished_at_unix_ms: u64,
132    /// Capture mode, such as "light" or "investigation".
133    pub mode: CaptureMode,
134    /// Hostname if available.
135    pub host: Option<String>,
136    /// Process identifier if available.
137    pub pid: Option<u32>,
138    /// Lifecycle warnings generated during shutdown validation.
139    #[serde(default)]
140    pub lifecycle_warnings: Vec<String>,
141    /// Incomplete request summary captured at shutdown.
142    #[serde(default)]
143    pub unfinished_requests: UnfinishedRequests,
144}
145
146/// Summary of unfinished requests detected at shutdown.
147#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
148pub struct UnfinishedRequests {
149    /// Count of requests still pending when shutdown ran.
150    pub count: u64,
151    /// Small sample of unfinished requests for debugging.
152    pub sample: Vec<UnfinishedRequestSample>,
153}
154
155/// One unfinished request sample captured for lifecycle warnings.
156#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
157pub struct UnfinishedRequestSample {
158    /// Correlation ID for the unfinished request.
159    pub request_id: String,
160    /// Route or operation name associated with the unfinished request.
161    pub route: String,
162}
163
164/// Per-request timing and status.
165#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
166pub struct RequestEvent {
167    /// Correlation ID for the request.
168    pub request_id: String,
169    /// Route name, operation, or endpoint.
170    pub route: String,
171    /// Semantic request kind.
172    pub kind: Option<String>,
173    /// Request start timestamp (milliseconds since epoch UTC).
174    pub started_at_unix_ms: u64,
175    /// Request completion timestamp (milliseconds since epoch UTC).
176    pub finished_at_unix_ms: u64,
177    /// Total request latency in microseconds.
178    pub latency_us: u64,
179    /// Logical outcome such as "ok", "error", or "timeout".
180    pub outcome: String,
181}
182
183/// Timing record for one named stage.
184#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
185pub struct StageEvent {
186    /// Parent request ID.
187    pub request_id: String,
188    /// Stage identifier.
189    pub stage: String,
190    /// Stage start timestamp (milliseconds since epoch UTC).
191    pub started_at_unix_ms: u64,
192    /// Stage completion timestamp (milliseconds since epoch UTC).
193    pub finished_at_unix_ms: u64,
194    /// Stage latency in microseconds.
195    pub latency_us: u64,
196    /// Whether the stage completed successfully (`Result::is_ok()` for
197    /// `StageTimer::await_on`, always `true` for `StageTimer::await_value`).
198    pub success: bool,
199}
200
201/// Queue wait measurement for a request waiting on a queue/permit.
202#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
203pub struct QueueEvent {
204    /// Parent request ID.
205    pub request_id: String,
206    /// Queue identifier.
207    pub queue: String,
208    /// Queue wait start timestamp (milliseconds since epoch UTC).
209    pub waited_from_unix_ms: u64,
210    /// Queue wait end timestamp (milliseconds since epoch UTC).
211    pub waited_until_unix_ms: u64,
212    /// Total wait time in microseconds.
213    pub wait_us: u64,
214    /// Queue depth sample captured at wait start, if known.
215    pub depth_at_start: Option<u64>,
216}
217
218/// Point-in-time in-flight gauge reading.
219#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
220pub struct InFlightSnapshot {
221    /// Gauge name.
222    pub gauge: String,
223    /// Timestamp (milliseconds since epoch UTC).
224    pub at_unix_ms: u64,
225    /// Number of in-flight units.
226    pub count: u64,
227}
228
229/// Point-in-time runtime metrics sample.
230#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
231pub struct RuntimeSnapshot {
232    /// Timestamp (milliseconds since epoch UTC).
233    pub at_unix_ms: u64,
234    /// Number of alive tasks.
235    pub alive_tasks: Option<u64>,
236    /// Runtime global queue depth.
237    pub global_queue_depth: Option<u64>,
238    /// Aggregated runtime local queue depth across worker threads.
239    pub local_queue_depth: Option<u64>,
240    /// Runtime blocking pool queue depth.
241    pub blocking_queue_depth: Option<u64>,
242    /// Runtime remote schedule count.
243    pub remote_schedule_count: Option<u64>,
244}