// Source file: runmat_telemetry/lib.rs

use serde::Serialize;

use runmat_accelerate_api::{ApiDeviceInfo, ProviderTelemetry};
4
/// Label string returned by [`TelemetryEventKind::label`] for
/// [`TelemetryEventKind::RuntimeStarted`].
pub const EVENT_RUNTIME_STARTED: &str = "runtime.run.started";
/// Label string returned by [`TelemetryEventKind::label`] for
/// [`TelemetryEventKind::RuntimeFinished`].
pub const EVENT_RUNTIME_FINISHED: &str = "runtime.run.finished";

/// The runtime telemetry event kinds that have a label defined in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TelemetryEventKind {
    /// Maps to [`EVENT_RUNTIME_STARTED`].
    RuntimeStarted,
    /// Maps to [`EVENT_RUNTIME_FINISHED`].
    RuntimeFinished,
}

impl TelemetryEventKind {
    /// Returns the static label string for this event kind.
    ///
    /// The match is exhaustive on purpose: adding a variant without a label
    /// is a compile error rather than a silent fallback.
    pub fn label(&self) -> &'static str {
        match self {
            Self::RuntimeStarted => EVENT_RUNTIME_STARTED,
            Self::RuntimeFinished => EVENT_RUNTIME_FINISHED,
        }
    }
}
21}
22
23#[derive(Debug, Clone, Serialize)]
24#[serde(rename_all = "snake_case")]
25pub enum TelemetryRunKind {
26    Script,
27    Repl,
28    Benchmark,
29    Install,
30}
31
32impl TelemetryRunKind {
33    pub fn as_str(&self) -> &'static str {
34        match self {
35            TelemetryRunKind::Script => "script",
36            TelemetryRunKind::Repl => "repl",
37            TelemetryRunKind::Benchmark => "benchmark",
38            TelemetryRunKind::Install => "install",
39        }
40    }
41}
42
43#[derive(Debug, Clone, Serialize)]
44pub struct ProviderSnapshot {
45    pub device: ApiDeviceInfo,
46    pub telemetry: ProviderTelemetry,
47}
48
49impl ProviderSnapshot {
50    pub fn gpu_wall_ns(&self) -> u64 {
51        self.telemetry.fused_elementwise.total_wall_time_ns
52            + self.telemetry.fused_reduction.total_wall_time_ns
53            + self.telemetry.matmul.total_wall_time_ns
54    }
55
56    pub fn gpu_dispatches(&self) -> u64 {
57        self.telemetry.fused_elementwise.count
58            + self.telemetry.fused_reduction.count
59            + self.telemetry.matmul.count
60    }
61}
62
63#[derive(Debug, Clone, Serialize)]
64pub struct RuntimeTelemetryEnvelope<P: Serialize> {
65    #[serde(rename = "event_label")]
66    pub event_label: &'static str,
67    pub uuid: String,
68    #[serde(skip_serializing_if = "Option::is_none")]
69    pub cid: Option<String>,
70    pub session_id: String,
71    #[serde(skip_serializing_if = "Option::is_none")]
72    pub os: Option<String>,
73    #[serde(skip_serializing_if = "Option::is_none")]
74    pub arch: Option<String>,
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub release: Option<String>,
77    pub run_kind: String,
78    pub payload: P,
79}
80
81#[derive(Debug, Clone, Serialize)]
82pub struct RuntimeStartedPayload {
83    pub jit_enabled: bool,
84    pub accelerate_enabled: bool,
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub timestamp_ms: Option<u64>,
87}
88
89pub type RuntimeStartedEnvelope = RuntimeTelemetryEnvelope<RuntimeStartedPayload>;
90
91#[derive(Debug, Clone, Serialize)]
92pub struct RuntimeExecutionCounters {
93    pub total_executions: u64,
94    pub jit_compiled: u64,
95    pub interpreter_fallback: u64,
96}
97
98#[derive(Debug, Clone, Serialize)]
99pub struct RuntimeFinishedPayload {
100    #[serde(skip_serializing_if = "Option::is_none")]
101    pub duration_us: Option<u64>,
102    pub success: bool,
103    pub jit_enabled: bool,
104    pub jit_used: bool,
105    pub accelerate_enabled: bool,
106    #[serde(skip_serializing_if = "Option::is_none")]
107    pub timestamp_ms: Option<u64>,
108    #[serde(skip_serializing_if = "Option::is_none")]
109    pub error: Option<String>,
110    #[serde(
111        rename = "runtime.failure.stage",
112        skip_serializing_if = "Option::is_none"
113    )]
114    pub runtime_failure_stage: Option<String>,
115    #[serde(
116        rename = "runtime.failure.code",
117        skip_serializing_if = "Option::is_none"
118    )]
119    pub runtime_failure_code: Option<String>,
120    #[serde(
121        rename = "runtime.failure.has_span",
122        skip_serializing_if = "Option::is_none"
123    )]
124    pub runtime_failure_has_span: Option<bool>,
125    #[serde(
126        rename = "runtime.failure.host",
127        skip_serializing_if = "Option::is_none"
128    )]
129    pub runtime_failure_host: Option<String>,
130    #[serde(
131        rename = "runtime.failure.component",
132        skip_serializing_if = "Option::is_none"
133    )]
134    pub runtime_failure_component: Option<String>,
135    #[serde(skip_serializing_if = "Option::is_none")]
136    pub counters: Option<RuntimeExecutionCounters>,
137    #[serde(skip_serializing_if = "Option::is_none")]
138    pub provider: Option<ProviderSnapshot>,
139    #[serde(skip_serializing_if = "Option::is_none")]
140    pub gpu_wall_ns: Option<u64>,
141    #[serde(skip_serializing_if = "Option::is_none")]
142    pub gpu_ratio: Option<f64>,
143    #[serde(skip_serializing_if = "Option::is_none")]
144    pub gpu_dispatches: Option<u64>,
145    #[serde(skip_serializing_if = "Option::is_none")]
146    pub gpu_upload_bytes: Option<u64>,
147    #[serde(skip_serializing_if = "Option::is_none")]
148    pub gpu_download_bytes: Option<u64>,
149    #[serde(skip_serializing_if = "Option::is_none")]
150    pub fusion_cache_hits: Option<u64>,
151    #[serde(skip_serializing_if = "Option::is_none")]
152    pub fusion_cache_misses: Option<u64>,
153    #[serde(skip_serializing_if = "Option::is_none")]
154    pub fusion_hit_ratio: Option<f64>,
155}
156
157pub type RuntimeFinishedEnvelope = RuntimeTelemetryEnvelope<RuntimeFinishedPayload>;
158
159pub fn serialize_envelope<P: Serialize>(envelope: &RuntimeTelemetryEnvelope<P>) -> Option<String> {
160    serde_json::to_string(envelope).ok()
161}