Skip to main content

raknet_rust/
telemetry.rs

1use std::collections::BTreeMap;
2use std::fmt::Write as _;
3
4use crate::server::RaknetServerEvent;
5use crate::transport::TransportMetricsSnapshot;
6
/// Kind of an exported telemetry metric.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TelemetryMetricKind {
    /// Counter-type metric.
    Counter,
    /// Gauge-type metric.
    Gauge,
}

impl TelemetryMetricKind {
    /// Returns the lowercase type keyword for this kind (`"counter"` or
    /// `"gauge"`), matching the type names of the Prometheus text format.
    fn as_prometheus_type(self) -> &'static str {
        match self {
            TelemetryMetricKind::Counter => "counter",
            TelemetryMetricKind::Gauge => "gauge",
        }
    }
}
21
22#[derive(Debug, Clone, PartialEq)]
23pub struct TelemetryRecord {
24    pub name: String,
25    pub help: &'static str,
26    pub kind: TelemetryMetricKind,
27    pub value: f64,
28    pub labels: Vec<(String, String)>,
29}
30
31#[derive(Debug, Clone, Copy, Default)]
32pub struct ShardTelemetrySnapshot {
33    pub snapshot: TransportMetricsSnapshot,
34    pub dropped_non_critical_events: u64,
35}
36
37#[derive(Debug, Clone, Copy, Default)]
38pub struct AggregatedTelemetrySnapshot {
39    pub snapshot: TransportMetricsSnapshot,
40    pub dropped_non_critical_events: u64,
41}
42
43#[derive(Debug, Clone, Copy)]
44struct MetricFamilySpec<'a> {
45    prefix: &'a str,
46    name: &'a str,
47    help: &'static str,
48    kind: TelemetryMetricKind,
49    total_value: f64,
50}
51
52#[derive(Debug, Clone, Default)]
53pub struct TelemetryRegistry {
54    shards: BTreeMap<usize, ShardTelemetrySnapshot>,
55}
56
57impl TelemetryRegistry {
58    pub fn new() -> Self {
59        Self::default()
60    }
61
62    pub fn clear(&mut self) {
63        self.shards.clear();
64    }
65
66    pub fn is_empty(&self) -> bool {
67        self.shards.is_empty()
68    }
69
70    pub fn shard_count(&self) -> usize {
71        self.shards.len()
72    }
73
74    pub fn shard_snapshot(&self, shard_id: usize) -> Option<&ShardTelemetrySnapshot> {
75        self.shards.get(&shard_id)
76    }
77
78    pub fn iter_shards(&self) -> impl Iterator<Item = (usize, &ShardTelemetrySnapshot)> {
79        self.shards.iter().map(|(id, snapshot)| (*id, snapshot))
80    }
81
82    pub fn ingest_snapshot(
83        &mut self,
84        shard_id: usize,
85        snapshot: TransportMetricsSnapshot,
86        dropped_non_critical_events: u64,
87    ) {
88        self.shards.insert(
89            shard_id,
90            ShardTelemetrySnapshot {
91                snapshot,
92                dropped_non_critical_events,
93            },
94        );
95    }
96
97    pub fn ingest_server_event(&mut self, event: &RaknetServerEvent) -> bool {
98        let Some((shard_id, snapshot, dropped_non_critical_events)) = event.metrics_snapshot()
99        else {
100            return false;
101        };
102        self.ingest_snapshot(shard_id, *snapshot, dropped_non_critical_events);
103        true
104    }
105
106    pub fn aggregate(&self) -> AggregatedTelemetrySnapshot {
107        let mut total = TransportMetricsSnapshot::default();
108        let mut dropped_non_critical_events = 0u64;
109
110        let mut weighted_srtt_sum = 0.0;
111        let mut weighted_rttvar_sum = 0.0;
112        let mut weighted_resend_rto_sum = 0.0;
113        let mut weighted_cwnd_sum = 0.0;
114
115        for shard in self.shards.values() {
116            let s = shard.snapshot;
117            dropped_non_critical_events =
118                dropped_non_critical_events.saturating_add(shard.dropped_non_critical_events);
119
120            total.session_count = total.session_count.saturating_add(s.session_count);
121            total.sessions_started_total = total
122                .sessions_started_total
123                .saturating_add(s.sessions_started_total);
124            total.sessions_closed_total = total
125                .sessions_closed_total
126                .saturating_add(s.sessions_closed_total);
127            total.packets_forwarded_total = total
128                .packets_forwarded_total
129                .saturating_add(s.packets_forwarded_total);
130            total.bytes_forwarded_total = total
131                .bytes_forwarded_total
132                .saturating_add(s.bytes_forwarded_total);
133            total.pending_outgoing_frames = total
134                .pending_outgoing_frames
135                .saturating_add(s.pending_outgoing_frames);
136            total.pending_outgoing_bytes = total
137                .pending_outgoing_bytes
138                .saturating_add(s.pending_outgoing_bytes);
139            total.pending_unhandled_frames = total
140                .pending_unhandled_frames
141                .saturating_add(s.pending_unhandled_frames);
142            total.pending_unhandled_bytes = total
143                .pending_unhandled_bytes
144                .saturating_add(s.pending_unhandled_bytes);
145            total.ingress_datagrams = total.ingress_datagrams.saturating_add(s.ingress_datagrams);
146            total.ingress_frames = total.ingress_frames.saturating_add(s.ingress_frames);
147            total.duplicate_reliable_drops = total
148                .duplicate_reliable_drops
149                .saturating_add(s.duplicate_reliable_drops);
150            total.ordered_stale_drops = total
151                .ordered_stale_drops
152                .saturating_add(s.ordered_stale_drops);
153            total.ordered_buffer_full_drops = total
154                .ordered_buffer_full_drops
155                .saturating_add(s.ordered_buffer_full_drops);
156            total.sequenced_stale_drops = total
157                .sequenced_stale_drops
158                .saturating_add(s.sequenced_stale_drops);
159            total.sequenced_missing_index_drops = total
160                .sequenced_missing_index_drops
161                .saturating_add(s.sequenced_missing_index_drops);
162            total.reliable_sent_datagrams = total
163                .reliable_sent_datagrams
164                .saturating_add(s.reliable_sent_datagrams);
165            total.resent_datagrams = total.resent_datagrams.saturating_add(s.resent_datagrams);
166            total.ack_out_total = total.ack_out_total.saturating_add(s.ack_out_total);
167            total.nack_out_total = total.nack_out_total.saturating_add(s.nack_out_total);
168            total.acked_datagrams = total.acked_datagrams.saturating_add(s.acked_datagrams);
169            total.nacked_datagrams = total.nacked_datagrams.saturating_add(s.nacked_datagrams);
170            total.split_ttl_drops = total.split_ttl_drops.saturating_add(s.split_ttl_drops);
171            total.outgoing_queue_drops = total
172                .outgoing_queue_drops
173                .saturating_add(s.outgoing_queue_drops);
174            total.outgoing_queue_defers = total
175                .outgoing_queue_defers
176                .saturating_add(s.outgoing_queue_defers);
177            total.outgoing_queue_disconnects = total
178                .outgoing_queue_disconnects
179                .saturating_add(s.outgoing_queue_disconnects);
180            total.backpressure_delays = total
181                .backpressure_delays
182                .saturating_add(s.backpressure_delays);
183            total.backpressure_drops = total
184                .backpressure_drops
185                .saturating_add(s.backpressure_drops);
186            total.backpressure_disconnects = total
187                .backpressure_disconnects
188                .saturating_add(s.backpressure_disconnects);
189            total.local_requested_disconnects = total
190                .local_requested_disconnects
191                .saturating_add(s.local_requested_disconnects);
192            total.remote_disconnect_notifications = total
193                .remote_disconnect_notifications
194                .saturating_add(s.remote_disconnect_notifications);
195            total.remote_detect_lost_disconnects = total
196                .remote_detect_lost_disconnects
197                .saturating_add(s.remote_detect_lost_disconnects);
198            total.illegal_state_transitions = total
199                .illegal_state_transitions
200                .saturating_add(s.illegal_state_transitions);
201            total.timed_out_sessions = total
202                .timed_out_sessions
203                .saturating_add(s.timed_out_sessions);
204            total.keepalive_pings_sent = total
205                .keepalive_pings_sent
206                .saturating_add(s.keepalive_pings_sent);
207            total.unhandled_frames_queued = total
208                .unhandled_frames_queued
209                .saturating_add(s.unhandled_frames_queued);
210            total.unhandled_frames_flushed = total
211                .unhandled_frames_flushed
212                .saturating_add(s.unhandled_frames_flushed);
213            total.unhandled_frames_dropped = total
214                .unhandled_frames_dropped
215                .saturating_add(s.unhandled_frames_dropped);
216            total.rate_global_limit_hits = total
217                .rate_global_limit_hits
218                .saturating_add(s.rate_global_limit_hits);
219            total.rate_ip_block_hits = total
220                .rate_ip_block_hits
221                .saturating_add(s.rate_ip_block_hits);
222            total.rate_ip_block_hits_rate_exceeded = total
223                .rate_ip_block_hits_rate_exceeded
224                .saturating_add(s.rate_ip_block_hits_rate_exceeded);
225            total.rate_ip_block_hits_manual = total
226                .rate_ip_block_hits_manual
227                .saturating_add(s.rate_ip_block_hits_manual);
228            total.rate_ip_block_hits_handshake_heuristic = total
229                .rate_ip_block_hits_handshake_heuristic
230                .saturating_add(s.rate_ip_block_hits_handshake_heuristic);
231            total.rate_ip_block_hits_cookie_mismatch_guard = total
232                .rate_ip_block_hits_cookie_mismatch_guard
233                .saturating_add(s.rate_ip_block_hits_cookie_mismatch_guard);
234            total.rate_addresses_blocked = total
235                .rate_addresses_blocked
236                .saturating_add(s.rate_addresses_blocked);
237            total.rate_addresses_blocked_rate_exceeded = total
238                .rate_addresses_blocked_rate_exceeded
239                .saturating_add(s.rate_addresses_blocked_rate_exceeded);
240            total.rate_addresses_blocked_manual = total
241                .rate_addresses_blocked_manual
242                .saturating_add(s.rate_addresses_blocked_manual);
243            total.rate_addresses_blocked_handshake_heuristic = total
244                .rate_addresses_blocked_handshake_heuristic
245                .saturating_add(s.rate_addresses_blocked_handshake_heuristic);
246            total.rate_addresses_blocked_cookie_mismatch_guard = total
247                .rate_addresses_blocked_cookie_mismatch_guard
248                .saturating_add(s.rate_addresses_blocked_cookie_mismatch_guard);
249            total.rate_addresses_unblocked = total
250                .rate_addresses_unblocked
251                .saturating_add(s.rate_addresses_unblocked);
252            total.rate_blocked_addresses = total
253                .rate_blocked_addresses
254                .saturating_add(s.rate_blocked_addresses);
255            total.rate_exception_addresses = total
256                .rate_exception_addresses
257                .saturating_add(s.rate_exception_addresses);
258            total.processing_budget_drops_total = total
259                .processing_budget_drops_total
260                .saturating_add(s.processing_budget_drops_total);
261            total.processing_budget_drops_ip_exhausted_total = total
262                .processing_budget_drops_ip_exhausted_total
263                .saturating_add(s.processing_budget_drops_ip_exhausted_total);
264            total.processing_budget_drops_global_exhausted_total = total
265                .processing_budget_drops_global_exhausted_total
266                .saturating_add(s.processing_budget_drops_global_exhausted_total);
267            total.processing_budget_consumed_units_total = total
268                .processing_budget_consumed_units_total
269                .saturating_add(s.processing_budget_consumed_units_total);
270            total.processing_budget_active_ip_buckets = total
271                .processing_budget_active_ip_buckets
272                .saturating_add(s.processing_budget_active_ip_buckets);
273            total.cookie_rotations = total.cookie_rotations.saturating_add(s.cookie_rotations);
274            total.cookie_mismatch_drops = total
275                .cookie_mismatch_drops
276                .saturating_add(s.cookie_mismatch_drops);
277            total.cookie_mismatch_blocks = total
278                .cookie_mismatch_blocks
279                .saturating_add(s.cookie_mismatch_blocks);
280            total.handshake_stage_cancel_drops = total
281                .handshake_stage_cancel_drops
282                .saturating_add(s.handshake_stage_cancel_drops);
283            total.handshake_req1_req2_timeouts = total
284                .handshake_req1_req2_timeouts
285                .saturating_add(s.handshake_req1_req2_timeouts);
286            total.handshake_reply2_connect_timeouts = total
287                .handshake_reply2_connect_timeouts
288                .saturating_add(s.handshake_reply2_connect_timeouts);
289            total.handshake_missing_req1_drops = total
290                .handshake_missing_req1_drops
291                .saturating_add(s.handshake_missing_req1_drops);
292            total.handshake_auto_blocks = total
293                .handshake_auto_blocks
294                .saturating_add(s.handshake_auto_blocks);
295            total.handshake_already_connected_rejects = total
296                .handshake_already_connected_rejects
297                .saturating_add(s.handshake_already_connected_rejects);
298            total.handshake_ip_recently_connected_rejects = total
299                .handshake_ip_recently_connected_rejects
300                .saturating_add(s.handshake_ip_recently_connected_rejects);
301            total.request2_server_addr_mismatch_drops = total
302                .request2_server_addr_mismatch_drops
303                .saturating_add(s.request2_server_addr_mismatch_drops);
304            total.request2_legacy_parse_hits = total
305                .request2_legacy_parse_hits
306                .saturating_add(s.request2_legacy_parse_hits);
307            total.request2_legacy_drops = total
308                .request2_legacy_drops
309                .saturating_add(s.request2_legacy_drops);
310            total.request2_ambiguous_parse_hits = total
311                .request2_ambiguous_parse_hits
312                .saturating_add(s.request2_ambiguous_parse_hits);
313            total.request2_ambiguous_drops = total
314                .request2_ambiguous_drops
315                .saturating_add(s.request2_ambiguous_drops);
316            total.proxy_inbound_reroutes = total
317                .proxy_inbound_reroutes
318                .saturating_add(s.proxy_inbound_reroutes);
319            total.proxy_inbound_drops = total
320                .proxy_inbound_drops
321                .saturating_add(s.proxy_inbound_drops);
322            total.proxy_outbound_reroutes = total
323                .proxy_outbound_reroutes
324                .saturating_add(s.proxy_outbound_reroutes);
325            total.proxy_outbound_drops = total
326                .proxy_outbound_drops
327                .saturating_add(s.proxy_outbound_drops);
328
329            let weight = s.session_count as f64;
330            weighted_srtt_sum += s.avg_srtt_ms * weight;
331            weighted_rttvar_sum += s.avg_rttvar_ms * weight;
332            weighted_resend_rto_sum += s.avg_resend_rto_ms * weight;
333            weighted_cwnd_sum += s.avg_congestion_window_packets * weight;
334        }
335
336        if total.session_count > 0 {
337            let weight_sum = total.session_count as f64;
338            total.avg_srtt_ms = weighted_srtt_sum / weight_sum;
339            total.avg_rttvar_ms = weighted_rttvar_sum / weight_sum;
340            total.avg_resend_rto_ms = weighted_resend_rto_sum / weight_sum;
341            total.avg_congestion_window_packets = weighted_cwnd_sum / weight_sum;
342        }
343
344        total.resend_ratio = if total.reliable_sent_datagrams == 0 {
345            0.0
346        } else {
347            total.resent_datagrams as f64 / total.reliable_sent_datagrams as f64
348        };
349
350        AggregatedTelemetrySnapshot {
351            snapshot: total,
352            dropped_non_critical_events,
353        }
354    }
355
356    pub fn to_records(&self) -> Vec<TelemetryRecord> {
357        self.to_records_with_prefix("raknet")
358    }
359
360    pub fn to_records_with_prefix(&self, prefix: &str) -> Vec<TelemetryRecord> {
361        let mut records = Vec::new();
362        let aggregated = self.aggregate();
363
364        macro_rules! push_snapshot_counter {
365            ($name:literal, $help:literal, $field:ident) => {
366                self.push_metric_records(
367                    &mut records,
368                    MetricFamilySpec {
369                        prefix,
370                        name: $name,
371                        help: $help,
372                        kind: TelemetryMetricKind::Counter,
373                        total_value: aggregated.snapshot.$field as f64,
374                    },
375                    |shard| shard.snapshot.$field as f64,
376                );
377            };
378        }
379
380        macro_rules! push_snapshot_gauge {
381            ($name:literal, $help:literal, $field:ident) => {
382                self.push_metric_records(
383                    &mut records,
384                    MetricFamilySpec {
385                        prefix,
386                        name: $name,
387                        help: $help,
388                        kind: TelemetryMetricKind::Gauge,
389                        total_value: aggregated.snapshot.$field as f64,
390                    },
391                    |shard| shard.snapshot.$field as f64,
392                );
393            };
394        }
395
396        macro_rules! push_snapshot_gauge_f64 {
397            ($name:literal, $help:literal, $field:ident) => {
398                self.push_metric_records(
399                    &mut records,
400                    MetricFamilySpec {
401                        prefix,
402                        name: $name,
403                        help: $help,
404                        kind: TelemetryMetricKind::Gauge,
405                        total_value: aggregated.snapshot.$field,
406                    },
407                    |shard| shard.snapshot.$field,
408                );
409            };
410        }
411
412        // P2.3 canonical metric dictionary.
413        push_snapshot_gauge!("sessions_active", "Active RakNet sessions", session_count);
414        push_snapshot_counter!(
415            "sessions_started_total",
416            "Total sessions that reached connected state",
417            sessions_started_total
418        );
419        push_snapshot_counter!(
420            "sessions_closed_total",
421            "Total connected sessions closed",
422            sessions_closed_total
423        );
424        push_snapshot_counter!(
425            "packets_forwarded_total",
426            "Total app frames forwarded to upper layer",
427            packets_forwarded_total
428        );
429        push_snapshot_counter!(
430            "bytes_forwarded_total",
431            "Total app payload bytes forwarded to upper layer",
432            bytes_forwarded_total
433        );
434        push_snapshot_counter!(
435            "ack_out_total",
436            "Total outbound ACK datagrams",
437            ack_out_total
438        );
439        push_snapshot_counter!(
440            "nack_out_total",
441            "Total outbound NACK datagrams",
442            nack_out_total
443        );
444        push_snapshot_counter!(
445            "resend_total",
446            "Total datagrams resent after loss/timeout",
447            resent_datagrams
448        );
449        push_snapshot_gauge!(
450            "rtt_srtt_ms",
451            "Average smoothed RTT in milliseconds",
452            avg_srtt_ms
453        );
454        push_snapshot_gauge!(
455            "rtt_rttvar_ms",
456            "Average RTT variance in milliseconds",
457            avg_rttvar_ms
458        );
459        push_snapshot_gauge!(
460            "rto_ms",
461            "Average resend RTO in milliseconds",
462            avg_resend_rto_ms
463        );
464        push_snapshot_gauge!(
465            "cwnd_packets",
466            "Average congestion window (datagram packets)",
467            avg_congestion_window_packets
468        );
469        push_snapshot_counter!(
470            "duplicate_drop_total",
471            "Dropped duplicate reliable frames",
472            duplicate_reliable_drops
473        );
474        push_snapshot_counter!(
475            "split_ttl_drop_total",
476            "Dropped split compounds due to TTL expiry",
477            split_ttl_drops
478        );
479
480        // Legacy names kept for backward compatibility.
481        push_snapshot_gauge!("session_count", "Active RakNet sessions", session_count);
482        push_snapshot_gauge!(
483            "pending_outgoing_frames",
484            "Queued outgoing frames before datagram packaging",
485            pending_outgoing_frames
486        );
487        push_snapshot_gauge!(
488            "pending_outgoing_bytes",
489            "Queued outgoing bytes before datagram packaging",
490            pending_outgoing_bytes
491        );
492        push_snapshot_gauge!(
493            "pending_unhandled_frames",
494            "Unhandled app frames waiting for connected state",
495            pending_unhandled_frames
496        );
497        push_snapshot_gauge!(
498            "pending_unhandled_bytes",
499            "Unhandled app frame bytes waiting for connected state",
500            pending_unhandled_bytes
501        );
502
503        push_snapshot_counter!(
504            "ingress_datagrams_total",
505            "Total datagrams received",
506            ingress_datagrams
507        );
508        push_snapshot_counter!(
509            "ingress_frames_total",
510            "Total frames received",
511            ingress_frames
512        );
513        push_snapshot_counter!(
514            "duplicate_reliable_drops_total",
515            "Dropped duplicate reliable frames",
516            duplicate_reliable_drops
517        );
518        push_snapshot_counter!(
519            "ordered_stale_drops_total",
520            "Dropped stale ordered frames",
521            ordered_stale_drops
522        );
523        push_snapshot_counter!(
524            "ordered_buffer_full_drops_total",
525            "Dropped ordered frames due to reorder buffer overflow",
526            ordered_buffer_full_drops
527        );
528        push_snapshot_counter!(
529            "sequenced_stale_drops_total",
530            "Dropped stale sequenced frames",
531            sequenced_stale_drops
532        );
533        push_snapshot_counter!(
534            "sequenced_missing_index_drops_total",
535            "Dropped sequenced frames missing sequence index",
536            sequenced_missing_index_drops
537        );
538        push_snapshot_counter!(
539            "reliable_sent_datagrams_total",
540            "Total reliable datagrams sent",
541            reliable_sent_datagrams
542        );
543        push_snapshot_counter!(
544            "resent_datagrams_total",
545            "Total datagrams resent after loss/timeout",
546            resent_datagrams
547        );
548        push_snapshot_counter!(
549            "acked_datagrams_total",
550            "Total datagrams acknowledged",
551            acked_datagrams
552        );
553        push_snapshot_counter!(
554            "nacked_datagrams_total",
555            "Total datagrams negatively acknowledged",
556            nacked_datagrams
557        );
558        push_snapshot_counter!(
559            "split_ttl_drops_total",
560            "Dropped split compounds due to TTL expiry",
561            split_ttl_drops
562        );
563        push_snapshot_counter!(
564            "outgoing_queue_drops_total",
565            "Dropped payloads due to outgoing queue soft pressure",
566            outgoing_queue_drops
567        );
568        push_snapshot_counter!(
569            "outgoing_queue_defers_total",
570            "Deferred payloads due to outgoing queue soft pressure",
571            outgoing_queue_defers
572        );
573        push_snapshot_counter!(
574            "outgoing_queue_disconnects_total",
575            "Disconnects triggered by outgoing queue hard pressure",
576            outgoing_queue_disconnects
577        );
578        push_snapshot_counter!(
579            "backpressure_delay_total",
580            "Backpressure delay actions (deferred packets)",
581            backpressure_delays
582        );
583        push_snapshot_counter!(
584            "backpressure_drop_total",
585            "Backpressure shed actions (dropped packets)",
586            backpressure_drops
587        );
588        push_snapshot_counter!(
589            "backpressure_disconnect_total",
590            "Backpressure disconnect actions",
591            backpressure_disconnects
592        );
593        push_snapshot_counter!(
594            "local_requested_disconnects_total",
595            "Disconnects explicitly requested by local control path",
596            local_requested_disconnects
597        );
598        push_snapshot_counter!(
599            "remote_disconnect_notifications_total",
600            "Remote disconnect notifications received",
601            remote_disconnect_notifications
602        );
603        push_snapshot_counter!(
604            "remote_detect_lost_disconnects_total",
605            "Remote detect-lost disconnect signals received",
606            remote_detect_lost_disconnects
607        );
608        push_snapshot_counter!(
609            "illegal_state_transitions_total",
610            "Illegal session state transitions detected",
611            illegal_state_transitions
612        );
613        push_snapshot_counter!(
614            "timed_out_sessions_total",
615            "Sessions closed due to idle timeout",
616            timed_out_sessions
617        );
618        push_snapshot_counter!(
619            "keepalive_pings_sent_total",
620            "Connected keepalive pings sent",
621            keepalive_pings_sent
622        );
623        push_snapshot_counter!(
624            "unhandled_frames_queued_total",
625            "Unhandled app frames queued before connected state",
626            unhandled_frames_queued
627        );
628        push_snapshot_counter!(
629            "unhandled_frames_flushed_total",
630            "Unhandled app frames flushed after connection",
631            unhandled_frames_flushed
632        );
633        push_snapshot_counter!(
634            "unhandled_frames_dropped_total",
635            "Unhandled app frames dropped due to pipeline overflow",
636            unhandled_frames_dropped
637        );
638        push_snapshot_counter!(
639            "rate_global_limit_hits_total",
640            "Global rate limit hits",
641            rate_global_limit_hits
642        );
643        push_snapshot_counter!(
644            "rate_ip_block_hits_total",
645            "Per-IP rate limiter block hits",
646            rate_ip_block_hits
647        );
648        push_snapshot_counter!(
649            "rate_ip_block_hits_rate_exceeded_total",
650            "Per-IP block hits caused by packet rate exceeding threshold",
651            rate_ip_block_hits_rate_exceeded
652        );
653        push_snapshot_counter!(
654            "rate_ip_block_hits_manual_total",
655            "Per-IP block hits caused by manual address blocks",
656            rate_ip_block_hits_manual
657        );
658        push_snapshot_counter!(
659            "rate_ip_block_hits_handshake_heuristic_total",
660            "Per-IP block hits caused by handshake heuristic guard",
661            rate_ip_block_hits_handshake_heuristic
662        );
663        push_snapshot_counter!(
664            "rate_ip_block_hits_cookie_mismatch_guard_total",
665            "Per-IP block hits caused by cookie mismatch guard",
666            rate_ip_block_hits_cookie_mismatch_guard
667        );
668        push_snapshot_counter!(
669            "rate_addresses_blocked_total",
670            "Addresses blocked by rate limiter",
671            rate_addresses_blocked
672        );
673        push_snapshot_counter!(
674            "rate_addresses_blocked_rate_exceeded_total",
675            "Addresses blocked due to packet rate exceeding threshold",
676            rate_addresses_blocked_rate_exceeded
677        );
678        push_snapshot_counter!(
679            "rate_addresses_blocked_manual_total",
680            "Addresses blocked manually",
681            rate_addresses_blocked_manual
682        );
683        push_snapshot_counter!(
684            "rate_addresses_blocked_handshake_heuristic_total",
685            "Addresses blocked by handshake heuristic guard",
686            rate_addresses_blocked_handshake_heuristic
687        );
688        push_snapshot_counter!(
689            "rate_addresses_blocked_cookie_mismatch_guard_total",
690            "Addresses blocked by cookie mismatch guard",
691            rate_addresses_blocked_cookie_mismatch_guard
692        );
693        push_snapshot_counter!(
694            "rate_addresses_unblocked_total",
695            "Addresses unblocked by rate limiter",
696            rate_addresses_unblocked
697        );
698        push_snapshot_gauge!(
699            "rate_blocked_addresses",
700            "Currently blocked addresses in rate limiter",
701            rate_blocked_addresses
702        );
703        push_snapshot_gauge!(
704            "rate_exception_addresses",
705            "Rate limiter exception addresses",
706            rate_exception_addresses
707        );
708        push_snapshot_counter!(
709            "processing_budget_drops_total",
710            "Connected datagrams dropped by processing budget limiter",
711            processing_budget_drops_total
712        );
713        push_snapshot_counter!(
714            "processing_budget_drops_ip_exhausted_total",
715            "Connected datagrams dropped because per-IP processing budget was exhausted",
716            processing_budget_drops_ip_exhausted_total
717        );
718        push_snapshot_counter!(
719            "processing_budget_drops_global_exhausted_total",
720            "Connected datagrams dropped because global processing budget was exhausted",
721            processing_budget_drops_global_exhausted_total
722        );
723        push_snapshot_counter!(
724            "processing_budget_consumed_units_total",
725            "Total processing budget units consumed by connected datagrams",
726            processing_budget_consumed_units_total
727        );
728        push_snapshot_gauge!(
729            "processing_budget_active_ip_buckets",
730            "Active per-IP processing budget buckets",
731            processing_budget_active_ip_buckets
732        );
733        push_snapshot_counter!(
734            "cookie_rotations_total",
735            "Cookie key rotations",
736            cookie_rotations
737        );
738        push_snapshot_counter!(
739            "cookie_mismatch_drops_total",
740            "Dropped handshakes due to cookie mismatch",
741            cookie_mismatch_drops
742        );
743        push_snapshot_counter!(
744            "cookie_mismatch_blocks_total",
745            "Addresses blocked by cookie mismatch guard",
746            cookie_mismatch_blocks
747        );
748        push_snapshot_counter!(
749            "handshake_stage_cancel_drops_total",
750            "Dropped handshakes due to stage cancel",
751            handshake_stage_cancel_drops
752        );
753        push_snapshot_counter!(
754            "handshake_req1_req2_timeouts_total",
755            "REQ1->REQ2 handshake timeout drops",
756            handshake_req1_req2_timeouts
757        );
758        push_snapshot_counter!(
759            "handshake_reply2_connect_timeouts_total",
760            "REPLY2->CONNECT handshake timeout drops",
761            handshake_reply2_connect_timeouts
762        );
763        push_snapshot_counter!(
764            "handshake_missing_req1_drops_total",
765            "Dropped REQ2 packets without pending REQ1",
766            handshake_missing_req1_drops
767        );
768        push_snapshot_counter!(
769            "handshake_auto_blocks_total",
770            "Automatic rate blocks triggered by handshake heuristics",
771            handshake_auto_blocks
772        );
773        push_snapshot_counter!(
774            "handshake_already_connected_rejects_total",
775            "REQ1/REQ2 rejects answered with AlreadyConnected",
776            handshake_already_connected_rejects
777        );
778        push_snapshot_counter!(
779            "handshake_ip_recently_connected_rejects_total",
780            "REQ1/REQ2 rejects answered with IpRecentlyConnected",
781            handshake_ip_recently_connected_rejects
782        );
783        push_snapshot_counter!(
784            "request2_server_addr_mismatch_drops_total",
785            "Dropped REQ2 packets due to request2_server_addr_policy mismatch",
786            request2_server_addr_mismatch_drops
787        );
788        push_snapshot_counter!(
789            "request2_legacy_parse_hits_total",
790            "Legacy Request2 parse path hits",
791            request2_legacy_parse_hits
792        );
793        push_snapshot_counter!(
794            "request2_legacy_drops_total",
795            "Drops caused by legacy Request2 parse path",
796            request2_legacy_drops
797        );
798        push_snapshot_counter!(
799            "request2_ambiguous_parse_hits_total",
800            "Ambiguous Request2 parse path hits",
801            request2_ambiguous_parse_hits
802        );
803        push_snapshot_counter!(
804            "request2_ambiguous_drops_total",
805            "Drops caused by ambiguous Request2 parse path",
806            request2_ambiguous_drops
807        );
808        push_snapshot_counter!(
809            "proxy_inbound_reroutes_total",
810            "Inbound packets rerouted by proxy routing",
811            proxy_inbound_reroutes
812        );
813        push_snapshot_counter!(
814            "proxy_inbound_drops_total",
815            "Inbound packets dropped by proxy routing",
816            proxy_inbound_drops
817        );
818        push_snapshot_counter!(
819            "proxy_outbound_reroutes_total",
820            "Outbound packets rerouted by proxy routing",
821            proxy_outbound_reroutes
822        );
823        push_snapshot_counter!(
824            "proxy_outbound_drops_total",
825            "Outbound packets dropped by proxy routing",
826            proxy_outbound_drops
827        );
828        push_snapshot_gauge_f64!(
829            "avg_srtt_ms",
830            "Average smoothed RTT in milliseconds",
831            avg_srtt_ms
832        );
833        push_snapshot_gauge_f64!(
834            "avg_rttvar_ms",
835            "Average RTT variance in milliseconds",
836            avg_rttvar_ms
837        );
838        push_snapshot_gauge_f64!(
839            "avg_resend_rto_ms",
840            "Average resend RTO in milliseconds",
841            avg_resend_rto_ms
842        );
843        push_snapshot_gauge_f64!(
844            "avg_congestion_window_packets",
845            "Average congestion window (datagram packets)",
846            avg_congestion_window_packets
847        );
848        push_snapshot_gauge_f64!(
849            "resend_ratio",
850            "Resend ratio (resent/reliable_sent)",
851            resend_ratio
852        );
853
854        self.push_metric_records(
855            &mut records,
856            MetricFamilySpec {
857                prefix,
858                name: "dropped_non_critical_events_total",
859                help: "Dropped non-critical runtime events due to overflow policy",
860                kind: TelemetryMetricKind::Counter,
861                total_value: aggregated.dropped_non_critical_events as f64,
862            },
863            |shard| shard.dropped_non_critical_events as f64,
864        );
865
866        records
867    }
868
869    pub fn render_prometheus(&self) -> String {
870        self.render_prometheus_with_prefix("raknet")
871    }
872
873    pub fn render_prometheus_with_prefix(&self, prefix: &str) -> String {
874        let mut out = String::new();
875        let aggregated = self.aggregate();
876
877        macro_rules! write_snapshot_counter {
878            ($name:literal, $help:literal, $field:ident) => {
879                self.write_metric_family(
880                    &mut out,
881                    MetricFamilySpec {
882                        prefix,
883                        name: $name,
884                        help: $help,
885                        kind: TelemetryMetricKind::Counter,
886                        total_value: aggregated.snapshot.$field as f64,
887                    },
888                    |shard| shard.snapshot.$field as f64,
889                );
890            };
891        }
892
893        macro_rules! write_snapshot_gauge {
894            ($name:literal, $help:literal, $field:ident) => {
895                self.write_metric_family(
896                    &mut out,
897                    MetricFamilySpec {
898                        prefix,
899                        name: $name,
900                        help: $help,
901                        kind: TelemetryMetricKind::Gauge,
902                        total_value: aggregated.snapshot.$field as f64,
903                    },
904                    |shard| shard.snapshot.$field as f64,
905                );
906            };
907        }
908
909        macro_rules! write_snapshot_gauge_f64 {
910            ($name:literal, $help:literal, $field:ident) => {
911                self.write_metric_family(
912                    &mut out,
913                    MetricFamilySpec {
914                        prefix,
915                        name: $name,
916                        help: $help,
917                        kind: TelemetryMetricKind::Gauge,
918                        total_value: aggregated.snapshot.$field,
919                    },
920                    |shard| shard.snapshot.$field,
921                );
922            };
923        }
924
925        // P2.3 canonical metric dictionary.
926        write_snapshot_gauge!("sessions_active", "Active RakNet sessions", session_count);
927        write_snapshot_counter!(
928            "sessions_started_total",
929            "Total sessions that reached connected state",
930            sessions_started_total
931        );
932        write_snapshot_counter!(
933            "sessions_closed_total",
934            "Total connected sessions closed",
935            sessions_closed_total
936        );
937        write_snapshot_counter!(
938            "packets_forwarded_total",
939            "Total app frames forwarded to upper layer",
940            packets_forwarded_total
941        );
942        write_snapshot_counter!(
943            "bytes_forwarded_total",
944            "Total app payload bytes forwarded to upper layer",
945            bytes_forwarded_total
946        );
947        write_snapshot_counter!(
948            "ack_out_total",
949            "Total outbound ACK datagrams",
950            ack_out_total
951        );
952        write_snapshot_counter!(
953            "nack_out_total",
954            "Total outbound NACK datagrams",
955            nack_out_total
956        );
957        write_snapshot_counter!(
958            "resend_total",
959            "Total datagrams resent after loss/timeout",
960            resent_datagrams
961        );
962        write_snapshot_gauge!(
963            "rtt_srtt_ms",
964            "Average smoothed RTT in milliseconds",
965            avg_srtt_ms
966        );
967        write_snapshot_gauge!(
968            "rtt_rttvar_ms",
969            "Average RTT variance in milliseconds",
970            avg_rttvar_ms
971        );
972        write_snapshot_gauge!(
973            "rto_ms",
974            "Average resend RTO in milliseconds",
975            avg_resend_rto_ms
976        );
977        write_snapshot_gauge!(
978            "cwnd_packets",
979            "Average congestion window (datagram packets)",
980            avg_congestion_window_packets
981        );
982        write_snapshot_counter!(
983            "duplicate_drop_total",
984            "Dropped duplicate reliable frames",
985            duplicate_reliable_drops
986        );
987        write_snapshot_counter!(
988            "split_ttl_drop_total",
989            "Dropped split compounds due to TTL expiry",
990            split_ttl_drops
991        );
992
993        // Legacy names kept for backward compatibility.
994        write_snapshot_gauge!("session_count", "Active RakNet sessions", session_count);
995        write_snapshot_gauge!(
996            "pending_outgoing_frames",
997            "Queued outgoing frames before datagram packaging",
998            pending_outgoing_frames
999        );
1000        write_snapshot_gauge!(
1001            "pending_outgoing_bytes",
1002            "Queued outgoing bytes before datagram packaging",
1003            pending_outgoing_bytes
1004        );
1005        write_snapshot_gauge!(
1006            "pending_unhandled_frames",
1007            "Unhandled app frames waiting for connected state",
1008            pending_unhandled_frames
1009        );
1010        write_snapshot_gauge!(
1011            "pending_unhandled_bytes",
1012            "Unhandled app frame bytes waiting for connected state",
1013            pending_unhandled_bytes
1014        );
1015
1016        write_snapshot_counter!(
1017            "ingress_datagrams_total",
1018            "Total datagrams received",
1019            ingress_datagrams
1020        );
1021        write_snapshot_counter!(
1022            "ingress_frames_total",
1023            "Total frames received",
1024            ingress_frames
1025        );
1026        write_snapshot_counter!(
1027            "duplicate_reliable_drops_total",
1028            "Dropped duplicate reliable frames",
1029            duplicate_reliable_drops
1030        );
1031        write_snapshot_counter!(
1032            "ordered_stale_drops_total",
1033            "Dropped stale ordered frames",
1034            ordered_stale_drops
1035        );
1036        write_snapshot_counter!(
1037            "ordered_buffer_full_drops_total",
1038            "Dropped ordered frames due to reorder buffer overflow",
1039            ordered_buffer_full_drops
1040        );
1041        write_snapshot_counter!(
1042            "sequenced_stale_drops_total",
1043            "Dropped stale sequenced frames",
1044            sequenced_stale_drops
1045        );
1046        write_snapshot_counter!(
1047            "sequenced_missing_index_drops_total",
1048            "Dropped sequenced frames missing sequence index",
1049            sequenced_missing_index_drops
1050        );
1051        write_snapshot_counter!(
1052            "reliable_sent_datagrams_total",
1053            "Total reliable datagrams sent",
1054            reliable_sent_datagrams
1055        );
1056        write_snapshot_counter!(
1057            "resent_datagrams_total",
1058            "Total datagrams resent after loss/timeout",
1059            resent_datagrams
1060        );
1061        write_snapshot_counter!(
1062            "acked_datagrams_total",
1063            "Total datagrams acknowledged",
1064            acked_datagrams
1065        );
1066        write_snapshot_counter!(
1067            "nacked_datagrams_total",
1068            "Total datagrams negatively acknowledged",
1069            nacked_datagrams
1070        );
1071        write_snapshot_counter!(
1072            "split_ttl_drops_total",
1073            "Dropped split compounds due to TTL expiry",
1074            split_ttl_drops
1075        );
1076        write_snapshot_counter!(
1077            "outgoing_queue_drops_total",
1078            "Dropped payloads due to outgoing queue soft pressure",
1079            outgoing_queue_drops
1080        );
1081        write_snapshot_counter!(
1082            "outgoing_queue_defers_total",
1083            "Deferred payloads due to outgoing queue soft pressure",
1084            outgoing_queue_defers
1085        );
1086        write_snapshot_counter!(
1087            "outgoing_queue_disconnects_total",
1088            "Disconnects triggered by outgoing queue hard pressure",
1089            outgoing_queue_disconnects
1090        );
1091        write_snapshot_counter!(
1092            "backpressure_delay_total",
1093            "Backpressure delay actions (deferred packets)",
1094            backpressure_delays
1095        );
1096        write_snapshot_counter!(
1097            "backpressure_drop_total",
1098            "Backpressure shed actions (dropped packets)",
1099            backpressure_drops
1100        );
1101        write_snapshot_counter!(
1102            "backpressure_disconnect_total",
1103            "Backpressure disconnect actions",
1104            backpressure_disconnects
1105        );
1106        write_snapshot_counter!(
1107            "local_requested_disconnects_total",
1108            "Disconnects explicitly requested by local control path",
1109            local_requested_disconnects
1110        );
1111        write_snapshot_counter!(
1112            "remote_disconnect_notifications_total",
1113            "Remote disconnect notifications received",
1114            remote_disconnect_notifications
1115        );
1116        write_snapshot_counter!(
1117            "remote_detect_lost_disconnects_total",
1118            "Remote detect-lost disconnect signals received",
1119            remote_detect_lost_disconnects
1120        );
1121        write_snapshot_counter!(
1122            "illegal_state_transitions_total",
1123            "Illegal session state transitions detected",
1124            illegal_state_transitions
1125        );
1126        write_snapshot_counter!(
1127            "timed_out_sessions_total",
1128            "Sessions closed due to idle timeout",
1129            timed_out_sessions
1130        );
1131        write_snapshot_counter!(
1132            "keepalive_pings_sent_total",
1133            "Connected keepalive pings sent",
1134            keepalive_pings_sent
1135        );
1136        write_snapshot_counter!(
1137            "unhandled_frames_queued_total",
1138            "Unhandled app frames queued before connected state",
1139            unhandled_frames_queued
1140        );
1141        write_snapshot_counter!(
1142            "unhandled_frames_flushed_total",
1143            "Unhandled app frames flushed after connection",
1144            unhandled_frames_flushed
1145        );
1146        write_snapshot_counter!(
1147            "unhandled_frames_dropped_total",
1148            "Unhandled app frames dropped due to pipeline overflow",
1149            unhandled_frames_dropped
1150        );
1151        write_snapshot_counter!(
1152            "rate_global_limit_hits_total",
1153            "Global rate limit hits",
1154            rate_global_limit_hits
1155        );
1156        write_snapshot_counter!(
1157            "rate_ip_block_hits_total",
1158            "Per-IP rate limiter block hits",
1159            rate_ip_block_hits
1160        );
1161        write_snapshot_counter!(
1162            "rate_ip_block_hits_rate_exceeded_total",
1163            "Per-IP block hits caused by packet rate exceeding threshold",
1164            rate_ip_block_hits_rate_exceeded
1165        );
1166        write_snapshot_counter!(
1167            "rate_ip_block_hits_manual_total",
1168            "Per-IP block hits caused by manual address blocks",
1169            rate_ip_block_hits_manual
1170        );
1171        write_snapshot_counter!(
1172            "rate_ip_block_hits_handshake_heuristic_total",
1173            "Per-IP block hits caused by handshake heuristic guard",
1174            rate_ip_block_hits_handshake_heuristic
1175        );
1176        write_snapshot_counter!(
1177            "rate_ip_block_hits_cookie_mismatch_guard_total",
1178            "Per-IP block hits caused by cookie mismatch guard",
1179            rate_ip_block_hits_cookie_mismatch_guard
1180        );
1181        write_snapshot_counter!(
1182            "rate_addresses_blocked_total",
1183            "Addresses blocked by rate limiter",
1184            rate_addresses_blocked
1185        );
1186        write_snapshot_counter!(
1187            "rate_addresses_blocked_rate_exceeded_total",
1188            "Addresses blocked due to packet rate exceeding threshold",
1189            rate_addresses_blocked_rate_exceeded
1190        );
1191        write_snapshot_counter!(
1192            "rate_addresses_blocked_manual_total",
1193            "Addresses blocked manually",
1194            rate_addresses_blocked_manual
1195        );
1196        write_snapshot_counter!(
1197            "rate_addresses_blocked_handshake_heuristic_total",
1198            "Addresses blocked by handshake heuristic guard",
1199            rate_addresses_blocked_handshake_heuristic
1200        );
1201        write_snapshot_counter!(
1202            "rate_addresses_blocked_cookie_mismatch_guard_total",
1203            "Addresses blocked by cookie mismatch guard",
1204            rate_addresses_blocked_cookie_mismatch_guard
1205        );
1206        write_snapshot_counter!(
1207            "rate_addresses_unblocked_total",
1208            "Addresses unblocked by rate limiter",
1209            rate_addresses_unblocked
1210        );
1211        write_snapshot_gauge!(
1212            "rate_blocked_addresses",
1213            "Currently blocked addresses in rate limiter",
1214            rate_blocked_addresses
1215        );
1216        write_snapshot_gauge!(
1217            "rate_exception_addresses",
1218            "Rate limiter exception addresses",
1219            rate_exception_addresses
1220        );
1221        write_snapshot_counter!(
1222            "processing_budget_drops_total",
1223            "Connected datagrams dropped by processing budget limiter",
1224            processing_budget_drops_total
1225        );
1226        write_snapshot_counter!(
1227            "processing_budget_drops_ip_exhausted_total",
1228            "Connected datagrams dropped because per-IP processing budget was exhausted",
1229            processing_budget_drops_ip_exhausted_total
1230        );
1231        write_snapshot_counter!(
1232            "processing_budget_drops_global_exhausted_total",
1233            "Connected datagrams dropped because global processing budget was exhausted",
1234            processing_budget_drops_global_exhausted_total
1235        );
1236        write_snapshot_counter!(
1237            "processing_budget_consumed_units_total",
1238            "Total processing budget units consumed by connected datagrams",
1239            processing_budget_consumed_units_total
1240        );
1241        write_snapshot_gauge!(
1242            "processing_budget_active_ip_buckets",
1243            "Active per-IP processing budget buckets",
1244            processing_budget_active_ip_buckets
1245        );
1246        write_snapshot_counter!(
1247            "cookie_rotations_total",
1248            "Cookie key rotations",
1249            cookie_rotations
1250        );
1251        write_snapshot_counter!(
1252            "cookie_mismatch_drops_total",
1253            "Dropped handshakes due to cookie mismatch",
1254            cookie_mismatch_drops
1255        );
1256        write_snapshot_counter!(
1257            "cookie_mismatch_blocks_total",
1258            "Addresses blocked by cookie mismatch guard",
1259            cookie_mismatch_blocks
1260        );
1261        write_snapshot_counter!(
1262            "handshake_stage_cancel_drops_total",
1263            "Dropped handshakes due to stage cancel",
1264            handshake_stage_cancel_drops
1265        );
1266        write_snapshot_counter!(
1267            "handshake_req1_req2_timeouts_total",
1268            "REQ1->REQ2 handshake timeout drops",
1269            handshake_req1_req2_timeouts
1270        );
1271        write_snapshot_counter!(
1272            "handshake_reply2_connect_timeouts_total",
1273            "REPLY2->CONNECT handshake timeout drops",
1274            handshake_reply2_connect_timeouts
1275        );
1276        write_snapshot_counter!(
1277            "handshake_missing_req1_drops_total",
1278            "Dropped REQ2 packets without pending REQ1",
1279            handshake_missing_req1_drops
1280        );
1281        write_snapshot_counter!(
1282            "handshake_auto_blocks_total",
1283            "Automatic rate blocks triggered by handshake heuristics",
1284            handshake_auto_blocks
1285        );
1286        write_snapshot_counter!(
1287            "handshake_already_connected_rejects_total",
1288            "REQ1/REQ2 rejects answered with AlreadyConnected",
1289            handshake_already_connected_rejects
1290        );
1291        write_snapshot_counter!(
1292            "handshake_ip_recently_connected_rejects_total",
1293            "REQ1/REQ2 rejects answered with IpRecentlyConnected",
1294            handshake_ip_recently_connected_rejects
1295        );
1296        write_snapshot_counter!(
1297            "request2_server_addr_mismatch_drops_total",
1298            "Dropped REQ2 packets due to request2_server_addr_policy mismatch",
1299            request2_server_addr_mismatch_drops
1300        );
1301        write_snapshot_counter!(
1302            "request2_legacy_parse_hits_total",
1303            "Legacy Request2 parse path hits",
1304            request2_legacy_parse_hits
1305        );
1306        write_snapshot_counter!(
1307            "request2_legacy_drops_total",
1308            "Drops caused by legacy Request2 parse path",
1309            request2_legacy_drops
1310        );
1311        write_snapshot_counter!(
1312            "request2_ambiguous_parse_hits_total",
1313            "Ambiguous Request2 parse path hits",
1314            request2_ambiguous_parse_hits
1315        );
1316        write_snapshot_counter!(
1317            "request2_ambiguous_drops_total",
1318            "Drops caused by ambiguous Request2 parse path",
1319            request2_ambiguous_drops
1320        );
1321        write_snapshot_counter!(
1322            "proxy_inbound_reroutes_total",
1323            "Inbound packets rerouted by proxy routing",
1324            proxy_inbound_reroutes
1325        );
1326        write_snapshot_counter!(
1327            "proxy_inbound_drops_total",
1328            "Inbound packets dropped by proxy routing",
1329            proxy_inbound_drops
1330        );
1331        write_snapshot_counter!(
1332            "proxy_outbound_reroutes_total",
1333            "Outbound packets rerouted by proxy routing",
1334            proxy_outbound_reroutes
1335        );
1336        write_snapshot_counter!(
1337            "proxy_outbound_drops_total",
1338            "Outbound packets dropped by proxy routing",
1339            proxy_outbound_drops
1340        );
1341        write_snapshot_gauge_f64!(
1342            "avg_srtt_ms",
1343            "Average smoothed RTT in milliseconds",
1344            avg_srtt_ms
1345        );
1346        write_snapshot_gauge_f64!(
1347            "avg_rttvar_ms",
1348            "Average RTT variance in milliseconds",
1349            avg_rttvar_ms
1350        );
1351        write_snapshot_gauge_f64!(
1352            "avg_resend_rto_ms",
1353            "Average resend RTO in milliseconds",
1354            avg_resend_rto_ms
1355        );
1356        write_snapshot_gauge_f64!(
1357            "avg_congestion_window_packets",
1358            "Average congestion window (datagram packets)",
1359            avg_congestion_window_packets
1360        );
1361        write_snapshot_gauge_f64!(
1362            "resend_ratio",
1363            "Resend ratio (resent/reliable_sent)",
1364            resend_ratio
1365        );
1366
1367        self.write_metric_family(
1368            &mut out,
1369            MetricFamilySpec {
1370                prefix,
1371                name: "dropped_non_critical_events_total",
1372                help: "Dropped non-critical runtime events due to overflow policy",
1373                kind: TelemetryMetricKind::Counter,
1374                total_value: aggregated.dropped_non_critical_events as f64,
1375            },
1376            |shard| shard.dropped_non_critical_events as f64,
1377        );
1378
1379        out
1380    }
1381
1382    fn push_metric_records<F>(
1383        &self,
1384        records: &mut Vec<TelemetryRecord>,
1385        spec: MetricFamilySpec<'_>,
1386        extract: F,
1387    ) where
1388        F: Fn(&ShardTelemetrySnapshot) -> f64,
1389    {
1390        for (shard_id, shard) in &self.shards {
1391            records.push(TelemetryRecord {
1392                name: format!("{}_{}", spec.prefix, spec.name),
1393                help: spec.help,
1394                kind: spec.kind,
1395                value: extract(shard),
1396                labels: vec![
1397                    ("scope".to_string(), "shard".to_string()),
1398                    ("shard".to_string(), shard_id.to_string()),
1399                ],
1400            });
1401        }
1402
1403        records.push(TelemetryRecord {
1404            name: format!("{}_{}", spec.prefix, spec.name),
1405            help: spec.help,
1406            kind: spec.kind,
1407            value: spec.total_value,
1408            labels: vec![
1409                ("scope".to_string(), "all".to_string()),
1410                ("shard".to_string(), "all".to_string()),
1411            ],
1412        });
1413    }
1414
1415    fn write_metric_family<F>(&self, out: &mut String, spec: MetricFamilySpec<'_>, extract: F)
1416    where
1417        F: Fn(&ShardTelemetrySnapshot) -> f64,
1418    {
1419        let _ = writeln!(out, "# HELP {}_{} {}", spec.prefix, spec.name, spec.help);
1420        let _ = writeln!(
1421            out,
1422            "# TYPE {}_{} {}",
1423            spec.prefix,
1424            spec.name,
1425            spec.kind.as_prometheus_type()
1426        );
1427        for (shard_id, shard) in &self.shards {
1428            let _ = writeln!(
1429                out,
1430                "{}_{}{{scope=\"shard\",shard=\"{shard_id}\"}} {}",
1431                spec.prefix,
1432                spec.name,
1433                extract(shard)
1434            );
1435        }
1436        let _ = writeln!(
1437            out,
1438            "{}_{}{{scope=\"all\",shard=\"all\"}} {}",
1439            spec.prefix, spec.name, spec.total_value
1440        );
1441    }
1442}
1443
1444#[derive(Debug, Clone)]
1445pub struct TelemetryExporter {
1446    registry: TelemetryRegistry,
1447    prefix: String,
1448}
1449
1450impl Default for TelemetryExporter {
1451    fn default() -> Self {
1452        Self {
1453            registry: TelemetryRegistry::new(),
1454            prefix: "raknet".to_string(),
1455        }
1456    }
1457}
1458
1459impl TelemetryExporter {
1460    pub fn new() -> Self {
1461        Self::default()
1462    }
1463
1464    pub fn with_prefix(prefix: impl Into<String>) -> Self {
1465        Self {
1466            prefix: prefix.into(),
1467            ..Self::default()
1468        }
1469    }
1470
1471    pub fn prefix(&self) -> &str {
1472        &self.prefix
1473    }
1474
1475    pub fn set_prefix(&mut self, prefix: impl Into<String>) {
1476        self.prefix = prefix.into();
1477    }
1478
1479    pub fn clear(&mut self) {
1480        self.registry.clear();
1481    }
1482
1483    pub fn shard_count(&self) -> usize {
1484        self.registry.shard_count()
1485    }
1486
1487    pub fn ingest_snapshot(
1488        &mut self,
1489        shard_id: usize,
1490        snapshot: TransportMetricsSnapshot,
1491        dropped_non_critical_events: u64,
1492    ) {
1493        self.registry
1494            .ingest_snapshot(shard_id, snapshot, dropped_non_critical_events);
1495    }
1496
1497    pub fn ingest_server_event(&mut self, event: &RaknetServerEvent) -> bool {
1498        self.registry.ingest_server_event(event)
1499    }
1500
1501    pub fn aggregate(&self) -> AggregatedTelemetrySnapshot {
1502        self.registry.aggregate()
1503    }
1504
1505    pub fn render_prometheus(&self) -> String {
1506        self.registry.render_prometheus_with_prefix(&self.prefix)
1507    }
1508
1509    pub fn records(&self) -> Vec<TelemetryRecord> {
1510        self.registry.to_records_with_prefix(&self.prefix)
1511    }
1512
1513    pub fn registry(&self) -> &TelemetryRegistry {
1514        &self.registry
1515    }
1516}
1517
1518#[cfg(test)]
1519mod tests {
1520    use super::{TelemetryExporter, TelemetryMetricKind, TelemetryRegistry};
1521    use crate::server::RaknetServerEvent;
1522    use crate::transport::TransportMetricsSnapshot;
1523
1524    fn metrics_event(
1525        shard_id: usize,
1526        snapshot: TransportMetricsSnapshot,
1527        dropped_non_critical_events: u64,
1528    ) -> RaknetServerEvent {
1529        RaknetServerEvent::Metrics {
1530            shard_id,
1531            snapshot: Box::new(snapshot),
1532            dropped_non_critical_events,
1533        }
1534    }
1535
1536    #[test]
1537    fn ingest_server_event_updates_shard_snapshot() {
1538        let mut registry = TelemetryRegistry::new();
1539        let snapshot = TransportMetricsSnapshot {
1540            session_count: 2,
1541            ingress_datagrams: 11,
1542            ..TransportMetricsSnapshot::default()
1543        };
1544
1545        assert!(registry.ingest_server_event(&metrics_event(3, snapshot, 9)));
1546        assert_eq!(registry.shard_count(), 1);
1547
1548        let shard = registry
1549            .shard_snapshot(3)
1550            .expect("shard snapshot should exist");
1551        assert_eq!(shard.snapshot.session_count, 2);
1552        assert_eq!(shard.snapshot.ingress_datagrams, 11);
1553        assert_eq!(shard.dropped_non_critical_events, 9);
1554    }
1555
/// Aggregating two shards must sum the counters and recompute the derived
/// fields.
///
/// The expected averages below equal the per-shard values weighted by each
/// shard's `session_count` (2 and 1), e.g. `(2 * 10 + 1 * 40) / 3 = 20` for
/// `avg_srtt_ms`; the expected resend ratio equals the summed
/// `resent_datagrams` over the summed `reliable_sent_datagrams` (30 / 300).
#[test]
fn aggregate_recomputes_weighted_averages_and_resend_ratio() {
    let mut registry = TelemetryRegistry::new();

    // Shard 0: two sessions.
    registry.ingest_snapshot(
        0,
        TransportMetricsSnapshot {
            session_count: 2,
            ingress_datagrams: 100,
            resent_datagrams: 20,
            reliable_sent_datagrams: 200,
            processing_budget_drops_total: 3,
            processing_budget_consumed_units_total: 10_000,
            avg_srtt_ms: 10.0,
            avg_rttvar_ms: 3.0,
            avg_resend_rto_ms: 25.0,
            avg_congestion_window_packets: 8.0,
            ..TransportMetricsSnapshot::default()
        },
        2,
    );

    // Shard 1: one session.
    registry.ingest_snapshot(
        1,
        TransportMetricsSnapshot {
            session_count: 1,
            ingress_datagrams: 50,
            resent_datagrams: 10,
            reliable_sent_datagrams: 100,
            processing_budget_drops_total: 5,
            processing_budget_consumed_units_total: 20_000,
            avg_srtt_ms: 40.0,
            avg_rttvar_ms: 6.0,
            avg_resend_rto_ms: 55.0,
            avg_congestion_window_packets: 4.0,
            ..TransportMetricsSnapshot::default()
        },
        7,
    );

    let total = registry.aggregate();
    let merged = &total.snapshot;

    // Plain counters are summed across shards.
    assert_eq!(merged.session_count, 3);
    assert_eq!(merged.ingress_datagrams, 150);
    assert_eq!(merged.resent_datagrams, 30);
    assert_eq!(merged.reliable_sent_datagrams, 300);
    assert_eq!(merged.processing_budget_drops_total, 8);
    assert_eq!(merged.processing_budget_consumed_units_total, 30_000);
    assert_eq!(total.dropped_non_critical_events, 9);

    // Averaged fields, checked in the same order as they are declared above.
    for (actual, expected) in [
        (merged.avg_srtt_ms, 20.0),
        (merged.avg_rttvar_ms, 4.0),
        (merged.avg_resend_rto_ms, 35.0),
        (merged.avg_congestion_window_packets, 20.0 / 3.0),
    ] {
        assert!((actual - expected).abs() < 1e-9);
    }

    assert!((merged.resend_ratio - 0.1).abs() < 1e-9);
}
1608
/// Rendering a single ingested shard (id 2) with the `raknet` prefix must
/// produce HELP/TYPE metadata as well as samples labelled per shard
/// (`scope="shard"`) and for the aggregate (`scope="all"`).
#[test]
fn prometheus_render_contains_shard_and_all_labels() {
    let mut registry = TelemetryRegistry::new();
    registry.ingest_snapshot(
        2,
        TransportMetricsSnapshot {
            session_count: 1,
            ingress_datagrams: 9,
            sessions_started_total: 2,
            sessions_closed_total: 1,
            packets_forwarded_total: 7,
            bytes_forwarded_total: 321,
            ack_out_total: 4,
            nack_out_total: 1,
            processing_budget_drops_total: 2,
            ..TransportMetricsSnapshot::default()
        },
        5,
    );

    let rendered = registry.render_prometheus_with_prefix("raknet");
    // Substring probe over the rendered exposition text.
    let has = |needle: &str| rendered.contains(needle);

    // `sessions_active` family: metadata and the per-shard sample.
    assert!(has("# HELP raknet_sessions_active Active RakNet sessions"));
    assert!(has("# TYPE raknet_sessions_active gauge"));
    assert!(has("raknet_sessions_active{scope=\"shard\",shard=\"2\"} 1"));

    // Aggregate (`scope="all"`) samples for the counters set above.
    assert!(has("raknet_sessions_started_total{scope=\"all\",shard=\"all\"} 2"));
    assert!(has("raknet_packets_forwarded_total{scope=\"all\",shard=\"all\"} 7"));
    assert!(has("raknet_bytes_forwarded_total{scope=\"all\",shard=\"all\"} 321"));
    assert!(has("raknet_ack_out_total{scope=\"all\",shard=\"all\"} 4"));
    assert!(has("raknet_nack_out_total{scope=\"all\",shard=\"all\"} 1"));
    assert!(has("raknet_processing_budget_drops_total{scope=\"all\",shard=\"all\"} 2"));

    // `session_count` family: metadata plus both scopes.
    assert!(has("# HELP raknet_session_count Active RakNet sessions"));
    assert!(has("# TYPE raknet_session_count gauge"));
    assert!(has("raknet_session_count{scope=\"shard\",shard=\"2\"} 1"));
    assert!(has("raknet_session_count{scope=\"all\",shard=\"all\"} 1"));

    // Dropped-event counter passed to `ingest_snapshot`, in both scopes.
    assert!(has("raknet_dropped_non_critical_events_total{scope=\"shard\",shard=\"2\"} 5"));
    assert!(has("raknet_dropped_non_critical_events_total{scope=\"all\",shard=\"all\"} 5"));
}
1653
/// Exported records for shard 1 must include both `demo_sessions_active` and
/// `demo_session_count`, each labelled `scope="shard"` / `shard="1"`, typed
/// as a gauge, and carrying the ingested session count of 4.
#[test]
fn record_export_marks_metric_kind_and_scope_labels() {
    let mut registry = TelemetryRegistry::new();
    registry.ingest_snapshot(
        1,
        TransportMetricsSnapshot {
            session_count: 4,
            sessions_started_total: 10,
            ..TransportMetricsSnapshot::default()
        },
        0,
    );

    let records = registry.to_records_with_prefix("demo");

    // Each row: the metric name to look up and the message used when the
    // per-shard record is missing. Rows are checked in order, lookup first.
    for (metric, missing) in [
        (
            "demo_sessions_active",
            "sessions_active shard record should exist",
        ),
        (
            "demo_session_count",
            "session_count shard record should exist",
        ),
    ] {
        let record = records
            .iter()
            .find(|candidate| {
                let has_label = |key: &str, value: &str| {
                    candidate
                        .labels
                        .iter()
                        .any(|(k, v)| k == key && v == value)
                };
                candidate.name == metric && has_label("scope", "shard") && has_label("shard", "1")
            })
            .expect(missing);

        assert_eq!(record.kind, TelemetryMetricKind::Gauge);
        assert!((record.value - 4.0).abs() < 1e-9);
    }
}
1695
/// An exporter created with the `demo` prefix must accept a metrics event,
/// report one shard, and render metric names that carry that prefix.
#[test]
fn telemetry_exporter_uses_prefix_and_ingests_metrics_events() {
    let event = metrics_event(
        0,
        TransportMetricsSnapshot {
            session_count: 2,
            sessions_started_total: 5,
            ..TransportMetricsSnapshot::default()
        },
        0,
    );

    let mut exporter = TelemetryExporter::with_prefix("demo");
    assert!(exporter.ingest_server_event(&event));
    assert_eq!(exporter.shard_count(), 1);

    let rendered = exporter.render_prometheus();
    assert!(rendered.contains("demo_sessions_active"));
    assert!(rendered.contains("demo_sessions_started_total"));
}
1711}