1use std::collections::BTreeMap;
2use std::fmt::Write as _;
3
4use crate::server::RaknetServerEvent;
5use crate::transport::TransportMetricsSnapshot;
6
/// Kind of an exported telemetry metric.
///
/// Only the two kinds below exist; `as_prometheus_type` maps them to the
/// `counter` / `gauge` type keywords.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TelemetryMetricKind {
    /// Metric exported with the `counter` type keyword.
    Counter,
    /// Metric exported with the `gauge` type keyword.
    Gauge,
}
12
13impl TelemetryMetricKind {
14 fn as_prometheus_type(self) -> &'static str {
15 match self {
16 Self::Counter => "counter",
17 Self::Gauge => "gauge",
18 }
19 }
20}
21
22#[derive(Debug, Clone, PartialEq)]
23pub struct TelemetryRecord {
24 pub name: String,
25 pub help: &'static str,
26 pub kind: TelemetryMetricKind,
27 pub value: f64,
28 pub labels: Vec<(String, String)>,
29}
30
31#[derive(Debug, Clone, Copy, Default)]
32pub struct ShardTelemetrySnapshot {
33 pub snapshot: TransportMetricsSnapshot,
34 pub dropped_non_critical_events: u64,
35}
36
37#[derive(Debug, Clone, Copy, Default)]
38pub struct AggregatedTelemetrySnapshot {
39 pub snapshot: TransportMetricsSnapshot,
40 pub dropped_non_critical_events: u64,
41}
42
43#[derive(Debug, Clone, Copy)]
44struct MetricFamilySpec<'a> {
45 prefix: &'a str,
46 name: &'a str,
47 help: &'static str,
48 kind: TelemetryMetricKind,
49 total_value: f64,
50}
51
52#[derive(Debug, Clone, Default)]
53pub struct TelemetryRegistry {
54 shards: BTreeMap<usize, ShardTelemetrySnapshot>,
55}
56
57impl TelemetryRegistry {
58 pub fn new() -> Self {
59 Self::default()
60 }
61
62 pub fn clear(&mut self) {
63 self.shards.clear();
64 }
65
66 pub fn is_empty(&self) -> bool {
67 self.shards.is_empty()
68 }
69
70 pub fn shard_count(&self) -> usize {
71 self.shards.len()
72 }
73
74 pub fn shard_snapshot(&self, shard_id: usize) -> Option<&ShardTelemetrySnapshot> {
75 self.shards.get(&shard_id)
76 }
77
78 pub fn iter_shards(&self) -> impl Iterator<Item = (usize, &ShardTelemetrySnapshot)> {
79 self.shards.iter().map(|(id, snapshot)| (*id, snapshot))
80 }
81
82 pub fn ingest_snapshot(
83 &mut self,
84 shard_id: usize,
85 snapshot: TransportMetricsSnapshot,
86 dropped_non_critical_events: u64,
87 ) {
88 self.shards.insert(
89 shard_id,
90 ShardTelemetrySnapshot {
91 snapshot,
92 dropped_non_critical_events,
93 },
94 );
95 }
96
97 pub fn ingest_server_event(&mut self, event: &RaknetServerEvent) -> bool {
98 let Some((shard_id, snapshot, dropped_non_critical_events)) = event.metrics_snapshot()
99 else {
100 return false;
101 };
102 self.ingest_snapshot(shard_id, *snapshot, dropped_non_critical_events);
103 true
104 }
105
106 pub fn aggregate(&self) -> AggregatedTelemetrySnapshot {
107 let mut total = TransportMetricsSnapshot::default();
108 let mut dropped_non_critical_events = 0u64;
109
110 let mut weighted_srtt_sum = 0.0;
111 let mut weighted_rttvar_sum = 0.0;
112 let mut weighted_resend_rto_sum = 0.0;
113 let mut weighted_cwnd_sum = 0.0;
114
115 for shard in self.shards.values() {
116 let s = shard.snapshot;
117 dropped_non_critical_events =
118 dropped_non_critical_events.saturating_add(shard.dropped_non_critical_events);
119
120 total.session_count = total.session_count.saturating_add(s.session_count);
121 total.sessions_started_total = total
122 .sessions_started_total
123 .saturating_add(s.sessions_started_total);
124 total.sessions_closed_total = total
125 .sessions_closed_total
126 .saturating_add(s.sessions_closed_total);
127 total.packets_forwarded_total = total
128 .packets_forwarded_total
129 .saturating_add(s.packets_forwarded_total);
130 total.bytes_forwarded_total = total
131 .bytes_forwarded_total
132 .saturating_add(s.bytes_forwarded_total);
133 total.pending_outgoing_frames = total
134 .pending_outgoing_frames
135 .saturating_add(s.pending_outgoing_frames);
136 total.pending_outgoing_bytes = total
137 .pending_outgoing_bytes
138 .saturating_add(s.pending_outgoing_bytes);
139 total.pending_unhandled_frames = total
140 .pending_unhandled_frames
141 .saturating_add(s.pending_unhandled_frames);
142 total.pending_unhandled_bytes = total
143 .pending_unhandled_bytes
144 .saturating_add(s.pending_unhandled_bytes);
145 total.ingress_datagrams = total.ingress_datagrams.saturating_add(s.ingress_datagrams);
146 total.ingress_frames = total.ingress_frames.saturating_add(s.ingress_frames);
147 total.duplicate_reliable_drops = total
148 .duplicate_reliable_drops
149 .saturating_add(s.duplicate_reliable_drops);
150 total.ordered_stale_drops = total
151 .ordered_stale_drops
152 .saturating_add(s.ordered_stale_drops);
153 total.ordered_buffer_full_drops = total
154 .ordered_buffer_full_drops
155 .saturating_add(s.ordered_buffer_full_drops);
156 total.sequenced_stale_drops = total
157 .sequenced_stale_drops
158 .saturating_add(s.sequenced_stale_drops);
159 total.sequenced_missing_index_drops = total
160 .sequenced_missing_index_drops
161 .saturating_add(s.sequenced_missing_index_drops);
162 total.reliable_sent_datagrams = total
163 .reliable_sent_datagrams
164 .saturating_add(s.reliable_sent_datagrams);
165 total.resent_datagrams = total.resent_datagrams.saturating_add(s.resent_datagrams);
166 total.ack_out_total = total.ack_out_total.saturating_add(s.ack_out_total);
167 total.nack_out_total = total.nack_out_total.saturating_add(s.nack_out_total);
168 total.acked_datagrams = total.acked_datagrams.saturating_add(s.acked_datagrams);
169 total.nacked_datagrams = total.nacked_datagrams.saturating_add(s.nacked_datagrams);
170 total.split_ttl_drops = total.split_ttl_drops.saturating_add(s.split_ttl_drops);
171 total.outgoing_queue_drops = total
172 .outgoing_queue_drops
173 .saturating_add(s.outgoing_queue_drops);
174 total.outgoing_queue_defers = total
175 .outgoing_queue_defers
176 .saturating_add(s.outgoing_queue_defers);
177 total.outgoing_queue_disconnects = total
178 .outgoing_queue_disconnects
179 .saturating_add(s.outgoing_queue_disconnects);
180 total.backpressure_delays = total
181 .backpressure_delays
182 .saturating_add(s.backpressure_delays);
183 total.backpressure_drops = total
184 .backpressure_drops
185 .saturating_add(s.backpressure_drops);
186 total.backpressure_disconnects = total
187 .backpressure_disconnects
188 .saturating_add(s.backpressure_disconnects);
189 total.local_requested_disconnects = total
190 .local_requested_disconnects
191 .saturating_add(s.local_requested_disconnects);
192 total.remote_disconnect_notifications = total
193 .remote_disconnect_notifications
194 .saturating_add(s.remote_disconnect_notifications);
195 total.remote_detect_lost_disconnects = total
196 .remote_detect_lost_disconnects
197 .saturating_add(s.remote_detect_lost_disconnects);
198 total.illegal_state_transitions = total
199 .illegal_state_transitions
200 .saturating_add(s.illegal_state_transitions);
201 total.timed_out_sessions = total
202 .timed_out_sessions
203 .saturating_add(s.timed_out_sessions);
204 total.keepalive_pings_sent = total
205 .keepalive_pings_sent
206 .saturating_add(s.keepalive_pings_sent);
207 total.unhandled_frames_queued = total
208 .unhandled_frames_queued
209 .saturating_add(s.unhandled_frames_queued);
210 total.unhandled_frames_flushed = total
211 .unhandled_frames_flushed
212 .saturating_add(s.unhandled_frames_flushed);
213 total.unhandled_frames_dropped = total
214 .unhandled_frames_dropped
215 .saturating_add(s.unhandled_frames_dropped);
216 total.rate_global_limit_hits = total
217 .rate_global_limit_hits
218 .saturating_add(s.rate_global_limit_hits);
219 total.rate_ip_block_hits = total
220 .rate_ip_block_hits
221 .saturating_add(s.rate_ip_block_hits);
222 total.rate_ip_block_hits_rate_exceeded = total
223 .rate_ip_block_hits_rate_exceeded
224 .saturating_add(s.rate_ip_block_hits_rate_exceeded);
225 total.rate_ip_block_hits_manual = total
226 .rate_ip_block_hits_manual
227 .saturating_add(s.rate_ip_block_hits_manual);
228 total.rate_ip_block_hits_handshake_heuristic = total
229 .rate_ip_block_hits_handshake_heuristic
230 .saturating_add(s.rate_ip_block_hits_handshake_heuristic);
231 total.rate_ip_block_hits_cookie_mismatch_guard = total
232 .rate_ip_block_hits_cookie_mismatch_guard
233 .saturating_add(s.rate_ip_block_hits_cookie_mismatch_guard);
234 total.rate_addresses_blocked = total
235 .rate_addresses_blocked
236 .saturating_add(s.rate_addresses_blocked);
237 total.rate_addresses_blocked_rate_exceeded = total
238 .rate_addresses_blocked_rate_exceeded
239 .saturating_add(s.rate_addresses_blocked_rate_exceeded);
240 total.rate_addresses_blocked_manual = total
241 .rate_addresses_blocked_manual
242 .saturating_add(s.rate_addresses_blocked_manual);
243 total.rate_addresses_blocked_handshake_heuristic = total
244 .rate_addresses_blocked_handshake_heuristic
245 .saturating_add(s.rate_addresses_blocked_handshake_heuristic);
246 total.rate_addresses_blocked_cookie_mismatch_guard = total
247 .rate_addresses_blocked_cookie_mismatch_guard
248 .saturating_add(s.rate_addresses_blocked_cookie_mismatch_guard);
249 total.rate_addresses_unblocked = total
250 .rate_addresses_unblocked
251 .saturating_add(s.rate_addresses_unblocked);
252 total.rate_blocked_addresses = total
253 .rate_blocked_addresses
254 .saturating_add(s.rate_blocked_addresses);
255 total.rate_exception_addresses = total
256 .rate_exception_addresses
257 .saturating_add(s.rate_exception_addresses);
258 total.processing_budget_drops_total = total
259 .processing_budget_drops_total
260 .saturating_add(s.processing_budget_drops_total);
261 total.processing_budget_drops_ip_exhausted_total = total
262 .processing_budget_drops_ip_exhausted_total
263 .saturating_add(s.processing_budget_drops_ip_exhausted_total);
264 total.processing_budget_drops_global_exhausted_total = total
265 .processing_budget_drops_global_exhausted_total
266 .saturating_add(s.processing_budget_drops_global_exhausted_total);
267 total.processing_budget_consumed_units_total = total
268 .processing_budget_consumed_units_total
269 .saturating_add(s.processing_budget_consumed_units_total);
270 total.processing_budget_active_ip_buckets = total
271 .processing_budget_active_ip_buckets
272 .saturating_add(s.processing_budget_active_ip_buckets);
273 total.cookie_rotations = total.cookie_rotations.saturating_add(s.cookie_rotations);
274 total.cookie_mismatch_drops = total
275 .cookie_mismatch_drops
276 .saturating_add(s.cookie_mismatch_drops);
277 total.cookie_mismatch_blocks = total
278 .cookie_mismatch_blocks
279 .saturating_add(s.cookie_mismatch_blocks);
280 total.handshake_stage_cancel_drops = total
281 .handshake_stage_cancel_drops
282 .saturating_add(s.handshake_stage_cancel_drops);
283 total.handshake_req1_req2_timeouts = total
284 .handshake_req1_req2_timeouts
285 .saturating_add(s.handshake_req1_req2_timeouts);
286 total.handshake_reply2_connect_timeouts = total
287 .handshake_reply2_connect_timeouts
288 .saturating_add(s.handshake_reply2_connect_timeouts);
289 total.handshake_missing_req1_drops = total
290 .handshake_missing_req1_drops
291 .saturating_add(s.handshake_missing_req1_drops);
292 total.handshake_auto_blocks = total
293 .handshake_auto_blocks
294 .saturating_add(s.handshake_auto_blocks);
295 total.handshake_already_connected_rejects = total
296 .handshake_already_connected_rejects
297 .saturating_add(s.handshake_already_connected_rejects);
298 total.handshake_ip_recently_connected_rejects = total
299 .handshake_ip_recently_connected_rejects
300 .saturating_add(s.handshake_ip_recently_connected_rejects);
301 total.request2_server_addr_mismatch_drops = total
302 .request2_server_addr_mismatch_drops
303 .saturating_add(s.request2_server_addr_mismatch_drops);
304 total.request2_legacy_parse_hits = total
305 .request2_legacy_parse_hits
306 .saturating_add(s.request2_legacy_parse_hits);
307 total.request2_legacy_drops = total
308 .request2_legacy_drops
309 .saturating_add(s.request2_legacy_drops);
310 total.request2_ambiguous_parse_hits = total
311 .request2_ambiguous_parse_hits
312 .saturating_add(s.request2_ambiguous_parse_hits);
313 total.request2_ambiguous_drops = total
314 .request2_ambiguous_drops
315 .saturating_add(s.request2_ambiguous_drops);
316 total.proxy_inbound_reroutes = total
317 .proxy_inbound_reroutes
318 .saturating_add(s.proxy_inbound_reroutes);
319 total.proxy_inbound_drops = total
320 .proxy_inbound_drops
321 .saturating_add(s.proxy_inbound_drops);
322 total.proxy_outbound_reroutes = total
323 .proxy_outbound_reroutes
324 .saturating_add(s.proxy_outbound_reroutes);
325 total.proxy_outbound_drops = total
326 .proxy_outbound_drops
327 .saturating_add(s.proxy_outbound_drops);
328
329 let weight = s.session_count as f64;
330 weighted_srtt_sum += s.avg_srtt_ms * weight;
331 weighted_rttvar_sum += s.avg_rttvar_ms * weight;
332 weighted_resend_rto_sum += s.avg_resend_rto_ms * weight;
333 weighted_cwnd_sum += s.avg_congestion_window_packets * weight;
334 }
335
336 if total.session_count > 0 {
337 let weight_sum = total.session_count as f64;
338 total.avg_srtt_ms = weighted_srtt_sum / weight_sum;
339 total.avg_rttvar_ms = weighted_rttvar_sum / weight_sum;
340 total.avg_resend_rto_ms = weighted_resend_rto_sum / weight_sum;
341 total.avg_congestion_window_packets = weighted_cwnd_sum / weight_sum;
342 }
343
344 total.resend_ratio = if total.reliable_sent_datagrams == 0 {
345 0.0
346 } else {
347 total.resent_datagrams as f64 / total.reliable_sent_datagrams as f64
348 };
349
350 AggregatedTelemetrySnapshot {
351 snapshot: total,
352 dropped_non_critical_events,
353 }
354 }
355
356 pub fn to_records(&self) -> Vec<TelemetryRecord> {
357 self.to_records_with_prefix("raknet")
358 }
359
360 pub fn to_records_with_prefix(&self, prefix: &str) -> Vec<TelemetryRecord> {
361 let mut records = Vec::new();
362 let aggregated = self.aggregate();
363
364 macro_rules! push_snapshot_counter {
365 ($name:literal, $help:literal, $field:ident) => {
366 self.push_metric_records(
367 &mut records,
368 MetricFamilySpec {
369 prefix,
370 name: $name,
371 help: $help,
372 kind: TelemetryMetricKind::Counter,
373 total_value: aggregated.snapshot.$field as f64,
374 },
375 |shard| shard.snapshot.$field as f64,
376 );
377 };
378 }
379
380 macro_rules! push_snapshot_gauge {
381 ($name:literal, $help:literal, $field:ident) => {
382 self.push_metric_records(
383 &mut records,
384 MetricFamilySpec {
385 prefix,
386 name: $name,
387 help: $help,
388 kind: TelemetryMetricKind::Gauge,
389 total_value: aggregated.snapshot.$field as f64,
390 },
391 |shard| shard.snapshot.$field as f64,
392 );
393 };
394 }
395
396 macro_rules! push_snapshot_gauge_f64 {
397 ($name:literal, $help:literal, $field:ident) => {
398 self.push_metric_records(
399 &mut records,
400 MetricFamilySpec {
401 prefix,
402 name: $name,
403 help: $help,
404 kind: TelemetryMetricKind::Gauge,
405 total_value: aggregated.snapshot.$field,
406 },
407 |shard| shard.snapshot.$field,
408 );
409 };
410 }
411
412 push_snapshot_gauge!("sessions_active", "Active RakNet sessions", session_count);
414 push_snapshot_counter!(
415 "sessions_started_total",
416 "Total sessions that reached connected state",
417 sessions_started_total
418 );
419 push_snapshot_counter!(
420 "sessions_closed_total",
421 "Total connected sessions closed",
422 sessions_closed_total
423 );
424 push_snapshot_counter!(
425 "packets_forwarded_total",
426 "Total app frames forwarded to upper layer",
427 packets_forwarded_total
428 );
429 push_snapshot_counter!(
430 "bytes_forwarded_total",
431 "Total app payload bytes forwarded to upper layer",
432 bytes_forwarded_total
433 );
434 push_snapshot_counter!(
435 "ack_out_total",
436 "Total outbound ACK datagrams",
437 ack_out_total
438 );
439 push_snapshot_counter!(
440 "nack_out_total",
441 "Total outbound NACK datagrams",
442 nack_out_total
443 );
444 push_snapshot_counter!(
445 "resend_total",
446 "Total datagrams resent after loss/timeout",
447 resent_datagrams
448 );
449 push_snapshot_gauge!(
450 "rtt_srtt_ms",
451 "Average smoothed RTT in milliseconds",
452 avg_srtt_ms
453 );
454 push_snapshot_gauge!(
455 "rtt_rttvar_ms",
456 "Average RTT variance in milliseconds",
457 avg_rttvar_ms
458 );
459 push_snapshot_gauge!(
460 "rto_ms",
461 "Average resend RTO in milliseconds",
462 avg_resend_rto_ms
463 );
464 push_snapshot_gauge!(
465 "cwnd_packets",
466 "Average congestion window (datagram packets)",
467 avg_congestion_window_packets
468 );
469 push_snapshot_counter!(
470 "duplicate_drop_total",
471 "Dropped duplicate reliable frames",
472 duplicate_reliable_drops
473 );
474 push_snapshot_counter!(
475 "split_ttl_drop_total",
476 "Dropped split compounds due to TTL expiry",
477 split_ttl_drops
478 );
479
480 push_snapshot_gauge!("session_count", "Active RakNet sessions", session_count);
482 push_snapshot_gauge!(
483 "pending_outgoing_frames",
484 "Queued outgoing frames before datagram packaging",
485 pending_outgoing_frames
486 );
487 push_snapshot_gauge!(
488 "pending_outgoing_bytes",
489 "Queued outgoing bytes before datagram packaging",
490 pending_outgoing_bytes
491 );
492 push_snapshot_gauge!(
493 "pending_unhandled_frames",
494 "Unhandled app frames waiting for connected state",
495 pending_unhandled_frames
496 );
497 push_snapshot_gauge!(
498 "pending_unhandled_bytes",
499 "Unhandled app frame bytes waiting for connected state",
500 pending_unhandled_bytes
501 );
502
503 push_snapshot_counter!(
504 "ingress_datagrams_total",
505 "Total datagrams received",
506 ingress_datagrams
507 );
508 push_snapshot_counter!(
509 "ingress_frames_total",
510 "Total frames received",
511 ingress_frames
512 );
513 push_snapshot_counter!(
514 "duplicate_reliable_drops_total",
515 "Dropped duplicate reliable frames",
516 duplicate_reliable_drops
517 );
518 push_snapshot_counter!(
519 "ordered_stale_drops_total",
520 "Dropped stale ordered frames",
521 ordered_stale_drops
522 );
523 push_snapshot_counter!(
524 "ordered_buffer_full_drops_total",
525 "Dropped ordered frames due to reorder buffer overflow",
526 ordered_buffer_full_drops
527 );
528 push_snapshot_counter!(
529 "sequenced_stale_drops_total",
530 "Dropped stale sequenced frames",
531 sequenced_stale_drops
532 );
533 push_snapshot_counter!(
534 "sequenced_missing_index_drops_total",
535 "Dropped sequenced frames missing sequence index",
536 sequenced_missing_index_drops
537 );
538 push_snapshot_counter!(
539 "reliable_sent_datagrams_total",
540 "Total reliable datagrams sent",
541 reliable_sent_datagrams
542 );
543 push_snapshot_counter!(
544 "resent_datagrams_total",
545 "Total datagrams resent after loss/timeout",
546 resent_datagrams
547 );
548 push_snapshot_counter!(
549 "acked_datagrams_total",
550 "Total datagrams acknowledged",
551 acked_datagrams
552 );
553 push_snapshot_counter!(
554 "nacked_datagrams_total",
555 "Total datagrams negatively acknowledged",
556 nacked_datagrams
557 );
558 push_snapshot_counter!(
559 "split_ttl_drops_total",
560 "Dropped split compounds due to TTL expiry",
561 split_ttl_drops
562 );
563 push_snapshot_counter!(
564 "outgoing_queue_drops_total",
565 "Dropped payloads due to outgoing queue soft pressure",
566 outgoing_queue_drops
567 );
568 push_snapshot_counter!(
569 "outgoing_queue_defers_total",
570 "Deferred payloads due to outgoing queue soft pressure",
571 outgoing_queue_defers
572 );
573 push_snapshot_counter!(
574 "outgoing_queue_disconnects_total",
575 "Disconnects triggered by outgoing queue hard pressure",
576 outgoing_queue_disconnects
577 );
578 push_snapshot_counter!(
579 "backpressure_delay_total",
580 "Backpressure delay actions (deferred packets)",
581 backpressure_delays
582 );
583 push_snapshot_counter!(
584 "backpressure_drop_total",
585 "Backpressure shed actions (dropped packets)",
586 backpressure_drops
587 );
588 push_snapshot_counter!(
589 "backpressure_disconnect_total",
590 "Backpressure disconnect actions",
591 backpressure_disconnects
592 );
593 push_snapshot_counter!(
594 "local_requested_disconnects_total",
595 "Disconnects explicitly requested by local control path",
596 local_requested_disconnects
597 );
598 push_snapshot_counter!(
599 "remote_disconnect_notifications_total",
600 "Remote disconnect notifications received",
601 remote_disconnect_notifications
602 );
603 push_snapshot_counter!(
604 "remote_detect_lost_disconnects_total",
605 "Remote detect-lost disconnect signals received",
606 remote_detect_lost_disconnects
607 );
608 push_snapshot_counter!(
609 "illegal_state_transitions_total",
610 "Illegal session state transitions detected",
611 illegal_state_transitions
612 );
613 push_snapshot_counter!(
614 "timed_out_sessions_total",
615 "Sessions closed due to idle timeout",
616 timed_out_sessions
617 );
618 push_snapshot_counter!(
619 "keepalive_pings_sent_total",
620 "Connected keepalive pings sent",
621 keepalive_pings_sent
622 );
623 push_snapshot_counter!(
624 "unhandled_frames_queued_total",
625 "Unhandled app frames queued before connected state",
626 unhandled_frames_queued
627 );
628 push_snapshot_counter!(
629 "unhandled_frames_flushed_total",
630 "Unhandled app frames flushed after connection",
631 unhandled_frames_flushed
632 );
633 push_snapshot_counter!(
634 "unhandled_frames_dropped_total",
635 "Unhandled app frames dropped due to pipeline overflow",
636 unhandled_frames_dropped
637 );
638 push_snapshot_counter!(
639 "rate_global_limit_hits_total",
640 "Global rate limit hits",
641 rate_global_limit_hits
642 );
643 push_snapshot_counter!(
644 "rate_ip_block_hits_total",
645 "Per-IP rate limiter block hits",
646 rate_ip_block_hits
647 );
648 push_snapshot_counter!(
649 "rate_ip_block_hits_rate_exceeded_total",
650 "Per-IP block hits caused by packet rate exceeding threshold",
651 rate_ip_block_hits_rate_exceeded
652 );
653 push_snapshot_counter!(
654 "rate_ip_block_hits_manual_total",
655 "Per-IP block hits caused by manual address blocks",
656 rate_ip_block_hits_manual
657 );
658 push_snapshot_counter!(
659 "rate_ip_block_hits_handshake_heuristic_total",
660 "Per-IP block hits caused by handshake heuristic guard",
661 rate_ip_block_hits_handshake_heuristic
662 );
663 push_snapshot_counter!(
664 "rate_ip_block_hits_cookie_mismatch_guard_total",
665 "Per-IP block hits caused by cookie mismatch guard",
666 rate_ip_block_hits_cookie_mismatch_guard
667 );
668 push_snapshot_counter!(
669 "rate_addresses_blocked_total",
670 "Addresses blocked by rate limiter",
671 rate_addresses_blocked
672 );
673 push_snapshot_counter!(
674 "rate_addresses_blocked_rate_exceeded_total",
675 "Addresses blocked due to packet rate exceeding threshold",
676 rate_addresses_blocked_rate_exceeded
677 );
678 push_snapshot_counter!(
679 "rate_addresses_blocked_manual_total",
680 "Addresses blocked manually",
681 rate_addresses_blocked_manual
682 );
683 push_snapshot_counter!(
684 "rate_addresses_blocked_handshake_heuristic_total",
685 "Addresses blocked by handshake heuristic guard",
686 rate_addresses_blocked_handshake_heuristic
687 );
688 push_snapshot_counter!(
689 "rate_addresses_blocked_cookie_mismatch_guard_total",
690 "Addresses blocked by cookie mismatch guard",
691 rate_addresses_blocked_cookie_mismatch_guard
692 );
693 push_snapshot_counter!(
694 "rate_addresses_unblocked_total",
695 "Addresses unblocked by rate limiter",
696 rate_addresses_unblocked
697 );
698 push_snapshot_gauge!(
699 "rate_blocked_addresses",
700 "Currently blocked addresses in rate limiter",
701 rate_blocked_addresses
702 );
703 push_snapshot_gauge!(
704 "rate_exception_addresses",
705 "Rate limiter exception addresses",
706 rate_exception_addresses
707 );
708 push_snapshot_counter!(
709 "processing_budget_drops_total",
710 "Connected datagrams dropped by processing budget limiter",
711 processing_budget_drops_total
712 );
713 push_snapshot_counter!(
714 "processing_budget_drops_ip_exhausted_total",
715 "Connected datagrams dropped because per-IP processing budget was exhausted",
716 processing_budget_drops_ip_exhausted_total
717 );
718 push_snapshot_counter!(
719 "processing_budget_drops_global_exhausted_total",
720 "Connected datagrams dropped because global processing budget was exhausted",
721 processing_budget_drops_global_exhausted_total
722 );
723 push_snapshot_counter!(
724 "processing_budget_consumed_units_total",
725 "Total processing budget units consumed by connected datagrams",
726 processing_budget_consumed_units_total
727 );
728 push_snapshot_gauge!(
729 "processing_budget_active_ip_buckets",
730 "Active per-IP processing budget buckets",
731 processing_budget_active_ip_buckets
732 );
733 push_snapshot_counter!(
734 "cookie_rotations_total",
735 "Cookie key rotations",
736 cookie_rotations
737 );
738 push_snapshot_counter!(
739 "cookie_mismatch_drops_total",
740 "Dropped handshakes due to cookie mismatch",
741 cookie_mismatch_drops
742 );
743 push_snapshot_counter!(
744 "cookie_mismatch_blocks_total",
745 "Addresses blocked by cookie mismatch guard",
746 cookie_mismatch_blocks
747 );
748 push_snapshot_counter!(
749 "handshake_stage_cancel_drops_total",
750 "Dropped handshakes due to stage cancel",
751 handshake_stage_cancel_drops
752 );
753 push_snapshot_counter!(
754 "handshake_req1_req2_timeouts_total",
755 "REQ1->REQ2 handshake timeout drops",
756 handshake_req1_req2_timeouts
757 );
758 push_snapshot_counter!(
759 "handshake_reply2_connect_timeouts_total",
760 "REPLY2->CONNECT handshake timeout drops",
761 handshake_reply2_connect_timeouts
762 );
763 push_snapshot_counter!(
764 "handshake_missing_req1_drops_total",
765 "Dropped REQ2 packets without pending REQ1",
766 handshake_missing_req1_drops
767 );
768 push_snapshot_counter!(
769 "handshake_auto_blocks_total",
770 "Automatic rate blocks triggered by handshake heuristics",
771 handshake_auto_blocks
772 );
773 push_snapshot_counter!(
774 "handshake_already_connected_rejects_total",
775 "REQ1/REQ2 rejects answered with AlreadyConnected",
776 handshake_already_connected_rejects
777 );
778 push_snapshot_counter!(
779 "handshake_ip_recently_connected_rejects_total",
780 "REQ1/REQ2 rejects answered with IpRecentlyConnected",
781 handshake_ip_recently_connected_rejects
782 );
783 push_snapshot_counter!(
784 "request2_server_addr_mismatch_drops_total",
785 "Dropped REQ2 packets due to request2_server_addr_policy mismatch",
786 request2_server_addr_mismatch_drops
787 );
788 push_snapshot_counter!(
789 "request2_legacy_parse_hits_total",
790 "Legacy Request2 parse path hits",
791 request2_legacy_parse_hits
792 );
793 push_snapshot_counter!(
794 "request2_legacy_drops_total",
795 "Drops caused by legacy Request2 parse path",
796 request2_legacy_drops
797 );
798 push_snapshot_counter!(
799 "request2_ambiguous_parse_hits_total",
800 "Ambiguous Request2 parse path hits",
801 request2_ambiguous_parse_hits
802 );
803 push_snapshot_counter!(
804 "request2_ambiguous_drops_total",
805 "Drops caused by ambiguous Request2 parse path",
806 request2_ambiguous_drops
807 );
808 push_snapshot_counter!(
809 "proxy_inbound_reroutes_total",
810 "Inbound packets rerouted by proxy routing",
811 proxy_inbound_reroutes
812 );
813 push_snapshot_counter!(
814 "proxy_inbound_drops_total",
815 "Inbound packets dropped by proxy routing",
816 proxy_inbound_drops
817 );
818 push_snapshot_counter!(
819 "proxy_outbound_reroutes_total",
820 "Outbound packets rerouted by proxy routing",
821 proxy_outbound_reroutes
822 );
823 push_snapshot_counter!(
824 "proxy_outbound_drops_total",
825 "Outbound packets dropped by proxy routing",
826 proxy_outbound_drops
827 );
828 push_snapshot_gauge_f64!(
829 "avg_srtt_ms",
830 "Average smoothed RTT in milliseconds",
831 avg_srtt_ms
832 );
833 push_snapshot_gauge_f64!(
834 "avg_rttvar_ms",
835 "Average RTT variance in milliseconds",
836 avg_rttvar_ms
837 );
838 push_snapshot_gauge_f64!(
839 "avg_resend_rto_ms",
840 "Average resend RTO in milliseconds",
841 avg_resend_rto_ms
842 );
843 push_snapshot_gauge_f64!(
844 "avg_congestion_window_packets",
845 "Average congestion window (datagram packets)",
846 avg_congestion_window_packets
847 );
848 push_snapshot_gauge_f64!(
849 "resend_ratio",
850 "Resend ratio (resent/reliable_sent)",
851 resend_ratio
852 );
853
854 self.push_metric_records(
855 &mut records,
856 MetricFamilySpec {
857 prefix,
858 name: "dropped_non_critical_events_total",
859 help: "Dropped non-critical runtime events due to overflow policy",
860 kind: TelemetryMetricKind::Counter,
861 total_value: aggregated.dropped_non_critical_events as f64,
862 },
863 |shard| shard.dropped_non_critical_events as f64,
864 );
865
866 records
867 }
868
869 pub fn render_prometheus(&self) -> String {
870 self.render_prometheus_with_prefix("raknet")
871 }
872
873 pub fn render_prometheus_with_prefix(&self, prefix: &str) -> String {
874 let mut out = String::new();
875 let aggregated = self.aggregate();
876
877 macro_rules! write_snapshot_counter {
878 ($name:literal, $help:literal, $field:ident) => {
879 self.write_metric_family(
880 &mut out,
881 MetricFamilySpec {
882 prefix,
883 name: $name,
884 help: $help,
885 kind: TelemetryMetricKind::Counter,
886 total_value: aggregated.snapshot.$field as f64,
887 },
888 |shard| shard.snapshot.$field as f64,
889 );
890 };
891 }
892
893 macro_rules! write_snapshot_gauge {
894 ($name:literal, $help:literal, $field:ident) => {
895 self.write_metric_family(
896 &mut out,
897 MetricFamilySpec {
898 prefix,
899 name: $name,
900 help: $help,
901 kind: TelemetryMetricKind::Gauge,
902 total_value: aggregated.snapshot.$field as f64,
903 },
904 |shard| shard.snapshot.$field as f64,
905 );
906 };
907 }
908
909 macro_rules! write_snapshot_gauge_f64 {
910 ($name:literal, $help:literal, $field:ident) => {
911 self.write_metric_family(
912 &mut out,
913 MetricFamilySpec {
914 prefix,
915 name: $name,
916 help: $help,
917 kind: TelemetryMetricKind::Gauge,
918 total_value: aggregated.snapshot.$field,
919 },
920 |shard| shard.snapshot.$field,
921 );
922 };
923 }
924
925 write_snapshot_gauge!("sessions_active", "Active RakNet sessions", session_count);
927 write_snapshot_counter!(
928 "sessions_started_total",
929 "Total sessions that reached connected state",
930 sessions_started_total
931 );
932 write_snapshot_counter!(
933 "sessions_closed_total",
934 "Total connected sessions closed",
935 sessions_closed_total
936 );
937 write_snapshot_counter!(
938 "packets_forwarded_total",
939 "Total app frames forwarded to upper layer",
940 packets_forwarded_total
941 );
942 write_snapshot_counter!(
943 "bytes_forwarded_total",
944 "Total app payload bytes forwarded to upper layer",
945 bytes_forwarded_total
946 );
947 write_snapshot_counter!(
948 "ack_out_total",
949 "Total outbound ACK datagrams",
950 ack_out_total
951 );
952 write_snapshot_counter!(
953 "nack_out_total",
954 "Total outbound NACK datagrams",
955 nack_out_total
956 );
957 write_snapshot_counter!(
958 "resend_total",
959 "Total datagrams resent after loss/timeout",
960 resent_datagrams
961 );
962 write_snapshot_gauge!(
963 "rtt_srtt_ms",
964 "Average smoothed RTT in milliseconds",
965 avg_srtt_ms
966 );
967 write_snapshot_gauge!(
968 "rtt_rttvar_ms",
969 "Average RTT variance in milliseconds",
970 avg_rttvar_ms
971 );
972 write_snapshot_gauge!(
973 "rto_ms",
974 "Average resend RTO in milliseconds",
975 avg_resend_rto_ms
976 );
977 write_snapshot_gauge!(
978 "cwnd_packets",
979 "Average congestion window (datagram packets)",
980 avg_congestion_window_packets
981 );
982 write_snapshot_counter!(
983 "duplicate_drop_total",
984 "Dropped duplicate reliable frames",
985 duplicate_reliable_drops
986 );
987 write_snapshot_counter!(
988 "split_ttl_drop_total",
989 "Dropped split compounds due to TTL expiry",
990 split_ttl_drops
991 );
992
993 write_snapshot_gauge!("session_count", "Active RakNet sessions", session_count);
995 write_snapshot_gauge!(
996 "pending_outgoing_frames",
997 "Queued outgoing frames before datagram packaging",
998 pending_outgoing_frames
999 );
1000 write_snapshot_gauge!(
1001 "pending_outgoing_bytes",
1002 "Queued outgoing bytes before datagram packaging",
1003 pending_outgoing_bytes
1004 );
1005 write_snapshot_gauge!(
1006 "pending_unhandled_frames",
1007 "Unhandled app frames waiting for connected state",
1008 pending_unhandled_frames
1009 );
1010 write_snapshot_gauge!(
1011 "pending_unhandled_bytes",
1012 "Unhandled app frame bytes waiting for connected state",
1013 pending_unhandled_bytes
1014 );
1015
1016 write_snapshot_counter!(
1017 "ingress_datagrams_total",
1018 "Total datagrams received",
1019 ingress_datagrams
1020 );
1021 write_snapshot_counter!(
1022 "ingress_frames_total",
1023 "Total frames received",
1024 ingress_frames
1025 );
1026 write_snapshot_counter!(
1027 "duplicate_reliable_drops_total",
1028 "Dropped duplicate reliable frames",
1029 duplicate_reliable_drops
1030 );
1031 write_snapshot_counter!(
1032 "ordered_stale_drops_total",
1033 "Dropped stale ordered frames",
1034 ordered_stale_drops
1035 );
1036 write_snapshot_counter!(
1037 "ordered_buffer_full_drops_total",
1038 "Dropped ordered frames due to reorder buffer overflow",
1039 ordered_buffer_full_drops
1040 );
1041 write_snapshot_counter!(
1042 "sequenced_stale_drops_total",
1043 "Dropped stale sequenced frames",
1044 sequenced_stale_drops
1045 );
1046 write_snapshot_counter!(
1047 "sequenced_missing_index_drops_total",
1048 "Dropped sequenced frames missing sequence index",
1049 sequenced_missing_index_drops
1050 );
1051 write_snapshot_counter!(
1052 "reliable_sent_datagrams_total",
1053 "Total reliable datagrams sent",
1054 reliable_sent_datagrams
1055 );
1056 write_snapshot_counter!(
1057 "resent_datagrams_total",
1058 "Total datagrams resent after loss/timeout",
1059 resent_datagrams
1060 );
1061 write_snapshot_counter!(
1062 "acked_datagrams_total",
1063 "Total datagrams acknowledged",
1064 acked_datagrams
1065 );
1066 write_snapshot_counter!(
1067 "nacked_datagrams_total",
1068 "Total datagrams negatively acknowledged",
1069 nacked_datagrams
1070 );
1071 write_snapshot_counter!(
1072 "split_ttl_drops_total",
1073 "Dropped split compounds due to TTL expiry",
1074 split_ttl_drops
1075 );
1076 write_snapshot_counter!(
1077 "outgoing_queue_drops_total",
1078 "Dropped payloads due to outgoing queue soft pressure",
1079 outgoing_queue_drops
1080 );
1081 write_snapshot_counter!(
1082 "outgoing_queue_defers_total",
1083 "Deferred payloads due to outgoing queue soft pressure",
1084 outgoing_queue_defers
1085 );
1086 write_snapshot_counter!(
1087 "outgoing_queue_disconnects_total",
1088 "Disconnects triggered by outgoing queue hard pressure",
1089 outgoing_queue_disconnects
1090 );
1091 write_snapshot_counter!(
1092 "backpressure_delay_total",
1093 "Backpressure delay actions (deferred packets)",
1094 backpressure_delays
1095 );
1096 write_snapshot_counter!(
1097 "backpressure_drop_total",
1098 "Backpressure shed actions (dropped packets)",
1099 backpressure_drops
1100 );
1101 write_snapshot_counter!(
1102 "backpressure_disconnect_total",
1103 "Backpressure disconnect actions",
1104 backpressure_disconnects
1105 );
1106 write_snapshot_counter!(
1107 "local_requested_disconnects_total",
1108 "Disconnects explicitly requested by local control path",
1109 local_requested_disconnects
1110 );
1111 write_snapshot_counter!(
1112 "remote_disconnect_notifications_total",
1113 "Remote disconnect notifications received",
1114 remote_disconnect_notifications
1115 );
1116 write_snapshot_counter!(
1117 "remote_detect_lost_disconnects_total",
1118 "Remote detect-lost disconnect signals received",
1119 remote_detect_lost_disconnects
1120 );
1121 write_snapshot_counter!(
1122 "illegal_state_transitions_total",
1123 "Illegal session state transitions detected",
1124 illegal_state_transitions
1125 );
1126 write_snapshot_counter!(
1127 "timed_out_sessions_total",
1128 "Sessions closed due to idle timeout",
1129 timed_out_sessions
1130 );
1131 write_snapshot_counter!(
1132 "keepalive_pings_sent_total",
1133 "Connected keepalive pings sent",
1134 keepalive_pings_sent
1135 );
1136 write_snapshot_counter!(
1137 "unhandled_frames_queued_total",
1138 "Unhandled app frames queued before connected state",
1139 unhandled_frames_queued
1140 );
1141 write_snapshot_counter!(
1142 "unhandled_frames_flushed_total",
1143 "Unhandled app frames flushed after connection",
1144 unhandled_frames_flushed
1145 );
1146 write_snapshot_counter!(
1147 "unhandled_frames_dropped_total",
1148 "Unhandled app frames dropped due to pipeline overflow",
1149 unhandled_frames_dropped
1150 );
1151 write_snapshot_counter!(
1152 "rate_global_limit_hits_total",
1153 "Global rate limit hits",
1154 rate_global_limit_hits
1155 );
1156 write_snapshot_counter!(
1157 "rate_ip_block_hits_total",
1158 "Per-IP rate limiter block hits",
1159 rate_ip_block_hits
1160 );
1161 write_snapshot_counter!(
1162 "rate_ip_block_hits_rate_exceeded_total",
1163 "Per-IP block hits caused by packet rate exceeding threshold",
1164 rate_ip_block_hits_rate_exceeded
1165 );
1166 write_snapshot_counter!(
1167 "rate_ip_block_hits_manual_total",
1168 "Per-IP block hits caused by manual address blocks",
1169 rate_ip_block_hits_manual
1170 );
1171 write_snapshot_counter!(
1172 "rate_ip_block_hits_handshake_heuristic_total",
1173 "Per-IP block hits caused by handshake heuristic guard",
1174 rate_ip_block_hits_handshake_heuristic
1175 );
1176 write_snapshot_counter!(
1177 "rate_ip_block_hits_cookie_mismatch_guard_total",
1178 "Per-IP block hits caused by cookie mismatch guard",
1179 rate_ip_block_hits_cookie_mismatch_guard
1180 );
1181 write_snapshot_counter!(
1182 "rate_addresses_blocked_total",
1183 "Addresses blocked by rate limiter",
1184 rate_addresses_blocked
1185 );
1186 write_snapshot_counter!(
1187 "rate_addresses_blocked_rate_exceeded_total",
1188 "Addresses blocked due to packet rate exceeding threshold",
1189 rate_addresses_blocked_rate_exceeded
1190 );
1191 write_snapshot_counter!(
1192 "rate_addresses_blocked_manual_total",
1193 "Addresses blocked manually",
1194 rate_addresses_blocked_manual
1195 );
1196 write_snapshot_counter!(
1197 "rate_addresses_blocked_handshake_heuristic_total",
1198 "Addresses blocked by handshake heuristic guard",
1199 rate_addresses_blocked_handshake_heuristic
1200 );
1201 write_snapshot_counter!(
1202 "rate_addresses_blocked_cookie_mismatch_guard_total",
1203 "Addresses blocked by cookie mismatch guard",
1204 rate_addresses_blocked_cookie_mismatch_guard
1205 );
1206 write_snapshot_counter!(
1207 "rate_addresses_unblocked_total",
1208 "Addresses unblocked by rate limiter",
1209 rate_addresses_unblocked
1210 );
1211 write_snapshot_gauge!(
1212 "rate_blocked_addresses",
1213 "Currently blocked addresses in rate limiter",
1214 rate_blocked_addresses
1215 );
1216 write_snapshot_gauge!(
1217 "rate_exception_addresses",
1218 "Rate limiter exception addresses",
1219 rate_exception_addresses
1220 );
1221 write_snapshot_counter!(
1222 "processing_budget_drops_total",
1223 "Connected datagrams dropped by processing budget limiter",
1224 processing_budget_drops_total
1225 );
1226 write_snapshot_counter!(
1227 "processing_budget_drops_ip_exhausted_total",
1228 "Connected datagrams dropped because per-IP processing budget was exhausted",
1229 processing_budget_drops_ip_exhausted_total
1230 );
1231 write_snapshot_counter!(
1232 "processing_budget_drops_global_exhausted_total",
1233 "Connected datagrams dropped because global processing budget was exhausted",
1234 processing_budget_drops_global_exhausted_total
1235 );
1236 write_snapshot_counter!(
1237 "processing_budget_consumed_units_total",
1238 "Total processing budget units consumed by connected datagrams",
1239 processing_budget_consumed_units_total
1240 );
1241 write_snapshot_gauge!(
1242 "processing_budget_active_ip_buckets",
1243 "Active per-IP processing budget buckets",
1244 processing_budget_active_ip_buckets
1245 );
1246 write_snapshot_counter!(
1247 "cookie_rotations_total",
1248 "Cookie key rotations",
1249 cookie_rotations
1250 );
1251 write_snapshot_counter!(
1252 "cookie_mismatch_drops_total",
1253 "Dropped handshakes due to cookie mismatch",
1254 cookie_mismatch_drops
1255 );
1256 write_snapshot_counter!(
1257 "cookie_mismatch_blocks_total",
1258 "Addresses blocked by cookie mismatch guard",
1259 cookie_mismatch_blocks
1260 );
1261 write_snapshot_counter!(
1262 "handshake_stage_cancel_drops_total",
1263 "Dropped handshakes due to stage cancel",
1264 handshake_stage_cancel_drops
1265 );
1266 write_snapshot_counter!(
1267 "handshake_req1_req2_timeouts_total",
1268 "REQ1->REQ2 handshake timeout drops",
1269 handshake_req1_req2_timeouts
1270 );
1271 write_snapshot_counter!(
1272 "handshake_reply2_connect_timeouts_total",
1273 "REPLY2->CONNECT handshake timeout drops",
1274 handshake_reply2_connect_timeouts
1275 );
1276 write_snapshot_counter!(
1277 "handshake_missing_req1_drops_total",
1278 "Dropped REQ2 packets without pending REQ1",
1279 handshake_missing_req1_drops
1280 );
1281 write_snapshot_counter!(
1282 "handshake_auto_blocks_total",
1283 "Automatic rate blocks triggered by handshake heuristics",
1284 handshake_auto_blocks
1285 );
1286 write_snapshot_counter!(
1287 "handshake_already_connected_rejects_total",
1288 "REQ1/REQ2 rejects answered with AlreadyConnected",
1289 handshake_already_connected_rejects
1290 );
1291 write_snapshot_counter!(
1292 "handshake_ip_recently_connected_rejects_total",
1293 "REQ1/REQ2 rejects answered with IpRecentlyConnected",
1294 handshake_ip_recently_connected_rejects
1295 );
1296 write_snapshot_counter!(
1297 "request2_server_addr_mismatch_drops_total",
1298 "Dropped REQ2 packets due to request2_server_addr_policy mismatch",
1299 request2_server_addr_mismatch_drops
1300 );
1301 write_snapshot_counter!(
1302 "request2_legacy_parse_hits_total",
1303 "Legacy Request2 parse path hits",
1304 request2_legacy_parse_hits
1305 );
1306 write_snapshot_counter!(
1307 "request2_legacy_drops_total",
1308 "Drops caused by legacy Request2 parse path",
1309 request2_legacy_drops
1310 );
1311 write_snapshot_counter!(
1312 "request2_ambiguous_parse_hits_total",
1313 "Ambiguous Request2 parse path hits",
1314 request2_ambiguous_parse_hits
1315 );
1316 write_snapshot_counter!(
1317 "request2_ambiguous_drops_total",
1318 "Drops caused by ambiguous Request2 parse path",
1319 request2_ambiguous_drops
1320 );
1321 write_snapshot_counter!(
1322 "proxy_inbound_reroutes_total",
1323 "Inbound packets rerouted by proxy routing",
1324 proxy_inbound_reroutes
1325 );
1326 write_snapshot_counter!(
1327 "proxy_inbound_drops_total",
1328 "Inbound packets dropped by proxy routing",
1329 proxy_inbound_drops
1330 );
1331 write_snapshot_counter!(
1332 "proxy_outbound_reroutes_total",
1333 "Outbound packets rerouted by proxy routing",
1334 proxy_outbound_reroutes
1335 );
1336 write_snapshot_counter!(
1337 "proxy_outbound_drops_total",
1338 "Outbound packets dropped by proxy routing",
1339 proxy_outbound_drops
1340 );
1341 write_snapshot_gauge_f64!(
1342 "avg_srtt_ms",
1343 "Average smoothed RTT in milliseconds",
1344 avg_srtt_ms
1345 );
1346 write_snapshot_gauge_f64!(
1347 "avg_rttvar_ms",
1348 "Average RTT variance in milliseconds",
1349 avg_rttvar_ms
1350 );
1351 write_snapshot_gauge_f64!(
1352 "avg_resend_rto_ms",
1353 "Average resend RTO in milliseconds",
1354 avg_resend_rto_ms
1355 );
1356 write_snapshot_gauge_f64!(
1357 "avg_congestion_window_packets",
1358 "Average congestion window (datagram packets)",
1359 avg_congestion_window_packets
1360 );
1361 write_snapshot_gauge_f64!(
1362 "resend_ratio",
1363 "Resend ratio (resent/reliable_sent)",
1364 resend_ratio
1365 );
1366
1367 self.write_metric_family(
1368 &mut out,
1369 MetricFamilySpec {
1370 prefix,
1371 name: "dropped_non_critical_events_total",
1372 help: "Dropped non-critical runtime events due to overflow policy",
1373 kind: TelemetryMetricKind::Counter,
1374 total_value: aggregated.dropped_non_critical_events as f64,
1375 },
1376 |shard| shard.dropped_non_critical_events as f64,
1377 );
1378
1379 out
1380 }
1381
1382 fn push_metric_records<F>(
1383 &self,
1384 records: &mut Vec<TelemetryRecord>,
1385 spec: MetricFamilySpec<'_>,
1386 extract: F,
1387 ) where
1388 F: Fn(&ShardTelemetrySnapshot) -> f64,
1389 {
1390 for (shard_id, shard) in &self.shards {
1391 records.push(TelemetryRecord {
1392 name: format!("{}_{}", spec.prefix, spec.name),
1393 help: spec.help,
1394 kind: spec.kind,
1395 value: extract(shard),
1396 labels: vec![
1397 ("scope".to_string(), "shard".to_string()),
1398 ("shard".to_string(), shard_id.to_string()),
1399 ],
1400 });
1401 }
1402
1403 records.push(TelemetryRecord {
1404 name: format!("{}_{}", spec.prefix, spec.name),
1405 help: spec.help,
1406 kind: spec.kind,
1407 value: spec.total_value,
1408 labels: vec![
1409 ("scope".to_string(), "all".to_string()),
1410 ("shard".to_string(), "all".to_string()),
1411 ],
1412 });
1413 }
1414
1415 fn write_metric_family<F>(&self, out: &mut String, spec: MetricFamilySpec<'_>, extract: F)
1416 where
1417 F: Fn(&ShardTelemetrySnapshot) -> f64,
1418 {
1419 let _ = writeln!(out, "# HELP {}_{} {}", spec.prefix, spec.name, spec.help);
1420 let _ = writeln!(
1421 out,
1422 "# TYPE {}_{} {}",
1423 spec.prefix,
1424 spec.name,
1425 spec.kind.as_prometheus_type()
1426 );
1427 for (shard_id, shard) in &self.shards {
1428 let _ = writeln!(
1429 out,
1430 "{}_{}{{scope=\"shard\",shard=\"{shard_id}\"}} {}",
1431 spec.prefix,
1432 spec.name,
1433 extract(shard)
1434 );
1435 }
1436 let _ = writeln!(
1437 out,
1438 "{}_{}{{scope=\"all\",shard=\"all\"}} {}",
1439 spec.prefix, spec.name, spec.total_value
1440 );
1441 }
1442}
1443
1444#[derive(Debug, Clone)]
1445pub struct TelemetryExporter {
1446 registry: TelemetryRegistry,
1447 prefix: String,
1448}
1449
1450impl Default for TelemetryExporter {
1451 fn default() -> Self {
1452 Self {
1453 registry: TelemetryRegistry::new(),
1454 prefix: "raknet".to_string(),
1455 }
1456 }
1457}
1458
1459impl TelemetryExporter {
1460 pub fn new() -> Self {
1461 Self::default()
1462 }
1463
1464 pub fn with_prefix(prefix: impl Into<String>) -> Self {
1465 Self {
1466 prefix: prefix.into(),
1467 ..Self::default()
1468 }
1469 }
1470
1471 pub fn prefix(&self) -> &str {
1472 &self.prefix
1473 }
1474
1475 pub fn set_prefix(&mut self, prefix: impl Into<String>) {
1476 self.prefix = prefix.into();
1477 }
1478
1479 pub fn clear(&mut self) {
1480 self.registry.clear();
1481 }
1482
1483 pub fn shard_count(&self) -> usize {
1484 self.registry.shard_count()
1485 }
1486
1487 pub fn ingest_snapshot(
1488 &mut self,
1489 shard_id: usize,
1490 snapshot: TransportMetricsSnapshot,
1491 dropped_non_critical_events: u64,
1492 ) {
1493 self.registry
1494 .ingest_snapshot(shard_id, snapshot, dropped_non_critical_events);
1495 }
1496
1497 pub fn ingest_server_event(&mut self, event: &RaknetServerEvent) -> bool {
1498 self.registry.ingest_server_event(event)
1499 }
1500
1501 pub fn aggregate(&self) -> AggregatedTelemetrySnapshot {
1502 self.registry.aggregate()
1503 }
1504
1505 pub fn render_prometheus(&self) -> String {
1506 self.registry.render_prometheus_with_prefix(&self.prefix)
1507 }
1508
1509 pub fn records(&self) -> Vec<TelemetryRecord> {
1510 self.registry.to_records_with_prefix(&self.prefix)
1511 }
1512
1513 pub fn registry(&self) -> &TelemetryRegistry {
1514 &self.registry
1515 }
1516}
1517
#[cfg(test)]
mod tests {
    use super::{TelemetryExporter, TelemetryMetricKind, TelemetryRecord, TelemetryRegistry};
    use crate::server::RaknetServerEvent;
    use crate::transport::TransportMetricsSnapshot;

    /// Wraps a snapshot in the `Metrics` server event variant.
    fn metrics_event(
        shard_id: usize,
        snapshot: TransportMetricsSnapshot,
        dropped_non_critical_events: u64,
    ) -> RaknetServerEvent {
        let snapshot = Box::new(snapshot);
        RaknetServerEvent::Metrics {
            shard_id,
            snapshot,
            dropped_non_critical_events,
        }
    }

    /// Reports whether `record` carries the label pair `key="value"`.
    fn has_label(record: &TelemetryRecord, key: &str, value: &str) -> bool {
        record.labels.iter().any(|(k, v)| k == key && v == value)
    }

    /// Finds the first record named `name` labelled `scope="shard"` and `shard="<shard>"`.
    fn shard_record<'a>(
        records: &'a [TelemetryRecord],
        name: &str,
        shard: &str,
    ) -> Option<&'a TelemetryRecord> {
        records.iter().find(|record| {
            record.name == name
                && has_label(record, "scope", "shard")
                && has_label(record, "shard", shard)
        })
    }

    #[test]
    fn ingest_server_event_updates_shard_snapshot() {
        let event = metrics_event(
            3,
            TransportMetricsSnapshot {
                session_count: 2,
                ingress_datagrams: 11,
                ..Default::default()
            },
            9,
        );

        let mut registry = TelemetryRegistry::new();
        assert!(registry.ingest_server_event(&event));
        assert_eq!(registry.shard_count(), 1);

        let stored = registry
            .shard_snapshot(3)
            .expect("shard snapshot should exist");
        assert_eq!(stored.snapshot.session_count, 2);
        assert_eq!(stored.snapshot.ingress_datagrams, 11);
        assert_eq!(stored.dropped_non_critical_events, 9);
    }

    #[test]
    fn aggregate_recomputes_weighted_averages_and_resend_ratio() {
        // The expected averages below correspond to weighting the first shard
        // twice as heavily as the second one.
        let heavier_shard = TransportMetricsSnapshot {
            session_count: 2,
            ingress_datagrams: 100,
            resent_datagrams: 20,
            reliable_sent_datagrams: 200,
            processing_budget_drops_total: 3,
            processing_budget_consumed_units_total: 10_000,
            avg_srtt_ms: 10.0,
            avg_rttvar_ms: 3.0,
            avg_resend_rto_ms: 25.0,
            avg_congestion_window_packets: 8.0,
            ..Default::default()
        };
        let lighter_shard = TransportMetricsSnapshot {
            session_count: 1,
            ingress_datagrams: 50,
            resent_datagrams: 10,
            reliable_sent_datagrams: 100,
            processing_budget_drops_total: 5,
            processing_budget_consumed_units_total: 20_000,
            avg_srtt_ms: 40.0,
            avg_rttvar_ms: 6.0,
            avg_resend_rto_ms: 55.0,
            avg_congestion_window_packets: 4.0,
            ..Default::default()
        };

        let mut registry = TelemetryRegistry::new();
        registry.ingest_snapshot(0, heavier_shard, 2);
        registry.ingest_snapshot(1, lighter_shard, 7);

        let total = registry.aggregate();
        let merged = &total.snapshot;

        // Additive fields are summed across shards.
        assert_eq!(merged.session_count, 3);
        assert_eq!(merged.ingress_datagrams, 150);
        assert_eq!(merged.resent_datagrams, 30);
        assert_eq!(merged.reliable_sent_datagrams, 300);
        assert_eq!(merged.processing_budget_drops_total, 8);
        assert_eq!(merged.processing_budget_consumed_units_total, 30_000);
        assert_eq!(total.dropped_non_critical_events, 9);

        // Averaged fields and the derived resend ratio.
        assert!((merged.avg_srtt_ms - 20.0).abs() < 1e-9);
        assert!((merged.avg_rttvar_ms - 4.0).abs() < 1e-9);
        assert!((merged.avg_resend_rto_ms - 35.0).abs() < 1e-9);
        assert!((merged.avg_congestion_window_packets - (20.0 / 3.0)).abs() < 1e-9);
        assert!((merged.resend_ratio - 0.1).abs() < 1e-9);
    }

    #[test]
    fn prometheus_render_contains_shard_and_all_labels() {
        let mut registry = TelemetryRegistry::new();
        registry.ingest_snapshot(
            2,
            TransportMetricsSnapshot {
                session_count: 1,
                ingress_datagrams: 9,
                sessions_started_total: 2,
                sessions_closed_total: 1,
                packets_forwarded_total: 7,
                bytes_forwarded_total: 321,
                ack_out_total: 4,
                nack_out_total: 1,
                processing_budget_drops_total: 2,
                ..Default::default()
            },
            5,
        );

        let body = registry.render_prometheus_with_prefix("raknet");

        // Checked in order, so the first missing line is the one reported.
        let expected_lines = [
            "# HELP raknet_sessions_active Active RakNet sessions",
            "# TYPE raknet_sessions_active gauge",
            r#"raknet_sessions_active{scope="shard",shard="2"} 1"#,
            r#"raknet_sessions_started_total{scope="all",shard="all"} 2"#,
            r#"raknet_packets_forwarded_total{scope="all",shard="all"} 7"#,
            r#"raknet_bytes_forwarded_total{scope="all",shard="all"} 321"#,
            r#"raknet_ack_out_total{scope="all",shard="all"} 4"#,
            r#"raknet_nack_out_total{scope="all",shard="all"} 1"#,
            r#"raknet_processing_budget_drops_total{scope="all",shard="all"} 2"#,
            "# HELP raknet_session_count Active RakNet sessions",
            "# TYPE raknet_session_count gauge",
            r#"raknet_session_count{scope="shard",shard="2"} 1"#,
            r#"raknet_session_count{scope="all",shard="all"} 1"#,
            r#"raknet_dropped_non_critical_events_total{scope="shard",shard="2"} 5"#,
            r#"raknet_dropped_non_critical_events_total{scope="all",shard="all"} 5"#,
        ];
        for line in expected_lines {
            assert!(
                body.contains(line),
                "missing line in Prometheus output: {line}"
            );
        }
    }

    #[test]
    fn record_export_marks_metric_kind_and_scope_labels() {
        let mut registry = TelemetryRegistry::new();
        registry.ingest_snapshot(
            1,
            TransportMetricsSnapshot {
                session_count: 4,
                sessions_started_total: 10,
                ..Default::default()
            },
            0,
        );

        let records = registry.to_records_with_prefix("demo");

        let canonical = shard_record(&records, "demo_sessions_active", "1")
            .expect("sessions_active shard record should exist");
        assert_eq!(canonical.kind, TelemetryMetricKind::Gauge);
        assert!((canonical.value - 4.0).abs() < 1e-9);

        let target = shard_record(&records, "demo_session_count", "1")
            .expect("session_count shard record should exist");
        assert_eq!(target.kind, TelemetryMetricKind::Gauge);
        assert!((target.value - 4.0).abs() < 1e-9);
    }

    #[test]
    fn telemetry_exporter_uses_prefix_and_ingests_metrics_events() {
        let event = metrics_event(
            0,
            TransportMetricsSnapshot {
                session_count: 2,
                sessions_started_total: 5,
                ..Default::default()
            },
            0,
        );

        let mut exporter = TelemetryExporter::with_prefix("demo");
        assert!(exporter.ingest_server_event(&event));
        assert_eq!(exporter.shard_count(), 1);

        let body = exporter.render_prometheus();
        for metric in ["demo_sessions_active", "demo_sessions_started_total"] {
            assert!(body.contains(metric), "missing metric name: {metric}");
        }
    }
}