rlx_llada2/tide/
stats.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16// RLX — TIDE offload_stats aggregation.
17
18use rlx_runtime::{ExpertPool, ExpertPoolStats, MoeResidencyStats};
19
/// Cumulative counters aligned with TIDE `LLaDA2MoeSparseMoeBlock.offload_stats`.
///
/// Every field is a plain `u64` accumulator that starts at zero via [`Default`].
/// The unit of the `*_time` fields is not established anywhere in this file —
/// NOTE(review): confirm it with the code that records them.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TideOffloadStats {
    // Token and call counters. `residency_stats_to_tide` fills these from
    // `MoeResidencyStats` (`*_calls` come from its `*_expert_calls` fields).
    pub cpu_tokens: u64,
    pub gpu_tokens: u64,
    pub cpu_calls: u64,
    pub gpu_calls: u64,
    // Timing accumulators. The conversion helpers in this file leave them at
    // zero; they only change through `merge` or direct assignment.
    pub cpu_compute_time: u64,
    pub gpu_compute_time: u64,
    pub cpu_tokens_move_time: u64,
    pub gpu_tokens_move_time: u64,
    pub experts_move_time: u64,
    // Expert movement counters. `pool_stats_to_tide` fills these from
    // `ExpertPoolStats`.
    pub promotions: u64,
    pub demotions: u64,
}

impl TideOffloadStats {
    /// Adds every counter of `other` onto the matching counter of `self`.
    ///
    /// Additions saturate at `u64::MAX`: a counter that would overflow stays
    /// pinned at the maximum instead of panicking (debug builds) or wrapping
    /// around to a small value (release builds).
    pub fn merge(&mut self, other: &Self) {
        // Exhaustive destructuring: adding a field to the struct without
        // merging it here becomes a compile error instead of a silent gap.
        let Self {
            cpu_tokens,
            gpu_tokens,
            cpu_calls,
            gpu_calls,
            cpu_compute_time,
            gpu_compute_time,
            cpu_tokens_move_time,
            gpu_tokens_move_time,
            experts_move_time,
            promotions,
            demotions,
        } = other;

        self.cpu_tokens = self.cpu_tokens.saturating_add(*cpu_tokens);
        self.gpu_tokens = self.gpu_tokens.saturating_add(*gpu_tokens);
        self.cpu_calls = self.cpu_calls.saturating_add(*cpu_calls);
        self.gpu_calls = self.gpu_calls.saturating_add(*gpu_calls);
        self.cpu_compute_time = self.cpu_compute_time.saturating_add(*cpu_compute_time);
        self.gpu_compute_time = self.gpu_compute_time.saturating_add(*gpu_compute_time);
        self.cpu_tokens_move_time = self
            .cpu_tokens_move_time
            .saturating_add(*cpu_tokens_move_time);
        self.gpu_tokens_move_time = self
            .gpu_tokens_move_time
            .saturating_add(*gpu_tokens_move_time);
        self.experts_move_time = self.experts_move_time.saturating_add(*experts_move_time);
        self.promotions = self.promotions.saturating_add(*promotions);
        self.demotions = self.demotions.saturating_add(*demotions);
    }

    /// Map to TIDE `get_offload_stats()` key names.
    ///
    /// Returns a freshly allocated map with one entry per field, keyed by the
    /// field's name. The map is a snapshot; later changes to `self` are not
    /// reflected in it.
    pub fn as_tide_dict(&self) -> std::collections::HashMap<&'static str, u64> {
        // Building from a fixed-size array sizes the table once up front.
        std::collections::HashMap::from([
            ("cpu_tokens", self.cpu_tokens),
            ("gpu_tokens", self.gpu_tokens),
            ("cpu_calls", self.cpu_calls),
            ("gpu_calls", self.gpu_calls),
            ("cpu_compute_time", self.cpu_compute_time),
            ("gpu_compute_time", self.gpu_compute_time),
            ("cpu_tokens_move_time", self.cpu_tokens_move_time),
            ("gpu_tokens_move_time", self.gpu_tokens_move_time),
            ("experts_move_time", self.experts_move_time),
            ("promotions", self.promotions),
            ("demotions", self.demotions),
        ])
    }
}
68
69pub fn pool_stats_to_tide(stats: &ExpertPoolStats) -> TideOffloadStats {
70    TideOffloadStats {
71        promotions: stats.promotions,
72        demotions: stats.demotions,
73        ..Default::default()
74    }
75}
76
77pub fn residency_stats_to_tide(stats: &MoeResidencyStats) -> TideOffloadStats {
78    TideOffloadStats {
79        cpu_tokens: stats.cpu_tokens,
80        gpu_tokens: stats.gpu_tokens,
81        cpu_calls: stats.cpu_expert_calls,
82        gpu_calls: stats.gpu_expert_calls,
83        ..Default::default()
84    }
85}
86
87/// Sum per-layer pool stats + optional CPU residency accounting from last forward.
88pub fn aggregate_offload_stats(
89    pools: &[ExpertPool],
90    residency: Option<&MoeResidencyStats>,
91) -> TideOffloadStats {
92    let mut out = TideOffloadStats::default();
93    for pool in pools {
94        out.merge(&pool_stats_to_tide(pool.stats()));
95    }
96    if let Some(r) = residency {
97        out.merge(&residency_stats_to_tide(r));
98    }
99    out
100}
rlx_llada2/tide/stats.rs

rlx_llada2/tide/
stats.rs