1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
//! GPU management
mod nvml;
pub use nvml::NvmlGpu;
pub mod power;
// GPU management.
// As long as there is a struct that implements the GpuManager trait,
// the code below will work with any GPU management library.
use std::time::Instant;
use tokio::sync::mpsc::{Sender, UnboundedReceiver, UnboundedSender};
use tracing::Span;
use crate::error::ZeusdError;
/// A trait for structs that manage one GPU.
///
/// This trait abstracts over different GPU management libraries, so the rest
/// of this module only depends on the operations declared here.
/// Currently, the abstraction mainly exists to facilitate testing.
///
/// Every operation reports failure as a [`ZeusdError`].
pub trait GpuManager {
/// Get the number of GPUs visible in the node.
///
/// This is an associated function (it takes no `self`). The `Self: Sized`
/// bound keeps it off trait objects.
fn device_count() -> Result<u32, ZeusdError>
where
Self: Sized;
/// Set the persistence mode of the GPU.
///
/// `enabled` is `true` to turn persistence mode on and `false` to turn it off.
fn set_persistence_mode(&mut self, enabled: bool) -> Result<(), ZeusdError>;
/// Set the power management limit in milliwatts.
fn set_power_management_limit(&mut self, power_limit: u32) -> Result<(), ZeusdError>;
/// Set the GPU's locked clock range in MHz.
///
/// `min_clock_mhz` and `max_clock_mhz` are the lower and upper ends of the range.
fn set_gpu_locked_clocks(
&mut self,
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError>;
/// Reset the GPU's locked clocks.
fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError>;
/// Set the memory locked clock range in MHz.
///
/// `min_clock_mhz` and `max_clock_mhz` are the lower and upper ends of the range.
fn set_mem_locked_clocks(
&mut self,
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError>;
/// Reset the memory locked clocks.
fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError>;
/// Read instantaneous power draw in milliwatts.
fn get_instant_power_mw(&mut self) -> Result<u32, ZeusdError>;
/// Get total energy consumption since driver load in millijoules.
fn get_total_energy_consumption(&mut self) -> Result<u64, ZeusdError>;
}
/// Response from a GPU command.
///
/// Produced by `GpuCommand::execute` and delivered to callers that wait for
/// the command to finish.
#[derive(Debug)]
pub enum GpuResponse {
/// The command completed successfully and carries no data.
Ok,
/// Total energy consumption reported by the GPU, in millijoules.
Energy { energy_mj: u64 },
}
/// A request to execute a GPU command.
///
/// This is the type that is sent to the GPU management background task.
/// The optional `Sender` is used to send a response back to the caller if the
/// user wanted to block until the command is done executing.
/// The `Instant` object is when the request was received by the server.
/// It's used to log how long it took until the command was executed on the GPU.
/// The `Span` object is used to propagate tracing context starting from the request.
pub type GpuCommandRequest = (
// The command to run on the GPU.
GpuCommand,
// Where to deliver the result; `None` when the caller does not wait for it.
Option<Sender<Result<GpuResponse, ZeusdError>>>,
// When the server received the request.
Instant,
// Tracing context of the originating request.
Span,
);
/// A collection of GPU management tasks.
///
/// This struct is used to send commands to the GPU management tasks.
/// It's also application state that gets cloned and passed to request handlers by actix-web.
/// Cloning is derived, so each clone holds its own copies of the senders.
#[derive(Clone)]
pub struct GpuManagementTasks {
// Senders to the GPU management tasks, indexed by GPU ID.
senders: Vec<UnboundedSender<GpuCommandRequest>>,
}
impl GpuManagementTasks {
    /// Spawn one background management task per GPU and keep a sender to each.
    ///
    /// The position of a GPU in `gpus` becomes its GPU ID. The function is
    /// generic over the GPU manager type so that tests can substitute their own.
    /// As written, this always returns `Ok`.
    pub fn start<T>(gpus: Vec<T>) -> Result<Self, ZeusdError>
    where
        T: GpuManager + Send + 'static,
    {
        let senders: Vec<_> = gpus
            .into_iter()
            .enumerate()
            .map(|(gpu_id, gpu)| {
                // Commands for this GPU travel over a dedicated unbounded channel.
                let (command_tx, command_rx) = tokio::sync::mpsc::unbounded_channel();
                // The task ends on its own once the server shuts down and the
                // last sender for this channel has been dropped.
                tokio::spawn(gpu_management_task(gpu, command_rx));
                tracing::info!("Background task for GPU {} successfully spawned", gpu_id);
                command_tx
            })
            .collect();
        Ok(Self { senders })
    }

    /// Return the number of GPUs managed by these tasks.
    pub fn device_count(&self) -> usize {
        self.senders.len()
    }

    /// Queue a command for the given GPU and return right away.
    ///
    /// The command's result is not reported back; it is only logged via tracing.
    /// `Ok(())` means the command was *sent* successfully, not that it succeeded.
    ///
    /// # Errors
    ///
    /// Returns `ZeusdError::GpuNotFoundError` when `gpu_id` is out of range, or
    /// the converted channel error when the command cannot be sent.
    pub fn send_command_nonblocking(
        &self,
        gpu_id: usize,
        command: GpuCommand,
        request_start_time: Instant,
    ) -> Result<(), ZeusdError> {
        let sender = self
            .senders
            .get(gpu_id)
            .ok_or(ZeusdError::GpuNotFoundError(gpu_id))?;
        // No reply channel: nobody waits for the outcome.
        let request = (command, None, request_start_time, Span::current());
        sender.send(request).map_err(ZeusdError::from)
    }

    /// Queue a command for the given GPU and wait until it has been executed.
    ///
    /// # Errors
    ///
    /// Returns `ZeusdError::GpuNotFoundError` when `gpu_id` is out of range, the
    /// converted channel error when the command cannot be sent,
    /// `ZeusdError::GpuManagementTaskTerminatedError` when the reply channel
    /// closes without a response, or whatever error the command itself produced.
    pub async fn send_command_blocking(
        &self,
        gpu_id: usize,
        command: GpuCommand,
        request_start_time: Instant,
    ) -> Result<GpuResponse, ZeusdError> {
        let sender = self
            .senders
            .get(gpu_id)
            .ok_or(ZeusdError::GpuNotFoundError(gpu_id))?;
        // Single-slot channel on which the task hands back the result.
        let (reply_tx, mut reply_rx) = tokio::sync::mpsc::channel(1);
        sender
            .send((command, Some(reply_tx), request_start_time, Span::current()))
            .map_err(ZeusdError::from)?;
        // `None` here means the reply sender was dropped without answering.
        reply_rx
            .recv()
            .await
            .ok_or(ZeusdError::GpuManagementTaskTerminatedError(gpu_id))?
    }
}
/// A asynchronous Tokio background task that manages one GPU.
///
/// Listens for commands on a channel and executes them on the GPU it manages.
async fn gpu_management_task<T: GpuManager>(
mut gpu: T,
mut rx: UnboundedReceiver<GpuCommandRequest>,
) {
while let Some((command, response, start_time, span)) = rx.recv().await {
let _span_guard = span.enter();
let result = command.execute(&mut gpu, start_time);
if let Some(response) = response {
if response.send(result).await.is_err() {
tracing::error!("Failed to send response to caller");
}
}
}
}
/// A GPU command that can be executed on a GPU.
///
/// Each variant corresponds to one operation of the `GpuManager` trait and is
/// dispatched to it by `GpuCommand::execute`.
#[derive(Debug, Clone)]
pub enum GpuCommand {
/// Enable or disable persistence mode.
SetPersistenceMode { enabled: bool },
/// Set the power management limit in milliwatts.
SetPowerLimit { power_limit_mw: u32 },
/// Set the GPU's locked clock range in MHz.
SetGpuLockedClocks {
min_clock_mhz: u32,
max_clock_mhz: u32,
},
/// Reset the GPU's locked clocks.
ResetGpuLockedClocks,
/// Set the GPU's memory locked clock range in MHz.
SetMemLockedClocks {
min_clock_mhz: u32,
max_clock_mhz: u32,
},
/// Reset the GPU's memory locked clocks.
ResetMemLockedClocks,
/// Get total energy consumption since driver load.
///
/// On success the response is `GpuResponse::Energy`.
GetTotalEnergyConsumption,
}
/// Emit a tracing event describing how a GPU command ended, with timing fields.
///
/// A successful `result` is logged at `info` level with `ok_msg`; a failed one
/// is logged at `warn` level with `err_msg`. Both events carry:
///
/// - `time_to_command_done`: time elapsed since `request_arrival_time`.
/// - `zeusd_overhead`: time between `request_arrival_time` and
///   `command_start_time`.
///
/// Only whether `result` is `Ok` or `Err` is inspected; its payload is ignored.
fn log_command_result<T>(
    result: &Result<T, ZeusdError>,
    request_arrival_time: Instant,
    command_start_time: Instant,
    ok_msg: &str,
    err_msg: &str,
) {
    // Time the request waited between arrival and the start of the command.
    let overhead = command_start_time.duration_since(request_arrival_time);
    match result {
        Ok(_) => tracing::info!(
            time_to_command_done = ?request_arrival_time.elapsed(),
            zeusd_overhead = ?overhead,
            "{ok_msg}",
        ),
        Err(_) => tracing::warn!(
            time_to_command_done = ?request_arrival_time.elapsed(),
            zeusd_overhead = ?overhead,
            "{err_msg}",
        ),
    }
}
impl GpuCommand {
    /// Run this command on `device`, log the outcome, and return the result.
    ///
    /// `request_arrival_time` is when the request reached the server. It is
    /// passed to the logger together with the time at which execution started,
    /// so the log shows both total latency and the overhead before the command.
    ///
    /// Returns `GpuResponse::Energy` for `GetTotalEnergyConsumption` and
    /// `GpuResponse::Ok` for every other command. Errors from the device are
    /// passed through unchanged.
    fn execute<T>(
        &self,
        device: &mut T,
        request_arrival_time: Instant,
    ) -> Result<GpuResponse, ZeusdError>
    where
        T: GpuManager,
    {
        let command_start_time = Instant::now();

        // Every arm calls the device first, then builds the success and
        // failure log messages. Logging happens once, after the match.
        let (outcome, ok_msg, err_msg) = match *self {
            Self::SetPersistenceMode { enabled } => {
                let outcome = device
                    .set_persistence_mode(enabled)
                    .map(|()| GpuResponse::Ok);
                let (state, verb) = if enabled {
                    ("enabled", "enable")
                } else {
                    ("disabled", "disable")
                };
                (
                    outcome,
                    format!("Persistence mode {state}"),
                    format!("Cannot {verb} persistence mode"),
                )
            }
            Self::SetPowerLimit { power_limit_mw } => {
                let outcome = device
                    .set_power_management_limit(power_limit_mw)
                    .map(|()| GpuResponse::Ok);
                // Integer division: the log shows whole watts only.
                let watts = power_limit_mw / 1000;
                (
                    outcome,
                    format!("Power limit set to {watts} W"),
                    format!("Cannot set power limit to {watts} W"),
                )
            }
            Self::SetGpuLockedClocks {
                min_clock_mhz,
                max_clock_mhz,
            } => (
                device
                    .set_gpu_locked_clocks(min_clock_mhz, max_clock_mhz)
                    .map(|()| GpuResponse::Ok),
                format!("GPU frequency set to [{min_clock_mhz}, {max_clock_mhz}] MHz"),
                format!("Cannot set GPU frequency to [{min_clock_mhz}, {max_clock_mhz}] MHz"),
            ),
            Self::ResetGpuLockedClocks => (
                device.reset_gpu_locked_clocks().map(|()| GpuResponse::Ok),
                String::from("GPU locked clocks reset"),
                String::from("Cannot reset GPU locked clocks"),
            ),
            Self::SetMemLockedClocks {
                min_clock_mhz,
                max_clock_mhz,
            } => (
                device
                    .set_mem_locked_clocks(min_clock_mhz, max_clock_mhz)
                    .map(|()| GpuResponse::Ok),
                format!("Memory locked clocks set to [{min_clock_mhz}, {max_clock_mhz}] MHz"),
                format!(
                    "Cannot set memory locked clocks to [{min_clock_mhz}, {max_clock_mhz}] MHz"
                ),
            ),
            Self::ResetMemLockedClocks => (
                device.reset_mem_locked_clocks().map(|()| GpuResponse::Ok),
                String::from("Memory locked clocks reset"),
                String::from("Cannot reset memory locked clocks"),
            ),
            Self::GetTotalEnergyConsumption => (
                device
                    .get_total_energy_consumption()
                    .map(|energy_mj| GpuResponse::Energy { energy_mj }),
                String::from("Total energy consumption read"),
                String::from("Cannot read total energy consumption"),
            ),
        };

        log_command_result(
            &outcome,
            request_arrival_time,
            command_start_time,
            &ok_msg,
            &err_msg,
        );
        outcome
    }
}