crankshaft_engine/task/
resources.rs

1//! Task resource specifications.
2
3use std::borrow::Cow;
4use std::collections::HashMap;
5
6use bollard::secret::DeviceRequest;
7use bollard::secret::HostConfig;
8use bollard::secret::TaskSpecResources;
9use bon::Builder;
10use crankshaft_config::backend::Defaults;
11use tracing::debug;
12
13/// A set of requested resources.
14#[derive(Builder, Clone, Debug)]
15#[builder(builder_type = Builder)]
16pub struct Resources {
17    /// The requested number of CPU cores.
18    ///
19    /// Partial CPU requests are supported but not always respected depending on
20    /// the backend.
21    pub(crate) cpu: Option<f64>,
22
23    /// The requested CPU limit.
24    ///
25    /// Not all backends support limits on CPU usage.
26    pub(crate) cpu_limit: Option<f64>,
27
28    /// The requested random access memory size (in GiB).
29    pub(crate) ram: Option<f64>,
30
31    /// The requested RAM limit (in GiB).
32    ///
33    /// Not all backends support limits on memory usage.
34    pub(crate) ram_limit: Option<f64>,
35
36    /// The requested disk size (in GiB).
37    pub(crate) disk: Option<f64>,
38
39    /// Whether or not the task may use preemptible resources.
40    #[builder(into)]
41    pub(crate) preemptible: Option<bool>,
42
43    /// The associated compute zones.
44    #[builder(into, default)]
45    pub(crate) zones: Vec<String>,
46
47    /// The number of GPUs requested.
48    #[builder(into)]
49    pub(crate) gpu: Option<u64>,
50}
51
52impl Resources {
53    /// The number of CPU cores.
54    pub fn cpu(&self) -> Option<f64> {
55        self.cpu
56    }
57
58    /// The CPU limit.
59    pub fn cpu_limit(&self) -> Option<f64> {
60        self.cpu_limit
61    }
62
63    /// The amount of RAM in gibibytes (GiB).
64    pub fn ram(&self) -> Option<f64> {
65        self.ram
66    }
67
68    /// The RAM limit in gibibytes (GiB).
69    pub fn ram_limit(&self) -> Option<f64> {
70        self.ram_limit
71    }
72
73    /// The amount of disk space in gibibytes (GiB).
74    pub fn disk(&self) -> Option<f64> {
75        self.disk
76    }
77
78    /// Whether the instance should be preemptible.
79    pub fn preemptible(&self) -> Option<bool> {
80        self.preemptible
81    }
82
83    /// The set of requested zones.
84    pub fn zones(&self) -> &[String] {
85        &self.zones
86    }
87
88    /// The number of GPUs requested.
89    pub fn gpu(&self) -> Option<u64> {
90        self.gpu
91    }
92
93    /// Applies any provided options in `other` to the [`Resources`].
94    pub fn apply(mut self, other: &Self) -> Self {
95        if let Some(cores) = other.cpu {
96            self.cpu = Some(cores);
97        }
98
99        if let Some(limit) = other.cpu_limit {
100            self.cpu_limit = Some(limit);
101        }
102
103        if let Some(ram) = other.ram {
104            self.ram = Some(ram);
105        }
106
107        if let Some(limit) = other.ram_limit {
108            self.ram_limit = Some(limit);
109        }
110
111        if let Some(disk) = other.disk {
112            self.disk = Some(disk);
113        }
114
115        if let Some(preemptible) = other.preemptible {
116            self.preemptible = Some(preemptible);
117        }
118
119        if let Some(gpu) = other.gpu {
120            self.gpu = Some(gpu);
121        }
122
123        self.zones = other.zones.clone();
124        self
125    }
126
127    /// Creates a [`HashMap`] representation of the resources.
128    ///
129    /// This is used when doing command substitution for generic backends.
130    // NOTE: keys in this HashMap are intended to _exactly_ match the names of
131    // the fields in the struct. This is to ensure that mapping between the
132    // underlying code and the configuration objects for generic configuration
133    // is as seamless as possible (no extra translations unnecessarily).
134    //
135    // Please do not deviate from this unless you have a really strong,
136    // articulated reason that is agreed upon by the core developers.
137    pub fn to_hashmap(&self) -> HashMap<Cow<'static, str>, Cow<'static, str>> {
138        let mut map = HashMap::new();
139
140        if let Some(cores) = self.cpu {
141            map.insert("cpu".into(), cores.to_string().into());
142        }
143
144        if let Some(limit) = self.cpu_limit {
145            map.insert("cpu_limit".into(), limit.to_string().into());
146        }
147
148        if let Some(ram) = self.ram {
149            map.insert("ram".into(), ram.to_string().into());
150            // TODO(clay): improve this.
151            map.insert("ram_mb".into(), (ram * 1024.0).to_string().into());
152        }
153
154        if let Some(limit) = self.ram_limit {
155            map.insert("ram_limit".into(), limit.to_string().into());
156        }
157
158        if let Some(disk) = self.disk {
159            map.insert("disk".into(), disk.to_string().into());
160            // TODO(clay): improve this.
161            map.insert("disk_mb".into(), (disk * 1024.0).to_string().into());
162        }
163
164        if let Some(preemptible) = self.preemptible {
165            map.insert("preemptible".into(), preemptible.to_string().into());
166        }
167
168        if let Some(gpu) = self.gpu {
169            map.insert("gpu".into(), gpu.to_string().into());
170        }
171
172        // Zones are explicitly not included.
173        map
174    }
175}
176
177impl Default for Resources {
178    fn default() -> Self {
179        Self {
180            cpu: Some(1.0),
181            cpu_limit: None,
182            ram: Some(2.0),
183            ram_limit: None,
184            disk: Some(8.0),
185            preemptible: Some(false),
186            zones: Default::default(),
187            gpu: None,
188        }
189    }
190}
191
192impl From<&Defaults> for Resources {
193    fn from(defaults: &Defaults) -> Self {
194        Self {
195            cpu: defaults.cpu(),
196            cpu_limit: defaults.cpu(),
197            ram: defaults.ram(),
198            ram_limit: defaults.ram_limit(),
199            disk: defaults.disk(),
200            preemptible: Default::default(),
201            zones: Default::default(),
202            gpu: defaults.gpu(),
203        }
204    }
205}
206
207impl From<&Resources> for HostConfig {
208    fn from(resources: &Resources) -> Self {
209        let mut host_config = Self::default();
210
211        // Note: Docker doesn't have a CPU reservation for containers
212        if resources.cpu().is_some() {
213            debug!(
214                "ignoring minimum CPU reservation for a Docker daemon not participating in a swarm"
215            );
216        }
217
218        if let Some(cpu) = resources.cpu_limit() {
219            host_config.nano_cpus = Some((cpu * 1_000_000_000.0) as i64);
220        }
221
222        // Note: Docker doesn't have a memory reservation for containers
223        if resources.ram().is_some() {
224            debug!(
225                "ignoring minimum memory reservation for a Docker daemon not participating in a \
226                 swarm"
227            );
228        }
229
230        // The Docker `memory_reservation` setting acts as a soft limit and not as
231        // something informing a scheduler of minimum requirements for the container
232
233        if let Some(ram) = resources.ram_limit() {
234            host_config.memory = Some((ram * 1024. * 1024. * 1024.) as i64);
235        }
236
237        if let Some(disk) = resources.disk() {
238            let mut storage_opt: HashMap<String, String> = HashMap::new();
239            storage_opt.insert("size".to_string(), disk.to_string());
240            host_config.storage_opt = Some(storage_opt);
241        }
242
243        if let Some(gpu) = resources.gpu() {
244            // TODO(clay): Only NVIDIA GPUs are supported at the moment. Add
245            // support for other GPU vendors (AMD, Intel) in the future.
246            //
247            // These are specified as documented in
248            // https://docs.docker.com/compose/how-tos/gpu-support/.
249            const NVIDIA_DRIVER: &str = "nvidia";
250            const GPU_CAPABILITY: &str = "gpu";
251
252            host_config.device_requests = Some(vec![DeviceRequest {
253                driver: Some(NVIDIA_DRIVER.into()),
254                count: Some(gpu as i64),
255                device_ids: None,
256                capabilities: Some(vec![vec![GPU_CAPABILITY.into()]]),
257                options: None,
258            }]);
259        }
260
261        host_config
262    }
263}
264
265impl From<&Resources> for TaskSpecResources {
266    fn from(resources: &Resources) -> Self {
267        let mut spec = Self::default();
268
269        if let Some(cpu) = resources.cpu() {
270            spec.reservations.get_or_insert_default().nano_cpus =
271                Some((cpu * 1_000_000_000.0) as i64);
272        }
273
274        if let Some(cpu) = resources.cpu_limit() {
275            spec.limits.get_or_insert_default().nano_cpus = Some((cpu * 1_000_000_000.0) as i64);
276        }
277
278        if let Some(ram) = resources.ram() {
279            spec.reservations.get_or_insert_default().memory_bytes =
280                Some((ram * 1024. * 1024. * 1024.) as i64);
281        }
282
283        if let Some(ram) = resources.ram_limit() {
284            spec.limits.get_or_insert_default().memory_bytes =
285                Some((ram * 1024. * 1024. * 1024.) as i64);
286        }
287
288        spec
289    }
290}
291
292impl From<Resources> for tes::v1::types::task::Resources {
293    fn from(resources: Resources) -> Self {
294        fn gib_to_gb(v: f64) -> f64 {
295            (v * (1024.0 * 1024.0 * 1024.0)) / (1000.0 * 1000.0 * 1000.0)
296        }
297
298        Self {
299            cpu_cores: resources.cpu().map(|inner| inner.ceil() as i32),
300            ram_gb: resources.ram().map(gib_to_gb),
301            disk_gb: resources.disk().map(gib_to_gb),
302            preemptible: resources.preemptible(),
303            zones: if resources.zones.is_empty() {
304                None
305            } else {
306                Some(resources.zones)
307            },
308            backend_parameters: None,
309            backend_parameters_strict: None,
310        }
311    }
312}
313
#[cfg(test)]
mod test {
    use approx::assert_relative_eq;

    use super::*;

    /// Builds a [`Resources`] with every option unset and no zones, to be
    /// used as the base for struct-update syntax in the tests below.
    fn unset() -> Resources {
        Resources {
            cpu: None,
            cpu_limit: None,
            ram: None,
            ram_limit: None,
            disk: None,
            preemptible: None,
            zones: vec![],
            gpu: None,
        }
    }

    /// Checks every field of the TES conversion for a fully specified request.
    #[test]
    fn tes_resource_conversion() {
        let zones: Vec<String> = vec!["foo".into(), "bar".into(), "baz".into()];
        let resources = Resources {
            cpu: Some(1.5),
            ram: Some(16.),
            disk: Some(80.),
            preemptible: Some(true),
            zones: zones.clone(),
            ..unset()
        };

        let converted = tes::v1::types::task::Resources::from(resources);

        // Fractional cores are rounded up to whole cores.
        assert_eq!(converted.cpu_cores, Some(2));
        // 16 GiB and 80 GiB expressed in GB.
        assert_relative_eq!(converted.ram_gb.unwrap(), 17.179869184);
        assert_relative_eq!(converted.disk_gb.unwrap(), 85.89934592);
        assert_eq!(converted.preemptible, Some(true));
        assert_eq!(converted.zones, Some(zones));
        assert_eq!(converted.backend_parameters, None);
        assert_eq!(converted.backend_parameters_strict, None);
    }

    /// A GPU request yields exactly one NVIDIA device request.
    #[test]
    fn gpu_creates_device_request() {
        let resources = Resources {
            gpu: Some(1),
            ..unset()
        };

        let host_config = HostConfig::from(&resources);

        let device_requests = host_config
            .device_requests
            .expect("a GPU request should create device requests");
        assert_eq!(device_requests.len(), 1);

        let request = &device_requests[0];
        assert_eq!(request.driver.as_deref(), Some("nvidia"));
        assert_eq!(request.count, Some(1));
        assert_eq!(request.device_ids, None);
        assert_eq!(request.capabilities, Some(vec![vec!["gpu".to_string()]]));
        assert_eq!(request.options, None);
    }

    /// Without a GPU request no device requests are created.
    #[test]
    fn no_gpu_creates_no_device_request() {
        let resources = unset();

        let host_config = HostConfig::from(&resources);

        assert!(host_config.device_requests.is_none());
    }
}
393}