cgroups_rs/fs/
cpuset.rs

1// Copyright (c) 2018 Levente Kurusa
2// Copyright (c) 2020 Ant Group
3//
4// SPDX-License-Identifier: Apache-2.0 or MIT
5//
6
7//! This module contains the implementation of the `cpuset` cgroup subsystem.
8//!
9//! See the Kernel's documentation for more information about this subsystem, found at:
10//!  [Documentation/cgroup-v1/cpusets.txt](https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt)
11
12use log::*;
13use std::io::Write;
14use std::path::PathBuf;
15
16use crate::fs::error::ErrorKind::*;
17use crate::fs::error::*;
18
19use crate::fs::{read_string_from, read_u64_from};
20use crate::fs::{
21    ControllIdentifier, ControllerInternal, Controllers, CpuResources, Resources, Subsystem,
22};
23
/// A controller that allows controlling the `cpuset` subsystem of a Cgroup.
///
/// In essence, this controller is responsible for restricting the tasks in the control group to a
/// set of CPUs and/or memory nodes.
#[derive(Debug, Clone)]
pub struct CpuSetController {
    /// Path returned by `get_base()`. `post_create()` does not copy values from the parent when
    /// the control group's own path is equal to this one.
    base: PathBuf,
    /// Path of this control group, as returned by `get_path()` / `get_path_mut()`.
    path: PathBuf,
    /// Flag returned by `is_v2()`; `post_create()` is a no-op when it is set.
    /// Presumably marks a cgroup v2 (unified) hierarchy -- confirm against the caller of `new()`.
    v2: bool,
}
34
/// The current state of the `cpuset` controller for this control group.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CpuSet {
    /// If true, no other control groups can share the CPUs listed in the `cpus` field.
    pub cpu_exclusive: bool,
    /// The list of CPUs the tasks of the control group can run on.
    ///
    /// This is a vector of `(start, end)` tuples, where each tuple is a range of CPUs where the
    /// control group is allowed to run on. Both sides of the range are inclusive.
    pub cpus: Vec<(u64, u64)>,
    /// The list of CPUs that the tasks can effectively run on. This removes the list of CPUs that
    /// the parent (and all of its parents) cannot run on from the `cpus` field of this control
    /// group.
    pub effective_cpus: Vec<(u64, u64)>,
    /// The list of memory nodes that the tasks can effectively use. This removes the list of nodes that
    /// the parent (and all of its parents) cannot use from the `mems` field of this control
    /// group.
    pub effective_mems: Vec<(u64, u64)>,
    /// If true, no other control groups can share the memory nodes listed in the `mems` field.
    pub mem_exclusive: bool,
    /// If true, the control group is 'hardwalled'. Kernel memory allocations (except for a few
    /// minor exceptions) are made from the memory nodes designated in the `mems` field.
    pub mem_hardwall: bool,
    /// If true, whenever `mems` is changed via `set_mems()`, the memory stored on the previous
    /// nodes is migrated to the new nodes selected by the new `mems`.
    pub memory_migrate: bool,
    /// Running average of the memory pressure faced by the tasks in the control group.
    pub memory_pressure: u64,
    /// This field is only present at the root control group and controls whether the kernel will
    /// compute the memory pressure for control groups or not. It is `None` when the value could
    /// not be read.
    pub memory_pressure_enabled: Option<bool>,
    /// If true, filesystem buffers are spread evenly across the nodes specified in `mems`.
    pub memory_spread_page: bool,
    /// If true, kernel slab caches for file I/O are spread evenly across the nodes
    /// specified in `mems`.
    pub memory_spread_slab: bool,
    /// The list of memory nodes the tasks of the control group can use.
    ///
    /// The format is the same as the `cpus`, `effective_cpus` and `effective_mems` fields.
    pub mems: Vec<(u64, u64)>,
    /// If true, the kernel will attempt to rebalance the load between the CPUs specified in the
    /// `cpus` field of this control group.
    pub sched_load_balance: bool,
    /// Represents how much work the kernel should do to rebalance this cpuset.
    ///
    /// | `sched_relax_domain_level` | Effect |
    /// | -------------------------- | ------ |
    /// |             -1             | Use the system default value |
    /// |              0             | Only balance loads periodically |
    /// |              1             | Immediately balance the load across tasks on the same core |
    /// |              2             | Immediately balance the load across cores in the same CPU package |
    /// |              4             | Immediately balance the load across CPUs on the same node |
    /// |              5             | Immediately balance the load between CPUs even if the system is NUMA |
    /// |              6             | Immediately balance the load between all CPUs |
    ///
    /// NOTE(review): this field is a `u64`, so the `-1` row cannot be represented here as-is --
    /// confirm how a kernel value of `-1` is meant to be reported. The level table should also be
    /// double-checked against the kernel's cpuset documentation.
    pub sched_relax_domain_level: u64,
}
91
92impl ControllerInternal for CpuSetController {
93    fn control_type(&self) -> Controllers {
94        Controllers::CpuSet
95    }
96    fn get_path(&self) -> &PathBuf {
97        &self.path
98    }
99    fn get_path_mut(&mut self) -> &mut PathBuf {
100        &mut self.path
101    }
102    fn get_base(&self) -> &PathBuf {
103        &self.base
104    }
105
106    fn is_v2(&self) -> bool {
107        self.v2
108    }
109
110    fn apply(&self, res: &Resources) -> Result<()> {
111        // get the resources that apply to this controller
112        let res: &CpuResources = &res.cpu;
113
114        update!(self, set_cpus, res.cpus.as_ref());
115        update!(self, set_mems, res.mems.as_ref());
116
117        Ok(())
118    }
119
120    fn post_create(&self) {
121        if self.is_v2() {
122            return;
123        }
124        let current = self.get_path();
125
126        if current != self.get_base() {
127            match copy_from_parent(current.to_str().unwrap(), "cpuset.cpus") {
128                Ok(_) => (),
129                Err(err) => error!("error create_dir for cpuset.cpus {:?}", err),
130            }
131            match copy_from_parent(current.to_str().unwrap(), "cpuset.mems") {
132                Ok(_) => (),
133                Err(err) => error!("error create_dir for cpuset.mems {:?}", err),
134            }
135        }
136    }
137}
138
139fn find_no_empty_parent(from: &str, file: &str) -> Result<(String, Vec<PathBuf>)> {
140    let mut current_path = ::std::path::Path::new(from).to_path_buf();
141    let mut v = vec![];
142
143    loop {
144        let current_value =
145            match ::std::fs::read_to_string(current_path.clone().join(file).to_str().unwrap()) {
146                Ok(cpus) => String::from(cpus.trim()),
147                Err(e) => {
148                    return Err(Error::with_cause(
149                        ReadFailed(current_path.display().to_string()),
150                        e,
151                    ))
152                }
153            };
154
155        if !current_value.is_empty() {
156            return Ok((current_value, v));
157        }
158        v.push(current_path.clone());
159
160        let parent = match current_path.parent() {
161            Some(p) => p,
162            None => return Ok(("".to_string(), v)),
163        };
164
165        // next loop, find parent
166        current_path = parent.to_path_buf();
167    }
168}
169
170/// copy_from_parent copy the cpuset.cpus and cpuset.mems from the parent
171/// directory to the current directory if the file's contents are 0
172fn copy_from_parent(current: &str, file: &str) -> Result<()> {
173    // find not empty cpus/memes from current directory.
174    let (value, parents) = find_no_empty_parent(current, file)?;
175
176    if value.is_empty() || parents.is_empty() {
177        return Ok(());
178    }
179
180    for p in parents.iter().rev() {
181        let mut pb = p.clone();
182        pb.push(file);
183        match ::std::fs::write(pb.to_str().unwrap(), value.as_bytes()) {
184            Ok(_) => (),
185            Err(e) => {
186                return Err(Error::with_cause(
187                    WriteFailed(pb.display().to_string(), pb.display().to_string()),
188                    e,
189                ))
190            }
191        }
192    }
193
194    Ok(())
195}
196
impl ControllIdentifier for CpuSetController {
    /// Returns the controller kind associated with `CpuSetController`, i.e. `Controllers::CpuSet`.
    fn controller_type() -> Controllers {
        Controllers::CpuSet
    }
}
202
203impl<'a> From<&'a Subsystem> for &'a CpuSetController {
204    fn from(sub: &'a Subsystem) -> &'a CpuSetController {
205        unsafe {
206            match sub {
207                Subsystem::CpuSet(c) => c,
208                _ => {
209                    assert_eq!(1, 0);
210                    let v = std::mem::MaybeUninit::uninit();
211                    v.assume_init()
212                }
213            }
214        }
215    }
216}
217
218/// Parse a string like "1,2,4-5,8" into a list of (start, end) tuples.
219fn parse_range(s: String) -> Result<Vec<(u64, u64)>> {
220    let mut fin = Vec::new();
221
222    if s.is_empty() {
223        return Ok(fin);
224    }
225
226    // first split by commas
227    let comma_split = s.split(',');
228
229    for sp in comma_split {
230        if sp.contains('-') {
231            // this is a true range
232            let dash_split = sp.split('-').collect::<Vec<_>>();
233            if dash_split.len() != 2 {
234                return Err(Error::new(ParseError));
235            }
236            let first = dash_split[0].parse::<u64>();
237            let second = dash_split[1].parse::<u64>();
238            if first.is_err() || second.is_err() {
239                return Err(Error::new(ParseError));
240            }
241            fin.push((first.unwrap(), second.unwrap()));
242        } else {
243            // this is just a single number
244            let num = sp.parse::<u64>();
245            if num.is_err() {
246                return Err(Error::new(ParseError));
247            }
248            fin.push((num.clone().unwrap(), num.clone().unwrap()));
249        }
250    }
251
252    Ok(fin)
253}
254
255impl CpuSetController {
256    /// Contructs a new `CpuSetController` with `root` serving as the root of the control group.
257    pub fn new(point: PathBuf, root: PathBuf, v2: bool) -> Self {
258        Self {
259            base: root,
260            path: point,
261            v2,
262        }
263    }
264
265    /// Returns the statistics gathered by the kernel for this control group. See the struct for
266    /// more information on what information this entails.
267    pub fn cpuset(&self) -> CpuSet {
268        CpuSet {
269            cpu_exclusive: {
270                self.open_path("cpuset.cpu_exclusive", false)
271                    .and_then(read_u64_from)
272                    .map(|x| x == 1)
273                    .unwrap_or(false)
274            },
275            cpus: {
276                self.open_path("cpuset.cpus", false)
277                    .and_then(read_string_from)
278                    .and_then(parse_range)
279                    .unwrap_or_default()
280            },
281            effective_cpus: {
282                self.open_path("cpuset.effective_cpus", false)
283                    .and_then(read_string_from)
284                    .and_then(parse_range)
285                    .unwrap_or_default()
286            },
287            effective_mems: {
288                self.open_path("cpuset.effective_mems", false)
289                    .and_then(read_string_from)
290                    .and_then(parse_range)
291                    .unwrap_or_default()
292            },
293            mem_exclusive: {
294                self.open_path("cpuset.mem_exclusive", false)
295                    .and_then(read_u64_from)
296                    .map(|x| x == 1)
297                    .unwrap_or(false)
298            },
299            mem_hardwall: {
300                self.open_path("cpuset.mem_hardwall", false)
301                    .and_then(read_u64_from)
302                    .map(|x| x == 1)
303                    .unwrap_or(false)
304            },
305            memory_migrate: {
306                self.open_path("cpuset.memory_migrate", false)
307                    .and_then(read_u64_from)
308                    .map(|x| x == 1)
309                    .unwrap_or(false)
310            },
311            memory_pressure: {
312                self.open_path("cpuset.memory_pressure", false)
313                    .and_then(read_u64_from)
314                    .unwrap_or(0)
315            },
316            memory_pressure_enabled: {
317                self.open_path("cpuset.memory_pressure_enabled", false)
318                    .and_then(read_u64_from)
319                    .map(|x| x == 1)
320                    .ok()
321            },
322            memory_spread_page: {
323                self.open_path("cpuset.memory_spread_page", false)
324                    .and_then(read_u64_from)
325                    .map(|x| x == 1)
326                    .unwrap_or(false)
327            },
328            memory_spread_slab: {
329                self.open_path("cpuset.memory_spread_slab", false)
330                    .and_then(read_u64_from)
331                    .map(|x| x == 1)
332                    .unwrap_or(false)
333            },
334            mems: {
335                self.open_path("cpuset.mems", false)
336                    .and_then(read_string_from)
337                    .and_then(parse_range)
338                    .unwrap_or_default()
339            },
340            sched_load_balance: {
341                self.open_path("cpuset.sched_load_balance", false)
342                    .and_then(read_u64_from)
343                    .map(|x| x == 1)
344                    .unwrap_or(false)
345            },
346            sched_relax_domain_level: {
347                self.open_path("cpuset.sched_relax_domain_level", false)
348                    .and_then(read_u64_from)
349                    .unwrap_or(0)
350            },
351        }
352    }
353
354    /// Control whether the CPUs selected via `set_cpus()` should be exclusive to this control
355    /// group or not.
356    pub fn set_cpu_exclusive(&self, b: bool) -> Result<()> {
357        self.open_path("cpuset.cpu_exclusive", true)
358            .and_then(|mut file| {
359                if b {
360                    file.write_all(b"1").map_err(|e| {
361                        Error::with_cause(
362                            WriteFailed("cpuset.cpu_exclusive".to_string(), "1".to_string()),
363                            e,
364                        )
365                    })
366                } else {
367                    file.write_all(b"0").map_err(|e| {
368                        Error::with_cause(
369                            WriteFailed("cpuset.cpu_exclusive".to_string(), "0".to_string()),
370                            e,
371                        )
372                    })
373                }
374            })
375    }
376
377    /// Control whether the memory nodes selected via `set_memss()` should be exclusive to this control
378    /// group or not.
379    pub fn set_mem_exclusive(&self, b: bool) -> Result<()> {
380        self.open_path("cpuset.mem_exclusive", true)
381            .and_then(|mut file| {
382                if b {
383                    file.write_all(b"1").map_err(|e| {
384                        Error::with_cause(
385                            WriteFailed("cpuset.mem_exclusive".to_string(), "1".to_string()),
386                            e,
387                        )
388                    })
389                } else {
390                    file.write_all(b"0").map_err(|e| {
391                        Error::with_cause(
392                            WriteFailed("cpuset.mem_exclusive".to_string(), "0".to_string()),
393                            e,
394                        )
395                    })
396                }
397            })
398    }
399
400    /// Set the CPUs that the tasks in this control group can run on.
401    ///
402    /// Syntax is a comma separated list of CPUs, with an additional extension that ranges can
403    /// be represented via dashes.
404    pub fn set_cpus(&self, cpus: &str) -> Result<()> {
405        self.open_path("cpuset.cpus", true).and_then(|mut file| {
406            file.write_all(cpus.as_ref()).map_err(|e| {
407                Error::with_cause(WriteFailed("cpuset.cpus".to_string(), cpus.to_string()), e)
408            })
409        })
410    }
411
412    /// Set the memory nodes that the tasks in this control group can use.
413    ///
414    /// Syntax is the same as with `set_cpus()`.
415    pub fn set_mems(&self, mems: &str) -> Result<()> {
416        self.open_path("cpuset.mems", true).and_then(|mut file| {
417            file.write_all(mems.as_ref()).map_err(|e| {
418                Error::with_cause(WriteFailed("cpuset.mems".to_string(), mems.to_string()), e)
419            })
420        })
421    }
422
423    /// Controls whether the control group should be "hardwalled", i.e., whether kernel allocations
424    /// should exclusively use the memory nodes set via `set_mems()`.
425    ///
426    /// Note that some kernel allocations, most notably those that are made in interrupt handlers
427    /// may disregard this.
428    pub fn set_hardwall(&self, b: bool) -> Result<()> {
429        self.open_path("cpuset.mem_hardwall", true)
430            .and_then(|mut file| {
431                if b {
432                    file.write_all(b"1").map_err(|e| {
433                        Error::with_cause(
434                            WriteFailed("cpuset.mem_hardwall".to_string(), "1".to_string()),
435                            e,
436                        )
437                    })
438                } else {
439                    file.write_all(b"0").map_err(|e| {
440                        Error::with_cause(
441                            WriteFailed("cpuset.mem_hardwall".to_string(), "0".to_string()),
442                            e,
443                        )
444                    })
445                }
446            })
447    }
448
449    /// Controls whether the kernel should attempt to rebalance the load between the CPUs specified in the
450    /// `cpus` field of this control group.
451    pub fn set_load_balancing(&self, b: bool) -> Result<()> {
452        self.open_path("cpuset.sched_load_balance", true)
453            .and_then(|mut file| {
454                if b {
455                    file.write_all(b"1").map_err(|e| {
456                        Error::with_cause(
457                            WriteFailed("cpuset.sched_load_balance".to_string(), "1".to_string()),
458                            e,
459                        )
460                    })
461                } else {
462                    file.write_all(b"0").map_err(|e| {
463                        Error::with_cause(
464                            WriteFailed("cpuset.sched_load_balance".to_string(), "0".to_string()),
465                            e,
466                        )
467                    })
468                }
469            })
470    }
471
472    /// Contorl how much effort the kernel should invest in rebalacing the control group.
473    ///
474    /// See @CpuSet 's similar field for more information.
475    pub fn set_rebalance_relax_domain_level(&self, i: i64) -> Result<()> {
476        self.open_path("cpuset.sched_relax_domain_level", true)
477            .and_then(|mut file| {
478                file.write_all(i.to_string().as_ref()).map_err(|e| {
479                    Error::with_cause(
480                        WriteFailed("cpuset.sched_relax_domain_level".to_string(), i.to_string()),
481                        e,
482                    )
483                })
484            })
485    }
486
487    /// Control whether when using `set_mems()` the existing memory used by the tasks should be
488    /// migrated over to the now-selected nodes.
489    pub fn set_memory_migration(&self, b: bool) -> Result<()> {
490        self.open_path("cpuset.memory_migrate", true)
491            .and_then(|mut file| {
492                if b {
493                    file.write_all(b"1").map_err(|e| {
494                        Error::with_cause(
495                            WriteFailed("cpuset.memory_migrate".to_string(), "1".to_string()),
496                            e,
497                        )
498                    })
499                } else {
500                    file.write_all(b"0").map_err(|e| {
501                        Error::with_cause(
502                            WriteFailed("cpuset.memory_migrate".to_string(), "0".to_string()),
503                            e,
504                        )
505                    })
506                }
507            })
508    }
509
510    /// Control whether filesystem buffers should be evenly split across the nodes selected via
511    /// `set_mems()`.
512    pub fn set_memory_spread_page(&self, b: bool) -> Result<()> {
513        self.open_path("cpuset.memory_spread_page", true)
514            .and_then(|mut file| {
515                if b {
516                    file.write_all(b"1").map_err(|e| {
517                        Error::with_cause(
518                            WriteFailed("cpuset.memory_spread_page".to_string(), "1".to_string()),
519                            e,
520                        )
521                    })
522                } else {
523                    file.write_all(b"0").map_err(|e| {
524                        Error::with_cause(
525                            WriteFailed("cpuset.memory_spread_page".to_string(), "0".to_string()),
526                            e,
527                        )
528                    })
529                }
530            })
531    }
532
533    /// Control whether the kernel's slab cache for file I/O should be evenly split across the
534    /// nodes selected via `set_mems()`.
535    pub fn set_memory_spread_slab(&self, b: bool) -> Result<()> {
536        self.open_path("cpuset.memory_spread_slab", true)
537            .and_then(|mut file| {
538                if b {
539                    file.write_all(b"1").map_err(|e| {
540                        Error::with_cause(
541                            WriteFailed("cpuset.memory_spread_slab".to_string(), "1".to_string()),
542                            e,
543                        )
544                    })
545                } else {
546                    file.write_all(b"0").map_err(|e| {
547                        Error::with_cause(
548                            WriteFailed("cpuset.memory_spread_slab".to_string(), "0".to_string()),
549                            e,
550                        )
551                    })
552                }
553            })
554    }
555
556    /// Control whether the kernel should collect information to calculate memory pressure for
557    /// control groups.
558    ///
559    /// Note: This will fail with `InvalidOperation` if the current congrol group is not the root
560    /// control group.
561    pub fn set_enable_memory_pressure(&self, b: bool) -> Result<()> {
562        if !self.path_exists("cpuset.memory_pressure_enabled") {
563            return Err(Error::new(InvalidOperation));
564        }
565        self.open_path("cpuset.memory_pressure_enabled", true)
566            .and_then(|mut file| {
567                if b {
568                    file.write_all(b"1").map_err(|e| {
569                        Error::with_cause(
570                            WriteFailed(
571                                "cpuset.memory_pressure_enabled".to_string(),
572                                "1".to_string(),
573                            ),
574                            e,
575                        )
576                    })
577                } else {
578                    file.write_all(b"0").map_err(|e| {
579                        Error::with_cause(
580                            WriteFailed(
581                                "cpuset.memory_pressure_enabled".to_string(),
582                                "0".to_string(),
583                            ),
584                            e,
585                        )
586                    })
587                }
588            })
589    }
590}
591
#[cfg(test)]
mod tests {
    use crate::fs::cpuset;

    /// Well-formed CPU / memory-node lists must parse into the expected inclusive ranges.
    #[test]
    fn test_parse_range() {
        // (input, expected ranges)
        let table: Vec<(&str, Vec<(u64, u64)>)> = vec![
            ("1,2,4-6,9", vec![(1, 1), (2, 2), (4, 6), (9, 9)]),
            ("", vec![]),
            ("1", vec![(1, 1)]),
            ("1-111", vec![(1, 111)]),
            ("1,2,3,4", vec![(1, 1), (2, 2), (3, 3), (4, 4)]),
            ("1-5,6-7,8-9", vec![(1, 5), (6, 7), (8, 9)]),
        ];

        for (input, expected) in table {
            let range = cpuset::parse_range(input.to_string());
            println!("{:?} => {:?}", input, range);
            assert!(range.is_ok());
            assert_eq!(range.unwrap(), expected);
        }
    }
}
621}