cgroups_rs/
cgroup.rs

1// Copyright (c) 2018 Levente Kurusa
2// Copyright (c) 2020 Ant Group
3//
4// SPDX-License-Identifier: Apache-2.0 or MIT
5//
6
7//! This module handles cgroup operations. Start here!
8
9use crate::error::ErrorKind::*;
10use crate::error::*;
11
12use crate::{CgroupPid, ControllIdentifier, Controller, Hierarchy, Resources, Subsystem};
13
14use std::collections::HashMap;
15use std::convert::From;
16use std::fs;
17use std::path::{Path, PathBuf};
18
/// Cgroup v2 type string `"domain"`. [`Cgroup::add_task`] rejects this mode with
/// `CgroupMode`.
pub const CGROUP_MODE_DOMAIN: &str = "domain";
/// Cgroup v2 type string `"domain threaded"`. [`Cgroup::add_task`] accepts this mode.
pub const CGROUP_MODE_DOMAIN_THREADED: &str = "domain threaded";
/// Cgroup v2 type string `"domain invalid"`. [`Cgroup::add_task`] rejects this mode with
/// `CgroupMode`.
pub const CGROUP_MODE_DOMAIN_INVALID: &str = "domain invalid";
/// Cgroup v2 type string `"threaded"`. [`Cgroup::add_task`] accepts this mode.
pub const CGROUP_MODE_THREADED: &str = "threaded";
23
/// A control group is the central structure of this crate.
///
///
/// # What are control groups?
///
/// Quoting the Linux kernel sources:
///
/// > Control Groups provide a mechanism for aggregating/partitioning sets of
/// > tasks, and all their future children, into hierarchical groups with
/// > specialized behaviour.
///
/// This crate is an attempt at providing a Rust-native way of managing these cgroups.
#[derive(Debug)]
pub struct Cgroup {
    /// The list of subsystems that control this cgroup
    subsystems: Vec<Subsystem>,

    /// The hierarchy this control group belongs to.
    hier: Box<dyn Hierarchy>,
    /// Name of the control group as given to the `load*` constructors; joined onto the
    /// hierarchy root when the v2 directory is created, removed or killed.
    path: String,

    /// List of controllers specifically enabled in the control group.
    /// `None` when the handle was built without an explicit controller list.
    specified_controllers: Option<Vec<String>>,
}
48
49impl Clone for Cgroup {
50    fn clone(&self) -> Self {
51        Cgroup {
52            subsystems: self.subsystems.clone(),
53            hier: crate::hierarchies::auto(),
54            path: self.path.clone(),
55            specified_controllers: None,
56        }
57    }
58}
59
60impl Default for Cgroup {
61    fn default() -> Self {
62        Cgroup {
63            subsystems: Vec::new(),
64            hier: crate::hierarchies::auto(),
65            path: "".to_string(),
66            specified_controllers: None,
67        }
68    }
69}
70
71impl Cgroup {
72    pub fn v2(&self) -> bool {
73        self.hier.v2()
74    }
75
76    /// Return the path the cgroup is located at.
77    pub fn path(&self) -> &str {
78        &self.path
79    }
80
81    /// Create this control group.
82    pub fn create(&self) -> Result<()> {
83        if self.hier.v2() {
84            create_v2_cgroup(self.hier.root(), &self.path, &self.specified_controllers)
85        } else {
86            for subsystem in &self.subsystems {
87                subsystem.to_controller().create();
88            }
89            Ok(())
90        }
91    }
92
93    /// Create a new control group in the hierarchy `hier`, with name `path`.
94    ///
95    /// Returns a handle to the control group that can be used to manipulate it.
96    pub fn new<P: AsRef<Path>>(hier: Box<dyn Hierarchy>, path: P) -> Result<Cgroup> {
97        let cg = Cgroup::load(hier, path);
98        cg.create()?;
99        Ok(cg)
100    }
101
102    /// Create a new control group in the hierarchy `hier`, with name `path`.
103    ///
104    /// Returns a handle to the control group that can be used to manipulate it.
105    pub fn new_with_specified_controllers<P: AsRef<Path>>(
106        hier: Box<dyn Hierarchy>,
107        path: P,
108        specified_controllers: Option<Vec<String>>,
109    ) -> Result<Cgroup> {
110        let cg = if let Some(sc) = specified_controllers {
111            Cgroup::load_with_specified_controllers(hier, path, sc)
112        } else {
113            Cgroup::load(hier, path)
114        };
115        cg.create()?;
116        Ok(cg)
117    }
118
119    /// Create a new control group in the hierarchy `hier`, with name `path` and `relative_paths`
120    ///
121    /// Returns a handle to the control group that can be used to manipulate it.
122    ///
123    /// Note that this method is only meaningful for cgroup v1, call it is equivalent to call `new` in the v2 mode.
124    pub fn new_with_relative_paths<P: AsRef<Path>>(
125        hier: Box<dyn Hierarchy>,
126        path: P,
127        relative_paths: HashMap<String, String>,
128    ) -> Result<Cgroup> {
129        let cg = Cgroup::load_with_relative_paths(hier, path, relative_paths);
130        cg.create()?;
131        Ok(cg)
132    }
133
134    /// Create a handle for a control group in the hierarchy `hier`, with name `path`.
135    ///
136    /// Returns a handle to the control group (that possibly does not exist until `create()` has
137    /// been called on the cgroup.
138    pub fn load<P: AsRef<Path>>(hier: Box<dyn Hierarchy>, path: P) -> Cgroup {
139        let path = path.as_ref();
140        let mut subsystems = hier.subsystems();
141        if path.as_os_str() != "" {
142            subsystems = subsystems
143                .into_iter()
144                .map(|x| x.enter(path))
145                .collect::<Vec<_>>();
146        }
147
148        Cgroup {
149            path: path.to_str().unwrap().to_string(),
150            subsystems,
151            hier,
152            specified_controllers: None,
153        }
154    }
155
156    /// Create a handle for a specified control group in the hierarchy `hier`, with name `path`.
157    ///
158    /// Returns a handle to the control group (that possibly does not exist until `create()` has
159    /// been called on the cgroup.
160    pub fn load_with_specified_controllers<P: AsRef<Path>>(
161        hier: Box<dyn Hierarchy>,
162        path: P,
163        specified_controllers: Vec<String>,
164    ) -> Cgroup {
165        let path = path.as_ref();
166        let mut subsystems = hier.subsystems();
167        if path.as_os_str() != "" {
168            subsystems = subsystems
169                .into_iter()
170                .filter(|x| specified_controllers.contains(&x.controller_name()))
171                .map(|x| x.enter(path))
172                .collect::<Vec<_>>();
173        }
174
175        Cgroup {
176            path: path.to_str().unwrap().to_string(),
177            subsystems,
178            hier,
179            specified_controllers: Some(specified_controllers),
180        }
181    }
182
183    /// Create a handle for a control group in the hierarchy `hier`, with name `path` and `relative_paths`
184    ///
185    /// Returns a handle to the control group (that possibly does not exist until `create()` has
186    /// been called on the cgroup.
187    ///
188    /// Note that this method is only meaningful for cgroup v1, call it is equivalent to call `load` in the v2 mode
189    pub fn load_with_relative_paths<P: AsRef<Path>>(
190        hier: Box<dyn Hierarchy>,
191        path: P,
192        relative_paths: HashMap<String, String>,
193    ) -> Cgroup {
194        // relative_paths only valid for cgroup v1
195        if hier.v2() {
196            return Self::load(hier, path);
197        }
198
199        let path = path.as_ref();
200        let mut subsystems = hier.subsystems();
201        if path.as_os_str() != "" {
202            subsystems = subsystems
203                .into_iter()
204                .map(|x| {
205                    let cn = x.controller_name();
206                    if relative_paths.contains_key(&cn) {
207                        let rp = relative_paths.get(&cn).unwrap();
208                        let valid_path = rp.trim_start_matches('/').to_string();
209                        let mut p = PathBuf::from(valid_path);
210                        p.push(path);
211                        x.enter(p.as_ref())
212                    } else {
213                        x.enter(path)
214                    }
215                })
216                .collect::<Vec<_>>();
217        }
218
219        Cgroup {
220            subsystems,
221            hier,
222            path: path.to_str().unwrap().to_string(),
223            specified_controllers: None,
224        }
225    }
226
227    /// The list of subsystems that this control group supports.
228    pub fn subsystems(&self) -> &Vec<Subsystem> {
229        &self.subsystems
230    }
231
232    /// Deletes the control group.
233    ///
234    /// Note that this function makes no effort in cleaning up the descendant and the underlying
235    /// system call will fail if there are any descendants. Thus, one should check whether it was
236    /// actually removed, and remove the descendants first if not. In the future, this behavior
237    /// will change.
238    pub fn delete(&self) -> Result<()> {
239        if self.v2() {
240            if !self.path.is_empty() {
241                let mut p = self.hier.root();
242                p.push(self.path.clone());
243                return fs::remove_dir(p).map_err(|e| Error::with_cause(RemoveFailed, e));
244            }
245            return Ok(());
246        }
247
248        self.subsystems.iter().try_for_each(|sub| match sub {
249            Subsystem::Pid(pidc) => pidc.delete(),
250            Subsystem::Mem(c) => c.delete(),
251            Subsystem::CpuSet(c) => c.delete(),
252            Subsystem::CpuAcct(c) => c.delete(),
253            Subsystem::Cpu(c) => c.delete(),
254            Subsystem::Devices(c) => c.delete(),
255            Subsystem::Freezer(c) => c.delete(),
256            Subsystem::NetCls(c) => c.delete(),
257            Subsystem::BlkIo(c) => c.delete(),
258            Subsystem::PerfEvent(c) => c.delete(),
259            Subsystem::NetPrio(c) => c.delete(),
260            Subsystem::HugeTlb(c) => c.delete(),
261            Subsystem::Rdma(c) => c.delete(),
262            Subsystem::Systemd(c) => c.delete(),
263        })
264    }
265
266    /// Apply a set of resource limits to the control group.
267    pub fn apply(&self, res: &Resources) -> Result<()> {
268        self.subsystems
269            .iter()
270            .try_fold((), |_, e| e.to_controller().apply(res))
271    }
272
273    /// Retrieve a container based on type inference.
274    ///
275    /// ## Example:
276    ///
277    /// ```text
278    /// let pids: &PidController = control_group.controller_of()
279    ///                             .expect("No pids controller attached!");
280    /// let cpu: &CpuController = control_group.controller_of()
281    ///                             .expect("No cpu controller attached!");
282    /// ```
283    pub fn controller_of<'a, T>(&'a self) -> Option<&'a T>
284    where
285        &'a T: From<&'a Subsystem>,
286        T: Controller + ControllIdentifier,
287    {
288        for i in &self.subsystems {
289            if i.to_controller().control_type() == T::controller_type() {
290                // N.B.:
291                // https://play.rust-lang.org/?gist=978b2846bacebdaa00be62374f4f4334&version=stable&mode=debug&edition=2015
292                return Some(i.into());
293            }
294        }
295        None
296    }
297
298    /// Removes tasks from the control group by thread group id.
299    ///
300    /// Note that this means that the task will be moved back to the root control group in the
301    /// hierarchy and any rules applied to that control group will _still_ apply to the proc.
302    pub fn remove_task_by_tgid(&self, tgid: CgroupPid) -> Result<()> {
303        self.hier.root_control_group().add_task_by_tgid(tgid)
304    }
305
306    /// Removes a task from the control group.
307    ///
308    /// Note that this means that the task will be moved back to the root control group in the
309    /// hierarchy and any rules applied to that control group will _still_ apply to the task.
310    pub fn remove_task(&self, tid: CgroupPid) -> Result<()> {
311        self.hier.root_control_group().add_task(tid)
312    }
313
314    /// Moves tasks to the parent control group by thread group id.
315    pub fn move_task_to_parent_by_tgid(&self, tgid: CgroupPid) -> Result<()> {
316        self.hier
317            .parent_control_group(&self.path)
318            .add_task_by_tgid(tgid)
319    }
320
321    /// Moves a task to the parent control group.
322    pub fn move_task_to_parent(&self, tid: CgroupPid) -> Result<()> {
323        self.hier.parent_control_group(&self.path).add_task(tid)
324    }
325
326    /// Return a handle to the parent control group in the hierarchy.
327    pub fn parent_control_group(&self) -> Cgroup {
328        self.hier.parent_control_group(&self.path)
329    }
330
331    /// Kill every process in the control group. Only supported for v2 cgroups and on
332    /// kernels 5.14+. This will fail with InvalidOperation if the 'cgroup.kill' file does
333    /// not exist.
334    pub fn kill(&self) -> Result<()> {
335        if !self.v2() {
336            return Err(Error::new(CgroupVersion));
337        }
338
339        let val = "1";
340        let file_name = "cgroup.kill";
341        let p = self.hier.root().join(self.path.clone()).join(file_name);
342
343        // If cgroup.kill doesn't exist they're not on 5.14+ so lets
344        // surface some error the caller can check against.
345        if !p.exists() {
346            return Err(Error::new(InvalidOperation));
347        }
348
349        fs::write(p, val)
350            .map_err(|e| Error::with_cause(WriteFailed(file_name.to_string(), val.to_string()), e))
351    }
352
353    /// Attach a task to the control group.
354    pub fn add_task(&self, tid: CgroupPid) -> Result<()> {
355        if self.v2() {
356            let subsystems = self.subsystems();
357            if !subsystems.is_empty() {
358                let c = subsystems[0].to_controller();
359                let cgroup_type = self.get_cgroup_type()?;
360                // In cgroup v2, writing to the cgroup.threads file is only supported in thread mode.
361                if cgroup_type == *CGROUP_MODE_DOMAIN_THREADED
362                    || cgroup_type == *CGROUP_MODE_THREADED
363                {
364                    // It is used to move the threads of a process into a cgroup in thread mode.
365                    c.add_task(&tid)
366                } else {
367                    // When the cgroup type is domain or domain invalid,
368                    // cgroup.threads cannot be written.
369                    Err(Error::new(CgroupMode))
370                }
371            } else {
372                Err(Error::new(SubsystemsEmpty))
373            }
374        } else {
375            self.subsystems()
376                .iter()
377                .try_for_each(|sub| sub.to_controller().add_task(&tid))
378        }
379    }
380
381    /// Attach tasks to the control group by thread group id.
382    pub fn add_task_by_tgid(&self, tgid: CgroupPid) -> Result<()> {
383        if self.v2() {
384            let subsystems = self.subsystems();
385            if !subsystems.is_empty() {
386                let c = subsystems[0].to_controller();
387                // It is used to move a thread of the process to a cgroup,
388                // and other threads of the process will also move together.
389                c.add_task_by_tgid(&tgid)
390            } else {
391                Err(Error::new(SubsystemsEmpty))
392            }
393        } else {
394            self.subsystems()
395                .iter()
396                .try_for_each(|sub| sub.to_controller().add_task_by_tgid(&tgid))
397        }
398    }
399
400    /// set cgroup.type
401    pub fn set_cgroup_type(&self, cgroup_type: &str) -> Result<()> {
402        if self.v2() {
403            let subsystems = self.subsystems();
404            if !subsystems.is_empty() {
405                let c = subsystems[0].to_controller();
406                c.set_cgroup_type(cgroup_type)
407            } else {
408                Err(Error::new(SubsystemsEmpty))
409            }
410        } else {
411            Err(Error::new(CgroupVersion))
412        }
413    }
414
415    /// get cgroup.type
416    pub fn get_cgroup_type(&self) -> Result<String> {
417        if self.v2() {
418            let subsystems = self.subsystems();
419            if !subsystems.is_empty() {
420                let c = subsystems[0].to_controller();
421                let cgroup_type = c.get_cgroup_type()?;
422                Ok(cgroup_type)
423            } else {
424                Err(Error::new(SubsystemsEmpty))
425            }
426        } else {
427            Err(Error::new(CgroupVersion))
428        }
429    }
430
431    /// Set notify_on_release to the control group.
432    pub fn set_notify_on_release(&self, enable: bool) -> Result<()> {
433        self.subsystems()
434            .iter()
435            .try_for_each(|sub| sub.to_controller().set_notify_on_release(enable))
436    }
437
438    /// Set release_agent
439    pub fn set_release_agent(&self, path: &str) -> Result<()> {
440        self.hier
441            .root_control_group()
442            .subsystems()
443            .iter()
444            .try_for_each(|sub| sub.to_controller().set_release_agent(path))
445    }
446
447    /// Returns an Iterator that can be used to iterate over the procs that are currently in the
448    /// control group.
449    pub fn procs(&self) -> Vec<CgroupPid> {
450        // Collect the procs from all subsystems
451        let mut v = if self.v2() {
452            let subsystems = self.subsystems();
453            if !subsystems.is_empty() {
454                let c = subsystems[0].to_controller();
455                c.procs()
456            } else {
457                vec![]
458            }
459        } else {
460            self.subsystems()
461                .iter()
462                .map(|x| x.to_controller().procs())
463                .fold(vec![], |mut acc, mut x| {
464                    acc.append(&mut x);
465                    acc
466                })
467        };
468
469        v.sort();
470        v.dedup();
471        v
472    }
473
474    /// Returns an Iterator that can be used to iterate over the tasks that are currently in the
475    /// control group.
476    pub fn tasks(&self) -> Vec<CgroupPid> {
477        // Collect the tasks from all subsystems
478        let mut v = if self.v2() {
479            let subsystems = self.subsystems();
480            if !subsystems.is_empty() {
481                let c = subsystems[0].to_controller();
482                c.tasks()
483            } else {
484                vec![]
485            }
486        } else {
487            self.subsystems()
488                .iter()
489                .map(|x| x.to_controller().tasks())
490                .fold(vec![], |mut acc, mut x| {
491                    acc.append(&mut x);
492                    acc
493                })
494        };
495
496        v.sort();
497        v.dedup();
498        v
499    }
500
501    /// Checks if the cgroup exists.
502    ///
503    /// Returns true if at least one subsystem exists.
504    pub fn exists(&self) -> bool {
505        self.subsystems().iter().any(|e| e.to_controller().exists())
506    }
507}
508
/// Directory under which `cgroup.controllers` is looked up by `supported_controllers`.
pub const UNIFIED_MOUNTPOINT: &str = "/sys/fs/cgroup";

/// Write `+<controller>` to `<path>/cgroup.subtree_control` once per entry of
/// `controllers`.
///
/// This is best effort: the result of each write is discarded, so a controller that
/// cannot be enabled does not prevent the remaining ones from being attempted.
fn enable_controllers(controllers: &[String], path: &Path) {
    let f = path.join("cgroup.subtree_control");
    for c in controllers {
        let body = format!("+{}", c);
        let _ = fs::write(&f, body.as_bytes());
    }
}

/// Read the controller names listed in `<UNIFIED_MOUNTPOINT>/cgroup.controllers`.
///
/// Names are split on any whitespace, so the trailing newline of the file does not end
/// up glued to the last name. An unreadable file yields an empty list.
fn supported_controllers() -> Vec<String> {
    let p = Path::new(UNIFIED_MOUNTPOINT).join("cgroup.controllers");
    fs::read_to_string(p)
        .unwrap_or_default()
        .split_whitespace()
        .map(str::to_string)
        .collect()
}
527
528fn create_v2_cgroup(
529    root: PathBuf,
530    path: &str,
531    specified_controllers: &Option<Vec<String>>,
532) -> Result<()> {
533    // controler list ["memory", "cpu"]
534    let controllers = if let Some(s_controllers) = specified_controllers.clone() {
535        if verify_supported_controllers(s_controllers.as_ref()) {
536            s_controllers
537        } else {
538            return Err(Error::new(ErrorKind::SpecifiedControllers));
539        }
540    } else {
541        supported_controllers()
542    };
543
544    let mut fp = root;
545
546    // enable for root
547    enable_controllers(&controllers, &fp);
548
549    // path: "a/b/c"
550    let elements = path.split('/').collect::<Vec<&str>>();
551    let last_index = elements.len() - 1;
552    for (i, ele) in elements.iter().enumerate() {
553        // ROOT/a
554        fp.push(ele);
555        // create dir, need not check if is a file or directory
556        if !fp.exists() {
557            if let Err(e) = std::fs::create_dir(fp.clone()) {
558                return Err(Error::with_cause(ErrorKind::FsError, e));
559            }
560        }
561
562        if i < last_index {
563            // enable controllers for substree
564            enable_controllers(&controllers, &fp);
565        }
566    }
567
568    Ok(())
569}
570
571pub fn verify_supported_controllers(controllers: &[String]) -> bool {
572    let sc = supported_controllers();
573    for controller in controllers.iter() {
574        if !sc.contains(controller) {
575            return false;
576        }
577    }
578    true
579}
580
581pub fn get_cgroups_relative_paths() -> Result<HashMap<String, String>> {
582    let path = "/proc/self/cgroup".to_string();
583    get_cgroups_relative_paths_by_path(path)
584}
585
586pub fn get_cgroups_relative_paths_by_pid(pid: u32) -> Result<HashMap<String, String>> {
587    let path = format!("/proc/{}/cgroup", pid);
588    get_cgroups_relative_paths_by_path(path)
589}
590
591fn get_cgroups_relative_paths_by_path(path: String) -> Result<HashMap<String, String>> {
592    let mut m = HashMap::new();
593    let content =
594        fs::read_to_string(path.clone()).map_err(|e| Error::with_cause(ReadFailed(path), e))?;
595    for l in content.lines() {
596        let fl: Vec<&str> = l.split(':').collect();
597        if fl.len() != 3 {
598            continue;
599        }
600
601        let keys: Vec<&str> = fl[1].split(',').collect();
602        for key in &keys {
603            m.insert(key.to_string(), fl[2].to_string());
604        }
605    }
606    Ok(m)
607}