cgroups_rs/fs/
cgroup.rs

1// Copyright (c) 2018 Levente Kurusa
2// Copyright (c) 2020 Ant Group
3//
4// SPDX-License-Identifier: Apache-2.0 or MIT
5//
6
7//! This module handles cgroup operations. Start here!
8
9use crate::fs::error::ErrorKind::*;
10use crate::fs::error::*;
11
12use crate::fs::hierarchies::V1;
13use crate::fs::{CgroupPid, ControllIdentifier, Controller, Hierarchy, Resources, Subsystem};
14
15use std::collections::HashMap;
16use std::convert::From;
17use std::fs;
18use std::path::{Path, PathBuf};
19
// Cgroup type strings. `Cgroup::add_task` compares the value returned by
// `Cgroup::get_cgroup_type` (the content of `cgroup.type`) against these.

/// Cgroup type string `"domain"`.
pub const CGROUP_MODE_DOMAIN: &str = "domain";
/// Cgroup type string `"domain threaded"`; `Cgroup::add_task` accepts this type.
pub const CGROUP_MODE_DOMAIN_THREADED: &str = "domain threaded";
/// Cgroup type string `"domain invalid"`.
pub const CGROUP_MODE_DOMAIN_INVALID: &str = "domain invalid";
/// Cgroup type string `"threaded"`; `Cgroup::add_task` accepts this type.
pub const CGROUP_MODE_THREADED: &str = "threaded";
24
25/// A control group is the central structure to this crate.
26///
27///
28/// # What are control groups?
29///
30/// Lifting over from the Linux kernel sources:
31///
32/// > Control Groups provide a mechanism for aggregating/partitioning sets of
33/// > tasks, and all their future children, into hierarchical groups with
34/// > specialized behaviour.
35///
36/// This crate is an attempt at providing a Rust-native way of managing these cgroups.
37#[derive(Debug)]
38pub struct Cgroup {
39    /// The list of subsystems that control this cgroup
40    subsystems: Vec<Subsystem>,
41
42    /// The hierarchy.
43    hier: Box<dyn Hierarchy>,
44    path: String,
45
46    /// List of controllers specifically enabled in the control group.
47    specified_controllers: Option<Vec<String>>,
48}
49
50impl Clone for Cgroup {
51    fn clone(&self) -> Self {
52        Cgroup {
53            subsystems: self.subsystems.clone(),
54            hier: crate::fs::hierarchies::auto(),
55            path: self.path.clone(),
56            specified_controllers: None,
57        }
58    }
59}
60
61impl Default for Cgroup {
62    fn default() -> Self {
63        Cgroup {
64            subsystems: Vec::new(),
65            hier: crate::fs::hierarchies::auto(),
66            path: "".to_string(),
67            specified_controllers: None,
68        }
69    }
70}
71
72impl Cgroup {
73    pub fn v2(&self) -> bool {
74        self.hier.v2()
75    }
76
77    /// Return the path the cgroup is located at.
78    pub fn path(&self) -> &str {
79        &self.path
80    }
81
82    /// Create this control group.
83    pub fn create(&self) -> Result<()> {
84        if self.hier.v2() {
85            create_v2_cgroup(self.hier.root(), &self.path, &self.specified_controllers)
86        } else {
87            for subsystem in &self.subsystems {
88                subsystem.to_controller().create();
89            }
90            Ok(())
91        }
92    }
93
94    /// Create a new control group in the hierarchy `hier`, with name `path`.
95    ///
96    /// Returns a handle to the control group that can be used to manipulate it.
97    pub fn new<P: AsRef<Path>>(hier: Box<dyn Hierarchy>, path: P) -> Result<Cgroup> {
98        let cg = Cgroup::load(hier, path);
99        cg.create()?;
100        Ok(cg)
101    }
102
103    /// Create a new control group in the hierarchy `hier`, with name `path`.
104    ///
105    /// Returns a handle to the control group that can be used to manipulate it.
106    pub fn new_with_specified_controllers<P: AsRef<Path>>(
107        hier: Box<dyn Hierarchy>,
108        path: P,
109        specified_controllers: Option<Vec<String>>,
110    ) -> Result<Cgroup> {
111        let cg = if let Some(sc) = specified_controllers {
112            Cgroup::load_with_specified_controllers(hier, path, sc)
113        } else {
114            Cgroup::load(hier, path)
115        };
116        cg.create()?;
117        Ok(cg)
118    }
119
120    /// Create a new control group in the hierarchy `hier`, with name `path` and `relative_paths`
121    ///
122    /// Returns a handle to the control group that can be used to manipulate it.
123    ///
124    /// Note that this method is only meaningful for cgroup v1, call it is equivalent to call `new` in the v2 mode.
125    pub fn new_with_relative_paths<P: AsRef<Path>>(
126        hier: Box<dyn Hierarchy>,
127        path: P,
128        relative_paths: HashMap<String, String>,
129    ) -> Result<Cgroup> {
130        let cg = Cgroup::load_with_relative_paths(hier, path, relative_paths);
131        cg.create()?;
132        Ok(cg)
133    }
134
135    /// Create a handle for a control group in the hierarchy `hier`, with name `path`.
136    ///
137    /// Returns a handle to the control group (that possibly does not exist until `create()` has
138    /// been called on the cgroup.
139    pub fn load<P: AsRef<Path>>(hier: Box<dyn Hierarchy>, path: P) -> Cgroup {
140        let path = path.as_ref();
141        let mut subsystems = hier.subsystems();
142        if path.as_os_str() != "" {
143            subsystems = subsystems
144                .into_iter()
145                .map(|x| x.enter(path))
146                .collect::<Vec<_>>();
147        }
148
149        Cgroup {
150            path: path.to_str().unwrap().to_string(),
151            subsystems,
152            hier,
153            specified_controllers: None,
154        }
155    }
156
157    /// Create a handle for a specified control group in the hierarchy `hier`, with name `path`.
158    ///
159    /// Returns a handle to the control group (that possibly does not exist until `create()` has
160    /// been called on the cgroup.
161    pub fn load_with_specified_controllers<P: AsRef<Path>>(
162        hier: Box<dyn Hierarchy>,
163        path: P,
164        specified_controllers: Vec<String>,
165    ) -> Cgroup {
166        let path = path.as_ref();
167        let mut subsystems = hier.subsystems();
168        if path.as_os_str() != "" {
169            subsystems = subsystems
170                .into_iter()
171                .filter(|x| specified_controllers.contains(&x.controller_name()))
172                .map(|x| x.enter(path))
173                .collect::<Vec<_>>();
174        }
175
176        Cgroup {
177            path: path.to_str().unwrap().to_string(),
178            subsystems,
179            hier,
180            specified_controllers: Some(specified_controllers),
181        }
182    }
183
184    /// Create a handle for a control group in the hierarchy `hier`, with name `path` and `relative_paths`
185    ///
186    /// Returns a handle to the control group (that possibly does not exist until `create()` has
187    /// been called on the cgroup.
188    ///
189    /// Note that this method is only meaningful for cgroup v1, call it is equivalent to call `load` in the v2 mode
190    pub fn load_with_relative_paths<P: AsRef<Path>>(
191        hier: Box<dyn Hierarchy>,
192        path: P,
193        relative_paths: HashMap<String, String>,
194    ) -> Cgroup {
195        // relative_paths only valid for cgroup v1
196        if hier.v2() {
197            return Self::load(hier, path);
198        }
199
200        let path = path.as_ref();
201        let mut subsystems = hier.subsystems();
202        if path.as_os_str() != "" {
203            subsystems = subsystems
204                .into_iter()
205                .map(|x| {
206                    let cn = x.controller_name();
207                    if relative_paths.contains_key(&cn) {
208                        let rp = relative_paths.get(&cn).unwrap();
209                        let valid_path = rp.trim_start_matches('/').to_string();
210                        let mut p = PathBuf::from(valid_path);
211                        p.push(path);
212                        x.enter(p.as_ref())
213                    } else {
214                        x.enter(path)
215                    }
216                })
217                .collect::<Vec<_>>();
218        }
219
220        Cgroup {
221            subsystems,
222            hier,
223            path: path.to_str().unwrap().to_string(),
224            specified_controllers: None,
225        }
226    }
227
228    /// The list of subsystems that this control group supports.
229    pub fn subsystems(&self) -> &Vec<Subsystem> {
230        &self.subsystems
231    }
232
233    /// Deletes the control group.
234    ///
235    /// Note that this function makes no effort in cleaning up the descendant and the underlying
236    /// system call will fail if there are any descendants. Thus, one should check whether it was
237    /// actually removed, and remove the descendants first if not. In the future, this behavior
238    /// will change.
239    pub fn delete(&self) -> Result<()> {
240        if self.v2() {
241            if !self.path.is_empty() {
242                let mut p = self.hier.root();
243                p.push(self.path.clone());
244                return fs::remove_dir(p).map_err(|e| Error::with_cause(RemoveFailed, e));
245            }
246            return Ok(());
247        }
248
249        self.subsystems.iter().try_for_each(|sub| match sub {
250            Subsystem::Pid(pidc) => pidc.delete(),
251            Subsystem::Mem(c) => c.delete(),
252            Subsystem::CpuSet(c) => c.delete(),
253            Subsystem::CpuAcct(c) => c.delete(),
254            Subsystem::Cpu(c) => c.delete(),
255            Subsystem::Devices(c) => c.delete(),
256            Subsystem::Freezer(c) => c.delete(),
257            Subsystem::NetCls(c) => c.delete(),
258            Subsystem::BlkIo(c) => c.delete(),
259            Subsystem::PerfEvent(c) => c.delete(),
260            Subsystem::NetPrio(c) => c.delete(),
261            Subsystem::HugeTlb(c) => c.delete(),
262            Subsystem::Rdma(c) => c.delete(),
263            Subsystem::Systemd(c) => c.delete(),
264        })
265    }
266
267    /// Apply a set of resource limits to the control group.
268    pub fn apply(&self, res: &Resources) -> Result<()> {
269        self.subsystems
270            .iter()
271            .try_fold((), |_, e| e.to_controller().apply(res))
272    }
273
274    /// Retrieve a container based on type inference.
275    ///
276    /// ## Example:
277    ///
278    /// ```text
279    /// let pids: &PidController = control_group.controller_of()
280    ///                             .expect("No pids controller attached!");
281    /// let cpu: &CpuController = control_group.controller_of()
282    ///                             .expect("No cpu controller attached!");
283    /// ```
284    pub fn controller_of<'a, T>(&'a self) -> Option<&'a T>
285    where
286        &'a T: From<&'a Subsystem>,
287        T: Controller + ControllIdentifier,
288    {
289        for i in &self.subsystems {
290            if i.to_controller().control_type() == T::controller_type() {
291                // N.B.:
292                // https://play.rust-lang.org/?gist=978b2846bacebdaa00be62374f4f4334&version=stable&mode=debug&edition=2015
293                return Some(i.into());
294            }
295        }
296        None
297    }
298
299    /// Removes tasks from the control group by thread group id.
300    ///
301    /// Note that this means that the task will be moved back to the root control group in the
302    /// hierarchy and any rules applied to that control group will _still_ apply to the proc.
303    pub fn remove_task_by_tgid(&self, tgid: CgroupPid) -> Result<()> {
304        self.hier.root_control_group().add_task_by_tgid(tgid)
305    }
306
307    /// Removes a task from the control group.
308    ///
309    /// Note that this means that the task will be moved back to the root control group in the
310    /// hierarchy and any rules applied to that control group will _still_ apply to the task.
311    pub fn remove_task(&self, tid: CgroupPid) -> Result<()> {
312        self.hier.root_control_group().add_task(tid)
313    }
314
315    /// Moves tasks to the parent control group by thread group id.
316    pub fn move_task_to_parent_by_tgid(&self, tgid: CgroupPid) -> Result<()> {
317        self.hier
318            .parent_control_group(&self.path)
319            .add_task_by_tgid(tgid)
320    }
321
322    /// Moves a task to the parent control group.
323    pub fn move_task_to_parent(&self, tid: CgroupPid) -> Result<()> {
324        self.hier.parent_control_group(&self.path).add_task(tid)
325    }
326
327    /// Return a handle to the parent control group in the hierarchy.
328    pub fn parent_control_group(&self) -> Cgroup {
329        self.hier.parent_control_group(&self.path)
330    }
331
332    /// Kill every process in the control group. Only supported for v2 cgroups and on
333    /// kernels 5.14+. This will fail with InvalidOperation if the 'cgroup.kill' file does
334    /// not exist.
335    pub fn kill(&self) -> Result<()> {
336        if !self.v2() {
337            return Err(Error::new(CgroupVersion));
338        }
339
340        let val = "1";
341        let file_name = "cgroup.kill";
342        let p = self.hier.root().join(self.path.clone()).join(file_name);
343
344        // If cgroup.kill doesn't exist they're not on 5.14+ so lets
345        // surface some error the caller can check against.
346        if !p.exists() {
347            return Err(Error::new(InvalidOperation));
348        }
349
350        fs::write(p, val)
351            .map_err(|e| Error::with_cause(WriteFailed(file_name.to_string(), val.to_string()), e))
352    }
353
354    /// Attach a task to the control group.
355    pub fn add_task(&self, tid: CgroupPid) -> Result<()> {
356        if self.v2() {
357            let subsystems = self.subsystems();
358            if !subsystems.is_empty() {
359                let c = subsystems[0].to_controller();
360                let cgroup_type = self.get_cgroup_type()?;
361                // In cgroup v2, writing to the cgroup.threads file is only supported in thread mode.
362                if cgroup_type == *CGROUP_MODE_DOMAIN_THREADED
363                    || cgroup_type == *CGROUP_MODE_THREADED
364                {
365                    // It is used to move the threads of a process into a cgroup in thread mode.
366                    c.add_task(&tid)
367                } else {
368                    // When the cgroup type is domain or domain invalid,
369                    // cgroup.threads cannot be written.
370                    Err(Error::new(CgroupMode))
371                }
372            } else {
373                Err(Error::new(SubsystemsEmpty))
374            }
375        } else {
376            self.subsystems()
377                .iter()
378                .try_for_each(|sub| sub.to_controller().add_task(&tid))
379        }
380    }
381
382    /// Attach tasks to the control group by thread group id.
383    pub fn add_task_by_tgid(&self, tgid: CgroupPid) -> Result<()> {
384        if self.v2() {
385            let subsystems = self.subsystems();
386            if !subsystems.is_empty() {
387                let c = subsystems[0].to_controller();
388                // It is used to move a thread of the process to a cgroup,
389                // and other threads of the process will also move together.
390                c.add_task_by_tgid(&tgid)
391            } else {
392                Err(Error::new(SubsystemsEmpty))
393            }
394        } else {
395            self.subsystems()
396                .iter()
397                .try_for_each(|sub| sub.to_controller().add_task_by_tgid(&tgid))
398        }
399    }
400
401    /// set cgroup.type
402    pub fn set_cgroup_type(&self, cgroup_type: &str) -> Result<()> {
403        if self.v2() {
404            let subsystems = self.subsystems();
405            if !subsystems.is_empty() {
406                let c = subsystems[0].to_controller();
407                c.set_cgroup_type(cgroup_type)
408            } else {
409                Err(Error::new(SubsystemsEmpty))
410            }
411        } else {
412            Err(Error::new(CgroupVersion))
413        }
414    }
415
416    /// get cgroup.type
417    pub fn get_cgroup_type(&self) -> Result<String> {
418        if self.v2() {
419            let subsystems = self.subsystems();
420            if !subsystems.is_empty() {
421                let c = subsystems[0].to_controller();
422                let cgroup_type = c.get_cgroup_type()?;
423                Ok(cgroup_type)
424            } else {
425                Err(Error::new(SubsystemsEmpty))
426            }
427        } else {
428            Err(Error::new(CgroupVersion))
429        }
430    }
431
432    /// Set notify_on_release to the control group.
433    pub fn set_notify_on_release(&self, enable: bool) -> Result<()> {
434        self.subsystems()
435            .iter()
436            .try_for_each(|sub| sub.to_controller().set_notify_on_release(enable))
437    }
438
439    /// Set release_agent
440    pub fn set_release_agent(&self, path: &str) -> Result<()> {
441        self.hier
442            .root_control_group()
443            .subsystems()
444            .iter()
445            .try_for_each(|sub| sub.to_controller().set_release_agent(path))
446    }
447
448    /// Returns an Iterator that can be used to iterate over the procs that are currently in the
449    /// control group.
450    pub fn procs(&self) -> Vec<CgroupPid> {
451        // Collect the procs from all subsystems
452        let mut v = if self.v2() {
453            let subsystems = self.subsystems();
454            if !subsystems.is_empty() {
455                let c = subsystems[0].to_controller();
456                c.procs()
457            } else {
458                vec![]
459            }
460        } else {
461            self.subsystems()
462                .iter()
463                .map(|x| x.to_controller().procs())
464                .fold(vec![], |mut acc, mut x| {
465                    acc.append(&mut x);
466                    acc
467                })
468        };
469
470        v.sort();
471        v.dedup();
472        v
473    }
474
475    /// Returns an Iterator that can be used to iterate over the tasks that are currently in the
476    /// control group.
477    pub fn tasks(&self) -> Vec<CgroupPid> {
478        // Collect the tasks from all subsystems
479        let mut v = if self.v2() {
480            let subsystems = self.subsystems();
481            if !subsystems.is_empty() {
482                let c = subsystems[0].to_controller();
483                c.tasks()
484            } else {
485                vec![]
486            }
487        } else {
488            self.subsystems()
489                .iter()
490                .map(|x| x.to_controller().tasks())
491                .fold(vec![], |mut acc, mut x| {
492                    acc.append(&mut x);
493                    acc
494                })
495        };
496
497        v.sort();
498        v.dedup();
499        v
500    }
501
502    /// Checks if the cgroup exists.
503    ///
504    /// Returns true if at least one subsystem exists.
505    pub fn exists(&self) -> bool {
506        self.subsystems().iter().any(|e| e.to_controller().exists())
507    }
508}
509
/// Directory from which this module reads `cgroup.controllers`; by its name,
/// the mount point assumed for the unified cgroup hierarchy.
pub const UNIFIED_MOUNTPOINT: &str = "/sys/fs/cgroup";
511
/// Writes `+<controller>` to `cgroup.subtree_control` under `path`, once per
/// entry of `controllers`.
///
/// This is best-effort: the result of each write is deliberately discarded.
fn enable_controllers(controllers: &[String], path: &Path) {
    let subtree_control = path.join("cgroup.subtree_control");
    controllers.iter().for_each(|name| {
        let _ = fs::write(&subtree_control, format!("+{}", name));
    });
}
519
520fn supported_controllers() -> Vec<String> {
521    let p = format!("{}/{}", UNIFIED_MOUNTPOINT, "cgroup.controllers");
522    let ret = fs::read_to_string(p.as_str());
523    ret.unwrap_or_default()
524        .split(' ')
525        .map(|x| x.trim().to_string())
526        .collect::<Vec<String>>()
527}
528
529fn create_v2_cgroup(
530    root: PathBuf,
531    path: &str,
532    specified_controllers: &Option<Vec<String>>,
533) -> Result<()> {
534    // controler list ["memory", "cpu"]
535    let controllers = if let Some(s_controllers) = specified_controllers.clone() {
536        if verify_supported_controllers(s_controllers.as_ref()) {
537            s_controllers
538        } else {
539            return Err(Error::new(ErrorKind::SpecifiedControllers));
540        }
541    } else {
542        supported_controllers()
543    };
544
545    let mut fp = root;
546
547    // enable for root
548    enable_controllers(&controllers, &fp);
549
550    // path: "a/b/c"
551    let elements = path.split('/').collect::<Vec<&str>>();
552    let last_index = elements.len() - 1;
553    for (i, ele) in elements.iter().enumerate() {
554        // ROOT/a
555        fp.push(ele);
556        // create dir, need not check if is a file or directory
557        if !fp.exists() {
558            if let Err(e) = std::fs::create_dir(fp.clone()) {
559                return Err(Error::with_cause(ErrorKind::FsError, e));
560            }
561        }
562
563        if i < last_index {
564            // enable controllers for substree
565            enable_controllers(&controllers, &fp);
566        }
567    }
568
569    Ok(())
570}
571
572pub fn verify_supported_controllers(controllers: &[String]) -> bool {
573    let sc = supported_controllers();
574    for controller in controllers.iter() {
575        if !sc.contains(controller) {
576            return false;
577        }
578    }
579    true
580}
581
582pub fn get_cgroups_relative_paths() -> Result<HashMap<String, String>> {
583    let path = "/proc/self/cgroup".to_string();
584    get_cgroups_relative_paths_by_path(path)
585}
586
587pub fn get_cgroups_relative_paths_by_pid(pid: u32) -> Result<HashMap<String, String>> {
588    let path = format!("/proc/{}/cgroup", pid);
589    get_cgroups_relative_paths_by_path(path)
590}
591
/// Returns `pidpath` with a leading `mount_root` removed.
///
/// A `mount_root` of `"/"` is treated as empty, so `pidpath` is returned
/// unchanged. The prefix is removed at most once: a root of `/a` turns
/// `/a/a/b` into `/a/b`. If `pidpath` does not start with `mount_root`, it is
/// returned unchanged.
fn get_cgroup_destination(mount_root: String, pidpath: String) -> String {
    let root = if mount_root == "/" {
        ""
    } else {
        mount_root.as_str()
    };
    pidpath.strip_prefix(root).unwrap_or(&pidpath).to_string()
}
598
599pub fn existing_path(paths: HashMap<String, String>) -> Result<HashMap<String, String>> {
600    let mount_roots_v1 = V1::new();
601    let mut mount_roots_subsystems_map = HashMap::new();
602
603    for s in mount_roots_v1.subsystems().iter() {
604        let controller_name = s.controller_name();
605        let path_from_cgroup = paths
606            .get(&controller_name)
607            .ok_or(Error::new(Common(format!(
608                "controller {} found in mountinfo, but not found in cgroup.",
609                controller_name
610            ))))?;
611        let path_from_mountinfo = s.to_controller().base().to_string_lossy().to_string();
612
613        let des_path = get_cgroup_destination(path_from_mountinfo, path_from_cgroup.to_owned());
614        mount_roots_subsystems_map.insert(controller_name, des_path);
615    }
616    Ok(mount_roots_subsystems_map)
617}
618
619fn get_cgroups_relative_paths_by_path(path: String) -> Result<HashMap<String, String>> {
620    let mut m = HashMap::new();
621    let content =
622        fs::read_to_string(path.clone()).map_err(|e| Error::with_cause(ReadFailed(path), e))?;
623    // cgroup path may have ":" , likes
624    // "2:cpu,cpuacct:/system.slice/containerd.service/test.slice:cri-containerd:96b37a2edf84351487f42039e137427f1812f678850675fac214caf597ee5e4a"
625    for line in content.lines() {
626        if let Some((first_value_part, remaining_path)) =
627            line.split_once(':').unwrap_or_default().1.split_once(':')
628        {
629            let keys: Vec<&str> = first_value_part.split(',').collect();
630            keys.iter().for_each(|key| {
631                m.insert(key.to_string(), remaining_path.to_string());
632            });
633        }
634    }
635    Ok(m)
636}