1use std::fmt::{Debug, Display};
2use std::fs::{self, File};
3use std::io::{BufRead, BufReader, Write};
4use std::path::{Path, PathBuf, StripPrefixError};
5use std::time::Duration;
6
7use nix::sys::statfs::{CGROUP2_SUPER_MAGIC, TMPFS_MAGIC, statfs};
8use nix::unistd::Pid;
9use oci_spec::runtime::LinuxResources;
10#[cfg(any(feature = "cgroupsv2_devices", feature = "v1"))]
11use oci_spec::runtime::{
12 LinuxDevice, LinuxDeviceBuilder, LinuxDeviceCgroup, LinuxDeviceCgroupBuilder, LinuxDeviceType,
13};
14
15use super::stats::Stats;
16use super::{systemd, v1, v2};
17
/// Name of the per-cgroup file whose lines are parsed as process ids by
/// [`get_all_pids`].
pub const CGROUP_PROCS: &str = "cgroup.procs";
/// Cgroup root used when the caller does not supply one.
pub const DEFAULT_CGROUP_ROOT: &str = "/sys/fs/cgroup";
20
/// Reports whether the process is effectively root *and* its
/// `/proc/self/uid_map` mentions `4294967295` (`u32::MAX`).
///
/// NOTE(review): presumably a uid map covering the full 32-bit range only
/// appears in the initial user namespace, which would distinguish "real" root
/// from root inside a user namespace — confirm against user_namespaces(7).
///
/// # Errors
///
/// Returns [`WrappedIoError::Read`] when the uid map cannot be read. The file
/// is only read when the effective uid is root.
#[cfg(feature = "systemd")]
#[inline]
fn is_true_root() -> Result<bool, WrappedIoError> {
    const UID_MAP_PATH: &str = "/proc/self/uid_map";
    const FULL_UID_RANGE: &str = "4294967295";

    if nix::unistd::geteuid().is_root() {
        match std::fs::read_to_string(UID_MAP_PATH) {
            Ok(uid_map) => Ok(uid_map.contains(FULL_UID_RANGE)),
            Err(err) => Err(WrappedIoError::Read {
                err,
                path: UID_MAP_PATH.into(),
            }),
        }
    } else {
        // Not even effectively root: no need to look at the uid map.
        Ok(false)
    }
}
/// Common interface exposed by every cgroup manager backend in this module
/// (see [`AnyCgroupManager`] for the dispatching wrapper).
pub trait CgroupManager {
    /// Error type returned by every operation of the manager.
    type Error;

    /// Adds the process identified by `pid` to the managed cgroup.
    fn add_task(&self, pid: Pid) -> Result<(), Self::Error>;

    /// Applies the settings described by `controller_opt` to the managed cgroup.
    fn apply(&self, controller_opt: &ControllerOpt) -> Result<(), Self::Error>;

    /// Removes the managed cgroup.
    fn remove(&self) -> Result<(), Self::Error>;

    /// Requests the given freezer `state` for the managed cgroup.
    fn freeze(&self, state: FreezerState) -> Result<(), Self::Error>;

    /// Retrieves statistics for the managed cgroup.
    fn stats(&self) -> Result<Stats, Self::Error>;

    /// Returns the process ids associated with the managed cgroup.
    fn get_all_pids(&self) -> Result<Vec<Pid>, Self::Error>;
}
55
/// Error returned by [`AnyCgroupManager`]: a transparent wrapper around the
/// error of whichever backend is in use.
#[derive(thiserror::Error, Debug)]
pub enum AnyManagerError {
    /// Error raised by the systemd backend.
    #[error(transparent)]
    Systemd(#[from] systemd::manager::SystemdManagerError),
    /// Error raised by the cgroup v1 backend.
    #[error(transparent)]
    V1(#[from] v1::manager::V1ManagerError),
    /// Error raised by the cgroup v2 backend.
    #[error(transparent)]
    V2(#[from] v2::manager::V2ManagerError),
}
65
/// A cgroup manager backed by one of the supported implementations.
///
/// Its [`CgroupManager`] implementation forwards each call to the wrapped
/// backend.
pub enum AnyCgroupManager {
    /// systemd-backed manager.
    // NOTE(review): boxed, presumably to keep the enum small — confirm.
    Systemd(Box<systemd::manager::Manager>),
    /// cgroup v1 manager.
    V1(v1::manager::Manager),
    /// cgroup v2 manager.
    V2(v2::manager::Manager),
}
72
73impl CgroupManager for AnyCgroupManager {
74 type Error = AnyManagerError;
75
76 fn add_task(&self, pid: Pid) -> Result<(), Self::Error> {
77 match self {
78 AnyCgroupManager::Systemd(m) => Ok(m.add_task(pid)?),
79 AnyCgroupManager::V1(m) => Ok(m.add_task(pid)?),
80 AnyCgroupManager::V2(m) => Ok(m.add_task(pid)?),
81 }
82 }
83
84 fn apply(&self, controller_opt: &ControllerOpt) -> Result<(), Self::Error> {
85 match self {
86 AnyCgroupManager::Systemd(m) => Ok(m.apply(controller_opt)?),
87 AnyCgroupManager::V1(m) => Ok(m.apply(controller_opt)?),
88 AnyCgroupManager::V2(m) => Ok(m.apply(controller_opt)?),
89 }
90 }
91
92 fn remove(&self) -> Result<(), Self::Error> {
93 match self {
94 AnyCgroupManager::Systemd(m) => Ok(m.remove()?),
95 AnyCgroupManager::V1(m) => Ok(m.remove()?),
96 AnyCgroupManager::V2(m) => Ok(m.remove()?),
97 }
98 }
99
100 fn freeze(&self, state: FreezerState) -> Result<(), Self::Error> {
101 match self {
102 AnyCgroupManager::Systemd(m) => Ok(m.freeze(state)?),
103 AnyCgroupManager::V1(m) => Ok(m.freeze(state)?),
104 AnyCgroupManager::V2(m) => Ok(m.freeze(state)?),
105 }
106 }
107
108 fn stats(&self) -> Result<Stats, Self::Error> {
109 match self {
110 AnyCgroupManager::Systemd(m) => Ok(m.stats()?),
111 AnyCgroupManager::V1(m) => Ok(m.stats()?),
112 AnyCgroupManager::V2(m) => Ok(m.stats()?),
113 }
114 }
115
116 fn get_all_pids(&self) -> Result<Vec<Pid>, Self::Error> {
117 match self {
118 AnyCgroupManager::Systemd(m) => Ok(m.get_all_pids()?),
119 AnyCgroupManager::V1(m) => Ok(m.get_all_pids()?),
120 AnyCgroupManager::V2(m) => Ok(m.get_all_pids()?),
121 }
122 }
123}
124
/// The cgroup hierarchy layouts distinguished by
/// [`get_cgroup_setup_with_root`].
#[derive(Debug)]
pub enum CgroupSetup {
    /// A tmpfs root whose `unified` sub-directory is a cgroup2 filesystem.
    Hybrid,
    /// A tmpfs root without a cgroup2 `unified` sub-directory.
    Legacy,
    /// The root itself is a cgroup2 filesystem.
    Unified,
}

/// Renders the setup as its lowercase name (`hybrid`, `legacy`, `unified`).
impl Display for CgroupSetup {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Hybrid => "hybrid",
            Self::Legacy => "legacy",
            Self::Unified => "unified",
        })
    }
}
143
/// Freezer state that can be requested through [`CgroupManager::freeze`] or
/// carried in [`ControllerOpt::freezer_state`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FreezerState {
    /// No particular freezer state is specified.
    Undefined,
    /// The cgroup's tasks are frozen.
    Frozen,
    /// The cgroup's tasks are thawed.
    Thawed,
}
154
/// Options handed to [`CgroupManager::apply`].
#[derive(Clone, Debug)]
pub struct ControllerOpt<'a> {
    /// Resource settings borrowed from the caller.
    pub resources: &'a LinuxResources,
    /// Whether the OOM killer should be disabled.
    pub disable_oom_killer: bool,
    /// Optional OOM score adjustment.
    pub oom_score_adj: Option<i32>,
    /// Optional freezer state to apply.
    pub freezer_state: Option<FreezerState>,
}
167
/// An [`std::io::Error`] annotated with the path (and, for writes, the data)
/// involved in the failing operation.
#[derive(thiserror::Error, Debug)]
pub enum WrappedIoError {
    /// Opening `path` failed.
    #[error("failed to open {path}: {err}")]
    Open { err: std::io::Error, path: PathBuf },
    /// Writing `data` to `path` failed.
    #[error("failed to write {data} to {path}: {err}")]
    Write {
        err: std::io::Error,
        path: PathBuf,
        data: String,
    },
    /// Reading `path` failed.
    #[error("failed to read {path}: {err}")]
    Read { err: std::io::Error, path: PathBuf },
    /// Creating the directory `path` failed.
    #[error("failed to create dir {path}: {err}")]
    CreateDir { err: std::io::Error, path: PathBuf },
    /// Any other I/O failure associated with `path`.
    #[error("at {path}: {err}")]
    Other { err: std::io::Error, path: PathBuf },
}
185
186impl WrappedIoError {
187 pub fn inner(&self) -> &std::io::Error {
188 match self {
189 WrappedIoError::Open { err, .. } => err,
190 WrappedIoError::Write { err, .. } => err,
191 WrappedIoError::Read { err, .. } => err,
192 WrappedIoError::CreateDir { err, .. } => err,
193 WrappedIoError::Other { err, .. } => err,
194 }
195 }
196}
197
198#[inline]
199pub fn write_cgroup_file_str<P: AsRef<Path>>(path: P, data: &str) -> Result<(), WrappedIoError> {
200 let path = path.as_ref();
201
202 fs::OpenOptions::new()
203 .create(false)
204 .write(true)
205 .truncate(false)
206 .open(path)
207 .map_err(|err| WrappedIoError::Open {
208 err,
209 path: path.to_path_buf(),
210 })?
211 .write_all(data.as_bytes())
212 .map_err(|err| WrappedIoError::Write {
213 err,
214 path: path.to_path_buf(),
215 data: data.into(),
216 })?;
217
218 Ok(())
219}
220
221#[inline]
222pub fn write_cgroup_file<P: AsRef<Path>, T: ToString>(
223 path: P,
224 data: T,
225) -> Result<(), WrappedIoError> {
226 let path = path.as_ref();
227 let data = data.to_string();
228
229 fs::OpenOptions::new()
230 .create(false)
231 .write(true)
232 .truncate(false)
233 .open(path)
234 .map_err(|err| WrappedIoError::Open {
235 err,
236 path: path.to_path_buf(),
237 })?
238 .write_all(data.as_bytes())
239 .map_err(|err| WrappedIoError::Write {
240 err,
241 path: path.to_path_buf(),
242 data,
243 })?;
244
245 Ok(())
246}
247
248#[inline]
249pub fn read_cgroup_file<P: AsRef<Path>>(path: P) -> Result<String, WrappedIoError> {
250 let path = path.as_ref();
251 fs::read_to_string(path).map_err(|err| WrappedIoError::Read {
252 err,
253 path: path.to_path_buf(),
254 })
255}
256
/// Errors produced while detecting the cgroup setup.
#[derive(thiserror::Error, Debug)]
pub enum GetCgroupSetupError {
    /// An I/O operation (e.g. `statfs`) failed.
    #[error("io error: {0}")]
    WrappedIo(#[from] WrappedIoError),
    /// The cgroup root path does not exist.
    #[error("non default cgroup root not supported")]
    NonDefault,
    /// The root's filesystem type was neither cgroup2 nor tmpfs.
    #[error("failed to detect cgroup setup")]
    FailedToDetect,
}
266
267pub fn get_cgroup_setup_with_root(root_path: &Path) -> Result<CgroupSetup, GetCgroupSetupError> {
276 match root_path.exists() {
277 true => {
278 let stat = statfs(root_path)
283 .map_err(std::io::Error::other)
284 .wrap_other(root_path)?;
285 if stat.filesystem_type() == CGROUP2_SUPER_MAGIC {
286 return Ok(CgroupSetup::Unified);
287 }
288
289 if stat.filesystem_type() == TMPFS_MAGIC {
290 let unified = &Path::new(root_path).join("unified");
291 if Path::new(unified).exists() {
292 let stat = statfs(unified)
293 .map_err(std::io::Error::other)
294 .wrap_other(unified)?;
295 if stat.filesystem_type() == CGROUP2_SUPER_MAGIC {
296 return Ok(CgroupSetup::Hybrid);
297 }
298 }
299
300 return Ok(CgroupSetup::Legacy);
301 }
302 }
303 false => return Err(GetCgroupSetupError::NonDefault),
304 }
305
306 Err(GetCgroupSetupError::FailedToDetect)
307}
308
/// Detects the cgroup setup using [`DEFAULT_CGROUP_ROOT`] as the root; thin
/// wrapper around [`get_cgroup_setup_with_root`].
pub fn get_cgroup_setup() -> Result<CgroupSetup, GetCgroupSetupError> {
    get_cgroup_setup_with_root(Path::new(DEFAULT_CGROUP_ROOT))
}
312
/// Errors produced while creating a cgroup manager.
///
/// The first three variants mirror [`GetCgroupSetupError`]; the remaining
/// ones wrap the error of the backend that failed to initialise.
#[derive(thiserror::Error, Debug)]
pub enum CreateCgroupSetupError {
    /// An I/O operation failed.
    #[error("io error: {0}")]
    WrappedIo(#[from] WrappedIoError),
    /// The cgroup root path does not exist.
    #[error("non default cgroup root not supported")]
    NonDefault,
    /// The cgroup setup could not be detected.
    #[error("failed to detect cgroup setup")]
    FailedToDetect,
    /// The cgroup v1 manager could not be created.
    #[error("v1 error: {0}")]
    V1(#[from] v1::manager::V1ManagerError),
    /// The cgroup v2 manager could not be created.
    #[error("v2 error: {0}")]
    V2(#[from] v2::manager::V2ManagerError),
    /// The systemd manager could not be created.
    #[error("systemd error: {0}")]
    Systemd(#[from] systemd::manager::SystemdManagerError),
}
328
/// Input for [`create_cgroup_manager`] / [`create_cgroup_manager_with_root`].
#[derive(Clone)]
pub struct CgroupConfig {
    /// Path of the cgroup to manage. On a unified setup an absolute path
    /// selects the v2 manager even when `systemd_cgroup` is set.
    pub cgroup_path: PathBuf,
    /// Whether the systemd manager is requested (only honoured on a unified
    /// setup with a relative `cgroup_path`).
    pub systemd_cgroup: bool,
    /// Container name, passed to the systemd manager.
    pub container_name: String,
}
335
336pub fn create_cgroup_manager_with_root(
339 root_path: Option<&Path>,
340 config: CgroupConfig,
341) -> Result<AnyCgroupManager, CreateCgroupSetupError> {
342 let root = match root_path {
343 Some(p) => p,
344 None => Path::new(DEFAULT_CGROUP_ROOT),
345 };
346
347 let cgroup_setup = get_cgroup_setup_with_root(root).map_err(|err| match err {
348 GetCgroupSetupError::WrappedIo(err) => CreateCgroupSetupError::WrappedIo(err),
349 GetCgroupSetupError::NonDefault => CreateCgroupSetupError::NonDefault,
350 GetCgroupSetupError::FailedToDetect => CreateCgroupSetupError::FailedToDetect,
351 })?;
352 let cgroup_path = config.cgroup_path.as_path();
353
354 match cgroup_setup {
355 CgroupSetup::Legacy | CgroupSetup::Hybrid => {
356 Ok(create_v1_cgroup_manager(cgroup_path)?.any())
357 }
358 CgroupSetup::Unified => {
359 if cgroup_path.is_absolute() || !config.systemd_cgroup {
361 return Ok(create_v2_cgroup_manager(root, cgroup_path)?.any());
362 }
363 Ok(
364 create_systemd_cgroup_manager(root, cgroup_path, config.container_name.as_str())?
365 .any(),
366 )
367 }
368 }
369}
370
/// Creates a cgroup manager for `config` rooted at [`DEFAULT_CGROUP_ROOT`];
/// thin wrapper around [`create_cgroup_manager_with_root`].
pub fn create_cgroup_manager(
    config: CgroupConfig,
) -> Result<AnyCgroupManager, CreateCgroupSetupError> {
    create_cgroup_manager_with_root(Some(Path::new(DEFAULT_CGROUP_ROOT)), config)
}
376
/// Builds the cgroup v1 manager for `cgroup_path`, logging the choice at info
/// level. Compiled only when the `v1` feature is enabled.
#[cfg(feature = "v1")]
fn create_v1_cgroup_manager(
    cgroup_path: &Path,
) -> Result<v1::manager::Manager, v1::manager::V1ManagerError> {
    tracing::info!("cgroup manager V1 will be used");
    v1::manager::Manager::new(cgroup_path)
}
384
/// Stand-in used when the `v1` feature is disabled: always fails with
/// `V1ManagerError::NotEnabled`.
#[cfg(not(feature = "v1"))]
fn create_v1_cgroup_manager(
    _cgroup_path: &Path,
) -> Result<v1::manager::Manager, v1::manager::V1ManagerError> {
    Err(v1::manager::V1ManagerError::NotEnabled)
}
391
/// Builds the cgroup v2 manager for `cgroup_path` under `root_path`, logging
/// the choice at info level. Compiled only when the `v2` feature is enabled.
#[cfg(feature = "v2")]
fn create_v2_cgroup_manager(
    root_path: &Path,
    cgroup_path: &Path,
) -> Result<v2::manager::Manager, v2::manager::V2ManagerError> {
    tracing::info!("cgroup manager V2 will be used");
    v2::manager::Manager::new(root_path.to_path_buf(), cgroup_path.to_owned())
}
400
/// Stand-in used when the `v2` feature is disabled: always fails with
/// `V2ManagerError::NotEnabled`.
#[cfg(not(feature = "v2"))]
fn create_v2_cgroup_manager(
    _root_path: &Path,
    _cgroup_path: &Path,
) -> Result<v2::manager::Manager, v2::manager::V2ManagerError> {
    Err(v2::manager::V2ManagerError::NotEnabled)
}
408
/// Builds the systemd cgroup manager. Compiled only when the `systemd`
/// feature is enabled.
///
/// The manager is told to use the system bus when [`is_true_root`] reports
/// `true`; the decision is logged at info level.
///
/// # Panics
///
/// Panics if `systemd::booted()` returns `false`.
/// NOTE(review): panicking on a recoverable condition in library code is
/// questionable; consider a dedicated error variant.
///
/// # Errors
///
/// Returns `SystemdManagerError::WrappedIo` if the root check fails, or
/// whatever error the manager constructor reports.
#[cfg(feature = "systemd")]
fn create_systemd_cgroup_manager(
    root_path: &Path,
    cgroup_path: &Path,
    container_name: &str,
) -> Result<systemd::manager::Manager, systemd::manager::SystemdManagerError> {
    use crate::systemd::manager::PROCESS_IN_CGROUP_TIMEOUT_DURATION;

    assert!(
        systemd::booted(),
        "systemd cgroup flag passed, but systemd support for managing cgroups is not available"
    );

    let use_system = match is_true_root() {
        Ok(true_root) => true_root,
        Err(err) => return Err(systemd::manager::SystemdManagerError::WrappedIo(err)),
    };

    tracing::info!(
        "systemd cgroup manager with system bus {} will be used",
        use_system
    );

    systemd::manager::Manager::new(
        root_path.into(),
        cgroup_path.to_owned(),
        container_name.into(),
        use_system,
        PROCESS_IN_CGROUP_TIMEOUT_DURATION,
    )
}
437
/// Stand-in used when the `systemd` feature is disabled: always fails with
/// `SystemdManagerError::NotEnabled`.
#[cfg(not(feature = "systemd"))]
fn create_systemd_cgroup_manager(
    _root_path: &Path,
    _cgroup_path: &Path,
    _container_name: &str,
) -> Result<systemd::manager::Manager, systemd::manager::SystemdManagerError> {
    Err(systemd::manager::SystemdManagerError::NotEnabled)
}
446
447pub fn get_all_pids(path: &Path) -> Result<Vec<Pid>, WrappedIoError> {
448 tracing::debug!("scan pids in folder: {:?}", path);
449 let mut result = vec![];
450 walk_dir(path, &mut |p| {
451 let file_path = p.join(CGROUP_PROCS);
452 if file_path.exists() {
453 let file = File::open(&file_path).wrap_open(&file_path)?;
454 for line in BufReader::new(file).lines().map_while(Result::ok) {
455 result.push(Pid::from_raw(
456 line.parse::<i32>()
457 .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))
458 .wrap_other(&file_path)?,
459 ))
460 }
461 }
462 Ok::<(), WrappedIoError>(())
463 })?;
464 Ok(result)
465}
466
467fn walk_dir<F, E>(path: &Path, c: &mut F) -> Result<(), E>
468where
469 F: FnMut(&Path) -> Result<(), E>,
470 E: From<WrappedIoError>,
471{
472 c(path)?;
473 for entry in fs::read_dir(path).wrap_read(path)? {
474 let entry = entry.wrap_open(path)?;
475 let path = entry.path();
476
477 if path.is_dir() {
478 walk_dir(&path, c)?;
479 }
480 }
481 Ok(())
482}
483
/// Extension trait adding a join that never lets the joined path replace the
/// base path.
pub(crate) trait PathBufExt {
    /// Joins `path` onto `self`. A relative `path` is joined as-is; an
    /// absolute `path` has its leading `/` stripped first, so the result
    /// stays below `self` instead of replacing it.
    fn join_safely<P: AsRef<Path>>(&self, path: P) -> Result<PathBuf, JoinSafelyError>;
}
487
/// Error returned by `PathBufExt::join_safely`.
#[derive(thiserror::Error, Debug)]
pub enum JoinSafelyError {
    /// The leading `/` could not be stripped from the absolute `path`.
    #[error("failed to strip prefix from {path}: {err}")]
    StripPrefix {
        err: StripPrefixError,
        path: PathBuf,
    },
}
496
497impl PathBufExt for PathBuf {
498 fn join_safely<P: AsRef<Path>>(&self, path: P) -> Result<PathBuf, JoinSafelyError> {
499 let path = path.as_ref();
500 if path.is_relative() {
501 return Ok(self.join(path));
502 }
503
504 let stripped = path
505 .strip_prefix("/")
506 .map_err(|err| JoinSafelyError::StripPrefix {
507 err,
508 path: path.to_path_buf(),
509 })?;
510 Ok(self.join(stripped))
511 }
512}
513
/// Returns the device cgroup rules that are allowed by default.
///
/// Every rule is an "allow" rule; each `build()` is unwrapped, so a builder
/// failure would panic.
///
/// NOTE(review): the device names mentioned below are taken from the Linux
/// allocated-devices list, not from this file — verify before relying on them.
#[cfg(any(feature = "cgroupsv2_devices", feature = "v1"))]
pub(crate) fn default_allow_devices() -> Vec<LinuxDeviceCgroup> {
    vec![
        // Any character device: "m" access only.
        LinuxDeviceCgroupBuilder::default()
            .allow(true)
            .typ(LinuxDeviceType::C)
            .access("m")
            .build()
            .unwrap(),
        // Any block device: "m" access only.
        LinuxDeviceCgroupBuilder::default()
            .allow(true)
            .typ(LinuxDeviceType::B)
            .access("m")
            .build()
            .unwrap(),
        // Character device 5:1 (presumably /dev/console).
        LinuxDeviceCgroupBuilder::default()
            .allow(true)
            .typ(LinuxDeviceType::C)
            .major(5)
            .minor(1)
            .access("rwm")
            .build()
            .unwrap(),
        // Character devices with major 136, any minor (presumably /dev/pts/*).
        LinuxDeviceCgroupBuilder::default()
            .allow(true)
            .typ(LinuxDeviceType::C)
            .major(136)
            .access("rwm")
            .build()
            .unwrap(),
        // Character device 5:2 (presumably /dev/ptmx).
        LinuxDeviceCgroupBuilder::default()
            .allow(true)
            .typ(LinuxDeviceType::C)
            .major(5)
            .minor(2)
            .access("rwm")
            .build()
            .unwrap(),
        // Character device 10:200 (presumably /dev/net/tun).
        LinuxDeviceCgroupBuilder::default()
            .allow(true)
            .typ(LinuxDeviceType::C)
            .major(10)
            .minor(200)
            .access("rwm")
            .build()
            .unwrap(),
    ]
}
565
/// Returns the default set of character devices: `/dev/null`, `/dev/zero`,
/// `/dev/full`, `/dev/tty`, `/dev/urandom` and `/dev/random`, in that order.
///
/// Each `build()` is unwrapped, so a builder failure would panic.
#[cfg(any(feature = "cgroupsv2_devices", feature = "v1"))]
pub(crate) fn default_devices() -> Vec<LinuxDevice> {
    // All defaults share the same type and file mode; only the path and the
    // major/minor numbers differ.
    // NOTE(review): the mode is 0o066, not 0o666 — looks like a typo, confirm.
    let char_device = |path: &str, major: i64, minor: i64| {
        LinuxDeviceBuilder::default()
            .path(PathBuf::from(path))
            .typ(LinuxDeviceType::C)
            .major(major)
            .minor(minor)
            .file_mode(0o066u32)
            .build()
            .unwrap()
    };

    vec![
        char_device("/dev/null", 1, 3),
        char_device("/dev/zero", 1, 5),
        char_device("/dev/full", 1, 7),
        char_device("/dev/tty", 5, 0),
        char_device("/dev/urandom", 1, 9),
        char_device("/dev/random", 1, 8),
    ]
}
619
620pub(crate) fn delete_with_retry<P: AsRef<Path>, L: Into<Option<Duration>>>(
622 path: P,
623 retries: u32,
624 limit_backoff: L,
625) -> Result<(), WrappedIoError> {
626 let mut attempts = 0;
627 let mut delay = Duration::from_millis(10);
628 let path = path.as_ref();
629 let limit = limit_backoff.into().unwrap_or(Duration::MAX);
630
631 while attempts < retries {
632 if fs::remove_dir(path).is_ok() {
633 return Ok(());
634 }
635
636 std::thread::sleep(delay);
637 attempts += 1;
638 delay *= attempts;
639 if delay > limit {
640 delay = limit;
641 }
642 }
643
644 Err(std::io::Error::new(
645 std::io::ErrorKind::TimedOut,
646 "could not delete".to_string(),
647 ))
648 .wrap_other(path)?
649}
650
/// Extension trait that converts an I/O result into one carrying a
/// [`WrappedIoError`] annotated with the path involved.
pub(crate) trait WrapIoResult {
    /// Success type of the wrapped result.
    type Target;

    /// Maps the error to [`WrappedIoError::CreateDir`] for `path`.
    fn wrap_create_dir<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError>;
    /// Maps the error to [`WrappedIoError::Read`] for `path`.
    fn wrap_read<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError>;
    /// Maps the error to [`WrappedIoError::Open`] for `path`.
    fn wrap_open<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError>;
    /// Maps the error to [`WrappedIoError::Write`] for `path`, recording the
    /// `data` that was being written.
    fn wrap_write<P: Into<PathBuf>, D: Into<String>>(
        self,
        path: P,
        data: D,
    ) -> Result<Self::Target, WrappedIoError>;
    /// Maps the error to [`WrappedIoError::Other`] for `path`.
    fn wrap_other<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError>;
}
664
665impl<T> WrapIoResult for Result<T, std::io::Error> {
666 type Target = T;
667
668 fn wrap_create_dir<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError> {
669 self.map_err(|err| WrappedIoError::CreateDir {
670 err,
671 path: path.into(),
672 })
673 }
674
675 fn wrap_read<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError> {
676 self.map_err(|err| WrappedIoError::Read {
677 err,
678 path: path.into(),
679 })
680 }
681
682 fn wrap_open<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError> {
683 self.map_err(|err| WrappedIoError::Open {
684 err,
685 path: path.into(),
686 })
687 }
688
689 fn wrap_write<P: Into<PathBuf>, D: Into<String>>(
690 self,
691 path: P,
692 data: D,
693 ) -> Result<Self::Target, WrappedIoError> {
694 self.map_err(|err| WrappedIoError::Write {
695 err,
696 path: path.into(),
697 data: data.into(),
698 })
699 }
700
701 fn wrap_other<P: Into<PathBuf>>(self, path: P) -> Result<Self::Target, WrappedIoError> {
702 self.map_err(|err| WrappedIoError::Other {
703 err,
704 path: path.into(),
705 })
706 }
707}
708
/// An error that is one of two possible error types.
#[derive(Debug)]
pub enum EitherError<L, R> {
    /// The first alternative.
    Left(L),
    /// The second alternative.
    Right(R),
}

/// Displays exactly like the wrapped value, forwarding the caller's formatter
/// (and therefore its width/alignment flags) unchanged.
impl<L, R> Display for EitherError<L, R>
where
    L: Display,
    R: Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Left(inner) => Display::fmt(inner, f),
            Self::Right(inner) => Display::fmt(inner, f),
        }
    }
}

/// Usable as a standard error whenever both alternatives are printable; no
/// `source` is reported.
impl<L, R> std::error::Error for EitherError<L, R>
where
    L: Debug + Display,
    R: Debug + Display,
{
}
725
/// Marker error whose message states that a page size must be a power of two.
#[derive(Debug)]
pub struct MustBePowerOfTwo;

/// Always renders the same fixed message, ignoring formatter flags.
impl Display for MustBePowerOfTwo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "page size must be in the format of 2^(integer)")
    }
}