cgroups_rs/
memory.rs

1// Copyright (c) 2018 Levente Kurusa
2// Copyright (c) 2020 Ant Group
3//
4// SPDX-License-Identifier: Apache-2.0 or MIT
5//
6
7//! This module contains the implementation of the `memory` cgroup subsystem.
8//!
9//! See the Kernel's documentation for more information about this subsystem, found at:
10//!  [Documentation/cgroup-v1/memory.txt](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)
11use log::warn;
12use std::collections::HashMap;
13use std::io::Write;
14use std::path::PathBuf;
15use std::sync::mpsc::Receiver;
16
17use crate::error::ErrorKind::*;
18use crate::error::*;
19use crate::events;
20use crate::{read_i64_from, read_string_from, read_u64_from};
21
22use crate::flat_keyed_to_hashmap;
23
24use crate::{
25    ControllIdentifier, ControllerInternal, Controllers, CustomizedAttribute, MaxValue,
26    MemoryResources, Resources, Subsystem,
27};
28
/// Handle for the `memory` subsystem of a control group.
///
/// Through this controller the user can collect statistics about the memory usage of the tasks
/// in the control group. Additionally, it can be used to place limits on their memory usage.
#[derive(Clone, Debug)]
pub struct MemController {
    /// Root the controller was created with; handed out by `get_base`.
    base: PathBuf,
    /// Path of the control group; starts out equal to `base` and is handed out by `get_path`.
    path: PathBuf,
    /// Flag reported by `is_v2`; selects the `*_v2` code paths of this controller.
    v2: bool,
}
40
41#[derive(Default, Debug, PartialEq, Eq)]
42#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
43pub struct SetMemory {
44    pub low: Option<MaxValue>,
45    pub high: Option<MaxValue>,
46    pub min: Option<MaxValue>,
47    pub max: Option<MaxValue>,
48}
49
/// State of, and counters about, the OOM killer operating in this control group.
#[derive(Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct OomControl {
    /// `true` when the OOM killer has been disabled for the tasks in this control group.
    pub oom_kill_disable: bool,
    /// `true` while the control group is currently under OOM.
    pub under_oom: bool,
    /// Number of tasks the OOM killer has killed so far.
    pub oom_kill: u64,
}
61
62#[allow(clippy::unnecessary_wraps)]
63fn parse_oom_control(s: String) -> Result<OomControl> {
64    let spl = s.split_whitespace().collect::<Vec<_>>();
65
66    let oom_kill_disable = if spl.len() > 1 {
67        spl[1].parse::<u64>().unwrap() == 1
68    } else {
69        false
70    };
71
72    let under_oom = if spl.len() > 3 {
73        spl[3].parse::<u64>().unwrap() == 1
74    } else {
75        false
76    };
77
78    let oom_kill = if spl.len() > 5 {
79        spl[5].parse::<u64>().unwrap()
80    } else {
81        0
82    };
83
84    Ok(OomControl {
85        oom_kill_disable,
86        under_oom,
87        oom_kill,
88    })
89}
90
/// Statistics about the NUMA locality of the control group's tasks.
///
/// Every counter comes in two flavours: the total, and the same value broken down by NUMA node
/// (one vector entry per node, in the order the nodes are listed).
#[derive(Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct NumaStat {
    /// Total number of pages used by the control group.
    pub total_pages: u64,
    /// Total number of pages used by the control group, per NUMA node.
    pub total_pages_per_node: Vec<u64>,
    /// Total number of file pages used by the control group.
    pub file_pages: u64,
    /// Total number of file pages used by the control group, per NUMA node.
    pub file_pages_per_node: Vec<u64>,
    /// Total number of anonymous pages used by the control group.
    pub anon_pages: u64,
    /// Total number of anonymous pages used by the control group, per NUMA node.
    pub anon_pages_per_node: Vec<u64>,
    /// Total number of unevictable pages used by the control group.
    pub unevictable_pages: u64,
    /// Total number of unevictable pages used by the control group, per NUMA node.
    pub unevictable_pages_per_node: Vec<u64>,

    /// Same as `total_pages`, but includes the descendant control groups' numbers as well.
    pub hierarchical_total_pages: u64,
    /// Same as `total_pages_per_node`, but includes the descendant control groups' numbers as
    /// well.
    pub hierarchical_total_pages_per_node: Vec<u64>,
    /// Same as `file_pages`, but includes the descendant control groups' numbers as well.
    pub hierarchical_file_pages: u64,
    /// Same as `file_pages_per_node`, but includes the descendant control groups' numbers as
    /// well.
    pub hierarchical_file_pages_per_node: Vec<u64>,
    /// Same as `anon_pages`, but includes the descendant control groups' numbers as well.
    pub hierarchical_anon_pages: u64,
    /// Same as `anon_pages_per_node`, but includes the descendant control groups' numbers as
    /// well.
    pub hierarchical_anon_pages_per_node: Vec<u64>,
    /// Same as `unevictable_pages`, but includes the descendant control groups' numbers as well.
    pub hierarchical_unevictable_pages: u64,
    /// Same as `unevictable_pages_per_node`, but includes the descendant control groups' numbers
    /// as well.
    pub hierarchical_unevictable_pages_per_node: Vec<u64>,
}
129
130#[allow(clippy::unnecessary_wraps)]
131fn parse_numa_stat(s: String) -> Result<NumaStat> {
132    // Parse the number of nodes
133    let _nodes = (s.split_whitespace().count() - 8) / 8;
134    let mut ls = s.lines();
135    let total_line = ls.next().unwrap();
136    let file_line = ls.next().unwrap();
137    let anon_line = ls.next().unwrap();
138    let unevict_line = ls.next().unwrap();
139    let hier_total_line = ls.next().unwrap_or_default();
140    let hier_file_line = ls.next().unwrap_or_default();
141    let hier_anon_line = ls.next().unwrap_or_default();
142    let hier_unevict_line = ls.next().unwrap_or_default();
143
144    Ok(NumaStat {
145        total_pages: total_line
146            .split(|x| x == ' ' || x == '=')
147            .collect::<Vec<_>>()[1]
148            .parse::<u64>()
149            .unwrap_or(0),
150        total_pages_per_node: {
151            let spl = &total_line.split(' ').collect::<Vec<_>>()[1..];
152            spl.iter()
153                .map(|x| {
154                    x.split('=').collect::<Vec<_>>()[1]
155                        .parse::<u64>()
156                        .unwrap_or(0)
157                })
158                .collect()
159        },
160        file_pages: file_line
161            .split(|x| x == ' ' || x == '=')
162            .collect::<Vec<_>>()[1]
163            .parse::<u64>()
164            .unwrap_or(0),
165        file_pages_per_node: {
166            let spl = &file_line.split(' ').collect::<Vec<_>>()[1..];
167            spl.iter()
168                .map(|x| {
169                    x.split('=').collect::<Vec<_>>()[1]
170                        .parse::<u64>()
171                        .unwrap_or(0)
172                })
173                .collect()
174        },
175        anon_pages: anon_line
176            .split(|x| x == ' ' || x == '=')
177            .collect::<Vec<_>>()[1]
178            .parse::<u64>()
179            .unwrap_or(0),
180        anon_pages_per_node: {
181            let spl = &anon_line.split(' ').collect::<Vec<_>>()[1..];
182            spl.iter()
183                .map(|x| {
184                    x.split('=').collect::<Vec<_>>()[1]
185                        .parse::<u64>()
186                        .unwrap_or(0)
187                })
188                .collect()
189        },
190        unevictable_pages: unevict_line
191            .split(|x| x == ' ' || x == '=')
192            .collect::<Vec<_>>()[1]
193            .parse::<u64>()
194            .unwrap_or(0),
195        unevictable_pages_per_node: {
196            let spl = &unevict_line.split(' ').collect::<Vec<_>>()[1..];
197            spl.iter()
198                .map(|x| {
199                    x.split('=').collect::<Vec<_>>()[1]
200                        .parse::<u64>()
201                        .unwrap_or(0)
202                })
203                .collect()
204        },
205        hierarchical_total_pages: {
206            if !hier_total_line.is_empty() {
207                hier_total_line
208                    .split(|x| x == ' ' || x == '=')
209                    .collect::<Vec<_>>()[1]
210                    .parse::<u64>()
211                    .unwrap_or(0)
212            } else {
213                0
214            }
215        },
216        hierarchical_total_pages_per_node: {
217            if !hier_total_line.is_empty() {
218                let spl = &hier_total_line.split(' ').collect::<Vec<_>>()[1..];
219                spl.iter()
220                    .map(|x| {
221                        x.split('=').collect::<Vec<_>>()[1]
222                            .parse::<u64>()
223                            .unwrap_or(0)
224                    })
225                    .collect()
226            } else {
227                Vec::new()
228            }
229        },
230        hierarchical_file_pages: {
231            if !hier_file_line.is_empty() {
232                hier_file_line
233                    .split(|x| x == ' ' || x == '=')
234                    .collect::<Vec<_>>()[1]
235                    .parse::<u64>()
236                    .unwrap_or(0)
237            } else {
238                0
239            }
240        },
241        hierarchical_file_pages_per_node: {
242            if !hier_file_line.is_empty() {
243                let spl = &hier_file_line.split(' ').collect::<Vec<_>>()[1..];
244                spl.iter()
245                    .map(|x| {
246                        x.split('=').collect::<Vec<_>>()[1]
247                            .parse::<u64>()
248                            .unwrap_or(0)
249                    })
250                    .collect()
251            } else {
252                Vec::new()
253            }
254        },
255        hierarchical_anon_pages: {
256            if !hier_anon_line.is_empty() {
257                hier_anon_line
258                    .split(|x| x == ' ' || x == '=')
259                    .collect::<Vec<_>>()[1]
260                    .parse::<u64>()
261                    .unwrap_or(0)
262            } else {
263                0
264            }
265        },
266        hierarchical_anon_pages_per_node: {
267            if !hier_anon_line.is_empty() {
268                let spl = &hier_anon_line.split(' ').collect::<Vec<_>>()[1..];
269                spl.iter()
270                    .map(|x| {
271                        x.split('=').collect::<Vec<_>>()[1]
272                            .parse::<u64>()
273                            .unwrap_or(0)
274                    })
275                    .collect()
276            } else {
277                Vec::new()
278            }
279        },
280        hierarchical_unevictable_pages: {
281            if !hier_unevict_line.is_empty() {
282                hier_unevict_line
283                    .split(|x| x == ' ' || x == '=')
284                    .collect::<Vec<_>>()[1]
285                    .parse::<u64>()
286                    .unwrap_or(0)
287            } else {
288                0
289            }
290        },
291        hierarchical_unevictable_pages_per_node: {
292            if !hier_unevict_line.is_empty() {
293                let spl = &hier_unevict_line.split(' ').collect::<Vec<_>>()[1..];
294                spl.iter()
295                    .map(|x| {
296                        x.split('=').collect::<Vec<_>>()[1]
297                            .parse::<u64>()
298                            .unwrap_or(0)
299                    })
300                    .collect()
301            } else {
302                Vec::new()
303            }
304        },
305    })
306}
307
/// Counters parsed from the control group's `memory.stat` file.
///
/// Each named field mirrors the `memory.stat` entry of the same name and is `0` when that entry
/// is absent from the file (see `parse_memory_stat`). The `total_*` fields mirror the entries
/// carrying the `total_` prefix.
#[derive(Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MemoryStat {
    pub cache: u64,
    pub rss: u64,
    pub rss_huge: u64,
    pub shmem: u64,
    pub mapped_file: u64,
    pub dirty: u64,
    pub writeback: u64,
    pub swap: u64,
    pub pgpgin: u64,
    pub pgpgout: u64,
    pub pgfault: u64,
    pub pgmajfault: u64,
    pub inactive_anon: u64,
    pub active_anon: u64,
    pub inactive_file: u64,
    pub active_file: u64,
    pub unevictable: u64,
    /// The `hierarchical_memory_limit` entry, converted from `u64` to `i64`.
    pub hierarchical_memory_limit: i64,
    /// The `hierarchical_memsw_limit` entry, converted from `u64` to `i64`.
    pub hierarchical_memsw_limit: i64,
    pub total_cache: u64,
    pub total_rss: u64,
    pub total_rss_huge: u64,
    pub total_shmem: u64,
    pub total_mapped_file: u64,
    pub total_dirty: u64,
    pub total_writeback: u64,
    pub total_swap: u64,
    pub total_pgpgin: u64,
    pub total_pgpgout: u64,
    pub total_pgfault: u64,
    pub total_pgmajfault: u64,
    pub total_inactive_anon: u64,
    pub total_active_anon: u64,
    pub total_inactive_file: u64,
    pub total_active_file: u64,
    pub total_unevictable: u64,
    /// Every `<key> <value>` entry that was parsed successfully, including keys that have no
    /// dedicated field above.
    pub raw: HashMap<String, u64>,
}
349
350#[allow(clippy::unnecessary_wraps)]
351fn parse_memory_stat(s: String) -> Result<MemoryStat> {
352    let mut raw = HashMap::new();
353
354    for l in s.lines() {
355        let t: Vec<&str> = l.split(' ').collect();
356        if t.len() != 2 {
357            continue;
358        }
359        let n = t[1].trim().parse::<u64>();
360        if n.is_err() {
361            continue;
362        }
363
364        raw.insert(t[0].to_string(), n.unwrap());
365    }
366
367    Ok(MemoryStat {
368        cache: *raw.get("cache").unwrap_or(&0),
369        rss: *raw.get("rss").unwrap_or(&0),
370        rss_huge: *raw.get("rss_huge").unwrap_or(&0),
371        shmem: *raw.get("shmem").unwrap_or(&0),
372        mapped_file: *raw.get("mapped_file").unwrap_or(&0),
373        dirty: *raw.get("dirty").unwrap_or(&0),
374        writeback: *raw.get("writeback").unwrap_or(&0),
375        swap: *raw.get("swap").unwrap_or(&0),
376        pgpgin: *raw.get("pgpgin").unwrap_or(&0),
377        pgpgout: *raw.get("pgpgout").unwrap_or(&0),
378        pgfault: *raw.get("pgfault").unwrap_or(&0),
379        pgmajfault: *raw.get("pgmajfault").unwrap_or(&0),
380        inactive_anon: *raw.get("inactive_anon").unwrap_or(&0),
381        active_anon: *raw.get("active_anon").unwrap_or(&0),
382        inactive_file: *raw.get("inactive_file").unwrap_or(&0),
383        active_file: *raw.get("active_file").unwrap_or(&0),
384        unevictable: *raw.get("unevictable").unwrap_or(&0),
385        hierarchical_memory_limit: *raw.get("hierarchical_memory_limit").unwrap_or(&0) as i64,
386        hierarchical_memsw_limit: *raw.get("hierarchical_memsw_limit").unwrap_or(&0) as i64,
387        total_cache: *raw.get("total_cache").unwrap_or(&0),
388        total_rss: *raw.get("total_rss").unwrap_or(&0),
389        total_rss_huge: *raw.get("total_rss_huge").unwrap_or(&0),
390        total_shmem: *raw.get("total_shmem").unwrap_or(&0),
391        total_mapped_file: *raw.get("total_mapped_file").unwrap_or(&0),
392        total_dirty: *raw.get("total_dirty").unwrap_or(&0),
393        total_writeback: *raw.get("total_writeback").unwrap_or(&0),
394        total_swap: *raw.get("total_swap").unwrap_or(&0),
395        total_pgpgin: *raw.get("total_pgpgin").unwrap_or(&0),
396        total_pgpgout: *raw.get("total_pgpgout").unwrap_or(&0),
397        total_pgfault: *raw.get("total_pgfault").unwrap_or(&0),
398        total_pgmajfault: *raw.get("total_pgmajfault").unwrap_or(&0),
399        total_inactive_anon: *raw.get("total_inactive_anon").unwrap_or(&0),
400        total_active_anon: *raw.get("total_active_anon").unwrap_or(&0),
401        total_inactive_file: *raw.get("total_inactive_file").unwrap_or(&0),
402        total_active_file: *raw.get("total_active_file").unwrap_or(&0),
403        total_unevictable: *raw.get("total_unevictable").unwrap_or(&0),
404        raw,
405    })
406}
407
/// Contains statistics about the current usage of memory and swap (together, not separately) by
/// the control group's tasks.
///
/// Derives `Default`, `PartialEq` and `Eq` like the other statistics structs of this module so
/// that values can be compared and default-constructed.
#[derive(Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MemSwap {
    /// How many times the limit has been hit.
    pub fail_cnt: u64,
    /// Memory and swap usage limit in bytes.
    pub limit_in_bytes: i64,
    /// Current usage of memory and swap in bytes.
    pub usage_in_bytes: u64,
    /// The maximum observed usage of memory and swap in bytes.
    pub max_usage_in_bytes: u64,
}
422
423/// State of and statistics gathered by the kernel about the memory usage of the control group's
424/// tasks.
425#[derive(Debug)]
426#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
427pub struct Memory {
428    /// How many times the limit has been hit.
429    pub fail_cnt: u64,
430    /// The limit in bytes of the memory usage of the control group's tasks.
431    pub limit_in_bytes: i64,
432    /// The current usage of memory by the control group's tasks.
433    pub usage_in_bytes: u64,
434    /// The maximum observed usage of memory by the control group's tasks.
435    pub max_usage_in_bytes: u64,
436    /// Whether moving charges at immigrate is allowed.
437    pub move_charge_at_immigrate: u64,
438    /// Contains various statistics about the NUMA locality of the control group's tasks.
439    ///
440    /// The format of this field (as lifted from the kernel sources):
441    /// ```text
442    /// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
443    /// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
444    /// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
445    /// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
446    /// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
447    /// ```
448    pub numa_stat: NumaStat,
449    /// Various statistics and control information about the Out Of Memory killer.
450    pub oom_control: OomControl,
451    /// Allows setting a limit to memory usage which is enforced when the system (note, _not_ the
452    /// control group) detects memory pressure.
453    pub soft_limit_in_bytes: i64,
454    /// Contains a wide array of statistics about the memory usage of the tasks in the control
455    /// group.
456    pub stat: MemoryStat,
457    /// Set the tendency of the kernel to swap out parts of the address space consumed by the
458    /// control group's tasks.
459    ///
460    /// Note that setting this to zero does *not* prevent swapping, use `mlock(2)` for that
461    /// purpose.
462    pub swappiness: u64,
463    /// If set, then under OOM conditions, the kernel will try to reclaim memory from the children
464    /// of the offending process too. By default, this is not allowed.
465    pub use_hierarchy: u64,
466}
467
/// The current state of and gathered statistics about the kernel's memory usage for TCP-related
/// data structures.
///
/// Derives `Default`, `PartialEq` and `Eq` like the other statistics structs of this module so
/// that values can be compared and default-constructed.
#[derive(Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Tcp {
    /// How many times the limit has been hit.
    pub fail_cnt: u64,
    /// The limit in bytes of the memory usage of the kernel's TCP buffers by control group's
    /// tasks.
    pub limit_in_bytes: i64,
    /// The current memory used by the kernel's TCP buffers related to these tasks.
    pub usage_in_bytes: u64,
    /// The observed maximum usage of memory by the kernel's TCP buffers (that originated from
    /// these tasks).
    pub max_usage_in_bytes: u64,
}
484
/// Gathered statistics and the current state of limitation of the kernel's memory usage. Note that
/// this is per-cgroup, so the kernel can of course use more memory, but it will fail operations by
/// these tasks if it would think that the limits here would be violated. It's important to note
/// that interrupts in particular might not be able to enforce these limits.
///
/// Derives `Default`, `PartialEq` and `Eq` like the other statistics structs of this module so
/// that values can be compared and default-constructed.
#[derive(Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Kmem {
    /// How many times the limit has been hit.
    pub fail_cnt: u64,
    /// The limit in bytes of the kernel memory used by the control group's tasks.
    pub limit_in_bytes: i64,
    /// The current usage of kernel memory used by the control group's tasks, in bytes.
    pub usage_in_bytes: u64,
    /// The maximum observed usage of kernel memory used by the control group's tasks, in bytes.
    pub max_usage_in_bytes: u64,
    /// Contains information about the memory usage of the kernel's caches, per control group.
    pub slabinfo: String,
}
503
504impl ControllerInternal for MemController {
505    fn control_type(&self) -> Controllers {
506        Controllers::Mem
507    }
508    fn get_path(&self) -> &PathBuf {
509        &self.path
510    }
511    fn get_path_mut(&mut self) -> &mut PathBuf {
512        &mut self.path
513    }
514    fn get_base(&self) -> &PathBuf {
515        &self.base
516    }
517
518    fn is_v2(&self) -> bool {
519        self.v2
520    }
521
522    fn apply(&self, res: &Resources) -> Result<()> {
523        // get the resources that apply to this controller
524        let memres: &MemoryResources = &res.memory;
525
526        update!(self, set_limit, memres.memory_hard_limit);
527        update!(self, set_soft_limit, memres.memory_soft_limit);
528        update!(self, set_kmem_limit, memres.kernel_memory_limit);
529        update!(self, set_memswap_limit, memres.memory_swap_limit);
530        update!(self, set_tcp_limit, memres.kernel_tcp_memory_limit);
531        update!(self, set_swappiness, memres.swappiness);
532
533        memres.attrs.iter().for_each(|(k, v)| {
534            let _ = self.set(k, v);
535        });
536
537        Ok(())
538    }
539}
540
541impl MemController {
542    /// Contructs a new `MemController` with `root` serving as the root of the control group.
543    pub fn new(root: PathBuf, v2: bool) -> Self {
544        Self {
545            base: root.clone(),
546            path: root,
547            v2,
548        }
549    }
550
551    // for v2
552    pub fn set_mem(&self, m: SetMemory) -> Result<()> {
553        let values = vec![
554            (m.high, "memory.high"),
555            (m.low, "memory.low"),
556            (m.max, "memory.max"),
557            (m.min, "memory.min"),
558        ];
559        for value in values {
560            let v = value.0;
561            let f = value.1;
562            if let Some(v) = v {
563                let v = v.to_string();
564                self.open_path(f, true).and_then(|mut file| {
565                    file.write_all(v.as_ref()).map_err(|e| {
566                        Error::with_cause(WriteFailed(f.to_string(), format!("{:?}", v)), e)
567                    })
568                })?;
569            }
570        }
571        Ok(())
572    }
573
574    // for v2
575    pub fn get_mem(&self) -> Result<SetMemory> {
576        let mut m: SetMemory = Default::default();
577        self.get_max_value("memory.high")
578            .map(|x| m.high = Some(x))?;
579        self.get_max_value("memory.low").map(|x| m.low = Some(x))?;
580        self.get_max_value("memory.max").map(|x| m.max = Some(x))?;
581        self.get_max_value("memory.min").map(|x| m.min = Some(x))?;
582
583        Ok(m)
584    }
585
586    fn memory_stat_v2(&self) -> Memory {
587        let set = self.get_mem().unwrap();
588
589        Memory {
590            fail_cnt: 0,
591            limit_in_bytes: set.max.unwrap().to_i64(),
592            usage_in_bytes: self
593                .open_path("memory.current", false)
594                .and_then(read_u64_from)
595                .unwrap_or(0),
596            max_usage_in_bytes: self
597                .open_path("memory.peak", false)
598                .and_then(read_u64_from)
599                .unwrap_or(0),
600            move_charge_at_immigrate: 0,
601            numa_stat: NumaStat::default(),
602            oom_control: OomControl::default(),
603            soft_limit_in_bytes: set.low.unwrap().to_i64(),
604            stat: self
605                .open_path("memory.stat", false)
606                .and_then(read_string_from)
607                .and_then(parse_memory_stat)
608                .unwrap_or_default(),
609            swappiness: self
610                .open_path("memory.swap.current", false)
611                .and_then(read_u64_from)
612                .unwrap_or(0),
613            use_hierarchy: 0,
614        }
615    }
616
617    /// Gathers overall statistics (and the current state of) about the memory usage of the control
618    /// group's tasks.
619    ///
620    /// See the individual fields for more explanation, and as always, remember to consult the
621    /// kernel Documentation and/or sources.
622    pub fn memory_stat(&self) -> Memory {
623        if self.v2 {
624            return self.memory_stat_v2();
625        }
626
627        Memory {
628            fail_cnt: self
629                .open_path("memory.failcnt", false)
630                .and_then(read_u64_from)
631                .unwrap_or(0),
632            limit_in_bytes: self
633                .open_path("memory.limit_in_bytes", false)
634                .and_then(read_i64_from)
635                .unwrap_or(0),
636            usage_in_bytes: self
637                .open_path("memory.usage_in_bytes", false)
638                .and_then(read_u64_from)
639                .unwrap_or(0),
640            max_usage_in_bytes: self
641                .open_path("memory.max_usage_in_bytes", false)
642                .and_then(read_u64_from)
643                .unwrap_or(0),
644            move_charge_at_immigrate: self
645                .open_path("memory.move_charge_at_immigrate", false)
646                .and_then(read_u64_from)
647                .unwrap_or(0),
648            numa_stat: self
649                .open_path("memory.numa_stat", false)
650                .and_then(read_string_from)
651                .and_then(parse_numa_stat)
652                .unwrap_or_default(),
653            oom_control: self
654                .open_path("memory.oom_control", false)
655                .and_then(read_string_from)
656                .and_then(parse_oom_control)
657                .unwrap_or_default(),
658            soft_limit_in_bytes: self
659                .open_path("memory.soft_limit_in_bytes", false)
660                .and_then(read_i64_from)
661                .unwrap_or(0),
662            stat: self
663                .open_path("memory.stat", false)
664                .and_then(read_string_from)
665                .and_then(parse_memory_stat)
666                .unwrap_or_default(),
667            swappiness: self
668                .open_path("memory.swappiness", false)
669                .and_then(read_u64_from)
670                .unwrap_or(0),
671            use_hierarchy: self
672                .open_path("memory.use_hierarchy", false)
673                .and_then(read_u64_from)
674                .unwrap_or(0),
675        }
676    }
677
678    /// Gathers information about the kernel memory usage of the control group's tasks.
679    pub fn kmem_stat(&self) -> Kmem {
680        Kmem {
681            fail_cnt: self
682                .open_path("memory.kmem.failcnt", false)
683                .and_then(read_u64_from)
684                .unwrap_or(0),
685            limit_in_bytes: self
686                .open_path("memory.kmem.limit_in_bytes", false)
687                .and_then(read_i64_from)
688                .unwrap_or(-1),
689            usage_in_bytes: self
690                .open_path("memory.kmem.usage_in_bytes", false)
691                .and_then(read_u64_from)
692                .unwrap_or(0),
693            max_usage_in_bytes: self
694                .open_path("memory.kmem.max_usage_in_bytes", false)
695                .and_then(read_u64_from)
696                .unwrap_or(0),
697            slabinfo: self
698                .open_path("memory.kmem.slabinfo", false)
699                .and_then(read_string_from)
700                .unwrap_or_default(),
701        }
702    }
703
704    /// Gathers information about the control group's kernel memory usage where said memory is
705    /// TCP-related.
706    pub fn kmem_tcp_stat(&self) -> Tcp {
707        Tcp {
708            fail_cnt: self
709                .open_path("memory.kmem.tcp.failcnt", false)
710                .and_then(read_u64_from)
711                .unwrap_or(0),
712            limit_in_bytes: self
713                .open_path("memory.kmem.tcp.limit_in_bytes", false)
714                .and_then(read_i64_from)
715                .unwrap_or(0),
716            usage_in_bytes: self
717                .open_path("memory.kmem.tcp.usage_in_bytes", false)
718                .and_then(read_u64_from)
719                .unwrap_or(0),
720            max_usage_in_bytes: self
721                .open_path("memory.kmem.tcp.max_usage_in_bytes", false)
722                .and_then(read_u64_from)
723                .unwrap_or(0),
724        }
725    }
726
727    pub fn memswap_v2(&self) -> MemSwap {
728        MemSwap {
729            fail_cnt: self
730                .open_path("memory.swap.events", false)
731                .and_then(flat_keyed_to_hashmap)
732                .map(|x| *x.get("fail").unwrap_or(&0) as u64)
733                .unwrap(),
734            limit_in_bytes: self
735                .open_path("memory.swap.max", false)
736                .and_then(read_i64_from)
737                .unwrap_or(0),
738            usage_in_bytes: self
739                .open_path("memory.swap.current", false)
740                .and_then(read_u64_from)
741                .unwrap_or(0),
742            max_usage_in_bytes: self
743                .open_path("memory.swap.peak", false)
744                .and_then(read_u64_from)
745                .unwrap_or(0),
746        }
747    }
748
749    /// Gathers information about the memory usage of the control group including the swap usage
750    /// (if any).
751    pub fn memswap(&self) -> MemSwap {
752        if self.v2 {
753            return self.memswap_v2();
754        }
755
756        MemSwap {
757            fail_cnt: self
758                .open_path("memory.memsw.failcnt", false)
759                .and_then(read_u64_from)
760                .unwrap_or(0),
761            limit_in_bytes: self
762                .open_path("memory.memsw.limit_in_bytes", false)
763                .and_then(read_i64_from)
764                .unwrap_or(0),
765            usage_in_bytes: self
766                .open_path("memory.memsw.usage_in_bytes", false)
767                .and_then(read_u64_from)
768                .unwrap_or(0),
769            max_usage_in_bytes: self
770                .open_path("memory.memsw.max_usage_in_bytes", false)
771                .and_then(read_u64_from)
772                .unwrap_or(0),
773        }
774    }
775
776    /// Reset the fail counter
777    pub fn reset_fail_count(&self) -> Result<()> {
778        self.open_path("memory.failcnt", true).and_then(|mut file| {
779            file.write_all("0".to_string().as_ref()).map_err(|e| {
780                Error::with_cause(
781                    WriteFailed("memory.failcnt".to_string(), "0".to_string()),
782                    e,
783                )
784            })
785        })
786    }
787
788    /// Reset the kernel memory fail counter
789    pub fn reset_kmem_fail_count(&self) -> Result<()> {
790        // Ignore kmem because there is no kmem in cgroup v2
791        if self.v2 {
792            return Ok(());
793        }
794
795        self.open_path("memory.kmem.failcnt", true)
796            .and_then(|mut file| {
797                file.write_all("0".to_string().as_ref()).map_err(|e| {
798                    Error::with_cause(
799                        WriteFailed("memory.kmem.failcnt".to_string(), "0".to_string()),
800                        e,
801                    )
802                })
803            })
804    }
805
806    /// Reset the TCP related fail counter
807    pub fn reset_tcp_fail_count(&self) -> Result<()> {
808        // Ignore kmem because there is no kmem in cgroup v2
809        if self.v2 {
810            return Ok(());
811        }
812
813        self.open_path("memory.kmem.tcp.failcnt", true)
814            .and_then(|mut file| {
815                file.write_all("0".to_string().as_ref()).map_err(|e| {
816                    Error::with_cause(
817                        WriteFailed("memory.kmem.tcp.failcnt".to_string(), "0".to_string()),
818                        e,
819                    )
820                })
821            })
822    }
823
824    /// Reset the memory+swap fail counter
825    pub fn reset_memswap_fail_count(&self) -> Result<()> {
826        self.open_path("memory.memsw.failcnt", true)
827            .and_then(|mut file| {
828                file.write_all("0".to_string().as_ref()).map_err(|e| {
829                    Error::with_cause(
830                        WriteFailed("memory.memsw.failcnt".to_string(), "0".to_string()),
831                        e,
832                    )
833                })
834            })
835    }
836
837    /// Reset the max memory usage recorded
838    pub fn reset_max_usage(&self) -> Result<()> {
839        self.open_path("memory.max_usage_in_bytes", true)
840            .and_then(|mut file| {
841                file.write_all("0".to_string().as_ref()).map_err(|e| {
842                    Error::with_cause(
843                        WriteFailed("memory.max_usage_in_bytes".to_string(), "0".to_string()),
844                        e,
845                    )
846                })
847            })
848    }
849
850    /// Set the memory usage limit of the control group, in bytes.
851    pub fn set_limit(&self, limit: i64) -> Result<()> {
852        let mut file_name = "memory.limit_in_bytes";
853        let mut limit_str = limit.to_string();
854        if self.v2 {
855            file_name = "memory.max";
856            if limit == -1 {
857                limit_str = "max".to_string();
858            }
859        }
860        self.open_path(file_name, true).and_then(|mut file| {
861            file.write_all(limit_str.as_ref())
862                .map_err(|e| Error::with_cause(WriteFailed(file_name.to_string(), limit_str), e))
863        })
864    }
865
866    /// Set the kernel memory limit of the control group, in bytes.
867    pub fn set_kmem_limit(&self, limit: i64) -> Result<()> {
868        // Ignore kmem because there is no kmem in cgroup v2
869        if self.v2 {
870            return Ok(());
871        }
872
873        self.open_path("memory.kmem.limit_in_bytes", true)
874            .and_then(|mut file| {
875                let r = file.write_all(limit.to_string().as_ref());
876                match r {
877                    Ok(()) => Ok(()),
878                    Err(ref e) if e.raw_os_error() == Some(libc::EOPNOTSUPP) => {
879                        warn!("memory.kmem.limit_in_bytes is unsupported by the kernel");
880                        Ok(())
881                    }
882                    Err(e) => Err(Error::with_cause(
883                        WriteFailed("memory.kmem.limit_in_bytes".to_string(), limit.to_string()),
884                        e,
885                    )),
886                }
887            })
888    }
889
890    /// Set the memory+swap limit of the control group, in bytes.
891    pub fn set_memswap_limit(&self, limit: i64) -> Result<()> {
892        let mut file_name = "memory.memsw.limit_in_bytes";
893        let mut limit_str = limit.to_string();
894        if self.v2 {
895            file_name = "memory.swap.max";
896            if limit == -1 {
897                limit_str = "max".to_string();
898            }
899        }
900        self.open_path(file_name, true).and_then(|mut file| {
901            file.write_all(limit_str.as_ref())
902                .map_err(|e| Error::with_cause(WriteFailed(file_name.to_string(), limit_str), e))
903        })
904    }
905
906    /// Set how much kernel memory can be used for TCP-related buffers by the control group.
907    pub fn set_tcp_limit(&self, limit: i64) -> Result<()> {
908        // Ignore kmem because there is no kmem in cgroup v2
909        if self.v2 {
910            return Ok(());
911        }
912
913        self.open_path("memory.kmem.tcp.limit_in_bytes", true)
914            .and_then(|mut file| {
915                file.write_all(limit.to_string().as_ref()).map_err(|e| {
916                    Error::with_cause(
917                        WriteFailed(
918                            "memory.kmem.tcp.limit_in_bytes".to_string(),
919                            limit.to_string(),
920                        ),
921                        e,
922                    )
923                })
924            })
925    }
926
927    /// Set the soft limit of the control group, in bytes.
928    ///
929    /// This limit is enforced when the system is nearing OOM conditions. Contrast this with the
930    /// hard limit, which is _always_ enforced.
931    pub fn set_soft_limit(&self, limit: i64) -> Result<()> {
932        let mut file_name = "memory.soft_limit_in_bytes";
933        if self.v2 {
934            file_name = "memory.low"
935        }
936        self.open_path(file_name, true).and_then(|mut file| {
937            file.write_all(limit.to_string().as_ref()).map_err(|e| {
938                Error::with_cause(WriteFailed(file_name.to_string(), limit.to_string()), e)
939            })
940        })
941    }
942
943    /// Set how likely the kernel is to swap out parts of the address space used by the control
944    /// group.
945    ///
946    /// Note that a value of zero does not imply that the process will not be swapped out.
947    pub fn set_swappiness(&self, swp: u64) -> Result<()> {
948        let mut file_name = "memory.swappiness";
949        if self.v2 {
950            file_name = "memory.swap.max"
951        }
952
953        self.open_path(file_name, true).and_then(|mut file| {
954            file.write_all(swp.to_string().as_ref()).map_err(|e| {
955                Error::with_cause(WriteFailed(file_name.to_string(), swp.to_string()), e)
956            })
957        })
958    }
959
960    pub fn disable_oom_killer(&self) -> Result<()> {
961        self.open_path("memory.oom_control", true)
962            .and_then(|mut file| {
963                file.write_all("1".to_string().as_ref()).map_err(|e| {
964                    Error::with_cause(
965                        WriteFailed("memory.oom_control".to_string(), "1".to_string()),
966                        e,
967                    )
968                })
969            })
970    }
971
972    pub fn register_oom_event(&self, key: &str) -> Result<Receiver<String>> {
973        if self.v2 {
974            events::notify_on_oom_v2(key, self.get_path())
975        } else {
976            events::notify_on_oom_v1(key, self.get_path())
977        }
978    }
979}
980
981impl ControllIdentifier for MemController {
982    fn controller_type() -> Controllers {
983        Controllers::Mem
984    }
985}
986
987impl CustomizedAttribute for MemController {}
988
989impl<'a> From<&'a Subsystem> for &'a MemController {
990    fn from(sub: &'a Subsystem) -> &'a MemController {
991        unsafe {
992            match sub {
993                Subsystem::Mem(c) => c,
994                _ => {
995                    assert_eq!(1, 0);
996                    let v = std::mem::MaybeUninit::uninit();
997                    v.assume_init()
998                }
999            }
1000        }
1001    }
1002}
1003
#[cfg(test)]
mod tests {
    use crate::memory::{
        parse_memory_stat, parse_numa_stat, parse_oom_control, MemoryStat, NumaStat, OomControl,
    };

    // Sample input for `parse_numa_stat` with both the plain and the `hierarchical_` rows, each
    // carrying values for two nodes (N0 and N1).
    static GOOD_VALUE: &str = "\
total=51189 N0=51189 N1=123
file=50175 N0=50175 N1=123
anon=1014 N0=1014 N1=123
unevictable=0 N0=0 N1=123
hierarchical_total=1628573 N0=1628573 N1=123
hierarchical_file=858151 N0=858151 N1=123
hierarchical_anon=770402 N0=770402 N1=123
hierarchical_unevictable=20 N0=20 N1=123
";

    // Same as `GOOD_VALUE` but without any `hierarchical_` rows.
    static GOOD_VALUE_NON_HIERARCHICAL: &str = "\
total=51189 N0=51189 N1=123
file=50175 N0=50175 N1=123
anon=1014 N0=1014 N1=123
unevictable=0 N0=0 N1=123
";

    // Sample input for `parse_oom_control` that has no `under_oom` row.
    static GOOD_OOMCONTROL_VAL_1: &str = "\
oom_kill_disable 0
oom_kill 1337
";

    // Sample input for `parse_oom_control` that has no `oom_kill` row.
    static GOOD_OOMCONTROL_VAL_2: &str = "\
oom_kill_disable 0
under_oom 1
";

    // Sample input for `parse_oom_control` with all three rows present.
    static GOOD_OOMCONTROL_VAL_3: &str = "\
oom_kill_disable 0
under_oom 1
oom_kill 1337
";

    // Sample input for `parse_memory_stat`.
    static GOOD_MEMORYSTAT_VAL: &str = "\
cache 178880512
rss 4206592
rss_huge 0
shmem 106496
mapped_file 7491584
dirty 114688
writeback 49152
swap 0
pgpgin 213928
pgpgout 169220
pgfault 87064
pgmajfault 202
inactive_anon 0
active_anon 4153344
inactive_file 84779008
active_file 94273536
unevictable 0
hierarchical_memory_limit 9223372036854771712
hierarchical_memsw_limit 9223372036854771712
total_cache 4200333312
total_rss 2927677440
total_rss_huge 0
total_shmem 590061568
total_mapped_file 1086164992
total_dirty 1769472
total_writeback 602112
total_swap 0
total_pgpgin 5267326291
total_pgpgout 5265586647
total_pgfault 9947902469
total_pgmajfault 25132
total_inactive_anon 585981952
total_active_anon 2928996352
total_inactive_file 1272135680
total_active_file 2338816000
total_unevictable 81920
";

    /// Both NUMA samples must parse; the hierarchical fields stay zero/empty when the input
    /// has no `hierarchical_` rows.
    #[test]
    fn test_parse_numa_stat() {
        let expected_full = NumaStat {
            total_pages: 51189,
            total_pages_per_node: vec![51189, 123],
            file_pages: 50175,
            file_pages_per_node: vec![50175, 123],
            anon_pages: 1014,
            anon_pages_per_node: vec![1014, 123],
            unevictable_pages: 0,
            unevictable_pages_per_node: vec![0, 123],

            hierarchical_total_pages: 1628573,
            hierarchical_total_pages_per_node: vec![1628573, 123],
            hierarchical_file_pages: 858151,
            hierarchical_file_pages_per_node: vec![858151, 123],
            hierarchical_anon_pages: 770402,
            hierarchical_anon_pages_per_node: vec![770402, 123],
            hierarchical_unevictable_pages: 20,
            hierarchical_unevictable_pages_per_node: vec![20, 123],
        };
        let parsed_full = parse_numa_stat(GOOD_VALUE.to_string()).unwrap();
        assert_eq!(parsed_full, expected_full);

        let expected_flat = NumaStat {
            total_pages: 51189,
            total_pages_per_node: vec![51189, 123],
            file_pages: 50175,
            file_pages_per_node: vec![50175, 123],
            anon_pages: 1014,
            anon_pages_per_node: vec![1014, 123],
            unevictable_pages: 0,
            unevictable_pages_per_node: vec![0, 123],

            hierarchical_total_pages: 0,
            hierarchical_total_pages_per_node: vec![],
            hierarchical_file_pages: 0,
            hierarchical_file_pages_per_node: vec![],
            hierarchical_anon_pages: 0,
            hierarchical_anon_pages_per_node: vec![],
            hierarchical_unevictable_pages: 0,
            hierarchical_unevictable_pages_per_node: vec![],
        };
        let parsed_flat = parse_numa_stat(GOOD_VALUE_NON_HIERARCHICAL.to_string()).unwrap();
        assert_eq!(parsed_flat, expected_flat);
    }

    /// Runs `parse_oom_control` over the empty string and the three samples.
    #[test]
    fn test_parse_oom_control() {
        // Each row: (input, oom_kill_disable, under_oom, oom_kill).
        //
        // NOTE(review): `GOOD_OOMCONTROL_VAL_1` contains `oom_kill 1337` yet the expectation is
        // `oom_kill: 0`. Presumably this pins how the parser treats input without an
        // `under_oom` row; confirm against `parse_oom_control`.
        let cases = [
            ("", false, false, 0),
            (GOOD_OOMCONTROL_VAL_1, false, false, 0),
            (GOOD_OOMCONTROL_VAL_2, false, true, 0),
            (GOOD_OOMCONTROL_VAL_3, false, true, 1337),
        ];

        for &(input, oom_kill_disable, under_oom, oom_kill) in cases.iter() {
            let parsed = parse_oom_control(input.to_string()).unwrap();
            assert_eq!(
                parsed,
                OomControl {
                    oom_kill_disable,
                    under_oom,
                    oom_kill,
                }
            );
        }
    }

    /// Every named field of the parsed `MemoryStat` must match the sample; `raw` is copied from
    /// the parsed value, so it is not checked independently.
    #[test]
    fn test_parse_memory_stat() {
        let parsed = parse_memory_stat(GOOD_MEMORYSTAT_VAL.to_string()).unwrap();
        let expected = MemoryStat {
            cache: 178880512,
            rss: 4206592,
            rss_huge: 0,
            shmem: 106496,
            mapped_file: 7491584,
            dirty: 114688,
            writeback: 49152,
            swap: 0,
            pgpgin: 213928,
            pgpgout: 169220,
            pgfault: 87064,
            pgmajfault: 202,
            inactive_anon: 0,
            active_anon: 4153344,
            inactive_file: 84779008,
            active_file: 94273536,
            unevictable: 0,
            hierarchical_memory_limit: 9223372036854771712,
            hierarchical_memsw_limit: 9223372036854771712,
            total_cache: 4200333312,
            total_rss: 2927677440,
            total_rss_huge: 0,
            total_shmem: 590061568,
            total_mapped_file: 1086164992,
            total_dirty: 1769472,
            total_writeback: 602112,
            total_swap: 0,
            total_pgpgin: 5267326291,
            total_pgpgout: 5265586647,
            total_pgfault: 9947902469,
            total_pgmajfault: 25132,
            total_inactive_anon: 585981952,
            total_active_anon: 2928996352,
            total_inactive_file: 1272135680,
            total_active_file: 2338816000,
            total_unevictable: 81920,
            raw: parsed.raw.clone(),
        };
        assert_eq!(parsed, expected);
    }
}