cgroups_rs/fs/
memory.rs

1// Copyright (c) 2018 Levente Kurusa
2// Copyright (c) 2020 Ant Group
3//
4// SPDX-License-Identifier: Apache-2.0 or MIT
5//
6
7//! This module contains the implementation of the `memory` cgroup subsystem.
8//!
9//! See the Kernel's documentation for more information about this subsystem, found at:
10//!  [Documentation/cgroup-v1/memory.txt](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)
11use log::warn;
12use std::collections::HashMap;
13use std::io::Write;
14use std::path::PathBuf;
15use std::sync::mpsc::Receiver;
16
17use crate::fs::error::ErrorKind::*;
18use crate::fs::error::*;
19use crate::fs::events;
20use crate::fs::{read_i64_from, read_string_from, read_u64_from};
21
22use crate::fs::flat_keyed_to_hashmap;
23
24use crate::fs::{
25    ControllIdentifier, ControllerInternal, Controllers, CustomizedAttribute, MaxValue,
26    MemoryResources, Resources, Subsystem,
27};
28
/// Controller for the `memory` subsystem of a control group.
///
/// Through this controller the user can collect statistics about the memory usage of the tasks
/// in the control group. Additionally, powerful limits can be placed on that memory usage.
#[derive(Debug, Clone)]
pub struct MemController {
    /// Base path of the controller, returned by `get_base`.
    base: PathBuf,
    /// Path of this controller, returned by `get_path` / `get_path_mut`.
    path: PathBuf,
    /// Value reported by `is_v2`; selects the cgroup v2 code paths of this controller.
    v2: bool,
}
40
41#[derive(Default, Debug, PartialEq, Eq)]
42#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
43pub struct SetMemory {
44    pub low: Option<MaxValue>,
45    pub high: Option<MaxValue>,
46    pub min: Option<MaxValue>,
47    pub max: Option<MaxValue>,
48}
49
/// State of, and statistics about, the OOM killer operating in this control group.
#[derive(Default, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct OomControl {
    /// `true` when the OOM killer has been disabled for the tasks in this control group.
    pub oom_kill_disable: bool,
    /// `true` while the OOM killer is running for the tasks in the control group.
    pub under_oom: bool,
    /// Number of tasks the OOM killer has killed so far.
    pub oom_kill: u64,
}
61
62#[allow(clippy::unnecessary_wraps)]
63fn parse_oom_control(s: String) -> Result<OomControl> {
64    let spl = s.split_whitespace().collect::<Vec<_>>();
65
66    let oom_kill_disable = if spl.len() > 1 {
67        spl[1].parse::<u64>().unwrap() == 1
68    } else {
69        false
70    };
71
72    let under_oom = if spl.len() > 3 {
73        spl[3].parse::<u64>().unwrap() == 1
74    } else {
75        false
76    };
77
78    let oom_kill = if spl.len() > 5 {
79        spl[5].parse::<u64>().unwrap()
80    } else {
81        0
82    };
83
84    Ok(OomControl {
85        oom_kill_disable,
86        under_oom,
87        oom_kill,
88    })
89}
90
/// Statistics about the NUMA locality of the control group's tasks.
#[derive(Default, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct NumaStat {
    /// Total number of pages used by the control group.
    pub total_pages: u64,
    /// Total number of pages used by the control group, one entry per NUMA node.
    pub total_pages_per_node: Vec<u64>,
    /// Total number of file pages used by the control group.
    pub file_pages: u64,
    /// Total number of file pages used by the control group, one entry per NUMA node.
    pub file_pages_per_node: Vec<u64>,
    /// Total number of anonymous pages used by the control group.
    pub anon_pages: u64,
    /// Total number of anonymous pages used by the control group, one entry per NUMA node.
    pub anon_pages_per_node: Vec<u64>,
    /// Total number of unevictable pages used by the control group.
    pub unevictable_pages: u64,
    /// Total number of unevictable pages used by the control group, one entry per NUMA node.
    pub unevictable_pages_per_node: Vec<u64>,

    /// Like `total_pages`, but also counting the descendant control groups.
    pub hierarchical_total_pages: u64,
    /// Like `total_pages_per_node`, but also counting the descendant control groups.
    pub hierarchical_total_pages_per_node: Vec<u64>,
    /// Like `file_pages`, but also counting the descendant control groups.
    pub hierarchical_file_pages: u64,
    /// Like `file_pages_per_node`, but also counting the descendant control groups.
    pub hierarchical_file_pages_per_node: Vec<u64>,
    /// Like `anon_pages`, but also counting the descendant control groups.
    pub hierarchical_anon_pages: u64,
    /// Like `anon_pages_per_node`, but also counting the descendant control groups.
    pub hierarchical_anon_pages_per_node: Vec<u64>,
    /// Like `unevictable_pages`, but also counting the descendant control groups.
    pub hierarchical_unevictable_pages: u64,
    /// Like `unevictable_pages_per_node`, but also counting the descendant control groups.
    pub hierarchical_unevictable_pages_per_node: Vec<u64>,
}
129
130#[allow(clippy::unnecessary_wraps)]
131fn parse_numa_stat(s: String) -> Result<NumaStat> {
132    // Parse the number of nodes
133    let _nodes = (s.split_whitespace().count() - 8) / 8;
134    let mut ls = s.lines();
135    let total_line = ls.next().unwrap();
136    let file_line = ls.next().unwrap();
137    let anon_line = ls.next().unwrap();
138    let unevict_line = ls.next().unwrap();
139    let hier_total_line = ls.next().unwrap_or_default();
140    let hier_file_line = ls.next().unwrap_or_default();
141    let hier_anon_line = ls.next().unwrap_or_default();
142    let hier_unevict_line = ls.next().unwrap_or_default();
143
144    Ok(NumaStat {
145        total_pages: total_line.split([' ', '=']).collect::<Vec<_>>()[1]
146            .parse::<u64>()
147            .unwrap_or(0),
148        total_pages_per_node: {
149            let spl = &total_line.split(' ').collect::<Vec<_>>()[1..];
150            spl.iter()
151                .map(|x| {
152                    x.split('=').collect::<Vec<_>>()[1]
153                        .parse::<u64>()
154                        .unwrap_or(0)
155                })
156                .collect()
157        },
158        file_pages: file_line.split([' ', '=']).collect::<Vec<_>>()[1]
159            .parse::<u64>()
160            .unwrap_or(0),
161        file_pages_per_node: {
162            let spl = &file_line.split(' ').collect::<Vec<_>>()[1..];
163            spl.iter()
164                .map(|x| {
165                    x.split('=').collect::<Vec<_>>()[1]
166                        .parse::<u64>()
167                        .unwrap_or(0)
168                })
169                .collect()
170        },
171        anon_pages: anon_line.split([' ', '=']).collect::<Vec<_>>()[1]
172            .parse::<u64>()
173            .unwrap_or(0),
174        anon_pages_per_node: {
175            let spl = &anon_line.split(' ').collect::<Vec<_>>()[1..];
176            spl.iter()
177                .map(|x| {
178                    x.split('=').collect::<Vec<_>>()[1]
179                        .parse::<u64>()
180                        .unwrap_or(0)
181                })
182                .collect()
183        },
184        unevictable_pages: unevict_line.split([' ', '=']).collect::<Vec<_>>()[1]
185            .parse::<u64>()
186            .unwrap_or(0),
187        unevictable_pages_per_node: {
188            let spl = &unevict_line.split(' ').collect::<Vec<_>>()[1..];
189            spl.iter()
190                .map(|x| {
191                    x.split('=').collect::<Vec<_>>()[1]
192                        .parse::<u64>()
193                        .unwrap_or(0)
194                })
195                .collect()
196        },
197        hierarchical_total_pages: {
198            if !hier_total_line.is_empty() {
199                hier_total_line.split([' ', '=']).collect::<Vec<_>>()[1]
200                    .parse::<u64>()
201                    .unwrap_or(0)
202            } else {
203                0
204            }
205        },
206        hierarchical_total_pages_per_node: {
207            if !hier_total_line.is_empty() {
208                let spl = &hier_total_line.split(' ').collect::<Vec<_>>()[1..];
209                spl.iter()
210                    .map(|x| {
211                        x.split('=').collect::<Vec<_>>()[1]
212                            .parse::<u64>()
213                            .unwrap_or(0)
214                    })
215                    .collect()
216            } else {
217                Vec::new()
218            }
219        },
220        hierarchical_file_pages: {
221            if !hier_file_line.is_empty() {
222                hier_file_line.split([' ', '=']).collect::<Vec<_>>()[1]
223                    .parse::<u64>()
224                    .unwrap_or(0)
225            } else {
226                0
227            }
228        },
229        hierarchical_file_pages_per_node: {
230            if !hier_file_line.is_empty() {
231                let spl = &hier_file_line.split(' ').collect::<Vec<_>>()[1..];
232                spl.iter()
233                    .map(|x| {
234                        x.split('=').collect::<Vec<_>>()[1]
235                            .parse::<u64>()
236                            .unwrap_or(0)
237                    })
238                    .collect()
239            } else {
240                Vec::new()
241            }
242        },
243        hierarchical_anon_pages: {
244            if !hier_anon_line.is_empty() {
245                hier_anon_line.split([' ', '=']).collect::<Vec<_>>()[1]
246                    .parse::<u64>()
247                    .unwrap_or(0)
248            } else {
249                0
250            }
251        },
252        hierarchical_anon_pages_per_node: {
253            if !hier_anon_line.is_empty() {
254                let spl = &hier_anon_line.split(' ').collect::<Vec<_>>()[1..];
255                spl.iter()
256                    .map(|x| {
257                        x.split('=').collect::<Vec<_>>()[1]
258                            .parse::<u64>()
259                            .unwrap_or(0)
260                    })
261                    .collect()
262            } else {
263                Vec::new()
264            }
265        },
266        hierarchical_unevictable_pages: {
267            if !hier_unevict_line.is_empty() {
268                hier_unevict_line.split([' ', '=']).collect::<Vec<_>>()[1]
269                    .parse::<u64>()
270                    .unwrap_or(0)
271            } else {
272                0
273            }
274        },
275        hierarchical_unevictable_pages_per_node: {
276            if !hier_unevict_line.is_empty() {
277                let spl = &hier_unevict_line.split(' ').collect::<Vec<_>>()[1..];
278                spl.iter()
279                    .map(|x| {
280                        x.split('=').collect::<Vec<_>>()[1]
281                            .parse::<u64>()
282                            .unwrap_or(0)
283                    })
284                    .collect()
285            } else {
286                Vec::new()
287            }
288        },
289    })
290}
291
/// Parsed contents of a `memory.stat` file.
///
/// Each named field holds the value found under the identically named key of the file, or `0`
/// when that key is absent. `raw` keeps every key/value pair that could be parsed, including
/// keys that have no dedicated field.
#[derive(Default, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MemoryStat {
    // Counters for this control group.
    pub cache: u64,
    pub rss: u64,
    pub rss_huge: u64,
    pub shmem: u64,
    pub mapped_file: u64,
    pub dirty: u64,
    pub writeback: u64,
    pub swap: u64,
    pub pgpgin: u64,
    pub pgpgout: u64,
    pub pgfault: u64,
    pub pgmajfault: u64,
    pub inactive_anon: u64,
    pub active_anon: u64,
    pub inactive_file: u64,
    pub active_file: u64,
    pub unevictable: u64,
    // Limits reported under the `hierarchical_*` keys.
    pub hierarchical_memory_limit: i64,
    pub hierarchical_memsw_limit: i64,
    // Counters reported under the `total_*` keys.
    pub total_cache: u64,
    pub total_rss: u64,
    pub total_rss_huge: u64,
    pub total_shmem: u64,
    pub total_mapped_file: u64,
    pub total_dirty: u64,
    pub total_writeback: u64,
    pub total_swap: u64,
    pub total_pgpgin: u64,
    pub total_pgpgout: u64,
    pub total_pgfault: u64,
    pub total_pgmajfault: u64,
    pub total_inactive_anon: u64,
    pub total_active_anon: u64,
    pub total_inactive_file: u64,
    pub total_active_file: u64,
    pub total_unevictable: u64,
    /// Every `key value` pair that was successfully parsed from the file.
    pub raw: HashMap<String, u64>,
}
333
334#[allow(clippy::unnecessary_wraps)]
335fn parse_memory_stat(s: String) -> Result<MemoryStat> {
336    let mut raw = HashMap::new();
337
338    for l in s.lines() {
339        let t: Vec<&str> = l.split(' ').collect();
340        if t.len() != 2 {
341            continue;
342        }
343        let n = t[1].trim().parse::<u64>();
344        if n.is_err() {
345            continue;
346        }
347
348        raw.insert(t[0].to_string(), n.unwrap());
349    }
350
351    Ok(MemoryStat {
352        cache: *raw.get("cache").unwrap_or(&0),
353        rss: *raw.get("rss").unwrap_or(&0),
354        rss_huge: *raw.get("rss_huge").unwrap_or(&0),
355        shmem: *raw.get("shmem").unwrap_or(&0),
356        mapped_file: *raw.get("mapped_file").unwrap_or(&0),
357        dirty: *raw.get("dirty").unwrap_or(&0),
358        writeback: *raw.get("writeback").unwrap_or(&0),
359        swap: *raw.get("swap").unwrap_or(&0),
360        pgpgin: *raw.get("pgpgin").unwrap_or(&0),
361        pgpgout: *raw.get("pgpgout").unwrap_or(&0),
362        pgfault: *raw.get("pgfault").unwrap_or(&0),
363        pgmajfault: *raw.get("pgmajfault").unwrap_or(&0),
364        inactive_anon: *raw.get("inactive_anon").unwrap_or(&0),
365        active_anon: *raw.get("active_anon").unwrap_or(&0),
366        inactive_file: *raw.get("inactive_file").unwrap_or(&0),
367        active_file: *raw.get("active_file").unwrap_or(&0),
368        unevictable: *raw.get("unevictable").unwrap_or(&0),
369        hierarchical_memory_limit: *raw.get("hierarchical_memory_limit").unwrap_or(&0) as i64,
370        hierarchical_memsw_limit: *raw.get("hierarchical_memsw_limit").unwrap_or(&0) as i64,
371        total_cache: *raw.get("total_cache").unwrap_or(&0),
372        total_rss: *raw.get("total_rss").unwrap_or(&0),
373        total_rss_huge: *raw.get("total_rss_huge").unwrap_or(&0),
374        total_shmem: *raw.get("total_shmem").unwrap_or(&0),
375        total_mapped_file: *raw.get("total_mapped_file").unwrap_or(&0),
376        total_dirty: *raw.get("total_dirty").unwrap_or(&0),
377        total_writeback: *raw.get("total_writeback").unwrap_or(&0),
378        total_swap: *raw.get("total_swap").unwrap_or(&0),
379        total_pgpgin: *raw.get("total_pgpgin").unwrap_or(&0),
380        total_pgpgout: *raw.get("total_pgpgout").unwrap_or(&0),
381        total_pgfault: *raw.get("total_pgfault").unwrap_or(&0),
382        total_pgmajfault: *raw.get("total_pgmajfault").unwrap_or(&0),
383        total_inactive_anon: *raw.get("total_inactive_anon").unwrap_or(&0),
384        total_active_anon: *raw.get("total_active_anon").unwrap_or(&0),
385        total_inactive_file: *raw.get("total_inactive_file").unwrap_or(&0),
386        total_active_file: *raw.get("total_active_file").unwrap_or(&0),
387        total_unevictable: *raw.get("total_unevictable").unwrap_or(&0),
388        raw,
389    })
390}
391
/// Statistics about the current usage of memory and swap (combined, not separately) by the
/// control group's tasks.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct MemSwap {
    /// Number of times the limit has been hit.
    pub fail_cnt: u64,
    /// Limit on memory and swap usage, in bytes.
    pub limit_in_bytes: i64,
    /// Current memory and swap usage, in bytes.
    pub usage_in_bytes: u64,
    /// Highest memory and swap usage observed, in bytes.
    pub max_usage_in_bytes: u64,
}
406
407/// State of and statistics gathered by the kernel about the memory usage of the control group's
408/// tasks.
409#[derive(Debug)]
410#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
411pub struct Memory {
412    /// How many times the limit has been hit.
413    pub fail_cnt: u64,
414    /// The limit in bytes of the memory usage of the control group's tasks.
415    pub limit_in_bytes: i64,
416    /// The current usage of memory by the control group's tasks.
417    pub usage_in_bytes: u64,
418    /// The maximum observed usage of memory by the control group's tasks.
419    pub max_usage_in_bytes: u64,
420    /// Whether moving charges at immigrate is allowed.
421    pub move_charge_at_immigrate: u64,
422    /// Contains various statistics about the NUMA locality of the control group's tasks.
423    ///
424    /// The format of this field (as lifted from the kernel sources):
425    /// ```text
426    /// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
427    /// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
428    /// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
429    /// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
430    /// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
431    /// ```
432    pub numa_stat: NumaStat,
433    /// Various statistics and control information about the Out Of Memory killer.
434    pub oom_control: OomControl,
435    /// Allows setting a limit to memory usage which is enforced when the system (note, _not_ the
436    /// control group) detects memory pressure.
437    pub soft_limit_in_bytes: i64,
438    /// Contains a wide array of statistics about the memory usage of the tasks in the control
439    /// group.
440    pub stat: MemoryStat,
441    /// Set the tendency of the kernel to swap out parts of the address space consumed by the
442    /// control group's tasks.
443    ///
444    /// Note that setting this to zero does *not* prevent swapping, use `mlock(2)` for that
445    /// purpose.
446    pub swappiness: u64,
447    /// If set, then under OOM conditions, the kernel will try to reclaim memory from the children
448    /// of the offending process too. By default, this is not allowed.
449    pub use_hierarchy: u64,
450}
451
/// Current state of, and statistics gathered about, the kernel's memory usage for TCP-related
/// data structures.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Tcp {
    /// Number of times the limit has been hit.
    pub fail_cnt: u64,
    /// Limit, in bytes, on the memory used by the kernel's TCP buffers on behalf of the control
    /// group's tasks.
    pub limit_in_bytes: i64,
    /// Memory currently used by the kernel's TCP buffers related to these tasks.
    pub usage_in_bytes: u64,
    /// Highest observed memory usage of the kernel's TCP buffers (that originated from these
    /// tasks).
    pub max_usage_in_bytes: u64,
}
468
/// Gathered statistics about, and the current limits on, the kernel's memory usage.
///
/// These values are per control group: the kernel as a whole can of course use more memory,
/// but it will fail operations by these tasks if it thinks the limits here would be violated.
/// Note in particular that interrupts might not be able to enforce these limits.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Kmem {
    /// Number of times the limit has been hit.
    pub fail_cnt: u64,
    /// Limit, in bytes, on the kernel memory used by the control group's tasks.
    pub limit_in_bytes: i64,
    /// Kernel memory currently used by the control group's tasks, in bytes.
    pub usage_in_bytes: u64,
    /// Highest observed kernel memory usage of the control group's tasks, in bytes.
    pub max_usage_in_bytes: u64,
    /// Information about the memory usage of the kernel's caches, per control group.
    pub slabinfo: String,
}
487
488impl ControllerInternal for MemController {
489    fn control_type(&self) -> Controllers {
490        Controllers::Mem
491    }
492    fn get_path(&self) -> &PathBuf {
493        &self.path
494    }
495    fn get_path_mut(&mut self) -> &mut PathBuf {
496        &mut self.path
497    }
498    fn get_base(&self) -> &PathBuf {
499        &self.base
500    }
501
502    fn is_v2(&self) -> bool {
503        self.v2
504    }
505
506    fn apply(&self, res: &Resources) -> Result<()> {
507        // get the resources that apply to this controller
508        let memres: &MemoryResources = &res.memory;
509
510        update!(self, set_limit, memres.memory_hard_limit);
511        update!(self, set_soft_limit, memres.memory_soft_limit);
512        update!(self, set_kmem_limit, memres.kernel_memory_limit);
513        update!(self, set_memswap_limit, memres.memory_swap_limit);
514        update!(self, set_tcp_limit, memres.kernel_tcp_memory_limit);
515        update!(self, set_swappiness, memres.swappiness);
516
517        memres.attrs.iter().for_each(|(k, v)| {
518            let _ = self.set(k, v);
519        });
520
521        Ok(())
522    }
523}
524
525impl MemController {
526    /// Contructs a new `MemController` with `root` serving as the root of the control group.
527    pub fn new(point: PathBuf, root: PathBuf, v2: bool) -> Self {
528        Self {
529            base: root,
530            path: point,
531            v2,
532        }
533    }
534
535    // for v2
536    pub fn set_mem(&self, m: SetMemory) -> Result<()> {
537        let values = vec![
538            (m.high, "memory.high"),
539            (m.low, "memory.low"),
540            (m.max, "memory.max"),
541            (m.min, "memory.min"),
542        ];
543        for value in values {
544            let v = value.0;
545            let f = value.1;
546            if let Some(v) = v {
547                let v = v.to_string();
548                self.open_path(f, true).and_then(|mut file| {
549                    file.write_all(v.as_ref()).map_err(|e| {
550                        Error::with_cause(WriteFailed(f.to_string(), format!("{:?}", v)), e)
551                    })
552                })?;
553            }
554        }
555        Ok(())
556    }
557
558    // for v2
559    pub fn get_mem(&self) -> Result<SetMemory> {
560        let m = SetMemory {
561            high: self
562                .get_max_value("memory.high")
563                .map_or(Some(MaxValue::default()), Some),
564            low: self
565                .get_max_value("memory.low")
566                .map_or(Some(MaxValue::Value(0)), Some),
567            max: self
568                .get_max_value("memory.max")
569                .map_or(Some(MaxValue::default()), Some),
570            min: self
571                .get_max_value("memory.min")
572                .map_or(Some(MaxValue::Value(0)), Some),
573        };
574
575        Ok(m)
576    }
577
578    fn memory_stat_v2(&self) -> Memory {
579        // NOTE: get_mem() always returns T, but let's
580        // still do `unwrap_or` for safety.
581        let set = self.get_mem().unwrap_or(SetMemory {
582            low: Some(MaxValue::Value(0)),
583            high: Some(MaxValue::default()),
584            max: Some(MaxValue::default()),
585            min: Some(MaxValue::Value(0)),
586        });
587
588        Memory {
589            fail_cnt: 0,
590            limit_in_bytes: set.max.unwrap().to_i64(),
591            usage_in_bytes: self
592                .open_path("memory.current", false)
593                .and_then(read_u64_from)
594                .unwrap_or(0),
595            max_usage_in_bytes: self
596                .open_path("memory.peak", false)
597                .and_then(read_u64_from)
598                .unwrap_or(0),
599            move_charge_at_immigrate: 0,
600            numa_stat: NumaStat::default(),
601            oom_control: OomControl::default(),
602            soft_limit_in_bytes: set.low.unwrap().to_i64(),
603            stat: self
604                .open_path("memory.stat", false)
605                .and_then(read_string_from)
606                .and_then(parse_memory_stat)
607                .unwrap_or_default(),
608            swappiness: self
609                .open_path("memory.swap.current", false)
610                .and_then(read_u64_from)
611                .unwrap_or(0),
612            use_hierarchy: 0,
613        }
614    }
615
616    /// Gathers overall statistics (and the current state of) about the memory usage of the control
617    /// group's tasks.
618    ///
619    /// See the individual fields for more explanation, and as always, remember to consult the
620    /// kernel Documentation and/or sources.
621    pub fn memory_stat(&self) -> Memory {
622        if self.v2 {
623            return self.memory_stat_v2();
624        }
625
626        Memory {
627            fail_cnt: self
628                .open_path("memory.failcnt", false)
629                .and_then(read_u64_from)
630                .unwrap_or(0),
631            limit_in_bytes: self
632                .open_path("memory.limit_in_bytes", false)
633                .and_then(read_i64_from)
634                .unwrap_or(0),
635            usage_in_bytes: self
636                .open_path("memory.usage_in_bytes", false)
637                .and_then(read_u64_from)
638                .unwrap_or(0),
639            max_usage_in_bytes: self
640                .open_path("memory.max_usage_in_bytes", false)
641                .and_then(read_u64_from)
642                .unwrap_or(0),
643            move_charge_at_immigrate: self
644                .open_path("memory.move_charge_at_immigrate", false)
645                .and_then(read_u64_from)
646                .unwrap_or(0),
647            numa_stat: self
648                .open_path("memory.numa_stat", false)
649                .and_then(read_string_from)
650                .and_then(parse_numa_stat)
651                .unwrap_or_default(),
652            oom_control: self
653                .open_path("memory.oom_control", false)
654                .and_then(read_string_from)
655                .and_then(parse_oom_control)
656                .unwrap_or_default(),
657            soft_limit_in_bytes: self
658                .open_path("memory.soft_limit_in_bytes", false)
659                .and_then(read_i64_from)
660                .unwrap_or(0),
661            stat: self
662                .open_path("memory.stat", false)
663                .and_then(read_string_from)
664                .and_then(parse_memory_stat)
665                .unwrap_or_default(),
666            swappiness: self
667                .open_path("memory.swappiness", false)
668                .and_then(read_u64_from)
669                .unwrap_or(0),
670            use_hierarchy: self
671                .open_path("memory.use_hierarchy", false)
672                .and_then(read_u64_from)
673                .unwrap_or(0),
674        }
675    }
676
677    /// Gathers information about the kernel memory usage of the control group's tasks.
678    pub fn kmem_stat(&self) -> Kmem {
679        Kmem {
680            fail_cnt: self
681                .open_path("memory.kmem.failcnt", false)
682                .and_then(read_u64_from)
683                .unwrap_or(0),
684            limit_in_bytes: self
685                .open_path("memory.kmem.limit_in_bytes", false)
686                .and_then(read_i64_from)
687                .unwrap_or(-1),
688            usage_in_bytes: self
689                .open_path("memory.kmem.usage_in_bytes", false)
690                .and_then(read_u64_from)
691                .unwrap_or(0),
692            max_usage_in_bytes: self
693                .open_path("memory.kmem.max_usage_in_bytes", false)
694                .and_then(read_u64_from)
695                .unwrap_or(0),
696            slabinfo: self
697                .open_path("memory.kmem.slabinfo", false)
698                .and_then(read_string_from)
699                .unwrap_or_default(),
700        }
701    }
702
703    /// Gathers information about the control group's kernel memory usage where said memory is
704    /// TCP-related.
705    pub fn kmem_tcp_stat(&self) -> Tcp {
706        Tcp {
707            fail_cnt: self
708                .open_path("memory.kmem.tcp.failcnt", false)
709                .and_then(read_u64_from)
710                .unwrap_or(0),
711            limit_in_bytes: self
712                .open_path("memory.kmem.tcp.limit_in_bytes", false)
713                .and_then(read_i64_from)
714                .unwrap_or(0),
715            usage_in_bytes: self
716                .open_path("memory.kmem.tcp.usage_in_bytes", false)
717                .and_then(read_u64_from)
718                .unwrap_or(0),
719            max_usage_in_bytes: self
720                .open_path("memory.kmem.tcp.max_usage_in_bytes", false)
721                .and_then(read_u64_from)
722                .unwrap_or(0),
723        }
724    }
725
726    pub fn memswap_v2(&self) -> MemSwap {
727        MemSwap {
728            fail_cnt: self
729                .open_path("memory.swap.events", false)
730                .and_then(flat_keyed_to_hashmap)
731                .map(|x| *x.get("fail").unwrap_or(&0) as u64)
732                .unwrap_or(0),
733            limit_in_bytes: self
734                .open_path("memory.swap.max", false)
735                .and_then(read_i64_from)
736                .unwrap_or(0),
737            usage_in_bytes: self
738                .open_path("memory.swap.current", false)
739                .and_then(read_u64_from)
740                .unwrap_or(0),
741            max_usage_in_bytes: self
742                .open_path("memory.swap.peak", false)
743                .and_then(read_u64_from)
744                .unwrap_or(0),
745        }
746    }
747
748    /// Gathers information about the memory usage of the control group including the swap usage
749    /// (if any).
750    pub fn memswap(&self) -> MemSwap {
751        if self.v2 {
752            return self.memswap_v2();
753        }
754
755        MemSwap {
756            fail_cnt: self
757                .open_path("memory.memsw.failcnt", false)
758                .and_then(read_u64_from)
759                .unwrap_or(0),
760            limit_in_bytes: self
761                .open_path("memory.memsw.limit_in_bytes", false)
762                .and_then(read_i64_from)
763                .unwrap_or(0),
764            usage_in_bytes: self
765                .open_path("memory.memsw.usage_in_bytes", false)
766                .and_then(read_u64_from)
767                .unwrap_or(0),
768            max_usage_in_bytes: self
769                .open_path("memory.memsw.max_usage_in_bytes", false)
770                .and_then(read_u64_from)
771                .unwrap_or(0),
772        }
773    }
774
775    /// Reset the fail counter
776    pub fn reset_fail_count(&self) -> Result<()> {
777        self.open_path("memory.failcnt", true).and_then(|mut file| {
778            file.write_all("0".to_string().as_ref()).map_err(|e| {
779                Error::with_cause(
780                    WriteFailed("memory.failcnt".to_string(), "0".to_string()),
781                    e,
782                )
783            })
784        })
785    }
786
787    /// Reset the kernel memory fail counter
788    pub fn reset_kmem_fail_count(&self) -> Result<()> {
789        // Ignore kmem because there is no kmem in cgroup v2
790        if self.v2 {
791            return Ok(());
792        }
793
794        self.open_path("memory.kmem.failcnt", true)
795            .and_then(|mut file| {
796                file.write_all("0".to_string().as_ref()).map_err(|e| {
797                    Error::with_cause(
798                        WriteFailed("memory.kmem.failcnt".to_string(), "0".to_string()),
799                        e,
800                    )
801                })
802            })
803    }
804
805    /// Reset the TCP related fail counter
806    pub fn reset_tcp_fail_count(&self) -> Result<()> {
807        // Ignore kmem because there is no kmem in cgroup v2
808        if self.v2 {
809            return Ok(());
810        }
811
812        self.open_path("memory.kmem.tcp.failcnt", true)
813            .and_then(|mut file| {
814                file.write_all("0".to_string().as_ref()).map_err(|e| {
815                    Error::with_cause(
816                        WriteFailed("memory.kmem.tcp.failcnt".to_string(), "0".to_string()),
817                        e,
818                    )
819                })
820            })
821    }
822
823    /// Reset the memory+swap fail counter
824    pub fn reset_memswap_fail_count(&self) -> Result<()> {
825        self.open_path("memory.memsw.failcnt", true)
826            .and_then(|mut file| {
827                file.write_all("0".to_string().as_ref()).map_err(|e| {
828                    Error::with_cause(
829                        WriteFailed("memory.memsw.failcnt".to_string(), "0".to_string()),
830                        e,
831                    )
832                })
833            })
834    }
835
836    /// Reset the max memory usage recorded
837    pub fn reset_max_usage(&self) -> Result<()> {
838        self.open_path("memory.max_usage_in_bytes", true)
839            .and_then(|mut file| {
840                file.write_all("0".to_string().as_ref()).map_err(|e| {
841                    Error::with_cause(
842                        WriteFailed("memory.max_usage_in_bytes".to_string(), "0".to_string()),
843                        e,
844                    )
845                })
846            })
847    }
848
849    /// Set the memory usage limit of the control group, in bytes.
850    pub fn set_limit(&self, limit: i64) -> Result<()> {
851        let mut file_name = "memory.limit_in_bytes";
852        let mut limit_str = limit.to_string();
853        if self.v2 {
854            file_name = "memory.max";
855            if limit == -1 {
856                limit_str = "max".to_string();
857            }
858        }
859        self.open_path(file_name, true).and_then(|mut file| {
860            file.write_all(limit_str.as_ref())
861                .map_err(|e| Error::with_cause(WriteFailed(file_name.to_string(), limit_str), e))
862        })
863    }
864
865    /// Set the kernel memory limit of the control group, in bytes.
866    pub fn set_kmem_limit(&self, limit: i64) -> Result<()> {
867        // Ignore kmem because there is no kmem in cgroup v2
868        if self.v2 {
869            return Ok(());
870        }
871
872        self.open_path("memory.kmem.limit_in_bytes", true)
873            .and_then(|mut file| {
874                let r = file.write_all(limit.to_string().as_ref());
875                match r {
876                    Ok(()) => Ok(()),
877                    Err(ref e) if e.raw_os_error() == Some(libc::EOPNOTSUPP) => {
878                        warn!("memory.kmem.limit_in_bytes is unsupported by the kernel");
879                        Ok(())
880                    }
881                    Err(e) => Err(Error::with_cause(
882                        WriteFailed("memory.kmem.limit_in_bytes".to_string(), limit.to_string()),
883                        e,
884                    )),
885                }
886            })
887    }
888
889    /// Set the memory+swap limit of the control group, in bytes.
890    pub fn set_memswap_limit(&self, limit: i64) -> Result<()> {
891        let mut file_name = "memory.memsw.limit_in_bytes";
892        let mut limit_str = limit.to_string();
893        if self.v2 {
894            file_name = "memory.swap.max";
895            if limit == -1 {
896                limit_str = "max".to_string();
897            }
898        }
899        self.open_path(file_name, true).and_then(|mut file| {
900            file.write_all(limit_str.as_ref())
901                .map_err(|e| Error::with_cause(WriteFailed(file_name.to_string(), limit_str), e))
902        })
903    }
904
905    /// Set how much kernel memory can be used for TCP-related buffers by the control group.
906    pub fn set_tcp_limit(&self, limit: i64) -> Result<()> {
907        // Ignore kmem because there is no kmem in cgroup v2
908        if self.v2 {
909            return Ok(());
910        }
911
912        self.open_path("memory.kmem.tcp.limit_in_bytes", true)
913            .and_then(|mut file| {
914                file.write_all(limit.to_string().as_ref()).map_err(|e| {
915                    Error::with_cause(
916                        WriteFailed(
917                            "memory.kmem.tcp.limit_in_bytes".to_string(),
918                            limit.to_string(),
919                        ),
920                        e,
921                    )
922                })
923            })
924    }
925
926    /// Set the soft limit of the control group, in bytes.
927    ///
928    /// This limit is enforced when the system is nearing OOM conditions. Contrast this with the
929    /// hard limit, which is _always_ enforced.
930    pub fn set_soft_limit(&self, limit: i64) -> Result<()> {
931        let mut file_name = "memory.soft_limit_in_bytes";
932        if self.v2 {
933            file_name = "memory.low"
934        }
935        self.open_path(file_name, true).and_then(|mut file| {
936            file.write_all(limit.to_string().as_ref()).map_err(|e| {
937                Error::with_cause(WriteFailed(file_name.to_string(), limit.to_string()), e)
938            })
939        })
940    }
941
942    /// Set how likely the kernel is to swap out parts of the address space used by the control
943    /// group.
944    ///
945    /// Note that a value of zero does not imply that the process will not be swapped out.
946    pub fn set_swappiness(&self, swp: u64) -> Result<()> {
947        let mut file_name = "memory.swappiness";
948        if self.v2 {
949            file_name = "memory.swap.max"
950        }
951
952        self.open_path(file_name, true).and_then(|mut file| {
953            file.write_all(swp.to_string().as_ref()).map_err(|e| {
954                Error::with_cause(WriteFailed(file_name.to_string(), swp.to_string()), e)
955            })
956        })
957    }
958
959    pub fn disable_oom_killer(&self) -> Result<()> {
960        self.open_path("memory.oom_control", true)
961            .and_then(|mut file| {
962                file.write_all("1".to_string().as_ref()).map_err(|e| {
963                    Error::with_cause(
964                        WriteFailed("memory.oom_control".to_string(), "1".to_string()),
965                        e,
966                    )
967                })
968            })
969    }
970
971    pub fn register_oom_event(&self, key: &str) -> Result<Receiver<String>> {
972        if self.v2 {
973            events::notify_on_oom_v2(key, self.get_path())
974        } else {
975            events::notify_on_oom_v1(key, self.get_path())
976        }
977    }
978}
979
980impl ControllIdentifier for MemController {
981    fn controller_type() -> Controllers {
982        Controllers::Mem
983    }
984}
985
986impl CustomizedAttribute for MemController {}
987
988impl<'a> From<&'a Subsystem> for &'a MemController {
989    fn from(sub: &'a Subsystem) -> &'a MemController {
990        unsafe {
991            match sub {
992                Subsystem::Mem(c) => c,
993                _ => {
994                    assert_eq!(1, 0);
995                    let v = std::mem::MaybeUninit::uninit();
996                    v.assume_init()
997                }
998            }
999        }
1000    }
1001}
1002
#[cfg(test)]
mod tests {
    use crate::fs::memory::{
        parse_memory_stat, parse_numa_stat, parse_oom_control, MemoryStat, NumaStat, OomControl,
    };

    // `memory.numa_stat` sample with both the plain and the hierarchical sections.
    const GOOD_VALUE: &str = "\
total=51189 N0=51189 N1=123
file=50175 N0=50175 N1=123
anon=1014 N0=1014 N1=123
unevictable=0 N0=0 N1=123
hierarchical_total=1628573 N0=1628573 N1=123
hierarchical_file=858151 N0=858151 N1=123
hierarchical_anon=770402 N0=770402 N1=123
hierarchical_unevictable=20 N0=20 N1=123
";

    // `memory.numa_stat` sample without the hierarchical section.
    const GOOD_VALUE_NON_HIERARCHICAL: &str = "\
total=51189 N0=51189 N1=123
file=50175 N0=50175 N1=123
anon=1014 N0=1014 N1=123
unevictable=0 N0=0 N1=123
";

    // `memory.oom_control` sample lacking the `under_oom` line.
    const GOOD_OOMCONTROL_VAL_1: &str = "\
oom_kill_disable 0
oom_kill 1337
";

    // `memory.oom_control` sample lacking the `oom_kill` line.
    const GOOD_OOMCONTROL_VAL_2: &str = "\
oom_kill_disable 0
under_oom 1
";

    // `memory.oom_control` sample with all three lines.
    const GOOD_OOMCONTROL_VAL_3: &str = "\
oom_kill_disable 0
under_oom 1
oom_kill 1337
";

    // `memory.stat` sample covering every field of `MemoryStat` asserted below.
    const GOOD_MEMORYSTAT_VAL: &str = "\
cache 178880512
rss 4206592
rss_huge 0
shmem 106496
mapped_file 7491584
dirty 114688
writeback 49152
swap 0
pgpgin 213928
pgpgout 169220
pgfault 87064
pgmajfault 202
inactive_anon 0
active_anon 4153344
inactive_file 84779008
active_file 94273536
unevictable 0
hierarchical_memory_limit 9223372036854771712
hierarchical_memsw_limit 9223372036854771712
total_cache 4200333312
total_rss 2927677440
total_rss_huge 0
total_shmem 590061568
total_mapped_file 1086164992
total_dirty 1769472
total_writeback 602112
total_swap 0
total_pgpgin 5267326291
total_pgpgout 5265586647
total_pgfault 9947902469
total_pgmajfault 25132
total_inactive_anon 585981952
total_active_anon 2928996352
total_inactive_file 1272135680
total_active_file 2338816000
total_unevictable 81920
";

    /// Both NUMA samples parse; the hierarchical fields stay at their empty/zero values when
    /// the input has no hierarchical lines.
    #[test]
    fn test_parse_numa_stat() {
        let with_hierarchy = NumaStat {
            total_pages: 51189,
            total_pages_per_node: vec![51189, 123],
            file_pages: 50175,
            file_pages_per_node: vec![50175, 123],
            anon_pages: 1014,
            anon_pages_per_node: vec![1014, 123],
            unevictable_pages: 0,
            unevictable_pages_per_node: vec![0, 123],

            hierarchical_total_pages: 1628573,
            hierarchical_total_pages_per_node: vec![1628573, 123],
            hierarchical_file_pages: 858151,
            hierarchical_file_pages_per_node: vec![858151, 123],
            hierarchical_anon_pages: 770402,
            hierarchical_anon_pages_per_node: vec![770402, 123],
            hierarchical_unevictable_pages: 20,
            hierarchical_unevictable_pages_per_node: vec![20, 123],
        };
        let parsed = parse_numa_stat(GOOD_VALUE.to_string()).unwrap();
        assert_eq!(parsed, with_hierarchy);

        let without_hierarchy = NumaStat {
            total_pages: 51189,
            total_pages_per_node: vec![51189, 123],
            file_pages: 50175,
            file_pages_per_node: vec![50175, 123],
            anon_pages: 1014,
            anon_pages_per_node: vec![1014, 123],
            unevictable_pages: 0,
            unevictable_pages_per_node: vec![0, 123],

            hierarchical_total_pages: 0,
            hierarchical_total_pages_per_node: vec![],
            hierarchical_file_pages: 0,
            hierarchical_file_pages_per_node: vec![],
            hierarchical_anon_pages: 0,
            hierarchical_anon_pages_per_node: vec![],
            hierarchical_unevictable_pages: 0,
            hierarchical_unevictable_pages_per_node: vec![],
        };
        let parsed = parse_numa_stat(GOOD_VALUE_NON_HIERARCHICAL.to_string()).unwrap();
        assert_eq!(parsed, without_hierarchy);
    }

    /// Table-driven check of the OOM control parser against the empty input and the three
    /// samples above, in that order.
    #[test]
    fn test_parse_oom_control() {
        let cases = vec![
            (
                "",
                OomControl {
                    oom_kill_disable: false,
                    under_oom: false,
                    oom_kill: 0,
                },
            ),
            (
                GOOD_OOMCONTROL_VAL_1,
                OomControl {
                    oom_kill_disable: false,
                    under_oom: false,
                    oom_kill: 0,
                },
            ),
            (
                GOOD_OOMCONTROL_VAL_2,
                OomControl {
                    oom_kill_disable: false,
                    under_oom: true,
                    oom_kill: 0,
                },
            ),
            (
                GOOD_OOMCONTROL_VAL_3,
                OomControl {
                    oom_kill_disable: false,
                    under_oom: true,
                    oom_kill: 1337,
                },
            ),
        ];

        for (input, expected) in cases {
            let parsed = parse_oom_control(input.to_string()).unwrap();
            assert_eq!(parsed, expected);
        }
    }

    /// Every named field of the `memory.stat` sample is parsed; `raw` is copied from the
    /// parsed value so the comparison only exercises the named fields.
    #[test]
    fn test_parse_memory_stat() {
        let parsed = parse_memory_stat(GOOD_MEMORYSTAT_VAL.to_string()).unwrap();
        let expected = MemoryStat {
            cache: 178880512,
            rss: 4206592,
            rss_huge: 0,
            shmem: 106496,
            mapped_file: 7491584,
            dirty: 114688,
            writeback: 49152,
            swap: 0,
            pgpgin: 213928,
            pgpgout: 169220,
            pgfault: 87064,
            pgmajfault: 202,
            inactive_anon: 0,
            active_anon: 4153344,
            inactive_file: 84779008,
            active_file: 94273536,
            unevictable: 0,
            hierarchical_memory_limit: 9223372036854771712,
            hierarchical_memsw_limit: 9223372036854771712,
            total_cache: 4200333312,
            total_rss: 2927677440,
            total_rss_huge: 0,
            total_shmem: 590061568,
            total_mapped_file: 1086164992,
            total_dirty: 1769472,
            total_writeback: 602112,
            total_swap: 0,
            total_pgpgin: 5267326291,
            total_pgpgout: 5265586647,
            total_pgfault: 9947902469,
            total_pgmajfault: 25132,
            total_inactive_anon: 585981952,
            total_active_anon: 2928996352,
            total_inactive_file: 1272135680,
            total_active_file: 2338816000,
            total_unevictable: 81920,
            raw: parsed.raw.clone(),
        };
        assert_eq!(parsed, expected);
    }
}