below_model/
cgroup.rs

1// Copyright (c) Facebook, Inc. and its affiliates.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use super::*;
16
/// Collection of all data local to the cgroup, e.g. its memory/io/cpu/pids usage.
/// Nothing about child cgroups or siblings, and therefore "Single" in its name.
///
/// Instances are assembled by `CgroupModel::new`. The `cpu`, `io` and
/// `io_total` sub-models are `None` when no usable previous sample is
/// available to diff against.
#[::below_derive::queriable_derives]
pub struct SingleCgroupModel {
    // Name of this cgroup. For children built by `CgroupModel::new` this is
    // the last component of `full_path`.
    pub name: String,
    // For children built by `CgroupModel::new`: the parent's `full_path`
    // followed by `/` and the child's name.
    pub full_path: String,
    // Inode number reported by the sample, if any.
    pub inode_number: Option<u64>,
    // Nesting level; every child is created with its parent's depth plus one.
    #[queriable(ignore)]
    pub depth: u32,
    #[queriable(subquery)]
    #[queriable(preferred_name = props)]
    pub properties: Option<CgroupProperties>,
    #[queriable(subquery)]
    pub cpu: Option<CgroupCpuModel>,
    #[queriable(subquery)]
    #[queriable(preferred_name = mem)]
    pub memory: Option<CgroupMemoryModel>,
    #[queriable(subquery)]
    #[queriable(preferred_name = pids)]
    pub pids: Option<CgroupPidsModel>,
    // Per-device I/O model, keyed by device name.
    #[queriable(subquery)]
    #[queriable(preferred_name = io_details)]
    pub io: Option<BTreeMap<String, CgroupIoModel>>,
    // Sum of all per-device entries of `io`.
    #[queriable(subquery)]
    #[queriable(preferred_name = io)]
    pub io_total: Option<CgroupIoModel>,
    #[queriable(subquery)]
    pub pressure: Option<CgroupPressureModel>,
    #[queriable(subquery)]
    pub cgroup_stat: Option<CgroupStatModel>,
    // Per-NUMA-node memory model, keyed by node id.
    #[queriable(subquery)]
    #[queriable(preferred_name = mem_numa)]
    pub memory_numa_stat: Option<BTreeMap<u32, CgroupMemoryNumaModel>>,
}
51
/// A model that represents a cgroup subtree. Each instance is a node that uses
/// the "data" field to represent local data. Otherwise mixing hierarchy and
/// data makes it hard to define a Field Id type that queries nested cgroups.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct CgroupModel {
    /// Data local to this cgroup (no information about children).
    pub data: SingleCgroupModel,
    /// Child subtrees. Ordering, equality and `&str` lookup of the elements
    /// are all based on the child's `data.name`.
    pub children: BTreeSet<CgroupModel>,
    /// Total number of cgroups under this subtree, including self
    pub count: u32,
    /// Set when a previous sample exists for this cgroup but its inode number
    /// does not match the current sample's, i.e. the cgroup appears to have
    /// been recreated between the two samples.
    pub recreate_flag: bool,
}
64
/// Queries a specific SingleCgroupModel inside a CgroupModel tree.
/// Its String representation looks like this:
///     path:/system.slice/foo.service/.cpu.usage_pct
/// The path parameter starts with `path:` and ends with `/.`. This works
/// because a SingleCgroupModelFieldId does not contain a slash.
/// The path is used to drill into the CgroupModel tree. If the path Vec is
/// empty, the current CgroupModel is selected and queried with the
/// subquery_id.
/// The path is optional both when parsing and when converting to a String.
pub type CgroupModelFieldId = QueriableContainerFieldId<CgroupModel>;
74
/// Index of a node inside a `CgroupModel` tree.
///
/// Displayed as `path:/<a>/<b>/.../` and parsed back from the same form.
#[derive(Clone, Debug, PartialEq)]
pub struct CgroupPath {
    /// Cgroup names to follow from the starting node, outermost first. An
    /// empty Vec selects the starting node itself.
    pub path: Vec<String>,
}
79
80impl std::fmt::Display for CgroupPath {
81    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82        write!(f, "path:/{}/", self.path.join("/"))
83    }
84}
85
86impl FromStr for CgroupPath {
87    type Err = anyhow::Error;
88    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
89        if !s.starts_with("path:/") {
90            return Err(anyhow!("Path is not prefixed with `path:/`: {}", s));
91        }
92        Ok(Self {
93            path: s["path:/".len()..]
94                .split('/')
95                .filter(|part| !part.is_empty())
96                .map(|part| part.to_owned())
97                .collect(),
98        })
99    }
100}
101
102impl QueriableContainer for CgroupModel {
103    type Idx = CgroupPath;
104    type SubqueryId = SingleCgroupModelFieldId;
105    const IDX_PLACEHOLDER: &'static str = "[path:/<cgroup_path>/.]";
106    fn split(s: &str) -> Option<(&str, &str)> {
107        let idx_end = s.rfind("/.")?;
108        Some((&s[..idx_end + 1], &s[idx_end + 2..]))
109    }
110    fn get_item(&self, idx: &Self::Idx) -> Option<&SingleCgroupModel> {
111        self.get_by_path_iter(idx.path.iter())
112            .map(|model| &model.data)
113    }
114}
115
116impl core::borrow::Borrow<str> for CgroupModel {
117    fn borrow(&self) -> &str {
118        &self.data.name
119    }
120}
121
122// We implement equality and ordering based on the cgroup name only so
123// CgroupModel can be stored in a BTreeSet
124impl Ord for CgroupModel {
125    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
126        self.data.name.cmp(&other.data.name)
127    }
128}
129
130impl PartialOrd for CgroupModel {
131    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
132        Some(self.cmp(other))
133    }
134}
135
136impl PartialEq for CgroupModel {
137    fn eq(&self, other: &Self) -> bool {
138        self.data.name == other.data.name
139    }
140}
141
// Marker impl: the name-based `PartialEq` of this type is a full equivalence
// relation, as required for use as a `BTreeSet` element.
impl Eq for CgroupModel {}
143
144impl CgroupModel {
145    pub fn new(
146        name: String,
147        full_path: String,
148        depth: u32,
149        sample: &CgroupSample,
150        last: Option<(&CgroupSample, Duration)>,
151    ) -> CgroupModel {
152        let last_if_inode_matches =
153            last.and_then(|(s, d)| match (s.inode_number, sample.inode_number) {
154                (Some(prev_inode), Some(current_inode)) if prev_inode == current_inode => {
155                    Some((s, d))
156                }
157                (None, None) => Some((s, d)),
158                _ => None,
159            });
160        let properties = Some(CgroupProperties::new(sample));
161        let (cpu, io, io_total, recreate_flag) = if let Some((last, delta)) = last_if_inode_matches
162        {
163            // We have cumulative data, create cpu, io models
164            let cpu = match (last.cpu_stat.as_ref(), sample.cpu_stat.as_ref()) {
165                (Some(begin), Some(end)) => Some(CgroupCpuModel::new(begin, end, delta)),
166                _ => None,
167            };
168            let io = match (last.io_stat.as_ref(), sample.io_stat.as_ref()) {
169                (Some(begin), Some(end)) => Some(
170                    end.iter()
171                        .filter_map(|(device_name, end_io_stat)| {
172                            begin.get(device_name).map(|begin_io_stat| {
173                                (
174                                    device_name.clone(),
175                                    CgroupIoModel::new(begin_io_stat, end_io_stat, delta),
176                                )
177                            })
178                        })
179                        .collect::<BTreeMap<String, CgroupIoModel>>(),
180                ),
181                _ => None,
182            };
183            let io_total = io.as_ref().map(|io_map| {
184                io_map
185                    .iter()
186                    .fold(CgroupIoModel::empty(), |acc, (_, model)| acc + model)
187            });
188
189            (cpu, io, io_total, false)
190        } else {
191            // No cumulative data or inode number is different
192            (None, None, None, last.is_some())
193        };
194
195        let memory = Some(CgroupMemoryModel::new(sample, last));
196
197        let pids = Some(CgroupPidsModel::new(sample));
198
199        let pressure = sample.pressure.as_ref().map(CgroupPressureModel::new);
200
201        let cgroup_stat = sample.cgroup_stat.as_ref().map(CgroupStatModel::new);
202
203        let memory_numa_stat = {
204            sample.memory_numa_stat.as_ref().map(|end_numa_nodes| {
205                let begin_numa_nodes = last_if_inode_matches.and_then(|(s, d)| {
206                    s.memory_numa_stat
207                        .as_ref()
208                        .map(|numa_nodes| (numa_nodes, d))
209                });
210                end_numa_nodes
211                    .iter()
212                    .map(|(node_id, stat)| {
213                        let begin_numa_stat = begin_numa_nodes
214                            .and_then(|(nodes, d)| nodes.get(node_id).map(|stat| (stat, d)));
215                        (*node_id, CgroupMemoryNumaModel::new(stat, begin_numa_stat))
216                    })
217                    .collect()
218            })
219        };
220
221        // recursively calculate view of children
222        // `children` is optional, but we treat it the same as an empty map
223        let empty = BTreeMap::new();
224        let children = sample
225            .children
226            .as_ref()
227            .unwrap_or(&empty)
228            .iter()
229            .map(|(child_name, child_sample)| {
230                CgroupModel::new(
231                    child_name.clone(),
232                    format!("{}/{}", full_path, child_name),
233                    depth + 1,
234                    child_sample,
235                    last.and_then(|(last, delta)| {
236                        last.children
237                            .as_ref()
238                            .unwrap_or(&empty)
239                            .get(child_name)
240                            .map(|child_last| (child_last, delta))
241                    }),
242                )
243            })
244            .collect::<BTreeSet<CgroupModel>>();
245        let nr_descendants: u32 = children.iter().fold(0, |acc, c| acc + c.count);
246        CgroupModel {
247            data: SingleCgroupModel {
248                name,
249                full_path,
250                inode_number: sample.inode_number.map(|ino| ino as u64),
251                properties,
252                cpu,
253                memory,
254                pids,
255                io,
256                io_total,
257                pressure,
258                depth,
259                cgroup_stat,
260                memory_numa_stat,
261            },
262            children,
263            count: nr_descendants + 1,
264            recreate_flag,
265        }
266    }
267
268    pub fn aggr_top_level_val(mut self) -> Self {
269        if let Some(memory) = &self.data.memory {
270            // If root model has the value, return it directly
271            if memory.total.is_some() {
272                return self;
273            }
274        }
275        // Manually aggregate specified fields from children
276        let mut aggregated_memory = CgroupMemoryModel::default();
277        for child in &self.children {
278            if let Some(child_memory) = &child.data.memory {
279                aggregated_memory.total = opt_add(aggregated_memory.total, child_memory.total);
280                aggregated_memory.swap = opt_add(aggregated_memory.swap, child_memory.swap);
281                aggregated_memory.events_low =
282                    opt_add(aggregated_memory.events_low, child_memory.events_low);
283                aggregated_memory.events_high =
284                    opt_add(aggregated_memory.events_high, child_memory.events_high);
285                aggregated_memory.events_max =
286                    opt_add(aggregated_memory.events_max, child_memory.events_max);
287                aggregated_memory.events_oom =
288                    opt_add(aggregated_memory.events_oom, child_memory.events_oom);
289                aggregated_memory.events_oom_kill = opt_add(
290                    aggregated_memory.events_oom_kill,
291                    child_memory.events_oom_kill,
292                );
293            }
294        }
295        // Assign aggregated values to root memory model
296        if let Some(memory) = &mut self.data.memory {
297            memory.total = aggregated_memory.total;
298            memory.swap = aggregated_memory.swap;
299            memory.events_low = aggregated_memory.events_low;
300            memory.events_high = aggregated_memory.events_high;
301            memory.events_max = aggregated_memory.events_max;
302            memory.events_oom = aggregated_memory.events_oom;
303            memory.events_oom_kill = aggregated_memory.events_oom_kill;
304        }
305        self
306    }
307
308    fn get_by_path_iter(
309        &self,
310        mut path: impl Iterator<Item = impl AsRef<str>>,
311    ) -> Option<&CgroupModel> {
312        path.try_fold(self, |cur, p| cur.children.get(p.as_ref()))
313    }
314
315    pub fn get_by_path_str(&self, path: &str) -> Option<&CgroupModel> {
316        self.get_by_path_iter(path.split('/').filter(|x| !x.is_empty()))
317    }
318}
319
impl Nameable for CgroupModel {
    /// Returns the fixed name of this model type: `"cgroup"`.
    fn name() -> &'static str {
        "cgroup"
    }
}
325
impl Recursive for SingleCgroupModel {
    /// Returns the stored `depth` of this cgroup, widened to `usize`.
    fn get_depth(&self) -> usize {
        self.depth as usize
    }
}
331
impl Nameable for SingleCgroupModel {
    /// Returns the fixed name of this model type: `"cgroup"` (the same name
    /// `CgroupModel` reports).
    fn name() -> &'static str {
        "cgroup"
    }
}
337
/// CPU statistics of a cgroup, derived from two `cgroupfs::CpuStat` samples
/// and the time elapsed between them (see `CgroupCpuModel::new`).
#[::below_derive::queriable_derives]
pub struct CgroupCpuModel {
    // `*_pct` fields come from `usec_pct!` applied to the matching `*_usec`
    // counters of the two samples.
    pub usage_pct: Option<f64>,
    pub user_pct: Option<f64>,
    pub system_pct: Option<f64>,
    // `*_per_sec` fields come from `count_per_sec!` applied to the matching
    // counters of the two samples.
    pub nr_periods_per_sec: Option<f64>,
    pub nr_throttled_per_sec: Option<f64>,
    pub throttled_pct: Option<f64>,
}
347
348impl CgroupCpuModel {
349    pub fn new(
350        begin: &cgroupfs::CpuStat,
351        end: &cgroupfs::CpuStat,
352        delta: Duration,
353    ) -> CgroupCpuModel {
354        CgroupCpuModel {
355            usage_pct: usec_pct!(begin.usage_usec, end.usage_usec, delta),
356            user_pct: usec_pct!(begin.user_usec, end.user_usec, delta),
357            system_pct: usec_pct!(begin.system_usec, end.system_usec, delta),
358            nr_periods_per_sec: count_per_sec!(begin.nr_periods, end.nr_periods, delta),
359            nr_throttled_per_sec: count_per_sec!(begin.nr_throttled, end.nr_throttled, delta),
360            throttled_pct: usec_pct!(begin.throttled_usec, end.throttled_usec, delta),
361        }
362    }
363}
364
/// Descendant counters of a cgroup, copied unchanged from a
/// `cgroupfs::CgroupStat` (see `CgroupStatModel::new`).
#[::below_derive::queriable_derives]
pub struct CgroupStatModel {
    pub nr_descendants: Option<u32>,
    pub nr_dying_descendants: Option<u32>,
}
370
371impl CgroupStatModel {
372    pub fn new(cgroup_stat: &cgroupfs::CgroupStat) -> CgroupStatModel {
373        CgroupStatModel {
374            nr_descendants: cgroup_stat.nr_descendants,
375            nr_dying_descendants: cgroup_stat.nr_dying_descendants,
376        }
377    }
378}
379
/// I/O statistics of a cgroup, either for one device or summed over devices.
/// Derived from two `cgroupfs::IoStat` samples (see `CgroupIoModel::new`).
#[::below_derive::queriable_derives]
pub struct CgroupIoModel {
    // `*_per_sec` fields come from `count_per_sec!` applied to the matching
    // counters of the two samples.
    pub rbytes_per_sec: Option<f64>,
    pub wbytes_per_sec: Option<f64>,
    pub rios_per_sec: Option<f64>,
    pub wios_per_sec: Option<f64>,
    pub dbytes_per_sec: Option<f64>,
    pub dios_per_sec: Option<f64>,
    // `opt_add` of `rbytes_per_sec` and `wbytes_per_sec`.
    pub rwbytes_per_sec: Option<f64>,
    // `cost_*_pct` fields come from `usec_pct!` applied to the matching
    // `cost_*` counters of the two samples.
    pub cost_usage_pct: Option<f64>,
    pub cost_wait_pct: Option<f64>,
    pub cost_indebt_pct: Option<f64>,
    pub cost_indelay_pct: Option<f64>,
}
394
395impl CgroupIoModel {
396    pub fn new(begin: &cgroupfs::IoStat, end: &cgroupfs::IoStat, delta: Duration) -> CgroupIoModel {
397        let rbytes_per_sec = count_per_sec!(begin.rbytes, end.rbytes, delta);
398        let wbytes_per_sec = count_per_sec!(begin.wbytes, end.wbytes, delta);
399        let rwbytes_per_sec = opt_add(rbytes_per_sec, wbytes_per_sec);
400        CgroupIoModel {
401            rbytes_per_sec,
402            wbytes_per_sec,
403            rios_per_sec: count_per_sec!(begin.rios, end.rios, delta),
404            wios_per_sec: count_per_sec!(begin.wios, end.wios, delta),
405            dbytes_per_sec: count_per_sec!(begin.dbytes, end.dbytes, delta),
406            dios_per_sec: count_per_sec!(begin.dios, end.dios, delta),
407            rwbytes_per_sec,
408            cost_usage_pct: usec_pct!(begin.cost_usage, end.cost_usage, delta),
409            cost_wait_pct: usec_pct!(begin.cost_wait, end.cost_wait, delta),
410            cost_indebt_pct: usec_pct!(begin.cost_indebt, end.cost_indebt, delta),
411            cost_indelay_pct: usec_pct!(begin.cost_indelay, end.cost_indelay, delta),
412        }
413    }
414
415    pub fn empty() -> CgroupIoModel {
416        // If io.stat file is empty, it means cgroup has no I/O at all. In that
417        // case we default to zero instead of None.
418        CgroupIoModel {
419            rbytes_per_sec: Some(0.0),
420            wbytes_per_sec: Some(0.0),
421            rios_per_sec: Some(0.0),
422            wios_per_sec: Some(0.0),
423            dbytes_per_sec: Some(0.0),
424            dios_per_sec: Some(0.0),
425            rwbytes_per_sec: Some(0.0),
426            cost_usage_pct: Some(0.0),
427            cost_wait_pct: Some(0.0),
428            cost_indebt_pct: Some(0.0),
429            cost_indelay_pct: Some(0.0),
430        }
431    }
432}
433
434impl std::ops::Add<&CgroupIoModel> for CgroupIoModel {
435    type Output = Self;
436
437    fn add(self, other: &Self) -> Self {
438        Self {
439            rbytes_per_sec: opt_add(self.rbytes_per_sec, other.rbytes_per_sec),
440            wbytes_per_sec: opt_add(self.wbytes_per_sec, other.wbytes_per_sec),
441            rios_per_sec: opt_add(self.rios_per_sec, other.rios_per_sec),
442            wios_per_sec: opt_add(self.wios_per_sec, other.wios_per_sec),
443            dbytes_per_sec: opt_add(self.dbytes_per_sec, other.dbytes_per_sec),
444            dios_per_sec: opt_add(self.dios_per_sec, other.dios_per_sec),
445            rwbytes_per_sec: opt_add(self.rwbytes_per_sec, other.rwbytes_per_sec),
446            cost_usage_pct: opt_add(self.cost_usage_pct, other.cost_usage_pct),
447            cost_wait_pct: opt_add(self.cost_wait_pct, other.cost_wait_pct),
448            cost_indebt_pct: opt_add(self.cost_indebt_pct, other.cost_indebt_pct),
449            cost_indelay_pct: opt_add(self.cost_indelay_pct, other.cost_indelay_pct),
450        }
451    }
452}
453
/// Memory statistics of a single cgroup (see `CgroupMemoryModel::new` for how
/// each group of fields is filled from a `CgroupSample`).
#[::below_derive::queriable_derives]
pub struct CgroupMemoryModel {
    // From the sample's `memory_current` and `memory_swap_current`.
    pub total: Option<u64>,
    pub swap: Option<u64>,
    // Copied unchanged from the sample's `memory_stat`.
    pub anon: Option<u64>,
    pub file: Option<u64>,
    pub kernel: Option<u64>,
    pub kernel_stack: Option<u64>,
    pub slab: Option<u64>,
    pub sock: Option<u64>,
    pub shmem: Option<u64>,
    // From the sample's `memory_zswap_current`; falls back to
    // `memory_stat.zswap` when that is absent.
    pub zswap: Option<u64>,
    // Copied unchanged from the sample's `memory_stat`.
    pub zswapped: Option<u64>,
    pub file_mapped: Option<u64>,
    pub file_dirty: Option<u64>,
    pub file_writeback: Option<u64>,
    pub anon_thp: Option<u64>,
    pub inactive_anon: Option<u64>,
    pub active_anon: Option<u64>,
    pub inactive_file: Option<u64>,
    pub active_file: Option<u64>,
    pub unevictable: Option<u64>,
    pub slab_reclaimable: Option<u64>,
    pub slab_unreclaimable: Option<u64>,
    // Computed with `count_per_sec!` from the `memory_stat` counters of the
    // previous and the current sample; left `None` without a previous sample.
    pub pgfault: Option<u64>,
    pub pgmajfault: Option<u64>,
    pub workingset_refault_anon: Option<u64>,
    pub workingset_refault_file: Option<u64>,
    pub workingset_activate_anon: Option<u64>,
    pub workingset_activate_file: Option<u64>,
    pub workingset_restore_anon: Option<u64>,
    pub workingset_restore_file: Option<u64>,
    pub workingset_nodereclaim: Option<u64>,
    pub pgrefill: Option<u64>,
    pub pgscan: Option<u64>,
    pub pgsteal: Option<u64>,
    pub pgactivate: Option<u64>,
    pub pgdeactivate: Option<u64>,
    pub pglazyfree: Option<u64>,
    pub pglazyfreed: Option<u64>,
    pub thp_fault_alloc: Option<u64>,
    pub thp_collapse_alloc: Option<u64>,
    // Computed with `count_per_sec!` from the `memory_events` counters of the
    // previous and the current sample.
    pub events_low: Option<u64>,
    pub events_high: Option<u64>,
    pub events_max: Option<u64>,
    pub events_oom: Option<u64>,
    pub events_oom_kill: Option<u64>,
    // Computed with `count_per_sec!` from the `memory_events_local` counters
    // of the previous and the current sample.
    pub events_local_low: Option<u64>,
    pub events_local_high: Option<u64>,
    pub events_local_max: Option<u64>,
    pub events_local_oom: Option<u64>,
    pub events_local_oom_kill: Option<u64>,
}
507
508impl std::ops::Add for CgroupMemoryModel {
509    type Output = Self;
510
511    fn add(self, other: Self) -> Self::Output {
512        Self {
513            total: opt_add(self.total, other.total),
514            swap: opt_add(self.swap, other.swap),
515            anon: opt_add(self.anon, other.anon),
516            file: opt_add(self.file, other.file),
517            kernel: opt_add(self.kernel, other.kernel),
518            kernel_stack: opt_add(self.kernel_stack, other.kernel_stack),
519            slab: opt_add(self.slab, other.slab),
520            sock: opt_add(self.sock, other.sock),
521            shmem: opt_add(self.shmem, other.shmem),
522            zswap: opt_add(self.zswap, other.zswap),
523            zswapped: opt_add(self.zswapped, other.zswapped),
524            file_mapped: opt_add(self.file_mapped, other.file_mapped),
525            file_dirty: opt_add(self.file_dirty, other.file_dirty),
526            file_writeback: opt_add(self.file_writeback, other.file_writeback),
527            anon_thp: opt_add(self.anon_thp, other.anon_thp),
528            inactive_anon: opt_add(self.inactive_anon, other.inactive_anon),
529            active_anon: opt_add(self.active_anon, other.active_anon),
530            inactive_file: opt_add(self.inactive_file, other.inactive_file),
531            active_file: opt_add(self.active_file, other.active_file),
532            unevictable: opt_add(self.unevictable, other.unevictable),
533            slab_reclaimable: opt_add(self.slab_reclaimable, other.slab_reclaimable),
534            slab_unreclaimable: opt_add(self.slab_unreclaimable, other.slab_unreclaimable),
535            pgfault: opt_add(self.pgfault, other.pgfault),
536            pgmajfault: opt_add(self.pgmajfault, other.pgmajfault),
537            workingset_refault_anon: opt_add(
538                self.workingset_refault_anon,
539                other.workingset_refault_anon,
540            ),
541            workingset_refault_file: opt_add(
542                self.workingset_refault_file,
543                other.workingset_refault_file,
544            ),
545            workingset_activate_anon: opt_add(
546                self.workingset_activate_anon,
547                other.workingset_activate_anon,
548            ),
549            workingset_activate_file: opt_add(
550                self.workingset_activate_file,
551                other.workingset_activate_file,
552            ),
553            workingset_restore_anon: opt_add(
554                self.workingset_restore_anon,
555                other.workingset_restore_anon,
556            ),
557            workingset_restore_file: opt_add(
558                self.workingset_restore_file,
559                other.workingset_restore_file,
560            ),
561            workingset_nodereclaim: opt_add(
562                self.workingset_nodereclaim,
563                other.workingset_nodereclaim,
564            ),
565            pgrefill: opt_add(self.pgrefill, other.pgrefill),
566            pgscan: opt_add(self.pgscan, other.pgscan),
567            pgsteal: opt_add(self.pgsteal, other.pgsteal),
568            pgactivate: opt_add(self.pgactivate, other.pgactivate),
569            pgdeactivate: opt_add(self.pgdeactivate, other.pgdeactivate),
570            pglazyfree: opt_add(self.pglazyfree, other.pglazyfree),
571            pglazyfreed: opt_add(self.pglazyfreed, other.pglazyfreed),
572            thp_fault_alloc: opt_add(self.thp_fault_alloc, other.thp_fault_alloc),
573            thp_collapse_alloc: opt_add(self.thp_collapse_alloc, other.thp_collapse_alloc),
574            events_low: opt_add(self.events_low, other.events_low),
575            events_high: opt_add(self.events_high, other.events_high),
576            events_max: opt_add(self.events_max, other.events_max),
577            events_oom: opt_add(self.events_oom, other.events_oom),
578            events_oom_kill: opt_add(self.events_oom_kill, other.events_oom_kill),
579            events_local_low: opt_add(self.events_local_low, other.events_local_low),
580            events_local_high: opt_add(self.events_local_high, other.events_local_high),
581            events_local_max: opt_add(self.events_local_max, other.events_local_max),
582            events_local_oom: opt_add(self.events_local_oom, other.events_local_oom),
583            events_local_oom_kill: opt_add(self.events_local_oom_kill, other.events_local_oom_kill),
584        }
585    }
586}
587
588impl CgroupMemoryModel {
589    pub fn new(
590        sample: &CgroupSample,
591        last: Option<(&CgroupSample, Duration)>,
592    ) -> CgroupMemoryModel {
593        let mut model = CgroupMemoryModel {
594            total: sample.memory_current.map(|v| v as u64),
595            swap: sample.memory_swap_current.map(|v| v as u64),
596            zswap: sample.memory_zswap_current.map(|v| v as u64),
597            ..Default::default()
598        };
599        if let Some(events) = &sample.memory_events {
600            if let Some((
601                CgroupSample {
602                    memory_events: Some(last_memory_events),
603                    ..
604                },
605                delta,
606            )) = last
607            {
608                model.events_low = count_per_sec!(last_memory_events.low, events.low, delta, u64);
609                model.events_high =
610                    count_per_sec!(last_memory_events.high, events.high, delta, u64);
611                model.events_max = count_per_sec!(last_memory_events.max, events.max, delta, u64);
612                model.events_oom = count_per_sec!(last_memory_events.oom, events.oom, delta, u64);
613                model.events_oom_kill =
614                    count_per_sec!(last_memory_events.oom_kill, events.oom_kill, delta, u64);
615            }
616        }
617        if let Some(events_local) = &sample.memory_events_local {
618            if let Some((
619                CgroupSample {
620                    memory_events_local: Some(last_memory_events_local),
621                    ..
622                },
623                delta,
624            )) = last
625            {
626                model.events_local_low =
627                    count_per_sec!(last_memory_events_local.low, events_local.low, delta, u64);
628                model.events_local_high =
629                    count_per_sec!(last_memory_events_local.high, events_local.high, delta, u64);
630                model.events_local_max =
631                    count_per_sec!(last_memory_events_local.max, events_local.max, delta, u64);
632                model.events_local_oom =
633                    count_per_sec!(last_memory_events_local.oom, events_local.oom, delta, u64);
634                model.events_local_oom_kill = count_per_sec!(
635                    last_memory_events_local.oom_kill,
636                    events_local.oom_kill,
637                    delta,
638                    u64
639                );
640            }
641        }
642        if let Some(stat) = &sample.memory_stat {
643            model.anon = stat.anon;
644            model.file = stat.file;
645            model.kernel = stat.kernel;
646            model.kernel_stack = stat.kernel_stack;
647            model.slab = stat.slab;
648            model.sock = stat.sock;
649            model.shmem = stat.shmem;
650            // May be set by sample.memory_zswap_current
651            if model.zswap.is_none() {
652                model.zswap = stat.zswap;
653            }
654            model.zswapped = stat.zswapped;
655            model.file_mapped = stat.file_mapped;
656            model.file_dirty = stat.file_dirty;
657            model.file_writeback = stat.file_writeback;
658            model.anon_thp = stat.anon_thp;
659            model.inactive_anon = stat.inactive_anon;
660            model.active_anon = stat.active_anon;
661            model.inactive_file = stat.inactive_file;
662            model.active_file = stat.active_file;
663            model.unevictable = stat.unevictable;
664            model.slab_reclaimable = stat.slab_reclaimable;
665            model.slab_unreclaimable = stat.slab_unreclaimable;
666
667            if let Some((
668                CgroupSample {
669                    memory_stat: Some(last_stat),
670                    ..
671                },
672                delta,
673            )) = last
674            {
675                model.pgfault = count_per_sec!(last_stat.pgfault, stat.pgfault, delta, u64);
676                model.pgmajfault =
677                    count_per_sec!(last_stat.pgmajfault, stat.pgmajfault, delta, u64);
678                model.workingset_refault_anon = count_per_sec!(
679                    last_stat.workingset_refault_anon,
680                    stat.workingset_refault_anon,
681                    delta,
682                    u64
683                );
684                model.workingset_refault_file = count_per_sec!(
685                    last_stat.workingset_refault_file,
686                    stat.workingset_refault_file,
687                    delta,
688                    u64
689                );
690                model.workingset_activate_anon = count_per_sec!(
691                    last_stat.workingset_activate_anon,
692                    stat.workingset_activate_anon,
693                    delta,
694                    u64
695                );
696                model.workingset_activate_file = count_per_sec!(
697                    last_stat.workingset_activate_file,
698                    stat.workingset_activate_file,
699                    delta,
700                    u64
701                );
702                model.workingset_restore_anon = count_per_sec!(
703                    last_stat.workingset_restore_anon,
704                    stat.workingset_restore_anon,
705                    delta,
706                    u64
707                );
708                model.workingset_restore_file = count_per_sec!(
709                    last_stat.workingset_restore_file,
710                    stat.workingset_restore_file,
711                    delta,
712                    u64
713                );
714                model.workingset_nodereclaim = count_per_sec!(
715                    last_stat.workingset_nodereclaim,
716                    stat.workingset_nodereclaim,
717                    delta,
718                    u64
719                );
720                model.pgrefill = count_per_sec!(last_stat.pgrefill, stat.pgrefill, delta, u64);
721                model.pgscan = count_per_sec!(last_stat.pgscan, stat.pgscan, delta, u64);
722                model.pgsteal = count_per_sec!(last_stat.pgsteal, stat.pgsteal, delta, u64);
723                model.pgactivate =
724                    count_per_sec!(last_stat.pgactivate, stat.pgactivate, delta, u64);
725                model.pgdeactivate =
726                    count_per_sec!(last_stat.pgdeactivate, stat.pgdeactivate, delta, u64);
727                model.pglazyfree =
728                    count_per_sec!(last_stat.pglazyfree, stat.pglazyfree, delta, u64);
729                model.pglazyfreed =
730                    count_per_sec!(last_stat.pglazyfreed, stat.pglazyfreed, delta, u64);
731                model.thp_fault_alloc =
732                    count_per_sec!(last_stat.thp_fault_alloc, stat.thp_fault_alloc, delta, u64);
733                model.thp_collapse_alloc = count_per_sec!(
734                    last_stat.thp_collapse_alloc,
735                    stat.thp_collapse_alloc,
736                    delta,
737                    u64
738                );
739            }
740        }
741
742        model
743    }
744}
745
/// Pids statistics of a cgroup.
#[::below_derive::queriable_derives]
pub struct CgroupPidsModel {
    // Copied unchanged from the sample's `tids_current`.
    pub tids_current: Option<u64>,
}
750
751impl std::ops::Add for CgroupPidsModel {
752    type Output = Self;
753
754    fn add(self, other: Self) -> Self::Output {
755        Self {
756            tids_current: opt_add(self.tids_current, other.tids_current),
757        }
758    }
759}
760
761impl CgroupPidsModel {
762    pub fn new(sample: &CgroupSample) -> Self {
763        let tids_current = sample.tids_current;
764        CgroupPidsModel { tids_current }
765    }
766}
767
768#[::below_derive::queriable_derives]
769pub struct CgroupPressureModel {
770    pub cpu_some_pct: Option<f64>,
771    pub cpu_full_pct: Option<f64>,
772    pub io_some_pct: Option<f64>,
773    pub io_full_pct: Option<f64>,
774    pub memory_some_pct: Option<f64>,
775    pub memory_full_pct: Option<f64>,
776}
777
778impl CgroupPressureModel {
779    fn new(pressure: &cgroupfs::Pressure) -> CgroupPressureModel {
780        // Use avg10 instead of calculating pressure with the total metric. If
781        // elapsed time between reading pressure total and recording time is too
782        // long, pressure could exceed 100%.
783        CgroupPressureModel {
784            cpu_some_pct: pressure.cpu.some.avg10,
785            cpu_full_pct: pressure.cpu.full.as_ref().and_then(|f| f.avg10),
786            io_some_pct: pressure.io.some.avg10,
787            io_full_pct: pressure.io.full.avg10,
788            memory_some_pct: pressure.memory.some.avg10,
789            memory_full_pct: pressure.memory.full.avg10,
790        }
791    }
792}
793#[::below_derive::queriable_derives]
794pub struct CgroupMemoryNumaModel {
795    pub total: Option<u64>,
796    pub anon: Option<u64>,
797    pub file: Option<u64>,
798    pub kernel_stack: Option<u64>,
799    pub pagetables: Option<u64>,
800    pub shmem: Option<u64>,
801    pub file_mapped: Option<u64>,
802    pub file_dirty: Option<u64>,
803    pub file_writeback: Option<u64>,
804    pub swapcached: Option<u64>,
805    pub anon_thp: Option<u64>,
806    pub file_thp: Option<u64>,
807    pub shmem_thp: Option<u64>,
808    pub inactive_anon: Option<u64>,
809    pub active_anon: Option<u64>,
810    pub inactive_file: Option<u64>,
811    pub active_file: Option<u64>,
812    pub unevictable: Option<u64>,
813    pub slab_reclaimable: Option<u64>,
814    pub slab_unreclaimable: Option<u64>,
815    pub workingset_refault_anon: Option<f64>,
816    pub workingset_refault_file: Option<f64>,
817    pub workingset_activate_anon: Option<f64>,
818    pub workingset_activate_file: Option<f64>,
819    pub workingset_restore_anon: Option<f64>,
820    pub workingset_restore_file: Option<f64>,
821    pub workingset_nodereclaim: Option<f64>,
822}
823
824impl CgroupMemoryNumaModel {
825    pub fn new(
826        begin: &cgroupfs::MemoryNumaStat,
827        last: Option<(&cgroupfs::MemoryNumaStat, Duration)>,
828    ) -> CgroupMemoryNumaModel {
829        let mut model = CgroupMemoryNumaModel {
830            total: None,
831            anon: begin.anon,
832            file: begin.file,
833            kernel_stack: begin.kernel_stack,
834            pagetables: begin.pagetables,
835            shmem: begin.shmem,
836            file_mapped: begin.file_mapped,
837            file_dirty: begin.file_dirty,
838            file_writeback: begin.file_writeback,
839            swapcached: begin.swapcached,
840            anon_thp: begin.anon_thp,
841            file_thp: begin.file_thp,
842            shmem_thp: begin.shmem_thp,
843            inactive_anon: begin.inactive_anon,
844            active_anon: begin.active_anon,
845            inactive_file: begin.inactive_file,
846            active_file: begin.active_file,
847            unevictable: begin.unevictable,
848            slab_reclaimable: begin.slab_reclaimable,
849            slab_unreclaimable: begin.slab_unreclaimable,
850            ..Default::default()
851        };
852        if let (Some(anon), Some(file), Some(kernel_stack), Some(pagetables)) =
853            (model.anon, model.file, model.kernel_stack, model.pagetables)
854        {
855            model.total = Some(
856                anon.saturating_add(file)
857                    .saturating_add(kernel_stack)
858                    .saturating_add(pagetables),
859            );
860        }
861
862        if let Some((l, delta)) = last {
863            model.workingset_refault_anon = count_per_sec!(
864                begin.workingset_refault_anon,
865                l.workingset_refault_anon,
866                delta
867            );
868            model.workingset_refault_file = count_per_sec!(
869                begin.workingset_refault_file,
870                l.workingset_refault_file,
871                delta
872            );
873            model.workingset_activate_anon = count_per_sec!(
874                begin.workingset_activate_anon,
875                l.workingset_activate_anon,
876                delta
877            );
878            model.workingset_activate_file = count_per_sec!(
879                begin.workingset_activate_file,
880                l.workingset_activate_file,
881                delta
882            );
883            model.workingset_restore_anon = count_per_sec!(
884                begin.workingset_restore_anon,
885                l.workingset_restore_anon,
886                delta
887            );
888            model.workingset_restore_file = count_per_sec!(
889                begin.workingset_restore_file,
890                l.workingset_restore_file,
891                delta
892            );
893            model.workingset_nodereclaim = count_per_sec!(
894                begin.workingset_nodereclaim,
895                l.workingset_nodereclaim,
896                delta
897            );
898        }
899        model
900    }
901}
902
903/// Cgroup properties. Without any cgroup configuration changes, these should
904/// typically be static.
905#[::below_derive::queriable_derives]
906pub struct CgroupProperties {
907    pub cgroup_controllers: Option<BTreeSet<String>>,
908    pub cgroup_subtree_control: Option<BTreeSet<String>>,
909    pub tids_max: Option<i64>,
910    pub memory_min: Option<i64>,
911    pub memory_low: Option<i64>,
912    pub memory_high: Option<i64>,
913    pub memory_max: Option<i64>,
914    pub memory_swap_max: Option<i64>,
915    pub memory_zswap_max: Option<i64>,
916    pub cpu_weight: Option<u32>,
917    pub cpu_max_usec: Option<i64>,
918    pub cpu_max_period_usec: Option<u64>,
919    pub cpuset_cpus: Option<cgroupfs::Cpuset>,
920    pub cpuset_cpus_effective: Option<cgroupfs::Cpuset>,
921    pub cpuset_mems: Option<cgroupfs::MemNodes>,
922    pub cpuset_mems_effective: Option<cgroupfs::MemNodes>,
923}
924
925impl CgroupProperties {
926    pub fn new(sample: &CgroupSample) -> Self {
927        Self {
928            cgroup_controllers: sample.cgroup_controllers.clone(),
929            cgroup_subtree_control: sample.cgroup_subtree_control.clone(),
930            tids_max: sample.tids_max,
931            memory_min: sample.memory_min,
932            memory_low: sample.memory_low,
933            memory_high: sample.memory_high,
934            memory_max: sample.memory_max,
935            memory_swap_max: sample.memory_swap_max,
936            memory_zswap_max: sample.memory_zswap_max,
937            cpu_weight: sample.cpu_weight,
938            cpu_max_usec: sample.cpu_max.as_ref().map(|v| v.max_usec),
939            cpu_max_period_usec: sample.cpu_max.as_ref().map(|v| v.period_usec),
940            cpuset_cpus: sample.cpuset_cpus.clone(),
941            cpuset_cpus_effective: sample.cpuset_cpus_effective.clone(),
942            cpuset_mems: sample.cpuset_mems.clone(),
943            cpuset_mems_effective: sample.cpuset_mems_effective.clone(),
944        }
945    }
946}
947
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;

    // `path:`-prefixed field ids must resolve against a nested cgroup tree,
    // and yield `None` for cgroups that are not part of it.
    #[test]
    fn query_nested_cgroup() {
        let model_json = r#"
        {
            "data": { "name": "<root>", "full_path": "", "depth": 0 },
            "count": 4,
            "recreate_flag": false,
            "children": [
                {
                    "data": { "name": "system.slice", "full_path": "/system.slice", "depth": 1 },
                    "count": 2,
                    "recreate_flag": false,
                    "children": [
                        {
                            "data": { "name": "foo.service", "full_path": "/system.slice/foo.service", "depth": 2 },
                            "count": 1,
                            "recreate_flag": false,
                            "children": []
                        }
                    ]
                },
                {
                    "data": { "name": ".hidden.slice", "full_path": "/.hidden.slice", "depth": 1 },
                    "count": 1,
                    "recreate_flag": false,
                    "children": []
                }
            ]
        }
        "#;
        let model: CgroupModel =
            serde_json::from_str(model_json).expect("Failed to deserialize cgroup model JSON");

        // Each case pairs a field id with the string it should resolve to.
        let cases = [
            // Ignore consecutive slashes
            ("path:///////.name", Some("<root>")),
            ("path:/system.slice/.full_path", Some("/system.slice")),
            (
                "path:/system.slice/foo.service/.full_path",
                Some("/system.slice/foo.service"),
            ),
            // Allow path param to contain "/."
            ("path:/.hidden.slice/.full_path", Some("/.hidden.slice")),
            // Non-existent cgroups
            ("path:/no_such.slice/.full_path", None),
            ("path:/system.slice/no_such.service/.full_path", None),
        ];

        for (field_id, expected) in cases.iter() {
            let parsed = CgroupModelFieldId::from_str(field_id)
                .map_err(|e| format!("Failed to parse field id {}: {:?}", field_id, e))
                .unwrap();
            let want = expected.map(|s| Field::Str(s.to_string()));
            assert_eq!(model.query(&parsed), want);
        }
    }

    // The per-device io map is reachable through its preferred name
    // `io_details`.
    #[test]
    fn query_model() {
        let model_json = r#"
        {
            "name": "foo.service",
            "full_path": "/system.slice/foo.service",
            "depth": 1,
            "io": {
                "sda": {
                    "rbytes_per_sec": 42
                }
            }
        }
        "#;
        let model: SingleCgroupModel = serde_json::from_str(model_json).unwrap();
        let field_id =
            SingleCgroupModelFieldId::from_str("io_details.sda.rbytes_per_sec").unwrap();
        assert_eq!(model.query(&field_id), Some(Field::F64(42.0)));
    }

    // Exercises `aggr_top_level_val()`: a root with two children is
    // aggregated, after which the root's memory totals and (non-local) event
    // counters must equal the sum over both children, while the local event
    // counters must remain unset.
    #[test]
    fn test_aggr_top_level_val() {
        // Wraps a memory model into an otherwise default child cgroup.
        let make_child = |name: &str, memory: CgroupMemoryModel| CgroupModel {
            data: SingleCgroupModel {
                name: name.to_string(),
                memory: Some(memory),
                ..Default::default()
            },
            ..Default::default()
        };

        // Two children that differ only in `total` and `swap`.
        let child1 = make_child(
            "child1",
            CgroupMemoryModel {
                total: Some(100),
                swap: Some(50),
                events_low: Some(1),
                events_high: Some(2),
                events_max: Some(3),
                events_oom: Some(4),
                events_oom_kill: Some(5),
                events_local_low: Some(10),
                events_local_high: Some(20),
                ..Default::default()
            },
        );
        let child2 = make_child(
            "child2",
            CgroupMemoryModel {
                total: Some(200),
                swap: Some(30),
                events_low: Some(1),
                events_high: Some(2),
                events_max: Some(3),
                events_oom: Some(4),
                events_oom_kill: Some(5),
                events_local_low: Some(10),
                events_local_high: Some(20),
                ..Default::default()
            },
        );

        // The root starts out with an all-default memory model.
        let mut root = CgroupModel {
            data: SingleCgroupModel {
                name: "root".to_string(),
                memory: Some(CgroupMemoryModel::default()),
                ..Default::default()
            },
            children: vec![child1, child2].into_iter().collect(),
            ..Default::default()
        };

        root = root.aggr_top_level_val();
        let memory = root.data.memory.as_ref().unwrap();

        // Aggregated values are the sums over both children.
        assert_eq!(memory.total, Some(300));
        assert_eq!(memory.swap, Some(80));
        assert_eq!(memory.events_low, Some(2));
        assert_eq!(memory.events_high, Some(4));
        assert_eq!(memory.events_max, Some(6));
        assert_eq!(memory.events_oom, Some(8));
        assert_eq!(memory.events_oom_kill, Some(10));

        // Local events are per-cgroup and must not be aggregated.
        assert_eq!(memory.events_local_low, None);
        assert_eq!(memory.events_local_high, None);
    }
}