Skip to main content

zlayer_agent/
cgroups_stats.rs

//! Cgroups v2 statistics reader for container metrics
//!
//! Provides functionality to read CPU and memory statistics from the cgroups v2 filesystem
//! for container resource monitoring and autoscaling decisions.
5
6use std::path::Path;
7use std::time::Instant;
8
/// A single sample of a container's resource usage, taken from cgroups v2
#[derive(Debug, Clone)]
pub struct ContainerStats {
    /// CPU time consumed, in microseconds
    pub cpu_usage_usec: u64,
    /// Memory currently in use, in bytes
    pub memory_bytes: u64,
    /// Memory limit in bytes (`u64::MAX` means unlimited)
    pub memory_limit: u64,
    /// Moment at which this sample was collected
    pub timestamp: Instant,
}

impl ContainerStats {
    /// Memory usage expressed as a percentage of the memory limit
    ///
    /// Yields 0.0 when no meaningful ratio exists, i.e. when the limit is
    /// unlimited (`memory_limit` == `u64::MAX`) or zero.
    #[must_use]
    #[allow(clippy::cast_precision_loss)]
    pub fn memory_percent(&self) -> f64 {
        match self.memory_limit {
            // Unlimited or zero limit: there is nothing to take a percentage of.
            0 | u64::MAX => 0.0,
            limit => {
                let ratio = self.memory_bytes as f64 / limit as f64;
                ratio * 100.0
            }
        }
    }
}
36
37/// Read container statistics from cgroups v2 filesystem
38///
39/// Reads the following cgroup files:
40/// - `cpu.stat` for CPU usage (`usage_usec` field)
41/// - `memory.current` for current memory usage
42/// - `memory.max` for memory limit
43///
44/// # Arguments
45/// * `cgroup_path` - Path to the container's cgroup directory
46///
47/// # Returns
48/// * `Ok(ContainerStats)` - Container statistics on success
49/// * `Err(io::Error)` - If any cgroup file cannot be read
50///
51/// # Errors
52/// Returns an error if any cgroup file cannot be read.
53///
54/// # Example
55/// ```no_run
56/// use std::path::Path;
57/// use zlayer_agent::cgroups_stats::read_container_stats;
58///
59/// # async fn example() -> std::io::Result<()> {
60/// let cgroup_path = Path::new("/sys/fs/cgroup/system.slice/zlayer-mycontainer.scope");
61/// let stats = read_container_stats(cgroup_path).await?;
62/// println!("CPU usage: {} usec", stats.cpu_usage_usec);
63/// println!("Memory: {} bytes", stats.memory_bytes);
64/// # Ok(())
65/// # }
66/// ```
67pub async fn read_container_stats(cgroup_path: &Path) -> std::io::Result<ContainerStats> {
68    // Read cpu.stat file
69    let cpu_stat_path = cgroup_path.join("cpu.stat");
70    let cpu_stat = tokio::fs::read_to_string(&cpu_stat_path).await?;
71
72    // Read memory.current file
73    let memory_current_path = cgroup_path.join("memory.current");
74    let memory_current = tokio::fs::read_to_string(&memory_current_path).await?;
75
76    // Read memory.max file
77    let memory_max_path = cgroup_path.join("memory.max");
78    let memory_max = tokio::fs::read_to_string(&memory_max_path).await?;
79
80    // Parse cpu.stat
81    // Format: "usage_usec 12345\nuser_usec 6789\nsystem_usec 5556\n..."
82    let cpu_usage_usec = cpu_stat
83        .lines()
84        .find(|line| line.starts_with("usage_usec"))
85        .and_then(|line| line.split_whitespace().nth(1))
86        .and_then(|value| value.parse::<u64>().ok())
87        .unwrap_or(0);
88
89    // Parse memory.current (single integer value)
90    let memory_bytes = memory_current.trim().parse::<u64>().unwrap_or(0);
91
92    // Parse memory.max
93    // Can be "max" (unlimited) or an integer
94    let memory_limit = memory_max.trim().parse::<u64>().unwrap_or(u64::MAX); // "max" will fail to parse, use u64::MAX
95
96    Ok(ContainerStats {
97        cpu_usage_usec,
98        memory_bytes,
99        memory_limit,
100        timestamp: Instant::now(),
101    })
102}
103
104/// Calculate CPU percentage from two consecutive samples
105///
106/// The CPU percentage is calculated as:
107/// ```text
108/// cpu_percent = (delta_usage_usec / (delta_time_usec * num_cpus)) * 100
109/// ```
110///
111/// This accounts for multi-core systems by dividing by the number of CPUs.
112/// A value of 100% means full utilization of one CPU core.
113/// Values can exceed 100% on multi-core systems if multiple cores are utilized.
114///
115/// # Arguments
116/// * `prev` - Previous statistics sample
117/// * `curr` - Current statistics sample
118///
119/// # Returns
120/// CPU usage percentage (0.0 to N*100.0 where N is number of CPUs)
121///
122/// # Example
123/// ```
124/// use zlayer_agent::cgroups_stats::{ContainerStats, calculate_cpu_percent};
125/// use std::time::Instant;
126///
127/// let prev = ContainerStats {
128///     cpu_usage_usec: 1000000,  // 1 second of CPU time
129///     memory_bytes: 1024,
130///     memory_limit: 2048,
131///     timestamp: Instant::now(),
132/// };
133///
134/// // Simulate 0.5 seconds later with 0.25 seconds more CPU time
135/// let curr = ContainerStats {
136///     cpu_usage_usec: 1250000,  // 0.25 more seconds of CPU time
137///     memory_bytes: 1024,
138///     memory_limit: 2048,
139///     timestamp: Instant::now(),  // In reality this would be ~0.5s later
140/// };
141///
142/// let cpu_pct = calculate_cpu_percent(&prev, &curr);
143/// // Result depends on elapsed time and num_cpus
144/// ```
145#[must_use]
146#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
147pub fn calculate_cpu_percent(prev: &ContainerStats, curr: &ContainerStats) -> f64 {
148    // Calculate CPU usage delta in microseconds
149    let usage_delta_usec = curr.cpu_usage_usec.saturating_sub(prev.cpu_usage_usec);
150
151    // Calculate time delta in microseconds
152    let time_delta = curr.timestamp.duration_since(prev.timestamp);
153    let time_delta_usec = time_delta.as_micros() as u64;
154
155    // Avoid division by zero
156    if time_delta_usec == 0 {
157        return 0.0;
158    }
159
160    // Get number of CPU cores
161    let num_cpus = num_cpus::get() as u64;
162
163    // Calculate percentage
164    // Formula: (usage_delta / (time_delta * num_cpus)) * 100
165    // This normalizes to 100% = full single-core utilization
166    (usage_delta_usec as f64 / (time_delta_usec * num_cpus) as f64) * 100.0
167}
168
169/// Calculate CPU percentage with a specified number of CPUs
170///
171/// Same as `calculate_cpu_percent` but allows specifying the number of CPUs
172/// for testing or container-specific CPU limits.
173///
174/// # Arguments
175/// * `prev` - Previous statistics sample
176/// * `curr` - Current statistics sample
177/// * `num_cpus` - Number of CPUs to use in calculation
178///
179/// # Returns
180/// CPU usage percentage
181#[must_use]
182#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
183pub fn calculate_cpu_percent_with_cpus(
184    prev: &ContainerStats,
185    curr: &ContainerStats,
186    num_cpus: u64,
187) -> f64 {
188    let usage_delta_usec = curr.cpu_usage_usec.saturating_sub(prev.cpu_usage_usec);
189    let time_delta = curr.timestamp.duration_since(prev.timestamp);
190    let time_delta_usec = time_delta.as_micros() as u64;
191
192    if time_delta_usec == 0 || num_cpus == 0 {
193        return 0.0;
194    }
195
196    (usage_delta_usec as f64 / (time_delta_usec * num_cpus) as f64) * 100.0
197}
198
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Build a sample with the given counters, stamped with `timestamp`.
    fn sample(
        cpu_usage_usec: u64,
        memory_bytes: u64,
        memory_limit: u64,
        timestamp: Instant,
    ) -> ContainerStats {
        ContainerStats {
            cpu_usage_usec,
            memory_bytes,
            memory_limit,
            timestamp,
        }
    }

    /// Sample using 512 bytes of memory against the given limit.
    fn memory_sample(memory_limit: u64) -> ContainerStats {
        sample(0, 512, memory_limit, Instant::now())
    }

    /// Two samples exactly one second apart, with 500ms of CPU time consumed in between.
    fn one_second_apart() -> (ContainerStats, ContainerStats) {
        let start = Instant::now();
        let end = start + Duration::from_secs(1);
        (
            sample(1_000_000, 1024, 2048, start),
            sample(1_500_000, 1024, 2048, end),
        )
    }

    #[test]
    fn test_memory_percent() {
        let pct = memory_sample(1024).memory_percent();
        assert!((pct - 50.0).abs() < 0.01);
    }

    #[test]
    fn test_memory_percent_unlimited() {
        let pct = memory_sample(u64::MAX).memory_percent();
        assert!(pct.abs() < f64::EPSILON, "memory_percent should be 0.0");
    }

    #[test]
    fn test_memory_percent_zero_limit() {
        let pct = memory_sample(0).memory_percent();
        assert!(pct.abs() < f64::EPSILON, "memory_percent should be 0.0");
    }

    #[test]
    fn test_calculate_cpu_percent_with_cpus() {
        let (prev, curr) = one_second_apart();

        // One CPU: 500ms of usage over 1s is 50%.
        let single_core = calculate_cpu_percent_with_cpus(&prev, &curr, 1);
        assert!((single_core - 50.0).abs() < 1.0);

        // Two CPUs: the same usage is 25% once normalized across both cores.
        let dual_core = calculate_cpu_percent_with_cpus(&prev, &curr, 2);
        assert!((dual_core - 25.0).abs() < 1.0);
    }

    #[test]
    fn test_calculate_cpu_percent_zero_time() {
        let only = sample(1_000_000, 1024, 2048, Instant::now());

        // Comparing a sample with itself means no time has elapsed.
        let cpu_pct = calculate_cpu_percent_with_cpus(&only, &only, 1);
        assert!(cpu_pct.abs() < f64::EPSILON, "cpu_pct should be 0.0");
    }

    #[test]
    fn test_calculate_cpu_percent_zero_cpus() {
        let (prev, curr) = one_second_apart();

        // A CPU count of zero must not divide by zero.
        let cpu_pct = calculate_cpu_percent_with_cpus(&prev, &curr, 0);
        assert!(cpu_pct.abs() < f64::EPSILON, "cpu_pct should be 0.0");
    }

    #[test]
    fn test_stats_clone() {
        let original = sample(1000, 2000, 4000, Instant::now());
        let copy = original.clone();

        assert_eq!(copy.cpu_usage_usec, original.cpu_usage_usec);
        assert_eq!(copy.memory_bytes, original.memory_bytes);
        assert_eq!(copy.memory_limit, original.memory_limit);
    }
}