1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
use crate::metrics::CpuMetrics;
use procfs::prelude::*;
use procfs::process::all_processes;
use procfs::{CpuTime, KernelStats};
use std::collections::HashMap;
use std::time::Instant;
/// Module-local result alias: the error side is always a boxed
/// `dyn std::error::Error`, so any error type can be propagated with `?`.
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
// ---------------------------------------------------------------------------
// Tick helpers
// ---------------------------------------------------------------------------
/// Total ticks recorded in `c`: user, nice, system and idle plus the optional
/// iowait, irq, softirq and steal counters (an absent counter counts as 0).
fn cpu_total(c: &CpuTime) -> u64 {
    let always_present = c.user + c.nice + c.system + c.idle;
    let optional = [c.iowait, c.irq, c.softirq, c.steal];
    optional
        .iter()
        .map(|ticks| ticks.unwrap_or(0))
        .fold(always_present, |acc, ticks| acc + ticks)
}
/// Ticks counted as idle in `c`: the idle counter plus iowait (0 when the
/// iowait counter is absent).
fn cpu_idle(c: &CpuTime) -> u64 {
    let waiting_on_io = c.iowait.unwrap_or(0);
    c.idle + waiting_on_io
}
/// Utilization of a single core between two snapshots, as a percentage.
/// The result is clamped to the 0.0–100.0 range.
fn core_util_pct(prev: &CpuTime, curr: &CpuTime) -> f64 {
    let (total_before, idle_before) = (cpu_total(prev), cpu_idle(prev));
    let (total_after, idle_after) = (cpu_total(curr), cpu_idle(curr));
    let raw_pct = util_pct_from_ticks(total_before, idle_before, total_after, idle_after);
    raw_pct.clamp(0.0, 100.0)
}
/// Whole-machine utilization between two snapshots, expressed as the number
/// of cores in use (fractional, nominally 0.0..n_cores).
/// The value is deliberately returned without clamping.
fn aggregate_util_cores(prev: &CpuTime, curr: &CpuTime, n_cores: usize) -> f64 {
    let (total_before, idle_before) = (cpu_total(prev), cpu_idle(prev));
    let (total_after, idle_after) = (cpu_total(curr), cpu_idle(curr));
    let busy_pct = util_pct_from_ticks(total_before, idle_before, total_after, idle_after);
    // Percentage -> fraction of the machine -> cores.
    busy_pct / 100.0 * n_cores as f64
}
/// Pure math: percentage of non-idle ticks between two snapshots, always in
/// the range 0.0–100.0. Takes raw pre-computed totals/idles so it can be
/// unit-tested without constructing a `CpuTime`.
///
/// * `prev_total` / `curr_total` - total tick counters at the two snapshots.
/// * `prev_idle` / `curr_idle` - idle tick counters at the two snapshots.
///
/// Returns 0.0 when no ticks elapsed (or the total counter went backwards).
/// A counter that went backwards contributes a delta of 0, and the idle delta
/// is capped at the total delta so inconsistent counters can never produce a
/// negative utilization.
fn util_pct_from_ticks(prev_total: u64, prev_idle: u64, curr_total: u64, curr_idle: u64) -> f64 {
    let delta_total = curr_total.saturating_sub(prev_total);
    if delta_total == 0 {
        return 0.0;
    }
    // Idle time cannot exceed elapsed time; cap it so `busy` never underflows.
    let delta_idle = curr_idle.saturating_sub(prev_idle).min(delta_total);
    // Subtract in integer space to stay exact even for very large counters.
    let busy = delta_total - delta_idle;
    busy as f64 / delta_total as f64 * 100.0
}
// ---------------------------------------------------------------------------
// Process-tree helpers
// ---------------------------------------------------------------------------
/// Builds `{ pid -> (user_ticks, system_ticks) }` for `root_pid` and every
/// process reachable from it through parent -> child links.
///
/// The stored values are `utime + cutime` and `stime + cstime`: including the
/// waited-for-children counters means a child that starts and exits within
/// one measurement interval still shows up through its parent once reaped.
/// The flip side is that a process present in the previous snapshot that then
/// exits would be counted twice; `CpuCollector::collect()` cancels that by
/// subtracting the previous ticks of exited processes.
///
/// Processes that cannot be listed or whose stat cannot be read (for example
/// because they exited in the meantime) are silently left out. If the process
/// list itself cannot be obtained the returned map is empty.
fn process_tree_ticks(root_pid: i32) -> HashMap<i32, (u64, u64)> {
    let Ok(listing) = all_processes() else {
        return HashMap::new();
    };
    // Snapshot the readable process handles first, then stat each one once.
    let readable: Vec<_> = listing.filter_map(|entry| entry.ok()).collect();

    // A single stat read per process feeds both lookup tables.
    let mut children: HashMap<i32, Vec<i32>> = HashMap::new();
    let mut ticks_for: HashMap<i32, (u64, u64)> = HashMap::new();
    for process in &readable {
        let Ok(stat) = process.stat() else {
            continue;
        };
        children.entry(stat.ppid).or_default().push(process.pid);
        // Negative child counters are treated as 0.
        let user = stat.utime + u64::try_from(stat.cutime).unwrap_or(0);
        let system = stat.stime + u64::try_from(stat.cstime).unwrap_or(0);
        ticks_for.insert(process.pid, (user, system));
    }

    // Walk the tree from the root with an explicit worklist (stack order;
    // visiting order does not matter because the result is a map).
    let mut result = HashMap::new();
    let mut pending = vec![root_pid];
    while let Some(pid) = pending.pop() {
        if let Some(ticks) = ticks_for.get(&pid).copied() {
            result.insert(pid, ticks);
        }
        for &kid in children.get(&pid).into_iter().flatten() {
            pending.push(kid);
        }
    }
    result
}
/// Returns `(pss_mib, rss_mib)`: the PSS and VmRSS totals over all `pids`,
/// accumulated in KiB and converted to MiB (integer division) at the end.
///
/// Each PID is opened once with `Process::new`; PSS is taken from the "Pss"
/// entry of its smaps rollup and RSS from `vmrss` in its status. A PID that
/// cannot be opened, or a source that cannot be read, contributes 0.
fn process_tree_memory_mib(pids: &[i32]) -> (u64, u64) {
    let (pss_kib, rss_kib) = pids
        .iter()
        .fold((0u64, 0u64), |(pss_acc, rss_acc), &pid| {
            let Ok(process) = procfs::process::Process::new(pid) else {
                return (pss_acc, rss_acc);
            };
            // The rollup value is divided by 1024 before being summed as KiB.
            let pss = process
                .smaps_rollup()
                .ok()
                .and_then(|rollup| {
                    rollup
                        .memory_map_rollup
                        .iter()
                        .find_map(|m| m.extension.map.get("Pss").copied())
                })
                .map_or(0, |bytes| bytes / 1024);
            // `vmrss` is summed as-is into the KiB total.
            let rss = process
                .status()
                .ok()
                .and_then(|status| status.vmrss)
                .unwrap_or(0);
            (pss_acc + pss, rss_acc + rss)
        });
    (pss_kib / 1024, rss_kib / 1024)
}
/// Reads the cumulative disk I/O counters of each PID in `pids`.
/// Returns `{ pid -> (read_bytes, write_bytes) }`.
/// A PID that cannot be opened or whose I/O counters cannot be read is left
/// out of the map, so callers see a delta of 0 for it.
fn process_tree_io(pids: &[i32]) -> HashMap<i32, (u64, u64)> {
    let mut counters = HashMap::new();
    for &pid in pids {
        let Ok(process) = procfs::process::Process::new(pid) else {
            continue;
        };
        let Ok(io) = process.io() else {
            continue;
        };
        counters.insert(pid, (io.read_bytes, io.write_bytes));
    }
    counters
}
// ---------------------------------------------------------------------------
// Snapshot + Collector
// ---------------------------------------------------------------------------
/// One capture of system-wide CPU counters plus, when a PID is tracked, the
/// per-process counters of its tree. `CpuCollector` keeps the previous
/// snapshot and derives per-interval deltas from two consecutive ones.
struct Snapshot {
/// Aggregate across all logical CPUs (the "cpu" summary line in /proc/stat).
total: CpuTime,
/// Per-logical-CPU entries (cpu0, cpu1, …).
per_core: Vec<CpuTime>,
/// Timestamp taken right after the /proc reads of this snapshot; the
/// difference between two snapshots is the elapsed-time denominator for the
/// process CPU rate (Δcpu_secs / Δtimestamp).
instant: Instant,
/// { pid -> (utime + cutime, stime + cstime) } in ticks for the root process
/// and all descendants, as produced by `process_tree_ticks`.
/// Empty when no PID is being tracked.
proc_ticks: HashMap<i32, (u64, u64)>,
/// { pid -> (read_bytes, write_bytes) } from /proc/pid/io.
/// Empty when no PID is tracked or /proc/pid/io is unreadable.
proc_io: HashMap<i32, (u64, u64)>,
}
/// Stateful collector of CPU metrics. Each `collect()` call takes a new
/// snapshot and reports deltas against the snapshot of the previous call.
pub struct CpuCollector {
/// Root PID of the process tree to track. None = system-only metrics.
pid: Option<i32>,
/// Snapshot stored by the previous `collect()` call; `None` until the first
/// call has completed.
prev: Option<Snapshot>,
}
impl CpuCollector {
    /// Creates a collector with no baseline snapshot.
    ///
    /// `pid` is the root of the process tree to track; `None` collects
    /// system-wide metrics only.
    pub fn new(pid: Option<i32>) -> Self {
        Self { pid, prev: None }
    }

    /// Takes a snapshot and returns the metrics for the interval since the
    /// previous call.
    ///
    /// The first call only stores a baseline: every delta field is zero (the
    /// process delta fields are `Some(0)` when a PID is tracked). Instantaneous
    /// fields (process count, child count, PSS/RSS, tree PIDs) are populated
    /// on every call. The process fields are `None` when no PID is tracked.
    ///
    /// # Errors
    ///
    /// Returns an error when `KernelStats::current()` fails. In that case the
    /// stored baseline is left untouched. All per-process reads are
    /// best-effort and fall back to empty/zero values instead of failing.
    pub fn collect(&mut self) -> Result<CpuMetrics> {
        let tps = procfs::ticks_per_second() as f64;

        // Total number of existing processes - matches Python resource-tracker's
        // `processes` column.
        let process_count = Self::count_processes();

        let proc_ticks = match self.pid {
            Some(root) => process_tree_ticks(root),
            None => HashMap::new(),
        };

        // Process I/O and memory are only read when tracking a PID. The PID
        // list is built once and shared by both readers. Memory is
        // instantaneous (not a delta), so it needs no previous snapshot.
        let (proc_io, process_pss_mib, process_rss_mib) = if self.pid.is_some() {
            let pids: Vec<i32> = proc_ticks.keys().copied().collect();
            let io = process_tree_io(&pids);
            let (pss, rss) = process_tree_memory_mib(&pids);
            (io, Some(pss), Some(rss))
        } else {
            (HashMap::new(), None, None)
        };

        // Capture /proc/stat after the process-tree reads, then record the
        // timestamp, so system and process snapshots and the elapsed
        // denominator share the same end point in the poll cycle (issue #20).
        let stats = KernelStats::current()?;
        let now = Instant::now();
        let curr = Snapshot {
            total: stats.total,
            per_core: stats.cpu_time,
            instant: now,
            proc_ticks,
            proc_io,
        };

        // Everything in the tree except the root itself.
        let process_child_count = self
            .pid
            .map(|_| Self::saturating_u32(curr.proc_ticks.len().saturating_sub(1)));

        let metrics = match &self.prev {
            // First call: store baseline and return zeros. The caller should
            // sleep for one interval then call collect() again for real data.
            None => CpuMetrics {
                utilization_pct: 0.0,
                per_core_pct: vec![0.0; curr.per_core.len()],
                utime_secs: 0.0,
                stime_secs: 0.0,
                process_count,
                process_cores_used: self.pid.map(|_| 0.0),
                process_child_count,
                process_utime_secs: self.pid.map(|_| 0.0),
                process_stime_secs: self.pid.map(|_| 0.0),
                process_pss_mib,
                process_rss_mib,
                process_disk_read_bytes: self.pid.map(|_| 0),
                process_disk_write_bytes: self.pid.map(|_| 0),
                process_gpu_usage: None,    // filled by main.rs after GPU query
                process_gpu_vram_mib: None, // filled by main.rs after GPU query
                process_gpu_utilized: None,
                process_tree_pids: curr.proc_ticks.keys().copied().collect(),
            },
            Some(prev) => {
                let n_cores = curr.per_core.len();

                // Per-interval system CPU time (delta ticks / ticks_per_second).
                let utime_secs = (curr.total.user + curr.total.nice)
                    .saturating_sub(prev.total.user + prev.total.nice)
                    as f64
                    / tps;
                let stime_secs = curr.total.system.saturating_sub(prev.total.system) as f64 / tps;

                let per_core_pct = prev
                    .per_core
                    .iter()
                    .zip(curr.per_core.iter())
                    .map(|(p, c)| core_util_pct(p, c))
                    .collect();
                let utilization_pct = aggregate_util_cores(&prev.total, &curr.total, n_cores);

                // Per-tree (utime, stime) seconds for this interval.
                //
                // proc_ticks stores (utime + cutime, stime + cstime), so a
                // process that was in the previous snapshot and has since been
                // reaped shows up twice: once through its own previous entry
                // and once through its parent's cutime/cstime delta. The
                // previous ticks of every exited PID are therefore subtracted
                // from the raw delta (issue #20).
                let process_secs = self.pid.map(|_| {
                    let (raw_u, raw_s) = Self::tree_deltas(&prev.proc_ticks, &curr.proc_ticks);
                    let (gone_u, gone_s) =
                        Self::exited_totals(&prev.proc_ticks, &curr.proc_ticks);
                    (
                        raw_u.saturating_sub(gone_u) as f64 / tps,
                        raw_s.saturating_sub(gone_s) as f64 / tps,
                    )
                });
                let process_utime_secs = process_secs.map(|(u, _)| u);
                let process_stime_secs = process_secs.map(|(_, s)| s);

                // Fractional cores = (Δutime + Δstime) / Δtimestamp. The
                // elapsed time is floored at 1 ms to avoid dividing by ~0.
                let process_cores_used = process_secs.map(|(u, s)| {
                    let elapsed = (curr.instant - prev.instant).as_secs_f64().max(0.001);
                    ((u + s) / elapsed).max(0.0)
                });

                // Per-interval disk I/O deltas across the process tree.
                let process_disk = self
                    .pid
                    .map(|_| Self::tree_deltas(&prev.proc_io, &curr.proc_io));
                let process_disk_read_bytes = process_disk.map(|(read, _)| read);
                let process_disk_write_bytes = process_disk.map(|(_, written)| written);

                CpuMetrics {
                    utilization_pct,
                    per_core_pct,
                    utime_secs,
                    stime_secs,
                    process_count,
                    process_cores_used,
                    process_child_count,
                    process_utime_secs,
                    process_stime_secs,
                    process_pss_mib,
                    process_rss_mib,
                    process_disk_read_bytes,
                    process_disk_write_bytes,
                    process_gpu_usage: None,    // filled by main.rs after GPU query
                    process_gpu_vram_mib: None, // filled by main.rs after GPU query
                    process_gpu_utilized: None,
                    process_tree_pids: curr.proc_ticks.keys().copied().collect(),
                }
            }
        };

        self.prev = Some(curr);
        Ok(metrics)
    }

    /// Counts the all-digit entries of `/proc` (one per process).
    /// Returns 0 when `/proc` cannot be listed.
    fn count_processes() -> u32 {
        std::fs::read_dir("/proc")
            .map(|dir| {
                let n = dir
                    .filter_map(|e| e.ok())
                    .filter(|e| {
                        e.file_name()
                            .to_string_lossy()
                            .chars()
                            .all(|c| c.is_ascii_digit())
                    })
                    .count();
                Self::saturating_u32(n)
            })
            .unwrap_or(0)
    }

    /// Converts a count to `u32`, saturating at `u32::MAX` instead of
    /// reporting a misleading 0 when the value does not fit.
    fn saturating_u32(n: usize) -> u32 {
        u32::try_from(n).unwrap_or(u32::MAX)
    }

    /// Sums, component-wise, how much each PID's counter pair grew from `prev`
    /// to `curr`. A PID absent from `prev` contributes 0, and a counter that
    /// went backwards contributes 0.
    fn tree_deltas(
        prev: &HashMap<i32, (u64, u64)>,
        curr: &HashMap<i32, (u64, u64)>,
    ) -> (u64, u64) {
        curr.iter()
            .fold((0u64, 0u64), |(sum_a, sum_b), (pid, &(curr_a, curr_b))| {
                let (prev_a, prev_b) = prev.get(pid).copied().unwrap_or((curr_a, curr_b));
                (
                    sum_a + curr_a.saturating_sub(prev_a),
                    sum_b + curr_b.saturating_sub(prev_b),
                )
            })
    }

    /// Sums, component-wise, the `prev` counters of every PID that is no
    /// longer present in `curr` (i.e. left the tracked tree since `prev`).
    fn exited_totals(
        prev: &HashMap<i32, (u64, u64)>,
        curr: &HashMap<i32, (u64, u64)>,
    ) -> (u64, u64) {
        prev.iter()
            .filter(|&(pid, _)| !curr.contains_key(pid))
            .fold((0u64, 0u64), |(sum_a, sum_b), (_, &(a, b))| {
                (sum_a + a, sum_b + b)
            })
    }
}
// ---------------------------------------------------------------------------
// Unit tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
// Tests use `util_pct_from_ticks` directly -- `CpuTime` has private fields
// and cannot be constructed in tests. All branching logic in
// `aggregate_util_cores` and `core_util_pct` delegates to this one
// pure function, so testing it covers all paths.
//
// Tick layout: (prev_total, prev_idle, curr_total, curr_idle)
#[test]
fn test_util_pct_all_idle_is_zero() {
// All new ticks went to idle.
assert_eq!(util_pct_from_ticks(0, 0, 1600, 1600), 0.0);
}
#[test]
fn test_util_pct_fully_busy_is_100() {
// 1600 new ticks, 0 idle -> 100%.
let pct = util_pct_from_ticks(0, 0, 1600, 0);
assert!((pct - 100.0).abs() < 0.01, "expected 100.0, got {pct}");
}
#[test]
fn test_util_pct_half_busy_is_50() {
// 1600 new ticks, 800 idle -> 50%.
let pct = util_pct_from_ticks(0, 0, 1600, 800);
assert!((pct - 50.0).abs() < 0.01, "expected 50.0, got {pct}");
}
#[test]
fn test_util_pct_no_delta_is_zero() {
// Identical snapshots: no elapsed ticks.
assert_eq!(util_pct_from_ticks(100, 50, 100, 50), 0.0);
}
/// Aggregate util converts the percentage to fractional cores and does NOT clamp.
/// 99.9% busy on a 4-core machine -> ~3.996 cores, not forced to <= 4.0.
#[test]
fn test_aggregate_util_cores_no_clamp() {
// 999 active ticks, 1 idle, total 1000 -> 99.9% -> 99.9/100*4 = 3.996
let pct = util_pct_from_ticks(0, 0, 1000, 1);
let cores = pct / 100.0 * 4.0_f64;
assert!(cores > 3.9, "expected close to 4.0, got {cores}");
assert!(
cores < 4.05,
"should not greatly exceed n_cores, got {cores}"
);
}
/// Per-core values are clamped to 0–100 by `core_util_pct`. The raw math
/// yields exactly 100 when no idle ticks elapse, so this test only checks
/// that the clamp leaves a boundary value of 100 unchanged.
#[test]
fn test_util_pct_raw_is_not_clamped() {
// 100% busy -- raw result is exactly 100, clamp has no effect here.
let raw = util_pct_from_ticks(0, 0, 1000, 0);
assert!((raw - 100.0).abs() < 0.01);
// Apply clamp explicitly to show it would cap any value > 100.
assert_eq!(raw.clamp(0.0, 100.0), 100.0);
}
// T-CPU-06: the first call to collect() returns 0.0 for all delta fields
// (utilization_pct, per_core_pct, utime_secs, stime_secs). A warm-up
// sleep then a second collect() produces real data.
#[test]
fn test_first_collect_returns_zero_for_delta_fields() {
let mut collector = CpuCollector::new(None);
let metrics = collector.collect().expect("first collect failed");
assert_eq!(
metrics.utilization_pct, 0.0,
"utilization_pct must be 0.0 on first collect, got {}",
metrics.utilization_pct
);
assert!(
metrics.per_core_pct.iter().all(|&v| v == 0.0),
"per_core_pct must be all-zero on first collect: {:?}",
metrics.per_core_pct
);
assert_eq!(
metrics.utime_secs, 0.0,
"utime_secs must be 0.0 on first collect, got {}",
metrics.utime_secs
);
assert_eq!(
metrics.stime_secs, 0.0,
"stime_secs must be 0.0 on first collect, got {}",
metrics.stime_secs
);
}
// T-CPU-07: first collect() with PID tracking returns Some for process fields.
#[test]
fn test_first_collect_with_pid_returns_some_process_fields() {
let pid = i32::try_from(std::process::id()).expect("PID too large");
let mut collector = CpuCollector::new(Some(pid));
let m = collector.collect().expect("collect() failed");
assert!(
m.process_cores_used.is_some(),
"process_cores_used must be Some when PID is tracked"
);
assert!(
m.process_child_count.is_some(),
"process_child_count must be Some when PID is tracked"
);
assert!(
m.process_pss_mib.is_some(),
"process_pss_mib must be Some when PID is tracked"
);
assert!(
m.process_rss_mib.is_some(),
"process_rss_mib must be Some when PID is tracked"
);
assert!(
m.process_utime_secs.is_some(),
"process_utime_secs must be Some when PID is tracked"
);
assert!(
m.process_stime_secs.is_some(),
"process_stime_secs must be Some when PID is tracked"
);
assert!(
m.process_disk_read_bytes.is_some(),
"process_disk_read_bytes must be Some when PID is tracked"
);
assert!(
m.process_disk_write_bytes.is_some(),
"process_disk_write_bytes must be Some when PID is tracked"
);
}
// T-CPU-08: process tree memory (PSS and RSS) is positive for the running test process.
#[test]
fn test_process_tree_memory_nonzero_for_self() {
let pid = i32::try_from(std::process::id()).expect("PID too large");
let (pss, rss) = process_tree_memory_mib(&[pid]);
assert!(
pss > 0,
"PSS for the current process should be > 0, got {pss}"
);
assert!(
rss > 0,
"RSS for the current process should be > 0, got {rss}"
);
assert!(
pss <= rss,
"PSS ({pss}) should not exceed RSS ({rss}) for a single process"
);
}
// T-CPU-09: process_tree_ticks contains the root PID.
// PID 1 (init/systemd) is used because it is always present and readable
// on any Linux host. Using std::process::id() is unreliable under
// llvm-cov instrumentation: the instrumented binary's own /proc entry
// can be transiently unreadable when many tests run in parallel.
#[test]
fn test_process_tree_ticks_contains_root_pid() {
let ticks = process_tree_ticks(1);
assert!(
ticks.contains_key(&1),
"process_tree_ticks(1) must contain PID 1 (init/systemd is always present)"
);
}
// T-CPU-10: second collect() with PID tracking produces non-negative cores.
#[test]
fn test_second_collect_with_pid_nonneg_cores() {
let pid = i32::try_from(std::process::id()).expect("PID too large");
let mut collector = CpuCollector::new(Some(pid));
let _ = collector.collect().expect("first collect() failed");
let m = collector.collect().expect("second collect() failed");
let cores = m
.process_cores_used
.expect("process_cores_used must be Some");
assert!(
cores >= 0.0,
"process_cores_used must be >= 0.0, got {cores}"
);
}
// T-CPU-11: second collect() with no PID still returns None for all process fields.
#[test]
fn test_second_collect_no_pid_all_process_fields_none() {
let mut collector = CpuCollector::new(None);
let _ = collector.collect().expect("first collect() failed");
let m = collector.collect().expect("second collect() failed");
assert!(
m.process_cores_used.is_none(),
"process_cores_used must be None when not tracking"
);
assert!(
m.process_child_count.is_none(),
"process_child_count must be None when not tracking"
);
assert!(
m.process_pss_mib.is_none(),
"process_pss_mib must be None when not tracking"
);
assert!(
m.process_rss_mib.is_none(),
"process_rss_mib must be None when not tracking"
);
assert!(
m.process_utime_secs.is_none(),
"process_utime_secs must be None when not tracking"
);
assert!(
m.process_stime_secs.is_none(),
"process_stime_secs must be None when not tracking"
);
assert!(
m.process_disk_read_bytes.is_none(),
"process_disk_read_bytes must be None when not tracking"
);
assert!(
m.process_disk_write_bytes.is_none(),
"process_disk_write_bytes must be None when not tracking"
);
}
// T-CPU-12: process_count > 0 (at least one process is always visible).
#[test]
fn test_process_count_positive() {
let mut collector = CpuCollector::new(None);
let m = collector.collect().expect("collect() failed");
assert!(
m.process_count > 0,
"process_count must be > 0, got {}",
m.process_count
);
}
// -----------------------------------------------------------------------
// Issue #20 regression tests: process CPU must never exceed system CPU
// -----------------------------------------------------------------------
// T-CPU-13: cutime correction formula -- direct arithmetic verification.
//
// A child with 500 pre-snapshot user ticks exits between samples and is
// reaped by its parent. The parent's cutime delta therefore covers the
// child's full 2500-tick lifetime. The raw delta overcounts by 500 (the
// pre-snapshot portion already counted via the child's prev entry).
// The correction must subtract exactly those 500 ticks.
#[test]
fn test_cutime_correction_cancels_exited_child_ticks() {
let prev: HashMap<i32, (u64, u64)> = [
(200, (50, 0)), // parent: 50 own ticks at warm-up
(100, (500, 0)), // child: 500 ticks at warm-up
]
.iter()
.cloned()
.collect();
// Between samples: child accumulates 2000 more ticks then exits.
// Parent's cutime = child's full lifetime = 500 + 2000 = 2500.
// Parent runs 250 own ticks.
let curr: HashMap<i32, (u64, u64)> =
[(200, (50 + 250 + 2500, 0))].iter().cloned().collect();
let raw: u64 = curr
.iter()
.map(|(pid, &(cu, cs))| {
let (pu, ps) = prev.get(pid).copied().unwrap_or((cu, cs));
cu.saturating_sub(pu) + cs.saturating_sub(ps)
})
.sum();
assert_eq!(
raw, 2750,
"raw delta must include the double-counted pre-snapshot child ticks"
);
let exited: u64 = prev
.iter()
.filter(|(pid, _)| !curr.contains_key(pid))
.map(|(_, &(pu, ps))| pu + ps)
.sum();
assert_eq!(
exited, 500,
"exited ticks must equal the child's pre-snapshot tick count"
);
let corrected = raw.saturating_sub(exited);
// Correct answer: parent own delta (250) + child post-snapshot delta (2000) = 2250.
assert_eq!(
corrected, 2250,
"corrected delta must exclude the child's pre-snapshot ticks"
);
}
// T-CPU-14: cutime correction handles cascaded exits.
//
// Both a child and grandchild exit between samples. Root's cutime ends up
// containing the full lifetimes of both. Subtracting all exited PIDs'
// pre-snapshot ticks must leave only the ticks actually earned in the
// interval regardless of exit depth.
#[test]
fn test_cutime_correction_handles_cascaded_exits() {
let prev: HashMap<i32, (u64, u64)> = [
(7, (0, 0)), // root: no prior ticks
(8, (100, 0)), // child: 100 pre-snapshot ticks
(9, (200, 0)), // grandchild: 200 pre-snapshot ticks
]
.iter()
.cloned()
.collect();
// Grandchild earns 50 ticks and exits; reaped by child.
// child cutime → 200 + 50 = 250.
// Child earns 50 own ticks then exits; reaped by root.
// child lifetime = 100 + 50 + 250 = 400.
// root cutime → 400.
// Root earns 30 own ticks.
let curr: HashMap<i32, (u64, u64)> = [(7, (30 + 400, 0))].iter().cloned().collect();
let raw: u64 = curr
.iter()
.map(|(pid, &(cu, cs))| {
let (pu, ps) = prev.get(pid).copied().unwrap_or((cu, cs));
cu.saturating_sub(pu) + cs.saturating_sub(ps)
})
.sum();
// raw = 430; overcounts by child_prev (100) + grandchild_prev (200) = 300.
assert_eq!(raw, 430);
let exited: u64 = prev
.iter()
.filter(|(pid, _)| !curr.contains_key(pid))
.map(|(_, &(pu, ps))| pu + ps)
.sum();
assert_eq!(
exited, 300,
"exited = child pre-snap (100) + grandchild pre-snap (200)"
);
let corrected = raw.saturating_sub(exited);
// Correct: root own (30) + child own delta (50) + grandchild own delta (50) = 130.
assert_eq!(corrected, 130);
}
// T-CPU-15: process CPU must not exceed system CPU when a long-running
// child exits between two measurement snapshots.
//
// On busy servers the tracked process often has long-standing children
// that accumulate significant CPU ticks over many intervals. When such a
// child exits between the warm-up and the real sample, its entire lifetime
// rolls into the parent's cutime delta. Without the double-counting
// correction those pre-snapshot ticks are counted a second time, pushing
// the process metric above the system metric.
//
// We compare absolute CPU seconds (process_utime_secs + process_stime_secs
// vs utime_secs + stime_secs) rather than fractional cores because both
// quantities share the same tps divisor and kernel tick accounting.
// fractional-cores comparison divides by wall-clock elapsed, which makes
// the ratio unstable when the measurement window is very short (a fixed
// iteration burn finishes in microseconds on fast CPUs, leaving
// elapsed << TOCTOU gap and inflating process_cores_used).
#[test]
fn test_process_cores_used_does_not_exceed_system_utilization() {
let pid = i32::try_from(std::process::id()).expect("PID too large");
let mut collector = CpuCollector::new(Some(pid));
// Spawn a CPU-busy child to simulate a long-running process on a
// busy server. A shell busy-loop accumulates real utime ticks.
let mut child = std::process::Command::new("sh")
.args(["-c", "while true; do :; done"])
.spawn()
.expect("failed to spawn sh busy-loop -- required for T-CPU-15");
// Let the child accumulate pre-snapshot CPU ticks for 200 ms.
// At 100 HZ that yields ~20 ticks = ~0.2 s that would be double-counted
// without the cutime correction.
std::thread::sleep(std::time::Duration::from_millis(200));
// Warm-up: child is alive with ~200 ms of accumulated CPU ticks.
let _ = collector.collect().expect("warm-up collect failed");
// Kill the child immediately after warm-up. Its full lifetime ticks
// (including the ~0.2 s pre-snapshot portion) roll into parent's cutime
// delta in the next collect(). Without the correction those pre-snapshot
// ticks are double-counted, inflating proc_cpu well above sys_cpu.
child.kill().ok();
child.wait().ok();
let m = collector.collect().expect("second collect failed");
let proc_utime = m
.process_utime_secs
.expect("process_utime_secs must be Some");
let proc_stime = m
.process_stime_secs
.expect("process_stime_secs must be Some");
let proc_cpu = proc_utime + proc_stime;
let sys_cpu = m.utime_secs + m.stime_secs;
// 15 % relative + 50 ms absolute tolerance for the TOCTOU gap between
// /proc/PID/stat and /proc/stat reads. Without the cutime correction,
// proc_cpu would be inflated by ~0.2 s (pre-snapshot child ticks),
// which far exceeds this tolerance and makes the assertion fail.
let tolerance = sys_cpu * 0.15 + 0.05;
assert!(
proc_cpu <= sys_cpu + tolerance,
"process CPU ({proc_cpu:.3}s = {proc_utime:.3}s utime + {proc_stime:.3}s stime) \
must not exceed system CPU ({sys_cpu:.3}s) -- cutime double-counting regression \
for issue #20"
);
}
// T-CPU-16: process_utime_secs must not exceed system utime_secs after a
// child process exits between the warm-up and the real sample.
//
// This directly exercises the cutime double-counting bug from issue #20:
// without the correction, the child's pre-snapshot ticks are counted twice
// (once via the child's prev entry, once via the parent's cutime delta),
// pushing process_utime_secs above utime_secs on an otherwise idle system.
#[test]
fn test_process_utime_no_double_count_after_child_exits() {
let pid = i32::try_from(std::process::id()).expect("PID too large");
let mut collector = CpuCollector::new(Some(pid));
// Spawn a child that burns a little CPU then exits naturally.
// `sh` must be available on any Linux host used for testing.
let mut child = std::process::Command::new("sh")
.args(["-c", "for i in $(seq 1 20000); do :; done"])
.spawn()
.expect("failed to spawn sh -- required for T-CPU-16");
// Let the child accumulate real ticks before the warm-up snapshot so
// there is a meaningful pre-snapshot tick count to double-count.
std::thread::sleep(std::time::Duration::from_millis(20));
// Warm-up: child is alive; its ticks are stored in prev_proc_ticks.
let _ = collector.collect().expect("warm-up collect failed");
// Reap the child. Its full-lifetime ticks roll into parent's cutime.
let _ = child.wait().expect("failed to wait for child");
// Real collect: child is absent from curr_proc_ticks but parent's
// cutime has grown by the child's entire lifetime. Without the
// correction the overcounting would inflate process_utime_secs.
let m = collector.collect().expect("second collect failed");
let proc_utime = m
.process_utime_secs
.expect("process_utime_secs must be Some when a PID is tracked");
let sys_utime = m.utime_secs;
// Allow 5% relative + 50 ms absolute tolerance for /proc timing jitter.
let tolerance = sys_utime * 0.05 + 0.05;
assert!(
proc_utime <= sys_utime + tolerance,
"process_utime_secs ({proc_utime:.3}s) exceeds system utime_secs ({sys_utime:.3}s) -- \
cutime double-counting regression (issue #20)"
);
}
// T-CPU-17: multi-interval accumulation -- child tracked across two snapshots
// before exiting.
//
// This is the scenario shown in examples/repro_cpu_cutime_spike.rs: a child
// burns CPU across several measurement intervals, then exits in the final one.
// The cutime delta for that final interval equals the child's ENTIRE lifetime,
// not just the ticks accumulated since the previous snapshot.
//
// The correction must use the MOST RECENT prev_proc_ticks (updated after the
// intermediate collect), not the original warm-up ticks. If self.prev were
// not updated between intervals, exited_utime would be too small and the
// overcounting would not be fully cancelled.
//
// Without the correction: proc_cpu ≈ child's lifetime at intermediate snapshot
//                         >> sys_cpu for that short final window.
// With the correction:    proc_cpu ≈ only post-intermediate child ticks ≈ 0.
//
// The child is an infinite busy loop, so it is held in a drop guard: if any
// `expect` below panics, the guard still kills and reaps it instead of leaving
// a CPU-burning `sh` behind after the test run.
#[test]
fn test_cutime_correction_multi_interval_child_exit() {
    // Owns the busy-loop child and guarantees it is killed and reaped on every
    // exit path of the test, including unwinding from a failed `expect`.
    struct KillOnDrop(std::process::Child);

    impl Drop for KillOnDrop {
        fn drop(&mut self) {
            // Only wait when the kill request was accepted; waiting on a child
            // that could not be signalled might block forever. Both calls are
            // no-ops (or ignorable errors) once the child has been reaped.
            if self.0.kill().is_ok() {
                let _ = self.0.wait();
            }
        }
    }

    let pid = i32::try_from(std::process::id()).expect("PID too large");
    let mut collector = CpuCollector::new(Some(pid));

    // Spawn a CPU-busy child that accumulates real utime ticks.
    let mut child = KillOnDrop(
        std::process::Command::new("sh")
            .args(["-c", "while true; do :; done"])
            .spawn()
            .expect("failed to spawn sh busy-loop -- required for T-CPU-17"),
    );

    // Interval 1 warm-up: child is alive with some initial ticks.
    std::thread::sleep(std::time::Duration::from_millis(100));
    let _ = collector.collect().expect("warm-up collect failed");

    // Interval 2: child continues burning CPU. self.prev is updated so the
    // next correction baseline is the child's tick count at this point.
    std::thread::sleep(std::time::Duration::from_millis(100));
    let _ = collector.collect().expect("intermediate collect failed");

    // Interval 3 (final): kill and reap the child immediately. Reaping rolls
    // the child's entire lifetime into the parent's cutime, so the correction
    // must subtract the interval-2 tick count (not the warm-up tick count).
    child
        .0
        .kill()
        .expect("failed to kill busy-loop child for T-CPU-17");
    // A child that was not reaped never reaches cutime, which would make the
    // assertion below vacuous -- so a failed wait is a test failure.
    let _ = child
        .0
        .wait()
        .expect("failed to reap busy-loop child for T-CPU-17");

    let m = collector.collect().expect("final collect failed");
    let proc_utime = m
        .process_utime_secs
        .expect("process_utime_secs must be Some");
    let proc_stime = m
        .process_stime_secs
        .expect("process_stime_secs must be Some");
    let proc_cpu = proc_utime + proc_stime;
    let sys_cpu = m.utime_secs + m.stime_secs;

    // 15 % relative + 50 ms absolute tolerance for TOCTOU jitter.
    // Without the correction, proc_cpu would include ~200 ms of pre-snapshot
    // child ticks, far exceeding sys_cpu for this short measurement window.
    let tolerance = sys_cpu * 0.15 + 0.05;
    assert!(
        proc_cpu <= sys_cpu + tolerance,
        "process CPU ({proc_cpu:.3}s = {proc_utime:.3}s utime + {proc_stime:.3}s stime) \
         must not exceed system CPU ({sys_cpu:.3}s) across multiple intervals -- \
         cutime multi-interval regression for issue #20"
    );
}
// T-CPU-18: PSS (via smaps_rollup) correctly tracks a file-backed mapping.
//
// This is the regression test for the fix shown in
// examples/repro_memory_rss_vs_used.rs. The old VmRSS approach overcounted
// shared pages: when N processes map the same file each contributes its full
// mapping size to the VmRSS sum, but PSS via /proc/pid/smaps_rollup
// attributes only each process's proportional share.
//
// For a sole mapper with MAP_PRIVATE and all pages touched:
//   - RSS increases by >= mapping_mib  (all pages in physical RAM)
//   - PSS increases by >= mapping_mib  (sole mapper gets full proportional share)
//   - PSS <= RSS                       (PSS never over-reports)
//   - |PSS_delta - RSS_delta| <= 1 MiB (sole-mapper PSS == RSS for the region)
//
// The last invariant is the regression guard: if PSS were broken (zero or
// reading the wrong field) the delta would diverge from the RSS delta even
// though PSS <= RSS holds trivially for zero.
//
// The multi-process case (N workers sharing the same file, causing
// tree_pss << tree_rss) is demonstrated in examples/repro_memory_rss_vs_used.rs.
//
// Implementation notes:
//   - Pages are touched with volatile reads so an optimised test build cannot
//     elide the loads and leave the mapping non-resident.
//   - The backing file is owned by a drop guard so that a panic while writing,
//     opening or mapping it does not leave the file behind in /tmp.
#[test]
fn test_pss_tracks_file_backed_mapping() {
    use std::fs;
    use std::io::Write as _;
    use std::os::unix::io::AsRawFd;

    // Deletes the backing file when dropped (normally or during unwinding).
    struct TempFile(String);

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best effort: the file may not exist if creation itself failed.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    const MAPPING_MIB: usize = 4;
    const MAPPING_SIZE: usize = MAPPING_MIB * 1024 * 1024;
    // Byte written to every position of the backing file.
    const FILL_BYTE: u8 = 0xAB;
    // Distance between touched bytes. A 4096-byte stride visits every page as
    // long as the system page size is at least 4 KiB.
    const PAGE_STRIDE: usize = 4096;

    let pid = i32::try_from(std::process::id()).expect("PID too large");
    let backing = TempFile(format!("/tmp/rt_test_pss_{}", std::process::id()));
    let (pss_before, rss_before) = process_tree_memory_mib(&[pid]);

    // Write a temp file that this process will map read-only.
    {
        let mut f = fs::File::create(&backing.0).expect("cannot create temp file for T-CPU-18");
        let chunk = vec![FILL_BYTE; 64 * 1024];
        for _ in 0..(MAPPING_SIZE / chunk.len()) {
            f.write_all(&chunk).expect("write failed");
        }
    }

    let file = fs::File::open(&backing.0).expect("cannot open temp file for T-CPU-18");
    // SAFETY: a null hint lets the kernel choose the address, `file` is an open
    // descriptor for a file of exactly MAPPING_SIZE bytes, and the result is
    // checked against MAP_FAILED before it is dereferenced.
    let ptr = unsafe {
        libc::mmap(
            std::ptr::null_mut(),
            MAPPING_SIZE,
            libc::PROT_READ,
            libc::MAP_PRIVATE,
            file.as_raw_fd(),
            0,
        )
    };
    assert_ne!(ptr, libc::MAP_FAILED, "mmap failed in T-CPU-18");

    // Touch every page to bring all pages into physical RAM (RSS and PSS).
    let base = ptr as *const u8;
    let mut checksum = 0u64;
    for offset in (0..MAPPING_SIZE).step_by(PAGE_STRIDE) {
        // SAFETY: `offset < MAPPING_SIZE`, so the address lies inside the
        // PROT_READ mapping created above, which stays mapped until the
        // `munmap` call below. The volatile read cannot be optimised away.
        let byte = unsafe { std::ptr::read_volatile(base.add(offset)) };
        checksum = checksum.wrapping_add(u64::from(byte));
    }

    let (pss_after, rss_after) = process_tree_memory_mib(&[pid]);

    // Clean up before asserting so a failure does not leak resources.
    // SAFETY: `ptr`/`MAPPING_SIZE` are exactly what `mmap` returned/was given,
    // and nothing dereferences the mapping after this point.
    unsafe { libc::munmap(ptr, MAPPING_SIZE) };
    drop(backing);

    // Every touched byte must read back as the fill byte; this both validates
    // the mapping contents and proves the page-touch loop really executed.
    let expected_checksum = (MAPPING_SIZE / PAGE_STRIDE) as u64 * u64::from(FILL_BYTE);
    assert_eq!(
        checksum, expected_checksum,
        "mapped file contents differ from what was written in T-CPU-18"
    );

    let pss_delta = pss_after.saturating_sub(pss_before);
    let rss_delta = rss_after.saturating_sub(rss_before);
    assert!(
        rss_delta >= MAPPING_MIB as u64,
        "RSS must increase by >= {MAPPING_MIB} MiB after touching the mapping: \
         before={rss_before} MiB, after={rss_after} MiB (delta={rss_delta} MiB)"
    );
    assert!(
        pss_delta >= MAPPING_MIB as u64,
        "PSS must increase by >= {MAPPING_MIB} MiB as sole mapper of the file: \
         before={pss_before} MiB, after={pss_after} MiB (delta={pss_delta} MiB)"
    );
    assert!(
        pss_after <= rss_after,
        "PSS ({pss_after} MiB) must not exceed RSS ({rss_after} MiB)"
    );

    // For the sole mapper the PSS delta and RSS delta must agree within 1 MiB.
    // A regression that breaks smaps_rollup reading (e.g. returning 0 for PSS)
    // would leave pss_delta == 0 while rss_delta >= MAPPING_MIB.
    let skew = pss_delta.abs_diff(rss_delta);
    assert!(
        skew <= 1,
        "PSS delta ({pss_delta} MiB) and RSS delta ({rss_delta} MiB) must agree within \
         1 MiB for a sole mapper -- larger skew indicates smaps_rollup is not being read"
    );
}
}