1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
//! This module extracts just enough info about running processes for emlop's usecase.
//!
//! It would have been natural to use the procinfo crate, but it fails parsing kernel processes, it
//! somehow yields start_times that can be off by a few seconds compared to this implem and the ps
//! program, and it probably parses a bit more than we need. So this is a handmade Linux-only
//! implementaion (does procinfo crate work on BSDs ?), but it's unit-tested against ps and should
//! be fast.
use crate::*;
use anyhow::{ensure, Context};
use std::{fs::{read_dir, DirEntry, File},
io::prelude::*,
path::PathBuf};
#[derive(Debug)]
pub struct Proc {
pub idx: usize,
pub cmdline: String,
pub start: i64,
pub pid: i32,
}
impl std::fmt::Display for Proc {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let pid = format!("Pid {}: ", self.pid);
let capacity = f.precision().unwrap_or(45).saturating_sub(pid.len());
let cmdlen = self.cmdline.len();
if capacity >= cmdlen || cmdlen < 4 {
write!(f, "{pid}{}", &self.cmdline)
} else if capacity > 3 {
write!(f, "{pid}...{}", &self.cmdline[(cmdlen - capacity + 3)..])
} else {
write!(f, "{pid}...")
}
}
}
/// Get command name, arguments, start time, and pid for one process.
fn get_proc_info(filter: &[&str],
entry: &DirEntry,
clocktick: i64,
time_ref: i64,
tmpdirs: &mut Vec<PathBuf>)
-> Option<Proc> {
// Parse pid.
// At this stage we expect `entry` to not always correspond to a process.
let pid = i32::from_str(&entry.file_name().to_string_lossy()).ok()?;
// See linux/Documentation/filesystems/proc.txt Table 1-4: Contents of the stat files.
let mut stat = String::new();
File::open(entry.path().join("stat")).ok()?.read_to_string(&mut stat).ok()?;
// Parse command name, bail out now if it doesn't match.
// The command name is surrounded by parens and may contain spaces.
let (cmd_start, cmd_end) = (stat.find('(')? + 1, stat.rfind(')')?);
let idx = if filter.is_empty() {
usize::MAX
} else {
filter.iter().position(|&f| stat[cmd_start..cmd_end].starts_with(f))?
};
// Parse start time
let start_time = i64::from_str(stat[cmd_end + 1..].split(' ').nth(20)?).ok()?;
// Parse arguments
let mut cmdline = String::new();
File::open(entry.path().join("cmdline")).ok()?.read_to_string(&mut cmdline).ok()?;
cmdline = cmdline.replace('\0', " ").trim().into();
// Find portage tmpdir
extend_tmpdirs(entry.path(), tmpdirs);
// Done
Some(Proc { idx, cmdline, start: time_ref + start_time / clocktick, pid })
}
/// Find tmpdir by looking for "build.log" in the process fds, and add it to the provided vector.
fn extend_tmpdirs(proc: PathBuf, tmpdirs: &mut Vec<PathBuf>) {
if let Ok(entries) = read_dir(proc.join("fd")) {
let procstr = proc.to_string_lossy();
for d in entries.filter_map(|e| {
let p = e.ok()?.path().canonicalize().ok()?;
if p.file_name() != Some(std::ffi::OsStr::new("build.log")) {
return None;
}
let d = p.parent()?.parent()?.parent()?.parent()?.parent()?;
debug!("Tmpdir {} found in {}", d.to_string_lossy(), procstr);
Some(d.to_path_buf())
})
{
if !tmpdirs.contains(&d) {
// Insert at the front because it's a better candidate than cli/default tmpdir
tmpdirs.insert(0, d)
}
}
}
}
/// Get command name, arguments, start time, and pid for all processes.
pub fn get_all_info(filter: &[&str], tmpdirs: &mut Vec<PathBuf>) -> Vec<Proc> {
get_all_info_result(filter, tmpdirs).unwrap_or_else(|e| {
log_err(e);
vec![]
})
}
fn get_all_info_result(filter: &[&str], tmpdirs: &mut Vec<PathBuf>) -> Result<Vec<Proc>, Error> {
// clocktick and time_ref are needed to interpret stat.start_time. time_ref should correspond to
// the system boot time; not sure why it doesn't, but it's still usable as a reference.
// SAFETY: returns a system constant, only failure mode should be a zero/negative value
let clocktick: i64 = unsafe { libc::sysconf(libc::_SC_CLK_TCK) };
ensure!(clocktick > 0, "Failed getting system clock ticks");
let mut uptimestr = String::new();
File::open("/proc/uptime").context("Opening /proc/uptime")?
.read_to_string(&mut uptimestr)
.context("Reading /proc/uptime")?;
let uptime = i64::from_str(uptimestr.split('.').next().unwrap()).unwrap();
let time_ref = epoch_now() - uptime;
// Now iterate through /proc/<pid>
let mut ret: Vec<Proc> = Vec::new();
for entry in read_dir("/proc/").context("Listing /proc/")? {
if let Some(i) = get_proc_info(filter, &entry?, clocktick, time_ref, tmpdirs) {
ret.push(i)
}
}
Ok(ret)
}
#[cfg(test)]
mod tests {
use super::*;
use regex::Regex;
use std::{collections::BTreeMap, process::Command};
use time::{macros::format_description, PrimitiveDateTime};
fn parse_ps_time(s: &str) -> i64 {
let fmt = format_description!("[month repr:short] [day padding:space] [hour]:[minute]:[second] [year]");
PrimitiveDateTime::parse(s, &fmt).expect(&format!("Cannot parse {}", s))
.assume_utc() // We run ps with TZ=UTC
.unix_timestamp()
}
/// Check that our impl get similar results as `ps`
///
/// Ignored by default: False negatives on very busy systems
#[test]
#[ignore]
#[rustfmt::skip]
fn start_time() {
// First get the system's process start times using our implementation
// Store it as pid => (cmd, rust_time, ps_time)
let mut tmpdirs = vec![];
let mut info = get_all_info(&[], &mut tmpdirs)
.iter()
.map(|i| (i.pid, (i.cmdline.clone(), Some(i.start), None)))
.collect::<BTreeMap<i32, (String, Option<i64>, Option<i64>)>>();
// Then get them using the ps implementation (merging them into the same data structure)
let ps_start = epoch_now();
let cmd = Command::new("ps").env("TZ", "UTC")
.env("LC_ALL", "C") // Use a consistent format for datetimes
.args(&["-o",
"pid,lstart", // Output pid and start time
"-ax", // All processes including those "not associated with a terminal"
"--no-header"]) // No headers
.output()
.expect("failed to execute ps");
let re = Regex::new("^ *([0-9]+) [A-Za-z]+ ([a-zA-Z0-9: ]+)$").unwrap();
for lineres in cmd.stdout.lines() {
if let Ok(line) = lineres {
match re.captures(&line) {
Some(c) => {
let pid = c.get(1).unwrap().as_str().parse::<i32>().unwrap();
let time = parse_ps_time(c.get(2).unwrap().as_str());
info.entry(pid)
.and_modify(|t| t.2 = Some(time))
.or_insert(("?".into(), None, Some(time)));
},
None => assert!(false, "Couldn't parse {}", line),
}
}
}
// Check the results. For debugging purposes it's usefull to print everything and only
// assert at the end. Also some cases are considered soft errors and only make the whole
// test fail if they happen a lot.
assert!(info.len() > 5, "Only {} processes found", info.len());
let mut e: u32 = 0;
for (pid, times) in info {
e += match times {
(c, Some(t), None) => {println!("WARN {pid} {} disappeared after rust run\t{c}", fmt_utctime(t)); 1},
(c, None, Some(t)) if t >= ps_start -1 => {println!("WARN {pid} {} appeared right after rust run\t{c}", fmt_utctime(t)); 1},
(c, None, Some(t)) => {println!("ERR {pid} {} seen by ps but not by rust\t{c}", fmt_utctime(t)); 10},
(c, Some(tr), Some(tp)) if (tr-tp).abs() < 2 => {println!("OK {pid} {} {} secs diff\t{c}", fmt_utctime(tr), tr-tp); 0},
(c, Some(tr), Some(tp)) if (tr-tp).abs() < 5 => {println!("WARN {pid} {} {} secs diff\t{c}", fmt_utctime(tr), tr-tp); 1},
(c, Some(tr), Some(tp)) => {println!("ERR {pid} {} {} secs diff\t{c}", fmt_utctime(tr), tr-tp); 5},
(c, None, None) => {println!("ERR {pid}: no times\t{c}"); 10},
}
}
assert!(e < 10, "Got failure score of {e}");
}
#[test]
fn format_info() {
let s = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
let t: Vec<(i32, usize, usize, &str)> = vec![// Precison is way too small, use elipsis starting at 4 chars
(1, 1, 1, "Pid 1: a"),
(1, 2, 1, "Pid 1: ab"),
(2, 3, 1, "Pid 2: abc"),
(3, 4, 1, "Pid 3: ..."),
(4, 5, 1, "Pid 4: ..."),
(330, 1, 1, "Pid 330: a"),
(331, 2, 1, "Pid 331: ab"),
(332, 3, 1, "Pid 332: abc"),
(333, 4, 1, "Pid 333: ..."),
(334, 5, 1, "Pid 334: ..."),
// Here we have enough space
(1, 1, 12, "Pid 1: a"),
(1, 2, 12, "Pid 1: ab"),
(1, 3, 12, "Pid 1: abc"),
(1, 4, 12, "Pid 1: abcd"),
(1, 5, 12, "Pid 1: abcde"),
(12, 4, 12, "Pid 12: abcd"),
(123, 3, 12, "Pid 123: abc"),
(1234, 2, 12, "Pid 1234: ab"),
// Running out of space again, but we can display part of it
(1, 6, 12, "Pid 1: ...ef"),
(1, 7, 12, "Pid 1: ...fg"),
(1, 8, 12, "Pid 1: ...gh"),
(22, 9, 12, "Pid 22: ...i"),];
for (pid, cmdlen, precision, out) in t.into_iter() {
dbg!((pid, cmdlen, precision, out));
let i = Proc { idx: usize::MAX, pid, cmdline: s[..cmdlen].to_string(), start: 0 };
let f = format!("{1:.0$}", precision, i);
assert!(precision < 10 || f.len() <= precision, "{} <= {}", f.len(), precision);
assert_eq!(f, out);
}
}
}