tracexec_core/
proc.rs

//! This module provides utilities for reading and processing process information (e.g. comm, argv, envp).
2
3use core::fmt;
4use std::{
5  borrow::Cow,
6  collections::{BTreeMap, BTreeSet, HashSet},
7  ffi::CString,
8  fmt::{Display, Formatter},
9  fs,
10  io::{self, BufRead, BufReader, Read},
11  os::raw::c_int,
12  path::{Path, PathBuf},
13  sync::{Arc, LazyLock, RwLock},
14};
15
16use crate::cache::ArcStr;
17use filedescriptor::AsRawFileDescriptor;
18use owo_colors::OwoColorize;
19
20use nix::{
21  fcntl::OFlag,
22  libc::{AT_FDCWD, gid_t},
23  unistd::{Pid, getpid},
24};
25use serde::{Serialize, Serializer, ser::SerializeSeq};
26use snafu::Snafu;
27use tracing::warn;
28
29use crate::{cache::StringCache, event::OutputMsg, pty::UnixSlavePty};
30
31#[allow(unused)]
32pub fn read_argv(pid: Pid) -> color_eyre::Result<Vec<CString>> {
33  let filename = format!("/proc/{pid}/cmdline");
34  let buf = std::fs::read(filename)?;
35  Ok(
36    buf
37      .split(|&c| c == 0)
38      .map(CString::new)
39      .collect::<Result<Vec<_>, _>>()?,
40  )
41}
42
43pub fn read_comm(pid: Pid) -> color_eyre::Result<ArcStr> {
44  let filename = format!("/proc/{pid}/comm");
45  let mut buf = std::fs::read(filename)?;
46  buf.pop(); // remove trailing newline
47  let utf8 = String::from_utf8_lossy(&buf);
48  let mut cache = CACHE.write().unwrap();
49  Ok(cache.get_or_insert(&utf8))
50}
51
52pub fn read_cwd(pid: Pid) -> std::io::Result<ArcStr> {
53  let filename = format!("/proc/{pid}/cwd");
54  let buf = std::fs::read_link(filename)?;
55  Ok(cached_str(&buf.to_string_lossy()))
56}
57
58pub fn read_exe(pid: Pid) -> std::io::Result<ArcStr> {
59  let filename = format!("/proc/{pid}/exe");
60  let buf = std::fs::read_link(filename)?;
61  Ok(cached_str(&buf.to_string_lossy()))
62}
63
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct ProcStatus {
66  pub cred: Cred,
67}
68
69#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
70pub struct Cred {
71  pub groups: Vec<gid_t>,
72  pub uid_real: u32,
73  pub uid_effective: u32,
74  pub uid_saved_set: u32,
75  pub uid_fs: u32,
76  pub gid_real: u32,
77  pub gid_effective: u32,
78  pub gid_saved_set: u32,
79  pub gid_fs: u32,
80}
81
82#[derive(Debug, Clone, PartialEq, Eq, Snafu)]
83pub enum CredInspectError {
84  #[snafu(display("Failed to read credential info: {kind}"))]
85  Io { kind: std::io::ErrorKind },
86  #[snafu(display("Failed to inspect credential info from kernel"))]
87  Inspect,
88}
89
90pub fn read_status(pid: Pid) -> std::io::Result<ProcStatus> {
91  let filename = format!("/proc/{pid}/status");
92  let contents = fs::read_to_string(filename)?;
93  parse_status_contents(&contents)
94}
95
96fn parse_status_contents(contents: &str) -> std::io::Result<ProcStatus> {
97  let mut uid = None;
98  let mut gid = None;
99  let mut groups = None;
100
101  fn parse_ids(s: &str) -> std::io::Result<[u32; 4]> {
102    let mut iter = s.trim_ascii().split_ascii_whitespace().take(4).map(|v| {
103      v.parse()
104        .map_err(|_| std::io::Error::new(io::ErrorKind::InvalidData, "non numeric uid/gid"))
105    });
106    Ok([
107      iter
108        .next()
109        .transpose()?
110        .ok_or_else(|| std::io::Error::new(io::ErrorKind::InvalidData, "not enough uid/gid(s)"))?,
111      iter
112        .next()
113        .transpose()?
114        .ok_or_else(|| std::io::Error::new(io::ErrorKind::InvalidData, "not enough uid/gid(s)"))?,
115      iter
116        .next()
117        .transpose()?
118        .ok_or_else(|| std::io::Error::new(io::ErrorKind::InvalidData, "not enough uid/gid(s)"))?,
119      iter
120        .next()
121        .transpose()?
122        .ok_or_else(|| std::io::Error::new(io::ErrorKind::InvalidData, "not enough uid/gid(s)"))?,
123    ])
124  }
125
126  for line in contents.lines() {
127    if let Some(rest) = line.strip_prefix("Uid:") {
128      uid = Some(parse_ids(rest)?);
129    } else if let Some(rest) = line.strip_prefix("Gid:") {
130      gid = Some(parse_ids(rest)?);
131    } else if let Some(rest) = line.strip_prefix("Groups:") {
132      let r: Result<Vec<_>, _> = rest
133        .trim_ascii()
134        .split_ascii_whitespace()
135        .map(|v| {
136          v.parse()
137            .map_err(|_| std::io::Error::new(io::ErrorKind::InvalidData, "non numeric group id"))
138        })
139        .collect();
140      groups = Some(r?);
141    }
142
143    if uid.is_some() && gid.is_some() && groups.is_some() {
144      break;
145    }
146  }
147
148  let Some([uid_real, uid_effective, uid_saved_set, uid_fs]) = uid else {
149    return Err(std::io::Error::new(
150      io::ErrorKind::InvalidData,
151      "status output does not contain uids",
152    ));
153  };
154  let Some([gid_real, gid_effective, gid_saved_set, gid_fs]) = gid else {
155    return Err(std::io::Error::new(
156      io::ErrorKind::InvalidData,
157      "status output does not contain gids",
158    ));
159  };
160  let Some(groups) = groups else {
161    return Err(std::io::Error::new(
162      io::ErrorKind::InvalidData,
163      "status output does not contain groups",
164    ));
165  };
166
167  Ok(ProcStatus {
168    cred: Cred {
169      groups,
170      uid_real,
171      uid_effective,
172      uid_saved_set,
173      uid_fs,
174      gid_real,
175      gid_effective,
176      gid_saved_set,
177      gid_fs,
178    },
179  })
180}
181
182#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
183pub struct FileDescriptorInfoCollection {
184  #[serde(flatten)]
185  pub fdinfo: BTreeMap<c_int, FileDescriptorInfo>,
186}
187
188impl FileDescriptorInfoCollection {
189  pub fn stdin(&self) -> Option<&FileDescriptorInfo> {
190    self.fdinfo.get(&0)
191  }
192
193  pub fn stdout(&self) -> Option<&FileDescriptorInfo> {
194    self.fdinfo.get(&1)
195  }
196
197  pub fn stderr(&self) -> Option<&FileDescriptorInfo> {
198    self.fdinfo.get(&2)
199  }
200
201  pub fn get(&self, fd: c_int) -> Option<&FileDescriptorInfo> {
202    self.fdinfo.get(&fd)
203  }
204
205  pub fn new_baseline() -> color_eyre::Result<Self> {
206    let mut fdinfo = BTreeMap::new();
207    let pid = getpid();
208    fdinfo.insert(0, read_fdinfo(pid, 0)?);
209    fdinfo.insert(1, read_fdinfo(pid, 1)?);
210    fdinfo.insert(2, read_fdinfo(pid, 2)?);
211
212    Ok(Self { fdinfo })
213  }
214
215  pub fn with_pts(pts: &UnixSlavePty) -> color_eyre::Result<Self> {
216    let mut result = Self::default();
217    let ptyfd = &pts.fd;
218    let raw_fd = ptyfd.as_raw_file_descriptor();
219    let mut info = read_fdinfo(getpid(), raw_fd)?;
220    for fd in 0..3 {
221      info.fd = fd;
222      result.fdinfo.insert(fd, read_fdinfo(getpid(), raw_fd)?);
223    }
224    Ok(result)
225  }
226}
227
228#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
229pub struct FileDescriptorInfo {
230  pub fd: c_int,
231  pub path: OutputMsg,
232  pub pos: usize,
233  #[serde(serialize_with = "serialize_oflags")]
234  pub flags: OFlag,
235  pub mnt_id: c_int,
236  pub ino: u64,
237  pub mnt: ArcStr,
238  pub extra: Vec<ArcStr>,
239}
240
241impl FileDescriptorInfo {
242  pub fn not_same_file_as(&self, other: &Self) -> bool {
243    !self.same_file_as(other)
244  }
245
246  pub fn same_file_as(&self, other: &Self) -> bool {
247    self.ino == other.ino && self.mnt_id == other.mnt_id
248  }
249}
250
251fn serialize_oflags<S>(oflag: &OFlag, serializer: S) -> Result<S::Ok, S::Error>
252where
253  S: Serializer,
254{
255  let mut seq = serializer.serialize_seq(None)?;
256  let mut flag_display = String::with_capacity(16);
257  for f in oflag.iter() {
258    flag_display.clear();
259    bitflags::parser::to_writer(&f, &mut flag_display).unwrap();
260    seq.serialize_element(&flag_display)?;
261  }
262  seq.end()
263}
264
265impl Default for FileDescriptorInfo {
266  fn default() -> Self {
267    Self {
268      fd: Default::default(),
269      path: OutputMsg::Ok("".into()),
270      pos: Default::default(),
271      flags: OFlag::empty(),
272      mnt_id: Default::default(),
273      ino: Default::default(),
274      mnt: Default::default(),
275      extra: Default::default(),
276    }
277  }
278}
279
280pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result<ArcStr> {
281  if fd == AT_FDCWD {
282    return read_cwd(pid);
283  }
284  let filename = format!("/proc/{pid}/fd/{fd}");
285  Ok(cached_str(&std::fs::read_link(filename)?.to_string_lossy()))
286}
287
288/// Read /proc/{pid}/fdinfo/{fd} to get more information about the file descriptor.
289pub fn read_fdinfo(pid: Pid, fd: i32) -> color_eyre::Result<FileDescriptorInfo> {
290  let filename = format!("/proc/{pid}/fdinfo/{fd}");
291  let file = std::fs::File::open(filename)?;
292  let reader = BufReader::new(file);
293  let mut info = FileDescriptorInfo::default();
294  for line in reader.lines() {
295    let line = line?;
296    let mut parts = line.split_ascii_whitespace();
297    let key = parts.next().unwrap_or("");
298    let value = parts.next().unwrap_or("");
299    match key {
300      "pos:" => info.pos = value.parse()?,
301      "flags:" => info.flags = OFlag::from_bits_truncate(c_int::from_str_radix(value, 8)?),
302      "mnt_id:" => info.mnt_id = value.parse()?,
303      "ino:" => info.ino = value.parse()?,
304      _ => {
305        let mut cache = CACHE.write().unwrap();
306        let line = cache.get_or_insert_owned(line);
307        info.extra.push(line)
308      }
309    }
310  }
311  info.mnt = get_mountinfo_by_mnt_id(pid, info.mnt_id)?;
312  info.path = read_fd(pid, fd).map(OutputMsg::Ok)?;
313  Ok(info)
314}
315
316pub fn read_fds(pid: Pid) -> color_eyre::Result<FileDescriptorInfoCollection> {
317  let mut collection = FileDescriptorInfoCollection::default();
318  let filename = format!("/proc/{pid}/fdinfo");
319  for entry in std::fs::read_dir(filename)? {
320    let entry = entry?;
321    let fd = entry.file_name().to_string_lossy().parse()?;
322    collection.fdinfo.insert(fd, read_fdinfo(pid, fd)?);
323  }
324  Ok(collection)
325}
326
327fn get_mountinfo_by_mnt_id(pid: Pid, mnt_id: c_int) -> color_eyre::Result<ArcStr> {
328  let filename = format!("/proc/{pid}/mountinfo");
329  let file = std::fs::File::open(filename)?;
330  let reader = BufReader::new(file);
331  for line in reader.lines() {
332    let line = line?;
333    let parts = line.split_once(' ');
334    if parts.map(|(mount_id, _)| mount_id.parse()) == Some(Ok(mnt_id)) {
335      let mut cache = CACHE.write().unwrap();
336      return Ok(cache.get_or_insert_owned(line));
337    }
338  }
339  let mut cache = CACHE.write().unwrap();
340  Ok(cache.get_or_insert("Not found. This is probably a pipe or something else."))
341}
342
343#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
344#[serde(tag = "what", content = "value", rename_all = "kebab-case")]
345pub enum Interpreter {
346  None,
347  Shebang(ArcStr),
348  ExecutableInaccessible,
349  Error(ArcStr),
350}
351
352impl Display for Interpreter {
353  fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
354    match self {
355      Self::None => write!(f, "{}", "none".bold()),
356      Self::Shebang(s) => write!(f, "{s:?}"),
357      Self::ExecutableInaccessible => {
358        write!(f, "{}", "executable inaccessible".red().bold())
359      }
360      Self::Error(e) => write!(f, "({}: {})", "err".red().bold(), e.red().bold()),
361    }
362  }
363}
364
365pub fn read_interpreter_recursive(exe: impl AsRef<Path>) -> Vec<Interpreter> {
366  let mut exe = Cow::Borrowed(exe.as_ref());
367  let mut interpreters = Vec::new();
368  loop {
369    match read_interpreter(exe.as_ref()) {
370      Interpreter::Shebang(shebang) => {
371        exe = Cow::Owned(PathBuf::from(
372          shebang.split_ascii_whitespace().next().unwrap_or(""),
373        ));
374        interpreters.push(Interpreter::Shebang(shebang));
375      }
376      Interpreter::None => break,
377      err => {
378        interpreters.push(err);
379        break;
380      }
381    };
382  }
383  interpreters
384}
385
386pub fn read_interpreter(exe: &Path) -> Interpreter {
387  fn err_to_interpreter(e: io::Error) -> Interpreter {
388    if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound {
389      Interpreter::ExecutableInaccessible
390    } else {
391      let mut cache = CACHE.write().unwrap();
392      let e = cache.get_or_insert_owned(e.to_string());
393      Interpreter::Error(e)
394    }
395  }
396  let file = match std::fs::File::open(exe) {
397    Ok(file) => file,
398    Err(e) => return err_to_interpreter(e),
399  };
400  let mut reader = BufReader::new(file);
401  // First, check if it's a shebang script
402  let mut buf = [0u8; 2];
403
404  if let Err(e) = reader.read_exact(&mut buf) {
405    let mut cache = CACHE.write().unwrap();
406    let e = cache.get_or_insert_owned(e.to_string());
407    return Interpreter::Error(e);
408  };
409  if &buf != b"#!" {
410    return Interpreter::None;
411  }
412  // Read the rest of the line
413  let mut buf = Vec::new();
414
415  if let Err(e) = reader.read_until(b'\n', &mut buf) {
416    let mut cache = CACHE.write().unwrap();
417    let e = cache.get_or_insert_owned(e.to_string());
418    return Interpreter::Error(e);
419  };
420  // Get trimmed shebang line [start, end) indices
421  // If the shebang line is empty, we don't care
422  let start = buf
423    .iter()
424    .position(|&c| !c.is_ascii_whitespace())
425    .unwrap_or(0);
426  let end = buf
427    .iter()
428    .rposition(|&c| !c.is_ascii_whitespace())
429    .map(|x| x + 1)
430    .unwrap_or(buf.len());
431  let shebang = String::from_utf8_lossy(&buf[start..end]);
432  let mut cache = CACHE.write().unwrap();
433  let shebang = cache.get_or_insert(&shebang);
434  Interpreter::Shebang(shebang)
435}
436
437pub fn parse_env_entry(item: &str) -> (&str, &str) {
438  // trace!("Parsing envp entry: {:?}", item);
439  let Some(mut sep_loc) = item.as_bytes().iter().position(|&x| x == b'=') else {
440    warn!(
441      "Invalid envp entry: {:?}, assuming value to empty string!",
442      item
443    );
444    return (item, "");
445  };
446  if sep_loc == 0 {
447    // Find the next equal sign
448    sep_loc = item
449      .as_bytes()
450      .iter()
451      .skip(1)
452      .position(|&x| x == b'=')
453      .unwrap_or_else(|| {
454        warn!(
455          "Invalid envp entry starting with '=': {:?}, assuming value to empty string!",
456          item
457        );
458        item.len()
459      });
460  }
461  let (head, tail) = item.split_at(sep_loc);
462  (head, &tail[1..])
463}
464
465pub fn parse_failiable_envp(envp: Vec<OutputMsg>) -> (BTreeMap<OutputMsg, OutputMsg>, bool) {
466  let mut has_dash_var = false;
467  (
468    envp
469      .into_iter()
470      .map(|entry| {
471        if let OutputMsg::Ok(s) | OutputMsg::PartialOk(s) = entry {
472          let (key, value) = parse_env_entry(&s);
473          let mut cache = CACHE.write().unwrap();
474          if key.starts_with('-') {
475            has_dash_var = true;
476          }
477          (
478            OutputMsg::Ok(cache.get_or_insert(key)),
479            OutputMsg::Ok(cache.get_or_insert(value)),
480          )
481        } else {
482          (entry.clone(), entry)
483        }
484      })
485      .collect(),
486    has_dash_var,
487  )
488}
489
490pub fn cached_str(s: &str) -> ArcStr {
491  let mut cache = CACHE.write().unwrap();
492  cache.get_or_insert(s)
493}
494
495pub fn cached_string(s: String) -> ArcStr {
496  let mut cache = CACHE.write().unwrap();
497  cache.get_or_insert_owned(s)
498}
499
500#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
501pub struct EnvDiff {
502  has_added_or_modified_keys_starting_with_dash: bool,
503  pub added: BTreeMap<OutputMsg, OutputMsg>,
504  pub removed: BTreeSet<OutputMsg>,
505  pub modified: BTreeMap<OutputMsg, OutputMsg>,
506}
507
508impl EnvDiff {
509  pub fn is_modified_or_removed(&self, key: &OutputMsg) -> bool {
510    self.modified.contains_key(key) || self.removed.contains(key)
511  }
512
513  /// Whether we need to use `--` to prevent argument injection
514  pub fn need_env_argument_separator(&self) -> bool {
515    self.has_added_or_modified_keys_starting_with_dash
516  }
517}
518
519pub fn diff_env(
520  original: &BTreeMap<OutputMsg, OutputMsg>,
521  envp: &BTreeMap<OutputMsg, OutputMsg>,
522) -> EnvDiff {
523  let mut added = BTreeMap::new();
524  let mut modified = BTreeMap::<OutputMsg, OutputMsg>::new();
525  // Use str to avoid cloning all env vars
526  let mut removed: HashSet<OutputMsg> = original.keys().cloned().collect();
527  let mut has_added_or_modified_keys_starting_with_dash = false;
528  for (key, value) in envp.iter() {
529    // Too bad that we still don't have if- and while-let-chains
530    // https://github.com/rust-lang/rust/issues/53667
531    if let Some(orig_v) = original.get(key) {
532      if orig_v != value {
533        modified.insert(key.clone(), value.clone());
534        if key.as_ref().starts_with('-') {
535          has_added_or_modified_keys_starting_with_dash = true;
536        }
537      }
538      removed.remove(key);
539    } else {
540      added.insert(key.clone(), value.clone());
541      if key.as_ref().starts_with('-') {
542        has_added_or_modified_keys_starting_with_dash = true;
543      }
544    }
545  }
546  EnvDiff {
547    has_added_or_modified_keys_starting_with_dash,
548    added,
549    removed: removed.into_iter().collect(),
550    modified,
551  }
552}
553
554#[derive(Debug, Clone, Serialize)]
555pub struct BaselineInfo {
556  pub cwd: OutputMsg,
557  pub env: BTreeMap<OutputMsg, OutputMsg>,
558  pub fdinfo: FileDescriptorInfoCollection,
559}
560
561impl BaselineInfo {
562  pub fn new() -> color_eyre::Result<Self> {
563    let cwd = cached_str(&std::env::current_dir()?.to_string_lossy()).into();
564    let env = std::env::vars()
565      .map(|(k, v)| {
566        let mut cache = CACHE.write().unwrap();
567        (
568          cache.get_or_insert_owned(k).into(),
569          cache.get_or_insert_owned(v).into(),
570        )
571      })
572      .collect();
573    let fdinfo = FileDescriptorInfoCollection::new_baseline()?;
574    Ok(Self { cwd, env, fdinfo })
575  }
576
577  pub fn with_pts(pts: &UnixSlavePty) -> color_eyre::Result<Self> {
578    let cwd = cached_str(&std::env::current_dir()?.to_string_lossy()).into();
579    let env = std::env::vars()
580      .map(|(k, v)| {
581        let mut cache = CACHE.write().unwrap();
582        (
583          cache.get_or_insert_owned(k).into(),
584          cache.get_or_insert_owned(v).into(),
585        )
586      })
587      .collect();
588    let fdinfo = FileDescriptorInfoCollection::with_pts(pts)?;
589    Ok(Self { cwd, env, fdinfo })
590  }
591}
592
593static CACHE: LazyLock<Arc<RwLock<StringCache>>> =
594  LazyLock::new(|| Arc::new(RwLock::new(StringCache::new())));
595
#[cfg(test)]
mod tests {
  use super::*;

  /// A complete excerpt yields all uids, gids and supplementary groups.
  #[test]
  fn test_parse_status_contents_valid() {
    let sample = concat!(
      "Name:\ttestproc\n",
      "State:\tR (running)\n",
      "Uid:\t1000\t1001\t1002\t1003\n",
      "Gid:\t2000\t2001\t2002\t2003\n",
      "Threads:\t1\n",
      "Groups:\t0\t1\t2\n",
    );
    let expected = ProcStatus {
      cred: Cred {
        groups: vec![0, 1, 2],
        uid_real: 1000,
        uid_effective: 1001,
        uid_saved_set: 1002,
        uid_fs: 1003,
        gid_real: 2000,
        gid_effective: 2001,
        gid_saved_set: 2002,
        gid_fs: 2003,
      },
    };

    assert_eq!(parse_status_contents(sample).unwrap(), expected);
  }

  /// Without a `Gid:` line the contents are rejected as invalid data.
  #[test]
  fn test_parse_status_contents_missing_gid() {
    let err = parse_status_contents("Uid:\t1\t2\t3\t4\nGroups:\t0\n").unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
  }

  /// Without a `Groups:` line the contents are rejected as invalid data.
  #[test]
  fn test_parse_status_contents_missing_groups() {
    let err = parse_status_contents("Uid:\t1\t2\t3\t4\nGid:\t0\t1\t2\t3\n").unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
  }
}