use anyhow::Result;
use regex::Regex;
use serde::Serialize;
use std::collections::HashMap;
use std::time::Instant;
use sysinfo::{ProcessRefreshKind, RefreshKind, System};
use crate::config::Config;
/// Reports whether the current process appears to be running inside a container.
///
/// Three signals are consulted in order, stopping at the first hit:
/// 1. the marker file `/.dockerenv` exists,
/// 2. `/proc/1/cgroup` is readable and mentions `docker`, `kubepods` or `containerd`,
/// 3. the `KUBERNETES_SERVICE_HOST` environment variable is set to valid Unicode.
///
/// A cgroup file that cannot be read simply counts as "no signal".
fn detect_container_environment() -> bool {
    const CGROUP_MARKERS: [&str; 3] = ["docker", "kubepods", "containerd"];

    let has_dockerenv = || std::path::Path::new("/.dockerenv").exists();
    let cgroup_names_runtime = || {
        std::fs::read_to_string("/proc/1/cgroup")
            .map(|contents| CGROUP_MARKERS.iter().any(|&marker| contents.contains(marker)))
            .unwrap_or(false)
    };
    let has_kubernetes_env = || std::env::var("KUBERNETES_SERVICE_HOST").is_ok();

    // `||` short-circuits, so the probes run lazily and in the listed order.
    has_dockerenv() || cgroup_names_runtime() || has_kubernetes_env()
}
/// Snapshot of a process the scanner is tracking as an orphan candidate.
///
/// Serializable via `serde`; `first_seen` is excluded from the serialized form.
#[derive(Debug, Clone, Serialize)]
pub struct OrphanProcess {
/// Process ID.
pub pid: u32,
/// Process name as reported by `sysinfo`, converted lossily to UTF-8.
pub name: String,
/// Command-line arguments joined with single spaces.
pub cmdline: String,
/// Instant at which the scanner first recorded this entry; the grace period
/// is measured from here.
#[serde(skip)]
pub first_seen: Instant,
/// Value of `sysinfo::Process::start_time` captured when the entry was created.
pub start_time: u64, }
/// Serializable outcome of a single scan.
#[derive(Debug, Serialize)]
pub struct ScanResult {
/// Orphans returned by the scan.
pub orphans: Vec<OrphanProcess>,
/// Number of entries in `orphans`.
pub orphan_count: usize,
}
/// Stateful orphan scanner that remembers candidates between scans so a grace
/// period can be applied.
pub struct Scanner {
// Settings the scanner was built from; `grace_period` is read on every scan.
config: Config,
// Candidates seen so far, keyed by PID.
tracked: HashMap<u32, OrphanProcess>,
// Compiled from `config.targets`; a command line must match at least one.
target_patterns: Vec<Regex>,
// Compiled from `config.whitelist`; a command line matching any is skipped.
whitelist_patterns: Vec<Regex>,
}
impl Scanner {
pub fn new(config: Config) -> Result<Self> {
let target_patterns = config
.targets
.iter()
.map(|p| Regex::new(p))
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(|e| anyhow::anyhow!("Invalid target regex pattern in configuration: {e}"))?;
let whitelist_patterns = config
.whitelist
.iter()
.map(|p| Regex::new(p))
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(|e| {
anyhow::anyhow!("Invalid whitelist regex pattern in configuration: {e}")
})?;
if target_patterns.is_empty() {
tracing::warn!(
"No target patterns configured. Scanner will not detect any orphaned processes. \
Run 'proc-janitor config init' to set up target patterns."
);
}
if detect_container_environment() {
tracing::warn!(
"Container environment detected. All processes may appear as orphans (PPID=1). \
proc-janitor may not work correctly inside containers."
);
}
Ok(Self {
config,
tracked: HashMap::new(),
target_patterns,
whitelist_patterns,
})
}
pub fn scan(&mut self) -> Result<Vec<OrphanProcess>> {
let mut sys = System::new_with_specifics(
RefreshKind::new().with_processes(ProcessRefreshKind::everything()),
);
sys.refresh_processes(sysinfo::ProcessesToUpdate::All);
let now = Instant::now();
let mut current_orphans = Vec::new();
let mut current_pids = std::collections::HashSet::new();
let mut children_map: HashMap<u32, Vec<u32>> = HashMap::new();
for (pid, process) in sys.processes() {
let pid_u32 = pid.as_u32();
current_pids.insert(pid_u32);
if let Some(ppid) = process.parent() {
children_map.entry(ppid.as_u32()).or_default().push(pid_u32);
}
}
let mut orphan_tree_pids = std::collections::HashSet::new();
for (pid, process) in sys.processes() {
if !is_orphan(process) {
continue;
}
let cmdline = get_cmdline(process);
if cmdline.is_empty() {
continue;
}
if !self.matches_target(&cmdline) {
continue;
}
if self.is_whitelisted(&cmdline) {
continue;
}
let pid_u32 = pid.as_u32();
orphan_tree_pids.insert(pid_u32);
collect_descendants(pid_u32, &children_map, &mut orphan_tree_pids);
}
for (pid, process) in sys.processes() {
let pid_u32 = pid.as_u32();
if !orphan_tree_pids.contains(&pid_u32) {
continue;
}
let cmdline = get_cmdline(process);
if cmdline.is_empty() {
continue;
}
if !self.matches_target(&cmdline) {
continue;
}
if self.is_whitelisted(&cmdline) {
continue;
}
let orphan = self
.tracked
.entry(pid_u32)
.or_insert_with(|| OrphanProcess {
pid: pid_u32,
name: process.name().to_string_lossy().to_string(),
cmdline: cmdline.clone(),
first_seen: now,
start_time: process.start_time(),
});
let elapsed = now.duration_since(orphan.first_seen);
if elapsed.as_secs() >= self.config.grace_period {
current_orphans.push(orphan.clone());
}
}
self.tracked.retain(|pid, _| current_pids.contains(pid));
Ok(current_orphans)
}
fn matches_target(&self, cmdline: &str) -> bool {
self.target_patterns.iter().any(|re| re.is_match(cmdline))
}
fn is_whitelisted(&self, cmdline: &str) -> bool {
self.whitelist_patterns
.iter()
.any(|re| re.is_match(cmdline))
}
}
/// Renders a process's argument vector as one string, with arguments separated
/// by a single space and non-UTF-8 bytes replaced lossily.
///
/// Returns an empty string when the process reports no arguments.
fn get_cmdline(process: &sysinfo::Process) -> String {
    let mut joined = String::new();
    for (position, arg) in process.cmd().iter().enumerate() {
        if position > 0 {
            joined.push(' ');
        }
        joined.push_str(&arg.to_string_lossy());
    }
    joined
}
/// Adds every transitive child of `pid` (per `children_map`) to `result`.
///
/// `pid` itself is not inserted unless it is reachable as someone's child.
/// PIDs already present in `result` are treated as visited: they are neither
/// re-inserted nor expanded, which also keeps cyclic maps from looping forever.
fn collect_descendants(
    pid: u32,
    children_map: &HashMap<u32, Vec<u32>>,
    result: &mut std::collections::HashSet<u32>,
) {
    // Explicit work list in place of recursion; each newly inserted PID is
    // expanded exactly once.
    let mut pending = vec![pid];
    while let Some(parent) = pending.pop() {
        let Some(children) = children_map.get(&parent) else {
            continue;
        };
        pending.extend(children.iter().copied().filter(|&child| result.insert(child)));
    }
}
/// Returns `true` when the process's parent PID is exactly 1.
///
/// A process with no reported parent, or with any other parent, is not
/// considered an orphan.
fn is_orphan(process: &sysinfo::Process) -> bool {
    matches!(process.parent(), Some(parent) if parent.as_u32() == 1)
}
/// Runs a one-shot scan using the configuration from `Config::load`.
///
/// The loaded `grace_period` is overridden with `0`, so the elapsed-time check
/// in `Scanner::scan` is satisfied the first time a process is seen.
///
/// # Errors
///
/// Propagates failures from loading the configuration, constructing the
/// scanner, or the scan itself.
pub fn scan() -> Result<ScanResult> {
    let config = Config::load().map(|mut loaded| {
        loaded.grace_period = 0;
        loaded
    })?;
    let mut one_shot = Scanner::new(config)?;
    scan_with_scanner(&mut one_shot)
}
/// Runs one scan on an existing `scanner` and wraps the result in a
/// [`ScanResult`], with `orphan_count` set to the number of orphans returned.
///
/// # Errors
///
/// Propagates any error from `Scanner::scan`.
pub fn scan_with_scanner(scanner: &mut Scanner) -> Result<ScanResult> {
    scanner.scan().map(|orphans| ScanResult {
        // Read the length before `orphans` is moved into the struct.
        orphan_count: orphans.len(),
        orphans,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the fixed test configuration, varying only the pattern lists.
    fn config_with(targets: &[&str], whitelist: &[&str]) -> Config {
        let to_owned_list =
            |patterns: &[&str]| -> Vec<String> { patterns.iter().map(|&p| p.to_owned()).collect() };
        Config {
            scan_interval: 5,
            grace_period: 30,
            sigterm_timeout: 5,
            targets: to_owned_list(targets),
            whitelist: to_owned_list(whitelist),
            logging: crate::config::LoggingConfig {
                enabled: false,
                path: "/tmp/test".to_string(),
                retention_days: 7,
            },
        }
    }

    /// Smoke test: detection must run without panicking; the result depends on
    /// the host, so it is deliberately not asserted.
    #[test]
    fn test_container_detection_on_host() {
        let _ = detect_container_environment();
    }

    /// Placeholder with no assertions.
    // NOTE(review): presumably empty because a `sysinfo::Process` cannot be
    // constructed directly in a unit test — confirm, then cover or remove.
    #[test]
    fn test_is_orphan_check() {}

    /// An empty target list only triggers a warning; construction still succeeds.
    #[test]
    fn test_scanner_new_with_empty_targets() {
        assert!(Scanner::new(config_with(&[], &[])).is_ok());
    }

    /// A malformed target regex makes construction fail.
    #[test]
    fn test_scanner_new_with_invalid_regex() {
        assert!(Scanner::new(config_with(&["[invalid"], &[])).is_err());
    }

    /// Target matching succeeds if any configured pattern matches.
    #[test]
    fn test_scanner_matches_target() {
        let scanner =
            Scanner::new(config_with(&["node.*claude", "python"], &["node.*server"])).unwrap();
        for hit in ["node --experimental-vm-modules claude", "python script.py"] {
            assert!(scanner.matches_target(hit));
        }
        assert!(!scanner.matches_target("cargo build"));
    }

    /// Whitelist matching is independent of the target patterns.
    #[test]
    fn test_scanner_whitelist() {
        let scanner = Scanner::new(config_with(&["node"], &["node.*server"])).unwrap();
        assert!(scanner.is_whitelisted("node express-server"));
        assert!(!scanner.is_whitelisted("node claude-mcp"));
    }
}