#[path = "common/mod.rs"]
mod common;
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
#[cfg(target_os = "linux")]
use std::path::Path;
#[cfg(target_os = "linux")]
use std::process::{Command, Stdio};
use std::{fs, thread, time::Duration};
use common::HomeEnvGuard;
use systemg::{config::load_config, daemon::Daemon};
use tempfile::tempdir;
/// Restarting a service must also terminate background children that the
/// service command launched with `nohup ... &`.
///
/// The `spawner` command starts three background shells; each writes its own
/// PID to `pids/child_N.pid` and then execs `sleep 60`. The foreground shell
/// execs `sleep 60` as well. After `restart_service`, none of the recorded
/// child PIDs may still be alive.
#[test]
fn restart_kills_detached_descendants() {
    use std::time::Instant;

    const CHILD_COUNT: usize = 3;
    const WAIT_LIMIT: Duration = Duration::from_secs(5);
    const POLL_INTERVAL: Duration = Duration::from_millis(50);

    let temp = tempdir().unwrap();
    let dir = temp.path();
    let home = dir.join("home");
    fs::create_dir_all(&home).unwrap();
    let _home = HomeEnvGuard::set(&home);

    let pid_dir = dir.join("pids");
    fs::create_dir_all(&pid_dir).unwrap();

    // The YAML nesting is significant: `spawner` must sit under `services`,
    // and `strategy` under `deployment`.
    let config_yaml = format!(
        r#"
version: '1'
services:
  spawner:
    command: "sh -c '
      mkdir -p {0} &&
      nohup sh -c \"echo \\$\\$ > {0}/child_1.pid && exec sleep 60\" >/dev/null 2>&1 &
      nohup sh -c \"echo \\$\\$ > {0}/child_2.pid && exec sleep 60\" >/dev/null 2>&1 &
      nohup sh -c \"echo \\$\\$ > {0}/child_3.pid && exec sleep 60\" >/dev/null 2>&1 &
      exec sleep 60
      '"
    deployment:
      strategy: "immediate"
"#,
        pid_dir.display()
    );
    let config_path = dir.join("systemg.yaml");
    fs::write(&config_path, config_yaml).unwrap();

    let config = load_config(Some(config_path.to_str().unwrap())).unwrap();
    let daemon = Daemon::from_config(config.clone(), false).unwrap();
    daemon.start_services().unwrap();

    // Collect every PID file that already holds a parseable PID. A file that
    // exists but is still empty fails to parse and is picked up on a later poll.
    let read_child_pids = || -> Vec<u32> {
        (1..=CHILD_COUNT)
            .filter_map(|i| fs::read_to_string(pid_dir.join(format!("child_{i}.pid"))).ok())
            .filter_map(|content| content.trim().parse::<u32>().ok())
            .collect()
    };

    // Poll instead of sleeping a fixed interval so slow machines get time to
    // spawn all children and fast machines do not wait needlessly.
    let spawn_deadline = Instant::now() + WAIT_LIMIT;
    let mut child_pids = read_child_pids();
    while child_pids.len() < CHILD_COUNT && Instant::now() < spawn_deadline {
        thread::sleep(POLL_INTERVAL);
        child_pids = read_child_pids();
    }

    assert!(
        !child_pids.is_empty(),
        "Should have spawned child processes"
    );
    for &pid in &child_pids {
        assert!(
            common::is_process_alive(pid),
            "Child {} should be alive before restart",
            pid
        );
    }

    daemon
        .restart_service("spawner", &config.services["spawner"])
        .unwrap();

    // Give the old children a bounded amount of time to disappear.
    let exit_deadline = Instant::now() + WAIT_LIMIT;
    while Instant::now() < exit_deadline
        && child_pids.iter().any(|&pid| common::is_process_alive(pid))
    {
        thread::sleep(POLL_INTERVAL);
    }
    for &pid in &child_pids {
        assert!(
            !common::is_process_alive(pid),
            "Old child {} should be terminated after restart",
            pid
        );
    }

    daemon.shutdown_monitor();
    daemon.stop_services().unwrap();
}
/// A rolling restart with a blue/green configuration must move the service to
/// the other slot and record that choice.
///
/// The service script writes the value of `$PORT` (or `none`) to
/// `active_port.txt`; the switch command writes the candidate slot to
/// `switch_target.txt`. After one `restart_service` call the test expects both
/// files to contain `8001` and the state file to reference slot index 1.
#[test]
fn rolling_blue_green_switches_slot_and_persists_state() {
    let temp = tempdir().expect("create tempdir");
    let dir = temp.path();
    let home = dir.join("home");
    fs::create_dir_all(&home).expect("create home dir");
    let _home = HomeEnvGuard::set(&home);

    let service_script = dir.join("bg_service.sh");
    let port_path = dir.join("active_port.txt");
    let script_body = format!(
        "#!/bin/sh\n\
         echo \"${{PORT:-none}}\" > \"{}\"\n\
         exec tail -f /dev/null\n",
        port_path.display()
    );
    fs::write(&service_script, script_body).expect("write service script");

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(&service_script)
            .expect("metadata")
            .permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&service_script, perms).expect("chmod script");
    }

    let switch_path = dir.join("switch_target.txt");
    let state_path = dir.join("bg_state.xml");
    let config_path = dir.join("config.yaml");

    // The YAML nesting is significant: `bg_app` must sit under `services`.
    // NOTE(review): `blue_green` is assumed to be a child of `deployment`;
    // confirm against the config schema.
    let config_yaml = format!(
        r#"version: "1"
services:
  bg_app:
    command: "{}"
    restart_policy: "always"
    deployment:
      strategy: "rolling"
      blue_green:
        env_var: "PORT"
        slots: ["8000", "8001"]
        switch_command: "echo \"{{candidate_slot}}\" > \"{}\""
        state_path: "{}"
"#,
        service_script.display(),
        switch_path.display(),
        state_path.display()
    );
    fs::write(&config_path, config_yaml).expect("write config");

    let config = load_config(Some(config_path.to_str().unwrap())).expect("load config");
    let service_cfg = config.services.get("bg_app").expect("service").clone();
    let daemon = Daemon::from_config(config, false).expect("daemon");

    daemon
        .restart_service("bg_app", &service_cfg)
        .expect("restart bg_app");

    let new_pid = common::wait_for_pid("bg_app");
    assert!(
        common::is_process_alive(new_pid),
        "service should be running after blue/green restart"
    );

    // The switch command should have been handed the second slot.
    let switched_slot = fs::read_to_string(&switch_path).expect("read switch target");
    assert_eq!(switched_slot.trim(), "8001");

    // The service itself should have been started with PORT set to that slot.
    common::wait_for_path(&port_path);
    let active_slot = fs::read_to_string(&port_path).expect("read active port");
    assert_eq!(active_slot.trim(), "8001");

    let state_xml = fs::read_to_string(&state_path).expect("read state");
    assert!(
        state_xml.contains("<active_slot_index>1</active_slot_index>"),
        "expected state to point at slot 1, got: {state_xml}"
    );

    daemon.stop_service("bg_app").expect("stop bg_app");
    daemon.shutdown_monitor();
}
/// `stop_service` must succeed and clear the PID-file entry when the recorded
/// PID belongs to no service this daemon started.
///
/// The entry for `ghost_service` is inserted by hand with PID 999999 and the
/// service is never started.
#[test]
fn stop_succeeds_with_stale_pidfile_entry() {
    let temp = tempdir().unwrap();
    let dir = temp.path();
    let home = dir.join("home");
    fs::create_dir_all(&home).unwrap();
    let _home = HomeEnvGuard::set(&home);

    // Seed the PID file with an entry that no running service backs.
    let mut pid_file = systemg::daemon::PidFile::default();
    pid_file.insert("ghost_service", 999999).unwrap();

    // The YAML nesting is significant: `ghost_service` must sit under `services`.
    let config_yaml = r#"
version: '1'
services:
  ghost_service:
    command: "echo 'should not run'"
"#;
    let config_path = dir.join("systemg.yaml");
    fs::write(&config_path, config_yaml).unwrap();

    let config = load_config(Some(config_path.to_str().unwrap())).unwrap();
    let daemon = Daemon::from_config(config, false).unwrap();
    daemon.stop_service("ghost_service").unwrap();

    // Reload from disk to verify the stale entry was removed.
    let reloaded = systemg::daemon::PidFile::load().unwrap();
    assert!(reloaded.pid_for("ghost_service").is_none());
}
/// Restarting a service must terminate a child that moved itself into a new
/// session and ignores SIGTERM/SIGINT.
///
/// The Python helper forks. The child calls `os.setsid()`, installs no-op
/// handlers for SIGTERM and SIGINT, writes its PID to `CHILD_PID_PATH` and
/// sleeps forever. The parent writes its PID to `CHILD_PID_PATH + ".parent"`
/// and sleeps as well. After a restart the first child must be gone and a new
/// child PID must have been recorded.
#[cfg(target_os = "linux")]
#[test]
fn restart_kills_detached_descendants_via_detacher() {
    use std::time::Instant;

    let temp = tempdir().expect("failed to create tempdir");
    let dir = temp.path();
    let home = dir.join("home");
    fs::create_dir_all(&home).expect("failed to create home dir");
    let _home = HomeEnvGuard::set(&home);

    let child_pid_path = dir.join("child.pid");
    let script = dir.join("detacher.py");
    // Python is indentation-sensitive; the block structure below is part of
    // the program, not decoration.
    fs::write(
        &script,
        r#"#!/usr/bin/env python3
import os
import signal
import time
child_path = os.environ["CHILD_PID_PATH"]
def spawn():
    pid = os.fork()
    if pid == 0:
        os.setsid()
        signal.signal(signal.SIGTERM, lambda *_: None)
        signal.signal(signal.SIGINT, lambda *_: None)
        with open(child_path, "w") as fh:
            fh.write(str(os.getpid()))
            fh.flush()
        while True:
            time.sleep(1)
    else:
        signal.signal(signal.SIGTERM, lambda *_: time.sleep(0.5))
        with open(child_path + ".parent", "w") as fh:
            fh.write(str(os.getpid()))
            fh.flush()
        while True:
            time.sleep(1)
spawn()
"#,
    )
    .expect("failed to write detacher script");

    let mut perms = fs::metadata(&script).expect("metadata").permissions();
    perms.set_mode(0o755);
    fs::set_permissions(&script, perms).expect("chmod script");

    let config_path = dir.join("config.yaml");
    // The YAML nesting is significant: `detacher` under `services`, `vars` under `env`.
    let config_yaml = format!(
        r#"version: "1"
services:
  detacher:
    command: "{}"
    env:
      vars:
        CHILD_PID_PATH: "{}"
"#,
        script.display(),
        child_pid_path.display()
    );
    fs::write(&config_path, config_yaml).expect("failed to write config");

    let config = load_config(Some(config_path.to_str().unwrap())).expect("load config");
    let service_cfg = config
        .services
        .get("detacher")
        .expect("service present")
        .clone();
    let daemon = Daemon::from_config(config, false).expect("daemon from config");
    daemon.start_services().expect("start services");

    let first_parent_pid = common::wait_for_pid("detacher");
    common::wait_for_path(&child_pid_path);

    let read_child_pid = |path: &Path| -> u32 {
        fs::read_to_string(path)
            .expect("read child pid")
            .trim()
            .parse()
            .expect("parse child pid")
    };
    let first_child_pid = read_child_pid(&child_pid_path);
    assert!(
        common::is_process_alive(first_child_pid),
        "detached child should be running"
    );

    daemon
        .restart_service("detacher", &service_cfg)
        .expect("restart detacher");

    let new_parent_pid = common::wait_for_pid("detacher");
    assert_ne!(
        first_parent_pid, new_parent_pid,
        "restart should record a new parent pid"
    );

    // Wait up to five seconds for the relaunched helper to overwrite the PID
    // file with a PID different from the first child's.
    let deadline = Instant::now() + Duration::from_secs(5);
    let mut replacement: Option<u32> = None;
    while Instant::now() < deadline {
        replacement = fs::read_to_string(&child_pid_path)
            .ok()
            .and_then(|contents| contents.trim().parse::<u32>().ok())
            .filter(|&pid| pid != first_child_pid);
        if replacement.is_some() {
            break;
        }
        thread::sleep(Duration::from_millis(100));
    }
    let new_child_pid = replacement.expect("child pid should update after restart");

    // Allow up to 50 x 100 ms for the original child to disappear.
    for _ in 0..50 {
        if !common::is_process_alive(first_child_pid) {
            break;
        }
        thread::sleep(Duration::from_millis(100));
    }
    assert!(
        !common::is_process_alive(first_child_pid),
        "detached child should be terminated after restart"
    );
    assert!(
        common::is_process_alive(new_child_pid),
        "replacement detached child should be running"
    );

    daemon.stop_service("detacher").expect("stop detacher");
    common::wait_for_pid_removed("detacher");
    daemon.shutdown_monitor();
}
/// Shared driver for the "restart replaces the worker" tests.
///
/// Writes `script_contents` to an executable file named `script_name`,
/// registers it as service `service_name` with `WORKER_PID_PATH` pointing at a
/// marker file, starts it, restarts it, and asserts that the PID recorded in
/// the marker changed, that the old PID is dead and that the new one is alive.
///
/// * `service_name` - key used for the service in the generated config.
/// * `script_name` - file name of the script inside the temp directory.
/// * `script_contents` - full text of the script; it is expected to write the
///   worker's PID to the path in `$WORKER_PID_PATH`.
/// * `shutdown_monitor_before_restart` - when `true`, `shutdown_monitor()` is
///   called right after the services are started, before the restart.
///
/// Panics (failing the calling test) if any step or assertion fails.
#[cfg(unix)]
fn assert_restart_replaces_worker(
    service_name: &str,
    script_name: &str,
    script_contents: &str,
    shutdown_monitor_before_restart: bool,
) {
    use std::time::Instant;

    let temp = tempdir().expect("failed to create tempdir");
    let dir = temp.path();
    let home = dir.join("home");
    fs::create_dir_all(&home).expect("failed to create home dir");
    let _home = HomeEnvGuard::set(&home);

    let marker_path = dir.join("worker.pid");
    let script = dir.join(script_name);
    fs::write(&script, script_contents).expect("failed to write script");
    let mut perms = fs::metadata(&script).expect("metadata").permissions();
    perms.set_mode(0o755);
    fs::set_permissions(&script, perms).expect("chmod script");

    let config_path = dir.join("config.yaml");
    // The YAML nesting is significant: the service under `services`, `vars`
    // under `env`, `strategy` under `deployment`.
    let config_yaml = format!(
        r#"version: "1"
services:
  {service_name}:
    command: "{}"
    restart_policy: "never"
    backoff: "1s"
    env:
      vars:
        WORKER_PID_PATH: "{}"
    deployment:
      strategy: "immediate"
"#,
        script.display(),
        marker_path.display(),
    );
    fs::write(&config_path, config_yaml).expect("write config");

    let config = load_config(Some(config_path.to_str().unwrap())).expect("load config");
    let service_cfg = config
        .services
        .get(service_name)
        .expect("service present")
        .clone();
    let daemon = Daemon::from_config(config, false).expect("daemon from config");
    daemon.start_services().expect("start services");
    if shutdown_monitor_before_restart {
        daemon.shutdown_monitor();
    }

    common::wait_for_path(&marker_path);
    let first_worker_pid: u32 = fs::read_to_string(&marker_path)
        .expect("read first worker pid")
        .trim()
        .parse()
        .expect("parse first worker pid");
    assert!(
        common::is_process_alive(first_worker_pid),
        "first worker should be alive before restart"
    );

    daemon
        .restart_service(service_name, &service_cfg)
        .expect("restart service");

    // Wait up to five seconds for the marker to hold a PID other than the
    // first worker's.
    let deadline = Instant::now() + Duration::from_secs(5);
    let mut replacement: Option<u32> = None;
    while Instant::now() < deadline {
        replacement = fs::read_to_string(&marker_path)
            .ok()
            .and_then(|contents| contents.trim().parse::<u32>().ok())
            .filter(|&pid| pid != first_worker_pid);
        if replacement.is_some() {
            break;
        }
        thread::sleep(Duration::from_millis(100));
    }
    let new_worker_pid = replacement.expect("replacement worker pid should be observed");

    // Allow up to 50 x 100 ms for the original worker to exit.
    for _ in 0..50 {
        if !common::is_process_alive(first_worker_pid) {
            break;
        }
        thread::sleep(Duration::from_millis(100));
    }
    assert!(
        !common::is_process_alive(first_worker_pid),
        "old worker should be terminated during restart"
    );
    assert!(
        common::is_process_alive(new_worker_pid),
        "replacement worker should be alive after restart"
    );

    daemon.stop_service(service_name).expect("stop service");
    daemon.shutdown_monitor();
}
/// Restart check for a plain shell worker that records its own PID, exits
/// cleanly on SIGTERM/SIGINT and otherwise loops on `sleep 1`.
///
/// The monitor is left running across the restart (last argument `false`).
#[cfg(unix)]
#[test]
fn restart_replaces_worker_without_leak_unix() {
    const WORKER_SCRIPT: &str = r#"#!/bin/sh
echo $$ > "$WORKER_PID_PATH"
trap 'exit 0' TERM INT
while true; do sleep 1; done
"#;
    let keep_monitor_running = false;
    assert_restart_replaces_worker(
        "steady_worker",
        "steady_worker.sh",
        WORKER_SCRIPT,
        keep_monitor_running,
    );
}
/// Restart check for a service whose leader process exits immediately while a
/// forked child keeps running.
///
/// The Python helper forks. The child installs a SIGTERM handler that exits,
/// writes its PID to `WORKER_PID_PATH` and sleeps forever; the leader calls
/// `sys.exit(0)` straight away. The monitor is shut down before the restart
/// (last argument `true`). The test is `#[ignore]`d and only runs on request.
#[cfg(target_os = "linux")]
#[test]
#[ignore]
fn restart_kills_orphan_parent_exit_linux() {
    // Python is indentation-sensitive: everything under `if pid == 0:` runs
    // only in the forked child; the final `sys.exit(0)` runs in the leader.
    let script = r#"#!/usr/bin/env python3
import os
import signal
import sys
import time
marker = os.environ["WORKER_PID_PATH"]
pid = os.fork()
if pid == 0:
    signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
    with open(marker, "w") as fh:
        fh.write(str(os.getpid()))
        fh.flush()
    while True:
        time.sleep(1)
# Leader exits immediately; child remains alive.
sys.exit(0)
"#;
    assert_restart_replaces_worker("leader_exits", "leader_exits.py", script, true);
}
/// `stop_service` must still succeed after the on-disk PID file has been
/// overwritten with a PID that differs from the running service's PID.
///
/// The service script touches `stale.done` from a trap on EXIT/TERM/INT. The
/// test starts the service, rewrites `pid.xml` with the real PID plus 10,000,
/// stops the service, then waits for the PID entry to go away and for the
/// marker path via `common::wait_for_path`.
#[test]
fn stop_succeeds_with_stale_pidfile_entry_with_corrupted_pid() {
    let temp = tempdir().expect("failed to create tempdir");
    let dir = temp.path();
    let home = dir.join("home");
    fs::create_dir_all(&home).expect("failed to create home dir");
    let _home = HomeEnvGuard::set(&home);

    let script = dir.join("stale.sh");
    let done_marker = dir.join("stale.done");
    let script_body = format!(
        "#!/bin/sh\nfinish() {{ touch \"{}\"; }}\ntrap finish EXIT TERM INT\nwhile true; do sleep 1; done\n",
        done_marker.display()
    );
    fs::write(&script, script_body).expect("failed to write stale script");

    let config_path = dir.join("config.yaml");
    // The YAML nesting is significant: `stale` must sit under `services`.
    fs::write(
        &config_path,
        r#"version: "1"
services:
  stale:
    command: "sh ./stale.sh"
"#,
    )
    .expect("failed to write config");

    let config = load_config(Some(config_path.to_str().unwrap())).expect("load config");
    let daemon = Daemon::from_config(config, false).expect("daemon from config");
    daemon.start_services().expect("start services");

    // Replace the recorded PID with one offset from the real PID.
    let real_pid = common::wait_for_pid("stale");
    let bogus_pid = real_pid.saturating_add(10_000);
    let pid_file_path = home.join(".local/share/systemg/pid.xml");
    fs::create_dir_all(pid_file_path.parent().unwrap())
        .expect("failed to create pid directory");
    let fake_contents = format!(
        "<PidFile><services><name>stale</name><pid>{}</pid></services></PidFile>",
        bogus_pid
    );
    fs::write(&pid_file_path, fake_contents).expect("failed to corrupt pid file");

    daemon.stop_service("stale").expect("stop stale");
    common::wait_for_pid_removed("stale");
    common::wait_for_path(&done_marker);
    daemon.shutdown_monitor();
}
/// Spawns a shell that backgrounds a short-lived child and then sleeps, reads
/// the shell's state character from `/proc/<pid>/stat`, and finally kills and
/// reaps the shell.
///
/// NOTE(review): the test contains no assertion. Whether or not the state
/// character is `Z`, the only effect is killing and waiting on the spawned
/// shell, so this currently acts as a smoke test only.
#[cfg(target_os = "linux")]
#[test]
fn zombie_processes_detected() {
    let temp = tempdir().unwrap();
    let home = temp.path().join("home");
    fs::create_dir_all(&home).unwrap();
    let _home = HomeEnvGuard::set(&home);

    let mut parent = Command::new("sh")
        .args(["-c", "sh -c 'sleep 0.1 && exit 0' & sleep 60"])
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn()
        .unwrap();

    // Let the backgrounded child finish before sampling /proc.
    thread::sleep(Duration::from_millis(200));

    // The state field is the first non-blank character after the last ')'
    // in the stat line; the result is computed but not asserted on.
    let _parent_is_zombie = fs::read_to_string(format!("/proc/{}/stat", parent.id()))
        .ok()
        .and_then(|stat| {
            let close_paren = stat.rfind(')')?;
            stat[close_paren + 1..].trim().chars().next()
        })
        .is_some_and(|state| state == 'Z');

    // Cleanup is the same regardless of the sampled state.
    parent.kill().ok();
    parent.wait().ok();
}