use std::fs;
use std::io::{Read, Write};
use std::os::unix::io::{AsFd, AsRawFd, BorrowedFd, FromRawFd, OwnedFd};
use std::os::unix::process::CommandExt;
use std::path::Path;
use std::process::{Child, Command, Stdio};
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use nix::mount::{MsFlags, mount};
use nix::poll::{PollFd, PollFlags, PollTimeout, poll};
use nix::pty::openpty;
use nix::sys::reboot::{RebootMode, reboot};
use nix::sys::stat::Mode;
use nix::sys::termios::{SetArg, cfmakeraw, tcgetattr, tcsetattr};
use nix::unistd::mkdir;
/// Second serial port device node. This file redirects stdout/stderr to it and
/// writes the `KTSTR_*` status markers and scheduler output there.
const COM2: &str = "/dev/ttyS1";
/// First serial port device node. This file sends panic text, the kernel log on
/// early failure, and the trace-pipe stream there.
const COM1: &str = "/dev/ttyS0";
/// Hypervisor console device node; chosen over COM2 as the shell console when
/// the node exists.
const HVC0: &str = "/dev/hvc0";
/// Returns `true` when the calling process has process id 1.
///
/// Uses the safe standard-library accessor instead of an `unsafe` FFI call to
/// `getpid`; both report the same value.
pub fn is_pid1() -> bool {
    std::process::id() == 1
}
/// Requests an immediate machine restart and never returns.
///
/// The outcome of the reboot request is intentionally discarded; if the call
/// comes back, the thread simply idles in one-second sleeps forever so the
/// `!` return type holds.
fn force_reboot() -> ! {
    let _ = reboot(RebootMode::RB_AUTOBOOT);
    let nap = std::time::Duration::from_secs(1);
    loop {
        std::thread::sleep(nap);
    }
}
/// Entry point for the guest init process; never returns.
///
/// Sequence, as written below:
/// 1. Install a panic hook that reports the crash and reboots.
/// 2. Ignore `SIGCHLD`, mount the base filesystems, and verify the
///    `/.ktstr_init_ok` marker file.
/// 3. Redirect stdout/stderr to COM2 and configure `tracing` from a
///    `RUST_LOG=` token on the kernel command line.
/// 4. In shell mode: run either a one-shot command from `/exec_cmd` or an
///    interactive PTY-backed shell, then reboot.
/// 5. Otherwise: start the scheduler and the background monitors, dispatch the
///    test, tear everything down, report the exit code, and reboot.
pub(crate) fn ktstr_guest_init() -> ! {
// Reference point for the boot-phase timings logged once tracing is up.
let t0 = std::time::Instant::now();
// Panic hook: send the message + backtrace over the shared-memory ring and
// both serial ports, flush and drain stdio, then reboot.
std::panic::set_hook(Box::new(|info| {
let bt = std::backtrace::Backtrace::force_capture();
let msg = format!("PANIC: {info}\n{bt}\n");
crate::vmm::shm_ring::write_msg_nonblocking(
crate::vmm::shm_ring::MSG_TYPE_CRASH,
msg.as_bytes(),
);
let _ = fs::write(COM2, &msg);
let _ = fs::write(COM1, &msg);
let _ = std::io::stdout().flush();
let _ = std::io::stderr().flush();
unsafe {
libc::tcdrain(1);
libc::tcdrain(2);
}
// Short pause before rebooting.
// NOTE(review): presumably gives the serial output time to leave the guest — confirm.
std::thread::sleep(std::time::Duration::from_millis(100));
force_reboot();
}));
// Ignore SIGCHLD for the lifetime of init (temporarily restored around the
// one-shot shell command below).
// NOTE(review): with SIGCHLD ignored, waiting on an exited child can fail
// with ECHILD; later wait/try_wait results must be read with that in mind.
unsafe {
libc::signal(libc::SIGCHLD, libc::SIG_IGN);
}
mount_filesystems();
let t_mounts = t0.elapsed();
// Missing marker file: dump the kernel log to both serial ports, print a
// fatal diagnostic, and reboot.
// NOTE(review): presumably the marker is unpacked last from the initramfs,
// so its absence means extraction was cut short — confirm with the image builder.
if !Path::new("/.ktstr_init_ok").exists() {
if let Ok(raw) = rmesg::logs_raw(rmesg::Backend::Default, false) {
let _ = fs::write(COM2, &raw);
let _ = fs::write(COM1, &raw);
}
let msg = "FATAL: initramfs extraction incomplete — kernel ran out of \
memory during cpio extraction. This indicates a bug in ktstr's \
memory estimation. Please report this issue. As a workaround, \
try `--memory N` with a larger value.";
let _ = fs::write(COM2, msg);
let _ = fs::write(COM1, msg);
eprintln!("{msg}");
force_reboot();
}
// Best-effort: turn on BPF runtime statistics.
let _ = fs::write("/proc/sys/kernel/bpf_stats_enabled", "1");
// The start marker is only emitted outside shell mode.
if !shell_mode_requested() {
write_com2("KTSTR_INIT_STARTED");
}
redirect_stdio_to_com2();
let t_stdio = t0.elapsed();
// Forward a `RUST_LOG=<value>` token from the kernel command line into the
// process environment so the env filter below sees it.
if let Ok(cmdline) = fs::read_to_string("/proc/cmdline")
&& let Some(val) = cmdline
.split_whitespace()
.find(|s| s.starts_with("RUST_LOG="))
.and_then(|s| s.strip_prefix("RUST_LOG="))
{
unsafe { std::env::set_var("RUST_LOG", val) };
}
let t_pre_subscriber = t0.elapsed();
// Tracing goes to stderr (COM2 after the redirect above), without ANSI colors.
tracing_subscriber::fmt()
.with_writer(std::io::stderr)
.with_ansi(false)
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let t_subscriber = t0.elapsed();
tracing::debug!(
mount_ms = t_mounts.as_millis() as u64,
stdio_ms = t_stdio.as_millis() as u64,
pre_subscriber_ms = t_pre_subscriber.as_millis() as u64,
subscriber_ms = t_subscriber.as_millis() as u64,
"guest_init_timing",
);
// PATH = directories holding executable include files, followed by /bin.
unsafe {
std::env::set_var("PATH", build_include_path());
}
// ---- Shell mode: never falls through to the test path. ----
if shell_mode_requested() {
let _shell_span = tracing::debug_span!("shell_mode").entered();
let console_dev = shell_console_device();
redirect_all_stdio_to(console_dev);
{
// Install busybox applet symlinks into /bin; failure is ignored.
let _s = tracing::debug_span!("busybox_install").entered();
let _ = Command::new("/bin/busybox")
.args(["--install", "-s", "/bin"])
.status();
}
mount_devpts();
// One-shot command mode: run the command, report its exit code, reboot.
if let Some(cmd) = shell_exec_cmd() {
tracing::debug!(cmd = %cmd, "shell exec mode");
// Turn off output post-processing (OPOST) on stdout's terminal.
let stdout_fd = unsafe { BorrowedFd::borrow_raw(1) };
if let Ok(mut termios) = tcgetattr(stdout_fd) {
termios
.output_flags
.remove(nix::sys::termios::OutputFlags::OPOST);
let _ = tcsetattr(stdout_fd, SetArg::TCSANOW, &termios);
}
// Restore default SIGCHLD handling while waiting on the command, then
// go back to ignoring it.
// NOTE(review): presumably needed so `status()` can collect the real exit status.
unsafe {
libc::signal(libc::SIGCHLD, libc::SIG_DFL);
}
let status = Command::new("/bin/busybox")
.args(["sh", "-c", &cmd])
.status();
unsafe {
libc::signal(libc::SIGCHLD, libc::SIG_IGN);
}
// A missing exit code or a spawn failure is reported as exit code 1.
let code = match status {
Ok(s) => s.code().unwrap_or(1),
Err(e) => {
eprintln!("ktstr-init: exec failed: {e}");
1
}
};
eprintln!("KTSTR_EXEC_EXIT={code}");
let _ = std::io::stdout().flush();
let _ = std::io::stderr().flush();
unsafe {
libc::tcdrain(1);
}
unsafe {
libc::tcdrain(2);
}
std::thread::sleep(std::time::Duration::from_millis(100));
force_reboot();
}
// Interactive mode: print a banner, then hand the console to a PTY shell.
// Kernel version is the third whitespace-separated field of /proc/version.
let kernel_version = fs::read_to_string("/proc/version")
.ok()
.and_then(|v| v.split_whitespace().nth(2).map(|s| s.to_string()))
.unwrap_or_else(|| "unknown".to_string());
// The MemTotal value from /proc/meminfo is divided by 1024 and shown as MB.
let mem_mb = fs::read_to_string("/proc/meminfo").ok().and_then(|s| {
s.lines()
.find(|l| l.starts_with("MemTotal:"))
.and_then(|l| l.split_whitespace().nth(1))
.and_then(|kb| kb.parse::<u64>().ok())
.map(|kb| kb / 1024)
});
println!("ktstr shell");
println!(" kernel: {kernel_version}");
if let Some(mb) = mem_mb {
println!(" memory: {mb} MB");
}
print_topology_line();
print_includes_line();
println!(" tools: busybox (ls, ps, top, dmesg, ip, vi, ...)");
println!(" mounts: /proc /sys /dev /sys/fs/cgroup /sys/fs/bpf /tmp");
println!(" /sys/kernel/debug /sys/kernel/tracing /dev/pts");
println!(" type `exit` for clean shutdown, Ctrl+A X to force-kill");
let _ = std::io::stdout().flush();
tracing::debug!("spawning interactive shell with PTY");
spawn_shell_with_pty();
force_reboot();
}
// ---- Test path ----
// argv is "/init" followed by one argument per line of /args (empty if the
// file cannot be read).
let args: Vec<String> = {
let content = fs::read_to_string("/args").unwrap_or_default();
let mut a = vec!["/init".to_string()];
a.extend(content.lines().map(|s| s.to_string()));
a
};
tracing::debug!(args = ?args, "parsed /args");
let _s_phase2b = tracing::debug_span!("phase2b_probe_phase_a").entered();
let probe_phase_a = crate::test_support::start_probe_phase_a(&args);
let probes_active = probe_phase_a.is_some();
drop(_s_phase2b);
// Scheduler bring-up: cgroup parent, enable script, then the scheduler binary.
let _s_phase3 = tracing::debug_span!("phase3_scheduler_start").entered();
create_cgroup_parent_from_sched_args();
exec_shell_script("/sched_enable");
let (mut sched_child, sched_log_path) = start_scheduler();
drop(_s_phase3);
// Background workers: trace-pipe forwarder and shared-memory poller.
let _s_phase4 = tracing::debug_span!("phase4_shm_trace").entered();
let (trace_stop, trace_handle) = start_trace_pipe();
let shm_stop = start_shm_poll(trace_stop.clone());
drop(_s_phase4);
crate::vmm::shm_ring::signal(1);
// The scheduler-exit monitor stays silent when probes are active.
let suppress_com2 = Arc::new(AtomicBool::new(probes_active));
let sched_exit_stop = start_sched_exit_monitor(
sched_child.as_ref().map(|c| c.id()),
sched_log_path.as_deref(),
suppress_com2,
);
let _s_phase5 = tracing::debug_span!("phase5_dispatch").entered();
tracing::debug!("dispatching test");
write_com2("KTSTR_PAYLOAD_STARTING");
// A dispatcher that returns `None` is reported as exit code 1.
let code = if let Some(pa) = probe_phase_a {
crate::test_support::maybe_dispatch_vm_test_with_phase_a(&args, pa).unwrap_or(1)
} else {
crate::test_support::maybe_dispatch_vm_test_with_args(&args).unwrap_or(1)
};
drop(_s_phase5);
let _ = std::io::stdout().flush();
let _ = std::io::stderr().flush();
crate::test_support::try_flush_profraw();
// ---- Cleanup ----
let _s_phase6 = tracing::debug_span!("phase6_cleanup").entered();
// Stop the scheduler and replay its captured log to COM2.
if let Some(ref mut child) = sched_child {
let _ = child.kill();
let _ = child.wait();
if let Some(ref log_path) = sched_log_path {
dump_sched_output(log_path);
}
}
exec_shell_script("/sched_disable");
// Ask the background workers to stop.
if let Some(ref stop) = shm_stop {
stop.store(true, Ordering::Release);
}
if let Some(ref stop) = sched_exit_stop {
stop.store(true, Ordering::Release);
}
let _ = fs::write(
"/sys/kernel/tracing/events/sched_ext/sched_ext_dump/enable",
"0",
);
if let Some(ref stop) = trace_stop {
stop.store(true, Ordering::Release);
}
let _ = fs::write("/sys/kernel/tracing/tracing_on", "0");
// Wait for the trace-pipe thread to finish.
if let Some(handle) = trace_handle {
let _ = handle.join();
}
// Drain COM1 and stdout before the exit code is published.
if let Ok(com1) = fs::OpenOptions::new().write(true).open(COM1) {
use std::os::unix::io::AsRawFd;
unsafe {
libc::tcdrain(com1.as_raw_fd());
}
}
unsafe {
libc::tcdrain(1);
}
// Publish the exit code over the shared-memory ring and as a COM2 marker.
crate::vmm::shm_ring::write_msg(
crate::vmm::shm_ring::MSG_TYPE_EXIT,
&(code as i32).to_ne_bytes(),
);
write_com2(&format!("KTSTR_EXIT={code}"));
if let Ok(com2) = fs::OpenOptions::new().write(true).open(COM2) {
use std::os::unix::io::AsRawFd;
unsafe {
libc::tcdrain(com2.as_raw_fd());
}
}
std::thread::sleep(std::time::Duration::from_millis(100));
force_reboot()
}
fn redirect_stdio_to_com2() {
use std::os::unix::io::AsRawFd;
let Ok(com2) = fs::OpenOptions::new().write(true).open(COM2) else {
return;
};
let fd = com2.as_raw_fd();
unsafe {
libc::dup2(fd, 1); libc::dup2(fd, 2); }
}
/// Reports whether the kernel command line contains the exact token
/// `KTSTR_MODE=shell`. An unreadable `/proc/cmdline` counts as "no".
fn shell_mode_requested() -> bool {
    match fs::read_to_string("/proc/cmdline") {
        Ok(cmdline) => cmdline
            .split_whitespace()
            .any(|token| token == "KTSTR_MODE=shell"),
        Err(_) => false,
    }
}
/// Returns the trimmed contents of `/exec_cmd`, or `None` when the file is
/// unreadable or holds only whitespace.
fn shell_exec_cmd() -> Option<String> {
    let raw = fs::read_to_string("/exec_cmd").ok()?;
    let command = raw.trim();
    if command.is_empty() {
        None
    } else {
        Some(command.to_string())
    }
}
/// Looks up `key=value` on the kernel command line and returns the value of
/// the first whitespace-separated token that starts with `key=`.
///
/// Returns `None` when `/proc/cmdline` cannot be read or no token matches.
fn cmdline_val(key: &str) -> Option<String> {
    let cmdline = fs::read_to_string("/proc/cmdline").ok()?;
    for token in cmdline.split_whitespace() {
        let value = token
            .strip_prefix(key)
            .and_then(|rest| rest.strip_prefix('='));
        if let Some(value) = value {
            return Some(value.to_string());
        }
    }
    None
}
fn build_include_path() -> String {
use std::collections::BTreeSet;
use std::os::unix::fs::PermissionsExt;
let include_dir = std::path::Path::new("/include-files");
let mut dirs = BTreeSet::new();
if include_dir.is_dir() {
for entry in walkdir::WalkDir::new(include_dir).follow_links(true) {
let Ok(entry) = entry else { continue };
if entry.file_type().is_file()
&& entry
.metadata()
.is_ok_and(|m| m.permissions().mode() & 0o111 != 0)
&& let Some(parent) = entry.path().parent()
{
dirs.insert(parent.to_string_lossy().to_string());
}
}
}
let mut path_parts: Vec<String> = dirs.into_iter().collect();
path_parts.push("/bin".to_string());
path_parts.join(":")
}
fn redirect_all_stdio_to(path: &str) {
use std::os::unix::io::AsRawFd;
let Ok(dev) = fs::OpenOptions::new().read(true).write(true).open(path) else {
return;
};
let fd = dev.as_raw_fd();
unsafe {
libc::dup2(fd, 0); libc::dup2(fd, 1); libc::dup2(fd, 2); }
}
/// Picks the console device for shell mode: HVC0 when that node exists,
/// otherwise COM2.
fn shell_console_device() -> &'static str {
    let has_hvc = Path::new(HVC0).exists();
    if has_hvc {
        return HVC0;
    }
    COM2
}
/// Creates `/dev/pts` if needed and mounts a `devpts` filesystem on it.
/// A mount failure is logged to stderr and otherwise ignored.
fn mount_devpts() {
    const TARGET: &str = "/dev/pts";
    mkdir_p(TARGET);
    let outcome = mount(
        Some("devpts"),
        TARGET,
        Some("devpts"),
        MsFlags::empty(),
        None::<&str>,
    );
    match outcome {
        Ok(()) => {}
        Err(e) => eprintln!("ktstr-init: mount devpts on /dev/pts: {e}"),
    }
}
/// Runs an interactive busybox shell on a freshly allocated PTY and proxies
/// the console to it until the shell goes away.
///
/// Returns early (after logging to stderr) when the PTY cannot be opened or
/// the shell cannot be spawned.
fn spawn_shell_with_pty() {
let pty = match openpty(None, None) {
Ok(p) => p,
Err(e) => {
eprintln!("ktstr-init: openpty failed: {e}");
return;
}
};
let slave_fd = pty.slave.as_raw_fd();
// Apply the window size from KTSTR_COLS / KTSTR_ROWS on the kernel command
// line when both are present and parse as u16.
if let (Some(cols), Some(rows)) = (cmdline_val("KTSTR_COLS"), cmdline_val("KTSTR_ROWS"))
&& let (Ok(cols), Ok(rows)) = (cols.parse::<u16>(), rows.parse::<u16>())
{
let ws = libc::winsize {
ws_row: rows,
ws_col: cols,
ws_xpixel: 0,
ws_ypixel: 0,
};
unsafe {
libc::ioctl(slave_fd, libc::TIOCSWINSZ, &ws);
}
}
// TERM defaults to "linux"; COLORTERM is only exported when supplied.
let term = cmdline_val("KTSTR_TERM").unwrap_or_else(|| "linux".to_string());
let colorterm = cmdline_val("KTSTR_COLORTERM");
let child = unsafe {
let mut cmd = Command::new("/bin/busybox");
cmd.arg("sh")
.env("TERM", &term)
.env("PS1", "\x1b[2m^Ax=quit\x1b[0m \\w # ");
if let Some(ref ct) = colorterm {
cmd.env("COLORTERM", ct);
}
// Each stdio slot gets its own duplicate of the slave descriptor.
// NOTE(review): a failed `dup` returns -1, which is wrapped unchecked here.
cmd.stdin(Stdio::from(OwnedFd::from_raw_fd(libc::dup(slave_fd))))
.stdout(Stdio::from(OwnedFd::from_raw_fd(libc::dup(slave_fd))))
.stderr(Stdio::from(OwnedFd::from_raw_fd(libc::dup(slave_fd))))
// Runs in the child before exec: start a new session and make the PTY
// slave its controlling terminal.
.pre_exec(move || {
if libc::setsid() < 0 {
return Err(std::io::Error::last_os_error());
}
if libc::ioctl(slave_fd, libc::TIOCSCTTY, 0) < 0 {
return Err(std::io::Error::last_os_error());
}
Ok(())
})
.spawn()
};
// The parent's copy of the slave is no longer needed once spawn has returned.
drop(pty.slave);
let mut child = match child {
Ok(c) => c,
Err(e) => {
eprintln!("ktstr-init: spawn shell: {e}");
return;
}
};
let child_pid = child.id();
// Put the console (fd 0) into raw mode for the proxy loop below.
let stdin_fd = unsafe { BorrowedFd::borrow_raw(0) };
if let Ok(mut termios) = tcgetattr(stdin_fd) {
cfmakeraw(&mut termios);
let _ = tcsetattr(stdin_fd, SetArg::TCSANOW, &termios);
}
proxy_serial_pty(&pty.master, child_pid);
match child.wait() {
Ok(status) => {
tracing::debug!(?status, "shell exited");
}
// ECHILD is tolerated silently.
// NOTE(review): presumably because init ignores SIGCHLD, so the child may
// already have been reaped — confirm.
Err(e) if e.raw_os_error() == Some(libc::ECHILD) => {}
Err(e) => {
eprintln!("ktstr-init: wait for shell: {e}");
}
}
}
/// Shuttles bytes between the console (fds 0 and 1) and the PTY `master`
/// until either side closes or the shell process disappears.
///
/// * `master` — PTY master whose slave side is the shell's terminal.
/// * `child_pid` — shell PID; on every 200 ms poll timeout the loop exits if
///   `/proc/<child_pid>` no longer exists.
///
/// On the master side readable data is consumed *before* a hang-up is acted
/// on, so output the shell wrote just before exiting is still forwarded. The
/// loop ends once the master reports end-of-file, a read error, or a hang-up
/// with nothing left to read. Writes are retried until the whole chunk has
/// been written; a write error drops the remainder of that chunk.
fn proxy_serial_pty(master: &OwnedFd, child_pid: u32) {
    /// Writes all of `data` to `fd`, retrying short writes and `EINTR`.
    /// Any other error (or a zero-length write) abandons the rest of `data`.
    fn write_all_fd(fd: BorrowedFd<'_>, mut data: &[u8]) {
        while !data.is_empty() {
            match nix::unistd::write(fd, data) {
                Ok(0) => return,
                Ok(written) => data = &data[written..],
                Err(nix::errno::Errno::EINTR) => {}
                Err(_) => return,
            }
        }
    }

    // SAFETY: fds 0 and 1 are the process's stdio descriptors and stay open
    // for the lifetime of this function.
    let stdin_fd = unsafe { BorrowedFd::borrow_raw(0) };
    let stdout_fd = unsafe { BorrowedFd::borrow_raw(1) };
    let master_fd = master.as_fd();
    let mut buf = [0u8; 4096];
    let hangup = PollFlags::POLLERR | PollFlags::POLLHUP;
    loop {
        let mut pollfds = [
            PollFd::new(stdin_fd, PollFlags::POLLIN),
            PollFd::new(master_fd, PollFlags::POLLIN),
        ];
        match poll(&mut pollfds, PollTimeout::from(200u16)) {
            Ok(0) => {
                // Idle tick: stop once the shell process is gone.
                if !Path::new(&format!("/proc/{child_pid}")).exists() {
                    break;
                }
                continue;
            }
            Ok(_) => {}
            Err(nix::errno::Errno::EINTR) => continue,
            Err(_) => break,
        }
        // Console -> shell.
        if let Some(revents) = pollfds[0].revents() {
            if revents.contains(PollFlags::POLLIN) {
                match nix::unistd::read(stdin_fd, &mut buf) {
                    Ok(0) => break,
                    Ok(n) => write_all_fd(master_fd, &buf[..n]),
                    Err(nix::errno::Errno::EINTR) => {}
                    Err(_) => break,
                }
            }
            if revents.intersects(hangup) {
                break;
            }
        }
        // Shell -> console. Drain readable data first; only treat a hang-up
        // as final when there is nothing left to read.
        if let Some(revents) = pollfds[1].revents() {
            if revents.contains(PollFlags::POLLIN) {
                match nix::unistd::read(master_fd, &mut buf) {
                    Ok(0) => break,
                    Ok(n) => write_all_fd(stdout_fd, &buf[..n]),
                    Err(nix::errno::Errno::EINTR) => {}
                    Err(_) => break,
                }
            } else if revents.intersects(hangup) {
                break;
            }
        }
    }
}
/// Prints the banner's topology line.
///
/// Uses the `KTSTR_TOPO` description when it parses (mentioning NUMA nodes
/// only when there is more than one); otherwise falls back to the online CPU
/// count. Prints nothing when neither source is available.
fn print_topology_line() {
    /// Plural suffix for a count.
    fn plural(count: u32) -> &'static str {
        if count == 1 { "" } else { "s" }
    }

    match parse_topo_from_cmdline() {
        Some((n, l, c, t)) => {
            let total = l * c * t;
            if n > 1 {
                println!(
                    " topology: {n} NUMA nodes, {l} LLC{}, {c} core{}, {t} thread{} ({total} vCPU{})",
                    plural(l),
                    plural(c),
                    plural(t),
                    plural(total),
                );
            } else {
                println!(
                    " topology: {l} LLC{}, {c} core{}, {t} thread{} ({total} vCPU{})",
                    plural(l),
                    plural(c),
                    plural(t),
                    plural(total),
                );
            }
        }
        None => {
            if let Some(count) = count_online_cpus() {
                println!(" topology: {count} vCPU{}", plural(count));
            }
        }
    }
}
/// Parses `KTSTR_TOPO=<a>,<b>,<c>,<d>` from the kernel command line into a
/// tuple of four `u32` values, in the order they appear.
///
/// Returns `None` when the key is absent, the value does not have exactly
/// four comma-separated fields, or any field is not a valid `u32`.
fn parse_topo_from_cmdline() -> Option<(u32, u32, u32, u32)> {
    let raw = cmdline_val("KTSTR_TOPO")?;
    let fields: Vec<u32> = raw
        .split(',')
        .map(|field| field.parse::<u32>())
        .collect::<Result<_, _>>()
        .ok()?;
    let &[n, l, c, t] = fields.as_slice() else {
        return None;
    };
    Some((n, l, c, t))
}
/// Counts the CPUs listed in `/sys/devices/system/cpu/online`.
///
/// Returns `None` when the file cannot be read or its contents are not a
/// well-formed CPU list (see [`count_cpus_in_list`]).
fn count_online_cpus() -> Option<u32> {
    let content = fs::read_to_string("/sys/devices/system/cpu/online").ok()?;
    count_cpus_in_list(&content)
}

/// Counts the CPUs in a comma-separated list of ids and inclusive `a-b`
/// ranges, e.g. `"0-3,8"`. Surrounding whitespace is ignored.
///
/// Returns `None` for an empty list, a non-numeric entry, a reversed range
/// (`b < a`), or a total that does not fit in `u32`. Checked arithmetic is
/// used so malformed input can never panic or wrap.
fn count_cpus_in_list(list: &str) -> Option<u32> {
    let mut total = 0u32;
    for item in list.trim().split(',') {
        let span = match item.split_once('-') {
            Some((first, last)) => {
                let first: u32 = first.parse().ok()?;
                let last: u32 = last.parse().ok()?;
                last.checked_sub(first)?.checked_add(1)?
            }
            None => {
                // Validate the single id; it contributes exactly one CPU.
                item.parse::<u32>().ok()?;
                1
            }
        };
        total = total.checked_add(span)?;
    }
    Some(total)
}
/// Prints the banner's list of files under `/include-files`.
///
/// Files are listed in file-name-sorted walk order, each shown with its
/// `/include-files/` path and tagged ` (executable)` when any execute bit is
/// set. The first line carries the `includes:` label. Prints nothing when the
/// directory is missing or contains no regular files.
fn print_includes_line() {
    use std::os::unix::fs::PermissionsExt;

    let root = Path::new("/include-files");
    if !root.is_dir() {
        return;
    }
    let listing: Vec<(String, bool)> = walkdir::WalkDir::new(root)
        .min_depth(1)
        .sort_by_file_name()
        .into_iter()
        .filter_map(Result::ok)
        .filter(|found| found.file_type().is_file())
        .map(|found| {
            let shown = found
                .path()
                .strip_prefix(root)
                .unwrap_or(found.path())
                .to_string_lossy()
                .to_string();
            // Unreadable metadata counts as "not executable".
            let executable = found
                .metadata()
                .is_ok_and(|meta| meta.permissions().mode() & 0o111 != 0);
            (shown, executable)
        })
        .collect();
    for (index, (name, executable)) in listing.iter().enumerate() {
        let marker = if *executable { " (executable)" } else { "" };
        let path = format!("/include-files/{name}{marker}");
        match index {
            0 => println!(" includes: {path}"),
            _ => println!(" {path}"),
        }
    }
}
/// Mounts the base filesystems and creates the usual `/dev` symlinks.
///
/// Each mount point is created first. A failed mount is reported on stderr
/// only for entries flagged as required; other failures are ignored. Symlink
/// creation is best-effort.
fn mount_filesystems() {
    // (mount point, source, filesystem type, report failure?)
    const TABLE: [(&str, &str, &str, bool); 10] = [
        ("/proc", "proc", "proc", true),
        ("/sys", "sys", "sysfs", true),
        ("/dev", "dev", "devtmpfs", true),
        ("/sys/kernel/debug", "debugfs", "debugfs", false),
        ("/sys/kernel/tracing", "tracefs", "tracefs", false),
        ("/sys/fs/bpf", "bpffs", "bpf", false),
        ("/sys/fs/cgroup", "none", "cgroup2", false),
        ("/tmp", "tmpfs", "tmpfs", true),
        ("/dev/shm", "tmpfs", "tmpfs", false),
        ("/run", "tmpfs", "tmpfs", false),
    ];
    for (target, source, fstype, required) in TABLE {
        mkdir_p(target);
        let outcome = mount(
            Some(source),
            target,
            Some(fstype),
            MsFlags::empty(),
            None::<&str>,
        );
        match outcome {
            Err(e) if required => eprintln!("ktstr-init: mount {fstype} on {target}: {e}"),
            _ => {}
        }
    }
    // (link target, link path)
    let links = [
        ("/proc/self/fd", "/dev/fd"),
        ("/proc/self/fd/0", "/dev/stdin"),
        ("/proc/self/fd/1", "/dev/stdout"),
        ("/proc/self/fd/2", "/dev/stderr"),
    ];
    for (original, link) in links {
        let _ = std::os::unix::fs::symlink(original, link);
    }
}
/// Creates `path` and any missing ancestors with mode `0o755`.
///
/// Returns immediately if `path` already exists. The parent chain is built
/// recursively; errors from the final `mkdir` are ignored.
fn mkdir_p(path: &str) {
    let target = Path::new(path);
    if target.exists() {
        return;
    }
    // A parent is only created when it has a non-empty UTF-8 form, is not the
    // root, and does not exist yet.
    let missing_parent = target
        .parent()
        .and_then(|dir| dir.to_str().map(|text| (dir, text)))
        .filter(|(dir, text)| !text.is_empty() && *text != "/" && !dir.exists());
    if let Some((_, parent_text)) = missing_parent {
        mkdir_p(parent_text);
    }
    let _ = mkdir(target, Mode::from_bits_truncate(0o755));
}
/// Writes `msg` plus a newline to COM2. When COM2 cannot be opened the
/// message goes to stderr instead, tagged as a fallback.
fn write_com2(msg: &str) {
    match fs::OpenOptions::new().write(true).open(COM2) {
        Ok(mut port) => {
            let _ = writeln!(port, "{msg}");
        }
        Err(_) => eprintln!("ktstr-init [COM1 fallback]: {msg}"),
    }
}
/// Pre-creates the cgroup named by `--cell-parent-cgroup <path>` in
/// `/sched_args`.
///
/// The directory `/sys/fs/cgroup<path>` is created and `+cpuset +cpu` is
/// written to its parent's `cgroup.subtree_control` (best-effort). Only the
/// first occurrence of the flag that is followed by a value is honored; the
/// function is a no-op when the file is unreadable or the flag is absent.
#[tracing::instrument]
fn create_cgroup_parent_from_sched_args() {
    let Ok(sched_args) = fs::read_to_string("/sched_args") else {
        return;
    };
    let tokens: Vec<&str> = sched_args.split_whitespace().collect();
    let flag_value = tokens
        .windows(2)
        .find(|pair| pair[0] == "--cell-parent-cgroup")
        .map(|pair| pair[1]);
    let Some(path) = flag_value else {
        return;
    };
    let cgroup_dir = format!("/sys/fs/cgroup{path}");
    mkdir_p(&cgroup_dir);
    let parent = Path::new(&cgroup_dir)
        .parent()
        .unwrap_or(Path::new("/sys/fs/cgroup"));
    let _ = fs::write(parent.join("cgroup.subtree_control"), "+cpuset +cpu");
}
#[tracing::instrument]
fn start_scheduler() -> (Option<Child>, Option<String>) {
if !Path::new("/scheduler").exists() {
return (None, None);
}
let sched_args = fs::read_to_string("/sched_args")
.unwrap_or_default()
.trim()
.to_string();
let args: Vec<&str> = if sched_args.is_empty() {
vec![]
} else {
sched_args.split_whitespace().collect()
};
let log_path = "/tmp/sched.log";
let log_file = fs::File::create(log_path).ok();
let stdout = match log_file.as_ref().and_then(|f| f.try_clone().ok()) {
Some(f) => Stdio::from(f),
None => Stdio::null(),
};
let stderr = match log_file {
Some(f) => Stdio::from(f),
None => Stdio::null(),
};
let sched_rust_log = match std::env::var("RUST_LOG") {
Ok(existing) => format!("{existing},scx_utils::libbpf_logger=warn"),
Err(_) => "info,scx_utils::libbpf_logger=warn".to_string(),
};
let child = Command::new("/scheduler")
.args(&args)
.env("RUST_LOG", &sched_rust_log)
.stdout(stdout)
.stderr(stderr)
.spawn();
match child {
Ok(mut child) => {
unsafe {
std::env::set_var("SCHED_PID", child.id().to_string());
}
std::thread::sleep(std::time::Duration::from_secs(1));
match child.try_wait() {
Ok(Some(_status)) => {
write_com2("===SCHED_OUTPUT_START===");
dump_file_to_com2(log_path);
write_com2("===SCHED_OUTPUT_END===");
write_com2("SCHEDULER_DIED");
write_com2("KTSTR_EXIT=1");
force_reboot();
}
Ok(None) => {
(Some(child), Some(log_path.to_string()))
}
Err(e) => {
eprintln!("ktstr-init: check scheduler status: {e}");
(Some(child), Some(log_path.to_string()))
}
}
}
Err(e) => {
eprintln!("ktstr-init: spawn scheduler: {e}");
write_com2("===SCHED_OUTPUT_START===");
write_com2(&format!("failed to spawn: {e}"));
write_com2("===SCHED_OUTPUT_END===");
write_com2("SCHEDULER_DIED");
write_com2("KTSTR_EXIT=1");
force_reboot();
}
}
}
/// Replays the scheduler log at `log_path` to COM2, bracketed by the
/// start/end marker lines.
fn dump_sched_output(log_path: &str) {
    let (begin, end) = ("===SCHED_OUTPUT_START===", "===SCHED_OUTPUT_END===");
    write_com2(begin);
    dump_file_to_com2(log_path);
    write_com2(end);
}
fn dump_file_to_com2(path: &str) {
if let Ok(content) = fs::read_to_string(path)
&& let Ok(mut f) = fs::OpenOptions::new().write(true).open(COM2)
{
let _ = f.write_all(content.as_bytes());
}
}
/// Enables the `sched_ext_dump` trace event and starts a thread that copies
/// `/sys/kernel/tracing/trace_pipe` to COM1.
///
/// Returns `(stop_flag, join_handle)`. Both are `None` when the trace event
/// does not exist; the handle alone is `None` when the thread could not be
/// spawned. After the stop flag is set the thread keeps forwarding for at
/// most five more seconds (checked between reads) and then exits; it also
/// exits on end-of-file or a non-interrupt read error.
fn start_trace_pipe() -> (Option<Arc<AtomicBool>>, Option<std::thread::JoinHandle<()>>) {
    use std::time::{Duration, Instant};

    let trace_enable = "/sys/kernel/tracing/events/sched_ext/sched_ext_dump/enable";
    if !Path::new(trace_enable).exists() {
        return (None, None);
    }
    let _ = fs::write(trace_enable, "1");

    let stop = Arc::new(AtomicBool::new(false));
    let stop_flag = Arc::clone(&stop);
    let pump = move || {
        let Ok(mut trace) = fs::File::open("/sys/kernel/tracing/trace_pipe") else {
            return;
        };
        let Ok(mut com1) = fs::OpenOptions::new().write(true).open(COM1) else {
            return;
        };
        let mut chunk = [0u8; 4096];
        // Set once, the first time the stop flag is observed.
        let mut drain_deadline: Option<Instant> = None;
        loop {
            if drain_deadline.is_none() && stop_flag.load(Ordering::Acquire) {
                drain_deadline = Some(Instant::now() + Duration::from_secs(5));
            }
            if drain_deadline.is_some_and(|deadline| Instant::now() >= deadline) {
                break;
            }
            match trace.read(&mut chunk) {
                Ok(0) => break,
                Ok(len) => {
                    let _ = com1.write_all(&chunk[..len]);
                }
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(_) => break,
            }
        }
    };
    let handle = std::thread::Builder::new()
        .name("trace-pipe".into())
        .spawn(pump)
        .ok();
    (Some(stop), handle)
}
/// Starts the background thread that polls the shared-memory region described
/// on the kernel command line.
///
/// Returns the thread's stop flag, or `None` when `/proc/cmdline` is
/// unreadable or carries no shared-memory parameters. A failure to spawn the
/// thread is ignored; the stop flag is still returned.
fn start_shm_poll(trace_stop: Option<Arc<AtomicBool>>) -> Option<Arc<AtomicBool>> {
    let cmdline = fs::read_to_string("/proc/cmdline").ok()?;
    let (shm_base, shm_size) = crate::vmm::shm_ring::parse_shm_params_from_str(&cmdline)?;

    let stop = Arc::new(AtomicBool::new(false));
    let worker_stop = Arc::clone(&stop);
    let worker = move || {
        shm_poll_loop(shm_base, shm_size, &worker_stop, trace_stop.as_deref());
    };
    let _ = std::thread::Builder::new()
        .name("shm-poll".into())
        .spawn(worker);
    Some(stop)
}
/// Maps the shared-memory region through `/dev/mem` and polls it every 200 ms
/// for requests until `stop` is set or a shutdown request arrives.
///
/// * `shm_base`, `shm_size` — physical base address and length of the region.
/// * `stop` — set by the caller to end the loop.
/// * `trace_stop` — when present, set on a shutdown request so the trace-pipe
///   thread winds down as well.
///
/// Requests handled:
/// * byte `b'D'` at `DUMP_REQ_OFFSET` — writes `D` to `/proc/sysrq-trigger`,
///   then clears the byte;
/// * byte `b'S'` at `STALL_REQ_OFFSET` — creates `/tmp/ktstr_stall`, then
///   clears the byte;
/// * signal slot 0 equal to `SIGNAL_SHUTDOWN_REQ` — stops tracing, flushes
///   stdio, drains COM1 and COM2, and leaves the loop.
///
/// The request bytes live in memory that is written from outside this
/// process, so they are accessed with volatile reads and writes.
fn shm_poll_loop(shm_base: u64, shm_size: u64, stop: &AtomicBool, trace_stop: Option<&AtomicBool>) {
    use std::os::unix::io::AsRawFd;
    let devmem = match fs::OpenOptions::new()
        .read(true)
        .write(true)
        .open("/dev/mem")
    {
        Ok(f) => f,
        Err(e) => {
            eprintln!("ktstr-init: /dev/mem open failed: {e}");
            return;
        }
    };
    let m = match crate::vmm::shm_ring::mmap_devmem(devmem.as_raw_fd(), shm_base, shm_size) {
        Some(m) => m,
        None => {
            eprintln!(
                "ktstr-init: /dev/mem mmap failed: base={shm_base:#x} size={shm_size:#x} err={}",
                std::io::Error::last_os_error(),
            );
            return;
        }
    };
    let shm_ptr = m.ptr;
    crate::vmm::shm_ring::init_shm_ptr(shm_ptr, shm_size as usize);
    let dump_offset = crate::vmm::shm_ring::DUMP_REQ_OFFSET;
    let stall_offset = crate::vmm::shm_ring::STALL_REQ_OFFSET;
    while !stop.load(Ordering::Acquire) {
        // SAFETY: `shm_ptr` points at the mapping held by `m`, which outlives
        // this loop. NOTE(review): assumes both request offsets lie inside the
        // `shm_size`-byte mapping; that is not checked here.
        unsafe {
            let dump_req = shm_ptr.add(dump_offset);
            if dump_req.read_volatile() == b'D' {
                let _ = fs::write("/proc/sysrq-trigger", "D");
                dump_req.write_volatile(0);
            }
            let stall_req = shm_ptr.add(stall_offset);
            if stall_req.read_volatile() == b'S' {
                let _ = fs::File::create("/tmp/ktstr_stall");
                stall_req.write_volatile(0);
            }
        }
        if crate::vmm::shm_ring::read_signal(0) == crate::vmm::shm_ring::SIGNAL_SHUTDOWN_REQ {
            eprintln!("ktstr-init: shutdown request received, draining");
            if let Some(ts) = trace_stop {
                ts.store(true, Ordering::Release);
            }
            let _ = fs::write("/sys/kernel/tracing/tracing_on", "0");
            let _ = std::io::stdout().flush();
            let _ = std::io::stderr().flush();
            // Drain both serial ports, COM1 first.
            for port in [COM1, COM2] {
                if let Ok(f) = fs::OpenOptions::new().write(true).open(port) {
                    // SAFETY: `f` is open for the duration of the call.
                    unsafe {
                        libc::tcdrain(f.as_raw_fd());
                    }
                }
            }
            break;
        }
        std::thread::sleep(std::time::Duration::from_millis(200));
    }
}
/// Starts a thread that watches for the scheduler process disappearing.
///
/// * `sched_pid` — scheduler PID; `None` means there is nothing to watch and
///   the function returns `None`.
/// * `log_path` — scheduler log replayed to COM2 when the exit is reported.
/// * `suppress_com2` — when set at the moment the exit is noticed, nothing is
///   reported.
///
/// Every 200 ms the thread checks `/proc/<pid>`. Once it is gone the thread
/// ends; unless suppressed it first sends a `MSG_TYPE_SCHED_EXIT` message
/// carrying exit code 1 and dumps the log. Returns the thread's stop flag; a
/// failure to spawn the thread is ignored.
fn start_sched_exit_monitor(
    sched_pid: Option<u32>,
    log_path: Option<&str>,
    suppress_com2: Arc<AtomicBool>,
) -> Option<Arc<AtomicBool>> {
    let pid = sched_pid?;
    let proc_path = format!("/proc/{pid}");
    let log_path: Option<String> = log_path.map(str::to_owned);

    let stop = Arc::new(AtomicBool::new(false));
    let watcher_stop = Arc::clone(&stop);
    let watch = move || {
        loop {
            if watcher_stop.load(Ordering::Acquire) {
                return;
            }
            if Path::new(&proc_path).exists() {
                std::thread::sleep(std::time::Duration::from_millis(200));
                continue;
            }
            // The scheduler is gone.
            if suppress_com2.load(Ordering::Acquire) {
                return;
            }
            crate::vmm::shm_ring::write_msg(
                crate::vmm::shm_ring::MSG_TYPE_SCHED_EXIT,
                &1i32.to_ne_bytes(),
            );
            if let Some(path) = log_path.as_deref() {
                dump_sched_output(path);
            }
            return;
        }
    };
    let _ = std::thread::Builder::new()
        .name("sched-exit-mon".into())
        .spawn(watch);
    Some(stop)
}
#[tracing::instrument]
fn exec_shell_script(path: &str) {
let content = match fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return,
};
for line in content.lines() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
exec_shell_line(line);
}
}
/// Executes one script line. The only supported form is
/// `echo <value> > <path>`, which writes `<value>` plus a newline to `<path>`.
///
/// A failed write and any other kind of line are reported on stderr; neither
/// is fatal.
fn exec_shell_line(line: &str) {
    let redirect = line
        .strip_prefix("echo ")
        .and_then(|rest| rest.split_once(" > "));
    let Some((value, path)) = redirect else {
        eprintln!("ktstr-init: unsupported command: {line}");
        return;
    };
    let (value, path) = (value.trim(), path.trim());
    if let Err(e) = fs::write(path, format!("{value}\n")) {
        eprintln!("ktstr-init: echo '{value}' > {path}: {e}");
    }
}
/// Host-side unit tests. Several of them rely on the test host *not* being a
/// ktstr guest (no `KTSTR_*` tokens on its kernel command line, not PID 1).
#[cfg(test)]
mod tests {
    use super::*;

    /// A three-level path under the temp dir is created in one call.
    #[test]
    fn mkdir_p_creates_nested() {
        let root = std::env::temp_dir().join("ktstr-rust-init-test-mkdir");
        let _ = fs::remove_dir_all(&root);
        let leaf = root.join("a/b/c");
        let leaf_text = leaf.to_str().unwrap();
        mkdir_p(leaf_text);
        assert!(leaf.exists());
        let _ = fs::remove_dir_all(&root);
    }

    /// Calling `mkdir_p` on an existing directory must simply return.
    #[test]
    fn mkdir_p_existing_is_noop() {
        let existing = std::env::temp_dir();
        let existing_text = existing.to_str().unwrap();
        mkdir_p(existing_text);
    }

    /// `echo <value> > <path>` writes the value followed by a newline.
    #[test]
    fn exec_shell_line_echo_redirect() {
        let target = std::env::temp_dir().join("ktstr-rust-init-echo-test");
        let command = format!("echo 42 > {}", target.to_str().unwrap());
        exec_shell_line(&command);
        let written = fs::read_to_string(&target).unwrap();
        assert_eq!(written, "42\n");
        let _ = fs::remove_file(&target);
    }

    /// Input that is not an echo-redirect is reported, never a panic.
    #[test]
    fn exec_shell_line_unsupported_input_no_panic() {
        exec_shell_line("# this is a comment");
    }

    /// The test runner is never PID 1.
    #[test]
    fn is_pid1_false_in_test() {
        let pid1 = is_pid1();
        assert!(!pid1);
    }

    /// The host's kernel command line does not request shell mode.
    #[test]
    fn shell_mode_not_requested_in_test() {
        let requested = shell_mode_requested();
        assert!(!requested);
    }

    /// The host exposes at least one online CPU through sysfs.
    #[test]
    fn count_online_cpus_returns_some() {
        let online = count_online_cpus();
        assert!(online.is_some());
        assert!(online.is_some_and(|n| n >= 1));
    }

    /// The host's kernel command line carries no `KTSTR_TOPO` value.
    #[test]
    fn parse_topo_from_cmdline_not_present_on_host() {
        let topo = parse_topo_from_cmdline();
        assert!(topo.is_none());
    }
}