use std::fs;
use std::path::{Path, PathBuf};
use std::thread;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use netsky_core::consts::{AGENT0_NAME, AGENTINFINITY_NAME, RESTART_REQUEST_FILE};
use netsky_core::paths::{handoff_archive_dir, home};
use netsky_sh::tmux;
/// Path of the drill log file, relative to `home()`. Despite the `SUBDIR`
/// suffix this names the log *file*; `Drill::new` creates its parent directory.
const DRILL_LOG_SUBDIR: &str = "Library/Logs/netsky-drills.log";
/// Seconds `Drill::wait_for_session` waits for a killed tmux session to reappear.
const WAIT_SECS: u64 = 150;
/// Timeout in seconds that both drills pass to the envelope wait and the
/// `/up` report-line wait.
const UP_WAIT_SECS: u64 = 720;
/// Text drill 2 searches for in archived handoff envelopes after killing agent0
/// without staging a restart request.
const CRASH_SIGIL: &str = "crash-recovery restart by agentinfinity watchdog";
/// Runs the acceptance drill selected by `n`.
///
/// # Errors
/// Propagates whatever the selected drill returns, and fails with an
/// "unknown drill" error for any number other than 1 or 2.
pub fn run(n: u8) -> netsky_core::Result<()> {
    if n == 1 {
        return drill_one();
    }
    if n == 2 {
        return drill_two();
    }
    netsky_core::bail!("unknown drill {n} (supported: 1, 2)")
}
/// Drill 1: restart agent0 with a staged handoff request.
///
/// Steps, in order:
/// 1. verify both tmux sessions are alive (`Drill::pre_checks`);
/// 2. write a handoff body carrying a unique marker to `RESTART_REQUEST_FILE`;
/// 3. kill the agent0 tmux session and wait for it to come back;
/// 4. fail if the request file is still on disk after the respawn;
/// 5. wait for an archived envelope containing the marker, then for the
///    `/up` report line in the agent0 pane.
///
/// # Errors
/// Returns the first failure from any step; I/O and tmux errors are
/// propagated unchanged.
fn drill_one() -> netsky_core::Result<()> {
    let drill = Drill::new("drill-1")?;
    drill.pre_checks()?;

    // Wall-clock cutoff: envelopes last modified before this second are ignored.
    let pre_cutoff = unix_now();

    // The marker combines a UTC timestamp with this process's id.
    let marker_stamp = chrono::Utc::now().format("%Y%m%dT%H%M%SZ").to_string();
    let pid = std::process::id();
    let marker = format!("netsky-drill-1 marker {marker_stamp}-{pid}");

    let staged_at = chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
    let body = format!(
        "planned restart by netsky-drill-1 at {staged_at}.\n\n{marker}\n\nthis is an acceptance-test handoff. \
         the new agent0 should /up and report normally. no further action expected.\n"
    );

    fs::write(RESTART_REQUEST_FILE, &body)?;
    let staged_note = format!("staged handoff at {RESTART_REQUEST_FILE} (marker: {marker})");
    drill.log(&staged_note);

    tmux::kill_session(AGENT0_NAME)?;
    let killed_note = format!("killed agent0 tmux session; waiting up to {WAIT_SECS}s for respawn");
    drill.log(&killed_note);
    drill.wait_for_session(AGENT0_NAME)?;

    // The request file is expected to be gone once the session is back.
    let request_unclaimed = Path::new(RESTART_REQUEST_FILE).exists();
    if request_unclaimed {
        drill.fail("handoff request still present after respawn; watchdog did not claim")?;
    }

    drill.wait_for_envelope_with(&marker, pre_cutoff, UP_WAIT_SECS)?;
    drill.wait_for_up_marker(AGENT0_NAME, UP_WAIT_SECS)?;
    drill.pass()
}
/// Drill 2: kill agent0 with no handoff request staged.
///
/// Refuses to run if `RESTART_REQUEST_FILE` already exists. Otherwise it
/// kills the agent0 tmux session, waits for it to come back, waits for an
/// archived envelope containing `CRASH_SIGIL`, and finally waits for the
/// `/up` report line in the agent0 pane.
///
/// # Errors
/// Returns the first failure from any step; tmux errors are propagated
/// unchanged.
fn drill_two() -> netsky_core::Result<()> {
    let drill = Drill::new("drill-2")?;
    drill.pre_checks()?;

    let stale_request = Path::new(RESTART_REQUEST_FILE).exists();
    if stale_request {
        let reason = format!(
            "stale {RESTART_REQUEST_FILE} present; refusing to trample (drill-2 must not have a handoff request)"
        );
        drill.fail(&reason)?;
    }

    // Wall-clock cutoff: envelopes last modified before this second are ignored.
    let pre_cutoff = unix_now();

    tmux::kill_session(AGENT0_NAME)?;
    let killed_note = format!("killed agent0 tmux session; waiting up to {WAIT_SECS}s for respawn");
    drill.log(&killed_note);

    drill.wait_for_session(AGENT0_NAME)?;
    drill.wait_for_envelope_with(CRASH_SIGIL, pre_cutoff, UP_WAIT_SECS)?;
    drill.wait_for_up_marker(AGENT0_NAME, UP_WAIT_SECS)?;
    drill.pass()
}
/// Context for one drill run: the drill's label plus the log file it appends to.
struct Drill {
/// Label written into every log line (e.g. `"drill-1"`).
name: &'static str,
/// Log file that `log` appends to; built in `new` from `home()` and `DRILL_LOG_SUBDIR`.
log_path: PathBuf,
}
impl Drill {
    /// Pause between two consecutive probes in the `wait_for_*` helpers.
    const POLL_INTERVAL: Duration = Duration::from_secs(2);

    /// Creates the drill context and writes the initial `start` log line.
    ///
    /// The log file path is `home()` joined with `DRILL_LOG_SUBDIR`; the
    /// parent directory is created if it does not exist yet.
    ///
    /// # Errors
    /// Fails when the log directory cannot be created.
    fn new(name: &'static str) -> netsky_core::Result<Self> {
        let log_path = home().join(DRILL_LOG_SUBDIR);
        if let Some(parent) = log_path.parent() {
            fs::create_dir_all(parent)?;
        }
        let drill = Drill { name, log_path };
        drill.log("start");
        Ok(drill)
    }

    /// Verifies that both the agent0 and agentinfinity tmux sessions are
    /// alive before the drill disturbs anything.
    ///
    /// # Errors
    /// Logs a red result and fails if either session is missing.
    fn pre_checks(&self) -> netsky_core::Result<()> {
        if !tmux::session_is_alive(AGENT0_NAME) {
            self.fail("agent0 tmux session not present at drill start")?;
        }
        if !tmux::session_is_alive(AGENTINFINITY_NAME) {
            self.fail("agentinfinity tmux session not present at drill start")?;
        }
        Ok(())
    }

    /// Calls `probe` every `POLL_INTERVAL` until it yields a value or
    /// `timeout` has elapsed.
    ///
    /// The deadline is measured on the monotonic clock, so wall-clock
    /// adjustments cannot shorten or extend the wait. The probe always runs
    /// at least once, and once more after the final sleep, so a success that
    /// lands right at the deadline is still observed.
    fn poll_until<T>(timeout: Duration, mut probe: impl FnMut() -> Option<T>) -> Option<T> {
        let started = std::time::Instant::now();
        loop {
            if let Some(found) = probe() {
                return Some(found);
            }
            let remaining = timeout.saturating_sub(started.elapsed());
            if remaining.is_zero() {
                return None;
            }
            // Never sleep past the deadline.
            thread::sleep(remaining.min(Self::POLL_INTERVAL));
        }
    }

    /// Reports whether `line` contains `"session "` immediately followed by
    /// an ASCII digit. Every occurrence in the line is inspected, not just
    /// the first one.
    fn line_has_session_number(line: &str) -> bool {
        line.match_indices("session ").any(|(start, hit)| {
            line[start + hit.len()..].starts_with(|c: char| c.is_ascii_digit())
        })
    }

    /// Waits up to `WAIT_SECS` seconds for the tmux session `sess` to be alive.
    ///
    /// # Errors
    /// Logs a red result and fails if the session is not seen in time.
    fn wait_for_session(&self, sess: &str) -> netsky_core::Result<()> {
        let alive = Self::poll_until(Duration::from_secs(WAIT_SECS), || {
            tmux::session_is_alive(sess).then_some(())
        });
        if alive.is_none() {
            return self.fail(&format!("{sess} did not respawn within {WAIT_SECS}s"));
        }
        self.log(&format!("{sess} tmux session observed alive"));
        Ok(())
    }

    /// Waits up to `timeout_s` seconds for a `.json` file in the handoff
    /// archive directory that contains `needle` and whose modification time
    /// (Unix seconds) is not older than `pre_cutoff`.
    ///
    /// The archive directory is created first if it does not exist.
    ///
    /// # Errors
    /// Fails if the archive directory cannot be created, or (after logging a
    /// red result) if no matching envelope shows up in time.
    fn wait_for_envelope_with(
        &self,
        needle: &str,
        pre_cutoff: u64,
        timeout_s: u64,
    ) -> netsky_core::Result<()> {
        let archive = handoff_archive_dir();
        fs::create_dir_all(&archive)?;

        let found = Self::poll_until(Duration::from_secs(timeout_s), || {
            find_envelope(&archive, needle, pre_cutoff)
        });
        let Some(path) = found else {
            return self.fail(&format!(
                "handoff envelope with marker not found in {} within {timeout_s}s",
                archive.display()
            ));
        };

        // Fall back to the full path rather than panicking inside a log call.
        let shown = path
            .file_name()
            .map_or_else(|| path.to_string_lossy(), |name| name.to_string_lossy());
        self.log(&format!("handoff envelope delivered: {shown}"));
        Ok(())
    }

    /// Waits up to `timeout_s` seconds for the captured pane of `sess` to
    /// show a line containing `"session "` followed by a digit, which this
    /// drill treats as the `/up` report line.
    ///
    /// Pane-capture errors are treated as "not seen yet" and retried.
    ///
    /// # Errors
    /// Logs a red result and fails if no such line is seen in time.
    fn wait_for_up_marker(&self, sess: &str, timeout_s: u64) -> netsky_core::Result<()> {
        let seen = Self::poll_until(Duration::from_secs(timeout_s), || {
            let pane = tmux::capture_pane(sess, None).ok()?;
            pane.lines()
                .any(|line| Self::line_has_session_number(line))
                .then_some(())
        });
        if seen.is_none() {
            return self.fail(&format!(
                "/up report line not seen in {sess} pane within {timeout_s}s"
            ));
        }
        self.log(&format!("/up report line observed in {sess} pane"));
        Ok(())
    }

    /// Prints a UTC-timestamped line prefixed with the drill name to stdout
    /// and appends the same line to the drill log file.
    fn log(&self, msg: &str) {
        let line = format!(
            "[{}] {} {msg}",
            chrono::Utc::now().format("%Y-%m-%dT%H:%M:%SZ"),
            self.name
        );
        println!("{line}");
        // Best effort: a failure to write the log file must not abort the
        // drill, so the result is deliberately discarded.
        let _ = fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(&self.log_path)
            .and_then(|mut file| {
                use std::io::Write;
                writeln!(file, "{line}")
            });
    }

    /// Logs the green result line and returns `Ok(())`.
    fn pass(&self) -> netsky_core::Result<()> {
        self.log("RESULT: green");
        Ok(())
    }

    /// Logs `msg` as a failure plus the red result line, then returns an
    /// error carrying `msg`. This never returns `Ok`.
    fn fail(&self, msg: &str) -> netsky_core::Result<()> {
        self.log(&format!("FAIL: {msg}"));
        self.log("RESULT: red");
        netsky_core::bail!("{msg}")
    }
}
/// Searches `dir` for a `.json` file that contains `needle` and was last
/// modified at or after `pre_cutoff` (Unix seconds).
///
/// Entries are examined in directory-iteration order and the first match is
/// returned. A file whose modification time cannot be determined is treated
/// as modified at second 0. Unreadable entries, and files that cannot be read
/// as UTF-8 text, are skipped. Returns `None` when `dir` cannot be listed or
/// nothing matches.
fn find_envelope(dir: &Path, needle: &str, pre_cutoff: u64) -> Option<PathBuf> {
    fs::read_dir(dir)
        .ok()?
        .flatten()
        .filter(|entry| entry.path().extension().and_then(|ext| ext.to_str()) == Some("json"))
        .filter(|entry| {
            let modified_secs = match entry.metadata().and_then(|meta| meta.modified()) {
                Ok(stamp) => stamp
                    .duration_since(UNIX_EPOCH)
                    .map_or(0, |since_epoch| since_epoch.as_secs()),
                Err(_) => 0,
            };
            modified_secs >= pre_cutoff
        })
        .map(|entry| entry.path())
        .find(|candidate| {
            matches!(fs::read_to_string(candidate), Ok(text) if text.contains(needle))
        })
}
/// Returns the current wall-clock time as whole seconds since the Unix epoch,
/// or 0 if the system clock reads earlier than the epoch.
fn unix_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}