use super::owner::LockOwner;
use super::pid::{PidLiveness, pid_liveness};
use crate::timeutil;
use anyhow::Result;
use std::path::Path;
use time::{Duration, OffsetDateTime};
/// Lock age in seconds (7 days) beyond which a lock whose owner PID is not
/// definitely dead gets flagged for PID-reuse review.
const PID_REUSE_REVIEW_AFTER_SECONDS: i64 = 7 * 24 * 60 * 60;
/// Tolerance in seconds (5 minutes) for an owner `started_at` that lies ahead
/// of the current time before it is flagged as being in the future.
const FUTURE_STARTED_AT_GRACE_SECONDS: i64 = 5 * 60;
/// Review hint attached to a lock classification, based on the owner's
/// `started_at` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LockStalenessAdvisory {
/// Nothing to flag.
None,
/// The owner `started_at` value could not be parsed as a timestamp.
InvalidStartedAt,
/// The owner `started_at` value is ahead of the current time by more than
/// the allowed grace period.
FutureStartedAt,
/// The owner `started_at` value is older than the PID-reuse review
/// threshold while the owner PID is not known to be dead.
AgedLivePid,
}
impl LockStalenessAdvisory {
    /// Returns the short upper-case tag shown next to the lock path in the
    /// error headline, or `None` when this advisory has nothing to flag.
    ///
    /// Both timestamp-related advisories share a single tag.
    pub(crate) fn marker(self) -> Option<&'static str> {
        let tag = match self {
            Self::None => return None,
            Self::AgedLivePid => "PID REUSE REVIEW",
            Self::InvalidStartedAt | Self::FutureStartedAt => "OWNER TIME REVIEW",
        };
        Some(tag)
    }

    /// Builds the operator-facing explanation for this advisory.
    ///
    /// `liveness` selects the phrase describing the owner PID that is
    /// spliced into the text. Returns `None` for [`Self::None`]. Every
    /// returned note starts with a single leading space.
    fn operator_note(self, liveness: PidLiveness) -> Option<String> {
        // The binding keeps this name because the format strings below
        // capture it inline as `{liveness_text}`.
        let liveness_text = match liveness {
            PidLiveness::Running => "appears to be running",
            PidLiveness::Indeterminate => "could not be checked conclusively",
            PidLiveness::NotRunning => "is no longer running",
        };

        let note = match self {
            Self::None => return None,
            Self::InvalidStartedAt => format!(
                " The owner `started_at` value is missing or invalid, so Ralph cannot use lock age as a PID-reuse signal. The owner PID {liveness_text}; Ralph preserves the lock until an operator verifies it."
            ),
            Self::FutureStartedAt => {
                let grace_minutes = FUTURE_STARTED_AT_GRACE_SECONDS / 60;
                format!(
                    " The owner `started_at` value is more than {} minutes in the future. The owner PID {liveness_text}; Ralph preserves the lock and requires operator verification before unlock.",
                    grace_minutes
                )
            }
            Self::AgedLivePid => {
                let review_days = PID_REUSE_REVIEW_AFTER_SECONDS / 60 / 60 / 24;
                format!(
                    " The owner `started_at` value is older than {} days while the owner PID {liveness_text}. This can be a long-running Ralph process or a reused PID, so Ralph does not auto-clear it; verify the PID, command, and timestamp before using --force or `ralph queue unlock`.",
                    review_days
                )
            }
        };
        Some(note)
    }
}
/// Result of classifying a lock owner: the liveness of its PID plus any
/// review advisory that applies.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LockStaleness {
/// Liveness of the owner PID that the classification was based on.
pub(crate) liveness: PidLiveness,
/// Review hint for the operator; `LockStalenessAdvisory::None` when nothing is flagged.
pub(crate) advisory: LockStalenessAdvisory,
}
impl LockStaleness {
    /// Reports whether the lock counts as stale, which is decided solely by
    /// the owner PID being definitely not running.
    pub(crate) fn is_stale(self) -> bool {
        let Self { liveness, .. } = self;
        liveness.is_definitely_not_running()
    }

    /// Returns the operator-facing explanation for the attached advisory,
    /// worded according to the recorded PID liveness, if there is one.
    pub(crate) fn advisory_note(self) -> Option<String> {
        let Self { liveness, advisory } = self;
        advisory.operator_note(liveness)
    }
}
/// Summary of an already-present lock directory as produced by
/// `inspect_existing_lock`.
pub(crate) struct ExistingLock {
/// Owner metadata, when it was read successfully and present.
pub(crate) owner: Option<LockOwner>,
/// `true` when reading the owner metadata returned an error.
pub(crate) owner_unreadable: bool,
/// `true` when an owner was found and its classification reports the lock as stale.
pub(crate) is_stale: bool,
/// Classification of the owner; `None` when no owner metadata was available.
pub(crate) staleness: Option<LockStaleness>,
}
/// Inspects the lock at `lock_dir` and summarizes its owner and staleness.
///
/// `read_owner` is called once with `lock_dir`. If it fails, the error is
/// not propagated: the result is marked `owner_unreadable`, is never stale,
/// and carries no classification. If it succeeds with an owner, that owner
/// is classified with `classify_lock_owner`; without an owner the lock is
/// reported as not stale and unclassified.
pub(crate) fn inspect_existing_lock(
    lock_dir: &Path,
    read_owner: impl FnOnce(&Path) -> Result<Option<LockOwner>>,
) -> ExistingLock {
    // Guard: unreadable metadata is reported via a flag, never as stale.
    let Ok(owner) = read_owner(lock_dir) else {
        return ExistingLock {
            owner: None,
            owner_unreadable: true,
            is_stale: false,
            staleness: None,
        };
    };

    let staleness = owner.as_ref().map(classify_lock_owner);
    let is_stale = matches!(staleness, Some(verdict) if verdict.is_stale());

    ExistingLock {
        owner,
        owner_unreadable: false,
        is_stale,
        staleness,
    }
}
/// Classifies `owner` against the current UTC time and the liveness that
/// `pid_liveness` reports for the owner's PID.
///
/// Convenience wrapper around `classify_lock_owner_at`.
pub(crate) fn classify_lock_owner(owner: &LockOwner) -> LockStaleness {
    // Take the timestamp first, then look up the PID, in that order.
    let observed_at = OffsetDateTime::now_utc();
    let owner_liveness = pid_liveness(owner.pid);
    classify_lock_owner_at(owner, observed_at, owner_liveness)
}
/// Classifies `owner` given an explicit `now` and a pre-computed `liveness`.
///
/// The returned value always carries `liveness` unchanged. The advisory is
/// chosen as follows, first match wins:
///
/// 1. PID definitely not running: no advisory, and `started_at` is not read.
/// 2. `started_at` does not parse: `InvalidStartedAt`.
/// 3. `started_at` is ahead of `now` by more than the future grace period:
///    `FutureStartedAt`.
/// 4. `started_at` is behind `now` by more than the PID-reuse review
///    threshold: `AgedLivePid`.
/// 5. Otherwise: no advisory.
pub(crate) fn classify_lock_owner_at(
    owner: &LockOwner,
    now: OffsetDateTime,
    liveness: PidLiveness,
) -> LockStaleness {
    let advisory = if liveness.is_definitely_not_running() {
        // A dead owner needs no timestamp review; the lock is simply stale.
        LockStalenessAdvisory::None
    } else if let Some(started_at) = timeutil::parse_rfc3339_opt(&owner.started_at) {
        if started_at - now > future_started_at_grace() {
            LockStalenessAdvisory::FutureStartedAt
        } else if now - started_at > pid_reuse_review_after() {
            LockStalenessAdvisory::AgedLivePid
        } else {
            LockStalenessAdvisory::None
        }
    } else {
        LockStalenessAdvisory::InvalidStartedAt
    };

    LockStaleness { liveness, advisory }
}
/// Returns `PID_REUSE_REVIEW_AFTER_SECONDS` as a `Duration`, for comparison
/// against a lock's age.
fn pid_reuse_review_after() -> Duration {
    let threshold_seconds = PID_REUSE_REVIEW_AFTER_SECONDS;
    Duration::seconds(threshold_seconds)
}
/// Returns `FUTURE_STARTED_AT_GRACE_SECONDS` as a `Duration`, for comparison
/// against how far a `started_at` value lies ahead of the current time.
fn future_started_at_grace() -> Duration {
    let grace_seconds = FUTURE_STARTED_AT_GRACE_SECONDS;
    Duration::seconds(grace_seconds)
}
pub(crate) fn format_lock_error(
lock_dir: &Path,
owner: Option<&LockOwner>,
is_stale: bool,
owner_unreadable: bool,
staleness: Option<LockStaleness>,
) -> String {
let mut message = format!("Queue lock already held at: {}", lock_dir.display());
if is_stale {
message.push_str(" (STALE PID)");
} else if let Some(marker) = staleness.and_then(|staleness| staleness.advisory.marker()) {
message.push_str(&format!(" ({marker})"));
}
if owner_unreadable {
message.push_str(" (owner metadata unreadable)");
}
message.push_str("\n\nLock Holder:");
if let Some(owner) = owner {
message.push_str(&format!(
"\n PID: {}\n Label: {}\n Started At: {}\n Command: {}",
owner.pid, owner.label, owner.started_at, owner.command
));
} else {
message.push_str("\n (owner metadata missing)");
}
if is_stale {
message.push_str(
"\n\nStaleness Policy:\n Ralph automatically treats a PID lock as stale only when the owner PID is definitely not running.",
);
} else if let Some(note) = staleness.and_then(LockStaleness::advisory_note) {
message.push_str("\n\nStaleness Policy:\n");
message.push_str(¬e);
}
message.push_str("\n\nSuggested Action:");
if is_stale {
message.push_str(&format!(
"\n The process that held this lock is no longer running.\n Use --force to automatically clear it, or use the built-in unlock command (unsafe if another ralph is running):\n ralph queue unlock\n Or remove the directory manually:\n rm -rf {}",
lock_dir.display()
));
} else {
message.push_str(&format!(
"\n If you are sure no other ralph process is running, use the built-in unlock command:\n ralph queue unlock\n Or remove the lock directory manually:\n rm -rf {}",
lock_dir.display()
));
}
message
}