laburnum 1.17.2

An LSP framework for building language servers and compilers, powered by an incremental query tree with content-addressed storage, task-based dataflow, and parallel queries.
Documentation
// Copyright Two Neutron Stars Incorporated and contributors
// SPDX-License-Identifier: BlueOak-1.0.0

//! Socket utilities for daemon mode.
//!
//! Provides utilities for:
//! - Checking if a daemon is already running
//! - Cleaning up stale sockets
//! - Forcibly stopping a daemon via its PID file
//! - Waiting for a daemon to start
//! - Writing and reading PID files

use {
  super::DaemonConfig,
  crate::protocol::ipc::IpcStream,
  std::{
    fs,
    io,
    time::Duration,
  },
};

/// Classification of a daemon's reachability, as produced by
/// [`check_daemon_status`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DaemonStatus {
  /// Connecting to the daemon's IPC endpoint succeeded.
  Running,
  /// The endpoint was not found, or the connection was refused and the socket
  /// was not classified as stale.
  NotRunning,
  /// The connection was refused and no live owner process could be identified
  /// from the PID file. Only reported on Unix.
  StaleSocket,
}

/// Probes the daemon's IPC endpoint and classifies what was found.
///
/// - A successful connection means [`DaemonStatus::Running`].
/// - A refused connection means [`DaemonStatus::StaleSocket`] when (on Unix)
///   the PID file check says the socket is stale, otherwise
///   [`DaemonStatus::NotRunning`].
/// - A missing endpoint means [`DaemonStatus::NotRunning`].
///
/// # Errors
///
/// Returns any other connection error unchanged, as well as errors raised
/// while inspecting the PID file on Unix.
pub async fn check_daemon_status(
  config: &DaemonConfig,
) -> io::Result<DaemonStatus> {
  let target = config.endpoint();

  // A successful connect settles the question; everything below classifies
  // the failure.
  let failure = match IpcStream::connect(&target).await {
    | Ok(_) => return Ok(DaemonStatus::Running),
    | Err(failure) => failure,
  };

  match failure.kind() {
    | io::ErrorKind::ConnectionRefused => {
      // Something exists at the endpoint but nobody is accepting on it.
      #[cfg(unix)]
      {
        if is_socket_stale(config)? {
          return Ok(DaemonStatus::StaleSocket);
        }
      }
      Ok(DaemonStatus::NotRunning)
    },
    | io::ErrorKind::NotFound => Ok(DaemonStatus::NotRunning),
    | _ => Err(failure),
  }
}

#[cfg(unix)]
fn is_socket_stale(config: &DaemonConfig) -> io::Result<bool> {
  let pid_path = config.pid_file_path();

  if !pid_path.exists() {
    return Ok(true);
  }

  let pid_str = fs::read_to_string(&pid_path)?;
  let pid: i32 = pid_str
    .trim()
    .parse()
    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

  let is_alive = unsafe { libc::kill(pid, 0) } == 0;

  if is_alive { Ok(false) } else { Ok(true) }
}

/// Removes the socket, PID and lock files belonging to `config`.
///
/// Files that are already absent are skipped silently, so the call is safe to
/// repeat and safe to race against another process doing the same cleanup.
/// A `daemon_stale_socket_removed` event is emitted only when the socket file
/// was actually deleted by this call.
///
/// # Errors
///
/// Returns the first removal error other than "not found"; files after the
/// failing one are left untouched.
#[cfg(unix)]
pub fn cleanup_stale_socket(config: &DaemonConfig) -> io::Result<()> {
  /// Deletes `path`, reporting `Ok(true)` if a file was removed and
  /// `Ok(false)` if there was nothing to remove.
  fn remove_if_present(path: &std::path::Path) -> io::Result<bool> {
    // Attempt the removal and inspect the outcome instead of checking
    // `exists()` first: the check-then-remove sequence fails with `NotFound`
    // when another cleaner deletes the file in between.
    match fs::remove_file(path) {
      | Ok(()) => Ok(true),
      | Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false),
      | Err(e) => Err(e),
    }
  }

  let socket_path = config.socket_path();
  let pid_path = config.pid_file_path();
  let lock_path = config.lock_file_path();

  if remove_if_present(&socket_path)? {
    otel::event!(
      "daemon_stale_socket_removed",
      "path" = socket_path.display().to_string()
    );
  }

  remove_if_present(&pid_path)?;
  remove_if_present(&lock_path)?;

  Ok(())
}

/// Windows build of `cleanup_stale_socket`: performs no work and always
/// returns `Ok(())`.
// NOTE(review): presumably there are no socket/pid/lock files to remove on
// Windows — confirm against the Windows IPC implementation.
#[cfg(windows)]
pub fn cleanup_stale_socket(_config: &DaemonConfig) -> io::Result<()> {
  Ok(())
}

/// Outcome of [`force_stop_daemon`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ForceStopOutcome {
  /// A live daemon process was signalled to stop and its runtime files cleaned
  /// up. `pid` is the process id that was read from the daemon's pidfile.
  Stopped { pid: u32 },
  /// No live daemon was found; any leftover runtime files were cleaned up.
  /// This is also what the Windows build always reports.
  NotRunning,
}

/// Forcibly stop the daemon identified by `config`'s pidfile, without going
/// through the IPC connection.
///
/// This is the fallback for when a graceful, connection-based shutdown is
/// impossible — most notably a version-mismatched daemon that rejects the
/// handshake, which would otherwise be unkillable via the normal stop path.
/// Sends `SIGTERM`, polls for the process to exit (50 probes, 20ms apart),
/// escalates to `SIGKILL` if it lingers, then removes the socket/pid/lock
/// files.
///
/// Returns [`ForceStopOutcome::NotRunning`] when there is no pidfile or the
/// recorded process no longer exists, and [`ForceStopOutcome::Stopped`] once a
/// live process has been signalled.
///
/// # Errors
///
/// - [`io::ErrorKind::InvalidData`] if the pidfile is unparsable or holds a
///   value that cannot name a single process (zero, or too large for a pid).
///   Nothing is signalled or deleted in that case.
/// - The OS error (typically permission denied) if the process exists but may
///   not be signalled by the caller.
/// - Any error from reading the pidfile or removing the runtime files.
#[cfg(unix)]
pub fn force_stop_daemon(
  config: &DaemonConfig,
) -> io::Result<ForceStopOutcome> {
  /// Number of liveness probes made after `SIGTERM` before escalating.
  const TERM_POLL_ATTEMPTS: u32 = 50;
  /// Pause between two liveness probes.
  const TERM_POLL_INTERVAL: Duration = Duration::from_millis(20);

  /// Sends `signal` to `pid`. `Ok(true)` means the signal was delivered (or,
  /// for signal 0, that the process exists); `Ok(false)` means no such
  /// process; any other failure is returned as the OS error.
  fn send_signal(pid: libc::pid_t, signal: libc::c_int) -> io::Result<bool> {
    // SAFETY: `kill` takes plain integers and touches no memory we own. The
    // caller guarantees `pid > 0`, so exactly one process is addressed.
    if unsafe { libc::kill(pid, signal) } == 0 {
      return Ok(true);
    }
    let err = io::Error::last_os_error();
    if err.raw_os_error() == Some(libc::ESRCH) {
      Ok(false)
    } else {
      Err(err)
    }
  }

  let Some(pid) = read_pid_file(config)? else {
    cleanup_stale_socket(config)?;
    return Ok(ForceStopOutcome::NotRunning);
  };

  // `kill` gives pid 0 and negative pids group/broadcast meaning: 0 targets
  // the caller's own process group and -1 targets every process the caller
  // may signal. A pidfile value of 0, or one that wraps negative when
  // narrowed to a pid, must therefore never reach `kill`. Refuse it without
  // deleting anything, since a daemon we cannot identify may still be alive.
  if pid == 0 || pid > i32::MAX as u32 {
    return Err(io::Error::new(
      io::ErrorKind::InvalidData,
      format!("pid file contains {}, which is not a valid process id", pid),
    ));
  }
  // Lossless: the range check above guarantees 1..=i32::MAX.
  let pid_i = pid as libc::pid_t;

  // Signal 0 only probes. A permission error here means the process exists
  // but is not ours, so it is reported instead of being mistaken for "gone".
  if !send_signal(pid_i, 0)? {
    cleanup_stale_socket(config)?;
    return Ok(ForceStopOutcome::NotRunning);
  }

  // Ask the daemon to terminate, then poll for exit and escalate if it lingers.
  // If the process vanished between the probe and the signal it counts as
  // already terminated.
  let mut terminated = !send_signal(pid_i, libc::SIGTERM)?;

  if !terminated {
    for _ in 0..TERM_POLL_ATTEMPTS {
      // Any probe failure means our target is no longer there to wait for.
      if !matches!(send_signal(pid_i, 0), Ok(true)) {
        terminated = true;
        break;
      }
      std::thread::sleep(TERM_POLL_INTERVAL);
    }
  }

  if !terminated {
    send_signal(pid_i, libc::SIGKILL)?;
  }

  cleanup_stale_socket(config)?;
  Ok(ForceStopOutcome::Stopped { pid })
}

/// Windows build of `force_stop_daemon`: signals nothing, removes nothing, and
/// always reports [`ForceStopOutcome::NotRunning`].
// NOTE(review): this means a running daemon is never force-stopped on
// Windows — confirm that is intended rather than unimplemented.
#[cfg(windows)]
pub fn force_stop_daemon(
  _config: &DaemonConfig,
) -> io::Result<ForceStopOutcome> {
  Ok(ForceStopOutcome::NotRunning)
}

/// Delay, in milliseconds, slept before each poll in `wait_for_daemon`.
/// Indexed by attempt number; attempts past the end reuse the last entry.
const BACKOFF_DELAYS_MS: [u64; 8] = [10, 20, 50, 100, 200, 500, 1000, 2000];
/// Maximum number of polls `wait_for_daemon` makes before timing out.
const MAX_ATTEMPTS: usize = 20;

/// Polls until the daemon for `config` accepts connections, or gives up.
///
/// Every round first sleeps for the next `BACKOFF_DELAYS_MS` entry (the last
/// entry is reused once the table runs out) and then probes the daemon with
/// [`check_daemon_status`]. A stale socket found during a round is cleaned up
/// before the next round. A `daemon_ready` event is emitted on success.
///
/// # Errors
///
/// Propagates any error from the status probe or the stale-socket cleanup, and
/// returns [`io::ErrorKind::TimedOut`] after `MAX_ATTEMPTS` rounds without a
/// running daemon.
pub async fn wait_for_daemon(config: &DaemonConfig) -> io::Result<()> {
  let slowest = BACKOFF_DELAYS_MS.len() - 1;

  for attempt in 0..MAX_ATTEMPTS {
    // Back off first, then probe.
    let pause_ms = BACKOFF_DELAYS_MS[attempt.min(slowest)];
    smol::Timer::after(Duration::from_millis(pause_ms)).await;

    match check_daemon_status(config).await? {
      | DaemonStatus::NotRunning => {},
      | DaemonStatus::StaleSocket => cleanup_stale_socket(config)?,
      | DaemonStatus::Running => {
        let workspace = config.workspace_id.clone();
        otel::event!(
          "daemon_ready",
          "attempts" = attempt as i64,
          "workspace_id" = workspace
        );
        return Ok(());
      },
    }
  }

  let message = format!(
    "daemon failed to start after {} attempts for workspace {}",
    MAX_ATTEMPTS, config.workspace_id
  );
  Err(io::Error::new(io::ErrorKind::TimedOut, message))
}

/// Records the current process id in `config`'s PID file.
///
/// The parent directory is created if needed. The file holds the decimal pid
/// followed by a newline. The content is first written to a sibling temporary
/// file and then renamed over the PID file, so a concurrent reader sees either
/// the previous file or the complete new one — never an empty or half-written
/// file that would fail to parse. A `daemon_pid_file_written` event is emitted
/// on success.
///
/// # Errors
///
/// Returns any error from creating the directory, writing the temporary file,
/// or renaming it into place. The temporary file is removed on failure.
#[cfg(unix)]
pub fn write_pid_file(config: &DaemonConfig) -> io::Result<()> {
  let pid_path = config.pid_file_path();

  if let Some(parent) = pid_path.parent() {
    fs::create_dir_all(parent)?;
  }

  let pid = std::process::id();

  // Stage next to the final path so the rename stays within one directory.
  // The pid suffix keeps two processes from sharing a staging file.
  let mut staging_name = pid_path.as_os_str().to_os_string();
  staging_name.push(format!(".{}.tmp", pid));
  let staging_path = std::path::PathBuf::from(staging_name);

  let staged = fs::write(&staging_path, format!("{}\n", pid))
    .and_then(|()| fs::rename(&staging_path, &pid_path));
  if let Err(e) = staged {
    // Best effort: the original failure is what the caller needs to see.
    let _ = fs::remove_file(&staging_path);
    return Err(e);
  }

  otel::event!(
    "daemon_pid_file_written",
    "path" = pid_path.display().to_string(),
    "pid" = pid as i64
  );

  Ok(())
}

/// Windows build of `write_pid_file`: writes nothing and always returns
/// `Ok(())`.
// NOTE(review): presumably PID files are not used on Windows — confirm.
#[cfg(windows)]
pub fn write_pid_file(_config: &DaemonConfig) -> io::Result<()> {
  Ok(())
}

/// Reads the process id stored in `config`'s PID file.
///
/// Returns `Ok(None)` when the PID file does not exist and `Ok(Some(pid))`
/// when it holds an unsigned integer (surrounding whitespace is ignored).
///
/// # Errors
///
/// Returns the underlying error if the file exists but cannot be read, and
/// [`io::ErrorKind::InvalidData`] if its contents are not a valid `u32`.
#[cfg(unix)]
pub fn read_pid_file(config: &DaemonConfig) -> io::Result<Option<u32>> {
  let pid_path = config.pid_file_path();

  // Read directly rather than probing with `exists()` first: the file can be
  // removed by a concurrent cleanup between the probe and the read, which
  // would surface as an error instead of "no pidfile".
  let contents = match fs::read_to_string(&pid_path) {
    | Ok(contents) => contents,
    | Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
    | Err(e) => return Err(e),
  };

  contents
    .trim()
    .parse::<u32>()
    .map(Some)
    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}

/// Windows build of `read_pid_file`: reads nothing and always returns
/// `Ok(None)`.
// NOTE(review): presumably PID files are not used on Windows — confirm.
#[cfg(windows)]
pub fn read_pid_file(_config: &DaemonConfig) -> io::Result<Option<u32>> {
  Ok(None)
}

// Unit tests for the socket utilities. The force-stop tests are Unix-only and
// touch the real filesystem and process table.
#[cfg(test)]
mod tests {
  use super::*;

  // Sanity check that the status variants compare as distinct values.
  #[test]
  fn test_daemon_status_enum() {
    assert_ne!(DaemonStatus::Running, DaemonStatus::NotRunning);
    assert_ne!(DaemonStatus::NotRunning, DaemonStatus::StaleSocket);
  }

  // The backoff table must have its expected length and be strictly
  // increasing, so each retry waits longer than the one before it.
  #[test]
  fn test_backoff_delays() {
    assert_eq!(BACKOFF_DELAYS_MS.len(), 8);
    for i in 1..BACKOFF_DELAYS_MS.len() {
      assert!(BACKOFF_DELAYS_MS[i] > BACKOFF_DELAYS_MS[i - 1]);
    }
  }

  // More attempts than table entries: every delay is used at least once and
  // the final delay is repeated for the remaining attempts.
  #[test]
  fn test_max_attempts() {
    assert!(MAX_ATTEMPTS > BACKOFF_DELAYS_MS.len());
  }

  // End-to-end check of force_stop_daemon against a real child process.
  #[test]
  #[cfg(unix)]
  fn test_force_stop_daemon_kills_process_and_cleans_up() {
    // Use a unique workspace id so this test owns its own runtime dir.
    let config = DaemonConfig::new(
      "test-force-stop",
      format!("ws-{}", std::process::id()),
    );
    fs::create_dir_all(config.runtime_dir()).unwrap();

    // Stand in for a daemon: a child process we can signal, plus the pidfile
    // and a socket file that force_stop_daemon should clean up.
    // The "socket" is an ordinary empty file; only its removal is checked.
    let mut child = std::process::Command::new("sleep")
      .arg("60")
      .spawn()
      .expect("spawn sleep");
    let pid = child.id();
    fs::write(config.pid_file_path(), format!("{pid}\n")).unwrap();
    fs::write(config.socket_path(), b"").unwrap();

    let outcome = force_stop_daemon(&config).expect("force_stop_daemon");
    assert_eq!(outcome, ForceStopOutcome::Stopped { pid });

    // The 60s sleep must have been terminated by a signal well before its time.
    // (A real daemon is detached and reaped by init; here it is our direct
    // child, so we reap it ourselves to confirm it actually exited.)
    let status = child.wait().expect("reap child");
    assert!(!status.success(), "child was not terminated: {status:?}");

    // The runtime files must be removed.
    assert!(!config.pid_file_path().exists(), "pidfile not cleaned up");
    assert!(!config.socket_path().exists(), "socket not cleaned up");
  }

  // With no pidfile on disk (the runtime dir is never created here),
  // force_stop_daemon must report NotRunning rather than fail.
  #[test]
  #[cfg(unix)]
  fn test_force_stop_daemon_no_pidfile_is_not_running() {
    let config = DaemonConfig::new(
      "test-force-stop-absent",
      format!("ws-absent-{}", std::process::id()),
    );

    let outcome = force_stop_daemon(&config).expect("force_stop_daemon");
    assert_eq!(outcome, ForceStopOutcome::NotRunning);
  }
}