use crate::error::FpgadError;
use crate::softeners::error::FpgadSoftenerError;
use log::{error, info, warn};
use std::collections::HashMap;
use std::env;
use std::fs::{self, OpenOptions};
use std::os::unix::fs::FileTypeExt; use std::path::{Path, PathBuf};
use std::process::{Child, Command, Stdio};
use std::time::Duration;
use tokio::time::sleep;
/// Static description of one externally managed daemon: what to launch, which
/// socket signals that it is ready, and how to report failures.
#[derive(Debug, Clone)]
pub struct DaemonConfig {
    /// Human-readable daemon name; also the stem of the default log file name.
    pub name: String,
    /// Location of the daemon executable.
    pub binary_path: PathBuf,
    /// Socket path associated with the daemon.
    pub socket_path: PathBuf,
    /// Start-up timeout value (unit is decided by the code that consumes it).
    pub start_timeout: u64,
    /// Explicit log file location; `None` selects the default computed by
    /// `log_file()`.
    pub log_file: Option<PathBuf>,
    /// Wraps an error message in the softener error variant for this daemon.
    pub error_constructor: fn(String) -> FpgadSoftenerError,
}

impl DaemonConfig {
    /// Builds a configuration with no explicit log file set.
    pub fn new(
        name: String,
        binary_path: PathBuf,
        socket_path: PathBuf,
        start_timeout: u64,
        error_constructor: fn(String) -> FpgadSoftenerError,
    ) -> Self {
        let log_file = None;
        Self {
            name,
            binary_path,
            socket_path,
            start_timeout,
            error_constructor,
            log_file,
        }
    }

    /// Resolves the path of the log file for this daemon.
    ///
    /// An explicitly configured `log_file` is returned as-is. Otherwise the
    /// path is `<base>/log/<name>.log`, where `<base>` is the `SNAP_COMMON`
    /// environment variable or `/tmp` when that variable cannot be read. The
    /// `log` directory is created on a best-effort basis; a failure to create
    /// it is ignored here.
    fn log_file(&self) -> PathBuf {
        match self.log_file.as_ref() {
            Some(explicit) => explicit.to_path_buf(),
            None => {
                let base = match std::env::var("SNAP_COMMON") {
                    Ok(dir) => dir,
                    Err(_) => String::from("/tmp"),
                };
                let mut target = PathBuf::from(base);
                target.push("log");
                // Best effort only: the result is intentionally discarded.
                let _ = fs::create_dir_all(&target);
                target.push(format!("{}.log", self.name));
                target
            }
        }
    }
}
/// Owns the configured daemons and the child processes spawned for them.
pub struct DaemonManager {
/// Every daemon this manager may start; candidates are filtered by whether
/// their binary exists before anything is launched.
daemons: Vec<DaemonConfig>,
/// Handles of spawned child processes, keyed by the daemon's `name`.
processes: HashMap<String, Child>,
}
impl DaemonManager {
/// Upper bound on the number of start attempts made by one restart cycle.
const MAX_RESTART_ATTEMPTS: u32 = 5;
/// Pause between two consecutive passes of the monitoring loop.
const MONITOR_RATE: Duration = Duration::from_secs(5);
/// Pause taken before each individual restart attempt.
const RESTART_DELAY: Duration = Duration::from_secs(1);
/// Creates a manager for `daemons`; no process is spawned or tracked yet.
pub fn new(daemons: Vec<DaemonConfig>) -> Self {
    let processes = HashMap::new();
    Self { daemons, processes }
}
/// Returns clones of the configured daemons whose `binary_path` is an
/// existing regular file, logging for each daemon whether it was detected or
/// skipped.
fn filter_available_daemons(&self) -> Vec<DaemonConfig> {
    self.daemons
        .iter()
        .filter(|candidate| {
            let installed = candidate.binary_path.is_file();
            if installed {
                info!(
                    "Detected {} at {}",
                    candidate.name,
                    candidate.binary_path.display()
                );
            } else {
                info!(
                    "{} not found at {}, skipping",
                    candidate.name,
                    candidate.binary_path.display()
                );
            }
            installed
        })
        .cloned()
        .collect()
}
/// Deletes `socket_path` if it exists and is a Unix socket.
///
/// Anything that is not a socket is left untouched. Failures to stat or to
/// remove the file are logged as warnings and otherwise ignored.
fn cleanup_stale_socket(socket_path: &Path) {
    if !socket_path.exists() {
        return;
    }

    let file_type = match fs::metadata(socket_path) {
        Ok(meta) => meta.file_type(),
        Err(e) => {
            warn!("Warning: Could not stat socket file: {}", e);
            return;
        }
    };
    if !file_type.is_socket() {
        return;
    }

    info!("Removing stale socket at {}", socket_path.display());
    if let Err(e) = fs::remove_file(socket_path) {
        warn!("Could not remove stale socket: {}", e);
    }
}
fn start_daemon(&mut self, daemon: &DaemonConfig) -> Result<(), FpgadError> {
info!("Starting {}...", daemon.name);
Self::cleanup_stale_socket(&daemon.socket_path);
let log_file_path = daemon.log_file();
let log_file = OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(&log_file_path)
.map_err(|e| {
FpgadError::Softener((daemon.error_constructor)(format!(
"Failed to open log file: {}",
e
)))
})?;
let log_stdout = Stdio::from(log_file.try_clone().map_err(|e| {
FpgadError::Softener((daemon.error_constructor)(format!(
"Failed to clone log file: {}",
e
)))
})?);
let log_stderr = Stdio::from(log_file);
let child = Command::new(&daemon.binary_path)
.stdout(log_stdout)
.stderr(log_stderr)
.spawn()
.map_err(|e| {
FpgadError::Softener((daemon.error_constructor)(format!(
"Failed to spawn daemon: {}",
e
)))
})?;
let pid = child.id();
info!("{} started with PID {}", daemon.name, pid);
info!("Logs will be written to: {}", log_file_path.display());
self.processes.insert(daemon.name.clone(), child);
info!("Waiting for socket at {}...", daemon.socket_path.display());
for _ in 0..daemon.start_timeout {
if daemon.socket_path.exists() {
match fs::metadata(&daemon.socket_path) {
Ok(metadata) if metadata.file_type().is_socket() => {
info!("{} socket detected - startup successful", daemon.name);
return Ok(());
}
_ => {}
}
}
if let Some(process) = self.processes.get_mut(&daemon.name)
&& let Ok(Some(status)) = process.try_wait()
{
error!(
"{} process terminated unexpectedly with status: {}",
daemon.name, status
);
error!("Check logs at: {}", log_file_path.display());
return Err(FpgadError::Softener((daemon.error_constructor)(format!(
"{} failed to start",
daemon.name
))));
}
std::thread::sleep(Duration::from_secs(1));
}
error!(
"Socket didn't appear after {}s. {} startup failed, terminating process",
daemon.start_timeout, daemon.name
);
error!("Check logs at: {}", log_file_path.display());
if let Some(mut process) = self.processes.remove(&daemon.name) {
if let Err(e) = process.kill() {
error!("Failed to kill {} process: {}", daemon.name, e);
} else {
let _ = process.wait();
info!("{} process terminated", daemon.name);
}
}
Err(FpgadError::Softener((daemon.error_constructor)(format!(
"{} socket did not appear within {}s timeout",
daemon.name, daemon.start_timeout
))))
}
/// Starts every configured daemon whose binary is present.
///
/// Daemons are started one after another; the first failure stops the
/// sequence and is returned as its display string. When no binary is
/// present, nothing is started and `Ok(())` is returned.
pub fn check_and_start_daemons(&mut self) -> Result<(), String> {
    let candidates = self.filter_available_daemons();
    if candidates.is_empty() {
        info!("No daemons to start");
        return Ok(());
    }

    candidates
        .iter()
        .try_for_each(|config| self.start_daemon(config).map_err(|e| e.to_string()))
}
async fn try_restart_daemon(&mut self, daemon: &DaemonConfig) -> Result<(), FpgadError> {
for attempt in 0..Self::MAX_RESTART_ATTEMPTS {
warn!(
"Attempting to restart {} (attempt {}/{})",
daemon.name,
attempt,
Self::MAX_RESTART_ATTEMPTS
);
sleep(Self::RESTART_DELAY).await;
match self.start_daemon(daemon) {
Ok(_) => {
info!("{} successfully restarted", daemon.name);
return Ok(());
}
Err(e) => {
error!("Failed to restart {}: {}", daemon.name, e);
}
}
}
Err(FpgadError::Softener((daemon.error_constructor)(format!(
"Failed to restart {} after {} attempts",
daemon.name,
Self::MAX_RESTART_ATTEMPTS
))))
}
/// Watches the tracked child processes and restarts any that have exited.
///
/// Returns immediately when no process is tracked. Otherwise it loops without
/// ever returning: each pass polls every child, restarts the ones that have
/// exited, and then sleeps for `MONITOR_RATE`.
pub async fn monitor_daemons(&mut self) {
    if self.processes.is_empty() {
        info!("No daemons to monitor");
        return;
    }
    info!("Monitoring {} daemon(s)...", self.processes.len());

    loop {
        // Collect names first; restarting needs `&mut self`, which cannot be
        // taken while the process map is being iterated.
        let exited: Vec<String> = self
            .processes
            .iter_mut()
            .filter_map(|(name, child)| match child.try_wait() {
                Ok(Some(status)) => {
                    error!("{} process died unexpectedly with status: {}", name, status);
                    Some(name.clone())
                }
                Ok(None) => None,
                Err(e) => {
                    error!("Error checking process status for {}: {}", name, e);
                    None
                }
            })
            .collect();

        for daemon_name in exited {
            self.processes.remove(&daemon_name);
            let config = self
                .daemons
                .iter()
                .find(|candidate| candidate.name == daemon_name)
                .cloned();
            match config {
                Some(daemon) => {
                    if let Err(e) = self.try_restart_daemon(&daemon).await {
                        error!("Abandoning daemon: {}", e);
                    }
                }
                None => {
                    error!("Could not find daemon config for daemon with name {daemon_name}")
                }
            }
        }

        sleep(Self::MONITOR_RATE).await;
    }
}
pub fn cleanup(&mut self) {
info!("Cleaning up daemon processes...");
for (name, mut process) in self.processes.drain() {
match process.try_wait() {
Ok(Some(_)) => {
}
Ok(None) => {
info!("Terminating {} (PID {})...", name, process.id());
if let Err(e) = process.kill() {
error!("Failed to kill {}: {}", name, e);
} else {
let _ = process.wait();
}
}
Err(e) => {
error!("Error checking status of {}: {}", name, e);
}
}
}
}
}
impl Drop for DaemonManager {
fn drop(&mut self) {
self.cleanup();
}
}
/// Returns the configurations of the daemons this wrapper manages; currently
/// that is only `dfx-mgrd`.
///
/// The binary is looked up at `<prefix>/usr/bin/dfx-mgrd`, where `<prefix>` is
/// `$SNAP_COMPONENTS/dfx-mgr` when the `SNAP_COMPONENTS` environment variable
/// can be read and empty otherwise.
fn get_managed_daemons() -> Vec<DaemonConfig> {
    let prefix = env::var("SNAP_COMPONENTS")
        .map(|components| components + "/dfx-mgr")
        .unwrap_or_default();
    let binary_path = PathBuf::from(format!("{}/usr/bin/dfx-mgrd", prefix));
    let socket_path = PathBuf::from("/run/dfx-mgrd.socket");

    let dfx_mgrd = DaemonConfig::new(
        "dfx-mgrd".to_string(),
        binary_path,
        socket_path,
        10,
        FpgadSoftenerError::DfxMgr,
    );
    vec![dfx_mgrd]
}
/// Entry point of the softener daemon wrapper.
///
/// Starts the managed daemons and, when that succeeds, hands over to the
/// monitoring loop. A start-up failure is logged and the function returns.
pub async fn run_softener_daemons() {
    info!("Starting softener daemon wrapper...");
    let mut manager = DaemonManager::new(get_managed_daemons());

    if let Err(e) = manager.check_and_start_daemons() {
        error!("Failed to start one or more daemons: {}", e);
    } else {
        manager.monitor_daemons().await;
    }

    info!("Daemon wrapper exiting");
}