Skip to main content

iicp_client/
instance_lock.rs

// SPDX-License-Identifier: Apache-2.0

//! #405 — single-instance lock per node_id.
//!
//! Two `iicp-node serve` processes for the SAME node_id fight: each
//! registration rotates the directory-issued token and invalidates the other's,
//! so they enter a 401 → re-register war that makes the node flap in the
//! directory. This guard prevents that by holding a pidfile at
//! `~/.iicp/run/<node_id>.pid`; a second live process for the same node_id is
//! refused (unless `--force`). Distinct node_ids are unaffected — a fleet of N
//! nodes runs fine (each has its own lock).
//!
//! **Fail-open**: any filesystem error degrades to a no-op lock (with a warning)
//! — the guard must never prevent a node from starting.
15
16use std::path::PathBuf;
17
/// Guard for the per-node_id pidfile: held for the lifetime of `serve`, and
/// the pidfile is cleaned up when the guard is dropped.
///
/// The guard must stay bound to a variable for as long as the lock is needed;
/// an unbound guard is dropped (and the lock released) immediately.
#[derive(Debug)]
#[must_use = "the lock is released as soon as the guard is dropped"]
pub struct InstanceLock {
    /// Location of the pidfile. Empty when the lock degraded to a no-op
    /// (fail-open), in which case there is nothing to clean up on drop.
    path: PathBuf,
}
22
23impl InstanceLock {
24    /// Acquire the per-node_id lock. `Err(message)` if another LIVE process
25    /// already serves this node_id and `force` is false. Fails open on I/O error.
26    pub fn acquire(node_id: &str, force: bool) -> Result<Self, String> {
27        let dir = match crate::identity::config_dir() {
28            Ok(d) => d.join("run"),
29            Err(_) => {
30                return Ok(Self {
31                    path: PathBuf::new(),
32                })
33            } // fail open
34        };
35        if std::fs::create_dir_all(&dir).is_err() {
36            return Ok(Self {
37                path: PathBuf::new(),
38            }); // fail open
39        }
40        let path = dir.join(format!("{node_id}.pid"));
41        if !force {
42            if let Ok(existing) = std::fs::read_to_string(&path) {
43                if let Ok(pid) = existing.trim().parse::<i32>() {
44                    if pid != std::process::id() as i32 && pid_alive(pid) {
45                        return Err(format!(
46                            "node_id {node_id} is already being served by PID {pid}. \
47                             Stop that process, choose a different --node, or pass --force to take over."
48                        ));
49                    }
50                }
51            }
52        }
53        // Best-effort write; a failure here still yields a (weaker) lock guard.
54        let _ = std::fs::write(&path, std::process::id().to_string());
55        Ok(Self { path })
56    }
57}
58
59impl Drop for InstanceLock {
60    fn drop(&mut self) {
61        if !self.path.as_os_str().is_empty() {
62            let _ = std::fs::remove_file(&self.path);
63        }
64    }
65}
66
/// True if a process with `pid` is alive.
///
/// Unix: runs `kill -0 <pid>` (no signal is sent) and reports whether it
/// exited successfully. Non-positive values are rejected up front: to `kill`
/// they address process groups rather than a single process, so the probe
/// could succeed without any such process existing. If `kill` cannot be
/// spawned, or the probe fails for any reason, the answer is `false` so the
/// lock never blocks startup.
///
/// NOTE(review): a process this user may not signal presumably also makes the
/// probe fail and is then reported as not alive — confirm that is acceptable.
#[cfg(unix)]
fn pid_alive(pid: i32) -> bool {
    if pid <= 0 {
        return false;
    }
    std::process::Command::new("kill")
        .args(["-0", &pid.to_string()])
        // A stale pidfile is an expected situation; keep kill's
        // "No such process" diagnostic out of the node's own output.
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map(|status| status.success())
        .unwrap_or(false)
}

/// Non-unix: there is no liveness probe, so fail open (assume not alive) and
/// never block startup.
#[cfg(not(unix))]
fn pid_alive(_pid: i32) -> bool {
    false
}
82
#[cfg(test)]
mod tests {
    use super::InstanceLock;
    use std::path::PathBuf;
    use std::sync::Mutex;

    /// Serializes the tests in this module. They all point the process-global
    /// `IICP_HOME` variable at the same scratch directory and delete that
    /// directory afterwards, so running them in parallel lets one test remove
    /// the pidfiles another test is still asserting on.
    static HOME_SERIAL: Mutex<()> = Mutex::new(());

    /// Scratch directory that is removed on drop, so cleanup also happens when
    /// the test body panics on a failed assertion.
    struct ScratchHome(PathBuf);

    impl Drop for ScratchHome {
        fn drop(&mut self) {
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Run `f` with `IICP_HOME` pointing at a per-process scratch directory.
    /// (Presumably `crate::identity::config_dir` honors `IICP_HOME`.)
    fn with_tmp_home<F: FnOnce()>(f: F) {
        // A poisoned mutex only means an earlier test failed; the guard
        // protects no data, so it is safe to keep going.
        let _serial = HOME_SERIAL
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        // Declared after the mutex guard, so it is dropped (and the directory
        // removed) before the next test is let in.
        let home = ScratchHome(
            std::env::temp_dir().join(format!("iicp_lock_test_{}", std::process::id())),
        );
        let _ = std::fs::create_dir_all(&home.0);
        std::env::set_var("IICP_HOME", &home.0);
        f();
    }

    // Unix-only: relies on the `sleep` binary and on the `kill -0` liveness
    // probe; elsewhere `pid_alive` always answers false.
    #[cfg(unix)]
    #[test]
    fn live_foreign_pid_is_refused() {
        with_tmp_home(|| {
            let dir = crate::identity::config_dir().unwrap().join("run");
            std::fs::create_dir_all(&dir).unwrap();

            // Simulate another live process (same user, signalable) holding the lock
            // by spawning a real child and writing its PID into the pidfile.
            let mut child = std::process::Command::new("sleep")
                .arg("30")
                .spawn()
                .expect("spawn sleep");
            let written = std::fs::write(dir.join("dup-node.pid"), child.id().to_string());

            let r = InstanceLock::acquire("dup-node", false);
            let forced = InstanceLock::acquire("dup-node", true);
            // Reap the child before asserting so a failure cannot leak it.
            let _ = child.kill();
            let _ = child.wait();

            written.expect("write pidfile");
            assert!(r.is_err(), "a live foreign PID must refuse the acquire");
            assert!(forced.is_ok(), "force must override");
        });
    }

    #[test]
    fn distinct_nodes_and_release_on_drop() {
        with_tmp_home(|| {
            // distinct node_ids never conflict (fleet case)
            let a = InstanceLock::acquire("node-a", false);
            let b = InstanceLock::acquire("node-b", false);
            assert!(
                a.is_ok() && b.is_ok(),
                "distinct node_ids must both acquire"
            );
            // releasing (drop) frees the lock so it can be re-acquired
            drop(a);
            assert!(
                InstanceLock::acquire("node-a", false).is_ok(),
                "lock must be re-acquirable after drop"
            );
        });
    }
}