Skip to main content

wire/
pending_pair.rs

1//! Daemon-orchestrated detached pair sessions.
2//!
3//! Problem: `wire pair-host` and `wire pair-join` block for the full pair
4//! timeout (300s default) waiting for the peer to show up. If the operator's
5//! terminal closes or the process is killed, the handshake dies — and on the
6//! relay side leaves a stuck slot that needs `wire pair-abandon` to clean.
7//!
8//! Solution: pair-host/-join write a "pending pair" descriptor file and exit
9//! in milliseconds. The `wire daemon` (already running for inbox sync) picks
10//! up pending files each tick, runs the handshake, and transitions state
11//! through the file. Operator confirms SAS via `wire pair-confirm <code>
12//! <digits>` from any process; daemon finalizes on the next tick.
13//!
14//! State flow (status field on the file):
15//!   request_host / request_guest
16//!     ↓  daemon registers on relay, stores PakeSide in memory
17//!   polling
18//!     ↓  daemon polls for peer's SPAKE2 message; on arrival computes SAS
19//!   sas_ready  (file now has `sas` field set; operator sees it via pair-list)
20//!     ↓  `wire pair-confirm` validates typed digits, sets status=confirmed
21//!   confirmed
22//!     ↓  daemon finalizes (peer card exchange, trust pin); deletes file
23//!   (gone)
24//!
25//! Terminal failure states: `aborted` (any error or user cancel),
26//! `aborted_restart` (daemon restarted mid-handshake; PakeSide lost from
27//! memory; operator must re-issue).
28//!
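//! In-memory PakeSide is the main point of fragility: a daemon restart drops
//! the live sessions. To soften that, the daemon persists the SPAKE2 seed and
//! the relay identifiers in the pending file once the relay registration
//! succeeds, and `cleanup_on_startup` rebuilds `polling` sessions from them.
//! Files that cannot be restored (still in `request_*`, written by a build
//! that did not persist the seed, or failing restore) have their relay slot
//! released and are marked `aborted_restart` so the operator knows.
//! Daemon restarts are rare; aborting in those leftover cases is an
//! acceptable tradeoff vs. forking the `spake2` crate to expose its internal
//! scalar.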
34
35use anyhow::{Result, anyhow, bail};
36use serde::{Deserialize, Serialize};
37use serde_json::{Value, json};
38use std::collections::HashMap;
39use std::path::PathBuf;
40use std::sync::{Mutex, OnceLock};
41
42use crate::pair_session::{
43    PairSessionState, pair_session_confirm_sas, pair_session_finalize, pair_session_open,
44    pair_session_try_sas,
45};
46
/// On-disk descriptor for one detached pair session.
///
/// Serialized as JSON, one file per code phrase, inside the pending-pair
/// directory. The daemon advances `status` through the state machine described
/// in the module docs. Every field marked `#[serde(default)]` may be missing
/// from the file and then deserializes as `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PendingPair {
    /// The shared code phrase (e.g. "30-UE2BZG"). Also determines the
    /// descriptor's file name and keys the in-memory live-session map.
    pub code: String,
    /// SHA-256 of the domain-tagged code. Used to call pair_abandon on
    /// failure paths without re-deriving.
    pub code_hash: String,
    /// "host" or "guest".
    pub role: String,
    /// Relay endpoint this session talks to.
    pub relay_url: String,
    /// Current state-machine state; see the module docs for the flow.
    pub status: String,
    /// SAS digits (6-char string) once daemon computes them. None until then.
    #[serde(default)]
    pub sas: Option<String>,
    /// Peer DID, taken from the `peer_did` key of the finalize outcome.
    #[serde(default)]
    pub peer_did: Option<String>,
    /// Creation timestamp, ISO-8601 UTC. `tick` parses it as RFC 3339 to age
    /// out terminal-state files.
    pub created_at: String,
    /// Last error message if status=aborted or aborted_restart.
    #[serde(default)]
    pub last_error: Option<String>,
    /// Relay pair_id, written by daemon after `pair_open` succeeds. Lets a
    /// fresh daemon process restore the in-memory PairSessionState without
    /// re-registering on the relay.
    #[serde(default)]
    pub pair_id: Option<String>,
    /// Our slot_id on the relay (we own this slot; bootstrap exchange writes
    /// to it). Same restore-purpose as pair_id.
    #[serde(default)]
    pub our_slot_id: Option<String>,
    /// Our slot_token (auth for posting to our slot). Already lives in
    /// relay_state; duplicated here so restore doesn't need a second file read.
    #[serde(default)]
    pub our_slot_token: Option<String>,
    /// Base64 of the 32-byte SPAKE2 seed. Lets restore_pair_session
    /// reconstruct an equivalent PakeSide. SECURITY: this is secret key
    /// material stored in $WIRE_HOME/state/wire/pending-pair/.
    /// NOTE(review): confirm the file really is owner-only — a typical umask
    /// leaves newly created files readable by other local users. Pending
    /// files live minutes; daemon GCs terminal states after 1 hour.
    #[serde(default)]
    pub spake2_seed_b64: Option<String>,
}
91
92pub fn pending_dir() -> Result<PathBuf> {
93    let d = crate::config::state_dir()?.join("pending-pair");
94    std::fs::create_dir_all(&d)?;
95    Ok(d)
96}
97
98fn pending_path(code: &str) -> Result<PathBuf> {
99    // Codes are alphanumeric + dash; sanitize defensively.
100    let safe: String = code
101        .chars()
102        .map(|c| {
103            if c.is_ascii_alphanumeric() || c == '-' {
104                c
105            } else {
106                '_'
107            }
108        })
109        .collect();
110    Ok(pending_dir()?.join(format!("{safe}.json")))
111}
112
113pub fn write_pending(p: &PendingPair) -> Result<()> {
114    let path = pending_path(&p.code)?;
115    let body = serde_json::to_string_pretty(p)?;
116    std::fs::write(&path, body)?;
117    Ok(())
118}
119
120pub fn read_pending(code: &str) -> Result<Option<PendingPair>> {
121    let path = pending_path(code)?;
122    if !path.exists() {
123        return Ok(None);
124    }
125    let body = std::fs::read_to_string(&path)?;
126    Ok(Some(serde_json::from_str(&body)?))
127}
128
129pub fn delete_pending(code: &str) -> Result<()> {
130    let path = pending_path(code)?;
131    if path.exists() {
132        std::fs::remove_file(&path)?;
133    }
134    Ok(())
135}
136
137pub fn list_pending() -> Result<Vec<PendingPair>> {
138    let dir = pending_dir()?;
139    let mut out = Vec::new();
140    if !dir.exists() {
141        return Ok(out);
142    }
143    for entry in std::fs::read_dir(&dir)? {
144        let entry = entry?;
145        if entry.file_type()?.is_file() {
146            let body = std::fs::read_to_string(entry.path())?;
147            if let Ok(p) = serde_json::from_str::<PendingPair>(&body) {
148                out.push(p);
149            }
150        }
151    }
152    out.sort_by(|a, b| a.created_at.cmp(&b.created_at));
153    Ok(out)
154}
155
/// Process-wide map of code phrase → live `PairSessionState`, created lazily
/// through `live()`. It exists only in memory, so a daemon restart empties it;
/// `cleanup_on_startup` re-inserts sessions for the pending files it manages
/// to restore.
static LIVE_SESSIONS: OnceLock<Mutex<HashMap<String, PairSessionState>>> = OnceLock::new();
159
160fn live() -> &'static Mutex<HashMap<String, PairSessionState>> {
161    LIVE_SESSIONS.get_or_init(|| Mutex::new(HashMap::new()))
162}
163
164/// Tracks "is this daemon process the same one that opened the live sessions?"
165/// — a PID file at `$WIRE_HOME/state/wire/daemon.pid` containing the PID of
166/// the daemon process that owns the in-memory `LIVE_SESSIONS` map. On startup:
167/// if the PID file exists AND that PID is alive → previous daemon is somehow
168/// still running (refuse, or no-op cleanup); if PID file exists but PID dead
169/// → previous daemon crashed, run cleanup. If no PID file → first run, no
170/// pending sessions could have a live state anyway, skip cleanup. Then write
171/// our own PID.
172fn daemon_pid_file() -> Result<PathBuf> {
173    Ok(crate::config::state_dir()?.join("daemon.pid"))
174}
175
/// Best-effort check whether a process with the given PID currently exists.
///
/// On Linux this looks for the `/proc/<pid>` entry. Elsewhere it runs
/// `kill -0 <pid>` and reports whether the command succeeded; failure to
/// launch `kill` counts as "not alive".
fn process_alive(pid: u32) -> bool {
    #[cfg(target_os = "linux")]
    {
        let proc_entry = format!("/proc/{pid}");
        std::path::Path::new(&proc_entry).exists()
    }
    #[cfg(not(target_os = "linux"))]
    {
        let probe = std::process::Command::new("kill")
            .arg("-0")
            .arg(pid.to_string())
            .output();
        matches!(probe, Ok(out) if out.status.success())
    }
}
191
192/// Run on daemon startup. Only marks pending files aborted_restart if the
193/// previous daemon (according to PID file) is no longer alive. Idempotent
194/// for the same daemon process (writes its own PID, then re-running this
195/// function on subsequent calls is a no-op).
196pub fn cleanup_on_startup() -> Result<()> {
197    let pid_file = daemon_pid_file()?;
198    let my_pid = std::process::id();
199    let prev_alive = if pid_file.exists() {
200        if let Ok(s) = std::fs::read_to_string(&pid_file) {
201            if let Ok(pid) = s.trim().parse::<u32>() {
202                if pid == my_pid {
203                    // We are the daemon that wrote this PID — already initialized.
204                    return Ok(());
205                }
206                process_alive(pid)
207            } else {
208                false
209            }
210        } else {
211            false
212        }
213    } else {
214        // No previous daemon recorded — anything stale must be from a much
215        // older process that already exited. Treat as "previous daemon dead"
216        // so we clean up rather than leak.
217        false
218    };
219
220    if !prev_alive {
221        // For each non-terminal pending file, try to restore the in-memory
222        // PairSessionState from persisted fields. Falls back to abort if the
223        // file is from a pre-persistence release (no seed) OR restore fails.
224        for mut p in list_pending()? {
225            let transient =
226                p.status == "polling" || p.status == "request_host" || p.status == "request_guest";
227            if !transient {
228                continue;
229            }
230            let can_restore = p.status == "polling"
231                && p.pair_id.is_some()
232                && p.our_slot_id.is_some()
233                && p.our_slot_token.is_some()
234                && p.spake2_seed_b64.is_some();
235            if can_restore {
236                let restore_result = (|| -> Result<()> {
237                    let seed_bytes =
238                        crate::signing::b64decode(p.spake2_seed_b64.as_ref().unwrap())?;
239                    if seed_bytes.len() != 32 {
240                        bail!(
241                            "spake2_seed_b64 decoded to {} bytes, want 32",
242                            seed_bytes.len()
243                        );
244                    }
245                    let mut seed = [0u8; 32];
246                    seed.copy_from_slice(&seed_bytes);
247                    let role = match p.role.as_str() {
248                        "host" => "host",
249                        "guest" => "guest",
250                        _ => bail!("invalid role {:?}", p.role),
251                    };
252                    let s = crate::pair_session::restore_pair_session(
253                        role,
254                        &p.relay_url,
255                        p.pair_id.as_ref().unwrap(),
256                        &p.code,
257                        &p.code_hash,
258                        p.our_slot_id.as_ref().unwrap(),
259                        p.our_slot_token.as_ref().unwrap(),
260                        seed,
261                    )?;
262                    live().lock().unwrap().insert(p.code.clone(), s);
263                    Ok(())
264                })();
265                match restore_result {
266                    Ok(()) => {
267                        // Successful restore — pending file keeps status=polling.
268                        continue;
269                    }
270                    Err(e) => {
271                        // Restore failed — fall through to abort.
272                        p.last_error = Some(format!("restore_pair_session failed: {e}"));
273                    }
274                }
275            }
276            // Unrecoverable: abort (e.g. request_host that never made it past
277            // pair_open before crash, or a file from a pre-persistence build).
278            let client = crate::relay_client::RelayClient::new(&p.relay_url);
279            let _ = client.pair_abandon(&p.code_hash);
280            p.status = "aborted_restart".to_string();
281            if p.last_error.is_none() {
282                p.last_error = Some(
283                    "daemon restarted mid-handshake; SPAKE2 state could not be restored (likely pre-v0.3.12 pending file). Re-issue with a fresh code phrase.".to_string(),
284                );
285            }
286            write_pending(&p)?;
287            crate::os_notify::toast(
288                &format!("wire — pair aborted on restart ({})", p.code),
289                "Daemon restarted mid-handshake. Re-issue: wire pair-host --detach",
290            );
291        }
292    }
293
294    if let Some(parent) = pid_file.parent() {
295        std::fs::create_dir_all(parent).ok();
296    }
297    // P0.4 (0.5.11): daemon writes the versioned JSON pidfile shape, not
298    // a raw int. ensure_up::ensure_background also writes one when it
299    // spawns the daemon, but the daemon's own startup path runs through
300    // cleanup_on_startup too — so this side must also write the new shape
301    // or we'd silently regress to legacy-int on every daemon restart.
302    let bin_path = std::env::current_exe()
303        .map(|p| p.to_string_lossy().to_string())
304        .unwrap_or_default();
305    let started_at = time::OffsetDateTime::now_utc()
306        .format(&time::format_description::well_known::Rfc3339)
307        .unwrap_or_default();
308    let did = crate::config::read_agent_card()
309        .ok()
310        .and_then(|card| {
311            card.get("did")
312                .and_then(serde_json::Value::as_str)
313                .map(str::to_string)
314        });
315    let relay_url = crate::config::read_relay_state()
316        .ok()
317        .and_then(|state| {
318            state
319                .get("self")
320                .and_then(|s| s.get("relay_url"))
321                .and_then(serde_json::Value::as_str)
322                .map(str::to_string)
323        });
324    let record = crate::ensure_up::DaemonPid {
325        schema: crate::ensure_up::DAEMON_PID_SCHEMA.to_string(),
326        pid: my_pid,
327        bin_path,
328        version: env!("CARGO_PKG_VERSION").to_string(),
329        started_at,
330        did,
331        relay_url,
332    };
333    if let Ok(body) = serde_json::to_vec_pretty(&record) {
334        let _ = std::fs::write(&pid_file, body);
335    }
336    Ok(())
337}
338
/// Terminal-state TTL in seconds (one hour): `aborted` / `aborted_restart`
/// files whose `created_at` is older than this get silently deleted in
/// `tick()`. Keeps `pair-list` output tidy without losing short-term
/// diagnostic value. Note the age is measured from creation, not from the
/// moment the file entered the terminal state.
const TERMINAL_TTL_SECS: i64 = 3600;
343
344/// One daemon tick. Walks every pending file and advances it one step in the
345/// state machine. Each file's failures are isolated — a single broken file
346/// doesn't stop processing of the rest. Also GCs old terminal-state files.
347pub fn tick() -> Result<Value> {
348    let mut transitions: Vec<Value> = Vec::new();
349    let now = time::OffsetDateTime::now_utc();
350    for mut p in list_pending()? {
351        let prev_status = p.status.clone();
352
353        // GC long-dead terminal files.
354        if (p.status == "aborted" || p.status == "aborted_restart")
355            && let Ok(created) = time::OffsetDateTime::parse(
356                &p.created_at,
357                &time::format_description::well_known::Rfc3339,
358            )
359            && (now - created).whole_seconds() > TERMINAL_TTL_SECS
360        {
361            let _ = delete_pending(&p.code);
362            continue;
363        }
364
365        if let Err(e) = process_one(&mut p) {
366            p.last_error = Some(format!("{e:#}"));
367            p.status = "aborted".to_string();
368            // Best-effort abandon on relay so we don't leak a slot.
369            let client = crate::relay_client::RelayClient::new(&p.relay_url);
370            let _ = client.pair_abandon(&p.code_hash);
371            let _ = write_pending(&p);
372            live().lock().unwrap().remove(&p.code);
373            // Push: operator should know without checking pair-list.
374            let title = format!("wire — pair aborted ({})", p.code);
375            let body = p
376                .last_error
377                .clone()
378                .unwrap_or_else(|| "(no detail)".to_string());
379            crate::os_notify::toast(&title, &body);
380        }
381        if p.status != prev_status {
382            transitions.push(json!({
383                "code": p.code,
384                "from": prev_status,
385                "to": p.status,
386                "sas": p.sas,
387                "peer_did": p.peer_did,
388            }));
389        }
390    }
391    Ok(json!({"transitions": transitions}))
392}
393
394fn process_one(p: &mut PendingPair) -> Result<()> {
395    match p.status.as_str() {
396        "request_host" => {
397            let s = pair_session_open("host", &p.relay_url, Some(&p.code))?;
398            // Persist restore state to disk BEFORE inserting into live map —
399            // ensures a crash between insert and file-write doesn't lose the
400            // seed/pair_id needed to recover.
401            p.pair_id = Some(s.pair_id.clone());
402            p.our_slot_id = Some(s.our_slot_id.clone());
403            p.our_slot_token = Some(s.our_slot_token.clone());
404            p.spake2_seed_b64 = Some(crate::signing::b64encode(&s.spake2_seed));
405            live().lock().unwrap().insert(p.code.clone(), s);
406            p.status = "polling".to_string();
407            write_pending(p)?;
408        }
409        "request_guest" => {
410            let s = pair_session_open("guest", &p.relay_url, Some(&p.code))?;
411            p.pair_id = Some(s.pair_id.clone());
412            p.our_slot_id = Some(s.our_slot_id.clone());
413            p.our_slot_token = Some(s.our_slot_token.clone());
414            p.spake2_seed_b64 = Some(crate::signing::b64encode(&s.spake2_seed));
415            live().lock().unwrap().insert(p.code.clone(), s);
416            p.status = "polling".to_string();
417            write_pending(p)?;
418        }
419        "polling" => {
420            let mut sessions = live().lock().unwrap();
421            let s = sessions
422                .get_mut(&p.code)
423                .ok_or_else(|| anyhow!("no live session for {} (daemon restart?)", p.code))?;
424            if pair_session_try_sas(s)?.is_some() {
425                p.status = "sas_ready".to_string();
426                p.sas = s.sas.clone();
427                write_pending(p)?;
428                // Push to the operator's desktop so they don't have to remember
429                // to `wire pair-list`. Failures are swallowed in os_notify::toast.
430                let formatted = p
431                    .sas
432                    .as_ref()
433                    .map(|d| format!("{}-{}", &d[..3], &d[3..]))
434                    .unwrap_or_default();
435                let title = format!("wire — pair SAS ready ({})", p.code);
436                let body = format!(
437                    "Digits: {formatted}\nCompare with peer, then:\nwire pair-confirm {} {}",
438                    p.code,
439                    p.sas.as_deref().unwrap_or("")
440                );
441                crate::os_notify::toast(&title, &body);
442            }
443        }
444        "confirmed" => {
445            // Operator typed matching digits via `wire pair-confirm`. Daemon
446            // owns the live PairSessionState and must drive the final SPAKE2
447            // bootstrap exchange itself.
448            let mut sessions = live().lock().unwrap();
449            let s = sessions.get_mut(&p.code).ok_or_else(|| {
450                anyhow!(
451                    "no live session for {} (status=confirmed but session lost; daemon restart between sas_ready and confirmed)",
452                    p.code
453                )
454            })?;
455            let digits = p
456                .sas
457                .clone()
458                .ok_or_else(|| anyhow!("status=confirmed but sas missing"))?;
459            pair_session_confirm_sas(s, &digits)?;
460            // 30s timeout for the bootstrap exchange — both sides should already
461            // be in the same tick window. If this fails, status flips to aborted.
462            let outcome = pair_session_finalize(s, 30)?;
463            p.peer_did = outcome
464                .get("peer_did")
465                .and_then(Value::as_str)
466                .map(str::to_string);
467            sessions.remove(&p.code);
468            delete_pending(&p.code)?;
469            // Push a "paired" toast — closes the loop for the operator.
470            let title = format!("wire — paired ({})", p.code);
471            let body = format!(
472                "Peer: {}\n`wire peers` to confirm.",
473                p.peer_did.as_deref().unwrap_or("?")
474            );
475            crate::os_notify::toast(&title, &body);
476        }
477        // sas_ready (operator hasn't confirmed yet), aborted, aborted_restart:
478        // terminal-from-daemon's-POV — nothing to do.
479        _ => {}
480    }
481    Ok(())
482}