sqry_daemon/ipc/server.rs

//! IPC accept loop.
//!
//! Binds a UDS (Unix) or named pipe (Windows), accepts incoming
//! connections, and spawns a per-connection handler task. Graceful
//! shutdown is driven by a [`tokio_util::sync::CancellationToken`];
//! after cancellation, the loop drains active connections bounded by
//! [`crate::config::DaemonConfig::ipc_shutdown_drain_secs`].
//!
//! The two Unix bind branches (`RuntimeDir` vs `Configured`) implement
//! the Phase 8a iter-1 B2 fix: runtime-dir paths are auto-managed
//! (parent created 0700, stale socket removed after a liveness probe).
//! Configured paths also auto-unlink stale sockets after a liveness
//! probe confirms no process is listening — this is required for
//! auto-start to work after a daemon stop. Live sockets are never
//! touched: the daemon refuses to bind if a live daemon is already
//! listening. Non-socket files at the configured path are always
//! rejected.
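//!
//! A minimal sketch of how a caller might drive the server; the handles
//! passed to [`IpcServer::bind`] (config, manager, dispatcher, builder,
//! executor) are assumed to be constructed elsewhere during daemon
//! bootstrap:
//!
//! ```ignore
//! let shutdown = CancellationToken::new();
//! let server = IpcServer::bind(
//!     Arc::clone(&config),
//!     Arc::clone(&manager),
//!     Arc::clone(&dispatcher),
//!     Arc::clone(&workspace_builder),
//!     Arc::clone(&tool_executor),
//!     shutdown.clone(),
//! )
//! .await?;
//!
//! // `run` consumes the server and returns once `shutdown` is cancelled
//! // and active connections have drained (or the drain deadline passes).
//! let accept_task = tokio::spawn(server.run());
//!
//! // Later, e.g. from a signal handler:
//! shutdown.cancel();
//! accept_task.await??;
//! ```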

use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};

use anyhow::anyhow;
use sqry_core::query::executor::QueryExecutor;
use tokio_util::sync::CancellationToken;

use crate::config::{DaemonConfig, ENV_SOCKET_PATH};
use crate::error::{DaemonError, DaemonResult};
use crate::rebuild::RebuildDispatcher;
use crate::workspace::{WorkspaceBuilder, WorkspaceManager};

use super::methods::HandlerContext;
use super::router::run_connection;
use super::shim_registry::ShimRegistry;

/// Top-level IPC server handle. Construct with [`Self::bind`] then
/// drive with [`Self::run`].
pub struct IpcServer {
    listener: Listener,
    socket_path: PathBuf,
    manager: Arc<WorkspaceManager>,
    dispatcher: Arc<RebuildDispatcher>,
    workspace_builder: Arc<dyn WorkspaceBuilder>,
    tool_executor: Arc<QueryExecutor>,
    shim_registry: Arc<ShimRegistry>,
    shutdown: CancellationToken,
    active_connections: Arc<AtomicU64>,
    config: Arc<DaemonConfig>,
    daemon_version: &'static str,
}

impl std::fmt::Debug for IpcServer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("IpcServer")
            .field("socket_path", &self.socket_path)
            .field("daemon_version", &self.daemon_version)
            .finish_non_exhaustive()
    }
}

impl IpcServer {
    /// Bind the server. Unix: UnixListener with the two-branch policy;
    /// Windows: NamedPipeServer with explicit options.
    pub async fn bind(
        config: Arc<DaemonConfig>,
        manager: Arc<WorkspaceManager>,
        dispatcher: Arc<RebuildDispatcher>,
        workspace_builder: Arc<dyn WorkspaceBuilder>,
        tool_executor: Arc<QueryExecutor>,
        shutdown: CancellationToken,
    ) -> DaemonResult<Self> {
        let socket_path = config.socket_path();
        let listener = Listener::bind(&config, &socket_path).await?;
        Ok(Self {
            listener,
            socket_path,
            manager,
            dispatcher,
            workspace_builder,
            tool_executor,
            shim_registry: ShimRegistry::new(),
            shutdown,
            active_connections: Arc::new(AtomicU64::new(0)),
            config,
            daemon_version: env!("CARGO_PKG_VERSION"),
        })
    }

    /// Returns the bound socket path (Unix) or named-pipe name
    /// (Windows).
    #[must_use]
    pub fn socket_path(&self) -> &Path {
        &self.socket_path
    }

    /// Return a shared handle to the shim-connection registry.
    ///
    /// Task 9's bootstrap path surfaces the count via `daemon/status`,
    /// and the Phase 8c router / MCP host register shim connections
    /// through this `Arc`. The registry's internal state is guarded by
    /// a `parking_lot::Mutex`, so callers must not hold its lock guard
    /// across long-running awaits — see [`ShimRegistry::len`] and
    /// [`ShimRegistry::is_empty`] for the snapshot-under-lock
    /// accessors.
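    ///
    /// A sketch of the intended pattern: snapshot the count, then await.
    /// The `report_status` call below is a hypothetical stand-in for any
    /// long-running await:
    ///
    /// ```ignore
    /// let registry = server.shim_registry();
    /// // `len` takes and releases the mutex internally; no guard is
    /// // held across the subsequent await.
    /// let shim_connections = registry.len();
    /// report_status(shim_connections).await;
    /// ```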
    #[must_use]
    pub fn shim_registry(&self) -> Arc<ShimRegistry> {
        Arc::clone(&self.shim_registry)
    }

    /// Accept loop. Returns once the shutdown token fires and active
    /// connections have drained (or the drain deadline passes).
    pub async fn run(self) -> DaemonResult<()> {
        let Self {
            mut listener,
            manager,
            dispatcher,
            workspace_builder,
            tool_executor,
            shim_registry,
            shutdown,
            active_connections,
            config,
            daemon_version,
            ..
        } = self;

        loop {
            tokio::select! {
                biased;
                () = shutdown.cancelled() => {
                    tracing::info!(
                        "ipc_server: shutdown requested; draining active connections"
                    );
                    break;
                }
                res = listener.accept() => match res {
                    Ok(stream) => {
                        let ctx = HandlerContext {
                            manager: Arc::clone(&manager),
                            dispatcher: Arc::clone(&dispatcher),
                            workspace_builder: Arc::clone(&workspace_builder),
                            tool_executor: Arc::clone(&tool_executor),
                            shim_registry: Arc::clone(&shim_registry),
                            shutdown: shutdown.clone(),
                            config: Arc::clone(&config),
                            daemon_version,
                        };
                        active_connections.fetch_add(1, Ordering::AcqRel);
                        let tracker = Arc::clone(&active_connections);
                        tokio::spawn(async move {
                            let conn_result = match stream {
                                #[cfg(unix)]
                                AcceptedStream::Unix(s) => run_connection(s, ctx).await,
                                #[cfg(windows)]
                                AcceptedStream::Pipe(s) => run_connection(s, ctx).await,
                            };
                            if let Err(e) = conn_result {
                                tracing::debug!(error = %e,
                                    "ipc_server: connection terminated with error");
                            }
                            tracker.fetch_sub(1, Ordering::AcqRel);
                        });
                    }
                    Err(e) => {
                        tracing::warn!(error = %e,
                            "ipc_server: accept failed; continuing");
                        tokio::time::sleep(Duration::from_millis(100)).await;
                    }
                }
            }
        }

        // Drain phase.
        let deadline = Instant::now() + Duration::from_secs(config.ipc_shutdown_drain_secs);
        while Instant::now() < deadline && active_connections.load(Ordering::Acquire) > 0 {
            tokio::time::sleep(Duration::from_millis(50)).await;
        }
        let lingering = active_connections.load(Ordering::Acquire);
        if lingering > 0 {
            tracing::warn!(
                lingering,
                "ipc_server: connections still active at drain deadline"
            );
        }
        Ok(())
    }
}

// ---------------------------------------------------------------------------
// Accepted-stream enum + Listener.
// ---------------------------------------------------------------------------

enum AcceptedStream {
    #[cfg(unix)]
    Unix(tokio::net::UnixStream),
    #[cfg(windows)]
    Pipe(tokio::net::windows::named_pipe::NamedPipeServer),
}

#[cfg(unix)]
enum Listener {
    Unix(tokio::net::UnixListener),
}

#[cfg(windows)]
enum Listener {
    Pipe(WindowsPipeAcceptor),
}

impl Listener {
    async fn bind(cfg: &DaemonConfig, path: &Path) -> DaemonResult<Self> {
        #[cfg(unix)]
        {
            let l = bind_unix(cfg, path).await?;
            Ok(Listener::Unix(l))
        }
        #[cfg(windows)]
        {
            let _ = cfg; // cfg is only used by the Unix branch; silence the unused warning
            let name = path.to_string_lossy().into_owned();
            let acceptor = WindowsPipeAcceptor::new(name)?;
            Ok(Listener::Pipe(acceptor))
        }
    }

    async fn accept(&mut self) -> io::Result<AcceptedStream> {
        match self {
            #[cfg(unix)]
            Self::Unix(l) => {
                let (s, _addr) = l.accept().await?;
                Ok(AcceptedStream::Unix(s))
            }
            #[cfg(windows)]
            Self::Pipe(a) => {
                let s = a.accept().await?;
                Ok(AcceptedStream::Pipe(s))
            }
        }
    }
}

// ---------------------------------------------------------------------------
// Unix bind (two-branch policy).
// ---------------------------------------------------------------------------

#[cfg(unix)]
enum UnixBindMode {
    RuntimeDir,
    Configured,
}

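/// Classify the Unix bind policy: `Configured` when an explicit socket
/// path is present (either `socket.path` in the config or the
/// `ENV_SOCKET_PATH` environment variable), `RuntimeDir` otherwise.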
#[cfg(unix)]
fn classify_bind_mode(cfg: &DaemonConfig) -> UnixBindMode {
    if cfg.socket.path.is_some() || std::env::var_os(ENV_SOCKET_PATH).is_some() {
        UnixBindMode::Configured
    } else {
        UnixBindMode::RuntimeDir
    }
}

#[cfg(unix)]
async fn bind_unix(cfg: &DaemonConfig, path: &Path) -> DaemonResult<tokio::net::UnixListener> {
    match classify_bind_mode(cfg) {
        UnixBindMode::RuntimeDir => bind_unix_runtime(path).await,
        UnixBindMode::Configured => bind_unix_configured(path).await,
    }
}

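/// Runtime-dir bind: create the parent directory with mode 0700, remove
/// any stale socket once the liveness probe confirms nothing is
/// listening, bind, and restrict the socket itself to mode 0600.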
#[cfg(unix)]
async fn bind_unix_runtime(path: &Path) -> DaemonResult<tokio::net::UnixListener> {
    use std::os::unix::fs::PermissionsExt;
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
        std::fs::set_permissions(parent, std::fs::Permissions::from_mode(0o700))?;
    }
    remove_stale_socket_if_dead(path).await?;
    let listener = tokio::net::UnixListener::bind(path)?;
    std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))?;
    Ok(listener)
}

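/// Configured-path bind: refuse to bind if a live daemon is already
/// listening at the path, unlink a stale socket once the liveness probe
/// confirms it is dead, and reject non-socket files outright. The fresh
/// socket is restricted to mode 0600.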
#[cfg(unix)]
async fn bind_unix_configured(path: &Path) -> DaemonResult<tokio::net::UnixListener> {
    use std::os::unix::fs::{FileTypeExt, PermissionsExt};
    match std::fs::symlink_metadata(path) {
        Ok(meta) if meta.file_type().is_socket() => {
            if probe_socket_alive(path).await {
                return Err(DaemonError::Config {
                    path: path.to_path_buf(),
                    source: anyhow!("socket path already in use by a live daemon"),
                });
            }
            // Stale socket: liveness probe confirmed no process is listening.
            // Safe to unlink and rebind regardless of how the path was
            // configured — the prior daemon is gone.
            tracing::warn!(
                path = %path.display(),
                "stale socket detected at configured path; unlinking and rebinding"
            );
            std::fs::remove_file(path)?;
        }
        Ok(_) => {
            return Err(DaemonError::Config {
                path: path.to_path_buf(),
                source: anyhow!("configured socket path exists and is not a socket"),
            });
        }
        Err(e) if e.kind() == io::ErrorKind::NotFound => {}
        Err(e) => return Err(DaemonError::Io(e)),
    }
    let listener = tokio::net::UnixListener::bind(path)?;
    std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))?;
    Ok(listener)
}

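/// Runtime-dir stale-socket handling: error out if a live daemon is
/// still listening, unlink a dead socket, reject non-socket files, and
/// treat a missing path as nothing to do.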
#[cfg(unix)]
async fn remove_stale_socket_if_dead(path: &Path) -> DaemonResult<()> {
    use std::os::unix::fs::FileTypeExt;
    match std::fs::symlink_metadata(path) {
        Ok(meta) if meta.file_type().is_socket() => {
            if probe_socket_alive(path).await {
                return Err(DaemonError::Config {
                    path: path.to_path_buf(),
                    source: anyhow!("socket path already in use by a live daemon"),
                });
            }
            std::fs::remove_file(path)?;
        }
        Ok(_) => {
            return Err(DaemonError::Config {
                path: path.to_path_buf(),
                source: anyhow!("runtime path exists and is not a socket"),
            });
        }
        Err(e) if e.kind() == io::ErrorKind::NotFound => {}
        Err(e) => return Err(DaemonError::Io(e)),
    }
    Ok(())
}

/// Hard deadline for the async UDS liveness probe. Loopback UDS
/// handshakes complete in sub-millisecond-to-~1 ms under normal load;
/// 100 ms is comfortably above that budget while still short enough
/// that a wedged kernel path (ptrace target, frozen filesystem,
/// signal-paused peer) does not stall daemon startup. If the probe
/// times out, the path is classified as "not a live daemon" and the
/// caller falls through to the stale-socket unlink-and-rebind path.
#[cfg(unix)]
const PROBE_TIMEOUT: Duration = Duration::from_millis(100);

/// Async liveness probe for a UDS path.
///
/// Returns `true` if a process accepts a UDS connection at `path`
/// within [`PROBE_TIMEOUT`]; `false` otherwise (stale socket,
/// `ENOENT`, or kernel stall past the deadline). Uses tokio's async
/// UDS connect so the probe never blocks the Tokio reactor — the
/// future yields to the runtime while the kernel drives the connect
/// handshake.
///
/// On a successful probe the returned `UnixStream` is dropped
/// immediately: closing the connection is the correct signal to the
/// peer that this was a liveness ping, not a real client. The
/// resulting connection-reset / early-EOF log lines on a healthy
/// daemon are a benign consequence.
#[cfg(unix)]
async fn probe_socket_alive(path: &Path) -> bool {
    match tokio::time::timeout(PROBE_TIMEOUT, tokio::net::UnixStream::connect(path)).await {
        Ok(Ok(stream)) => {
            // Explicit drop: the close is the probe's "hang up"
            // signal to the peer. Keep the drop inline for clarity —
            // relying on end-of-arm drop works, but an explicit drop
            // documents the intent.
            drop(stream);
            true
        }
        Ok(Err(_)) => false,    // ECONNREFUSED / ENOENT / other
        Err(_elapsed) => false, // kernel stall past deadline
    }
}

// ---------------------------------------------------------------------------
// Windows named-pipe acceptor.
// ---------------------------------------------------------------------------

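/// Named-pipe acceptor that always keeps one pending server instance.
/// `new` creates the first instance with `first_pipe_instance(true)`,
/// so construction fails if another process already owns the pipe name;
/// `accept` waits for a client on the pending instance, then
/// pre-creates the next instance before handing the connected one back.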
#[cfg(windows)]
struct WindowsPipeAcceptor {
    name: String,
    next: Option<tokio::net::windows::named_pipe::NamedPipeServer>,
}

#[cfg(windows)]
impl WindowsPipeAcceptor {
    fn new(name: String) -> io::Result<Self> {
        let full = pipe_fullname(&name);
        let next = Some(create_pipe_instance(&full, true)?);
        Ok(Self { name: full, next })
    }

    async fn accept(&mut self) -> io::Result<tokio::net::windows::named_pipe::NamedPipeServer> {
        let server = self.next.take().ok_or_else(|| {
            io::Error::other("pipe acceptor in invalid state: no pending instance")
        })?;
        server.connect().await?;
        self.next = Some(create_pipe_instance(&self.name, false)?);
        Ok(server)
    }
}

#[cfg(windows)]
fn pipe_fullname(name: &str) -> String {
    if name.starts_with(r"\\.\pipe\") {
        name.to_owned()
    } else {
        format!(r"\\.\pipe\{name}")
    }
}

#[cfg(windows)]
fn create_pipe_instance(
    full_name: &str,
    first: bool,
) -> io::Result<tokio::net::windows::named_pipe::NamedPipeServer> {
    use tokio::net::windows::named_pipe::{PipeMode, ServerOptions};
    ServerOptions::new()
        .first_pipe_instance(first)
        .reject_remote_clients(true)
        .pipe_mode(PipeMode::Byte)
        // 254 is the largest explicit limit tokio accepts; 255 is
        // reserved for PIPE_UNLIMITED_INSTANCES and would panic here.
        .max_instances(254)
        .access_inbound(true)
        .access_outbound(true)
        .create(full_name)
}