Skip to main content

kovra_agent/
daemon.rs

1//! Socket lifecycle for the governed ssh-agent (KOV-13, decision Q4:
2//! foreground-only MVP; decision Q5: refuse-and-guide on a pre-existing
3//! `$SSH_AUTH_SOCK`).
4//!
5//! This is the OS edge. On Unix it binds a UNIX socket (mode `0600`), prints the
6//! `SSH_AUTH_SOCK` to export, and serves connections in the foreground until
7//! Ctrl-C, removing the socket on exit. **The socket peer and a real `ssh`
8//! client are `[host]`** — validated on hardware by the human, not asserted by
9//! automated tests (CLAUDE.md rule 4). The *protocol* and *session* logic it
10//! drives ([`crate::protocol`], [`crate::session`]) are fully mock-tested.
11//!
12//! On Windows the transport is a **Named Pipe** (`\\.\pipe\kovra-ssh-agent-<fp>`,
13//! KOV-61) restricted to the current user via an owner-only security descriptor —
14//! the analog of the Unix socket's `0600`. The ssh-agent wire protocol over it is
15//! identical, so [`handle_connection`] is shared verbatim; only [`bind`]/[`serve`]
16//! differ per OS. Pipes leave no filesystem artifact, so there is nothing to
17//! unlink on exit. Validated `[host]` on the HP i7 with a real OpenSSH client
18//! (`ssh-add -l`/`-T`, incl. the high/prod confirmation gate).
19
20use std::path::{Path, PathBuf};
21
22use crate::error::AgentError;
23#[cfg(any(unix, windows))]
24use crate::session::Session;
25
26/// The OS listener the agent serves on: a UNIX-domain socket on Unix, a Named
27/// Pipe server on Windows (KOV-61). On any other platform it is a placeholder
28/// that is never constructed — [`bind`] errors first.
29#[cfg(unix)]
30pub use std::os::unix::net::UnixListener as AgentListener;
31#[cfg(not(any(unix, windows)))]
32pub use stub_impl::AgentListener;
33#[cfg(windows)]
34pub use windows_impl::AgentListener;
35
36/// Refuse to start if `$SSH_AUTH_SOCK` is already set — we never hijack or chain
37/// an existing agent (decision Q5). The caller prints the guidance carried by
38/// [`AgentError::AuthSockAlreadySet`].
39pub fn ensure_no_existing_agent() -> Result<(), AgentError> {
40    if let Some(sock) = std::env::var_os("SSH_AUTH_SOCK") {
41        return Err(AgentError::AuthSockAlreadySet(
42            sock.to_string_lossy().into_owned(),
43        ));
44    }
45    Ok(())
46}
47
/// Default location of the agent socket: `agent.sock` directly under the
/// vault root. (The vault root is already `0700`, so the socket sits in a
/// private parent directory.)
#[cfg(not(windows))]
pub fn default_socket_path(root: &Path) -> PathBuf {
    let mut socket_path = root.to_path_buf();
    socket_path.push("agent.sock");
    socket_path
}
54
/// Windows flavour of the default agent address. Named pipes live in a flat
/// namespace rather than the filesystem, so the name is derived from a
/// fingerprint of the vault root (its lossy string form) to keep distinct
/// vaults from colliding: `\\.\pipe\kovra-ssh-agent-<fp>`.
#[cfg(windows)]
pub fn default_socket_path(root: &Path) -> PathBuf {
    let root_text = root.to_string_lossy();
    let fp = kovra_core::fingerprint(root_text.as_bytes());
    let pipe_name = format!(r"\\.\pipe\kovra-ssh-agent-{fp}");
    PathBuf::from(pipe_name)
}
64
65/// Owned session inputs, so `serve`'s closure can build a session per request
66/// without lifetime entanglement with the listener loop.
67pub struct SessionOwned {
68    /// The custodied keys (with private halves).
69    pub keys: Vec<crate::session::KeypairEntry>,
70    /// The agent scope.
71    pub scope: kovra_core::AgentScope,
72    /// The confirmer.
73    pub confirmer: Box<dyn kovra_core::Confirmer>,
74    /// The audit sink.
75    pub audit: Box<dyn kovra_core::AuditSink>,
76    /// The clock.
77    pub clock: Box<dyn kovra_core::Clock>,
78    /// The confirmation timeout.
79    pub confirm_timeout: std::time::Duration,
80    /// The observed requesting process (I16).
81    pub requesting_process: Option<String>,
82}
83
84#[cfg(any(unix, windows))]
85impl SessionOwned {
86    fn as_session(&self) -> Session<'_> {
87        Session {
88            keys: &self.keys,
89            scope: &self.scope,
90            confirmer: self.confirmer.as_ref(),
91            audit: self.audit.as_ref(),
92            clock: self.clock.as_ref(),
93            confirm_timeout: self.confirm_timeout,
94            requesting_process: self.requesting_process.clone(),
95        }
96    }
97}
98
/// Remove the agent's socket file on shutdown (best-effort, idempotent).
///
/// Mirrors the rule `bind` follows: only a **socket** is ever unlinked. If
/// `path` is missing, or is anything other than a Unix socket (a regular file,
/// a directory, a symlink), it is left untouched and the call silently
/// succeeds. Errors from the removal itself are deliberately ignored — this
/// runs on the way out and there is nothing useful to do with them.
///
/// On non-Unix platforms this does nothing at all: the Windows transport is a
/// named pipe, which leaves no filesystem artifact and vanishes when its
/// handle closes, so no delete is attempted against the pipe name.
pub fn cleanup(path: &Path) {
    #[cfg(unix)]
    {
        use std::os::unix::fs::FileTypeExt;

        // `symlink_metadata` (lstat) so a symlink is judged as a symlink, never
        // as whatever it points at.
        let is_socket = std::fs::symlink_metadata(path)
            .map(|meta| meta.file_type().is_socket())
            .unwrap_or(false);
        if is_socket {
            let _ = std::fs::remove_file(path);
        }
    }
    #[cfg(not(unix))]
    {
        // Nothing to unlink; keep the parameter "used" on these targets.
        let _ = path;
    }
}
105
106/// Serve one connection to completion over any byte stream (`Read + Write`): read
107/// a frame, dispatch it through a fresh session, write the framed reply, loop
108/// until the peer closes. Shared by the Unix-socket and Windows-named-pipe rails
109/// — the ssh-agent wire protocol is transport-agnostic. Per-connection errors
110/// propagate to the caller, which isolates them (one bad peer must not take the
111/// daemon down). A malformed/unknown frame answers `SSH_AGENT_FAILURE` rather
112/// than closing (matches the fuzz-target contract: never panic, always a reply).
113#[cfg(any(unix, windows))]
114fn handle_connection<S, F>(mut stream: S, make_session: &mut F) -> Result<(), AgentError>
115where
116    S: std::io::Read + std::io::Write,
117    F: FnMut() -> Result<SessionOwned, AgentError>,
118{
119    use crate::protocol::{encode_failure, frame, parse_request, read_frame};
120
121    loop {
122        let body = match read_frame(&mut stream)? {
123            Some(b) => b,
124            None => return Ok(()), // peer closed at a frame boundary
125        };
126        let reply_body = match parse_request(&body) {
127            Ok(request) => {
128                let owned = make_session()?;
129                let session = owned.as_session();
130                session.handle(&request)?
131            }
132            Err(_) => encode_failure(),
133        };
134        stream.write_all(&frame(&reply_body))?;
135        stream.flush()?;
136    }
137}
138
139#[cfg(unix)]
140pub use unix_impl::{bind, serve};
141
142#[cfg(unix)]
143mod unix_impl {
144    use std::os::unix::net::UnixListener;
145    use std::path::Path;
146
147    use super::{AgentError, SessionOwned, handle_connection};
148
149    /// Bind the agent socket at `path` (mode `0600`), removing a stale socket
150    /// file first. Returns the listener; the caller serves it with [`serve`].
151    pub fn bind(path: &Path) -> Result<UnixListener, AgentError> {
152        // Remove a leftover socket from a previous run (a path that exists but
153        // has no live listener). We only ever remove a socket file, never a
154        // regular file.
155        if path.exists() {
156            let is_socket = std::fs::symlink_metadata(path)
157                .map(|m| {
158                    use std::os::unix::fs::FileTypeExt;
159                    m.file_type().is_socket()
160                })
161                .unwrap_or(false);
162            if is_socket {
163                let _ = std::fs::remove_file(path);
164            } else {
165                return Err(AgentError::Socket(format!(
166                    "{} exists and is not a socket — refusing to overwrite",
167                    path.display()
168                )));
169            }
170        }
171        let listener = UnixListener::bind(path)
172            .map_err(|e| AgentError::Socket(format!("bind {}: {e}", path.display())))?;
173        {
174            use std::os::unix::fs::PermissionsExt;
175            std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))
176                .map_err(|e| AgentError::Socket(format!("chmod {}: {e}", path.display())))?;
177        }
178        Ok(listener)
179    }
180
181    /// Serve the agent in the **foreground** until the listener is closed or an
182    /// unrecoverable error occurs. Each accepted connection is handled to
183    /// completion (one request → one reply, looping until the peer closes).
184    /// Per-connection errors are isolated: a malformed frame answers
185    /// `SSH_AGENT_FAILURE` and the connection continues; a transport error drops
186    /// just that connection.
187    ///
188    /// `make_session` is called per request to build a fresh [`super::SessionOwned`]
189    /// view over the (possibly re-read) custodied keys.
190    pub fn serve<F>(listener: &UnixListener, mut make_session: F) -> Result<(), AgentError>
191    where
192        F: FnMut() -> Result<SessionOwned, AgentError>,
193    {
194        for incoming in listener.incoming() {
195            match incoming {
196                Ok(stream) => {
197                    if let Err(e) = handle_connection(stream, &mut make_session) {
198                        // Log to stderr and keep serving — one bad peer must not
199                        // take the daemon down. No key bytes are ever in `e`.
200                        eprintln!("kovra ssh-agent: connection error: {e}");
201                    }
202                }
203                Err(e) => {
204                    eprintln!("kovra ssh-agent: accept error: {e}");
205                }
206            }
207        }
208        Ok(())
209    }
210}
211
212#[cfg(windows)]
213pub use windows_impl::{bind, serve};
214
215/// Windows Named-Pipe rail for the governed ssh-agent (KOV-61). The pipe is the
216/// transport analog of the Unix socket; the ssh-agent wire protocol over it is
217/// identical, so [`super::handle_connection`] is reused verbatim. The pipe is
218/// restricted to the **current user** (an SDDL `D:P(A;;GA;;;<sid>)` security
219/// descriptor — the analog of the socket's `0600`). Named pipes leave no
220/// filesystem artifact, so there is nothing to unlink on exit.
221#[cfg(windows)]
222mod windows_impl {
223    use std::os::windows::ffi::OsStrExt;
224    use std::os::windows::io::FromRawHandle;
225    use std::path::Path;
226
227    use windows::Win32::Foundation::{
228        CloseHandle, ERROR_PIPE_CONNECTED, HANDLE, HLOCAL, LocalFree,
229    };
230    use windows::Win32::Security::Authorization::{
231        ConvertSidToStringSidW, ConvertStringSecurityDescriptorToSecurityDescriptorW,
232        SDDL_REVISION_1,
233    };
234    use windows::Win32::Security::{
235        GetTokenInformation, PSECURITY_DESCRIPTOR, SECURITY_ATTRIBUTES, TOKEN_QUERY, TOKEN_USER,
236        TokenUser,
237    };
238    use windows::Win32::Storage::FileSystem::{FILE_FLAGS_AND_ATTRIBUTES, FlushFileBuffers};
239    use windows::Win32::System::Pipes::{
240        ConnectNamedPipe, CreateNamedPipeW, DisconnectNamedPipe, PIPE_READMODE_BYTE,
241        PIPE_REJECT_REMOTE_CLIENTS, PIPE_TYPE_BYTE, PIPE_UNLIMITED_INSTANCES, PIPE_WAIT,
242    };
243    use windows::Win32::System::Threading::{GetCurrentProcess, OpenProcessToken};
244    use windows::core::{HRESULT, HSTRING, PCWSTR, PWSTR};
245
246    use super::{AgentError, SessionOwned, handle_connection};
247
248    /// `PIPE_ACCESS_DUPLEX` (read+write server). A `FILE_FLAGS_AND_ATTRIBUTES`.
249    const PIPE_ACCESS_DUPLEX: u32 = 0x0000_0003;
250    /// Per-instance pipe buffer sizes (advisory).
251    const PIPE_BUF: u32 = 8 * 1024;
252
253    fn win(e: impl std::fmt::Display, what: &str) -> AgentError {
254        AgentError::Socket(format!("{what}: {e}"))
255    }
256
257    /// The Windows listener: owns the wide pipe name, the per-user security
258    /// descriptor (kept alive for the pipe's lifetime), and the first server
259    /// instance created by [`bind`] (so the pipe exists the moment we publish the
260    /// address). Subsequent instances are created per connection by [`serve`].
261    pub struct AgentListener {
262        name: Vec<u16>,
263        sd: PSECURITY_DESCRIPTOR,
264        instance: HANDLE,
265    }
266
267    // SAFETY: the only non-Send fields are a Windows kernel HANDLE and a
268    // security-descriptor heap pointer, both valid and usable from any thread in
269    // the process. Moving the listener to the serving thread is therefore sound.
270    unsafe impl Send for AgentListener {}
271
272    impl Drop for AgentListener {
273        fn drop(&mut self) {
274            // The `instance` handle is consumed by `serve` (wrapped in a File that
275            // closes it); we only free the security descriptor here. If `serve`
276            // never ran, the instance is reclaimed by the OS on process exit.
277            if !self.sd.0.is_null() {
278                // SAFETY: `sd` came from ConvertStringSecurityDescriptor… (LocalAlloc).
279                unsafe {
280                    let _ = LocalFree(Some(HLOCAL(self.sd.0.cast())));
281                }
282            }
283        }
284    }
285
286    /// Create the pipe (first instance) restricted to the current user.
287    pub fn bind(path: &Path) -> Result<AgentListener, AgentError> {
288        let name: Vec<u16> = path
289            .as_os_str()
290            .encode_wide()
291            .chain(std::iter::once(0))
292            .collect();
293        let sd = build_owner_only_sd().map_err(|e| win(e, "pipe security descriptor"))?;
294        match create_instance(&name, sd) {
295            Ok(instance) => Ok(AgentListener { name, sd, instance }),
296            Err(e) => {
297                // SAFETY: free the SD we just allocated before bailing.
298                unsafe {
299                    if !sd.0.is_null() {
300                        let _ = LocalFree(Some(HLOCAL(sd.0.cast())));
301                    }
302                }
303                Err(win(
304                    e,
305                    &format!(
306                        "create named pipe {} (another kovra agent already running?)",
307                        path.display()
308                    ),
309                ))
310            }
311        }
312    }
313
314    /// Serve connections in the foreground: accept on the current instance, run
315    /// the shared protocol loop over it, then create the next instance. One
316    /// connection at a time (foreground MVP, decision Q4).
317    pub fn serve<F>(listener: &AgentListener, mut make_session: F) -> Result<(), AgentError>
318    where
319        F: FnMut() -> Result<SessionOwned, AgentError>,
320    {
321        // `HANDLE` is Copy; we take the first instance and never let the listener's
322        // Drop close it (it is closed here, when the per-connection File drops).
323        let mut current = listener.instance;
324        loop {
325            // Block until a client connects. ERROR_PIPE_CONNECTED means one raced
326            // in between create and connect — also success.
327            match unsafe { ConnectNamedPipe(current, None) } {
328                Ok(()) => {}
329                Err(e) if e.code() == HRESULT::from_win32(ERROR_PIPE_CONNECTED.0) => {}
330                Err(e) => {
331                    eprintln!("kovra ssh-agent: connect error: {e}");
332                    // Drop this instance and make a fresh one.
333                    unsafe {
334                        let _ = CloseHandle(current);
335                    }
336                    current = create_instance(&listener.name, listener.sd)
337                        .map_err(|e| win(e, "re-create pipe instance"))?;
338                    continue;
339                }
340            }
341
342            // Wrap the connected instance as a File (Read + Write); it takes
343            // ownership of the handle and closes it on drop. The ssh-agent wire
344            // protocol is transport-agnostic, so the shared loop handles it.
345            // SAFETY: `current` is a valid, connected pipe handle we own.
346            let file = unsafe { std::fs::File::from_raw_handle(current.0.cast()) };
347            if let Err(e) = handle_connection(file, &mut make_session) {
348                eprintln!("kovra ssh-agent: connection error: {e}");
349            }
350            // `file` dropped here: flush + disconnect are best-effort niceties; the
351            // close already tears the instance down.
352            drop_instance(current);
353
354            // Next instance for the next client.
355            current = create_instance(&listener.name, listener.sd)
356                .map_err(|e| win(e, "create next pipe instance"))?;
357        }
358    }
359
360    /// Best-effort flush + disconnect (the File drop already closed the handle, so
361    /// these may no-op; kept for clarity of the per-connection lifecycle).
362    fn drop_instance(handle: HANDLE) {
363        // SAFETY: FFI; `handle` was just closed by the File drop — these are
364        // best-effort and ignore errors.
365        unsafe {
366            let _ = FlushFileBuffers(handle);
367            let _ = DisconnectNamedPipe(handle);
368        }
369    }
370
371    /// Create one named-pipe server instance with the owner-only SD.
372    fn create_instance(name: &[u16], sd: PSECURITY_DESCRIPTOR) -> windows::core::Result<HANDLE> {
373        let sa = SECURITY_ATTRIBUTES {
374            nLength: std::mem::size_of::<SECURITY_ATTRIBUTES>() as u32,
375            lpSecurityDescriptor: sd.0,
376            bInheritHandle: false.into(),
377        };
378        // SAFETY: FFI; `name` is NUL-terminated and `sa` outlives the call.
379        let handle = unsafe {
380            CreateNamedPipeW(
381                PCWSTR(name.as_ptr()),
382                FILE_FLAGS_AND_ATTRIBUTES(PIPE_ACCESS_DUPLEX),
383                PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT | PIPE_REJECT_REMOTE_CLIENTS,
384                PIPE_UNLIMITED_INSTANCES,
385                PIPE_BUF,
386                PIPE_BUF,
387                0,
388                Some(&sa),
389            )
390        };
391        if handle.is_invalid() {
392            return Err(windows::core::Error::from_thread());
393        }
394        Ok(handle)
395    }
396
397    /// A protected DACL granting full control to **only** the current user — the
398    /// Windows analog of the Unix socket's `0600`. Returned descriptor is freed by
399    /// [`AgentListener`]'s Drop.
400    fn build_owner_only_sd() -> windows::core::Result<PSECURITY_DESCRIPTOR> {
401        let sid = current_user_sid_string()?;
402        let sddl = HSTRING::from(format!("D:P(A;;GA;;;{sid})"));
403        let mut psd = PSECURITY_DESCRIPTOR::default();
404        // SAFETY: FFI; `sddl` is a valid wide string; `psd` receives an allocation.
405        unsafe {
406            ConvertStringSecurityDescriptorToSecurityDescriptorW(
407                &sddl,
408                SDDL_REVISION_1,
409                &mut psd,
410                None,
411            )?;
412        }
413        Ok(psd)
414    }
415
416    /// The current user's SID as an `S-1-…` string, from the process token.
417    fn current_user_sid_string() -> windows::core::Result<String> {
418        // SAFETY: FFI; the token handle is closed before returning; buffers are
419        // sized by the OS via the two-call pattern.
420        unsafe {
421            let mut token = HANDLE::default();
422            OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &mut token)?;
423            let mut len = 0u32;
424            let _ = GetTokenInformation(token, TokenUser, None, 0, &mut len);
425            let mut buf = vec![0u8; len as usize];
426            let info = GetTokenInformation(
427                token,
428                TokenUser,
429                Some(buf.as_mut_ptr().cast()),
430                len,
431                &mut len,
432            );
433            let _ = CloseHandle(token);
434            info?;
435            let token_user = &*(buf.as_ptr() as *const TOKEN_USER);
436            let mut pwstr = PWSTR::null();
437            ConvertSidToStringSidW(token_user.User.Sid, &mut pwstr)?;
438            let s = pwstr
439                .to_string()
440                .map_err(|_| windows::core::Error::from_thread())?;
441            // ConvertSidToStringSidW allocates via LocalAlloc.
442            let _ = LocalFree(Some(HLOCAL(pwstr.0.cast())));
443            Ok(s)
444        }
445    }
446}
447
// `AgentListener` is already re-exported from `stub_impl` next to the other
// per-platform listener aliases near the top of this file; importing the same
// name a second time is a duplicate definition. Only the functions are
// re-exported here, matching the Unix and Windows rails.
#[cfg(not(any(unix, windows)))]
pub use stub_impl::{bind, serve};
450
#[cfg(not(any(unix, windows)))]
mod stub_impl {
    //! Fallback rail for targets with neither Unix sockets nor Named Pipes:
    //! every entry point reports that the agent is unavailable.

    use std::path::Path;

    use super::{AgentError, SessionOwned};

    const UNSUPPORTED: &str = "the governed ssh-agent is not available on this platform";

    /// The one error every stub entry point returns.
    fn unsupported() -> AgentError {
        AgentError::Socket(String::from(UNSUPPORTED))
    }

    /// Placeholder listener for platforms with neither Unix sockets nor Named
    /// Pipes; never constructed (`bind` errors first).
    #[derive(Debug)]
    pub struct AgentListener(());

    /// Always fails: there is no transport to bind on this platform.
    pub fn bind(_path: &Path) -> Result<AgentListener, AgentError> {
        Err(unsupported())
    }

    /// Always fails: there is no transport to serve on this platform.
    pub fn serve<F>(_listener: &AgentListener, _make_session: F) -> Result<(), AgentError>
    where
        F: FnMut() -> Result<SessionOwned, AgentError>,
    {
        Err(unsupported())
    }
}
475
// In-process Named-Pipe transport test (no real `ssh`). A per-test pipe is
// bound and served from a detached thread with an empty key set; a client then
// opens the pipe and round-trips `REQUEST_IDENTITIES` → `IDENTITIES_ANSWER`
// (0 identities). This covers bind (incl. the owner-only security descriptor),
// the pipe accept, and the shared protocol loop running over the pipe. The real
// `ssh` peer remains `[host]`.
#[cfg(all(test, windows))]
mod windows_tests {
    use std::io::{Read, Write};
    use std::time::Duration;

    use kovra_core::{
        AgentScope, ConfirmOutcome, Filter, MockAuditSink, MockClock, MockConfirmer, Operation,
    };

    use super::{SessionOwned, bind, default_socket_path, serve};
    use crate::protocol::{SSH_AGENT_IDENTITIES_ANSWER, SSH_AGENTC_REQUEST_IDENTITIES, frame};

    /// Session inputs with no custodied keys and mock collaborators; with an
    /// empty key set nothing ever needs confirmation.
    fn empty_session() -> SessionOwned {
        let scope = AgentScope {
            operations: [Operation::Metadata, Operation::Inject]
                .into_iter()
                .collect(),
            projects: Filter::Any,
            environments: Filter::Any,
        };
        SessionOwned {
            keys: Vec::new(),
            scope,
            confirmer: Box::new(MockConfirmer::always(ConfirmOutcome::Approved)),
            audit: Box::new(MockAuditSink::new()),
            clock: Box::new(MockClock::default()),
            confirm_timeout: Duration::from_secs(1),
            requesting_process: None,
        }
    }

    #[test]
    fn named_pipe_round_trips_request_identities() {
        let vault_root = tempfile::tempdir().unwrap();
        let pipe = default_socket_path(vault_root.path());
        let listener = bind(&pipe).expect("bind named pipe");

        // Serve on a detached thread for the remainder of the test process.
        std::thread::spawn(move || {
            let _ = serve(&listener, || Ok(empty_session()));
        });

        // The first instance exists from `bind`, so the client connects without a
        // race even if it opens before `serve` calls ConnectNamedPipe.
        let mut client = std::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .open(&pipe)
            .expect("open the agent pipe as a client");
        let request = frame(&[SSH_AGENTC_REQUEST_IDENTITIES]);
        client.write_all(&request).unwrap();
        client.flush().unwrap();

        // Reply framing: 4-byte big-endian length, then the body.
        let mut len_prefix = [0u8; 4];
        client.read_exact(&mut len_prefix).unwrap();
        let body_len = u32::from_be_bytes(len_prefix) as usize;
        let mut body = vec![0u8; body_len];
        client.read_exact(&mut body).unwrap();

        assert_eq!(body[0], SSH_AGENT_IDENTITIES_ANSWER, "answer message type");
        // The identity count (4-byte BE after the type) is zero — no keys custodied.
        assert_eq!(&body[1..5], &[0, 0, 0, 0], "zero identities");
    }
}