kovra_agent/daemon.rs
1//! Socket lifecycle for the governed ssh-agent (KOV-13, decision Q4:
2//! foreground-only MVP; decision Q5: refuse-and-guide on a pre-existing
3//! `$SSH_AUTH_SOCK`).
4//!
5//! This is the OS edge. On Unix it binds a UNIX socket (mode `0600`), prints the
6//! `SSH_AUTH_SOCK` to export, and serves connections in the foreground until
7//! Ctrl-C, removing the socket on exit. **The socket peer and a real `ssh`
8//! client are `[host]`** — validated on hardware by the human, not asserted by
9//! automated tests (CLAUDE.md rule 4). The *protocol* and *session* logic it
10//! drives ([`crate::protocol`], [`crate::session`]) are fully mock-tested.
11//!
12//! On Windows the transport is a **Named Pipe** (`\\.\pipe\kovra-ssh-agent-<fp>`,
13//! KOV-61) restricted to the current user via an owner-only security descriptor —
14//! the analog of the Unix socket's `0600`. The ssh-agent wire protocol over it is
15//! identical, so [`handle_connection`] is shared verbatim; only [`bind`]/[`serve`]
16//! differ per OS. Pipes leave no filesystem artifact, so there is nothing to
17//! unlink on exit. Validated `[host]` on the HP i7 with a real OpenSSH client
18//! (`ssh-add -l`/`-T`, incl. the high/prod confirmation gate).
19
20use std::path::{Path, PathBuf};
21
22use crate::error::AgentError;
23#[cfg(any(unix, windows))]
24use crate::session::Session;
25
26/// The OS listener the agent serves on: a UNIX-domain socket on Unix, a Named
27/// Pipe server on Windows (KOV-61). On any other platform it is a placeholder
28/// that is never constructed — [`bind`] errors first.
29#[cfg(unix)]
30pub use std::os::unix::net::UnixListener as AgentListener;
31#[cfg(not(any(unix, windows)))]
32pub use stub_impl::AgentListener;
33#[cfg(windows)]
34pub use windows_impl::AgentListener;
35
36/// Refuse to start if `$SSH_AUTH_SOCK` is already set — we never hijack or chain
37/// an existing agent (decision Q5). The caller prints the guidance carried by
38/// [`AgentError::AuthSockAlreadySet`].
39pub fn ensure_no_existing_agent() -> Result<(), AgentError> {
40 if let Some(sock) = std::env::var_os("SSH_AUTH_SOCK") {
41 return Err(AgentError::AuthSockAlreadySet(
42 sock.to_string_lossy().into_owned(),
43 ));
44 }
45 Ok(())
46}
47
/// Default agent socket location for a vault: `<root>/agent.sock`.
///
/// Nothing is created or checked on disk; this only composes the path. (The
/// project expects the vault root to be `0700`, which would give the socket a
/// private parent directory — that is not enforced here.)
#[cfg(not(windows))]
pub fn default_socket_path(root: &Path) -> PathBuf {
    let mut socket = root.to_path_buf();
    socket.push("agent.sock");
    socket
}
54
/// Default agent address for a vault on Windows:
/// `\\.\pipe\kovra-ssh-agent-<fp>`.
///
/// Named pipes live in a flat namespace rather than the filesystem, so the
/// name is derived from a fingerprint of the (lossily decoded) vault root
/// instead of being placed under it. That keeps the name stable for a given
/// root and distinct between different vaults.
#[cfg(windows)]
pub fn default_socket_path(root: &Path) -> PathBuf {
    let root_text = root.to_string_lossy();
    let fp = kovra_core::fingerprint(root_text.as_bytes());
    let pipe_name = format!(r"\\.\pipe\kovra-ssh-agent-{fp}");
    PathBuf::from(pipe_name)
}
64
65/// Owned session inputs, so `serve`'s closure can build a session per request
66/// without lifetime entanglement with the listener loop.
67pub struct SessionOwned {
68 /// The custodied keys (with private halves).
69 pub keys: Vec<crate::session::KeypairEntry>,
70 /// The agent scope.
71 pub scope: kovra_core::AgentScope,
72 /// The confirmer.
73 pub confirmer: Box<dyn kovra_core::Confirmer>,
74 /// The audit sink.
75 pub audit: Box<dyn kovra_core::AuditSink>,
76 /// The clock.
77 pub clock: Box<dyn kovra_core::Clock>,
78 /// The confirmation timeout.
79 pub confirm_timeout: std::time::Duration,
80 /// The observed requesting process (I16).
81 pub requesting_process: Option<String>,
82}
83
84#[cfg(any(unix, windows))]
85impl SessionOwned {
86 fn as_session(&self) -> Session<'_> {
87 Session {
88 keys: &self.keys,
89 scope: &self.scope,
90 confirmer: self.confirmer.as_ref(),
91 audit: self.audit.as_ref(),
92 clock: self.clock.as_ref(),
93 confirm_timeout: self.confirm_timeout,
94 requesting_process: self.requesting_process.clone(),
95 }
96 }
97}
98
/// Best-effort removal of the socket file at shutdown.
///
/// The outcome of the removal is deliberately discarded, so calling this for a
/// path that is already gone (or calling it twice) is fine. On Windows the
/// transport is a named pipe with no filesystem artifact — the pipe goes away
/// when its handle closes — so there is nothing for this call to remove there.
pub fn cleanup(path: &Path) {
    // Failure is not actionable during shutdown; drop the result on purpose.
    drop(std::fs::remove_file(path));
}
105
106/// Serve one connection to completion over any byte stream (`Read + Write`): read
107/// a frame, dispatch it through a fresh session, write the framed reply, loop
108/// until the peer closes. Shared by the Unix-socket and Windows-named-pipe rails
109/// — the ssh-agent wire protocol is transport-agnostic. Per-connection errors
110/// propagate to the caller, which isolates them (one bad peer must not take the
111/// daemon down). A malformed/unknown frame answers `SSH_AGENT_FAILURE` rather
112/// than closing (matches the fuzz-target contract: never panic, always a reply).
113#[cfg(any(unix, windows))]
114fn handle_connection<S, F>(mut stream: S, make_session: &mut F) -> Result<(), AgentError>
115where
116 S: std::io::Read + std::io::Write,
117 F: FnMut() -> Result<SessionOwned, AgentError>,
118{
119 use crate::protocol::{encode_failure, frame, parse_request, read_frame};
120
121 loop {
122 let body = match read_frame(&mut stream)? {
123 Some(b) => b,
124 None => return Ok(()), // peer closed at a frame boundary
125 };
126 let reply_body = match parse_request(&body) {
127 Ok(request) => {
128 let owned = make_session()?;
129 let session = owned.as_session();
130 session.handle(&request)?
131 }
132 Err(_) => encode_failure(),
133 };
134 stream.write_all(&frame(&reply_body))?;
135 stream.flush()?;
136 }
137}
138
139#[cfg(unix)]
140pub use unix_impl::{bind, serve};
141
142#[cfg(unix)]
143mod unix_impl {
144 use std::os::unix::net::UnixListener;
145 use std::path::Path;
146
147 use super::{AgentError, SessionOwned, handle_connection};
148
149 /// Bind the agent socket at `path` (mode `0600`), removing a stale socket
150 /// file first. Returns the listener; the caller serves it with [`serve`].
151 pub fn bind(path: &Path) -> Result<UnixListener, AgentError> {
152 // Remove a leftover socket from a previous run (a path that exists but
153 // has no live listener). We only ever remove a socket file, never a
154 // regular file.
155 if path.exists() {
156 let is_socket = std::fs::symlink_metadata(path)
157 .map(|m| {
158 use std::os::unix::fs::FileTypeExt;
159 m.file_type().is_socket()
160 })
161 .unwrap_or(false);
162 if is_socket {
163 let _ = std::fs::remove_file(path);
164 } else {
165 return Err(AgentError::Socket(format!(
166 "{} exists and is not a socket — refusing to overwrite",
167 path.display()
168 )));
169 }
170 }
171 let listener = UnixListener::bind(path)
172 .map_err(|e| AgentError::Socket(format!("bind {}: {e}", path.display())))?;
173 {
174 use std::os::unix::fs::PermissionsExt;
175 std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))
176 .map_err(|e| AgentError::Socket(format!("chmod {}: {e}", path.display())))?;
177 }
178 Ok(listener)
179 }
180
181 /// Serve the agent in the **foreground** until the listener is closed or an
182 /// unrecoverable error occurs. Each accepted connection is handled to
183 /// completion (one request → one reply, looping until the peer closes).
184 /// Per-connection errors are isolated: a malformed frame answers
185 /// `SSH_AGENT_FAILURE` and the connection continues; a transport error drops
186 /// just that connection.
187 ///
188 /// `make_session` is called per request to build a fresh [`super::SessionOwned`]
189 /// view over the (possibly re-read) custodied keys.
190 pub fn serve<F>(listener: &UnixListener, mut make_session: F) -> Result<(), AgentError>
191 where
192 F: FnMut() -> Result<SessionOwned, AgentError>,
193 {
194 for incoming in listener.incoming() {
195 match incoming {
196 Ok(stream) => {
197 if let Err(e) = handle_connection(stream, &mut make_session) {
198 // Log to stderr and keep serving — one bad peer must not
199 // take the daemon down. No key bytes are ever in `e`.
200 eprintln!("kovra ssh-agent: connection error: {e}");
201 }
202 }
203 Err(e) => {
204 eprintln!("kovra ssh-agent: accept error: {e}");
205 }
206 }
207 }
208 Ok(())
209 }
210}
211
212#[cfg(windows)]
213pub use windows_impl::{bind, serve};
214
/// Windows Named-Pipe rail for the governed ssh-agent (KOV-61). The pipe is the
/// transport analog of the Unix socket; the ssh-agent wire protocol over it is
/// identical, so [`super::handle_connection`] is reused verbatim. The pipe is
/// restricted to the **current user** (an SDDL `D:P(A;;GA;;;<sid>)` security
/// descriptor — the analog of the socket's `0600`). Named pipes leave no
/// filesystem artifact, so there is nothing to unlink on exit.
///
/// Lifecycle of the pipe name: [`bind`] creates the first instance with
/// `FILE_FLAG_FIRST_PIPE_INSTANCE`, so it fails if the name already exists
/// rather than joining a pipe created by someone else. [`serve`] then always
/// creates the replacement instance *before* closing the one it just served,
/// so the name never disappears while the agent is running.
#[cfg(windows)]
mod windows_impl {
    use std::os::windows::ffi::OsStrExt;
    use std::os::windows::io::FromRawHandle;
    use std::path::Path;

    use windows::Win32::Foundation::{
        CloseHandle, ERROR_PIPE_CONNECTED, HANDLE, HLOCAL, LocalFree,
    };
    use windows::Win32::Security::Authorization::{
        ConvertSidToStringSidW, ConvertStringSecurityDescriptorToSecurityDescriptorW,
        SDDL_REVISION_1,
    };
    use windows::Win32::Security::{
        GetTokenInformation, PSECURITY_DESCRIPTOR, SECURITY_ATTRIBUTES, TOKEN_QUERY, TOKEN_USER,
        TokenUser,
    };
    use windows::Win32::Storage::FileSystem::{FILE_FLAGS_AND_ATTRIBUTES, FlushFileBuffers};
    use windows::Win32::System::Pipes::{
        ConnectNamedPipe, CreateNamedPipeW, DisconnectNamedPipe, PIPE_READMODE_BYTE,
        PIPE_REJECT_REMOTE_CLIENTS, PIPE_TYPE_BYTE, PIPE_UNLIMITED_INSTANCES, PIPE_WAIT,
    };
    use windows::Win32::System::Threading::{GetCurrentProcess, OpenProcessToken};
    use windows::core::{HRESULT, HSTRING, PCWSTR, PWSTR};

    use super::{AgentError, SessionOwned, handle_connection};

    /// `PIPE_ACCESS_DUPLEX` (read+write server). A `FILE_FLAGS_AND_ATTRIBUTES`.
    const PIPE_ACCESS_DUPLEX: u32 = 0x0000_0003;
    /// `FILE_FLAG_FIRST_PIPE_INSTANCE`: creation fails if an instance of the
    /// pipe already exists. Set only for the instance created by [`bind`].
    const FILE_FLAG_FIRST_PIPE_INSTANCE: u32 = 0x0008_0000;
    /// Per-instance pipe buffer sizes (advisory).
    const PIPE_BUF: u32 = 8 * 1024;

    /// Wrap a displayable OS error as an [`AgentError::Socket`] prefixed with
    /// what was being attempted.
    fn win(e: impl std::fmt::Display, what: &str) -> AgentError {
        AgentError::Socket(format!("{what}: {e}"))
    }

    /// The Windows listener: owns the wide pipe name, the per-user security
    /// descriptor (kept alive for the pipe's lifetime), and the first server
    /// instance created by [`bind`] (so the pipe exists the moment we publish the
    /// address). Subsequent instances are created per connection by [`serve`].
    pub struct AgentListener {
        /// NUL-terminated UTF-16 pipe name.
        name: Vec<u16>,
        /// Owner-only security descriptor (LocalAlloc-backed), freed on drop.
        sd: PSECURITY_DESCRIPTOR,
        /// First server instance; consumed and closed by [`serve`].
        instance: HANDLE,
    }

    // SAFETY: the only non-Send fields are a Windows kernel HANDLE and a
    // security-descriptor heap pointer, both valid and usable from any thread in
    // the process. Moving the listener to the serving thread is therefore sound.
    unsafe impl Send for AgentListener {}

    impl Drop for AgentListener {
        fn drop(&mut self) {
            // The `instance` handle is consumed by `serve` (wrapped in a File that
            // closes it); we only free the security descriptor here. If `serve`
            // never ran, the instance is reclaimed by the OS on process exit.
            if !self.sd.0.is_null() {
                // SAFETY: `sd` came from ConvertStringSecurityDescriptor… (LocalAlloc).
                unsafe {
                    let _ = LocalFree(Some(HLOCAL(self.sd.0.cast())));
                }
            }
        }
    }

    /// Create the pipe (first instance) restricted to the current user.
    ///
    /// The instance is created with `FILE_FLAG_FIRST_PIPE_INSTANCE`, so this
    /// fails when a pipe of that name already exists — whether it belongs to
    /// another agent for the same vault or to a process squatting on the name —
    /// instead of silently adding an instance to it.
    ///
    /// # Errors
    ///
    /// [`AgentError::Socket`] if the security descriptor cannot be built or the
    /// pipe instance cannot be created.
    pub fn bind(path: &Path) -> Result<AgentListener, AgentError> {
        let name: Vec<u16> = path
            .as_os_str()
            .encode_wide()
            .chain(std::iter::once(0))
            .collect();
        let sd = build_owner_only_sd().map_err(|e| win(e, "pipe security descriptor"))?;
        match create_instance(&name, sd, true) {
            Ok(instance) => Ok(AgentListener { name, sd, instance }),
            Err(e) => {
                // SAFETY: free the SD we just allocated before bailing.
                unsafe {
                    if !sd.0.is_null() {
                        let _ = LocalFree(Some(HLOCAL(sd.0.cast())));
                    }
                }
                Err(win(
                    e,
                    &format!(
                        "create named pipe {} (another kovra agent already running?)",
                        path.display()
                    ),
                ))
            }
        }
    }

    /// Serve connections in the foreground: accept on the current instance, run
    /// the shared protocol loop over it, then move on to the next instance. One
    /// connection at a time (foreground MVP, decision Q4).
    ///
    /// Per connection the order is: connect → protocol loop → flush →
    /// disconnect → create the next instance → close the served one. The
    /// handle stays open through flush/disconnect, and the next instance exists
    /// before the old one is closed, so the pipe name is continuously present.
    ///
    /// # Errors
    ///
    /// Per-connection errors are logged to stderr and do not stop the loop. The
    /// function returns [`AgentError::Socket`] only when a replacement pipe
    /// instance cannot be created.
    pub fn serve<F>(listener: &AgentListener, mut make_session: F) -> Result<(), AgentError>
    where
        F: FnMut() -> Result<SessionOwned, AgentError>,
    {
        // `HANDLE` is Copy; we take the first instance and never let the listener's
        // Drop close it (it is closed here, when the per-connection File drops).
        let mut current = listener.instance;
        loop {
            // Block until a client connects. ERROR_PIPE_CONNECTED means one raced
            // in between create and connect — also success.
            // SAFETY: FFI; `current` is a pipe instance handle we own and have
            // not closed.
            match unsafe { ConnectNamedPipe(current, None) } {
                Ok(()) => {}
                Err(e) if e.code() == HRESULT::from_win32(ERROR_PIPE_CONNECTED.0) => {}
                Err(e) => {
                    eprintln!("kovra ssh-agent: connect error: {e}");
                    // Make the fresh instance first, then drop the broken one,
                    // so the pipe name does not vanish in between.
                    let replacement = create_instance(&listener.name, listener.sd, false);
                    // SAFETY: FFI; `current` is still open and owned by us.
                    unsafe {
                        let _ = CloseHandle(current);
                    }
                    current = replacement.map_err(|e| win(e, "re-create pipe instance"))?;
                    continue;
                }
            }

            // Wrap the connected instance as a File (Read + Write); it takes
            // ownership of the handle and closes it on drop. The ssh-agent wire
            // protocol is transport-agnostic, so the shared loop handles it.
            // SAFETY: `current` is a valid, connected pipe handle we own.
            let mut file = unsafe { std::fs::File::from_raw_handle(current.0.cast()) };
            // Lend the file by `&mut` so it — and therefore the handle — stays
            // open until the flush/disconnect below have run.
            if let Err(e) = handle_connection(&mut file, &mut make_session) {
                eprintln!("kovra ssh-agent: connection error: {e}");
            }
            drop_instance(current);

            // Next instance for the next client, created before the served one
            // is closed.
            let next = create_instance(&listener.name, listener.sd, false);
            drop(file); // closes `current`
            current = next.map_err(|e| win(e, "create next pipe instance"))?;
        }
    }

    /// Best-effort flush + disconnect of a served instance. Must be called while
    /// `handle` is still open, i.e. before the owning `File` is dropped; errors
    /// are ignored because the instance is about to be closed anyway.
    fn drop_instance(handle: HANDLE) {
        // SAFETY: FFI; the caller guarantees `handle` is an open pipe instance.
        unsafe {
            let _ = FlushFileBuffers(handle);
            let _ = DisconnectNamedPipe(handle);
        }
    }

    /// Create one named-pipe server instance with the owner-only SD.
    ///
    /// `first` adds `FILE_FLAG_FIRST_PIPE_INSTANCE`, making creation fail if
    /// the pipe name already exists; later instances are created without it.
    fn create_instance(
        name: &[u16],
        sd: PSECURITY_DESCRIPTOR,
        first: bool,
    ) -> windows::core::Result<HANDLE> {
        let open_mode = if first {
            PIPE_ACCESS_DUPLEX | FILE_FLAG_FIRST_PIPE_INSTANCE
        } else {
            PIPE_ACCESS_DUPLEX
        };
        let sa = SECURITY_ATTRIBUTES {
            nLength: std::mem::size_of::<SECURITY_ATTRIBUTES>() as u32,
            lpSecurityDescriptor: sd.0,
            bInheritHandle: false.into(),
        };
        // SAFETY: FFI; `name` is NUL-terminated and `sa` outlives the call.
        let handle = unsafe {
            CreateNamedPipeW(
                PCWSTR(name.as_ptr()),
                FILE_FLAGS_AND_ATTRIBUTES(open_mode),
                PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT | PIPE_REJECT_REMOTE_CLIENTS,
                PIPE_UNLIMITED_INSTANCES,
                PIPE_BUF,
                PIPE_BUF,
                0,
                Some(&sa),
            )
        };
        if handle.is_invalid() {
            return Err(windows::core::Error::from_thread());
        }
        Ok(handle)
    }

    /// A protected DACL granting full control to **only** the current user — the
    /// Windows analog of the Unix socket's `0600`. Returned descriptor is freed by
    /// [`AgentListener`]'s Drop.
    fn build_owner_only_sd() -> windows::core::Result<PSECURITY_DESCRIPTOR> {
        let sid = current_user_sid_string()?;
        let sddl = HSTRING::from(format!("D:P(A;;GA;;;{sid})"));
        let mut psd = PSECURITY_DESCRIPTOR::default();
        // SAFETY: FFI; `sddl` is a valid wide string; `psd` receives an allocation.
        unsafe {
            ConvertStringSecurityDescriptorToSecurityDescriptorW(
                &sddl,
                SDDL_REVISION_1,
                &mut psd,
                None,
            )?;
        }
        Ok(psd)
    }

    /// The current user's SID as an `S-1-…` string, from the process token.
    fn current_user_sid_string() -> windows::core::Result<String> {
        // SAFETY: FFI; the token handle is closed before returning; buffers are
        // sized by the OS via the two-call pattern; the TOKEN_USER buffer is
        // u64-backed so the struct is read through a suitably aligned pointer;
        // the SID string is freed on every path once it has been copied.
        unsafe {
            let mut token = HANDLE::default();
            OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &mut token)?;
            let mut len = 0u32;
            // First call only reports the required size; its error is expected.
            let _ = GetTokenInformation(token, TokenUser, None, 0, &mut len);
            // `Vec<u8>` gives no alignment guarantee for TOKEN_USER (which holds
            // a pointer), so back the buffer with u64 words instead.
            let words = (len as usize).div_ceil(std::mem::size_of::<u64>());
            let mut buf = vec![0u64; words];
            let info = GetTokenInformation(
                token,
                TokenUser,
                Some(buf.as_mut_ptr().cast()),
                len,
                &mut len,
            );
            let _ = CloseHandle(token);
            info?;
            let token_user = &*buf.as_ptr().cast::<TOKEN_USER>();
            let mut pwstr = PWSTR::null();
            ConvertSidToStringSidW(token_user.User.Sid, &mut pwstr)?;
            // Copy out first, then free unconditionally: the OS string must not
            // leak when decoding fails. ConvertSidToStringSidW allocates via
            // LocalAlloc.
            let decoded = pwstr.to_string();
            let _ = LocalFree(Some(HLOCAL(pwstr.0.cast())));
            decoded.map_err(|_| windows::core::Error::from_thread())
        }
    }
}
447
// `AgentListener` is already re-exported from `stub_impl` under this same cfg
// near the top of the file; naming it again here would import it twice
// (E0252). Only the two functions are re-exported here, mirroring the Unix and
// Windows re-exports.
#[cfg(not(any(unix, windows)))]
pub use stub_impl::{bind, serve};

/// Fallback for platforms with neither Unix sockets nor Named Pipes: the API
/// surface exists so callers compile, but every entry point reports that the
/// governed ssh-agent is unavailable.
#[cfg(not(any(unix, windows)))]
mod stub_impl {
    use std::path::Path;

    use super::{AgentError, SessionOwned};

    /// Message carried by every error returned from this module.
    const UNSUPPORTED: &str = "the governed ssh-agent is not available on this platform";

    /// Placeholder listener for platforms with neither Unix sockets nor Named
    /// Pipes; never constructed (`bind` errors first).
    #[derive(Debug)]
    pub struct AgentListener(());

    /// Always fails with [`AgentError::Socket`]: there is no transport to bind.
    pub fn bind(_path: &Path) -> Result<AgentListener, AgentError> {
        Err(AgentError::Socket(UNSUPPORTED.into()))
    }

    /// Always fails with [`AgentError::Socket`]. It cannot be reached in
    /// practice, because no [`AgentListener`] can be obtained from [`bind`].
    pub fn serve<F>(_listener: &AgentListener, _make_session: F) -> Result<(), AgentError>
    where
        F: FnMut() -> Result<SessionOwned, AgentError>,
    {
        Err(AgentError::Socket(UNSUPPORTED.into()))
    }
}
475
// Named-Pipe transport test that stays in-process (no real `ssh`). It binds a
// per-test pipe, serves it from a detached thread with no custodied keys, then
// acts as the client: one `REQUEST_IDENTITIES` goes out and an
// `IDENTITIES_ANSWER` listing zero identities must come back. That covers bind
// (including the owner-only security descriptor), the pipe accept, and the
// shared protocol loop running over the pipe. The real `ssh` peer stays `[host]`.
#[cfg(all(test, windows))]
mod windows_tests {
    use std::io::{Read, Write};
    use std::time::Duration;

    use kovra_core::{
        AgentScope, ConfirmOutcome, Filter, MockAuditSink, MockClock, MockConfirmer, Operation,
    };

    use super::{SessionOwned, bind, default_socket_path, serve};
    use crate::protocol::{SSH_AGENT_IDENTITIES_ANSWER, SSH_AGENTC_REQUEST_IDENTITIES, frame};

    /// Session inputs with an empty key set and mock collaborators; with no
    /// keys, no confirmation is ever needed.
    fn keyless_session() -> SessionOwned {
        let operations = [Operation::Metadata, Operation::Inject]
            .into_iter()
            .collect();
        SessionOwned {
            keys: Vec::new(),
            scope: AgentScope {
                operations,
                projects: Filter::Any,
                environments: Filter::Any,
            },
            confirmer: Box::new(MockConfirmer::always(ConfirmOutcome::Approved)),
            audit: Box::new(MockAuditSink::new()),
            clock: Box::new(MockClock::default()),
            confirm_timeout: Duration::from_secs(1),
            requesting_process: None,
        }
    }

    #[test]
    fn named_pipe_round_trips_request_identities() {
        let root = tempfile::tempdir().unwrap();
        let pipe = default_socket_path(root.path());
        let listener = bind(&pipe).expect("bind named pipe");

        // Detached server thread; it is never joined and ends with the test process.
        std::thread::spawn(move || {
            let _ = serve(&listener, || Ok(keyless_session()));
        });

        // `bind` already created the first instance, so opening the pipe here
        // cannot race with the server thread reaching ConnectNamedPipe.
        let mut client = std::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .open(&pipe)
            .expect("open the agent pipe as a client");
        let request = frame(&[SSH_AGENTC_REQUEST_IDENTITIES]);
        client.write_all(&request).unwrap();
        client.flush().unwrap();

        // Reply framing: 4-byte big-endian length, then that many body bytes.
        let mut header = [0u8; 4];
        client.read_exact(&mut header).unwrap();
        let mut body = vec![0u8; u32::from_be_bytes(header) as usize];
        client.read_exact(&mut body).unwrap();

        assert_eq!(body[0], SSH_AGENT_IDENTITIES_ANSWER, "answer message type");
        // The identity count (4-byte BE after the type) is zero — no keys custodied.
        assert_eq!(&body[1..5], &[0, 0, 0, 0], "zero identities");
    }
}
542}