dodot_lib/datastore/mod.rs
//! State management for dodot.
//!
//! The [`DataStore`] trait defines dodot's storage API.
//! [`FilesystemDataStore`] implements it using symlinks and sentinel
//! files on a real (or test) filesystem via the [`Fs`](crate::fs::Fs) trait.
6
7mod filesystem;
8
9pub use filesystem::FilesystemDataStore;
10
11use std::path::{Path, PathBuf};
12
13use crate::Result;
14
15/// Dodot's storage interface.
16///
17/// State is represented entirely by symlinks and sentinel files in the
18/// filesystem — no database, no lock files. The 8 methods break into
19/// three groups:
20///
21/// **Mutations** — modify state:
22/// - [`create_data_link`](DataStore::create_data_link)
23/// - [`create_user_link`](DataStore::create_user_link)
24/// - [`run_and_record`](DataStore::run_and_record)
25/// - [`remove_state`](DataStore::remove_state)
26///
27/// **Queries** — read state:
28/// - [`has_sentinel`](DataStore::has_sentinel)
29/// - [`has_handler_state`](DataStore::has_handler_state)
30/// - [`list_pack_handlers`](DataStore::list_pack_handlers)
31/// - [`list_handler_sentinels`](DataStore::list_handler_sentinels)
32pub trait DataStore: Send + Sync {
33 /// Creates an intermediate symlink in the datastore:
34 /// `handler_data_dir(pack, handler) / filename -> source_file`
35 ///
36 /// Returns the absolute path of the created datastore link.
37 /// Idempotent: if the link exists and already points to the correct
38 /// source, this is a no-op.
39 fn create_data_link(&self, pack: &str, handler: &str, source_file: &Path) -> Result<PathBuf>;
40
41 /// Creates a user-visible symlink:
42 /// `user_path -> datastore_path`
43 ///
44 /// This is the second leg of the double-link architecture.
45 /// Creates parent directories as needed.
46 fn create_user_link(&self, datastore_path: &Path, user_path: &Path) -> Result<()>;
47
48 /// Executes `command` via shell and records a sentinel on success.
49 ///
50 /// Idempotent: if the sentinel already exists, the command is not
51 /// re-run. The sentinel file stores `completed|{timestamp}`.
52 ///
53 /// **Edge case**: if the command succeeds but the sentinel write
54 /// fails, a subsequent call will re-run the command. This is by
55 /// design — re-running is safer than falsely marking as complete.
56 /// Install scripts should be idempotent to handle this.
57 fn run_and_record(
58 &self,
59 pack: &str,
60 handler: &str,
61 executable: &str,
62 arguments: &[String],
63 sentinel: &str,
64 force: bool,
65 ) -> Result<()>;
66
67 /// Checks whether a sentinel exists for this pack/handler.
68 fn has_sentinel(&self, pack: &str, handler: &str, sentinel: &str) -> Result<bool>;
69
70 /// Removes all state for a pack/handler pair.
71 ///
72 /// Deletes the handler data directory and everything in it.
73 fn remove_state(&self, pack: &str, handler: &str) -> Result<()>;
74
75 /// Checks if any state exists for a pack/handler pair.
76 fn has_handler_state(&self, pack: &str, handler: &str) -> Result<bool>;
77
78 /// Lists handler names that have state for a pack.
79 fn list_pack_handlers(&self, pack: &str) -> Result<Vec<String>>;
80
81 /// Lists sentinel file names for a pack/handler.
82 fn list_handler_sentinels(&self, pack: &str, handler: &str) -> Result<Vec<String>>;
83
84 /// Writes a regular file (not a symlink) into the datastore.
85 ///
86 /// Used for preprocessor-expanded files where the datastore holds
87 /// rendered content rather than a symlink to the source.
88 /// Returns the absolute path of the written file.
89 /// Idempotent: overwrites if the file already exists.
90 ///
91 /// `filename` must be a safe relative path — no absolute paths, no
92 /// `..` components. Callers (typically the preprocessing pipeline)
93 /// are expected to validate before calling. Implementations should
94 /// also reject unsafe paths as defense-in-depth.
95 fn write_rendered_file(
96 &self,
97 pack: &str,
98 handler: &str,
99 filename: &str,
100 content: &[u8],
101 ) -> Result<PathBuf>;
102
103 /// Like [`write_rendered_file`], but applies `mode` atomically
104 /// at file-creation time so the rendered bytes never live on
105 /// disk under a more permissive mode (per `secrets.lex` §4.3
106 /// for whole-file `age` / `gpg` plaintext). Default impl
107 /// falls back to `write_rendered_file` followed by an
108 /// `Fs::set_permissions` chmod — semantically equivalent but
109 /// briefly leaves the file at the umask-default mode; real
110 /// impls should override with the atomic
111 /// `Fs::write_file_with_mode` path.
112 fn write_rendered_file_with_mode(
113 &self,
114 pack: &str,
115 handler: &str,
116 filename: &str,
117 content: &[u8],
118 mode: u32,
119 ) -> Result<PathBuf>;
120
121 /// Creates a directory (mkdir -p) inside the datastore and returns
122 /// its absolute path. Used for preprocessor-expanded directory
123 /// entries (e.g. directory markers from tar archives).
124 ///
125 /// Same path-safety constraints as [`write_rendered_file`].
126 fn write_rendered_dir(&self, pack: &str, handler: &str, relative: &str) -> Result<PathBuf>;
127
128 /// Returns the absolute path where a sentinel file would be stored.
129 fn sentinel_path(&self, pack: &str, handler: &str, sentinel: &str) -> std::path::PathBuf;
130}
131
132/// Abstraction over process execution.
133///
134/// [`FilesystemDataStore`] uses this to run commands in
135/// [`run_and_record`](DataStore::run_and_record). Tests can provide a
136/// mock that records calls without spawning processes.
137pub trait CommandRunner: Send + Sync {
138 fn run(&self, executable: &str, arguments: &[String]) -> Result<CommandOutput>;
139
140 /// Variant of [`Self::run`] that returns stdout as raw bytes.
141 /// Required for callers that decrypt binary payloads through a
142 /// subprocess (whole-file `age` / `gpg` preprocessors per
143 /// `secrets.lex` §4) — `String::from_utf8_lossy` on the
144 /// `run` path corrupts non-UTF-8 plaintext, so SSH binary
145 /// keys / X.509 DER certs / kubeconfig blobs would round-trip
146 /// to disk with replacement characters.
147 ///
148 /// Stderr stays a `String` because diagnostic text is
149 /// human-readable in every shipped provider; if a future
150 /// caller emits non-UTF-8 stderr we'll add a bytes variant
151 /// then.
152 ///
153 /// Default impl converts a `run()` result by re-encoding the
154 /// `String` stdout as bytes — that's safe (UTF-8 is a strict
155 /// subset of bytes) but does *not* recover bytes lost to
156 /// `from_utf8_lossy` upstream. Real impls (`ShellCommandRunner`)
157 /// must override and read stdout as raw bytes from the start.
158 fn run_bytes(&self, executable: &str, arguments: &[String]) -> Result<CommandOutputBytes> {
159 let out = self.run(executable, arguments)?;
160 Ok(CommandOutputBytes {
161 exit_code: out.exit_code,
162 stdout: out.stdout.into_bytes(),
163 stderr: out.stderr,
164 })
165 }
166}
167
/// Output from a command execution, with both streams held as text.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly
/// (e.g. `assert_eq!` against an expected value in tests and mocks).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandOutput {
    /// Exit code reported for the process.
    pub exit_code: i32,
    /// Everything the process wrote to stdout, as text.
    pub stdout: String,
    /// Everything the process wrote to stderr, as text.
    pub stderr: String,
}
175
/// Output from a command execution where stdout is held as raw
/// bytes — used by [`CommandRunner::run_bytes`] for callers that
/// must preserve binary payloads (whole-file decryption via age /
/// gpg, etc.).
///
/// Derives `PartialEq`/`Eq` so results can be compared directly
/// (e.g. `assert_eq!` against an expected value in tests and mocks).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandOutputBytes {
    /// Exit code reported for the process.
    pub exit_code: i32,
    /// Everything the process wrote to stdout, byte-for-byte.
    pub stdout: Vec<u8>,
    /// Everything the process wrote to stderr, as text.
    pub stderr: String,
}
186
/// [`CommandRunner`] that spawns a real child process.
///
/// The executable is launched directly with its argument list; no
/// intermediate shell is involved unless the caller names one as the
/// executable.
///
/// `verbose` controls whether the script's raw stdout/stderr is streamed
/// through to the user's terminal. Regardless of the flag, lines matching
/// the `# status:` convention on stdout are always surfaced as live progress
/// markers, and captured output is returned via [`CommandOutput`] for
/// callers that want it.
#[derive(Debug, Clone)]
pub struct ShellCommandRunner {
    /// When `true`, the child's raw output is passed through to the
    /// host terminal while it is being captured.
    verbose: bool,
}

impl ShellCommandRunner {
    /// Creates a runner; `verbose` enables raw output passthrough.
    pub fn new(verbose: bool) -> Self {
        Self { verbose }
    }
}
203
/// Renders `executable` plus `arguments` as one human-readable line for
/// error messages.
///
/// Arguments that are empty or contain whitespace or a quote character
/// are written in their `Debug` form (double-quoted, with escapes) so
/// argument boundaries stay visible; every other argument is copied
/// verbatim. The executable itself is never quoted.
pub(crate) fn format_command_for_display(executable: &str, arguments: &[String]) -> String {
    let mut rendered = String::from(executable);

    for argument in arguments {
        let needs_quoting = argument.is_empty()
            || argument.contains(|c: char| c.is_whitespace() || c == '"' || c == '\'');

        rendered.push(' ');
        if needs_quoting {
            rendered.push_str(&format!("{argument:?}"));
        } else {
            rendered.push_str(argument);
        }
    }

    rendered
}
226
/// Extracts the message from a `# status:` progress line.
///
/// Accepted shape: optional leading whitespace, a `#`, optional
/// whitespace, then the literal `status:`. Whatever follows is returned
/// with surrounding whitespace trimmed (possibly empty). Any other line
/// yields `None`.
///
/// The convention is deliberately tool-agnostic: to a shell the marker
/// is an ordinary comment, so a script using it remains valid and
/// meaningful when run by hand outside dodot.
pub(crate) fn parse_status_line(line: &str) -> Option<&str> {
    line.trim_start()
        .strip_prefix('#')
        .map(str::trim_start)
        .and_then(|after_hash| after_hash.strip_prefix("status:"))
        .map(str::trim)
}
240
241impl CommandRunner for ShellCommandRunner {
242 fn run(&self, executable: &str, arguments: &[String]) -> Result<CommandOutput> {
243 use std::io::{BufRead, BufReader, IsTerminal, Write};
244 use std::process::{Command, Stdio};
245 use std::sync::{Arc, Mutex};
246 use std::thread;
247
248 let mut child = Command::new(executable)
249 .args(arguments)
250 .stdout(Stdio::piped())
251 .stderr(Stdio::piped())
252 .spawn()
253 .map_err(|e| crate::DodotError::CommandFailed {
254 command: format_command_for_display(executable, arguments),
255 exit_code: -1,
256 stderr: e.to_string(),
257 })?;
258
259 let stdout_pipe = child
260 .stdout
261 .take()
262 .expect("piped stdout missing after spawn");
263 let stderr_pipe = child
264 .stderr
265 .take()
266 .expect("piped stderr missing after spawn");
267
268 // ANSI dim only if the user's stdout is a TTY — keeps colour
269 // codes out of pipes/log files.
270 let tty = std::io::stdout().is_terminal();
271 let dim = if tty { "\x1b[2m" } else { "" };
272 let reset = if tty { "\x1b[0m" } else { "" };
273 let arrow = if tty { "→" } else { "->" };
274
275 let verbose = self.verbose;
276 let stderr_buf = Arc::new(Mutex::new(String::new()));
277
278 // Read raw bytes (not `BufRead::lines()`) so non-UTF-8 output
279 // doesn't stop draining mid-stream — a stalled drain would
280 // deadlock the child once the pipe buffer fills. Decode each
281 // line lossily for display/capture; binary garbage becomes U+FFFD
282 // rather than aborting the read.
283 fn pop_eol(buf: &mut Vec<u8>) {
284 if buf.last() == Some(&b'\n') {
285 buf.pop();
286 }
287 if buf.last() == Some(&b'\r') {
288 buf.pop();
289 }
290 }
291
292 // Drain stderr in a worker thread to avoid pipe-buffer deadlock
293 // (a chatty stderr can block the child if no one's reading).
294 let stderr_thread = {
295 let buf = stderr_buf.clone();
296 thread::spawn(move || {
297 let mut reader = BufReader::new(stderr_pipe);
298 let host_stderr = std::io::stderr();
299 let mut bytes = Vec::new();
300 loop {
301 bytes.clear();
302 match reader.read_until(b'\n', &mut bytes) {
303 Ok(0) | Err(_) => break,
304 Ok(_) => {
305 pop_eol(&mut bytes);
306 let line = String::from_utf8_lossy(&bytes);
307 {
308 let mut guard = buf.lock().expect("stderr buf poisoned");
309 guard.push_str(&line);
310 guard.push('\n');
311 }
312 if verbose {
313 let mut h = host_stderr.lock();
314 let _ = writeln!(h, "{line}");
315 }
316 }
317 }
318 }
319 })
320 };
321
322 // Read stdout on the main thread: capture, scan for `# status:`,
323 // optionally passthrough.
324 let mut stdout_buf = String::new();
325 {
326 let mut reader = BufReader::new(stdout_pipe);
327 let host_stdout = std::io::stdout();
328 let mut bytes = Vec::new();
329 loop {
330 bytes.clear();
331 match reader.read_until(b'\n', &mut bytes) {
332 Ok(0) | Err(_) => break,
333 Ok(_) => {
334 pop_eol(&mut bytes);
335 let line = String::from_utf8_lossy(&bytes);
336 stdout_buf.push_str(&line);
337 stdout_buf.push('\n');
338
339 if let Some(msg) = parse_status_line(&line) {
340 let mut h = host_stdout.lock();
341 let _ = writeln!(h, "{dim}{arrow}{reset} {msg}");
342 }
343 if verbose {
344 let mut h = host_stdout.lock();
345 let _ = writeln!(h, "{line}");
346 }
347 }
348 }
349 }
350 }
351
352 let _ = stderr_thread.join();
353 let stderr_text = stderr_buf.lock().expect("stderr buf poisoned").clone();
354
355 let status = child.wait().map_err(|e| crate::DodotError::CommandFailed {
356 command: format_command_for_display(executable, arguments),
357 exit_code: -1,
358 stderr: e.to_string(),
359 })?;
360 let exit_code = status.code().unwrap_or(-1);
361
362 if !status.success() {
363 // When not verbose, the user hasn't seen any of the script's
364 // stderr — surface it now so a failure is debuggable.
365 if !verbose && !stderr_text.is_empty() {
366 let host_stderr = std::io::stderr();
367 let mut h = host_stderr.lock();
368 let _ = h.write_all(stderr_text.as_bytes());
369 if !stderr_text.ends_with('\n') {
370 let _ = writeln!(h);
371 }
372 }
373 return Err(crate::DodotError::CommandFailed {
374 command: format_command_for_display(executable, arguments),
375 exit_code,
376 stderr: stderr_text,
377 });
378 }
379
380 Ok(CommandOutput {
381 exit_code,
382 stdout: stdout_buf,
383 stderr: stderr_text,
384 })
385 }
386
387 /// Override of the default trait impl: reads stdout as raw
388 /// bytes (no `from_utf8_lossy` decode), so binary payloads
389 /// from age / gpg whole-file decryption survive verbatim.
390 /// Stderr is still buffered as text via the same drainer
391 /// pattern `run` uses — gpg and age both emit
392 /// human-readable diagnostics.
393 fn run_bytes(&self, executable: &str, arguments: &[String]) -> Result<CommandOutputBytes> {
394 use std::io::{Read, Write};
395 use std::process::{Command, Stdio};
396 use std::sync::{Arc, Mutex};
397 use std::thread;
398
399 let mut child = Command::new(executable)
400 .args(arguments)
401 .stdout(Stdio::piped())
402 .stderr(Stdio::piped())
403 .spawn()
404 .map_err(|e| crate::DodotError::CommandFailed {
405 command: format_command_for_display(executable, arguments),
406 exit_code: -1,
407 stderr: e.to_string(),
408 })?;
409
410 let mut stdout_pipe = child
411 .stdout
412 .take()
413 .expect("piped stdout missing after spawn");
414 let stderr_pipe = child
415 .stderr
416 .take()
417 .expect("piped stderr missing after spawn");
418
419 let stderr_buf = Arc::new(Mutex::new(String::new()));
420 let stderr_thread = {
421 let buf = stderr_buf.clone();
422 thread::spawn(move || {
423 let mut s = String::new();
424 let mut reader = std::io::BufReader::new(stderr_pipe);
425 let _ = std::io::Read::read_to_string(&mut reader, &mut s);
426 if let Ok(mut guard) = buf.lock() {
427 guard.push_str(&s);
428 }
429 })
430 };
431
432 // Read stdout as raw bytes on the main thread. No
433 // line-by-line passthrough / status-line parsing here —
434 // those are install-script ergonomics and have no place in
435 // a binary-payload pipe.
436 let mut stdout_buf: Vec<u8> = Vec::new();
437 if let Err(e) = stdout_pipe.read_to_end(&mut stdout_buf) {
438 // Surface the IO error, but still wait for the child
439 // so we don't leak a zombie.
440 let _ = child.wait();
441 let _ = stderr_thread.join();
442 return Err(crate::DodotError::CommandFailed {
443 command: format_command_for_display(executable, arguments),
444 exit_code: -1,
445 stderr: e.to_string(),
446 });
447 }
448
449 let _ = stderr_thread.join();
450 let stderr_text = stderr_buf.lock().expect("stderr buf poisoned").clone();
451
452 let status = child.wait().map_err(|e| crate::DodotError::CommandFailed {
453 command: format_command_for_display(executable, arguments),
454 exit_code: -1,
455 stderr: e.to_string(),
456 })?;
457 let exit_code = status.code().unwrap_or(-1);
458
459 if !status.success() && !stderr_text.is_empty() && !self.verbose {
460 // Mirror `run`'s "surface stderr on failure when not
461 // verbose" pattern so a quiet failure is still
462 // debuggable.
463 let host_stderr = std::io::stderr();
464 let mut h = host_stderr.lock();
465 let _ = h.write_all(stderr_text.as_bytes());
466 if !stderr_text.ends_with('\n') {
467 let _ = writeln!(h);
468 }
469 }
470
471 // Note: unlike `run`, we don't return `Err` on non-zero
472 // exit. Whole-file preprocessors do their own exit-code
473 // mapping (see `age.rs` / `gpg.rs`) and need to inspect
474 // exit_code + stderr to surface actionable hints.
475 Ok(CommandOutputBytes {
476 exit_code,
477 stdout: stdout_buf,
478 stderr: stderr_text,
479 })
480 }
481}
482
#[cfg(test)]
mod tests {
    use super::*;

    // ── parse_status_line ───────────────────────────────────────

    #[test]
    fn parse_status_line_matches_no_space() {
        assert_eq!(parse_status_line("#status: building"), Some("building"));
    }

    #[test]
    fn parse_status_line_matches_one_space() {
        assert_eq!(
            parse_status_line("# status: downloading installer"),
            Some("downloading installer")
        );
    }

    #[test]
    fn parse_status_line_matches_extra_whitespace() {
        assert_eq!(
            parse_status_line("   #   status:   compiling   "),
            Some("compiling")
        );
    }

    #[test]
    fn parse_status_line_rejects_plain_comment() {
        assert_eq!(parse_status_line("# just a comment"), None);
    }

    #[test]
    fn parse_status_line_rejects_non_comment() {
        assert_eq!(parse_status_line("echo status: foo"), None);
    }

    #[test]
    fn parse_status_line_rejects_shebang() {
        // `#!/usr/bin/env bash` doesn't start the magic word — ignored.
        assert_eq!(parse_status_line("#!/bin/bash"), None);
    }

    #[test]
    fn parse_status_line_returns_empty_message() {
        // Empty status: still matches (script chose to print a blank progress).
        assert_eq!(parse_status_line("# status:"), Some(""));
    }

    // ── format_command_for_display ──────────────────────────────

    #[test]
    fn format_command_for_display_without_arguments_is_bare_executable() {
        assert_eq!(format_command_for_display("ls", &[]), "ls");
    }

    #[test]
    fn format_command_for_display_quotes_awkward_arguments() {
        // Whitespace, emptiness and quote characters all trigger the
        // Debug-style quoting; plain arguments pass through untouched.
        let args = vec![
            "-c".to_string(),
            "echo hi".to_string(),
            String::new(),
            "it's".to_string(),
        ];
        assert_eq!(
            format_command_for_display("bash", &args),
            r#"bash -c "echo hi" "" "it's""#
        );
    }

    // ── ShellCommandRunner::run ─────────────────────────────────

    #[test]
    fn shell_runner_streams_and_captures_real_script() {
        // Smoke-test the real spawn/streaming path. We assert on the
        // captured CommandOutput; live host-stdout assertions would
        // require redirecting process-wide stdout and aren't worth the
        // complexity here.
        let runner = ShellCommandRunner::new(false);
        let script = "echo starting; \
                      echo '# status: phase one'; \
                      echo middle; \
                      echo '# status: phase two'; \
                      echo done";
        let out = runner
            .run("bash", &["-c".into(), script.into()])
            .expect("script should succeed");
        assert!(out.stdout.contains("starting"));
        assert!(out.stdout.contains("# status: phase one"));
        assert!(out.stdout.contains("middle"));
        assert!(out.stdout.contains("# status: phase two"));
        assert!(out.stdout.contains("done"));
        assert_eq!(out.exit_code, 0);
    }

    #[test]
    fn shell_runner_returns_error_on_nonzero_exit() {
        let runner = ShellCommandRunner::new(false);
        let result = runner.run("bash", &["-c".into(), "exit 7".into()]);
        match result {
            Err(crate::DodotError::CommandFailed { exit_code, .. }) => {
                assert_eq!(exit_code, 7);
            }
            other => panic!("expected CommandFailed, got {other:?}"),
        }
    }

    #[test]
    fn shell_runner_captures_stderr_in_command_output() {
        let runner = ShellCommandRunner::new(false);
        let out = runner
            .run("bash", &["-c".into(), "echo hello >&2; echo world".into()])
            .expect("script should succeed");
        assert!(out.stderr.contains("hello"));
        assert!(out.stdout.contains("world"));
    }

    #[test]
    fn shell_runner_reports_spawn_failure_as_command_failed() {
        // A missing executable must surface as CommandFailed with the
        // `-1` sentinel exit code rather than a panic.
        let runner = ShellCommandRunner::new(false);
        let result = runner.run("dodot-test-no-such-executable-7f3a9c", &[]);
        match result {
            Err(crate::DodotError::CommandFailed { exit_code, .. }) => {
                assert_eq!(exit_code, -1);
            }
            other => panic!("expected CommandFailed, got {other:?}"),
        }
    }

    // ── ShellCommandRunner::run_bytes ───────────────────────────

    #[test]
    fn shell_runner_run_bytes_preserves_non_utf8_stdout() {
        // 0xFF 0xFE is not valid UTF-8; the bytes path must hand it back
        // untouched instead of substituting replacement characters.
        let runner = ShellCommandRunner::new(false);
        let out = runner
            .run_bytes("bash", &["-c".into(), r"printf '\377\376\001'".into()])
            .expect("script should succeed");
        assert_eq!(out.stdout, vec![0xff, 0xfe, 0x01]);
        assert_eq!(out.exit_code, 0);
    }

    #[test]
    fn shell_runner_run_bytes_reports_nonzero_exit_without_erroring() {
        // verbose = true keeps the failure echo out of the test output.
        let runner = ShellCommandRunner::new(true);
        let out = runner
            .run_bytes("bash", &["-c".into(), "echo oops >&2; exit 3".into()])
            .expect("run_bytes should not map exit codes to Err");
        assert_eq!(out.exit_code, 3);
        assert!(out.stderr.contains("oops"));
        assert!(out.stdout.is_empty());
    }
}