Skip to main content

dodot_lib/datastore/
mod.rs

//! State management for dodot.
//!
//! The [`DataStore`] trait defines dodot's storage API.
//! [`FilesystemDataStore`] implements it using symlinks and sentinel
//! files on a real (or test) filesystem via the [`Fs`](crate::fs::Fs) trait.
6
7mod filesystem;
8
9pub use filesystem::FilesystemDataStore;
10
11use std::path::{Path, PathBuf};
12
13use crate::Result;
14
15/// Dodot's storage interface.
16///
17/// State is represented entirely by symlinks and sentinel files in the
18/// filesystem — no database, no lock files. The 8 methods break into
19/// three groups:
20///
21/// **Mutations** — modify state:
22/// - [`create_data_link`](DataStore::create_data_link)
23/// - [`create_user_link`](DataStore::create_user_link)
24/// - [`run_and_record`](DataStore::run_and_record)
25/// - [`remove_state`](DataStore::remove_state)
26///
27/// **Queries** — read state:
28/// - [`has_sentinel`](DataStore::has_sentinel)
29/// - [`has_handler_state`](DataStore::has_handler_state)
30/// - [`list_pack_handlers`](DataStore::list_pack_handlers)
31/// - [`list_handler_sentinels`](DataStore::list_handler_sentinels)
32pub trait DataStore: Send + Sync {
33    /// Creates an intermediate symlink in the datastore:
34    /// `handler_data_dir(pack, handler) / filename -> source_file`
35    ///
36    /// Returns the absolute path of the created datastore link.
37    /// Idempotent: if the link exists and already points to the correct
38    /// source, this is a no-op.
39    fn create_data_link(&self, pack: &str, handler: &str, source_file: &Path) -> Result<PathBuf>;
40
41    /// Creates a user-visible symlink:
42    /// `user_path -> datastore_path`
43    ///
44    /// This is the second leg of the double-link architecture.
45    /// Creates parent directories as needed.
46    fn create_user_link(&self, datastore_path: &Path, user_path: &Path) -> Result<()>;
47
48    /// Executes `command` via shell and records a sentinel on success.
49    ///
50    /// Idempotent: if the sentinel already exists, the command is not
51    /// re-run. The sentinel file stores `completed|{timestamp}`.
52    ///
53    /// **Edge case**: if the command succeeds but the sentinel write
54    /// fails, a subsequent call will re-run the command. This is by
55    /// design — re-running is safer than falsely marking as complete.
56    /// Install scripts should be idempotent to handle this.
57    fn run_and_record(
58        &self,
59        pack: &str,
60        handler: &str,
61        executable: &str,
62        arguments: &[String],
63        sentinel: &str,
64        force: bool,
65    ) -> Result<()>;
66
67    /// Checks whether a sentinel exists for this pack/handler.
68    fn has_sentinel(&self, pack: &str, handler: &str, sentinel: &str) -> Result<bool>;
69
70    /// Removes all state for a pack/handler pair.
71    ///
72    /// Deletes the handler data directory and everything in it.
73    fn remove_state(&self, pack: &str, handler: &str) -> Result<()>;
74
75    /// Checks if any state exists for a pack/handler pair.
76    fn has_handler_state(&self, pack: &str, handler: &str) -> Result<bool>;
77
78    /// Lists handler names that have state for a pack.
79    fn list_pack_handlers(&self, pack: &str) -> Result<Vec<String>>;
80
81    /// Lists sentinel file names for a pack/handler.
82    fn list_handler_sentinels(&self, pack: &str, handler: &str) -> Result<Vec<String>>;
83
84    /// Writes a regular file (not a symlink) into the datastore.
85    ///
86    /// Used for preprocessor-expanded files where the datastore holds
87    /// rendered content rather than a symlink to the source.
88    /// Returns the absolute path of the written file.
89    /// Idempotent: overwrites if the file already exists.
90    ///
91    /// `filename` must be a safe relative path — no absolute paths, no
92    /// `..` components. Callers (typically the preprocessing pipeline)
93    /// are expected to validate before calling. Implementations should
94    /// also reject unsafe paths as defense-in-depth.
95    fn write_rendered_file(
96        &self,
97        pack: &str,
98        handler: &str,
99        filename: &str,
100        content: &[u8],
101    ) -> Result<PathBuf>;
102
103    /// Like [`write_rendered_file`], but applies `mode` atomically
104    /// at file-creation time so the rendered bytes never live on
105    /// disk under a more permissive mode (per `secrets.lex` §4.3
106    /// for whole-file `age` / `gpg` plaintext). Default impl
107    /// falls back to `write_rendered_file` followed by an
108    /// `Fs::set_permissions` chmod — semantically equivalent but
109    /// briefly leaves the file at the umask-default mode; real
110    /// impls should override with the atomic
111    /// `Fs::write_file_with_mode` path.
112    fn write_rendered_file_with_mode(
113        &self,
114        pack: &str,
115        handler: &str,
116        filename: &str,
117        content: &[u8],
118        mode: u32,
119    ) -> Result<PathBuf>;
120
121    /// Creates a directory (mkdir -p) inside the datastore and returns
122    /// its absolute path. Used for preprocessor-expanded directory
123    /// entries (e.g. directory markers from tar archives).
124    ///
125    /// Same path-safety constraints as [`write_rendered_file`].
126    fn write_rendered_dir(&self, pack: &str, handler: &str, relative: &str) -> Result<PathBuf>;
127
128    /// Returns the absolute path where a sentinel file would be stored.
129    fn sentinel_path(&self, pack: &str, handler: &str, sentinel: &str) -> std::path::PathBuf;
130}
131
132/// Abstraction over process execution.
133///
134/// [`FilesystemDataStore`] uses this to run commands in
135/// [`run_and_record`](DataStore::run_and_record). Tests can provide a
136/// mock that records calls without spawning processes.
137pub trait CommandRunner: Send + Sync {
138    fn run(&self, executable: &str, arguments: &[String]) -> Result<CommandOutput>;
139
140    /// Variant of [`Self::run`] that returns stdout as raw bytes.
141    /// Required for callers that decrypt binary payloads through a
142    /// subprocess (whole-file `age` / `gpg` preprocessors per
143    /// `secrets.lex` §4) — `String::from_utf8_lossy` on the
144    /// `run` path corrupts non-UTF-8 plaintext, so SSH binary
145    /// keys / X.509 DER certs / kubeconfig blobs would round-trip
146    /// to disk with replacement characters.
147    ///
148    /// Stderr stays a `String` because diagnostic text is
149    /// human-readable in every shipped provider; if a future
150    /// caller emits non-UTF-8 stderr we'll add a bytes variant
151    /// then.
152    ///
153    /// Default impl converts a `run()` result by re-encoding the
154    /// `String` stdout as bytes — that's safe (UTF-8 is a strict
155    /// subset of bytes) but does *not* recover bytes lost to
156    /// `from_utf8_lossy` upstream. Real impls (`ShellCommandRunner`)
157    /// must override and read stdout as raw bytes from the start.
158    fn run_bytes(&self, executable: &str, arguments: &[String]) -> Result<CommandOutputBytes> {
159        let out = self.run(executable, arguments)?;
160        Ok(CommandOutputBytes {
161            exit_code: out.exit_code,
162            stdout: out.stdout.into_bytes(),
163            stderr: out.stderr,
164        })
165    }
166}
167
/// Output from a command execution, with both streams held as text.
///
/// Derives `PartialEq`/`Eq` so mocks and tests can compare whole
/// outputs with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandOutput {
    /// Exit code reported for the process.
    pub exit_code: i32,
    /// Captured standard output, as text.
    pub stdout: String,
    /// Captured standard error, as text.
    pub stderr: String,
}
175
/// Output from a command execution where stdout is held as raw
/// bytes — used by [`CommandRunner::run_bytes`] for callers that
/// must preserve binary payloads (whole-file decryption via age /
/// gpg, etc.).
///
/// Derives `PartialEq`/`Eq` so mocks and tests can compare whole
/// outputs with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommandOutputBytes {
    /// Exit code reported for the process.
    pub exit_code: i32,
    /// Captured standard output, byte-for-byte.
    pub stdout: Vec<u8>,
    /// Captured standard error, as text.
    pub stderr: String,
}
186
/// [`CommandRunner`] that spawns a real shell process.
///
/// `verbose` controls whether the script's raw stdout/stderr is streamed
/// through to the user's terminal. Regardless of the flag, lines matching
/// the `# status:` convention on stdout are always surfaced as live progress
/// markers, and captured output is returned via [`CommandOutput`] for
/// callers that want it.
#[derive(Debug, Clone)]
pub struct ShellCommandRunner {
    /// When `true`, the child's raw output is passed through live.
    verbose: bool,
}

impl ShellCommandRunner {
    /// Creates a runner; `verbose` enables live passthrough of the
    /// child's raw output.
    #[must_use]
    pub fn new(verbose: bool) -> Self {
        Self { verbose }
    }
}
203
/// Renders `executable` followed by its `arguments` as a single
/// human-readable string (used in this file for error messages).
///
/// Arguments that are empty, or that contain whitespace or a quote
/// character, are rendered with Rust's `Debug` formatting so they stay
/// visually delimited; every other argument is emitted verbatim. The
/// executable itself is never quoted. The result is meant for display
/// only — it is not shell-safe quoting.
pub(crate) fn format_command_for_display(executable: &str, arguments: &[String]) -> String {
    let needs_quoting = |text: &str| {
        text.is_empty()
            || text
                .chars()
                .any(|c| c.is_whitespace() || c == '"' || c == '\'')
    };

    let mut rendered = String::from(executable);
    for argument in arguments {
        rendered.push(' ');
        if needs_quoting(argument) {
            rendered.push_str(&format!("{argument:?}"));
        } else {
            rendered.push_str(argument);
        }
    }
    rendered
}
226
/// Extracts the message from a `# status:` progress line.
///
/// Leading whitespace is skipped, then a `#`, then optional whitespace,
/// then the literal `status:`; whatever follows is returned with
/// surrounding whitespace trimmed (possibly empty). Any line that does
/// not follow that shape yields `None`.
///
/// The convention is deliberately tool-agnostic: to a shell the line is
/// just a comment, so a script using it remains valid and meaningful
/// when run by hand outside dodot.
pub(crate) fn parse_status_line(line: &str) -> Option<&str> {
    line.trim_start()
        .strip_prefix('#')
        .map(str::trim_start)
        .and_then(|after_hash| after_hash.strip_prefix("status:"))
        .map(str::trim)
}
240
241impl CommandRunner for ShellCommandRunner {
242    fn run(&self, executable: &str, arguments: &[String]) -> Result<CommandOutput> {
243        use std::io::{BufRead, BufReader, IsTerminal, Write};
244        use std::process::{Command, Stdio};
245        use std::sync::{Arc, Mutex};
246        use std::thread;
247
248        let mut child = Command::new(executable)
249            .args(arguments)
250            .stdout(Stdio::piped())
251            .stderr(Stdio::piped())
252            .spawn()
253            .map_err(|e| crate::DodotError::CommandFailed {
254                command: format_command_for_display(executable, arguments),
255                exit_code: -1,
256                stderr: e.to_string(),
257            })?;
258
259        let stdout_pipe = child
260            .stdout
261            .take()
262            .expect("piped stdout missing after spawn");
263        let stderr_pipe = child
264            .stderr
265            .take()
266            .expect("piped stderr missing after spawn");
267
268        // ANSI dim only if the user's stdout is a TTY — keeps colour
269        // codes out of pipes/log files.
270        let tty = std::io::stdout().is_terminal();
271        let dim = if tty { "\x1b[2m" } else { "" };
272        let reset = if tty { "\x1b[0m" } else { "" };
273        let arrow = if tty { "→" } else { "->" };
274
275        let verbose = self.verbose;
276        let stderr_buf = Arc::new(Mutex::new(String::new()));
277
278        // Read raw bytes (not `BufRead::lines()`) so non-UTF-8 output
279        // doesn't stop draining mid-stream — a stalled drain would
280        // deadlock the child once the pipe buffer fills. Decode each
281        // line lossily for display/capture; binary garbage becomes U+FFFD
282        // rather than aborting the read.
283        fn pop_eol(buf: &mut Vec<u8>) {
284            if buf.last() == Some(&b'\n') {
285                buf.pop();
286            }
287            if buf.last() == Some(&b'\r') {
288                buf.pop();
289            }
290        }
291
292        // Drain stderr in a worker thread to avoid pipe-buffer deadlock
293        // (a chatty stderr can block the child if no one's reading).
294        let stderr_thread = {
295            let buf = stderr_buf.clone();
296            thread::spawn(move || {
297                let mut reader = BufReader::new(stderr_pipe);
298                let host_stderr = std::io::stderr();
299                let mut bytes = Vec::new();
300                loop {
301                    bytes.clear();
302                    match reader.read_until(b'\n', &mut bytes) {
303                        Ok(0) | Err(_) => break,
304                        Ok(_) => {
305                            pop_eol(&mut bytes);
306                            let line = String::from_utf8_lossy(&bytes);
307                            {
308                                let mut guard = buf.lock().expect("stderr buf poisoned");
309                                guard.push_str(&line);
310                                guard.push('\n');
311                            }
312                            if verbose {
313                                let mut h = host_stderr.lock();
314                                let _ = writeln!(h, "{line}");
315                            }
316                        }
317                    }
318                }
319            })
320        };
321
322        // Read stdout on the main thread: capture, scan for `# status:`,
323        // optionally passthrough.
324        let mut stdout_buf = String::new();
325        {
326            let mut reader = BufReader::new(stdout_pipe);
327            let host_stdout = std::io::stdout();
328            let mut bytes = Vec::new();
329            loop {
330                bytes.clear();
331                match reader.read_until(b'\n', &mut bytes) {
332                    Ok(0) | Err(_) => break,
333                    Ok(_) => {
334                        pop_eol(&mut bytes);
335                        let line = String::from_utf8_lossy(&bytes);
336                        stdout_buf.push_str(&line);
337                        stdout_buf.push('\n');
338
339                        if let Some(msg) = parse_status_line(&line) {
340                            let mut h = host_stdout.lock();
341                            let _ = writeln!(h, "{dim}{arrow}{reset} {msg}");
342                        }
343                        if verbose {
344                            let mut h = host_stdout.lock();
345                            let _ = writeln!(h, "{line}");
346                        }
347                    }
348                }
349            }
350        }
351
352        let _ = stderr_thread.join();
353        let stderr_text = stderr_buf.lock().expect("stderr buf poisoned").clone();
354
355        let status = child.wait().map_err(|e| crate::DodotError::CommandFailed {
356            command: format_command_for_display(executable, arguments),
357            exit_code: -1,
358            stderr: e.to_string(),
359        })?;
360        let exit_code = status.code().unwrap_or(-1);
361
362        if !status.success() {
363            // When not verbose, the user hasn't seen any of the script's
364            // stderr — surface it now so a failure is debuggable.
365            if !verbose && !stderr_text.is_empty() {
366                let host_stderr = std::io::stderr();
367                let mut h = host_stderr.lock();
368                let _ = h.write_all(stderr_text.as_bytes());
369                if !stderr_text.ends_with('\n') {
370                    let _ = writeln!(h);
371                }
372            }
373            return Err(crate::DodotError::CommandFailed {
374                command: format_command_for_display(executable, arguments),
375                exit_code,
376                stderr: stderr_text,
377            });
378        }
379
380        Ok(CommandOutput {
381            exit_code,
382            stdout: stdout_buf,
383            stderr: stderr_text,
384        })
385    }
386
387    /// Override of the default trait impl: reads stdout as raw
388    /// bytes (no `from_utf8_lossy` decode), so binary payloads
389    /// from age / gpg whole-file decryption survive verbatim.
390    /// Stderr is still buffered as text via the same drainer
391    /// pattern `run` uses — gpg and age both emit
392    /// human-readable diagnostics.
393    fn run_bytes(&self, executable: &str, arguments: &[String]) -> Result<CommandOutputBytes> {
394        use std::io::{Read, Write};
395        use std::process::{Command, Stdio};
396        use std::sync::{Arc, Mutex};
397        use std::thread;
398
399        let mut child = Command::new(executable)
400            .args(arguments)
401            .stdout(Stdio::piped())
402            .stderr(Stdio::piped())
403            .spawn()
404            .map_err(|e| crate::DodotError::CommandFailed {
405                command: format_command_for_display(executable, arguments),
406                exit_code: -1,
407                stderr: e.to_string(),
408            })?;
409
410        let mut stdout_pipe = child
411            .stdout
412            .take()
413            .expect("piped stdout missing after spawn");
414        let stderr_pipe = child
415            .stderr
416            .take()
417            .expect("piped stderr missing after spawn");
418
419        let stderr_buf = Arc::new(Mutex::new(String::new()));
420        let stderr_thread = {
421            let buf = stderr_buf.clone();
422            thread::spawn(move || {
423                let mut s = String::new();
424                let mut reader = std::io::BufReader::new(stderr_pipe);
425                let _ = std::io::Read::read_to_string(&mut reader, &mut s);
426                if let Ok(mut guard) = buf.lock() {
427                    guard.push_str(&s);
428                }
429            })
430        };
431
432        // Read stdout as raw bytes on the main thread. No
433        // line-by-line passthrough / status-line parsing here —
434        // those are install-script ergonomics and have no place in
435        // a binary-payload pipe.
436        let mut stdout_buf: Vec<u8> = Vec::new();
437        if let Err(e) = stdout_pipe.read_to_end(&mut stdout_buf) {
438            // Surface the IO error, but still wait for the child
439            // so we don't leak a zombie.
440            let _ = child.wait();
441            let _ = stderr_thread.join();
442            return Err(crate::DodotError::CommandFailed {
443                command: format_command_for_display(executable, arguments),
444                exit_code: -1,
445                stderr: e.to_string(),
446            });
447        }
448
449        let _ = stderr_thread.join();
450        let stderr_text = stderr_buf.lock().expect("stderr buf poisoned").clone();
451
452        let status = child.wait().map_err(|e| crate::DodotError::CommandFailed {
453            command: format_command_for_display(executable, arguments),
454            exit_code: -1,
455            stderr: e.to_string(),
456        })?;
457        let exit_code = status.code().unwrap_or(-1);
458
459        if !status.success() && !stderr_text.is_empty() && !self.verbose {
460            // Mirror `run`'s "surface stderr on failure when not
461            // verbose" pattern so a quiet failure is still
462            // debuggable.
463            let host_stderr = std::io::stderr();
464            let mut h = host_stderr.lock();
465            let _ = h.write_all(stderr_text.as_bytes());
466            if !stderr_text.ends_with('\n') {
467                let _ = writeln!(h);
468            }
469        }
470
471        // Note: unlike `run`, we don't return `Err` on non-zero
472        // exit. Whole-file preprocessors do their own exit-code
473        // mapping (see `age.rs` / `gpg.rs`) and need to inspect
474        // exit_code + stderr to surface actionable hints.
475        Ok(CommandOutputBytes {
476            exit_code,
477            stdout: stdout_buf,
478            stderr: stderr_text,
479        })
480    }
481}
482
/// Unit tests for the status-line parser, the display formatter and the
/// real shell-backed runner. The runner tests spawn `bash`, so they
/// require it on `PATH`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_status_line_matches_no_space() {
        assert_eq!(parse_status_line("#status: building"), Some("building"));
    }

    #[test]
    fn parse_status_line_matches_one_space() {
        assert_eq!(
            parse_status_line("# status: downloading installer"),
            Some("downloading installer")
        );
    }

    #[test]
    fn parse_status_line_matches_extra_whitespace() {
        assert_eq!(
            parse_status_line("   #   status:   compiling   "),
            Some("compiling")
        );
    }

    #[test]
    fn parse_status_line_rejects_plain_comment() {
        assert_eq!(parse_status_line("# just a comment"), None);
    }

    #[test]
    fn parse_status_line_rejects_non_comment() {
        assert_eq!(parse_status_line("echo status: foo"), None);
    }

    #[test]
    fn parse_status_line_rejects_shebang() {
        // `#!/usr/bin/env bash` doesn't start the magic word — ignored.
        assert_eq!(parse_status_line("#!/bin/bash"), None);
    }

    #[test]
    fn parse_status_line_returns_empty_message() {
        // Empty status: still matches (script chose to print a blank progress).
        assert_eq!(parse_status_line("# status:"), Some(""));
    }

    #[test]
    fn format_command_without_arguments_is_just_the_executable() {
        assert_eq!(format_command_for_display("ls", &[]), "ls");
    }

    #[test]
    fn format_command_quotes_arguments_that_need_it() {
        // Whitespace and empty arguments are Debug-quoted; plain ones are not.
        let args = vec!["-c".to_string(), "echo hi".to_string(), String::new()];
        assert_eq!(
            format_command_for_display("bash", &args),
            "bash -c \"echo hi\" \"\""
        );
    }

    #[test]
    fn shell_runner_streams_and_captures_real_script() {
        // Smoke-test the real spawn/streaming path. We assert on the
        // captured CommandOutput; live host-stdout assertions would
        // require redirecting process-wide stdout and aren't worth the
        // complexity here.
        let runner = ShellCommandRunner::new(false);
        let script = "echo starting; \
            echo '# status: phase one'; \
            echo middle; \
            echo '# status: phase two'; \
            echo done";
        let out = runner
            .run("bash", &["-c".into(), script.into()])
            .expect("script should succeed");
        assert!(out.stdout.contains("starting"));
        assert!(out.stdout.contains("# status: phase one"));
        assert!(out.stdout.contains("middle"));
        assert!(out.stdout.contains("# status: phase two"));
        assert!(out.stdout.contains("done"));
        assert_eq!(out.exit_code, 0);
    }

    #[test]
    fn shell_runner_returns_error_on_nonzero_exit() {
        let runner = ShellCommandRunner::new(false);
        let result = runner.run("bash", &["-c".into(), "exit 7".into()]);
        match result {
            Err(crate::DodotError::CommandFailed { exit_code, .. }) => {
                assert_eq!(exit_code, 7);
            }
            other => panic!("expected CommandFailed, got {other:?}"),
        }
    }

    #[test]
    fn shell_runner_captures_stderr_in_command_output() {
        let runner = ShellCommandRunner::new(false);
        let out = runner
            .run("bash", &["-c".into(), "echo hello >&2; echo world".into()])
            .expect("script should succeed");
        assert!(out.stderr.contains("hello"));
        assert!(out.stdout.contains("world"));
    }

    #[test]
    fn shell_runner_reports_spawn_failure_as_command_failed() {
        // A missing executable surfaces as CommandFailed with the -1 sentinel code.
        let runner = ShellCommandRunner::new(false);
        let result = runner.run("dodot-test-no-such-executable", &[]);
        match result {
            Err(crate::DodotError::CommandFailed { exit_code, .. }) => {
                assert_eq!(exit_code, -1);
            }
            other => panic!("expected CommandFailed, got {other:?}"),
        }
    }

    #[test]
    fn shell_runner_run_bytes_preserves_non_utf8_stdout() {
        // The whole point of run_bytes: bytes that are not valid UTF-8
        // must come back untouched.
        let runner = ShellCommandRunner::new(false);
        let out = runner
            .run_bytes("bash", &["-c".into(), r"printf '\xff\xfe\x80'".into()])
            .expect("spawn should succeed");
        assert_eq!(out.exit_code, 0);
        assert_eq!(out.stdout, vec![0xff_u8, 0xfe, 0x80]);
    }

    #[test]
    fn shell_runner_run_bytes_reports_nonzero_exit_without_erroring() {
        // run_bytes leaves exit-code interpretation to the caller.
        let runner = ShellCommandRunner::new(false);
        let out = runner
            .run_bytes("bash", &["-c".into(), "echo oops >&2; exit 3".into()])
            .expect("non-zero exit is not an Err for run_bytes");
        assert_eq!(out.exit_code, 3);
        assert!(out.stderr.contains("oops"));
        assert!(out.stdout.is_empty());
    }
}