Skip to main content

snapdir_stores/
shim.rs

//! External-store **emit-command shim**.
//!
//! snapdir's documented extension mechanism for third-party stores is the
//! *emit-command contract*: a `snapdir-<name>-store` binary on `PATH` does not
//! transfer anything itself — instead each of its three interface subcommands
//! **prints a shell script** to stdout, and the orchestrator captures that
//! script and `eval`s it. The Rust port serves the built-in `file`/`s3`/`b2`/
//! `gcs` adapters in-process, but for any third-party adapter it preserves this
//! contract verbatim via [`ExternalStore`].
//!
//! # The emit-command contract
//!
//! The three emit subcommands and their argument/stdin protocol, as the
//! orchestrator invokes them (`_snapdir_get_fetch_snapdir_manifest_command`,
//! `_snapdir_get_fetch_snapdir_files_command`, `_snapdir_get_push_command`):
//!
//! ```text
//! <bin> get-manifest-command     --id <id> --store <store>
//! <bin> get-fetch-files-command  --id <id> --store <store> --cache-dir <dir>   (manifest on stdin)
//! <bin> get-push-command         --id <id> --staging-dir <dir> --store <store>
//! ```
//!
//! Each prints a `set -eEuo pipefail; …` script on stdout. The orchestrator
//! runs that script the way `snapdir_push` does:
//!
//! ```text
//! bash -c "set -eEuo pipefail; trap 'kill 0' INT; <emitted-script> wait"
//! ```
//!
//! Invariants the script encodes (and which this shim therefore inherits, the
//! same ones the built-in stores keep):
//!
//! - **`get-manifest-command`** prints a script that `cat`s the stored manifest
//!   to stdout, or writes `ID '<id>' not found on --store '<store>'.` to stderr
//!   and exits 1 → mapped to [`StoreError::ManifestNotFound`].
//! - **`get-push-command`** is a no-op (`echo "Manifest already exists…"`) when
//!   the manifest is already present, and otherwise emits the object-transfer
//!   commands **before** the `commit-manifest` command, so a present manifest
//!   always implies its objects are present (objects-before-manifest).
//! - **`get-fetch-files-command`** emits the object-fetch commands followed by
//!   an `ensure-no-errors` command that scans the transfer log for `ERROR:`
//!   lines and fails the transaction if any are found (verify discipline).
//!
//! # Why shelling out is allowed here
//!
//! The zero-runtime-dependency rule bans the shipped binary from shelling to
//! `b3sum`/`gcloud`/`aws`/`b2`/`sqlite3` for its *own* core work. This shim is
//! different: it spawns a **third-party** `snapdir-<name>-store` binary, which
//! is the documented, user-installed extension point. The built-in stores never
//! reach this code.
51
52use std::ffi::OsStr;
53use std::io::Write;
54use std::path::{Path, PathBuf};
55use std::process::{Command, Stdio};
56
57use snapdir_core::manifest::Manifest;
58use snapdir_core::merkle::Blake3Hasher;
59use snapdir_core::store::{Store, StoreError};
60
61use crate::router::{resolve_adapter, Adapter};
62
/// Emit subcommand asked for the script that prints a stored manifest
/// (`<bin> get-manifest-command --id <id> --store <store>`).
const GET_MANIFEST_COMMAND: &str = "get-manifest-command";
/// Emit subcommand asked for the object-fetch script; it receives the manifest
/// on stdin (`<bin> get-fetch-files-command --id … --store … --cache-dir …`).
const GET_FETCH_FILES_COMMAND: &str = "get-fetch-files-command";
/// Emit subcommand asked for the push script
/// (`<bin> get-push-command --id <id> --staging-dir <dir> --store <store>`).
const GET_PUSH_COMMAND: &str = "get-push-command";
67
/// Shim around a third-party `snapdir-<name>-store` executable that speaks the
/// emit-command contract.
///
/// Two constructors exist: [`ExternalStore::new`] derives the binary name from
/// the store URL's protocol through the router, while
/// [`ExternalStore::with_binary`] takes the binary path/name directly (handy
/// for pointing at a mock in tests).
#[derive(Clone, Debug)]
pub struct ExternalStore {
    /// Complete `--store` URL; handed to the binary exactly as given.
    store_url: String,
    /// Executable to spawn: either a `snapdir-<name>-store` name looked up on
    /// `PATH`, or an explicit path.
    binary: PathBuf,
    /// Shell that runs the emitted scripts (`bash` unless overridden).
    shell: String,
}
84
85impl ExternalStore {
86    /// Builds a shim for `store_url`, resolving the third-party binary name from
87    /// its protocol via [`resolve_adapter`].
88    ///
89    /// # Errors
90    ///
91    /// Returns [`StoreError::Backend`] if the store URL's protocol is invalid,
92    /// or if it resolves to a built-in adapter (`file`/`s3`/`b2`/`gcs`) — those
93    /// are served in-process and must not be routed through the shim.
94    pub fn new(store_url: &str) -> Result<Self, StoreError> {
95        let adapter = resolve_adapter(store_url).map_err(|e| StoreError::Backend {
96            message: e.to_string(),
97            source: Some(Box::new(e)),
98        })?;
99        match adapter {
100            Adapter::External { .. } => Ok(Self::with_binary(store_url, adapter.store_binary())),
101            builtin => Err(StoreError::Backend {
102                message: format!(
103                    "store protocol resolves to built-in adapter '{}' served in-process, \
104                     not via the external-store shim",
105                    builtin.name()
106                ),
107                source: None,
108            }),
109        }
110    }
111
112    /// Builds a shim that dispatches to an explicit `binary` (path or name on
113    /// `PATH`) for `store_url`, bypassing protocol resolution.
114    ///
115    /// Useful for tests (pointing at a mock store script) and for honoring an
116    /// explicit `_SNAPDIR_<PROTO>_STORE_BIN_PATH`-style override.
117    #[must_use]
118    pub fn with_binary(store_url: &str, binary: impl Into<PathBuf>) -> Self {
119        Self {
120            store_url: store_url.to_owned(),
121            binary: binary.into(),
122            shell: "bash".to_owned(),
123        }
124    }
125
126    /// Overrides the shell used to `eval` emitted scripts (default `bash`).
127    #[must_use]
128    pub fn with_shell(mut self, shell: impl Into<String>) -> Self {
129        self.shell = shell.into();
130        self
131    }
132
133    /// The resolved store binary name/path.
134    #[must_use]
135    pub fn binary(&self) -> &Path {
136        &self.binary
137    }
138
139    /// Invokes the store binary's `subcommand` with `args`, optionally feeding
140    /// `stdin`, and returns the emitted shell script (stdout).
141    fn emit(
142        &self,
143        subcommand: &str,
144        args: &[&OsStr],
145        stdin: Option<&[u8]>,
146    ) -> Result<String, StoreError> {
147        let mut cmd = Command::new(&self.binary);
148        cmd.arg(subcommand)
149            .args(args)
150            .stdout(Stdio::piped())
151            .stderr(Stdio::piped())
152            .stdin(if stdin.is_some() {
153                Stdio::piped()
154            } else {
155                Stdio::null()
156            });
157
158        let mut child = cmd.spawn().map_err(|e| StoreError::Backend {
159            message: format!("failed to spawn store binary '{}'", self.binary.display()),
160            source: Some(Box::new(e)),
161        })?;
162
163        if let Some(bytes) = stdin {
164            let mut sink = child.stdin.take().ok_or_else(|| StoreError::Backend {
165                message: "store binary stdin unavailable".to_owned(),
166                source: None,
167            })?;
168            sink.write_all(bytes)?;
169            // Drop closes the pipe so the child sees EOF.
170            drop(sink);
171        }
172
173        let output = child.wait_with_output()?;
174        if !output.status.success() {
175            return Err(StoreError::Backend {
176                message: format!(
177                    "store binary '{}' {} exited with {}: {}",
178                    self.binary.display(),
179                    subcommand,
180                    output.status,
181                    String::from_utf8_lossy(&output.stderr).trim()
182                ),
183                source: None,
184            });
185        }
186        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
187    }
188
189    /// `eval`s an emitted shell `script` the way the orchestrator historically
190    /// did (`bash -c "set -eEuo pipefail; trap 'kill 0' INT; <script> wait"`),
191    /// optionally feeding `stdin`, returning the script's stdout.
192    fn eval(&self, script: &str, stdin: Option<&[u8]>) -> Result<EvalOutput, StoreError> {
193        let wrapped = format!("set -eEuo pipefail;\ntrap 'kill 0' INT;\n{script}\nwait");
194        let mut cmd = Command::new(&self.shell);
195        cmd.arg("-c")
196            .arg(&wrapped)
197            .stdout(Stdio::piped())
198            .stderr(Stdio::piped())
199            .stdin(if stdin.is_some() {
200                Stdio::piped()
201            } else {
202                Stdio::null()
203            });
204
205        let mut child = cmd.spawn().map_err(|e| StoreError::Backend {
206            message: format!("failed to spawn shell '{}'", self.shell),
207            source: Some(Box::new(e)),
208        })?;
209
210        if let Some(bytes) = stdin {
211            let mut sink = child.stdin.take().ok_or_else(|| StoreError::Backend {
212                message: "shell stdin unavailable".to_owned(),
213                source: None,
214            })?;
215            sink.write_all(bytes)?;
216            drop(sink);
217        }
218
219        let output = child.wait_with_output()?;
220        Ok(EvalOutput {
221            success: output.status.success(),
222            code: output.status.code(),
223            stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
224            stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
225        })
226    }
227}
228
/// Everything captured from running one emitted script through the shell.
struct EvalOutput {
    /// Whether the shell exited with a success status.
    success: bool,
    /// The shell's exit code, or `None` when the process reported none.
    code: Option<i32>,
    /// Captured standard output, decoded lossily as UTF-8.
    stdout: String,
    /// Captured standard error, decoded lossily as UTF-8.
    stderr: String,
}
236
237impl Store for ExternalStore {
238    fn get_manifest(&self, id: &str) -> Result<Manifest, StoreError> {
239        // 1. Ask the store binary for the manifest-fetch script.
240        let args: [&OsStr; 4] = [
241            OsStr::new("--id"),
242            OsStr::new(id),
243            OsStr::new("--store"),
244            OsStr::new(&self.store_url),
245        ];
246        let script = self.emit(GET_MANIFEST_COMMAND, &args, None)?;
247
248        // 2. Run it. The script `cat`s the manifest on success, or writes
249        //    "ID '<id>' not found on --store '<store>'." to stderr + exit 1.
250        let out = self.eval(&script, None)?;
251        if !out.success {
252            if out.stderr.contains("not found on --store") {
253                return Err(StoreError::ManifestNotFound { id: id.to_owned() });
254            }
255            return Err(StoreError::Backend {
256                message: format!(
257                    "{GET_MANIFEST_COMMAND} script for id '{id}' failed (exit {}): {}",
258                    out.code.unwrap_or(-1),
259                    out.stderr.trim()
260                ),
261                source: None,
262            });
263        }
264
265        // 3. Parse + verify the manifest hashes back to `id`.
266        let manifest = Manifest::parse(&out.stdout)?;
267        let hasher = Blake3Hasher;
268        let actual = snapdir_core::snapshot_id(&manifest, &hasher);
269        if actual != id {
270            return Err(StoreError::Integrity {
271                address: snapdir_core::store::manifest_path(id),
272                expected: id.to_owned(),
273                actual,
274            });
275        }
276        Ok(manifest)
277    }
278
279    fn fetch_files(&self, manifest: &Manifest, dest: &Path) -> Result<(), StoreError> {
280        let hasher = Blake3Hasher;
281        let id = snapdir_core::snapshot_id(manifest, &hasher);
282        let manifest_text = manifest.to_string();
283
284        // get-fetch-files-command reads the manifest from stdin and emits the
285        // object-fetch commands + an ensure-no-errors verify command.
286        let args: [&OsStr; 6] = [
287            OsStr::new("--id"),
288            OsStr::new(&id),
289            OsStr::new("--store"),
290            OsStr::new(&self.store_url),
291            OsStr::new("--cache-dir"),
292            dest.as_os_str(),
293        ];
294        let script = self.emit(
295            GET_FETCH_FILES_COMMAND,
296            &args,
297            Some(manifest_text.as_bytes()),
298        )?;
299
300        let out = self.eval(&script, None)?;
301        if !out.success {
302            return Err(StoreError::Backend {
303                message: format!(
304                    "{GET_FETCH_FILES_COMMAND} script for id '{id}' failed (exit {}): {}",
305                    out.code.unwrap_or(-1),
306                    out.stderr.trim()
307                ),
308                source: None,
309            });
310        }
311        // Verify discipline: the emitted ensure-no-errors guards the transfer
312        // log, but scan the combined output for ERROR: lines defensively too.
313        if out.stdout.contains("ERROR:") || out.stderr.contains("ERROR:") {
314            return Err(StoreError::Backend {
315                message: format!(
316                    "{GET_FETCH_FILES_COMMAND} transaction for id '{id}' reported an error: {}",
317                    out.stderr.trim()
318                ),
319                source: None,
320            });
321        }
322        Ok(())
323    }
324
325    fn push(&self, manifest: &Manifest, source: &Path) -> Result<(), StoreError> {
326        let hasher = Blake3Hasher;
327        let id = snapdir_core::snapshot_id(manifest, &hasher);
328
329        // get-push-command emits a no-op when the manifest already exists,
330        // otherwise object-transfer commands BEFORE the commit-manifest command.
331        let args: [&OsStr; 6] = [
332            OsStr::new("--id"),
333            OsStr::new(&id),
334            OsStr::new("--staging-dir"),
335            source.as_os_str(),
336            OsStr::new("--store"),
337            OsStr::new(&self.store_url),
338        ];
339        let script = self.emit(GET_PUSH_COMMAND, &args, None)?;
340
341        let out = self.eval(&script, None)?;
342        if !out.success {
343            return Err(StoreError::Backend {
344                message: format!(
345                    "{GET_PUSH_COMMAND} script for id '{id}' failed (exit {}): {}",
346                    out.code.unwrap_or(-1),
347                    out.stderr.trim()
348                ),
349                source: None,
350            });
351        }
352        if out.stdout.contains("ERROR:") || out.stderr.contains("ERROR:") {
353            return Err(StoreError::Backend {
354                message: format!(
355                    "{GET_PUSH_COMMAND} transaction for id '{id}' reported an error: {}",
356                    out.stderr.trim()
357                ),
358                source: None,
359            });
360        }
361        Ok(())
362    }
363}
364
#[cfg(test)]
mod tests {
    use super::*;

    /// URLs whose protocols belong to the in-process adapters must be refused
    /// by the shim constructor with a `Backend` error.
    #[test]
    fn shim_new_rejects_builtin_adapters() {
        const BUILTIN_URLS: [&str; 4] = ["file:///x", "s3://b/x", "b2://b/x", "gs://b/x"];

        for url in BUILTIN_URLS {
            let outcome = ExternalStore::new(url);
            let err = outcome.unwrap_err();
            let is_backend = matches!(err, StoreError::Backend { .. });
            assert!(
                is_backend,
                "expected Backend error for built-in {url}, got {err:?}"
            );
        }
    }

    /// An unknown protocol maps to the conventional `snapdir-<proto>-store`
    /// binary name.
    #[test]
    fn shim_new_resolves_third_party_binary_from_protocol() {
        let expected = Path::new("snapdir-mock-store");
        let store = ExternalStore::new("mock://bucket/base").unwrap();
        assert_eq!(store.binary(), expected);
    }
}
385}