Skip to main content

snapdir_core/
store.rs

1//! Storage backend abstraction and the content-addressable path layout.
2//!
3//! A snapdir *store* is any backing location that holds two kinds of
4//! content-addressable blobs:
5//!
6//! - **objects** — the raw bytes of each file, addressed by their content
7//!   checksum, under `.objects/`.
8//! - **manifests** — the snapshot manifest text, addressed by its snapshot id
9//!   (the BLAKE3 of the comment-stripped manifest), under `.manifests/`.
10//!
11//! Both use the same three-level sharded layout, slicing the hex address into
12//! `3 / 3 / 3 / rest` segments to keep any single directory small. This layout
13//! is a **frozen interop contract**: it must match the Bash oracle
14//! (`snapdir`'s `_snapdir_get_object_rel_path` /
15//! `_snapdir_get_manifest_rel_path`) byte-for-byte so that a store written by
16//! either implementation is readable by the other.
17//!
18//! ```text
19//! .objects/<h[0..3]>/<h[3..6]>/<h[6..9]>/<h[9..]>
20//! .manifests/<id[0..3]>/<id[3..6]>/<id[6..9]>/<id[9..]>
21//! ```
22//!
23//! # Sync trait, async implementations
24//!
25//! [`Store`] is a **synchronous, object-safe** trait. The orchestrator's walk
26//! and hash stages are synchronous, and the on-disk [`FileStore`] (a later
27//! gate) is naturally synchronous, so a sync surface keeps the common path
28//! allocation-light and dyn-dispatchable (`&dyn Store`).
29//!
30//! Network stores (S3, B2, GCS) use async native SDKs. They satisfy this sync
31//! trait by owning a private `tokio` runtime and bridging each method with
32//! `runtime.block_on(async { … })`. That bridge lives entirely inside the
33//! concrete store crate; it never leaks `async`/`await` or a runtime
34//! requirement into `snapdir-core` or the orchestrator. This is deliberate:
35//! making the trait `async` would force a runtime onto the otherwise-sync
36//! `FileStore` and the CLI, and would cost object-safety without `async_trait`.
37//!
38//! [`FileStore`]: https://docs.rs/snapdir-file-store
39
40use std::path::Path;
41
42use thiserror::Error;
43
44use crate::manifest::Manifest;
45
/// Name of the top-level directory, inside a store, under which content
/// objects are filed.
pub const OBJECTS_DIR: &str = ".objects";

/// Name of the top-level directory, inside a store, under which snapshot
/// manifests are filed.
pub const MANIFESTS_DIR: &str = ".manifests";
51
52/// Returns the relative, sharded path of a content object given its hex
53/// checksum.
54///
55/// The layout is `.objects/<h[0..3]>/<h[3..6]>/<h[6..9]>/<h[9..]>`, matching
56/// the oracle's `_snapdir_get_object_rel_path`. The returned path always uses
57/// forward slashes (the on-disk separator the oracle emits); a store targeting
58/// a native filesystem can feed it straight to [`Path`], and an object-store
59/// backend uses it verbatim as a key.
60///
61/// The checksum is used as-is; callers are expected to pass a lowercase hex
62/// digest as produced by the [`crate::merkle`] hashers. Inputs shorter than
63/// nine characters degrade gracefully (the missing shard segments and/or the
64/// trailing component are simply empty), but that is never a valid snapdir
65/// checksum.
66///
67/// # Examples
68///
69/// ```
70/// use snapdir_core::store::object_path;
71///
72/// let h = "49dc870df1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
73/// assert_eq!(
74///     object_path(h),
75///     ".objects/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92"
76/// );
77/// ```
78#[must_use]
79pub fn object_path(checksum: &str) -> String {
80    sharded_path(OBJECTS_DIR, checksum)
81}
82
83/// Returns the relative, sharded path of a manifest given its snapshot id.
84///
85/// The layout is `.manifests/<id[0..3]>/<id[3..6]>/<id[6..9]>/<id[9..]>`,
86/// matching the oracle's `_snapdir_get_manifest_rel_path`. See [`object_path`]
87/// for separator and input conventions.
88///
89/// # Examples
90///
91/// ```
92/// use snapdir_core::store::manifest_path;
93///
94/// let id = "49dc870df1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
95/// assert_eq!(
96///     manifest_path(id),
97///     ".manifests/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92"
98/// );
99/// ```
100#[must_use]
101pub fn manifest_path(snapshot_id: &str) -> String {
102    sharded_path(MANIFESTS_DIR, snapshot_id)
103}
104
105/// Shared three-level sharding used by both objects and manifests.
106///
107/// Slices `hex` into `[0..3] / [3..6] / [6..9] / [9..]` and joins them under
108/// `prefix` with `/`. Mirrors the oracle's `${id:0:3}` / `${id:3:3}` /
109/// `${id:6:3}` / `${id:9}` expansion exactly, including its behavior on short
110/// inputs (Bash substring expansion past the end yields an empty string rather
111/// than panicking, which `char_slice` reproduces).
112fn sharded_path(prefix: &str, hex: &str) -> String {
113    let s0 = char_slice(hex, 0, 3);
114    let s1 = char_slice(hex, 3, 6);
115    let s2 = char_slice(hex, 6, 9);
116    let rest = char_slice(hex, 9, hex.len());
117    format!("{prefix}/{s0}/{s1}/{s2}/{rest}")
118}
119
/// Returns the characters of `s` in the half-open range `start..end`,
/// clamping both bounds to the end of the string instead of panicking.
///
/// Offsets count characters, as Bash `${var:offset:length}` does, so a bound
/// can never land inside a multibyte UTF-8 sequence. For the ASCII-hex
/// addresses snapdir uses, character and byte offsets coincide and the result
/// is the plain byte-range slice. Because the public path helpers accept any
/// `&str`, non-ASCII input must still not panic here.
///
/// A bound past the end of `s` (including a byte length used as `end`) is
/// treated as "end of string". An inverted range (`start > end`) yields `""`.
fn char_slice(s: &str, start: usize, end: usize) -> &str {
    // Byte offset of the `n`-th character, or the byte length of `s` when it
    // has fewer than `n` characters.
    let offset_of = |n: usize| s.char_indices().nth(n).map_or(s.len(), |(i, _)| i);

    let lo = offset_of(start);
    // Never let the upper bound fall below the lower one, so slicing cannot panic.
    let hi = offset_of(end).max(lo);
    &s[lo..hi]
}
129
130/// Errors a [`Store`] backend can surface.
131///
132/// Backends wrap their own failure types (filesystem I/O, HTTP/SDK errors,
133/// integrity mismatches) into these variants. The orchestrator matches on the
134/// variant, not the wrapped cause, so behavior stays backend-agnostic.
135#[derive(Debug, Error)]
136#[non_exhaustive]
137pub enum StoreError {
138    /// The requested manifest (by snapshot id) was not present in the store.
139    #[error("manifest not found: {id}")]
140    ManifestNotFound {
141        /// The snapshot id that was looked up.
142        id: String,
143    },
144
145    /// A content object referenced by a manifest was not present in the store.
146    #[error("object not found: {checksum}")]
147    ObjectNotFound {
148        /// The object checksum that was looked up.
149        checksum: String,
150    },
151
152    /// Stored bytes did not hash to the address they were filed under (object
153    /// checksum or manifest snapshot id mismatch) — the blob is corrupt or
154    /// tampered.
155    #[error("integrity check failed for {address}: expected {expected}, got {actual}")]
156    Integrity {
157        /// The address (object path or manifest id) being verified.
158        address: String,
159        /// The checksum/id the address claims.
160        expected: String,
161        /// The checksum/id actually computed over the bytes.
162        actual: String,
163    },
164
165    /// A manifest's text could not be parsed into a [`Manifest`].
166    #[error("failed to parse manifest: {0}")]
167    Parse(#[from] crate::manifest::ParseError),
168
169    /// An underlying I/O failure (filesystem, network, SDK).
170    #[error("store I/O error: {0}")]
171    Io(#[from] std::io::Error),
172
173    /// A backend-specific failure that does not fit the typed variants above
174    /// (e.g. an SDK error from a network store). Carries a human-readable
175    /// message and an optional source.
176    #[error("store backend error: {message}")]
177    Backend {
178        /// Human-readable description of the failure.
179        message: String,
180        /// The wrapped backend error, if any.
181        #[source]
182        source: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
183    },
184}
185
186/// A content-addressable storage backend for snapdir snapshots.
187///
188/// Implementors hold objects under [`object_path`] and manifests under
189/// [`manifest_path`] within some root (a local directory, an S3/GCS/B2 bucket
190/// prefix, …). The trait is the minimal surface the orchestrator needs to read
191/// a snapshot back ([`get_manifest`](Store::get_manifest) +
192/// [`fetch_files`](Store::fetch_files)) and to write one
193/// ([`push`](Store::push)).
194///
195/// It is object-safe: callers can hold `&dyn Store` and pick the concrete
196/// backend at runtime from a `store://` URL.
197///
198/// See the [module docs](crate::store) for why this is synchronous even though
199/// network backends are async internally.
200pub trait Store {
201    /// Reads and parses the manifest stored under `id`'s sharded path,
202    /// verifying that its bytes hash back to `id` before returning it.
203    ///
204    /// # Errors
205    ///
206    /// - [`StoreError::ManifestNotFound`] if no manifest is stored at `id`.
207    /// - [`StoreError::Integrity`] if the stored bytes do not hash to `id`.
208    /// - [`StoreError::Parse`] if the bytes are not a valid manifest.
209    /// - [`StoreError::Io`] / [`StoreError::Backend`] on transport failure.
210    fn get_manifest(&self, id: &str) -> Result<Manifest, StoreError>;
211
212    /// Materializes every entry of `manifest` under `dest`, pulling each
213    /// referenced object from the store and reconstructing the directory tree
214    /// (files, directories, permissions) rooted at `dest`.
215    ///
216    /// Implementations verify each fetched object against its manifest
217    /// checksum.
218    ///
219    /// # Errors
220    ///
221    /// - [`StoreError::ObjectNotFound`] if a referenced object is missing.
222    /// - [`StoreError::Integrity`] if a fetched object is corrupt.
223    /// - [`StoreError::Io`] / [`StoreError::Backend`] on transport failure.
224    fn fetch_files(&self, manifest: &Manifest, dest: &Path) -> Result<(), StoreError>;
225
226    /// Uploads the objects referenced by `manifest` (reading their bytes from
227    /// the tree rooted at `source`) and then the manifest itself, filing each
228    /// under its sharded address.
229    ///
230    /// Implementations are expected to skip blobs already present and to write
231    /// the manifest only after all of its objects have landed, so a manifest is
232    /// never observable before the content it references (mirroring the
233    /// oracle's commit ordering).
234    ///
235    /// # Errors
236    ///
237    /// - [`StoreError::Io`] / [`StoreError::Backend`] on transport failure.
238    /// - [`StoreError::Integrity`] if a source file no longer matches its
239    ///   manifest checksum at upload time.
240    fn push(&self, manifest: &Manifest, source: &Path) -> Result<(), StoreError>;
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    // Reference digest for the oracle cross-check. The original
    // `_snapdir_get_object_rel_path` in the `snapdir` script expands a
    // checksum `c` as
    //   .objects/${c:0:3}/${c:3:3}/${c:6:3}/${c:9}
    // and the expected paths below follow that expansion exactly.
    const SAMPLE: &str = "49dc870df1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";

    #[test]
    fn store_object_path_matches_oracle_sharding() {
        let expected =
            ".objects/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
        assert_eq!(object_path(SAMPLE), expected);
    }

    #[test]
    fn store_manifest_path_matches_oracle_sharding() {
        let expected =
            ".manifests/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
        assert_eq!(manifest_path(SAMPLE), expected);
    }

    #[test]
    fn store_sharding_slices_three_three_three_rest() {
        // Rebuild the oracle slicing independently of `sharded_path` so the
        // shard boundaries are guarded by a second implementation.
        let (s0, tail) = SAMPLE.split_at(3);
        let (s1, tail) = tail.split_at(3);
        let (s2, rest) = tail.split_at(3);

        assert_eq!(object_path(SAMPLE), format!(".objects/{s0}/{s1}/{s2}/{rest}"));
        assert_eq!((s0, s1, s2), ("49d", "c87", "0df"));
    }

    #[test]
    fn store_path_prefixes_are_dot_objects_and_dot_manifests() {
        assert!(object_path(SAMPLE).strip_prefix(".objects/").is_some());
        assert!(manifest_path(SAMPLE).strip_prefix(".manifests/").is_some());
    }

    #[test]
    fn store_sharding_uses_forward_slashes_with_four_components_after_prefix() {
        let path = object_path(SAMPLE);
        let mut components = path.split('/');

        // Layout: [".objects", s0, s1, s2, rest].
        assert_eq!(components.next(), Some(".objects"));
        let lengths: Vec<usize> = components.map(str::len).collect();
        assert_eq!(lengths, [3, 3, 3, SAMPLE.len() - 9]);
    }

    #[test]
    fn store_sharding_clamps_short_inputs_like_bash() {
        // Bash `${var:0:3}` past the end yields empty rather than erroring, so
        // there are always four `/` separators between the five (possibly
        // empty) components.
        let cases = [
            ("", ".objects////"),
            ("ab", ".objects/ab///"),
            ("abcd", ".objects/abc/d//"),
            ("abcdefghij", ".objects/abc/def/ghi/j"),
        ];
        for (input, expected) in cases {
            assert_eq!(object_path(input), expected);
        }
    }

    // Trait-shape / object-safety compile checks.

    /// Minimal implementor showing that the trait can be implemented and used
    /// as a trait object; driven through `&dyn Store` in the test below.
    struct NoopStore;

    impl Store for NoopStore {
        fn get_manifest(&self, id: &str) -> Result<Manifest, StoreError> {
            let id = id.to_owned();
            Err(StoreError::ManifestNotFound { id })
        }

        fn fetch_files(&self, _manifest: &Manifest, _dest: &Path) -> Result<(), StoreError> {
            Ok(())
        }

        fn push(&self, _manifest: &Manifest, _source: &Path) -> Result<(), StoreError> {
            Ok(())
        }
    }

    #[test]
    fn store_trait_is_object_safe_and_implementable() {
        let boxed: Box<dyn Store> = Box::new(NoopStore);
        let store: &dyn Store = &*boxed;
        let manifest = Manifest::new();

        let fetched = store.fetch_files(&manifest, Path::new("/tmp/snapdir-dest"));
        assert!(fetched.is_ok());

        let pushed = store.push(&manifest, Path::new("/tmp/snapdir-src"));
        assert!(pushed.is_ok());

        match store.get_manifest("deadbeef") {
            Err(StoreError::ManifestNotFound { id }) => assert_eq!(id, "deadbeef"),
            other => panic!("expected ManifestNotFound, got {other:?}"),
        }
    }

    #[test]
    fn store_error_parse_is_from_manifest_parse_error() {
        // A malformed manifest line must convert into StoreError::Parse
        // through the `#[from]` conversion.
        let store_err = StoreError::from(Manifest::parse("F 700").unwrap_err());
        assert!(matches!(store_err, StoreError::Parse(_)));
    }
}