snapdir_core/store.rs
1//! Storage backend abstraction and the content-addressable path layout.
2//!
3//! A snapdir *store* is any backing location that holds two kinds of
4//! content-addressable blobs:
5//!
6//! - **objects** — the raw bytes of each file, addressed by their content
7//! checksum, under `.objects/`.
8//! - **manifests** — the snapshot manifest text, addressed by its snapshot id
9//! (the BLAKE3 of the comment-stripped manifest), under `.manifests/`.
10//!
11//! Both use the same three-level sharded layout, slicing the hex address into
12//! `3 / 3 / 3 / rest` segments to keep any single directory small. This layout
13//! is a **frozen interop contract**: it must match the Bash oracle
14//! (`snapdir`'s `_snapdir_get_object_rel_path` /
15//! `_snapdir_get_manifest_rel_path`) byte-for-byte so that a store written by
16//! either implementation is readable by the other.
17//!
18//! ```text
19//! .objects/<h[0..3]>/<h[3..6]>/<h[6..9]>/<h[9..]>
20//! .manifests/<id[0..3]>/<id[3..6]>/<id[6..9]>/<id[9..]>
21//! ```
22//!
23//! # Sync trait, async implementations
24//!
25//! [`Store`] is a **synchronous, object-safe** trait. The orchestrator's walk
26//! and hash stages are synchronous, and the on-disk [`FileStore`] (a later
27//! gate) is naturally synchronous, so a sync surface keeps the common path
28//! allocation-light and dyn-dispatchable (`&dyn Store`).
29//!
30//! Network stores (S3, B2, GCS) use async native SDKs. They satisfy this sync
31//! trait by owning a private `tokio` runtime and bridging each method with
32//! `runtime.block_on(async { … })`. That bridge lives entirely inside the
33//! concrete store crate; it never leaks `async`/`await` or a runtime
34//! requirement into `snapdir-core` or the orchestrator. This is deliberate:
35//! making the trait `async` would force a runtime onto the otherwise-sync
36//! `FileStore` and the CLI, and would cost object-safety without `async_trait`.
37//!
38//! [`FileStore`]: https://docs.rs/snapdir-file-store
39
40use std::path::Path;
41
42use thiserror::Error;
43
44use crate::manifest::Manifest;
45
/// Name of the directory, directly under a store's root, in which content
/// objects are filed.
pub const OBJECTS_DIR: &str = ".objects";
48
/// Name of the directory, directly under a store's root, in which snapshot
/// manifests are filed.
pub const MANIFESTS_DIR: &str = ".manifests";
51
52/// Returns the relative, sharded path of a content object given its hex
53/// checksum.
54///
55/// The layout is `.objects/<h[0..3]>/<h[3..6]>/<h[6..9]>/<h[9..]>`, matching
56/// the oracle's `_snapdir_get_object_rel_path`. The returned path always uses
57/// forward slashes (the on-disk separator the oracle emits); a store targeting
58/// a native filesystem can feed it straight to [`Path`], and an object-store
59/// backend uses it verbatim as a key.
60///
61/// The checksum is used as-is; callers are expected to pass a lowercase hex
62/// digest as produced by the [`crate::merkle`] hashers. Inputs shorter than
63/// nine characters degrade gracefully (the missing shard segments and/or the
64/// trailing component are simply empty), but that is never a valid snapdir
65/// checksum.
66///
67/// # Examples
68///
69/// ```
70/// use snapdir_core::store::object_path;
71///
72/// let h = "49dc870df1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
73/// assert_eq!(
74/// object_path(h),
75/// ".objects/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92"
76/// );
77/// ```
78#[must_use]
79pub fn object_path(checksum: &str) -> String {
80 sharded_path(OBJECTS_DIR, checksum)
81}
82
83/// Returns the relative, sharded path of a manifest given its snapshot id.
84///
85/// The layout is `.manifests/<id[0..3]>/<id[3..6]>/<id[6..9]>/<id[9..]>`,
86/// matching the oracle's `_snapdir_get_manifest_rel_path`. See [`object_path`]
87/// for separator and input conventions.
88///
89/// # Examples
90///
91/// ```
92/// use snapdir_core::store::manifest_path;
93///
94/// let id = "49dc870df1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
95/// assert_eq!(
96/// manifest_path(id),
97/// ".manifests/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92"
98/// );
99/// ```
100#[must_use]
101pub fn manifest_path(snapshot_id: &str) -> String {
102 sharded_path(MANIFESTS_DIR, snapshot_id)
103}
104
105/// Shared three-level sharding used by both objects and manifests.
106///
107/// Slices `hex` into `[0..3] / [3..6] / [6..9] / [9..]` and joins them under
108/// `prefix` with `/`. Mirrors the oracle's `${id:0:3}` / `${id:3:3}` /
109/// `${id:6:3}` / `${id:9}` expansion exactly, including its behavior on short
110/// inputs (Bash substring expansion past the end yields an empty string rather
111/// than panicking, which `char_slice` reproduces).
112fn sharded_path(prefix: &str, hex: &str) -> String {
113 let s0 = char_slice(hex, 0, 3);
114 let s1 = char_slice(hex, 3, 6);
115 let s2 = char_slice(hex, 6, 9);
116 let rest = char_slice(hex, 9, hex.len());
117 format!("{prefix}/{s0}/{s1}/{s2}/{rest}")
118}
119
/// Returns the characters of `s` in the half-open range `[start, end)`,
/// clamping both offsets to the string instead of panicking.
///
/// Offsets count characters, not bytes. For the ASCII-hex addresses snapdir
/// uses, the two coincide, so the result is exactly the byte range
/// `start..end` clamped to `s.len()`. The distinction only matters for
/// malformed, non-ASCII input: a raw byte range could land inside a multi-byte
/// character and panic, whereas character offsets always fall on a boundary.
/// Counting characters is also how Bash `${var:start:len}` behaves in a
/// multibyte locale.
///
/// An offset past the end of `s` is treated as the end of `s`, and an inverted
/// range (`start > end`) yields the empty string.
fn char_slice(s: &str, start: usize, end: usize) -> &str {
    // Byte offset of the `n`-th character, or the end of the string when `s`
    // holds fewer than `n` characters (this is the clamp).
    let byte_offset = |n: usize| s.char_indices().nth(n).map_or(s.len(), |(i, _)| i);

    let lo = byte_offset(start);
    // Never let the upper bound fall below the lower one: slicing `lo..hi`
    // with `hi < lo` would panic.
    let hi = byte_offset(end).max(lo);
    &s[lo..hi]
}
129
130/// Errors a [`Store`] backend can surface.
131///
132/// Backends wrap their own failure types (filesystem I/O, HTTP/SDK errors,
133/// integrity mismatches) into these variants. The orchestrator matches on the
134/// variant, not the wrapped cause, so behavior stays backend-agnostic.
135#[derive(Debug, Error)]
136#[non_exhaustive]
137pub enum StoreError {
138 /// The requested manifest (by snapshot id) was not present in the store.
139 #[error("manifest not found: {id}")]
140 ManifestNotFound {
141 /// The snapshot id that was looked up.
142 id: String,
143 },
144
145 /// A content object referenced by a manifest was not present in the store.
146 #[error("object not found: {checksum}")]
147 ObjectNotFound {
148 /// The object checksum that was looked up.
149 checksum: String,
150 },
151
152 /// Stored bytes did not hash to the address they were filed under (object
153 /// checksum or manifest snapshot id mismatch) — the blob is corrupt or
154 /// tampered.
155 #[error("integrity check failed for {address}: expected {expected}, got {actual}")]
156 Integrity {
157 /// The address (object path or manifest id) being verified.
158 address: String,
159 /// The checksum/id the address claims.
160 expected: String,
161 /// The checksum/id actually computed over the bytes.
162 actual: String,
163 },
164
165 /// A manifest's text could not be parsed into a [`Manifest`].
166 #[error("failed to parse manifest: {0}")]
167 Parse(#[from] crate::manifest::ParseError),
168
169 /// An underlying I/O failure (filesystem, network, SDK).
170 #[error("store I/O error: {0}")]
171 Io(#[from] std::io::Error),
172
173 /// A backend-specific failure that does not fit the typed variants above
174 /// (e.g. an SDK error from a network store). Carries a human-readable
175 /// message and an optional source.
176 #[error("store backend error: {message}")]
177 Backend {
178 /// Human-readable description of the failure.
179 message: String,
180 /// The wrapped backend error, if any.
181 #[source]
182 source: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
183 },
184}
185
186/// A content-addressable storage backend for snapdir snapshots.
187///
188/// Implementors hold objects under [`object_path`] and manifests under
189/// [`manifest_path`] within some root (a local directory, an S3/GCS/B2 bucket
190/// prefix, …). The trait is the minimal surface the orchestrator needs to read
191/// a snapshot back ([`get_manifest`](Store::get_manifest) +
192/// [`fetch_files`](Store::fetch_files)) and to write one
193/// ([`push`](Store::push)).
194///
195/// It is object-safe: callers can hold `&dyn Store` and pick the concrete
196/// backend at runtime from a `store://` URL.
197///
198/// See the [module docs](crate::store) for why this is synchronous even though
199/// network backends are async internally.
200pub trait Store {
201 /// Reads and parses the manifest stored under `id`'s sharded path,
202 /// verifying that its bytes hash back to `id` before returning it.
203 ///
204 /// # Errors
205 ///
206 /// - [`StoreError::ManifestNotFound`] if no manifest is stored at `id`.
207 /// - [`StoreError::Integrity`] if the stored bytes do not hash to `id`.
208 /// - [`StoreError::Parse`] if the bytes are not a valid manifest.
209 /// - [`StoreError::Io`] / [`StoreError::Backend`] on transport failure.
210 fn get_manifest(&self, id: &str) -> Result<Manifest, StoreError>;
211
212 /// Materializes every entry of `manifest` under `dest`, pulling each
213 /// referenced object from the store and reconstructing the directory tree
214 /// (files, directories, permissions) rooted at `dest`.
215 ///
216 /// Implementations verify each fetched object against its manifest
217 /// checksum.
218 ///
219 /// # Errors
220 ///
221 /// - [`StoreError::ObjectNotFound`] if a referenced object is missing.
222 /// - [`StoreError::Integrity`] if a fetched object is corrupt.
223 /// - [`StoreError::Io`] / [`StoreError::Backend`] on transport failure.
224 fn fetch_files(&self, manifest: &Manifest, dest: &Path) -> Result<(), StoreError>;
225
226 /// Uploads the objects referenced by `manifest` (reading their bytes from
227 /// the tree rooted at `source`) and then the manifest itself, filing each
228 /// under its sharded address.
229 ///
230 /// Implementations are expected to skip blobs already present and to write
231 /// the manifest only after all of its objects have landed, so a manifest is
232 /// never observable before the content it references (mirroring the
233 /// oracle's commit ordering).
234 ///
235 /// # Errors
236 ///
237 /// - [`StoreError::Io`] / [`StoreError::Backend`] on transport failure.
238 /// - [`StoreError::Integrity`] if a source file no longer matches its
239 /// manifest checksum at upload time.
240 fn push(&self, manifest: &Manifest, source: &Path) -> Result<(), StoreError>;
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    // Reference digest for the layout tests. Its expected sharded form follows
    // the original `_snapdir_get_object_rel_path` in the `snapdir` script:
    //   .objects/${c:0:3}/${c:3:3}/${c:6:3}/${c:9}
    const SAMPLE: &str = "49dc870df1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";

    #[test]
    fn store_object_path_matches_oracle_sharding() {
        let expected =
            ".objects/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
        assert_eq!(object_path(SAMPLE), expected);
    }

    #[test]
    fn store_manifest_path_matches_oracle_sharding() {
        let expected =
            ".manifests/49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
        assert_eq!(manifest_path(SAMPLE), expected);
    }

    #[test]
    fn store_sharding_slices_three_three_three_rest() {
        // Rebuild the oracle slicing independently to pin the boundaries.
        let (s0, tail) = SAMPLE.split_at(3);
        let (s1, tail) = tail.split_at(3);
        let (s2, rest) = tail.split_at(3);

        assert_eq!(
            object_path(SAMPLE),
            [".objects", s0, s1, s2, rest].join("/")
        );
        assert_eq!((s0, s1, s2), ("49d", "c87", "0df"));
    }

    #[test]
    fn store_path_prefixes_are_dot_objects_and_dot_manifests() {
        let object = object_path(SAMPLE);
        let manifest = manifest_path(SAMPLE);
        assert!(object.starts_with(".objects/"));
        assert!(manifest.starts_with(".manifests/"));
    }

    #[test]
    fn store_sharding_uses_forward_slashes_with_four_components_after_prefix() {
        let path = object_path(SAMPLE);
        let mut components = path.split('/');

        // Expected sequence: ".objects", s0, s1, s2, rest — and nothing more.
        assert_eq!(components.next(), Some(".objects"));
        for _ in 0..3 {
            assert_eq!(components.next().map(str::len), Some(3));
        }
        assert_eq!(components.next().map(str::len), Some(SAMPLE.len() - 9));
        assert_eq!(components.next(), None);
    }

    #[test]
    fn store_sharding_clamps_short_inputs_like_bash() {
        // Bash `${var:0:3}` past the end expands to the empty string instead
        // of failing, so the four `/` separators are always present even when
        // some of the five components are empty.
        let cases = [
            ("", ".objects////"),
            ("ab", ".objects/ab///"),
            ("abcd", ".objects/abc/d//"),
            ("abcdefghij", ".objects/abc/def/ghi/j"),
        ];
        for (input, expected) in cases {
            assert_eq!(object_path(input), expected, "input: {input:?}");
        }
    }

    // Trait-shape / object-safety compile checks.

    /// Minimal implementor used to show that the trait can be implemented and
    /// used through `&dyn Store`.
    struct NoopStore;

    impl Store for NoopStore {
        fn get_manifest(&self, id: &str) -> Result<Manifest, StoreError> {
            let id = id.to_owned();
            Err(StoreError::ManifestNotFound { id })
        }

        fn fetch_files(&self, _manifest: &Manifest, _dest: &Path) -> Result<(), StoreError> {
            Ok(())
        }

        fn push(&self, _manifest: &Manifest, _source: &Path) -> Result<(), StoreError> {
            Ok(())
        }
    }

    #[test]
    fn store_trait_is_object_safe_and_implementable() {
        let boxed: Box<dyn Store> = Box::new(NoopStore);
        let store: &dyn Store = &*boxed;
        let empty = Manifest::new();

        let fetched = store.fetch_files(&empty, Path::new("/tmp/snapdir-dest"));
        assert!(fetched.is_ok());

        let pushed = store.push(&empty, Path::new("/tmp/snapdir-src"));
        assert!(pushed.is_ok());

        match store.get_manifest("deadbeef") {
            Err(StoreError::ManifestNotFound { id }) => assert_eq!(id, "deadbeef"),
            other => panic!("expected ManifestNotFound, got {other:?}"),
        }
    }

    #[test]
    fn store_error_parse_is_from_manifest_parse_error() {
        // The parse failure for this input converts into StoreError::Parse
        // through the `#[from]` conversion.
        let store_err = StoreError::from(Manifest::parse("F 700").unwrap_err());
        assert!(matches!(store_err, StoreError::Parse(_)));
    }
}