snapdir_stores/b2_store.rs
1//! `B2Store`: the `b2://` storage backend, backed by Backblaze B2's
2//! **S3-compatible endpoint** via the native AWS SDK.
3//!
4//! Backblaze B2 exposes an S3-compatible API at a per-region endpoint of the
5//! form `https://s3.<region>.backblazeb2.com` (for example
6//! `https://s3.us-west-004.backblazeb2.com`). A [`B2Store`] is therefore just an
7//! [`S3Store`] pointed at that custom endpoint URL with path-style addressing,
8//! so it reuses the entire S3 transfer path — the same core-sharded
9//! `.objects`/`.manifests` keys, the same push (objects-before-manifest,
10//! skip-if-present) and fetch (download → verify BLAKE3 → retry → atomic write)
11//! discipline. This module adds only the `b2://` URL handling and the
12//! endpoint/region derivation; it does **not** duplicate the store logic.
13//!
14//! ```text
15//! b2://<bucket>/<prefix>/.objects/<sharded checksum> raw object bytes
16//! b2://<bucket>/<prefix>/.manifests/<sharded snapshot id> manifest text
17//! ```
18//!
19//! # URL parsing (frozen contract)
20//!
21//! `b2://bucket/base/dir` parses exactly like `s3://...`
22//! (`_snapdir_export_store_vars`): the bucket is the segment after the `//`
23//! (`cut -d'/' -f3`) and the prefix is everything after it (`cut -d'/' -f4-`)
24//! with a trailing slash stripped (matching `_snapdir_b2_store_get_remote_prefix`).
25//! This reuses [`S3Location::parse`] verbatim, since the derivation is identical.
26//!
27//! # Credentials
28//!
29//! Authentication is delegated to the standard AWS credential chain (see
30//! [`S3Store`]). The Backblaze **application key id** maps to the AWS access key
31//! id and the **application key** maps to the AWS secret access key — i.e. the
32//! usual `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` env vars, a profile, etc.
33//! No bespoke snapdir credential variables are introduced. (The original
34//! implementation shelled out to the `b2` CLI and read `B2_APPLICATION_KEY[_ID]`;
35//! the native S3-compatible path uses the AWS chain instead, which is what the
36//! SDK expects.)
37//!
38//! # Endpoint / region derivation
39//!
40//! The S3-compatible endpoint encodes the B2 region. It is resolved, in order:
41//!
42//! 1. an explicit `endpoint_url` passed to [`B2Store::connect`];
43//! 2. the `SNAPDIR_B2_TEST_ENDPOINT` environment variable (used to point the
44//! gated live test at a real bucket or an S3 emulator);
45//! 3. derived from a region — an explicit `region` argument, else the
46//! `SNAPDIR_B2_REGION` / `AWS_REGION` env vars — as
47//! `https://s3.<region>.backblazeb2.com`.
48//!
49//! When the endpoint is derived from a region the SDK is also told that region
50//! so `SigV4` signing matches Backblaze's expectation.
51
52use std::path::Path;
53
54use std::sync::Arc;
55
56use snapdir_core::manifest::Manifest;
57use snapdir_core::store::{Store, StoreError};
58use snapdir_core::Meter;
59
60use crate::s3_store::{S3Location, S3Store};
61use crate::stream::StreamStore;
62use crate::transfer::TransferConfig;
63
64/// The default Backblaze B2 region used when none is configured. Backblaze
65/// requires a region in the S3-compatible endpoint host; `us-west-004` is a
66/// common default, but a real deployment should set `SNAPDIR_B2_REGION` /
67/// `AWS_REGION` (or pass one explicitly) to match its bucket's region.
68const DEFAULT_B2_REGION: &str = "us-west-004";
69
70/// Builds the Backblaze S3-compatible endpoint URL for a region, i.e.
71/// `https://s3.<region>.backblazeb2.com`.
72#[must_use]
73pub fn endpoint_for_region(region: &str) -> String {
74 format!("https://s3.{region}.backblazeb2.com")
75}
76
77/// A content-addressable store backed by Backblaze B2 via its S3-compatible
78/// endpoint. Thin wrapper over [`S3Store`] configured with the B2 endpoint.
79pub struct B2Store {
80 inner: S3Store,
81}
82
83impl B2Store {
84 /// Connects to a `b2://bucket/prefix` store using Backblaze's
85 /// S3-compatible API.
86 ///
87 /// `endpoint_url` overrides the endpoint outright (handy for emulators or
88 /// an already-known regional host). When `None`, the endpoint is taken from
89 /// `SNAPDIR_B2_TEST_ENDPOINT`, and failing that derived from `region` (or
90 /// the `SNAPDIR_B2_REGION` / `AWS_REGION` env vars, else
91 /// [`DEFAULT_B2_REGION`]) as `https://s3.<region>.backblazeb2.com`.
92 ///
93 /// Credentials and signing are handled by the standard AWS chain; the B2
94 /// application key id/secret map to the AWS access-key/secret-key.
95 ///
96 /// # Errors
97 ///
98 /// [`StoreError::Backend`] if the tokio runtime cannot be created or the
99 /// AWS configuration cannot be loaded (propagated from [`S3Store::connect`]).
100 pub fn connect(
101 store_url: &str,
102 endpoint_url: Option<&str>,
103 region: Option<&str>,
104 ) -> Result<Self, StoreError> {
105 Self::connect_with(store_url, endpoint_url, region, TransferConfig::default())
106 }
107
108 /// Like [`connect`](Self::connect), but carries a [`TransferConfig`] for
109 /// concurrency / bandwidth control. The config lives on the wrapped
110 /// [`S3Store`]; [`connect`](Self::connect) delegates here with
111 /// [`TransferConfig::default`].
112 ///
113 /// # Errors
114 ///
115 /// [`StoreError::Backend`] if the tokio runtime cannot be created or the
116 /// AWS configuration cannot be loaded (propagated from
117 /// [`S3Store::connect_with`]).
118 pub fn connect_with(
119 store_url: &str,
120 endpoint_url: Option<&str>,
121 region: Option<&str>,
122 config: TransferConfig,
123 ) -> Result<Self, StoreError> {
124 let endpoint = resolve_endpoint(endpoint_url, region);
125 // S3Store::connect parses the URL with S3Location::parse, which derives
126 // bucket/prefix identically for b2:// and s3:// (oracle cut -f3 / -f4-).
127 let inner = S3Store::connect_with(store_url, Some(endpoint.as_str()), config)?;
128 Ok(Self { inner })
129 }
130
131 /// Builds a `B2Store` from an already-configured [`S3Store`] (intended for
132 /// tests wiring a client at an emulator/B2 endpoint).
133 #[must_use]
134 pub fn from_s3_store(inner: S3Store) -> Self {
135 Self { inner }
136 }
137
138 /// Attaches (or clears) an optional progress [`Meter`]. B2 has no transfer
139 /// path of its own — it delegates entirely to the wrapped [`S3Store`] — so
140 /// this forwards to [`S3Store::with_meter`]. `None` (the constructor default)
141 /// means zero recording and byte-identical behavior.
142 #[must_use]
143 pub fn with_meter(mut self, meter: Option<Arc<Meter>>) -> Self {
144 self.inner = self.inner.with_meter(meter);
145 self
146 }
147
148 /// The parsed bucket/prefix this store targets (shared with [`S3Store`]).
149 #[must_use]
150 pub fn location(&self) -> &S3Location {
151 self.inner.location()
152 }
153
154 /// The [`TransferConfig`] (concurrency / bandwidth) this store was built
155 /// with, carried on the wrapped [`S3Store`]. Consumed by the transfer loops
156 /// in later gates.
157 #[must_use]
158 pub fn transfer_config(&self) -> &TransferConfig {
159 self.inner.transfer_config()
160 }
161}
162
163impl Store for B2Store {
164 fn get_manifest(&self, id: &str) -> Result<Manifest, StoreError> {
165 self.inner.get_manifest(id)
166 }
167
168 fn fetch_files(&self, manifest: &Manifest, dest: &Path) -> Result<(), StoreError> {
169 self.inner.fetch_files(manifest, dest)
170 }
171
172 fn push(&self, manifest: &Manifest, source: &Path) -> Result<(), StoreError> {
173 self.inner.push(manifest, source)
174 }
175}
176
177impl StreamStore for B2Store {
178 fn has_object(&self, checksum: &str) -> Result<bool, StoreError> {
179 self.inner.has_object(checksum)
180 }
181
182 fn get_object(&self, checksum: &str) -> Result<Vec<u8>, StoreError> {
183 self.inner.get_object(checksum)
184 }
185
186 fn put_object(&self, checksum: &str, bytes: Vec<u8>) -> Result<(), StoreError> {
187 self.inner.put_object(checksum, bytes)
188 }
189
190 fn put_manifest(&self, id: &str, manifest: &Manifest) -> Result<(), StoreError> {
191 self.inner.put_manifest(id, manifest)
192 }
193}
194
195/// Resolves the S3-compatible endpoint to use, applying the precedence
196/// documented on [`B2Store::connect`]: explicit endpoint > `SNAPDIR_B2_TEST_ENDPOINT`
197/// > endpoint derived from the resolved region.
198fn resolve_endpoint(endpoint_url: Option<&str>, region: Option<&str>) -> String {
199 if let Some(ep) = endpoint_url {
200 return ep.to_owned();
201 }
202 if let Ok(ep) = std::env::var("SNAPDIR_B2_TEST_ENDPOINT") {
203 if !ep.is_empty() {
204 return ep;
205 }
206 }
207 endpoint_for_region(&resolve_region(region))
208}
209
210/// Resolves the B2 region: an explicit argument, else `SNAPDIR_B2_REGION`, else
211/// `AWS_REGION`, else [`DEFAULT_B2_REGION`].
212fn resolve_region(region: Option<&str>) -> String {
213 if let Some(r) = region {
214 if !r.is_empty() {
215 return r.to_owned();
216 }
217 }
218 for var in ["SNAPDIR_B2_REGION", "AWS_REGION"] {
219 if let Ok(r) = std::env::var(var) {
220 if !r.is_empty() {
221 return r;
222 }
223 }
224 }
225 DEFAULT_B2_REGION.to_owned()
226}
227
228#[cfg(test)]
229mod tests {
230 use super::*;
231
232 // The canonical content-addressable fixtures from the b2 store test suite.
233 const FOO_CHECKSUM: &str = "49dc870df1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
234 const FOO_SHARDED: &str = "49d/c87/0df/1de7fd60794cebce449f5ccdae575affaa67a24b62acb03e039db92";
235 const MANIFEST_ID: &str = "aa91e498f401ea9e6ddbaa1138a0dbeb030fab8defc1252d80c77ebefafbc70d";
236 const MANIFEST_SHARDED: &str =
237 "aa9/1e4/98f/401ea9e6ddbaa1138a0dbeb030fab8defc1252d80c77ebefafbc70d";
238
239 #[test]
240 fn b2_store_parses_bucket_and_prefix_like_oracle() {
241 // Oracle (`_snapdir_export_store_vars`): bucket = cut -f3,
242 // base_dir = cut -f4-. For "b2://my-bucket/my/directory".
243 let loc = S3Location::parse("b2://my-bucket/my/directory");
244 assert_eq!(loc.bucket, "my-bucket");
245 assert_eq!(loc.prefix, "my/directory");
246 }
247
248 #[test]
249 fn b2_store_parse_strips_trailing_slash() {
250 // `_snapdir_b2_store_get_remote_prefix` strips the trailing slash.
251 let loc = S3Location::parse("b2://bucket/long/term/storage/");
252 assert_eq!(loc.bucket, "bucket");
253 assert_eq!(loc.prefix, "long/term/storage");
254 }
255
256 #[test]
257 fn b2_store_parse_bucket_root_has_empty_prefix() {
258 let loc = S3Location::parse("b2://bucket");
259 assert_eq!(loc.bucket, "bucket");
260 assert_eq!(loc.prefix, "");
261
262 let loc_slash = S3Location::parse("b2://bucket/");
263 assert_eq!(loc_slash.bucket, "bucket");
264 assert_eq!(loc_slash.prefix, "");
265 }
266
267 #[test]
268 fn b2_store_object_key_matches_sharded_scheme() {
269 // Key layout must be byte-identical to the frozen S3 sharded scheme so
270 // the bucket is interchangeable across tools.
271 let loc = S3Location::parse("b2://b/long/term/storage");
272 assert_eq!(
273 loc.object_key(FOO_CHECKSUM),
274 format!("long/term/storage/.objects/{FOO_SHARDED}")
275 );
276 }
277
278 #[test]
279 fn b2_store_manifest_key_matches_sharded_scheme() {
280 let loc = S3Location::parse("b2://b/long/term/storage");
281 assert_eq!(
282 loc.manifest_key(MANIFEST_ID),
283 format!("long/term/storage/.manifests/{MANIFEST_SHARDED}")
284 );
285 }
286
287 #[test]
288 fn b2_store_keys_have_no_leading_slash_at_bucket_root() {
289 let loc = S3Location::parse("b2://bucket");
290 assert_eq!(
291 loc.object_key(FOO_CHECKSUM),
292 format!(".objects/{FOO_SHARDED}")
293 );
294 assert_eq!(
295 loc.manifest_key(MANIFEST_ID),
296 format!(".manifests/{MANIFEST_SHARDED}")
297 );
298 }
299
300 #[test]
301 fn b2_store_endpoint_for_region_uses_backblaze_host() {
302 assert_eq!(
303 endpoint_for_region("us-west-004"),
304 "https://s3.us-west-004.backblazeb2.com"
305 );
306 assert_eq!(
307 endpoint_for_region("eu-central-003"),
308 "https://s3.eu-central-003.backblazeb2.com"
309 );
310 }
311
312 #[test]
313 fn b2_store_explicit_endpoint_takes_precedence() {
314 let ep = resolve_endpoint(Some("https://emulator.local:9000"), Some("us-west-004"));
315 assert_eq!(ep, "https://emulator.local:9000");
316 }
317
318 #[test]
319 fn b2_store_endpoint_derived_from_explicit_region() {
320 // With no explicit endpoint and no SNAPDIR_B2_TEST_ENDPOINT set, the
321 // endpoint is derived from the explicit region argument.
322 std::env::remove_var("SNAPDIR_B2_TEST_ENDPOINT");
323 let ep = resolve_endpoint(None, Some("us-west-002"));
324 assert_eq!(ep, "https://s3.us-west-002.backblazeb2.com");
325 }
326
327 #[test]
328 fn b2_store_region_resolution_prefers_explicit_then_default() {
329 // Explicit region wins.
330 assert_eq!(resolve_region(Some("eu-central-003")), "eu-central-003");
331 // Empty explicit region falls through; with no env override set we get
332 // the documented default (guard the env to keep the test hermetic).
333 let saved_b2 = std::env::var("SNAPDIR_B2_REGION").ok();
334 let saved_aws = std::env::var("AWS_REGION").ok();
335 std::env::remove_var("SNAPDIR_B2_REGION");
336 std::env::remove_var("AWS_REGION");
337 assert_eq!(resolve_region(Some("")), DEFAULT_B2_REGION);
338 assert_eq!(resolve_region(None), DEFAULT_B2_REGION);
339 if let Some(v) = saved_b2 {
340 std::env::set_var("SNAPDIR_B2_REGION", v);
341 }
342 if let Some(v) = saved_aws {
343 std::env::set_var("AWS_REGION", v);
344 }
345 }
346
347 // --- Live round-trip, skipped by default --------------------------------
348 //
349 // Requires a Backblaze B2 (or S3-compatible) endpoint plus AWS credentials
350 // (the B2 application key id/secret as AWS access-key/secret-key) in the
351 // environment. Gated behind `SNAPDIR_B2_TEST_ENDPOINT` and
352 // `SNAPDIR_B2_TEST_STORE` (a `b2://bucket/prefix` URL) so it is skipped
353 // unless explicitly configured. Real Backblaze round-trips are exercised by
354 // the later `remote-interop` gate.
355 #[test]
356 fn b2_store_live_round_trip_when_configured() {
357 use snapdir_core::manifest::{ManifestEntry, PathType};
358 use snapdir_core::merkle::{Blake3Hasher, Hasher};
359
360 let (Ok(endpoint), Ok(store)) = (
361 std::env::var("SNAPDIR_B2_TEST_ENDPOINT"),
362 std::env::var("SNAPDIR_B2_TEST_STORE"),
363 ) else {
364 eprintln!(
365 "skipping b2_store live round-trip: set SNAPDIR_B2_TEST_ENDPOINT \
366 and SNAPDIR_B2_TEST_STORE (b2://bucket/prefix) to run it"
367 );
368 return;
369 };
370
371 let hasher = Blake3Hasher::new();
372
373 let src = std::env::temp_dir().join(format!("snapdir-b2-live-{}", std::process::id()));
374 std::fs::create_dir_all(&src).unwrap();
375 std::fs::write(src.join("foo"), b"foo\n").unwrap();
376 let foo_sum = hasher.hash_hex(b"foo\n");
377 let root_sum = snapdir_core::merkle::directory_checksum([foo_sum.as_str()], &hasher);
378 let mut manifest = Manifest::new();
379 manifest.push(ManifestEntry::new(
380 PathType::Directory,
381 "700",
382 root_sum,
383 4,
384 "./",
385 ));
386 manifest.push(ManifestEntry::new(
387 PathType::File,
388 "600",
389 foo_sum,
390 4,
391 "./foo",
392 ));
393 let manifest = Manifest::from_entries(manifest.entries().to_vec());
394 let id = snapdir_core::merkle::snapshot_id(&manifest, &hasher);
395
396 let b2 = B2Store::connect(&store, Some(&endpoint), None).expect("connect");
397 b2.push(&manifest, &src).expect("push");
398 let read_back = b2.get_manifest(&id).expect("get_manifest");
399 assert_eq!(read_back, manifest);
400
401 let dest = std::env::temp_dir().join(format!("snapdir-b2-dest-{}", std::process::id()));
402 std::fs::create_dir_all(&dest).unwrap();
403 b2.fetch_files(&read_back, &dest).expect("fetch_files");
404 assert_eq!(std::fs::read(dest.join("foo")).unwrap(), b"foo\n");
405
406 let _ = std::fs::remove_dir_all(&src);
407 let _ = std::fs::remove_dir_all(&dest);
408 }
409}