git-remote-object-store 0.2.4

Git remote helper backed by cloud object stores (S3, Azure Blob Storage)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
//! Bucket-key builders **and inspectors** for the packchain engine.
//!
//! Centralised so Phase 2/3 push/fetch and Phase 4 direct-file-access
//! all derive identical keys for a given (prefix, ref, sha) tuple. The
//! key shapes mirror the issue-#52 spec:
//!
//! ```text
//! <prefix>/refs/heads/<branch>/chain.json
//! <prefix>/refs/heads/<branch>/path-index.json
//! <prefix>/packs/<content-sha>.pack
//! <prefix>/packs/<content-sha>.idx
//! ```
//!
//! All builders apply the same empty-prefix rule as
//! [`crate::keys::join`] / [`crate::keys::bundle_key`]: an empty (or
//! `None`) prefix yields a key with no leading slash.
//!
//! Inspectors ([`is_chain_json_key`], [`sha_from_pack_key`]) live
//! here too so callers across the engine (`gc`, `list`, `read`)
//! share one implementation instead of drifting apart through
//! independent copies.

use std::fmt;

use super::PackchainError;
use super::schema::{ChainSegment, Sha40};

/// Suffix that marks a [`chain_key`] in a listing. Defined once so
/// `gc::list_referenced_packs`, `list::list_refs`, and
/// [`ref_path_from_chain_key`] can't drift apart.
///
/// The leading `/` is part of the suffix on purpose: a key only
/// matches when `chain.json` is a complete final path component that
/// sits under some parent directory, so a bare `chain.json` or a
/// basename such as `xchain.json` does not qualify.
pub(crate) const CHAIN_JSON_SUFFIX: &str = "/chain.json";

/// Report whether `key` names a chain manifest.
///
/// A key qualifies only when it ends with [`CHAIN_JSON_SUFFIX`].
/// Sibling objects stored under the same ref directory
/// (`path-index.json`, `<sha>.bundle`) and look-alikes with trailing
/// bytes such as `chain.json.bak` therefore return `false`.
#[must_use]
pub(crate) fn is_chain_json_key(key: &str) -> bool {
    // `strip_suffix` succeeds exactly when the suffix is present.
    key.strip_suffix(CHAIN_JSON_SUFFIX).is_some()
}

/// Compose the full bucket key for a chain segment's pack from the
/// prefix and the bucket-relative `pack` field stored in `chain.json`.
/// `chain.json` records pack keys as `packs/<sha>.pack` (no leading
/// prefix) so a chain authored with one prefix can be read with
/// another after a `mv`-style rename.
///
/// * `prefix` — optional bucket prefix, forwarded untouched.
/// * `bucket_relative_pack` — the `pack` value exactly as it appears in
///   the chain segment; it is not validated or normalised here.
///
/// Returns whatever [`crate::keys::join`] produces for the two
/// arguments; this function adds no logic of its own.
#[must_use]
pub(crate) fn pack_key_from_relative(prefix: Option<&str>, bucket_relative_pack: &str) -> String {
    // Pure delegation: the empty-prefix handling lives in `keys::join`.
    crate::keys::join(prefix, bucket_relative_pack)
}

/// Recover the ref path from a chain manifest key.
///
/// The trailing [`CHAIN_JSON_SUFFIX`] is removed first. When a
/// non-empty `prefix` is supplied, the remainder must additionally
/// start with `<prefix>/`, which is removed as well; a `None` or empty
/// prefix leaves the remainder untouched.
///
/// Returns `None` when the key does not end in the chain suffix, or
/// when a non-empty prefix is given and the key does not sit directly
/// under it — for example an unprefixed key, or a sibling prefix such
/// as `repo-other/` checked against `repo`. Callers are expected to
/// have filtered with [`is_chain_json_key`] already, so `None` points
/// at an inconsistency in the listing rather than a routine miss.
#[must_use]
pub(crate) fn ref_path_from_chain_key(prefix: Option<&str>, key: &str) -> Option<String> {
    let stem = key.strip_suffix(CHAIN_JSON_SUFFIX)?;
    // Treat `Some("")` exactly like `None`.
    let ref_path = match prefix.filter(|candidate| !candidate.is_empty()) {
        // Both the prefix and the separating `/` must be present, which
        // is what rejects sibling prefixes that merely share a stem.
        Some(p) => stem.strip_prefix(p)?.strip_prefix('/')?,
        None => stem,
    };
    Some(ref_path.to_owned())
}

/// Parse the content SHA out of a chain segment's `pack` value.
///
/// Accepted shape: `[<prefix>/]packs/<sha>.pack`. The value is split
/// at its last `/`; the basename must end in `.pack`, the directory
/// component immediately above it must be exactly `packs`, and the
/// remaining stem must pass [`Sha40::try_new`]. Anything else —
/// including a bare `<sha>.pack` with no directory at all — yields
/// `None`, and the caller picks the error variant to report
/// (`MalformedPackEntry` at `read::decode_entry`'s call site,
/// `ParseJson` via `serde_json::Error::custom` in
/// `gc::list_referenced_packs`).
///
/// Requiring the `packs` parent is a defense-in-depth measure against
/// a tampered or corrupt `chain.json`: look-alike directories such as
/// `packs-other/` or `packsfake/` never surface a SHA.
///
/// NOTE(review): components in front of `packs/` are accepted as-is
/// (nothing here compares them against the configured prefix) —
/// confirm callers rebuild the bucket key from the returned SHA
/// instead of trusting the raw `pack` string.
#[must_use]
pub(crate) fn sha_from_pack_key(pack: &str) -> Option<Sha40> {
    // No `/` at all means there is no parent directory to check.
    let (dir, file) = pack.rsplit_once('/')?;
    let stem = file.strip_suffix(".pack")?;
    // The last segment of the directory part must be exactly `packs`;
    // this covers both `packs/<sha>.pack` and `<...>/packs/<sha>.pack`.
    let parent_is_packs = dir.rsplit('/').next() == Some("packs");
    if !parent_is_packs {
        return None;
    }
    Sha40::try_new(stem).ok()
}

/// Check a chain segment's `pack` field and hand back its content SHA.
///
/// Delegates the parsing to [`sha_from_pack_key`]. On success the
/// parsed SHA is returned.
///
/// # Errors
///
/// Returns [`PackchainError::MalformedPackEntry`] (with `offset: 0` and
/// a reason that quotes the offending value) when `segment.pack` is not
/// of the form `[<prefix>/]packs/<sha>.pack`. Sharing this helper keeps
/// the error wording aligned across `fetch`, `compact`, `read`, and
/// `gc`.
pub(crate) fn segment_pack_sha(segment: &ChainSegment) -> Result<Sha40, PackchainError> {
    match sha_from_pack_key(&segment.pack) {
        Some(sha) => Ok(sha),
        // The message is only built on the failure path.
        None => Err(PackchainError::MalformedPackEntry {
            offset: 0,
            reason: format!(
                "chain segment pack key `{}` is not of the form `[<prefix>/]packs/<sha>.pack`",
                segment.pack,
            ),
        }),
    }
}

/// Build the key of the chain manifest for `ref_name`:
/// `<prefix>/<ref_name>/chain.json` (the manifest lists segments
/// newest-first).
///
/// A `None` or empty `prefix` is dropped entirely, so the resulting
/// key never starts with a slash.
pub(crate) fn chain_key(prefix: Option<&str>, ref_name: impl fmt::Display) -> String {
    prefix
        // Collapse `Some("")` into `None` so both take the bare branch.
        .filter(|candidate| !candidate.is_empty())
        .map_or_else(
            || format!("{ref_name}/chain.json"),
            |p| format!("{p}/{ref_name}/chain.json"),
        )
}

/// Build the key of the path index for `ref_name`:
/// `<prefix>/<ref_name>/path-index.json` (the nested path→blob map at
/// the ref's tip commit).
///
/// A `None` or empty `prefix` is dropped entirely, so the resulting
/// key never starts with a slash.
pub(crate) fn path_index_key(prefix: Option<&str>, ref_name: impl fmt::Display) -> String {
    // Guard clause for the prefixed shape; `Some("")` falls through.
    if let Some(p) = prefix.filter(|candidate| !candidate.is_empty()) {
        return format!("{p}/{ref_name}/path-index.json");
    }
    format!("{ref_name}/path-index.json")
}

/// Parse the content SHA out of a fully-qualified pack or index key.
///
/// The key must have the shape `[<prefix>/]packs/<sha>.{pack,idx}` for
/// the *supplied* prefix: with a non-empty `prefix` the key has to
/// start with `<prefix>/`; with a `None` or empty prefix it has to
/// start with `packs/` directly. The stem between `packs/` and the
/// extension must pass [`Sha40::try_new`]. Any mismatch returns `None`.
///
/// Used on the `PackMissing` retry path
/// ([`super::read::chain_references_pack_key`]) to compare a missing
/// bucket key against each chain segment's content SHA. The parsing
/// works on slices of `key` rather than building prefix-joined strings
/// for the comparison.
#[must_use]
pub(crate) fn pack_sha_from_full_key(prefix: Option<&str>, key: &str) -> Option<Sha40> {
    // Peel off `<prefix>/` when a real prefix is expected.
    let relative = match prefix.filter(|candidate| !candidate.is_empty()) {
        Some(p) => key.strip_prefix(p)?.strip_prefix('/')?,
        None => key,
    };
    let file = relative.strip_prefix("packs/")?;
    // `.pack` is tried first, then `.idx`; anything else is rejected.
    let stem = match file.strip_suffix(".pack") {
        Some(without_ext) => without_ext,
        None => file.strip_suffix(".idx")?,
    };
    Sha40::try_new(stem).ok()
}

/// Build the key of a pack file: `<prefix>/packs/<content_sha>.pack`.
///
/// The pack is keyed by its content SHA (the trailing SHA1 appended by
/// git's PACK format). A `None` or empty `prefix` is dropped entirely,
/// so the resulting key never starts with a slash.
pub(crate) fn pack_key(prefix: Option<&str>, content_sha: &Sha40) -> String {
    let sha = content_sha.as_str();
    // `Some("")` is folded into the unprefixed branch.
    if let Some(p) = prefix.filter(|candidate| !candidate.is_empty()) {
        format!("{p}/packs/{sha}.pack")
    } else {
        format!("packs/{sha}.pack")
    }
}

/// Build the key of a pack index file:
/// `<prefix>/packs/<content_sha>.idx`.
///
/// This is the companion of `pack_key(prefix, content_sha)`: the two
/// keys differ only in their extension. A `None` or empty `prefix` is
/// dropped entirely, so the resulting key never starts with a slash.
pub(crate) fn pack_idx_key(prefix: Option<&str>, content_sha: &Sha40) -> String {
    let sha = content_sha.as_str();
    prefix
        // `Some("")` is folded into the unprefixed branch.
        .filter(|candidate| !candidate.is_empty())
        .map_or_else(
            || format!("packs/{sha}.idx"),
            |p| format!("{p}/packs/{sha}.idx"),
        )
}

// Unit tests for the key builders and inspectors defined in this
// module. Builders are covered first, inspectors second.
#[cfg(test)]
mod tests {
    use super::*;

    // Well-formed fixture SHA: 40 lowercase hex characters.
    const SHA: &str = "abcdef0123456789abcdef0123456789abcdef01";
    // Ref name used by the builder tests.
    const REF: &str = "refs/heads/main";

    // Typed form of the `SHA` fixture; the unwrap doubles as a check
    // that the fixture itself is accepted by `Sha40::try_new`.
    fn sha40() -> Sha40 {
        Sha40::try_new(SHA).unwrap()
    }

    // --- builders ------------------------------------------------------

    #[test]
    fn chain_key_with_prefix() {
        assert_eq!(
            chain_key(Some("acme"), REF),
            format!("acme/{REF}/chain.json"),
        );
    }

    #[test]
    fn chain_key_without_prefix() {
        // No prefix: the key must not gain a leading slash.
        assert_eq!(chain_key(None, REF), format!("{REF}/chain.json"));
    }

    #[test]
    fn chain_key_empty_prefix_matches_none() {
        // `Some("")` and `None` must be indistinguishable.
        assert_eq!(chain_key(Some(""), REF), chain_key(None, REF));
    }

    #[test]
    fn path_index_key_with_prefix() {
        assert_eq!(
            path_index_key(Some("acme"), REF),
            format!("acme/{REF}/path-index.json"),
        );
    }

    #[test]
    fn path_index_key_without_prefix() {
        // No prefix: the key must not gain a leading slash.
        assert_eq!(path_index_key(None, REF), format!("{REF}/path-index.json"));
    }

    #[test]
    fn pack_key_with_prefix() {
        let sha = sha40();
        assert_eq!(
            pack_key(Some("acme"), &sha),
            format!("acme/packs/{SHA}.pack")
        );
    }

    #[test]
    fn pack_key_without_prefix() {
        let sha = sha40();
        assert_eq!(pack_key(None, &sha), format!("packs/{SHA}.pack"));
    }

    #[test]
    fn pack_idx_key_with_prefix() {
        let sha = sha40();
        assert_eq!(
            pack_idx_key(Some("acme"), &sha),
            format!("acme/packs/{SHA}.idx"),
        );
    }

    #[test]
    fn pack_idx_key_without_prefix() {
        let sha = sha40();
        assert_eq!(pack_idx_key(None, &sha), format!("packs/{SHA}.idx"));
    }

    #[test]
    fn pack_and_idx_share_basename() {
        // The two keys must differ only in the `.pack` / `.idx`
        // extension. A regression that decoupled them (e.g. a stray
        // separator in one builder) would orphan the index from its
        // pack on every push.
        let sha = sha40();
        let pack = pack_key(Some("acme"), &sha);
        let idx = pack_idx_key(Some("acme"), &sha);
        assert_eq!(
            pack.strip_suffix(".pack").unwrap(),
            idx.strip_suffix(".idx").unwrap()
        );
    }

    // --- inspectors ----------------------------------------------------

    #[test]
    fn is_chain_json_key_accepts_prefixed_and_unprefixed_keys() {
        // Prefixed, unprefixed, and a branch name that itself contains
        // a slash (`feature/x`).
        assert!(is_chain_json_key("repo/refs/heads/main/chain.json"));
        assert!(is_chain_json_key("refs/heads/main/chain.json"));
        assert!(is_chain_json_key("refs/heads/feature/x/chain.json"));
    }

    #[test]
    fn is_chain_json_key_rejects_siblings() {
        assert!(!is_chain_json_key("repo/refs/heads/main/path-index.json"));
        assert!(!is_chain_json_key(&format!(
            "repo/refs/heads/main/{SHA}.bundle"
        )));
        // A key whose basename starts with `chain.json` but has more
        // bytes after — e.g. `chain.json.bak` — must be rejected.
        assert!(!is_chain_json_key("repo/refs/heads/main/chain.json.bak"));
    }

    #[test]
    fn sha_from_pack_key_handles_prefixed_and_unprefixed() {
        // Both the bucket-relative form and a form carrying leading
        // prefix components must yield the same SHA.
        let sha = sha_from_pack_key(&format!("packs/{SHA}.pack")).expect("unprefixed");
        assert_eq!(sha.as_str(), SHA);
        let sha = sha_from_pack_key(&format!("acme/repo/packs/{SHA}.pack")).expect("prefixed");
        assert_eq!(sha.as_str(), SHA);
    }

    #[test]
    fn sha_from_pack_key_returns_none_for_malformed() {
        // Missing `.pack` suffix.
        assert!(sha_from_pack_key(&format!("packs/{SHA}")).is_none());
        // Wrong-length sha (39 hex chars).
        assert!(sha_from_pack_key("packs/abcdef0123456789abcdef0123456789abcdef0.pack").is_none());
        // Non-hex character in sha.
        assert!(sha_from_pack_key("packs/zbcdef0123456789abcdef0123456789abcdef01.pack").is_none());
    }

    #[test]
    fn sha_from_pack_key_rejects_non_packs_parent() {
        // Defense-in-depth: a tampered or corrupt chain.json with a
        // pack field whose parent component is NOT `packs` must not
        // surface a SHA. Otherwise `pack_key_from_relative` would
        // compose a GET key outside `<prefix>/packs/`.
        assert!(sha_from_pack_key(&format!("{SHA}.pack")).is_none());
        assert!(sha_from_pack_key(&format!("../{SHA}.pack")).is_none());
        assert!(sha_from_pack_key(&format!("../etc/{SHA}.pack")).is_none());
        assert!(sha_from_pack_key(&format!("evil/{SHA}.pack")).is_none());
        assert!(sha_from_pack_key(&format!("packs-other/{SHA}.pack")).is_none());
        assert!(sha_from_pack_key(&format!("acme/packsfake/{SHA}.pack")).is_none());
        // Sibling-style prefix that does not end in a `packs` segment.
        assert!(sha_from_pack_key(&format!("acme/repo/{SHA}.pack")).is_none());
    }

    #[test]
    fn pack_sha_from_full_key_matches_pack_and_idx_with_prefix() {
        // Both extensions must resolve to the same SHA under a prefix.
        let key = format!("acme/packs/{SHA}.pack");
        assert_eq!(
            pack_sha_from_full_key(Some("acme"), &key).unwrap().as_str(),
            SHA,
        );
        let idx = format!("acme/packs/{SHA}.idx");
        assert_eq!(
            pack_sha_from_full_key(Some("acme"), &idx).unwrap().as_str(),
            SHA,
        );
    }

    #[test]
    fn pack_sha_from_full_key_matches_without_prefix() {
        // `None` and `Some("")` are exercised side by side because
        // they must behave identically.
        let key = format!("packs/{SHA}.pack");
        assert_eq!(pack_sha_from_full_key(None, &key).unwrap().as_str(), SHA);
        assert_eq!(
            pack_sha_from_full_key(Some(""), &key).unwrap().as_str(),
            SHA,
        );
        // `.idx` shape with no prefix — closes the with-prefix /
        // without-prefix × pack / idx matrix.
        let idx = format!("packs/{SHA}.idx");
        assert_eq!(pack_sha_from_full_key(None, &idx).unwrap().as_str(), SHA);
        assert_eq!(
            pack_sha_from_full_key(Some(""), &idx).unwrap().as_str(),
            SHA,
        );
    }

    #[test]
    fn pack_sha_from_full_key_rejects_prefix_mismatch() {
        // Unprefixed key with a prefix expected: must be rejected so
        // a chain authored under `acme/` doesn't false-match an
        // un-prefixed missing-key lookup (mirrors the prior pack_key
        // equality semantics).
        let key = format!("packs/{SHA}.pack");
        assert!(pack_sha_from_full_key(Some("acme"), &key).is_none());
        // Sibling-prefix collision: `acme-other` must not match `acme`.
        let key = format!("acme-other/packs/{SHA}.pack");
        assert!(pack_sha_from_full_key(Some("acme"), &key).is_none());
        // Symmetric case: prefixed key against `None` must also be
        // rejected (the missing-key was for a prefixed bucket; a chain
        // authored without a prefix must not false-match). A future
        // relaxation to "ignore prefix when None" would slip past the
        // other rejections without this guard.
        let key = format!("acme/packs/{SHA}.pack");
        assert!(pack_sha_from_full_key(None, &key).is_none());
        let idx = format!("acme/packs/{SHA}.idx");
        assert!(pack_sha_from_full_key(None, &idx).is_none());
    }

    #[test]
    fn pack_sha_from_full_key_rejects_malformed_shapes() {
        // Missing `packs/` segment.
        assert!(pack_sha_from_full_key(None, &format!("blobs/{SHA}.pack")).is_none());
        // Missing extension.
        assert!(pack_sha_from_full_key(None, &format!("packs/{SHA}")).is_none());
        // Wrong-length sha (39 hex chars).
        assert!(
            pack_sha_from_full_key(None, "packs/abcdef0123456789abcdef0123456789abcdef0.pack")
                .is_none()
        );
        // Overlength sha (41 hex chars) — a future relaxation of
        // Sha40::try_new to "at least 40" would slip through without
        // this symmetric guard.
        assert!(pack_sha_from_full_key(None, &format!("packs/{SHA}f.pack")).is_none());
        // Uppercase sha — Sha40 is lowercase-only; a corrupted bucket
        // key with uppercase hex must be rejected.
        assert!(
            pack_sha_from_full_key(None, "packs/ABCDEF0123456789ABCDEF0123456789ABCDEF01.pack")
                .is_none()
        );
    }

    #[test]
    fn pack_sha_from_full_key_rejects_degenerate_shapes() {
        // Empty key (with or without a prefix). A regression that let
        // an empty key flow into the str-prefix machinery would attempt
        // to strip `packs/` from `""` and surface `None`, but pinning
        // the contract here guards against a future refactor that
        // short-circuits before the strip and accidentally returns a
        // bogus SHA.
        assert!(pack_sha_from_full_key(None, "").is_none());
        assert!(pack_sha_from_full_key(Some("acme"), "").is_none());

        // Zero-length SHA stem: the strip suffixes succeed but
        // `Sha40::try_new("")` must reject. Covers both extensions so
        // a future relaxation of one extension's check (but not the
        // other) is caught.
        assert!(pack_sha_from_full_key(None, "packs/.pack").is_none());
        assert!(pack_sha_from_full_key(None, "packs/.idx").is_none());

        // Suffix-after-extension: `packs/<sha>.pack/extra` does NOT end
        // in `.pack` or `.idx` (the strip_suffix's only accept exact
        // suffixes), so the helper must reject rather than treat the
        // trailing component as part of the stem. Guards against a
        // future regex-based rewrite that would happily match
        // `packs/<sha>.pack/whatever`.
        assert!(pack_sha_from_full_key(None, &format!("packs/{SHA}.pack/extra")).is_none());
        assert!(pack_sha_from_full_key(None, &format!("packs/{SHA}.idx/extra")).is_none());

        // Path-traversal-style key: a `../packs/<sha>.pack` leading
        // component must not match the no-prefix case. `strip_prefix`
        // with `Some(p)` already covers the prefixed case (see
        // `pack_sha_from_full_key_rejects_prefix_mismatch`); this
        // pins the symmetric `None`-prefix path.
        assert!(pack_sha_from_full_key(None, &format!("../packs/{SHA}.pack")).is_none());
        assert!(pack_sha_from_full_key(None, &format!("../packs/{SHA}.idx")).is_none());
    }

    #[test]
    fn segment_pack_sha_maps_malformed_to_malformed_pack_entry() {
        // The `pack` field deliberately lacks the `.pack` extension so
        // that `sha_from_pack_key` returns `None` and the error mapping
        // is exercised.
        let segment = super::super::schema::ChainSegment {
            sha: Sha40::try_new(SHA).unwrap(),
            parent_sha: None,
            pack: format!("packs/{SHA}"),
            bytes: 4_096,
        };
        let err = segment_pack_sha(&segment).unwrap_err();
        assert!(
            matches!(err, PackchainError::MalformedPackEntry { offset: 0, .. }),
            "expected MalformedPackEntry, got {err:?}",
        );
    }
}