s4_server/
repair.rs

1//! v0.9 #106: standalone sidecar repair / verify / sweep tooling.
2//!
3//! The S4 server writes a `<key>.s4index` sidecar after every framed PUT so
4//! Range GETs can do a partial fetch instead of streaming the whole body.
5//! Three failure modes leave the sidecar diverged from the live object and
6//! degrade Range GET to the full-read fallback:
7//!
8//! 1. The sidecar PUT failed after the main object committed (network blip,
9//!    backend throttle).
10//! 2. An operator overwrote the object directly through the backend, leaving
11//!    the sidecar stale (ETag / size mismatch with the new body).
12//! 3. The v0.8.15 H-g multipart-Complete-on-Versioning-Enabled bug emitted
13//!    sidecars bound to the parent key while the body landed under the
14//!    versioning shadow path (`<key>.__s4ver__/<id>`). Those orphans never
15//!    re-pair and lifecycle doesn't reap them.
16//!
17//! [`verify_sidecar`] reports the current state without writing,
18//! [`repair_sidecar`] rebuilds a single sidecar by re-scanning the main
19//! body, and [`sweep_orphan_sidecars`] walks every `*.s4index` in a bucket
20//! and reports / deletes the ones whose paired key is missing or stale.
21//!
22//! All three operate directly against an `aws_sdk_s3::Client` (the operator
23//! points it at the backend, not the S4 gateway, because the gateway hides
24//! `.s4index` from list output by design).
25
26use aws_sdk_s3::Client;
27use s4_codec::index::{
28    SIDECAR_SUFFIX, build_index_from_body, decode_index, encode_index, sidecar_key,
29};
30use thiserror::Error;
31
/// Default ceiling, in bytes, on how much of a main object body the repair
/// tooling will pull into RAM to rebuild a sidecar: 5 GiB, the same value
/// as the `--max-body-bytes` default (#178).
///
/// `build_index_from_body` is a single-pass scan over the complete body, so
/// repair has to hold the whole object in memory. Operators with larger
/// objects raise the limit explicitly via `--max-body-bytes`; that way a
/// runaway `repair-sidecar` on a 50 GiB object surfaces a clear error
/// instead of swapping the host.
pub const DEFAULT_REPAIR_BODY_BYTES_CAP: u64 = 5 << 30;
39
/// v0.9 #106-audit-R5 P2-R5 (Codex): hard ceiling, in bytes, on a
/// `<key>.s4index` body that `verify-sidecar` / `sweep-orphan-sidecars`
/// will read: 600 MiB.
///
/// The codec spec bounds a legitimate sidecar at `MAX_FRAMES (16M) *
/// ENTRY_BYTES (32) + header (≤ 74 B)` ≈ 512 MiB, so 600 MiB leaves a
/// safety margin over that ceiling. A sidecar object above the cap is
/// either an attacker payload aimed at OOM-ing the operator's repair
/// process or a confused legacy reserved-name user data file — neither is
/// something we want in RAM before `decode_index` gets a chance to reject
/// it. Operators with anomalously large LEGITIMATE sidecars
/// (multi-million-frame objects) should raise the cap explicitly; until
/// then 600 MiB is the safe-by-default value.
pub const MAX_SIDECAR_BODY_BYTES: u64 = 600 << 20;
52
/// Errors returned by the sidecar verify / repair / sweep entry points in
/// this module.
///
/// Upstream SDK failures are flattened into [`RepairError::Backend`] with
/// the error rendered to a string; the remaining variants are typed
/// conditions a caller (for example the CLI) can match on.
#[derive(Debug, Error)]
pub enum RepairError {
    /// A request to the S3 backend failed, or returned data inconsistent
    /// with an earlier HEAD (`repair_sidecar` reports a GET whose byte
    /// count differs from the HEAD-reported size through this variant).
    /// `op` is a short label for the operation, such as `"GET"`, `"PUT"`,
    /// `"DELETE"` or `"ListObjectsV2"`; `key` may be empty for
    /// bucket-level operations.
    #[error("S3 backend error on {op} {bucket}/{key}: {cause}")]
    Backend {
        op: &'static str,
        bucket: String,
        key: String,
        // Named `cause` (not `source`) so thiserror doesn't auto-treat it
        // as a `#[source]` chain field — the upstream SDK error is already
        // stringified into `cause`.
        cause: String,
    },
    /// `build_index_from_body` rejected the main object body while
    /// `repair_sidecar` was scanning it for frames; `cause` is that error
    /// rendered to a string.
    #[error("frame scan failed on {bucket}/{key}: {cause}")]
    FrameScan {
        bucket: String,
        key: String,
        cause: String,
    },
    /// The HEAD-reported size of the main object is larger than the
    /// caller-supplied cap, so `repair_sidecar` refuses to load it. Both
    /// fields are byte counts.
    #[error("object body {size} bytes exceeds repair cap {cap}; pass --max-body-bytes to raise")]
    BodyTooLarge { size: u64, cap: u64 },
    /// HEAD on `{bucket}/{key}` returned no `Content-Length` header. The
    /// body-size cap that prevents OOM on a runaway repair relies on this
    /// being available, so the tool fails closed rather than treating a
    /// missing length as zero (which would silently bypass the cap).
    #[error(
        "HEAD {bucket}/{key} returned no Content-Length; cannot enforce body cap, refusing to proceed"
    )]
    MissingContentLength { bucket: String, key: String },
    /// Race detector for `repair_sidecar`: the main object changed while
    /// the repair was in flight. Raised when the `If-Match`-pinned GET is
    /// rejected because the object no longer carries the ETag the initial
    /// HEAD saw, and again when the HEAD issued after the sidecar PUT
    /// reports a different ETag or size than the initial one. Returned so
    /// the operator can re-run instead of keeping a sidecar that's
    /// immediately stale. `head_etag` is the normalized ETag from the
    /// initial HEAD (empty when the backend returned none).
    #[error(
        "object {bucket}/{key} was overwritten during repair (HEAD ETag {head_etag} != GET response); re-run repair-sidecar"
    )]
    OverwrittenDuringRepair {
        bucket: String,
        key: String,
        head_etag: String,
    },
    /// v0.9 #106-audit-R5 P2-R5 (Codex): the backend reports a
    /// `<key>.s4index` object larger than [`MAX_SIDECAR_BODY_BYTES`],
    /// a cap that itself sits above the codec spec's largest
    /// legitimate sidecar (~512 MiB).
    /// Surfaced before the GET to avoid loading a multi-GiB corrupt
    /// or attacker-supplied `.s4index` blob into the operator's
    /// repair process (DoS hardening). Operators with anomalously
    /// large legitimate sidecars (multi-million-frame objects) can
    /// raise the cap by changing the constant — but the practical
    /// answer is "treat the underlying object as not-sidecared
    /// (the GET path already falls back to a full read in that
    /// case)" rather than chasing larger sidecars.
    /// Here `key` is the sidecar key, not the paired object key.
    #[error(
        "sidecar object {bucket}/{key} is {size} bytes (> {cap}-byte cap); refusing to load — \
         most likely a legacy reserved-name user object or attacker payload aimed at OOM"
    )]
    SidecarTooLarge {
        bucket: String,
        key: String,
        size: u64,
        cap: u64,
    },
    /// v0.9 #106-audit-R3 P2-R3: the object body has no S4F2 frame
    /// magic — it's a passthrough / raw-bytes object the server
    /// intentionally never sidecared (service.rs::put_object only
    /// builds a sidecar when `is_framed && !will_encrypt`). Writing
    /// an empty `<key>.s4index` would silently break Range GET:
    /// `FrameIndex::lookup_range` over zero entries returns `None`,
    /// the GET path falls into the "invalid range" branch instead of
    /// the correct passthrough-range fallback that exists for
    /// sidecar-less objects. Surface as a typed error so the
    /// operator knows the object isn't a candidate for sidecar
    /// repair (and `verify-sidecar` will already classify it as
    /// `MissingHarmless` with frame_count=0).
    #[error(
        "object {bucket}/{key} body has no S4F2 frame magic — it's a passthrough or \
         raw-bytes object that the server intentionally never sidecared; \
         sidecar repair would silently break Range GET. No action required."
    )]
    NotFramed { bucket: String, key: String },
    /// v0.9 #106-audit-R2 P2-INT-1: the object body the backend returned
    /// is an SSE-S4 (S4E1/S4E2/S4E3/S4E4/S4E5/S4E6) encrypted envelope.
    /// `repair_sidecar` runs against the BACKEND (not the gateway), so the
    /// body it sees is ciphertext — feeding that to the frame scanner
    /// would surface as a confusing `FrameScan` because the S4F2 frame
    /// magic is hidden inside the encrypted payload. Worse, the v3
    /// sidecar's `sse_v3` binding (key_id / salt / chunk_size etc.)
    /// cannot be reconstructed from the backend bytes alone — it
    /// requires the SSE keyring to decrypt the body and walk the chunk
    /// layout. The CLI does not (yet) accept `--sse-s4-key`; v0.10
    /// roadmap is to plumb that through. Until then, surface a clean
    /// typed error so the operator can route the repair through a
    /// server-mode rebuild path (re-PUT the object) instead of receiving
    /// a misleading frame-scan failure.
    #[error(
        "object {bucket}/{key} body is an SSE-S4 encrypted envelope ({message}); \
         encrypted-sidecar repair requires server-mode access to the SSE keyring \
         (CLI `--sse-s4-key` plumbing is the v0.10 roadmap), \
         use a server-mode rebuild path or re-PUT the object to regenerate the sidecar"
    )]
    EncryptedSidecarUnsupported {
        bucket: String,
        key: String,
        message: String,
    },
}
158
/// Status reported by [`verify_sidecar`]. Discriminates the outcomes a
/// CI / cron job needs to branch on. The three `Missing*` variants
/// resolve the P2-C ambiguity Codex caught: small single-frame objects
/// intentionally have no sidecar (server only writes when
/// `entries.len() > 1`), so a blanket `Missing` = exit-1 would false-
/// alert on healthy objects.
///
/// The "Exit 0" / "Exit 1" notes on the variants mirror
/// [`VerifyReport::is_clean`], which is what the CLI maps to its exit code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SidecarStatus {
    /// Sidecar present, parses cleanly, and its recorded binding agrees
    /// with the live HEAD: the ETag when one is recorded, and the body
    /// size. Also covers size-only bindings written for backends that
    /// return no ETag.
    ///
    /// `frame_count` is the number of index entries; `sidecar_size` is the
    /// byte length of the sidecar object itself (not of the main body).
    Ok { frame_count: u64, sidecar_size: u64 },
    /// No `<key>.s4index` AND the main body scans as a single frame
    /// (server skips sidecar emission for `entries.len() <= 1` by
    /// design). Healthy state — Range GET falls back to a full body
    /// read, but a single-frame object's "full read" *is* its only
    /// frame, so there's no fast-path to lose. Exit 0.
    MissingHarmless { frame_count: u64 },
    /// No `<key>.s4index` AND the main body has 2+ frames. Range GET
    /// fast-path is lost; `repair-sidecar` will restore it. Exit 1.
    MissingDivergent { frame_count: u64 },
    /// No `<key>.s4index` AND the main object body exceeds the deep-
    /// scan cap, so we can't tell whether it's a healthy single-frame
    /// or a real divergence. Operator should raise `--max-body-bytes`
    /// or run `repair-sidecar` to settle it. Exit 0 (ambiguous, not a
    /// confirmed divergence — better to under-alert than spam).
    ///
    /// `size` is the HEAD-reported body size and `cap` the deep-scan cap
    /// it exceeded, both in bytes.
    MissingUnknown { size: u64, cap: u64 },
    /// Sidecar present but its `source_etag` doesn't match the live HEAD —
    /// the main object was overwritten or the sidecar is from a different
    /// commit point. `live_etag` is the normalized ETag from HEAD, or an
    /// empty string when the backend returned none. Exit 1.
    StaleEtag {
        sidecar_etag: String,
        live_etag: String,
    },
    /// Sidecar present and ETag matches, but the recorded body size differs
    /// (some backends, e.g. lifecycle moves, change bytes without bumping
    /// ETag). Treated as stale. Exit 1.
    ///
    /// Unlike in [`SidecarStatus::Ok`], `sidecar_size` here is the main-body
    /// size *recorded inside* the sidecar (`source_compressed_size`), and
    /// `live_size` is the size HEAD reports now.
    StaleSize { sidecar_size: u64, live_size: u64 },
    /// Pre-v0.8.4 sidecar (no source_etag / source_compressed_size). Still
    /// usable read-only, but a repair will upgrade it to v2. Reported
    /// whenever the sidecar carries no size binding. Exit 0.
    LegacyV1 { frame_count: u64 },
    /// Sidecar bytes failed to decode. The body is corrupt or someone PUT
    /// non-S4IX data at the `.s4index` key. A `repair-sidecar` overwrites
    /// it cleanly. `message` is the decoder's error text. Exit 1.
    DecodeError { message: String },
}
204
/// Outcome of [`verify_sidecar`] for a single object.
#[derive(Debug, Clone)]
pub struct VerifyReport {
    /// Bucket the object was verified in.
    pub bucket: String,
    /// Key of the main object (not the `.s4index` sidecar key).
    pub key: String,
    /// Verdict on the sidecar's state relative to the live object.
    pub status: SidecarStatus,
}
211
212impl VerifyReport {
213    /// True when the sidecar is in a state operators don't need to
214    /// action. Used by the CLI to decide exit code (true → 0, false → 1).
215    /// `MissingHarmless` is clean (single-frame objects have no sidecar
216    /// by design); `MissingUnknown` is also reported clean so the CLI
217    /// doesn't false-alert on objects too large to deep-scan — operator
218    /// can still see the hint in stdout and raise `--max-body-bytes`.
219    pub fn is_clean(&self) -> bool {
220        matches!(
221            self.status,
222            SidecarStatus::Ok { .. }
223                | SidecarStatus::LegacyV1 { .. }
224                | SidecarStatus::MissingHarmless { .. }
225                | SidecarStatus::MissingUnknown { .. }
226        )
227    }
228}
229
/// Summary of a successful [`repair_sidecar`] run.
#[derive(Debug, Clone)]
pub struct RepairReport {
    /// Bucket holding the repaired object.
    pub bucket: String,
    /// Key of the main object whose sidecar was rebuilt.
    pub key: String,
    /// Number of frame entries found in the body and written to the index.
    pub frame_count: u64,
    /// Size in bytes of the encoded sidecar that was PUT.
    pub sidecar_bytes_written: u64,
    /// Normalized ETag stamped into the sidecar; `None` when the backend
    /// returned no ETag on HEAD.
    pub source_etag: Option<String>,
    /// Size in bytes of the main object body the sidecar is bound to.
    pub source_compressed_size: u64,
    /// True when a sidecar already existed (we overwrote it). False when we
    /// wrote one for the first time. Determined by a HEAD on the sidecar
    /// key before the write; any HEAD failure counts as "did not exist".
    pub rebuilt_from_existing: bool,
}
242
/// Why [`sweep_orphan_sidecars`] flagged a sidecar as an orphan.
/// [`DeletePolicy`] decides per reason whether the sidecar may be deleted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OrphanReason {
    /// The paired logical key has no HEAD — sidecar is dangling.
    PairedMissing,
    /// Paired key exists but the sidecar's recorded ETag is stale.
    PairedEtagMismatch {
        sidecar_etag: String,
        live_etag: String,
    },
    /// Paired key exists, ETag matches, but size differs. Sizes are in
    /// bytes: the one recorded in the sidecar vs. the live object's.
    PairedSizeMismatch { sidecar_size: u64, live_size: u64 },
    /// The sidecar bytes failed to decode — either corruption or a non-
    /// sidecar object that happened to land at a `.s4index` key. Not
    /// deleted under [`DeletePolicy::PairBoundOnly`] because it may be
    /// real user data.
    SidecarUndecodable { message: String },
}
258
/// One orphaned sidecar found by [`sweep_orphan_sidecars`].
#[derive(Debug, Clone)]
pub struct OrphanReport {
    /// Full key of the `.s4index` object; this is the key the sweep
    /// deletes when the active policy allows it.
    pub sidecar_key: String,
    /// Key of the main object the sidecar belongs to — presumably the
    /// sidecar key with the sidecar suffix stripped, as computed by the
    /// sweep (TODO confirm against the classifier that fills this in).
    pub paired_key: String,
    /// Why the sidecar is considered an orphan.
    pub reason: OrphanReason,
}
265
/// Result of one [`sweep_orphan_sidecars`] pass over a bucket.
#[derive(Debug, Clone)]
pub struct SweepReport {
    /// Bucket that was swept.
    pub bucket: String,
    /// Number of listed keys that ended in the sidecar suffix and were
    /// therefore classified.
    pub sidecars_scanned: u64,
    /// Every orphan found, whether or not it was deleted.
    pub orphans: Vec<OrphanReport>,
    /// Count of orphans actually deleted under the [`DeletePolicy`] that
    /// was passed. Always 0 in dry-run mode ([`DeletePolicy::DryRun`]).
    pub deleted: u64,
}
275
276/// Verify a single `<bucket>/<key>` sidecar without writing.
277///
278/// When the sidecar is absent, this fetches the main body (capped at
279/// `deep_scan_body_cap`) to scan its frame count — single-frame objects
280/// intentionally have no sidecar (server skips emission when
281/// `entries.len() <= 1`), so the absent-sidecar verdict is
282/// `MissingHarmless` for those rather than a false-alert `Missing`.
283/// Pass [`DEFAULT_REPAIR_BODY_BYTES_CAP`] (5 GiB) for the standard CLI
284/// behaviour.
285pub async fn verify_sidecar(
286    client: &Client,
287    bucket: &str,
288    key: &str,
289    deep_scan_body_cap: u64,
290) -> Result<VerifyReport, RepairError> {
291    let HeadInfo {
292        raw_etag: live_raw_etag,
293        normalized_etag: live_etag,
294        size: live_size,
295    } = head_main(client, bucket, key).await?;
296    let sidecar_k = sidecar_key(key);
297    // v0.9 #106-audit-R5 P2-R5 (Codex): bounded sidecar fetch.
298    // A multi-GiB corrupt or legacy reserved-name user `.s4index`
299    // object would OOM the operator's repair process if we did the
300    // naive unbounded GET. Cap on HEAD-reported size.
301    let bytes = match get_sidecar_bytes_capped(client, bucket, &sidecar_k).await {
302        Ok(Some(b)) => b,
303        Ok(None) => {
304            // P2-C (Codex R3): disambiguate Missing via a body scan
305            // before deciding whether this is a healthy single-frame
306            // object or a real divergence.
307            return Ok(VerifyReport {
308                bucket: bucket.into(),
309                key: key.into(),
310                status: classify_missing_sidecar(
311                    client,
312                    bucket,
313                    key,
314                    live_raw_etag.as_deref(),
315                    live_size,
316                    deep_scan_body_cap,
317                )
318                .await?,
319            });
320        }
321        Err(SidecarFetchOutcome::TooLarge { size, cap }) => {
322            return Err(RepairError::SidecarTooLarge {
323                bucket: bucket.into(),
324                key: sidecar_k,
325                size,
326                cap,
327            });
328        }
329        Err(SidecarFetchOutcome::Other(msg)) => {
330            return Err(RepairError::Backend {
331                op: "GET",
332                bucket: bucket.into(),
333                key: sidecar_k,
334                cause: msg,
335            });
336        }
337    };
338    let sidecar_size = bytes.len() as u64;
339    let idx = match decode_index(bytes) {
340        Ok(i) => i,
341        Err(e) => {
342            return Ok(VerifyReport {
343                bucket: bucket.into(),
344                key: key.into(),
345                status: SidecarStatus::DecodeError {
346                    message: e.to_string(),
347                },
348            });
349        }
350    };
351    let frame_count = idx.entries.len() as u64;
352    // P2-D (Codex R4): both sides of the etag comparison are now
353    // `Option<&str>` so an ETag-less backend `None == None` round-trips
354    // as Ok rather than tripping the stale path.
355    //
356    // P3-A (Codex R5): the size-only binding case `(None, Some(z))` is
357    // a fully valid v2 sidecar (just no ETag because the backend
358    // doesn't emit one). Treat any present-size binding as Ok rather
359    // than falling through to `LegacyV1`, which would falsely tell
360    // the operator that `repair-sidecar` could "upgrade" a sidecar
361    // that already IS the v2 it can produce on that backend.
362    // `LegacyV1` is only the true pre-v0.8.4 case where neither
363    // binding field is present.
364    let status = match (idx.source_etag.as_deref(), idx.source_compressed_size) {
365        (Some(side_etag), _) if Some(side_etag) != live_etag.as_deref() => {
366            SidecarStatus::StaleEtag {
367                sidecar_etag: side_etag.into(),
368                live_etag: live_etag.unwrap_or_default(),
369            }
370        }
371        (_, Some(side_size)) if side_size != live_size => SidecarStatus::StaleSize {
372            sidecar_size: side_size,
373            live_size,
374        },
375        // Any present size binding → Ok (covers full v2 AND the
376        // size-only-binding case from ETag-less repair, P3-A).
377        (_, Some(_)) => SidecarStatus::Ok {
378            frame_count,
379            sidecar_size,
380        },
381        // No size binding at all → genuinely legacy v1. Covers both
382        // (None, None) and the anomalous (Some, None) shape (which
383        // encode_index never emits, but match exhaustiveness needs
384        // coverage).
385        (_, None) => SidecarStatus::LegacyV1 { frame_count },
386    };
387    Ok(VerifyReport {
388        bucket: bucket.into(),
389        key: key.into(),
390        status,
391    })
392}
393
394/// Rebuild `<bucket>/<key>.s4index` from the main object body. Overwrites
395/// any existing sidecar (including stale or corrupt ones). Returns an error
396/// when the main body exceeds `body_bytes_cap`.
397pub async fn repair_sidecar(
398    client: &Client,
399    bucket: &str,
400    key: &str,
401    body_bytes_cap: u64,
402) -> Result<RepairReport, RepairError> {
403    let HeadInfo {
404        raw_etag: head_raw_etag,
405        normalized_etag: head_normalized_etag,
406        size: live_size,
407    } = head_main(client, bucket, key).await?;
408    if live_size > body_bytes_cap {
409        return Err(RepairError::BodyTooLarge {
410            size: live_size,
411            cap: body_bytes_cap,
412        });
413    }
414    // v0.9 #106 TOCTOU guard: pin the GET to the HEAD's ETag via If-Match.
415    // Without this, an overwrite between HEAD and GET would yield a body
416    // whose actual ETag is E2 while we stamp `source_etag = E1`, producing
417    // a sidecar that fails its own version-binding check on the very next
418    // Range GET (operator sees "repair succeeded" then nothing changed).
419    // Backend returns 412 PreconditionFailed if the object changed.
420    //
421    // P1-B (Codex review R1): pass the RAW etag (quoted entity-tag) per
422    // RFC 7232, not the normalized form. Strict S3-compatible backends
423    // reject `If-Match: abc-2` (missing quotes) with 400/412 and the
424    // repair never succeeds. Tolerant backends accept either. The
425    // sidecar's stored `source_etag` still uses the normalized form to
426    // match the server's PUT-path stamping convention.
427    //
428    // P2-D (Codex R4): when the backend doesn't return an ETag at all,
429    // skip `If-Match` entirely. Same posture the server takes in that
430    // case (it stamps `source_etag = None`); the race window stays open
431    // for those backends, but they don't have an ETag we could pin
432    // against anyway.
433    let get_builder = client.get_object().bucket(bucket).key(key);
434    let get_builder = match &head_raw_etag {
435        Some(t) => get_builder.if_match(t.clone()),
436        None => get_builder,
437    };
438    let body = match get_builder.send().await {
439        Ok(resp) => resp
440            .body
441            .collect()
442            .await
443            .map(|agg| agg.into_bytes())
444            .map_err(|e| RepairError::Backend {
445                op: "GET",
446                bucket: bucket.into(),
447                key: key.into(),
448                cause: format!("read body: {e}"),
449            })?,
450        Err(e) => {
451            // PreconditionFailed (412) → object was overwritten between
452            // HEAD and GET. Surface as a typed error so the operator can
453            // re-run instead of writing a stale sidecar.
454            let s = format!("{e}");
455            if s.contains("PreconditionFailed") || s.contains("412") {
456                return Err(RepairError::OverwrittenDuringRepair {
457                    bucket: bucket.into(),
458                    key: key.into(),
459                    head_etag: head_normalized_etag.clone().unwrap_or_default(),
460                });
461            }
462            if is_get_not_found(&e) {
463                return Err(RepairError::Backend {
464                    op: "GET",
465                    bucket: bucket.into(),
466                    key: key.into(),
467                    cause: "object not found (NoSuchKey)".into(),
468                });
469            }
470            return Err(RepairError::Backend {
471                op: "GET",
472                bucket: bucket.into(),
473                key: key.into(),
474                cause: s,
475            });
476        }
477    };
478    // Defense in depth: even with If-Match, double-check the bytes we got
479    // are the size HEAD promised. Backends with quirky range / cache
480    // behaviour have surprised us before — see codec memo on partial
481    // serves that succeeded with the wrong length.
482    if (body.len() as u64) != live_size {
483        return Err(RepairError::Backend {
484            op: "GET",
485            bucket: bucket.into(),
486            key: key.into(),
487            cause: format!(
488                "got {} bytes but HEAD said {}; backend served wrong content length",
489                body.len(),
490                live_size
491            ),
492        });
493    }
494    // v0.9 #106-audit-R2 P2-INT-1: detect SSE-S4 encrypted envelopes
495    // BEFORE handing the body to the frame scanner. The backend serves
496    // the on-disk ciphertext (S4E1..S4E6 magic prefix); `build_index_from_body`
497    // would scan for `S4F2` frame magic inside that ciphertext and surface
498    // an opaque `FrameScan` error. Worse, the v3 sidecar's `sse_v3` binding
499    // (key_id / salt / chunk_size) cannot be reconstructed from backend
500    // bytes alone — the SSE keyring is required to decrypt + walk chunks.
501    // Surface a typed error directing the operator to a server-mode rebuild
502    // path; v0.10 roadmap is to add `--sse-s4-key <path>` to the CLI so
503    // sidecar repair can decrypt the body in-process. See CHANGELOG.
504    if let Some(magic) = detect_sse_magic(&body) {
505        return Err(RepairError::EncryptedSidecarUnsupported {
506            bucket: bucket.into(),
507            key: key.into(),
508            message: format!("body magic {magic} indicates SSE-S4 envelope"),
509        });
510    }
511    let sidecar_k = sidecar_key(key);
512    let rebuilt_from_existing = client
513        .head_object()
514        .bucket(bucket)
515        .key(&sidecar_k)
516        .send()
517        .await
518        .is_ok();
519    let mut idx = build_index_from_body(&body).map_err(|e| RepairError::FrameScan {
520        bucket: bucket.into(),
521        key: key.into(),
522        cause: e.to_string(),
523    })?;
524    // v0.9 #106-audit-R3 P2-R3 (Codex): `build_index_from_body`
525    // on a non-S4F2 body (passthrough / raw bytes) returns Ok with
526    // an empty entries vec rather than an error. Writing that as a
527    // sidecar would silently break Range GET — `lookup_range` over
528    // zero entries returns None, and the GET path then takes the
529    // "no plan" branch instead of the passthrough-range fallback
530    // that exists for sidecar-less objects. Reject cleanly so the
531    // operator knows the object isn't a sidecar-repair candidate.
532    if idx.entries.is_empty() {
533        return Err(RepairError::NotFramed {
534            bucket: bucket.into(),
535            key: key.into(),
536        });
537    }
538    // Stamp the NORMALIZED form so server-side
539    // `sidecar_version_binding_ok` (which compares against the s3s
540    // `ETag::value()` stripped form) sees a match. The raw form was
541    // only needed for the wire-level `If-Match` header above.
542    //
543    // P2-D (Codex R4): pass through `None` when the backend doesn't
544    // return an ETag — the server's binding check treats `None` as
545    // the legacy/back-compat best-effort path. Stamping `Some("")`
546    // would force the check into the mismatch branch and the sidecar
547    // would be immediately rejected as stale.
548    idx.source_etag = head_normalized_etag.clone();
549    idx.source_compressed_size = Some(body.len() as u64);
550    let encoded = encode_index(&idx);
551    let encoded_len = encoded.len() as u64;
552    let frame_count = idx.entries.len() as u64;
553    client
554        .put_object()
555        .bucket(bucket)
556        .key(&sidecar_k)
557        .body(aws_sdk_s3::primitives::ByteStream::from(encoded.to_vec()))
558        .content_type("application/x-s4-index")
559        .send()
560        .await
561        .map_err(|e| RepairError::Backend {
562            op: "PUT",
563            bucket: bucket.into(),
564            key: sidecar_k.clone(),
565            cause: format!("{e}"),
566        })?;
567    // v0.9 #106 P2-B (Codex review round 2): `If-Match` on the GET
568    // only proves the body hadn't changed at GET time. The main object
569    // can still be overwritten during the (a) build_index_from_body
570    // scan and (b) sidecar PUT window — leaving a freshly-written
571    // sidecar stamped with the OLD ETag against the NEW body. The
572    // server-side `sidecar_version_binding_ok` would then trip on
573    // every Range GET and we'd silently report "repair succeeded".
574    //
575    // Final HEAD: if the main object's ETag changed since we read it,
576    // the sidecar we just wrote is already stale. Delete it (so the
577    // operator's next Range GET falls back to the safe full-read path,
578    // not the bad fast-path) and surface `OverwrittenDuringRepair`
579    // so the operator re-runs the repair under quieter conditions.
580    let post = head_main(client, bucket, key).await?;
581    if post.normalized_etag != head_normalized_etag || post.size != live_size {
582        // Best-effort cleanup; ignore the delete's outcome because the
583        // primary error is the race, not the cleanup itself.
584        let _ = client
585            .delete_object()
586            .bucket(bucket)
587            .key(&sidecar_k)
588            .send()
589            .await;
590        return Err(RepairError::OverwrittenDuringRepair {
591            bucket: bucket.into(),
592            key: key.into(),
593            head_etag: head_normalized_etag.unwrap_or_default(),
594        });
595    }
596    Ok(RepairReport {
597        bucket: bucket.into(),
598        key: key.into(),
599        frame_count,
600        sidecar_bytes_written: encoded_len,
601        source_etag: idx.source_etag,
602        source_compressed_size: live_size,
603        rebuilt_from_existing,
604    })
605}
606
/// Knob controlling which orphan categories `sweep_orphan_sidecars` is
/// allowed to delete. `SidecarUndecodable` is kept out of the default
/// `--delete` because v0.8.17-era operators on the
/// `--allow-legacy-reserved-key-reads` migration hatch can have
/// legitimate user-PUT objects whose key happens to end in `.s4index` —
/// those would fail to decode and `--delete` would nuke real user data.
/// Escalation to `DeletePolicy::IncludeUndecodable` is an explicit
/// operator opt-in (`--delete-undecodable` on the CLI).
///
/// The policy only gates deletion: orphans of every category are still
/// listed in the sweep report regardless of the level chosen.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeletePolicy {
    /// Pure dry-run: classify only, never write to the backend.
    DryRun,
    /// Delete `PairedMissing` / `PairedEtagMismatch` / `PairedSizeMismatch`
    /// orphans. Leave `SidecarUndecodable` in the report — operator must
    /// inspect those and rerun with `IncludeUndecodable` if they truly
    /// are corrupt sidecars (and not legacy reserved-name user data).
    PairBoundOnly,
    /// All four categories. Use only after confirming there's no legacy
    /// `--allow-legacy-reserved-key-reads` user data in this bucket.
    IncludeUndecodable,
}
628
629impl DeletePolicy {
630    fn allows(&self, reason: &OrphanReason) -> bool {
631        match (self, reason) {
632            (DeletePolicy::DryRun, _) => false,
633            (DeletePolicy::PairBoundOnly, OrphanReason::SidecarUndecodable { .. }) => false,
634            (DeletePolicy::PairBoundOnly, _) => true,
635            (DeletePolicy::IncludeUndecodable, _) => true,
636        }
637    }
638}
639
640/// List every `*.s4index` in `bucket` and report (and optionally delete) the
641/// orphans — sidecars whose paired key is missing or whose recorded
642/// ETag / size disagree with the live HEAD.
643///
644/// See [`DeletePolicy`] for the three deletion levels. Always run
645/// [`DeletePolicy::DryRun`] first to inspect the orphan list.
646pub async fn sweep_orphan_sidecars(
647    client: &Client,
648    bucket: &str,
649    policy: DeletePolicy,
650) -> Result<SweepReport, RepairError> {
651    let mut sidecars_scanned: u64 = 0;
652    let mut orphans: Vec<OrphanReport> = Vec::new();
653    let mut continuation: Option<String> = None;
654    loop {
655        let mut req = client.list_objects_v2().bucket(bucket);
656        if let Some(c) = continuation.as_ref() {
657            req = req.continuation_token(c);
658        }
659        let resp = req.send().await.map_err(|e| RepairError::Backend {
660            op: "ListObjectsV2",
661            bucket: bucket.into(),
662            key: String::new(),
663            cause: format!("{e}"),
664        })?;
665        for obj in resp.contents() {
666            let Some(k) = obj.key() else { continue };
667            if !k.ends_with(SIDECAR_SUFFIX) {
668                continue;
669            }
670            sidecars_scanned += 1;
671            let paired = &k[..k.len() - SIDECAR_SUFFIX.len()];
672            classify_one(client, bucket, k, paired, &mut orphans).await?;
673        }
674        if resp.is_truncated().unwrap_or(false) {
675            continuation = resp.next_continuation_token().map(str::to_owned);
676            if continuation.is_none() {
677                // Defensive: a truncated response with no continuation token
678                // is a backend bug; bail rather than infinite-loop.
679                break;
680            }
681        } else {
682            break;
683        }
684    }
685    let mut deleted = 0u64;
686    for orph in &orphans {
687        if !policy.allows(&orph.reason) {
688            continue;
689        }
690        client
691            .delete_object()
692            .bucket(bucket)
693            .key(&orph.sidecar_key)
694            .send()
695            .await
696            .map_err(|e| RepairError::Backend {
697                op: "DELETE",
698                bucket: bucket.into(),
699                key: orph.sidecar_key.clone(),
700                cause: format!("{e}"),
701            })?;
702        deleted += 1;
703    }
704    Ok(SweepReport {
705        bucket: bucket.into(),
706        sidecars_scanned,
707        orphans,
708        deleted,
709    })
710}
711
712/// P2-C (Codex R3): the server skips sidecar emission for objects whose
713/// frame count is ≤ 1 (small single-PUTs / single-chunk multiparts), so
714/// a missing sidecar can be EITHER an intentional skip OR a real
715/// divergence. Disambiguate by fetching the body (capped) and counting
716/// frames. Returns [`SidecarStatus::MissingUnknown`] when the body
717/// exceeds the cap, so verify-sidecar doesn't false-alert on
718/// large-but-can't-confirm objects.
719async fn classify_missing_sidecar(
720    client: &Client,
721    bucket: &str,
722    key: &str,
723    live_raw_etag: Option<&str>,
724    live_size: u64,
725    cap: u64,
726) -> Result<SidecarStatus, RepairError> {
727    if live_size > cap {
728        return Ok(SidecarStatus::MissingUnknown {
729            size: live_size,
730            cap,
731        });
732    }
733    // Pin the GET to the HEAD's ETag (RFC 7232 quoted form). If a race
734    // overwrites the object between HEAD and GET we'd otherwise scan a
735    // different body than the one HEAD reported on — surface as a
736    // typed error so the operator re-runs.
737    //
738    // P2-D: backends without an ETag have nothing to pin against;
739    // skip If-Match (matches the server-side `None`-tolerance path).
740    let get_builder = client.get_object().bucket(bucket).key(key);
741    let get_builder = match live_raw_etag {
742        Some(t) => get_builder.if_match(t.to_owned()),
743        None => get_builder,
744    };
745    let body = match get_builder.send().await {
746        Ok(resp) => resp
747            .body
748            .collect()
749            .await
750            .map(|agg| agg.into_bytes())
751            .map_err(|e| RepairError::Backend {
752                op: "GET",
753                bucket: bucket.into(),
754                key: key.into(),
755                cause: format!("read body: {e}"),
756            })?,
757        Err(e) => {
758            let s = format!("{e}");
759            if s.contains("PreconditionFailed") || s.contains("412") {
760                return Err(RepairError::OverwrittenDuringRepair {
761                    bucket: bucket.into(),
762                    key: key.into(),
763                    head_etag: live_raw_etag.map(normalize_etag).unwrap_or_default(),
764                });
765            }
766            if is_get_not_found(&e) {
767                return Err(RepairError::Backend {
768                    op: "GET",
769                    bucket: bucket.into(),
770                    key: key.into(),
771                    cause: "object not found (NoSuchKey)".into(),
772                });
773            }
774            return Err(RepairError::Backend {
775                op: "GET",
776                bucket: bucket.into(),
777                key: key.into(),
778                cause: s,
779            });
780        }
781    };
782    // v0.9 #106-audit self-review (post-R2): mirror the encrypted-body
783    // guard from `repair_sidecar` here. Without it, running
784    // `verify-sidecar` against an SSE-S4 chunked object (whose sidecar
785    // is missing — e.g. PUT happened pre-v0.9 before v3 sidecars
786    // shipped) would surface as a confusing FrameScan error instead of
787    // the friendly EncryptedSidecarUnsupported the repair tool already
788    // returns. Same root cause as P2-INT-1; same surface error.
789    if let Some(magic) = detect_sse_magic(&body) {
790        return Err(RepairError::EncryptedSidecarUnsupported {
791            bucket: bucket.into(),
792            key: key.into(),
793            message: format!("body magic {magic} indicates SSE-S4 envelope"),
794        });
795    }
796    // v0.9 #106-audit-R4 P2-R4 (Codex): a passthrough / raw-bytes
797    // body (no S4F2 magic) trips `build_index_from_body` with a
798    // `BadMagic` `FrameError`. From the verify-sidecar perspective
799    // that's the same outcome as a single-frame body: server never
800    // sidecared it, Range GET takes the full-read path, no operator
801    // action needed. Surface `MissingHarmless { frame_count: 0 }`
802    // (clean, exit 0) instead of a FrameScan repair error (exit 1)
803    // so CI / cron jobs don't false-alert on healthy passthrough
804    // objects. Twin of R3 P2-R3 on the repair-side.
805    let idx = match build_index_from_body(&body) {
806        Ok(i) => i,
807        Err(crate::codec::multipart::FrameError::BadMagic { .. }) => {
808            return Ok(SidecarStatus::MissingHarmless { frame_count: 0 });
809        }
810        Err(e) => {
811            return Err(RepairError::FrameScan {
812                bucket: bucket.into(),
813                key: key.into(),
814                cause: e.to_string(),
815            });
816        }
817    };
818    let frame_count = idx.entries.len() as u64;
819    if frame_count <= 1 {
820        Ok(SidecarStatus::MissingHarmless { frame_count })
821    } else {
822        Ok(SidecarStatus::MissingDivergent { frame_count })
823    }
824}
825
826async fn classify_one(
827    client: &Client,
828    bucket: &str,
829    sidecar_k: &str,
830    paired: &str,
831    out: &mut Vec<OrphanReport>,
832) -> Result<(), RepairError> {
833    // v0.9 #106 review P1-A (Codex): MUST decode the listed object first.
834    // Branching on "HEAD paired-key" before reading the candidate would
835    // mis-classify a legitimate `--allow-legacy-reserved-key-reads`
836    // user object (whose key happens to end in `.s4index` and whose
837    // paired stripped key may not exist) as `PairedMissing` — and
838    // `DeletePolicy::PairBoundOnly` would silently delete user data.
839    // The rule is: bytes that don't parse as S4IX magic = user data,
840    // never an orphan-eligible-for-default-delete.
841    // v0.9 #106-audit-R5 P2-R5 (Codex): bounded sidecar fetch.
842    // sweep walks every `*.s4index` in the bucket — a single
843    // multi-GiB attacker-supplied or legacy-user `.s4index` object
844    // would OOM the sweep process with the naive unbounded GET.
845    // TooLarge surfaces as a `SidecarUndecodable` orphan with a
846    // size-explaining message rather than aborting the whole sweep
847    // (one bad sidecar shouldn't stop the rest from being inspected).
848    let bytes = match get_sidecar_bytes_capped(client, bucket, sidecar_k).await {
849        Ok(Some(b)) => b,
850        // ListObjectsV2 saw it; if GET says NotFound now, treat as a
851        // sidecar that vanished mid-sweep — skip rather than report.
852        Ok(None) => return Ok(()),
853        Err(SidecarFetchOutcome::TooLarge { size, cap }) => {
854            out.push(OrphanReport {
855                sidecar_key: sidecar_k.into(),
856                paired_key: paired.into(),
857                reason: OrphanReason::SidecarUndecodable {
858                    message: format!(
859                        "sidecar size {size} > cap {cap}; refused to load (likely legacy user data or attack payload)"
860                    ),
861                },
862            });
863            return Ok(());
864        }
865        Err(SidecarFetchOutcome::Other(msg)) => {
866            return Err(RepairError::Backend {
867                op: "GET",
868                bucket: bucket.into(),
869                key: sidecar_k.into(),
870                cause: msg,
871            });
872        }
873    };
874    let idx = match decode_index(bytes) {
875        Ok(i) => i,
876        Err(e) => {
877            // Not a real S4IX sidecar — flag it under the safer
878            // category. `DeletePolicy::PairBoundOnly` does NOT remove
879            // these; the operator must escalate to
880            // `IncludeUndecodable` after confirming it isn't legacy
881            // user data.
882            out.push(OrphanReport {
883                sidecar_key: sidecar_k.into(),
884                paired_key: paired.into(),
885                reason: OrphanReason::SidecarUndecodable {
886                    message: e.to_string(),
887                },
888            });
889            return Ok(());
890        }
891    };
892    // Bytes decoded as S4IX — now we can safely check the paired key
893    // status. A missing paired key combined with a decodable sidecar
894    // IS a real orphan (the v0.8.15 H-g case, for example).
895    let head_res = client.head_object().bucket(bucket).key(paired).send().await;
896    let (live_etag_norm, live_size) = match head_res {
897        Ok(h) => {
898            // P2-D: `None` means the backend didn't return an ETag.
899            // Preserve the absence rather than coercing to `""` —
900            // comparing `Some("xyz")` from the sidecar against
901            // `Some("")` would always trip stale, falsely orphaning
902            // every paired-OK sidecar on an ETag-less backend.
903            let etag: Option<String> = h.e_tag().map(normalize_etag);
904            let size = h.content_length().unwrap_or(0).max(0) as u64;
905            (etag, size)
906        }
907        Err(e) => {
908            if is_head_not_found(&e) {
909                out.push(OrphanReport {
910                    sidecar_key: sidecar_k.into(),
911                    paired_key: paired.into(),
912                    reason: OrphanReason::PairedMissing,
913                });
914                return Ok(());
915            }
916            return Err(RepairError::Backend {
917                op: "HEAD",
918                bucket: bucket.into(),
919                key: paired.into(),
920                cause: format!("{e}"),
921            });
922        }
923    };
924    // ETag mismatch only fires when BOTH sides have an ETag. If the
925    // sidecar carries Some("x") and the live HEAD has None, that's
926    // not a definitive divergence — could be a backend that recently
927    // dropped ETag support. Skip the mismatch flag for the None side
928    // (matches the server's `sidecar_version_binding_ok` `None`-
929    // tolerance posture).
930    if let (Some(side_etag), Some(live_e)) = (idx.source_etag.as_deref(), live_etag_norm.as_deref())
931        && side_etag != live_e
932    {
933        out.push(OrphanReport {
934            sidecar_key: sidecar_k.into(),
935            paired_key: paired.into(),
936            reason: OrphanReason::PairedEtagMismatch {
937                sidecar_etag: side_etag.into(),
938                live_etag: live_e.into(),
939            },
940        });
941        return Ok(());
942    }
943    if let Some(side_size) = idx.source_compressed_size
944        && side_size != live_size
945    {
946        out.push(OrphanReport {
947            sidecar_key: sidecar_k.into(),
948            paired_key: paired.into(),
949            reason: OrphanReason::PairedSizeMismatch {
950                sidecar_size: side_size,
951                live_size,
952            },
953        });
954    }
955    // Legacy v1 sidecars (no binding fields) are intentionally
956    // tolerated here — read-only Range GETs still work and the
957    // operator gets warned by `verify-sidecar` separately.
958    Ok(())
959}
960
/// The subset of a HEAD response that the repair tools consume.
///
/// The ETag is carried twice, both times as `Option<String>`, so that an
/// absent ETag survives all the way into the sidecar (P2-D, Codex R4).
/// If the backend returned no ETag we MUST stamp
/// `FrameIndex::source_etag = None`, mirroring the server PUT path's
/// `resp.e_tag.as_ref().map(...)` shape; otherwise
/// `sidecar_version_binding_ok` would compare `Some("")` against a
/// missing live ETag and always trip "stale".
struct HeadInfo {
    /// Wire form of the ETag (typically `"..."`). This is what goes into
    /// `If-Match` headers, which per RFC 7232 want the full entity-tag.
    /// `None` means skip `If-Match` entirely (best-effort, same posture
    /// the server takes for ETag-less backends).
    raw_etag: Option<String>,
    /// Quote-stripped form, for comparison against
    /// `FrameIndex::source_etag` (the s3s `ETag::value()` accessor used
    /// by the server PUT path strips quotes).
    normalized_etag: Option<String>,
    /// Object size in bytes as reported by the HEAD's Content-Length.
    size: u64,
}
983
984async fn head_main(client: &Client, bucket: &str, key: &str) -> Result<HeadInfo, RepairError> {
985    let head = client
986        .head_object()
987        .bucket(bucket)
988        .key(key)
989        .send()
990        .await
991        .map_err(|e| RepairError::Backend {
992            op: "HEAD",
993            bucket: bucket.into(),
994            key: key.into(),
995            cause: format!("{e}"),
996        })?;
997    let raw_etag = head.e_tag().map(str::to_owned);
998    let normalized_etag = raw_etag.as_deref().map(normalize_etag);
999    // `content_length` is `Option<i64>` on the SDK type — `None` means the
1000    // backend didn't return a Content-Length header. We fail closed rather
1001    // than treating that as zero (which would silently bypass the
1002    // `body_bytes_cap` in `repair_sidecar` and let an unbounded GET
1003    // exhaust RAM). AWS S3 / MinIO / Garage / Ceph RGW all return
1004    // Content-Length on HEAD, so this only trips on exotic / broken
1005    // backends — which the operator should know about.
1006    let size = match head.content_length() {
1007        Some(n) if n >= 0 => n as u64,
1008        Some(_) | None => {
1009            return Err(RepairError::MissingContentLength {
1010                bucket: bucket.into(),
1011                key: key.into(),
1012            });
1013        }
1014    };
1015    Ok(HeadInfo {
1016        raw_etag,
1017        normalized_etag,
1018        size,
1019    })
1020}
1021
/// Remove the `"` characters wrapping an RFC 7232 entity-tag.
///
/// aws-sdk-s3 hands back the on-wire form (`"..."`), whereas the S4
/// gateway stamps the unquoted form into `FrameIndex::source_etag`
/// (the s3s `ETag::value()` accessor driving the PUT path strips
/// quotes). Comparing the two without normalizing would falsely flag a
/// freshly-written sidecar as `StaleEtag` purely because of the
/// wrapping quotes.
///
/// The canonical form is therefore "no surrounding quotes" — that is
/// what the server already writes into every v2 sidecar in the wild,
/// so both the PUT side (server) and the repair side (this CLI) must
/// agree on it.
///
/// Every leading and trailing `"` is removed; quotes in the interior of
/// the string are left alone. Input without surrounding quotes is
/// returned unchanged.
fn normalize_etag(s: &str) -> String {
    let without_leading = s.trim_start_matches('"');
    let unquoted = without_leading.trim_end_matches('"');
    String::from(unquoted)
}
1036
/// v0.9 #106-audit-R2 P2-INT-1: recognise an SSE-S4 encrypted envelope
/// from its four-byte magic prefix.
///
/// Yields `Some(name)` when `body` begins with one of the SSE frame
/// magics (`S4E1`..`S4E6`). Yields `None` for everything else: bodies
/// shorter than four bytes, S4 framed plaintext (`S4F2`), and raw
/// compressed / passthrough bodies.
///
/// This is deliberately a plain 4-byte prefix compare rather than a
/// call to `sse::peek_magic`: `peek_magic` length-gates on the full
/// S4E1/S4E2 header size (36 bytes) and would return `None` for a very
/// short S4E6 stub the way an empty-key edge-case might land. That gate
/// exists for cryptographic frame validity, not for the "is encrypted
/// at all" question answered here. The magic bytes are stable
/// wire-format constants (see `sse::SSE_MAGIC_V{1..6}`) and are echoed
/// here so the repair module has no circular dep on the SSE module's
/// full surface.
fn detect_sse_magic(body: &[u8]) -> Option<&'static str> {
    const SSE_MAGICS: [&str; 6] = ["S4E1", "S4E2", "S4E3", "S4E4", "S4E5", "S4E6"];

    // `get(..4)` is `None` for bodies shorter than the magic.
    let prefix = body.get(..4)?;
    SSE_MAGICS
        .iter()
        .copied()
        .find(|magic| magic.as_bytes() == prefix)
}
1066
1067/// v0.9 #106-audit-R5 P2-R5 (Codex): bounded sidecar fetch.
1068/// HEADs the sidecar key first to learn its size; refuses to GET
1069/// (and thus refuses to allocate) if the size exceeds
1070/// [`MAX_SIDECAR_BODY_BYTES`]. Used by both `verify_sidecar` and
1071/// `classify_one` (sweep) so a multi-GiB corrupt or legacy user
1072/// `.s4index` object can't OOM the operator's repair process.
1073///
1074/// Returns:
1075///   - `Ok(Some(bytes))` when the sidecar exists and fits in the cap
1076///   - `Ok(None)` when the sidecar HEAD returns NotFound (caller
1077///     classifies as `Missing*`)
1078///   - `Err(SidecarFetchOutcome::Other)` when HEAD returns
1079///     Content-Length missing or any other backend error
1080///   - `Err(SidecarFetchOutcome::TooLarge { .. })` when size > cap
1081async fn get_sidecar_bytes_capped(
1082    client: &Client,
1083    bucket: &str,
1084    key: &str,
1085) -> Result<Option<bytes::Bytes>, SidecarFetchOutcome> {
1086    let head = match client.head_object().bucket(bucket).key(key).send().await {
1087        Ok(h) => h,
1088        Err(e) => {
1089            return if is_head_not_found(&e) {
1090                Ok(None)
1091            } else {
1092                Err(SidecarFetchOutcome::Other(format!("HEAD: {e}")))
1093            };
1094        }
1095    };
1096    let size = match head.content_length() {
1097        Some(n) if n >= 0 => n as u64,
1098        Some(_) | None => {
1099            return Err(SidecarFetchOutcome::Other(
1100                "sidecar HEAD returned no Content-Length; refusing to GET unbounded".into(),
1101            ));
1102        }
1103    };
1104    if size > MAX_SIDECAR_BODY_BYTES {
1105        return Err(SidecarFetchOutcome::TooLarge {
1106            size,
1107            cap: MAX_SIDECAR_BODY_BYTES,
1108        });
1109    }
1110    // v0.9 #106-audit-R6 P2-R6 (Codex): pin the GET to the HEAD's
1111    // ETag so a sidecar swap between HEAD and GET can't bypass
1112    // the cap. Without this, an attacker who races
1113    // HEAD(small) → swap(massive) → GET could still OOM the
1114    // process because `collect()` reads whatever the GET response
1115    // delivers, ignoring the HEAD-reported size. With If-Match
1116    // pinned, the swap surfaces as 412 PreconditionFailed → we
1117    // refuse the body without allocating it.
1118    //
1119    // Backends that don't return ETags fall back to a post-GET
1120    // length check below (still a window where collect() runs to
1121    // completion, but the typed `TooLarge` exit replaces what
1122    // would otherwise be a silent OOM-pass).
1123    let raw_etag = head.e_tag().map(str::to_owned);
1124    let get_builder = client.get_object().bucket(bucket).key(key);
1125    let get_builder = match raw_etag {
1126        Some(ref t) => get_builder.if_match(t.clone()),
1127        None => get_builder,
1128    };
1129    match get_builder.send().await {
1130        Ok(resp) => {
1131            let agg = resp
1132                .body
1133                .collect()
1134                .await
1135                .map_err(|e| SidecarFetchOutcome::Other(format!("read body: {e}")))?;
1136            let bytes = agg.into_bytes();
1137            // Defense-in-depth: ETag-less backends bypass
1138            // If-Match; If-Match-non-honouring backends also exist.
1139            // Check the actual body length AFTER collect to catch
1140            // a race-during-collect that exceeded the cap.
1141            if (bytes.len() as u64) > MAX_SIDECAR_BODY_BYTES {
1142                return Err(SidecarFetchOutcome::TooLarge {
1143                    size: bytes.len() as u64,
1144                    cap: MAX_SIDECAR_BODY_BYTES,
1145                });
1146            }
1147            Ok(Some(bytes))
1148        }
1149        Err(e) => {
1150            let s = format!("{e}");
1151            if is_get_not_found(&e) {
1152                // Race: existed at HEAD, gone by GET. Treat as missing.
1153                Ok(None)
1154            } else if s.contains("PreconditionFailed") || s.contains("412") {
1155                // Race: sidecar replaced between HEAD and GET. The
1156                // new sidecar's size is whatever the swap-in is;
1157                // we refuse to load it without re-HEAD'ing under
1158                // operator supervision.
1159                Err(SidecarFetchOutcome::Other(format!(
1160                    "sidecar at {bucket}/{key} was replaced between HEAD and GET (412 \
1161                     PreconditionFailed); re-run when the sidecar is stable"
1162                )))
1163            } else {
1164                Err(SidecarFetchOutcome::Other(format!("GET: {s}")))
1165            }
1166        }
1167    }
1168}
1169
/// Failure modes of `get_sidecar_bytes_capped`.
///
/// Kept separate from [`RepairError`] so each caller can decide how to
/// surface an oversize sidecar (the sweep path reports it as an orphan
/// instead of aborting).
enum SidecarFetchOutcome {
    /// Any failure other than "too large"; the payload is a
    /// human-readable description of what went wrong.
    Other(String),
    /// The sidecar's size exceeds the cap and was not loaded.
    TooLarge {
        /// Observed sidecar size in bytes.
        size: u64,
        /// The cap that `size` exceeded, in bytes.
        cap: u64,
    },
}
1174
1175fn is_head_not_found(
1176    e: &aws_sdk_s3::error::SdkError<aws_sdk_s3::operation::head_object::HeadObjectError>,
1177) -> bool {
1178    matches!(
1179        e,
1180        aws_sdk_s3::error::SdkError::ServiceError(svc)
1181            if matches!(
1182                svc.err(),
1183                aws_sdk_s3::operation::head_object::HeadObjectError::NotFound(_)
1184            )
1185    )
1186}
1187
1188fn is_get_not_found(
1189    e: &aws_sdk_s3::error::SdkError<aws_sdk_s3::operation::get_object::GetObjectError>,
1190) -> bool {
1191    matches!(
1192        e,
1193        aws_sdk_s3::error::SdkError::ServiceError(svc)
1194            if matches!(
1195                svc.err(),
1196                aws_sdk_s3::operation::get_object::GetObjectError::NoSuchKey(_)
1197            )
1198    )
1199}
1200
/// Split a `bucket/key` CLI argument into its two halves.
///
/// Only the **first** `/` acts as the separator, so keys that themselves
/// contain slashes (e.g. `prefix/sub/file.bin`) round-trip cleanly.
///
/// # Errors
///
/// Returns a human-readable message when `arg` contains no `/`, or when
/// either the bucket or the key part is empty.
pub fn parse_bucket_key(arg: &str) -> Result<(&str, &str), String> {
    let Some((bucket, key)) = arg.split_once('/') else {
        return Err(format!("expected `bucket/key`, got {arg:?} — missing `/`"));
    };
    if bucket.is_empty() || key.is_empty() {
        return Err(format!(
            "expected `bucket/key`, got {arg:?} — bucket and key must both be non-empty"
        ));
    }
    Ok((bucket, key))
}
1212
1213#[cfg(test)]
1214mod tests {
1215    use super::*;
1216
1217    #[test]
1218    fn parse_bucket_key_simple() {
1219        assert_eq!(
1220            parse_bucket_key("mybucket/foo.txt"),
1221            Ok(("mybucket", "foo.txt"))
1222        );
1223    }
1224
1225    #[test]
1226    fn parse_bucket_key_with_slashes_in_key() {
1227        assert_eq!(parse_bucket_key("b/a/b/c"), Ok(("b", "a/b/c")));
1228    }
1229
1230    #[test]
1231    fn parse_bucket_key_missing_slash() {
1232        assert!(parse_bucket_key("nokey").is_err());
1233    }
1234
1235    #[test]
1236    fn parse_bucket_key_empty_key() {
1237        assert!(parse_bucket_key("bucket/").is_err());
1238    }
1239
1240    #[test]
1241    fn parse_bucket_key_empty_bucket() {
1242        assert!(parse_bucket_key("/key").is_err());
1243    }
1244
1245    #[test]
1246    fn verify_report_is_clean_truth_table() {
1247        let mk = |status| VerifyReport {
1248            bucket: "b".into(),
1249            key: "k".into(),
1250            status,
1251        };
1252        assert!(
1253            mk(SidecarStatus::Ok {
1254                frame_count: 1,
1255                sidecar_size: 100,
1256            })
1257            .is_clean()
1258        );
1259        assert!(mk(SidecarStatus::LegacyV1 { frame_count: 3 }).is_clean());
1260        // P2-C (Codex R3): single-frame objects intentionally have no
1261        // sidecar — clean state, not divergence.
1262        assert!(mk(SidecarStatus::MissingHarmless { frame_count: 1 }).is_clean());
1263        // Ambiguous (body too large to deep-scan) — report cleanly so
1264        // CI doesn't false-alert; operator sees the hint in stdout.
1265        assert!(
1266            mk(SidecarStatus::MissingUnknown {
1267                size: 10 * 1024 * 1024 * 1024,
1268                cap: 5 * 1024 * 1024 * 1024,
1269            })
1270            .is_clean()
1271        );
1272        // Multi-frame + missing sidecar = real divergence.
1273        assert!(!mk(SidecarStatus::MissingDivergent { frame_count: 5 }).is_clean());
1274        assert!(
1275            !mk(SidecarStatus::StaleEtag {
1276                sidecar_etag: "a".into(),
1277                live_etag: "b".into(),
1278            })
1279            .is_clean()
1280        );
1281        assert!(
1282            !mk(SidecarStatus::StaleSize {
1283                sidecar_size: 1,
1284                live_size: 2,
1285            })
1286            .is_clean()
1287        );
1288        assert!(
1289            !mk(SidecarStatus::DecodeError {
1290                message: "bad".into()
1291            })
1292            .is_clean()
1293        );
1294    }
1295
1296    #[test]
1297    fn delete_policy_allows_truth_table() {
1298        let missing = OrphanReason::PairedMissing;
1299        let etag = OrphanReason::PairedEtagMismatch {
1300            sidecar_etag: "a".into(),
1301            live_etag: "b".into(),
1302        };
1303        let size = OrphanReason::PairedSizeMismatch {
1304            sidecar_size: 1,
1305            live_size: 2,
1306        };
1307        let undecodable = OrphanReason::SidecarUndecodable {
1308            message: "bad bytes".into(),
1309        };
1310
1311        // DryRun: never deletes anything.
1312        assert!(!DeletePolicy::DryRun.allows(&missing));
1313        assert!(!DeletePolicy::DryRun.allows(&etag));
1314        assert!(!DeletePolicy::DryRun.allows(&size));
1315        assert!(!DeletePolicy::DryRun.allows(&undecodable));
1316
1317        // PairBoundOnly: deletes the three pair-bound categories,
1318        // skips Undecodable (HIGH-2 review fix: protects v0.8.17
1319        // legacy reserved-name user data).
1320        assert!(DeletePolicy::PairBoundOnly.allows(&missing));
1321        assert!(DeletePolicy::PairBoundOnly.allows(&etag));
1322        assert!(DeletePolicy::PairBoundOnly.allows(&size));
1323        assert!(!DeletePolicy::PairBoundOnly.allows(&undecodable));
1324
1325        // IncludeUndecodable: explicit operator opt-in deletes all.
1326        assert!(DeletePolicy::IncludeUndecodable.allows(&missing));
1327        assert!(DeletePolicy::IncludeUndecodable.allows(&etag));
1328        assert!(DeletePolicy::IncludeUndecodable.allows(&size));
1329        assert!(DeletePolicy::IncludeUndecodable.allows(&undecodable));
1330    }
1331
1332    /// P3-A (Codex R5): a v2 sidecar with size binding but no ETag
1333    /// (rebuilt on an ETag-less backend) classifies as `Ok`, NOT
1334    /// `LegacyV1`. The latter would tell operators to "repair to
1335    /// upgrade" a sidecar already at the highest binding level the
1336    /// backend supports. This test asserts the exact pattern the
1337    /// status match in `verify_sidecar` relies on.
1338    #[test]
1339    fn verify_status_classifies_etag_less_v2_as_ok_not_legacy() {
1340        // The actual match arms in `verify_sidecar`:
1341        //
1342        //   (Some(s), _) if Some(s) != live → StaleEtag
1343        //   (_, Some(z)) if z != live_size → StaleSize
1344        //   (_, Some(_))                   → Ok        // P3-A fix
1345        //   (None, None)                   → LegacyV1
1346        //
1347        // Mirror that decision tree inline so refactors to the real
1348        // function can't quietly regress without flipping this test.
1349        fn classify(side_etag: Option<&str>, side_size: Option<u64>) -> &'static str {
1350            const LIVE_ETAG: Option<&str> = Some("xyz");
1351            const LIVE_SIZE: u64 = 100;
1352            match (side_etag, side_size) {
1353                (Some(s), _) if Some(s) != LIVE_ETAG => "StaleEtag",
1354                (_, Some(z)) if z != LIVE_SIZE => "StaleSize",
1355                (_, Some(_)) => "Ok",
1356                (_, None) => "LegacyV1",
1357            }
1358        }
1359        // P3-A core case: ETag-less repair stamps (None, Some(size)).
1360        // Must classify as Ok, not LegacyV1.
1361        assert_eq!(classify(None, Some(100)), "Ok");
1362        // Full v2 binding with matching etag + size.
1363        assert_eq!(classify(Some("xyz"), Some(100)), "Ok");
1364        // True v1 legacy (neither field) still surfaces as LegacyV1.
1365        assert_eq!(classify(None, None), "LegacyV1");
1366        // Mismatches still detected.
1367        assert_eq!(classify(Some("abc"), Some(100)), "StaleEtag");
1368        assert_eq!(classify(Some("xyz"), Some(999)), "StaleSize");
1369    }
1370
1371    /// P2-D (Codex R4): on an ETag-less backend the server stamps
1372    /// `source_etag = None`; the verifier MUST treat that as the
1373    /// legacy / best-effort path (Ok / LegacyV1), not flag every
1374    /// such sidecar as stale. This unit test pins the discriminator
1375    /// the `verify_sidecar` status-match arm relies on (the
1376    /// `Option<&str>` equality).
1377    #[test]
1378    fn etag_option_equality_treats_none_none_as_match() {
1379        let side: Option<&str> = None;
1380        let live: Option<&str> = None;
1381        assert!(side == live, "None == None must hold for the no-ETag path");
1382
1383        let side: Option<&str> = Some("abc");
1384        let live: Option<&str> = Some("abc");
1385        assert!(side == live);
1386
1387        let side: Option<&str> = Some("");
1388        let live: Option<&str> = None;
1389        assert!(side != live, "Some(\"\") must NOT equal None — P2-D guard");
1390    }
1391
1392    #[test]
1393    fn normalize_etag_strips_surrounding_quotes() {
1394        // aws-sdk-s3 returns the wire form (with quotes); s3s `value()`
1395        // returns the stripped form. The sidecar's `source_etag` is
1396        // canonical-stripped, so both sides must agree.
1397        assert_eq!(normalize_etag("\"abc-1\""), "abc-1");
1398        // Multipart ETags are `<hex>-<n>` and still get quoted on wire.
1399        assert_eq!(
1400            normalize_etag("\"067e3167e8c481c2aea3650ebb273198-2\""),
1401            "067e3167e8c481c2aea3650ebb273198-2"
1402        );
1403        // Already-stripped form is a no-op (the helper is idempotent so
1404        // callers don't need to branch on the source).
1405        assert_eq!(normalize_etag("abc-1"), "abc-1");
1406        // Defensive: an empty etag stays empty (head responses with no
1407        // ETag header round-trip to the empty string in head_main).
1408        assert_eq!(normalize_etag(""), "");
1409    }
1410
1411    /// P2-R5 (Codex R5 audit): the bounded sidecar fetch helper
1412    /// must enforce [`MAX_SIDECAR_BODY_BYTES`] and surface a typed
1413    /// `SidecarTooLarge` error before allocating. Pin the wire
1414    /// shape of the variant so a future refactor can't silently
1415    /// drop the cap and re-introduce the OOM vector.
1416    #[test]
1417    fn sidecar_too_large_error_shape() {
1418        let err = RepairError::SidecarTooLarge {
1419            bucket: "b".into(),
1420            key: "k.s4index".into(),
1421            size: 2 * MAX_SIDECAR_BODY_BYTES,
1422            cap: MAX_SIDECAR_BODY_BYTES,
1423        };
1424        let rendered = format!("{err}");
1425        assert!(
1426            rendered.contains("b/k.s4index"),
1427            "Display must mention bucket/key — got {rendered:?}"
1428        );
1429        assert!(
1430            rendered.contains(&MAX_SIDECAR_BODY_BYTES.to_string()),
1431            "Display must mention the cap — got {rendered:?}"
1432        );
1433        assert!(
1434            rendered.contains("OOM") || rendered.contains("legacy") || rendered.contains("attack"),
1435            "Display must hint at the threat model — got {rendered:?}"
1436        );
1437        match err {
1438            RepairError::SidecarTooLarge {
1439                bucket,
1440                key,
1441                size,
1442                cap,
1443            } => {
1444                assert_eq!(bucket, "b");
1445                assert_eq!(key, "k.s4index");
1446                assert_eq!(size, 2 * MAX_SIDECAR_BODY_BYTES);
1447                assert_eq!(cap, MAX_SIDECAR_BODY_BYTES);
1448            }
1449            _ => unreachable!("SidecarTooLarge must match its own variant"),
1450        }
1451    }
1452
1453    /// P2-R5: the cap value is load-bearing — too small breaks
1454    /// legitimate sidecars, too large defeats the OOM guard. Pin
1455    /// it at the codec-spec-derived ceiling (16M frames × 32 B per
1456    /// entry + header ≈ 512 MiB, rounded up with safety margin to
1457    /// 600 MiB). Bump only with explicit operator justification.
1458    #[test]
1459    fn max_sidecar_body_bytes_cap_value_pinned() {
1460        assert_eq!(MAX_SIDECAR_BODY_BYTES, 600 * 1024 * 1024);
1461        // Sanity: cap must comfortably exceed the codec spec's
1462        // max legitimate sidecar geometry. Computed dynamically
1463        // from the codec constants so a bump to either side
1464        // surfaces here (clippy flags `assert!(const)` as
1465        // pointless, so we use `assert_eq!` against `false` for
1466        // the negative — if the cap ever DROPS below the spec
1467        // max, this fails loudly).
1468        let spec_max_legitimate: u64 = s4_codec::index::MAX_FRAMES
1469            * (s4_codec::index::ENTRY_BYTES as u64)
1470            + (s4_codec::index::HEADER_FIXED_V2 as u64)
1471            + (s4_codec::index::MAX_ETAG_BYTES as u64);
1472        assert!(
1473            MAX_SIDECAR_BODY_BYTES > spec_max_legitimate,
1474            "cap {MAX_SIDECAR_BODY_BYTES} must exceed spec-max {spec_max_legitimate}",
1475        );
1476    }
1477
1478    /// P2-R3 (Codex R3 audit): `repair-sidecar` on a passthrough /
1479    /// raw-bytes object would previously write an empty sidecar
1480    /// that silently breaks Range GET. Pin the typed error's wire
1481    /// shape so a future refactor can't quietly drop the
1482    /// `NotFramed` branch.
1483    #[test]
1484    fn not_framed_error_shape() {
1485        let err = RepairError::NotFramed {
1486            bucket: "b".into(),
1487            key: "k".into(),
1488        };
1489        let rendered = format!("{err}");
1490        assert!(rendered.contains("b/k"), "Display must mention bucket/key");
1491        assert!(
1492            rendered.contains("S4F2") || rendered.contains("passthrough"),
1493            "Display must hint at the framing reason"
1494        );
1495        // Pattern-match guard: any rename of bucket/key here is a
1496        // compile error both here AND at the repair_sidecar
1497        // construction site.
1498        match err {
1499            RepairError::NotFramed { bucket, key } => {
1500                assert_eq!(bucket, "b");
1501                assert_eq!(key, "k");
1502            }
1503            _ => unreachable!("NotFramed must match its own variant"),
1504        }
1505    }
1506
1507    /// CI-unblock (post-v0.9 audit): the MinIO E2E race test
1508    /// (`repair_sidecar_detects_post_get_overwrite_race`) is
1509    /// inherently timing-dependent and flakes on fast CI runners
1510    /// where the entire repair pipeline completes before the
1511    /// spawned overwrite lands. This deterministic guard pins
1512    /// the error type's wire shape (Display + field accessors)
1513    /// so the post-PUT divergence detector branch in
1514    /// `repair_sidecar` can't be silently refactored into a
1515    /// different error variant without flipping this assertion.
1516    #[test]
1517    fn overwritten_during_repair_error_shape() {
1518        let err = RepairError::OverwrittenDuringRepair {
1519            bucket: "b".into(),
1520            key: "k".into(),
1521            head_etag: "abc-1".into(),
1522        };
1523        let rendered = format!("{err}");
1524        assert!(
1525            rendered.contains("b/k"),
1526            "Display must mention bucket/key — got {rendered:?}"
1527        );
1528        assert!(
1529            rendered.contains("abc-1"),
1530            "Display must mention the pre-race ETag — got {rendered:?}"
1531        );
1532        assert!(
1533            rendered.contains("re-run") || rendered.contains("overwritten"),
1534            "Display must hint that the operator should re-run — got {rendered:?}"
1535        );
1536        // Pattern-match guard: any future destructure of this
1537        // variant elsewhere in the crate must keep these three
1538        // named fields. A rename here would surface as a compile
1539        // error here AND at the production call sites in
1540        // repair_sidecar / classify_missing_sidecar.
1541        match err {
1542            RepairError::OverwrittenDuringRepair {
1543                bucket,
1544                key,
1545                head_etag,
1546            } => {
1547                assert_eq!(bucket, "b");
1548                assert_eq!(key, "k");
1549                assert_eq!(head_etag, "abc-1");
1550            }
1551            _ => unreachable!("OverwrittenDuringRepair must match its own variant"),
1552        }
1553    }
1554
1555    #[test]
1556    fn default_repair_body_cap_matches_max_body_default() {
1557        // Tied to s4-server `--max-body-bytes` default (5 GiB, #178). If
1558        // the default changes there, update both in lockstep.
1559        assert_eq!(DEFAULT_REPAIR_BODY_BYTES_CAP, 5 * 1024 * 1024 * 1024);
1560    }
1561
1562    /// v0.9 #106-audit-R2 P2-INT-1: `detect_sse_magic` returns the
1563    /// correct frame label for every S4Ex prefix, and `None` for the
1564    /// plaintext frame magic (`S4F2`) and short / random inputs. The
1565    /// helper is the discriminator the `EncryptedSidecarUnsupported`
1566    /// branch in `repair_sidecar` relies on; pinning its outputs
1567    /// guards against a silent regression that would resurrect the
1568    /// confusing `FrameScan` failure on encrypted bodies.
1569    #[test]
1570    fn detect_sse_magic_covers_all_envelope_variants() {
1571        assert_eq!(detect_sse_magic(b"S4E1\0\0\0\0"), Some("S4E1"));
1572        assert_eq!(detect_sse_magic(b"S4E2\0\0\0\0"), Some("S4E2"));
1573        assert_eq!(detect_sse_magic(b"S4E3\0\0\0\0"), Some("S4E3"));
1574        assert_eq!(detect_sse_magic(b"S4E4\0\0\0\0"), Some("S4E4"));
1575        assert_eq!(detect_sse_magic(b"S4E5\0\0\0\0"), Some("S4E5"));
1576        assert_eq!(detect_sse_magic(b"S4E6\0\0\0\0"), Some("S4E6"));
1577        // S4F2 = plaintext framed body; must NOT match (or repair
1578        // would falsely reject every framed object as encrypted).
1579        assert_eq!(detect_sse_magic(b"S4F2\0\0\0\0"), None);
1580        // Random bytes, short inputs, and empty body all return None.
1581        assert_eq!(detect_sse_magic(b"NOPE\0"), None);
1582        assert_eq!(detect_sse_magic(b"S4"), None);
1583        assert_eq!(detect_sse_magic(b""), None);
1584    }
1585
1586    /// v0.9 #106-audit-R2 P2-INT-1: pin the Display text + struct shape
1587    /// of the new variant so refactors can't silently drop the operator
1588    /// guidance (server-mode rebuild / re-PUT) or rename the fields the
1589    /// CLI's error formatter reads. Mirrors the existing
1590    /// `overwritten_during_repair_error_shape` test pattern.
1591    #[test]
1592    fn repair_sidecar_rejects_encrypted_body_with_typed_error() {
1593        let err = RepairError::EncryptedSidecarUnsupported {
1594            bucket: "b".into(),
1595            key: "k".into(),
1596            message: "body magic S4E6 indicates SSE-S4 envelope".into(),
1597        };
1598        let rendered = format!("{err}");
1599        assert!(
1600            rendered.contains("b/k"),
1601            "Display must mention bucket/key — got {rendered:?}"
1602        );
1603        assert!(
1604            rendered.contains("S4E6"),
1605            "Display must echo the body magic for operator triage — got {rendered:?}"
1606        );
1607        assert!(
1608            rendered.contains("encrypted-sidecar repair"),
1609            "Display must name the failure mode — got {rendered:?}"
1610        );
1611        assert!(
1612            rendered.contains("re-PUT") || rendered.contains("server-mode"),
1613            "Display must hint at the recovery path — got {rendered:?}"
1614        );
1615        match err {
1616            RepairError::EncryptedSidecarUnsupported {
1617                bucket,
1618                key,
1619                message,
1620            } => {
1621                assert_eq!(bucket, "b");
1622                assert_eq!(key, "k");
1623                assert!(message.contains("S4E6"));
1624            }
1625            _ => unreachable!("EncryptedSidecarUnsupported must match its own variant"),
1626        }
1627    }
1628}
s4_server/repair.rs

s4_server/
repair.rs